whisper.zsh 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. #!/bin/zsh
  # Checkout location of whisper.cpp; every sub-command works inside it.
  export WHISPER_PATH="${HOME}/.whisper"
  # Models passed to `make` by the build step. whisper.cpp names its turbo
  # model `large-v3-turbo` (hyphen); the dotted spelling has no matching target.
  export WHISPER_MODELS=( 'medium.en' 'tiny.en' 'small.en' 'large-v3-turbo' )
  # Model loaded by `stream`: the first entry of WHISPER_MODELS.
  export WHISPER_MODEL=${WHISPER_MODELS[@]:0:1}

  # Makes the cursor visible again; registered below to run when the script exits.
  cleanup() {
    tput cnorm
  }
  trap cleanup EXIT
  # Hide the cursor for the lifetime of the script.
  tput civis
  # Runs the make target named by $1 from inside the whisper.cpp checkout.
  # Globals:   WHISPER_PATH (read)
  # Arguments: $1 - make target; callers pass a model name
  # Returns:   non-zero when $1 is empty, the checkout cannot be entered,
  #            or make fails. Leaves the working directory at WHISPER_PATH.
  function build_model {
    if [[ -z "$1" ]]; then
      echo "build_model: missing model name" >&2
      return 1
    fi
    # Bail out when the checkout is unreachable so make never runs in
    # whatever directory the caller happened to be in.
    cd "$WHISPER_PATH" || return 1
    # NOTE(review): a bare `-j` puts no cap on parallel jobs in GNU make and
    # "$1" is then taken as the target — confirm unlimited jobs is intended.
    make -j "$1"
  }
  # Builds every model listed in WHISPER_MODELS, then configures and compiles
  # whisper.cpp with CMake (SDL2 enabled) into "$WHISPER_PATH/build".
  # Globals:   WHISPER_PATH, WHISPER_MODELS (read)
  # Returns:   non-zero when the checkout cannot be entered or either cmake
  #            step fails. A failing model is reported and skipped, so one bad
  #            model does not block the rest of the build.
  function build_whisper {
    local model
    cd "$WHISPER_PATH" || return 1
    for model in "${WHISPER_MODELS[@]}"; do
      build_model "$model" \
        || echo "build_whisper: model '$model' failed to build; continuing" >&2
    done
    # Do not attempt the compile step on top of a failed configure.
    cmake -B build -DWHISPER_SDL2=ON || return 1
    cmake --build build --config Release
  }
  # First-run bootstrap. When the checkout is missing, `build` clones
  # whisper.cpp (shallow) and builds it; any other sub-command is refused
  # with a hint, because nothing else can work without the checkout.
  if [[ ! -d "$WHISPER_PATH" ]]; then
    if [[ "$1" == "build" ]]; then
      # Without a successful clone there is nothing to build.
      git clone git@github.com:ggerganov/whisper.cpp.git --depth 1 "$WHISPER_PATH" || exit 1
      build_whisper
      # Report the build's own status instead of an unconditional success.
      exit $?
    else
      echo "$WHISPER_PATH does not exist; run \`whisper.zsh build\` to set up." >&2
      exit 1
    fi
  fi
  # `update`: pull the latest whisper.cpp sources, discard the previous CMake
  # build directory and rebuild everything from scratch.
  if [[ "$1" == "update" ]]; then
    # `rm -rf build` below is relative: never let it run outside the checkout.
    cd "$WHISPER_PATH" || exit 1
    # Keep the existing, working build when the sources could not be updated.
    git pull || exit 1
    rm -rf build
    build_whisper
    exit $?
  fi
  # Prints the menu key "$1)"; it is drawn in red when $1 equals the
  # currently active selection $2.
  function stream_menu_key {
    if [[ "$1" == "$2" ]]; then
      printf "\e[31m%s)\e[0m" "$1"
    else
      printf "%s)" "$1"
    fi
  }

  # Cleans the raw whisper-stream capture in file $1 in place: drops everything
  # up to a "[2K<CR>" sequence (presumably the terminal erase-line code emitted
  # while a segment is being refined), strips one leading space, blanks the
  # "[Start speaking]" banner and deletes lines made only of spaces and
  # , . _ * - characters. The hyphen sits last in the bracket expression so it
  # is a literal rather than a range operator.
  function stream_clean_transcript {
    gsed -i 's/^.*\[2K\r//g;s/^ //g;s/^\[Start speaking\]$//g;/^[ ,._*-]*$/d;' "$1"
  }

  # Strips leading spaces and deletes blank lines from file $1 in place.
  # `\+` is required: in sed's basic regex syntax a bare `+` is a literal plus.
  function stream_trim_transcript {
    gsed -i 's/^ \+//;/^ *$/d' "$1"
  }

  # Writes the LLM prompt to file $3: the base copy-editing instructions, an
  # optional style hint chosen by $1 (e = LLM prompt, i = programmer prompt,
  # o = prose, anything else = none), then the transcript read from file $2.
  function stream_write_prompt {
    local mode="$1" transcript_file="$2" prompt_file="$3"
    printf '%s\n' \
      "" \
      "You are an expert in copy editing and audio transcription. Your assignment is to expertly improve the quality of audio transcriptions." \
      "The rules of the task are as follows:" \
      "- Keep the transcription largely unaltered. You may remove or replace text when the speaker has corrected or repeated themselves." \
      "- Respond only with the cleaned-up transcription. Your own notes, labels, and descriptions are strictly NOT allowed! Headings and labels are NEVER allowed!" \
      "- Small changes to punctuation and grammar are permitted, so long as they stay true to the original tone of the transcription." \
      "- Do NOT confirm the request, instead go straight to the cleaned-up transcription!" \
      "- Prioritize delivering the transcription. Avoid non-essential comments or filler phrases such as 'Here's the...', or labels such as 'Transcription:'." \
      "- The speaker may give directions such as 'scratch that' or 'actually, I mean...', indicating that previous text should be replaced. Pay attention to these directions and edit the dictation accordingly." \
      "- Use Markdown for your output." \
      "The audio transcription you will receive is from a single speaker dictating into his laptop microphone." \
      "" > "$prompt_file"
    case "$mode" in
      e)
        printf '%s\n' "The speaker is dictating a text prompt to be used for a Large Language model. Therefore, this transcription must be formatted with the aim of being an excellent LLM prompt." >> "$prompt_file"
        ;;
      i)
        printf '%s\n' "The speaker is a programmer who is dictating a text prompt to be used for a Large Language model. Therefore, this transcription must be formatted with the aim of being an excellent LLM prompt. The context of the dictation is related to programming, so be sure to format parts of the text that appear to be code accordingly." >> "$prompt_file"
        ;;
      o)
        printf '%s\n' "The speaker is a professional who is writing prose. Take this into account and ensure that the result is professionally formatted with correct grammar and tone." >> "$prompt_file"
        ;;
    esac
    printf '%s\n' "" "The transcription to be clean up is as follows:" "" "" "" >> "$prompt_file"
    cat "$transcript_file" >> "$prompt_file"
  }

  # Stops the background whisper-stream / ollama processes that are still
  # tracked and removes the per-session temp files. Safe to call repeatedly:
  # everything it releases is forgotten afterwards.
  function stream_cleanup {
    local tmp
    if [[ -n "$whisper_pid" ]]; then
      kill -9 "$whisper_pid" &>/dev/null
      whisper_pid=""
    fi
    if [[ -n "$ollama_pid" ]]; then
      kill "$ollama_pid" &>/dev/null
      ollama_pid=""
    fi
    for tmp in "$whisper_content_write_tmp_file" "$whisper_content_read_tmp_file" "$ollama_prompt_file"; do
      if [[ -n "$tmp" ]]; then
        rm -f -- "$tmp"
      fi
    done
    whisper_content_write_tmp_file=""
    whisper_content_read_tmp_file=""
    ollama_prompt_file=""
  }

  # `stream`: live dictation UI. whisper-stream transcribes the microphone in
  # the background into a temp file; the loop redraws the cleaned transcript
  # every 0.1 s and reacts to single key presses:
  #   a  save the transcript to a new temp file, print its path and quit
  #   r  copy the transcript to the clipboard (pbcopy)
  #   s  discard this session and start a new one
  #   t  quit
  #   n/e/i/o  have ollama rephrase the transcript (normal / prompt /
  #            programmer / prose); its output replaces the displayed text
  # Any key press stops the live transcription first.
  if [[ "$1" == "stream" ]]; then
    whisper_pid=""
    ollama_pid=""
    whisper_content_write_tmp_file=""
    whisper_content_read_tmp_file=""
    ollama_prompt_file=""
    # Extend the cursor-restoring EXIT handler so that Ctrl-C or a TERM signal
    # also stops the background processes and deletes the temp files.
    trap 'stream_cleanup; cleanup' EXIT
    trap 'exit 130' INT
    trap 'exit 143' TERM

    retry=true
    while $retry; do
      # clear
      whisper_content_write_tmp_file="$(mktemp)" || exit 1
      whisper_content_read_tmp_file="$(mktemp)" || exit 1
      ollama_prompt_file="$(mktemp)" || exit 1
      "$WHISPER_PATH/build/bin/whisper-stream" --keep-context --flash-attn \
        --length 10000 --step 2500 --keep 1000 --threads 4 \
        --model "$WHISPER_PATH/models/ggml-${WHISPER_MODEL}.bin" \
        2>/dev/null > "$whisper_content_write_tmp_file" &
      whisper_pid=$!
      oldselection=""
      selection=""
      killed=false
      while true; do
        # Work on a snapshot so the writer can keep appending undisturbed.
        cp "$whisper_content_write_tmp_file" "$whisper_content_read_tmp_file"
        stream_clean_transcript "$whisper_content_read_tmp_file"
        clear
        if $killed; then
          printf "Status: \e[31mStopped\e[0m"
        else
          printf "Status: \e[32mRunning \e[0m"
        fi
        printf "\nAction: a) Save "
        stream_menu_key r "$selection"
        printf " Copy s) Retry t) Quit\nRephrase: "
        stream_menu_key n "$selection"
        printf " Normal "
        stream_menu_key e "$selection"
        printf " Prompt "
        stream_menu_key i "$selection"
        printf " Programmer "
        stream_menu_key o "$selection"
        printf " Prose\n\n"
        # print -r keeps backslashes in the transcript literal.
        print -r -- "$(fold -w 80 -s "$whisper_content_read_tmp_file")"
        # Wait up to 0.1 s for one key. On timeout zsh leaves `selection`
        # unchanged, so the last key keeps being seen on later passes;
        # `oldselection` is what prevents its action from running again.
        read -k1 -s -t 0.1 selection
        if [[ -z "$selection" ]]; then
          continue
        fi
        # Any key ends the live transcription. The PID is forgotten after the
        # first kill so a recycled PID can never be signalled later.
        if [[ -n "$whisper_pid" ]]; then
          kill -9 "$whisper_pid" &>/dev/null
          whisper_pid=""
        fi
        killed=true
        if [[ "$oldselection" == "$selection" ]]; then
          continue
        fi
        oldselection="$selection"
        case "$selection" in
          a)
            saved="$(mktemp)"
            cp "$whisper_content_read_tmp_file" "$saved"
            echo "$saved"
            retry=false
            break
            ;;
          r)
            pbcopy < "$whisper_content_read_tmp_file"
            ;;
          s)
            break
            ;;
          t)
            retry=false
            break
            ;;
          n|e|i|o)
            stream_trim_transcript "$whisper_content_read_tmp_file"
            stream_write_prompt "$selection" "$whisper_content_read_tmp_file" "$ollama_prompt_file"
            # Only one rephrase job may write to the display file at a time.
            if [[ -n "$ollama_pid" ]]; then
              kill "$ollama_pid" &>/dev/null
            fi
            ollama run qwen3:4b --keepalive=30s "$(cat "$ollama_prompt_file")" \
              2>/dev/null > "$whisper_content_write_tmp_file" &
            ollama_pid=$!
            ;;
        esac
      done
      stream_cleanup
    done
    exit 0
  fi