#!/bin/zsh
#
# Build, update and drive a local whisper.cpp checkout for live dictation.
# The script refers to itself as `whisper.zsh` in its help message.
#
# Usage:
#   whisper.zsh build    Clone whisper.cpp into $WHISPER_PATH (only when that
#                        directory does not exist yet) and build it.
#   whisper.zsh update   `git pull` the checkout, drop build/, and rebuild.
#   whisper.zsh stream   Run whisper-stream in the background and show an
#                        interactive menu over the live transcript.
#
# External commands used: tput, git, make, cmake, mktemp, gsed, fold, pbcopy,
# ollama, and the whisper-stream binary produced by the build.

export WHISPER_PATH="${HOME}/.whisper"
export WHISPER_MODELS=(
  'medium.en'
  'tiny.en'
  'small.en'
  'large-v3.turbo'
)
# The first entry of WHISPER_MODELS is the model `stream` loads.
export WHISPER_MODEL=${WHISPER_MODELS[@]:0:1}

# State shared between the stream loop and the exit handler. Everything starts
# empty so that cleanup is safe to run before `stream` has created anything.
whisper_pid=""
whisper_content_write_tmp_file=""
whisper_content_read_tmp_file=""
ollama_prompt_file=""

# Kill the background whisper-stream process once, then forget its PID so a
# later call cannot signal an unrelated process that has reused the number.
stop_whisper() {
  if [[ -n "$whisper_pid" ]]; then
    # SIGKILL stops it immediately; its output file is reused for the ollama
    # result right after a rephrase key is pressed.
    kill -9 "$whisper_pid" &>/dev/null
    whisper_pid=""
  fi
}

# Delete whichever per-session temp files currently exist.
remove_temp_files() {
  local tmp_file
  for tmp_file in \
    "$whisper_content_write_tmp_file" \
    "$whisper_content_read_tmp_file" \
    "$ollama_prompt_file"; do
    if [[ -n "$tmp_file" ]]; then
      rm -f -- "$tmp_file"
    fi
  done
}

# Exit handler: restore the cursor hidden by `tput civis`, stop the recorder
# and remove temp files, whichever way the script ends.
cleanup() {
  tput cnorm
  stop_whisper
  remove_temp_files
}

trap cleanup EXIT
# Turn Ctrl-C / TERM into a normal exit so the EXIT trap above always runs.
trap 'exit 130' INT
trap 'exit 143' TERM

tput civis

# Run `make -j <name>` inside the checkout.
# Arguments: $1 - model name, passed to make verbatim.
# NOTE(review): presumably the whisper.cpp Makefile has a target per model
# name; confirm that every WHISPER_MODELS entry is a valid target.
function build_model {
  cd "$WHISPER_PATH" || return 1
  make -j "$1"
}

# Run build_model for every configured model, then configure and build the
# project with cmake (SDL2 enabled) in Release configuration.
function build_whisper {
  local model
  cd "$WHISPER_PATH" || return 1
  for model in "${WHISPER_MODELS[@]}"; do
    build_model "$model"
  done
  cmake -B build -DWHISPER_SDL2=ON
  cmake --build build --config Release
}

# Print the menu label "<key>)", in red when <key> is the current selection.
# Globals:   selection (read)
# Arguments: $1 - the key character
print_key() {
  if [[ "$selection" == "$1" ]]; then
    printf "\e[31m%s)\e[0m" "$1"
  else
    printf "%s)" "$1"
  fi
}

# Write the LLM instructions for a rephrase request to $ollama_prompt_file,
# replacing any previous content.
# Globals:   ollama_prompt_file (read)
# Arguments: $1 - rephrase key: n (normal), e (prompt), i (programmer),
#                 o (prose)
write_ollama_prompt() {
  # Backslash-newline inside the quotes is removed by the shell, so the text
  # stays exactly as one long line. The line ending in "NEVER allowed! " is
  # the exception: it ends in a real newline that is part of the prompt.
  echo " You are an expert in copy editing and audio transcription. \
Your assignment is to expertly improve the quality of audio transcriptions. \
The rules of the task are as follows: \
- Keep the transcription largely unaltered. You may remove or replace text when the speaker has corrected or repeated themselves. \
- Respond only with the cleaned-up transcription. Your own notes, labels, and descriptions are strictly NOT allowed! Headings and labels are NEVER allowed! 
- Small changes to punctuation and grammar are permitted, so long as they stay true to the original tone of the transcription. \
- Do NOT confirm the request, instead go straight to the cleaned-up transcription! \
- Prioritize delivering the transcription. Avoid non-essential comments or filler phrases such as 'Here's the...', or labels such as 'Transcription:'. \
- The speaker may give directions such as 'scratch that' or 'actually, I mean...', indicating that previous text should be replaced. Pay attention to these directions and edit the dictation accordingly. \
- Use Markdown for your output. \
The audio transcription you will receive is from a single speaker dictating into his laptop microphone. " > "$ollama_prompt_file"

  # "n" (normal) adds no style-specific paragraph.
  case "$1" in
    e)
      echo "The speaker is dictating a text prompt to be used for a Large Language model. \
Therefore, this transcription must be formatted with the aim of being an excellent LLM prompt." >> "$ollama_prompt_file"
      ;;
    i)
      echo "The speaker is a programmer who is dictating a text prompt to be used for a Large Language model. \
Therefore, this transcription must be formatted with the aim of being an excellent LLM prompt. \
The context of the dictation is related to programming, so be sure to format parts of the text that appear to be code accordingly." >> "$ollama_prompt_file"
      ;;
    o)
      echo "The speaker is a professional who is writing prose. \
Take this into account and ensure that the result is professionally formatted with correct grammar and tone." >> "$ollama_prompt_file"
      ;;
  esac

  # zsh's echo expands the \n escapes below into blank lines.
  echo " The transcription to be clean up is as follows:\n\n " >> "$ollama_prompt_file"
}

# Without a checkout only `build` can do anything useful.
if [ ! -d "$WHISPER_PATH" ]; then
  if [[ "$1" == "build" ]]; then
    git clone git@github.com:ggerganov/whisper.cpp.git --depth 1 "$WHISPER_PATH" || exit 1
    build_whisper
    exit 0
  else
    echo "$WHISPER_PATH does not exist; run \`whisper.zsh build\` to set up."
    exit 1
  fi
fi

if [[ "$1" == "update" ]]; then
  # Bail out if cd fails: the `rm -rf build` below must never run elsewhere.
  cd "$WHISPER_PATH" || exit 1
  git pull || exit 1
  rm -rf build
  build_whisper
  exit 0
fi

if [[ "$1" == "stream" ]]; then
  retry=true
  # One pass of this loop is one recording session; "s" starts a fresh one.
  while $retry; do
    # clear
    whisper_content_write_tmp_file="$(mktemp)" || exit 1
    whisper_content_read_tmp_file="$(mktemp)" || exit 1
    ollama_prompt_file="$(mktemp)" || exit 1

    # The recorder writes its transcript into the "write" file.
    "$WHISPER_PATH/build/bin/whisper-stream" \
      --keep-context \
      --flash-attn \
      --length 10000 \
      --step 2500 \
      --keep 1000 \
      --threads 4 \
      --model "$WHISPER_PATH/models/ggml-${WHISPER_MODEL}.bin" \
      2>/dev/null > "$whisper_content_write_tmp_file" &
    whisper_pid=$!

    oldselection=""
    selection=""
    killed=false

    # UI loop: redraw from a cleaned snapshot of the write file, then poll the
    # keyboard for up to 0.1 s.
    while true; do
      cp "$whisper_content_write_tmp_file" "$whisper_content_read_tmp_file"
      # Strip everything up to a "[2K\r" sequence, one leading space, the
      # "[Start speaking]" banner, and lines holding only spaces/punctuation.
      gsed -i 's/^.*\[2K\r//g;s/^ //g;s/^\[Start speaking\]$//g;/^[ ,.\-\_\*]*$/d;' "$whisper_content_read_tmp_file"

      clear
      if $killed; then
        printf "Status: \e[31mStopped\e[0m"
      else
        printf "Status: \e[32mRunning \e[0m"
      fi

      printf "\nAction: a) Save "
      print_key r
      printf " Copy s) Retry t) Quit\nRephrase: "
      print_key n
      printf " Normal "
      print_key e
      printf " Prompt "
      print_key i
      printf " Programmer "
      print_key o
      printf " Prose\n\n"

      echo "$(fold -w 80 -s "$whisper_content_read_tmp_file")"

      # NOTE(review): this appears to rely on zsh's `read -t` leaving
      # `selection` untouched on timeout, which is why a repeated value is
      # filtered through `oldselection` below; confirm against the zsh manual.
      read -k1 -s -t 0.1 selection
      case "$selection" in
        "") ;;
        *)
          # Any key press ends the recording.
          stop_whisper
          killed=true
          if [[ "$oldselection" == "$selection" ]]; then
            continue
          fi
          oldselection="$selection"

          case "$selection" in
            a)
              # Save: copy the transcript to a new temp file that is kept
              # after exit, and print its path.
              saved="$(mktemp)"
              cp "$whisper_content_read_tmp_file" "$saved"
              echo "$saved"
              retry=false
              break
              ;;
            r)
              pbcopy < "$whisper_content_read_tmp_file"
              ;;
            s)
              break
              ;;
            t)
              retry=false
              break
              ;;
            n|e|i|o)
              write_ollama_prompt "$selection"
              # Drop leading spaces and blank lines before handing the
              # transcript to the model. `+` is a literal character in a
              # basic regex, so ` *` is used for "any run of spaces".
              gsed -i 's/^ *//;/^ *$/d' "$whisper_content_read_tmp_file"
              cat "$whisper_content_read_tmp_file" >> "$ollama_prompt_file"
              # The model's answer replaces the transcript in the write file,
              # so the UI loop picks it up on its next redraw.
              ollama run qwen3:4b --keepalive=30s "$(cat "$ollama_prompt_file")" \
                2>/dev/null > "$whisper_content_write_tmp_file" &
              ;;
          esac
          ;;
      esac
    done

    remove_temp_files
  done
  exit 0
fi