From 8fc9c9247dc4bcfaeb7ae8d83d165748a1032894 Mon Sep 17 00:00:00 2001
From: ns
Date: Thu, 4 Jun 2026 23:27:25 -0500
Subject: [PATCH] added text cleanup step after transcription

---
NOTE(review): this patch was restored from a whitespace-collapsed copy.
Line breaks were rebuilt from the hunk headers and the diffstat counts, and
the "\n" escape inside the system prompt string was restored. Indentation and
blank context lines are inferred from Python syntax; in particular the
indentation of the removed report_status("󰥔") line and the blank line before
it in transcribe_audio are assumptions. Verify the context against main.py at
15ce217 before applying.

 main.py | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/main.py b/main.py
index 15ce217..eaab341 100644
--- a/main.py
+++ b/main.py
@@ -64,16 +64,34 @@ def transcribe_audio(filename: str) -> str:
     client = OpenAI(api_key=api_key)
 
     with open(filename, "rb") as audio_file:
-        report_status("󰙏")
         transcription = client.audio.transcriptions.create(
             model=model,
             file=audio_file,
         )
 
-    report_status("󰥔")
     return transcription.text
 
 
+def cleanup_text(text: str) -> str:
+    api_key, _ = parse_config()
+    client = OpenAI(api_key=api_key)
+
+    response = client.chat.completions.create(
+        model="gpt-5.4-nano",
+        messages=[
+            {
+                "role": "system",
+                "content": "IMPORTANT: your job is to clean up dictated text. you will remove filler words and correct punctuation and grammar. your goal should be to change as few of the meaningful words as possible, while removing words that are not meaningful. \nWARNING: do not change the phrasing or edit for clarity or style, simply remove filler words and clean up grammar.",
+            },
+            {"role": "user", "content": text},
+        ],
+    )
+    res = response.choices[0].message.content
+    if res is None:
+        raise Exception("cleanup gave no output")
+    return res.strip()
+
+
 def grab_recording() -> str:
     output_filename = "/tmp/simplewhispr-recording.wav"
     # start ffmpeg recording
@@ -111,13 +129,19 @@ def main():
     report_status("󰥔")
 
     recording_file = grab_recording()
+
+    report_status("󰙏")
     print("info: transcribing...")
     transcription = transcribe_audio(recording_file)
-    print(f"info: transcription: {transcription}")
+    print(f"info: raw transcription: {transcription}")
+
+    print("info: cleaning up...")
+    cleaned_transcription = cleanup_text(transcription)
+    print(f"info: cleaned transcription: {cleaned_transcription}")
 
     # use wtype to type the output
     report_status("󰌌")
-    subprocess.run(["wtype", transcription])
+    subprocess.run(["wtype", cleaned_transcription])
     report_status("")
 
     if os.path.exists("/tmp/simplewhispr.pid"):