diff --git a/main.py b/main.py
index 15ce217..eaab341 100644
--- a/main.py
+++ b/main.py
@@ -64,16 +64,55 @@ def transcribe_audio(filename: str) -> str:
     client = OpenAI(api_key=api_key)
 
     with open(filename, "rb") as audio_file:
-        report_status("󰙏")
         transcription = client.audio.transcriptions.create(
             model=model,
             file=audio_file,
         )
-        report_status("󰥔")
 
     return transcription.text
 
 
+def cleanup_text(text: str) -> str:
+    """Remove filler words and fix punctuation and grammar in dictated text.
+
+    Sends ``text`` as the user message to an OpenAI chat model, together with
+    a system prompt asking for minimal edits, and returns the reply.
+
+    Args:
+        text: Raw transcription to clean up.
+
+    Returns:
+        The model's reply with surrounding whitespace stripped, or ``""``
+        when ``text`` is empty or whitespace-only.
+
+    Raises:
+        RuntimeError: If the response's first choice has no message content.
+    """
+    # Nothing was dictated: skip the API call. Given an empty user message the
+    # model has nothing to clean and may reply conversationally, and that
+    # reply would then be typed out by the caller.
+    if not text.strip():
+        return ""
+
+    api_key, _ = parse_config()
+    client = OpenAI(api_key=api_key)
+
+    response = client.chat.completions.create(
+        model="gpt-5.4-nano",
+        messages=[
+            {
+                "role": "system",
+                "content": "IMPORTANT: your job is to clean up dictated text. you will remove filler words and correct punctuation and grammar. your goal should be to change as few of the meaningful words as possible, while removing words that are not meaningful. WARNING: do not change the phrasing or edit for clarity or style, simply remove filler words and clean up grammar.",
+            },
+            {"role": "user", "content": text},
+        ],
+    )
+    res = response.choices[0].message.content
+    if res is None:
+        raise RuntimeError("cleanup gave no output")
+    return res.strip()
+
+
 def grab_recording() -> str:
     output_filename = "/tmp/simplewhispr-recording.wav"
     # start ffmpeg recording
@@ -111,13 +150,19 @@ def main():
     report_status("󰥔")
 
     recording_file = grab_recording()
+
+    report_status("󰙏")
     print("info: transcribing...")
     transcription = transcribe_audio(recording_file)
-    print(f"info: transcription: {transcription}")
+    print(f"info: raw transcription: {transcription}")
+
+    print("info: cleaning up...")
+    cleaned_transcription = cleanup_text(transcription)
+    print(f"info: cleaned transcription: {cleaned_transcription}")
 
     # use wtype to type the output
     report_status("󰌌")
-    subprocess.run(["wtype", transcription])
+    subprocess.run(["wtype", cleaned_transcription])
     report_status("")
 
     if os.path.exists("/tmp/simplewhispr.pid"):