152 lines
4.1 KiB
Python
152 lines
4.1 KiB
Python
import json
|
|
import os
|
|
import signal
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
|
|
from openai import OpenAI
|
|
|
|
# Module-level state shared by the functions below.
# `recording` stays True while audio capture should continue; the SIGUSR1
# handler sets it to False, which ends the polling loop in grab_recording().
recording = True
# Handle for the status log; report_status() opens it on first use and
# reuses it for every later call.
status_file = None
|
|
|
|
|
|
def parse_config() -> tuple[str, str]:
    """Load the OpenAI API key and transcription model from the user config.

    Reads ``~/.config/simplewhispr/config.json``, which must hold a JSON
    object with an ``openai_api_key`` entry and may hold a ``model`` entry.

    Returns:
        An ``(api_key, model)`` tuple. ``model`` falls back to
        ``"gpt-4o-mini-transcribe"`` when the config does not set it.

    Raises:
        Exception: If the config file is missing, unreadable, or not valid
            JSON. The underlying error is attached as ``__cause__``.
        ValueError: If the JSON is not an object or has no usable
            ``openai_api_key``.
    """
    config_path = Path.home() / ".config/simplewhispr/config.json"

    # Open directly instead of checking exists() first: one syscall fewer and
    # no window in which the file can vanish between the check and the open.
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
    except FileNotFoundError as e:
        raise Exception(f"fatal: config file not found at {config_path}") from e
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        raise Exception(f"fatal: configuration error: {e}") from e

    # Validation happens outside the try block so these errors are not
    # re-wrapped into a doubled "fatal: ... fatal: ..." message.
    if not isinstance(config, dict):
        raise ValueError(
            "fatal: configuration error: config file must contain a JSON object."
        )

    api_key = config.get("openai_api_key")
    if not api_key:
        raise ValueError(
            "fatal: configuration error: 'openai_api_key' not found in config file."
        )

    return api_key, config.get("model", "gpt-4o-mini-transcribe")
|
|
|
|
|
|
def report_status(message: str):
    """Write one status line to ``/tmp/simplewhispr-waybar.log``.

    The log is opened (and truncated) on the first call; the handle is kept
    in the module-level ``status_file`` and reused afterwards. Each call
    writes a single-line JSON object of the form ``{"text":"<message>"}``
    and flushes immediately.

    Reporting is best-effort: if the log cannot be opened or written the
    call returns silently rather than interrupting the caller.

    Args:
        message: Text to publish; may be empty.
    """
    global status_file
    if status_file is None:
        try:
            # Explicit UTF-8 so non-ASCII status text does not depend on the
            # process locale.
            status_file = open(
                "/tmp/simplewhispr-waybar.log", "w", encoding="utf-8"
            )
        except OSError:
            return

    # Serialise with json so quotes, backslashes and newlines in the message
    # are escaped and the line stays valid JSON. The compact separators and
    # ensure_ascii=False keep the output identical to the previous
    # hand-built format for ordinary messages.
    line = json.dumps(
        {"text": message}, ensure_ascii=False, separators=(",", ":")
    )

    try:
        status_file.write(line + "\n")
        status_file.flush()
    except OSError:
        # Deliberately ignored: a status-log failure must not abort the run.
        pass
|
|
|
|
|
|
def handle_sigusr1(signum, frame):
    """React to SIGUSR1 by announcing it and clearing the recording flag.

    ``signum`` and ``frame`` are the standard signal-handler arguments; this
    handler does not use either of them.
    """
    global recording

    notice = "\ninfo: SIGUSR1 received. stopping recording."
    print(notice)

    # Clearing the flag is what ends the `while recording` wait loop.
    recording = False
|
|
|
|
|
|
# Install handle_sigusr1 as the SIGUSR1 handler. This runs at module load,
# so the handler is in place before any recording starts.
signal.signal(signal.SIGUSR1, handle_sigusr1)
|
|
|
|
|
|
def transcribe_audio(filename: str) -> str:
    """Send an audio file to the OpenAI transcription endpoint.

    The API key and model name are read from the user config on every call.

    Args:
        filename: Path of the audio file to upload.

    Returns:
        The ``text`` attribute of the transcription response.
    """
    key, model_name = parse_config()
    openai_client = OpenAI(api_key=key)

    # The file only needs to stay open for the duration of the upload.
    with open(filename, "rb") as recording_stream:
        result = openai_client.audio.transcriptions.create(
            model=model_name, file=recording_stream
        )

    return result.text
|
|
|
|
|
|
def cleanup_text(text: str) -> str:
    """Remove filler words and fix punctuation/grammar in dictated text.

    Sends ``text`` to an OpenAI chat model with a system prompt that asks it
    to keep the meaningful wording unchanged.

    Args:
        text: Raw transcription to clean up.

    Returns:
        The model's reply with surrounding whitespace stripped, or ``""``
        when ``text`` is empty or whitespace-only.

    Raises:
        Exception: If the model returns no content, or if the configuration
            cannot be loaded.
    """
    # Nothing was dictated: skip the API round-trip. Sending an empty prompt
    # would only invite the model to reply with commentary of its own, which
    # the caller would then type out.
    if not text.strip():
        return ""

    api_key, _ = parse_config()
    client = OpenAI(api_key=api_key)

    response = client.chat.completions.create(
        model="gpt-5.4-nano",
        messages=[
            {
                "role": "system",
                "content": "IMPORTANT: your job is to clean up dictated text. you will remove filler words and correct punctuation and grammar. your goal should be to change as few of the meaningful words as possible, while removing words that are not meaningful. WARNING: do not change the phrasing or edit for clarity or style, simply remove filler words and clean up grammar.",
            },
            {"role": "user", "content": text},
        ],
    )

    res = response.choices[0].message.content
    if res is None:
        raise Exception("cleanup gave no output")
    return res.strip()
|
|
|
|
|
|
def grab_recording() -> str:
    """Record audio with ffmpeg until the ``recording`` flag is cleared.

    Starts ffmpeg capturing the ``default`` pulse input into
    ``/tmp/simplewhispr-recording.wav`` (overwriting any existing file), then
    polls every 0.5 seconds until the module-level ``recording`` flag becomes
    False (the SIGUSR1 handler clears it). ffmpeg is then terminated and
    reaped.

    Returns:
        The path of the recorded file.

    Exits the process with status 1 if ffmpeg cannot be started or stops on
    its own while recording is still requested.
    """
    # NOTE(review): every report_status() call here passes an empty string in
    # this view of the file; confirm no icon glyphs were lost.
    output_filename = "/tmp/simplewhispr-recording.wav"

    # Quiet ffmpeg down to real errors. Its stderr is a pipe that is only read
    # once the process has finished, so the default banner and progress
    # output could fill the pipe buffer and stall a long recording.
    cmd = [
        "ffmpeg",
        "-hide_banner",
        "-loglevel",
        "error",
        "-nostats",
        "-f",
        "pulse",
        "-i",
        "default",
        "-y",
        output_filename,
    ]

    print(
        f"info: starting recording to {output_filename}. send SIGUSR1 (kill -usr1 {os.getpid()}) to stop."
    )
    report_status("")

    try:
        process = subprocess.Popen(
            cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True
        )
    except OSError as e:
        # Typically ffmpeg is not installed or not on PATH.
        print(f"fatal: could not start ffmpeg: {e}")
        report_status("")
        sys.exit(1)

    # wait for SIGUSR1
    while recording:
        time.sleep(0.5)
        if process.poll() is not None:
            print("fatal: ffmpeg exited unexpectedly.")
            # The process is already gone, so this returns immediately with
            # whatever ffmpeg reported before exiting.
            _, ffmpeg_errors = process.communicate()
            if ffmpeg_errors and ffmpeg_errors.strip():
                print(ffmpeg_errors.strip())
            report_status("")
            sys.exit(1)

    # stop recording; communicate() drains stderr while waiting, and the
    # timeout plus kill() keeps an unresponsive ffmpeg from hanging us.
    process.terminate()
    try:
        process.communicate(timeout=5)
    except subprocess.TimeoutExpired:
        process.kill()
        process.communicate()

    print("info: recording stopped.")
    report_status("")
    return output_filename
|
|
|
|
|
|
def main():
    """Run one dictation cycle: record, transcribe, clean up, type.

    Writes this process's PID to ``/tmp/simplewhispr.pid`` first, then
    records until SIGUSR1, transcribes the recording, cleans the text and
    types it with ``wtype``. The PID file is removed on the way out whether
    the cycle succeeds, raises, or exits early.
    """
    # NOTE(review): every report_status() call here passes an empty string in
    # this view of the file; confirm no icon glyphs were lost.
    pid_path = "/tmp/simplewhispr.pid"
    with open(pid_path, "w") as f:
        f.write(str(os.getpid()))

    try:
        report_status("")

        recording_file = grab_recording()

        report_status("")
        print("info: transcribing...")
        transcription = transcribe_audio(recording_file)
        print(f"info: raw transcription: {transcription}")

        print("info: cleaning up...")
        cleaned_transcription = cleanup_text(transcription)
        print(f"info: cleaned transcription: {cleaned_transcription}")

        # use wtype to type the output
        report_status("")
        subprocess.run(["wtype", cleaned_transcription])
        report_status("")
    finally:
        # Always drop the PID file, including on exceptions and sys.exit(),
        # so a stale PID is never left behind for whoever sends SIGUSR1.
        try:
            os.remove(pid_path)
        except FileNotFoundError:
            pass
|
|
|
|
|
|
# Run the dictation cycle only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|