# simplewhispr/main.py

import json
import os
import signal
import subprocess
import sys
import time
from pathlib import Path

from openai import OpenAI

# Module-level state shared by the functions below.
# `recording` stays True while audio capture should continue; handle_sigusr1
# sets it to False, which ends the wait loop in grab_recording.
recording = True
# Handle to the status log written by report_status; None until the first
# status line is reported (the file is opened lazily there).
status_file = None


def parse_config() -> tuple[str, str]:
    """Load the API key and model name from the user's config file.

    The file is read from ``~/.config/simplewhispr/config.json`` and must
    contain a JSON object with a non-empty ``openai_api_key``. The optional
    ``model`` key defaults to ``gpt-4o-mini-transcribe``.

    Returns:
        A ``(api_key, model)`` tuple.

    Raises:
        Exception: if the file is missing, unreadable, not valid JSON, not a
            JSON object, or lacks ``openai_api_key``. The message always
            starts with a single ``fatal:`` prefix.
    """
    config_path = Path.home() / ".config/simplewhispr/config.json"

    # Open directly instead of checking exists() first: avoids the race
    # between the check and the open, and keeps the try body minimal.
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
    except FileNotFoundError as e:
        raise Exception(f"fatal: config file not found at {config_path}") from e
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        raise Exception(f"fatal: configuration error: {e}") from e

    if not isinstance(config, dict):
        raise Exception(
            "fatal: configuration error: top-level JSON value must be an object."
        )

    api_key = config.get("openai_api_key")
    if not api_key:
        raise Exception(
            "fatal: configuration error: 'openai_api_key' not found in config file."
        )

    return api_key, config.get("model", "gpt-4o-mini-transcribe")


def report_status(message: str):
    """Append one JSON status line of the form ``{"text":"<message>"}`` to the status log.

    The log at ``/tmp/simplewhispr-waybar.log`` is opened (truncating) on the
    first call and kept open in the module-level ``status_file`` afterwards.
    Reporting is best-effort: if the file cannot be opened or written, the
    call silently does nothing.

    Args:
        message: Text to publish; it is JSON-escaped, so quotes, backslashes
            and control characters cannot corrupt the line.
    """
    global status_file
    if status_file is None:
        try:
            # Explicit UTF-8: the status messages are icon glyphs outside
            # ASCII, which the locale's default encoding may not represent.
            status_file = open(
                "/tmp/simplewhispr-waybar.log", "w", encoding="utf-8"
            )
        except OSError:
            return

    # Compact separators keep the exact `{"text":"..."}` layout for plain
    # messages; ensure_ascii=False writes glyphs as-is rather than as \u escapes.
    payload = json.dumps(
        {"text": message}, ensure_ascii=False, separators=(",", ":")
    )

    try:
        status_file.write(payload + "\n")
        status_file.flush()
    except OSError:
        pass


def handle_sigusr1(signum, frame):
    """Signal handler: announce the signal and clear the ``recording`` flag.

    Both arguments are supplied by the signal machinery and are unused here.
    """
    global recording
    notice = "info: SIGUSR1 received. stopping recording."
    # Leading newline matches the original output layout.
    print(f"\n{notice}")
    recording = False


# Install handle_sigusr1 as the SIGUSR1 handler. This runs at import time, so
# the handler is in place before main() starts recording; sending SIGUSR1 to
# this process then clears the `recording` flag.
signal.signal(signal.SIGUSR1, handle_sigusr1)


def transcribe_audio(filename: str) -> str:
    """Upload the audio file at *filename* for transcription and return the text.

    The API key and model come from parse_config(). A status glyph is
    reported right before the request is sent and another once the file has
    been closed again.
    """
    key, model_name = parse_config()
    api = OpenAI(api_key=key)

    with open(filename, "rb") as wav:
        report_status("󰙏")
        result = api.audio.transcriptions.create(model=model_name, file=wav)

    report_status("󰥔")
    text = result.text
    return text


def grab_recording() -> str:
    """Record audio with ffmpeg until the ``recording`` flag is cleared.

    ffmpeg captures from the ``default`` pulse input into
    ``/tmp/simplewhispr-recording.wav`` (overwriting any previous file). The
    function polls every 0.5 s until handle_sigusr1 sets ``recording`` to
    False, then terminates ffmpeg.

    Returns:
        Path of the recorded file.

    Exits the process with status 1 (via ``sys.exit``) if ffmpeg cannot be
    started or exits on its own before recording is stopped.
    """
    output_filename = "/tmp/simplewhispr-recording.wav"
    # -loglevel error: by default ffmpeg streams progress output to stderr.
    # stderr is a pipe that is only read after ffmpeg exits, so unthrottled
    # output would fill the pipe buffer and stall the recording.
    # -nostdin: ffmpeg must not consume interactive commands from our stdin.
    cmd = [
        "ffmpeg",
        "-nostdin",
        "-loglevel",
        "error",
        "-f",
        "pulse",
        "-i",
        "default",
        "-y",
        output_filename,
    ]

    print(
        f"info: starting recording to {output_filename}. send SIGUSR1 (kill -usr1 {os.getpid()}) to stop."
    )
    report_status("󰍬")

    try:
        process = subprocess.Popen(
            cmd,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True,
        )
    except OSError as e:
        # e.g. ffmpeg is not installed or not executable
        print(f"fatal: could not start ffmpeg: {e}")
        report_status("")
        sys.exit(1)

    # wait for SIGUSR1 (handle_sigusr1 clears the flag)
    while recording:
        time.sleep(0.5)
        if process.poll() is not None:
            # ffmpeg has already exited, so this returns immediately and
            # gives us whatever error text it produced.
            _, details = process.communicate()
            print("fatal: ffmpeg exited unexpectedly.")
            if details and details.strip():
                print(details.strip())
            report_status("")
            sys.exit(1)

    # stop recording; communicate() also drains and closes the stderr pipe
    process.terminate()
    try:
        process.communicate(timeout=5)
    except subprocess.TimeoutExpired:
        # ffmpeg ignored SIGTERM; do not hang forever waiting for it
        process.kill()
        process.communicate()
    print("info: recording stopped.")
    report_status("󰥔")
    return output_filename


def main():
    """Run one record -> transcribe -> type cycle.

    Writes this process's pid to ``/tmp/simplewhispr.pid`` for the duration
    of the run, records audio, transcribes it and types the result with
    ``wtype``. The pid file is removed and the status cleared on every exit
    path, including failures and ``sys.exit`` from grab_recording.
    """
    pid_path = "/tmp/simplewhispr.pid"
    with open(pid_path, "w") as f:
        f.write(str(os.getpid()))

    try:
        report_status("󰥔")

        recording_file = grab_recording()
        print("info: transcribing...")
        transcription = transcribe_audio(recording_file)
        print(f"info: transcription: {transcription}")

        # use wtype to type the output (nothing to type if it is empty)
        if transcription:
            report_status("󰌌")
            # "--" ends option parsing so text that starts with "-" is typed
            # literally instead of being interpreted as wtype options.
            result = subprocess.run(["wtype", "--", transcription])
            if result.returncode != 0:
                print(f"warning: wtype exited with status {result.returncode}.")
    finally:
        # Always clear the status and drop the pid file, so a failed run does
        # not leave a stale indicator or a pid pointing at a dead process.
        report_status("")
        try:
            os.remove(pid_path)
        except FileNotFoundError:
            pass


# Run the record/transcribe/type cycle only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()