# Sanad/voice/model_subprocess.py
"""Template supervisor — pair with voice/model_script.py when adding a new model.
The supervisor's job is to run a voice subprocess and tail its stdout for
state transitions + user transcripts. It is brand-specific on purpose:
each model's brain emits log lines in its own format, so each model gets
its own supervisor. See `gemini/subprocess.py` for the working reference.
How to add a new model (e.g. OpenAI Realtime):
1. cp voice/model_script.py openai/script.py
2. cp voice/model_subprocess.py openai/subprocess.py
3. In both files: rename `ModelBrain` → `OpenAIRealtimeBrain`,
`ModelSubprocess` → `OpenAIRealtimeSubprocess`.
4. In `openai/script.py`: fill in the TODO bodies (connect/send/receive).
Each `log.info("USER: %s", ...)` / `log.info("BOT: %s", ...)` /
state message must be a string your supervisor's `_track_line` below
can detect — keep them in lock-step.
5. In `openai/subprocess.py`: update `_track_line` to match the strings
your brain actually emits.
6. In `main.py`: swap `GeminiSubprocess` → `OpenAIRealtimeSubprocess` in
the `live_sub = _safe_construct(...)` line. In `voice/sanad_voice.py`,
add a branch to `_build_brain()` mapping `"openai"` → `OpenAIRealtimeBrain`.
7. Run with `SANAD_VOICE_BRAIN=openai python3 voice/sanad_voice.py eth0`.
Nothing in `gemini/` needs to change.
"""
from __future__ import annotations
import os
import signal
import subprocess
import sys
import threading
import time
from collections import deque
from datetime import datetime
from pathlib import Path
from typing import Any
from Project.Sanad.config import BASE_DIR, LOGS_DIR, SCRIPTS_DIR
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
log = get_logger("model_subprocess")
class ModelSubprocess:
    """Skeleton supervisor — adapt for your model.

    Contract expected by `main.py` + `dashboard/routes/live_subprocess.py`:

        start()   — sync. Spawns the child, starts the log reader thread.
        stop()    — sync. SIGINT / SIGTERM / SIGKILL escalation.
        status()  — returns {state, state_message, running, pid, log_tail,
                    user_transcript, last_user_text, ...}.

        log_tail        : deque[str]  last N cleaned stdout lines
        user_transcript : deque[str]  user transcripts parsed from child's log
        last_user_text  : str         most recent transcript (convenience)
        state           : str         one of {"stopped", "starting", "connecting",
                                      "listening", "hearing", "interrupting",
                                      "error", "warning", "crashed"}
    """

    def __init__(self):
        # TODO: set a config section key — e.g. `_cfg_section("openai", "subprocess")`.
        # Create `config/<brand>_config.json > subprocess: { ... }` matching
        # gemini_config.json's layout.
        self._cfg = {}  # _cfg_section("<brand>", "subprocess")
        # Guards state/state_message/process against concurrent access from
        # the reader thread and dashboard status() polling.
        self._lock = threading.Lock()
        self.process: subprocess.Popen | None = None
        # Bounded ring buffers so a chatty child can't grow memory unbounded.
        self.log_tail: deque[str] = deque(
            maxlen=self._cfg.get("log_tail_size", 2000))
        self.user_transcript: deque[str] = deque(
            maxlen=self._cfg.get("transcript_tail_size", 30))
        self._reader_thread: threading.Thread | None = None
        self._log_file = None  # open log file handle, if the subclass keeps one
        self.state = "stopped"
        self.state_message = "Idle."
        self.last_user_text = ""

    # ─── spawn / kill ─────────────────────────────────────

    def start(self) -> dict:
        """Spawn the voice child process and begin tailing its stdout.

        Template stub — must be implemented by the brand-specific subclass.
        """
        # TODO: build env (include `SANAD_VOICE_BRAIN=<yourbrand>` so
        # sanad_voice.py picks your brain), pick the script path, and
        # `subprocess.Popen(...)`. Copy the gemini/subprocess.py body.
        raise NotImplementedError

    def stop(self, timeout: float = 3.0) -> dict:
        """Stop the child process with escalating signals.

        Template stub — must be implemented by the brand-specific subclass.
        """
        # TODO: send SIGINT → wait → SIGTERM → wait → SIGKILL.
        raise NotImplementedError

    # ─── log parsing — the brand-specific part ────────────

    def _track_line(self, line: str) -> None:
        """Translate your brain's log strings into state + transcripts.

        KEEP THIS IN LOCK-STEP with the `log.info(...)` calls in your
        brain. Minimum required detections:

            connecting   — child opened a session to the model
            listening    — session connected OR a turn finished
            hearing      — user transcript arrived (APPEND to user_transcript)
            interrupting — barge-in / model interrupted
            error        — fatal session error
            stopped      — clean shutdown
        """
        # Example (replace with your brain's actual strings):
        #
        # if "connecting to OpenAI" in line:
        #     self._set_state("connecting", line)
        # elif "session open" in line:
        #     self._set_state("listening", "Listening for speech.")
        # elif "USER: " in line:
        #     text = line.split("USER: ", 1)[1].strip()
        #     if text:
        #         self.last_user_text = text
        #         self.user_transcript.append(text)
        #         self._set_state("hearing", f"User: {text}")
        # elif "BARGE-IN" in line:
        #     self._set_state("interrupting", line)
        # elif "session error" in line:
        #     self._set_state("error", line)
        # elif "cancelled — stopping" in line:
        #     self._set_state("stopped", line)
        raise NotImplementedError

    def _set_state(self, state: str, msg: str) -> None:
        """Record the current state + message as one atomic update.

        Fix: the original wrote both attributes without `self._lock`, while
        `status()` reads them under it — a poll racing the reader thread
        could pair a new `state` with a stale `state_message`. Taking the
        lock here keeps the pair consistent.
        """
        with self._lock:
            self.state = state
            self.state_message = msg

    # ─── status + introspection ───────────────────────────

    def status(self) -> dict:
        """Return a consistent snapshot for the dashboard / main loop.

        Taken under `self._lock` so state fields and the process handle are
        read as one coherent view. `log_tail` is trimmed to the last 50
        lines to keep the payload small.
        """
        with self._lock:
            proc = self.process
            running = proc is not None and proc.poll() is None
            return {
                "running": running,
                "pid": proc.pid if running else None,
                "state": self.state,
                "state_message": self.state_message,
                "last_user_text": self.last_user_text,
                "log_tail": list(self.log_tail)[-50:],
                "user_transcript": list(self.user_transcript),
            }