"""Gemini live subprocess supervisor. Spawns `voice/sanad_voice.py` as a managed child with `SANAD_VOICE_BRAIN=gemini`, tails the child's stdout, and extracts state transitions + user transcripts from the Gemini-specific log lines emitted by `gemini/script.py:GeminiBrain`. When a new model is added, build its own sibling supervisor (see `voice/model_subprocess.py` for the template) — do not refactor this file. """ from __future__ import annotations import os import signal import subprocess import sys import threading import time from collections import deque from datetime import datetime from typing import Any from pathlib import Path from Project.Sanad.config import BASE_DIR, LOGS_DIR, SCRIPTS_DIR, LIVE_TUNE from Project.Sanad.core.config_loader import section as _cfg_section from Project.Sanad.core.logger import get_logger log = get_logger("gemini_subprocess") _LS_CFG = _cfg_section("gemini", "subprocess") def _resolve_live_script() -> Path: """Locate the voice script to run as subprocess. Default: voice/sanad_voice.py (the canonical G1 built-in mic + AudioClient speaker path). Override with SANAD_LIVE_SCRIPT. """ override = os.environ.get("SANAD_LIVE_SCRIPT", "").strip() if override: p = Path(override).expanduser() if p.exists(): return p for c in (BASE_DIR / "voice" / "sanad_voice.py", SCRIPTS_DIR / "sanad_voice.py"): if c.exists(): return c return SCRIPTS_DIR / "sanad_voice.py" LIVE_SCRIPT = _resolve_live_script() LOG_TAIL_SIZE = _LS_CFG.get("log_tail_size", 2000) TRANSCRIPT_TAIL_SIZE = _LS_CFG.get("transcript_tail_size", 30) # Persistent on-disk log for the full subprocess session. 
LIVE_LOG_DIR = LOGS_DIR
LIVE_LOG_NAME = _LS_CFG.get("log_name", "gemini_subprocess")

# Grace periods used by GeminiSubprocess.stop(): first after SIGINT, then
# after each of terminate() / kill().
_STOP_TIMEOUT_SEC = _LS_CFG.get("stop_timeout_sec", 3.0)
_TERMINATE_TIMEOUT_SEC = _LS_CFG.get("terminate_timeout_sec", 2.0)

# Child output matching these is kept out of the in-memory tail (it is still
# written to the on-disk session log).
_NOISY_PREFIXES = tuple(_LS_CFG.get("noisy_prefixes", [
    "ALSA lib ",
    "Expression 'alsa_",
    "Cannot connect to server socket",
    "jack server is not running",
]))
_NOISY_FRAGMENTS = tuple(_LS_CFG.get("noisy_fragments", [
    "Unknown PCM",
    "Evaluate error",
    "snd_pcm_open_noupdate",
    "PaAlsaStream",
    "snd_config_evaluate",
    "snd_func_refer",
]))


class GeminiSubprocess:
    """Supervise one voice-script child process and mirror its state.

    The child's combined stdout/stderr is read line by line on a daemon
    thread. Each line is appended to a per-day log file, filtered for audio
    driver noise, kept in a bounded in-memory tail, and scanned for markers
    that update ``state`` / ``state_message`` / ``last_user_text``.

    ``_lock`` guards ``process``, the deques, and the state fields.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self.process: subprocess.Popen | None = None
        self.log_tail: deque[str] = deque(maxlen=LOG_TAIL_SIZE)
        self.user_transcript: deque[str] = deque(maxlen=TRANSCRIPT_TAIL_SIZE)
        self._reader_thread: threading.Thread | None = None
        self._log_file = None  # opened per-session in _reader_loop
        self.state = "stopped"
        self.state_message = "Idle."
        self.last_user_text = ""
        self.suppressed_noise = 0

    def _open_session_log(self, pid: int):
        """Open (or re-open) the per-day append log file for this session.

        Writes a start banner containing the timestamp and ``pid``.

        Returns:
            The open, line-buffered file handle, or ``None`` if the file
            could not be opened or the banner could not be written (a
            warning is logged in that case).
        """
        fh = None
        try:
            LIVE_LOG_DIR.mkdir(parents=True, exist_ok=True)
            fname = f"{LIVE_LOG_NAME}_{datetime.now().strftime('%Y%m%d')}.log"
            fh = open(LIVE_LOG_DIR / fname, "a", encoding="utf-8", buffering=1)
            fh.write(
                f"\n===== live_gemini subprocess start "
                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} pid={pid} =====\n"
            )
            return fh
        except Exception as exc:
            log.warning("Could not open live-gemini log file: %s", exc)
            # Don't leak the handle when open() succeeded but the banner
            # write failed.
            if fh is not None:
                try:
                    fh.close()
                except Exception:
                    pass
            return None

    def _is_noisy(self, line: str) -> bool:
        """Return True if ``line`` matches a configured noise prefix or fragment."""
        return line.startswith(_NOISY_PREFIXES) or any(f in line for f in _NOISY_FRAGMENTS)

    def _set_state(self, state: str, msg: str):
        """Record the current state and its message.

        Does not take ``_lock`` itself; callers in this class hold it.
        """
        self.state = state
        self.state_message = msg

    def _track_line(self, line: str):
        """Parse Gemini-specific log markers emitted by `gemini/script.py`.

        Must stay in lock-step with the `log.info(...)` strings in
        `GeminiBrain`. If you add a new state, add the emit in the brain AND
        the matching detector here — in one PR.

        Branch order matters: the first matching marker wins.
        """
        lowered = line.lower()
        if "connecting to Gemini" in line:
            self._set_state("connecting", line)
        elif "connected — speak anytime" in line or "connected - speak anytime" in line:
            self._set_state("listening", "Listening for speech.")
        elif " USER: " in line or line.strip().startswith("USER:"):
            # GeminiBrain emits: log.info("USER: %s", text)
            text = line.split("USER:", 1)[1].strip()
            if text:
                self.last_user_text = text
                self.user_transcript.append(text)
                self._set_state("hearing", f"User: {text}")
        elif "BARGE-IN" in line or "Gemini interrupted" in line or "interrupt (" in line:
            self._set_state("interrupting", line)
        elif "listening" in lowered and "no speech" not in lowered:
            # Fires on "listening" (post-turn) — keep the state fresh.
            # Both tests use the lower-cased line so the "no speech"
            # exclusion is as case-insensitive as the "listening" match.
            self._set_state("listening", "Listening for speech.")
        elif "session error" in line or "client recreation failed" in line:
            self._set_state("error", line)
        elif "server going away" in line or "session ended" in line or "session dead" in line:
            self._set_state("warning", line)
        elif "keyboard interrupt" in line or "cancelled — stopping" in line:
            self._set_state("stopped", line)

    def _reader_loop(self, proc: subprocess.Popen | None = None):
        """Drain the child's stdout until EOF, logging and tracking each line.

        Args:
            proc: The child this reader belongs to. Defaults to
                ``self.process`` for backward compatibility; ``start()``
                passes it explicitly so the reader stays bound to its own
                session even if ``self.process`` changes later.
        """
        if proc is None:
            proc = self.process
        if proc is None or proc.stdout is None:
            return

        # Every line goes to the on-disk log — including the ALSA noise
        # that we filter out of the in-memory tail. That way a field
        # post-mortem has the full raw capture if we need it.
        fh = self._open_session_log(proc.pid)
        self._log_file = fh
        try:
            for line in proc.stdout:
                clean = line.rstrip()
                if not clean:
                    continue
                if fh is not None:
                    try:
                        fh.write(clean + "\n")
                    except (OSError, ValueError):
                        # Best effort: a failed disk write must not stop
                        # us from draining the pipe.
                        pass
                with self._lock:
                    if self._is_noisy(clean):
                        self.suppressed_noise += 1
                        continue
                    self.log_tail.append(clean)
                    self._track_line(clean)
        except Exception as exc:
            # Without this the thread would die silently and the state
            # below would never be updated.
            log.warning("Live Gemini reader stopped unexpectedly: %s", exc)
        finally:
            with self._lock:
                # Only report the exit if no newer session has replaced
                # this one; otherwise a late-finishing reader would mark
                # the new child as stopped.
                if self.process is proc or self.process is None:
                    self.log_tail.append("Live Gemini process exited.")
                    self._set_state("stopped", "Process exited.")
            if fh is not None:
                try:
                    fh.write(
                        f"===== live_gemini subprocess exit "
                        f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====\n"
                    )
                    fh.close()
                except Exception:
                    pass
            # Don't clear a handle that a newer reader has installed.
            if self._log_file is fh:
                self._log_file = None

    def is_running(self) -> bool:
        """Return True if a child exists and has not exited."""
        with self._lock:
            return self.process is not None and self.process.poll() is None

    def _build_child_env(self) -> dict[str, str]:
        """Build the environment for the child process.

        Starts from a copy of ``os.environ``, forces unbuffered Python
        output, overlays ``LIVE_TUNE``, and adds ``SANAD_G1_VOLUME``.
        """
        # NOTE(review): the module docstring says the child runs with
        # SANAD_VOICE_BRAIN=gemini, but nothing here sets it — presumably
        # it arrives via LIVE_TUNE or the parent environment; confirm.
        env = os.environ.copy()
        env.update({"PYTHONUNBUFFERED": "1", **LIVE_TUNE})

        # Pass the current G1 speaker volume as an env var so the
        # subprocess can compute the correct barge-in threshold at
        # startup. Without this, sanad_voice.py would read the volume
        # from a stale or non-existent config file path and default to
        # 100, scaling the barge-in threshold wrong for any non-100%
        # volume. load_config() reads data/motions/config.json — the
        # file the dashboard writes to when the user moves the slider.
        try:
            from Project.Sanad.config import load_config

            cfg = load_config() or {}
            audio_cfg = cfg.get("audio")
            if not isinstance(audio_cfg, dict):
                audio_cfg = {}
            g1_vol = max(0, min(100, int(audio_cfg.get("g1_volume", 100))))
            env["SANAD_G1_VOLUME"] = str(g1_vol)
            log.info("Passing SANAD_G1_VOLUME=%d to subprocess", g1_vol)
        except Exception as exc:
            log.warning("Could not read g1_volume for subprocess: %s", exc)
        return env

    def start(self) -> dict[str, Any]:
        """Launch the voice script and start the reader thread.

        Returns:
            ``{"started": True, "pid": ...}`` on success, or
            ``{"started": False, "message": "Already running.", "pid": ...}``
            if a child is already alive.

        Raises:
            RuntimeError: if the script path does not exist.
            Exception: whatever ``subprocess.Popen`` raises. In both cases
                the state is set to ``"error"`` before re-raising so
                ``status()`` does not report "starting" forever.
        """
        with self._lock:
            if self.process is not None and self.process.poll() is None:
                return {"started": False, "message": "Already running.", "pid": self.process.pid}
            self._set_state("starting", "Starting...")

        try:
            script = LIVE_SCRIPT
            if not script.exists():
                raise RuntimeError(f"Script not found: {script}")

            env = self._build_child_env()

            # sanad_voice.py takes the DDS interface as the first positional arg
            dds_iface = env.get("SANAD_DDS_INTERFACE", "eth0")
            cmd = [sys.executable, str(script), dds_iface]

            proc = subprocess.Popen(
                cmd,
                cwd=str(script.parent),
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                # An undecodable byte from the child must not kill the
                # reader thread (which would leave the pipe undrained).
                errors="replace",
                bufsize=1,
                env=env,
            )
        except Exception as exc:
            with self._lock:
                self._set_state("error", f"Start failed: {exc}")
            raise

        with self._lock:
            self.process = proc
            self.log_tail.append(f"Started: pid={proc.pid}")
            self._set_state("starting", f"pid={proc.pid}")

        self._reader_thread = threading.Thread(
            target=self._reader_loop, args=(proc,), daemon=True
        )
        self._reader_thread.start()
        log.info("Live Gemini subprocess started: pid=%d", proc.pid)
        return {"started": True, "pid": proc.pid}

    def stop(self) -> dict[str, Any]:
        """Stop the child, escalating SIGINT -> terminate() -> kill().

        Returns:
            ``{"stopped": True, "returncode": rc}`` once the child has
            exited; ``{"stopped": False, "message": "Not running."}`` if
            there was nothing to stop; or ``{"stopped": False, "message":
            ..., "pid": ...}`` if the child is still alive after kill().
        """
        with self._lock:
            proc = self.process
            if proc is None or proc.poll() is not None:
                return {"stopped": False, "message": "Not running."}
            self._set_state("stopping", "Stopping...")

        try:
            try:
                proc.send_signal(signal.SIGINT)
                proc.wait(timeout=_STOP_TIMEOUT_SEC)
            except subprocess.TimeoutExpired:
                proc.terminate()
                try:
                    proc.wait(timeout=_TERMINATE_TIMEOUT_SEC)
                except subprocess.TimeoutExpired:
                    proc.kill()
                    proc.wait(timeout=_TERMINATE_TIMEOUT_SEC)
        except subprocess.TimeoutExpired:
            # Still alive after kill(): keep self.process so the caller
            # can retry, and surface the failure instead of leaving the
            # state stuck at "stopping".
            message = f"Process pid={proc.pid} did not exit after kill."
            with self._lock:
                self._set_state("error", message)
            log.error("Live Gemini subprocess pid=%d did not exit after kill", proc.pid)
            return {"stopped": False, "message": message, "pid": proc.pid}

        rc = proc.returncode
        with self._lock:
            # Don't drop a newer child that was started in the meantime.
            if self.process is proc:
                self.process = None
            self.log_tail.append("Stopped.")
            self._set_state("stopped", "Stopped.")
        log.info("Live Gemini subprocess stopped (rc=%s)", rc)
        return {"stopped": True, "returncode": rc}

    def status(self) -> dict[str, Any]:
        """Return a snapshot of process liveness, state, transcript and log tail."""
        with self._lock:
            running = self.process is not None and self.process.poll() is None
            return {
                "running": running,
                "pid": self.process.pid if running and self.process else None,
                "state": self.state,
                "state_message": self.state_message,
                "last_user_text": self.last_user_text,
                "user_transcript": list(self.user_transcript),
                "log_tail": list(self.log_tail),
                "suppressed_noise": self.suppressed_noise,
            }