261 lines
9.7 KiB
Python
261 lines
9.7 KiB
Python
"""Live Gemini Subprocess Manager — start/stop sanad_voice.py as managed child.
|
|
|
|
Mirrors gemini_voice_v2/LiveGeminiManager. Launches the voice script as a
|
|
subprocess, tails stdout, parses state transitions and user transcripts.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import signal
|
|
import subprocess
|
|
import sys
|
|
import threading
|
|
import time
|
|
from collections import deque
|
|
from datetime import datetime
|
|
from typing import Any
|
|
|
|
from pathlib import Path
|
|
|
|
from Project.Sanad.config import BASE_DIR, LOGS_DIR, SCRIPTS_DIR, LIVE_TUNE
|
|
from Project.Sanad.core.config_loader import section as _cfg_section
|
|
from Project.Sanad.core.logger import get_logger
|
|
|
|
# Module-level logger, obtained from the project's get_logger under the
# name "live_subprocess".
log = get_logger("live_subprocess")
|
|
|
|
# Config section looked up with the keys ("voice", "live_gemini_subprocess").
# The module-level tunables in this file are read from it via .get() with
# hard-coded fallback values.
_LS_CFG = _cfg_section("voice", "live_gemini_subprocess")
|
|
|
|
|
|
def _resolve_live_script() -> Path:
    """Locate the voice script to run as subprocess.

    Default: voice/sanad_voice.py (the canonical G1 built-in mic +
    AudioClient speaker path). Override with SANAD_LIVE_SCRIPT.

    Resolution order:
      1. ``SANAD_LIVE_SCRIPT`` (``~`` expanded), if set and the path exists.
      2. ``BASE_DIR / "voice" / "sanad_voice.py"``, if it exists.
      3. ``SCRIPTS_DIR / "sanad_voice.py"``, if it exists.

    Returns:
        The first existing candidate. When none exists, the ``SCRIPTS_DIR``
        path is returned anyway so the caller can report a concrete
        missing-file path.
    """
    override = os.environ.get("SANAD_LIVE_SCRIPT", "").strip()
    if override:
        candidate = Path(override).expanduser()
        if candidate.exists():
            return candidate
        # A mistyped override used to be ignored silently, which made it
        # look as if the override had been honoured. Say so explicitly.
        log.warning(
            "SANAD_LIVE_SCRIPT=%r does not exist; falling back to the default script",
            override,
        )

    defaults = (
        BASE_DIR / "voice" / "sanad_voice.py",
        SCRIPTS_DIR / "sanad_voice.py",
    )
    for candidate in defaults:
        if candidate.exists():
            return candidate
    return defaults[-1]
|
|
|
|
|
|
# Script launched by LiveGeminiSubprocess.start(); resolved once at import.
LIVE_SCRIPT = _resolve_live_script()

# Capacities of the in-memory deques kept by LiveGeminiSubprocess.
LOG_TAIL_SIZE = _LS_CFG.get("log_tail_size", 2000)
TRANSCRIPT_TAIL_SIZE = _LS_CFG.get("transcript_tail_size", 30)

# Persistent on-disk log for the full subprocess session.
LIVE_LOG_DIR = LOGS_DIR
LIVE_LOG_NAME = _LS_CFG.get("log_name", "live_gemini_subprocess")

# Seconds to wait after SIGINT, and after terminate()/kill(), in stop().
_STOP_TIMEOUT_SEC = _LS_CFG.get("stop_timeout_sec", 3.0)
_TERMINATE_TIMEOUT_SEC = _LS_CFG.get("terminate_timeout_sec", 2.0)

# Lines starting with any of these are kept out of the in-memory log tail.
_NOISY_PREFIXES = tuple(
    _LS_CFG.get(
        "noisy_prefixes",
        [
            "ALSA lib ",
            "Expression 'alsa_",
            "Cannot connect to server socket",
            "jack server is not running",
        ],
    )
)

# Lines containing any of these are kept out of the in-memory log tail.
_NOISY_FRAGMENTS = tuple(
    _LS_CFG.get(
        "noisy_fragments",
        [
            "Unknown PCM",
            "Evaluate error",
            "snd_pcm_open_noupdate",
            "PaAlsaStream",
            "snd_config_evaluate",
            "snd_func_refer",
        ],
    )
)
|
|
|
|
|
|
class LiveGeminiSubprocess:
    """Run the live voice script as a supervised child process.

    ``start()`` launches ``LIVE_SCRIPT`` with the current interpreter and a
    daemon thread tails the child's combined stdout/stderr. Each line is
    written to a per-day log file, filtered for known audio-stack noise,
    appended to an in-memory tail, and scanned for markers that drive
    ``state`` / ``state_message`` and the user transcript. ``status()``
    returns a snapshot of all of that; ``stop()`` shuts the child down.

    Attributes shared between the caller and the reader thread are
    guarded by ``self._lock``.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self.process: subprocess.Popen | None = None
        self.log_tail: deque[str] = deque(maxlen=LOG_TAIL_SIZE)
        self.user_transcript: deque[str] = deque(maxlen=TRANSCRIPT_TAIL_SIZE)
        self._reader_thread: threading.Thread | None = None
        self._log_file = None  # opened per-session in _reader_loop
        self.state = "stopped"
        self.state_message = "Idle."
        self.last_user_text = ""
        self.suppressed_noise = 0

    def _open_session_log(self, pid: int):
        """Open (or re-open) the per-day append log file for this session.

        Writes a start banner carrying the timestamp and *pid*.

        Returns:
            The open, line-buffered text file, or ``None`` when the file
            could not be opened or the banner could not be written. The
            on-disk log is best-effort and never raises to the caller.
        """
        now = datetime.now()
        try:
            LIVE_LOG_DIR.mkdir(parents=True, exist_ok=True)
            fname = f"{LIVE_LOG_NAME}_{now.strftime('%Y%m%d')}.log"
            fh = open(LIVE_LOG_DIR / fname, "a", encoding="utf-8", buffering=1)
        except Exception as exc:
            log.warning("Could not open live-gemini log file: %s", exc)
            return None

        try:
            fh.write(
                f"\n===== live_gemini subprocess start "
                f"{now.strftime('%Y-%m-%d %H:%M:%S')} pid={pid} =====\n"
            )
        except Exception as exc:
            log.warning("Could not open live-gemini log file: %s", exc)
            # Do not leak the handle when only the banner write failed.
            try:
                fh.close()
            except Exception:
                pass
            return None
        return fh

    def _is_noisy(self, line: str) -> bool:
        """Return True when *line* matches a configured noise prefix or fragment."""
        return line.startswith(_NOISY_PREFIXES) or any(f in line for f in _NOISY_FRAGMENTS)

    def _set_state(self, state: str, msg: str):
        """Record the current state and its message. Caller holds ``_lock``."""
        self.state = state
        self.state_message = msg

    def _track_line(self, line: str):
        """Update state and transcript from one child output line.

        Only the first matching marker is applied. Caller holds ``_lock``.
        """
        if "Connecting to Gemini" in line:
            self._set_state("connecting", line)
        elif "Connected! Sanad is listening" in line:
            self._set_state("listening", "Listening for speech.")
        elif "USER SAID:" in line:
            text = line.split("USER SAID:", 1)[1].strip()
            if text:
                self.last_user_text = text
                self.user_transcript.append(text)
                self._set_state("hearing", f"User: {text}")
        elif "Interruption!" in line:
            self._set_state("interrupting", line)
        elif any(k in line for k in ("Mic Error:", "Speaker Error:", "Fatal Error:")):
            self._set_state("error", line)
        elif "WebSocket closed." in line:
            self._set_state("warning", line)
        elif "Ma'a Salama" in line:
            self._set_state("stopped", line)

    def _reader_loop(self, proc: "subprocess.Popen | None" = None):
        """Tail the child's output until EOF, then record the exit.

        Args:
            proc: The child to read from. Defaults to ``self.process`` so
                the method can still be called without arguments.
        """
        if proc is None:
            proc = self.process
        if proc is None or proc.stdout is None:
            return

        # Every line goes to the on-disk log — including the ALSA noise
        # that we filter out of the in-memory tail. That way a field
        # post-mortem has the full raw capture if we need it.
        fh = self._open_session_log(proc.pid)
        self._log_file = fh
        write_failed = False
        crashed = False
        try:
            for raw in proc.stdout:
                clean = raw.rstrip()
                if not clean:
                    continue
                if fh is not None:
                    try:
                        fh.write(clean + "\n")
                    except Exception as exc:
                        # Best-effort: keep tailing, but report the first
                        # failure instead of swallowing it silently.
                        if not write_failed:
                            write_failed = True
                            log.warning("Live-gemini log write failed: %s", exc)
                with self._lock:
                    if self._is_noisy(clean):
                        self.suppressed_noise += 1
                        continue
                    self.log_tail.append(clean)
                    self._track_line(clean)
        except Exception:
            crashed = True
            log.exception("Live Gemini reader loop failed")
        finally:
            with self._lock:
                # A reader left over from a previous session must not
                # overwrite the state of a child started since then.
                is_current = self.process is proc or self.process is None
                if crashed:
                    self.log_tail.append("Live Gemini log reader failed.")
                    if is_current:
                        self._set_state("error", "Log reader failed.")
                else:
                    self.log_tail.append("Live Gemini process exited.")
                    if is_current:
                        self._set_state("stopped", "Process exited.")

            # Release the pipe now rather than waiting for garbage collection.
            try:
                proc.stdout.close()
            except (OSError, ValueError):
                pass

            if fh is not None:
                try:
                    fh.write(
                        f"===== live_gemini subprocess exit "
                        f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====\n"
                    )
                except Exception as exc:
                    log.warning("Live-gemini log write failed: %s", exc)
                # Close separately so a failed banner write cannot skip it.
                try:
                    fh.close()
                except Exception:
                    pass
            if self._log_file is fh:
                self._log_file = None

    def is_running(self) -> bool:
        """Return True while a child exists and has not exited."""
        with self._lock:
            return self.process is not None and self.process.poll() is None

    def _build_env(self) -> dict[str, Any]:
        """Build the child's environment from ``os.environ`` plus overrides."""
        env = os.environ.copy()
        env.update({"PYTHONUNBUFFERED": "1", **LIVE_TUNE})

        # Pass the current G1 speaker volume as an env var so the
        # subprocess can compute the correct barge-in threshold at
        # startup. Without this, sanad_voice.py would read the volume
        # from a stale or non-existent config file path and default to
        # 100, scaling the barge-in threshold wrong for any non-100%
        # volume. load_config() reads data/motions/config.json — the
        # file the dashboard writes to when the user moves the slider.
        try:
            from Project.Sanad.config import load_config

            cfg = load_config() or {}
            audio_cfg = cfg.get("audio") if isinstance(cfg.get("audio"), dict) else {}
            volume = max(0, min(100, int(audio_cfg.get("g1_volume", 100))))
            env["SANAD_G1_VOLUME"] = str(volume)
            log.info("Passing SANAD_G1_VOLUME=%d to subprocess", volume)
        except Exception as exc:
            log.warning("Could not read g1_volume for subprocess: %s", exc)
        return env

    def start(self) -> dict[str, Any]:
        """Launch the voice script unless a child is already running.

        The running-check and the spawn happen under one lock acquisition,
        so concurrent callers cannot start two children.

        Returns:
            ``{"started": True, "pid": ...}`` on success, or
            ``{"started": False, "message": "Already running.", "pid": ...}``.

        Raises:
            RuntimeError: ``LIVE_SCRIPT`` does not exist.
            OSError: the interpreter could not be spawned.
            In both cases ``state`` is set to ``"error"`` first, so
            ``status()`` does not report ``"starting"`` indefinitely.
        """
        script = LIVE_SCRIPT
        with self._lock:
            current = self.process
            if current is not None and current.poll() is None:
                return {"started": False, "message": "Already running.", "pid": current.pid}
            self._set_state("starting", "Starting...")

            try:
                if not script.exists():
                    raise RuntimeError(f"Script not found: {script}")

                env = self._build_env()
                # sanad_voice.py takes the DDS interface as the first positional arg
                dds_iface = env.get("SANAD_DDS_INTERFACE", "eth0")
                proc = subprocess.Popen(
                    [sys.executable, str(script), dds_iface],
                    cwd=str(script.parent),
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    text=True,
                    # Undecodable bytes from the child must not kill the
                    # reader thread with a UnicodeDecodeError.
                    errors="replace",
                    bufsize=1,
                    env=env,
                )
            except Exception as exc:
                failure = f"Start failed: {exc}"
                self.log_tail.append(failure)
                self._set_state("error", failure)
                raise

            self.process = proc
            self.log_tail.append(f"Started: pid={proc.pid}")
            self._set_state("starting", f"pid={proc.pid}")
            self._reader_thread = threading.Thread(
                target=self._reader_loop, args=(proc,), daemon=True
            )
            self._reader_thread.start()

        log.info("Live Gemini subprocess started: pid=%d", proc.pid)
        return {"started": True, "pid": proc.pid}

    def stop(self) -> dict[str, Any]:
        """Stop the child, escalating SIGINT -> terminate() -> kill().

        Returns:
            ``{"stopped": True, "returncode": ...}`` after a shutdown, or
            ``{"stopped": False, "message": "Not running."}``.

        Raises:
            subprocess.TimeoutExpired: the child is still alive after
                ``kill()`` and the final wait.
        """
        with self._lock:
            proc = self.process
            if proc is None or proc.poll() is not None:
                return {"stopped": False, "message": "Not running."}
            self._set_state("stopping", "Stopping...")

        try:
            proc.send_signal(signal.SIGINT)
            proc.wait(timeout=_STOP_TIMEOUT_SEC)
        except subprocess.TimeoutExpired:
            proc.terminate()
            try:
                proc.wait(timeout=_TERMINATE_TIMEOUT_SEC)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait(timeout=_TERMINATE_TIMEOUT_SEC)

        rc = proc.returncode

        with self._lock:
            # Only clear the slot if it still holds the child we stopped;
            # a concurrent start() may already have installed a new one.
            if self.process is proc:
                self.process = None
                self._set_state("stopped", "Stopped.")
            self.log_tail.append("Stopped.")

        log.info("Live Gemini subprocess stopped (rc=%s)", rc)
        return {"stopped": True, "returncode": rc}

    def status(self) -> dict[str, Any]:
        """Return a snapshot of process, state, transcript and log tail."""
        with self._lock:
            running = self.process is not None and self.process.poll() is None
            return {
                "running": running,
                "pid": self.process.pid if running and self.process else None,
                "state": self.state,
                "state_message": self.state_message,
                "last_user_text": self.last_user_text,
                "user_transcript": list(self.user_transcript),
                "log_tail": list(self.log_tail),
                "suppressed_noise": self.suppressed_noise,
            }
|