# Sanad/voice/live_gemini_subprocess.py

"""Live Gemini Subprocess Manager — start/stop sanad_voice.py as managed child.

Mirrors gemini_voice_v2/LiveGeminiManager. Launches the voice script as a
subprocess, tails stdout, parses state transitions and user transcripts.
"""

from __future__ import annotations

import os
import signal
import subprocess
import sys
import threading
import time
from collections import deque
from datetime import datetime
from typing import Any

from pathlib import Path

from Project.Sanad.config import BASE_DIR, LOGS_DIR, SCRIPTS_DIR, LIVE_TUNE
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger

# Module-level logger; every message from this manager is emitted under the
# "live_subprocess" logger name.
log = get_logger("live_subprocess")

# Settings object for this module, looked up with the ("voice",
# "live_gemini_subprocess") keys. The rest of the file only relies on it
# offering a dict-style ``.get(key, default)``.
# NOTE(review): presumably a plain dict loaded from the project config —
# confirm against core.config_loader.section.
_LS_CFG = _cfg_section("voice", "live_gemini_subprocess")


def _resolve_live_script() -> Path:
    """Locate the voice script that is run as the child process.

    Resolution order:

    1. ``SANAD_LIVE_SCRIPT`` (whitespace-stripped, ``~`` expanded) when it is
       set and the path exists.
    2. ``BASE_DIR / "voice" / "sanad_voice.py"``.
    3. ``SCRIPTS_DIR / "sanad_voice.py"``.

    Returns:
        The first existing candidate. When none exists, the ``SCRIPTS_DIR``
        path is returned anyway so the caller can report a concrete
        "not found" location.
    """
    override = os.environ.get("SANAD_LIVE_SCRIPT", "").strip()
    if override:
        override_path = Path(override).expanduser()
        if override_path.exists():
            return override_path
        # A mistyped override used to be ignored silently, which made it
        # look as if the override had been honoured. Say so explicitly.
        log.warning(
            "SANAD_LIVE_SCRIPT=%s does not exist; falling back to the default voice script",
            override,
        )

    fallback = SCRIPTS_DIR / "sanad_voice.py"
    for candidate in (BASE_DIR / "voice" / "sanad_voice.py", fallback):
        if candidate.exists():
            return candidate
    return fallback


# Script path is resolved once, at import time; changing SANAD_LIVE_SCRIPT
# afterwards has no effect on this constant.
LIVE_SCRIPT = _resolve_live_script()
# Maximum number of lines kept in the in-memory log tail (deque maxlen).
LOG_TAIL_SIZE = _LS_CFG.get("log_tail_size", 2000)
# Maximum number of user utterances kept in the transcript tail (deque maxlen).
TRANSCRIPT_TAIL_SIZE = _LS_CFG.get("transcript_tail_size", 30)

# Persistent on-disk log for the full subprocess session. The file name is
# LIVE_LOG_NAME plus a YYYYMMDD date suffix and ".log", opened in append mode.
LIVE_LOG_DIR = LOGS_DIR
LIVE_LOG_NAME = _LS_CFG.get("log_name", "live_gemini_subprocess")

# Seconds to wait for the child to exit after SIGINT before escalating.
_STOP_TIMEOUT_SEC = _LS_CFG.get("stop_timeout_sec", 3.0)
# Seconds to wait after terminate(), and again after kill().
_TERMINATE_TIMEOUT_SEC = _LS_CFG.get("terminate_timeout_sec", 2.0)

# A line is treated as noise when it STARTS WITH any of these strings ...
_NOISY_PREFIXES = tuple(_LS_CFG.get("noisy_prefixes", [
    "ALSA lib ", "Expression 'alsa_", "Cannot connect to server socket",
    "jack server is not running",
]))
# ... or CONTAINS any of these substrings. Noisy lines are still written to
# the on-disk log; they are only kept out of the in-memory tail.
_NOISY_FRAGMENTS = tuple(_LS_CFG.get("noisy_fragments", [
    "Unknown PCM", "Evaluate error", "snd_pcm_open_noupdate",
    "PaAlsaStream", "snd_config_evaluate", "snd_func_refer",
]))


class LiveGeminiSubprocess:
    """Run the live voice script as a managed child process.

    The script is launched with stderr merged into stdout. A daemon reader
    thread copies every output line to a per-day log file, keeps a bounded
    in-memory tail of the non-noisy lines, and derives a coarse ``state`` /
    ``state_message`` from well-known marker strings in the output.

    The process handle, state, tails and counters are mutated under
    ``self._lock``; ``status()`` returns a snapshot taken under that lock.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self.process: subprocess.Popen | None = None
        # Bounded tails: old entries fall off automatically.
        self.log_tail: deque[str] = deque(maxlen=LOG_TAIL_SIZE)
        self.user_transcript: deque[str] = deque(maxlen=TRANSCRIPT_TAIL_SIZE)
        self._reader_thread: threading.Thread | None = None
        self._log_file = None  # opened per-session in _reader_loop
        self.state = "stopped"
        self.state_message = "Idle."
        self.last_user_text = ""
        # Number of output lines dropped from the in-memory tail as noise.
        self.suppressed_noise = 0

    def _open_session_log(self, pid: int):
        """Open the per-day append log and write a session-start banner.

        Args:
            pid: Child process id, recorded in the banner.

        Returns:
            The open, line-buffered text handle, or ``None`` when the file
            could not be opened or written (logging to disk is best-effort).
        """
        fh = None
        try:
            LIVE_LOG_DIR.mkdir(parents=True, exist_ok=True)
            now = datetime.now()
            fname = f"{LIVE_LOG_NAME}_{now.strftime('%Y%m%d')}.log"
            fh = open(LIVE_LOG_DIR / fname, "a", encoding="utf-8", buffering=1)
            fh.write(
                f"\n===== live_gemini subprocess start "
                f"{now.strftime('%Y-%m-%d %H:%M:%S')} pid={pid} =====\n"
            )
            return fh
        except Exception as exc:
            log.warning("Could not open live-gemini log file: %s", exc)
            if fh is not None:
                # The banner write failed after a successful open: do not
                # leak the handle.
                try:
                    fh.close()
                except OSError:
                    pass  # nothing more can be done for a broken handle
            return None

    def _close_session_log(self, fh) -> None:
        """Write the session-exit banner and close ``fh`` (no-op for ``None``)."""
        if fh is None:
            return
        try:
            fh.write(
                f"===== live_gemini subprocess exit "
                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====\n"
            )
        except (OSError, ValueError) as exc:
            log.warning("Could not write live-gemini exit banner: %s", exc)
        finally:
            try:
                fh.close()
            except OSError:
                pass  # best-effort close of the log file

    def _is_noisy(self, line: str) -> bool:
        """Return True when ``line`` matches a configured noise prefix or fragment."""
        return line.startswith(_NOISY_PREFIXES) or any(f in line for f in _NOISY_FRAGMENTS)

    def _set_state(self, state: str, msg: str):
        """Set ``state`` and ``state_message`` together (caller holds the lock)."""
        self.state = state
        self.state_message = msg

    def _track_line(self, line: str):
        """Update state / transcript from marker strings found in ``line``.

        Only the first matching marker is applied. A ``USER SAID:`` line with
        no text after the marker leaves the state untouched.
        """
        if "Connecting to Gemini" in line:
            self._set_state("connecting", line)
        elif "Connected! Sanad is listening" in line:
            self._set_state("listening", "Listening for speech.")
        elif "USER SAID:" in line:
            text = line.split("USER SAID:", 1)[1].strip()
            if text:
                self.last_user_text = text
                self.user_transcript.append(text)
                self._set_state("hearing", f"User: {text}")
        elif "Interruption!" in line:
            self._set_state("interrupting", line)
        elif any(k in line for k in ("Mic Error:", "Speaker Error:", "Fatal Error:")):
            self._set_state("error", line)
        elif "WebSocket closed." in line:
            self._set_state("warning", line)
        elif "Ma'a Salama" in line:
            self._set_state("stopped", line)

    def _reader_loop(self):
        """Drain the child's output until EOF (body of the reader thread).

        Every non-empty line goes to the on-disk log — including the ALSA
        noise that is filtered out of the in-memory tail — so a field
        post-mortem has the full raw capture. Exit bookkeeping runs in a
        ``finally`` so it happens even when reading the pipe fails.
        """
        proc = self.process
        if proc is None or proc.stdout is None:
            return
        fh = self._open_session_log(proc.pid)
        self._log_file = fh
        write_failed = False
        try:
            for line in proc.stdout:
                clean = line.rstrip()
                if not clean:
                    continue
                if fh is not None:
                    try:
                        fh.write(clean + "\n")
                    except (OSError, ValueError) as exc:
                        # Keep draining the pipe no matter what; warn once
                        # instead of flooding the logger on every line.
                        if not write_failed:
                            write_failed = True
                            log.warning("Live-gemini log write failed: %s", exc)
                with self._lock:
                    if self._is_noisy(clean):
                        self.suppressed_noise += 1
                        continue
                    self.log_tail.append(clean)
                    # Late output of a process that was already stopped or
                    # replaced must not drive the state of the current one.
                    if self.process is proc:
                        self._track_line(clean)
        except (OSError, ValueError) as exc:
            log.warning("Live Gemini output reader aborted: %s", exc)
        finally:
            with self._lock:
                self.log_tail.append("Live Gemini process exited.")
                # Same guard as above: a stale reader must not mark a newer
                # session as stopped.
                if self.process is proc:
                    self._set_state("stopped", "Process exited.")
            self._close_session_log(fh)
            if self._log_file is fh:
                self._log_file = None
            try:
                proc.stdout.close()
            except OSError:
                pass  # pipe already broken; nothing left to release

    def is_running(self) -> bool:
        """Return True while a child exists and has not exited."""
        with self._lock:
            return self.process is not None and self.process.poll() is None

    def _build_env(self) -> dict[str, str]:
        """Build the child's environment from ours plus the live tuning values."""
        env = os.environ.copy()
        env.update({"PYTHONUNBUFFERED": "1", **LIVE_TUNE})

        # Pass the current G1 speaker volume as an env var so the
        # subprocess can compute the correct barge-in threshold at
        # startup. Without this, sanad_voice.py would read the volume
        # from a stale or non-existent config file path and default to
        # 100, scaling the barge-in threshold wrong for any non-100%
        # volume. load_config() reads data/motions/config.json — the
        # file the dashboard writes to when the user moves the slider.
        try:
            from Project.Sanad.config import load_config
            _cfg = load_config() or {}
            _audio_cfg = _cfg.get("audio") if isinstance(_cfg.get("audio"), dict) else {}
            _g1_vol = int(_audio_cfg.get("g1_volume", 100))
            _g1_vol = max(0, min(100, _g1_vol))
            env["SANAD_G1_VOLUME"] = str(_g1_vol)
            log.info("Passing SANAD_G1_VOLUME=%d to subprocess", _g1_vol)
        except Exception as exc:
            log.warning("Could not read g1_volume for subprocess: %s", exc)
        return env

    def start(self) -> dict[str, Any]:
        """Launch the voice script unless one is already running.

        Returns:
            ``{"started": True, "pid": ...}`` on success, or
            ``{"started": False, "message": "Already running.", "pid": ...}``.

        Raises:
            RuntimeError: The resolved script path does not exist.
            OSError: The process could not be spawned.

        On either failure the state is set to ``"error"`` instead of being
        left at ``"starting"``.
        """
        with self._lock:
            if self.process is not None and self.process.poll() is None:
                return {"started": False, "message": "Already running.", "pid": self.process.pid}
            self._set_state("starting", "Starting...")

        script = LIVE_SCRIPT
        if not script.exists():
            message = f"Script not found: {script}"
            with self._lock:
                self._set_state("error", message)
            raise RuntimeError(message)

        env = self._build_env()

        # sanad_voice.py takes the DDS interface as the first positional arg
        dds_iface = env.get("SANAD_DDS_INTERFACE", "eth0")
        cmd = [sys.executable, str(script), dds_iface]

        with self._lock:
            # Re-check under the lock that also publishes the new handle, so
            # two concurrent start() calls cannot both spawn a child.
            if self.process is not None and self.process.poll() is None:
                return {"started": False, "message": "Already running.", "pid": self.process.pid}
            try:
                proc = subprocess.Popen(
                    cmd,
                    cwd=str(script.parent),
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    text=True,
                    # Decode explicitly: with the locale default, a C/POSIX
                    # locale plus non-ASCII output (e.g. Arabic transcripts)
                    # would raise in the reader thread and stall the child
                    # on a full pipe.
                    encoding="utf-8",
                    errors="replace",
                    bufsize=1,
                    env=env,
                )
            except Exception as exc:
                self._set_state("error", f"Failed to launch {script.name}: {exc}")
                raise
            self.process = proc
            self.log_tail.append(f"Started: pid={proc.pid}")
            self._set_state("starting", f"pid={proc.pid}")
            self._reader_thread = threading.Thread(target=self._reader_loop, daemon=True)
            self._reader_thread.start()

        log.info("Live Gemini subprocess started: pid=%d", proc.pid)
        return {"started": True, "pid": proc.pid}

    def stop(self) -> dict[str, Any]:
        """Stop the child: SIGINT first, then terminate(), then kill().

        Returns:
            ``{"stopped": True, "returncode": ...}``, or
            ``{"stopped": False, "message": "Not running."}`` when there is
            no live child.

        Raises:
            subprocess.TimeoutExpired: The child did not exit even after
                ``kill()`` within the configured timeout.
        """
        with self._lock:
            proc = self.process
            if proc is None or proc.poll() is not None:
                return {"stopped": False, "message": "Not running."}
            reader = self._reader_thread
            self._set_state("stopping", "Stopping...")

        try:
            proc.send_signal(signal.SIGINT)
            proc.wait(timeout=_STOP_TIMEOUT_SEC)
        except subprocess.TimeoutExpired:
            proc.terminate()
            try:
                proc.wait(timeout=_TERMINATE_TIMEOUT_SEC)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait(timeout=_TERMINATE_TIMEOUT_SEC)

        rc = proc.returncode

        # Give the reader a bounded chance to drain the remaining output so
        # the tail is complete before "Stopped." is appended. The timeout
        # covers descendants of the child that keep the pipe open.
        if reader is not None and reader is not threading.current_thread():
            reader.join(timeout=_TERMINATE_TIMEOUT_SEC)

        with self._lock:
            # Leave handle and state alone if a newer session was started
            # while this one was shutting down.
            superseded = self.process is not None and self.process is not proc
            if not superseded:
                self.process = None
                self._set_state("stopped", "Stopped.")
            self.log_tail.append("Stopped.")

        log.info("Live Gemini subprocess stopped (rc=%s)", rc)
        return {"stopped": True, "returncode": rc}

    def status(self) -> dict[str, Any]:
        """Return a snapshot of process, state, transcript and log tail.

        The deques are copied into lists so the caller gets stable data.
        """
        with self._lock:
            running = self.process is not None and self.process.poll() is None
            return {
                "running": running,
                "pid": self.process.pid if running and self.process else None,
                "state": self.state,
                "state_message": self.state_message,
                "last_user_text": self.last_user_text,
                "user_transcript": list(self.user_transcript),
                "log_tail": list(self.log_tail),
                "suppressed_noise": self.suppressed_noise,
            }