# Marcus/Voice/gemini_script.py  (~300 lines, 11 KiB, Python — file-viewer header kept as a comment)
"""Voice/gemini_script.py — subprocess manager for Gemini Live STT.
Runs in marcus's Python 3.8 env. The actual Gemini STT lives in
[Voice/gemini_runner.py](Voice/gemini_runner.py) which has to run in a
Python 3.10+ env (e.g. the `gemini_sdk` conda env on the Jetson) because
`google-genai` doesn't support Python 3.8.
This file spawns the runner as a subprocess, reads JSON-line transcripts
off its stdout, and turns them into the same `on_transcript` / `on_command`
callbacks the rest of marcus expects. The external API of class
`GeminiBrain` is unchanged from the previous in-process port — drop-in
swap for `Voice/marcus_voice.py::_voice_loop_gemini`.
Sanad uses the same subprocess pattern (its own `live_voice_loop.py`
parses log lines from a Gemini subprocess), so this matches Sanad's
architecture not just in mechanism but in shape.
────────────────────────────────────────────────────────────────────────
Subprocess lookup order for the Python 3.10+ binary:
1. env MARCUS_GEMINI_PYTHON (highest priority)
2. config stt.gemini_python_path
3. auto-detect — try a list of common conda env paths
4. raise — explicit error in voice.log
────────────────────────────────────────────────────────────────────────
"""
from __future__ import annotations
import json
import logging
import os
import subprocess
import sys
import threading
from typing import Callable, Optional
# Module logger; messages surface in voice.log under the "gemini_brain" name.
log = logging.getLogger("gemini_brain")

# Candidate conda-env paths for the Python 3.10+ binary. Override with
# MARCUS_GEMINI_PYTHON or stt.gemini_python_path if the env lives elsewhere.
# Tried in order by _resolve_runner_python(); each entry goes through
# os.path.expanduser() before the exists/executable check.
_DEFAULT_CANDIDATES = [
    "~/miniconda3/envs/gemini_sdk/bin/python",
    "~/anaconda3/envs/gemini_sdk/bin/python",
    "~/.miniconda3/envs/gemini_sdk/bin/python",
    "/opt/conda/envs/gemini_sdk/bin/python",
    "~/miniconda3/envs/sanad/bin/python",
    "~/anaconda3/envs/sanad/bin/python",
]
def _resolve_runner_python(stt_cfg: dict) -> str:
    """Find the Python 3.10+ binary that can import google-genai.

    Lookup order: env MARCUS_GEMINI_PYTHON, then config key
    ``stt.gemini_python_path``, then the hard-coded candidate list.
    An explicitly configured path that is missing or non-executable is a
    configuration error and raises — there is deliberately no silent
    fallback to auto-detection in that case.

    Raises:
        FileNotFoundError: explicit setting is unusable, or no candidate
            path exists.
    """
    def _usable(binary: str) -> bool:
        # A path counts only if it's an existing, executable regular file.
        return os.path.isfile(binary) and os.access(binary, os.X_OK)

    explicit = os.environ.get("MARCUS_GEMINI_PYTHON") or stt_cfg.get(
        "gemini_python_path", ""
    )
    if explicit:
        expanded = os.path.expanduser(explicit)
        if _usable(expanded):
            return expanded
        # Fail loudly: the operator pointed at a specific binary, so a
        # quiet fallback would mask the misconfiguration.
        raise FileNotFoundError(
            "MARCUS_GEMINI_PYTHON / stt.gemini_python_path = "
            "{!r} but that binary does not exist or is not executable".format(expanded)
        )

    for candidate in _DEFAULT_CANDIDATES:
        expanded = os.path.expanduser(candidate)
        if not _usable(expanded):
            continue
        log.info("auto-detected gemini-runner python at %s", expanded)
        return expanded

    raise FileNotFoundError(
        "no Python 3.10+ env found for the Gemini runner. Set env "
        "MARCUS_GEMINI_PYTHON to the path of a conda env's python with "
        "`google-genai` installed (e.g. ~/miniconda3/envs/gemini_sdk/bin/python)."
    )
class GeminiBrain:
    """Subprocess-managing wrapper around Voice/gemini_runner.py.

    External API kept identical to the in-process version so callers don't
    care that Gemini lives in another Python:

        brain = GeminiBrain(audio_io, recorder, voice_name, system_prompt,
                            api_key=..., on_transcript=cb1, on_command=cb2)
        brain.start()
        ...
        brain.stop()

    `audio_io` and `recorder` are accepted for API parity but unused —
    the subprocess owns its own mic and writes its own WAVs (one process
    owning the whole audio path is simpler than streaming PCM over a pipe).
    """

    def __init__(
        self,
        audio_io,          # ignored (runner owns its own)
        recorder,          # ignored (runner owns its own)
        voice_name=None,   # forwarded via env
        system_prompt="",  # forwarded via env (or config)
        *,
        api_key: str = "",
        on_transcript: Optional[Callable[[str], None]] = None,
        on_command: Optional[Callable[[str, str], None]] = None,
    ):
        # Empty strings (not None) so the env-forwarding checks in start()
        # are simple truthiness tests.
        self._voice_name = voice_name or ""
        self._system_prompt = system_prompt or ""
        self._api_key = api_key
        self._on_transcript = on_transcript
        self._on_command = on_command
        self._proc = None  # type: Optional[subprocess.Popen]
        self._reader_thread = None  # type: Optional[threading.Thread]
        self._err_thread = None  # type: Optional[threading.Thread]
        # Set by stop() so _stdout_reader can bail out of its read loop.
        self._stopping = False
        # config-loaded lazily so import order doesn't matter
        try:
            from Core.config_loader import load_config
            cfg = load_config("Voice") or {}
        except Exception:
            # Best-effort: missing/broken config just means empty stt
            # settings, which _resolve_runner_python handles.
            cfg = {}
        self._stt = cfg.get("stt", {})

    # ─── lifecycle ────────────────────────────────────────
    def start(self) -> None:
        """Spawn the runner subprocess plus its stdout/stderr drain threads.

        Idempotent while a runner is already alive. Every startup failure
        (no usable python, missing runner script, Popen error) is logged
        and swallowed — the caller's voice loop keeps running without STT
        rather than crashing.
        """
        if self._proc is not None and self._proc.poll() is None:
            log.warning("GeminiBrain subprocess already running")
            return
        # Reset so drain threads from a fresh spawn don't see a stale stop.
        self._stopping = False
        try:
            python_bin = _resolve_runner_python(self._stt)
        except FileNotFoundError as e:
            log.error("%s", e)
            return
        runner = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "gemini_runner.py")
        )
        if not os.path.isfile(runner):
            log.error("gemini_runner.py not found at %s", runner)
            return
        # Runner inherits our env plus Marcus-specific settings; only
        # truthy values are forwarded so the runner's own defaults apply.
        env = os.environ.copy()
        if self._api_key:
            env["MARCUS_GEMINI_API_KEY"] = self._api_key
        if self._voice_name:
            env["MARCUS_GEMINI_VOICE"] = self._voice_name
        # Forward the system prompt via env so the runner doesn't have to
        # re-read the JSON file (and so a trimmed inline string survives).
        if self._system_prompt:
            env["MARCUS_GEMINI_SYSTEM_PROMPT"] = self._system_prompt
        # Project root = two levels up from Voice/gemini_runner.py.
        env["MARCUS_PROJECT_ROOT"] = os.path.dirname(os.path.dirname(runner))
        log.info("spawning gemini runner: %s -u %s", python_bin, runner)
        try:
            self._proc = subprocess.Popen(
                [python_bin, "-u", runner],  # -u: unbuffered child stdout
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.dirname(runner)),
                env=env,
                bufsize=1,                # line-buffered (valid in text mode)
                universal_newlines=True,  # text pipes for the JSON lines
            )
        except Exception as e:
            log.error("failed to spawn gemini runner: %s", e)
            self._proc = None
            return
        # Daemon threads: they die with the main process and exit on their
        # own when the child's pipes hit EOF.
        self._reader_thread = threading.Thread(
            target=self._stdout_reader, daemon=True, name="gemini-stdout",
        )
        self._reader_thread.start()
        self._err_thread = threading.Thread(
            target=self._stderr_reader, daemon=True, name="gemini-stderr",
        )
        self._err_thread.start()

    def flush_mic(self) -> None:
        """
        Tell the runner subprocess to drop its buffered mic audio.
        Used before AND after the brain speaks via TtsMaker so the robot's
        own voice (which the mic picks up during playback) doesn't come
        back from Gemini as a fake user utterance and accidentally hit
        the wake-word gate.
        """
        proc = self._proc
        if proc is None or proc.stdin is None:
            return
        try:
            if not proc.stdin.closed:
                # One-word command protocol on the child's stdin.
                proc.stdin.write("flush\n")
                proc.stdin.flush()
        except Exception:
            # Best-effort: a dead/closing child just means nothing to flush.
            pass

    def stop(self) -> None:
        """Shut the runner down with escalating force.

        Sequence: "stop\\n" on stdin → wait 3 s → SIGTERM → wait 2 s →
        SIGKILL. Never raises; every step is best-effort because the child
        may already be gone at any point.
        """
        self._stopping = True
        proc = self._proc
        if proc is None:
            return
        # Polite stop: send "stop\n" on stdin, then wait briefly, then SIGTERM.
        try:
            if proc.stdin and not proc.stdin.closed:
                try:
                    proc.stdin.write("stop\n")
                    proc.stdin.flush()
                except Exception:
                    pass
        except Exception:
            pass
        try:
            proc.wait(timeout=3)
        except Exception:
            # Still alive after the polite window — escalate to SIGTERM.
            try:
                proc.terminate()
            except Exception:
                pass
            try:
                proc.wait(timeout=2)
            except Exception:
                # Last resort: SIGKILL. No further wait; the daemon drain
                # threads clean up when the pipes close.
                try:
                    proc.kill()
                except Exception:
                    pass
        self._proc = None

    # ─── stdout / stderr drainers ─────────────────────────
    def _stdout_reader(self) -> None:
        """Drain the child's stdout line-by-line (daemon thread).

        Each line should be one JSON message; non-JSON lines are logged
        raw for debugging. Loop ends on child EOF or when stop() sets
        self._stopping.
        """
        proc = self._proc
        if proc is None or proc.stdout is None:
            return
        for line in proc.stdout:
            if self._stopping:
                break
            line = line.strip()
            if not line:
                continue
            try:
                msg = json.loads(line)
            except Exception:
                # Non-JSON line — log it raw so we can debug runner crashes.
                log.warning("gemini-runner stdout (non-JSON): %s", line[:200])
                continue
            self._handle_msg(msg)

    def _stderr_reader(self) -> None:
        """Forward the child's stderr into our log (daemon thread).

        Lines are truncated to 200 chars to keep voice.log readable.
        """
        proc = self._proc
        if proc is None or proc.stderr is None:
            return
        for line in proc.stderr:
            line = line.rstrip()
            if line:
                log.warning("gemini-runner stderr: %s", line[:200])

    def _handle_msg(self, msg: dict) -> None:
        """Dispatch one decoded JSON message from the runner.

        Known "type" values handled here: "user" (final transcript —
        fires on_transcript then on_command), "bot", "turn_end", "ready",
        "reconnect", and "log" (runner-side log relay). Unknown types are
        logged at debug level.
        """
        t = msg.get("type")
        if t == "user":
            text = (msg.get("text") or "").strip()
            if not text:
                return
            log.info("USER: %s", text)
            # Callbacks are isolated: a failure in one must not stop the
            # reader thread or skip the other callback.
            if self._on_transcript is not None:
                try:
                    self._on_transcript(text)
                except Exception as e:
                    log.error("on_transcript failed: %s", e)
            if self._on_command is not None:
                try:
                    # Language hard-coded to "en" here — the runner does
                    # not report one. NOTE(review): confirm if multilingual
                    # transcripts are expected.
                    self._on_command(text, "en")
                except Exception as e:
                    log.error("on_command failed: %s", e)
        elif t == "bot":
            txt = (msg.get("text") or "").strip()
            if txt:
                log.info("GEMINI: %s", txt[:120])
        elif t == "turn_end":
            log.info("listening")
        elif t == "ready":
            log.info("connected — listening for speech")
        elif t == "reconnect":
            log.info("server signalled reconnect: %s", msg.get("reason", ""))
        elif t == "log":
            # Runner relays its own log records as {"type":"log",
            # "level":..., "msg":...}; map them onto our logger.
            level = msg.get("level", "info")
            text = msg.get("msg", "")
            if level == "error":
                log.error("[runner] %s", text)
            elif level == "warn":
                log.warning("[runner] %s", text)
            else:
                log.info("[runner] %s", text)
        else:
            log.debug("gemini-runner unknown type=%r: %s", t, msg)