"""Voice/gemini_script.py — subprocess manager for Gemini Live STT. Runs in marcus's Python 3.8 env. The actual Gemini STT lives in [Voice/gemini_runner.py](Voice/gemini_runner.py) which has to run in a Python 3.10+ env (e.g. the `gemini_sdk` conda env on the Jetson) because `google-genai` doesn't support Python 3.8. This file spawns the runner as a subprocess, reads JSON-line transcripts off its stdout, and turns them into the same `on_transcript` / `on_command` callbacks the rest of marcus expects. The external API of class `GeminiBrain` is unchanged from the previous in-process port — drop-in swap for `Voice/marcus_voice.py::_voice_loop_gemini`. Sanad uses the same subprocess pattern (its own `live_voice_loop.py` parses log lines from a Gemini subprocess), so this matches Sanad's architecture not just in mechanism but in shape. ──────────────────────────────────────────────────────────────────────── Subprocess lookup order for the Python 3.10+ binary: 1. env MARCUS_GEMINI_PYTHON (highest priority) 2. config stt.gemini_python_path 3. auto-detect — try a list of common conda env paths 4. raise — explicit error in voice.log ──────────────────────────────────────────────────────────────────────── """ from __future__ import annotations import json import logging import os import subprocess import sys import threading from typing import Callable, Optional log = logging.getLogger("gemini_brain") # Candidate conda-env paths for the Python 3.10+ binary. Override with # MARCUS_GEMINI_PYTHON or stt.gemini_python_path if the env lives elsewhere. 
_DEFAULT_CANDIDATES = [
    "~/miniconda3/envs/gemini_sdk/bin/python",
    "~/anaconda3/envs/gemini_sdk/bin/python",
    "~/.miniconda3/envs/gemini_sdk/bin/python",
    "/opt/conda/envs/gemini_sdk/bin/python",
    "~/miniconda3/envs/sanad/bin/python",
    "~/anaconda3/envs/sanad/bin/python",
]


def _is_executable_file(path: str) -> bool:
    """Return True when *path* is an existing regular file we may execute."""
    return os.path.isfile(path) and os.access(path, os.X_OK)


def _resolve_runner_python(stt_cfg: dict) -> str:
    """Find the Python 3.10+ binary that can import google-genai.

    Lookup order:
      1. env ``MARCUS_GEMINI_PYTHON``
      2. ``stt_cfg["gemini_python_path"]``
      3. the first executable entry of ``_DEFAULT_CANDIDATES``

    Args:
        stt_cfg: the ``stt`` section of the Voice config. ``None`` or an
            empty mapping is treated as "nothing configured".

    Returns:
        The user-expanded path of the interpreter to run the runner with.

    Raises:
        FileNotFoundError: an explicit path was given but is not an
            executable file, or nothing was given and no candidate exists.
    """
    # An explicit setting is authoritative: when it is wrong we fail loudly
    # instead of silently falling back to auto-detection.
    explicit = (
        os.environ.get("MARCUS_GEMINI_PYTHON")
        or (stt_cfg or {}).get("gemini_python_path", "")
    )
    if explicit:
        path = os.path.expanduser(explicit)
        if _is_executable_file(path):
            return path
        raise FileNotFoundError(
            "MARCUS_GEMINI_PYTHON / stt.gemini_python_path = "
            "{!r} but that binary does not exist or is not executable".format(path)
        )

    for cand in _DEFAULT_CANDIDATES:
        path = os.path.expanduser(cand)
        if _is_executable_file(path):
            log.info("auto-detected gemini-runner python at %s", path)
            return path

    raise FileNotFoundError(
        "no Python 3.10+ env found for the Gemini runner. Set env "
        "MARCUS_GEMINI_PYTHON to the path of a conda env's python with "
        "`google-genai` installed (e.g. ~/miniconda3/envs/gemini_sdk/bin/python)."
    )


class GeminiBrain:
    """Subprocess-managing wrapper around Voice/gemini_runner.py.

    External API kept identical to the in-process version so callers don't
    care that Gemini lives in another Python:

        brain = GeminiBrain(audio_io, recorder, voice_name, system_prompt,
                            api_key=..., on_transcript=cb1, on_command=cb2)
        brain.start()
        ...
        brain.stop()

    `audio_io` and `recorder` are accepted for API parity but unused — the
    subprocess owns its own mic and writes its own WAVs (one process owning
    the whole audio path is simpler than streaming PCM over a pipe).

    Protocol with the runner, as used by this class:
      * stdin  — newline-terminated commands: ``flush`` and ``stop``.
      * stdout — one JSON object per line with a ``type`` field
        (``user``, ``bot``, ``turn_end``, ``ready``, ``reconnect``, ``log``).
      * stderr — free text, forwarded to the log at WARNING level.
    """

    def __init__(
        self,
        audio_io,                 # ignored (runner owns its own)
        recorder,                 # ignored (runner owns its own)
        voice_name=None,          # forwarded via env
        system_prompt="",         # forwarded via env (or config)
        *,
        api_key: str = "",
        on_transcript: Optional[Callable[[str], None]] = None,
        on_command: Optional[Callable[[str, str], None]] = None,
    ):
        """Store settings and callbacks; no subprocess is started here.

        Args:
            audio_io: unused, kept for API parity.
            recorder: unused, kept for API parity.
            voice_name: exported to the runner as ``MARCUS_GEMINI_VOICE``.
            system_prompt: exported as ``MARCUS_GEMINI_SYSTEM_PROMPT``.
            api_key: exported as ``MARCUS_GEMINI_API_KEY`` when non-empty.
            on_transcript: called with each non-empty user transcript.
            on_command: called with ``(transcript, "en")`` after
                ``on_transcript``.
        """
        self._voice_name = voice_name or ""
        self._system_prompt = system_prompt or ""
        self._api_key = api_key
        self._on_transcript = on_transcript
        self._on_command = on_command

        self._proc = None           # type: Optional[subprocess.Popen]
        self._reader_thread = None  # type: Optional[threading.Thread]
        self._err_thread = None     # type: Optional[threading.Thread]
        self._stopping = False

        # config-loaded lazily so import order doesn't matter
        try:
            from Core.config_loader import load_config
            stt = (load_config("Voice") or {}).get("stt")
        except Exception as e:
            # Best-effort: a missing/broken config only disables the
            # stt.gemini_python_path override.
            log.debug("Voice config unavailable (%s); using stt defaults", e)
            stt = None
        # `"stt": null` in the config must not leak a None into start().
        self._stt = stt or {}

    # ─── lifecycle ────────────────────────────────────────

    def start(self) -> None:
        """Spawn the runner and start the stdout/stderr drainer threads.

        Failures (no interpreter, runner script missing, spawn error) are
        logged and leave the brain stopped; nothing is raised to the caller.
        Calling start() while the subprocess is alive is a logged no-op.
        """
        if self._proc is not None and self._proc.poll() is None:
            log.warning("GeminiBrain subprocess already running")
            return
        self._stopping = False

        try:
            python_bin = _resolve_runner_python(self._stt)
        except FileNotFoundError as e:
            log.error("%s", e)
            return

        runner = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "gemini_runner.py")
        )
        if not os.path.isfile(runner):
            log.error("gemini_runner.py not found at %s", runner)
            return
        project_root = os.path.dirname(os.path.dirname(runner))

        env = os.environ.copy()
        if self._api_key:
            env["MARCUS_GEMINI_API_KEY"] = self._api_key
        if self._voice_name:
            env["MARCUS_GEMINI_VOICE"] = self._voice_name
        # Forward the system prompt via env so the runner doesn't have to
        # re-read the JSON file (and so a trimmed inline string survives).
        if self._system_prompt:
            env["MARCUS_GEMINI_SYSTEM_PROMPT"] = self._system_prompt
        env["MARCUS_PROJECT_ROOT"] = project_root

        log.info("spawning gemini runner: %s -u %s", python_bin, runner)
        try:
            proc = subprocess.Popen(
                [python_bin, "-u", runner],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=project_root,
                env=env,
                bufsize=1,
                universal_newlines=True,
                # One undecodable byte from the runner must not kill a
                # drainer thread; the encoding itself is left unchanged.
                errors="replace",
            )
        except Exception as e:
            log.error("failed to spawn gemini runner: %s", e)
            self._proc = None
            return
        self._proc = proc

        # Each drainer gets its own process handle so a later stop()/start()
        # cannot make it read from the wrong subprocess.
        self._reader_thread = threading.Thread(
            target=self._stdout_reader, args=(proc,),
            daemon=True, name="gemini-stdout",
        )
        self._reader_thread.start()
        self._err_thread = threading.Thread(
            target=self._stderr_reader, args=(proc,),
            daemon=True, name="gemini-stderr",
        )
        self._err_thread.start()

    def flush_mic(self) -> None:
        """
        Tell the runner subprocess to drop its buffered mic audio. Used
        before AND after the brain speaks via TtsMaker so the robot's own
        voice (which the mic picks up during playback) doesn't come back
        from Gemini as a fake user utterance and accidentally hit the
        wake-word gate.

        Best-effort: a missing or already-dead runner is silently ignored.
        """
        proc = self._proc
        if proc is not None:
            self._send_line(proc, "flush")

    def stop(self) -> None:
        """Shut the runner down and release its pipes.

        Escalation: ``stop`` on stdin, wait 3 s, SIGTERM, wait 2 s, SIGKILL.
        Safe to call when nothing is running and safe to call repeatedly.
        """
        self._stopping = True
        proc = self._proc
        if proc is None:
            return

        # Polite stop: send "stop\n" on stdin, then wait briefly, then SIGTERM.
        self._send_line(proc, "stop")
        if not self._exited_within(proc, 3):
            self._signal(proc.terminate, "terminate")
            if not self._exited_within(proc, 2):
                self._signal(proc.kill, "kill")
                # Reap the killed child so it does not linger as a zombie.
                self._exited_within(proc, 2)

        self._release_pipes(proc)
        self._proc = None
        self._reader_thread = None
        self._err_thread = None

    # ─── shutdown helpers ─────────────────────────────────

    @staticmethod
    def _send_line(proc, command: str) -> bool:
        """Write ``command`` + newline to the runner's stdin; True on success."""
        stdin = proc.stdin
        if stdin is None or stdin.closed:
            return False
        try:
            stdin.write(command + "\n")
            stdin.flush()
        except (OSError, ValueError) as e:
            # BrokenPipeError (runner already gone) or a concurrently
            # closed pipe — nothing left to tell the runner.
            log.debug("could not send %r to gemini runner: %s", command, e)
            return False
        return True

    @staticmethod
    def _exited_within(proc, seconds) -> bool:
        """Wait up to ``seconds`` for ``proc`` to exit; True if it did."""
        try:
            proc.wait(timeout=seconds)
        except (subprocess.TimeoutExpired, OSError):
            return False
        return True

    @staticmethod
    def _signal(send, label: str) -> None:
        """Invoke ``proc.terminate`` / ``proc.kill``, tolerating a dead child."""
        try:
            send()
        except OSError as e:
            log.debug("gemini runner %s failed: %s", label, e)

    def _release_pipes(self, proc) -> None:
        """Close the subprocess pipes once nothing is reading them."""
        self._close_quietly(proc.stdin)
        current = threading.current_thread()
        drains = (
            (proc.stdout, self._reader_thread),
            (proc.stderr, self._err_thread),
        )
        for stream, thread in drains:
            # stop() may run inside a callback on the reader thread itself;
            # a thread cannot join itself, so that one is left to finish.
            if thread is not None and thread is not current and thread.is_alive():
                thread.join(timeout=1)
            # Never close a stream a live thread may still be blocked on.
            if thread is None or not thread.is_alive():
                self._close_quietly(stream)

    @staticmethod
    def _close_quietly(stream) -> None:
        """Close one pipe end, ignoring errors from an already-dead peer."""
        if stream is None or stream.closed:
            return
        try:
            stream.close()
        except (OSError, ValueError) as e:
            log.debug("closing gemini runner pipe failed: %s", e)

    # ─── stdout / stderr drainers ─────────────────────────

    def _stdout_reader(self, proc=None) -> None:
        """Read JSON lines from the runner's stdout and dispatch them.

        Args:
            proc: the subprocess to drain; defaults to ``self._proc``.

        Runs until EOF or until stop() has been requested. A bad line never
        terminates the loop: non-JSON lines and handler errors are logged.
        """
        if proc is None:
            proc = self._proc
        if proc is None or proc.stdout is None:
            return
        try:
            for raw in proc.stdout:
                if self._stopping:
                    break
                line = raw.strip()
                if not line:
                    continue
                try:
                    msg = json.loads(line)
                except (ValueError, RecursionError):
                    # Non-JSON line — log it raw so we can debug runner crashes.
                    log.warning("gemini-runner stdout (non-JSON): %s", line[:200])
                    continue
                try:
                    self._handle_msg(msg)
                except Exception:
                    # Thread boundary: losing one message beats losing the
                    # reader (and with it every later transcript).
                    log.exception(
                        "failed to handle gemini-runner message: %s", line[:200]
                    )
        except (OSError, ValueError) as e:
            log.debug("gemini-runner stdout reader stopped: %s", e)
        if not self._stopping:
            # EOF without stop(): the runner went away on its own.
            log.warning(
                "gemini runner stdout closed unexpectedly (exit code: %s)",
                proc.poll(),
            )

    def _stderr_reader(self, proc=None) -> None:
        """Forward every non-empty stderr line of the runner to the log.

        Args:
            proc: the subprocess to drain; defaults to ``self._proc``.
        """
        if proc is None:
            proc = self._proc
        if proc is None or proc.stderr is None:
            return
        try:
            for raw in proc.stderr:
                line = raw.rstrip()
                if line:
                    log.warning("gemini-runner stderr: %s", line[:200])
        except (OSError, ValueError) as e:
            log.debug("gemini-runner stderr reader stopped: %s", e)

    @staticmethod
    def _text_field(msg: dict, key: str) -> str:
        """Return ``msg[key]`` stripped, or "" when missing or not a string."""
        value = msg.get(key)
        return value.strip() if isinstance(value, str) else ""

    def _handle_msg(self, msg: dict) -> None:
        """Dispatch one decoded runner message by its ``type`` field.

        ``user`` messages with non-empty text trigger ``on_transcript(text)``
        and then ``on_command(text, "en")``; a failure in one callback is
        logged and does not prevent the other. Every other type is only
        logged. Anything that is not a JSON object is logged and dropped.
        """
        if not isinstance(msg, dict):
            log.warning(
                "gemini-runner stdout (unexpected JSON %s): %s",
                type(msg).__name__, repr(msg)[:200],
            )
            return

        t = msg.get("type")
        if t == "user":
            text = self._text_field(msg, "text")
            if not text:
                return
            log.info("USER: %s", text)
            if self._on_transcript is not None:
                try:
                    self._on_transcript(text)
                except Exception as e:
                    log.exception("on_transcript failed: %s", e)
            if self._on_command is not None:
                try:
                    self._on_command(text, "en")
                except Exception as e:
                    log.exception("on_command failed: %s", e)
        elif t == "bot":
            txt = self._text_field(msg, "text")
            if txt:
                log.info("GEMINI: %s", txt[:120])
        elif t == "turn_end":
            log.info("listening")
        elif t == "ready":
            log.info("connected — listening for speech")
        elif t == "reconnect":
            log.info("server signalled reconnect: %s", msg.get("reason", ""))
        elif t == "log":
            level = msg.get("level", "info")
            text = msg.get("msg", "")
            if level == "error":
                log.error("[runner] %s", text)
            elif level == "warn":
                log.warning("[runner] %s", text)
            else:
                log.info("[runner] %s", text)
        else:
            log.debug("gemini-runner unknown type=%r: %s", t, msg)