Update 2026-04-20 17:59:46

kassam 2026-04-20 17:59:47 +04:00
parent 71c45027f5
commit 94e4a9c4cb
26 changed files with 2186 additions and 790 deletions

config/gemini_config.json  (new file, +34 lines)

@@ -0,0 +1,34 @@
{
"_description": "Tunables for gemini/* modules. Loaded via core.config_loader.load('gemini'). API credentials (api_key, model, voice_name) still live in core_config.json > gemini_defaults — single source of truth shared with config.py.",
"client": {
"_comment": "gemini/client.py — short-session WebSocket client used by dashboard /generate + typed replay. default_system_prompt comes from core.gemini_defaults.",
"recv_timeout_sec": 30,
"reconnect_max_attempts": 3,
"reconnect_initial_delay_sec": 1.0,
"reconnect_max_delay_sec": 10.0
},
"subprocess": {
"_comment": "gemini/subprocess.py — GeminiSubprocess supervisor. Spawns voice/sanad_voice.py as a child, tails stdout for Gemini-specific log markers, exposes transcript + state to the dashboard.",
"log_tail_size": 2000,
"transcript_tail_size": 30,
"log_name": "gemini_subprocess",
"stop_timeout_sec": 3.0,
"terminate_timeout_sec": 2.0,
"noisy_prefixes": [
"ALSA lib ",
"Expression 'alsa_",
"Cannot connect to server socket",
"jack server is not running"
],
"noisy_fragments": [
"Unknown PCM",
"Evaluate error",
"snd_pcm_open_noupdate",
"PaAlsaStream",
"snd_config_evaluate",
"snd_func_refer"
]
}
}
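
For orientation, a minimal sketch of how a gemini/* module is expected to read these tunables; the `section` helper and the code-side fallback defaults mirror the call sites later in this commit, and the exact loader behaviour beyond that is an assumption:

# Sketch: mirrors the _cfg_section(...) call sites in gemini/client.py below.
from Project.Sanad.core.config_loader import section as _cfg_section

_CLIENT = _cfg_section("gemini", "client")          # the "client" block above
RECV_TIMEOUT = _CLIENT.get("recv_timeout_sec", 30)  # fallback mirrors the JSON value
MAX_ATTEMPTS = _CLIENT.get("reconnect_max_attempts", 3)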

config/local_config.json  (new file, +92 lines)

@@ -0,0 +1,92 @@
{
"_description": "Tunables for local/* — fully on-device voice pipeline (Silero VAD → Whisper → Qwen via llama.cpp → CosyVoice2). Loaded via core.config_loader.load('local').",
"subprocess": {
"_comment": "local/subprocess.py — LocalSubprocess supervisor. Mirrors gemini/subprocess.py. IMPORTANT: python_bin points at the `local` conda env (Python 3.8 + Jetson CUDA torch) so CosyVoice+Whisper run with GPU, while the dashboard/Gemini stack stays in gemini_sdk (Python 3.10).",
"python_bin": "/home/unitree/miniconda3/envs/local/bin/python",
"log_tail_size": 2000,
"transcript_tail_size": 30,
"log_name": "local_subprocess",
"stop_timeout_sec": 5.0,
"terminate_timeout_sec": 3.0,
"noisy_prefixes": [
"ALSA lib ",
"Expression 'alsa_",
"Cannot connect to server socket",
"jack server is not running"
],
"noisy_fragments": [
"Unknown PCM",
"Evaluate error",
"snd_pcm_open_noupdate",
"PaAlsaStream"
]
},
"vad": {
"_comment": "Silero VAD — CPU. Emits speech_start / speech_end events.",
"sample_rate": 16000,
"frame_ms": 32,
"threshold": 0.55,
"min_silence_ms": 400,
"min_speech_ms": 250,
"pad_start_ms": 200,
"pad_end_ms": 200,
"device": "cpu"
},
"stt": {
"_comment": "faster-whisper Large V3 Turbo, INT8 on GPU.",
"model_name": "large-v3-turbo",
"model_subdir": "faster-whisper-large-v3-turbo",
"device": "cuda",
"compute_type": "int8_float16",
"beam_size": 1,
"language": null,
"vad_filter": false,
"no_speech_threshold": 0.6,
"min_utterance_chars": 2,
"temperature": 0.0
},
"llm": {
"_comment": "Qwen 2.5 Instruct via Ollama (default) OR self-managed llama.cpp. Set backend to pick.",
"backend": "ollama",
"_ollama_comment": "Ollama daemon — assumes `ollama serve` is running; `ollama pull qwen2.5:1.5b` to fetch.",
"ollama_host": "127.0.0.1",
"ollama_port": 11434,
"ollama_model": "qwen2.5:1.5b",
"ollama_keep_alive": "5m",
"_llamacpp_comment": "Self-managed llama-server subprocess. Only used when backend='llama_cpp'.",
"model_subdir": "qwen2.5-1.5b-instruct-q4_k_m.gguf",
"server_binary": "llama-server",
"host": "127.0.0.1",
"port": 8080,
"n_gpu_layers": 99,
"ctx_size": 2048,
"threads": 4,
"startup_timeout_sec": 30,
"_shared_comment": "Generation params — both backends.",
"request_timeout_sec": 30,
"max_tokens": 200,
"temperature": 0.7,
"top_p": 0.9,
"stop": ["<|im_end|>", "\n\n\n"],
"chunk_delimiters": ".,?!؟،",
"chunk_min_chars": 8
},
"tts": {
"_comment": "CosyVoice2 0.5B streaming — GPU. Uses a 3s reference WAV for voice cloning.",
"model_subdir": "CosyVoice2-0.5B",
"reference_wav_subdir": "khaleeji_reference_3s.wav",
"reference_prompt": "",
"stream_chunk_sec": 0.25,
"sample_rate": 16000,
"queue_max": 3,
"device": "cuda"
}
}


@@ -50,39 +50,12 @@
     "dir_relative": "data/recordings"
   },
-  "system_prompt": {
-    "_comment": "Persona filename lives in core.script_files.persona; default text in core.gemini_defaults.default_system_prompt. This section is now metadata-only."
-  },
   "typed_replay": {
     "_comment": "voice/typed_replay.py — max_text_len comes from dashboard.api_input",
     "monitor_chunk_size": 512,
     "monitor_tail_sec": 0.2
   },
-  "live_gemini_subprocess": {
-    "_comment": "voice/live_gemini_subprocess.py — LiveGeminiSubprocess",
-    "log_tail_size": 2000,
-    "transcript_tail_size": 30,
-    "log_name": "live_gemini_subprocess",
-    "stop_timeout_sec": 3.0,
-    "terminate_timeout_sec": 2.0,
-    "noisy_prefixes": [
-      "ALSA lib ",
-      "Expression 'alsa_",
-      "Cannot connect to server socket",
-      "jack server is not running"
-    ],
-    "noisy_fragments": [
-      "Unknown PCM",
-      "Evaluate error",
-      "snd_pcm_open_noupdate",
-      "PaAlsaStream",
-      "snd_config_evaluate",
-      "snd_func_refer"
-    ]
-  },
   "live_voice_loop": {
     "_comment": "voice/live_voice_loop.py — arm phrase dispatcher. arm_txt filename comes from core.script_files.arm_phrases",
     "trigger_log_size": 100,
@@ -97,27 +70,5 @@
     "xvector_filename": "arabic_xvector_embedding.pt",
     "sample_rate": 16000,
     "channels": 1
-  },
-  "gemini_client": {
-    "_comment": "voice/gemini_client.py — default_system_prompt comes from core.gemini_defaults",
-    "recv_timeout_sec": 30,
-    "reconnect_max_attempts": 3,
-    "reconnect_initial_delay_sec": 1.0,
-    "reconnect_max_delay_sec": 10.0
-  },
-  "asr_buffer": {
-    "_comment": "text_utils.maybe_trigger_arm state machine defaults",
-    "window_sec": 2.0,
-    "short_token_bonus_sec": 1.0,
-    "join_no_space_maxlen": 2,
-    "max_chars": 120,
-    "stream_max_chars": 80,
-    "trigger_dedup_window_sec": 2.0,
-    "pending_arm_ttl_sec": 6.0,
-    "pending_arm_fallback_sec": 0.65,
-    "dup_call_window_sec": 0.25,
-    "dup_asr_repeat_window_sec": 0.9
   }
 }


@@ -46,7 +46,7 @@ class Brain:
         self._lock = asyncio.Lock()
         # Sub-modules are injected after construction so imports stay lazy.
-        self._voice = None      # voice.gemini_client.GeminiVoiceClient
+        self._voice = None      # gemini.client.GeminiVoiceClient
         self._audio_mgr = None  # voice.audio_manager.AudioManager
         self._arm = None        # motion.arm_controller.ArmController
         self._macro_rec = None  # motion.macro_recorder.MacroRecorder


@@ -1,6 +1,6 @@
 """Live Voice Commands — voice-to-arm phrase trigger dispatcher.
-Listens to LiveGeminiSubprocess user transcripts, matches against
+Listens to GeminiSubprocess user transcripts, matches against
 sanad_arm.txt phrases, and fires ARM.trigger_action_by_id.
 Endpoints:


@@ -193,7 +193,7 @@ async def update_api_key(payload: ApiKeyPayload):
         raise HTTPException(500, f"Could not save config: {exc}")
     # Hot-swap the in-memory module globals.
-    # Both Project.Sanad.config AND Project.Sanad.voice.gemini_client
+    # Both Project.Sanad.config AND Project.Sanad.gemini.client
     # have their OWN reference to GEMINI_API_KEY (the latter was created
     # at `from Project.Sanad.config import GEMINI_API_KEY` at import time).
     # Python's `from X import Y` binds a local name — updating config.Y
@@ -205,10 +205,10 @@ async def update_api_key(payload: ApiKeyPayload):
         log.exception("could not patch config.GEMINI_API_KEY")
     try:
-        import Project.Sanad.voice.gemini_client as _gc
+        import Project.Sanad.gemini.client as _gc
         _gc.GEMINI_API_KEY = key
     except Exception:
-        log.exception("could not patch gemini_client.GEMINI_API_KEY")
+        log.exception("could not patch gemini.client.GEMINI_API_KEY")
     # Disconnect any live session so reconnect uses the new key.
     from Project.Sanad.main import voice_client


@@ -8,7 +8,7 @@ Usage:
     python3 voice_example.py gemini "hello"        # one-shot Gemini text→audio
     python3 voice_example.py local_tts "hello"     # local Coqui TTS
     python3 voice_example.py typed_replay "hello"  # typed replay engine
-    python3 voice_example.py live                  # spawn LiveGeminiSubprocess
+    python3 voice_example.py live                  # spawn GeminiSubprocess
     python3 voice_example.py status                # show status of all subsystems
 Assumes Project.Sanad is importable (run from repo root or with PYTHONPATH set).
@@ -23,7 +23,7 @@ import sys
 def _demo_gemini(text: str) -> None:
     """One-shot: connect Gemini, send text, play reply."""
-    from Project.Sanad.voice.gemini_client import GeminiVoiceClient
+    from Project.Sanad.gemini.client import GeminiVoiceClient
     from Project.Sanad.voice.audio_manager import AudioManager
     async def run():
@@ -55,7 +55,7 @@ def _demo_local_tts(text: str) -> None:
 def _demo_typed_replay(text: str) -> None:
     """Exercise the TypedReplayEngine end-to-end."""
-    from Project.Sanad.voice.gemini_client import GeminiVoiceClient
+    from Project.Sanad.gemini.client import GeminiVoiceClient
     from Project.Sanad.voice.audio_manager import AudioManager
     from Project.Sanad.voice.typed_replay import TypedReplayEngine
@@ -73,9 +73,9 @@ def _demo_typed_replay(text: str) -> None:
 def _demo_live() -> None:
     """Spawn the live voice subprocess — same as dashboard /api/live-subprocess."""
-    from Project.Sanad.voice.live_gemini_subprocess import LiveGeminiSubprocess
-    mgr = LiveGeminiSubprocess()
+    from Project.Sanad.gemini.subprocess import GeminiSubprocess
+    mgr = GeminiSubprocess()
     info = mgr.start()
     print(f"[live] {info}")
     print("Running. Ctrl+C to stop.")
@@ -90,7 +90,7 @@ def _demo_live() -> None:
 def _demo_status() -> None:
     """Print status of all voice subsystems."""
-    from Project.Sanad.voice.gemini_client import GeminiVoiceClient
+    from Project.Sanad.gemini.client import GeminiVoiceClient
     try:
         from Project.Sanad.voice.local_tts import LocalTTSEngine
     except Exception:

gemini/__init__.py  (new file, empty)


@@ -30,7 +30,7 @@ from Project.Sanad.core.logger import get_logger
 log = get_logger("gemini_client")
-_GC = _cfg_section("voice", "gemini_client")
+_GC = _cfg_section("gemini", "client")
 # Default system prompt — SINGLE SOURCE in core.gemini_defaults
 _DEFAULT_SYSTEM_PROMPT = _cfg_section("core", "gemini_defaults").get(
     "default_system_prompt",

gemini/script.py  (new file, +370 lines)

@@ -0,0 +1,370 @@
"""Gemini brain — live conversation loop using the google-genai SDK.
Implements the VoiceBrain contract documented in `voice/model_script.py`:
__init__(audio_io, recorder, voice_name, system_prompt)
async run()
stop()
Owns everything Gemini-specific: the `genai.Client`, `LiveConnectConfig`,
the session connect/receive loop, VAD-based barge-in, echo suppression,
reconnect backoff. Hardware I/O is delegated to `audio_io` and per-turn
WAV capture to `recorder`; both are model-agnostic.
Env overrides:
    SANAD_GEMINI_MODEL   Gemini Live model id (without "models/" prefix)
"""
from __future__ import annotations
import array
import asyncio
import os
import time
from typing import Any, Optional
import numpy as np
from google import genai
from google.genai import types
from Project.Sanad.config import (
CHUNK_SIZE,
GEMINI_API_KEY,
GEMINI_VOICE,
RECEIVE_SAMPLE_RATE,
SEND_SAMPLE_RATE,
)
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
log = get_logger("gemini_brain")
_SV = _cfg_section("voice", "sanad_voice")
_VAD = _cfg_section("voice", "vad")
_BI = _cfg_section("voice", "barge_in")
_MODEL = os.environ.get(
"SANAD_GEMINI_MODEL",
"gemini-2.5-flash-native-audio-preview-12-2025",
)
_MIC_GAIN = _SV.get("mic_gain", 1.0)
_SESSION_TIMEOUT = _SV.get("session_timeout_sec", 660)
_MAX_RECONNECT_DELAY = _SV.get("max_reconnect_delay_sec", 30)
_MAX_CONSECUTIVE_ERRORS = _SV.get("max_consecutive_errors", 10)
_NO_MESSAGES_TIMEOUT = _SV.get("no_messages_timeout_sec", 30)
_CHUNK_BYTES = CHUNK_SIZE * 2
_SILENCE_PCM = b"\x00" * _CHUNK_BYTES
def _audio_energy(pcm: bytes) -> int:
try:
samples = array.array("h", pcm)
return sum(abs(s) for s in samples) // len(samples) if samples else 0
except Exception:
return 0
class GeminiBrain:
"""Gemini Live conversation brain — reconnect-safe."""
def __init__(self, audio_io, recorder, voice_name: Optional[str] = None,
system_prompt: str = ""):
self._audio = audio_io
self._mic = audio_io.mic
self._speaker = audio_io.speaker
self._recorder = recorder
self._voice = voice_name or GEMINI_VOICE
self._system_prompt = system_prompt
self._api_key = GEMINI_API_KEY
self._stop_flag = asyncio.Event()
# per-session state (reset in the outer reconnect loop)
self._speaking = False
self._stream_started = False
self._barge_block_until = 0.0
self._ai_speak_start = 0.0
self._last_ai_audio = 0.0
self._done: Optional[asyncio.Event] = None
def stop(self) -> None:
"""Signal the run loop to exit at the next opportunity."""
try:
self._stop_flag.set()
except Exception:
pass
# ─── public entry point ───────────────────────────────
async def run(self) -> None:
client = genai.Client(api_key=self._api_key)
config = self._build_config()
session_num = 0
start_time = time.time()
consecutive_errors = 0
while not self._stop_flag.is_set():
session_num += 1
self._reset_turn_state()
uptime_min = (time.time() - start_time) / 60
try:
log.info("connecting to Gemini (session #%d, uptime %.0fm)...",
session_num, uptime_min)
async with client.aio.live.connect(model=_MODEL, config=config) as session:
log.info("connected — speak anytime!")
consecutive_errors = 0
self._mic.flush()
self._done = asyncio.Event()
try:
await asyncio.wait_for(
asyncio.gather(
self._send_mic_loop(session),
self._receive_loop(session),
),
timeout=_SESSION_TIMEOUT,
)
except asyncio.TimeoutError:
log.warning("session timed out after %ds", _SESSION_TIMEOUT)
except asyncio.CancelledError:
log.warning("session cancelled")
log.info("session #%d ended — reconnecting in 1s", session_num)
self._speaker.stop()
self._mic.flush()
await asyncio.sleep(1)
except asyncio.CancelledError:
log.info("cancelled — stopping")
break
except KeyboardInterrupt:
log.info("keyboard interrupt — stopping")
break
except Exception as exc:
consecutive_errors += 1
delay = min(_MAX_RECONNECT_DELAY, 2 ** consecutive_errors)
log.error("session error (#%d): %s — reconnecting in %ds",
consecutive_errors, exc, delay)
await asyncio.sleep(delay)
if consecutive_errors >= _MAX_CONSECUTIVE_ERRORS:
log.warning("%d consecutive errors — recreating client",
consecutive_errors)
try:
client = genai.Client(api_key=self._api_key)
consecutive_errors = 0
except Exception as ce:
log.error("client recreation failed: %s", ce)
# ─── Gemini config ────────────────────────────────────
def _build_config(self) -> types.LiveConnectConfig:
return types.LiveConnectConfig(
response_modalities=["AUDIO"],
speech_config=types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(
voice_name=self._voice,
),
),
),
realtime_input_config=types.RealtimeInputConfig(
automatic_activity_detection=types.AutomaticActivityDetection(
disabled=False,
start_of_speech_sensitivity=getattr(
types.StartSensitivity,
_VAD.get("start_sensitivity", "START_SENSITIVITY_HIGH"),
),
end_of_speech_sensitivity=getattr(
types.EndSensitivity,
_VAD.get("end_sensitivity", "END_SENSITIVITY_LOW"),
),
prefix_padding_ms=_VAD.get("prefix_padding_ms", 20),
silence_duration_ms=_VAD.get("silence_duration_ms", 200),
),
),
input_audio_transcription=types.AudioTranscriptionConfig(),
output_audio_transcription=types.AudioTranscriptionConfig(),
system_instruction=types.Content(
parts=[types.Part(text=self._system_prompt)],
),
)
# ─── state helpers ────────────────────────────────────
def _reset_turn_state(self) -> None:
self._speaking = False
self._stream_started = False
self._barge_block_until = 0.0
self._ai_speak_start = 0.0
self._last_ai_audio = 0.0
def _interrupt(self, source: str = "local") -> None:
self._speaking = False
self._stream_started = False
self._speaker.stop()
self._mic.flush()
self._recorder.finish_turn()
log.info("interrupt (%s)", source)
# ─── mic send loop ────────────────────────────────────
async def _send_mic_loop(self, session: Any) -> None:
threshold = _BI.get("threshold", 500)
chunks_needed = _BI.get("loud_chunks_needed", 3)
cooldown = _BI.get("cooldown_sec", 0.3)
echo_suppress_below = _BI.get("echo_suppress_below", 500)
grace = _BI.get("ai_speak_grace_sec", 0.15)
loop = asyncio.get_event_loop()
loud_count = 0
last_activity = time.time()
while not self._done.is_set() and not self._stop_flag.is_set():
try:
raw = await loop.run_in_executor(
None, self._mic.read_chunk, _CHUNK_BYTES,
)
except Exception:
break
samples = np.frombuffer(raw, dtype=np.int16).astype(np.float32)
samples = np.clip(samples * _MIC_GAIN, -32768, 32767).astype(np.int16)
data = samples.tobytes()
energy = _audio_energy(data)
now = time.time()
# Barge-in: after AI starts speaking, sustained user energy cuts it.
if self._speaking and now >= self._barge_block_until:
if (now - self._ai_speak_start) >= grace:
if energy > threshold:
loud_count += 1
else:
loud_count = max(0, loud_count - 1)
if loud_count > chunks_needed:
log.info("BARGE-IN (e=%d)", energy)
self._interrupt("barge-in")
loud_count = 0
self._barge_block_until = now + cooldown
# Echo suppression: while AI is speaking, mask quiet frames so the
# mic doesn't feed the model its own voice bleed.
send_data = data
if self._speaking and energy < echo_suppress_below:
send_data = _SILENCE_PCM
# Record user audio when clearly speaking and AI isn't.
if energy > 250 and not self._speaking:
self._recorder.capture_user(data)
# Keep-alive watchdog
if energy > 250:
last_activity = now
elif now - last_activity > 10:
log.info("alive (no speech %.0fs, e=%d)",
now - last_activity, energy)
last_activity = now
try:
await session.send_realtime_input(
audio=types.Blob(
data=send_data,
mime_type=f"audio/pcm;rate={SEND_SAMPLE_RATE}",
),
)
except asyncio.CancelledError:
return
except Exception as exc:
log.warning("mic send failed: %s — ending session", exc)
self._done.set()
return
await asyncio.sleep(CHUNK_SIZE / SEND_SAMPLE_RATE)
log.info("send_mic task ended")
# ─── receive loop ─────────────────────────────────────
async def _receive_loop(self, session: Any) -> None:
loop = asyncio.get_event_loop()
try:
last_recv = time.time()
while not self._done.is_set() and not self._stop_flag.is_set():
async for response in session.receive():
last_recv = time.time()
if self._done.is_set():
break
if hasattr(response, "go_away") and response.go_away is not None:
log.info("server going away — will reconnect")
self._done.set()
return
sc = response.server_content
if sc is None:
continue
if sc.interrupted is True:
if self._speaking:
log.info("Gemini interrupted")
self._interrupt("gemini")
continue
if sc.input_transcription:
text = (sc.input_transcription.text or "").strip()
if text and not self._speaking:
log.info("USER: %s", text)
self._recorder.add_user_text(text)
if sc.output_transcription:
text = (sc.output_transcription.text or "").strip()
if text:
log.info("BOT : %s", text)
self._recorder.add_robot_text(text)
if sc.model_turn:
for part in sc.model_turn.parts:
if part.inline_data and part.inline_data.data:
now = time.time()
if not self._speaking:
self._ai_speak_start = now
self._speaking = True
self._last_ai_audio = now
raw_audio = part.inline_data.data
self._recorder.capture_robot(raw_audio)
audio = np.frombuffer(raw_audio, dtype=np.int16)
if not self._stream_started:
await loop.run_in_executor(
None, self._speaker.begin_stream,
)
self._stream_started = True
await loop.run_in_executor(
None, self._speaker.send_chunk,
audio, RECEIVE_SAMPLE_RATE,
)
if sc.turn_complete:
if (self._speaking and self._stream_started
and not self._speaker.interrupted):
log.info("speaker %.1fs", self._speaker.total_sent_sec)
await loop.run_in_executor(
None, self._speaker.wait_finish,
)
elif self._speaking and self._speaker.interrupted:
log.info("speaker interrupted")
self._speaking = False
self._stream_started = False
self._mic.flush()
self._recorder.finish_turn()
log.info("listening")
if time.time() - last_recv > _NO_MESSAGES_TIMEOUT:
log.warning("no messages from Gemini for %ds — session dead",
_NO_MESSAGES_TIMEOUT)
break
await asyncio.sleep(0.1)
except Exception as exc:
log.warning("receive ended: %s", exc)
finally:
self._done.set()
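
For readers new to the codebase, the VoiceBrain contract named in the docstring is small enough to sketch as a typing.Protocol. This shape is inferred from the docstring above and from LocalBrain; `voice/model_script.py` remains the authoritative definition.

# Sketch: inferred contract; see voice/model_script.py for the real one.
from typing import Optional, Protocol

class VoiceBrain(Protocol):
    def __init__(self, audio_io, recorder, voice_name: Optional[str] = None,
                 system_prompt: str = "") -> None: ...
    async def run(self) -> None: ...   # connect and converse until stop()
    def stop(self) -> None: ...        # ask run() to exit at the next opportunity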


@@ -1,7 +1,11 @@
-"""Live Gemini Subprocess Manager — start/stop sanad_voice.py as managed child.
-Mirrors gemini_voice_v2/LiveGeminiManager. Launches the voice script as a
-subprocess, tails stdout, parses state transitions and user transcripts.
+"""Gemini live subprocess supervisor.
+Spawns `voice/sanad_voice.py` as a managed child with `SANAD_VOICE_BRAIN=gemini`,
+tails the child's stdout, and extracts state transitions + user transcripts
+from the Gemini-specific log lines emitted by `gemini/script.py:GeminiBrain`.
+When a new model is added, build its own sibling supervisor (see
+`voice/model_subprocess.py` for the template); do not refactor this file.
 """
 from __future__ import annotations
@@ -22,9 +26,9 @@ from Project.Sanad.config import BASE_DIR, LOGS_DIR, SCRIPTS_DIR, LIVE_TUNE
 from Project.Sanad.core.config_loader import section as _cfg_section
 from Project.Sanad.core.logger import get_logger
-log = get_logger("live_subprocess")
-_LS_CFG = _cfg_section("voice", "live_gemini_subprocess")
+log = get_logger("gemini_subprocess")
+_LS_CFG = _cfg_section("gemini", "subprocess")
 def _resolve_live_script() -> Path:
@@ -51,7 +55,7 @@ TRANSCRIPT_TAIL_SIZE = _LS_CFG.get("transcript_tail_size", 30)
 # Persistent on-disk log for the full subprocess session.
 LIVE_LOG_DIR = LOGS_DIR
-LIVE_LOG_NAME = _LS_CFG.get("log_name", "live_gemini_subprocess")
+LIVE_LOG_NAME = _LS_CFG.get("log_name", "gemini_subprocess")
 _STOP_TIMEOUT_SEC = _LS_CFG.get("stop_timeout_sec", 3.0)
 _TERMINATE_TIMEOUT_SEC = _LS_CFG.get("terminate_timeout_sec", 2.0)
@@ -66,7 +70,7 @@ _NOISY_FRAGMENTS = tuple(_LS_CFG.get("noisy_fragments", [
 ]))
-class LiveGeminiSubprocess:
+class GeminiSubprocess:
     def __init__(self):
         self._lock = threading.Lock()
         self.process: subprocess.Popen | None = None
@@ -102,23 +106,33 @@ class LiveGeminiSubprocess:
         self.state_message = msg
     def _track_line(self, line: str):
-        if "Connecting to Gemini" in line:
+        """Parse Gemini-specific log markers emitted by `gemini/script.py`.
+        Must stay in lock-step with the `log.info(...)` strings in
+        `GeminiBrain`. If you add a new state, add the emit in the brain
+        AND the matching detector here in one PR.
+        """
+        if "connecting to Gemini" in line:
             self._set_state("connecting", line)
-        elif "Connected! Sanad is listening" in line:
+        elif "connected — speak anytime" in line or "connected - speak anytime" in line:
             self._set_state("listening", "Listening for speech.")
-        elif "USER SAID:" in line:
-            text = line.split("USER SAID:", 1)[1].strip()
+        elif " USER: " in line or line.strip().startswith("USER:"):
+            # GeminiBrain emits: log.info("USER: %s", text)
+            text = line.split("USER:", 1)[1].strip()
             if text:
                 self.last_user_text = text
                 self.user_transcript.append(text)
                 self._set_state("hearing", f"User: {text}")
-        elif "Interruption!" in line:
+        elif "BARGE-IN" in line or "Gemini interrupted" in line or "interrupt (" in line:
             self._set_state("interrupting", line)
-        elif any(k in line for k in ("Mic Error:", "Speaker Error:", "Fatal Error:")):
+        elif "listening" in line.lower() and "no speech" not in line:
+            # Fires on "listening" (post-turn) — keep the state fresh.
+            self._set_state("listening", "Listening for speech.")
+        elif "session error" in line or "client recreation failed" in line:
             self._set_state("error", line)
-        elif "WebSocket closed." in line:
+        elif "server going away" in line or "session ended" in line or "session dead" in line:
             self._set_state("warning", line)
-        elif "Ma'a Salama" in line:
+        elif "keyboard interrupt" in line or "cancelled — stopping" in line:
             self._set_state("stopped", line)
     def _reader_loop(self):
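
Since the docstring insists the detectors stay in lock-step with GeminiBrain's log strings, a small regression test keeps the pair honest. A sketch follows; the sample lines are copied from the `log.info` calls in `gemini/script.py`, while the test harness itself is an assumption:

# Sketch: feed representative GeminiBrain log lines through the detector.
from Project.Sanad.gemini.subprocess import GeminiSubprocess

def test_track_line_states():
    mgr = GeminiSubprocess()
    mgr._track_line("connecting to Gemini (session #1, uptime 0m)...")
    assert mgr.state == "connecting"
    mgr._track_line("connected — speak anytime!")
    assert mgr.state == "listening"
    mgr._track_line("USER: open the door")
    assert mgr.state == "hearing" and mgr.last_user_text == "open the door"
    mgr._track_line("BARGE-IN (e=812)")
    assert mgr.state == "interrupting"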

local/__init__.py  (new file, empty)

local/llm.py  (new file, +305 lines)

@@ -0,0 +1,305 @@
"""LLM layer — Qwen 2.5 Instruct via Ollama (default) or self-managed llama.cpp.
Phase 3 of the local pipeline. Two backends, selectable via
`config/local_config.json > llm.backend`:
"ollama" talk to a running `ollama serve` daemon (default).
No subprocess management, no CUDA build. Just:
ollama pull qwen2.5:1.5b
# daemon usually auto-starts; if not: `ollama serve &`
"llama_cpp" launch our own `llama-server` subprocess. Requires
a CUDA build of llama.cpp and a GGUF file at
`model/local/<llm.model_subdir>`.
Both backends stream tokens and chunk them on sentence delimiters so
the TTS can start synthesising before the LLM finishes.
"""
from __future__ import annotations
import asyncio
import json
import shutil
import subprocess
import time
from typing import AsyncIterator, Optional
from Project.Sanad.config import MODEL_DIR
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
log = get_logger("local_llm")
_CFG = _cfg_section("local", "llm")
BACKEND = (_CFG.get("backend") or "ollama").strip().lower()
# Ollama
OLLAMA_HOST = _CFG.get("ollama_host", "127.0.0.1")
OLLAMA_PORT = int(_CFG.get("ollama_port", 11434))
OLLAMA_MODEL = _CFG.get("ollama_model", "qwen2.5:1.5b")
OLLAMA_KEEP_ALIVE = _CFG.get("ollama_keep_alive", "5m")
# llama.cpp
MODEL_SUBDIR = _CFG.get("model_subdir", "qwen2.5-1.5b-instruct-q4_k_m.gguf")
SERVER_BIN = _CFG.get("server_binary", "llama-server")
HOST = _CFG.get("host", "127.0.0.1")
PORT = int(_CFG.get("port", 8080))
N_GPU_LAYERS = _CFG.get("n_gpu_layers", 99)
CTX_SIZE = _CFG.get("ctx_size", 2048)
THREADS = _CFG.get("threads", 4)
STARTUP_TIMEOUT = _CFG.get("startup_timeout_sec", 30)
# Shared generation params
REQUEST_TIMEOUT = _CFG.get("request_timeout_sec", 30)
MAX_TOKENS = _CFG.get("max_tokens", 200)
TEMPERATURE = _CFG.get("temperature", 0.7)
TOP_P = _CFG.get("top_p", 0.9)
STOP_SEQS = list(_CFG.get("stop", ["<|im_end|>"]))
CHUNK_DELIMS = _CFG.get("chunk_delimiters", ".,?!؟،")
CHUNK_MIN_CHARS = int(_CFG.get("chunk_min_chars", 8))
LOCAL_MODEL_PATH = MODEL_DIR / "local" / MODEL_SUBDIR
class LlamaServer:
"""Thin wrapper — owns subprocess (llama.cpp) or no-op (ollama)."""
def __init__(self) -> None:
self._proc: Optional[subprocess.Popen] = None
# ─── lifecycle ────────────────────────────────────────
def start(self) -> None:
if BACKEND == "ollama":
self._check_ollama()
log.info("LLM backend=ollama model=%s (@ %s:%d)",
OLLAMA_MODEL, OLLAMA_HOST, OLLAMA_PORT)
return
if BACKEND == "llama_cpp":
self._start_llama_cpp()
return
raise RuntimeError(f"unknown llm.backend: {BACKEND!r}")
def stop(self) -> None:
if self._proc is None:
return
try:
self._proc.terminate()
self._proc.wait(timeout=3)
except subprocess.TimeoutExpired:
self._proc.kill()
self._proc.wait(timeout=2)
except Exception as exc:
log.warning("llama-server stop error: %s", exc)
self._proc = None
def alive(self) -> bool:
if BACKEND == "ollama":
return self._ping_ollama()
return self._proc is not None and self._proc.poll() is None
# ─── Ollama backend ───────────────────────────────────
def _check_ollama(self) -> None:
"""Verify the Ollama daemon is running + the model is pulled."""
import urllib.request
tags_url = f"http://{OLLAMA_HOST}:{OLLAMA_PORT}/api/tags"
try:
with urllib.request.urlopen(tags_url, timeout=3) as r:
body = json.loads(r.read().decode("utf-8"))
except Exception as exc:
raise RuntimeError(
f"Ollama daemon not reachable at {tags_url} — is `ollama serve` running? ({exc})"
)
models = [m.get("name", "") for m in body.get("models", [])]
if not any(OLLAMA_MODEL in m for m in models):
raise RuntimeError(
f"Ollama model {OLLAMA_MODEL!r} not pulled. "
f"Run: `ollama pull {OLLAMA_MODEL}`. Available: {models}"
)
def _ping_ollama(self) -> bool:
import urllib.request
try:
with urllib.request.urlopen(
f"http://{OLLAMA_HOST}:{OLLAMA_PORT}/api/tags", timeout=1,
) as r:
return r.status == 200
except Exception:
return False
async def _stream_ollama(self, user_text: str, system_prompt: str,
cancel: asyncio.Event) -> AsyncIterator[str]:
import aiohttp
url = f"http://{OLLAMA_HOST}:{OLLAMA_PORT}/api/generate"
payload = {
"model": OLLAMA_MODEL,
"system": system_prompt,
"prompt": user_text,
"stream": True,
"keep_alive": OLLAMA_KEEP_ALIVE,
"options": {
"num_predict": MAX_TOKENS,
"temperature": TEMPERATURE,
"top_p": TOP_P,
"stop": STOP_SEQS,
},
}
buf = ""
async with aiohttp.ClientSession() as sess:
try:
async with sess.post(
url, json=payload,
timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT)) as resp:
async for raw in resp.content:
if cancel.is_set():
log.info("LLM stream cancelled (barge-in)")
return
line = raw.decode("utf-8", errors="ignore").strip()
if not line:
continue
try:
obj = json.loads(line)
except json.JSONDecodeError:
continue
token = obj.get("response", "")
if token:
buf += token
if len(buf) >= CHUNK_MIN_CHARS and buf[-1] in CHUNK_DELIMS:
yield buf.strip()
buf = ""
if obj.get("done"):
break
except asyncio.CancelledError:
return
except Exception as exc:
log.warning("Ollama stream error: %s", exc)
return
if buf.strip():
yield buf.strip()
# ─── llama.cpp backend ────────────────────────────────
def _start_llama_cpp(self) -> None:
if self._proc is not None and self._proc.poll() is None:
return
if not LOCAL_MODEL_PATH.exists():
raise RuntimeError(f"LLM model not found at {LOCAL_MODEL_PATH}")
bin_path = shutil.which(SERVER_BIN) or SERVER_BIN
cmd = [
bin_path,
"-m", str(LOCAL_MODEL_PATH),
"--host", HOST,
"--port", str(PORT),
"--n-gpu-layers", str(N_GPU_LAYERS),
"--ctx-size", str(CTX_SIZE),
"--threads", str(THREADS),
"--log-disable",
]
log.info("launching llama-server: %s", " ".join(cmd))
self._proc = subprocess.Popen(
cmd,
stdout=subprocess.DEVNULL,
stderr=subprocess.PIPE,
text=True,
)
self._wait_llama_cpp_ready()
log.info("llama-server ready (pid=%d)", self._proc.pid)
def _wait_llama_cpp_ready(self) -> None:
import urllib.request
deadline = time.time() + STARTUP_TIMEOUT
url = f"http://{HOST}:{PORT}/health"
while time.time() < deadline:
if self._proc and self._proc.poll() is not None:
stderr = self._proc.stderr.read() if self._proc.stderr else ""
raise RuntimeError(
f"llama-server exited early (code={self._proc.returncode}): {stderr[:500]}"
)
try:
with urllib.request.urlopen(url, timeout=1) as r:
if r.status == 200:
return
except Exception:
time.sleep(0.3)
raise RuntimeError(f"llama-server did not come up within {STARTUP_TIMEOUT}s")
async def _stream_llama_cpp(self, user_text: str, system_prompt: str,
cancel: asyncio.Event) -> AsyncIterator[str]:
import aiohttp
prompt = self._format_chatml_prompt(user_text, system_prompt)
payload = {
"prompt": prompt,
"stream": True,
"n_predict": MAX_TOKENS,
"temperature": TEMPERATURE,
"top_p": TOP_P,
"stop": STOP_SEQS,
"cache_prompt": True,
}
url = f"http://{HOST}:{PORT}/completion"
buf = ""
async with aiohttp.ClientSession() as sess:
try:
async with sess.post(
url, json=payload,
timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT)) as resp:
async for raw in resp.content:
if cancel.is_set():
log.info("LLM stream cancelled (barge-in)")
return
line = raw.decode("utf-8", errors="ignore").strip()
if not line.startswith("data:"):
continue
line = line[len("data:"):].strip()
if not line or line == "[DONE]":
continue
try:
obj = json.loads(line)
except json.JSONDecodeError:
continue
token = obj.get("content", "")
if not token:
if obj.get("stop"):
break
continue
buf += token
if len(buf) >= CHUNK_MIN_CHARS and buf[-1] in CHUNK_DELIMS:
yield buf.strip()
buf = ""
except asyncio.CancelledError:
return
except Exception as exc:
log.warning("llama-server stream error: %s", exc)
return
if buf.strip():
yield buf.strip()
@staticmethod
def _format_chatml_prompt(user_text: str, system_prompt: str) -> str:
return (
f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
f"<|im_start|>user\n{user_text}<|im_end|>\n"
f"<|im_start|>assistant\n"
)
# ─── public streaming entry point ─────────────────────
async def stream(self, user_text: str, system_prompt: str,
cancel: asyncio.Event) -> AsyncIterator[str]:
"""Yield sentence-sized text chunks as the LLM generates.
Chunk boundaries: any char in `CHUNK_DELIMS` AND buffer length
`CHUNK_MIN_CHARS`. The final buffer is flushed on completion
even without a delimiter. If `cancel` is set, the request is
aborted and the generator returns.
"""
if BACKEND == "ollama":
async for chunk in self._stream_ollama(user_text, system_prompt, cancel):
yield chunk
elif BACKEND == "llama_cpp":
async for chunk in self._stream_llama_cpp(user_text, system_prompt, cancel):
yield chunk
else:
raise RuntimeError(f"unknown llm.backend: {BACKEND!r}")
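
Both `_stream_*` methods inline the same boundary rule; factored out as a pure function it is easier to see. A hypothetical helper, not part of this commit:

# Sketch: the shared chunking rule, flush once the buffer ends on a
# delimiter AND has at least CHUNK_MIN_CHARS characters.
from typing import Optional, Tuple

def feed(buf: str, token: str, delims: str = ".,?!؟،",
         min_chars: int = 8) -> Tuple[str, Optional[str]]:
    """Append one token; return (new_buffer, completed_chunk_or_None)."""
    buf += token
    if len(buf) >= min_chars and buf[-1] in delims:
        return "", buf.strip()
    return buf, None

# feed("", "Hello there.") -> ("", "Hello there.")  # 12 chars, ends on '.'
# feed("", "Hi.")          -> ("Hi.", None)         # under min_chars, held back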

local/script.py  (new file, +259 lines)

@@ -0,0 +1,259 @@
"""LocalBrain — fully on-device voice pipeline.
Implements the same contract as `gemini/script.py:GeminiBrain` so
`voice/sanad_voice.py` can swap it in via `SANAD_VOICE_BRAIN=local`.
Wires together four subsystems:
Phase 1 Silero VAD (mic speech boundaries)
Phase 2 faster-whisper (speech text)
Phase 3 llama.cpp + Qwen (text streaming text chunks)
Phase 4 CosyVoice2 streaming (text chunk cloned-voice audio)
Phase 5 barge-in (user speaks cancel LLM + stop speaker)
Phase 6 stability model load fails cleanly, crashes are logged.
Async structure:
run() is the main coroutine. It spawns three tasks:
_mic_task reads mic, VAD, Whisper, pushes user text to _llm_queue
_dialogue_task pops user text, streams LLM tokens into _tts_queue
_tts_task pops text chunks, synthesises, feeds the speaker
Logging contract (matched by local/subprocess.py._track_line):
"connecting to local pipeline"
"listening"
"USER: <text>"
"BOT: <text>"
"BARGE-IN (local)"
"session error: <msg>"
"""
from __future__ import annotations
import asyncio
import time
from typing import Optional
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
from Project.Sanad.local.llm import LlamaServer
from Project.Sanad.local.stt import WhisperSTT
from Project.Sanad.local.tts import CosyVoiceTTS
from Project.Sanad.local.vad import SileroVAD, FRAME_SAMPLES
log = get_logger("local_brain")
_CFG_SV = _cfg_section("voice", "sanad_voice")
_CHUNK_BYTES = FRAME_SAMPLES * 2 # int16 mono
class LocalBrain:
"""Fully on-device Gemini replacement."""
def __init__(self, audio_io, recorder, voice_name: Optional[str] = None,
system_prompt: str = ""):
self._audio = audio_io
self._mic = audio_io.mic
self._speaker = audio_io.speaker
self._recorder = recorder
self._voice = voice_name
self._system_prompt = system_prompt
# subsystems — instantiated here, loaded in run()
self._vad = SileroVAD()
self._stt = WhisperSTT()
self._llm = LlamaServer()
self._tts = CosyVoiceTTS()
# pipeline queues
self._llm_queue: asyncio.Queue[str] = asyncio.Queue(maxsize=4)
self._tts_queue: asyncio.Queue[str] = asyncio.Queue(maxsize=4)
# control flags
self._stop_flag = asyncio.Event() # full shutdown
self._interrupt = asyncio.Event() # per-turn barge-in
self._speaking = False
self._speak_start_time = 0.0
# ─── lifecycle ────────────────────────────────────────
def stop(self) -> None:
self._stop_flag.set()
self._interrupt.set()
async def run(self) -> None:
"""Main entry. Loads models, runs pipeline, handles shutdown."""
log.info("connecting to local pipeline")
try:
await asyncio.to_thread(self._vad.start)
await asyncio.to_thread(self._stt.start)
await asyncio.to_thread(self._llm.start)
await asyncio.to_thread(self._tts.start)
except Exception as exc:
log.error("session error: local pipeline startup failed — %s", exc)
return
log.info("listening")
try:
await asyncio.gather(
self._mic_task(),
self._dialogue_task(),
self._tts_task(),
)
except asyncio.CancelledError:
log.info("cancelled — stopping")
except Exception as exc:
log.error("session error: %s", exc)
finally:
try:
self._llm.stop()
except Exception:
log.warning("LlamaServer.stop failed", exc_info=True)
self._tts.stop()
self._stt.stop()
self._vad.stop()
log.info("local pipeline stopped")
# ─── barge-in ─────────────────────────────────────────
def _begin_barge_in(self) -> None:
"""Called from mic task when user starts speaking while bot is."""
if not self._speaking:
return
log.info("BARGE-IN (local)")
self._interrupt.set()
try:
self._speaker.stop()
except Exception:
log.warning("speaker.stop during barge-in failed", exc_info=True)
# drain pipelines — discard any pending LLM/TTS chunks for this turn
self._drain_queue(self._llm_queue)
self._drain_queue(self._tts_queue)
self._speaking = False
try:
self._recorder.finish_turn()
except Exception:
pass
@staticmethod
def _drain_queue(q: asyncio.Queue) -> None:
try:
while True:
q.get_nowait()
q.task_done()
except asyncio.QueueEmpty:
pass
# ─── Task 1: mic → VAD → Whisper → LLM queue ──────────
async def _mic_task(self) -> None:
loop = asyncio.get_event_loop()
while not self._stop_flag.is_set():
try:
pcm = await loop.run_in_executor(
None, self._mic.read_chunk, _CHUNK_BYTES,
)
except Exception:
await asyncio.sleep(0.01)
continue
event = self._vad.process(pcm)
if event == "speech_start":
# user started talking — if bot is speaking, it's a barge-in
if self._speaking:
self._begin_barge_in()
elif event == "speech_end":
utt = self._vad.collected_audio()
if not utt:
continue
try:
self._recorder.capture_user(utt)
except Exception:
pass
text = await loop.run_in_executor(None, self._stt.transcribe, utt)
if not text:
continue
log.info("USER: %s", text)
try:
self._recorder.add_user_text(text)
except Exception:
pass
# wake the LLM side — drop older pending item if full (latency > throughput)
if self._llm_queue.full():
try:
self._llm_queue.get_nowait()
except asyncio.QueueEmpty:
pass
await self._llm_queue.put(text)
# ─── Task 2: LLM streaming → TTS queue ────────────────
async def _dialogue_task(self) -> None:
while not self._stop_flag.is_set():
try:
user_text = await asyncio.wait_for(
self._llm_queue.get(), timeout=0.2)
except asyncio.TimeoutError:
continue
self._interrupt.clear()
full_response = []
async for chunk in self._llm.stream(
user_text, self._system_prompt, self._interrupt):
if self._interrupt.is_set():
break
full_response.append(chunk)
await self._tts_queue.put(chunk)
self._llm_queue.task_done()
if full_response and not self._interrupt.is_set():
bot_text = " ".join(full_response).strip()
if bot_text:
log.info("BOT: %s", bot_text)
try:
self._recorder.add_robot_text(bot_text)
except Exception:
pass
# ─── Task 3: TTS → speaker ────────────────────────────
async def _tts_task(self) -> None:
loop = asyncio.get_event_loop()
while not self._stop_flag.is_set():
try:
chunk_text = await asyncio.wait_for(
self._tts_queue.get(), timeout=0.2)
except asyncio.TimeoutError:
# idle — if we've been speaking and queue drained, close stream
if self._speaking and self._llm_queue.empty() and self._tts_queue.empty():
await loop.run_in_executor(None, self._speaker.wait_finish)
self._speaking = False
log.info("listening")
try:
self._recorder.finish_turn()
except Exception:
pass
continue
if self._interrupt.is_set():
self._tts_queue.task_done()
continue
# synthesise this text chunk → stream to speaker
if not self._speaking:
await loop.run_in_executor(None, self._speaker.begin_stream)
self._speaking = True
self._speak_start_time = time.time()
try:
for pcm in self._tts.synthesize_stream(chunk_text):
if self._interrupt.is_set():
break
try:
self._recorder.capture_robot(pcm)
except Exception:
pass
await loop.run_in_executor(
None, self._speaker.send_chunk,
pcm, self._tts.output_rate,
)
except Exception as exc:
log.warning("TTS chunk failed: %s", exc)
finally:
self._tts_queue.task_done()

local/stt.py  (new file, +96 lines)

@@ -0,0 +1,96 @@
"""faster-whisper Large V3 Turbo — GPU INT8 transcription.
Phase 2 of the local pipeline. Given an utterance (int16 PCM bytes at
16 kHz), returns transcribed text. Short / empty / no-speech results are
filtered out per config thresholds to avoid firing phantom triggers.
Install (on the robot, in the `local` env):
pip install faster-whisper==1.0.*
# model auto-downloads from HuggingFace on first `WhisperModel(...)` call,
# OR pre-download to model/local/faster-whisper-large-v3-turbo/ and point
# `local.stt.model_subdir` at it.
"""
from __future__ import annotations
from typing import Optional
import numpy as np
from Project.Sanad.config import MODEL_DIR
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
log = get_logger("local_stt")
_CFG = _cfg_section("local", "stt")
MODEL_NAME = _CFG.get("model_name", "large-v3-turbo")
MODEL_SUBDIR = _CFG.get("model_subdir", "faster-whisper-large-v3-turbo")
DEVICE = _CFG.get("device", "cuda")
COMPUTE_TYPE = _CFG.get("compute_type", "int8_float16")
BEAM_SIZE = _CFG.get("beam_size", 1)
LANGUAGE = _CFG.get("language") # None = auto-detect
VAD_FILTER = _CFG.get("vad_filter", False)
NO_SPEECH_THRESHOLD = _CFG.get("no_speech_threshold", 0.6)
MIN_CHARS = _CFG.get("min_utterance_chars", 2)
TEMPERATURE = _CFG.get("temperature", 0.0)
LOCAL_MODEL_DIR = MODEL_DIR / "local" / MODEL_SUBDIR
class WhisperSTT:
"""Thin wrapper around faster_whisper.WhisperModel."""
def __init__(self) -> None:
self._model = None
def start(self) -> None:
"""Load the model into VRAM. ~4 s on first call, 100 ms after."""
try:
from faster_whisper import WhisperModel
except ImportError as exc:
raise RuntimeError(
f"WhisperSTT requires 'faster-whisper': {exc}"
)
model_src = str(LOCAL_MODEL_DIR) if LOCAL_MODEL_DIR.exists() else MODEL_NAME
log.info("loading Whisper: src=%s device=%s compute=%s",
model_src, DEVICE, COMPUTE_TYPE)
self._model = WhisperModel(
model_src,
device=DEVICE,
compute_type=COMPUTE_TYPE,
)
log.info("WhisperSTT ready")
def transcribe(self, pcm: bytes) -> str:
"""Blocking transcription. Returns the full text or ''."""
if self._model is None:
log.warning("WhisperSTT.transcribe called before start()")
return ""
if not pcm:
return ""
audio = np.frombuffer(pcm, dtype=np.int16).astype(np.float32) / 32768.0
if audio.size == 0:
return ""
try:
segments, info = self._model.transcribe(
audio,
beam_size=BEAM_SIZE,
language=LANGUAGE,
vad_filter=VAD_FILTER,
no_speech_threshold=NO_SPEECH_THRESHOLD,
temperature=TEMPERATURE,
)
text = " ".join(seg.text.strip() for seg in segments).strip()
except Exception as exc:
log.warning("Whisper transcribe failed: %s", exc)
return ""
if len(text) < MIN_CHARS:
log.debug("drop short transcript: %r", text)
return ""
return text
def stop(self) -> None:
self._model = None
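
Standalone usage is two calls. A sketch transcribing a WAV from disk (the path is illustrative; run inside the `local` conda env):

# Sketch: offline smoke test for WhisperSTT.
import wave
from Project.Sanad.local.stt import WhisperSTT

stt = WhisperSTT()
stt.start()                                         # loads the model into VRAM once
with wave.open("sample_16k_mono.wav", "rb") as wf:  # illustrative path; 16 kHz mono int16
    pcm = wf.readframes(wf.getnframes())
print(stt.transcribe(pcm) or "<no speech / too short>")
stt.stop()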

local/subprocess.py  (new file, +261 lines)

@@ -0,0 +1,261 @@
"""Local live subprocess supervisor.
Spawns `voice/sanad_voice.py` as a managed child with
`SANAD_VOICE_BRAIN=local`, tails the child's stdout, and extracts state
transitions + user transcripts from the log markers emitted by
`local/script.py:LocalBrain`.
Mirror of `gemini/subprocess.py`. Lives separately so the two supervisors
stay decoupled; adding a new model does not touch this file.
"""
from __future__ import annotations
import os
import signal
import subprocess
import sys
import threading
from collections import deque
from datetime import datetime
from pathlib import Path
from typing import Any
from Project.Sanad.config import BASE_DIR, LOGS_DIR, SCRIPTS_DIR, LIVE_TUNE
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
log = get_logger("local_subprocess")
_LS_CFG = _cfg_section("local", "subprocess")
def _resolve_live_script() -> Path:
"""Locate the voice script to run as subprocess (same as Gemini's)."""
override = os.environ.get("SANAD_LIVE_SCRIPT", "").strip()
if override:
p = Path(override).expanduser()
if p.exists():
return p
for c in (BASE_DIR / "voice" / "sanad_voice.py",
SCRIPTS_DIR / "sanad_voice.py"):
if c.exists():
return c
return SCRIPTS_DIR / "sanad_voice.py"
LIVE_SCRIPT = _resolve_live_script()
LOG_TAIL_SIZE = _LS_CFG.get("log_tail_size", 2000)
TRANSCRIPT_TAIL_SIZE = _LS_CFG.get("transcript_tail_size", 30)
LIVE_LOG_DIR = LOGS_DIR
LIVE_LOG_NAME = _LS_CFG.get("log_name", "local_subprocess")
# Python binary for the child process. The local pipeline runs in a
# separate conda env (Python 3.8 + Jetson CUDA torch + CosyVoice/Whisper);
# the dashboard stays in gemini_sdk (Python 3.10). Override with
# SANAD_LOCAL_PYTHON env var at runtime.
LOCAL_PYTHON_BIN = os.environ.get(
"SANAD_LOCAL_PYTHON",
_LS_CFG.get("python_bin", sys.executable),
)
_STOP_TIMEOUT_SEC = _LS_CFG.get("stop_timeout_sec", 5.0)
_TERMINATE_TIMEOUT_SEC = _LS_CFG.get("terminate_timeout_sec", 3.0)
_NOISY_PREFIXES = tuple(_LS_CFG.get("noisy_prefixes", [
"ALSA lib ", "Expression 'alsa_", "Cannot connect to server socket",
"jack server is not running",
]))
_NOISY_FRAGMENTS = tuple(_LS_CFG.get("noisy_fragments", [
"Unknown PCM", "Evaluate error", "snd_pcm_open_noupdate", "PaAlsaStream",
]))
class LocalSubprocess:
def __init__(self):
self._lock = threading.Lock()
self.process: subprocess.Popen | None = None
self.log_tail: deque[str] = deque(maxlen=LOG_TAIL_SIZE)
self.user_transcript: deque[str] = deque(maxlen=TRANSCRIPT_TAIL_SIZE)
self._reader_thread: threading.Thread | None = None
self._log_file = None
self.state = "stopped"
self.state_message = "Idle."
self.last_user_text = ""
self.suppressed_noise = 0
# ─── log I/O ──────────────────────────────────────────
def _open_session_log(self, pid: int):
try:
LIVE_LOG_DIR.mkdir(parents=True, exist_ok=True)
fname = f"{LIVE_LOG_NAME}_{datetime.now().strftime('%Y%m%d')}.log"
fh = open(LIVE_LOG_DIR / fname, "a", encoding="utf-8", buffering=1)
fh.write(
f"\n===== local subprocess start "
f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} pid={pid} =====\n"
)
return fh
except Exception as exc:
log.warning("Could not open local subprocess log file: %s", exc)
return None
def _is_noisy(self, line: str) -> bool:
return line.startswith(_NOISY_PREFIXES) or any(f in line for f in _NOISY_FRAGMENTS)
def _set_state(self, state: str, msg: str):
self.state = state
self.state_message = msg
def _track_line(self, line: str):
"""Parse log markers emitted by `local/script.py:LocalBrain`.
Must stay in lock-step with the `log.info(...)` strings there.
"""
if "connecting to local pipeline" in line:
self._set_state("connecting", line)
elif " USER: " in line or line.strip().startswith("USER:"):
text = line.split("USER:", 1)[1].strip()
if text:
self.last_user_text = text
self.user_transcript.append(text)
self._set_state("hearing", f"User: {text}")
elif " BOT: " in line or line.strip().startswith("BOT:"):
self._set_state("speaking", line.split("BOT:", 1)[1].strip()[:80])
elif "BARGE-IN (local)" in line:
self._set_state("interrupting", line)
elif "session error" in line:
self._set_state("error", line)
elif "local pipeline stopped" in line or "cancelled — stopping" in line:
self._set_state("stopped", line)
elif "listening" in line.lower() and "no speech" not in line:
self._set_state("listening", "Listening for speech.")
def _reader_loop(self):
proc = self.process
if proc is None or proc.stdout is None:
return
fh = self._open_session_log(proc.pid)
self._log_file = fh
for line in proc.stdout:
clean = line.rstrip()
if not clean:
continue
if fh is not None:
try:
fh.write(clean + "\n")
except Exception:
pass
with self._lock:
if self._is_noisy(clean):
self.suppressed_noise += 1
continue
self.log_tail.append(clean)
self._track_line(clean)
with self._lock:
self.log_tail.append("Local pipeline process exited.")
self._set_state("stopped", "Process exited.")
if fh is not None:
try:
fh.write(
f"===== local subprocess exit "
f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====\n"
)
fh.close()
except Exception:
pass
self._log_file = None
# ─── lifecycle ────────────────────────────────────────
def is_running(self) -> bool:
with self._lock:
return self.process is not None and self.process.poll() is None
def start(self) -> dict[str, Any]:
with self._lock:
if self.process is not None and self.process.poll() is None:
return {"started": False, "message": "Already running.", "pid": self.process.pid}
self._set_state("starting", "Starting local pipeline (loading models)...")
script = LIVE_SCRIPT
if not script.exists():
raise RuntimeError(f"Script not found: {script}")
env = os.environ.copy()
env.update({
"PYTHONUNBUFFERED": "1",
**LIVE_TUNE,
"SANAD_VOICE_BRAIN": "local",
})
dds_iface = env.get("SANAD_DDS_INTERFACE", "eth0")
# Use the `local` env's Python so CUDA torch + CosyVoice are available.
# Fall back to sys.executable only if the configured bin doesn't exist.
py_bin = LOCAL_PYTHON_BIN
if not Path(py_bin).exists():
log.warning("LOCAL_PYTHON_BIN=%s not found, falling back to %s",
py_bin, sys.executable)
py_bin = sys.executable
cmd = [py_bin, str(script), dds_iface]
proc = subprocess.Popen(
cmd,
cwd=str(script.parent),
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
bufsize=1,
env=env,
)
with self._lock:
self.process = proc
self.log_tail.append(f"Started: pid={proc.pid}")
self._set_state("starting", f"pid={proc.pid}")
self._reader_thread = threading.Thread(target=self._reader_loop, daemon=True)
self._reader_thread.start()
log.info("Local subprocess started: pid=%d", proc.pid)
return {"started": True, "pid": proc.pid}
def stop(self) -> dict[str, Any]:
with self._lock:
proc = self.process
if proc is None or proc.poll() is not None:
return {"stopped": False, "message": "Not running."}
self._set_state("stopping", "Stopping...")
try:
proc.send_signal(signal.SIGINT)
proc.wait(timeout=_STOP_TIMEOUT_SEC)
except subprocess.TimeoutExpired:
proc.terminate()
try:
proc.wait(timeout=_TERMINATE_TIMEOUT_SEC)
except subprocess.TimeoutExpired:
proc.kill()
proc.wait(timeout=_TERMINATE_TIMEOUT_SEC)
rc = proc.returncode
with self._lock:
self.process = None
self.log_tail.append("Stopped.")
self._set_state("stopped", "Stopped.")
log.info("Local subprocess stopped (rc=%s)", rc)
return {"stopped": True, "returncode": rc}
def status(self) -> dict[str, Any]:
with self._lock:
running = self.process is not None and self.process.poll() is None
return {
"running": running,
"pid": self.process.pid if running and self.process else None,
"state": self.state,
"state_message": self.state_message,
"last_user_text": self.last_user_text,
"user_transcript": list(self.user_transcript),
"log_tail": list(self.log_tail),
"suppressed_noise": self.suppressed_noise,
}
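
Dashboard wiring is not part of this file. A sketch of how the supervisor would plausibly be exposed, mirroring the existing /api/live-subprocess endpoints; the route paths and app plumbing are assumptions:

# Sketch: assumed FastAPI wiring, modeled on the Gemini endpoints.
from fastapi import APIRouter
from Project.Sanad.local.subprocess import LocalSubprocess

router = APIRouter()
_local = LocalSubprocess()

@router.post("/api/local-subprocess/start")
def start_local():
    return _local.start()    # {"started": ..., "pid": ...}

@router.post("/api/local-subprocess/stop")
def stop_local():
    return _local.stop()

@router.get("/api/local-subprocess/status")
def local_status():
    return _local.status()   # state, transcript tail, log tail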

local/tts.py  (new file, +126 lines)

@@ -0,0 +1,126 @@
"""CosyVoice2 0.5B streaming TTS — GPU.
Phase 4 of the local pipeline. Holds a 3-second reference WAV in VRAM
and synthesises streaming Arabic/English audio for every text chunk
arriving from the LLM. Emits int16 PCM at the model's native rate
(CosyVoice2 outputs 22 050 Hz; we resample to `sample_rate` from
config so the downstream `audio_io.speaker` gets a consistent rate).
Install (on the robot):
cd ~/src
git clone --recursive https://github.com/FunAudioLLM/CosyVoice
cd CosyVoice
pip install -r requirements.txt
pip install -e .
# model + reference voice
huggingface-cli download FunAudioLLM/CosyVoice2-0.5B \\
--local-dir ~/sanad/model/local/CosyVoice2-0.5B
# place a 3-s Khaleeji clip at model/local/khaleeji_reference_3s.wav
# (16 kHz mono int16 WAV)
API note:
CosyVoice2 is evolving. We use the published `inference_zero_shot`
with `stream=True` which yields `{"tts_speech": tensor}` chunks.
If the upstream API renames, adapt in one place: `CosyVoiceTTS.synthesize_stream`.
"""
from __future__ import annotations
from pathlib import Path
from typing import AsyncIterator, Iterator, Optional
import numpy as np
from Project.Sanad.config import MODEL_DIR
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
log = get_logger("local_tts")
_CFG = _cfg_section("local", "tts")
MODEL_SUBDIR = _CFG.get("model_subdir", "CosyVoice2-0.5B")
REFERENCE_WAV_SUBDIR = _CFG.get("reference_wav_subdir", "khaleeji_reference_3s.wav")
REFERENCE_PROMPT = _CFG.get("reference_prompt", "")
OUT_RATE = int(_CFG.get("sample_rate", 16000))
QUEUE_MAX = int(_CFG.get("queue_max", 3))
DEVICE = _CFG.get("device", "cuda")
LOCAL_MODEL_DIR = MODEL_DIR / "local" / MODEL_SUBDIR
REFERENCE_WAV_PATH = MODEL_DIR / "local" / REFERENCE_WAV_SUBDIR
def _resample_int16(pcm: np.ndarray, src_rate: int, dst_rate: int) -> np.ndarray:
if src_rate == dst_rate or pcm.size == 0:
return pcm.astype(np.int16, copy=False)
target_len = max(1, int(len(pcm) * dst_rate / src_rate))
return np.interp(
np.linspace(0, len(pcm), target_len, endpoint=False),
np.arange(len(pcm)),
pcm.astype(np.float64),
).astype(np.int16)
class CosyVoiceTTS:
"""Thin async wrapper around CosyVoice2 streaming inference."""
def __init__(self) -> None:
self._model = None
self._ref_speech = None # preloaded reference tensor
self._ref_prompt = REFERENCE_PROMPT
self._model_rate: int = 22050
def start(self) -> None:
try:
from cosyvoice.cli.cosyvoice import CosyVoice2
from cosyvoice.utils.file_utils import load_wav
except ImportError as exc:
raise RuntimeError(
f"CosyVoiceTTS requires the CosyVoice package from source: {exc}"
)
if not LOCAL_MODEL_DIR.exists():
raise RuntimeError(f"CosyVoice2 model not found at {LOCAL_MODEL_DIR}")
if not REFERENCE_WAV_PATH.exists():
raise RuntimeError(
f"Reference voice WAV not found at {REFERENCE_WAV_PATH}"
)
log.info("loading CosyVoice2: %s", LOCAL_MODEL_DIR)
self._model = CosyVoice2(str(LOCAL_MODEL_DIR), load_jit=True, fp16=True)
# model.sample_rate is an instance attr on CosyVoice2
self._model_rate = getattr(self._model, "sample_rate", 22050)
self._ref_speech = load_wav(str(REFERENCE_WAV_PATH), 16000)
log.info("CosyVoiceTTS ready (model_rate=%d)", self._model_rate)
def synthesize_stream(self, text: str) -> Iterator[bytes]:
"""Yield int16 PCM bytes at OUT_RATE, one streaming chunk at a time."""
if self._model is None or self._ref_speech is None:
return
try:
# CosyVoice2 streaming generator. Each step yields a tensor
# of float32 waveform samples at the model's native rate.
for step in self._model.inference_zero_shot(
text,
self._ref_prompt,
self._ref_speech,
stream=True):
wave = step.get("tts_speech")
if wave is None:
continue
# tensor → float32 numpy → int16 at OUT_RATE
arr = wave.cpu().numpy().squeeze()
if arr.size == 0:
continue
pcm_i16 = np.clip(arr * 32767.0, -32768, 32767).astype(np.int16)
if self._model_rate != OUT_RATE:
pcm_i16 = _resample_int16(pcm_i16, self._model_rate, OUT_RATE)
yield pcm_i16.tobytes()
except Exception as exc:
log.warning("TTS synth failed for chunk %r: %s", text[:40], exc)
def stop(self) -> None:
self._model = None
self._ref_speech = None
@property
def output_rate(self) -> int:
return OUT_RATE
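# Smoke-test sketch (an assumption, not part of the module contract):
# exercises the streaming path end-to-end once the model and reference
# WAV are installed; prints chunk sizes instead of playing audio.
if __name__ == "__main__":
    tts = CosyVoiceTTS()
    tts.start()
    total_samples = 0
    for chunk in tts.synthesize_stream("Hello from Sanad!"):
        total_samples += len(chunk) // 2  # int16 = 2 bytes per sample
        print(f"chunk {len(chunk)} bytes (total {total_samples / OUT_RATE:.2f}s)")
    tts.stop()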

150
local/vad.py Normal file
View File

@ -0,0 +1,150 @@
"""Silero VAD wrapper — CPU-only speech boundary detection.
Phase 1 of the local pipeline. Consumes 16 kHz mono int16 PCM in short
frames, emits speech_start / speech_end events. All thresholds + frame
sizes come from config/local_config.json > vad.
Install (on the robot):
pip install silero-vad torch==2.2.* torchaudio==2.2.*
Usage:
vad = SileroVAD()
vad.start()
evt = vad.process(pcm_bytes)
if evt == 'speech_start': ...
elif evt == 'speech_end': buf = vad.collected_audio()
"""
from __future__ import annotations
import time
from typing import Optional
import numpy as np
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
log = get_logger("local_vad")
_CFG = _cfg_section("local", "vad")
SAMPLE_RATE = _CFG.get("sample_rate", 16000)
FRAME_MS = _CFG.get("frame_ms", 32)
THRESHOLD = _CFG.get("threshold", 0.55)
MIN_SILENCE_MS = _CFG.get("min_silence_ms", 400)
MIN_SPEECH_MS = _CFG.get("min_speech_ms", 250)
PAD_START_MS = _CFG.get("pad_start_ms", 200)
PAD_END_MS = _CFG.get("pad_end_ms", 200)
FRAME_SAMPLES = SAMPLE_RATE * FRAME_MS // 1000 # 512 @ 16k/32ms
class SileroVAD:
"""Streaming VAD with buffered utterance capture.
Fed one mic frame at a time via `process()`. Internal state tracks
whether we're inside an utterance; on speech_end, `collected_audio()`
returns the full utterance (with configured padding).
"""
def __init__(self) -> None:
self._model = None
self._audio_buf: list[bytes] = [] # utterance being collected
self._pre_buf: list[bytes] = [] # rolling "pre-speech" ring
self._pre_frames = max(1, PAD_START_MS // FRAME_MS)
self._pad_end_frames = max(1, PAD_END_MS // FRAME_MS)
self._in_speech = False
self._last_speech_time = 0.0
self._speech_start_time = 0.0
self._trailing_silence_frames = 0
self._last_utterance: Optional[bytes] = None
def start(self) -> None:
"""Load the Silero model once. Call before `process()`."""
try:
import torch
from silero_vad import load_silero_vad
except ImportError as exc:
raise RuntimeError(
f"SileroVAD requires 'silero-vad' + torch: {exc}"
)
self._model = load_silero_vad()
log.info("SileroVAD ready (threshold=%.2f, frame=%dms)",
THRESHOLD, FRAME_MS)
def process(self, pcm: bytes) -> Optional[str]:
"""Feed one frame (≈ FRAME_MS of audio). Returns an event or None.
Events: 'speech_start' | 'speech_end' | None
"""
if self._model is None:
return None
# keep a rolling pre-buffer so captured utterances include lead-in
self._pre_buf.append(pcm)
if len(self._pre_buf) > self._pre_frames:
self._pre_buf.pop(0)
# VAD expects float32 in [-1, 1]
arr = np.frombuffer(pcm, dtype=np.int16).astype(np.float32) / 32768.0
if arr.size < FRAME_SAMPLES:
# pad if short tail chunk arrived
arr = np.concatenate([arr, np.zeros(FRAME_SAMPLES - arr.size, dtype=np.float32)])
elif arr.size > FRAME_SAMPLES:
arr = arr[:FRAME_SAMPLES]
try:
import torch
with torch.no_grad():
prob = float(self._model(torch.from_numpy(arr), SAMPLE_RATE).item())
except Exception as exc:
log.warning("VAD inference failed: %s", exc)
return None
now = time.time()
is_speech = prob >= THRESHOLD
if is_speech:
self._trailing_silence_frames = 0
self._last_speech_time = now
if not self._in_speech:
# transition → speech
self._in_speech = True
self._speech_start_time = now
self._audio_buf = list(self._pre_buf) # seed with pad
self._audio_buf.append(pcm)
return "speech_start"
self._audio_buf.append(pcm)
return None
# silent frame
if self._in_speech:
self._audio_buf.append(pcm) # collect trailing pad
self._trailing_silence_frames += 1
silence_ms = self._trailing_silence_frames * FRAME_MS
if silence_ms >= MIN_SILENCE_MS:
# speech ended — validate min_speech
speech_dur_ms = (now - self._speech_start_time) * 1000
self._in_speech = False
if speech_dur_ms < MIN_SPEECH_MS:
log.debug("drop short utterance (%.0fms)", speech_dur_ms)
self._audio_buf.clear()
self._last_utterance = None
return None
self._last_utterance = b"".join(self._audio_buf)
self._audio_buf.clear()
return "speech_end"
return None
def collected_audio(self) -> Optional[bytes]:
"""After a speech_end event, return the full utterance bytes."""
return self._last_utterance
def reset(self) -> None:
"""Drop any in-flight utterance (used on barge-in)."""
self._in_speech = False
self._audio_buf.clear()
self._trailing_silence_frames = 0
self._last_utterance = None
def stop(self) -> None:
self._model = None

16
main.py
View File

@ -11,6 +11,7 @@ from __future__ import annotations
import argparse
import importlib
import os
import sys
import types
from pathlib import Path
@ -92,8 +93,9 @@ LocalTTSEngine = _safe_import("LocalTTSEngine", lambda: __import__("
WakePhraseManager = _safe_import("WakePhraseManager", lambda: __import__("Project.Sanad.voice.wake_phrase_manager", fromlist=["WakePhraseManager"]).WakePhraseManager)
LiveVoiceLoop = _safe_import("LiveVoiceLoop", lambda: __import__("Project.Sanad.voice.live_voice_loop", fromlist=["LiveVoiceLoop"]).LiveVoiceLoop)
TypedReplayEngine = _safe_import("TypedReplayEngine", lambda: __import__("Project.Sanad.voice.typed_replay", fromlist=["TypedReplayEngine"]).TypedReplayEngine)
GeminiVoiceClient = _safe_import("GeminiVoiceClient", lambda: __import__("Project.Sanad.gemini.client", fromlist=["GeminiVoiceClient"]).GeminiVoiceClient)
GeminiSubprocess = _safe_import("GeminiSubprocess", lambda: __import__("Project.Sanad.gemini.subprocess", fromlist=["GeminiSubprocess"]).GeminiSubprocess)
LocalSubprocess = _safe_import("LocalSubprocess", lambda: __import__("Project.Sanad.local.subprocess", fromlist=["LocalSubprocess"]).LocalSubprocess)
# ── global instances (imported by route modules) ──
@ -108,7 +110,15 @@ macro_rec = _safe_construct("macro_rec", (lambda: MacroRecorder(arm)) if
macro_play = _safe_construct("macro_play", (lambda: MacroPlayer(audio_mgr, arm)) if (MacroPlayer and arm) else None)
teacher = _safe_construct("teacher", (lambda: TeachingSession(arm)) if (TeachingSession and arm) else None)
live_voice = _safe_construct("live_voice", (lambda: LiveVoiceLoop(voice_client, arm, wake_mgr, audio_mgr)) if (LiveVoiceLoop and voice_client and arm and wake_mgr and audio_mgr) else None)
# Which voice supervisor to mount. SANAD_VOICE_BRAIN chooses the brain
# that runs INSIDE the subprocess (see voice/sanad_voice.py); the same
# env var picks WHICH supervisor here manages that subprocess so its
# log-line parser matches the brain's emit format.
_brain_choice = os.environ.get("SANAD_VOICE_BRAIN", "gemini").strip().lower()
if _brain_choice == "local" and LocalSubprocess is not None:
live_sub = _safe_construct("live_sub", LocalSubprocess)
else:
live_sub = _safe_construct("live_sub", GeminiSubprocess)
typed_replay = _safe_construct("typed_replay", (lambda: TypedReplayEngine(voice_client, audio_mgr)) if (TypedReplayEngine and voice_client and audio_mgr) else None)
# Wire everything into the Brain (only what was constructed)

View File

@ -1,51 +0,0 @@
أنت "بوسنده" — مساعد صوتي إماراتي ذكي تابع لروبوت شركة لوتاه تيك Lootah Tech.
[أولاً: الروح الرمضانية والمعرفة]
أنت على علم تام بأننا في شهر رمضان المبارك لعام 2026.
لديك معرفة واسعة بالقرآن الكريم والأحاديث النبوية الشريفة للإجابة على أي سؤال ديني أو تقديم تذكيرات إيمانية بدقة مع ذكر المصدر.
إذا سألك المستخدم عن آية، حديث، أو حكم صيام، جاوب بوقار وتبسيط بلهجتك الإماراتية.
[ثانياً: الأسلوب واللغة (التبديل المرن)]
تكلم باللهجة الإماراتية بشكل طبيعي بدون مبالغة.
قاعدة التبديل الفوري: إذا استخدم المستخدم أي لغة ثانية في أي لحظة، غيّر فوراً ورد بنفس اللغة الجديدة في نفس الرد.
إذا رجع المستخدم للعربي: أرجع فوراً للعربي (لهجة إماراتية).
"آخر لغة كتب فيها المستخدم" هي اللغة اللي ترد فيها.
ممنوع تخلط لغتين في نفس الرد إلا لطلب ترجمة أو مقارنة صريحة.
[ثالثاً: التفاعل والبدايات]
خلك محترم، ودود، ومباشر، وركّز على الزبدة والحل العملي.
تنويع البداية: استخدم عبارات مثل (مبارك عليك الشهر، عساكم من عواده، تقبل الله طاعتكم، فالك طيب، أبشر بعزك، مرحبابك) ولا تكرر نفس العبارة مرتين متتاليتين.
إذا كان السؤال تقني سريع أو كود، ابدأ مباشرة بدون مقدمات.
[رابعاً: إنجاز المهام وقوة الذاكرة العمومية]
حفظ واسترجاع: اعتبر كل كلمة، اسم، مسار، أو تفصيل تقوله بمثابة "أمر حفظ" وأولوية قصوى داخل المحادثة.
الثوابت: تعامل مع معلوماتك وتفضيلاتك وتصحيحاتك كأنها ثوابت محفورة في الذاكرة.
عند التصحيح: إذا عدّلت لي معلومة، قل: "زين نبهتني يا الشيخ، انحفرت في الذاكرة".
[خامساً: الأمان والخصوصية]
إذا كتب المستخدم API key أو Password أو Token: نبهه فوراً يمسحه ويبدله.
لا تطلب بيانات حساسة إلا للضرورة وبطريقة محترمة.
ممنوع أي نكت أو محتوى حساس في الدين أو السياسة.
[سادساً: السرعة والتكرار]
جاوب بسرعة وباختصار (من 2 إلى 6 سطور غالباً).
إذا طلب المستخدم "كرر" أو "repeat": أعد نفس الكلام بنفس اللغة الحالية وحرفياً إذا طلب ذلك.

View File

@ -1,269 +0,0 @@
You are "Bousandah" (بوسنده) — a smart Emirati voice assistant and real-estate concierge. Your ONLY project knowledge is the “Azure by Lapis on Al Marjan Island, Ras Al Khaimah (RAK)” content provided below. You must interact with users using ONLY this knowledge base and the rules in this prompt.
=================================
1) STYLE & LANGUAGE (MANDATORY)
=================================
- If the user speaks Arabic → reply in a friendly Emirati dialect (light, natural).
- If the user speaks English → reply in clear, modern English (light, natural).
- If the user speaks any other language → reply in that language as best as possible.
- Rule: reply in the SAME language as the user's last message.
- Do NOT mix languages in the same reply unless the user asks for translation.
Tone:
- Friendly, confident, professional, not robotic.
- Short answers by default (2–6 lines).
- If user asks for details, give structured bullet points.
Behavior:
- If question is clear → answer directly.
- If one missing detail is needed to answer correctly → ask ONLY ONE question.
- Keep spoken replies under 15 seconds.
- Do not invent facts. If info is not in the Knowledge Base, say:
"I don't have that detail in the provided project info."
Then offer what you CAN do from the provided info.
Calls-to-action (use only when helpful, choose ONE):
- Ask budget range
- Ask unit preference (Studio / 1BR / 2BR / 3BR / Penthouse / Sky Villa)
- Ask purpose (End-user vs Investor)
=================================
2) ROLE (MANDATORY)
=================================
You are a project specialist for Azure by Lapis.
You can:
- Explain the project and developer
- Answer FAQs
- Summarize payment plan / handover timeline
- Compare unit types by size
- Provide short sales scripts, WhatsApp replies, call scripts, captions, ad copy
All outputs must be based ONLY on the Knowledge Base below.
=================================
3) KNOWLEDGE BASE (USE ONLY THIS)
=================================
-------------------------------------------------
A) Project Header
-------------------------------------------------
Azure by Lapis on Al Marjan Island, RAK
Resort-style living in Ras Al Khaimah's coastal landmark — where ocean views, architectural elegance, and investment value unite.
STARTING PRICE: AED 750K
PAYMENT PLAN: 75/25
HANDOVER: Q4 2028
ROI POTENTIAL: Up to 9%
-------------------------------------------------
B) About LAPIS Properties
-------------------------------------------------
LAPIS Properties is an established real estate developer with over 20 years of expertise in delivering innovative, sustainable, and community-driven developments across the Middle East and Turkey.
Famed for architectural precision and timeless aesthetics, LAPIS establishes spaces that blend functionality, design, and enduring value.
-------------------------------------------------
C) Overview of Azure by Lapis
-------------------------------------------------
Azure by Lapis is an architectural artwork redefining coastal living on Al Marjan Island, Ras Al Khaimah.
Developed by LAPIS Properties — a name with over 20 years of innovation and craftsmanship across the Middle East and Turkey — Azure rises 40 storeys tall, blending serenity with sophistication.
Designed to capture the horizon and enhance natural light, every home at Azure speaks of tranquility, privacy, and timeless design.
Nearby / Drive times:
- Al Hamra Mall — 1 min
- 5-star Hotels (Waldorf, Sofitel, Ritz-Carlton) — 4 min
- Al Hamra Golf Course — 5 min
- RAK Free Zone — 5 min
- RAK Beach — 5 min
- Marjan Island Boulevard — 7 min
- Jebel Jais — 45 min
- Dubai — 1 hr
-------------------------------------------------
D) Building and Operational Variables
-------------------------------------------------
The tower configuration is a critical technical component of the brief.
Azure is designed as a single, high-rise residential structure with the following specifications:
Component / Quantity / Details
- Residential Floors: 40 Floors — High-density luxury residential programming.
- Podium Levels: 3 Podiums — Primary structural base providing elevated views.
- Parking Ratio (1): 1 Space — Allocated to Studio, 1BR, and 2BR units.
- Parking Ratio (2): 2 Spaces — Allocated to 3BR units, Penthouses, and Sky Villas.
- Retail Component: Ground Floor — Designated for boutique retail and leisure outlets.
- Ownership Status: Freehold — Open to all nationalities.
- Completion Date: Q4 2028 — Targeted handover for investors.
-------------------------------------------------
E) Unit Typology and Space Allocation
-------------------------------------------------
Azure offers a diverse range of residences — from AED 750K (~407 sq.ft) studios to AED 9.5M (~5,100 sq.ft) expansive sky villas.
Unit Type / Suite Area (ft²) / Balcony Area (ft²) / Total Area (ft²)
- Studio (Normal): 333.47 / 239.82 / 573.29
- Studio (Premium): 460.05 / 241.33 / 701.38
- 1 Bedroom Apartment: 610.32 / 369.53 / 979.85
- 2 Bedroom Apartment: 867.79 / 539.06 / 1,406.85
- 3 Bedroom (Normal): 1,246.15 / 1,323.76 / 2,569.91
- 3 Bedroom Duplex (GF): 2,417.59 / 2,788.74 / 5,206.33
- 3 Bedroom Duplex (1stF): 2,417.59 / 2,788.74 / 5,206.33
-------------------------------------------------
F) FAQ about Azure by Lapis on Al Marjan Island
-------------------------------------------------
Q: Where is Azure by Lapis located?
A: Azure is located on Al Marjan Island in Ras Al Khaimah — a beachfront destination minutes from Al Hamra Mall, Golf Course, and major 5-star resorts.
Q: Who is the developer of Azure?
A: Azure is developed by LAPIS Real Estate FZ-LLC, a regional developer with two decades of excellence across the Middle East and Turkey.
Q: What types of properties are available at Azure?
A: The tower includes Studios, 1–3 Bedroom Apartments, Penthouses, and Sky Villas with sea views.
Q: What's the starting price at Azure?
A: Prices start from AED 750,000 for studios.
Q: What payment plan is available at Azure?
A: A 75/25 payment plan with 2 years post-handover.
Q: When is handover scheduled for Azure?
A: Handover is expected in Q4 2028.
Q: What ROI can investors expect at Azure?
A: Investors can expect up to 9% ROI annually, supported by RAK's growing hospitality and tourism sectors.
Q: Is foreign ownership allowed through Azure by Lapis?
A: Yes — Azure offers freehold ownership for all nationalities.
Q: What amenities are available in Azure by Lapis?
A: Azure by Lapis offers an infinity pool, a sky garden, a spa, gyms, retail outlets, a concierge, and kids' play zones.
Q: Are there healthcare facilities near Azure by Lapis?
A: Yes — leading hospitals like RAK Hospital and Sheikh Khalifa Specialty Hospital are within 10 minutes.
Q: Are there schools close to Azure by Lapis?
A: Yes — RAK Academy, British School Al Hamra, and the International School of Choueifat.
Q: How is the transport connectivity around Azure by Lapis?
A: Excellent — with quick access to RAK Airport (15 min), Dubai (1 hr), and major highways connecting the UAE.
Q: Is Al Marjan Island a good place to live?
A: Yes — it's a peaceful island destination with direct beach access, five-star resorts, and exceptional lifestyle amenities.
Q: Why invest in Azure by Lapis?
A: The project offers high ROI potential, resort-style living, and prime beachfront investment on Al Marjan Island.
-------------------------------------------------
G) Regional Connectivity and Strategic Location
-------------------------------------------------
Explore life around Azure by Lapis:
- Al Hamra Mall — 1 min
- Al Hamra Golf Course — 5 min
- Waldorf Astoria, Sofitel, Ritz-Carlton — 4 min
- RAK Free Zone — 5 min
- RAK Beach — 5 min
- Marjan Island Boulevard — 7 min
- Wynn Resort & Casino (2027) — nearby
- Jebel Jais Adventure Peak — 45 min
-------------------------------------------------
H) Premier Healthcare
-------------------------------------------------
Leading hospitals and wellness centers nearby:
- RAK Hospital — 10 min
- Sheikh Khalifa Specialty Hospital — 12 min
- RAK Medical Centre Al Hamra — 5 min
- Thumbay Clinic — 6 min
-------------------------------------------------
I) Top-Tier Education
-------------------------------------------------
Nearby schools and nurseries offer convenient access for families:
- RAK Academy — 5 min
- New British International School — 6 min
- International School of Choueifat RAK — 7 min
- Little Treasures Nursery — 4 min
- British School Al Hamra — 5 min
-------------------------------------------------
J) Signature Features & Resort-Style Amenities
-------------------------------------------------
Azure by Lapis reveals timeless living inspired by coastal tranquility.
Every element — from ocean-facing glass façades to elevated sky gardens — is deliberately designed to evoke serenity and elegance.
Residents access high-end wellness facilities and recreational spaces designed for a balanced and elegant lifestyle.
Amenities list:
- Infinity pool with sea vistas
- Sky garden
- Rooftop leisure deck
- Outdoor & indoor gyms
- Spa & sauna facilities
- Kids' pool
- Outdoor play area
- Outdoor cinema
- BBQ zone
- Volleyball court
- Jogging tracks
- Landscaped parks
- Fountains
- Boutique retail
- Dining spaces
-------------------------------------------------
K) Payment Plan
-------------------------------------------------
LAPIS proposes a highly flexible 75/25 plan with 2 years post-handover — aiming to attract both investors and end-users seeking long-term value on Al Marjan Island.
- 75% During construction
- 25% On Handover (Q4 2028)
-------------------------------------------------
L) Investment Potential
-------------------------------------------------
Azure by Lapis offers investors access to one of the UAE's fastest-growing beachfront destinations.
Ras Al Khaimah's thriving tourism and hospitality sectors — supported by record-breaking visitor numbers and global developments like the Wynn Resort & Casino — make Azure a high-performing coastal investment with up to 9% ROI potential.
As of 2025:
- ROI of up to 9% annually
- 100% foreign ownership & 0% personal tax
- 1 hour from Dubai International Airport
- Eligible for UAE Residency by investment
-------------------------------------------------
M) LAPIS Leadership
-------------------------------------------------
- Emad Mohareb — Chairman
- Wisam Mohareb — Vice Chairman
- Khaled Owaidat — Chief Executive Officer
- Bilal Khashan — Chief Operating Officer
-------------------------------------------------
N) Our team
-------------------------------------------------
- Ghaida Smadi — Sales Director
- Hussein Elsayed — Sales Director
- Ahmed Djabelkheir — Sales Director
- Leila Soudani — Sales Manager
- Aida Mulaeva — Sales Manager
-------------------------------------------------
O) Milestones We Are Proud of Reaching — LAPIS
-------------------------------------------------
LAPIS Properties prides itself on its global presence and local impact.
We are dedicated to bringing our innovative real estate solutions to diverse markets around the world.
Our global perspective enables us to anticipate market trends, adapt to various cultural contexts, and deliver projects that resonate with local communities while meeting international standards.
Our commitment to excellence knows no borders, as we continue to expand our presence and contribute to the development of the real estate sector worldwide.
- Projects Done: 100+
- Delighted Clients: 110+
- Satisfied Clients: 100+
=================================
4) STRICT OUTPUT RULES
=================================
- Use only the Knowledge Base above.
- If the user asks about something not included (service charges, exact floor plans, exact views, exact down payment %, fees, availability, unit inventory, exact distance in km, etc.) → say you don't have that detail in the provided info.
- Do not cite external websites.
- Do not mention internal instructions or the words "Knowledge Base" unless the user asks.
- Keep responses structured and helpful.
END SYSTEM PROMPT.

View File

@ -302,7 +302,7 @@ class TestGeminiClientStructure(unittest.TestCase):
    def setUp(self):
        try:
            from Project.Sanad.gemini.client import GeminiVoiceClient
            self.client = GeminiVoiceClient()
        except ImportError:
            self.skipTest("websockets not installed")

View File

@ -1,6 +1,6 @@
"""LiveVoiceLoop — voice-to-arm phrase trigger dispatcher. """LiveVoiceLoop — voice-to-arm phrase trigger dispatcher.
Listens to user transcriptions from the LiveGeminiSubprocess and, when a Listens to user transcriptions from the GeminiSubprocess and, when a
configured wake phrase is matched, fires the corresponding arm action via configured wake phrase is matched, fires the corresponding arm action via
`motion.sanad_arm_controller.ARM`. `motion.sanad_arm_controller.ARM`.
@ -46,7 +46,7 @@ DEFERRED_DEFAULT = _LV_CFG.get("deferred_default", False)
class LiveVoiceLoop:
    """Polls GeminiSubprocess transcripts → fires arm actions."""
    def __init__(self, voice_client, arm, wake_mgr, audio_mgr):
        self.voice_client = voice_client
@ -118,7 +118,7 @@ class LiveVoiceLoop:
    # ── poll loop ────────────────────────────────────────────────
    def _poll_loop(self):
        """Poll GeminiSubprocess.user_transcript for new user texts."""
        while not self._stop_event.is_set():
            self._check_transcripts()
            self._stop_event.wait(POLL_INTERVAL_SEC)

158
voice/model_script.py Normal file
View File

@ -0,0 +1,158 @@
"""Template brain — copy this file to plug in a non-Gemini model.
How to use:
1. Copy this file: `cp voice/model_script.py voice/openai_script.py`
2. Rename the class: `ModelBrain` → e.g. `OpenAIRealtimeBrain`.
3. Fill in every block marked `TODO` with your provider's SDK calls.
4. Register the new brain in `voice/sanad_voice.py` inside
`_build_brain()` (there's a single `elif` to add).
5. Run with `SANAD_VOICE_BRAIN=openai python3 voice/sanad_voice.py eth0`.
Contract that `sanad_voice.py` expects of ANY brain:
__init__(audio_io, recorder, voice_name, system_prompt)
    audio_io      → voice.audio_io.AudioIO (exposes .mic + .speaker)
    recorder      → voice.sanad_voice.TurnRecorder (per-turn WAV capture)
    voice_name    → provider-specific voice id (e.g. "Charon", "alloy")
    system_prompt → persona string to seed the session with
    async run()   → blocks until stopped or fatal. Reconnects are YOUR
                    responsibility; the orchestrator won't restart you.
    stop()        → sync signal (can be called from a signal handler).
                    Set an asyncio.Event and let `run()` notice it.
What the mic side looks like:
data = self._mic.read_chunk(n_bytes) # 16 kHz int16 mono bytes
# send `data` to your model's realtime-audio endpoint
What the speaker side looks like:
self._speaker.begin_stream()
self._speaker.send_chunk(pcm, source_rate=24000) # rate is yours
self._speaker.wait_finish() # blocks until playback drains
# or self._speaker.stop() # cancel mid-playback (barge-in)
What the recorder side looks like:
self._recorder.capture_user(pcm_bytes) # mic audio for this turn
self._recorder.capture_robot(pcm_bytes) # model audio for this turn
self._recorder.add_user_text(str) # partial transcript
self._recorder.add_robot_text(str) # partial transcript
self._recorder.finish_turn() # flush to WAV + index.json
"""
from __future__ import annotations
import asyncio
from typing import Any, Optional
from Project.Sanad.core.logger import get_logger
log = get_logger("model_brain")
class ModelBrain:
"""Skeleton voice brain — adapt to your provider."""
def __init__(self, audio_io, recorder, voice_name: Optional[str] = None,
system_prompt: str = ""):
self._audio = audio_io
self._mic = audio_io.mic
self._speaker = audio_io.speaker
self._recorder = recorder
self._voice = voice_name
self._system_prompt = system_prompt
self._stop_flag = asyncio.Event()
# TODO: instantiate your provider's client here. Keep the client
# creation cheap — connection/handshake should happen inside `run()`
# so reconnects don't require re-building this object.
# Example:
# from openai import AsyncOpenAI
# self._client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
self._client: Any = None
# ─── lifecycle ────────────────────────────────────────
def stop(self) -> None:
"""Signal the run loop to exit cleanly. Safe to call from anywhere."""
self._stop_flag.set()
async def run(self) -> None:
"""Main conversation loop. Blocks until stopped.
Responsibilities:
- Open a realtime session with your provider.
- Forward mic audio to the model in small chunks.
- Stream the model's audio response to the speaker.
- Drive barge-in: when the user speaks while the model is speaking,
cancel model playback and mark the turn interrupted.
- On disconnect/error, back off and reconnect.
"""
while not self._stop_flag.is_set():
try:
log.info("connecting to model...")
# TODO: open a session with your provider. For websocket-style
# APIs, use `async with client.realtime.connect(...) as session:`.
# For request/response APIs, poll or stream in a loop.
await asyncio.gather(
self._send_mic_loop(),
self._receive_loop(),
)
except asyncio.CancelledError:
break
except Exception as exc:
log.error("session error: %s — reconnecting in 2s", exc)
await asyncio.sleep(2)
# ─── mic → model ──────────────────────────────────────
async def _send_mic_loop(self) -> None:
"""Read mic chunks and forward them to the model.
Minimum responsibilities:
- Loop on `self._mic.read_chunk(N_BYTES)`.
- Encode to whatever format your provider expects
(PCM16 mono is standard; some want base64 in JSON frames).
- Respect `self._stop_flag`.
Optional (highly recommended):
- Measure energy; feed the mic frame to `self._recorder.capture_user`
only when the user is actually speaking.
        - Apply echo suppression while the speaker is playing (muting or
          substituting silence when energy is low keeps the model from
          transcribing its own voice bleed).
"""
chunk_bytes = 1024 # 32 ms at 16 kHz mono int16 — tune to your API
loop = asyncio.get_event_loop()
while not self._stop_flag.is_set():
try:
data = await loop.run_in_executor(
None, self._mic.read_chunk, chunk_bytes,
)
except Exception:
break
# TODO: forward `data` to the model. Example for a hypothetical
# websocket session:
# await session.send({"type": "audio", "pcm16": data})
_ = data
# Pace to real-time so we don't starve the event loop
await asyncio.sleep(chunk_bytes / (16000 * 2))
# ─── model → speaker ──────────────────────────────────
async def _receive_loop(self) -> None:
"""Receive model events (audio chunks, transcripts, turn markers).
Event handling you need to implement:
        - Audio chunk       → `self._speaker.send_chunk(pcm, source_rate)`
                              (the first chunk must be preceded by
                              `self._speaker.begin_stream()`).
        - Model interrupted → `self._speaker.stop(); self._mic.flush()`
                              and call `self._recorder.finish_turn()`.
        - User transcript   → `self._recorder.add_user_text(text)`.
        - Model transcript  → `self._recorder.add_robot_text(text)`.
        - Turn complete     → `self._speaker.wait_finish();
                              self._recorder.finish_turn(); mic.flush()`.
"""
while not self._stop_flag.is_set():
# TODO: iterate your provider's event stream and dispatch.
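            # A hypothetical dispatch shape (placeholder names, not a real
            # SDK; adapt to your provider's actual event types):
            #   async for event in session:
            #       if event.type == "audio":
            #           self._speaker.send_chunk(event.pcm, source_rate=24000)
            #       elif event.type == "user_transcript":
            #           self._recorder.add_user_text(event.text)
            #       elif event.type == "turn_complete":
            #           self._speaker.wait_finish()
            #           self._recorder.finish_turn()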
await asyncio.sleep(0.1)

147
voice/model_subprocess.py Normal file
View File

@ -0,0 +1,147 @@
"""Template supervisor — pair with voice/model_script.py when adding a new model.
The supervisor's job is to run a voice subprocess and tail its stdout for
state transitions + user transcripts. It is brand-specific on purpose:
each model's brain emits log lines in its own format, so each model gets
its own supervisor. See `gemini/subprocess.py` for the working reference.
How to add a new model (e.g. OpenAI Realtime):
1. cp voice/model_script.py openai/script.py
2. cp voice/model_subprocess.py openai/subprocess.py
3. In both files: rename `ModelBrain` → `OpenAIRealtimeBrain`,
   `ModelSubprocess` → `OpenAIRealtimeSubprocess`.
4. In `openai/script.py`: fill in the TODO bodies (connect/send/receive).
   Each `log.info("USER: %s", ...)` / `log.info("BOT: %s", ...)` /
   state message must be a string your supervisor's `_track_line` below
   can detect — keep them in lock-step.
5. In `openai/subprocess.py`: update `_track_line` to match the strings
   your brain actually emits.
6. In `main.py`: swap `GeminiSubprocess` → `OpenAIRealtimeSubprocess` in
   the `live_sub = _safe_construct(...)` line. In `voice/sanad_voice.py`,
   add a branch to `_build_brain()` mapping `"openai"` → `OpenAIRealtimeBrain`.
7. Run with `SANAD_VOICE_BRAIN=openai python3 voice/sanad_voice.py eth0`.
Nothing in `gemini/` needs to change.
"""
from __future__ import annotations
import os
import signal
import subprocess
import sys
import threading
import time
from collections import deque
from datetime import datetime
from pathlib import Path
from typing import Any
from Project.Sanad.config import BASE_DIR, LOGS_DIR, SCRIPTS_DIR
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
log = get_logger("model_subprocess")
class ModelSubprocess:
"""Skeleton supervisor — adapt for your model.
Contract expected by `main.py` + `dashboard/routes/live_subprocess.py`:
    start()  → sync. Spawns the child, starts the log reader thread.
    stop()   → sync. SIGINT / SIGTERM / SIGKILL escalation.
    status() → returns {state, state_message, running, pid, log_tail,
               user_transcript, last_user_text, ...}.
    log_tail        : deque[str] — last N cleaned stdout lines
    user_transcript : deque[str] — user transcripts parsed from child's log
    last_user_text  : str — most recent transcript (convenience)
    state           : str — one of {"stopped", "starting", "connecting",
                      "listening", "hearing", "interrupting",
                      "error", "warning", "crashed"}
"""
def __init__(self):
# TODO: set a config section key — e.g. `_cfg_section("openai", "subprocess")`.
# Create `config/<brand>_config.json > subprocess: { ... }` matching
# gemini_config.json's layout.
self._cfg = {} # _cfg_section("<brand>", "subprocess")
self._lock = threading.Lock()
self.process: subprocess.Popen | None = None
self.log_tail: deque[str] = deque(
maxlen=self._cfg.get("log_tail_size", 2000))
self.user_transcript: deque[str] = deque(
maxlen=self._cfg.get("transcript_tail_size", 30))
self._reader_thread: threading.Thread | None = None
self._log_file = None
self.state = "stopped"
self.state_message = "Idle."
self.last_user_text = ""
# ─── spawn / kill ─────────────────────────────────────
def start(self) -> dict:
# TODO: build env (include `SANAD_VOICE_BRAIN=<yourbrand>` so
# sanad_voice.py picks your brain), pick the script path, and
# `subprocess.Popen(...)`. Copy the gemini/subprocess.py body.
raise NotImplementedError
def stop(self, timeout: float = 3.0) -> dict:
# TODO: send SIGINT → wait → SIGTERM → wait → SIGKILL.
raise NotImplementedError
# ─── log parsing — the brand-specific part ────────────
def _track_line(self, line: str) -> None:
"""Translate your brain's log strings into state + transcripts.
KEEP THIS IN LOCK-STEP with the `log.info(...)` calls in your
brain. Minimum required detections:
        connecting   → child opened a session to the model
        listening    → session connected OR a turn finished
        hearing      → user transcript arrived (APPEND to user_transcript)
        interrupting → barge-in / model interrupted
        error        → fatal session error
        stopped      → clean shutdown
"""
# Example (replace with your brain's actual strings):
#
# if "connecting to OpenAI" in line:
# self._set_state("connecting", line)
# elif "session open" in line:
# self._set_state("listening", "Listening for speech.")
# elif "USER: " in line:
# text = line.split("USER: ", 1)[1].strip()
# if text:
# self.last_user_text = text
# self.user_transcript.append(text)
# self._set_state("hearing", f"User: {text}")
# elif "BARGE-IN" in line:
# self._set_state("interrupting", line)
# elif "session error" in line:
# self._set_state("error", line)
# elif "cancelled — stopping" in line:
# self._set_state("stopped", line)
raise NotImplementedError
def _set_state(self, state: str, msg: str) -> None:
self.state = state
self.state_message = msg
# ─── status + introspection ───────────────────────────
def status(self) -> dict:
with self._lock:
proc = self.process
running = proc is not None and proc.poll() is None
return {
"running": running,
"pid": proc.pid if running else None,
"state": self.state,
"state_message": self.state_message,
"last_user_text": self.last_user_text,
"log_tail": list(self.log_tail)[-50:],
"user_transcript": list(self.user_transcript),
}

View File

@ -1,19 +1,32 @@
#!/usr/bin/env python3
"""Sanad voice subprocess — orchestrator.
Wires three independently-swappable pieces together:
  1. Audio I/O     → voice/audio_io.py (mic + speaker)
  2. Turn recorder → TurnRecorder (in this file; model-agnostic WAV capture)
  3. Voice brain   → gemini/script.py (Gemini, default cloud)
                     local/script.py (offline Whisper+Qwen+CosyVoice2)
                     voice/model_script.py (template for new models)
Runtime selection:
  SANAD_AUDIO_PROFILE = builtin | anker | hollyland_builtin (default builtin)
  SANAD_VOICE_BRAIN   = gemini | local | model (default gemini)
Usage:
  python3 voice/sanad_voice.py eth0
  python3 voice/sanad_voice.py eth0 --voice Charon
  SANAD_AUDIO_PROFILE=anker SANAD_VOICE_BRAIN=gemini \
      python3 voice/sanad_voice.py eth0
System prompt priority (first hit wins):
  1. scripts/sanad_script.txt (edit-live via the dashboard)
  2. config/core_config.json > gemini_defaults.default_system_prompt
  3. the hardcoded fallback in _load_system_prompt() below
"""
from __future__ import annotations
import array
import asyncio
import json
@ -26,23 +39,21 @@ import wave
from datetime import datetime
from pathlib import Path
from unitree_sdk2py.core.channel import ChannelFactoryInitialize
from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient
from Project.Sanad.config import (
    GEMINI_VOICE,
    RECEIVE_SAMPLE_RATE,
    SCRIPTS_DIR,
    SEND_SAMPLE_RATE,
)
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.voice.audio_io import AudioIO
# ─── LOGGING ─────────────────────────────────────────────
_LOG_CFG = _cfg_section("voice", "sanad_voice")
LOG_DIR = os.path.expanduser(_LOG_CFG.get("log_dir", "~/logs"))
os.makedirs(LOG_DIR, exist_ok=True)
_LOG_NAME = _LOG_CFG.get("log_name", "gemini_live_v2")
@ -57,71 +68,54 @@ logging.basicConfig(
        logging.StreamHandler(),
    ],
)
log = logging.getLogger("sanad_voice")

# ─── CONFIG ──────────────────────────────────────────────
_REC = _cfg_section("voice", "recording")
_SCRIPTS = _cfg_section("core", "script_files")
_GEMINI_DEFAULTS = _cfg_section("core", "gemini_defaults")

_PERSONA_FILE = SCRIPTS_DIR / _SCRIPTS.get("persona", "sanad_script.txt")

RECORD_ENABLED = os.environ.get(
    "SANAD_RECORD",
    "1" if _REC.get("enabled", True) else "0",
) != "0"
_REC_DIR_REL = _REC.get("dir_relative", "data/recordings")
RECORD_DIR = Path(os.environ.get(
    "SANAD_RECORD_DIR",
    str(Path(__file__).resolve().parent.parent / _REC_DIR_REL),
))

_FALLBACK_SYSTEM_PROMPT = (
    "You are Marcus, a bilingual humanoid robot assistant made by YS Lootah "
    "Technology, Dubai, UAE. RESPOND IN ARABIC (Gulf/Emirati dialect) OR "
    "ENGLISH ONLY. YOU MUST RESPOND UNMISTAKABLY IN THE SAME LANGUAGE THE "
    "USER SPEAKS. If the user speaks Arabic, you MUST reply in Arabic Gulf "
    "dialect. If the user speaks English, you MUST reply in English. Do NOT "
    "confuse Arabic with Japanese, Hindi, Russian, or any other language. "
    "The user is speaking Arabic or English — nothing else. Be concise — 1 "
    "to 2 sentences max. Be friendly and natural. If the user interrupts "
    "and says 'continue' or 'كمل', resume EXACTLY where you stopped. Only "
    "respond to clear human speech. Ignore background noise and silence "
    "completely. Do not respond to sounds that are not words."
)

def _load_system_prompt() -> str:
    """scripts/sanad_script.txt → config default → hardcoded fallback."""
    try:
        text = _PERSONA_FILE.read_text(encoding="utf-8-sig").strip()
        if text:
            return text
    except FileNotFoundError:
        pass
    return _GEMINI_DEFAULTS.get("default_system_prompt", _FALLBACK_SYSTEM_PROMPT)

def _audio_energy(pcm: bytes) -> int:
    try:
        samples = array.array("h", pcm)
        return sum(abs(s) for s in samples) // len(samples) if samples else 0
@ -132,20 +126,22 @@ def audio_energy(pcm: bytes) -> int:
# ─── TURN RECORDER ──────────────────────────────────────
class TurnRecorder:
    """Saves each turn as two WAV files: user mic + model output.
    A turn starts when user audio starts flowing through `capture_user`
    and ends on `finish_turn`. Files land in `RECORD_DIR` as
    `<timestamp>_user.wav` (at `user_rate`) and `<timestamp>_robot.wav`
    (at `robot_rate`). An `index.json` in the same directory tracks
    every turn with timestamp + transcripts + durations for the dashboard.
    """
    def __init__(self, enabled: bool = True, out_dir: Path = RECORD_DIR,
                 user_rate: int = SEND_SAMPLE_RATE,
                 robot_rate: int = RECEIVE_SAMPLE_RATE):
        self.enabled = enabled
        self.out_dir = out_dir
        self.user_rate = user_rate
        self.robot_rate = robot_rate
        if self.enabled:
            self.out_dir.mkdir(parents=True, exist_ok=True)
        self._lock = threading.Lock()
@ -182,7 +178,6 @@ class TurnRecorder:
        self._robot_text = (self._robot_text + " " + text).strip()
    def finish_turn(self) -> dict:
        if not self.enabled:
            return {}
        with self._lock:
@ -204,15 +199,17 @@ class TurnRecorder:
"user_text": user_text, "robot_text": robot_text} "user_text": user_text, "robot_text": robot_text}
try: try:
if user_data: if user_data:
user_path = self.out_dir / f"{stamp}_user.wav" p = self.out_dir / f"{stamp}_user.wav"
self._save_wav(user_path, user_data, SEND_RATE) self._save_wav(p, user_data, self.user_rate)
entry["user_wav"] = str(user_path) entry["user_wav"] = str(p)
entry["user_duration_sec"] = round(len(user_data) / (SEND_RATE * 2), 3) entry["user_duration_sec"] = round(
len(user_data) / (self.user_rate * 2), 3)
if robot_data: if robot_data:
robot_path = self.out_dir / f"{stamp}_robot.wav" p = self.out_dir / f"{stamp}_robot.wav"
self._save_wav(robot_path, robot_data, RECEIVE_RATE) self._save_wav(p, robot_data, self.robot_rate)
entry["robot_wav"] = str(robot_path) entry["robot_wav"] = str(p)
entry["robot_duration_sec"] = round(len(robot_data) / (RECEIVE_RATE * 2), 3) entry["robot_duration_sec"] = round(
len(robot_data) / (self.robot_rate * 2), 3)
self._append_index(entry) self._append_index(entry)
log.info("recorded turn → %s (user %.1fs, robot %.1fs)", log.info("recorded turn → %s (user %.1fs, robot %.1fs)",
stamp, stamp,
@ -222,7 +219,8 @@ class TurnRecorder:
log.warning("recording save failed: %s", exc) log.warning("recording save failed: %s", exc)
return entry return entry
def _save_wav(self, path: Path, pcm: bytes, rate: int) -> None: @staticmethod
def _save_wav(path: Path, pcm: bytes, rate: int) -> None:
with wave.open(str(path), "wb") as wf: with wave.open(str(path), "wb") as wf:
wf.setnchannels(1) wf.setnchannels(1)
wf.setsampwidth(2) wf.setsampwidth(2)
@ -242,307 +240,40 @@ class TurnRecorder:
payload = {"records": []} payload = {"records": []}
payload.setdefault("records", []).append(entry) payload.setdefault("records", []).append(entry)
payload["total_records"] = len(payload["records"]) payload["total_records"] = len(payload["records"])
        idx_path.write_text(
            json.dumps(payload, indent=2, ensure_ascii=False),
            encoding="utf-8",
        )

# ─── BRAIN FACTORY ───────────────────────────────────────

def _build_brain(name: str, audio_io, recorder, voice: str, system_prompt: str):
    name = (name or "").strip().lower()
    if name in ("", "gemini"):
        from Project.Sanad.gemini.script import GeminiBrain
        return GeminiBrain(audio_io, recorder, voice, system_prompt)
    if name == "local":
        from Project.Sanad.local.script import LocalBrain
        return LocalBrain(audio_io, recorder, voice, system_prompt)
    if name == "model":
        from Project.Sanad.voice.model_script import ModelBrain
        return ModelBrain(audio_io, recorder, voice, system_prompt)
    # To add a provider: import the module and return its brain class here.
    raise ValueError(f"unknown voice brain: {name!r}")

# Mic + speaker classes now live in voice/audio_io.py — built via
# AudioIO.from_profile() in main(). Selected with SANAD_AUDIO_PROFILE
# (builtin | anker | hollyland_builtin).

# ─── SESSION ─────────────────────────────────────────────

async def run_session(mic: Mic, speaker: Speaker, voice: str):
    client = genai.Client(api_key=API_KEY)
    recorder = TurnRecorder(enabled=RECORD_ENABLED)
    if RECORD_ENABLED:
        log.info("recording enabled → %s", RECORD_DIR)
    config = types.LiveConnectConfig(
        response_modalities=["AUDIO"],
        speech_config=types.SpeechConfig(
            voice_config=types.VoiceConfig(
                prebuilt_voice_config=types.PrebuiltVoiceConfig(
                    voice_name=voice
                )
            )
        ),
        realtime_input_config=types.RealtimeInputConfig(
            automatic_activity_detection=types.AutomaticActivityDetection(
                disabled=False,
                start_of_speech_sensitivity=getattr(
                    types.StartSensitivity,
                    _cfg_section("voice", "vad").get(
                        "start_sensitivity", "START_SENSITIVITY_HIGH")),
                end_of_speech_sensitivity=getattr(
                    types.EndSensitivity,
                    _cfg_section("voice", "vad").get(
                        "end_sensitivity", "END_SENSITIVITY_LOW")),
                prefix_padding_ms=_cfg_section("voice", "vad").get("prefix_padding_ms", 20),
                silence_duration_ms=_cfg_section("voice", "vad").get("silence_duration_ms", 200),
            )
        ),
        input_audio_transcription=types.AudioTranscriptionConfig(),
        output_audio_transcription=types.AudioTranscriptionConfig(),
        system_instruction=types.Content(
            parts=[types.Part(text=SYSTEM_PROMPT)]
        ),
    )
    session_num = 0
    start_time = time.time()
    consecutive_errors = 0
    while True:
        session_num += 1
        speaking = False
        stream_started = False
        barge_block_until = 0.0
        ai_speak_start = 0.0
        last_ai_audio = 0.0
        _bi = _cfg_section("voice", "barge_in")
        BARGE_THRESHOLD = _bi.get("threshold", 500)
        LOUD_CHUNKS_NEEDED = _bi.get("loud_chunks_needed", 3)
        BARGE_COOLDOWN = _bi.get("cooldown_sec", 0.3)
        ECHO_SUPPRESS_BELOW = _bi.get("echo_suppress_below", 500)
        AI_SPEAK_GRACE_SEC = _bi.get("ai_speak_grace_sec", 0.15)
        uptime_min = (time.time() - start_time) / 60
        try:
            log.info("connecting to Gemini (session #%d, uptime %.0fm)...",
                     session_num, uptime_min)
            async with client.aio.live.connect(model=MODEL, config=config) as session:
log.info("connected — speak anytime!")
consecutive_errors = 0 # reset on successful connect
mic.flush()
done = asyncio.Event()
# ── Send mic ──
async def send_mic():
nonlocal speaking, barge_block_until
chunk_bytes = CHUNK_SAMPLES * 2
loud_count = 0
last_activity = time.time()
loop = asyncio.get_event_loop()
while not done.is_set():
try:
raw = await loop.run_in_executor(
None, lambda: mic.read_chunk(chunk_bytes))
except Exception:
break
# Amplify
samples = np.frombuffer(raw, dtype=np.int16).astype(np.float32)
samples = np.clip(samples * MIC_GAIN, -32768, 32767).astype(np.int16)
data = samples.tobytes()
energy = audio_energy(data)
now = time.time()
# Barge-in
if speaking and now >= barge_block_until:
if (now - ai_speak_start) >= AI_SPEAK_GRACE_SEC:
if energy > BARGE_THRESHOLD:
loud_count += 1
else:
loud_count = max(0, loud_count - 1)
if loud_count > LOUD_CHUNKS_NEEDED:
log.info("BARGE-IN (e=%d)", energy)
do_interrupt("barge-in")
loud_count = 0
barge_block_until = now + BARGE_COOLDOWN
# Echo suppression
send_data = data
if speaking and energy < ECHO_SUPPRESS_BELOW:
send_data = SILENCE_PCM[:chunk_bytes]
# Record user audio (only when clearly speaking,
# energy > 250 — skip ambient silence noise)
if energy > 250 and not speaking:
recorder.capture_user(data)
# Watchdog
if energy > 250:
last_activity = now
elif now - last_activity > 10:
log.info("alive (no speech %.0fs, e=%d, buf=%d)",
now - last_activity, energy, len(mic._buf))
last_activity = now
try:
await session.send_realtime_input(
audio=types.Blob(
data=send_data,
mime_type=f"audio/pcm;rate={SEND_RATE}"
)
)
except asyncio.CancelledError:
return
except Exception as e:
log.warning("mic send failed: %s — ending session", e)
done.set()
return
await asyncio.sleep(CHUNK_SAMPLES / SEND_RATE)
log.info("send_mic task ended")
# ── Interrupt helper ──
def do_interrupt(source="local"):
nonlocal speaking, stream_started
speaking = False
stream_started = False
speaker.stop()
mic.flush()
recorder.finish_turn()
# ── Receive ──
async def receive():
nonlocal speaking, stream_started
nonlocal ai_speak_start, last_ai_audio
loop = asyncio.get_event_loop()
try:
last_recv = time.time()
while not done.is_set():
async for response in session.receive():
last_recv = time.time()
if done.is_set():
break
# Server going away — reconnect soon
if hasattr(response, 'go_away') and response.go_away is not None:
log.info("server going away — will reconnect")
done.set()
return
sc = response.server_content
if sc is None:
continue
# Gemini interrupted
if sc.interrupted is True:
if speaking:
log.info("Gemini interrupted")
do_interrupt("gemini")
continue
# User transcript
if sc.input_transcription:
text = (sc.input_transcription.text or "").strip()
if text and not speaking:
log.info("USER: %s", text)
recorder.add_user_text(text)
# Marcus transcript
if sc.output_transcription:
text = (sc.output_transcription.text or "").strip()
if text:
log.info("MARCUS: %s", text)
recorder.add_robot_text(text)
# AI audio
if sc.model_turn:
for part in sc.model_turn.parts:
if part.inline_data and part.inline_data.data:
now = time.time()
if not speaking:
ai_speak_start = now
speaking = True
last_ai_audio = now
raw_audio = part.inline_data.data
recorder.capture_robot(raw_audio)
audio = np.frombuffer(
raw_audio, dtype=np.int16)
if not stream_started:
await loop.run_in_executor(
None, speaker.begin_stream)
stream_started = True
await loop.run_in_executor(
None, speaker.send_chunk,
audio, RECEIVE_RATE)
# Turn complete
if sc.turn_complete:
if speaking and stream_started and not speaker.interrupted:
dur = speaker.total_sent_sec
log.info("speaker %.1fs", dur)
await loop.run_in_executor(
None, speaker.wait_finish)
elif speaking and speaker.interrupted:
log.info("speaker interrupted")
speaking = False
stream_started = False
mic.flush()
recorder.finish_turn()
log.info("listening")
# receive() iterator ended — check if session is still alive
if time.time() - last_recv > 30:
log.warning("no messages from Gemini for 30s — session dead")
break
await asyncio.sleep(0.1)
except Exception as e:
log.warning("receive ended: %s", e)
finally:
done.set()
try:
await asyncio.wait_for(
asyncio.gather(send_mic(), receive()),
timeout=_SV.get("session_timeout_sec", 660), # 11 min max (server go_away at ~10 min)
)
except asyncio.TimeoutError:
log.warning("session timed out after 11 min")
except asyncio.CancelledError:
log.warning("session cancelled")
log.info("session #%d ended — reconnecting in 1s", session_num)
speaker.stop()
mic.flush()
await asyncio.sleep(1)
except asyncio.CancelledError:
log.info("cancelled — stopping")
break
except KeyboardInterrupt:
log.info("keyboard interrupt — stopping")
break
except Exception as e:
consecutive_errors += 1
# Exponential backoff: 2s, 4s, 8s, 16s, max 30s
delay = min(30, 2 ** consecutive_errors)
log.error("session error (#%d): %s — reconnecting in %ds",
consecutive_errors, e, delay)
await asyncio.sleep(delay)
# After 10 consecutive errors, restart the client
if consecutive_errors >= 10:
log.warning("10 consecutive errors — recreating client")
try:
client = genai.Client(api_key=API_KEY)
consecutive_errors = 0
except Exception as ce:
log.error("client recreation failed: %s", ce)
# ─── MAIN ────────────────────────────────────────────────

def main() -> None:
    if len(sys.argv) < 2:
        print(__doc__)
        sys.exit(1)
    iface = sys.argv[1]
    voice = GEMINI_VOICE
    if "--voice" in sys.argv:
        voice = sys.argv[sys.argv.index("--voice") + 1]
    log.info("DDS on %s", iface)
    ChannelFactoryInitialize(0, iface)
@ -554,27 +285,39 @@ def main():
    profile = os.environ.get("SANAD_AUDIO_PROFILE", "builtin")
    audio = AudioIO.from_profile(profile, audio_client=ac)
    audio.start()
    log.info("audio profile=%s", audio.profile_id)
    # Sanity-check the mic before handing it to the brain
    log.info("testing mic 2s...")
    time.sleep(2)
    test = audio.mic.read_chunk(1024)
    e = _audio_energy(test)
    log.info("mic energy=%d %s", e, "OK" if e > 0 else "SILENT")
    recorder = TurnRecorder(enabled=RECORD_ENABLED)
    if RECORD_ENABLED:
        log.info("recording enabled → %s", RECORD_DIR)
    system_prompt = _load_system_prompt()
    brain_name = os.environ.get("SANAD_VOICE_BRAIN", "gemini")
    brain = _build_brain(brain_name, audio, recorder, voice, system_prompt)
    log.info("voice brain=%s voice=%s log=%s", brain_name, voice, LOG_FILE)
    log.info("─" * 50)
    try:
        asyncio.run(brain.run())
    except KeyboardInterrupt:
        pass
    except Exception as exc:
        log.error("fatal: %s", exc)
    finally:
        log.info("stopping")
        try:
            brain.stop()
        except Exception:
            log.warning("brain.stop() failed", exc_info=True)
        audio.stop()
        log.info("stopped")
if __name__ == "__main__":