#!/usr/bin/env python3
"""
Voice/marcus_voice.py — Gemini Live voice orchestrator for Marcus.

Pipeline:

    G1 mic UDP ──► BuiltinMic (Voice/audio_io.py)
        │
        ▼
    GeminiBrain (Voice/gemini_script.py — subprocess in gemini_sdk env)
        │ audio out (24 kHz, Gemini's voice through G1 speaker)
        │ output_transcription (chunks of Gemini's reply text)
        ▼
    _dispatch_gemini_bot (Marcus side-channel)
        - match Gemini's spoken phrase against
          Config/instruction.json::actions[*].bot_phrases
        - dedup canonical command within command_cooldown_sec
        │
        ▼
    on_command(canonical, "en") ──► Marcus brain → motion

GEMINI IS THE WAKE-WORD GATEKEEPER.  The persona prompt in
Config/config_Voice.json::gemini_system_prompt instructs Gemini to ONLY
speak motion-confirmation phrases ('Turning right', 'أستدير يميناً')
when the user said the wake word ('Sanad' or 'سند') AND requested an
action.  If Gemini says a motion phrase, Marcus trusts that decision
and fires the matching robot motion.  Marcus does NOT inspect user
transcripts for wake words anymore — single gatekeeper, no
double-checking.
"""
from __future__ import annotations

import logging
import os
import re
import sys
import threading
import time
from difflib import SequenceMatcher
from logging.handlers import RotatingFileHandler
from typing import Callable, Optional

# NOTE(review): np is not referenced anywhere in this module's visible
# code — confirm it is unused before removing the import.
import numpy as np

# Make the project root importable when this file is executed directly
# (e.g. `python Voice/marcus_voice.py`) rather than as a package module.
_PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _PROJECT_DIR not in sys.path:
    sys.path.insert(0, _PROJECT_DIR)

from Core.env_loader import PROJECT_ROOT
from Core.config_loader import load_config

# File logging only — 5 MB × 3 rotating backups under <root>/logs/voice.log.
LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
os.makedirs(LOG_DIR, exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
    handlers=[
        RotatingFileHandler(
            os.path.join(LOG_DIR, "voice.log"),
            maxBytes=5_000_000,
            backupCount=3,
            encoding="utf-8",
        ),
    ],
)
log = logging.getLogger("marcus_voice")

# ── Transcript log ─────────────────────────────────────────────
# Every user transcript Gemini emits is written here in a simple
# one-line-per-entry format.  Rotates every 5 MB × 3 backups.
# `propagate = False` keeps transcript lines out of voice.log; the
# handlers guard prevents duplicate handlers on re-import.
_TRANSCRIPT_PATH = os.path.join(LOG_DIR, "transcript.log")
_transcript_log = logging.getLogger("transcript")
_transcript_log.setLevel(logging.INFO)
_transcript_log.propagate = False
if not _transcript_log.handlers:
    _th = RotatingFileHandler(
        _TRANSCRIPT_PATH,
        maxBytes=5_000_000,
        backupCount=3,
        encoding="utf-8",
    )
    _th.setFormatter(logging.Formatter("%(asctime)s %(message)s"))
    _transcript_log.addHandler(_th)


def _log_transcript(action: str, text: str) -> None:
    """Write one '<ACTION> <text>' line to logs/transcript.log.

    `action` is a short tag (HEARD / SAID / CMD-BOT), left-padded to a
    5-column field; `text` may be None and is stripped before logging.
    """
    _transcript_log.info("%-5s %s", action, (text or "").strip())

# ─── instruction.json — bilingual phrase tables ──────────────────
#
# Single source of truth for every voice phrase the dispatch layer cares
# about: wake-word variants (EN + AR), per-action user_phrases (what the
# user might say), per-action bot_phrases (what Gemini might say back).
# Loaded ONCE at module import; rebuilds the runtime tables below.
# Adding a new accent / variant / action is a JSON-only edit — no Python
# change required.
import json as _json def _load_instructions() -> dict: path = os.path.join(PROJECT_ROOT, "Config", "instruction.json") try: with open(path, "r", encoding="utf-8") as f: return _json.load(f) or {} except Exception as e: # Fail soft — empty tables mean the dispatch gate just rejects # everything and is_running stays True. Better than a crash. try: log.error("instruction.json not loaded: %s", e) except Exception: pass return {} _INSTRUCTIONS = _load_instructions() def _build_wake_words(data: dict) -> set: out = set() wake = data.get("wake_words", {}) or {} for lang in ("english", "arabic"): for w in wake.get(lang, []) or []: if isinstance(w, str) and w.strip(): out.add(w.strip().lower()) return out def _build_command_vocab(data: dict) -> list: """English-only canonical phrases — used by the difflib fuzzy matcher. Includes every action's `canonical` plus all entries from `user_phrases.english` (deduped, original order preserved per action).""" seen = set() out = [] for action in (data.get("actions", {}) or {}).values(): canon = (action.get("canonical") or "").strip() if canon and canon not in seen: seen.add(canon) out.append(canon) for p in (action.get("user_phrases", {}) or {}).get("english", []) or []: p = (p or "").strip() if p and p not in seen: seen.add(p) out.append(p) return out def _build_arabic_motion_map(data: dict) -> dict: """Map Arabic user phrase → English canonical for every action.""" out = {} for action in (data.get("actions", {}) or {}).values(): canon = (action.get("canonical") or "").strip() if not canon: continue for p in (action.get("user_phrases", {}) or {}).get("arabic", []) or []: p = (p or "").strip() if p: out[p] = canon return out def _build_parametric_actions(data: dict) -> list: """Compile parametric_actions from instruction.json into a list of (compiled_regex, command_template) tuples. Every regex carries re.IGNORECASE so 'Turning' / 'turning' / 'TURNING' all match. Arabic is unaffected by IGNORECASE. 
Bad entries (missing regex/template, regex compile errors) are skipped with a warning — the rest of the table still loads. """ out = [] for entry in (data.get("parametric_actions", []) or []): if not isinstance(entry, dict): continue rx = entry.get("regex") tpl = entry.get("command_template") if not rx or not tpl: continue try: compiled = re.compile(rx, re.IGNORECASE) except re.error as e: try: log.warning("parametric_actions skipped (bad regex %r): %s", rx, e) except Exception: pass continue out.append((compiled, tpl)) return out def _format_param_template(template: str, groups: tuple) -> str: """Substitute $1, $2, … in a parametric command template with the matching regex groups. None groups (optional captures that didn't fire) are treated as empty strings; surrounding whitespace is collapsed so 'turn 90 degrees' becomes 'turn 90 degrees'.""" out = template for i, g in enumerate(groups, 1): token = "$" + str(i) out = out.replace(token, "" if g is None else str(g)) out = re.sub(r"\s+", " ", out).strip() return out def _build_bot_motion_patterns(data: dict) -> list: """List of (needle, canonical) the bot dispatcher matches Gemini's spoken reply against. English needles are lowercased so the dispatcher can use case-insensitive `in` checks; Arabic needles are kept verbatim.""" out = [] for action in (data.get("actions", {}) or {}).values(): canon = (action.get("canonical") or "").strip() if not canon: continue bot = action.get("bot_phrases", {}) or {} for p in bot.get("english", []) or []: p = (p or "").strip() if p: out.append((p.lower(), canon)) for p in bot.get("arabic", []) or []: p = (p or "").strip() if p: out.append((p, canon)) # Sort by needle length descending so multi-word phrases match before # their shorter prefixes (e.g. "moving forward" before "moving"). out.sort(key=lambda x: len(x[0]), reverse=True) return out # Module-level vocabulary tables, all derived from instruction.json. 
# Mutable (rebuildable) — VoiceModule.__init__ re-reads
# instruction.json in case the file changed since import.
WAKE_WORDS: set = _build_wake_words(_INSTRUCTIONS)
COMMAND_VOCAB: list = _build_command_vocab(_INSTRUCTIONS)
_ARABIC_MOTION_TO_CANONICAL: dict = _build_arabic_motion_map(_INSTRUCTIONS)
_BOT_MOTION_PATTERNS: list = _build_bot_motion_patterns(_INSTRUCTIONS)
_PARAMETRIC_ACTIONS: list = _build_parametric_actions(_INSTRUCTIONS)

# Garbage patterns + min length stay in config_Voice.json (they're
# noise filtering, not voice instructions).
GARBAGE_PATTERNS: set = set()
_MIN_TRANSCRIPTION_LENGTH: int = 3


def _has_wake_word(text: str) -> bool:
    """True if `text` contains any wake-word variant as a whole word."""
    low = text.lower()
    for w in WAKE_WORDS:
        if re.search(r'\b' + re.escape(w) + r'\b', low):
            return True
    return False


def _strip_wake_word_once(text: str) -> str:
    """Single pass of wake-word stripping.  Use via _strip_wake_word().

    Order of attempts: (1) text that is ONLY a wake word (plus trailing
    punctuation) collapses to ""; (2) leading wake word is removed;
    (3) trailing wake word is removed.  Longest variant wins.
    """
    stripped = text.strip()
    for w in WAKE_WORDS:
        if re.fullmatch(rf'{re.escape(w)}[\s,.!?]*', stripped, re.IGNORECASE):
            return ""
    for w in sorted(WAKE_WORDS, key=len, reverse=True):
        m = re.match(
            rf'^\s*{re.escape(w)}\s*[,.!?]?\s+(.+)$',
            text, re.IGNORECASE,
        )
        if m:
            return m.group(1).strip(' ,.!?')
        m = re.match(
            rf'^(.+?)\s+{re.escape(w)}\s*[.!?]*\s*$',
            text, re.IGNORECASE,
        )
        if m:
            return m.group(1).strip(' ,.!?')
    return text


def _strip_wake_word(text: str) -> str:
    """
    Remove the wake word from the start or end of text, iteratively,
    so repeated-wake transcriptions ("Sanad. Sanad.") fully collapse.
    Capped at 5 passes to prevent pathological inputs from looping.
    """
    for _ in range(5):
        stripped = _strip_wake_word_once(text)
        if stripped == text:
            return text
        text = stripped
    return text


def _translate_arabic_motion(text: str) -> str:
    """Translate Arabic motion phrases to English canonical equivalents,
    using the table built from
    instruction.json::actions[*].user_phrases.arabic.

    Substring match; longest-needle wins so multi-word phrases match
    before their shorter prefixes.  Returns `text` unchanged when no
    needle matches (or the table is empty).
    """
    s = text.strip()
    if not s or not _ARABIC_MOTION_TO_CANONICAL:
        return text
    for ar in sorted(_ARABIC_MOTION_TO_CANONICAL.keys(), key=len, reverse=True):
        if ar in s:
            return _ARABIC_MOTION_TO_CANONICAL[ar]
    return text


def _closest_command(text: str, cutoff: float = 0.72) -> str:
    """
    Map a transcription to the closest known command phrase.

    Exact-substring hit on COMMAND_VOCAB wins outright; otherwise a
    difflib ratio scan picks the best candidate.  Returns the canonical
    command if there's a close-enough match (ratio >= cutoff), else
    returns the original text unchanged.
    """
    low = text.lower().strip().rstrip(".!?,")
    if not low:
        return text
    for cmd in COMMAND_VOCAB:
        if cmd in low:
            return cmd
    best_cmd = None
    best_ratio = 0.0
    for cmd in COMMAND_VOCAB:
        r = SequenceMatcher(None, low, cmd).ratio()
        if r > best_ratio:
            best_ratio = r
            best_cmd = cmd
    if best_ratio >= cutoff:
        return best_cmd
    return text


class VoiceModule:
    """Thin orchestrator around GeminiBrain + command dispatch."""

    def __init__(
        self,
        audio_api,
        on_command: Optional[Callable] = None,
        on_wake: Optional[Callable] = None,
    ):
        """Build the module, reload phrase tables, init dispatch state.

        Parameters
        ----------
        audio_api : object exposing `is_speaking` (and TTS playback).
        on_command : callable(canonical: str, lang: str) fired on motion.
        on_wake : callable, kept for interface compatibility (unused
            by the Gemini backend — Gemini is the wake-word gatekeeper).
        """
        self._audio = audio_api
        self._on_command = on_command
        self._on_wake = on_wake
        self._config = load_config("Voice")
        self._stt = self._config.get("stt", {})
        self._messages = self._config.get("messages", {})
        # Reload instruction.json so a hot-edit between runs is picked
        # up without re-importing the module.  All phrase tables are
        # rebuilt from instruction.json — single source of truth.
        # garbage_patterns + min_transcription_length stay in
        # config_Voice.json (noise filtering, not voice instructions).
        global WAKE_WORDS, COMMAND_VOCAB, GARBAGE_PATTERNS, \
            _MIN_TRANSCRIPTION_LENGTH, _ARABIC_MOTION_TO_CANONICAL, \
            _BOT_MOTION_PATTERNS, _PARAMETRIC_ACTIONS, _INSTRUCTIONS
        _INSTRUCTIONS = _load_instructions()
        WAKE_WORDS = _build_wake_words(_INSTRUCTIONS)
        COMMAND_VOCAB = _build_command_vocab(_INSTRUCTIONS)
        _ARABIC_MOTION_TO_CANONICAL = _build_arabic_motion_map(_INSTRUCTIONS)
        _BOT_MOTION_PATTERNS = _build_bot_motion_patterns(_INSTRUCTIONS)
        _PARAMETRIC_ACTIONS = _build_parametric_actions(_INSTRUCTIONS)
        GARBAGE_PATTERNS = {p.lower() for p in self._stt.get("garbage_patterns", [])}
        _MIN_TRANSCRIPTION_LENGTH = int(self._stt.get("min_transcription_length", 3))
        self._vocab_cutoff = float(self._stt.get("command_vocab_cutoff", 0.72))
        log.info(
            "instruction.json loaded: %d wake_words, %d command_vocab, "
            "%d arabic→canonical, %d bot patterns, %d parametric; "
            "+ %d garbage_patterns from config_Voice.json",
            len(WAKE_WORDS), len(COMMAND_VOCAB),
            len(_ARABIC_MOTION_TO_CANONICAL), len(_BOT_MOTION_PATTERNS),
            len(_PARAMETRIC_ACTIONS), len(GARBAGE_PATTERNS),
        )
        # Dispatch dedup state.  Gemini's output_transcription arrives
        # in many small chunks per turn — and one chunk may contain
        # MULTIPLE motion phrases for compound commands ("Turning right,
        # then moving forward.").  Two layers of dedup:
        #   1. _fired_canons_this_turn — canonicals already fired since
        #      the current Gemini turn started; cleared on turn_end.
        #      Same motion never fires twice within one reply.
        #   2. _last_gemini_canon / _last_gemini_dispatch_at —
        #      cross-chunk cooldown safety net for cases where a
        #      turn_end is delayed and the next turn's phrase arrives
        #      before the reset.
        self._fired_canons_this_turn: set = set()
        self._last_gemini_canon = ""
        self._last_gemini_dispatch_at = 0.0
        # Wake-word gate is enforced INSIDE Gemini via the persona
        # prompt (config_Voice.json::gemini_system_prompt).  Marcus does
        # NOT check for "Sanad"/"سند" in Python anymore — if Gemini
        # speaks a motion-confirmation phrase, that's its own decision
        # and we trust it.  Single gatekeeper, no double-checking.
        #
        # Gemini brain reference for flush_mic() — populated by
        # _voice_loop after spawning the runner subprocess.
        self._brain = None
        # Per-turn buffer for Gemini's spoken text.  Gemini Live emits
        # the output transcription in many small chunks ("I", "see",
        # "a", ...); we accumulate them and print one clean
        # `[Sanad] said: "..."` line per turn.  Flushed on turn_end OR
        # when a chunk ends with sentence-ending punctuation.
        self._gemini_say_buf = []
        self._gemini_say_lock = threading.Lock()
        self._gemini_say_last_chunk_at = 0.0
        self._running = False
        self._thread = None
        log.info("VoiceModule initialized (backend=gemini)")

    # ─── main loop ──────────────────────────────────────

    def _voice_loop(self):
        """
        Spawn the Gemini Live STT subprocess (runs in the gemini_sdk
        Python 3.10+ env) and forward its transcripts into Marcus's
        dispatch gate.  Marcus's main process never opens the Gemini
        WebSocket itself — google-genai needs Python ≥3.9 and marcus is
        pinned to 3.8 by the Jetson torch wheel.
        """
        api_key = (
            os.environ.get("MARCUS_GEMINI_API_KEY")
            or os.environ.get("SANAD_GEMINI_API_KEY")
            or self._stt.get("gemini_api_key", "")
        )
        if not api_key:
            log.error(
                "No Gemini API key found. Set env MARCUS_GEMINI_API_KEY "
                "or stt.gemini_api_key in Config/config_Voice.json"
            )
            # Idle (keep is_running True) until stop() is called.
            while self._running:
                time.sleep(0.5)
            return
        from Voice.gemini_script import GeminiBrain
        # Env overrides for model + voice are passed through to the
        # runner subprocess automatically (it reads the same env vars).
        model = (
            os.environ.get("MARCUS_GEMINI_MODEL")
            or self._stt.get(
                "gemini_model",
                "gemini-2.5-flash-native-audio-preview-12-2025",
            )
        )
        voice_name = (
            os.environ.get("MARCUS_GEMINI_VOICE")
            or self._stt.get("gemini_voice_name", "Charon")
        )
        # System prompt: the runner reads the same config & file paths,
        # but we forward the resolved string in case marcus's config
        # layer picked a fallback.  Forwarded via env in
        # GeminiBrain.start().
        system_prompt = self._stt.get(
            "gemini_system_prompt",
            "Transcribe what the user says to Sanad. Stay silent.",
        )
        sp_file = self._stt.get("gemini_system_prompt_file", "")
        if sp_file:
            sp_path = sp_file if os.path.isabs(sp_file) else os.path.join(
                PROJECT_ROOT, sp_file,
            )
            try:
                with open(sp_path, "r", encoding="utf-8") as f:
                    loaded = f.read().strip()
                    if loaded:
                        system_prompt = loaded
                        log.info(
                            "gemini system prompt loaded from %s (%d chars)",
                            sp_path, len(loaded),
                        )
            except Exception as e:
                log.warning(
                    "gemini_system_prompt_file=%r unreadable: %s — "
                    "using inline config",
                    sp_file, e,
                )
        log.info(
            "Voice loop started — GEMINI S2S subprocess "
            "(model=%s, voice=%s)",
            model, voice_name,
        )
        brain = GeminiBrain(
            None, None,  # audio_io, recorder owned by runner
            voice_name=voice_name,
            system_prompt=system_prompt,
            api_key=api_key,
            on_transcript=self._on_gemini_transcript,
            # No on_command — Marcus does NOT inspect user transcripts
            # for wake words anymore.  Gemini's persona prompt enforces
            # the wake-word gate; if Gemini speaks a motion phrase, the
            # bot-side dispatcher below picks it up and fires motion.
            on_bot_text=self._on_gemini_say_chunk,
            on_turn_end=self._on_gemini_turn_end,
        )
        self._brain = brain
        brain.start()
        # ── Camera-frame sender ────────────────────────────────────
        # Stream JPEG frames to the runner so Gemini Live can SEE what
        # the robot sees.  Without this, "what do you see" / "describe
        # this exhibit" answers would be hallucinations.  The runner
        # forwards them to Gemini as image/jpeg blobs and de-stales
        # anything older than gemini_frame_max_age_sec.
        send_frames = bool(self._stt.get("gemini_send_frames", True))
        frame_interval = float(self._stt.get("gemini_frame_interval_sec", 0.5))
        frame_thread = None
        frame_stop = threading.Event()
        if send_frames:
            try:
                from API.camera_api import get_frame as _camera_get_frame
            except Exception as e:
                log.warning("camera_api unavailable — frame streaming disabled: %s", e)
                _camera_get_frame = None
            if _camera_get_frame is not None:
                def _frame_sender_loop():
                    log.info(
                        "frame sender started — %.2fs interval, "
                        "streaming camera frames to Gemini Live",
                        frame_interval,
                    )
                    while not frame_stop.is_set() and self._running:
                        try:
                            frame_b64 = _camera_get_frame()
                            if frame_b64:
                                # camera_api returns a base64 ASCII string —
                                # GeminiBrain.send_frame accepts that directly.
                                brain.send_frame(frame_b64)
                        except Exception as e:
                            log.debug("frame send failed: %s", e)
                        frame_stop.wait(frame_interval)
                    log.info("frame sender stopped")
                frame_thread = threading.Thread(
                    target=_frame_sender_loop,
                    daemon=True,
                    name="gemini-frames",
                )
                frame_thread.start()
        try:
            while self._running:
                time.sleep(0.25)
        finally:
            frame_stop.set()
            if frame_thread is not None:
                frame_thread.join(timeout=2)
            brain.stop()
            self._brain = None

    # ─── dispatch side channel ──────────────────────────

    def _on_gemini_transcript(self, text: str) -> None:
        """Log every user transcript to logs/transcript.log."""
        if text:
            _log_transcript("HEARD", text)

    def _on_gemini_say_chunk(self, text: str) -> None:
        """
        Receive a Gemini output-transcription chunk.  Two side effects:
          1. Forward to the bot dispatcher so motion can fire on
             confirmation phrases (Turning right / Sitting down / etc.).
          2. Buffer the chunk for the per-turn `[Sanad] said: ...` line
             that prints once on turn_end (or sooner if the chunk ends
             with sentence punctuation).
        """
        # Motion side-channel — chunk-level so dispatch is fast.
        try:
            self._dispatch_gemini_bot(text)
        except Exception:
            pass
        with self._gemini_say_lock:
            self._gemini_say_buf.append(text)
            self._gemini_say_last_chunk_at = time.time()
            # Flush early if this chunk closes a sentence — typical for
            # short acks like "Turning right." that arrive as one chunk.
            if text.rstrip().endswith((".", "!", "?")):
                self._flush_gemini_say_locked()

    def _on_gemini_turn_end(self) -> None:
        """Flush any pending Gemini output chunks at turn boundary, and
        reset the per-turn motion dedup set so the next user→Gemini
        exchange starts fresh."""
        with self._gemini_say_lock:
            self._flush_gemini_say_locked()
        self._fired_canons_this_turn.clear()

    def _flush_gemini_say_locked(self) -> None:
        """Caller MUST hold self._gemini_say_lock.  Prints one
        [Sanad] said: line and logs it as SAID."""
        if not self._gemini_say_buf:
            return
        joined = " ".join(t.strip() for t in self._gemini_say_buf if t).strip()
        # BUG FIX: collapse whitespace runs in one regex pass.  The old
        # `while " " in joined: joined = joined.replace(" ", " ")` loop
        # replaced a space with a space, so it never terminated once the
        # joined text contained any space at all.
        joined = re.sub(r"\s+", " ", joined)
        self._gemini_say_buf = []
        if joined:
            _log_transcript("SAID", joined)
            try:
                print(f' [Sanad] said: "{joined[:200]}"')
                print("Command: ", end="", flush=True)
            except Exception:
                pass

    def flush_mic(self) -> None:
        """
        Tell the Gemini runner subprocess to drop its mic buffer.
        Called before AND after `audio_api.speak()` so the robot's own
        voice (picked up by the mic during TtsMaker playback) doesn't
        come back from Gemini as a fake user utterance.  No-op if the
        runner hasn't started yet.
        """
        b = getattr(self, "_brain", None)
        if b is None:
            return
        try:
            b.flush_mic()
        except Exception:
            pass

    def _normalize_command(self, text: str) -> str:
        """Fuzzy-match a transcription to the closest canonical phrase."""
        canonical = _closest_command(text, cutoff=self._vocab_cutoff)
        if canonical != text:
            log.info("fuzzy-match: %r → %r", text, canonical)
        return canonical

    # _BOT_MOTION_PATTERNS is built at module load from
    # Config/instruction.json::actions[*].bot_phrases (both English and
    # Arabic).  The dispatcher reads it via the module-level reference.
    @property
    def _BOT_MOTION_PATTERNS(self):
        return _BOT_MOTION_PATTERNS

    # _PARAMETRIC_ACTIONS holds the regex/template tuples for motion
    # confirmations that carry a number ('Turning 360 degrees.',
    # 'Walking 5 steps.').  Built from instruction.json::parametric_actions.
    @property
    def _PARAMETRIC_ACTIONS(self):
        return _PARAMETRIC_ACTIONS

    def _dispatch_gemini_bot(self, text: str) -> None:
        """
        Dispatch motion when Gemini's spoken reply contains motion-
        confirmation patterns (English or Arabic).  The wake-word gate
        lives INSIDE Gemini (see config_Voice.json::gemini_system_prompt)
        — if Gemini speaks a motion phrase, that is its own decision
        after evaluating whether the user said "Sanad"/"سند".  Marcus
        trusts it and dispatches the motion(s).

        COMPOUND-COMMAND HANDLING: a single chunk may contain multiple
        motion phrases — e.g. "Turning right, then moving forward."
        We scan for ALL non-overlapping motion patterns in the chunk
        and fire each unique canonical in spoken order.  Three guards:
          1. Longest-needle-first sort + span-claiming rejects overlaps
             so 'moving forward' wins over the shorter prefix 'moving'
             inside the same span.
          2. _fired_canons_this_turn ensures each canonical fires AT
             MOST ONCE per Gemini turn; cleared on turn_end.
          3. command_cooldown_sec is a final safety net for runaway
             same-canon dispatch across a turn_end gap.
        """
        if not text:
            return
        # .lower() preserves character-by-character indexing for both
        # ASCII and Arabic, so positions in `low` correspond to
        # positions in `text` for ordering.  Arabic patterns are stored
        # as-is and unaffected by lowercasing.
        low = text.lower()
        if not low.strip():
            return
        # Two pattern families, scanned in priority order:
        #  - PARAMETRIC (regex with capture groups) — scanned FIRST
        #    because they're more specific than the bare-canonical
        #    match; the dispatched string is the formatted
        #    command_template, which Brain/command_parser.py parses.
        #  - FIXED CANONICALS (substring needles from bot_phrases)
        #    — scanned second; pre-sorted longest-first so multi-word
        #    phrases claim spans before shorter prefixes.
        # Per-turn dedup is keyed by the dispatched command string.
        matches = []        # type: list[tuple[int, str, str]]
        claimed_spans = []  # type: list[tuple[int, int]]
        for compiled, template in self._PARAMETRIC_ACTIONS:
            for m in compiled.finditer(low):
                j, end = m.start(), m.end()
                if any(not (end <= s or j >= e) for (s, e) in claimed_spans):
                    continue
                cmd_text = _format_param_template(template, m.groups())
                if not cmd_text:
                    continue
                matches.append((j, cmd_text, "param"))
                claimed_spans.append((j, end))
        for needle, cmd in self._BOT_MOTION_PATTERNS:
            if not needle:
                continue
            i = 0
            while True:
                j = low.find(needle, i)
                if j == -1:
                    break
                end = j + len(needle)
                if any(not (end <= s or j >= e) for (s, e) in claimed_spans):
                    i = j + 1
                    continue
                matches.append((j, cmd, "canon"))
                claimed_spans.append((j, end))
                i = end
        if not matches:
            return
        # Spoken order — sort by position so we fire turn_right BEFORE
        # move_forward when Gemini said "Turning right, then moving
        # forward."
        matches.sort(key=lambda m: m[0])
        now = time.time()
        cooldown = float(self._stt.get("command_cooldown_sec", 1.5))
        for _, cmd, label in matches:
            if cmd in self._fired_canons_this_turn:
                continue
            if (cmd == self._last_gemini_canon
                    and now - self._last_gemini_dispatch_at < cooldown):
                continue
            self._fired_canons_this_turn.add(cmd)
            self._last_gemini_canon = cmd
            self._last_gemini_dispatch_at = now
            log.info(
                "dispatch (gemini-bot, %s): %s (heard: %r)",
                label, cmd, text[:80],
            )
            _log_transcript("CMD-BOT", cmd)
            if self._on_command:
                try:
                    self._on_command(cmd, "en")
                except Exception as e:
                    log.error("on_command error: %s", e, exc_info=True)

    # ─── start / stop ───────────────────────────────────

    def start(self):
        """Start the voice loop thread (idempotent)."""
        if self._running:
            log.warning("VoiceModule already running")
            return
        self._running = True
        self._thread = threading.Thread(
            target=self._voice_loop,
            daemon=True,
            name="voice",
        )
        self._thread.start()
        log.info("Voice module started")

    def stop(self):
        """Signal the voice loop to exit and join it (5 s timeout)."""
        self._running = False
        if self._thread:
            self._thread.join(timeout=5)
            self._thread = None
        log.info("Voice module stopped")

    @property
    def is_running(self) -> bool:
        """True while the voice loop thread is alive."""
        t = self._thread
        return bool(self._running and t is not None and t.is_alive())

    @property
    def is_speaking(self) -> bool:
        """Delegates to AudioAPI — True while TtsMaker is playing."""
        try:
            return bool(self._audio.is_speaking)
        except Exception:
            return False