#!/usr/bin/env python3 """ Voice/marcus_voice.py — Marcus Wake-Signal Module (no ML, no STT). This is a deliberately-minimal voice subsystem: - A custom energy-based wake detector (Voice/wake_detector.py) listens to the G1's on-board mic continuously. - When the user says any short word (~0.2-1.5 s of speech followed by silence), wake fires. - The robot acknowledges via TTS ("Yes" — configurable). - The user then types their command at the Marcus terminal prompt. No Vosk, no Whisper, no torch, no network. Pure numpy DSP. Why not STT here: Both Vosk's small English model ("sanad" absent from lexicon) and openai-whisper ("!!!!!" numerical garbage on this Jetson's torch-aarch64) proved unreliable for this hardware. Rather than fight either, the wake path becomes a simple "did the user say something?" signal. Interface with Marcus brain: VoiceModule(audio_api, on_wake=callback) on_wake() is called when wake fires. Brain can display a prompt or do anything else. """ from __future__ import annotations import logging import os import sys import threading import time from logging.handlers import RotatingFileHandler from typing import Callable, Optional # ─── PATH + CONFIG ─────────────────────────────────────── _PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) if _PROJECT_DIR not in sys.path: sys.path.insert(0, _PROJECT_DIR) from Core.env_loader import PROJECT_ROOT from Core.config_loader import load_config LOG_DIR = os.path.join(PROJECT_ROOT, "logs") os.makedirs(LOG_DIR, exist_ok=True) logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(name)s] %(levelname)s: %(message)s", handlers=[ RotatingFileHandler( os.path.join(LOG_DIR, "voice.log"), maxBytes=5_000_000, backupCount=3, encoding="utf-8", ), ], ) log = logging.getLogger("marcus_voice") class VoiceModule: """Wake-only voice subsystem — fires a callback when speech is detected.""" def __init__(self, audio_api, on_command: Optional[Callable] = None, on_wake: Optional[Callable] = None): """ Args: audio_api: AudioAPI instance (for TTS ack). on_command: kept for API compatibility; always called with text="" because there's no STT. Brain should prompt the user to type. on_wake: alternative callback fired when wake detected. Exactly one of on_command / on_wake is used. """ self._audio = audio_api self._on_command = on_command self._on_wake = on_wake self._config = load_config("Voice") self._stt = self._config.get("stt", {}) self._messages = self._config.get("messages", {}) # Wake-detector parameters (tweakable via config_Voice.json::stt). from Voice.wake_detector import WakeDetector, WakeConfig wcfg = WakeConfig( sample_rate = 16_000, speech_threshold = float(self._stt.get("speech_threshold", 150.0)), min_word_duration_s= float(self._stt.get("min_word_duration", 0.20)), max_word_duration_s= float(self._stt.get("max_word_duration", 1.50)), post_silence_s = float(self._stt.get("post_silence", 0.30)), cooldown_s = float(self._stt.get("wake_cooldown", 1.50)), chunk_ms = int( self._stt.get("wake_chunk_ms", 50)), ) self._detector = WakeDetector(wcfg) # G1 built-in mic (UDP multicast). from Voice.builtin_mic import BuiltinMic _mcfg = self._config.get("mic_udp", {}) self._mic_capture = BuiltinMic( group = _mcfg.get("group", "239.168.123.161"), port = _mcfg.get("port", 5555), buf_max = _mcfg.get("buffer_max_bytes", 64000), ) self._running = False self._thread = None log.info( "VoiceModule initialized (custom wake detector, " "speech_threshold=%s, min/max_word=%s/%s s)", wcfg.speech_threshold, wcfg.min_word_duration_s, wcfg.max_word_duration_s, ) # ─── main loop ──────────────────────────────────────── def _voice_loop(self): self._mic_capture.start() log.info("Voice loop started — listening for wake (energy-based, no ML)") while self._running: try: # Don't listen while the robot is speaking (prevents # self-trigger from TTS output leaking into the mic). if self._audio.is_speaking: time.sleep(0.1) self._detector.reset() continue chunk = self._mic_capture.read_chunk(1024) # ~32 ms at 16 kHz if not chunk: continue if self._detector.process(chunk): self._on_wake_fired() except Exception as e: log.error("Voice loop error: %s", e, exc_info=True) time.sleep(1) def _on_wake_fired(self): log.info("Wake detected (acoustic)") print("\n [Sanad] wake heard — type your command at the prompt.") # TTS ack msg = self._messages.get("wake_heard", "Yes") try: self._audio.speak(msg) except Exception as e: log.warning("TTS ack failed: %s", e) # Brain callbacks for compatibility with the old interface. if self._on_wake: try: self._on_wake() except Exception as e: log.error("on_wake callback error: %s", e) elif self._on_command: # Old API expected (text, lang). We have no transcription, so # pass empty text — brain is expected to prompt for typed input. try: self._on_command("", "en") except Exception as e: log.error("on_command callback error: %s", e) # ─── start / stop ───────────────────────────────────── def start(self): if self._running: log.warning("VoiceModule already running") return self._running = True self._thread = threading.Thread( target=self._voice_loop, daemon=True, name="voice", ) self._thread.start() log.info("Voice module started") def stop(self): self._running = False try: self._mic_capture.stop() except Exception: pass if self._thread: self._thread.join(timeout=5) self._thread = None log.info("Voice module stopped") @property def is_running(self) -> bool: return self._running # ─── standalone test ───────────────────────────────────── if __name__ == "__main__": from API.audio_api import AudioAPI def on_wake(): print(" (brain callback fired)") audio = AudioAPI() voice = VoiceModule(audio, on_wake=on_wake) print("Starting voice module... say any short word to test the wake.") print("Press Ctrl-C to stop.\n") voice.start() try: while voice.is_running: time.sleep(0.5) except KeyboardInterrupt: print("\nStopping...") voice.stop() print("Done.")