210 lines
7.6 KiB
Python
210 lines
7.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Voice/marcus_voice.py — Marcus Wake-Signal Module (no ML, no STT).
|
|
|
|
This is a deliberately-minimal voice subsystem:
|
|
|
|
- A custom energy-based wake detector (Voice/wake_detector.py) listens
|
|
to the G1's on-board mic continuously.
|
|
- When the user says any short word (~0.2-1.5 s of speech followed by
|
|
silence), wake fires.
|
|
- The robot acknowledges via TTS ("Yes" — configurable).
|
|
- The user then types their command at the Marcus terminal prompt.
|
|
|
|
No Vosk, no Whisper, no torch, no network. Pure numpy DSP.
|
|
|
|
Why not STT here:
|
|
Both Vosk's small English model ("sanad" absent from lexicon) and
|
|
openai-whisper ("!!!!!" numerical garbage on this Jetson's torch-aarch64)
|
|
proved unreliable for this hardware. Rather than fight either, the
|
|
wake path becomes a simple "did the user say something?" signal.
|
|
|
|
Interface with Marcus brain:
|
|
VoiceModule(audio_api, on_wake=callback)
|
|
on_wake() is called when wake fires. Brain can display a prompt
|
|
or do anything else.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
import sys
|
|
import threading
|
|
import time
|
|
from logging.handlers import RotatingFileHandler
|
|
from typing import Callable, Optional
|
|
|
|
# ─── PATH + CONFIG ───────────────────────────────────────
|
|
# The project root is the directory two levels above this file; it must be
# importable before the project-local ``Core`` imports below are resolved.
_THIS_FILE = os.path.abspath(__file__)
_PROJECT_DIR = os.path.dirname(os.path.dirname(_THIS_FILE))
if _PROJECT_DIR not in sys.path:
    sys.path.insert(0, _PROJECT_DIR)
from Core.env_loader import PROJECT_ROOT
from Core.config_loader import load_config

# All voice logging goes to <PROJECT_ROOT>/logs/voice.log.
LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
os.makedirs(LOG_DIR, exist_ok=True)

# Size-capped rotating file: 5 MB per file, three backups kept.
_VOICE_LOG_HANDLER = RotatingFileHandler(
    os.path.join(LOG_DIR, "voice.log"),
    maxBytes=5_000_000,
    backupCount=3,
    encoding="utf-8",
)

# Root-logger configuration; logging.basicConfig() does nothing if the root
# logger already has handlers installed by an earlier import.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
    handlers=[_VOICE_LOG_HANDLER],
)
log = logging.getLogger("marcus_voice")
|
|
|
|
|
|
class VoiceModule:
    """Wake-only voice subsystem — fires a callback when speech is detected.

    A background thread reads audio chunks from the G1 built-in mic, feeds
    them to the energy-based ``WakeDetector`` and, when the detector fires,
    speaks a short acknowledgement and invokes the brain callback. There is
    no transcription: the user types the actual command afterwards.
    """

    # Bytes requested per mic read (~32 ms at 16 kHz).
    _READ_CHUNK_BYTES = 1024
    # Poll interval while the robot's own TTS is playing.
    _SPEAKING_POLL_S = 0.1
    # Short yield when the mic returned no data.
    _IDLE_SLEEP_S = 0.01
    # Back-off after an unexpected error inside the loop.
    _ERROR_BACKOFF_S = 1
    # Maximum time stop() waits for the worker thread to exit.
    _JOIN_TIMEOUT_S = 5

    def __init__(self, audio_api, on_command: Optional[Callable] = None,
                 on_wake: Optional[Callable] = None):
        """
        Args:
            audio_api:  AudioAPI instance (for TTS ack and the
                        ``is_speaking`` check).
            on_command: kept for API compatibility; always called with
                        ``("", "en")`` because there's no STT. Brain should
                        prompt the user to type.
            on_wake:    callback fired (with no arguments) when wake is
                        detected.

        Only one callback is invoked per wake: ``on_wake`` takes precedence
        when both are given, ``on_command`` is the fallback.
        """
        self._audio = audio_api
        self._on_command = on_command
        self._on_wake = on_wake

        # Tolerate a missing config or sections set to null: fall back to
        # the built-in defaults instead of failing on ``None.get``.
        self._config = load_config("Voice") or {}
        self._stt = self._config.get("stt") or {}
        self._messages = self._config.get("messages") or {}

        # Wake-detector parameters (tweakable via config_Voice.json::stt).
        from Voice.wake_detector import WakeDetector, WakeConfig
        wcfg = WakeConfig(
            sample_rate=16_000,
            speech_threshold=float(self._stt.get("speech_threshold", 150.0)),
            min_word_duration_s=float(self._stt.get("min_word_duration", 0.20)),
            max_word_duration_s=float(self._stt.get("max_word_duration", 1.50)),
            post_silence_s=float(self._stt.get("post_silence", 0.30)),
            cooldown_s=float(self._stt.get("wake_cooldown", 1.50)),
            chunk_ms=int(self._stt.get("wake_chunk_ms", 50)),
        )
        self._detector = WakeDetector(wcfg)

        # G1 built-in mic (UDP multicast).
        from Voice.builtin_mic import BuiltinMic
        mic_cfg = self._config.get("mic_udp") or {}
        self._mic_capture = BuiltinMic(
            group=mic_cfg.get("group", "239.168.123.161"),
            port=mic_cfg.get("port", 5555),
            buf_max=mic_cfg.get("buffer_max_bytes", 64000),
        )

        self._running = False
        self._thread = None

        log.info(
            "VoiceModule initialized (custom wake detector, "
            "speech_threshold=%s, min/max_word=%s/%s s)",
            wcfg.speech_threshold, wcfg.min_word_duration_s, wcfg.max_word_duration_s,
        )

    # ─── main loop ────────────────────────────────────────

    def _voice_loop(self):
        """Worker-thread body: read mic chunks until ``_running`` is cleared."""
        try:
            self._mic_capture.start()
        except Exception:
            # Without a mic there is nothing to listen to. Clear the flag so
            # is_running does not report a dead worker as alive.
            log.error("Mic capture failed to start — voice loop aborted",
                      exc_info=True)
            self._running = False
            return

        log.info("Voice loop started — listening for wake (energy-based, no ML)")
        while self._running:
            try:
                # Don't listen while the robot is speaking (prevents
                # self-trigger from TTS output leaking into the mic).
                # NOTE(review): audio captured while speaking may still sit
                # in the mic buffer afterwards — confirm whether BuiltinMic
                # needs flushing here.
                if self._audio.is_speaking:
                    time.sleep(self._SPEAKING_POLL_S)
                    self._detector.reset()
                    continue

                chunk = self._mic_capture.read_chunk(self._READ_CHUNK_BYTES)
                if not chunk:
                    # In case read_chunk returns immediately when nothing is
                    # buffered, yield briefly so the loop cannot spin a core.
                    time.sleep(self._IDLE_SLEEP_S)
                    continue

                if self._detector.process(chunk):
                    self._on_wake_fired()
            except Exception as e:
                if not self._running:
                    # stop() shuts the mic down while we may be mid-read;
                    # errors during shutdown are expected, so exit quietly
                    # instead of logging a failure and sleeping.
                    break
                log.error("Voice loop error: %s", e, exc_info=True)
                time.sleep(self._ERROR_BACKOFF_S)

    def _on_wake_fired(self):
        """Acknowledge a wake event and notify the brain.

        Runs on the voice thread. Failures in the TTS ack or in the callback
        are logged and never propagate, so one bad wake cannot stop the loop.
        """
        log.info("Wake detected (acoustic)")
        print("\n [Sanad] wake heard — type your command at the prompt.")

        # TTS ack
        msg = self._messages.get("wake_heard", "Yes")
        try:
            self._audio.speak(msg)
        except Exception as e:
            log.warning("TTS ack failed: %s", e)

        # Brain callbacks for compatibility with the old interface.
        if self._on_wake:
            try:
                self._on_wake()
            except Exception as e:
                log.error("on_wake callback error: %s", e, exc_info=True)
        elif self._on_command:
            # Old API expected (text, lang). We have no transcription, so
            # pass empty text — brain is expected to prompt for typed input.
            try:
                self._on_command("", "en")
            except Exception as e:
                log.error("on_command callback error: %s", e, exc_info=True)

    # ─── start / stop ─────────────────────────────────────

    def start(self):
        """Start the background voice thread; a no-op if already running."""
        if self._running:
            log.warning("VoiceModule already running")
            return
        self._running = True
        self._thread = threading.Thread(
            target=self._voice_loop, daemon=True, name="voice",
        )
        self._thread.start()
        log.info("Voice module started")

    def stop(self):
        """Stop listening and wait (up to 5 s) for the voice thread to exit.

        Safe to call when not running, and safe to call from a wake callback:
        callbacks run on the voice thread, and a thread cannot join itself.
        """
        self._running = False
        try:
            self._mic_capture.stop()
        except Exception as e:
            # Best-effort shutdown: record the failure but keep going.
            log.warning("Mic capture stop failed: %s", e)

        worker = self._thread
        self._thread = None
        if worker is not None and worker is not threading.current_thread():
            worker.join(timeout=self._JOIN_TIMEOUT_S)
            if worker.is_alive():
                log.warning("Voice thread did not exit within %s s",
                            self._JOIN_TIMEOUT_S)
        log.info("Voice module stopped")

    @property
    def is_running(self) -> bool:
        """True between start() and stop(), unless the mic failed to start."""
        return self._running
|
|
|
|
|
|
# ─── standalone test ─────────────────────────────────────
|
|
if __name__ == "__main__":
    # Manual smoke test: start the module, wait for wakes, stop on Ctrl-C.
    # AudioAPI is only needed for this standalone run.
    from API.audio_api import AudioAPI

    def on_wake():
        """Stand-in for the brain callback; shows that wake fired."""
        print(" (brain callback fired)")

    audio = AudioAPI()
    voice = VoiceModule(audio, on_wake=on_wake)
    print("Starting voice module... say any short word to test the wake.")
    print("Press Ctrl-C to stop.\n")
    voice.start()
    try:
        while voice.is_running:
            time.sleep(0.5)
    except KeyboardInterrupt:
        print("\nStopping...")
    finally:
        # Always shut the mic and worker thread down, not only on Ctrl-C,
        # so any other exit path still releases the capture.
        voice.stop()
        print("Done.")
|