Sanad/voice/live_voice_loop.py

213 lines
8.5 KiB
Python

"""LiveVoiceLoop — voice-to-arm phrase trigger dispatcher.
Listens to user transcriptions from the LiveGeminiSubprocess and, when a
configured wake phrase is matched, fires the corresponding arm action via
`motion.sanad_arm_controller.ARM`.

Mode toggle ("DEFERRED TRIGGER"), controlled by `LiveVoiceLoop.deferred_mode`
and passed to `maybe_trigger_arm` as `fire_on_wake_match = not deferred_mode`:
  - fire_on_wake_match=True   fires the arm instantly on phrase match
                              (fast, no coordination with AI speech)
  - fire_on_wake_match=False  marks a pending action that fires when the
                              AI starts/finishes its reply (visually nicer
                              — robot answers, then moves)

This is the Option-D integration: it runs in parallel to skill_registry and
uses the full gemini_interact phrase dictionary (sanad_arm.txt, 29 arm IDs,
hundreds of Arabic phrase variants).
"""
from __future__ import annotations
import threading
import time
from collections import deque
from datetime import datetime
from types import SimpleNamespace
from typing import Any
from Project.Sanad.config import SCRIPTS_DIR, BASE_DIR
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
from Project.Sanad.voice.text_utils import (
load_arm_phrase_dispatch,
maybe_trigger_arm,
)
# Module-level logger shared by everything in this file.
log = get_logger("live_voice_loop")
# Settings mapping for this module; only read through .get(key, default) below.
_LV_CFG = _cfg_section("voice", "live_voice_loop")
# Filename from core.script_files (single source) — resolved under SCRIPTS_DIR
_SCRIPTS = _cfg_section("core", "script_files")
_ARM_TXT_NAME = _SCRIPTS.get("arm_phrases", "sanad_arm.txt")
# Full path of the arm phrase file handed to load_arm_phrase_dispatch().
SANAD_ARM_TXT = SCRIPTS_DIR / _ARM_TXT_NAME
# maxlen of the LiveVoiceLoop.triggers deque (dashboard trigger history).
TRIGGER_LOG_SIZE = _LV_CFG.get("trigger_log_size", 100)
# Timeout, in seconds, the poll thread waits between transcript checks.
POLL_INTERVAL_SEC = _LV_CFG.get("poll_interval_sec", 0.1)
# Initial value of LiveVoiceLoop.deferred_mode.
DEFERRED_DEFAULT = _LV_CFG.get("deferred_default", False)
class LiveVoiceLoop:
    """Poll LiveGeminiSubprocess transcripts and fire matching arm actions.

    A daemon thread started by :meth:`start` calls :meth:`_check_transcripts`
    roughly every ``POLL_INTERVAL_SEC`` seconds. Every transcript line that
    has not been handled yet is offered to ``maybe_trigger_arm`` together
    with each phrase set in ``wake_dispatch``; the first action reported as
    fired is recorded in ``triggers`` for the dashboard.
    """

    def __init__(self, voice_client, arm, wake_mgr, audio_mgr):
        """Store collaborators and load the phrase dispatch table.

        Args:
            voice_client: Object whose ``connected`` attribute is reported
                by :meth:`status`.
            arm: Sanad's motion/arm_controller (not used for trigger).
            wake_mgr: Stored on the instance; not used in this class.
            audio_mgr: Only checked for ``None`` in :meth:`status`.
        """
        self.voice_client = voice_client
        self.arm = arm  # Sanad's motion/arm_controller (not used for trigger)
        self.wake_mgr = wake_mgr
        self.audio_mgr = audio_mgr

        self._running = False
        self._poll_thread: threading.Thread | None = None
        self._stop_event = threading.Event()

        # Deferred-trigger toggle (fire on phrase match vs fire after AI responds)
        self.deferred_mode = DEFERRED_DEFAULT

        # Trigger history (dashboard log)
        self.triggers: deque[dict[str, Any]] = deque(maxlen=TRIGGER_LOG_SIZE)
        self.last_heard: str = ""
        self.last_action: str = ""

        # ASR dispatch state (SimpleNamespace — maybe_trigger_arm mutates attrs)
        self.state = SimpleNamespace()

        # Load sanad_arm.txt on first construction
        self.wake_dispatch: dict[int, set[str]] = {}
        self.option_by_id: dict[int, Any] = {}
        self.sanad_arm: Any = None
        self._load_dispatch()

        # Transcript lines already handled that are still present in the
        # subprocess buffer, so the same line is never dispatched twice.
        self._seen_transcripts: set[str] = set()

    # ── phrase dispatch loader ────────────────────────────────────
    def _load_dispatch(self):
        """Import the arm controller and load the phrase table (best effort).

        Any failure is logged and leaves ``sanad_arm`` as ``None`` and
        ``wake_dispatch`` empty, which turns :meth:`_dispatch` into a no-op
        instead of raising at construction time.
        """
        try:
            from Project.Sanad.motion.sanad_arm_controller import ARM, OPTION_LIST, OPTION_BY_ID
            self.sanad_arm = ARM
            self.option_by_id = OPTION_BY_ID
            if SANAD_ARM_TXT.exists():
                self.wake_dispatch = load_arm_phrase_dispatch(SANAD_ARM_TXT, OPTION_LIST)
                log.info("loaded %d arm-action phrase sets from %s",
                         len(self.wake_dispatch), SANAD_ARM_TXT.name)
            else:
                log.warning("sanad_arm.txt missing at %s — arm trigger disabled",
                            SANAD_ARM_TXT)
        except Exception as exc:
            log.warning("arm dispatch unavailable: %s", exc)
            self.sanad_arm = None
            self.wake_dispatch = {}

    # ── lifecycle ────────────────────────────────────────────────
    async def start(self) -> None:
        """Start the polling thread; does nothing if already running."""
        if self._running:
            return
        # A fresh Event per run: a thread from a previous run keeps its own
        # (already set) event, so a quick stop()/start() cannot revive it and
        # leave two pollers dispatching the same transcripts.
        stop_event = threading.Event()
        self._stop_event = stop_event
        self._running = True
        self._poll_thread = threading.Thread(
            target=self._poll_loop, args=(stop_event,),
            daemon=True, name="live_voice_loop")
        self._poll_thread.start()
        log.info("LiveVoiceLoop started (deferred=%s, dispatch=%d)",
                 self.deferred_mode, len(self.wake_dispatch))

    async def stop(self) -> None:
        """Signal the polling thread to exit.

        The thread is not joined here; it leaves its loop the next time it
        checks the stop event.
        """
        self._stop_event.set()
        self._running = False
        log.info("LiveVoiceLoop stopped")

    def set_deferred(self, enabled: bool) -> None:
        """Switch between deferred (True) and instant (False) triggering."""
        self.deferred_mode = bool(enabled)

    # ── poll loop ────────────────────────────────────────────────
    def _poll_loop(self, stop_event: threading.Event | None = None) -> None:
        """Poll LiveGeminiSubprocess.user_transcript for new user texts.

        Args:
            stop_event: Event that ends the loop once set. Defaults to the
                instance's current ``_stop_event``.
        """
        if stop_event is None:
            stop_event = self._stop_event
        last_error = None
        while not stop_event.is_set():
            try:
                self._check_transcripts()
                last_error = None
            except Exception as exc:
                # Thread boundary: one failing poll must not kill the loop
                # while status() keeps reporting it as running. Identical
                # consecutive errors are logged once to avoid flooding.
                err = repr(exc)
                if err != last_error:
                    log.warning("transcript poll failed: %s", exc)
                    last_error = err
            stop_event.wait(POLL_INTERVAL_SEC)

    def _check_transcripts(self):
        """Fetch the live subprocess handle and process its transcripts."""
        # Imported lazily on every poll; while the import fails or the
        # handle is still None there is simply nothing to poll yet.
        try:
            from Project.Sanad.main import live_sub
        except Exception:
            return
        if live_sub is None:
            return
        self._process_transcripts(live_sub)

    def _process_transcripts(self, live_sub) -> None:
        """Dispatch every transcript line that has not been handled yet.

        Args:
            live_sub: Object exposing an iterable ``user_transcript`` of
                hashable transcript lines.
        """
        # Pull recent transcripts
        snapshot = list(live_sub.user_transcript)
        for text in snapshot:
            if text in self._seen_transcripts:
                continue
            self._seen_transcripts.add(text)
            self.last_heard = text
            self._dispatch(text)
        # Remember only lines still present in the buffer. This bounds the
        # set by the buffer size and never makes a still-buffered line look
        # new again (a wholesale clear() would replay old commands). A phrase
        # repeated while its earlier copy is still buffered stays suppressed.
        self._seen_transcripts.intersection_update(snapshot)

    def _dispatch(self, transcript_text: str) -> None:
        """Offer one transcript line to every phrase set; stop at the first hit."""
        if not self.wake_dispatch or self.sanad_arm is None:
            return
        # Gate trigger on arm idle
        if getattr(self.sanad_arm, "_is_busy", False):
            return
        fire_now = not self.deferred_mode
        for action_id, phrases in self.wake_dispatch.items():
            fired = maybe_trigger_arm(
                self.state, transcript_text, phrases,
                fire_on_wake_match=fire_now,
                arm_trigger_fn=self._make_trigger_fn(action_id),
            )
            if fired:
                self._record_trigger(action_id, transcript_text, fire_now)
                break

    def _make_trigger_fn(self, action_id: int):
        """Return a zero-argument callable that triggers ``action_id``.

        Failures of the arm call are logged, not raised.
        """
        def _fire():
            try:
                self.sanad_arm.trigger_action_by_id(action_id)
            except Exception as exc:
                log.warning("arm trigger failed (id=%d): %s", action_id, exc)
        return _fire

    def _record_trigger(self, action_id: int, user_text: str, fired_now: bool):
        """Append a trigger entry to the dashboard history and log it."""
        opt = self.option_by_id.get(action_id)
        action_name = opt.name if opt else f"id={action_id}"
        mode = "instant" if fired_now else "deferred"
        self.last_action = action_name
        self.triggers.append({
            "time": datetime.now().strftime("%H:%M:%S"),
            "user_text": user_text,
            "action_id": action_id,
            "action_name": action_name,
            "mode": mode,
        })
        log.info("arm trigger %s (id=%d) for: %r [%s]",
                 action_name, action_id, user_text, mode)

    # ── status (dashboard) ───────────────────────────────────────
    def status(self) -> dict[str, Any]:
        """Return a snapshot of the loop state for the dashboard.

        ``triggers`` holds at most the 30 most recent history entries.
        """
        pending = ""
        # The state attributes only exist once something has set them, so
        # both are read with a default instead of assuming they are present.
        has_pending = getattr(self.state, "_pending_arm_wave", False)
        if has_pending and getattr(self.state, "_pending_arm_trigger_fn", None) is not None:
            # We can't introspect the action id from fn (closure), but
            # the last triggered line in self.triggers is likely the one.
            pend_name = self.triggers[-1].get("action_name", "") if self.triggers else ""
            pending = f"pending: {pend_name}"
        return {
            "running": self._running,
            "deferred_mode": self.deferred_mode,
            "last_heard": self.last_heard,
            "pending_action": pending,
            "last_action": self.last_action,
            "audio_attached": self.audio_mgr is not None,
            "arm_attached": self.sanad_arm is not None,
            "gemini_connected": bool(
                self.voice_client and getattr(self.voice_client, "connected", False)),
            "dispatch_actions": len(self.wake_dispatch),
            "triggers": list(self.triggers)[-30:],
        }