Saqr/robot/robot_controller.py

273 lines
11 KiB
Python

"""G1 arm + audio + LowState DDS client owned by the bridge.
Announcements run on a dedicated worker thread. Each queue item is a tuple
``(text, category, key)``. The worker picks WAV playback via
``AudioClient.PlayStream`` when the clip exists under ``assets/audio/`` and
``tts.mode`` allows, otherwise falls back to ``TtsMaker`` with the adaptive
busy-factor backoff for 3104 ("device busy") errors.
"""
from __future__ import annotations
import collections
import datetime
import threading
import time
from typing import Deque, Optional, Tuple
from utils.config import load_config
# Robot settings are read once, at import time, from the "robot" config section.
_ROBOT = load_config("robot")
_TTS = _ROBOT["tts"]
_ARM = _ROBOT["arm"]

# Speaker volume handed to AudioClient.SetVolume when the audio client starts.
TTS_VOLUME = _TTS["volume"]

# Utterance-duration estimate: max(min_seconds, len(text) * seconds_per_char).
TTS_SECONDS_PER_CHAR = _TTS["seconds_per_char"]
TTS_MIN_SECONDS = _TTS["min_seconds"]

# Upper bound on pending announcements (used as the deque ``maxlen``).
TTS_QUEUE_MAX = _TTS["queue_max"]

# Adaptive multiplier applied to the duration estimate: scaled by ``up`` after a
# non-zero TtsMaker return code, by ``down`` after a success, clamped to [min, max].
_BUSY_FACTOR = _TTS["busy_factor"]
TTS_BUSY_FACTOR_MIN = _BUSY_FACTOR["min"]
TTS_BUSY_FACTOR_MAX = _BUSY_FACTOR["max"]
TTS_BUSY_FACTOR_UP = _BUSY_FACTOR["up"]
TTS_BUSY_FACTOR_DOWN = _BUSY_FACTOR["down"]

# Announcement routing policy: tts_only | recorded_or_tts | recorded_only
TTS_MODE = _TTS.get("mode", "tts_only")

# Names looked up in the SDK ``action_map`` by RobotController.reject().
REJECT_ACTION = _ARM["reject_action"]
RELEASE_ACTION = _ARM["release_action"]

# One queued announcement: (text, category, key)
QueueItem = Tuple[str, Optional[str], Optional[str]]
def _ts() -> str:
    """Return the current local time as ``HH:MM:SS.mmm`` for log-line prefixes."""
    now = datetime.datetime.now()
    # Whole milliseconds, zero-padded to three digits (microseconds truncated).
    millis = now.microsecond // 1000
    return f"{now:%H:%M:%S}.{millis:03d}"
class RobotController:
    """Owns both the G1 arm action client and the G1 audio (TTS + PlayStream) client.

    Announcements are queued by :meth:`speak` and consumed by a daemon worker
    thread, so callers never block on audio. Each queue item is a
    ``(text, category, key)`` tuple; the worker plays a pre-recorded clip when
    ``tts.mode`` allows it and the clip exists, otherwise it uses ``TtsMaker``.

    In dry-run mode no SDK module is imported, every client attribute stays
    ``None`` and the public methods only log what they would have done.
    """

    def __init__(self, iface: Optional[str], timeout: float, dry_run: bool,
                 tts_speaker_id: int, want_lowstate: bool = True):
        """Create the SDK clients and start the announcement worker.

        Args:
            iface: Network interface passed to ``ChannelFactoryInitialize``;
                a falsy value lets the SDK choose its default.
            timeout: Timeout applied to both the arm and the audio client.
            dry_run: When true, skip all SDK setup and only log.
            tts_speaker_id: Speaker id forwarded to ``TtsMaker``.
            want_lowstate: When true, also subscribe to ``rt/lowstate``.
        """
        self.dry_run = dry_run
        self.tts_speaker_id = tts_speaker_id
        self.arm_client = None
        self.audio_client = None
        self._action_map = None
        self.hub = None
        self._lowstate_sub = None
        self._player = None  # AudioPlayer; stays None in dry-run mode

        # Bounded queue: once full, appending discards the oldest pending item.
        self._tts_queue: Deque[QueueItem] = collections.deque(maxlen=TTS_QUEUE_MAX)
        self._tts_event = threading.Event()        # wakes the worker on new work
        self._tts_worker_stop = threading.Event()  # asks the worker to exit
        self._tts_worker_thread: Optional[threading.Thread] = None
        self._tts_busy_factor: float = TTS_BUSY_FACTOR_MIN
        self._tts_last_call_t: float = 0.0  # 0.0 means "no TtsMaker call yet"
        self._tts_call_count: int = 0
        self._tts_busy_count: int = 0

        if dry_run:
            print(f"[BRIDGE {_ts()}] DRY RUN — G1 SDK will not be loaded.", flush=True)
            return

        # SDK imports are deferred so dry-run works without the SDK installed.
        from unitree_sdk2py.core.channel import ChannelFactoryInitialize
        from unitree_sdk2py.g1.arm.g1_arm_action_client import (
            G1ArmActionClient,
            action_map,
        )
        from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient

        self._action_map = action_map
        if iface:
            ChannelFactoryInitialize(0, iface)
        else:
            ChannelFactoryInitialize(0)

        self.arm_client = G1ArmActionClient()
        self.arm_client.SetTimeout(timeout)
        self.arm_client.Init()
        print(f"[BRIDGE {_ts()}] G1ArmActionClient ready (iface={iface or 'default'})",
              flush=True)

        self.audio_client = AudioClient()
        self.audio_client.SetTimeout(timeout)
        self.audio_client.Init()
        try:
            # Best effort: a volume failure must not prevent start-up.
            self.audio_client.SetVolume(TTS_VOLUME)
        except Exception as e:
            print(f"[BRIDGE {_ts()}][WARN] AudioClient.SetVolume failed: {e}", flush=True)
        print(f"[BRIDGE {_ts()}] G1 AudioClient ready (speaker_id={tts_speaker_id}, "
              f"tts_mode={TTS_MODE})", flush=True)

        # Pre-recorded clip library (WAVs under assets/audio/).
        from robot.audio_player import AudioPlayer
        self._player = AudioPlayer(self.audio_client)

        self._tts_worker_thread = threading.Thread(
            target=self._tts_worker_loop, name="TtsWorker", daemon=True,
        )
        self._tts_worker_thread.start()

        if want_lowstate:
            self._subscribe_lowstate()

    def _subscribe_lowstate(self):
        """Subscribe to ``rt/lowstate``; on any failure warn and leave ``hub`` as None."""
        try:
            from unitree_sdk2py.core.channel import ChannelSubscriber
            from unitree_sdk2py.idl.unitree_hg.msg.dds_ import LowState_
            from robot.controller import LowStateHub
            self.hub = LowStateHub(watchdog_timeout=0.25)
            self._lowstate_sub = ChannelSubscriber("rt/lowstate", LowState_)
            self._lowstate_sub.Init(self.hub.handler, 10)
            print(f"[BRIDGE {_ts()}] Subscribed to rt/lowstate (wireless remote)",
                  flush=True)
        except Exception as e:
            print(f"[BRIDGE {_ts()}][WARN] LowState subscribe failed: {e}", flush=True)
            print(f"[BRIDGE {_ts()}][WARN] Trigger keys (R2+X / R2+Y) will not work.",
                  flush=True)
            self.hub = None

    # ── Public API ──────────────────────────────────────────────────────────
    def speak(self, text: str,
              category: Optional[str] = None, key: Optional[str] = None):
        """Non-blocking — enqueue an announcement for the worker thread.

        If ``category`` + ``key`` are provided and a matching WAV exists under
        ``assets/audio/<category>/<key>.wav``, the worker plays the WAV via
        PlayStream (no firmware TTS latency). Otherwise falls back per
        ``tts.mode`` setting.

        An item identical to the one currently at the tail of the queue is
        dropped. In dry-run mode the call is only logged; without an audio
        client it is a no-op.
        """
        if self.dry_run:
            print(f"[BRIDGE {_ts()}] (dry) would speak({text!r}, category={category!r}, "
                  f"key={key!r})", flush=True)
            return
        if self.audio_client is None:
            return

        item: QueueItem = (text, category, key)
        # Drop adjacent duplicates (same text + routing). The tail is read in one
        # guarded step: the worker may pop the last pending item at any moment,
        # so "check non-empty, then index" could raise IndexError in the caller.
        try:
            is_repeat = self._tts_queue[-1] == item
        except IndexError:
            is_repeat = False
        if is_repeat:
            return

        self._tts_queue.append(item)
        self._tts_event.set()

    def shutdown_tts(self):
        """Ask the worker to stop and wait up to one second for it to exit."""
        self._tts_worker_stop.set()
        self._tts_event.set()  # wake the worker if it is idle-waiting
        if self._tts_worker_thread is not None:
            self._tts_worker_thread.join(timeout=1.0)

    # ── Worker thread ───────────────────────────────────────────────────────
    def _tts_worker_loop(self):
        """Consume queued announcements until the stop event is set.

        A failure while handling one announcement is logged and the loop keeps
        running; otherwise a single bad clip or SDK error would end the thread
        and every later ``speak()`` would be queued but never played.
        """
        while not self._tts_worker_stop.is_set():
            if not self._tts_queue:
                # Idle: sleep until speak()/shutdown_tts() signals, or 0.2 s passes.
                self._tts_event.wait(timeout=0.2)
                self._tts_event.clear()
                continue
            try:
                item = self._tts_queue.popleft()
            except IndexError:
                continue
            try:
                self._dispatch(*item)
            except Exception as e:
                # Thread boundary: report the failure and move on to the next item.
                print(f"[BRIDGE {_ts()}][ERR] announcement {item[0]!r} failed: {e}",
                      flush=True)

    def _dispatch(self, text: str, category: Optional[str], key: Optional[str]):
        """Route one queue item to PlayStream or TtsMaker per ``tts.mode``."""
        wants_clip = (
            TTS_MODE in ("recorded_or_tts", "recorded_only")
            and category is not None and key is not None
            and self._player is not None
            and self._player.has(category, key)
        )
        if wants_clip:
            print(f"[BRIDGE {_ts()}] play -> {category}/{key!r} "
                  f"(text={text!r})", flush=True)
            call_t0 = time.monotonic()
            ok = self._player.play(category, key)
            dt = time.monotonic() - call_t0
            if ok:
                print(f"[BRIDGE {_ts()}] play done ({dt*1000:.0f} ms)", flush=True)
                return
            # Play failed. Decide by mode whether to fall back to TtsMaker.
            if TTS_MODE == "recorded_only":
                print(f"[BRIDGE {_ts()}][WARN] play failed and tts.mode=recorded_only "
                      f"— dropping phrase silently", flush=True)
                return
            print(f"[BRIDGE {_ts()}][WARN] play failed; falling back to TtsMaker",
                  flush=True)
            # fall through to TtsMaker

        if TTS_MODE == "recorded_only":
            # No clip exists for this phrase and user opted out of TtsMaker.
            print(f"[BRIDGE {_ts()}] skip (recorded_only, no clip for "
                  f"{category}/{key!r}): {text!r}", flush=True)
            return
        self._speak_blocking(text)

    # ── TtsMaker path (fallback + legacy) ───────────────────────────────────
    def _estimate_tts_seconds(self, text: str) -> float:
        """Estimate how long ``text`` takes to speak, scaled by the busy factor."""
        base = max(TTS_MIN_SECONDS, len(text) * TTS_SECONDS_PER_CHAR)
        return base * self._tts_busy_factor

    def _speak_blocking(self, text: str):
        """Speak ``text`` via ``TtsMaker`` and block for its estimated duration.

        Any non-zero return code raises the busy factor (capped at the
        configured maximum); a zero return code lowers it (floored at the
        minimum). If ``TtsMaker`` raises, the error is logged and the call
        returns without pausing or touching the busy factor.
        """
        if self.audio_client is None:
            return

        now = time.monotonic()
        # -1.0 is the "no previous call" marker, rendered as n/a in the log line.
        gap_since_last = (now - self._tts_last_call_t) if self._tts_last_call_t else -1.0
        est = self._estimate_tts_seconds(text)
        qsize = len(self._tts_queue)
        self._tts_call_count += 1
        gap_str = f"{gap_since_last:5.2f}s" if gap_since_last >= 0 else " n/a"
        print(
            f"[BRIDGE {_ts()}] tts -> {text!r} "
            f"(est={est:.2f}s, gap={gap_str}, busy_x={self._tts_busy_factor:.2f}, "
            f"q={qsize})",
            flush=True,
        )

        call_t0 = time.monotonic()
        try:
            code = self.audio_client.TtsMaker(text, self.tts_speaker_id)
        except Exception as e:
            print(f"[BRIDGE {_ts()}][ERR] TtsMaker raised: {e}", flush=True)
            return
        call_dt = time.monotonic() - call_t0

        if code != 0:
            self._tts_busy_count += 1
            self._tts_busy_factor = min(
                TTS_BUSY_FACTOR_MAX, self._tts_busy_factor * TTS_BUSY_FACTOR_UP
            )
            print(
                f"[BRIDGE {_ts()}][WARN] TtsMaker rc={code} "
                f"(call took {call_dt*1000:.0f}ms; busy_x -> "
                f"{self._tts_busy_factor:.2f})",
                flush=True,
            )
        else:
            self._tts_busy_factor = max(
                TTS_BUSY_FACTOR_MIN, self._tts_busy_factor * TTS_BUSY_FACTOR_DOWN
            )

        # NOTE(review): the timestamp update and pause are applied after both the
        # success and the busy branch — confirm this is the intended nesting.
        self._tts_last_call_t = time.monotonic()
        # Wait out the rest of the estimate (computed before the factor update).
        remaining = est - call_dt
        if remaining > 0:
            time.sleep(remaining)

    # ── Arm ─────────────────────────────────────────────────────────────────
    def reject(self, release_after: float):
        """Run the configured reject arm action, pause, then release the arm.

        Both action names are checked against the SDK ``action_map`` before
        anything moves, so a misconfigured release action cannot leave the arm
        stuck in the reject pose. Blocks the caller for ``release_after``
        seconds when that value is positive.
        """
        if self.dry_run:
            print(f"[BRIDGE {_ts()}] (dry) would run '{REJECT_ACTION}' "
                  f"then release after {release_after:.1f}s", flush=True)
            return
        if self.arm_client is None or self._action_map is None:
            return

        for action in (REJECT_ACTION, RELEASE_ACTION):
            if action not in self._action_map:
                print(f"[BRIDGE {_ts()}][ERR] '{action}' not in SDK action_map",
                      flush=True)
                return

        print(f"[BRIDGE {_ts()}] -> {REJECT_ACTION}", flush=True)
        self.arm_client.ExecuteAction(self._action_map[REJECT_ACTION])
        # NOTE(review): only the pause is conditional; the release is issued even
        # when release_after <= 0 — confirm this is the intended nesting.
        if release_after > 0:
            time.sleep(release_after)
        print(f"[BRIDGE {_ts()}] -> {RELEASE_ACTION}", flush=True)
        self.arm_client.ExecuteAction(self._action_map[RELEASE_ACTION])