"""G1 arm + audio + LowState DDS client owned by the bridge.

Announcements run on a dedicated worker thread. Each queue item is a tuple
``(text, category, key)``. The worker picks WAV playback via
``AudioClient.PlayStream`` when the clip exists under ``assets/audio/`` and
``tts.mode`` allows, otherwise falls back to ``TtsMaker`` with the adaptive
busy-factor backoff for 3104 ("device busy") errors.
"""
|
|
from __future__ import annotations

import collections
import datetime
import threading
import time
from typing import Deque, Optional, Tuple

from utils.config import load_config
|
# Configuration snapshot taken once at import time.
_ROBOT = load_config("robot")
_TTS = _ROBOT["tts"]
_ARM = _ROBOT["arm"]

# Audio / TTS tuning (section "robot.tts" of the config).
TTS_VOLUME = _TTS["volume"]
TTS_SECONDS_PER_CHAR = _TTS["seconds_per_char"]
TTS_MIN_SECONDS = _TTS["min_seconds"]
TTS_QUEUE_MAX = _TTS["queue_max"]

# Adaptive backoff multiplier bounds/steps for TtsMaker "device busy" errors.
_BUSY = _TTS["busy_factor"]
TTS_BUSY_FACTOR_MIN = _BUSY["min"]
TTS_BUSY_FACTOR_MAX = _BUSY["max"]
TTS_BUSY_FACTOR_UP = _BUSY["up"]
TTS_BUSY_FACTOR_DOWN = _BUSY["down"]

TTS_MODE = _TTS.get("mode", "tts_only")  # tts_only | recorded_or_tts | recorded_only

# Arm gesture names, looked up in the SDK action_map at call time.
REJECT_ACTION = _ARM["reject_action"]
RELEASE_ACTION = _ARM["release_action"]

QueueItem = Tuple[str, Optional[str], Optional[str]]  # (text, category, key)
def _ts() -> str:
|
|
return datetime.datetime.now().strftime("%H:%M:%S.%f")[:-3]
|
|
|
|
|
|
class RobotController:
    """Owns both the G1 arm action client and the G1 audio (TTS + PlayStream) client.

    All announcements go through :meth:`speak`, which enqueues work for the
    dedicated daemon worker thread (``TtsWorker``). The worker routes each item
    either to a pre-recorded WAV clip (``AudioPlayer``) or to the firmware
    ``TtsMaker`` call, depending on the ``tts.mode`` config value.
    """

    def __init__(self, iface: Optional[str], timeout: float, dry_run: bool,
                 tts_speaker_id: int, want_lowstate: bool = True):
        """Initialise SDK clients and start the announcement worker.

        Args:
            iface: network interface for DDS; ``None`` uses the SDK default.
            timeout: RPC timeout (seconds) applied to both SDK clients.
            dry_run: when True, skip loading the G1 SDK entirely and only log.
            tts_speaker_id: voice id passed to ``TtsMaker``.
            want_lowstate: subscribe to ``rt/lowstate`` (wireless-remote keys).
        """
        self.dry_run = dry_run
        self.tts_speaker_id = tts_speaker_id
        self.arm_client = None       # G1ArmActionClient (None in dry-run)
        self.audio_client = None     # AudioClient (None in dry-run)
        self._action_map = None      # SDK action-name -> action-id mapping
        self.hub = None              # LowStateHub, if subscription succeeds
        self._lowstate_sub = None
        self._player = None          # AudioPlayer, lazily initialised

        # Announcement queue + worker state. deque(maxlen=...) silently drops
        # the oldest item when the queue is full.
        self._tts_queue: Deque[QueueItem] = collections.deque(maxlen=TTS_QUEUE_MAX)
        self._tts_event = threading.Event()
        self._tts_worker_stop = threading.Event()
        self._tts_worker_thread: Optional[threading.Thread] = None
        # Adaptive pacing multiplier for the TtsMaker path: grown on busy
        # errors, shrunk on success (see _speak_blocking).
        self._tts_busy_factor: float = TTS_BUSY_FACTOR_MIN
        self._tts_last_call_t: float = 0.0
        self._tts_call_count: int = 0
        self._tts_busy_count: int = 0

        if dry_run:
            print(f"[BRIDGE {_ts()}] DRY RUN — G1 SDK will not be loaded.", flush=True)
            return

        # Imported lazily so dry-run works on machines without the SDK.
        from unitree_sdk2py.core.channel import ChannelFactoryInitialize
        from unitree_sdk2py.g1.arm.g1_arm_action_client import (
            G1ArmActionClient,
            action_map,
        )
        from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient

        self._action_map = action_map

        if iface:
            ChannelFactoryInitialize(0, iface)
        else:
            ChannelFactoryInitialize(0)

        self.arm_client = G1ArmActionClient()
        self.arm_client.SetTimeout(timeout)
        self.arm_client.Init()
        print(f"[BRIDGE {_ts()}] G1ArmActionClient ready (iface={iface or 'default'})",
              flush=True)

        self.audio_client = AudioClient()
        self.audio_client.SetTimeout(timeout)
        self.audio_client.Init()
        try:
            self.audio_client.SetVolume(TTS_VOLUME)
        except Exception as e:
            # Non-fatal: continue with whatever volume the firmware has.
            print(f"[BRIDGE {_ts()}][WARN] AudioClient.SetVolume failed: {e}", flush=True)
        print(f"[BRIDGE {_ts()}] G1 AudioClient ready (speaker_id={tts_speaker_id}, "
              f"tts_mode={TTS_MODE})", flush=True)

        # Pre-recorded clip library (WAVs under assets/audio/).
        from robot.audio_player import AudioPlayer
        self._player = AudioPlayer(self.audio_client)

        self._tts_worker_thread = threading.Thread(
            target=self._tts_worker_loop, name="TtsWorker", daemon=True,
        )
        self._tts_worker_thread.start()

        if want_lowstate:
            try:
                from unitree_sdk2py.core.channel import ChannelSubscriber
                from unitree_sdk2py.idl.unitree_hg.msg.dds_ import LowState_
                from robot.controller import LowStateHub

                self.hub = LowStateHub(watchdog_timeout=0.25)
                self._lowstate_sub = ChannelSubscriber("rt/lowstate", LowState_)
                self._lowstate_sub.Init(self.hub.handler, 10)
                print(f"[BRIDGE {_ts()}] Subscribed to rt/lowstate (wireless remote)",
                      flush=True)
            except Exception as e:
                # Degraded mode: announcements/arm still work, remote keys don't.
                print(f"[BRIDGE {_ts()}][WARN] LowState subscribe failed: {e}", flush=True)
                print(f"[BRIDGE {_ts()}][WARN] Trigger keys (R2+X / R2+Y) will not work.",
                      flush=True)
                self.hub = None

    # ── Public API ──────────────────────────────────────────────────────────
    def speak(self, text: str,
              category: Optional[str] = None, key: Optional[str] = None):
        """Non-blocking — enqueue an announcement for the worker thread.

        If ``category`` + ``key`` are provided and a matching WAV exists under
        ``assets/audio/<category>/<key>.wav``, the worker plays the WAV via
        PlayStream (no firmware TTS latency). Otherwise falls back per
        ``tts.mode`` setting.
        """
        if self.dry_run:
            print(f"[BRIDGE {_ts()}] (dry) would speak({text!r}, category={category!r}, "
                  f"key={key!r})", flush=True)
            return
        if self.audio_client is None:
            return
        item: QueueItem = (text, category, key)
        # Drop adjacent duplicates (same text + routing).
        if self._tts_queue and self._tts_queue[-1] == item:
            return
        self._tts_queue.append(item)
        self._tts_event.set()

    def shutdown_tts(self):
        """Stop the worker thread; waits up to 1 s for it to exit."""
        self._tts_worker_stop.set()
        self._tts_event.set()  # wake the worker if it is waiting on the event
        if self._tts_worker_thread is not None:
            self._tts_worker_thread.join(timeout=1.0)

    # ── Worker thread ───────────────────────────────────────────────────────
    def _tts_worker_loop(self):
        """Drain the queue until :meth:`shutdown_tts` is called."""
        while not self._tts_worker_stop.is_set():
            if not self._tts_queue:
                # Short timeout bounds the worst-case wakeup latency even if
                # a set() is missed between wait() and clear().
                self._tts_event.wait(timeout=0.2)
                self._tts_event.clear()
                continue
            try:
                item = self._tts_queue.popleft()
            except IndexError:
                # Queue emptied between the check and the pop — harmless.
                continue
            self._dispatch(*item)

    def _dispatch(self, text: str, category: Optional[str], key: Optional[str]):
        """Route one queue item to PlayStream or TtsMaker per ``tts.mode``."""
        wants_clip = (
            TTS_MODE in ("recorded_or_tts", "recorded_only")
            and category is not None and key is not None
            and self._player is not None
            and self._player.has(category, key)
        )
        if wants_clip:
            print(f"[BRIDGE {_ts()}] play -> {category}/{key!r} "
                  f"(text={text!r})", flush=True)
            call_t0 = time.monotonic()
            ok = self._player.play(category, key)
            dt = time.monotonic() - call_t0
            if ok:
                print(f"[BRIDGE {_ts()}] play done ({dt*1000:.0f} ms)", flush=True)
                return
            # Play failed. Decide by mode whether to fall back to TtsMaker.
            if TTS_MODE == "recorded_only":
                print(f"[BRIDGE {_ts()}][WARN] play failed and tts.mode=recorded_only "
                      f"— dropping phrase silently", flush=True)
                return
            print(f"[BRIDGE {_ts()}][WARN] play failed; falling back to TtsMaker",
                  flush=True)
            # fall through to TtsMaker

        if TTS_MODE == "recorded_only":
            # No clip exists for this phrase and user opted out of TtsMaker.
            print(f"[BRIDGE {_ts()}] skip (recorded_only, no clip for "
                  f"{category}/{key!r}): {text!r}", flush=True)
            return

        self._speak_blocking(text)

    # ── TtsMaker path (fallback + legacy) ───────────────────────────────────
    def _estimate_tts_seconds(self, text: str) -> float:
        """Estimate playback duration of *text*, scaled by the busy factor."""
        base = max(TTS_MIN_SECONDS, len(text) * TTS_SECONDS_PER_CHAR)
        return base * self._tts_busy_factor

    def _speak_blocking(self, text: str):
        """Call ``TtsMaker`` and sleep out the estimated playback time.

        Runs on the worker thread only. A non-zero return code (e.g. 3104
        "device busy") grows the busy factor multiplicatively; success shrinks
        it back toward the minimum. The post-call sleep paces requests so the
        firmware is not flooded while a previous utterance is still playing.
        """
        if self.audio_client is None:
            return

        now = time.monotonic()
        gap_since_last = (now - self._tts_last_call_t) if self._tts_last_call_t else -1.0
        est = self._estimate_tts_seconds(text)
        qsize = len(self._tts_queue)
        self._tts_call_count += 1

        gap_str = f"{gap_since_last:5.2f}s" if gap_since_last >= 0 else "  n/a"
        print(
            f"[BRIDGE {_ts()}] tts -> {text!r} "
            f"(est={est:.2f}s, gap={gap_str}, busy_x={self._tts_busy_factor:.2f}, "
            f"q={qsize})",
            flush=True,
        )

        call_t0 = time.monotonic()
        try:
            code = self.audio_client.TtsMaker(text, self.tts_speaker_id)
        except Exception as e:
            print(f"[BRIDGE {_ts()}][ERR] TtsMaker raised: {e}", flush=True)
            return
        call_dt = time.monotonic() - call_t0

        if code != 0:
            # Busy (or other) error: back off by inflating future estimates.
            self._tts_busy_count += 1
            self._tts_busy_factor = min(
                TTS_BUSY_FACTOR_MAX, self._tts_busy_factor * TTS_BUSY_FACTOR_UP
            )
            print(
                f"[BRIDGE {_ts()}][WARN] TtsMaker rc={code} "
                f"(call took {call_dt*1000:.0f}ms; busy_x -> "
                f"{self._tts_busy_factor:.2f})",
                flush=True,
            )
        else:
            self._tts_busy_factor = max(
                TTS_BUSY_FACTOR_MIN, self._tts_busy_factor * TTS_BUSY_FACTOR_DOWN
            )

        self._tts_last_call_t = time.monotonic()

        # Block until the utterance has (estimatedly) finished playing.
        remaining = est - call_dt
        if remaining > 0:
            time.sleep(remaining)

    # ── Arm ─────────────────────────────────────────────────────────────────
    def reject(self, release_after: float):
        """Run the reject gesture, then the release gesture after a delay.

        Args:
            release_after: seconds to hold the reject pose; values <= 0 skip
                the release action entirely.
        """
        if self.dry_run:
            print(f"[BRIDGE {_ts()}] (dry) would run '{REJECT_ACTION}' "
                  f"then release after {release_after:.1f}s", flush=True)
            return
        if self.arm_client is None or self._action_map is None:
            return
        # Validate BOTH action names up front. The original code checked only
        # REJECT_ACTION, so a missing RELEASE_ACTION raised KeyError *after*
        # the reject gesture had already run, leaving the arm stuck mid-pose.
        for action in (REJECT_ACTION, RELEASE_ACTION):
            if action not in self._action_map:
                print(f"[BRIDGE {_ts()}][ERR] '{action}' not in SDK action_map",
                      flush=True)
                return
        print(f"[BRIDGE {_ts()}] -> {REJECT_ACTION}", flush=True)
        self.arm_client.ExecuteAction(self._action_map[REJECT_ACTION])
        if release_after > 0:
            time.sleep(release_after)
            print(f"[BRIDGE {_ts()}] -> {RELEASE_ACTION}", flush=True)
            self.arm_client.ExecuteAction(self._action_map[RELEASE_ACTION])