"""G1 arm + audio + LowState DDS client owned by the bridge.

Announcements run on a dedicated worker thread. Each queue item is a tuple
``(text, category, key)``. The worker picks WAV playback via
``AudioClient.PlayStream`` when the clip exists under ``assets/audio/`` and
``tts.mode`` allows, otherwise falls back to ``TtsMaker`` with the adaptive
busy-factor backoff for 3104 ("device busy") errors.
"""
|
|
from __future__ import annotations

import collections
import datetime
import threading
import time
from typing import Deque, Optional, Tuple

from utils.config import load_config
|
# Configuration snapshot taken once at import time.
_ROBOT = load_config("robot")
_TTS = _ROBOT["tts"]
_ARM = _ROBOT["arm"]

# Audio / TTS tuning (section "robot.tts" of the config).
TTS_VOLUME = _TTS["volume"]
TTS_SECONDS_PER_CHAR = _TTS["seconds_per_char"]
TTS_MIN_SECONDS = _TTS["min_seconds"]
TTS_QUEUE_MAX = _TTS["queue_max"]

# Adaptive backoff multiplier bounds/steps for TtsMaker "device busy" errors.
_BUSY = _TTS["busy_factor"]
TTS_BUSY_FACTOR_MIN = _BUSY["min"]
TTS_BUSY_FACTOR_MAX = _BUSY["max"]
TTS_BUSY_FACTOR_UP = _BUSY["up"]
TTS_BUSY_FACTOR_DOWN = _BUSY["down"]

TTS_MODE = _TTS.get("mode", "tts_only")  # tts_only | recorded_or_tts | recorded_only

# Arm gesture names, looked up in the SDK action_map at call time.
REJECT_ACTION = _ARM["reject_action"]
RELEASE_ACTION = _ARM["release_action"]

QueueItem = Tuple[str, Optional[str], Optional[str]]  # (text, category, key)
def _ts() -> str:
|
|
return datetime.datetime.now().strftime("%H:%M:%S.%f")[:-3]
|
|
|
|
|
|
class RobotController:
    """Owns both the G1 arm action client and the G1 audio (TTS + PlayStream) client.

    All announcements go through :meth:`speak`, which enqueues work for the
    dedicated daemon worker thread (``TtsWorker``). The worker routes each item
    either to a pre-recorded WAV clip (``AudioPlayer``) or to the firmware
    ``TtsMaker`` call, depending on the ``tts.mode`` config value.
    """

    def __init__(self, iface: Optional[str], timeout: float, dry_run: bool,
                 tts_speaker_id: int, want_lowstate: bool = True):
        """Initialise SDK clients and start the announcement worker.

        Args:
            iface: network interface for DDS; ``None`` uses the SDK default.
            timeout: RPC timeout (seconds) applied to both SDK clients.
            dry_run: when True, skip loading the G1 SDK entirely and only log.
            tts_speaker_id: voice id passed to ``TtsMaker``.
            want_lowstate: subscribe to ``rt/lowstate`` (wireless-remote keys).
        """
        self.dry_run = dry_run
        self.tts_speaker_id = tts_speaker_id
        self.arm_client = None       # G1ArmActionClient (None in dry-run)
        self.audio_client = None     # AudioClient (None in dry-run)
        self._action_map = None      # SDK action-name -> action-id mapping
        self.hub = None              # LowStateHub, if subscription succeeds
        self._lowstate_sub = None
        self._player = None          # AudioPlayer, lazily initialised

        # Announcement queue + worker state. deque(maxlen=...) silently drops
        # the oldest item when the queue is full.
        self._tts_queue: Deque[QueueItem] = collections.deque(maxlen=TTS_QUEUE_MAX)
        self._tts_event = threading.Event()
        self._tts_worker_stop = threading.Event()
        self._tts_worker_thread: Optional[threading.Thread] = None
        # Adaptive pacing multiplier for the TtsMaker path: grown on busy
        # errors, shrunk on success (see _speak_blocking).
        self._tts_busy_factor: float = TTS_BUSY_FACTOR_MIN
        self._tts_last_call_t: float = 0.0
        self._tts_call_count: int = 0
        self._tts_busy_count: int = 0

        if dry_run:
            print(f"[BRIDGE {_ts()}] DRY RUN — G1 SDK will not be loaded.", flush=True)
            return

        # Imported lazily so dry-run works on machines without the SDK.
        from unitree_sdk2py.core.channel import ChannelFactoryInitialize
        from unitree_sdk2py.g1.arm.g1_arm_action_client import (
            G1ArmActionClient,
            action_map,
        )
        from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient

        self._action_map = action_map

        if iface:
            ChannelFactoryInitialize(0, iface)
        else:
            ChannelFactoryInitialize(0)

        self.arm_client = G1ArmActionClient()
        self.arm_client.SetTimeout(timeout)
        self.arm_client.Init()
        print(f"[BRIDGE {_ts()}] G1ArmActionClient ready (iface={iface or 'default'})",
              flush=True)

        self.audio_client = AudioClient()
        self.audio_client.SetTimeout(timeout)
        self.audio_client.Init()
        try:
            self.audio_client.SetVolume(TTS_VOLUME)
        except Exception as e:
            # Non-fatal: continue with whatever volume the firmware has.
            print(f"[BRIDGE {_ts()}][WARN] AudioClient.SetVolume failed: {e}", flush=True)
        print(f"[BRIDGE {_ts()}] G1 AudioClient ready (speaker_id={tts_speaker_id}, "
              f"tts_mode={TTS_MODE})", flush=True)

        # Pre-recorded clip library (WAVs under assets/audio/).
        from robot.audio_player import AudioPlayer
        self._player = AudioPlayer(self.audio_client)

        self._tts_worker_thread = threading.Thread(
            target=self._tts_worker_loop, name="TtsWorker", daemon=True,
        )
        self._tts_worker_thread.start()

        if want_lowstate:
            try:
                from unitree_sdk2py.core.channel import ChannelSubscriber
                from unitree_sdk2py.idl.unitree_hg.msg.dds_ import LowState_
                from robot.controller import LowStateHub

                self.hub = LowStateHub(watchdog_timeout=0.25)
                self._lowstate_sub = ChannelSubscriber("rt/lowstate", LowState_)
                self._lowstate_sub.Init(self.hub.handler, 10)
                print(f"[BRIDGE {_ts()}] Subscribed to rt/lowstate (wireless remote)",
                      flush=True)
            except Exception as e:
                # Degraded mode: announcements/arm still work, remote keys don't.
                print(f"[BRIDGE {_ts()}][WARN] LowState subscribe failed: {e}", flush=True)
                print(f"[BRIDGE {_ts()}][WARN] Trigger keys (R2+X / R2+Y) will not work.",
                      flush=True)
                self.hub = None

    # ── Public API ──────────────────────────────────────────────────────────
    def speak(self, text: str,
              category: Optional[str] = None, key: Optional[str] = None):
        """Non-blocking — enqueue an announcement for the worker thread.

        If ``category`` + ``key`` are provided and a matching WAV exists under
        ``assets/audio/<category>/<key>.wav``, the worker plays the WAV via
        PlayStream (no firmware TTS latency). Otherwise falls back per
        ``tts.mode`` setting.
        """
        if self.dry_run:
            print(f"[BRIDGE {_ts()}] (dry) would speak({text!r}, category={category!r}, "
                  f"key={key!r})", flush=True)
            return
        if self.audio_client is None:
            return
        item: QueueItem = (text, category, key)
        # Drop adjacent duplicates (same text + routing).
        if self._tts_queue and self._tts_queue[-1] == item:
            return
        self._tts_queue.append(item)
        self._tts_event.set()

    def shutdown_tts(self):
        """Stop the worker thread; waits up to 1 s for it to exit."""
        self._tts_worker_stop.set()
        self._tts_event.set()  # wake the worker if it is waiting on the event
        if self._tts_worker_thread is not None:
            self._tts_worker_thread.join(timeout=1.0)

    # ── Worker thread ───────────────────────────────────────────────────────
    def _tts_worker_loop(self):
        """Drain the queue until :meth:`shutdown_tts` is called."""
        while not self._tts_worker_stop.is_set():
            if not self._tts_queue:
                # Short timeout bounds the worst-case wakeup latency even if
                # a set() is missed between wait() and clear().
                self._tts_event.wait(timeout=0.2)
                self._tts_event.clear()
                continue
            try:
                item = self._tts_queue.popleft()
            except IndexError:
                # Queue emptied between the check and the pop — harmless.
                continue
            self._dispatch(*item)

    def _dispatch(self, text: str, category: Optional[str], key: Optional[str]):
        """Route one queue item to PlayStream or TtsMaker per ``tts.mode``."""
        wants_clip = (
            TTS_MODE in ("recorded_or_tts", "recorded_only")
            and category is not None and key is not None
            and self._player is not None
            and self._player.has(category, key)
        )
        if wants_clip:
            print(f"[BRIDGE {_ts()}] play -> {category}/{key!r} "
                  f"(text={text!r})", flush=True)
            call_t0 = time.monotonic()
            ok = self._player.play(category, key)
            dt = time.monotonic() - call_t0
            if ok:
                print(f"[BRIDGE {_ts()}] play done ({dt*1000:.0f} ms)", flush=True)
                return
            # Play failed. Decide by mode whether to fall back to TtsMaker.
            if TTS_MODE == "recorded_only":
                print(f"[BRIDGE {_ts()}][WARN] play failed and tts.mode=recorded_only "
                      f"— dropping phrase silently", flush=True)
                return
            print(f"[BRIDGE {_ts()}][WARN] play failed; falling back to TtsMaker",
                  flush=True)
            # fall through to TtsMaker

        if TTS_MODE == "recorded_only":
            # No clip exists for this phrase and user opted out of TtsMaker.
            print(f"[BRIDGE {_ts()}] skip (recorded_only, no clip for "
                  f"{category}/{key!r}): {text!r}", flush=True)
            return

        self._speak_blocking(text)

    # ── TtsMaker path (fallback + legacy) ───────────────────────────────────
    def _estimate_tts_seconds(self, text: str) -> float:
        """Estimate playback duration of *text*, scaled by the busy factor."""
        base = max(TTS_MIN_SECONDS, len(text) * TTS_SECONDS_PER_CHAR)
        return base * self._tts_busy_factor

    def _speak_blocking(self, text: str):
        """Call ``TtsMaker`` and sleep out the estimated playback time.

        Runs on the worker thread only. A non-zero return code (e.g. 3104
        "device busy") grows the busy factor multiplicatively; success shrinks
        it back toward the minimum. The post-call sleep paces requests so the
        firmware is not flooded while a previous utterance is still playing.
        """
        if self.audio_client is None:
            return

        now = time.monotonic()
        gap_since_last = (now - self._tts_last_call_t) if self._tts_last_call_t else -1.0
        est = self._estimate_tts_seconds(text)
        qsize = len(self._tts_queue)
        self._tts_call_count += 1

        gap_str = f"{gap_since_last:5.2f}s" if gap_since_last >= 0 else "  n/a"
        print(
            f"[BRIDGE {_ts()}] tts -> {text!r} "
            f"(est={est:.2f}s, gap={gap_str}, busy_x={self._tts_busy_factor:.2f}, "
            f"q={qsize})",
            flush=True,
        )

        call_t0 = time.monotonic()
        try:
            code = self.audio_client.TtsMaker(text, self.tts_speaker_id)
        except Exception as e:
            print(f"[BRIDGE {_ts()}][ERR] TtsMaker raised: {e}", flush=True)
            return
        call_dt = time.monotonic() - call_t0

        if code != 0:
            # Busy (or other) error: back off by inflating future estimates.
            self._tts_busy_count += 1
            self._tts_busy_factor = min(
                TTS_BUSY_FACTOR_MAX, self._tts_busy_factor * TTS_BUSY_FACTOR_UP
            )
            print(
                f"[BRIDGE {_ts()}][WARN] TtsMaker rc={code} "
                f"(call took {call_dt*1000:.0f}ms; busy_x -> "
                f"{self._tts_busy_factor:.2f})",
                flush=True,
            )
        else:
            self._tts_busy_factor = max(
                TTS_BUSY_FACTOR_MIN, self._tts_busy_factor * TTS_BUSY_FACTOR_DOWN
            )

        self._tts_last_call_t = time.monotonic()

        # Block until the utterance has (estimatedly) finished playing.
        remaining = est - call_dt
        if remaining > 0:
            time.sleep(remaining)

    # ── Arm ─────────────────────────────────────────────────────────────────
    def reject(self, release_after: float):
        """Run the reject gesture, then the release gesture after a delay.

        Args:
            release_after: seconds to hold the reject pose; values <= 0 skip
                the release action entirely.
        """
        if self.dry_run:
            print(f"[BRIDGE {_ts()}] (dry) would run '{REJECT_ACTION}' "
                  f"then release after {release_after:.1f}s", flush=True)
            return
        if self.arm_client is None or self._action_map is None:
            return
        # Validate BOTH action names up front. The original code checked only
        # REJECT_ACTION, so a missing RELEASE_ACTION raised KeyError *after*
        # the reject gesture had already run, leaving the arm stuck mid-pose.
        for action in (REJECT_ACTION, RELEASE_ACTION):
            if action not in self._action_map:
                print(f"[BRIDGE {_ts()}][ERR] '{action}' not in SDK action_map",
                      flush=True)
                return
        print(f"[BRIDGE {_ts()}] -> {REJECT_ACTION}", flush=True)
        self.arm_client.ExecuteAction(self._action_map[REJECT_ACTION])
        if release_after > 0:
            time.sleep(release_after)
            print(f"[BRIDGE {_ts()}] -> {RELEASE_ACTION}", flush=True)
            self.arm_client.ExecuteAction(self._action_map[RELEASE_ACTION])