Saqr/robot/robot_controller.py

"""G1 arm + audio + LowState DDS client owned by the bridge.

Announcements run on a dedicated worker thread. Each queue item is a tuple
``(text, category, key)``. The worker picks WAV playback via
``AudioClient.PlayStream`` when the clip exists under ``assets/audio/`` and
``tts.mode`` allows, otherwise falls back to ``TtsMaker`` with the adaptive
busy-factor backoff for 3104 ("device busy") errors.
"""
from __future__ import annotations

import collections
import datetime
import threading
import time
from typing import Deque, Optional, Tuple

from utils.config import load_config

# Robot section of the project config, read once at import time. The "tts"
# and "arm" sub-sections (and the keys indexed below) are looked up with
# plain subscripting, so a missing entry fails at import rather than at first
# use; only "motion" and "tts.mode" have defaults.
_ROBOT = load_config("robot")
_TTS = _ROBOT["tts"]
_ARM = _ROBOT["arm"]

# TTS tuning. SECONDS_PER_CHAR / MIN_SECONDS feed the playback-duration
# estimate; the BUSY_FACTOR_* values bound and step the multiplier applied to
# that estimate after failed / successful TtsMaker calls.
TTS_VOLUME           = _TTS["volume"]
TTS_SECONDS_PER_CHAR = _TTS["seconds_per_char"]
TTS_MIN_SECONDS      = _TTS["min_seconds"]
TTS_QUEUE_MAX        = _TTS["queue_max"]
TTS_BUSY_FACTOR_MIN  = _TTS["busy_factor"]["min"]
TTS_BUSY_FACTOR_MAX  = _TTS["busy_factor"]["max"]
TTS_BUSY_FACTOR_UP   = _TTS["busy_factor"]["up"]
TTS_BUSY_FACTOR_DOWN = _TTS["busy_factor"]["down"]
TTS_MODE             = _TTS.get("mode", "tts_only")   # tts_only | recorded_or_tts | recorded_only

# Keys into the SDK's arm ``action_map`` used by the legacy (non-replay)
# reject path.
REJECT_ACTION  = _ARM["reject_action"]
RELEASE_ACTION = _ARM["release_action"]

# unitree_sdk2py/g1/audio/g1_audio_client.py bug: TtsMaker does
#   self.tts_index += self.tts_index   # 0+0=0, never increments
# We bypass the broken wrapper and call _Call(ROBOT_API_ID_AUDIO_TTS, ...)
# with our own counter so the firmware sees a unique index per request.
ROBOT_API_ID_AUDIO_TTS        = 1001
ROBOT_API_ID_AUDIO_STOP_PLAY  = 1004

# Audio RPC timeout: the firmware occasionally stalls after a long idle
# period followed by arm-sdk activity. A short timeout plus retry recovers in
# seconds instead of waiting out the 10s bridge default.
TTS_RPC_TIMEOUT_S      = 3.0
TTS_RETRY_MAX_ATTEMPTS = 2
TTS_RETRY_DELAY_S      = 0.5

# Prior-art reset pattern (G1_Lootah/Audio_Recorder/voice_note.txt and
# g1_tts_arabic.py:50): call AUDIO_STOP_PLAY with a 300 ms settle before
# each new audio operation; otherwise the firmware state goes stale and
# subsequent TtsMaker calls time out with rc=3104.
AUDIO_STOP_APP_NAME = "tts"
AUDIO_STOP_SETTLE_S = 0.3

# Recorded arm motions (assets/motions/*.jsonl) replace the high-level
# ExecuteAction('reject') call with a teach-and-replay trajectory.
# The whole "motion" section is optional; every key below has a default.
_MOTION = _ROBOT.get("motion", {})
MOTION_ENABLED     = bool(_MOTION.get("enabled", True))
MOTION_UNSAFE_FILE = _MOTION.get("unsafe_file", "adnoc1.jsonl")
MOTION_HOME_FILE   = _MOTION.get("home_file",   "arm_home.jsonl")
MOTION_SPEED       = float(_MOTION.get("speed", 1.0))

# One announcement request as stored on the worker queue.
QueueItem = Tuple[str, Optional[str], Optional[str]]   # (text, category, key)


def _ts() -> str:
    """Return the local wall-clock time as ``HH:MM:SS.mmm`` for log prefixes."""
    now = datetime.datetime.now()
    # Truncate (not round) microseconds to milliseconds.
    return f"{now:%H:%M:%S}.{now.microsecond // 1000:03d}"


class RobotController:
    """Owns both the G1 arm action client and the G1 audio (TTS + PlayStream) client."""

    def __init__(self, iface: Optional[str], timeout: float, dry_run: bool,
                 tts_speaker_id: int, want_lowstate: bool = True):
        """Create the controller and, unless ``dry_run``, bring up the G1 SDK.

        Bring-up order (non-dry-run): DDS channel factory → arm action client
        → audio client → clip player → optional arm replayer → TTS worker
        thread → optional LowState subscription.

        Args:
            iface: Network interface name handed to
                ``ChannelFactoryInitialize``; ``None``/empty uses the SDK
                default.
            timeout: Value passed to ``G1ArmActionClient.SetTimeout``
                (presumably seconds — confirm against the SDK). The audio
                client uses ``TTS_RPC_TIMEOUT_S`` instead.
            dry_run: When true, no SDK module is imported, no client or
                worker thread is created, and the public methods only log
                what they would do.
            tts_speaker_id: Speaker id sent with every TtsMaker request.
            want_lowstate: Subscribe to ``rt/lowstate`` and expose the
                resulting hub on ``self.hub``. A failure here is logged and
                leaves ``self.hub`` as ``None``.

        Failures of ``SetVolume``, ``ArmReplayer`` creation and the LowState
        subscription are logged and tolerated; any other SDK/import error
        propagates to the caller.
        """
        self.dry_run = dry_run
        self.tts_speaker_id = tts_speaker_id
        # Everything below stays None in dry-run mode (or when its optional
        # initialisation step fails).
        self.arm_client = None
        self.audio_client = None
        self._action_map = None
        self.hub = None
        self._lowstate_sub = None
        self._player = None     # AudioPlayer, lazily initialised
        self._arm_replayer = None   # ArmReplayer, lazily initialised

        # Bounded queue: when full, deque(maxlen=...) silently discards from
        # the opposite end on append.
        self._tts_queue: Deque[QueueItem] = collections.deque(maxlen=TTS_QUEUE_MAX)
        # Wakes the worker when speak() enqueues or shutdown is requested.
        self._tts_event = threading.Event()
        # Set when the worker is not dispatching AND the queue is empty. Cleared
        # on speak(). Callers who want to wait for audio to finish before
        # issuing arm commands (so the firmware doesn't serialise-busy on us)
        # can await this event.
        self._audio_idle = threading.Event()
        self._audio_idle.set()
        self._tts_worker_stop = threading.Event()
        self._tts_worker_thread: Optional[threading.Thread] = None
        # Adaptive multiplier on the estimated speech duration; grows after
        # failed TtsMaker calls and decays after successful ones.
        self._tts_busy_factor: float = TTS_BUSY_FACTOR_MIN
        self._tts_last_call_t: float = 0.0   # 0.0 means "no call made yet"
        self._tts_call_count: int = 0
        self._tts_busy_count: int = 0
        # Our own request counter (the SDK wrapper's never increments).
        self._tts_index: int = 0

        if dry_run:
            print(f"[BRIDGE {_ts()}] DRY RUN — G1 SDK will not be loaded.", flush=True)
            return

        # SDK imports are deferred to here so dry-run works on machines
        # without unitree_sdk2py installed.
        from unitree_sdk2py.core.channel import ChannelFactoryInitialize
        from unitree_sdk2py.g1.arm.g1_arm_action_client import (
            G1ArmActionClient,
            action_map,
        )
        from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient

        self._action_map = action_map

        # The channel factory is initialised before any client is created.
        if iface:
            ChannelFactoryInitialize(0, iface)
        else:
            ChannelFactoryInitialize(0)

        self.arm_client = G1ArmActionClient()
        self.arm_client.SetTimeout(timeout)
        self.arm_client.Init()
        print(f"[BRIDGE {_ts()}] G1ArmActionClient ready (iface={iface or 'default'})",
              flush=True)

        self.audio_client = AudioClient()
        # Override the bridge-wide 10s with a shorter audio-specific timeout
        # so TtsMaker hangs (rc=3104 after full timeout) recover quickly via
        # the retry loop in _speak_blocking instead of blocking the worker.
        self.audio_client.SetTimeout(TTS_RPC_TIMEOUT_S)
        self.audio_client.Init()
        # Volume is best-effort: a failure is logged and start-up continues.
        try:
            self.audio_client.SetVolume(TTS_VOLUME)
        except Exception as e:
            print(f"[BRIDGE {_ts()}][WARN] AudioClient.SetVolume failed: {e}", flush=True)
        print(f"[BRIDGE {_ts()}] G1 AudioClient ready (speaker_id={tts_speaker_id}, "
              f"tts_mode={TTS_MODE})", flush=True)

        # Pre-recorded clip library (WAVs under assets/audio/).
        from robot.audio_player import AudioPlayer
        self._player = AudioPlayer(self.audio_client)

        # Recorded arm-motion replayer (teach-and-replay trajectories under
        # assets/motions/) — replaces the high-level ExecuteAction path.
        # If creation fails, _arm_replayer stays None and reject() uses the
        # legacy ExecuteAction path instead.
        if MOTION_ENABLED:
            try:
                from robot.arm_replay import ArmReplayer
                self._arm_replayer = ArmReplayer()
                print(f"[BRIDGE {_ts()}] ArmReplayer ready "
                      f"(unsafe_file={MOTION_UNSAFE_FILE}, "
                      f"home_file={MOTION_HOME_FILE}, speed={MOTION_SPEED})",
                      flush=True)
            except Exception as e:
                print(f"[BRIDGE {_ts()}][WARN] ArmReplayer init failed: {e}",
                      flush=True)

        # Daemon thread: it will not keep the process alive at exit; call
        # shutdown_tts() for an orderly stop.
        self._tts_worker_thread = threading.Thread(
            target=self._tts_worker_loop, name="TtsWorker", daemon=True,
        )
        self._tts_worker_thread.start()

        if want_lowstate:
            try:
                from unitree_sdk2py.core.channel import ChannelSubscriber
                from unitree_sdk2py.idl.unitree_hg.msg.dds_ import LowState_
                from robot.controller import LowStateHub

                self.hub = LowStateHub(watchdog_timeout=0.25)
                self._lowstate_sub = ChannelSubscriber("rt/lowstate", LowState_)
                self._lowstate_sub.Init(self.hub.handler, 10)
                print(f"[BRIDGE {_ts()}] Subscribed to rt/lowstate (wireless remote)",
                      flush=True)
            except Exception as e:
                print(f"[BRIDGE {_ts()}][WARN] LowState subscribe failed: {e}", flush=True)
                print(f"[BRIDGE {_ts()}][WARN] Trigger keys (R2+X / R2+Y) will not work.",
                      flush=True)
                # Reset so callers can test ``hub is None`` even if the hub
                # object was created before the failure.
                self.hub = None

    # ── Public API ──────────────────────────────────────────────────────────
    def speak(self, text: str,
              category: Optional[str] = None, key: Optional[str] = None):
        """Non-blocking — hand an announcement request to the worker thread.

        Freshness policy: a new announcement **cancels** any in-progress clip
        play and **replaces** any queued item. This keeps audio synchronised
        with the latest event — otherwise a stale phrase from an earlier event
        could finish playing AFTER the newer arm motion has completed.

        Args:
            text: Phrase to announce (used verbatim on the TtsMaker path).
            category: Optional clip category used to look up a recorded WAV.
            key: Optional clip key within ``category``.

        In dry-run mode the request is only logged. When no audio client
        exists the call is a silent no-op. A request identical to the item
        currently waiting at the tail of the queue is dropped.
        """
        if self.dry_run:
            print(f"[BRIDGE {_ts()}] (dry) would speak({text!r}, category={category!r}, "
                  f"key={key!r})", flush=True)
            return
        if self.audio_client is None:
            return
        item: QueueItem = (text, category, key)
        # Drop adjacent duplicates (same text + routing). The worker thread
        # can popleft() at any moment, so a "non-empty?" check followed by
        # ``[-1]`` may raise IndexError in the caller; peek EAFP-style instead.
        try:
            pending = self._tts_queue[-1]
        except IndexError:
            pending = None
        if pending == item:
            return
        # Freshness: throw away any stale queued item and cancel the current
        # play so the new item takes over immediately.
        self._tts_queue.clear()
        if self._player is not None:
            self._player.cancel()
        # Order matters: enqueue first, THEN mark audio as busy, then wake the
        # worker.
        self._tts_queue.append(item)
        self._audio_idle.clear()
        self._tts_event.set()

    def wait_for_audio_done(self, timeout: float = 15.0) -> bool:
        """Wait for the audio-idle flag, i.e. empty queue and no play running.

        Intended for callers that must be sure no audio is in flight before
        sending a low-level arm command (avoids firmware bus contention).

        Args:
            timeout: Maximum time to wait, in seconds.

        Returns:
            ``True`` if audio went idle within ``timeout``, ``False`` if the
            wait timed out.
        """
        went_idle = self._audio_idle.wait(timeout=timeout)
        return went_idle

    def shutdown_tts(self):
        """Ask the TTS worker to stop and give it up to one second to exit.

        Safe to call when no worker was ever started (e.g. dry-run).
        """
        # Raise the stop flag first, then wake the worker so it notices.
        for flag in (self._tts_worker_stop, self._tts_event):
            flag.set()
        worker = self._tts_worker_thread
        if worker is None:
            return
        worker.join(timeout=1.0)

    # ── Worker thread ───────────────────────────────────────────────────────
    def _tts_worker_loop(self):
        """Worker-thread body: drain the queue until ``_tts_worker_stop`` is set.

        While the queue is empty the loop marks audio as idle and sleeps on
        ``_tts_event`` (polling every 0.2 s as a safety net against missed
        wake-ups). Each dequeued item is passed to ``_dispatch``; a failure
        there is logged and the loop carries on, because a dead worker would
        silence every later announcement and leave ``_audio_idle`` stuck.
        """
        while not self._tts_worker_stop.is_set():
            if not self._tts_queue:
                # Queue drained AND no dispatch in progress → audio is idle.
                self._audio_idle.set()
                # speak() appends first and clears ``_audio_idle`` afterwards.
                # If it ran between the emptiness check above and the set()
                # just done, its clear() was overwritten — re-check so
                # waiters never see "idle" while an item is pending.
                if self._tts_queue:
                    self._audio_idle.clear()
                    continue
                self._tts_event.wait(timeout=0.2)
                self._tts_event.clear()
                continue
            try:
                item = self._tts_queue.popleft()
            except IndexError:
                # speak() cleared the queue between the check and the pop.
                continue
            # Audio is definitely busy from here until the dispatch returns.
            self._audio_idle.clear()
            try:
                self._dispatch(*item)
            except Exception as e:
                # Thread top-level boundary: log and keep the worker alive.
                print(f"[BRIDGE {_ts()}][ERR] TTS worker dispatch failed: {e!r}",
                      flush=True)

    def _dispatch(self, text: str, category: Optional[str], key: Optional[str]):
        """Play one queue item as a recorded clip or via TtsMaker, per ``tts.mode``.

        A clip is attempted only when the mode permits recordings, both
        ``category`` and ``key`` are given, and the player reports having that
        clip. If the clip plays, nothing else happens. Otherwise
        ``recorded_only`` drops the phrase (with a log line) and every other
        mode speaks ``text`` through ``_speak_blocking``.
        """
        recorded_only = TTS_MODE == "recorded_only"
        clips_allowed = TTS_MODE in ("recorded_or_tts", "recorded_only")
        routed = category is not None and key is not None

        if (clips_allowed and routed
                and self._player is not None
                and self._player.has(category, key)):
            print(f"[BRIDGE {_ts()}] play -> {category}/{key!r}  "
                  f"(text={text!r})", flush=True)
            started = time.monotonic()
            played = self._player.play(category, key)
            elapsed = time.monotonic() - started
            if played:
                print(f"[BRIDGE {_ts()}] play done ({elapsed*1000:.0f} ms)", flush=True)
                return
            # Clip playback failed: the mode decides whether TtsMaker may
            # take over.
            if recorded_only:
                print(f"[BRIDGE {_ts()}][WARN] play failed and tts.mode=recorded_only "
                      f"— dropping phrase silently", flush=True)
                return
            print(f"[BRIDGE {_ts()}][WARN] play failed; falling back to TtsMaker",
                  flush=True)
        elif recorded_only:
            # No usable clip and synthesis is switched off for this mode.
            print(f"[BRIDGE {_ts()}] skip (recorded_only, no clip for "
                  f"{category}/{key!r}): {text!r}", flush=True)
            return

        self._speak_blocking(text)

    # ── TtsMaker path (fallback + legacy) ───────────────────────────────────
    def _estimate_tts_seconds(self, text: str) -> float:
        """Estimate how long ``text`` takes to speak, scaled by the busy factor.

        The unscaled estimate is ``len(text) * TTS_SECONDS_PER_CHAR`` with a
        floor of ``TTS_MIN_SECONDS``.
        """
        floor = TTS_MIN_SECONDS
        by_length = len(text) * TTS_SECONDS_PER_CHAR
        unscaled = by_length if by_length > floor else floor
        return unscaled * self._tts_busy_factor

    def _tts_maker_call(self, text: str, speaker_id: int) -> int:
        """Send one TTS request straight to the audio RPC and return its code.

        This sidesteps the SDK's TtsMaker wrapper (see the
        ROBOT_API_ID_AUDIO_TTS note) so every request carries a genuinely
        incrementing index taken from ``self._tts_index``.
        """
        import json
        # Advance the counter first: each request, retries included, gets its
        # own index.
        request_index = self._tts_index + 1
        self._tts_index = request_index
        payload = {
            "index":      request_index,
            "text":       text,
            "speaker_id": speaker_id,
        }
        rc, _reply = self.audio_client._Call(
            ROBOT_API_ID_AUDIO_TTS, json.dumps(payload)
        )
        return rc

    def _speak_blocking(self, text: str):
        """Speak ``text`` through TtsMaker, blocking for the estimated duration.

        Sequence: log the request → send AUDIO_STOP_PLAY and wait
        ``AUDIO_STOP_SETTLE_S`` → call TtsMaker up to
        ``TTS_RETRY_MAX_ATTEMPTS`` times (any non-zero return code triggers a
        retry) → adjust the busy factor (up on final failure, down on
        success) → sleep for whatever is left of the estimated speech
        duration.

        If the TtsMaker RPC raises, the error is logged and the method
        returns immediately without touching the busy factor, the last-call
        timestamp, or the trailing sleep. No-op when there is no audio
        client.
        """
        if self.audio_client is None:
            return

        now = time.monotonic()
        # -1.0 is a sentinel for "no previous call" (shown as n/a in the log).
        gap_since_last = (now - self._tts_last_call_t) if self._tts_last_call_t else -1.0
        # Estimated with the busy factor as it stands BEFORE this call's
        # adjustment below.
        est = self._estimate_tts_seconds(text)
        qsize = len(self._tts_queue)
        self._tts_call_count += 1

        gap_str = f"{gap_since_last:5.2f}s" if gap_since_last >= 0 else "  n/a"
        # idx shows the index the first attempt will use; retries consume
        # further indices.
        print(
            f"[BRIDGE {_ts()}] tts -> {text!r}  "
            f"(est={est:.2f}s, gap={gap_str}, busy_x={self._tts_busy_factor:.2f}, "
            f"q={qsize}, idx={self._tts_index + 1})",
            flush=True,
        )

        # Firmware reset: AUDIO_STOP_PLAY clears any residual audio stream
        # state from the prior TtsMaker/PlayStream, then a 300 ms settle
        # before the new request. Without this the firmware times out the
        # next call with rc=3104 after successful-then-parallel-arm events.
        # See G1_Lootah/Audio_Recorder/voice_note.txt line 11.
        import json as _json
        stop_t0 = time.monotonic()
        # Best-effort: a failing stop is logged and the TTS attempt proceeds.
        try:
            self.audio_client._Call(
                ROBOT_API_ID_AUDIO_STOP_PLAY,
                _json.dumps({"app_name": AUDIO_STOP_APP_NAME}),
            )
        except Exception as e:
            print(f"[BRIDGE {_ts()}][WARN] AUDIO_STOP_PLAY raised: {e}", flush=True)
        stop_dt = time.monotonic() - stop_t0
        if stop_dt > 0.5:
            print(f"[BRIDGE {_ts()}][WARN] AUDIO_STOP_PLAY slow "
                  f"({stop_dt*1000:.0f}ms) — firmware busy", flush=True)
        time.sleep(AUDIO_STOP_SETTLE_S)

        code = -1
        call_dt = 0.0
        # total_dt below covers all attempts and retry delays, but not the
        # stop/settle phase above.
        total_t0 = time.monotonic()
        for attempt in range(1, TTS_RETRY_MAX_ATTEMPTS + 1):
            call_t0 = time.monotonic()
            try:
                code = self._tts_maker_call(text, self.tts_speaker_id)
            except Exception as e:
                # An exception (as opposed to a non-zero rc) is not retried.
                print(f"[BRIDGE {_ts()}][ERR] TtsMaker raised: {e}", flush=True)
                return
            call_dt = time.monotonic() - call_t0

            if code == 0:
                if attempt > 1:
                    print(f"[BRIDGE {_ts()}] TtsMaker recovered on attempt "
                          f"{attempt}/{TTS_RETRY_MAX_ATTEMPTS}", flush=True)
                break

            # Non-zero rc: wait and retry unless this was the last attempt.
            if attempt < TTS_RETRY_MAX_ATTEMPTS:
                print(f"[BRIDGE {_ts()}][WARN] TtsMaker rc={code} "
                      f"(call {call_dt*1000:.0f}ms) — retrying in "
                      f"{TTS_RETRY_DELAY_S:.1f}s ({attempt}/"
                      f"{TTS_RETRY_MAX_ATTEMPTS})", flush=True)
                time.sleep(TTS_RETRY_DELAY_S)

        total_dt = time.monotonic() - total_t0
        if code != 0:
            # All attempts failed: lengthen future estimates (capped at MAX).
            self._tts_busy_count += 1
            self._tts_busy_factor = min(
                TTS_BUSY_FACTOR_MAX, self._tts_busy_factor * TTS_BUSY_FACTOR_UP
            )
            print(
                f"[BRIDGE {_ts()}][WARN] TtsMaker rc={code} after "
                f"{TTS_RETRY_MAX_ATTEMPTS} attempt(s) ({total_dt*1000:.0f}ms "
                f"total; busy_x -> {self._tts_busy_factor:.2f}) — phrase dropped",
                flush=True,
            )
        else:
            # Success: relax the factor back towards MIN.
            self._tts_busy_factor = max(
                TTS_BUSY_FACTOR_MIN, self._tts_busy_factor * TTS_BUSY_FACTOR_DOWN
            )

        self._tts_last_call_t = time.monotonic()

        # Hold the worker for the rest of the estimated speech time, on
        # success and failure alike.
        # NOTE(review): presumably the RPC returns before playback ends and
        # this keeps the next item from overlapping it — confirm.
        remaining = est - total_dt
        if remaining > 0:
            time.sleep(remaining)

    # ── Arm ─────────────────────────────────────────────────────────────────
    def reject(self, release_after: float):
        """Trigger the UNSAFE arm motion (blocking).

        Preferred path — an ``ArmReplayer`` was created at start-up: replay
        ``MOTION_UNSAFE_FILE`` (with ``MOTION_HOME_FILE``) from
        ``assets/motions/``. If either file is missing, or the replay raises,
        the problem is logged and the motion is skipped; there is NO fallback
        to the legacy path in that case.

        Legacy path — no replayer: run the high-level
        ``ExecuteAction(REJECT_ACTION)`` and, when ``release_after > 0``,
        sleep that long and then run ``ExecuteAction(RELEASE_ACTION)``. Both
        actions are looked up in the SDK ``action_map``; a missing entry is
        logged instead of raising ``KeyError``.

        Args:
            release_after: Seconds to hold the reject pose before releasing
                on the legacy path; ``<= 0`` skips the release. Ignored on
                the replay path.

        In dry-run mode nothing is executed; the intended action is logged.
        """
        if self.dry_run:
            if self._arm_replayer is not None or MOTION_ENABLED:
                print(f"[BRIDGE {_ts()}] (dry) would play motion "
                      f"{MOTION_UNSAFE_FILE}", flush=True)
            else:
                print(f"[BRIDGE {_ts()}] (dry) would run '{REJECT_ACTION}' "
                      f"then release after {release_after:.1f}s", flush=True)
            return

        # Preferred path: play a recorded motion.
        if self._arm_replayer is not None:
            from core.paths import PROJECT_ROOT
            motions_dir = PROJECT_ROOT / "assets" / "motions"
            motion_path = motions_dir / MOTION_UNSAFE_FILE
            home_path   = motions_dir / MOTION_HOME_FILE
            if not motion_path.exists() or not home_path.exists():
                print(f"[BRIDGE {_ts()}][WARN] motion files missing under "
                      f"{motions_dir} — skipping arm motion", flush=True)
                return
            print(f"[BRIDGE {_ts()}] -> play_motion {MOTION_UNSAFE_FILE}",
                  flush=True)
            t0 = time.monotonic()
            try:
                self._arm_replayer.play(motion_path, home_path, speed=MOTION_SPEED)
            except Exception as e:
                print(f"[BRIDGE {_ts()}][ERR] play_motion failed: {e}",
                      flush=True)
                return
            dt = time.monotonic() - t0
            print(f"[BRIDGE {_ts()}] motion done ({dt*1000:.0f} ms)", flush=True)
            return

        # Legacy fallback: high-level reject + release arm action.
        if self.arm_client is None or self._action_map is None:
            return
        if REJECT_ACTION not in self._action_map:
            print(f"[BRIDGE {_ts()}][ERR] '{REJECT_ACTION}' not in SDK action_map",
                  flush=True)
            return
        print(f"[BRIDGE {_ts()}] -> {REJECT_ACTION}", flush=True)
        self.arm_client.ExecuteAction(self._action_map[REJECT_ACTION])
        if release_after > 0:
            # Same guard as for the reject action: a misconfigured release
            # name must not surface as a KeyError once the arm is already in
            # the reject pose.
            if RELEASE_ACTION not in self._action_map:
                print(f"[BRIDGE {_ts()}][ERR] '{RELEASE_ACTION}' not in SDK "
                      f"action_map — arm not released", flush=True)
                return
            time.sleep(release_after)
            print(f"[BRIDGE {_ts()}] -> {RELEASE_ACTION}", flush=True)
            self.arm_client.ExecuteAction(self._action_map[RELEASE_ACTION])