# Saqr/robot/audio_player.py

"""Plays pre-recorded WAV clips via AudioClient.PlayStream — bypasses TtsMaker.

Expected WAV format (required by the G1 audio channel):
    16 kHz, mono, 16-bit signed PCM.

Library layout under assets/audio/:
    fixed/<key>.wav              e.g. safe.wav, unsafe_generic.wav, ready.wav
    unsafe_missing/<key>.wav     e.g. helmet.wav, vest.wav, helmet_vest.wav
                                  (key is sorted-joined PPE names, "_" separator)

Callers look up by (category, key). Missing clip → returns False so the caller
can fall back to TtsMaker.
"""
from __future__ import annotations

import datetime
import json
import time
import wave
from pathlib import Path
from typing import Dict, Optional, Tuple

from core.paths import PROJECT_ROOT

# Root of the clip library. Each immediate sub-directory is treated as one
# clip category and each ``*.wav`` inside it as one clip.
AUDIO_ROOT = PROJECT_ROOT / "assets" / "audio"


def _ts() -> str:
    return datetime.datetime.now().strftime("%H:%M:%S.%f")[:-3]

# Required clip format. _read_wav_pcm skips any WAV that does not match all
# three values.
EXPECTED_RATE     = 16000      # samples per second (Hz)
EXPECTED_CHANNELS = 1          # mono
EXPECTED_WIDTH    = 2          # bytes per sample (int16)
# 96000 bytes / (16000 Hz * 2 bytes per sample) = 3 s of audio per PlayStream
# call (matches the Unitree example).
PLAY_CHUNK_BYTES  = 96000      # 3 s per PlayStream call (matches the Unitree example)
PLAY_APP_NAME     = "saqr_audio"   # app name sent with every PlayStream / stop request

# The G1 arm action and audio stack share a firmware busy state. If we try to
# PlayStream while an arm action is still being processed, chunk 0 is often
# rejected with rc=3104. Later chunks can fail too if the firmware hasn't
# fully processed the previous chunk. Retry both cases with back-off.
# Back-off is linear: the delay before retry k is BACKOFF_S * k seconds.
CHUNK0_RETRIES     = 4
CHUNK0_BACKOFF_S   = 1.0    # 1,2,3,4 s — total ≈ 10 s, covers a full arm cycle
CHUNKN_RETRIES     = 2
CHUNKN_BACKOFF_S   = 1.0    # 1,2 s — firmware usually clears within 1-2 s
PRE_STREAM_SLEEP   = 0.1    # pause (seconds) after the pre-stream stop, before first PlayStream
INTER_CHUNK_MARGIN = 0.1    # extra sleep (seconds) after each chunk's audio duration


def _read_wav_pcm(path: Path) -> Optional[bytes]:
    """Load *path* and hand back its PCM payload, or None when it is unusable.

    A clip is usable only when its channel count, sample width and frame rate
    equal EXPECTED_CHANNELS, EXPECTED_WIDTH and EXPECTED_RATE. A format
    mismatch or any error while opening/reading the file is reported as a
    warning on stdout and yields None; nothing is raised to the caller.
    """
    try:
        with wave.open(str(path), "rb") as reader:
            channels = reader.getnchannels()
            width = reader.getsampwidth()
            rate = reader.getframerate()
            actual = (channels, width, rate)
            if actual == (EXPECTED_CHANNELS, EXPECTED_WIDTH, EXPECTED_RATE):
                # Happy path: return every frame in the file as raw bytes.
                return reader.readframes(reader.getnframes())
            print(
                f"[audio_player {_ts()}][WARN] {path}: expected "
                f"{EXPECTED_RATE} Hz mono 16-bit; got "
                f"{rate} Hz {channels}-ch {width*8}-bit. Skipping.",
                flush=True,
            )
            return None
    except Exception as err:
        print(f"[audio_player {_ts()}][WARN] failed to load {path}: {err}", flush=True)
        return None


class AudioPlayer:
    """Loads WAVs under ``assets/audio/<category>/<key>.wav`` and plays them on the G1.

    All clips are read once at construction time and kept in memory as raw
    PCM bytes, keyed by ``(category, key)`` where *category* is the
    sub-directory name and *key* is the file stem.
    """

    def __init__(self, audio_client):
        """Store *audio_client* and eagerly load every valid clip under AUDIO_ROOT.

        Args:
            audio_client: Object providing ``SetVolume``, ``PlayStream``,
                ``PlayStop`` and ``_Call`` (the only members used by this
                class). May be ``None``; :meth:`play` then always returns
                ``False``.
        """
        self.audio_client = audio_client
        self._clips: Dict[Tuple[str, str], bytes] = {}
        self._load_all()
        if self._clips:
            print(f"[audio_player {_ts()}] loaded {len(self._clips)} clip(s): "
                  f"{sorted(self._clips.keys())}", flush=True)
        else:
            print(f"[audio_player {_ts()}] no clips found under {AUDIO_ROOT}", flush=True)

    # ── library ─────────────────────────────────────────────────────────────
    def _load_all(self) -> None:
        """Fill ``self._clips`` from every ``<category>/<key>.wav`` under AUDIO_ROOT.

        Files that ``_read_wav_pcm`` rejects (returns ``None`` for) are skipped.
        """
        # is_dir() rather than exists(): if AUDIO_ROOT exists but is not a
        # directory, iterdir() would raise and abort construction.
        if not AUDIO_ROOT.is_dir():
            return
        for category_dir in sorted(AUDIO_ROOT.iterdir()):
            if not category_dir.is_dir():
                continue
            for wav_path in sorted(category_dir.glob("*.wav")):
                pcm = _read_wav_pcm(wav_path)
                if pcm is not None:
                    self._clips[(category_dir.name, wav_path.stem)] = pcm

    def has(self, category: str, key: str) -> bool:
        """Return True if a clip for ``(category, key)`` was loaded."""
        return (category, key) in self._clips

    # ── playback ────────────────────────────────────────────────────────────
    @staticmethod
    def _new_stream_id() -> str:
        """Return a stream id built from the current wall-clock time in ms."""
        return f"saqr_{int(time.time() * 1000)}"

    def _prepare_output(self) -> None:
        """Best-effort volume reset and cancellation of any in-flight stream.

        Failures are logged and otherwise ignored so that playback is still
        attempted.
        """
        # Re-assert max volume on every play; the firmware sometimes resets
        # between sessions or after certain events. Matches the Unitree
        # reference script's pattern.
        try:
            self.audio_client.SetVolume(100)
        except Exception as e:
            print(f"[audio_player {_ts()}][WARN] SetVolume(100) failed: {e}", flush=True)

        # Cancel any in-flight stream from a previous call. The SDK import is
        # local so this module still imports when the SDK is not installed.
        try:
            from unitree_sdk2py.g1.audio.g1_audio_api import ROBOT_API_ID_AUDIO_STOP_PLAY
            self.audio_client._Call(
                ROBOT_API_ID_AUDIO_STOP_PLAY,
                json.dumps({"app_name": PLAY_APP_NAME}),
            )
        except Exception as e:
            print(f"[audio_player {_ts()}][WARN] pre-stream stop failed: {e}", flush=True)

    def play(self, category: str, key: str) -> bool:
        """Blocking play. Returns True on success, False if clip missing / failed.

        The clip is sent in PLAY_CHUNK_BYTES-sized chunks; after each accepted
        chunk this method sleeps for the chunk's audio duration plus
        INTER_CHUNK_MARGIN before sending the next one. A non-zero return code
        from ``PlayStream`` is retried with linear back-off (up to
        CHUNK0_RETRIES times for the first chunk, CHUNKN_RETRIES times for each
        later chunk) before giving up.

        Args:
            category: Clip category (sub-directory name under AUDIO_ROOT).
            key: Clip key (WAV file stem).

        Returns:
            ``True`` once every chunk has been accepted; ``False`` if the clip
            is not loaded, there is no audio client, or retries are exhausted.
        """
        pcm = self._clips.get((category, key))
        if pcm is None or self.audio_client is None:
            return False

        self._prepare_output()
        time.sleep(PRE_STREAM_SLEEP)

        sid = self._new_stream_id()
        offset = 0
        chunk0_attempts = 0
        chunkn_attempts = 0
        while offset < len(pcm):
            chunk = pcm[offset:offset + PLAY_CHUNK_BYTES]
            code, _ = self.audio_client.PlayStream(PLAY_APP_NAME, sid, chunk)

            if code != 0:
                # chunk 0: likely firmware busy (arm motion colliding with audio).
                # retry with linear back-off; new sid so firmware sees a fresh stream.
                if offset == 0 and chunk0_attempts < CHUNK0_RETRIES:
                    chunk0_attempts += 1
                    delay = CHUNK0_BACKOFF_S * chunk0_attempts
                    print(f"[audio_player {_ts()}][WARN] PlayStream rc={code} at byte 0; "
                          f"retry {chunk0_attempts}/{CHUNK0_RETRIES} in {delay:.1f}s",
                          flush=True)
                    time.sleep(delay)
                    sid = self._new_stream_id()
                    continue

                # later chunks: firmware is still processing the previous chunk.
                # Retry a couple of times with back-off, keeping the same sid.
                if offset > 0 and chunkn_attempts < CHUNKN_RETRIES:
                    chunkn_attempts += 1
                    delay = CHUNKN_BACKOFF_S * chunkn_attempts
                    print(f"[audio_player {_ts()}][WARN] PlayStream rc={code} at byte {offset}; "
                          f"mid-stream retry {chunkn_attempts}/{CHUNKN_RETRIES} in {delay:.1f}s",
                          flush=True)
                    time.sleep(delay)
                    continue

                print(f"[audio_player {_ts()}][WARN] PlayStream rc={code} at byte {offset} "
                      f"(retries exhausted)", flush=True)
                return False

            if offset == 0 and chunk0_attempts > 0:
                print(f"[audio_player {_ts()}] chunk 0 succeeded after "
                      f"{chunk0_attempts} retry/retries", flush=True)
            elif offset > 0 and chunkn_attempts > 0:
                print(f"[audio_player {_ts()}] chunk at byte {offset} succeeded after "
                      f"{chunkn_attempts} retry/retries", flush=True)
                chunkn_attempts = 0   # reset for any subsequent chunk

            offset += len(chunk)
            # Wait for the chunk to finish playing before sending the next.
            chunk_seconds = len(chunk) / (EXPECTED_RATE * EXPECTED_WIDTH)
            time.sleep(chunk_seconds + INTER_CHUNK_MARGIN)

        # Short settle time after the final chunk before PlayStop.
        time.sleep(0.2)
        try:
            self.audio_client.PlayStop(PLAY_APP_NAME)
        except Exception as e:
            # The audio has already been streamed, so a failed stop is only logged.
            print(f"[audio_player {_ts()}][WARN] PlayStop failed: {e}", flush=True)
        return True