# Saqr/robot/audio_player.py
#
# 182 lines
# 7.7 KiB
# Python

"""Plays pre-recorded WAV clips via AudioClient.PlayStream — bypasses TtsMaker.

Expected WAV format (required by the G1 audio channel):
    16 kHz, mono, 16-bit signed PCM.

Library layout under assets/audio/:
    fixed/<key>.wav            e.g. safe.wav, unsafe_generic.wav, ready.wav
    unsafe_missing/<key>.wav   e.g. helmet.wav, vest.wav, helmet_vest.wav
                               (key is sorted-joined PPE names, "_" separator)

Callers look up by (category, key). A missing clip makes play() return False
so the caller can fall back to TtsMaker.
"""
from __future__ import annotations
import datetime
import json
import time
import wave
from pathlib import Path
from typing import Dict, Optional, Tuple
from core.paths import PROJECT_ROOT
# Root directory of the clip library. AudioPlayer scans each sub-directory of
# this path for *.wav files when it is constructed.
AUDIO_ROOT = PROJECT_ROOT / "assets" / "audio"
def _ts() -> str:
    """Return the current local wall-clock time as ``HH:MM:SS.mmm`` for log prefixes."""
    # timespec="milliseconds" truncates (does not round) the microseconds,
    # yielding exactly three fractional digits.
    current = datetime.datetime.now().time()
    return current.isoformat(timespec="milliseconds")
# ── WAV format accepted for playback ────────────────────────────────────────
EXPECTED_RATE = 16_000     # samples per second
EXPECTED_CHANNELS = 1      # mono
EXPECTED_WIDTH = 2         # bytes per sample (int16)

# ── streaming parameters ────────────────────────────────────────────────────
# 3 s of audio per PlayStream call (matches the Unitree example) = 96000 bytes.
PLAY_CHUNK_BYTES = 3 * EXPECTED_RATE * EXPECTED_WIDTH * EXPECTED_CHANNELS
PLAY_APP_NAME = "saqr_audio"

# ── retry policy ────────────────────────────────────────────────────────────
# The G1 arm action and audio stack share a firmware busy state. If we try to
# PlayStream while an arm action is still being processed, chunk 0 is often
# rejected with rc=3104. Later chunks can fail too if the firmware hasn't
# fully processed the previous chunk. Both cases are retried with a linear
# back-off (delay = backoff * attempt number).
CHUNK0_RETRIES = 4
CHUNK0_BACKOFF_S = 1.0     # waits of 1, 2, 3, 4 s — 10 s total, covers a full arm cycle
CHUNKN_RETRIES = 2
CHUNKN_BACKOFF_S = 1.0     # waits of 1, 2 s — firmware usually clears within 1-2 s

# ── pacing ──────────────────────────────────────────────────────────────────
PRE_STREAM_SLEEP = 0.1     # pause after PlayStop before first PlayStream
INTER_CHUNK_MARGIN = 0.1   # extra sleep after each chunk's audio duration
def _read_wav_pcm(path: Path) -> Optional[bytes]:
    """Read *path* and hand back its raw PCM frames.

    Returns:
        The frame bytes when the file is EXPECTED_RATE Hz, EXPECTED_CHANNELS
        channel(s) and EXPECTED_WIDTH bytes per sample. ``None`` when the
        format differs or the file cannot be opened/parsed; a warning is
        printed in both of those cases.
    """
    wanted = (EXPECTED_CHANNELS, EXPECTED_WIDTH, EXPECTED_RATE)
    try:
        with wave.open(str(path), "rb") as reader:
            channels = reader.getnchannels()
            width = reader.getsampwidth()
            rate = reader.getframerate()
            if (channels, width, rate) == wanted:
                return reader.readframes(reader.getnframes())
            # Format mismatch: report what was found and reject the clip.
            print(
                f"[audio_player {_ts()}][WARN] {path}: expected "
                f"{EXPECTED_RATE} Hz mono 16-bit; got "
                f"{rate} Hz {channels}-ch {width * 8}-bit. Skipping.",
                flush=True,
            )
            return None
    except Exception as e:
        # Loading is best-effort: an unreadable clip is reported and skipped.
        print(f"[audio_player {_ts()}][WARN] failed to load {path}: {e}", flush=True)
        return None
class AudioPlayer:
    """Loads WAVs under ``assets/audio/<category>/<key>.wav`` and plays them on the G1.

    Every clip is read into memory once, at construction, and stored under the
    key ``(category, key)`` where *category* is the sub-directory name and
    *key* is the file stem. Playback is blocking and streams the PCM data to
    the audio client in chunks of PLAY_CHUNK_BYTES.
    """

    def __init__(self, audio_client):
        """Load the clip library and keep the client used for playback.

        Args:
            audio_client: Object providing ``SetVolume``, ``PlayStream``,
                ``PlayStop`` and ``_Call`` (presumably the Unitree G1
                AudioClient — confirm against the caller). May be ``None``,
                in which case :meth:`play` always returns False.
        """
        self.audio_client = audio_client
        self._clips: Dict[Tuple[str, str], bytes] = {}
        self._load_all()
        if self._clips:
            print(f"[audio_player {_ts()}] loaded {len(self._clips)} clip(s): "
                  f"{sorted(self._clips.keys())}", flush=True)
        else:
            print(f"[audio_player {_ts()}] no clips found under {AUDIO_ROOT}", flush=True)

    # ── library ─────────────────────────────────────────────────────────────
    def _load_all(self) -> None:
        """Fill ``self._clips`` from every ``<category>/<key>.wav`` under AUDIO_ROOT."""
        # is_dir() rather than exists(): iterdir() raises when the path is a
        # regular file, and a missing/invalid library should just mean "no clips".
        if not AUDIO_ROOT.is_dir():
            return
        for category_dir in sorted(AUDIO_ROOT.iterdir()):
            if not category_dir.is_dir():
                continue
            for wav_path in sorted(category_dir.glob("*.wav")):
                pcm = _read_wav_pcm(wav_path)
                if pcm is None:
                    continue  # _read_wav_pcm already printed the reason
                if not pcm:
                    # A clip with no frames would "play" successfully in
                    # silence and suppress the caller's TTS fallback.
                    print(f"[audio_player {_ts()}][WARN] {wav_path}: no audio frames. "
                          f"Skipping.", flush=True)
                    continue
                self._clips[(category_dir.name, wav_path.stem)] = pcm

    def has(self, category: str, key: str) -> bool:
        """Return True if a clip was loaded for ``(category, key)``."""
        return (category, key) in self._clips

    # ── playback ────────────────────────────────────────────────────────────
    def play(self, category: str, key: str) -> bool:
        """Blocking play. Returns True on success, False if clip missing / failed.

        Args:
            category: Sub-directory name of the clip (e.g. ``"fixed"``).
            key: File stem of the clip (e.g. ``"safe"``).

        Returns:
            True once every chunk was accepted by the audio client and the
            stream was closed. False when the clip is not loaded, no audio
            client is configured, or a chunk was rejected after all retries
            (or PlayStream raised), so the caller can fall back to TtsMaker.
        """
        pcm = self._clips.get((category, key))
        if pcm is None or self.audio_client is None:
            return False

        self._prepare_channel()
        time.sleep(PRE_STREAM_SLEEP)

        if not self._stream(pcm):
            return False

        # Short settle time after the final chunk before PlayStop.
        time.sleep(0.2)
        self._stop_quietly()
        return True

    def _prepare_channel(self) -> None:
        """Best-effort: set max volume and cancel any stream left from a previous call."""
        # Re-assert max volume on every play; the firmware sometimes resets
        # between sessions or after certain events. Matches the Unitree
        # reference script's pattern. Failure here must not block playback.
        try:
            self.audio_client.SetVolume(100)
        except Exception:
            pass
        # Cancel any in-flight stream from a previous call. The SDK import is
        # local and guarded so a missing SDK module only skips this step.
        try:
            from unitree_sdk2py.g1.audio.g1_audio_api import ROBOT_API_ID_AUDIO_STOP_PLAY
            self.audio_client._Call(
                ROBOT_API_ID_AUDIO_STOP_PLAY,
                json.dumps({"app_name": PLAY_APP_NAME}),
            )
        except Exception:
            pass

    def _stop_quietly(self) -> None:
        """Best-effort PlayStop for our app name; errors are deliberately ignored."""
        try:
            self.audio_client.PlayStop(PLAY_APP_NAME)
        except Exception:
            pass

    @staticmethod
    def _new_stream_id() -> str:
        """Return a stream id derived from the current time in milliseconds."""
        return f"saqr_{int(time.time() * 1000)}"

    def _stream(self, pcm: bytes) -> bool:
        """Send *pcm* chunk by chunk, retrying rejected chunks; True if all were accepted."""
        bytes_per_second = EXPECTED_RATE * EXPECTED_WIDTH * EXPECTED_CHANNELS
        sid = self._new_stream_id()
        offset = 0
        chunk0_attempts = 0
        chunkn_attempts = 0
        while offset < len(pcm):
            chunk = pcm[offset:offset + PLAY_CHUNK_BYTES]
            try:
                code, _ = self.audio_client.PlayStream(PLAY_APP_NAME, sid, chunk)
            except Exception as e:
                # Keep play()'s bool contract: report and fail instead of raising.
                print(f"[audio_player {_ts()}][WARN] PlayStream raised at byte {offset}: {e}",
                      flush=True)
                self._abort(offset)
                return False
            if code != 0:
                # chunk 0: likely firmware busy (arm motion colliding with audio).
                # retry with linear back-off; new sid so firmware sees a fresh stream.
                if offset == 0 and chunk0_attempts < CHUNK0_RETRIES:
                    chunk0_attempts += 1
                    delay = CHUNK0_BACKOFF_S * chunk0_attempts
                    print(f"[audio_player {_ts()}][WARN] PlayStream rc={code} at byte 0; "
                          f"retry {chunk0_attempts}/{CHUNK0_RETRIES} in {delay:.1f}s",
                          flush=True)
                    time.sleep(delay)
                    sid = self._new_stream_id()
                    continue
                # later chunks: firmware is still processing the previous chunk.
                # Retry a couple of times with back-off, keeping the same sid.
                if offset > 0 and chunkn_attempts < CHUNKN_RETRIES:
                    chunkn_attempts += 1
                    delay = CHUNKN_BACKOFF_S * chunkn_attempts
                    print(f"[audio_player {_ts()}][WARN] PlayStream rc={code} at byte {offset}; "
                          f"mid-stream retry {chunkn_attempts}/{CHUNKN_RETRIES} in {delay:.1f}s",
                          flush=True)
                    time.sleep(delay)
                    continue
                print(f"[audio_player {_ts()}][WARN] PlayStream rc={code} at byte {offset} "
                      f"(retries exhausted)", flush=True)
                self._abort(offset)
                return False
            if offset == 0 and chunk0_attempts > 0:
                print(f"[audio_player {_ts()}] chunk 0 succeeded after "
                      f"{chunk0_attempts} retry/retries", flush=True)
            elif offset > 0 and chunkn_attempts > 0:
                print(f"[audio_player {_ts()}] chunk at byte {offset} succeeded after "
                      f"{chunkn_attempts} retry/retries", flush=True)
                chunkn_attempts = 0  # reset for any subsequent chunk
            offset += len(chunk)
            # Wait for the chunk to finish playing before sending the next.
            chunk_seconds = len(chunk) / bytes_per_second
            time.sleep(chunk_seconds + INTER_CHUNK_MARGIN)
        return True

    def _abort(self, offset: int) -> None:
        """Close a partially sent stream so it is not left open for the fallback path."""
        # Nothing was accepted when offset == 0, so there is no stream to stop.
        if offset > 0:
            self._stop_quietly()