# Listing metadata (not part of the module): 505 lines, 20 KiB, Python

"""Pluggable greeting-audio backends.
The Go2 has **no** first-party SDK audio-playback path. The Unitree Python
SDK's :class:`AudioClient` targets the G1 ``voice`` service (TTS / PCM stream
play); on the Go2 only :class:`VuiClient` exists (volume / brightness control).
There is no documented, SDK-exposed way to play an arbitrary WAV through the
Go2's onboard speaker. The likely real path is the DDS topic
``rt/api/audiohub/request`` -- but that is **not** wrapped by the SDK, so we do
not implement it here.

Consequently greeting audio is *pluggable*:

* :class:`NullAudio` -- logs only (mock / CI).
* :class:`HostSpeakerAudio` -- plays on the machine running GoWelcome (laptop /
  Jetson). Reliable, model-independent. The DEFAULT.
* :class:`Go2AudioHubAudio` -- EXPERIMENTAL / UNVERIFIED attempt to stream PCM
  to the robot via the G1 ``AudioClient`` (almost certainly absent on Go2
  firmware -- it degrades gracefully to a logged no-op).

Pick one with :func:`build_audio_backend`.

All heavy/optional imports (``simpleaudio``, the Unitree SDK) are performed
lazily inside methods so this module imports cleanly off-robot.
"""
from __future__ import annotations
import logging
import struct
import subprocess
import threading
import time
from pathlib import Path
from typing import List, Optional, Tuple
from config import GoWelcomeConfig
from gowelcome.robot.interface import AudioBackend
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Vendored WAV/PCM helpers (copied verbatim-in-spirit from the Unitree SDK
# example ``example/g1/audio/wav.py``). Used only by Go2AudioHubAudio. Logging
# replaces the original ``print`` calls.
# ---------------------------------------------------------------------------
def read_wav(filename: str) -> Tuple[List[int], int, int, bool]:
    """Parse a 16-bit PCM WAV file into a flat list of raw byte values.

    Vendored from the Unitree SDK ``g1/audio`` example, with the RIFF chunk
    walk made more tolerant: *any* chunk that precedes ``fmt `` or ``data``
    (``JUNK``, ``LIST``, ``bext`` ...) is skipped, and the pad byte that RIFF
    places after an odd-sized chunk is skipped with it so the following chunk
    header stays aligned.

    Only uncompressed PCM (format tag 1) with 16 bits per sample is accepted
    (the requirement for the onboard audio stream).

    Args:
        filename: Path of the WAV file to read.

    Returns:
        ``(raw_pcm_bytes, sample_rate, num_channels, ok)`` where
        ``raw_pcm_bytes`` is the ``data`` chunk payload as a list of ints in
        ``0..255``. On any error returns ``([], -1, -1, False)`` instead of
        raising; the reason is logged.
    """
    failure: Tuple[List[int], int, int, bool] = ([], -1, -1, False)
    try:
        with open(filename, "rb") as f:

            def read(fmt: str):
                # A short read makes struct.unpack raise, which the outer
                # handler turns into the failure tuple.
                return struct.unpack(fmt, f.read(struct.calcsize(fmt)))

            def find_chunk(wanted_id: int) -> int:
                """Seek to the payload of chunk ``wanted_id``; -1 if absent."""
                while True:
                    header = f.read(8)
                    if len(header) < 8:
                        return -1
                    chunk_id, size = struct.unpack("<II", header)
                    if chunk_id == wanted_id:
                        return size
                    # RIFF chunks are word-aligned: an odd-sized payload is
                    # followed by one pad byte that is not counted in size.
                    f.seek(size + (size & 1), 1)

            # === Chunk Header ===
            chunk_id, = read("<I")
            if chunk_id != 0x46464952:  # "RIFF"
                logger.error("read_wav: chunk_id != 'RIFF': %s", hex(chunk_id))
                return failure
            _chunk_size, = read("<I")
            format_tag, = read("<I")
            if format_tag != 0x45564157:  # "WAVE"
                logger.error("read_wav: format != 'WAVE': %s", hex(format_tag))
                return failure

            # === Subchunk1: fmt ===
            subchunk1_size = find_chunk(0x20746D66)  # "fmt "
            if subchunk1_size < 0:
                logger.error("read_wav: 'fmt ' chunk not found")
                return failure
            if subchunk1_size not in (16, 18):
                logger.error("read_wav: subchunk1_size != 16 or 18: %s", subchunk1_size)
                return failure
            audio_format, = read("<H")
            if audio_format != 1:
                logger.error("read_wav: audio_format != PCM (1): %s", audio_format)
                return failure
            num_channels, = read("<H")
            sample_rate, = read("<I")
            byte_rate, = read("<I")
            block_align, = read("<H")
            bits_per_sample, = read("<H")

            # Cross-check the redundant header fields against each other.
            expected_byte_rate = sample_rate * num_channels * bits_per_sample // 8
            if byte_rate != expected_byte_rate:
                logger.error(
                    "read_wav: byte_rate mismatch: got %s, expected %s",
                    byte_rate, expected_byte_rate,
                )
                return failure
            expected_align = num_channels * bits_per_sample // 8
            if block_align != expected_align:
                logger.error(
                    "read_wav: block_align mismatch: got %s, expected %s",
                    block_align, expected_align,
                )
                return failure
            if bits_per_sample != 16:
                logger.error(
                    "read_wav: only 16-bit samples supported, got %s",
                    bits_per_sample,
                )
                return failure
            if subchunk1_size == 18:
                # The 18-byte fmt variant carries a trailing extension size.
                extra_size, = read("<H")
                if extra_size != 0:
                    logger.error("read_wav: extra_size != 0: %s", extra_size)
                    return failure

            # === Subchunk2: data ===
            data_size = find_chunk(0x61746164)  # "data"
            if data_size < 0:
                logger.error("read_wav: 'data' chunk not found")
                return failure
            raw_pcm = f.read(data_size)
            if len(raw_pcm) != data_size:
                logger.error("read_wav: failed to read full PCM data")
                return failure
            return list(raw_pcm), sample_rate, num_channels, True
    except Exception as exc:  # noqa: BLE001 -- never raise out of audio
        logger.error("read_wav() failed: %s", exc)
        return failure
def play_pcm_stream(
    client,
    pcm_list: List[int],
    stream_name: str = "example",
    chunk_size: int = 96000,
    sleep_time: float = 1.0,
    verbose: bool = False,
) -> bool:
    """Stream 16-bit little-endian PCM to a client with a ``PlayStream`` method.

    Vendored from the Unitree SDK ``g1/audio`` example. The PCM is sent in
    ``chunk_size``-byte chunks (default 96000 = ~3 s at 16 kHz mono 16-bit),
    all under one stream id derived from the current time. The function
    sleeps ``sleep_time`` seconds after every accepted chunk, including the
    last one.

    Args:
        client: Object exposing ``PlayStream(stream_name, stream_id, chunk)``
            that returns a ``(return_code, _)`` pair; 0 means accepted.
        pcm_list: Raw PCM byte values (``0..255``).
        stream_name: Name passed through to ``PlayStream``.
        chunk_size: Bytes per ``PlayStream`` call. Must be positive.
        sleep_time: Pause after each chunk, in seconds. Values <= 0 disable
            the pause.
        verbose: When true, log the first few samples of every chunk at DEBUG.

    Returns:
        ``True`` if every chunk was accepted (return code 0), else ``False``.
        A non-positive ``chunk_size`` is rejected with ``False`` before
        anything is sent (it would otherwise never make progress).
    """
    if chunk_size <= 0:
        logger.error(
            "play_pcm_stream: chunk_size must be positive, got %s", chunk_size,
        )
        return False

    pcm_data = bytes(pcm_list)
    stream_id = str(int(time.time() * 1000))  # unique id from current time
    total_size = len(pcm_data)

    for chunk_index, offset in enumerate(range(0, total_size, chunk_size)):
        # Slicing clamps at the end, so the final chunk may be shorter.
        chunk = pcm_data[offset:offset + chunk_size]
        if verbose:
            # Up to the first 10 samples; ``len(chunk) - 1`` keeps the last
            # unpack inside the buffer when the chunk has an odd length.
            preview = " ".join(
                str(struct.unpack_from("<h", chunk, i)[0])
                for i in range(0, min(20, len(chunk) - 1), 2)
            )
            logger.debug(
                "[CHUNK %s] offset=%s size=%s first samples: %s",
                chunk_index, offset, len(chunk), preview,
            )
        ret_code, _ = client.PlayStream(stream_name, stream_id, chunk)
        if ret_code != 0:
            logger.error(
                "play_pcm_stream: failed to send chunk %s, return code: %s",
                chunk_index, ret_code,
            )
            return False
        logger.debug("play_pcm_stream: chunk %s sent", chunk_index)
        if sleep_time > 0:
            # time.sleep() raises on negative values; treat them as "no pause".
            time.sleep(sleep_time)
    return True
# ---------------------------------------------------------------------------
# Backends
# ---------------------------------------------------------------------------
class NullAudio(AudioBackend):
    """Silent stand-in backend for the mock robot and CI runs.

    Nothing is ever played; each request is only written to the log.
    """

    def play(self, wav_path: str, blocking: bool = False) -> bool:
        """Record the playback request in the log and report success.

        Args:
            wav_path: Clip that would have been played (not opened or checked).
            blocking: Logged for reference only.

        Returns:
            Always ``True``.
        """
        logger.info(
            "[NULL-AUDIO] would play %s (blocking=%s)",
            wav_path,
            blocking,
        )
        return True
class HostSpeakerAudio(AudioBackend):
    """Play the greeting on the *host* running GoWelcome (laptop / Jetson).

    Prefers ``simpleaudio`` (pure-Python, no external process). If that import
    fails it falls back to running ``cfg.audio.host_player_cmd`` + the WAV path
    as a subprocess (e.g. ``aplay -q <wav>``). When ``cfg.audio.output_device``
    is set, playback always goes through ``paplay --device=<sink>`` instead.
    Never raises on a missing file or backend -- it logs and returns ``False``.
    """

    # How long close() waits for a terminated player before escalating to kill.
    _TERMINATE_TIMEOUT_S = 1.0

    def __init__(self, cfg: GoWelcomeConfig) -> None:
        """Read the audio settings from ``cfg`` and probe for ``simpleaudio``.

        Args:
            cfg: Application config; ``cfg.audio.host_player_cmd`` and
                ``cfg.audio.output_device`` are read here. Either may be
                unset/empty.
        """
        self._cfg = cfg
        # ``or []`` keeps an unset command from crashing the constructor; the
        # empty list is reported later by _play_subprocess().
        self._player_cmd: List[str] = list(cfg.audio.host_player_cmd or [])
        # Optional explicit PulseAudio sink to pin a specific speaker (e.g. a
        # USB/BT speaker on the dog). When set we MUST go through paplay --
        # neither simpleaudio nor aplay targets a Pulse sink by name.
        self._output_device: str = (cfg.audio.output_device or "").strip()
        # Live handle to the in-flight playback (simpleaudio PlayObject or the
        # subprocess.Popen), so close()/non-blocking calls can manage it.
        self._sa_play = None
        self._proc: Optional[subprocess.Popen] = None
        # Resolve simpleaudio availability once, lazily. Skipped entirely when a
        # specific output device is pinned (simpleaudio can't target a sink).
        self._sa = None
        if self._output_device:
            logger.info(
                "HostSpeakerAudio: pinning output to PulseAudio sink %r "
                "(via paplay)", self._output_device,
            )
        else:
            try:
                import simpleaudio  # type: ignore

                self._sa = simpleaudio
                logger.debug("HostSpeakerAudio: using simpleaudio")
            except ImportError:
                logger.info(
                    "HostSpeakerAudio: simpleaudio not available "
                    "(pip install simpleaudio) -- falling back to subprocess %s",
                    self._player_cmd,
                )

    def play(self, wav_path: str, blocking: bool = False) -> bool:
        """Play ``wav_path`` on the host speaker; ``True`` if dispatched.

        Args:
            wav_path: Path of the WAV file to play.
            blocking: Wait for playback to finish before returning.

        Returns:
            ``False`` if the file does not exist or the chosen player failed;
            otherwise ``True``.
        """
        if not Path(wav_path).is_file():
            logger.warning("HostSpeakerAudio: wav not found: %s", wav_path)
            return False
        # A pinned sink always wins; simpleaudio is only used without one.
        if not self._output_device and self._sa is not None:
            return self._play_simpleaudio(wav_path, blocking)
        return self._play_subprocess(wav_path, blocking)

    def _play_simpleaudio(self, wav_path: str, blocking: bool) -> bool:
        """Play via simpleaudio; honour ``blocking`` with ``wait_done()``."""
        try:
            wave_obj = self._sa.WaveObject.from_wave_file(wav_path)
            self._sa_play = wave_obj.play()
            logger.info("HostSpeakerAudio: playing %s (blocking=%s)", wav_path, blocking)
            if blocking:
                self._sa_play.wait_done()
                self._sa_play = None
            return True
        except Exception as exc:  # noqa: BLE001 -- never raise out of audio
            logger.warning("HostSpeakerAudio: simpleaudio playback failed: %s", exc)
            return False

    def _play_subprocess(self, wav_path: str, blocking: bool) -> bool:
        """Play via an external player.

        When an explicit ``output_device`` sink is pinned, use
        ``paplay --device=<sink> <wav>`` (the only path that targets a specific
        PulseAudio sink -- mirrors the team's Sanad G1 stack). Otherwise spawn
        the configured ``host_player_cmd`` (default ``aplay -q``).

        Returns:
            Blocking: ``True`` only if the player exited with status 0.
            Non-blocking: ``True`` once the player process has been spawned.
        """
        if self._output_device:
            cmd = ["paplay", f"--device={self._output_device}", wav_path]
        elif self._player_cmd:
            cmd = self._player_cmd + [wav_path]
        else:
            logger.warning("HostSpeakerAudio: empty host_player_cmd; cannot play")
            return False
        try:
            self._proc = subprocess.Popen(
                cmd,
                stdout=subprocess.DEVNULL,
                # Only capture stderr when we are going to read it. In the
                # non-blocking case nobody drains the pipe, so a chatty player
                # would stall once the pipe buffer fills.
                stderr=subprocess.PIPE if blocking else subprocess.DEVNULL,
            )
            logger.info(
                "HostSpeakerAudio: spawned %s (blocking=%s)", cmd, blocking,
            )
            if blocking:
                _, stderr = self._proc.communicate()
                rc = self._proc.returncode
                self._proc = None
                if rc != 0:
                    logger.warning(
                        "HostSpeakerAudio: player exited %s: %s",
                        rc, (stderr or b"").decode(errors="replace").strip(),
                    )
                    return False
            return True
        except FileNotFoundError:
            logger.warning(
                "HostSpeakerAudio: player not found: %r "
                "(install it or set cfg.audio.host_player_cmd)", cmd[0],
            )
            return False
        except Exception as exc:  # noqa: BLE001 -- never raise out of audio
            logger.warning("HostSpeakerAudio: subprocess playback failed: %s", exc)
            return False

    def close(self) -> None:
        """Stop any in-flight playback and release resources.

        Best-effort: failures are logged at DEBUG and never raised. May wait
        briefly for a terminated player process to exit so it is reaped.
        """
        sa_play, self._sa_play = self._sa_play, None
        if sa_play is not None:
            try:
                sa_play.stop()
            except Exception as exc:  # noqa: BLE001
                logger.debug("HostSpeakerAudio: simpleaudio stop failed: %s", exc)

        proc, self._proc = self._proc, None
        # poll() also reaps a player that already exited on its own.
        if proc is not None and proc.poll() is None:
            try:
                proc.terminate()
                try:
                    proc.wait(timeout=self._TERMINATE_TIMEOUT_S)
                except subprocess.TimeoutExpired:
                    # Player ignored SIGTERM; force it and collect the status.
                    proc.kill()
                    proc.wait(timeout=self._TERMINATE_TIMEOUT_S)
            except Exception as exc:  # noqa: BLE001
                logger.debug("HostSpeakerAudio: stopping player failed: %s", exc)
class Go2AudioHubAudio(AudioBackend):
    """EXPERIMENTAL / UNVERIFIED onboard-speaker backend for the Go2.

    .. warning::
        This is **not verified to work on Go2 firmware**. The Unitree Python
        SDK exposes :class:`AudioClient` only for the **G1** ``voice`` service;
        the Go2 ships :class:`VuiClient` (volume / brightness) and has no
        documented SDK path to play an arbitrary clip. The real path is almost
        certainly the DDS topic ``rt/api/audiohub/request``, which the SDK does
        **not** wrap -- so it is *not* implemented here.

        This class optimistically tries the G1 :class:`AudioClient` (which
        chunks 16 kHz mono 16-bit PCM via ``PlayStream``). On a Go2 the service
        Init will typically fail; we log a clear "onboard audio
        unsupported/unverified -- falling back" message and return ``False``.
        Always test on hardware before relying on it.

    Requires the WAV to be **16 kHz mono 16-bit PCM**.
    """

    class _StreamCancelled(Exception):
        """Internal signal: ``close()`` ran while a clip was still streaming."""

    class _CancellableClient:
        """Proxy exposing ``PlayStream`` that aborts streams started before close().

        The streaming helper offers no cancellation hook, so the abort is
        injected at the one client method it calls.
        """

        def __init__(self, owner: "Go2AudioHubAudio", epoch: int) -> None:
            self._owner = owner
            self._client = owner._client
            self._epoch = epoch

        def PlayStream(self, stream_name, stream_id, chunk):  # noqa: N802 -- SDK name
            # close() bumps the owner's epoch; a mismatch means this stream
            # was started before the most recent close().
            if self._owner._close_epoch != self._epoch:
                raise Go2AudioHubAudio._StreamCancelled()
            return self._client.PlayStream(stream_name, stream_id, chunk)

    def __init__(self, cfg: GoWelcomeConfig) -> None:
        """Try to bring up the G1 ``AudioClient``; degrade to a no-op on failure.

        Never raises: if the SDK import or the client ``Init`` fails, the
        reason is logged and every later :meth:`play` returns ``False``.
        """
        self._cfg = cfg
        self._client = None
        self._init_ok = False
        self._play_thread: Optional[threading.Thread] = None
        # Incremented by close(); streams compare against the value they
        # started with to notice that they should stop sending chunks.
        self._close_epoch = 0
        try:
            from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient  # type: ignore
        except ImportError as exc:
            logger.warning(
                "Go2AudioHubAudio: unitree_sdk2py not available (%s) -- "
                "install the Unitree Python SDK. Onboard audio disabled.", exc,
            )
            return
        except Exception as exc:  # noqa: BLE001
            logger.warning(
                "Go2AudioHubAudio: failed to import AudioClient (%s); "
                "onboard audio unsupported/unverified.", exc,
            )
            return
        try:
            client = AudioClient()
            # The DDS channel factory must already be initialised by the robot
            # backend (Go2Robot.__init__) before this is used on hardware.
            client.SetTimeout(10.0)
            client.Init()
            self._client = client
            self._init_ok = True
            # Best-effort volume; ignored if the firmware lacks the API.
            try:
                self._client.SetVolume(int(self._cfg.greet.audio_volume))
            except Exception as exc:  # noqa: BLE001
                logger.debug("Go2AudioHubAudio: SetVolume not supported: %s", exc)
            logger.info("Go2AudioHubAudio: AudioClient init OK (UNVERIFIED on Go2)")
        except Exception as exc:  # noqa: BLE001
            logger.warning(
                "Go2AudioHubAudio: AudioClient Init failed (%s) -- Go2 onboard "
                "audio unsupported/unverified; falling back.", exc,
            )
            self._client = None
            self._init_ok = False

    def play(self, wav_path: str, blocking: bool = False) -> bool:
        """Stream the WAV to the robot speaker.

        Args:
            wav_path: Path to a 16 kHz mono 16-bit PCM WAV file.
            blocking: When true, stream on the calling thread and return the
                streaming result. When false, hand the clip to a daemon worker
                thread and return immediately.

        Returns:
            ``False`` if the client is not initialised, the file is missing or
            unparsable, or the format is not 16 kHz mono. In blocking mode,
            ``True`` only if every chunk was accepted. In non-blocking mode,
            ``True`` once the clip has been handed to the worker -- or when a
            previous clip is still streaming, in which case the new clip is
            skipped; streaming errors are only logged.
        """
        if not self._init_ok or self._client is None:
            logger.warning(
                "Go2AudioHubAudio: client not initialised -- Go2 onboard audio "
                "unsupported/unverified; falling back. (path=%s)", wav_path,
            )
            return False
        if not Path(wav_path).is_file():
            logger.warning("Go2AudioHubAudio: wav not found: %s", wav_path)
            return False
        pcm, sample_rate, num_channels, ok = read_wav(wav_path)
        if not ok:
            logger.warning("Go2AudioHubAudio: failed to parse wav: %s", wav_path)
            return False
        if sample_rate != 16000 or num_channels != 1:
            logger.warning(
                "Go2AudioHubAudio: expected 16kHz mono 16-bit PCM, got "
                "%sHz / %s channel(s): %s",
                sample_rate, num_channels, wav_path,
            )
            return False
        # Capture the epoch on the caller's thread so a close() that lands
        # before the worker gets scheduled still cancels this stream.
        epoch = self._close_epoch
        if blocking:
            return self._stream(pcm, epoch)
        # Non-blocking: play_greeting() is called from the state machine's
        # step() on the control-loop thread, which must NEVER block (it gates
        # the perception-staleness safety stop). play_pcm_stream sleeps between
        # chunks and makes synchronous PlayStream RPCs, so run it on a daemon
        # worker thread instead and return immediately.
        if self._play_thread is not None and self._play_thread.is_alive():
            logger.debug("Go2AudioHubAudio: a clip is already playing; skipping")
            return True
        self._play_thread = threading.Thread(
            target=self._stream, args=(pcm, epoch), name="Go2AudioStream", daemon=True,
        )
        self._play_thread.start()
        return True

    def _stream(self, pcm: List[int], epoch: Optional[int] = None) -> bool:
        """Stream PCM to the robot synchronously (on a worker thread when called
        non-blocking). Never raises.

        Args:
            pcm: Raw PCM byte values to send.
            epoch: ``_close_epoch`` observed when playback was requested;
                defaults to the current value.

        Returns:
            ``True`` if every chunk was accepted; ``False`` on a rejected
            chunk, an error, or cancellation by :meth:`close`.
        """
        if epoch is None:
            epoch = self._close_epoch
        guarded = self._CancellableClient(self, epoch)
        try:
            ok = play_pcm_stream(guarded, pcm, stream_name="gowelcome")
        except self._StreamCancelled:
            logger.debug("Go2AudioHubAudio: stream abandoned after close()")
            return False
        except Exception as exc:  # noqa: BLE001 -- never raise out of audio
            logger.warning(
                "Go2AudioHubAudio: playback error (%s) -- Go2 onboard audio "
                "unsupported/unverified; falling back.", exc,
            )
            return False
        if not ok:
            logger.warning(
                "Go2AudioHubAudio: PlayStream failed -- Go2 onboard audio "
                "unsupported/unverified; falling back.",
            )
        return ok

    def close(self) -> None:
        """Best-effort stop of any in-flight stream.

        Invalidates streams that are still sending (they stop before their
        next chunk) and asks the robot to stop the ``gowelcome`` stream.
        Failures are logged at DEBUG and never raised.
        """
        # Invalidate first so a worker cannot queue another chunk after the
        # stop request below.
        self._close_epoch += 1
        if self._client is not None:
            try:
                self._client.PlayStop("gowelcome")
            except Exception as exc:  # noqa: BLE001
                logger.debug("Go2AudioHubAudio: PlayStop failed: %s", exc)
def build_audio_backend(cfg: GoWelcomeConfig) -> AudioBackend:
    """Instantiate the audio backend selected by ``cfg.audio.backend``.

    The configured name is matched after trimming whitespace and lowercasing:

    * ``"host"`` -> :class:`HostSpeakerAudio`
    * ``"go2"``  -> :class:`Go2AudioHubAudio` (experimental)
    * ``"null"`` -> :class:`NullAudio`

    Any other value (including an empty or unset one) logs a warning and
    yields :class:`NullAudio`, i.e. silent but safe.
    """
    factories = {
        "host": lambda: HostSpeakerAudio(cfg),
        "go2": lambda: Go2AudioHubAudio(cfg),
        "null": NullAudio,
    }
    key = (cfg.audio.backend or "").strip().lower()
    factory = factories.get(key)
    if factory is None:
        logger.warning(
            "build_audio_backend: unknown audio backend %r -- using NullAudio "
            "(no sound). Valid: 'host', 'go2', 'null'.", cfg.audio.backend,
        )
        factory = NullAudio
    return factory()