"""Pluggable greeting-audio backends.

The Go2 has **no** first-party SDK audio-playback path. The Unitree Python
SDK's :class:`AudioClient` targets the G1 ``voice`` service (TTS / PCM stream
play); on the Go2 only :class:`VuiClient` exists (volume / brightness control).
There is no documented, SDK-exposed way to play an arbitrary WAV through the
Go2's onboard speaker. The likely real path is the DDS topic
``rt/api/audiohub/request`` -- but that is **not** wrapped by the SDK, so we do
not implement it here.

Consequently greeting audio is *pluggable*:

* :class:`NullAudio` -- logs only (mock / CI).
* :class:`HostSpeakerAudio` -- plays on the machine running GoWelcome (laptop /
  Jetson). Reliable, model-independent. The DEFAULT.
* :class:`Go2AudioHubAudio` -- EXPERIMENTAL / UNVERIFIED attempt to stream PCM
  to the robot via the G1 ``AudioClient`` (almost certainly absent on Go2
  firmware -- it degrades gracefully to a logged no-op).

Pick one with :func:`build_audio_backend`.

All heavy/optional imports (``simpleaudio``, the Unitree SDK) are performed
lazily inside methods so this module imports cleanly off-robot.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import struct
|
|
import subprocess
|
|
import threading
|
|
import time
|
|
from pathlib import Path
|
|
from typing import List, Optional, Tuple
|
|
|
|
from config import GoWelcomeConfig
|
|
from gowelcome.robot.interface import AudioBackend
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Vendored WAV/PCM helpers (adapted from the Unitree SDK example
# ``example/g1/audio/wav.py``). Used only by Go2AudioHubAudio. Logging
# replaces the original ``print`` calls.
# ---------------------------------------------------------------------------
|
|
|
|
def read_wav(filename: str) -> Tuple[List[int], int, int, bool]:
    """Parse a 16-bit PCM WAV file into a flat list of raw byte values.

    Vendored from the Unitree SDK ``g1/audio`` example. Only uncompressed
    16-bit PCM is supported (the requirement for the onboard audio stream).

    Chunks other than ``fmt `` and ``data`` (``JUNK``, ``LIST``, ...) are
    skipped wherever they appear. RIFF chunks are word-aligned, so an
    odd-sized chunk is followed by one pad byte that is skipped as well.

    Args:
        filename: Path of the WAV file to read.

    Returns:
        ``(raw_pcm_bytes, sample_rate, num_channels, ok)``. On any error
        returns ``([], -1, -1, False)`` instead of raising.
    """
    try:
        with open(filename, "rb") as f:

            def read(fmt: str):
                return struct.unpack(fmt, f.read(struct.calcsize(fmt)))

            def find_chunk(wanted_id: int) -> Optional[int]:
                """Skip chunks until ``wanted_id``; return its size, or None at EOF."""
                while True:
                    header = f.read(8)
                    if len(header) < 8:
                        return None
                    found_id, size = struct.unpack("<II", header)
                    if found_id == wanted_id:
                        return size
                    # Odd-sized payloads carry one trailing pad byte.
                    f.seek(size + (size & 1), 1)

            # === Chunk Header ===
            chunk_id, = read("<I")
            if chunk_id != 0x46464952:  # "RIFF"
                logger.error("read_wav: chunk_id != 'RIFF': %s", hex(chunk_id))
                return [], -1, -1, False

            _chunk_size, = read("<I")
            format_tag, = read("<I")
            if format_tag != 0x45564157:  # "WAVE"
                logger.error("read_wav: format != 'WAVE': %s", hex(format_tag))
                return [], -1, -1, False

            # === Subchunk1: fmt ===
            subchunk1_size = find_chunk(0x20746D66)  # "fmt "
            if subchunk1_size is None:
                logger.error("read_wav: no 'fmt ' chunk found")
                return [], -1, -1, False

            if subchunk1_size not in (16, 18):
                logger.error("read_wav: subchunk1_size != 16 or 18: %s", subchunk1_size)
                return [], -1, -1, False

            audio_format, = read("<H")
            if audio_format != 1:
                logger.error("read_wav: audio_format != PCM (1): %s", audio_format)
                return [], -1, -1, False

            num_channels, = read("<H")
            sample_rate, = read("<I")
            byte_rate, = read("<I")
            block_align, = read("<H")
            bits_per_sample, = read("<H")

            expected_byte_rate = sample_rate * num_channels * bits_per_sample // 8
            if byte_rate != expected_byte_rate:
                logger.error(
                    "read_wav: byte_rate mismatch: got %s, expected %s",
                    byte_rate, expected_byte_rate,
                )
                return [], -1, -1, False

            expected_align = num_channels * bits_per_sample // 8
            if block_align != expected_align:
                logger.error(
                    "read_wav: block_align mismatch: got %s, expected %s",
                    block_align, expected_align,
                )
                return [], -1, -1, False

            if bits_per_sample != 16:
                logger.error(
                    "read_wav: only 16-bit samples supported, got %s",
                    bits_per_sample,
                )
                return [], -1, -1, False

            if subchunk1_size == 18:
                extra_size, = read("<H")
                if extra_size != 0:
                    logger.error("read_wav: extra_size != 0: %s", extra_size)
                    return [], -1, -1, False

            # === Subchunk2: data ===
            subchunk2_size = find_chunk(0x61746164)  # "data"
            if subchunk2_size is None:
                logger.error("read_wav: no 'data' chunk found")
                return [], -1, -1, False

            raw_pcm = f.read(subchunk2_size)
            if len(raw_pcm) != subchunk2_size:
                logger.error("read_wav: failed to read full PCM data")
                return [], -1, -1, False

            return list(raw_pcm), sample_rate, num_channels, True

    except Exception as exc:  # noqa: BLE001 -- never raise out of audio
        logger.error("read_wav() failed: %s", exc)
        return [], -1, -1, False
|
|
|
|
|
|
def play_pcm_stream(
    client,
    pcm_list: List[int],
    stream_name: str = "example",
    chunk_size: int = 96000,
    sleep_time: float = 1.0,
    verbose: bool = False,
) -> bool:
    """Stream 16-bit little-endian PCM to a client with a ``PlayStream`` method.

    Vendored from the Unitree SDK ``g1/audio`` example. Sends the PCM in
    ``chunk_size``-byte chunks (default 96000 = ~3 s at 16 kHz mono), pausing
    ``sleep_time`` seconds after each chunk.

    Args:
        client: Object exposing ``PlayStream(stream_name, stream_id, chunk)``
            that returns a ``(return_code, payload)`` pair.
        pcm_list: Raw PCM as byte values (anything ``bytes()`` accepts).
        stream_name: Name passed through to ``PlayStream``.
        chunk_size: Maximum number of bytes per ``PlayStream`` call; must be
            positive.
        sleep_time: Seconds to sleep after each chunk is sent.
        verbose: When true, log the first few samples of every chunk at
            DEBUG level.

    Returns:
        ``True`` if every chunk was accepted (return code 0), else ``False``.

    Raises:
        ValueError: If ``chunk_size`` is not positive. Without this guard the
            send loop would never advance and would spin forever.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    pcm_data = bytes(pcm_list)
    stream_id = str(int(time.time() * 1000))  # unique id from current time

    for chunk_index, offset in enumerate(range(0, len(pcm_data), chunk_size)):
        chunk = pcm_data[offset:offset + chunk_size]

        if verbose:
            # Decode up to the first ten 16-bit samples for the debug preview.
            preview = " ".join(
                str(struct.unpack_from("<h", chunk, i)[0])
                for i in range(0, min(20, len(chunk) - 1), 2)
            )
            logger.debug(
                "[CHUNK %s] offset=%s size=%s first samples: %s",
                chunk_index, offset, len(chunk), preview,
            )

        ret_code, _ = client.PlayStream(stream_name, stream_id, chunk)
        if ret_code != 0:
            logger.error(
                "play_pcm_stream: failed to send chunk %s, return code: %s",
                chunk_index, ret_code,
            )
            return False
        logger.debug("play_pcm_stream: chunk %s sent", chunk_index)

        time.sleep(sleep_time)

    return True
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Backends
# ---------------------------------------------------------------------------
|
|
|
|
class NullAudio(AudioBackend):
    """Silent backend for the mock robot / CI: it only logs the request."""

    def play(self, wav_path: str, blocking: bool = False) -> bool:
        """Record the clip that would have been played.

        Nothing is played; the call always reports success by returning
        ``True``.
        """
        logger.info(
            "[NULL-AUDIO] would play %s (blocking=%s)",
            wav_path,
            blocking,
        )
        return True
|
|
|
|
|
|
class HostSpeakerAudio(AudioBackend):
    """Play the greeting on the *host* running GoWelcome (laptop / Jetson).

    Playback path, in priority order:

    1. ``cfg.audio.output_device`` is set -> ``paplay --device=<sink> <wav>``.
    2. ``simpleaudio`` is importable -> in-process playback.
    3. Otherwise -> ``cfg.audio.host_player_cmd`` + the WAV path as a
       subprocess (e.g. ``aplay -q <wav>``).

    Never raises on a missing file or backend -- it logs and returns ``False``.
    """

    # Grace period (seconds) a terminated player gets before it is killed.
    _TERMINATE_TIMEOUT_S = 1.0

    def __init__(self, cfg: GoWelcomeConfig) -> None:
        self._cfg = cfg
        self._player_cmd: List[str] = list(cfg.audio.host_player_cmd)
        # Optional explicit PulseAudio sink to pin a specific speaker (e.g. a
        # USB/BT speaker on the dog). When set we MUST go through paplay --
        # neither simpleaudio nor aplay targets a Pulse sink by name.
        self._output_device: str = (cfg.audio.output_device or "").strip()
        # Live handle to the in-flight playback (simpleaudio PlayObject or the
        # subprocess.Popen), so close()/non-blocking calls can manage it.
        self._sa_play = None
        self._proc: Optional[subprocess.Popen] = None
        # Resolve simpleaudio availability once, lazily. Skipped entirely when a
        # specific output device is pinned (simpleaudio can't target a sink).
        self._sa = None
        if self._output_device:
            logger.info(
                "HostSpeakerAudio: pinning output to PulseAudio sink %r "
                "(via paplay)", self._output_device,
            )
        else:
            try:
                import simpleaudio  # type: ignore
                self._sa = simpleaudio
                logger.debug("HostSpeakerAudio: using simpleaudio")
            except ImportError:
                logger.info(
                    "HostSpeakerAudio: simpleaudio not available "
                    "(pip install simpleaudio) -- falling back to subprocess %s",
                    self._player_cmd,
                )

    def play(self, wav_path: str, blocking: bool = False) -> bool:
        """Play ``wav_path`` on the host speaker; ``True`` if dispatched.

        With ``blocking=True`` the call returns after playback has finished
        and, on the subprocess path, ``False`` is returned when the player
        exits with a non-zero status.
        """
        if not Path(wav_path).is_file():
            logger.warning("HostSpeakerAudio: wav not found: %s", wav_path)
            return False

        if self._output_device:
            return self._play_subprocess(wav_path, blocking)
        if self._sa is not None:
            return self._play_simpleaudio(wav_path, blocking)
        return self._play_subprocess(wav_path, blocking)

    def _play_simpleaudio(self, wav_path: str, blocking: bool) -> bool:
        """Play via simpleaudio; honour ``blocking`` with ``wait_done()``."""
        try:
            wave_obj = self._sa.WaveObject.from_wave_file(wav_path)
            self._sa_play = wave_obj.play()
            logger.info("HostSpeakerAudio: playing %s (blocking=%s)", wav_path, blocking)
            if blocking:
                self._sa_play.wait_done()
                self._sa_play = None
            return True
        except Exception as exc:  # noqa: BLE001 -- never raise out of audio
            logger.warning("HostSpeakerAudio: simpleaudio playback failed: %s", exc)
            return False

    def _play_subprocess(self, wav_path: str, blocking: bool) -> bool:
        """Play via an external player.

        When an explicit ``output_device`` sink is pinned, use
        ``paplay --device=<sink> <wav>`` (the only path that targets a specific
        PulseAudio sink -- mirrors the team's Sanad G1 stack). Otherwise spawn
        the configured ``host_player_cmd`` (default ``aplay -q``).

        The player's stderr is captured only in blocking mode, where it is
        drained by ``communicate()`` and reported on a non-zero exit. In
        non-blocking mode nobody reads the pipe, so stderr is discarded to
        keep a chatty player from stalling on a full pipe buffer.
        """
        if self._output_device:
            cmd = ["paplay", f"--device={self._output_device}", wav_path]
        elif self._player_cmd:
            cmd = self._player_cmd + [wav_path]
        else:
            logger.warning("HostSpeakerAudio: empty host_player_cmd; cannot play")
            return False

        self._reap_finished_player()
        try:
            proc = subprocess.Popen(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE if blocking else subprocess.DEVNULL,
            )
            self._proc = proc
            logger.info(
                "HostSpeakerAudio: spawned %s (blocking=%s)", cmd, blocking,
            )
            if blocking:
                # Work on the local handle: close() may clear self._proc from
                # another thread while we are waiting here.
                _, stderr = proc.communicate()
                rc = proc.returncode
                self._proc = None
                if rc != 0:
                    logger.warning(
                        "HostSpeakerAudio: player exited %s: %s",
                        rc, (stderr or b"").decode(errors="replace").strip(),
                    )
                    return False
            return True
        except FileNotFoundError:
            logger.warning(
                "HostSpeakerAudio: player not found: %r "
                "(install it or set cfg.audio.host_player_cmd)", cmd[0],
            )
            return False
        except Exception as exc:  # noqa: BLE001 -- never raise out of audio
            logger.warning("HostSpeakerAudio: subprocess playback failed: %s", exc)
            return False

    def _reap_finished_player(self) -> None:
        """Collect the exit status of a previous player that has already exited."""
        if self._proc is not None and self._proc.poll() is not None:
            self._proc = None

    def close(self) -> None:
        """Stop any in-flight playback and release resources.

        A running player subprocess is terminated and then waited for, so it
        does not linger as an unreaped child; if it ignores the terminate
        request it is killed after a short grace period.
        """
        if self._sa_play is not None:
            try:
                self._sa_play.stop()
            except Exception as exc:  # noqa: BLE001 -- best-effort shutdown
                logger.debug("HostSpeakerAudio: simpleaudio stop failed: %s", exc)
            self._sa_play = None

        proc, self._proc = self._proc, None
        if proc is not None and proc.poll() is None:
            try:
                proc.terminate()
                try:
                    proc.wait(timeout=self._TERMINATE_TIMEOUT_S)
                except subprocess.TimeoutExpired:
                    proc.kill()
                    proc.wait()
            except Exception as exc:  # noqa: BLE001 -- best-effort shutdown
                logger.debug("HostSpeakerAudio: failed to stop player: %s", exc)
|
|
|
|
|
|
class Go2AudioHubAudio(AudioBackend):
    """EXPERIMENTAL / UNVERIFIED onboard-speaker backend for the Go2.

    .. warning::
        This is **not verified to work on Go2 firmware**. The Unitree Python
        SDK exposes :class:`AudioClient` only for the **G1** ``voice`` service;
        the Go2 ships :class:`VuiClient` (volume / brightness) and has no
        documented SDK path to play an arbitrary clip. The real path is almost
        certainly the DDS topic ``rt/api/audiohub/request``, which the SDK does
        **not** wrap -- so it is *not* implemented here.

        This class optimistically tries the G1 :class:`AudioClient` (which
        chunks 16 kHz mono 16-bit PCM via ``PlayStream``). On a Go2 the service
        Init will typically fail; we log a clear "onboard audio
        unsupported/unverified -- falling back" message and return ``False``.
        Always test on hardware before relying on it.

    Requires the WAV to be **16 kHz mono 16-bit PCM**.
    """

    def __init__(self, cfg: GoWelcomeConfig) -> None:
        self._cfg = cfg
        self._client = None
        self._init_ok = False
        self._play_thread: Optional[threading.Thread] = None

        client_cls = self._load_audio_client_class()
        if client_cls is None:
            return

        try:
            candidate = client_cls()
            # The DDS channel factory must already be initialised by the robot
            # backend (Go2Robot.__init__) before this is used on hardware.
            candidate.SetTimeout(10.0)
            candidate.Init()
        except Exception as exc:  # noqa: BLE001
            logger.warning(
                "Go2AudioHubAudio: AudioClient Init failed (%s) -- Go2 onboard "
                "audio unsupported/unverified; falling back.", exc,
            )
            self._client = None
            self._init_ok = False
            return

        self._client = candidate
        self._init_ok = True
        # Best-effort volume; ignored if the firmware lacks the API.
        try:
            self._client.SetVolume(int(self._cfg.greet.audio_volume))
        except Exception as exc:  # noqa: BLE001
            logger.debug("Go2AudioHubAudio: SetVolume not supported: %s", exc)
        logger.info("Go2AudioHubAudio: AudioClient init OK (UNVERIFIED on Go2)")

    @staticmethod
    def _load_audio_client_class():
        """Import the SDK's G1 ``AudioClient`` lazily; ``None`` if unavailable."""
        try:
            from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient  # type: ignore
        except ImportError as exc:
            logger.warning(
                "Go2AudioHubAudio: unitree_sdk2py not available (%s) -- "
                "install the Unitree Python SDK. Onboard audio disabled.", exc,
            )
            return None
        except Exception as exc:  # noqa: BLE001
            logger.warning(
                "Go2AudioHubAudio: failed to import AudioClient (%s); "
                "onboard audio unsupported/unverified.", exc,
            )
            return None
        return AudioClient

    def play(self, wav_path: str, blocking: bool = False) -> bool:
        """Stream the WAV to the robot speaker.

        The file is validated (exists, parses, 16 kHz mono) on the calling
        thread; any validation failure returns ``False``.

        * ``blocking=True``: the PCM is streamed on the calling thread and the
          result says whether every chunk was accepted.
        * ``blocking=False``: streaming is handed to a daemon worker thread and
          ``True`` is returned as soon as it is dispatched. If an earlier clip
          is still being streamed the new one is skipped and ``True`` is
          returned as well.
        """
        if self._client is None or not self._init_ok:
            logger.warning(
                "Go2AudioHubAudio: client not initialised -- Go2 onboard audio "
                "unsupported/unverified; falling back. (path=%s)", wav_path,
            )
            return False

        if not Path(wav_path).is_file():
            logger.warning("Go2AudioHubAudio: wav not found: %s", wav_path)
            return False

        samples, rate, channels, parsed = read_wav(wav_path)
        if not parsed:
            logger.warning("Go2AudioHubAudio: failed to parse wav: %s", wav_path)
            return False
        if (rate, channels) != (16000, 1):
            logger.warning(
                "Go2AudioHubAudio: expected 16kHz mono 16-bit PCM, got "
                "%sHz / %s channel(s): %s",
                rate, channels, wav_path,
            )
            return False

        if blocking:
            return self._stream(samples)

        # Non-blocking: play_greeting() is called from the state machine's
        # step() on the control-loop thread, which must NEVER block (it gates
        # the perception-staleness safety stop). play_pcm_stream sleeps between
        # chunks and makes synchronous PlayStream RPCs, so run it on a daemon
        # worker thread instead and return immediately.
        worker = self._play_thread
        if worker is not None and worker.is_alive():
            logger.debug("Go2AudioHubAudio: a clip is already playing; skipping")
            return True

        worker = threading.Thread(
            target=self._stream,
            args=(samples,),
            name="Go2AudioStream",
            daemon=True,
        )
        self._play_thread = worker
        worker.start()
        return True

    def _stream(self, pcm: List[int]) -> bool:
        """Push ``pcm`` through ``play_pcm_stream`` on the current thread.

        Runs on the worker thread for non-blocking playback. Never raises:
        any exception is logged and reported as ``False``.
        """
        try:
            accepted = play_pcm_stream(self._client, pcm, stream_name="gowelcome")
        except Exception as exc:  # noqa: BLE001 -- never raise out of audio
            logger.warning(
                "Go2AudioHubAudio: playback error (%s) -- Go2 onboard audio "
                "unsupported/unverified; falling back.", exc,
            )
            return False

        if not accepted:
            logger.warning(
                "Go2AudioHubAudio: PlayStream failed -- Go2 onboard audio "
                "unsupported/unverified; falling back.",
            )
        return accepted

    def close(self) -> None:
        """Ask the client to stop the ``gowelcome`` stream; errors are ignored."""
        if self._client is None:
            return
        try:
            self._client.PlayStop("gowelcome")
        except Exception:  # noqa: BLE001
            pass
|
|
|
|
|
|
def build_audio_backend(cfg: GoWelcomeConfig) -> AudioBackend:
    """Instantiate the audio backend selected by ``cfg.audio.backend``.

    The configured name is matched case-insensitively after stripping
    surrounding whitespace:

    * ``"host"`` -> :class:`HostSpeakerAudio`.
    * ``"go2"``  -> :class:`Go2AudioHubAudio` (experimental).
    * ``"null"`` -> :class:`NullAudio`.

    Any other value (including an empty or missing one) logs a warning and
    yields :class:`NullAudio`, i.e. silence.
    """
    requested = cfg.audio.backend
    key = (requested or "").strip().lower()

    # Zero-argument factories so only the selected backend is constructed.
    factories = {
        "host": lambda: HostSpeakerAudio(cfg),
        "go2": lambda: Go2AudioHubAudio(cfg),
        "null": NullAudio,
    }

    factory = factories.get(key)
    if factory is None:
        logger.warning(
            "build_audio_backend: unknown audio backend %r -- using NullAudio "
            "(no sound). Valid: 'host', 'go2', 'null'.", requested,
        )
        factory = NullAudio
    return factory()
|