# GoWelcome/gowelcome/robot/audio.py

"""Pluggable greeting-audio backends.

The Go2 has **no** first-party SDK audio-playback path. The Unitree Python
SDK's :class:`AudioClient` targets the G1 ``voice`` service (TTS / PCM stream
play); on the Go2 only :class:`VuiClient` exists (volume / brightness control).
There is no documented, SDK-exposed way to play an arbitrary WAV through the
Go2's onboard speaker. The likely real path is the DDS topic
``rt/api/audiohub/request`` -- but that is **not** wrapped by the SDK, so we do
not implement it here.

Consequently greeting audio is *pluggable*:

* :class:`NullAudio`      -- logs only (mock / CI).
* :class:`HostSpeakerAudio` -- plays on the machine running GoWelcome (laptop /
  Jetson). Reliable, model-independent. The DEFAULT.
* :class:`Go2AudioHubAudio` -- EXPERIMENTAL / UNVERIFIED attempt to stream PCM
  to the robot via the G1 ``AudioClient`` (almost certainly absent on Go2
  firmware -- it degrades gracefully to a logged no-op).

Pick one with :func:`build_audio_backend`.

All heavy/optional imports (``simpleaudio``, the Unitree SDK) are performed
lazily inside methods so this module imports cleanly off-robot.
"""

from __future__ import annotations

import logging
import struct
import subprocess
import threading
import time
from pathlib import Path
from typing import List, Optional, Tuple

from config import GoWelcomeConfig
from gowelcome.robot.interface import AudioBackend

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Vendored WAV/PCM helpers (copied verbatim-in-spirit from the Unitree SDK
# example ``example/g1/audio/wav.py``). Used only by Go2AudioHubAudio. Logging
# replaces the original ``print`` calls.
# ---------------------------------------------------------------------------

def read_wav(filename: str) -> Tuple[List[int], int, int, bool]:
    """Parse a 16-bit PCM WAV file into a flat list of raw byte values.

    Adapted from the Unitree SDK ``g1/audio`` example. Two deliberate
    differences from the vendored code:

    * RIFF chunks are word-aligned, so an odd-sized chunk is followed by one
      pad byte. Skipped chunks are now stepped over *including* that pad byte
      (previously an odd-sized ``JUNK``/``LIST`` chunk misaligned the parser).
    * Any chunks preceding ``fmt `` are skipped, not just a single ``JUNK``.

    Only uncompressed PCM (format tag 1) with 16 bits per sample is accepted.

    Args:
        filename: Path of the WAV file to read.

    Returns:
        ``(raw_pcm_bytes, sample_rate, num_channels, ok)`` where
        ``raw_pcm_bytes`` is the content of the ``data`` chunk as a list of
        ints in ``0..255``. On any error the problem is logged and
        ``([], -1, -1, False)`` is returned instead of raising.
    """

    def fail(message: str, *args) -> Tuple[List[int], int, int, bool]:
        # Single place that logs a parse error and builds the failure tuple.
        logger.error(message, *args)
        return [], -1, -1, False

    try:
        with open(filename, "rb") as f:
            def read(fmt: str):
                # A short read makes struct.unpack raise struct.error.
                return struct.unpack(fmt, f.read(struct.calcsize(fmt)))

            def seek_chunk(wanted_id: int) -> int:
                # Advance to the payload of the next chunk whose id matches
                # and return its size. Raises struct.error at end of file.
                while True:
                    found_id, found_size = read("<II")
                    if found_id == wanted_id:
                        return found_size
                    # Skip payload plus the pad byte of odd-sized chunks.
                    f.seek(found_size + (found_size & 1), 1)

            # === Chunk Header ===
            chunk_id, = read("<I")
            if chunk_id != 0x46464952:  # "RIFF"
                return fail("read_wav: chunk_id != 'RIFF': %s", hex(chunk_id))

            read("<I")  # overall RIFF size -- not needed
            format_tag, = read("<I")
            if format_tag != 0x45564157:  # "WAVE"
                return fail("read_wav: format != 'WAVE': %s", hex(format_tag))

            # === Subchunk1: fmt ===
            try:
                subchunk1_size = seek_chunk(0x20746D66)  # "fmt "
            except struct.error:
                return fail("read_wav: no 'fmt ' chunk found")

            if subchunk1_size not in (16, 18):
                return fail(
                    "read_wav: subchunk1_size != 16 or 18: %s", subchunk1_size,
                )

            audio_format, = read("<H")
            if audio_format != 1:
                return fail(
                    "read_wav: audio_format != PCM (1): %s", audio_format,
                )

            num_channels, = read("<H")
            sample_rate, = read("<I")
            byte_rate, = read("<I")
            block_align, = read("<H")
            bits_per_sample, = read("<H")

            expected_byte_rate = sample_rate * num_channels * bits_per_sample // 8
            if byte_rate != expected_byte_rate:
                return fail(
                    "read_wav: byte_rate mismatch: got %s, expected %s",
                    byte_rate, expected_byte_rate,
                )

            expected_align = num_channels * bits_per_sample // 8
            if block_align != expected_align:
                return fail(
                    "read_wav: block_align mismatch: got %s, expected %s",
                    block_align, expected_align,
                )

            if bits_per_sample != 16:
                return fail(
                    "read_wav: only 16-bit samples supported, got %s",
                    bits_per_sample,
                )

            if subchunk1_size == 18:
                # 18-byte fmt chunks carry a trailing extension-size field,
                # which must be zero for plain PCM.
                extra_size, = read("<H")
                if extra_size != 0:
                    return fail("read_wav: extra_size != 0: %s", extra_size)

            # === Subchunk2: data ===
            # A missing data chunk surfaces as struct.error at end of file and
            # is reported by the generic handler below.
            subchunk2_size = seek_chunk(0x61746164)  # "data"

            raw_pcm = f.read(subchunk2_size)
            if len(raw_pcm) != subchunk2_size:
                return fail("read_wav: failed to read full PCM data")

            return list(raw_pcm), sample_rate, num_channels, True

    except Exception as exc:  # noqa: BLE001 -- never raise out of audio
        return fail("read_wav() failed: %s", exc)


def play_pcm_stream(
    client,
    pcm_list: List[int],
    stream_name: str = "example",
    chunk_size: int = 96000,
    sleep_time: float = 1.0,
    verbose: bool = False,
) -> bool:
    """Stream raw PCM bytes to a client exposing a ``PlayStream`` method.

    Adapted from the Unitree SDK ``g1/audio`` example. The PCM is sent in
    ``chunk_size``-byte chunks (default 96000 = ~3 s of 16 kHz mono 16-bit
    audio) and the function sleeps ``sleep_time`` seconds after *every* chunk,
    including the last one.

    Args:
        client: Object with ``PlayStream(stream_name, stream_id, chunk)``
            returning a ``(return_code, payload)`` pair.
        pcm_list: Raw PCM as a list of byte values (``0..255``).
        stream_name: Name passed through to ``PlayStream``.
        chunk_size: Bytes per ``PlayStream`` call; must be positive.
        sleep_time: Seconds to sleep after each chunk.
        verbose: When true, log the first samples of every chunk at DEBUG.

    Returns:
        ``True`` if every chunk was accepted (return code 0); this includes
        the empty-input case where nothing is sent. ``False`` if a chunk was
        rejected or ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        # A non-positive chunk size never advances the offset, which used to
        # spin forever; reject it up front instead.
        logger.error(
            "play_pcm_stream: chunk_size must be positive, got %s", chunk_size,
        )
        return False

    pcm_data = bytes(pcm_list)
    stream_id = str(int(time.time() * 1000))  # unique id from current time

    for chunk_index, offset in enumerate(range(0, len(pcm_data), chunk_size)):
        # Slicing clamps at the end of the buffer, so the last chunk is short.
        chunk = pcm_data[offset:offset + chunk_size]

        if verbose:
            # Decode up to the first ten little-endian 16-bit samples.
            preview = " ".join(
                str(struct.unpack_from("<h", chunk, i)[0])
                for i in range(0, min(20, len(chunk) - 1), 2)
            )
            logger.debug(
                "[CHUNK %s] offset=%s size=%s first samples: %s",
                chunk_index, offset, len(chunk), preview,
            )

        ret_code, _ = client.PlayStream(stream_name, stream_id, chunk)
        if ret_code != 0:
            logger.error(
                "play_pcm_stream: failed to send chunk %s, return code: %s",
                chunk_index, ret_code,
            )
            return False
        logger.debug("play_pcm_stream: chunk %s sent", chunk_index)

        time.sleep(sleep_time)

    return True


# ---------------------------------------------------------------------------
# Backends
# ---------------------------------------------------------------------------

class NullAudio(AudioBackend):
    """Silent stand-in backend for the mock robot and CI runs.

    Nothing is ever played; each request is only written to the log.
    """

    def play(self, wav_path: str, blocking: bool = False) -> bool:
        """Record the requested clip in the log and report success.

        Args:
            wav_path: Path of the clip that would have been played.
            blocking: Logged only; it has no effect here.

        Returns:
            Always ``True``.
        """
        template = "[NULL-AUDIO] would play %s (blocking=%s)"
        logger.info(template, wav_path, blocking)
        return True


class HostSpeakerAudio(AudioBackend):
    """Play the greeting on the *host* running GoWelcome (laptop / Jetson).

    Backend selection, in priority order:

    1. ``cfg.audio.output_device`` set -> ``paplay --device=<sink> <wav>``.
    2. ``simpleaudio`` importable      -> in-process playback.
    3. otherwise                       -> ``cfg.audio.host_player_cmd`` + the
       WAV path as a subprocess (e.g. ``aplay -q <wav>``).

    ``cfg.audio.host_player_cmd`` may be a sequence of arguments or a single
    command string (split with :func:`shlex.split`); ``None`` is treated as
    empty. Never raises on a missing file or backend -- it logs and returns
    ``False``.
    """

    # Seconds close() waits for a terminated player before escalating to kill.
    _TERMINATE_TIMEOUT_S = 1.0

    def __init__(self, cfg: GoWelcomeConfig) -> None:
        """Read the audio settings from ``cfg`` and resolve simpleaudio once."""
        self._cfg = cfg
        self._player_cmd: List[str] = self._normalise_player_cmd(
            cfg.audio.host_player_cmd
        )
        # Optional explicit PulseAudio sink to pin a specific speaker (e.g. a
        # USB/BT speaker on the dog). When set we MUST go through paplay --
        # neither simpleaudio nor aplay targets a Pulse sink by name.
        self._output_device: str = (cfg.audio.output_device or "").strip()
        # Live handle to the in-flight playback (simpleaudio PlayObject or the
        # subprocess.Popen), so close()/non-blocking calls can manage it.
        self._sa_play = None
        self._proc: Optional[subprocess.Popen] = None
        # simpleaudio module, or None when unavailable. Skipped entirely when a
        # specific output device is pinned (simpleaudio can't target a sink).
        self._sa = None
        if self._output_device:
            logger.info(
                "HostSpeakerAudio: pinning output to PulseAudio sink %r "
                "(via paplay)", self._output_device,
            )
        else:
            try:
                import simpleaudio  # type: ignore
                self._sa = simpleaudio
                logger.debug("HostSpeakerAudio: using simpleaudio")
            except ImportError:
                logger.info(
                    "HostSpeakerAudio: simpleaudio not available "
                    "(pip install simpleaudio) -- falling back to subprocess %s",
                    self._player_cmd,
                )

    @staticmethod
    def _normalise_player_cmd(raw) -> List[str]:
        """Turn the configured player command into an argv list.

        ``None``/empty yields ``[]`` (reported later as "empty
        host_player_cmd"); a string is shell-split rather than exploded into
        single characters; any other iterable is copied into a list.
        """
        if not raw:
            return []
        if isinstance(raw, str):
            import shlex  # local: keeps the module's import block untouched
            return shlex.split(raw)
        return list(raw)

    def play(self, wav_path: str, blocking: bool = False) -> bool:
        """Play ``wav_path`` on the host speaker.

        Args:
            wav_path: Path of the WAV file to play.
            blocking: Wait for playback to finish before returning.

        Returns:
            ``True`` if playback was dispatched (and, when ``blocking`` with a
            subprocess player, exited with status 0); ``False`` otherwise.
        """
        if not Path(wav_path).is_file():
            logger.warning("HostSpeakerAudio: wav not found: %s", wav_path)
            return False

        # A pinned sink always wins: only paplay can target it.
        if self._output_device:
            return self._play_subprocess(wav_path, blocking)
        if self._sa is not None:
            return self._play_simpleaudio(wav_path, blocking)
        return self._play_subprocess(wav_path, blocking)

    def _play_simpleaudio(self, wav_path: str, blocking: bool) -> bool:
        """Play via simpleaudio; honour ``blocking`` with ``wait_done()``."""
        try:
            wave_obj = self._sa.WaveObject.from_wave_file(wav_path)
            self._sa_play = wave_obj.play()
            logger.info("HostSpeakerAudio: playing %s (blocking=%s)", wav_path, blocking)
            if blocking:
                self._sa_play.wait_done()
                self._sa_play = None
            return True
        except Exception as exc:  # noqa: BLE001 -- never raise out of audio
            logger.warning("HostSpeakerAudio: simpleaudio playback failed: %s", exc)
            return False

    def _play_subprocess(self, wav_path: str, blocking: bool) -> bool:
        """Play via an external player.

        When an explicit ``output_device`` sink is pinned, use
        ``paplay --device=<sink> <wav>`` (the only path that targets a specific
        PulseAudio sink -- mirrors the team's Sanad G1 stack). Otherwise spawn
        the configured ``host_player_cmd`` (default ``aplay -q``).

        Returns:
            Non-blocking: ``True`` once the player process was spawned.
            Blocking: ``True`` only if the player exited with status 0.
        """
        if self._output_device:
            cmd = ["paplay", f"--device={self._output_device}", wav_path]
        elif self._player_cmd:
            cmd = self._player_cmd + [wav_path]
        else:
            logger.warning("HostSpeakerAudio: empty host_player_cmd; cannot play")
            return False
        try:
            proc = subprocess.Popen(
                cmd,
                stdout=subprocess.DEVNULL,
                # stderr is only read on the blocking path. Leaving an unread
                # pipe open for a fire-and-forget player can stall it once the
                # pipe buffer fills, so discard it in that case.
                stderr=subprocess.PIPE if blocking else subprocess.DEVNULL,
            )
            self._proc = proc
            logger.info(
                "HostSpeakerAudio: spawned %s (blocking=%s)", cmd, blocking,
            )
            if not blocking:
                return True

            # Use the local handle: close() may reset self._proc meanwhile.
            _, stderr = proc.communicate()
            if self._proc is proc:
                self._proc = None
            if proc.returncode != 0:
                logger.warning(
                    "HostSpeakerAudio: player exited %s: %s",
                    proc.returncode,
                    (stderr or b"").decode(errors="replace").strip(),
                )
                return False
            return True
        except FileNotFoundError:
            logger.warning(
                "HostSpeakerAudio: player not found: %r "
                "(install it or set cfg.audio.host_player_cmd)", cmd[0],
            )
            return False
        except Exception as exc:  # noqa: BLE001 -- never raise out of audio
            logger.warning("HostSpeakerAudio: subprocess playback failed: %s", exc)
            return False

    def close(self) -> None:
        """Stop any in-flight playback and release resources. Never raises."""
        sa_play, self._sa_play = self._sa_play, None
        if sa_play is not None:
            try:
                sa_play.stop()
            except Exception as exc:  # noqa: BLE001 -- best-effort stop
                logger.debug("HostSpeakerAudio: simpleaudio stop failed: %s", exc)

        # Always drop the handle, even if the player already exited.
        proc, self._proc = self._proc, None
        if proc is not None and proc.poll() is None:
            try:
                proc.terminate()
                try:
                    # Reap the child so it does not linger as a zombie.
                    proc.wait(timeout=self._TERMINATE_TIMEOUT_S)
                except subprocess.TimeoutExpired:
                    proc.kill()
                    proc.wait(timeout=self._TERMINATE_TIMEOUT_S)
            except Exception as exc:  # noqa: BLE001 -- best-effort stop
                logger.debug("HostSpeakerAudio: stopping player failed: %s", exc)


class Go2AudioHubAudio(AudioBackend):
    """EXPERIMENTAL / UNVERIFIED onboard-speaker backend for the Go2.

    .. warning::
        This is **not verified to work on Go2 firmware**. The Unitree Python
        SDK exposes :class:`AudioClient` only for the **G1** ``voice`` service;
        the Go2 ships :class:`VuiClient` (volume / brightness) and has no
        documented SDK path to play an arbitrary clip. The real path is almost
        certainly the DDS topic ``rt/api/audiohub/request``, which the SDK does
        **not** wrap -- so it is *not* implemented here.

        This class optimistically tries the G1 :class:`AudioClient` (which
        chunks 16 kHz mono 16-bit PCM via ``PlayStream``). On a Go2 the service
        Init will typically fail; we log a clear "onboard audio
        unsupported/unverified -- falling back" message and return ``False``.
        Always test on hardware before relying on it.

    Requires the WAV to be **16 kHz mono 16-bit PCM**.
    """

    # Stream name used for both PlayStream and PlayStop.
    _STREAM_NAME = "gowelcome"

    class _StreamCancelled(Exception):
        """Raised inside a stream to abort it after ``close()`` was called."""

    class _CancellableClient:
        """Proxy around the audio client that refuses further chunks once the
        associated stop event is set.

        ``play_pcm_stream`` has no cancellation hook and only ever calls
        ``PlayStream`` on the client it is given, so cancellation is injected
        here: without it, a multi-chunk clip would resume with its next chunk
        after ``close()`` issued ``PlayStop``.
        """

        def __init__(self, client, stop_event: threading.Event) -> None:
            self._client = client
            self._stop_event = stop_event

        def PlayStream(self, stream_name, stream_id, chunk):  # noqa: N802
            if self._stop_event.is_set():
                raise Go2AudioHubAudio._StreamCancelled()
            return self._client.PlayStream(stream_name, stream_id, chunk)

    def __init__(self, cfg: GoWelcomeConfig) -> None:
        """Try to create and initialise the G1 ``AudioClient``.

        Any import or init failure is logged and leaves the backend in a
        disabled state in which :meth:`play` returns ``False``.
        """
        self._cfg = cfg
        self._client = None
        self._init_ok = False
        self._play_thread: Optional[threading.Thread] = None
        # Stop flag of the most recently started playback; set by close().
        self._stop_event = threading.Event()
        try:
            from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient  # type: ignore
        except ImportError as exc:
            logger.warning(
                "Go2AudioHubAudio: unitree_sdk2py not available (%s) -- "
                "install the Unitree Python SDK. Onboard audio disabled.", exc,
            )
            return
        except Exception as exc:  # noqa: BLE001
            logger.warning(
                "Go2AudioHubAudio: failed to import AudioClient (%s); "
                "onboard audio unsupported/unverified.", exc,
            )
            return

        try:
            client = AudioClient()
            # The DDS channel factory must already be initialised by the robot
            # backend (Go2Robot.__init__) before this is used on hardware.
            client.SetTimeout(10.0)
            client.Init()
            self._client = client
            self._init_ok = True
            # Best-effort volume; ignored if the firmware lacks the API.
            try:
                self._client.SetVolume(int(self._cfg.greet.audio_volume))
            except Exception as exc:  # noqa: BLE001
                logger.debug("Go2AudioHubAudio: SetVolume not supported: %s", exc)
            logger.info("Go2AudioHubAudio: AudioClient init OK (UNVERIFIED on Go2)")
        except Exception as exc:  # noqa: BLE001
            logger.warning(
                "Go2AudioHubAudio: AudioClient Init failed (%s) -- Go2 onboard "
                "audio unsupported/unverified; falling back.", exc,
            )
            self._client = None
            self._init_ok = False

    def play(self, wav_path: str, blocking: bool = False) -> bool:
        """Stream the WAV to the robot speaker.

        The file is validated and parsed on the calling thread in both modes.

        Args:
            wav_path: Path of a 16 kHz mono 16-bit PCM WAV file.
            blocking: When true, stream on the calling thread and return only
                after the last chunk was sent. When false, hand the stream to
                a daemon worker thread and return immediately.

        Returns:
            Blocking: ``True`` only if every chunk was accepted.
            Non-blocking: ``True`` if the stream was handed to the worker, or
            if a previous clip is still streaming (the new request is then
            skipped); the eventual streaming result is only logged.
            ``False`` if the client is not initialised, the file is missing,
            or it is not 16 kHz mono 16-bit PCM.
        """
        if not self._init_ok or self._client is None:
            logger.warning(
                "Go2AudioHubAudio: client not initialised -- Go2 onboard audio "
                "unsupported/unverified; falling back. (path=%s)", wav_path,
            )
            return False

        if not Path(wav_path).is_file():
            logger.warning("Go2AudioHubAudio: wav not found: %s", wav_path)
            return False

        pcm, sample_rate, num_channels, ok = read_wav(wav_path)
        if not ok:
            logger.warning("Go2AudioHubAudio: failed to parse wav: %s", wav_path)
            return False
        if sample_rate != 16000 or num_channels != 1:
            logger.warning(
                "Go2AudioHubAudio: expected 16kHz mono 16-bit PCM, got "
                "%sHz / %s channel(s): %s",
                sample_rate, num_channels, wav_path,
            )
            return False

        if blocking:
            stop_event = threading.Event()
            self._stop_event = stop_event
            return self._stream(pcm, stop_event)

        # Non-blocking: play_greeting() is called from the state machine's
        # step() on the control-loop thread, which must NEVER block (it gates
        # the perception-staleness safety stop). play_pcm_stream sleeps between
        # chunks and makes synchronous PlayStream RPCs, so run it on a daemon
        # worker thread instead and return immediately.
        if self._play_thread is not None and self._play_thread.is_alive():
            logger.debug("Go2AudioHubAudio: a clip is already playing; skipping")
            return True
        # Fresh event per playback so an earlier close() does not cancel it.
        stop_event = threading.Event()
        self._stop_event = stop_event
        self._play_thread = threading.Thread(
            target=self._stream,
            args=(pcm, stop_event),
            name="Go2AudioStream",
            daemon=True,
        )
        self._play_thread.start()
        return True

    def _stream(
        self,
        pcm: List[int],
        stop_event: Optional[threading.Event] = None,
    ) -> bool:
        """Stream PCM to the robot synchronously (on a worker thread when called
        non-blocking). Never raises.

        Args:
            pcm: Raw PCM byte values as returned by :func:`read_wav`.
            stop_event: When given, the stream aborts before sending its next
                chunk once this event is set.

        Returns:
            ``True`` if every chunk was accepted; ``False`` on rejection,
            error, or cancellation.
        """
        client = self._client
        if stop_event is not None:
            client = self._CancellableClient(client, stop_event)
        try:
            ok = play_pcm_stream(client, pcm, stream_name=self._STREAM_NAME)
            if not ok:
                logger.warning(
                    "Go2AudioHubAudio: PlayStream failed -- Go2 onboard audio "
                    "unsupported/unverified; falling back.",
                )
            return ok
        except self._StreamCancelled:
            logger.debug("Go2AudioHubAudio: stream cancelled by close()")
            return False
        except Exception as exc:  # noqa: BLE001 -- never raise out of audio
            logger.warning(
                "Go2AudioHubAudio: playback error (%s) -- Go2 onboard audio "
                "unsupported/unverified; falling back.", exc,
            )
            return False

    def close(self) -> None:
        """Best-effort stop of any in-flight stream. Never raises.

        Flags the current playback as cancelled so it sends no further chunks,
        then asks the robot to stop the stream. Does not wait for the worker
        thread to exit.
        """
        self._stop_event.set()
        if self._client is not None:
            try:
                self._client.PlayStop(self._STREAM_NAME)
            except Exception as exc:  # noqa: BLE001 -- best-effort stop
                logger.debug("Go2AudioHubAudio: PlayStop failed: %s", exc)


def build_audio_backend(cfg: GoWelcomeConfig) -> AudioBackend:
    """Instantiate the audio backend selected by ``cfg.audio.backend``.

    The configured name is compared after stripping whitespace and
    lower-casing; a missing (falsy) value is treated as an empty name.

    Recognised names:
        * ``"host"`` -> :class:`HostSpeakerAudio`.
        * ``"go2"``  -> :class:`Go2AudioHubAudio` (experimental).
        * ``"null"`` -> :class:`NullAudio`.

    Any other name -- including an empty one -- is reported with a warning and
    yields a silent :class:`NullAudio`.
    """
    key = (cfg.audio.backend or "").strip().lower()

    # Name -> zero-argument factory; NullAudio takes no configuration.
    factories = {
        "host": lambda: HostSpeakerAudio(cfg),
        "go2": lambda: Go2AudioHubAudio(cfg),
        "null": NullAudio,
    }

    make_backend = factories.get(key)
    if make_backend is not None:
        return make_backend()

    logger.warning(
        "build_audio_backend: unknown audio backend %r -- using NullAudio "
        "(no sound). Valid: 'host', 'go2', 'null'.", cfg.audio.backend,
    )
    return NullAudio()