346 lines
11 KiB
Python
346 lines
11 KiB
Python
"""Hardware-agnostic audio I/O for Marcus voice pipelines.

Direct port of /home/zedx/Robotics_workspace/yslootahtech/Project/Sanad/voice/audio_io.py,
with USB mic/speaker profiles (Anker/Hollyland) removed — Marcus only uses the
G1 on-board profile. Class names and method signatures match Sanad verbatim so
the rest of the Gemini brain code lifts over cleanly.

Mics deliver int16 mono PCM at 16 kHz.
Speakers accept int16 mono PCM plus a `source_rate` and resample internally.

Usage:

    audio = AudioIO.from_profile("builtin", audio_client=ac)
    audio.start()
    try:
        chunk = audio.mic.read_chunk(1024)
        audio.speaker.begin_stream()
        audio.speaker.send_chunk(pcm_24k, 24000)
        audio.speaker.wait_finish()
    finally:
        audio.stop()
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
import socket
|
|
import struct
|
|
import subprocess
|
|
import sys
|
|
import threading
|
|
import time
|
|
from abc import ABC, abstractmethod
|
|
from dataclasses import dataclass, field
|
|
from typing import Any, Optional, Union
|
|
|
|
import numpy as np
|
|
|
|
# Put the directory two levels above this file on sys.path before importing
# `Core.config_loader` (presumably that is the project root — confirm layout).
_PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _PROJECT_DIR not in sys.path:
    sys.path.insert(0, _PROJECT_DIR)

# Created before the config is loaded so that a load failure can be reported.
log = logging.getLogger("audio_io")

# The Voice config is optional: any failure to import or load it falls back to
# the defaults below. The failure is logged rather than swallowed silently so
# that a broken config does not go unnoticed.
try:
    from Core.config_loader import load_config
    _VCFG = load_config("Voice") or {}
except Exception as exc:
    log.warning("Voice config unavailable (%s); using built-in defaults", exc)
    _VCFG = {}

# Per-device sub-sections; a missing or empty section becomes an empty dict.
_MIC_CFG = _VCFG.get("mic_udp", {}) or {}
_SP_CFG = _VCFG.get("speaker", {}) or {}

# Sample rate (Hz) that every Mic implementation reports.
TARGET_MIC_RATE = 16_000

# UDP multicast endpoint and buffering parameters used by BuiltinMic.
_MCAST_GRP = _MIC_CFG.get("group", "239.168.123.161")
_MCAST_PORT = int(_MIC_CFG.get("port", 5555))
_MIC_BUF_MAX = int(_MIC_CFG.get("buffer_max_bytes", 64_000))
_MIC_READ_TIMEOUT = float(_MIC_CFG.get("read_timeout_sec", 0.04))

# Anything the speakers accept as PCM input.
PCMLike = Union[bytes, bytearray, memoryview, np.ndarray]
|
|
|
|
|
|
def _find_g1_local_ip() -> str:
    """Return this host's IPv4 address on the 192.168.123.x network.

    Runs ``ip -4 -o addr`` and returns the first whitespace-separated token
    that starts with ``192.168.123.``, with any ``/prefix`` suffix removed.

    Raises:
        RuntimeError: if no such token appears in the command output.
    """
    listing = subprocess.run(
        ["ip", "-4", "-o", "addr"], capture_output=True, text=True,
    ).stdout
    # Flatten the output into tokens and take the first one on the G1 subnet.
    candidate = next(
        (
            token
            for row in listing.splitlines()
            for token in row.split()
            if token.startswith("192.168.123.")
        ),
        None,
    )
    if candidate is None:
        raise RuntimeError("no 192.168.123.x interface found")
    return candidate.split("/")[0]
|
|
|
|
|
|
def _resample_int16(pcm: np.ndarray, src_rate: int, dst_rate: int) -> np.ndarray:
    """Resample `pcm` from `src_rate` to `dst_rate` by linear interpolation.

    When the rates are equal or `pcm` is empty, the input is returned as
    int16 without resampling. Otherwise the output has
    ``max(1, int(len(pcm) * dst_rate / src_rate))`` samples, cast to int16.
    """
    if src_rate == dst_rate or pcm.size == 0:
        return pcm.astype(np.int16, copy=False)

    n_in = len(pcm)
    n_out = max(1, int(n_in * dst_rate / src_rate))
    # Output sample k is read at fractional input position k * n_in / n_out;
    # positions past the last input sample take the last sample's value.
    read_positions = np.linspace(0, n_in, n_out, endpoint=False)
    input_positions = np.arange(n_in)
    interpolated = np.interp(
        read_positions, input_positions, pcm.astype(np.float64),
    )
    return interpolated.astype(np.int16)
|
|
|
|
|
|
def _as_int16_array(pcm: PCMLike) -> np.ndarray:
    """Return `pcm` as an int16 NumPy array.

    Args:
        pcm: Either a NumPy array (cast to int16, without copying when it is
            already int16) or a bytes-like object whose raw bytes are
            reinterpreted as int16 samples.

    Returns:
        An int16 array. For bytes-like input of odd length the trailing
        byte is ignored, because it cannot form a complete 16-bit sample.
    """
    if isinstance(pcm, np.ndarray):
        return pcm.astype(np.int16, copy=False)
    raw = bytes(pcm)
    # np.frombuffer raises ValueError when the buffer size is not a multiple
    # of the element size, so drop a dangling half-sample instead of crashing
    # the playback path on a truncated chunk.
    if len(raw) % 2:
        raw = raw[:-1]
    return np.frombuffer(raw, dtype=np.int16)
|
|
|
|
|
|
# ─── Protocols ────────────────────────────────────────────
|
|
|
|
class Mic(ABC):
    """Abstract microphone interface.

    Concrete microphones implement `start`, `read_chunk`, `flush` and `stop`.
    `sample_rate` is the rate, in Hz, of the PCM they hand out.
    """

    sample_rate: int = TARGET_MIC_RATE

    @abstractmethod
    def start(self) -> None:
        """Begin capturing audio."""

    @abstractmethod
    def read_chunk(self, num_bytes: int) -> bytes:
        """Return captured PCM for a request of `num_bytes` bytes."""

    @abstractmethod
    def flush(self) -> None:
        """Discard audio that has been captured but not yet read."""

    @abstractmethod
    def stop(self) -> None:
        """Stop capturing audio."""
|
|
|
|
|
|
class Speaker(ABC):
    """Abstract speaker interface for streamed PCM playback.

    Concrete speakers implement `begin_stream`, `send_chunk`, `wait_finish`,
    `stop` and the `interrupted` property. `total_sent_sec` may be overridden;
    the base implementation always reports 0.0.
    """

    @abstractmethod
    def begin_stream(self) -> None:
        """Prepare for a new playback stream."""

    @abstractmethod
    def send_chunk(self, pcm: PCMLike, source_rate: int) -> None:
        """Queue PCM for playback. `source_rate` is the sample rate of `pcm`."""

    @abstractmethod
    def wait_finish(self) -> None:
        """Block until the audio queued so far is expected to have played."""

    @abstractmethod
    def stop(self) -> None:
        """Stop playback."""

    @property
    @abstractmethod
    def interrupted(self) -> bool:
        """Whether playback has been interrupted."""

    @property
    def total_sent_sec(self) -> float:
        """Seconds of audio sent so far; the base class reports 0.0."""
        return 0.0
|
|
|
|
|
|
# ─── G1 built-in (UDP mic + AudioClient speaker) ──────────
|
|
|
|
class BuiltinMic(Mic):
    """G1 robot's on-board mic published over UDP multicast.

    `start` joins the multicast group and spawns a daemon thread that appends
    every received datagram to a byte buffer capped at `buf_max` bytes (the
    oldest bytes are dropped first). `read_chunk` drains that buffer.
    """

    sample_rate = TARGET_MIC_RATE

    def __init__(self, group: str = _MCAST_GRP, port: int = _MCAST_PORT,
                 buf_max: int = _MIC_BUF_MAX):
        """Store the connection parameters; no socket is opened until `start`.

        Args:
            group: Multicast group address to join.
            port: UDP port to bind.
            buf_max: Maximum number of buffered bytes kept between reads.
        """
        self._group = group
        self._port = port
        self._buf_max = buf_max
        self._sock = None  # type: Optional[socket.socket]
        self._buf = bytearray()
        self._lock = threading.Lock()  # guards self._buf
        self._running = False
        self._thread = None  # type: Optional[threading.Thread]

    def start(self) -> None:
        """Join the multicast group and start the receiver thread.

        Does nothing when the mic is already running. If the socket cannot be
        configured it is closed before the error propagates, so a failed start
        does not leak a file descriptor.

        Raises:
            RuntimeError: from `_find_g1_local_ip` when no local address on
                the G1 network is found.
            OSError: when binding or joining the multicast group fails.
        """
        if self._running:
            return
        local_ip = _find_g1_local_ip()
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            sock.bind(("", self._port))
            mreq = struct.pack(
                "4s4s",
                socket.inet_aton(self._group),
                socket.inet_aton(local_ip),
            )
            sock.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP, mreq)
            # Bounded receive timeout so the receiver thread re-checks its
            # exit condition at least once per second.
            sock.settimeout(1.0)
        except Exception:
            sock.close()
            raise
        # Publish the socket before the thread starts: the thread captures it.
        self._sock = sock
        self._running = True
        self._thread = threading.Thread(target=self._recv_loop, daemon=True)
        self._thread.start()
        log.info("BuiltinMic joined %s:%d on %s", self._group, self._port, local_ip)

    def _recv_loop(self) -> None:
        """Receive datagrams into the buffer until this run is stopped.

        The loop is tied to the socket that was current when the thread
        started. After `stop()` (socket replaced by None) or a later
        `start()` (socket replaced by a new one) the thread exits instead of
        reading from a socket it does not own.
        """
        sock = self._sock
        if sock is None:
            return
        while self._running and self._sock is sock:
            try:
                data, _ = sock.recvfrom(4096)
            except socket.timeout:
                continue
            except Exception:
                # Typically the socket was closed by stop(); if the mic is
                # still meant to be running, back off briefly and retry.
                if self._running:
                    time.sleep(0.01)
                continue
            with self._lock:
                if self._sock is not sock:
                    break
                self._buf.extend(data)
                overflow = len(self._buf) - self._buf_max
                if overflow > 0:
                    # Keep only the newest buf_max bytes.
                    del self._buf[:overflow]

    def read_chunk(self, num_bytes: int) -> bytes:
        """Return `num_bytes` bytes of buffered audio, zero-padded if short.

        Polls the buffer for up to `_MIC_READ_TIMEOUT` seconds. If enough data
        arrives in time, exactly `num_bytes` bytes are removed from the buffer
        and returned. Otherwise whatever is buffered is returned followed by
        zero bytes up to `num_bytes` (all zeros when nothing is buffered).
        """
        # Monotonic clock: the deadline must not move if the wall clock is
        # adjusted while waiting.
        deadline = time.monotonic() + _MIC_READ_TIMEOUT
        while time.monotonic() < deadline:
            with self._lock:
                if len(self._buf) >= num_bytes:
                    chunk = bytes(self._buf[:num_bytes])
                    del self._buf[:num_bytes]
                    return chunk
            time.sleep(0.003)
        with self._lock:
            avail = len(self._buf)
            if avail > 0:
                chunk = bytes(self._buf[:avail])
                del self._buf[:avail]
                return chunk + b"\x00" * (num_bytes - avail)
        return b"\x00" * num_bytes

    def flush(self) -> None:
        """Discard all buffered audio."""
        with self._lock:
            self._buf.clear()

    def stop(self) -> None:
        """Stop receiving and close the socket; safe to call repeatedly.

        The receiver thread is not joined. It notices the stop on its next
        loop iteration, at the latest when its receive call returns.
        """
        self._running = False
        sock, self._sock = self._sock, None
        if sock is not None:
            try:
                sock.close()
            except Exception:
                # Best effort: a failing close must not abort shutdown.
                log.debug("BuiltinMic socket close failed", exc_info=True)
|
|
|
|
|
|
class BuiltinSpeaker(Speaker):
    """G1 robot's built-in speaker via AudioClient.PlayStream (16 kHz mono).

    Incoming PCM is resampled to `HARDWARE_RATE` and handed to the audio
    client chunk by chunk. The speaker tracks how many seconds of audio were
    sent so that `wait_finish` can estimate when playback ends.
    """

    HARDWARE_RATE = 16_000

    def __init__(self, audio_client: Any, app_name: Optional[str] = None):
        """Wrap `audio_client` and set its volume to 100.

        Args:
            audio_client: Object providing `SetVolume`, `PlayStream` and
                `_Call`.
            app_name: Stream owner name passed to the client; defaults to the
                `app_name` entry of the speaker config, or "marcus".
        """
        self._ac = audio_client
        try:
            self._ac.SetVolume(100)
        except Exception:
            # Best effort: playback may still work at the current volume.
            log.warning("BuiltinSpeaker.SetVolume failed", exc_info=True)
        self._app_name = app_name or _SP_CFG.get("app_name", "marcus")
        self._begin_pause = float(_SP_CFG.get("begin_stream_pause_sec", 0.15))
        self._finish_margin = float(_SP_CFG.get("wait_finish_margin_sec", 0.3))
        self._stop_flag = threading.Event()
        self._stream_id = None  # type: Optional[str]
        self._total_sent = 0.0  # seconds of audio sent in the current stream
        self._play_start = 0.0  # time.monotonic() value at begin_stream

    def _stop_play_api(self) -> None:
        """Ask the audio client to stop playback for this app (best effort)."""
        try:
            from unitree_sdk2py.g1.audio.g1_audio_api import (
                ROBOT_API_ID_AUDIO_STOP_PLAY,
            )
            self._ac._Call(
                ROBOT_API_ID_AUDIO_STOP_PLAY,
                json.dumps({"app_name": self._app_name}),
            )
        except Exception:
            log.warning("BuiltinSpeaker AUDIO_STOP_PLAY failed", exc_info=True)

    def begin_stream(self) -> None:
        """Start a fresh stream: clear the stop flag and reset counters.

        Any current playback is stopped first, followed by a short configured
        pause before the new stream id is generated.
        """
        self._stop_flag.clear()
        self._stop_play_api()
        time.sleep(self._begin_pause)
        self._stream_id = "s_{}".format(int(time.time() * 1000))
        self._total_sent = 0.0
        # Monotonic clock: used only to measure elapsed playback time.
        self._play_start = time.monotonic()

    def send_chunk(self, pcm: PCMLike, source_rate: int) -> None:
        """Resample `pcm` to the hardware rate and send it for playback.

        Chunks are ignored once `stop()` has been called, and chunks shorter
        than 10 samples are dropped.

        Args:
            pcm: Audio samples (bytes-like or NumPy array).
            source_rate: Sample rate of `pcm` in Hz.
        """
        if self._stop_flag.is_set():
            return
        arr = _as_int16_array(pcm)
        if arr.size < 10:
            return
        hw = _resample_int16(arr, int(source_rate), self.HARDWARE_RATE)
        self._ac.PlayStream(self._app_name, self._stream_id, hw.tobytes())
        self._total_sent += len(hw) / float(self.HARDWARE_RATE)

    def wait_finish(self) -> None:
        """Wait until the sent audio should have finished, then stop playback.

        The wait covers the audio sent since `begin_stream`, minus the time
        already elapsed, plus the configured margin. It ends immediately if
        `stop()` is called from another thread.
        """
        elapsed = time.monotonic() - self._play_start
        remaining = self._total_sent - elapsed + self._finish_margin
        if remaining > 0:
            # Event.wait sleeps for the real remaining time and wakes at once
            # when stop() sets the flag, instead of polling in 100 ms steps.
            self._stop_flag.wait(timeout=remaining)
        self._stop_play_api()

    def stop(self) -> None:
        """Interrupt playback: set the stop flag and stop the audio client."""
        self._stop_flag.set()
        self._stop_play_api()

    @property
    def interrupted(self) -> bool:
        """True once `stop()` has been called and no new stream has begun."""
        return self._stop_flag.is_set()

    @property
    def total_sent_sec(self) -> float:
        """Seconds of audio sent since the last `begin_stream`."""
        return self._total_sent
|
|
|
|
|
|
# ─── AudioIO factory ──────────────────────────────────────
|
|
|
|
# Accepted spellings of a profile id, each mapped to its canonical id.
_PROFILE_ALIASES = dict.fromkeys(("builtin", "g1", "g1_builtin"), "builtin")

# Canonical profile ids, listed in the "unknown audio profile" error message.
SUPPORTED_PROFILES = ("builtin",)
|
|
|
|
|
|
@dataclass
class AudioIO:
    """A microphone/speaker pair built for one audio profile.

    Attributes are the `mic`, the `speaker`, and the canonical `profile_id`
    they were built for (default "builtin").
    """

    mic: Mic
    speaker: Speaker
    profile_id: str = "builtin"

    def start(self) -> None:
        """Start the microphone."""
        self.mic.start()

    def stop(self) -> None:
        """Stop the speaker, then the microphone.

        A failure in either step is logged as a warning and does not prevent
        the other step from running.
        """
        shutdown_steps = (
            (self.speaker, "AudioIO speaker.stop failed"),
            (self.mic, "AudioIO mic.stop failed"),
        )
        for device, failure_message in shutdown_steps:
            try:
                device.stop()
            except Exception:
                log.warning(failure_message, exc_info=True)

    @classmethod
    def from_profile(
        cls,
        profile_id: str,
        *,
        audio_client: Optional[Any] = None,
    ) -> "AudioIO":
        """Build an AudioIO for the requested profile.

        `profile_id` is matched case-insensitively, ignoring surrounding
        whitespace, against the known aliases.

        `audio_client` is the initialised `unitree_sdk2py` `AudioClient` and
        is required for the `builtin` profile (the G1 on-board speaker).

        Raises:
            ValueError: if the profile is unknown, or if the `builtin`
                profile is requested without an `audio_client`.
        """
        normalized = (profile_id or "").strip().lower()
        canonical = _PROFILE_ALIASES.get(normalized)

        if canonical is None:
            supported = ", ".join(SUPPORTED_PROFILES)
            raise ValueError(
                "unknown audio profile {!r}; supported: {}".format(
                    profile_id, supported,
                )
            )

        # Defensive: an alias resolved to an id that has no builder here.
        if canonical != "builtin":
            raise AssertionError(
                "unhandled resolved profile: {!r}".format(canonical)
            )

        if audio_client is None:
            raise ValueError(
                "profile 'builtin' requires audio_client (G1 AudioClient)"
            )
        microphone = BuiltinMic()
        loudspeaker = BuiltinSpeaker(audio_client)
        return cls(mic=microphone, speaker=loudspeaker, profile_id=canonical)
|