# Source: Marcus/Voice/builtin_mic.py (Python; 203 lines, 7.1 KiB as originally published)

"""
builtin_mic.py — G1 built-in microphone (UDP multicast capture)
================================================================
The G1 humanoid's on-board microphone is published by the Unitree firmware
as an RTP-like UDP multicast stream on 239.168.123.161:5555, carrying
16 kHz mono int16 PCM. Any host on the robot's 192.168.123.0/24 network
can join the group and read the audio — no extra SDK call required.
This module intentionally has no dependency on pyaudio, pulseaudio, or the
unitree_sdk2py package. Joining the multicast group is all that's needed.
Usage:
    from Voice.builtin_mic import BuiltinMic

    mic = BuiltinMic()
    mic.start()
    try:
        chunk = mic.read_chunk(1024)  # 1024 bytes = 512 samples, 32 ms at 16 kHz
        ...
    finally:
        mic.stop()
Ported from Project/Sanad/voice/audio_io.py (Sanad's production implementation).
"""
from __future__ import annotations
import socket
import struct
import subprocess
import threading
import time
from typing import Optional
# Multicast group and UDP port on which the microphone stream is published
# (see the module docstring).
DEFAULT_GROUP = "239.168.123.161"
DEFAULT_PORT = 5555
# Cap, in bytes, on buffered audio before the oldest data is dropped:
# 64 000 B / (16 000 samples/s * 2 B/sample) = ~2 s of 16 kHz mono int16.
DEFAULT_BUF_MAX = 64_000
# Seconds a single read_chunk() call may wait for data (40 ms budget).
DEFAULT_READ_TIMEOUT = 0.04
# Samples per second of the stream; this is the hardware rate — do not change.
SAMPLE_RATE = 16_000
def _find_g1_local_ip() -> str:
"""
Return the host IPv4 on the G1's internal 192.168.123.0/24 network.
Required by IP_ADD_MEMBERSHIP so the kernel knows which NIC to join on.
"""
out = subprocess.run(
["ip", "-4", "-o", "addr"], capture_output=True, text=True,
).stdout
for line in out.splitlines():
for tok in line.split():
if tok.startswith("192.168.123."):
return tok.split("/")[0]
raise RuntimeError(
"BuiltinMic: no interface on 192.168.123.0/24 — "
"host is not on the G1's internal network"
)
class BuiltinMic:
    """
    G1 on-board microphone over UDP multicast.

    A background daemon thread receives datagrams into an internal bounded
    buffer guarded by a lock; once more than `buf_max` bytes are held, the
    oldest audio is dropped. `read_chunk(n)` pulls the next `n` bytes,
    waiting up to `read_timeout` seconds; if the buffer is still short it
    returns what is available padded with silence (zero bytes).

    The instance can also be used as a context manager, which calls
    `start()` on entry and `stop()` on exit:

        with BuiltinMic() as mic:
            chunk = mic.read_chunk(1024)

    Args:
        group: multicast group address to join.
        port: UDP port the stream is published on.
        buf_max: maximum number of buffered bytes before old audio is dropped.
        read_timeout: seconds `read_chunk` may wait for enough data.
    """

    sample_rate = SAMPLE_RATE
    _BYTES_PER_SAMPLE = 2  # the stream is treated as int16 PCM

    def __init__(
        self,
        group: str = DEFAULT_GROUP,
        port: int = DEFAULT_PORT,
        buf_max: int = DEFAULT_BUF_MAX,
        read_timeout: float = DEFAULT_READ_TIMEOUT,
    ):
        self._group = group
        self._port = port
        self._buf_max = buf_max
        self._read_timeout = read_timeout
        self._sock: Optional[socket.socket] = None
        self._buf = bytearray()
        self._lock = threading.Lock()
        self._running = False
        self._thread: Optional[threading.Thread] = None

    def __enter__(self) -> "BuiltinMic":
        """Start capturing and return the mic itself."""
        self.start()
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Stop capturing; exceptions from the `with` body are not suppressed."""
        self.stop()

    def start(self) -> None:
        """
        Join the multicast group and start the receiver thread.

        Calling `start()` on an already-running mic is a no-op.

        Raises:
            RuntimeError: propagated from `_find_g1_local_ip()` when the host
                has no usable interface.
            OSError: if the socket cannot be bound or the group joined. The
                socket is closed before the error propagates.
        """
        if self._running:
            return
        local_ip = _find_g1_local_ip()
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            sock.bind(("", self._port))
            mreq = struct.pack(
                "4s4s",
                socket.inet_aton(self._group),
                socket.inet_aton(local_ip),
            )
            sock.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP, mreq)
            # Bounded recv so the receiver thread re-checks _running regularly.
            sock.settimeout(1.0)
        except Exception:
            # Do not leak the descriptor when setup fails half-way.
            sock.close()
            raise
        self._sock = sock
        self._running = True
        self._thread = threading.Thread(
            target=self._recv_loop, daemon=True, name="builtin_mic_rx",
        )
        self._thread.start()
        print(f" [BuiltinMic] joined {self._group}:{self._port} on {local_ip}")

    def _recv_loop(self) -> None:
        """Receiver thread body: append datagram payloads to the buffer."""
        # Work on a local reference: stop() sets self._sock to None from
        # another thread, and a later start() may install a different socket.
        sock = self._sock
        if sock is None:
            return
        while self._running and self._sock is sock:
            try:
                data, _ = sock.recvfrom(4096)
            except socket.timeout:
                continue
            except OSError:
                # Socket closed by stop() (loop condition ends the thread) or
                # a transient network error (back off briefly and retry).
                if self._running:
                    time.sleep(0.01)
                continue
            with self._lock:
                self._buf.extend(data)
                # Bounded buffer: drop the oldest audio beyond buf_max.
                overflow = len(self._buf) - self._buf_max
                if overflow > 0:
                    # Round up to whole samples so int16 framing is kept.
                    overflow += overflow % self._BYTES_PER_SAMPLE
                    del self._buf[:overflow]

    def _pop(self, count: int) -> bytes:
        """Remove and return the first `count` buffered bytes (lock held)."""
        chunk = bytes(self._buf[:count])
        del self._buf[:count]
        return chunk

    def read_chunk(self, num_bytes: int) -> bytes:
        """
        Return exactly `num_bytes` of 16 kHz mono int16 PCM.

        Waits up to `read_timeout` for that many bytes to be available.
        If the buffer is still short after the timeout, returns whatever
        is available padded with silence. Never blocks forever, and never
        returns more than `num_bytes`. A non-positive `num_bytes` yields
        `b""`.
        """
        if num_bytes <= 0:
            return b""
        # Monotonic clock: immune to wall-clock adjustments during the wait.
        deadline = time.monotonic() + self._read_timeout
        while time.monotonic() < deadline:
            with self._lock:
                if len(self._buf) >= num_bytes:
                    return self._pop(num_bytes)
            time.sleep(0.003)
        with self._lock:
            # More data may have arrived since the last check, so cap the
            # amount taken at num_bytes instead of draining the buffer.
            chunk = self._pop(min(len(self._buf), num_bytes))
        return chunk + b"\x00" * (num_bytes - len(chunk))

    def read_seconds(self, seconds: float) -> bytes:
        """
        Convenience: capture `seconds` of audio and return as bytes.

        Reads in chunks of at most 1024 bytes via `read_chunk`, so missing
        audio is replaced by silence rather than stalling the call.
        (Not a real-time producer.)
        """
        num_bytes = int(seconds * self.sample_rate * self._BYTES_PER_SAMPLE)
        out = bytearray()
        chunk_bytes = 1024
        while len(out) < num_bytes:
            out.extend(self.read_chunk(min(chunk_bytes, num_bytes - len(out))))
        return bytes(out)

    def flush(self) -> None:
        """Drop all buffered audio (e.g. after the robot spoke)."""
        with self._lock:
            self._buf.clear()

    def stop(self) -> None:
        """
        Stop the receiver thread and close the socket.

        Safe to call repeatedly or before `start()`. Waits up to 1.5 s for
        the receiver thread to exit.
        """
        self._running = False
        sock, self._sock = self._sock, None
        if sock is not None:
            try:
                sock.close()
            except OSError:
                pass
        thread, self._thread = self._thread, None
        # A thread cannot join itself; guard in case stop() is ever invoked
        # from the receiver thread.
        if thread is not None and thread is not threading.current_thread():
            thread.join(timeout=1.5)
# ────────────────────────────────────────────────────────────────
# Standalone test — capture 3 s and print energy stats
# ────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Self-test: capture 3 s from the robot and report simple level stats.
    import array

    print("BuiltinMic standalone test — capturing 3 s from G1...")
    mic = BuiltinMic()
    mic.start()
    try:
        time.sleep(0.3)  # let the receiver thread warm up
        raw = mic.read_seconds(3.0)
    finally:
        # Always release the socket and receiver thread, even on Ctrl-C.
        mic.stop()

    samples = array.array("h", raw)  # native-endian int16 view of the PCM
    if not samples:
        print(" FAIL — got zero samples")
    else:
        mn = min(samples)
        mx = max(samples)
        mean_abs = sum(abs(s) for s in samples) / len(samples)
        print(f" samples={len(samples)} min={mn} max={mx} mean|s|={mean_abs:.0f}")
        # Mean |amplitude| above 30 counts as "audio present"; pure padding
        # from read_chunk would give 0.
        if mean_abs > 30:
            print(" OK — mic is capturing audio")
        else:
            print(" WARN — signal very low, check G1 audio service is running")