Sanad/voice/audio_manager.py

178 lines
5.9 KiB
Python

"""Audio I/O manager — recording and playback via PyAudio.
Handles microphone capture, speaker playback, and speaker-monitor recording.
Thread-safe; one playback at a time via play_lock.
Device selection is dynamic — read from voice.audio_devices on each refresh.
"""
from __future__ import annotations
import subprocess
import threading
import time
import wave
from pathlib import Path
from typing import Any
try:
import pyaudio
except ImportError:
pyaudio = None # optional — only needed for local PCM playback
from Project.Sanad.config import (
CHANNELS,
CHUNK_SIZE,
RECEIVE_SAMPLE_RATE,
SINK as DEFAULT_SINK,
SOURCE as DEFAULT_SOURCE,
MONITOR_SOURCE,
)
from Project.Sanad.core.logger import get_logger
from Project.Sanad.voice import audio_devices as ad
# Module-level logger shared by every function and class in this file.
log = get_logger("audio_manager")
# Sample format handed to PyAudio for recording and for sample_width().
# When pyaudio is not installed the literal 8 is used so the module still
# imports (presumably the numeric value of paInt16 — TODO confirm).
FORMAT = pyaudio.paInt16 if pyaudio else 8
# Cached current selection — updated by refresh_devices() and
# ensure_audio_defaults(); always read or written while holding _DEVICE_LOCK.
_DEVICE_LOCK = threading.Lock()
_current_sink = DEFAULT_SINK
_current_source = DEFAULT_SOURCE
def _run_pactl(args: list[str]) -> subprocess.CompletedProcess[str]:
    """Invoke ``pactl`` with *args* and return the completed process.

    Output is captured as text. Because ``check=True`` is passed, a non-zero
    exit status raises ``subprocess.CalledProcessError``.
    """
    command = ["pactl"]
    command.extend(args)
    return subprocess.run(command, capture_output=True, text=True, check=True)
def _resolve_devices() -> tuple[str, str]:
    """Look up the currently selected ``(sink, source)`` pair.

    Asks ``audio_devices.current_selection()`` for the selection; any missing
    or empty entry is replaced by the matching config default. If the lookup
    raises, a warning is logged and both config defaults are returned.
    """
    try:
        selection = ad.current_selection()
        return (
            selection.get("sink") or DEFAULT_SINK,
            selection.get("source") or DEFAULT_SOURCE,
        )
    except Exception as exc:
        # Best effort: device lookup must never break audio I/O.
        log.warning("Could not resolve audio devices: %s", exc)
    return DEFAULT_SINK, DEFAULT_SOURCE
def ensure_audio_defaults():
    """Apply the current device selection and refresh the cached sink/source.

    Delegates to ``audio_devices.apply_current_selection()`` (per the original
    author's note this re-scans the USB ports, resolves the active profile and
    sets the pactl defaults — NOTE(review): that helper is not visible here,
    confirm). The ``"selection"`` entry of its result is then stored in the
    module-level cache, with empty or missing names replaced by the config
    defaults.

    Called from ``AudioManager.__init__`` and before each playback/recording
    so that a device re-plugged into a different port is picked up. Failures
    are logged as warnings and otherwise ignored; the cache is left untouched.
    """
    global _current_sink, _current_source
    try:
        applied = ad.apply_current_selection()
        selection = applied.get("selection", {})
        chosen_sink = selection.get("sink", "") or DEFAULT_SINK
        chosen_source = selection.get("source", "") or DEFAULT_SOURCE
        with _DEVICE_LOCK:
            _current_sink = chosen_sink
            _current_source = chosen_source
    except Exception as exc:
        # Best effort: audio must keep working with whatever was cached.
        log.warning("Audio defaults not applied: %s", exc)
class AudioManager:
    """PyAudio-backed helper for PCM/WAV playback and microphone recording.

    Playback is serialized through ``play_lock`` so only one ``play_pcm`` call
    writes to the output at a time. The selected sink/source names are kept in
    module-level variables guarded by ``_DEVICE_LOCK`` and are therefore shared
    by every instance.

    The manager may be used as a context manager; leaving the ``with`` block
    calls :meth:`close`.
    """

    def __init__(self):
        """Create the PyAudio instance and apply the current device selection.

        Raises:
            RuntimeError: if the optional ``pyaudio`` package is not installed.
        """
        if pyaudio is None:
            raise RuntimeError(
                "pyaudio not installed — AudioManager cannot play local PCM. "
                "Install with `pip install pyaudio` (needs portaudio headers), "
                "or rely on the G1 speaker via AudioClient.PlayStream."
            )
        self.pya = pyaudio.PyAudio()
        self.play_lock = threading.Lock()
        self._closed = False
        # Resolve devices and set PulseAudio defaults at startup
        self.refresh_devices()
        ensure_audio_defaults()

    def __enter__(self) -> "AudioManager":
        """Return the manager itself so it can be bound by ``with ... as``."""
        return self

    def __exit__(self, *exc_info: object) -> None:
        """Release PyAudio on leaving the ``with`` block; never suppresses errors."""
        self.close()

    def refresh_devices(self) -> dict[str, str]:
        """Re-read the selected sink/source from the audio_devices module.

        Updates the module-level cache and returns the new selection as
        ``{"sink": ..., "source": ...}``.
        """
        global _current_sink, _current_source
        sink, source = _resolve_devices()
        with _DEVICE_LOCK:
            _current_sink, _current_source = sink, source
        log.info("AudioManager devices refreshed: sink=%s source=%s", sink, source)
        return {"sink": sink, "source": source}

    @property
    def current_sink(self) -> str:
        """Name of the cached output device (read under ``_DEVICE_LOCK``)."""
        with _DEVICE_LOCK:
            return _current_sink

    @property
    def current_source(self) -> str:
        """Name of the cached input device (read under ``_DEVICE_LOCK``)."""
        with _DEVICE_LOCK:
            return _current_source

    def close(self):
        """Terminate the PyAudio instance; repeated calls are no-ops."""
        # getattr: tolerate instances on which __init__ never set the flag.
        if getattr(self, "_closed", False):
            return
        self._closed = True
        self.pya.terminate()

    def sample_width(self) -> int:
        """Return the size in bytes of one sample in the module's ``FORMAT``."""
        return self.pya.get_sample_size(FORMAT)

    @staticmethod
    def _close_stream(stream) -> None:
        """Stop and close *stream*, closing it even if stopping raises."""
        try:
            stream.stop_stream()
        finally:
            # Without this nesting a failing stop_stream() would leak the stream.
            stream.close()

    # -- playback --
    def play_pcm(self, pcm_bytes: bytes, channels: int, sample_rate: int, sample_width: int):
        """Play raw PCM on the default output, blocking until it is written.

        Args:
            pcm_bytes: interleaved PCM sample data.
            channels: number of channels in *pcm_bytes*.
            sample_rate: frames per second.
            sample_width: bytes per sample; selects the PyAudio format.

        Holds ``play_lock`` for the whole call, so concurrent callers are
        played one after another.
        """
        with self.play_lock:
            ensure_audio_defaults()
            stream = self.pya.open(
                format=self.pya.get_format_from_width(sample_width),
                channels=channels,
                rate=sample_rate,
                output=True,
                frames_per_buffer=CHUNK_SIZE,
            )
            try:
                # Bytes occupied by CHUNK_SIZE frames.
                frame_bytes = CHUNK_SIZE * channels * sample_width
                for offset in range(0, len(pcm_bytes), frame_bytes):
                    stream.write(pcm_bytes[offset : offset + frame_bytes])
            finally:
                self._close_stream(stream)

    def play_wav(self, path: Path) -> dict[str, Any]:
        """Play the WAV file at *path* and report what was played.

        The whole file is read into memory before playback starts.

        Returns:
            ``{"path": str(path), "duration_seconds": <rounded to 3 places>}``.
        """
        with wave.open(str(path), "rb") as wf:
            channels = wf.getnchannels()
            sw = wf.getsampwidth()
            rate = wf.getframerate()
            data = wf.readframes(wf.getnframes())
        self.play_pcm(data, channels, rate, sw)
        duration = len(data) / (rate * channels * sw) if rate else 0
        return {"path": str(path), "duration_seconds": round(duration, 3)}

    # -- recording --
    def record_mic(self, duration_sec: float) -> bytes:
        """Record from default mic for *duration_sec* seconds, return raw PCM.

        Audio is read in whole ``CHUNK_SIZE`` chunks and the chunk count is
        truncated, so slightly less than *duration_sec* may be captured.
        Recording does not take ``play_lock``.
        """
        ensure_audio_defaults()
        stream = self.pya.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RECEIVE_SAMPLE_RATE,
            input=True,
            frames_per_buffer=CHUNK_SIZE,
        )
        frames: list[bytes] = []
        total_chunks = int(RECEIVE_SAMPLE_RATE / CHUNK_SIZE * duration_sec)
        try:
            for _ in range(total_chunks):
                # Overflows are ignored rather than raised so a slow reader
                # does not abort the recording.
                frames.append(stream.read(CHUNK_SIZE, exception_on_overflow=False))
        finally:
            self._close_stream(stream)
        return b"".join(frames)

    def save_wav(
        self,
        pcm_bytes: bytes,
        path: Path,
        channels: int,
        sample_rate: int,
        sample_width: int | None = None,
    ):
        """Write *pcm_bytes* to *path* as a WAV file, creating parent directories.

        Args:
            pcm_bytes: raw PCM sample data.
            path: destination file.
            channels: channel count written to the header.
            sample_rate: frame rate written to the header.
            sample_width: bytes per sample; when omitted, the width of the
                module's ``FORMAT`` is used (the previous fixed behaviour).
        """
        path.parent.mkdir(parents=True, exist_ok=True)
        # Resolve the width before opening so a failure leaves no partial file.
        width = self.sample_width() if sample_width is None else sample_width
        with wave.open(str(path), "wb") as wf:
            wf.setnchannels(channels)
            wf.setsampwidth(width)
            wf.setframerate(sample_rate)
            wf.writeframes(pcm_bytes)