Sanad/examples/voice_example.py

137 lines
4.3 KiB
Python

#!/usr/bin/env python3
"""voice_example.py — demos for each voice subsystem in isolation.
Each subcommand exercises one component so you can debug pieces without
running the full Sanad stack.
Usage:
python3 voice_example.py gemini "hello" # one-shot Gemini text→audio
python3 voice_example.py local_tts "hello" # local Coqui TTS
python3 voice_example.py typed_replay "hello" # typed replay engine
python3 voice_example.py live # spawn LiveGeminiSubprocess
python3 voice_example.py status # show status of all subsystems
Assumes Project.Sanad is importable (run from repo root or with PYTHONPATH set).
"""
from __future__ import annotations
import argparse
import asyncio
import sys
def _demo_gemini(text: str) -> None:
    """Send ``text`` to Gemini once and play back any audio that is returned.

    Connects a ``GeminiVoiceClient``, sends ``text`` with ``owner="example"``,
    prints the size of the returned audio together with the returned text
    parts, plays the audio when it is non-empty, and disconnects the client
    whether or not the exchange succeeded.

    Args:
        text: The text to send to Gemini.
    """
    from Project.Sanad.voice.gemini_client import GeminiVoiceClient
    from Project.Sanad.voice.audio_manager import AudioManager

    async def _speak_once() -> None:
        gemini = GeminiVoiceClient()
        player = AudioManager()
        await gemini.connect()
        try:
            reply = await gemini.send_text(text, owner="example")
            audio_bytes, text_parts = reply
            print(f"[gemini] got {len(audio_bytes)} bytes audio, text={text_parts}")
            if not audio_bytes:
                # Nothing to play; the finally clause still disconnects.
                return
            # play_pcm is handed to a worker thread via asyncio.to_thread.
            # NOTE(review): 1, 24000, 2 are presumably channels, sample rate
            # (Hz) and sample width (bytes) — confirm against play_pcm.
            await asyncio.to_thread(player.play_pcm, audio_bytes, 1, 24000, 2)
        finally:
            await gemini.disconnect()

    asyncio.run(_speak_once())
def _demo_local_tts(text: str) -> None:
    """Synthesize ``text`` with ``LocalTTSEngine`` and play the result.

    Prints the length of the synthesized buffer before handing it to
    ``AudioManager.play_pcm``.

    Args:
        text: The text to synthesize.
    """
    from Project.Sanad.voice.local_tts import LocalTTSEngine
    from Project.Sanad.voice.audio_manager import AudioManager

    engine = LocalTTSEngine()
    player = AudioManager()

    samples = engine.synthesize(text)
    size = len(samples)
    print(f"[local_tts] generated {size} bytes")

    # NOTE(review): 1, 16000, 2 are presumably channels, sample rate (Hz) and
    # sample width (bytes) — confirm against AudioManager.play_pcm.
    player.play_pcm(samples, 1, 16000, 2)
def _demo_typed_replay(text: str) -> None:
    """Exercise the ``TypedReplayEngine`` end-to-end.

    Connects a ``GeminiVoiceClient``, builds a ``TypedReplayEngine`` on top of
    it and an ``AudioManager``, asks the engine to say ``text``, and prints
    the result.

    Args:
        text: The text passed to ``TypedReplayEngine.say``.
    """
    from Project.Sanad.voice.gemini_client import GeminiVoiceClient
    from Project.Sanad.voice.audio_manager import AudioManager
    from Project.Sanad.voice.typed_replay import TypedReplayEngine

    async def run() -> None:
        client = GeminiVoiceClient()
        await client.connect()
        # Once connected, always disconnect — even if building the engine or
        # the say() call raises — matching the cleanup done in _demo_gemini.
        try:
            audio = AudioManager()
            engine = TypedReplayEngine(client, audio)
            result = await engine.say(text)
            print(f"[typed_replay] {result}")
        finally:
            await client.disconnect()

    asyncio.run(run())
def _demo_live() -> None:
    """Spawn the live voice subprocess — same as dashboard /api/live-subprocess.

    Starts a ``LiveGeminiSubprocess``, prints what ``start()`` returned, then
    idles until interrupted with Ctrl+C. The subprocess manager is stopped
    (and the value returned by ``stop()`` printed) on every exit path after a
    successful ``start()``, not only on Ctrl+C.
    """
    from Project.Sanad.voice.live_gemini_subprocess import LiveGeminiSubprocess
    import time  # imported once here instead of on every loop iteration

    mgr = LiveGeminiSubprocess()
    info = mgr.start()
    try:
        print(f"[live] {info}")
        print("Running. Ctrl+C to stop.")
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        print("\n[live] stopping...")
    finally:
        # Runs for Ctrl+C and for any other exception alike, so the started
        # subprocess is never left behind without a stop() call.
        print(mgr.stop())
def _demo_status() -> None:
    """Print the status of the voice subsystems.

    Always reports ``GeminiVoiceClient.status()``. The local TTS engine is
    optional: if importing it fails it is reported as not installed, and if
    constructing it or querying its status fails it is reported as
    unavailable along with the error.
    """
    from Project.Sanad.voice.gemini_client import GeminiVoiceClient

    # Best-effort import: any failure while importing is treated as "absent".
    try:
        from Project.Sanad.voice.local_tts import LocalTTSEngine as tts_cls
    except Exception:
        tts_cls = None

    gemini = GeminiVoiceClient()
    print("[gemini]", gemini.status())

    if not tts_cls:
        print("[local_tts] not installed")
        return

    try:
        engine = tts_cls()
        print("[local_tts]", engine.status())
    except Exception as err:
        print(f"[local_tts] unavailable: {err}")
def main():
    """Parse the command line and run the selected demo.

    Subcommands ``gemini``, ``local_tts`` and ``typed_replay`` take one
    positional ``text`` argument; ``live`` and ``status`` take none. A
    subcommand is required.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    commands = parser.add_subparsers(dest="cmd", required=True)

    for text_cmd in ("gemini", "local_tts", "typed_replay"):
        text_parser = commands.add_parser(text_cmd, help=f"demo {text_cmd}")
        text_parser.add_argument("text", help="text to speak")
    commands.add_parser("live", help="spawn live voice subprocess")
    commands.add_parser("status", help="print subsystem status")

    args = parser.parse_args()

    # Demos that receive the text argument vs. demos that take no arguments.
    text_demos = {
        "gemini": _demo_gemini,
        "local_tts": _demo_local_tts,
        "typed_replay": _demo_typed_replay,
    }
    plain_demos = {
        "live": _demo_live,
        "status": _demo_status,
    }

    chosen = args.cmd
    if chosen in text_demos:
        text_demos[chosen](args.text)
    elif chosen in plain_demos:
        plain_demos[chosen]()
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()