Manual_Photographer/voice_sanad.py
2026-04-12 18:53:20 +04:00

186 lines
5.9 KiB
Python

#!/usr/bin/env python3
"""
voice_sanad.py (PRODUCTION)
- Starts photo server ONCE (thread) and keeps it alive.
- Initializes DDS ONCE (CycloneDDS doesn't like re-init loops).
- Runs Gemini WS sessions in a reconnect loop with backoff + jitter.
- If WS drops / idle disconnects: reconnect automatically without killing the whole app.
"""
import asyncio
import json
import os
import random
import websockets
from Logger import Logs
from unitree_sdk2py.core.channel import ChannelSubscriber, ChannelFactoryInitialize
from unitree_sdk2py.idl.unitree_hg.msg.dds_ import LowState_
from config import (
PHOTOS_DIR,
PHOTO_SERVER_PORT,
MODEL,
VOICE_NAME,
GEMINI_API_KEY,
PHOTO_TOTAL_SEC,
PHOTO_THANKS_SEC,
PHOTO_DELAY_SEC,
load_system_prompt,
)
from photo_server import start_photo_server
from controller import auto_pick_iface, LowStateHub
from replay_engine import ReplayWithHome
from gemini_voice import HamadGeminiVoice
from trigger_loop import trigger_loop
from photo_runner import take_photo_sync
# ==================================================
# 🪵 LOGGERS (separate logs like you want)
# ==================================================
sanad_logger = Logs()
sanad_logger.LogEngine("G1_Logs", "voice_sanad")
# ==================================================
# 🌐 GEMINI URI
# ==================================================
API_KEY = GEMINI_API_KEY.strip()
if not API_KEY:
raise RuntimeError("GEMINI_API_KEY is empty. Export it before running.")
URI = (
"wss://generativelanguage.googleapis.com/ws/"
"google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent"
f"?key={API_KEY}"
)
async def run_ws_session(ws, hub, replay, voice) -> None:
"""
Runs ONE Gemini WS session.
If WS drops or any task ends, this returns -> caller reconnects.
"""
system_prompt = load_system_prompt()
setup_msg = {
"setup": {
"model": MODEL,
"generationConfig": {
"responseModalities": ["AUDIO"],
"speechConfig": {
"voiceConfig": {"prebuiltVoiceConfig": {"voiceName": VOICE_NAME}}
},
},
"inputAudioTranscription": {},
"systemInstruction": {"parts": [{"text": system_prompt}]},
}
}
await ws.send(json.dumps(setup_msg))
await ws.recv()
sanad_logger.print_and_log("🎙️ Connected! Sanad ready.", message_type="info")
sanad_logger.print_and_log("🎮 Controls:", "info")
sanad_logger.print_and_log(" - R2+X : Replay + Photographer Talk + Take Photo", "info")
sanad_logger.print_and_log(" - R2+L1: Cancel replay -> Home", "info")
# Trigger loop uses this ws (session-bound)
trig_task = asyncio.create_task(
trigger_loop(
hub=hub,
replay=replay,
voice=voice,
ws=ws,
take_photo_sync_callable=take_photo_sync,
# timing can stay inside trigger_loop via config/env,
# but we keep these exported via config.py already
)
)
tasks = [
asyncio.create_task(voice.capture_mic(ws)),
asyncio.create_task(voice.receive_audio(ws)),
asyncio.create_task(voice.play_audio()), # IMPORTANT: no ws here
asyncio.create_task(voice.keepalive(ws, every_sec=20.0)), # helps idle stability
trig_task,
]
try:
done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_EXCEPTION)
for t in done:
exc = None
try:
exc = t.exception()
except asyncio.CancelledError:
exc = None
except Exception as e:
exc = e
if exc:
sanad_logger.print_and_log(f"⚠️ Task ended with error: {exc}", message_type="warning")
finally:
for t in tasks:
if not t.done():
t.cancel()
await asyncio.gather(*tasks, return_exceptions=True)
async def main():
# 1) Start photo server ONCE (never stops)
url = start_photo_server(PHOTOS_DIR, port=PHOTO_SERVER_PORT)
sanad_logger.print_and_log(f"🖼️ Phone gallery: {url}", message_type="info")
# 2) Init DDS ONCE (never loop this)
iface = auto_pick_iface()
sanad_logger.print_and_log(f"🌐 DDS iface: {iface}", "info")
ChannelFactoryInitialize(0, iface)
hub = LowStateHub(watchdog_timeout=0.25)
sub = ChannelSubscriber("rt/lowstate", LowState_)
sub.Init(hub.handler, 10)
replay = ReplayWithHome(hub, watchdog_disable_after=1.0)
# 3) Voice object once (keeps audio device stable)
voice = HamadGeminiVoice()
voice.calibrate_mic()
# 4) Reconnect loop with backoff
backoff = 1.0
max_backoff = 20.0
while True:
sanad_logger.print_and_log(f"\n🚀 Connecting to Gemini ({MODEL})...", message_type="info")
try:
async with websockets.connect(URI, **voice._ws_connect_kwargs()) as ws:
backoff = 1.0 # reset after a successful connect
await run_ws_session(ws, hub, replay, voice)
except websockets.exceptions.ConnectionClosed as e:
sanad_logger.print_and_log(f"⚠️ WebSocket closed: {e}", message_type="warning")
except Exception as e:
sanad_logger.print_and_log(f"❌ WS session error: {e}", message_type="error")
# Backoff sleep with jitter
sleep_s = backoff + random.uniform(0.0, 0.8)
sanad_logger.print_and_log(f"🔁 Reconnecting in {sleep_s:.1f}s...", message_type="warning")
await asyncio.sleep(sleep_s)
backoff = min(max_backoff, backoff * 1.7)
if __name__ == "__main__":
try:
asyncio.run(main())
except KeyboardInterrupt:
sanad_logger.print_and_log("\n👋 Ma'a Salama (Goodbye)!", message_type="info")
except Exception as e:
sanad_logger.print_and_log(f"\n❌ Fatal Error: {e}", message_type="error")