Marcus/Voice/_probe_live.py

116 lines
3.7 KiB
Python

#!/usr/bin/env python3
"""Probe — test which Gemini Live configurations actually connect.
Run on the Jetson under the gemini_sdk env:
/home/unitree/miniconda3/envs/gemini_sdk/bin/python \
/home/unitree/Marcus/Voice/_probe_live.py
Tries 6 model+config combos and reports which CONNECT successfully.
We don't send audio — just open the Live session and immediately close.
A successful connect == Google's server accepted our config; an error
means we either sent something invalid or used an unsupported model.
"""
import asyncio
import os
import sys
def resolve_api_key(env=None):
    """Return the first non-empty Gemini API key found in *env*, else None.

    ``MARCUS_GEMINI_API_KEY`` takes precedence over ``SANAD_GEMINI_API_KEY``.
    Variables that are set but empty are skipped.

    Args:
        env: Mapping to read from. Defaults to ``os.environ``; it is a
            parameter so the lookup can be exercised with a plain dict.

    Returns:
        The key string, or ``None`` when neither variable holds a value.
    """
    if env is None:
        env = os.environ
    for name in ("MARCUS_GEMINI_API_KEY", "SANAD_GEMINI_API_KEY"):
        value = env.get(name)
        if value:
            return value
    return None


# SECURITY: the key is taken from the environment only. A literal key used to
# be embedded here as a last-resort fallback; a key that has been committed to
# source should be treated as leaked and revoked.
API_KEY = resolve_api_key()
if API_KEY is None and __name__ == "__main__":
    # Only abort when run as a script; on import a missing key is left as
    # None so the importer is not killed by a SystemExit.
    print(
        "No Gemini API key found: set MARCUS_GEMINI_API_KEY or "
        "SANAD_GEMINI_API_KEY",
        file=sys.stderr,
    )
    sys.exit(2)
# The Gemini SDK is a hard requirement for this probe: without it, report the
# problem on stderr and stop with exit status 2.
try:
    from google import genai
    from google.genai import types
except ImportError:
    print("google-genai not installed", file=sys.stderr)
    raise SystemExit(2)

# Single module-level client, built once from API_KEY and used by try_connect.
client = genai.Client(api_key=API_KEY)
async def try_connect(label: str, model: str, config) -> str:
    """Open a Live session for *model* with *config* and close it at once.

    No audio or text is sent; the session context is entered and immediately
    left, so the result only reflects whether the connection was accepted.

    Args:
        label: Human-readable case name. Not used here; kept so callers can
            pass a whole case tuple through.
        model: Model identifier handed to ``client.aio.live.connect``.
        config: Connect configuration, passed through unchanged.

    Returns:
        ``"OK"`` when the session context is entered and exited without an
        exception. Otherwise ``"FAIL: <ExceptionType>: <message>"`` on a
        single line, with the message capped at 140 characters, or
        ``"FAIL: <ExceptionType>"`` when the exception carries no message.
    """
    try:
        async with client.aio.live.connect(model=model, config=config):
            return "OK"
    except Exception as e:  # deliberate catch-all: any failure is a result
        # Collapse newlines/runs of whitespace so one case stays on one row.
        detail = " ".join(str(e).split())
        if len(detail) > 140:
            detail = detail[:140] + "..."
        # Always name the exception class: some exceptions stringify to "",
        # which previously produced a bare "FAIL: " with no diagnostic.
        kind = type(e).__name__
        if detail:
            return f"FAIL: {kind}: {detail}"
        return f"FAIL: {kind}"
async def main():
    """Probe each model/config combination in turn and print a result table.

    The cases are tried sequentially, one connection at a time, and every
    case is attempted regardless of earlier failures. Output is one header
    row, a separator, then one row per case with a status tag, the case
    label and the string returned by ``try_connect``.
    """
    live_31 = "gemini-3.1-flash-live-preview"
    native_audio_12 = "gemini-2.5-flash-native-audio-preview-12-2025"
    native_audio_09 = "gemini-2.5-flash-native-audio-preview-09-2025"

    cases = [
        # 1. 3.1 Live preview, TEXT, full Marcus config
        (
            "3.1-live + TEXT + VAD + input_transcription + system_instruction",
            live_31,
            types.LiveConnectConfig(
                response_modalities=["TEXT"],
                realtime_input_config=types.RealtimeInputConfig(
                    automatic_activity_detection=types.AutomaticActivityDetection(
                        disabled=False,
                    ),
                ),
                input_audio_transcription=types.AudioTranscriptionConfig(),
                system_instruction=types.Content(
                    parts=[types.Part(text="Transcribe what the user says. Stay silent.")],
                ),
            ),
        ),
        # 2. 3.1 Live preview, TEXT, BARE config (no extras)
        (
            "3.1-live + TEXT + bare",
            live_31,
            types.LiveConnectConfig(response_modalities=["TEXT"]),
        ),
        # 3. 3.1 Live preview, AUDIO mode (full S2S, no transcription extras)
        (
            "3.1-live + AUDIO + bare",
            live_31,
            types.LiveConnectConfig(response_modalities=["AUDIO"]),
        ),
        # 4. Sanad's native-audio model, AUDIO mode (known-working in Sanad)
        (
            "native-audio-12-2025 + AUDIO + bare (Sanad's combo)",
            native_audio_12,
            types.LiveConnectConfig(response_modalities=["AUDIO"]),
        ),
        # 5. Sanad's model + AUDIO + transcription extras (what Sanad actually sends)
        (
            "native-audio-12-2025 + AUDIO + input/output transcription",
            native_audio_12,
            types.LiveConnectConfig(
                response_modalities=["AUDIO"],
                input_audio_transcription=types.AudioTranscriptionConfig(),
                output_audio_transcription=types.AudioTranscriptionConfig(),
            ),
        ),
        # 6. Older 09-2025 native-audio preview, AUDIO mode, BARE config
        (
            "native-audio-09-2025 + AUDIO + bare",
            native_audio_09,
            types.LiveConnectConfig(response_modalities=["AUDIO"]),
        ),
    ]

    print(f"{'CASE':<70} RESULT")
    print("-" * 110)
    for label, model, cfg in cases:
        res = await try_connect(label, model, cfg)
        # Two-character ASCII tag: the original expression yielded "" on both
        # branches, so pass/fail was never marked. Width 2 makes the result
        # column start under RESULT in the header (1 + 2 + 1 + 66 = 70).
        marker = "ok" if res == "OK" else "!!"
        print(f" {marker} {label:<66} {res}")
# Script entry point: drive the probe on a fresh event loop when executed
# directly; importing the module does not start it.
if __name__ == "__main__":
    probe = main()
    asyncio.run(probe)