116 lines
3.7 KiB
Python
116 lines
3.7 KiB
Python
#!/usr/bin/env python3
"""Probe — test which Gemini Live configurations actually connect.

Run on the Jetson under the gemini_sdk env:

    /home/unitree/miniconda3/envs/gemini_sdk/bin/python \
        /home/unitree/Marcus/Voice/_probe_live.py

Tries 6 model+config combos and reports which CONNECT successfully.
We don't send audio — just open the Live session and immediately close.
A successful connect == Google's server accepted our config; an error
means we either sent something invalid or used an unsupported model.
"""
|
|
import asyncio
|
|
import os
|
|
import sys
|
|
|
|
# The API key is read from the environment only.  A credential must never be
# embedded in source as a fallback: anything committed to the repository is
# readable by everyone with access to it (and to its history) and has to be
# treated as leaked.
# NOTE(review): the key that used to be hard-coded here should be revoked.
#
# MARCUS_GEMINI_API_KEY takes precedence over SANAD_GEMINI_API_KEY; unset or
# empty variables are skipped, and the result is None when neither is usable.
API_KEY = (
    os.environ.get("MARCUS_GEMINI_API_KEY")
    or os.environ.get("SANAD_GEMINI_API_KEY")
    or None
)

if API_KEY is None:
    # Only warn here so that the reason for a later authentication failure is
    # obvious in the terminal; whoever consumes API_KEY decides how to fail.
    print(
        "No Gemini API key found: set MARCUS_GEMINI_API_KEY or "
        "SANAD_GEMINI_API_KEY before running the probe.",
        file=sys.stderr,
    )
|
|
|
|
# google-genai is the script's only third-party dependency.  When it is
# missing, print a one-line hint and stop with exit status 2 instead of
# showing an import traceback.
try:
    from google import genai
    from google.genai import types
except ImportError:
    print("google-genai not installed", file=sys.stderr)
    raise SystemExit(2)


# One shared client, built from the configured API key; every probe in this
# script opens its Live session through it.
client = genai.Client(api_key=API_KEY)
|
|
|
|
|
|
async def try_connect(label: str, model: str, config) -> str:
    """Open a Live session for *model* with *config* and close it right away.

    Args:
        label: Human-readable name of the case.  Not used by this function;
            kept in the signature so callers can pass a whole case through.
        model: Model identifier handed to ``client.aio.live.connect``.
        config: Connect configuration handed to ``client.aio.live.connect``.

    Returns:
        ``"OK"`` when the session could be opened, otherwise
        ``"FAIL: <summary>"``.  The summary is the exception class name
        followed by its message, flattened onto a single line and cut to
        140 characters (with ``...`` appended when it was cut).
    """
    try:
        async with client.aio.live.connect(model=model, config=config):
            return "OK"
    except Exception as exc:
        # Deliberately broad: this is the probe boundary, and every kind of
        # failure should become one row in the result table, not a crash.
        # Collapse newlines/runs of whitespace so the row stays on one line.
        detail = " ".join(str(exc).split())
        kind = type(exc).__name__
        # Some exceptions carry no message at all; the class name alone is
        # still more useful than an empty "FAIL: ".
        summary = f"{kind}: {detail}" if detail else kind
        if len(summary) > 140:
            summary = summary[:140] + "..."
        return "FAIL: " + summary
|
|
|
|
|
|
async def main():
    """Probe each model/config combination in turn and print a result table.

    The cases are tried sequentially through ``try_connect``; each one
    produces a single output row marked with a check or a cross.
    """
    live_preview = "gemini-3.1-flash-live-preview"
    native_audio_12 = "gemini-2.5-flash-native-audio-preview-12-2025"
    native_audio_09 = "gemini-2.5-flash-native-audio-preview-09-2025"

    def bare(modality):
        # Config that sets nothing except the response modality.
        return types.LiveConnectConfig(response_modalities=[modality])

    cases = [
        # 1. 3.1 Live preview, TEXT, full Marcus config
        (
            "3.1-live + TEXT + VAD + input_transcription + system_instruction",
            live_preview,
            types.LiveConnectConfig(
                response_modalities=["TEXT"],
                realtime_input_config=types.RealtimeInputConfig(
                    automatic_activity_detection=types.AutomaticActivityDetection(
                        disabled=False,
                    ),
                ),
                input_audio_transcription=types.AudioTranscriptionConfig(),
                system_instruction=types.Content(
                    parts=[types.Part(text="Transcribe what the user says. Stay silent.")],
                ),
            ),
        ),
        # 2. 3.1 Live preview, TEXT, bare config (no extras)
        ("3.1-live + TEXT + bare", live_preview, bare("TEXT")),
        # 3. 3.1 Live preview, AUDIO mode (full S2S, no transcription extras)
        ("3.1-live + AUDIO + bare", live_preview, bare("AUDIO")),
        # 4. Sanad's native-audio model, AUDIO mode (known-working in Sanad)
        (
            "native-audio-12-2025 + AUDIO + bare (Sanad's combo)",
            native_audio_12,
            bare("AUDIO"),
        ),
        # 5. Sanad's model + AUDIO + transcription extras (what Sanad sends)
        (
            "native-audio-12-2025 + AUDIO + input/output transcription",
            native_audio_12,
            types.LiveConnectConfig(
                response_modalities=["AUDIO"],
                input_audio_transcription=types.AudioTranscriptionConfig(),
                output_audio_transcription=types.AudioTranscriptionConfig(),
            ),
        ),
        # 6. Older (09-2025) native-audio preview, AUDIO mode, bare config
        ("native-audio-09-2025 + AUDIO + bare", native_audio_09, bare("AUDIO")),
    ]

    print(f"{'CASE':<70} RESULT")
    print("-" * 110)
    for label, model, cfg in cases:
        outcome = await try_connect(label, model, cfg)
        if outcome == "OK":
            marker = "✓"
        else:
            marker = "✗"
        print(f" {marker} {label:<66} {outcome}")
|
|
|
|
|
|
# Script entry point: run the probe on a fresh event loop only when this file
# is executed directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())
|