#!/usr/bin/env python3
"""Probe — test which Gemini Live configurations actually connect.

Run on the Jetson under the gemini_sdk env:
    /home/unitree/miniconda3/envs/gemini_sdk/bin/python \
        /home/unitree/Marcus/Voice/_probe_live.py

Tries 6 model+config combos and reports which CONNECT successfully. We don't
send audio — just open the Live session and immediately close. A successful
connect == Google's server accepted our config; an error means we either sent
something invalid or used an unsupported model.
"""
import asyncio
import os
import sys

# SECURITY NOTE(review): a literal API key is committed here as a last-resort
# fallback. It should be rotated and removed; prefer the environment variables.
API_KEY = (
    os.environ.get("MARCUS_GEMINI_API_KEY")
    or os.environ.get("SANAD_GEMINI_API_KEY")
    or "AIzaSyDt9Xi83MDZuuPpfwfHyMD92X7ZKdGkqf8"
)

try:
    from google import genai
    from google.genai import types
except ImportError:
    # The probe is useless without the SDK — bail out with a distinct exit code.
    print("google-genai not installed", file=sys.stderr)
    sys.exit(2)

client = genai.Client(api_key=API_KEY)


async def try_connect(label: str, model: str, config) -> str:
    """Open a Live session for (model, config) and close it immediately.

    Returns "OK" on a successful handshake, or "FAIL: <error>" (error text
    truncated to 140 chars) when the server rejects the model/config.
    """
    try:
        async with client.aio.live.connect(model=model, config=config):
            return "OK"
    except Exception as e:  # any connect/handshake error IS the probe result
        msg = str(e)
        if len(msg) > 140:
            msg = msg[:140] + "..."
        return "FAIL: " + msg


async def main() -> None:
    """Run every probe case sequentially and print a result table."""
    # (label, model, config) triples — one per probe case.
    cases = [
        # 1. 3.1 Live preview, TEXT, full Marcus config
        (
            "3.1-live + TEXT + VAD + input_transcription + system_instruction",
            "gemini-3.1-flash-live-preview",
            types.LiveConnectConfig(
                response_modalities=["TEXT"],
                realtime_input_config=types.RealtimeInputConfig(
                    automatic_activity_detection=types.AutomaticActivityDetection(
                        disabled=False,
                    ),
                ),
                input_audio_transcription=types.AudioTranscriptionConfig(),
                system_instruction=types.Content(
                    parts=[types.Part(text="Transcribe what the user says. Stay silent.")],
                ),
            ),
        ),
        # 2. 3.1 Live preview, TEXT, BARE config (no extras)
        (
            "3.1-live + TEXT + bare",
            "gemini-3.1-flash-live-preview",
            types.LiveConnectConfig(response_modalities=["TEXT"]),
        ),
        # 3. 3.1 Live preview, AUDIO mode (full S2S, no transcription extras)
        (
            "3.1-live + AUDIO + bare",
            "gemini-3.1-flash-live-preview",
            types.LiveConnectConfig(response_modalities=["AUDIO"]),
        ),
        # 4. Sanad's native-audio model, AUDIO mode (known-working in Sanad)
        (
            "native-audio-12-2025 + AUDIO + bare (Sanad's combo)",
            "gemini-2.5-flash-native-audio-preview-12-2025",
            types.LiveConnectConfig(response_modalities=["AUDIO"]),
        ),
        # 5. Sanad's model + AUDIO + transcription extras (what Sanad actually sends)
        (
            "native-audio-12-2025 + AUDIO + input/output transcription",
            "gemini-2.5-flash-native-audio-preview-12-2025",
            types.LiveConnectConfig(
                response_modalities=["AUDIO"],
                input_audio_transcription=types.AudioTranscriptionConfig(),
                output_audio_transcription=types.AudioTranscriptionConfig(),
            ),
        ),
        # 6. Older native-audio snapshot (09-2025), AUDIO, bare config.
        #    (The original comment mentioned "the input_transcription the
        #    runner adds", but no transcription extras are sent here — the
        #    config is bare, matching the label.)
        (
            "native-audio-09-2025 + AUDIO + bare",
            "gemini-2.5-flash-native-audio-preview-09-2025",
            types.LiveConnectConfig(response_modalities=["AUDIO"]),
        ),
    ]

    print(f"{'CASE':<70} RESULT")
    print("-" * 110)
    for label, model, cfg in cases:
        res = await try_connect(label, model, cfg)
        marker = "✓" if res == "OK" else "✗"
        print(f" {marker} {label:<66} {res}")


if __name__ == "__main__":
    asyncio.run(main())