# Marcus/Voice/_probe_live.py

#!/usr/bin/env python3
"""Probe — test which Gemini Live configurations actually connect.

Run on the Jetson under the gemini_sdk env:

    /home/unitree/miniconda3/envs/gemini_sdk/bin/python \
        /home/unitree/Marcus/Voice/_probe_live.py

Tries 6 model+config combos and reports which CONNECT successfully.
We don't send audio — just open the Live session and immediately close.
A successful connect == Google's server accepted our config; an error
means we either sent something invalid or used an unsupported model.
"""
import asyncio
import os
import sys

# Environment variables consulted for the Gemini API key, highest priority first.
API_KEY_ENV_VARS = ("MARCUS_GEMINI_API_KEY", "SANAD_GEMINI_API_KEY")


def resolve_api_key(environ=None):
    """Return the first non-empty Gemini API key found in *environ*.

    Args:
        environ: Mapping to read from; defaults to ``os.environ``.

    Returns:
        The value of the first variable in ``API_KEY_ENV_VARS`` that is set
        to a non-empty string, or ``None`` when none of them is.
    """
    if environ is None:
        environ = os.environ
    for name in API_KEY_ENV_VARS:
        value = environ.get(name)
        if value:  # an empty string counts as "not set"
            return value
    return None


# SECURITY: the key comes from the environment only. A literal key used to be
# the last-resort fallback here; a credential committed to source control has
# to be treated as leaked, so it was removed — revoke/rotate that key.
API_KEY = resolve_api_key()
if API_KEY is None:
    # Not fatal at this point: genai.Client() receives api_key=None and is
    # left to apply its own credential lookup or reject the missing key.
    # NOTE(review): confirm the google-genai behaviour for api_key=None.
    print(
        "warning: no Gemini API key configured; set "
        + " or ".join(API_KEY_ENV_VARS),
        file=sys.stderr,
    )

try:
    from google import genai
    from google.genai import types
except ImportError:
    print("google-genai not installed", file=sys.stderr)
    sys.exit(2)


# One module-level client shared by every probe case: try_connect() opens each
# Live session through client.aio.live.connect().
client = genai.Client(api_key=API_KEY)


async def try_connect(label: str, model: str, config, timeout: float = 20.0) -> str:
    """Open a Live session for *model*/*config*, close it at once, report the outcome.

    No audio or text is sent; the session is entered and immediately exited.

    Args:
        label: Human-readable case name. Not used here; accepted so callers
            can pass a whole case tuple straight through.
        model: Model identifier handed to ``client.aio.live.connect``.
        config: Session configuration handed to ``client.aio.live.connect``.
        timeout: Upper bound in seconds for the whole open/close round trip,
            so one stalled handshake cannot hang the remaining cases.
            ``None`` disables the bound.

    Returns:
        ``"OK"`` when the session opened and closed without raising,
        otherwise ``"FAIL: <ExceptionType>: <message>"`` with the text after
        ``"FAIL: "`` collapsed to a single line and truncated to 140
        characters (plus ``"..."``).
    """

    async def _open_then_close() -> None:
        async with client.aio.live.connect(model=model, config=config):
            pass

    try:
        await asyncio.wait_for(_open_then_close(), timeout)
    except Exception as exc:
        # Deliberately broad: any failure is a result to tabulate, not a crash.
        # Collapse newlines/runs of spaces so each case stays on one table row.
        detail = " ".join(str(exc).split())
        # Always name the exception type: some exceptions (e.g. a timeout)
        # carry no message, which would otherwise print a bare "FAIL: ".
        kind = type(exc).__name__
        msg = f"{kind}: {detail}" if detail else kind
        if len(msg) > 140:
            msg = msg[:140] + "..."
        return "FAIL: " + msg
    return "OK"


async def main():
    """Probe each model/config combination in turn and print a result table.

    Every case is a ``(label, model, config)`` triple. The cases are awaited
    one after another through ``try_connect`` and each gets one output row:
    a check mark when the result is exactly ``"OK"``, a cross otherwise,
    followed by the label and the raw result string.
    """
    live_preview = "gemini-3.1-flash-live-preview"
    native_audio_dec = "gemini-2.5-flash-native-audio-preview-12-2025"
    native_audio_sep = "gemini-2.5-flash-native-audio-preview-09-2025"

    def bare(modality):
        # A fresh config object per case, with nothing but the response modality.
        return types.LiveConnectConfig(response_modalities=[modality])

    # Pieces of the "full Marcus" TEXT config used by case 1.
    vad = types.AutomaticActivityDetection(disabled=False)
    realtime_input = types.RealtimeInputConfig(automatic_activity_detection=vad)
    input_transcription = types.AudioTranscriptionConfig()
    instruction = types.Content(
        parts=[types.Part(text="Transcribe what the user says. Stay silent.")],
    )

    probes = [
        # 1. 3.1 Live preview, TEXT, with VAD + input transcription + system prompt.
        (
            "3.1-live + TEXT + VAD + input_transcription + system_instruction",
            live_preview,
            types.LiveConnectConfig(
                response_modalities=["TEXT"],
                realtime_input_config=realtime_input,
                input_audio_transcription=input_transcription,
                system_instruction=instruction,
            ),
        ),
        # 2. 3.1 Live preview, TEXT, nothing else.
        ("3.1-live + TEXT + bare", live_preview, bare("TEXT")),
        # 3. 3.1 Live preview, AUDIO, nothing else.
        ("3.1-live + AUDIO + bare", live_preview, bare("AUDIO")),
        # 4. December native-audio preview, AUDIO, nothing else (Sanad's combo).
        (
            "native-audio-12-2025 + AUDIO + bare (Sanad's combo)",
            native_audio_dec,
            bare("AUDIO"),
        ),
        # 5. December native-audio preview, AUDIO, input + output transcription.
        (
            "native-audio-12-2025 + AUDIO + input/output transcription",
            native_audio_dec,
            types.LiveConnectConfig(
                response_modalities=["AUDIO"],
                input_audio_transcription=types.AudioTranscriptionConfig(),
                output_audio_transcription=types.AudioTranscriptionConfig(),
            ),
        ),
        # 6. September native-audio preview, AUDIO, nothing else.
        ("native-audio-09-2025 + AUDIO + bare", native_audio_sep, bare("AUDIO")),
    ]

    # Header is 70 columns wide; each row's "  <mark> " prefix (4) plus the
    # 66-column label lines up with it.
    print(f"{'CASE':<70} RESULT")
    print("-" * 110)
    for label, model, config in probes:
        outcome = await try_connect(label, model, config)
        if outcome == "OK":
            mark = "✓"
        else:
            mark = "✗"
        print(f"  {mark} {label:<66} {outcome}")


# Run the probe table only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())