Sanad/config/voice_config.json

124 lines
3.6 KiB
JSON

{
"_description": "Tunables for voice/* modules. Loaded via core.config_loader.load('voice').",
"sanad_voice": {
"_comment": "voice/sanad_voice.py — main live voice subprocess. Gemini API credentials (api_key, model, voice_name) come from core_config.json's gemini_defaults — single source of truth.",
"mic_gain": 1.0,
"play_chunk_bytes": 96000,
"log_dir": "~/logs",
"log_name": "gemini_live_v2",
"session_timeout_sec": 660,
"max_reconnect_delay_sec": 30,
"max_consecutive_errors": 10,
"no_messages_timeout_sec": 30
},
"mic_udp": {
"_comment": "G1 built-in mic — UDP multicast subscriber",
"group": "239.168.123.161",
"port": 5555,
"buffer_max_bytes": 64000,
"read_timeout_sec": 0.04,
"socket_timeout_sec": 1.0
},
"speaker": {
"_comment": "G1 built-in speaker — AudioClient.PlayStream wrapper",
"app_name": "sanad",
"begin_stream_pause_sec": 0.15,
"wait_finish_margin_sec": 0.3
},
"vad": {
"_comment": "Gemini Live server-side voice-activity-detection config",
"start_sensitivity": "START_SENSITIVITY_HIGH",
"end_sensitivity": "END_SENSITIVITY_LOW",
"prefix_padding_ms": 20,
"silence_duration_ms": 200
},
"barge_in": {
"threshold": 500,
"loud_chunks_needed": 3,
"cooldown_sec": 0.3,
"echo_suppress_below": 500,
"ai_speak_grace_sec": 0.15
},
"recording": {
"enabled": true,
"dir_relative": "data/recordings"
},
"system_prompt": {
"_comment": "Persona filename lives in core.script_files.persona; default text in core.gemini_defaults.default_system_prompt. This section is now metadata-only."
},
"typed_replay": {
"_comment": "voice/typed_replay.py — max_text_len comes from dashboard.api_input",
"monitor_chunk_size": 512,
"monitor_tail_sec": 0.2
},
"live_gemini_subprocess": {
"_comment": "voice/live_gemini_subprocess.py — LiveGeminiSubprocess",
"log_tail_size": 2000,
"transcript_tail_size": 30,
"log_name": "live_gemini_subprocess",
"stop_timeout_sec": 3.0,
"terminate_timeout_sec": 2.0,
"noisy_prefixes": [
"ALSA lib ",
"Expression 'alsa_",
"Cannot connect to server socket",
"jack server is not running"
],
"noisy_fragments": [
"Unknown PCM",
"Evaluate error",
"snd_pcm_open_noupdate",
"PaAlsaStream",
"snd_config_evaluate",
"snd_func_refer"
]
},
"live_voice_loop": {
"_comment": "voice/live_voice_loop.py — arm phrase dispatcher. arm_txt filename comes from core.script_files.arm_phrases",
"trigger_log_size": 100,
"poll_interval_sec": 0.1,
"deferred_default": false
},
"local_tts": {
"_comment": "voice/local_tts.py — offline SpeechT5 TTS (model + HiFi-GAN vocoder + x-vector speaker embedding)",
"model_subdir": "speecht5_tts_clartts_ar",
"vocoder_subdir": "speecht5_hifigan",
"xvector_filename": "arabic_xvector_embedding.pt",
"sample_rate": 16000,
"channels": 1
},
"gemini_client": {
"_comment": "voice/gemini_client.py — default_system_prompt comes from core.gemini_defaults",
"recv_timeout_sec": 30,
"reconnect_max_attempts": 3,
"reconnect_initial_delay_sec": 1.0,
"reconnect_max_delay_sec": 10.0
},
"asr_buffer": {
"_comment": "text_utils.maybe_trigger_arm state machine defaults",
"window_sec": 2.0,
"short_token_bonus_sec": 1.0,
"join_no_space_maxlen": 2,
"max_chars": 120,
"stream_max_chars": 80,
"trigger_dedup_window_sec": 2.0,
"pending_arm_ttl_sec": 6.0,
"pending_arm_fallback_sec": 0.65,
"dup_call_window_sec": 0.25,
"dup_asr_repeat_window_sec": 0.9
}
}