{ "_description": "Tunables for local/* — fully on-device voice pipeline (Silero VAD → Whisper → Qwen via llama.cpp → CosyVoice2). Loaded via core.config_loader.load('local').", "subprocess": { "_comment": "local/subprocess.py — LocalSubprocess supervisor. Mirrors gemini/subprocess.py. IMPORTANT: python_bin points at the `local` conda env (Python 3.8 + Jetson CUDA torch) so CosyVoice+Whisper run with GPU, while the dashboard/Gemini stack stays in gemini_sdk (Python 3.10).", "python_bin": "/home/unitree/miniconda3/envs/local/bin/python", "log_tail_size": 2000, "transcript_tail_size": 30, "log_name": "local_subprocess", "stop_timeout_sec": 5.0, "terminate_timeout_sec": 3.0, "noisy_prefixes": [ "ALSA lib ", "Expression 'alsa_", "Cannot connect to server socket", "jack server is not running" ], "noisy_fragments": [ "Unknown PCM", "Evaluate error", "snd_pcm_open_noupdate", "PaAlsaStream" ] }, "vad": { "_comment": "Silero VAD — CPU. Emits speech_start / speech_end events.", "sample_rate": 16000, "frame_ms": 32, "threshold": 0.55, "min_silence_ms": 400, "min_speech_ms": 250, "pad_start_ms": 200, "pad_end_ms": 200, "device": "cpu" }, "stt": { "_comment": "faster-whisper Large V3 Turbo, INT8 on GPU.", "model_name": "large-v3-turbo", "model_subdir": "faster-whisper-large-v3-turbo", "device": "cuda", "compute_type": "int8_float16", "beam_size": 1, "language": null, "vad_filter": false, "no_speech_threshold": 0.6, "min_utterance_chars": 2, "temperature": 0.0 }, "llm": { "_comment": "Qwen 2.5 Instruct via Ollama (default) OR self-managed llama.cpp. Set backend to pick.", "backend": "ollama", "_ollama_comment": "Ollama daemon — assumes `ollama serve` is running; `ollama pull qwen2.5:1.5b` to fetch.", "ollama_host": "127.0.0.1", "ollama_port": 11434, "ollama_model": "qwen2.5:1.5b", "ollama_keep_alive": "5m", "_llamacpp_comment": "Self-managed llama-server subprocess. Only used when backend='llama_cpp'.", "model_subdir": "qwen2.5-1.5b-instruct-q4_k_m.gguf", "server_binary": "llama-server", "host": "127.0.0.1", "port": 8080, "n_gpu_layers": 99, "ctx_size": 2048, "threads": 4, "startup_timeout_sec": 30, "_shared_comment": "Generation params — both backends.", "request_timeout_sec": 30, "max_tokens": 200, "temperature": 0.7, "top_p": 0.9, "stop": ["<|im_end|>", "\n\n\n"], "chunk_delimiters": ".,?!؟،", "chunk_min_chars": 8 }, "tts": { "_comment": "CosyVoice2 0.5B streaming — GPU. Uses a 3s reference WAV for voice cloning.", "model_subdir": "CosyVoice2-0.5B", "reference_wav_subdir": "khaleeji_reference_3s.wav", "reference_prompt": "", "stream_chunk_sec": 0.25, "sample_rate": 16000, "queue_max": 3, "device": "cuda" } }