{
  "_description": "Tunables for local/* — fully on-device voice pipeline (Silero VAD → Whisper → Qwen via Ollama or llama.cpp → CosyVoice2). Loaded via core.config_loader.load('local').",

  "subprocess": {
    "_comment": "local/subprocess.py — LocalSubprocess supervisor. Mirrors gemini/subprocess.py. IMPORTANT: python_bin points at the `local` conda env (Python 3.8 + Jetson CUDA torch) so CosyVoice and Whisper run on the GPU, while the dashboard/Gemini stack stays in gemini_sdk (Python 3.10).",
    "python_bin": "/home/unitree/miniconda3/envs/local/bin/python",
    "log_tail_size": 2000,
    "transcript_tail_size": 30,
    "log_name": "local_subprocess",
    "stop_timeout_sec": 5.0,
    "terminate_timeout_sec": 3.0,
    "noisy_prefixes": [
      "ALSA lib ",
      "Expression 'alsa_",
      "Cannot connect to server socket",
      "jack server is not running"
    ],
    "noisy_fragments": [
      "Unknown PCM",
      "Evaluate error",
      "snd_pcm_open_noupdate",
      "PaAlsaStream"
    ]
  },

  "vad": {
    "_comment": "Silero VAD — CPU. Emits speech_start / speech_end events.",
    "sample_rate": 16000,
    "frame_ms": 32,
    "threshold": 0.55,
    "min_silence_ms": 400,
    "min_speech_ms": 250,
    "pad_start_ms": 200,
    "pad_end_ms": 200,
    "device": "cpu"
  },

  "stt": {
    "_comment": "faster-whisper large-v3-turbo — INT8 weights with FP16 compute (compute_type int8_float16) on GPU.",
    "model_name": "large-v3-turbo",
    "model_subdir": "faster-whisper-large-v3-turbo",
    "device": "cuda",
    "compute_type": "int8_float16",
    "beam_size": 1,
    "language": null,
    "vad_filter": false,
    "no_speech_threshold": 0.6,
    "min_utterance_chars": 2,
    "temperature": 0.0
  },

  "llm": {
    "_comment": "Qwen 2.5 Instruct via Ollama (default) or a self-managed llama.cpp server. Set `backend` to choose.",
    "backend": "ollama",

    "_ollama_comment": "Ollama daemon — assumes `ollama serve` is running; run `ollama pull qwen2.5:1.5b` to fetch the model.",
    "ollama_host": "127.0.0.1",
    "ollama_port": 11434,
    "ollama_model": "qwen2.5:1.5b",
    "ollama_keep_alive": "5m",

    "_llamacpp_comment": "Self-managed llama-server subprocess. Only used when backend='llama_cpp'.",
    "model_subdir": "qwen2.5-1.5b-instruct-q4_k_m.gguf",
    "server_binary": "llama-server",
    "host": "127.0.0.1",
    "port": 8080,
    "n_gpu_layers": 99,
    "ctx_size": 2048,
    "threads": 4,
    "startup_timeout_sec": 30,

    "_shared_comment": "Generation params — apply to both backends.",
    "request_timeout_sec": 30,
    "max_tokens": 200,
    "temperature": 0.7,
    "top_p": 0.9,
    "stop": ["<|im_end|>", "\n\n\n"],
    "chunk_delimiters": ".,?!؟،",
    "chunk_min_chars": 8
  },

  "tts": {
    "_comment": "CosyVoice2 0.5B streaming — GPU. Uses a 3s reference WAV for voice cloning.",
    "model_subdir": "CosyVoice2-0.5B",
    "reference_wav_subdir": "khaleeji_reference_3s.wav",
    "reference_prompt": "",
    "stream_chunk_sec": 0.25,
    "sample_rate": 16000,
    "queue_max": 3,
    "device": "cuda"
  }
}