diff --git a/Brain/marcus_brain.py b/Brain/marcus_brain.py
index 9b351b3..9c47731 100644
--- a/Brain/marcus_brain.py
+++ b/Brain/marcus_brain.py
@@ -529,8 +529,16 @@ def run_terminal():
         return str(v)
 
     W = 58
+    LEFT_W = 28
+    RIGHT_W = (W - 2) - LEFT_W  # visible chars available in the right column
+
+    def _pad(s: str, width: int) -> str:
+        """ljust by visible width, treating ANSI colour escapes as zero-width."""
+        visible = len(_strip_ansi(s))
+        return s + " " * max(0, width - visible)
+
     print("\n\n" + "╔" + "═" * (W-2) + "╗")
-    print("║" + " SANAD — AI BRAIN READY".ljust(W-2) + "║")
+    print("║" + _pad(" SANAD — AI BRAIN READY", W-2) + "║")
     print("╠" + "═" * (W-2) + "╣")
     left = [("model", status["model"]),
             ("voice", _fmt(status["voice"])),
@@ -542,12 +550,10 @@ def run_terminal():
     for i in range(max(len(left), len(right))):
         l = f" {left[i][0]:<8}: {left[i][1]}" if i < len(left) else ""
         r = f" {right[i][0]:<8}: {right[i][1]}" if i < len(right) else ""
-        # account for ANSI escapes when padding the left column
-        pad = 28 + (len(l) - len(_strip_ansi(l)))
-        print("║" + l.ljust(pad) + r.ljust(W - 2 - pad) + "║")
+        print("║" + _pad(l, LEFT_W) + _pad(r, RIGHT_W) + "║")
     print("╠" + "═" * (W-2) + "╣")
-    print("║" + " Type a command, or say \"Sanad, \".".ljust(W-2) + "║")
-    print("║" + " help · example · yolo · patrol · auto on/off · q".ljust(W-2) + "║")
+    print("║" + _pad(" Type a command, or say \"Sanad, \".", W-2) + "║")
+    print("║" + _pad(" help · example · yolo · test_tts · auto on/off · q", W-2) + "║")
     print("╚" + "═" * (W-2) + "╝\n")
 
     try:
@@ -574,6 +580,22 @@ def run_terminal():
                 _audio_api._unmute_mic()
                 print(" Mic unmuted")
                 continue
+            if cmd.lower().startswith("test_tts"):
+                # Probe speaker IDs to find which one speaks English on this
+                # firmware. Usage: `test_tts` (runs 0, 1, 2) or `test_tts 1`.
+                if _audio_api is None or _audio_api._tts_engine is None:
+                    print(" Voice is not initialized")
+                    continue
+                parts = cmd.split()
+                ids = [int(x) for x in parts[1:]] if len(parts) > 1 else [0, 1, 2]
+                phrase = "Hello, I am Sanad."
+                for sid in ids:
+                    print(f" → speaker_id = {sid}")
+                    _audio_api._tts_engine.speak(phrase, speaker_id=sid, block=True)
+                    time.sleep(0.3)
+                print(' Pick the ID that sounded English and set it in')
+                print(' Config/config_Voice.json :: tts.builtin_speaker_id')
+                continue
             result = process_command(cmd)
             sp = result.get("speak", "") if isinstance(result, dict) else ""
             if sp and _audio_api:
diff --git a/Config/config_Voice.json b/Config/config_Voice.json
index b5d9a3a..99e6ac3 100644
--- a/Config/config_Voice.json
+++ b/Config/config_Voice.json
@@ -6,7 +6,7 @@
   },
   "stt": {
     "wake_model": "tiny",
-    "command_model": "small",
+    "command_model": "tiny",
     "wake_words_en": ["sanad", "sannad", "sanat", "sunnat"],
     "language": "en",
     "command_timeout_sec": 10,
diff --git a/Voice/builtin_tts.py b/Voice/builtin_tts.py
index 32ab3d5..677d5e1 100644
--- a/Voice/builtin_tts.py
+++ b/Voice/builtin_tts.py
@@ -1,23 +1,25 @@
 """
-builtin_tts.py — Unitree G1 built-in TTS (English only)
-========================================================
+builtin_tts.py — Unitree G1 built-in TTS (English)
+===================================================
 Thin wrapper around AudioClient.TtsMaker(text, speaker_id). The G1's
 on-board TTS engine synthesizes and plays directly through the body
 speaker — no internet, no MP3/WAV roundtrip, no audio SDK plumbing on
 our side.
 
-Supported languages (firmware-side):
-    English — works (Marcus uses this)
-    Chinese — works (unused)
-    Arabic — silently falls back to Chinese (unusable — we refuse these)
+Speaker IDs — IMPORTANT:
+    The `speaker_id` argument is *not* a language selector; it's a voice
+    bank. On some G1 firmware revisions, speaker_id 0 produces a Mandarin
+    voice even for Latin-script input. If `Config/config_Voice.json::
+    tts.builtin_speaker_id` gives you Chinese output, try 1 or 2.
 
-Signature:
-    client.TtsMaker(text: str, speaker_id: int) -> int   # 0 = success
-    speaker_id ∈ {0, 1, 2} — different voice timbres
+    Quick probe on the robot:
+        python3 /home/unitree/Marcus/Voice/builtin_tts.py 0 1 2
+    (plays "Hello, I am Sanad" once per speaker_id so you can pick the
+    one that sounds like English, then set that in config_Voice.json).
 
 Usage:
     from Voice.builtin_tts import BuiltinTTS
-    tts = BuiltinTTS(audio_client)
-    tts.speak("Hello, I am Sanad", speaker_id=0)
+    tts = BuiltinTTS(audio_client, default_speaker_id=1)
+    tts.speak("Hello, I am Sanad")
 """
 
 from __future__ import annotations
@@ -86,3 +88,33 @@ class BuiltinTTS:
             time.sleep(duration)
 
         return code
+
+
+# ─── STANDALONE PROBE ────────────────────────────────────────────────
+# Usage on the robot (in the marcus conda env):
+#     python3 Voice/builtin_tts.py          # plays all 3 speaker IDs
+#     python3 Voice/builtin_tts.py 1 2      # plays only IDs 1 and 2
+# Pick whichever ID sounds like English and set it in
+# Config/config_Voice.json :: tts.builtin_speaker_id
+
+if __name__ == "__main__":
+    import sys
+    from unitree_sdk2py.core.channel import ChannelFactoryInitialize
+    from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient
+
+    ChannelFactoryInitialize(0, "eth0")
+    ac = AudioClient()
+    ac.SetTimeout(10.0)
+    ac.Init()
+    ac.SetVolume(100)
+
+    ids = [int(x) for x in sys.argv[1:]] if len(sys.argv) > 1 else [0, 1, 2]
+    phrase = "Hello, I am Sanad."
+    print(f"\nProbing TtsMaker with text: {phrase!r}\n")
+    tts = BuiltinTTS(ac)
+    for sid in ids:
+        print(f" → speaker_id = {sid}")
+        tts.speak(phrase, speaker_id=sid, block=True)
+        time.sleep(0.5)
+    print("\nDone. Pick the speaker_id that sounded like English and put it in")
+    print(" Config/config_Voice.json :: tts.builtin_speaker_id")
diff --git a/run_marcus.py b/run_marcus.py
index 0db6bc2..09f3105 100644
--- a/run_marcus.py
+++ b/run_marcus.py
@@ -3,8 +3,17 @@ run_marcus.py — Marcus AI Brain (terminal mode)
 
 Usage: python3 run_marcus.py
 """
-import sys
 import os
+import sys
+import warnings
+
+# Silence known-harmless third-party deprecation warnings before ANY heavy
+# import fires them. Keeps the terminal dashboard readable.
+#   - TypedStorage   : fires from torch during yolov8m.pt checkpoint load
+#   - torch.cuda.amp : fires in ultralytics when FP16 is enabled on Jetson torch 2.1
+warnings.filterwarnings("ignore", message=".*TypedStorage is deprecated.*")
+warnings.filterwarnings("ignore", message=".*torch\\.cuda\\.amp.*")
+os.environ.setdefault("PYTHONWARNINGS", "ignore::UserWarning:torch._utils")
 
 PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
 if PROJECT_ROOT not in sys.path:
diff --git a/start_ollama.sh b/start_ollama.sh
new file mode 100755
index 0000000..922d97d
--- /dev/null
+++ b/start_ollama.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# start_ollama.sh — launch Ollama with Jetson-friendly memory settings
+#
+# The Jetson Orin NX has 16 GB unified CPU+GPU memory. When Marcus + YOLO +
+# Whisper + Holosoma + the camera + audio all run alongside Qwen2.5-VL,
+# the compute-graph OOMs the llama runner and Linux kills the biggest
+# process (often Holosoma — which is a safety problem for locomotion).
+#
+# These env vars cut Ollama's memory footprint:
+#   OLLAMA_FLASH_ATTENTION=1    ~30 % less memory for attention tensors
+#   OLLAMA_KV_CACHE_TYPE=q8_0   quantize KV cache (halves it)
+#   OLLAMA_KEEP_ALIVE=2m        keep the model warm for 2 min then evict
+#                               (adjust if cold-load lag matters more
+#                               than idle memory)
+#   OLLAMA_MAX_LOADED_MODELS=1  never hold two VL models at once
+#
+# Usage:
+#   ./start_ollama.sh        # starts server in background, logs to /tmp/ollama.log
+#   ./start_ollama.sh --fg   # runs in foreground (for debugging)
+
+pkill -f "ollama (runner|serve)" 2>/dev/null
+sleep 1
+
+export OLLAMA_FLASH_ATTENTION=1
+export OLLAMA_KV_CACHE_TYPE=q8_0
+export OLLAMA_KEEP_ALIVE=2m
+export OLLAMA_MAX_LOADED_MODELS=1
+
+if [[ "$1" == "--fg" ]]; then
+    echo "Running ollama in foreground..."
+    ollama serve
+else
+    ollama serve > /tmp/ollama.log 2>&1 &
+    sleep 3
+    if curl -sf http://localhost:11434/api/version > /dev/null; then
+        echo "✓ Ollama started (pid $(pgrep -f 'ollama serve'))"
+        echo " logs: tail -f /tmp/ollama.log"
+        echo " stop: pkill -f 'ollama serve'"
+    else
+        echo "✗ Ollama failed to start — see /tmp/ollama.log"
+        exit 1
+    fi
+fi