Update 2026-04-22 12:08:03

2026-04-22 12:08:04 +04:00 · 2026-04-22 12:08:04 +04:00 · dcf5f9f39b
commit dcf5f9f39b
parent 1c994fa175
5 changed files with 125 additions and 19 deletions
--- a/Brain/marcus_brain.py
+++ b/Brain/marcus_brain.py
@ -529,8 +529,16 @@ def run_terminal():
        return str(v)
    W = 58
    LEFT_W  = 28
    RIGHT_W = (W - 2) - LEFT_W   # visible chars available in the right column
    def _pad(s: str, width: int) -> str:
        """Right-pad *s* with spaces until it spans *width* visible columns.

        The visible length is measured on ``_strip_ansi(s)``, so ANSI colour
        escapes contribute nothing to the width. A string that already
        fills (or overflows) *width* is returned unchanged — it is never
        truncated.
        """
        missing = width - len(_strip_ansi(s))
        if missing <= 0:
            return s
        return s + " " * missing
    print("\n\n" + "╔" + "═" * (W-2) + "╗")
-    print("║" + "   SANAD — AI BRAIN READY".ljust(W-2) + "║")
+    print("║" + _pad("   SANAD — AI BRAIN READY", W-2) + "║")
    print("╠" + "═" * (W-2) + "╣")
    left  = [("model",    status["model"]),
             ("voice",    _fmt(status["voice"])),
@ -542,12 +550,10 @@ def run_terminal():
    for i in range(max(len(left), len(right))):
        l = f"  {left[i][0]:<8}: {left[i][1]}"        if i < len(left)  else ""
        r = f"  {right[i][0]:<8}: {right[i][1]}"      if i < len(right) else ""
-        # account for ANSI escapes when padding the left column
+        print("║" + _pad(l, LEFT_W) + _pad(r, RIGHT_W) + "║")
        pad = 28 + (len(l) - len(_strip_ansi(l)))
        print("║" + l.ljust(pad) + r.ljust(W - 2 - pad) + "║")
    print("╠" + "═" * (W-2) + "╣")
-    print("║" + "  Type a command, or say \"Sanad, <command>\".".ljust(W-2) + "║")
+    print("║" + _pad("  Type a command, or say \"Sanad, <command>\".", W-2) + "║")
-    print("║" + "  help · example · yolo · patrol · auto on/off · q".ljust(W-2) + "║")
+    print("║" + _pad("  help · example · yolo · test_tts · auto on/off · q", W-2) + "║")
    print("╚" + "═" * (W-2) + "╝\n")
    try:
@ -574,6 +580,22 @@ def run_terminal():
                    _audio_api._unmute_mic()
                    print("  Mic unmuted")
                continue
            if cmd.lower().startswith("test_tts"):
                # Probe speaker IDs to find which one speaks English on this
                # firmware. Usage: `test_tts` (runs 0, 1, 2) or `test_tts 1`.
                if _audio_api is None or _audio_api._tts_engine is None:
                    print("  Voice is not initialized")
                    continue
                parts = cmd.split()
                ids = [int(x) for x in parts[1:]] if len(parts) > 1 else [0, 1, 2]
                phrase = "Hello, I am Sanad."
                for sid in ids:
                    print(f"  → speaker_id = {sid}")
                    _audio_api._tts_engine.speak(phrase, speaker_id=sid, block=True)
                    time.sleep(0.3)
                print('  Pick the ID that sounded English and set it in')
                print('  Config/config_Voice.json :: tts.builtin_speaker_id')
                continue
            result = process_command(cmd)
            sp = result.get("speak", "") if isinstance(result, dict) else ""
            if sp and _audio_api:
--- a/Config/config_Voice.json
+++ b/Config/config_Voice.json
@ -6,7 +6,7 @@
  },
  "stt": {
    "wake_model": "tiny",
-    "command_model": "small",
+    "command_model": "tiny",
    "wake_words_en": ["sanad", "sannad", "sanat", "sunnat"],
    "language": "en",
    "command_timeout_sec": 10,
--- a/Voice/builtin_tts.py
+++ b/Voice/builtin_tts.py
@ -1,23 +1,25 @@
 """
-builtin_tts.py — Unitree G1 built-in TTS (English only)
+builtin_tts.py — Unitree G1 built-in TTS (English)
-========================================================
+===================================================
 Thin wrapper around AudioClient.TtsMaker(text, speaker_id). The G1's on-board
 TTS engine synthesizes and plays directly through the body speaker — no
 internet, no MP3/WAV roundtrip, no audio SDK plumbing on our side.
-Supported languages (firmware-side):
+Speaker IDs — IMPORTANT:
-    English  — works  (Marcus uses this)
+    The `speaker_id` argument is *not* a language selector; it's a voice
-    Chinese  — works  (unused)
+    bank. On some G1 firmware revisions, speaker_id 0 produces a Mandarin
-    Arabic   — silently falls back to Chinese (unusable — we refuse these)
+    voice even for Latin-script input. If `Config/config_Voice.json::
    tts.builtin_speaker_id` gives you Chinese output, try 1 or 2.
-Signature:
+    Quick probe on the robot:
-    client.TtsMaker(text: str, speaker_id: int) -> int    # 0 = success
+        python3 /home/unitree/Marcus/Voice/builtin_tts.py 0 1 2
-    speaker_id ∈ {0, 1, 2}   — different voice timbres
+    (plays "Hello, I am Sanad" once per speaker_id so you can pick the
    one that sounds like English, then set that in config_Voice.json).
 Usage:
    from Voice.builtin_tts import BuiltinTTS
-    tts = BuiltinTTS(audio_client)
+    tts = BuiltinTTS(audio_client, default_speaker_id=1)
-    tts.speak("Hello, I am Sanad", speaker_id=0)
+    tts.speak("Hello, I am Sanad")
 """
 from __future__ import annotations
@ -86,3 +88,33 @@ class BuiltinTTS:
            time.sleep(duration)
        return code
 # ─── STANDALONE PROBE ────────────────────────────────────────────────
 # Usage on the robot (in the marcus conda env):
 #     python3 Voice/builtin_tts.py              # plays all 3 speaker IDs
 #     python3 Voice/builtin_tts.py 1 2          # plays only IDs 1 and 2
 # Pick whichever ID sounds like English and set it in
 #     Config/config_Voice.json :: tts.builtin_speaker_id
 if __name__ == "__main__":
    import sys
    # Parse the requested speaker IDs *before* touching the robot, so a typo
    # on the command line fails fast with a usage hint instead of raising a
    # ValueError traceback after the audio client has been initialised.
    try:
        ids = [int(x) for x in sys.argv[1:]] if len(sys.argv) > 1 else [0, 1, 2]
    except ValueError:
        sys.exit(f"usage: {sys.argv[0]} [speaker_id ...]   (speaker IDs must be integers)")
    from unitree_sdk2py.core.channel import ChannelFactoryInitialize
    from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient
    # Bring up the SDK channel and the audio client used by BuiltinTTS.
    ChannelFactoryInitialize(0, "eth0")
    ac = AudioClient()
    ac.SetTimeout(10.0)
    ac.Init()
    ac.SetVolume(100)
    phrase = "Hello, I am Sanad."
    print(f"\nProbing TtsMaker with text: {phrase!r}\n")
    tts = BuiltinTTS(ac)
    # Speak the same phrase once per speaker ID; block=True makes each call
    # wait for playback, and the short sleep separates consecutive voices.
    for sid in ids:
        print(f"  → speaker_id = {sid}")
        tts.speak(phrase, speaker_id=sid, block=True)
        time.sleep(0.5)
    print("\nDone. Pick the speaker_id that sounded like English and put it in")
    print("   Config/config_Voice.json :: tts.builtin_speaker_id")
--- a/run_marcus.py
+++ b/run_marcus.py
@ -3,8 +3,17 @@
 run_marcus.py — Marcus AI Brain (terminal mode)
 Usage: python3 run_marcus.py
 """
 import sys
 import os
 import sys
 import warnings
 # Silence known-harmless third-party deprecation warnings before ANY heavy
 # import fires them. Keeps the terminal dashboard readable.
 #   - TypedStorage  : fires from torch during yolov8m.pt checkpoint load
 #   - torch.cuda.amp : fires in ultralytics when FP16 is enabled on Jetson torch 2.1
 warnings.filterwarnings("ignore", message=".*TypedStorage is deprecated.*")
 warnings.filterwarnings("ignore", message=".*torch\\.cuda\\.amp.*")
 os.environ.setdefault("PYTHONWARNINGS", "ignore::UserWarning:torch._utils")
 PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
 if PROJECT_ROOT not in sys.path:
--- a/start_ollama.sh
+++ b/start_ollama.sh
@ -0,0 +1,43 @@
 #!/usr/bin/env bash
 # start_ollama.sh — launch Ollama with Jetson-friendly memory settings
 #
 # The Jetson Orin NX has 16 GB unified CPU+GPU memory. When Marcus + YOLO +
 # Whisper + Holosoma + the camera + audio all run alongside Qwen2.5-VL,
 # the compute-graph OOMs the llama runner and Linux kills the biggest
 # process (often Holosoma — which is a safety problem for locomotion).
 #
 # These env vars cut Ollama's memory footprint:
 #   OLLAMA_FLASH_ATTENTION=1      ~30 % less memory for attention tensors
 #   OLLAMA_KV_CACHE_TYPE=q8_0     quantize KV cache (halves it)
 #   OLLAMA_KEEP_ALIVE=2m          keep the model warm for 2 min then evict
 #                                 (adjust if cold-load lag matters more
 #                                  than idle memory)
 #   OLLAMA_MAX_LOADED_MODELS=1    never hold two VL models at once
 #
 # Usage:
 #   ./start_ollama.sh            # starts server in background, logs to /tmp/ollama.log
 #   ./start_ollama.sh --fg       # runs in foreground (for debugging)
 #
 # Exit status (background mode): 0 once the HTTP API answers, 1 if the
 # server exits or does not answer within the readiness window.
 # Kill any already-running Ollama server/runner before starting a fresh one.
 pkill -f "ollama (runner|serve)" 2>/dev/null
 sleep 1
 export OLLAMA_FLASH_ATTENTION=1
 export OLLAMA_KV_CACHE_TYPE=q8_0
 export OLLAMA_KEEP_ALIVE=2m
 export OLLAMA_MAX_LOADED_MODELS=1
 if [[ "${1:-}" == "--fg" ]]; then
    echo "Running ollama in foreground..."
    ollama serve
 else
    ollama serve > /tmp/ollama.log 2>&1 &
    server_pid=$!
    # Poll the version endpoint (up to ~15 s) instead of sleeping a fixed
    # 3 s: a slow start no longer produces a false failure, and a fast
    # start returns as soon as the API answers.
    ready=0
    for _ in {1..30}; do
        if curl -sf http://localhost:11434/api/version > /dev/null; then
            ready=1
            break
        fi
        # Stop waiting as soon as the server process itself has exited.
        kill -0 "$server_pid" 2>/dev/null || break
        sleep 0.5
    done
    if (( ready )); then
        # Report the PID of the process we launched rather than whatever
        # `pgrep -f` happens to match (which may be several PIDs).
        echo "✓ Ollama started (pid $server_pid)"
        echo "  logs:  tail -f /tmp/ollama.log"
        echo "  stop:  pkill -f 'ollama serve'"
    else
        echo "✗ Ollama failed to start — see /tmp/ollama.log"
        exit 1
    fi
 fi