diff --git a/Brain/marcus_brain.py b/Brain/marcus_brain.py
index 9b351b3..9c47731 100644
--- a/Brain/marcus_brain.py
+++ b/Brain/marcus_brain.py
@@ -529,8 +529,16 @@ def run_terminal():
         return str(v)
 
     W = 58
+    LEFT_W = 28
+    RIGHT_W = (W - 2) - LEFT_W  # visible chars available in the right column
+
+    def _pad(s: str, width: int) -> str:
+        """ljust by visible width, treating ANSI colour escapes as zero-width."""
+        visible = len(_strip_ansi(s))
+        return s + " " * max(0, width - visible)
+
     print("\n\n" + "╔" + "═" * (W-2) + "╗")
-    print("║" + " SANAD — AI BRAIN READY".ljust(W-2) + "║")
+    print("║" + _pad(" SANAD — AI BRAIN READY", W-2) + "║")
     print("╠" + "═" * (W-2) + "╣")
     left = [("model", status["model"]),
             ("voice", _fmt(status["voice"])),
@@ -542,12 +550,10 @@ def run_terminal():
     for i in range(max(len(left), len(right))):
         l = f" {left[i][0]:<8}: {left[i][1]}" if i < len(left) else ""
         r = f" {right[i][0]:<8}: {right[i][1]}" if i < len(right) else ""
-        # account for ANSI escapes when padding the left column
-        pad = 28 + (len(l) - len(_strip_ansi(l)))
-        print("║" + l.ljust(pad) + r.ljust(W - 2 - pad) + "║")
+        print("║" + _pad(l, LEFT_W) + _pad(r, RIGHT_W) + "║")
     print("╠" + "═" * (W-2) + "╣")
-    print("║" + " Type a command, or say \"Sanad, \".".ljust(W-2) + "║")
-    print("║" + " help · example · yolo · patrol · auto on/off · q".ljust(W-2) + "║")
+    print("║" + _pad(" Type a command, or say \"Sanad, \".", W-2) + "║")
+    print("║" + _pad(" help · example · yolo · test_tts · auto on/off · q", W-2) + "║")
     print("╚" + "═" * (W-2) + "╝\n")
 
     try:
@@ -574,6 +580,22 @@ def run_terminal():
                 _audio_api._unmute_mic()
                 print(" Mic unmuted")
                 continue
+            if cmd.lower().startswith("test_tts"):
+                # Probe speaker IDs to find which one speaks English on this
+                # firmware. Usage: `test_tts` (runs 0, 1, 2) or `test_tts 1`.
+                if _audio_api is None or _audio_api._tts_engine is None:
+                    print(" Voice is not initialized")
+                    continue
+                parts = cmd.split()
+                ids = [int(x) for x in parts[1:]] if len(parts) > 1 else [0, 1, 2]
+                phrase = "Hello, I am Sanad."
+                for sid in ids:
+                    print(f" → speaker_id = {sid}")
+                    _audio_api._tts_engine.speak(phrase, speaker_id=sid, block=True)
+                    time.sleep(0.3)
+                print(' Pick the ID that sounded English and set it in')
+                print(' Config/config_Voice.json :: tts.builtin_speaker_id')
+                continue
             result = process_command(cmd)
             sp = result.get("speak", "") if isinstance(result, dict) else ""
             if sp and _audio_api:
diff --git a/Config/config_Voice.json b/Config/config_Voice.json
index b5d9a3a..99e6ac3 100644
--- a/Config/config_Voice.json
+++ b/Config/config_Voice.json
@@ -6,7 +6,7 @@
   },
   "stt": {
     "wake_model": "tiny",
-    "command_model": "small",
+    "command_model": "tiny",
     "wake_words_en": ["sanad", "sannad", "sanat", "sunnat"],
     "language": "en",
     "command_timeout_sec": 10,
diff --git a/Voice/builtin_tts.py b/Voice/builtin_tts.py
index 32ab3d5..677d5e1 100644
--- a/Voice/builtin_tts.py
+++ b/Voice/builtin_tts.py
@@ -1,23 +1,25 @@
 """
-builtin_tts.py — Unitree G1 built-in TTS (English only)
-========================================================
+builtin_tts.py — Unitree G1 built-in TTS (English)
+===================================================
 Thin wrapper around AudioClient.TtsMaker(text, speaker_id). The G1's
 on-board TTS engine synthesizes and plays directly through the body
 speaker — no internet, no MP3/WAV roundtrip, no audio SDK plumbing on
 our side.
 
-Supported languages (firmware-side):
-    English — works (Marcus uses this)
-    Chinese — works (unused)
-    Arabic — silently falls back to Chinese (unusable — we refuse these)
+Speaker IDs — IMPORTANT:
+    The `speaker_id` argument is *not* a language selector; it's a voice
+    bank. On some G1 firmware revisions, speaker_id 0 produces a Mandarin
+    voice even for Latin-script input. If `Config/config_Voice.json::
+    tts.builtin_speaker_id` gives you Chinese output, try 1 or 2.
 
-Signature:
-    client.TtsMaker(text: str, speaker_id: int) -> int   # 0 = success
-    speaker_id ∈ {0, 1, 2} — different voice timbres
+    Quick probe on the robot:
+        python3 /home/unitree/Marcus/Voice/builtin_tts.py 0 1 2
+    (plays "Hello, I am Sanad" once per speaker_id so you can pick the
+    one that sounds like English, then set that in config_Voice.json).
 
 Usage:
     from Voice.builtin_tts import BuiltinTTS
-    tts = BuiltinTTS(audio_client)
-    tts.speak("Hello, I am Sanad", speaker_id=0)
+    tts = BuiltinTTS(audio_client, default_speaker_id=1)
+    tts.speak("Hello, I am Sanad")
 """
 
 from __future__ import annotations
@@ -86,3 +88,33 @@ class BuiltinTTS:
             time.sleep(duration)
 
         return code
+
+
+# ─── STANDALONE PROBE ────────────────────────────────────────────────
+# Usage on the robot (in the marcus conda env):
+#     python3 Voice/builtin_tts.py          # plays all 3 speaker IDs
+#     python3 Voice/builtin_tts.py 1 2      # plays only IDs 1 and 2
+# Pick whichever ID sounds like English and set it in
+# Config/config_Voice.json :: tts.builtin_speaker_id
+
+if __name__ == "__main__":
+    import sys
+    from unitree_sdk2py.core.channel import ChannelFactoryInitialize
+    from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient
+
+    ChannelFactoryInitialize(0, "eth0")
+    ac = AudioClient()
+    ac.SetTimeout(10.0)
+    ac.Init()
+    ac.SetVolume(100)
+
+    ids = [int(x) for x in sys.argv[1:]] if len(sys.argv) > 1 else [0, 1, 2]
+    phrase = "Hello, I am Sanad."
+    print(f"\nProbing TtsMaker with text: {phrase!r}\n")
+    tts = BuiltinTTS(ac)
+    for sid in ids:
+        print(f" → speaker_id = {sid}")
+        tts.speak(phrase, speaker_id=sid, block=True)
+        time.sleep(0.5)
+    print("\nDone. Pick the speaker_id that sounded like English and put it in")
+    print(" Config/config_Voice.json :: tts.builtin_speaker_id")
diff --git a/run_marcus.py b/run_marcus.py
index 0db6bc2..09f3105 100644
--- a/run_marcus.py
+++ b/run_marcus.py
@@ -3,8 +3,17 @@ run_marcus.py — Marcus AI Brain (terminal mode)
 
 Usage: python3 run_marcus.py
 """
-import sys
 import os
+import sys
+import warnings
+
+# Silence known-harmless third-party deprecation warnings before ANY heavy
+# import fires them. Keeps the terminal dashboard readable.
+#   - TypedStorage   : fires from torch during yolov8m.pt checkpoint load
+#   - torch.cuda.amp : fires in ultralytics when FP16 is enabled on Jetson torch 2.1
+warnings.filterwarnings("ignore", message=".*TypedStorage is deprecated.*")
+warnings.filterwarnings("ignore", message=".*torch\\.cuda\\.amp.*")
+os.environ.setdefault("PYTHONWARNINGS", "ignore::UserWarning:torch._utils")
 
 PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
 if PROJECT_ROOT not in sys.path:
diff --git a/start_ollama.sh b/start_ollama.sh
new file mode 100755
index 0000000..922d97d
--- /dev/null
+++ b/start_ollama.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# start_ollama.sh — launch Ollama with Jetson-friendly memory settings
+#
+# The Jetson Orin NX has 16 GB unified CPU+GPU memory. When Marcus + YOLO +
+# Whisper + Holosoma + the camera + audio all run alongside Qwen2.5-VL,
+# the compute-graph OOMs the llama runner and Linux kills the biggest
+# process (often Holosoma — which is a safety problem for locomotion).
+#
+# These env vars cut Ollama's memory footprint:
+#   OLLAMA_FLASH_ATTENTION=1    ~30 % less memory for attention tensors
+#   OLLAMA_KV_CACHE_TYPE=q8_0   quantize KV cache (halves it)
+#   OLLAMA_KEEP_ALIVE=2m        keep the model warm for 2 min then evict
+#                               (adjust if cold-load lag matters more
+#                               than idle memory)
+#   OLLAMA_MAX_LOADED_MODELS=1  never hold two VL models at once
+#
+# Usage:
+#   ./start_ollama.sh        # starts server in background, logs to /tmp/ollama.log
+#   ./start_ollama.sh --fg   # runs in foreground (for debugging)
+
+pkill -f "ollama (runner|serve)" 2>/dev/null
+sleep 1
+
+export OLLAMA_FLASH_ATTENTION=1
+export OLLAMA_KV_CACHE_TYPE=q8_0
+export OLLAMA_KEEP_ALIVE=2m
+export OLLAMA_MAX_LOADED_MODELS=1
+
+if [[ "$1" == "--fg" ]]; then
+    echo "Running ollama in foreground..."
+    ollama serve
+else
+    ollama serve > /tmp/ollama.log 2>&1 &
+    sleep 3
+    if curl -sf http://localhost:11434/api/version > /dev/null; then
+        echo "✓ Ollama started (pid $(pgrep -f 'ollama serve'))"
+        echo " logs: tail -f /tmp/ollama.log"
+        echo " stop: pkill -f 'ollama serve'"
+    else
+        echo "✗ Ollama failed to start — see /tmp/ollama.log"
+        exit 1
+    fi
+fi