diff --git a/API/llava_api.py b/API/llava_api.py
index f431a65..22cd904 100644
--- a/API/llava_api.py
+++ b/API/llava_api.py
@@ -1,5 +1,24 @@
 """
-llava_api.py — LLaVA / Qwen VL query interface
+llava_api.py — Qwen-VL query interface (via Ollama)
+
+Three deployment modes, chosen via config_Brain.json:
+
+  1. subsystems.vlm = false
+     → every ask*() returns a safe fallback dict. Marcus runs in
+       regex-only "safe mode": no LLM load on the Jetson, no GPU/CPU
+       contention with Holosoma, and the robot won't fall from thrashing.
+       Vision questions just answer "Scene understanding is disabled
+       — Sanad is in safe mode." Everything else (movement, places,
+       patrol, autonomous) still works.
+
+  2. ollama_host = "http://127.0.0.1:11434"
+     subsystems.vlm = true
+     → Ollama runs on the Jetson. Old behavior — competes with
+       Holosoma for memory. Unsafe during walking with a 3B VL model.
+
+  3. ollama_host = "http://192.168.123.222:11434"
+     subsystems.vlm = true
+     → Ollama runs on the workstation. Jetson stays light, Holosoma
+       keeps its 50 Hz real-time deadline, and the brain still gets
+       full Qwen-VL. Best mode for demos / walking with conversation.
 """
 import json
 import ollama
@@ -9,12 +28,13 @@ from Core.config_loader import load_config
 
 _cfg = load_config("Brain")
-# Load prompts from YAML (the authoritative source — bilingual, complete)
 _yaml_path = Path(__file__).resolve().parent.parent / "Config" / "marcus_prompts.yaml"
 with open(_yaml_path, encoding="utf-8") as _f:
     _prompts = yaml.safe_load(_f)
 
 OLLAMA_MODEL = _cfg["ollama_model"]
+OLLAMA_HOST = _cfg.get("ollama_host", "http://127.0.0.1:11434")
+VLM_ENABLED = bool(_cfg.get("subsystems", {}).get("vlm", True))
 MAX_HISTORY = _cfg["max_history"]
 # Cap batch and context on every request. Without this, llama.cpp on Jetson
 # Orin NX allocates a ~7.5 GiB compute graph (defaults: batch 512, ctx 4096)
@@ -28,6 +48,14 @@
 PATROL_PROMPT = _prompts["patrol_prompt"]
 TALK_PROMPT = _prompts["talk_prompt"]
 VERIFY_PROMPT = _prompts["verify_prompt"]
 
+# Explicit Ollama client — lets us route to a remote host (e.g., workstation)
+# without relying on the OLLAMA_HOST env var being set in the launch shell.
+_client = ollama.Client(host=OLLAMA_HOST)
+
+# Safe-mode replies used when subsystems.vlm == false
+_VLM_OFF_TALK = "Scene understanding is disabled — Sanad is in safe mode."
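+# _VLM_OFF_EMPTY mirrors the action-dict contract that ask()/ask_talk()
+# return on success ("actions"/"arm"/"speak"/"abort"), so callers can hand
+# the safe-mode reply to the normal execution path; callers copy it with
+# dict(...) so a mutated result never leaks back into this template.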
+_VLM_OFF_EMPTY = {"actions": [], "arm": None, "speak": _VLM_OFF_TALK, "abort": None}
+
 # Conversation state
 _conversation_history = []
 _facts = []
@@ -48,6 +76,8 @@ def add_to_history(user_msg: str, assistant_msg: str):
 
 
 def call_llava(prompt: str, img_b64, num_predict: int = 200, use_history: bool = False) -> str:
+    if not VLM_ENABLED:
+        return ""  # safe-mode — caller must handle empty string
     messages = []
     if use_history and _conversation_history:
         messages.extend(_conversation_history)
@@ -55,13 +85,13 @@
     if img_b64:
         msg["images"] = [img_b64]
     messages.append(msg)
-    r = ollama.chat(model=OLLAMA_MODEL, messages=messages,
-                    options={
-                        "temperature": 0.0,
-                        "num_predict": num_predict,
-                        "num_batch": NUM_BATCH,
-                        "num_ctx": NUM_CTX,
-                    })
+    r = _client.chat(model=OLLAMA_MODEL, messages=messages,
+                     options={
+                         "temperature": 0.0,
+                         "num_predict": num_predict,
+                         "num_batch": NUM_BATCH,
+                         "num_ctx": NUM_CTX,
+                     })
     return r["message"]["content"].strip()
@@ -79,7 +109,9 @@
 
 
 def ask(command: str, img_b64) -> dict:
-    """Send command + camera frame to LLaVA with conversation history."""
+    """Send command + camera frame to the VLM with conversation history."""
+    if not VLM_ENABLED:
+        return dict(_VLM_OFF_EMPTY)
     try:
         facts_str = ("\nKnown facts: " + "; ".join(_facts) + ".") if _facts else ""
         raw = call_llava(MAIN_PROMPT.format(command=command, facts=facts_str), img_b64,
@@ -92,16 +124,19 @@
             return {"actions": [], "arm": None, "speak": raw, "abort": None}
         return d
     except Exception as ex:
-        print(f" LLaVA error: {ex}")
-        return {"actions": [], "arm": None, "speak": "Error.", "abort": None}
+        print(f" VLM error: {ex}")
+        return {"actions": [], "arm": None, "speak": "VLM error.", "abort": None}
 
 
 def ask_goal(goal: str, img_b64) -> dict:
-    """Ask LLaVA if goal is reached."""
+    """Ask the VLM if the goal is reached."""
+    if not VLM_ENABLED:
+        return {"reached": False, "next_move": "left", "duration": 0.5,
+                "speak": "VLM disabled — relying on YOLO fast-match only."}
     try:
         raw = call_llava(GOAL_PROMPT.format(goal=goal), img_b64,
                          num_predict=_cfg["num_predict_goal"])
-        print(f" LLaVA: {raw}")
+        print(f" VLM: {raw}")
         d = parse_json(raw)
         if d is None:
             text = raw.lower()
@@ -119,6 +154,8 @@
 
 def ask_talk(command: str, img_b64, facts: str = "") -> dict:
     """Handle talk-only commands using the YAML talk_prompt."""
+    if not VLM_ENABLED:
+        return dict(_VLM_OFF_EMPTY)
     try:
         prompt = TALK_PROMPT.format(command=command, facts=facts)
         raw = call_llava(prompt, img_b64, num_predict=_cfg["num_predict_talk"],
@@ -137,6 +174,9 @@
 
 def ask_verify(target: str, condition: str, img_b64) -> str:
     """Verify a condition on a detected target. Returns 'yes' or 'no'."""
+    if not VLM_ENABLED:
+        # Without VLM we can't verify compound conditions; trust the YOLO match.
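+        # Returning "no" would be the conservative alternative, but it would
+        # make every compound command fail permanently while the VLM is off.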
+ return "yes" try: prompt = VERIFY_PROMPT.format(target=target, condition=condition) raw = call_llava(prompt, img_b64, num_predict=_cfg["num_predict_verify"]) @@ -148,7 +188,10 @@ def ask_verify(target: str, condition: str, img_b64) -> str: def ask_patrol(img_b64) -> dict: - """Ask LLaVA to assess scene during patrol.""" + """Ask the VLM to assess the scene during patrol.""" + if not VLM_ENABLED: + return {"observation": "VLM off — patrolling without scene analysis.", + "alert": None, "next_move": "forward", "duration": 1.0} try: raw = call_llava(PATROL_PROMPT, img_b64, num_predict=_cfg["num_predict_patrol"]) d = parse_json(raw) diff --git a/Brain/marcus_brain.py b/Brain/marcus_brain.py index 9c47731..bb02a24 100644 --- a/Brain/marcus_brain.py +++ b/Brain/marcus_brain.py @@ -147,11 +147,19 @@ def init_brain(): _log("Brain initialized", "info", "brain") - # Warmup runs in a daemon thread so the dashboard + Command: prompt - # appear immediately. The first real user command will either hit a - # warm model (fast) or pay the cold-load itself (same as before). - import threading as _t - _t.Thread(target=_warmup_llava, daemon=True, name="llava-warmup").start() + # Skip warmup when VLM is off — there's no model to warm, and the + # dashboard should mention that Marcus is in safe mode. + from API.llava_api import VLM_ENABLED, OLLAMA_HOST + if not VLM_ENABLED: + print(" [VLM] disabled by config — safe mode (no Ollama load)") + else: + host_short = OLLAMA_HOST.replace("http://", "") + print(f" [VLM] target: {host_short} ({OLLAMA_MODEL})") + # Warmup runs in a daemon thread so the dashboard + Command: prompt + # appear immediately. The first real user command will either hit a + # warm model (fast) or pay the cold-load itself (same as before). + import threading as _t + _t.Thread(target=_warmup_llava, daemon=True, name="llava-warmup").start() # Global voice references @@ -540,7 +548,9 @@ def run_terminal(): print("\n\n" + "╔" + "═" * (W-2) + "╗") print("║" + _pad(" SANAD — AI BRAIN READY", W-2) + "║") print("╠" + "═" * (W-2) + "╣") + from API.llava_api import VLM_ENABLED left = [("model", status["model"]), + ("vlm", _fmt(VLM_ENABLED)), ("voice", _fmt(status["voice"])), ("camera", status["camera"])] right = [("yolo", _fmt(status["yolo"])), diff --git a/Config/config_Brain.json b/Config/config_Brain.json index de9ed36..cf85c53 100644 --- a/Config/config_Brain.json +++ b/Config/config_Brain.json @@ -1,12 +1,14 @@ { "ollama_model": "qwen2.5vl:3b", + "ollama_host": "http://127.0.0.1:11434", "max_history": 6, "num_batch": 128, "num_ctx": 2048, "subsystems": { - "lidar": true, - "voice": true, - "imgsearch": false, + "vlm": true, + "lidar": true, + "voice": true, + "imgsearch": false, "autonomous": true }, "num_predict_main": 120, diff --git a/Config/config_Voice.json b/Config/config_Voice.json index 99e6ac3..91635fb 100644 --- a/Config/config_Voice.json +++ b/Config/config_Voice.json @@ -1,7 +1,7 @@ { "tts": { "backend": "builtin_ttsmaker", - "builtin_speaker_id": 0, + "builtin_speaker_id": 2, "target_sample_rate": 16000 }, "stt": { diff --git a/Data/Brain/Sessions/session_029_2026-04-22/alerts.json b/Data/Brain/Sessions/session_029_2026-04-22/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_029_2026-04-22/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_029_2026-04-22/commands.json b/Data/Brain/Sessions/session_029_2026-04-22/commands.json new file mode 100644 index 0000000..5185d44 --- /dev/null +++ 
@@ -0,0 +1,8 @@
+[
+  {
+    "time": "10:54:15",
+    "cmd": "hi",
+    "response": "Hello! I am Sanad. How can I help you?",
+    "duration_s": 0.0
+  }
+]
\ No newline at end of file
diff --git a/Data/Brain/Sessions/session_029_2026-04-22/detections.json b/Data/Brain/Sessions/session_029_2026-04-22/detections.json
new file mode 100644
index 0000000..0637a08
--- /dev/null
+++ b/Data/Brain/Sessions/session_029_2026-04-22/detections.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/Data/Brain/Sessions/session_029_2026-04-22/places.json b/Data/Brain/Sessions/session_029_2026-04-22/places.json
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/Data/Brain/Sessions/session_029_2026-04-22/places.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/Data/Brain/Sessions/session_030_2026-04-22/alerts.json b/Data/Brain/Sessions/session_030_2026-04-22/alerts.json
new file mode 100644
index 0000000..0637a08
--- /dev/null
+++ b/Data/Brain/Sessions/session_030_2026-04-22/alerts.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/Data/Brain/Sessions/session_030_2026-04-22/commands.json b/Data/Brain/Sessions/session_030_2026-04-22/commands.json
new file mode 100644
index 0000000..0637a08
--- /dev/null
+++ b/Data/Brain/Sessions/session_030_2026-04-22/commands.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/Data/Brain/Sessions/session_030_2026-04-22/detections.json b/Data/Brain/Sessions/session_030_2026-04-22/detections.json
new file mode 100644
index 0000000..0637a08
--- /dev/null
+++ b/Data/Brain/Sessions/session_030_2026-04-22/detections.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/Data/Brain/Sessions/session_030_2026-04-22/places.json b/Data/Brain/Sessions/session_030_2026-04-22/places.json
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/Data/Brain/Sessions/session_030_2026-04-22/places.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/Data/Brain/Sessions/session_030_2026-04-22/summary.txt b/Data/Brain/Sessions/session_030_2026-04-22/summary.txt
new file mode 100644
index 0000000..8b9af93
--- /dev/null
+++ b/Data/Brain/Sessions/session_030_2026-04-22/summary.txt
@@ -0,0 +1,9 @@
+Session: session_030_2026-04-22
+Date: 2026-04-22 10:56
+Duration: 0m 37s
+Commands: 0
+YOLO detections: 0
+Alerts: 0
+Known places: none
+
+First commands:
\ No newline at end of file
diff --git a/Vision/marcus_imgsearch.py b/Vision/marcus_imgsearch.py
index 15aea51..5530f63 100644
--- a/Vision/marcus_imgsearch.py
+++ b/Vision/marcus_imgsearch.py
@@ -349,12 +349,14 @@
         try:
             if has_ref:
-                # Pass BOTH images: [reference, current_frame]
-                # num_batch/num_ctx mirror llava_api.py — without these
-                # caps the compute graph OOMs the runner on Jetson.
-                import ollama as _ollama
-                from API.llava_api import NUM_BATCH, NUM_CTX
-                r = _ollama.chat(
+                # Pass BOTH images: [reference, current_frame]. Route through
+                # the shared Ollama client (so VLM-off and remote-host config
+                # are honored) and mirror the compute-graph caps.
+                from API.llava_api import NUM_BATCH, NUM_CTX, VLM_ENABLED, _client as _llava_client
+                if not VLM_ENABLED:
+                    print(f" [{step}/{max_steps}] VLM disabled — skipping image-match")
+                    continue
+                r = _llava_client.chat(
                     model=self._model,
                     messages=[{
                         "role": "user",