Update 2026-04-22 12:17:30
parent dcf5f9f39b
commit d257808e48
@@ -1,5 +1,24 @@
"""
llava_api.py — LLaVA / Qwen VL query interface
llava_api.py — Qwen-VL query interface (via Ollama)

Three deployment modes, chosen via config_Brain.json:

1. subsystems.vlm = false
   → every ask*() returns a safe fallback dict. Marcus runs in
     regex-only "safe mode": no LLM load on the Jetson, no GPU/CPU
     contention with Holosoma, robot won't fall from thrashing.
     Vision questions just answer "Scene understanding is disabled
     — running in safe mode." Everything else (movement, places,
     patrol, autonomous) still works.

2. ollama_host = "http://127.0.0.1:11434" + subsystems.vlm = true
   → Ollama runs on the Jetson. Old behavior — competes with
     Holosoma for memory. Unsafe during walking with a 3B VL model.

3. ollama_host = "http://192.168.123.222:11434" + subsystems.vlm = true
   → Ollama runs on the workstation. Jetson stays light, Holosoma
     keeps its 50 Hz real-time deadline, and the brain still gets
     full Qwen-VL. Best mode for demos / walking with conversation.
"""
import json
import ollama
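For reference, the three modes above hinge on just two config keys, ollama_host and subsystems.vlm. Below is a minimal sketch of reporting the active mode from config_Brain.json; the helper name, the direct json.load(), and the file path are illustrative assumptions, not code from this commit (the real code loads config through Core.config_loader.load_config("Brain")).

import json

def describe_vlm_mode(cfg_path: str = "Config/config_Brain.json") -> str:
    # Sketch only: map the two keys named in the docstring to the three modes.
    with open(cfg_path, encoding="utf-8") as f:
        cfg = json.load(f)
    if not cfg.get("subsystems", {}).get("vlm", True):
        return "mode 1: safe mode (regex-only, no Ollama load)"
    host = cfg.get("ollama_host", "http://127.0.0.1:11434")
    if "127.0.0.1" in host or "localhost" in host:
        return "mode 2: Ollama on the Jetson (competes with Holosoma)"
    return f"mode 3: Ollama on a remote host ({host})"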
@@ -9,12 +28,13 @@ from Core.config_loader import load_config

_cfg = load_config("Brain")

# Load prompts from YAML (the authoritative source — bilingual, complete)
_yaml_path = Path(__file__).resolve().parent.parent / "Config" / "marcus_prompts.yaml"
with open(_yaml_path, encoding="utf-8") as _f:
    _prompts = yaml.safe_load(_f)

OLLAMA_MODEL = _cfg["ollama_model"]
OLLAMA_HOST = _cfg.get("ollama_host", "http://127.0.0.1:11434")
VLM_ENABLED = bool(_cfg.get("subsystems", {}).get("vlm", True))
MAX_HISTORY = _cfg["max_history"]
# Cap batch and context on every request. Without this, llama.cpp on Jetson
# Orin NX allocates a ~7.5 GiB compute graph (defaults: batch 512, ctx 4096)
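For reference, the caps described in that comment are applied per request through Ollama's options dict. A minimal sketch of the pattern, using the num_batch/num_ctx values this commit puts in config_Brain.json; the model name is the one from that config, while the prompt and num_predict value here are illustrative.

import ollama

# Sketch only: per-request caps as described above (the defaults, batch 512 /
# ctx 4096, are what blow up the compute graph on Orin NX).
r = ollama.chat(
    model="qwen2.5vl:3b",
    messages=[{"role": "user", "content": "Describe the scene in one sentence."}],
    options={
        "temperature": 0.0,
        "num_predict": 120,
        "num_batch": 128,   # mirrors num_batch in config_Brain.json
        "num_ctx": 2048,    # mirrors num_ctx in config_Brain.json
    },
)
print(r["message"]["content"].strip())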
@@ -28,6 +48,14 @@ PATROL_PROMPT = _prompts["patrol_prompt"]
TALK_PROMPT = _prompts["talk_prompt"]
VERIFY_PROMPT = _prompts["verify_prompt"]

# Explicit Ollama client — lets us route to a remote host (e.g., workstation)
# without relying on the OLLAMA_HOST env var being set in the launch shell.
_client = ollama.Client(host=OLLAMA_HOST)

# Safe-mode replies used when subsystems.vlm == false
_VLM_OFF_TALK = "Scene understanding is disabled — Sanad is in safe mode."
_VLM_OFF_EMPTY = {"actions": [], "arm": None, "speak": _VLM_OFF_TALK, "abort": None}

# Conversation state
_conversation_history = []
_facts = []
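A quick illustration of why the explicit client matters: the module-level ollama.chat() resolves its server from the OLLAMA_HOST environment variable (falling back to localhost), while ollama.Client(host=...) pins the target in code, so the launch shell needs no extra exports. The host value below is illustrative.

import ollama

# Module-level API: server comes from the OLLAMA_HOST env var, else localhost.
# r = ollama.chat(model="qwen2.5vl:3b", messages=[{"role": "user", "content": "hi"}])

# Explicit client: server is pinned in code regardless of the environment.
client = ollama.Client(host="http://192.168.123.222:11434")
# r = client.chat(model="qwen2.5vl:3b", messages=[{"role": "user", "content": "hi"}])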
@@ -48,6 +76,8 @@ def add_to_history(user_msg: str, assistant_msg: str):


def call_llava(prompt: str, img_b64, num_predict: int = 200, use_history: bool = False) -> str:
    if not VLM_ENABLED:
        return ""  # safe-mode — caller must handle empty string
    messages = []
    if use_history and _conversation_history:
        messages.extend(_conversation_history)
@@ -55,13 +85,13 @@ def call_llava(prompt: str, img_b64, num_predict: int = 200, use_history: bool =
    if img_b64:
        msg["images"] = [img_b64]
    messages.append(msg)
    r = ollama.chat(model=OLLAMA_MODEL, messages=messages,
                    options={
                        "temperature": 0.0,
                        "num_predict": num_predict,
                        "num_batch": NUM_BATCH,
                        "num_ctx": NUM_CTX,
                    })
    r = _client.chat(model=OLLAMA_MODEL, messages=messages,
                     options={
                         "temperature": 0.0,
                         "num_predict": num_predict,
                         "num_batch": NUM_BATCH,
                         "num_ctx": NUM_CTX,
                     })
    return r["message"]["content"].strip()
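Because call_llava() now returns an empty string when the VLM is disabled, every caller has to treat "" as "no answer" rather than a model reply. A minimal caller-side sketch; the wrapper name is illustrative, and the fallback text simply reuses the safe-mode wording from the module docstring.

from API.llava_api import call_llava

def describe_scene(img_b64) -> str:
    # Sketch only: handle the safe-mode empty string explicitly.
    raw = call_llava("Describe the scene in one short sentence.", img_b64,
                     num_predict=60)
    if not raw:  # subsystems.vlm == false
        return "Scene understanding is disabled — running in safe mode."
    return raw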
@@ -79,7 +109,9 @@ def parse_json(raw: str):


def ask(command: str, img_b64) -> dict:
    """Send command + camera frame to LLaVA with conversation history."""
    """Send command + camera frame to the VLM with conversation history."""
    if not VLM_ENABLED:
        return dict(_VLM_OFF_EMPTY)
    try:
        facts_str = ("\nKnown facts: " + "; ".join(_facts) + ".") if _facts else ""
        raw = call_llava(MAIN_PROMPT.format(command=command, facts=facts_str), img_b64,
@@ -92,16 +124,19 @@ def ask(command: str, img_b64) -> dict:
            return {"actions": [], "arm": None, "speak": raw, "abort": None}
        return d
    except Exception as ex:
        print(f" LLaVA error: {ex}")
        return {"actions": [], "arm": None, "speak": "Error.", "abort": None}
        print(f" VLM error: {ex}")
        return {"actions": [], "arm": None, "speak": "VLM error.", "abort": None}


def ask_goal(goal: str, img_b64) -> dict:
    """Ask LLaVA if goal is reached."""
    """Ask the VLM if the goal is reached."""
    if not VLM_ENABLED:
        return {"reached": False, "next_move": "left", "duration": 0.5,
                "speak": "VLM disabled — relying on YOLO fast-match only."}
    try:
        raw = call_llava(GOAL_PROMPT.format(goal=goal), img_b64,
                         num_predict=_cfg["num_predict_goal"])
        print(f" LLaVA: {raw}")
        print(f" VLM: {raw}")
        d = parse_json(raw)
        if d is None:
            text = raw.lower()
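All of the ask*() entry points return plain dicts, so downstream code stays the same whether the answer came from the VLM or from a safe-mode fallback. A minimal consumer sketch for ask(); the print handlers stand in for the real TTS and motion layers and are illustrative.

from API.llava_api import ask

def handle_command(command: str, img_b64) -> None:
    # Sketch only: actions / arm / speak / abort are the keys this module
    # returns; the handlers below are placeholders.
    d = ask(command, img_b64)
    if d.get("speak"):
        print(f"[say]   {d['speak']}")
    for action in d.get("actions", []):
        print(f"[move]  {action}")
    if d.get("arm"):
        print(f"[arm]   {d['arm']}")
    if d.get("abort"):
        print(f"[abort] {d['abort']}")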
@@ -119,6 +154,8 @@ def ask_goal(goal: str, img_b64) -> dict:

def ask_talk(command: str, img_b64, facts: str = "") -> dict:
    """Handle talk-only commands using the YAML talk_prompt."""
    if not VLM_ENABLED:
        return dict(_VLM_OFF_EMPTY)
    try:
        prompt = TALK_PROMPT.format(command=command, facts=facts)
        raw = call_llava(prompt, img_b64, num_predict=_cfg["num_predict_talk"],
@@ -137,6 +174,9 @@ def ask_talk(command: str, img_b64, facts: str = "") -> dict:

def ask_verify(target: str, condition: str, img_b64) -> str:
    """Verify a condition on a detected target. Returns 'yes' or 'no'."""
    if not VLM_ENABLED:
        # Without VLM we can't verify compound conditions; trust the YOLO match.
        return "yes"
    try:
        prompt = VERIFY_PROMPT.format(target=target, condition=condition)
        raw = call_llava(prompt, img_b64, num_predict=_cfg["num_predict_verify"])
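ask_verify() is the second half of a detect-then-verify flow: YOLO proposes a target and the VLM confirms a compound condition on it, except in safe mode, where the YOLO match is trusted outright. A minimal sketch; the yolo_hit flag is an illustrative stand-in for the real detector result.

from API.llava_api import ask_verify

def confirm_target(target: str, condition: str, img_b64, yolo_hit: bool) -> bool:
    # Sketch only: with subsystems.vlm == false ask_verify() returns "yes",
    # so the decision falls back to the YOLO match alone.
    if not yolo_hit:
        return False
    return ask_verify(target, condition, img_b64) == "yes"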
@@ -148,7 +188,10 @@ def ask_verify(target: str, condition: str, img_b64) -> str:


def ask_patrol(img_b64) -> dict:
    """Ask LLaVA to assess scene during patrol."""
    """Ask the VLM to assess the scene during patrol."""
    if not VLM_ENABLED:
        return {"observation": "VLM off — patrolling without scene analysis.",
                "alert": None, "next_move": "forward", "duration": 1.0}
    try:
        raw = call_llava(PATROL_PROMPT, img_b64, num_predict=_cfg["num_predict_patrol"])
        d = parse_json(raw)
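ask_patrol() likewise degrades to a fixed dict when the VLM is off, so a patrol loop can consume the same keys in every mode. A minimal sketch; the logging calls stand in for the real alert and motion handling.

from API.llava_api import ask_patrol

def patrol_step(img_b64) -> None:
    # Sketch only: observation / alert / next_move / duration are the keys
    # ask_patrol() returns, including its VLM-off fallback.
    d = ask_patrol(img_b64)
    print(f"[patrol] {d['observation']}")
    if d.get("alert"):
        print(f"[ALERT]  {d['alert']}")
    print(f"[move]   {d['next_move']} for {d['duration']}s")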
@@ -147,11 +147,19 @@ def init_brain():

    _log("Brain initialized", "info", "brain")

    # Warmup runs in a daemon thread so the dashboard + Command: prompt
    # appear immediately. The first real user command will either hit a
    # warm model (fast) or pay the cold-load itself (same as before).
    import threading as _t
    _t.Thread(target=_warmup_llava, daemon=True, name="llava-warmup").start()
    # Skip warmup when VLM is off — there's no model to warm, and the
    # dashboard should mention that Marcus is in safe mode.
    from API.llava_api import VLM_ENABLED, OLLAMA_HOST
    if not VLM_ENABLED:
        print(" [VLM] disabled by config — safe mode (no Ollama load)")
    else:
        host_short = OLLAMA_HOST.replace("http://", "")
        print(f" [VLM] target: {host_short} ({OLLAMA_MODEL})")
        # Warmup runs in a daemon thread so the dashboard + Command: prompt
        # appear immediately. The first real user command will either hit a
        # warm model (fast) or pay the cold-load itself (same as before).
        import threading as _t
        _t.Thread(target=_warmup_llava, daemon=True, name="llava-warmup").start()


# Global voice references
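The gating above boils down to a small pattern: skip warmup entirely in safe mode, otherwise warm the model off the main thread so the prompt appears immediately. A minimal sketch of that pattern; start_vlm_warmup and warmup_fn are illustrative names, the real code calls _warmup_llava directly.

import threading

def start_vlm_warmup(vlm_enabled: bool, warmup_fn) -> None:
    # Sketch only: mirrors the init_brain() logic in this commit.
    if not vlm_enabled:
        print(" [VLM] disabled by config — safe mode (no Ollama load)")
        return
    threading.Thread(target=warmup_fn, daemon=True, name="llava-warmup").start()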
@@ -540,7 +548,9 @@ def run_terminal():
    print("\n\n" + "╔" + "═" * (W-2) + "╗")
    print("║" + _pad(" SANAD — AI BRAIN READY", W-2) + "║")
    print("╠" + "═" * (W-2) + "╣")
    from API.llava_api import VLM_ENABLED
    left = [("model", status["model"]),
            ("vlm", _fmt(VLM_ENABLED)),
            ("voice", _fmt(status["voice"])),
            ("camera", status["camera"])]
    right = [("yolo", _fmt(status["yolo"])),
@@ -1,12 +1,14 @@
{
  "ollama_model": "qwen2.5vl:3b",
  "ollama_host": "http://127.0.0.1:11434",
  "max_history": 6,
  "num_batch": 128,
  "num_ctx": 2048,
  "subsystems": {
    "lidar": true,
    "voice": true,
    "imgsearch": false,
    "vlm": true,
    "lidar": true,
    "voice": true,
    "imgsearch": false,
    "autonomous": true
  },
  "num_predict_main": 120,
@@ -1,7 +1,7 @@
{
  "tts": {
    "backend": "builtin_ttsmaker",
    "builtin_speaker_id": 0,
    "builtin_speaker_id": 2,
    "target_sample_rate": 16000
  },
  "stt": {
1 Data/Brain/Sessions/session_029_2026-04-22/alerts.json Normal file
@@ -0,0 +1 @@
[]
8 Data/Brain/Sessions/session_029_2026-04-22/commands.json Normal file
@@ -0,0 +1,8 @@
[
  {
    "time": "10:54:15",
    "cmd": "hi",
    "response": "Hello! I am Sanad. How can I help you?",
    "duration_s": 0.0
  }
]
@@ -0,0 +1 @@
[]
1 Data/Brain/Sessions/session_029_2026-04-22/places.json Normal file
@@ -0,0 +1 @@
{}
1 Data/Brain/Sessions/session_030_2026-04-22/alerts.json Normal file
@@ -0,0 +1 @@
[]
1 Data/Brain/Sessions/session_030_2026-04-22/commands.json Normal file
@@ -0,0 +1 @@
[]
@@ -0,0 +1 @@
[]
1 Data/Brain/Sessions/session_030_2026-04-22/places.json Normal file
@@ -0,0 +1 @@
{}
9 Data/Brain/Sessions/session_030_2026-04-22/summary.txt Normal file
@@ -0,0 +1,9 @@
Session: session_030_2026-04-22
Date: 2026-04-22 10:56
Duration: 0m 37s
Commands: 0
YOLO detections: 0
Alerts: 0
Known places: none

First commands:
@@ -349,12 +349,14 @@ class ImageSearch:

        try:
            if has_ref:
                # Pass BOTH images: [reference, current_frame]
                # num_batch/num_ctx mirror llava_api.py — without these
                # caps the compute graph OOMs the runner on Jetson.
                import ollama as _ollama
                from API.llava_api import NUM_BATCH, NUM_CTX
                r = _ollama.chat(
                # Pass BOTH images: [reference, current_frame]. Route through
                # the shared Ollama client (so VLM-off and remote-host config
                # are honored) and mirror the compute-graph caps.
                from API.llava_api import NUM_BATCH, NUM_CTX, VLM_ENABLED, _client as _llava_client
                if not VLM_ENABLED:
                    print(f" [{step}/{max_steps}] VLM disabled — skipping image-match")
                    continue
                r = _llava_client.chat(
                    model=self._model,
                    messages=[{
                        "role": "user",
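The new code path above sends two images in a single user message and reuses the shared client and caps from llava_api. A minimal standalone sketch of that call shape; the function name, prompt text, and yes/no parsing are illustrative.

from API.llava_api import NUM_BATCH, NUM_CTX, OLLAMA_MODEL, VLM_ENABLED, _client

def images_match(ref_b64: str, frame_b64: str) -> bool:
    # Sketch only: both images ride in one message, [reference, current_frame],
    # routed through the shared client so remote-host and VLM-off settings hold.
    if not VLM_ENABLED:
        return False
    r = _client.chat(
        model=OLLAMA_MODEL,
        messages=[{
            "role": "user",
            "content": "Do these two images show the same place? Answer yes or no.",
            "images": [ref_b64, frame_b64],
        }],
        options={"temperature": 0.0, "num_predict": 5,
                 "num_batch": NUM_BATCH, "num_ctx": NUM_CTX},
    )
    return r["message"]["content"].strip().lower().startswith("yes")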