Update 2026-04-22 12:17:30

kassam 2026-04-22 12:17:32 +04:00
parent dcf5f9f39b
commit d257808e48
14 changed files with 111 additions and 30 deletions

View File

@@ -1,5 +1,24 @@
"""
llava_api.py - Qwen-VL query interface (via Ollama)

Three deployment modes, chosen via config_Brain.json:

1. subsystems.vlm = false
   Every ask*() returns a safe fallback dict. Marcus runs in regex-only
   "safe mode": no LLM load on the Jetson, no GPU/CPU contention with
   Holosoma, so the robot won't fall from thrashing. Vision questions
   just get the fixed "scene understanding is disabled" reply; everything
   else (movement, places, patrol, autonomous) still works.

2. ollama_host = "http://127.0.0.1:11434" + subsystems.vlm = true
   Ollama runs on the Jetson (the old behavior) and competes with
   Holosoma for memory. Unsafe during walking with a 3B VL model.

3. ollama_host = "http://192.168.123.222:11434" + subsystems.vlm = true
   Ollama runs on the workstation. The Jetson stays light, Holosoma
   keeps its 50 Hz real-time deadline, and the brain still gets full
   Qwen-VL. Best mode for demos and for walking with conversation.
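
Example config_Brain.json settings for mode 3 (a sketch: the model name,
host, and "vlm" flag are the values described above; every other key keeps
its existing value):

    {
      "ollama_model": "qwen2.5vl:3b",
      "ollama_host": "http://192.168.123.222:11434",
      "subsystems": { "vlm": true }
    }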
""" """
import json
import ollama
@@ -9,12 +28,13 @@ from Core.config_loader import load_config
_cfg = load_config("Brain")

# Load prompts from YAML (the authoritative source — bilingual, complete)
_yaml_path = Path(__file__).resolve().parent.parent / "Config" / "marcus_prompts.yaml"
with open(_yaml_path, encoding="utf-8") as _f:
    _prompts = yaml.safe_load(_f)

OLLAMA_MODEL = _cfg["ollama_model"]
OLLAMA_HOST = _cfg.get("ollama_host", "http://127.0.0.1:11434")
VLM_ENABLED = bool(_cfg.get("subsystems", {}).get("vlm", True))
MAX_HISTORY = _cfg["max_history"]
# Cap batch and context on every request. Without this, llama.cpp on Jetson
# Orin NX allocates a ~7.5 GiB compute graph (defaults: batch 512, ctx 4096)
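# (Sketch, not part of the diff: these are the caps the comment above refers
#  to. NUM_BATCH and NUM_CTX are real names (ImageSearch imports them from
#  this module), but their assignment lines fall outside this hunk, so the
#  exact form below is assumed from the config keys num_batch / num_ctx.)
# NUM_BATCH = _cfg["num_batch"]   # 128 in config_Brain.json
# NUM_CTX = _cfg["num_ctx"]       # 2048 in config_Brain.json
# call_llava() is then expected to pass both caps in the Ollama options dict,
# e.g. options={"temperature": 0.0, "num_predict": num_predict,
#               "num_batch": NUM_BATCH, "num_ctx": NUM_CTX}.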
@@ -28,6 +48,14 @@ PATROL_PROMPT = _prompts["patrol_prompt"]
TALK_PROMPT = _prompts["talk_prompt"]
VERIFY_PROMPT = _prompts["verify_prompt"]
# Explicit Ollama client — lets us route to a remote host (e.g., workstation)
# without relying on the OLLAMA_HOST env var being set in the launch shell.
_client = ollama.Client(host=OLLAMA_HOST)
# Safe-mode replies used when subsystems.vlm == false
_VLM_OFF_TALK = "Scene understanding is disabled — Sanad is in safe mode."
_VLM_OFF_EMPTY = {"actions": [], "arm": None, "speak": _VLM_OFF_TALK, "abort": None}

# Conversation state
_conversation_history = []
_facts = []
@@ -48,6 +76,8 @@ def add_to_history(user_msg: str, assistant_msg: str):
def call_llava(prompt: str, img_b64, num_predict: int = 200, use_history: bool = False) -> str:
    if not VLM_ENABLED:
        return ""  # safe-mode — caller must handle empty string
    messages = []
    if use_history and _conversation_history:
        messages.extend(_conversation_history)
@@ -55,7 +85,7 @@ def call_llava(prompt: str, img_b64, num_predict: int = 200, use_history: bool = False) -> str:
    if img_b64:
        msg["images"] = [img_b64]
    messages.append(msg)
    r = _client.chat(model=OLLAMA_MODEL, messages=messages,
                     options={
                         "temperature": 0.0,
                         "num_predict": num_predict,
@@ -79,7 +109,9 @@ def parse_json(raw: str):
def ask(command: str, img_b64) -> dict:
    """Send command + camera frame to the VLM with conversation history."""
    if not VLM_ENABLED:
        return dict(_VLM_OFF_EMPTY)
    try:
        facts_str = ("\nKnown facts: " + "; ".join(_facts) + ".") if _facts else ""
        raw = call_llava(MAIN_PROMPT.format(command=command, facts=facts_str), img_b64,
@@ -92,16 +124,19 @@
            return {"actions": [], "arm": None, "speak": raw, "abort": None}
        return d
    except Exception as ex:
        print(f" VLM error: {ex}")
        return {"actions": [], "arm": None, "speak": "VLM error.", "abort": None}

def ask_goal(goal: str, img_b64) -> dict:
    """Ask the VLM if the goal is reached."""
    if not VLM_ENABLED:
        return {"reached": False, "next_move": "left", "duration": 0.5,
                "speak": "VLM disabled — relying on YOLO fast-match only."}
    try:
        raw = call_llava(GOAL_PROMPT.format(goal=goal), img_b64,
                         num_predict=_cfg["num_predict_goal"])
        print(f" VLM: {raw}")
        d = parse_json(raw)
        if d is None:
            text = raw.lower()
@@ -119,6 +154,8 @@
def ask_talk(command: str, img_b64, facts: str = "") -> dict:
    """Handle talk-only commands using the YAML talk_prompt."""
    if not VLM_ENABLED:
        return dict(_VLM_OFF_EMPTY)
    try:
        prompt = TALK_PROMPT.format(command=command, facts=facts)
        raw = call_llava(prompt, img_b64, num_predict=_cfg["num_predict_talk"],
@@ -137,6 +174,9 @@
def ask_verify(target: str, condition: str, img_b64) -> str:
    """Verify a condition on a detected target. Returns 'yes' or 'no'."""
    if not VLM_ENABLED:
        # Without VLM we can't verify compound conditions; trust the YOLO match.
        return "yes"
    try:
        prompt = VERIFY_PROMPT.format(target=target, condition=condition)
        raw = call_llava(prompt, img_b64, num_predict=_cfg["num_predict_verify"])
@@ -148,7 +188,10 @@
def ask_patrol(img_b64) -> dict:
    """Ask the VLM to assess the scene during patrol."""
    if not VLM_ENABLED:
        return {"observation": "VLM off — patrolling without scene analysis.",
                "alert": None, "next_move": "forward", "duration": 1.0}
    try:
        raw = call_llava(PATROL_PROMPT, img_b64, num_predict=_cfg["num_predict_patrol"])
        d = parse_json(raw)

View File

@@ -147,6 +147,14 @@ def init_brain():
    _log("Brain initialized", "info", "brain")

    # Skip warmup when VLM is off — there's no model to warm, and the
    # dashboard should mention that Marcus is in safe mode.
    from API.llava_api import VLM_ENABLED, OLLAMA_HOST, OLLAMA_MODEL
    if not VLM_ENABLED:
        print(" [VLM] disabled by config — safe mode (no Ollama load)")
    else:
        host_short = OLLAMA_HOST.replace("http://", "")
        print(f" [VLM] target: {host_short} ({OLLAMA_MODEL})")

    # Warmup runs in a daemon thread so the dashboard + Command: prompt
    # appear immediately. The first real user command will either hit a
    # warm model (fast) or pay the cold-load itself (same as before).
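    # (Sketch only, not in the diff: the warmup thread itself sits outside this
    #  hunk. Assuming it is started right after the comment above, it could look
    #  roughly like this; the helper name and one-token prompt are illustrative.)
    #
    #     import threading
    #     from API.llava_api import call_llava
    #
    #     def _warmup_vlm():
    #         try:
    #             call_llava("Reply with OK.", None, num_predict=1)
    #         except Exception:
    #             pass  # first real command will pay the cold-load instead
    #
    #     if VLM_ENABLED:
    #         threading.Thread(target=_warmup_vlm, daemon=True).start()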
@@ -540,7 +548,9 @@ def run_terminal():
    print("\n\n" + "╔" + "═" * (W-2) + "╗")
    print("║" + _pad(" SANAD — AI BRAIN READY", W-2) + "║")
    print("╠" + "═" * (W-2) + "╣")
    from API.llava_api import VLM_ENABLED
    left = [("model", status["model"]),
            ("vlm", _fmt(VLM_ENABLED)),
            ("voice", _fmt(status["voice"])),
            ("camera", status["camera"])]
    right = [("yolo", _fmt(status["yolo"])),

View File

@@ -1,9 +1,11 @@
{
  "ollama_model": "qwen2.5vl:3b",
  "ollama_host": "http://127.0.0.1:11434",
  "max_history": 6,
  "num_batch": 128,
  "num_ctx": 2048,
  "subsystems": {
    "vlm": true,
    "lidar": true,
    "voice": true,
    "imgsearch": false,

View File

@@ -1,7 +1,7 @@
{
  "tts": {
    "backend": "builtin_ttsmaker",
    "builtin_speaker_id": 2,
    "target_sample_rate": 16000
  },
  "stt": {

View File

@@ -0,0 +1 @@
[]

View File

@@ -0,0 +1,8 @@
[
  {
    "time": "10:54:15",
    "cmd": "hi",
    "response": "Hello! I am Sanad. How can I help you?",
    "duration_s": 0.0
  }
]

View File

@@ -0,0 +1 @@
[]

View File

@@ -0,0 +1 @@
{}

View File

@@ -0,0 +1 @@
[]

View File

@@ -0,0 +1 @@
[]

View File

@@ -0,0 +1 @@
[]

View File

@@ -0,0 +1 @@
{}

View File

@@ -0,0 +1,9 @@
Session: session_030_2026-04-22
Date: 2026-04-22 10:56
Duration: 0m 37s
Commands: 0
YOLO detections: 0
Alerts: 0
Known places: none
First commands:

View File

@@ -349,12 +349,14 @@ class ImageSearch:
            try:
                if has_ref:
                    # Pass BOTH images: [reference, current_frame]. Route through
                    # the shared Ollama client (so VLM-off and remote-host config
                    # are honored) and mirror the compute-graph caps.
                    from API.llava_api import NUM_BATCH, NUM_CTX, VLM_ENABLED, _client as _llava_client
                    if not VLM_ENABLED:
                        print(f" [{step}/{max_steps}] VLM disabled — skipping image-match")
                        continue
                    r = _llava_client.chat(
                        model=self._model,
                        messages=[{
                            "role": "user",