Update 2026-04-22 12:17:30

kassam 2026-04-22 12:17:32 +04:00
parent dcf5f9f39b
commit d257808e48
14 changed files with 111 additions and 30 deletions

View File

@@ -1,5 +1,24 @@
"""
llava_api.py - Qwen-VL query interface (via Ollama)

Three deployment modes, chosen via config_Brain.json:

1. subsystems.vlm = false
   Every ask*() returns a safe fallback dict. Marcus runs in regex-only
   "safe mode": no LLM load on the Jetson, no GPU/CPU contention with
   Holosoma, so the robot won't fall from thrashing. Vision questions
   just get the fixed "scene understanding is disabled" reply; everything
   else (movement, places, patrol, autonomous) still works.

2. ollama_host = "http://127.0.0.1:11434" + subsystems.vlm = true
   Ollama runs on the Jetson (the old behavior) and competes with
   Holosoma for memory. Unsafe during walking with a 3B VL model.

3. ollama_host = "http://192.168.123.222:11434" + subsystems.vlm = true
   Ollama runs on the workstation. The Jetson stays light, Holosoma
   keeps its 50 Hz real-time deadline, and the brain still gets full
   Qwen-VL. Best mode for demos and for walking with conversation.
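
Example config_Brain.json settings for mode 3 (a sketch: the model name,
host, and "vlm" flag are the values described above; every other key keeps
its existing value):

    {
      "ollama_model": "qwen2.5vl:3b",
      "ollama_host": "http://192.168.123.222:11434",
      "subsystems": { "vlm": true }
    }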
""" """
import json
import ollama
@@ -9,12 +28,13 @@ from Core.config_loader import load_config
_cfg = load_config("Brain")

# Load prompts from YAML (the authoritative source — bilingual, complete)
_yaml_path = Path(__file__).resolve().parent.parent / "Config" / "marcus_prompts.yaml"
with open(_yaml_path, encoding="utf-8") as _f:
    _prompts = yaml.safe_load(_f)

OLLAMA_MODEL = _cfg["ollama_model"]
OLLAMA_HOST = _cfg.get("ollama_host", "http://127.0.0.1:11434")
VLM_ENABLED = bool(_cfg.get("subsystems", {}).get("vlm", True))
MAX_HISTORY = _cfg["max_history"]
# Cap batch and context on every request. Without this, llama.cpp on Jetson
# Orin NX allocates a ~7.5 GiB compute graph (defaults: batch 512, ctx 4096)
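# (Sketch, not part of the diff: these are the caps the comment above refers
#  to. NUM_BATCH and NUM_CTX are real names (ImageSearch imports them from
#  this module), but their assignment lines fall outside this hunk, so the
#  exact form below is assumed from the config keys num_batch / num_ctx.)
# NUM_BATCH = _cfg["num_batch"]   # 128 in config_Brain.json
# NUM_CTX = _cfg["num_ctx"]       # 2048 in config_Brain.json
# call_llava() is then expected to pass both caps in the Ollama options dict,
# e.g. options={"temperature": 0.0, "num_predict": num_predict,
#               "num_batch": NUM_BATCH, "num_ctx": NUM_CTX}.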
@@ -28,6 +48,14 @@ PATROL_PROMPT = _prompts["patrol_prompt"]
TALK_PROMPT = _prompts["talk_prompt"]
VERIFY_PROMPT = _prompts["verify_prompt"]
# Explicit Ollama client — lets us route to a remote host (e.g., workstation)
# without relying on the OLLAMA_HOST env var being set in the launch shell.
_client = ollama.Client(host=OLLAMA_HOST)
# Safe-mode replies used when subsystems.vlm == false
_VLM_OFF_TALK = "Scene understanding is disabled — Sanad is in safe mode."
_VLM_OFF_EMPTY = {"actions": [], "arm": None, "speak": _VLM_OFF_TALK, "abort": None}

# Conversation state
_conversation_history = []
_facts = []
@@ -48,6 +76,8 @@ def add_to_history(user_msg: str, assistant_msg: str):
def call_llava(prompt: str, img_b64, num_predict: int = 200, use_history: bool = False) -> str:
    if not VLM_ENABLED:
        return ""  # safe-mode — caller must handle empty string
    messages = []
    if use_history and _conversation_history:
        messages.extend(_conversation_history)
@@ -55,7 +85,7 @@ def call_llava(prompt: str, img_b64, num_predict: int = 200, use_history: bool = False) -> str:
    if img_b64:
        msg["images"] = [img_b64]
    messages.append(msg)
    r = _client.chat(model=OLLAMA_MODEL, messages=messages,
                     options={
                         "temperature": 0.0,
                         "num_predict": num_predict,
@@ -79,7 +109,9 @@ def parse_json(raw: str):
def ask(command: str, img_b64) -> dict:
    """Send command + camera frame to the VLM with conversation history."""
    if not VLM_ENABLED:
        return dict(_VLM_OFF_EMPTY)
    try:
        facts_str = ("\nKnown facts: " + "; ".join(_facts) + ".") if _facts else ""
        raw = call_llava(MAIN_PROMPT.format(command=command, facts=facts_str), img_b64,
@@ -92,16 +124,19 @@
            return {"actions": [], "arm": None, "speak": raw, "abort": None}
        return d
    except Exception as ex:
        print(f" VLM error: {ex}")
        return {"actions": [], "arm": None, "speak": "VLM error.", "abort": None}

def ask_goal(goal: str, img_b64) -> dict:
    """Ask the VLM if the goal is reached."""
    if not VLM_ENABLED:
        return {"reached": False, "next_move": "left", "duration": 0.5,
                "speak": "VLM disabled — relying on YOLO fast-match only."}
    try:
        raw = call_llava(GOAL_PROMPT.format(goal=goal), img_b64,
                         num_predict=_cfg["num_predict_goal"])
        print(f" VLM: {raw}")
        d = parse_json(raw)
        if d is None:
            text = raw.lower()
@@ -119,6 +154,8 @@
def ask_talk(command: str, img_b64, facts: str = "") -> dict:
    """Handle talk-only commands using the YAML talk_prompt."""
    if not VLM_ENABLED:
        return dict(_VLM_OFF_EMPTY)
    try:
        prompt = TALK_PROMPT.format(command=command, facts=facts)
        raw = call_llava(prompt, img_b64, num_predict=_cfg["num_predict_talk"],
@@ -137,6 +174,9 @@
def ask_verify(target: str, condition: str, img_b64) -> str:
    """Verify a condition on a detected target. Returns 'yes' or 'no'."""
    if not VLM_ENABLED:
        # Without VLM we can't verify compound conditions; trust the YOLO match.
        return "yes"
    try:
        prompt = VERIFY_PROMPT.format(target=target, condition=condition)
        raw = call_llava(prompt, img_b64, num_predict=_cfg["num_predict_verify"])
@@ -148,7 +188,10 @@
def ask_patrol(img_b64) -> dict:
    """Ask the VLM to assess the scene during patrol."""
    if not VLM_ENABLED:
        return {"observation": "VLM off — patrolling without scene analysis.",
                "alert": None, "next_move": "forward", "duration": 1.0}
    try:
        raw = call_llava(PATROL_PROMPT, img_b64, num_predict=_cfg["num_predict_patrol"])
        d = parse_json(raw)

View File

@@ -147,6 +147,14 @@ def init_brain():
    _log("Brain initialized", "info", "brain")

    # Skip warmup when VLM is off — there's no model to warm, and the
    # dashboard should mention that Marcus is in safe mode.
    from API.llava_api import VLM_ENABLED, OLLAMA_HOST, OLLAMA_MODEL
    if not VLM_ENABLED:
        print(" [VLM] disabled by config — safe mode (no Ollama load)")
    else:
        host_short = OLLAMA_HOST.replace("http://", "")
        print(f" [VLM] target: {host_short} ({OLLAMA_MODEL})")

    # Warmup runs in a daemon thread so the dashboard + Command: prompt
    # appear immediately. The first real user command will either hit a
    # warm model (fast) or pay the cold-load itself (same as before).
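    # (Sketch only, not in the diff: the warmup thread itself sits outside this
    #  hunk. Assuming it is started right after the comment above, it could look
    #  roughly like this; the helper name and one-token prompt are illustrative.)
    #
    #     import threading
    #     from API.llava_api import call_llava
    #
    #     def _warmup_vlm():
    #         try:
    #             call_llava("Reply with OK.", None, num_predict=1)
    #         except Exception:
    #             pass  # first real command will pay the cold-load instead
    #
    #     if VLM_ENABLED:
    #         threading.Thread(target=_warmup_vlm, daemon=True).start()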
@@ -540,7 +548,9 @@ def run_terminal():
    print("\n\n" + "╔" + "═" * (W-2) + "╗")
    print("║" + _pad(" SANAD — AI BRAIN READY", W-2) + "║")
    print("╠" + "═" * (W-2) + "╣")
    from API.llava_api import VLM_ENABLED
    left = [("model", status["model"]),
            ("vlm", _fmt(VLM_ENABLED)),
            ("voice", _fmt(status["voice"])),
            ("camera", status["camera"])]
    right = [("yolo", _fmt(status["yolo"])),

View File

@@ -1,9 +1,11 @@
{
  "ollama_model": "qwen2.5vl:3b",
  "ollama_host": "http://127.0.0.1:11434",
  "max_history": 6,
  "num_batch": 128,
  "num_ctx": 2048,
  "subsystems": {
    "vlm": true,
    "lidar": true,
    "voice": true,
    "imgsearch": false,

View File

@@ -1,7 +1,7 @@
{
  "tts": {
    "backend": "builtin_ttsmaker",
    "builtin_speaker_id": 2,
    "target_sample_rate": 16000
  },
  "stt": {

View File

@@ -0,0 +1 @@
[]

View File

@@ -0,0 +1,8 @@
[
  {
    "time": "10:54:15",
    "cmd": "hi",
    "response": "Hello! I am Sanad. How can I help you?",
    "duration_s": 0.0
  }
]

View File

@@ -0,0 +1 @@
[]

View File

@@ -0,0 +1 @@
{}

View File

@@ -0,0 +1 @@
[]

View File

@@ -0,0 +1 @@
[]

View File

@@ -0,0 +1 @@
[]

View File

@@ -0,0 +1 @@
{}

View File

@@ -0,0 +1,9 @@
Session: session_030_2026-04-22
Date: 2026-04-22 10:56
Duration: 0m 37s
Commands: 0
YOLO detections: 0
Alerts: 0
Known places: none
First commands:

View File

@@ -349,12 +349,14 @@ class ImageSearch:
            try:
                if has_ref:
                    # Pass BOTH images: [reference, current_frame]. Route through
                    # the shared Ollama client (so VLM-off and remote-host config
                    # are honored) and mirror the compute-graph caps.
                    from API.llava_api import NUM_BATCH, NUM_CTX, VLM_ENABLED, _client as _llava_client
                    if not VLM_ENABLED:
                        print(f" [{step}/{max_steps}] VLM disabled — skipping image-match")
                        continue
                    r = _llava_client.chat(
                        model=self._model,
                        messages=[{
                            "role": "user",