Update 2026-04-22 12:17:30
parent dcf5f9f39b
commit d257808e48
@@ -1,5 +1,24 @@
"""
llava_api.py — LLaVA / Qwen VL query interface
llava_api.py — Qwen-VL query interface (via Ollama)

Three deployment modes, chosen via config_Brain.json:

1. subsystems.vlm = false
   → every ask*() returns a safe fallback dict. Marcus runs in
     regex-only "safe mode": no LLM load on the Jetson, no GPU/CPU
     contention with Holosoma, robot won't fall from thrashing.
     Vision questions just answer "Scene understanding is disabled
     — running in safe mode." Everything else (movement, places,
     patrol, autonomous) still works.

2. ollama_host = "http://127.0.0.1:11434" + subsystems.vlm = true
   → Ollama runs on the Jetson. Old behavior — competes with
     Holosoma for memory. Unsafe during walking with a 3B VL model.

3. ollama_host = "http://192.168.123.222:11434" + subsystems.vlm = true
   → Ollama runs on the workstation. Jetson stays light, Holosoma
     keeps its 50 Hz real-time deadline, and the brain still gets
     full Qwen-VL. Best mode for demos / walking with conversation.
"""
import json
import ollama
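For reference, the three modes above hinge on just two config keys, ollama_host and subsystems.vlm. Below is a minimal sketch of reporting the active mode from config_Brain.json; the helper name, the direct json.load(), and the file path are illustrative assumptions, not code from this commit (the real code loads config through Core.config_loader.load_config("Brain")).

import json

def describe_vlm_mode(cfg_path: str = "Config/config_Brain.json") -> str:
    # Sketch only: map the two keys named in the docstring to the three modes.
    with open(cfg_path, encoding="utf-8") as f:
        cfg = json.load(f)
    if not cfg.get("subsystems", {}).get("vlm", True):
        return "mode 1: safe mode (regex-only, no Ollama load)"
    host = cfg.get("ollama_host", "http://127.0.0.1:11434")
    if "127.0.0.1" in host or "localhost" in host:
        return "mode 2: Ollama on the Jetson (competes with Holosoma)"
    return f"mode 3: Ollama on a remote host ({host})"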
@@ -9,12 +28,13 @@ from Core.config_loader import load_config

_cfg = load_config("Brain")

# Load prompts from YAML (the authoritative source — bilingual, complete)
_yaml_path = Path(__file__).resolve().parent.parent / "Config" / "marcus_prompts.yaml"
with open(_yaml_path, encoding="utf-8") as _f:
    _prompts = yaml.safe_load(_f)

OLLAMA_MODEL = _cfg["ollama_model"]
OLLAMA_HOST = _cfg.get("ollama_host", "http://127.0.0.1:11434")
VLM_ENABLED = bool(_cfg.get("subsystems", {}).get("vlm", True))
MAX_HISTORY = _cfg["max_history"]
# Cap batch and context on every request. Without this, llama.cpp on Jetson
# Orin NX allocates a ~7.5 GiB compute graph (defaults: batch 512, ctx 4096)
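For reference, the caps described in that comment are applied per request through Ollama's options dict. A minimal sketch of the pattern, using the num_batch/num_ctx values this commit puts in config_Brain.json; the model name is the one from that config, while the prompt and num_predict value here are illustrative.

import ollama

# Sketch only: per-request caps as described above (the defaults, batch 512 /
# ctx 4096, are what blow up the compute graph on Orin NX).
r = ollama.chat(
    model="qwen2.5vl:3b",
    messages=[{"role": "user", "content": "Describe the scene in one sentence."}],
    options={
        "temperature": 0.0,
        "num_predict": 120,
        "num_batch": 128,   # mirrors num_batch in config_Brain.json
        "num_ctx": 2048,    # mirrors num_ctx in config_Brain.json
    },
)
print(r["message"]["content"].strip())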
@@ -28,6 +48,14 @@ PATROL_PROMPT = _prompts["patrol_prompt"]
TALK_PROMPT = _prompts["talk_prompt"]
VERIFY_PROMPT = _prompts["verify_prompt"]

# Explicit Ollama client — lets us route to a remote host (e.g., workstation)
# without relying on the OLLAMA_HOST env var being set in the launch shell.
_client = ollama.Client(host=OLLAMA_HOST)

# Safe-mode replies used when subsystems.vlm == false
_VLM_OFF_TALK = "Scene understanding is disabled — Sanad is in safe mode."
_VLM_OFF_EMPTY = {"actions": [], "arm": None, "speak": _VLM_OFF_TALK, "abort": None}

# Conversation state
_conversation_history = []
_facts = []
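A quick illustration of why the explicit client matters: the module-level ollama.chat() resolves its server from the OLLAMA_HOST environment variable (falling back to localhost), while ollama.Client(host=...) pins the target in code, so the launch shell needs no extra exports. The host value below is illustrative.

import ollama

# Module-level API: server comes from the OLLAMA_HOST env var, else localhost.
# r = ollama.chat(model="qwen2.5vl:3b", messages=[{"role": "user", "content": "hi"}])

# Explicit client: server is pinned in code regardless of the environment.
client = ollama.Client(host="http://192.168.123.222:11434")
# r = client.chat(model="qwen2.5vl:3b", messages=[{"role": "user", "content": "hi"}])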
@@ -48,6 +76,8 @@ def add_to_history(user_msg: str, assistant_msg: str):


def call_llava(prompt: str, img_b64, num_predict: int = 200, use_history: bool = False) -> str:
    if not VLM_ENABLED:
        return ""  # safe-mode — caller must handle empty string
    messages = []
    if use_history and _conversation_history:
        messages.extend(_conversation_history)
@@ -55,13 +85,13 @@ def call_llava(prompt: str, img_b64, num_predict: int = 200, use_history: bool =
    if img_b64:
        msg["images"] = [img_b64]
    messages.append(msg)
    r = ollama.chat(model=OLLAMA_MODEL, messages=messages,
                    options={
                        "temperature": 0.0,
                        "num_predict": num_predict,
                        "num_batch": NUM_BATCH,
                        "num_ctx": NUM_CTX,
                    })
    r = _client.chat(model=OLLAMA_MODEL, messages=messages,
                     options={
                         "temperature": 0.0,
                         "num_predict": num_predict,
                         "num_batch": NUM_BATCH,
                         "num_ctx": NUM_CTX,
                     })
    return r["message"]["content"].strip()
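Because call_llava() now returns an empty string when the VLM is disabled, every caller has to treat "" as "no answer" rather than a model reply. A minimal caller-side sketch; the wrapper name is illustrative, and the fallback text simply reuses the safe-mode wording from the module docstring.

from API.llava_api import call_llava

def describe_scene(img_b64) -> str:
    # Sketch only: handle the safe-mode empty string explicitly.
    raw = call_llava("Describe the scene in one short sentence.", img_b64,
                     num_predict=60)
    if not raw:  # subsystems.vlm == false
        return "Scene understanding is disabled — running in safe mode."
    return raw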
@@ -79,7 +109,9 @@ def parse_json(raw: str):


def ask(command: str, img_b64) -> dict:
    """Send command + camera frame to LLaVA with conversation history."""
    """Send command + camera frame to the VLM with conversation history."""
    if not VLM_ENABLED:
        return dict(_VLM_OFF_EMPTY)
    try:
        facts_str = ("\nKnown facts: " + "; ".join(_facts) + ".") if _facts else ""
        raw = call_llava(MAIN_PROMPT.format(command=command, facts=facts_str), img_b64,
@@ -92,16 +124,19 @@ def ask(command: str, img_b64) -> dict:
            return {"actions": [], "arm": None, "speak": raw, "abort": None}
        return d
    except Exception as ex:
        print(f" LLaVA error: {ex}")
        return {"actions": [], "arm": None, "speak": "Error.", "abort": None}
        print(f" VLM error: {ex}")
        return {"actions": [], "arm": None, "speak": "VLM error.", "abort": None}


def ask_goal(goal: str, img_b64) -> dict:
    """Ask LLaVA if goal is reached."""
    """Ask the VLM if the goal is reached."""
    if not VLM_ENABLED:
        return {"reached": False, "next_move": "left", "duration": 0.5,
                "speak": "VLM disabled — relying on YOLO fast-match only."}
    try:
        raw = call_llava(GOAL_PROMPT.format(goal=goal), img_b64,
                         num_predict=_cfg["num_predict_goal"])
        print(f" LLaVA: {raw}")
        print(f" VLM: {raw}")
        d = parse_json(raw)
        if d is None:
            text = raw.lower()
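All of the ask*() entry points return plain dicts, so downstream code stays the same whether the answer came from the VLM or from a safe-mode fallback. A minimal consumer sketch for ask(); the print handlers stand in for the real TTS and motion layers and are illustrative.

from API.llava_api import ask

def handle_command(command: str, img_b64) -> None:
    # Sketch only: actions / arm / speak / abort are the keys this module
    # returns; the handlers below are placeholders.
    d = ask(command, img_b64)
    if d.get("speak"):
        print(f"[say]   {d['speak']}")
    for action in d.get("actions", []):
        print(f"[move]  {action}")
    if d.get("arm"):
        print(f"[arm]   {d['arm']}")
    if d.get("abort"):
        print(f"[abort] {d['abort']}")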
@@ -119,6 +154,8 @@ def ask_goal(goal: str, img_b64) -> dict:

def ask_talk(command: str, img_b64, facts: str = "") -> dict:
    """Handle talk-only commands using the YAML talk_prompt."""
    if not VLM_ENABLED:
        return dict(_VLM_OFF_EMPTY)
    try:
        prompt = TALK_PROMPT.format(command=command, facts=facts)
        raw = call_llava(prompt, img_b64, num_predict=_cfg["num_predict_talk"],
@@ -137,6 +174,9 @@ def ask_talk(command: str, img_b64, facts: str = "") -> dict:

def ask_verify(target: str, condition: str, img_b64) -> str:
    """Verify a condition on a detected target. Returns 'yes' or 'no'."""
    if not VLM_ENABLED:
        # Without VLM we can't verify compound conditions; trust the YOLO match.
        return "yes"
    try:
        prompt = VERIFY_PROMPT.format(target=target, condition=condition)
        raw = call_llava(prompt, img_b64, num_predict=_cfg["num_predict_verify"])
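ask_verify() is the second half of a detect-then-verify flow: YOLO proposes a target and the VLM confirms a compound condition on it, except in safe mode, where the YOLO match is trusted outright. A minimal sketch; the yolo_hit flag is an illustrative stand-in for the real detector result.

from API.llava_api import ask_verify

def confirm_target(target: str, condition: str, img_b64, yolo_hit: bool) -> bool:
    # Sketch only: with subsystems.vlm == false ask_verify() returns "yes",
    # so the decision falls back to the YOLO match alone.
    if not yolo_hit:
        return False
    return ask_verify(target, condition, img_b64) == "yes"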
@@ -148,7 +188,10 @@ def ask_verify(target: str, condition: str, img_b64) -> str:


def ask_patrol(img_b64) -> dict:
    """Ask LLaVA to assess scene during patrol."""
    """Ask the VLM to assess the scene during patrol."""
    if not VLM_ENABLED:
        return {"observation": "VLM off — patrolling without scene analysis.",
                "alert": None, "next_move": "forward", "duration": 1.0}
    try:
        raw = call_llava(PATROL_PROMPT, img_b64, num_predict=_cfg["num_predict_patrol"])
        d = parse_json(raw)
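ask_patrol() likewise degrades to a fixed dict when the VLM is off, so a patrol loop can consume the same keys in every mode. A minimal sketch; the logging calls stand in for the real alert and motion handling.

from API.llava_api import ask_patrol

def patrol_step(img_b64) -> None:
    # Sketch only: observation / alert / next_move / duration are the keys
    # ask_patrol() returns, including its VLM-off fallback.
    d = ask_patrol(img_b64)
    print(f"[patrol] {d['observation']}")
    if d.get("alert"):
        print(f"[ALERT]  {d['alert']}")
    print(f"[move]   {d['next_move']} for {d['duration']}s")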
@@ -147,11 +147,19 @@ def init_brain():

    _log("Brain initialized", "info", "brain")

    # Warmup runs in a daemon thread so the dashboard + Command: prompt
    # appear immediately. The first real user command will either hit a
    # warm model (fast) or pay the cold-load itself (same as before).
    import threading as _t
    _t.Thread(target=_warmup_llava, daemon=True, name="llava-warmup").start()
    # Skip warmup when VLM is off — there's no model to warm, and the
    # dashboard should mention that Marcus is in safe mode.
    from API.llava_api import VLM_ENABLED, OLLAMA_HOST
    if not VLM_ENABLED:
        print(" [VLM] disabled by config — safe mode (no Ollama load)")
    else:
        host_short = OLLAMA_HOST.replace("http://", "")
        print(f" [VLM] target: {host_short} ({OLLAMA_MODEL})")
        # Warmup runs in a daemon thread so the dashboard + Command: prompt
        # appear immediately. The first real user command will either hit a
        # warm model (fast) or pay the cold-load itself (same as before).
        import threading as _t
        _t.Thread(target=_warmup_llava, daemon=True, name="llava-warmup").start()


# Global voice references
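The gating above boils down to a small pattern: skip warmup entirely in safe mode, otherwise warm the model off the main thread so the prompt appears immediately. A minimal sketch of that pattern; start_vlm_warmup and warmup_fn are illustrative names, the real code calls _warmup_llava directly.

import threading

def start_vlm_warmup(vlm_enabled: bool, warmup_fn) -> None:
    # Sketch only: mirrors the init_brain() logic in this commit.
    if not vlm_enabled:
        print(" [VLM] disabled by config — safe mode (no Ollama load)")
        return
    threading.Thread(target=warmup_fn, daemon=True, name="llava-warmup").start()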
@@ -540,7 +548,9 @@ def run_terminal():
    print("\n\n" + "╔" + "═" * (W-2) + "╗")
    print("║" + _pad(" SANAD — AI BRAIN READY", W-2) + "║")
    print("╠" + "═" * (W-2) + "╣")
    from API.llava_api import VLM_ENABLED
    left = [("model", status["model"]),
            ("vlm", _fmt(VLM_ENABLED)),
            ("voice", _fmt(status["voice"])),
            ("camera", status["camera"])]
    right = [("yolo", _fmt(status["yolo"])),
@@ -1,12 +1,14 @@
{
  "ollama_model": "qwen2.5vl:3b",
  "ollama_host": "http://127.0.0.1:11434",
  "max_history": 6,
  "num_batch": 128,
  "num_ctx": 2048,
  "subsystems": {
    "lidar": true,
    "voice": true,
    "imgsearch": false,
    "vlm": true,
    "lidar": true,
    "voice": true,
    "imgsearch": false,
    "autonomous": true
  },
  "num_predict_main": 120,
@@ -1,7 +1,7 @@
{
  "tts": {
    "backend": "builtin_ttsmaker",
    "builtin_speaker_id": 0,
    "builtin_speaker_id": 2,
    "target_sample_rate": 16000
  },
  "stt": {
1 Data/Brain/Sessions/session_029_2026-04-22/alerts.json Normal file
@@ -0,0 +1 @@
[]
8 Data/Brain/Sessions/session_029_2026-04-22/commands.json Normal file
@@ -0,0 +1,8 @@
[
  {
    "time": "10:54:15",
    "cmd": "hi",
    "response": "Hello! I am Sanad. How can I help you?",
    "duration_s": 0.0
  }
]
@@ -0,0 +1 @@
[]
1 Data/Brain/Sessions/session_029_2026-04-22/places.json Normal file
@@ -0,0 +1 @@
{}
1 Data/Brain/Sessions/session_030_2026-04-22/alerts.json Normal file
@@ -0,0 +1 @@
[]
1 Data/Brain/Sessions/session_030_2026-04-22/commands.json Normal file
@@ -0,0 +1 @@
[]
@@ -0,0 +1 @@
[]
1 Data/Brain/Sessions/session_030_2026-04-22/places.json Normal file
@@ -0,0 +1 @@
{}
9 Data/Brain/Sessions/session_030_2026-04-22/summary.txt Normal file
@@ -0,0 +1,9 @@
Session: session_030_2026-04-22
Date: 2026-04-22 10:56
Duration: 0m 37s
Commands: 0
YOLO detections: 0
Alerts: 0
Known places: none

First commands:
@@ -349,12 +349,14 @@ class ImageSearch:

        try:
            if has_ref:
                # Pass BOTH images: [reference, current_frame]
                # num_batch/num_ctx mirror llava_api.py — without these
                # caps the compute graph OOMs the runner on Jetson.
                import ollama as _ollama
                from API.llava_api import NUM_BATCH, NUM_CTX
                r = _ollama.chat(
                # Pass BOTH images: [reference, current_frame]. Route through
                # the shared Ollama client (so VLM-off and remote-host config
                # are honored) and mirror the compute-graph caps.
                from API.llava_api import NUM_BATCH, NUM_CTX, VLM_ENABLED, _client as _llava_client
                if not VLM_ENABLED:
                    print(f" [{step}/{max_steps}] VLM disabled — skipping image-match")
                    continue
                r = _llava_client.chat(
                    model=self._model,
                    messages=[{
                        "role": "user",
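The new code path above sends two images in a single user message and reuses the shared client and caps from llava_api. A minimal standalone sketch of that call shape; the function name, prompt text, and yes/no parsing are illustrative.

from API.llava_api import NUM_BATCH, NUM_CTX, OLLAMA_MODEL, VLM_ENABLED, _client

def images_match(ref_b64: str, frame_b64: str) -> bool:
    # Sketch only: both images ride in one message, [reference, current_frame],
    # routed through the shared client so remote-host and VLM-off settings hold.
    if not VLM_ENABLED:
        return False
    r = _client.chat(
        model=OLLAMA_MODEL,
        messages=[{
            "role": "user",
            "content": "Do these two images show the same place? Answer yes or no.",
            "images": [ref_b64, frame_b64],
        }],
        options={"temperature": 0.0, "num_predict": 5,
                 "num_batch": NUM_BATCH, "num_ctx": NUM_CTX},
    )
    return r["message"]["content"].strip().lower().startswith("yes")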