Update 2026-04-22 12:32:54
This commit is contained in:
parent
d257808e48
commit
9991e742da
@ -50,7 +50,15 @@ VERIFY_PROMPT = _prompts["verify_prompt"]
|
|||||||
|
|
||||||
# Explicit Ollama client — lets us route to a remote host (e.g., workstation)
|
# Explicit Ollama client — lets us route to a remote host (e.g., workstation)
|
||||||
# without relying on the OLLAMA_HOST env var being set in the launch shell.
|
# without relying on the OLLAMA_HOST env var being set in the launch shell.
|
||||||
_client = ollama.Client(host=OLLAMA_HOST)
|
#
|
||||||
|
# CRITICAL: timeout=300 (5 min). The Python `ollama` library defaults to
|
||||||
|
# httpx's short timeout. On the Jetson a cold-load of qwen2.5vl:3b takes
|
||||||
|
# 60-90 s; with the default timeout the client disconnects mid-load,
|
||||||
|
# Ollama interprets that as "client cancelled", aborts the in-progress
|
||||||
|
# load, and starts over on the next request. This caused the repeated
|
||||||
|
# OOM crashes — the model was never finishing a single load before being
|
||||||
|
# thrown away and re-started.
|
||||||
|
_client = ollama.Client(host=OLLAMA_HOST, timeout=300)
|
||||||
|
|
||||||
# Safe-mode replies used when subsystems.vlm == false
|
# Safe-mode replies used when subsystems.vlm == false
|
||||||
_VLM_OFF_TALK = "Scene understanding is disabled — Sanad is in safe mode."
|
_VLM_OFF_TALK = "Scene understanding is disabled — Sanad is in safe mode."
|
||||||
|
|||||||
@ -27,6 +27,15 @@ _RE_WALK_BACK = re.compile(
|
|||||||
r"^(?:walk|go|move)\s+backward?\s+(\d+(?:\.\d+)?)\s*m(?:eter(?:s)?)?$", re.I)
|
r"^(?:walk|go|move)\s+backward?\s+(\d+(?:\.\d+)?)\s*m(?:eter(?:s)?)?$", re.I)
|
||||||
_RE_TURN_DEG = re.compile(
|
_RE_TURN_DEG = re.compile(
|
||||||
r"^turn\s+(?:(left|right)\s+)?(\d+(?:\.\d+)?)\s*deg(?:ree(?:s)?)?$", re.I)
|
r"^turn\s+(?:(left|right)\s+)?(\d+(?:\.\d+)?)\s*deg(?:ree(?:s)?)?$", re.I)
|
||||||
|
# Step-based motion: "walk 1 step", "walk forward 2 steps", "move back 1 step",
|
||||||
|
# "turn left", "turn right 2 steps". Kept local so these never fall through to
|
||||||
|
# the VLM — on the Jetson the cold-load is 60-90 s and we don't want to pay
|
||||||
|
# that for a trivial two-second motion. One step = 2 s of motion at the default
|
||||||
|
# velocity, matching the undo-loop duration already used below.
|
||||||
|
_RE_WALK_STEP = re.compile(
|
||||||
|
r"^(?:walk|go|move|step)(?:\s+(forward|back(?:ward)?))?\s+(\d+)\s*steps?$", re.I)
|
||||||
|
_RE_TURN_STEP = re.compile(
|
||||||
|
r"^turn\s+(left|right)(?:\s+(\d+)\s*steps?)?$", re.I)
|
||||||
_RE_PATROL_RT = re.compile(
|
_RE_PATROL_RT = re.compile(
|
||||||
r"^patrol[/:]\s*(.+)$", re.I)
|
r"^patrol[/:]\s*(.+)$", re.I)
|
||||||
_RE_LAST_CMD = re.compile(
|
_RE_LAST_CMD = re.compile(
|
||||||
@ -144,6 +153,32 @@ def try_local_command(cmd: str) -> bool:
|
|||||||
gradual_stop()
|
gradual_stop()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
m = _RE_WALK_STEP.match(cmd)
|
||||||
|
if m:
|
||||||
|
direction = (m.group(1) or "forward").lower()
|
||||||
|
steps = int(m.group(2))
|
||||||
|
vx = -0.2 if direction.startswith("back") else 0.3
|
||||||
|
duration = 2.0 * steps
|
||||||
|
t0 = time.time()
|
||||||
|
while time.time() - t0 < duration:
|
||||||
|
send_vel(vx=vx)
|
||||||
|
time.sleep(0.05)
|
||||||
|
gradual_stop()
|
||||||
|
return True
|
||||||
|
|
||||||
|
m = _RE_TURN_STEP.match(cmd)
|
||||||
|
if m:
|
||||||
|
direction = m.group(1).lower()
|
||||||
|
steps = int(m.group(2)) if m.group(2) else 1
|
||||||
|
vyaw = 0.3 if direction == "left" else -0.3
|
||||||
|
duration = 2.0 * steps
|
||||||
|
t0 = time.time()
|
||||||
|
while time.time() - t0 < duration:
|
||||||
|
send_vel(vyaw=vyaw)
|
||||||
|
time.sleep(0.05)
|
||||||
|
gradual_stop()
|
||||||
|
return True
|
||||||
|
|
||||||
# ── NAMED PATROL ROUTE ───────────────────────────────────────────────
|
# ── NAMED PATROL ROUTE ───────────────────────────────────────────────
|
||||||
m = _RE_PATROL_RT.match(cmd)
|
m = _RE_PATROL_RT.match(cmd)
|
||||||
if m:
|
if m:
|
||||||
|
|||||||
@ -147,19 +147,19 @@ def init_brain():
|
|||||||
|
|
||||||
_log("Brain initialized", "info", "brain")
|
_log("Brain initialized", "info", "brain")
|
||||||
|
|
||||||
# Skip warmup when VLM is off — there's no model to warm, and the
|
# Report VLM config only — no warmup thread. This matches Marcus_v1's
|
||||||
# dashboard should mention that Marcus is in safe mode.
|
# concept: the first real VLM command performs the cold-load synchronously
|
||||||
|
# inside ollama.chat(), which takes ~60-90 s once on the Jetson and is
|
||||||
|
# fast for every subsequent call. A background warmup thread races with
|
||||||
|
# YOLO/camera/audio/Holosoma startup and with user input, and on a
|
||||||
|
# 16 GB unified-memory board that race is what triggers the OOM killer.
|
||||||
from API.llava_api import VLM_ENABLED, OLLAMA_HOST
|
from API.llava_api import VLM_ENABLED, OLLAMA_HOST
|
||||||
if not VLM_ENABLED:
|
if not VLM_ENABLED:
|
||||||
print(" [VLM] disabled by config — safe mode (no Ollama load)")
|
print(" [VLM] disabled by config — safe mode (no Ollama load)")
|
||||||
else:
|
else:
|
||||||
host_short = OLLAMA_HOST.replace("http://", "")
|
host_short = OLLAMA_HOST.replace("http://", "")
|
||||||
print(f" [VLM] target: {host_short} ({OLLAMA_MODEL})")
|
print(f" [VLM] target: {host_short} ({OLLAMA_MODEL}) "
|
||||||
# Warmup runs in a daemon thread so the dashboard + Command: prompt
|
f"— first vision command will cold-load (~60-90 s)")
|
||||||
# appear immediately. The first real user command will either hit a
|
|
||||||
# warm model (fast) or pay the cold-load itself (same as before).
|
|
||||||
import threading as _t
|
|
||||||
_t.Thread(target=_warmup_llava, daemon=True, name="llava-warmup").start()
|
|
||||||
|
|
||||||
|
|
||||||
# Global voice references
|
# Global voice references
|
||||||
@ -419,47 +419,6 @@ def _strip_ansi(s: str) -> str:
|
|||||||
return _re.sub(r"\x1b\[[0-9;]*m", "", s)
|
return _re.sub(r"\x1b\[[0-9;]*m", "", s)
|
||||||
|
|
||||||
|
|
||||||
def _warmup_llava():
|
|
||||||
"""
|
|
||||||
Runs in a daemon thread — primes the Ollama model into VRAM so the first
|
|
||||||
user command doesn't pay the ~15-20 s cold-load. Tolerates client-timeout
|
|
||||||
on the first attempt (common on the 16 GB Jetson when the compute graph
|
|
||||||
needs a second pass to fit) by doing one silent retry.
|
|
||||||
"""
|
|
||||||
import ollama
|
|
||||||
# Quiet heartbeat in the terminal so the operator knows something is happening
|
|
||||||
# without flooding stdout once the banner prints.
|
|
||||||
print(" [Warmup] Loading Qwen2.5-VL into GPU...")
|
|
||||||
base_options = {
|
|
||||||
"temperature": 0.0,
|
|
||||||
"num_predict": _cfg["warmup_num_predict"],
|
|
||||||
# Honor the same compute-graph caps everything else uses, otherwise
|
|
||||||
# Ollama reverts to batch=512/ctx=4096 for this call and OOMs.
|
|
||||||
"num_batch": _cfg.get("num_batch", 128),
|
|
||||||
"num_ctx": _cfg.get("num_ctx", 2048),
|
|
||||||
}
|
|
||||||
for attempt in (1, 2):
|
|
||||||
try:
|
|
||||||
img = get_frame()
|
|
||||||
ollama.chat(
|
|
||||||
model=OLLAMA_MODEL,
|
|
||||||
messages=[{"role": "user", "content": "hi",
|
|
||||||
"images": [img] if img else []}],
|
|
||||||
options=base_options,
|
|
||||||
)
|
|
||||||
print(" [Warmup] Ready — first command will be fast")
|
|
||||||
return
|
|
||||||
except Exception as e:
|
|
||||||
if attempt == 1:
|
|
||||||
# Cold-load frequently times out on attempt #1 while Ollama
|
|
||||||
# is still allocating the compute graph. The model stays
|
|
||||||
# loaded though, so attempt #2 almost always succeeds.
|
|
||||||
print(f" [Warmup] first attempt timed out, retrying...")
|
|
||||||
continue
|
|
||||||
print(f" [Warmup] failed after retry ({e}) — first real command "
|
|
||||||
f"will pay the cold-load (~15-20 s)")
|
|
||||||
|
|
||||||
|
|
||||||
def get_brain_status() -> dict:
|
def get_brain_status() -> dict:
|
||||||
"""Return current brain status for server status message."""
|
"""Return current brain status for server status message."""
|
||||||
from API.yolo_api import YOLO_AVAILABLE as _ya
|
from API.yolo_api import YOLO_AVAILABLE as _ya
|
||||||
|
|||||||
@ -15,6 +15,5 @@
|
|||||||
"num_predict_goal": 80,
|
"num_predict_goal": 80,
|
||||||
"num_predict_patrol": 100,
|
"num_predict_patrol": 100,
|
||||||
"num_predict_talk": 80,
|
"num_predict_talk": 80,
|
||||||
"num_predict_verify": 10,
|
"num_predict_verify": 10
|
||||||
"warmup_num_predict": 5
|
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user