Update 2026-04-22 11:47:53

kassam 2026-04-22 11:47:54 +04:00
parent 3122a52966
commit 5b8e94f42e
2 changed files with 91 additions and 25 deletions

View File

@@ -146,7 +146,12 @@ def init_brain():
print(" [Voice] disabled by config")
_log("Brain initialized", "info", "brain")
_warmup_llava()
# Warmup runs in a daemon thread so the dashboard + Command: prompt
# appear immediately. The first real user command will either hit a
# warm model (fast) or pay the cold-load itself (same as before).
import threading as _t
_t.Thread(target=_warmup_llava, daemon=True, name="llava-warmup").start()
# Global voice references
@@ -400,20 +405,51 @@ def _handle_llava(cmd):
# HELPERS
# ══════════════════════════════════════════════════════════════════════════════
def _strip_ansi(s: str) -> str:
"""Strip ANSI colour escapes for correct visual width calculations."""
import re as _re
return _re.sub(r"\x1b\[[0-9;]*m", "", s)
def _warmup_llava():
"""
Runs in a daemon thread; primes the Ollama model into VRAM so the first
user command doesn't pay the ~15-20 s cold load. Tolerates a client
timeout on the first attempt (common on the 16 GB Jetson when the compute
graph needs a second pass to fit) by doing one silent retry.
"""
import ollama
print(" Warming up LLaVA... (loading into GPU)")
try:
img = get_frame()
ollama.chat(
model=OLLAMA_MODEL,
messages=[{"role": "user", "content": "hi",
"images": [img] if img else []}],
options={"temperature": 0.0, "num_predict": _cfg["warmup_num_predict"]}
)
print(" LLaVA warm - first command will be fast\n")
except Exception as e:
print(f" Warmup failed ({e}) - first command may be slow\n")
# Quiet heartbeat in the terminal so the operator knows something is happening
# without flooding stdout once the banner prints.
print(" [Warmup] Loading Qwen2.5-VL into GPU...")
base_options = {
"temperature": 0.0,
"num_predict": _cfg["warmup_num_predict"],
# Honor the same compute-graph caps everything else uses, otherwise
# Ollama reverts to batch=512/ctx=4096 for this call and OOMs.
"num_batch": _cfg.get("num_batch", 128),
"num_ctx": _cfg.get("num_ctx", 2048),
}
for attempt in (1, 2):
try:
img = get_frame()
ollama.chat(
model=OLLAMA_MODEL,
messages=[{"role": "user", "content": "hi",
"images": [img] if img else []}],
options=base_options,
)
print(" [Warmup] Ready — first command will be fast")
return
except Exception as e:
if attempt == 1:
# Cold-load frequently times out on attempt #1 while Ollama
# is still allocating the compute graph. The model stays
# loaded though, so attempt #2 almost always succeeds.
print(f" [Warmup] first attempt timed out, retrying...")
continue
print(f" [Warmup] failed after retry ({e}) — first real command "
f"will pay the cold-load (~15-20 s)")
def get_brain_status() -> dict:
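The warm-up logic above is self-contained enough to test off-robot. Below is a minimal sketch of the same pattern, assuming only the ollama Python client and a locally pulled model; warm_model, the model tag, and the option values are illustrative placeholders, not part of this commit:

import threading
import ollama

def warm_model(model: str, options: dict, retries: int = 1) -> None:
    """Force the model weights into VRAM with a one-token generation."""
    for attempt in range(retries + 1):
        try:
            ollama.chat(model=model,
                        messages=[{"role": "user", "content": "hi"}],
                        options=options)
            return
        except Exception as e:
            if attempt == retries:
                # Give up quietly; the first real command pays the cold load.
                print(f"[warmup] gave up after {retries + 1} attempts: {e}")

# Daemon thread so the interactive prompt appears immediately, mirroring
# the init_brain() change in the first hunk.
threading.Thread(
    target=warm_model,
    args=("llava", {"num_predict": 1, "num_ctx": 2048, "num_batch": 128}),
    daemon=True,
).start()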
@@ -480,18 +516,39 @@ def run_terminal():
"""Run brain with terminal input loop."""
init_brain()
# ─── DASHBOARD ───────────────────────────────────────────────────────
# Separate the boot log from the interactive dashboard with a clear
# visual break. Print at the end of init so it's always the last thing
# on screen before the first `Command:` prompt — even if the operator
# scrolled through a wall of subsystem init messages.
status = get_brain_status()
print()
print("=" * 54)
print(" SANAD AI BRAIN — READY")
print("=" * 54)
for k, v in status.items():
print(f" {k:<12}: {v}")
print("-" * 54)
print(" Type a command, or say \"Sanad, <command>\".")
print(" Shortcuts: help | example | yolo | patrol | auto on/off | q")
print("=" * 54)
print()
def _fmt(v):
if v is True: return "\033[92mON \033[0m" # green
if v is False: return "\033[91mOFF\033[0m" # red
return str(v)
W = 58
print("\n\n" + "" + "" * (W-2) + "")
print("" + " SANAD — AI BRAIN READY".ljust(W-2) + "")
print("" + "" * (W-2) + "")
left = [("model", status["model"]),
("voice", _fmt(status["voice"])),
("camera", status["camera"])]
right = [("yolo", _fmt(status["yolo"])),
("lidar", _fmt(status["lidar"])),
("memory", _fmt(status["memory"])),
("odometry", _fmt(status["odometry"]))]
for i in range(max(len(left), len(right))):
l = f" {left[i][0]:<8}: {left[i][1]}" if i < len(left) else ""
r = f" {right[i][0]:<8}: {right[i][1]}" if i < len(right) else ""
# account for ANSI escapes when padding both columns: str.ljust counts
# the invisible escape bytes, so widen each target by that overhead
pad = 28 + (len(l) - len(_strip_ansi(l)))
r_pad = (W - 2 - 28) + (len(r) - len(_strip_ansi(r)))
print("│" + l.ljust(pad) + r.ljust(r_pad) + "│")
print("" + "" * (W-2) + "")
print("" + " Type a command, or say \"Sanad, <command>\".".ljust(W-2) + "")
print("" + " help · example · yolo · patrol · auto on/off · q".ljust(W-2) + "")
print("" + "" * (W-2) + "\n")
try:
while True:

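The ANSI-aware padding used in the dashboard generalizes to a small helper. A sketch for illustration; visible_ljust is a hypothetical name, not something this commit introduces:

import re

ANSI_RE = re.compile(r"\x1b\[[0-9;]*m")

def visible_ljust(s: str, width: int) -> str:
    # str.ljust counts invisible escape bytes toward the width, so widen
    # the target by exactly that overhead to hit the intended visible width.
    return s.ljust(width + len(s) - len(ANSI_RE.sub("", s)))

cell = visible_ljust(" voice   : \033[92mON \033[0m", 28)
assert len(ANSI_RE.sub("", cell)) == 28  # box borders stay aligned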
View File

@@ -595,7 +595,16 @@ def slam_worker(
def init_slam_stack():
nonlocal slam, filt, stable
cfg = load_config(config_file=None, max_range=float(eng_cfg.max_range))
# kiss_icp >= 1.2.x requires a `deskew` argument; pre-1.2 doesn't accept
# one. Try the new signature first and fall back to the old one.
try:
cfg = load_config(
config_file=None,
deskew=bool(getattr(eng_cfg, "deskew", True)),
max_range=float(eng_cfg.max_range),
)
except TypeError:
cfg = load_config(config_file=None, max_range=float(eng_cfg.max_range))
cfg.mapping.voxel_size = float(eng_cfg.slam_voxel_size)
slam = KissICP(cfg)
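An alternative to probing with try/except is to inspect the installed kiss_icp signature once at import time. A sketch under the assumption that load_config is importable from kiss_icp.config; load_config_compat is a hypothetical wrapper, not part of the commit:

import inspect
from kiss_icp.config import load_config

# True on kiss_icp >= 1.2.x, where load_config grew a `deskew` parameter.
_HAS_DESKEW = "deskew" in inspect.signature(load_config).parameters

def load_config_compat(max_range: float, deskew: bool = True):
    if _HAS_DESKEW:
        return load_config(config_file=None, deskew=deskew,
                           max_range=max_range)
    return load_config(config_file=None, max_range=max_range)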