diff --git a/Brain/marcus_brain.py b/Brain/marcus_brain.py
index edb966f..9b351b3 100644
--- a/Brain/marcus_brain.py
+++ b/Brain/marcus_brain.py
@@ -146,7 +146,12 @@ def init_brain():
         print("  [Voice] disabled by config")
     _log("Brain initialized", "info", "brain")
-    _warmup_llava()
+
+    # Warmup runs in a daemon thread so the dashboard + Command: prompt
+    # appear immediately. The first real user command will either hit a
+    # warm model (fast) or pay the cold-load itself (same as before).
+    import threading as _t
+    _t.Thread(target=_warmup_llava, daemon=True, name="llava-warmup").start()


 # Global voice references
@@ -400,20 +405,51 @@ def _handle_llava(cmd):
 # HELPERS
 # ══════════════════════════════════════════════════════════════════════════════

+def _strip_ansi(s: str) -> str:
+    """Strip ANSI colour escapes for correct visual-width calculations."""
+    import re as _re
+    return _re.sub(r"\x1b\[[0-9;]*m", "", s)
+
+
 def _warmup_llava():
+    """
+    Runs in a daemon thread — primes the Ollama model into VRAM so the first
+    user command doesn't pay the ~15-20 s cold-load. Tolerates a client timeout
+    on the first attempt (common on the 16 GB Jetson when the compute graph
+    needs a second pass to fit) by doing one silent retry.
+    """
     import ollama
-    print("  Warming up LLaVA... (loading into GPU)")
-    try:
-        img = get_frame()
-        ollama.chat(
-            model=OLLAMA_MODEL,
-            messages=[{"role": "user", "content": "hi",
-                       "images": [img] if img else []}],
-            options={"temperature": 0.0, "num_predict": _cfg["warmup_num_predict"]}
-        )
-        print("  LLaVA warm - first command will be fast\n")
-    except Exception as e:
-        print(f"  Warmup failed ({e}) - first command may be slow\n")
+    # Quiet heartbeat in the terminal so the operator knows something is
+    # happening, without flooding stdout once the banner prints.
+    print("  [Warmup] Loading Qwen2.5-VL into GPU...")
+    base_options = {
+        "temperature": 0.0,
+        "num_predict": _cfg["warmup_num_predict"],
+        # Honor the same compute-graph caps everything else uses, otherwise
+        # Ollama reverts to batch=512/ctx=4096 for this call and OOMs.
+        "num_batch": _cfg.get("num_batch", 128),
+        "num_ctx": _cfg.get("num_ctx", 2048),
+    }
+    for attempt in (1, 2):
+        try:
+            img = get_frame()
+            ollama.chat(
+                model=OLLAMA_MODEL,
+                messages=[{"role": "user", "content": "hi",
+                           "images": [img] if img else []}],
+                options=base_options,
+            )
+            print("  [Warmup] Ready — first command will be fast")
+            return
+        except Exception as e:
+            if attempt == 1:
+                # Cold-load frequently times out on attempt #1 while Ollama
+                # is still allocating the compute graph. The model stays
+                # loaded though, so attempt #2 almost always succeeds.
+                print("  [Warmup] first attempt timed out, retrying...")
+                continue
+            print(f"  [Warmup] failed after retry ({e}) — first real command "
+                  f"will pay the cold-load (~15-20 s)")


 def get_brain_status() -> dict:
@@ -480,18 +516,41 @@ def run_terminal():
     """Run brain with terminal input loop."""
     init_brain()

+    # ─── DASHBOARD ───────────────────────────────────────────────────────
+    # Separate the boot log from the interactive dashboard with a clear
+    # visual break. Print at the end of init so it's always the last thing
+    # on screen before the first `Command:` prompt — even if the operator
+    # scrolled through a wall of subsystem init messages.
     status = get_brain_status()
-    print()
-    print("=" * 54)
-    print("  SANAD AI BRAIN — READY")
-    print("=" * 54)
-    for k, v in status.items():
-        print(f"  {k:<12}: {v}")
-    print("-" * 54)
-    print("  Type a command, or say \"Sanad, <command>\".")
-    print("  Shortcuts: help | example | yolo | patrol | auto on/off | q")
-    print("=" * 54)
-    print()
+
+    def _fmt(v):
+        if v is True:  return "\033[92mON \033[0m"   # green
+        if v is False: return "\033[91mOFF\033[0m"   # red
+        return str(v)
+
+    W = 58
+    print("\n\n" + "╔" + "═" * (W-2) + "╗")
+    print("║" + "  SANAD — AI BRAIN READY".ljust(W-2) + "║")
+    print("╠" + "═" * (W-2) + "╣")
+    left  = [("model",  status["model"]),
+             ("voice",  _fmt(status["voice"])),
+             ("camera", status["camera"])]
+    right = [("yolo",     _fmt(status["yolo"])),
+             ("lidar",    _fmt(status["lidar"])),
+             ("memory",   _fmt(status["memory"])),
+             ("odometry", _fmt(status["odometry"]))]
+    for i in range(max(len(left), len(right))):
+        l = f"  {left[i][0]:<8}: {left[i][1]}" if i < len(left) else ""
+        r = f"  {right[i][0]:<8}: {right[i][1]}" if i < len(right) else ""
+        # .ljust() counts ANSI escape bytes as width, so pad each column by
+        # its own invisible-character count to keep the box edges aligned.
+        pad_l = 28 + (len(l) - len(_strip_ansi(l)))
+        pad_r = (W - 2 - 28) + (len(r) - len(_strip_ansi(r)))
+        print("║" + l.ljust(pad_l) + r.ljust(pad_r) + "║")
+    print("╠" + "═" * (W-2) + "╣")
+    print("║" + "  Type a command, or say \"Sanad, <command>\".".ljust(W-2) + "║")
+    print("║" + "  help · example · yolo · patrol · auto on/off · q".ljust(W-2) + "║")
+    print("╚" + "═" * (W-2) + "╝\n")

     try:
         while True:
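Reviewer note: the warmup change above amounts to "prime the model off the critical path, and tolerate one timeout before giving up". A minimal, self-contained sketch of that pattern, with hypothetical names (`warm_model`, `load_fn`) that are not part of this codebase:

```python
import threading

def warm_model(load_fn, name="model-warmup"):
    """Prime a model in the background so the UI prompt never blocks.

    A first-attempt failure (e.g. a client timeout while the server is
    still allocating the compute graph) gets one silent retry; after
    that we give up and let the first real call pay the cold-load.
    """
    def _run():
        for attempt in (1, 2):
            try:
                load_fn()  # any cheap call that forces the weights to load
                return
            except Exception as exc:
                if attempt == 1:
                    continue  # server usually keeps loading; retry once
                print(f"[{name}] warmup failed ({exc}); first real call will be slow")

    threading.Thread(target=_run, daemon=True, name=name).start()

# Usage sketch: warm_model(lambda: client.chat(model=MODEL, messages=[...]))
```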
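Similarly, `_strip_ansi` exists because `str.ljust` pads by character count, while colour escapes consume characters but zero terminal columns. A standalone illustration of the same width correction used in the dashboard loop (`visual_ljust` is an invented name for the example):

```python
import re

ANSI_RE = re.compile(r"\x1b\[[0-9;]*m")

def visual_ljust(s: str, width: int) -> str:
    """Left-justify by *visible* width: escapes count toward len() but
    occupy no columns, so widen the pad target by the invisible count."""
    invisible = len(s) - len(ANSI_RE.sub("", s))
    return s.ljust(width + invisible)

green = "\033[92mON \033[0m"                    # 12 characters, 3 visible columns
print(len(green), len(ANSI_RE.sub("", green)))  # -> 12 3
print("[" + green.ljust(10) + "]")              # under-padded: 12 >= 10, no spaces added
print("[" + visual_ljust(green, 10) + "]")      # pads to 10 visible columns
```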
diff --git a/Lidar/SLAM_worker.py b/Lidar/SLAM_worker.py
index 0b82394..4045fc3 100644
--- a/Lidar/SLAM_worker.py
+++ b/Lidar/SLAM_worker.py
@@ -595,7 +595,16 @@ def slam_worker(
     def init_slam_stack():
         nonlocal slam, filt, stable
-        cfg = load_config(config_file=None, max_range=float(eng_cfg.max_range))
+        # kiss_icp >= 1.2.x requires a `deskew` argument; pre-1.2 doesn't
+        # accept it. Try the new signature first, fall back to the old one.
+        try:
+            cfg = load_config(
+                config_file=None,
+                deskew=bool(getattr(eng_cfg, "deskew", True)),
+                max_range=float(eng_cfg.max_range),
+            )
+        except TypeError:
+            cfg = load_config(config_file=None, max_range=float(eng_cfg.max_range))
         cfg.mapping.voxel_size = float(eng_cfg.slam_voxel_size)
         slam = KissICP(cfg)
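On the kiss-icp shim: the try/except TypeError fallback is the lightest way to straddle both `load_config` signatures. If more call sites ever need the same tolerance, filtering keyword arguments through `inspect.signature` is one alternative; this sketch assumes the target function has an introspectable signature and no `**kwargs` catch-all (`call_with_supported_kwargs` is a hypothetical helper, not a kiss-icp API):

```python
import inspect

def call_with_supported_kwargs(fn, **kwargs):
    """Call `fn` with only the keyword arguments it actually accepts,
    so one call site survives library versions that add or drop
    parameters (e.g. kiss_icp's `deskew`)."""
    accepted = inspect.signature(fn).parameters
    return fn(**{k: v for k, v in kwargs.items() if k in accepted})

# Usage sketch:
#   cfg = call_with_supported_kwargs(load_config, config_file=None,
#                                    deskew=True, max_range=50.0)
```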