""" marcus_brain.py — Marcus AI Brain Orchestrator ================================================ Shared brain logic for both terminal (run_marcus.py) and server (marcus_server.py). Usage: Terminal: python3 run_marcus.py Server: python3 -m Server.marcus_server (imports init_brain + process_command) """ import json import os import re import time import sys PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) if PROJECT_DIR not in sys.path: sys.path.insert(0, PROJECT_DIR) from API.zmq_api import init_zmq, send_vel, gradual_stop, send_cmd from API.camera_api import start_camera, stop_camera, get_frame from API.yolo_api import ( init_yolo, yolo_summary, yolo_fps, yolo_all_classes, yolo_closest, yolo_sees, ) from API.odometry_api import init_odometry from API.memory_api import init_memory, log_cmd, log_detection from API.llava_api import ( OLLAMA_MODEL, ask, ask_talk, call_llava, parse_json, add_to_history, ) from API.imgsearch_api import init_imgsearch, get_searcher from Core.config_loader import load_config from Core.logger import log as _log from Brain.command_parser import try_local_command, init_autonomous from Brain.executor import execute, execute_action from Navigation.goal_nav import navigate_to_goal from Navigation.patrol import patrol from Autonomous.marcus_autonomous import AutonomousMode _cfg = load_config("Brain") _TALK_PATTERNS = [ # Questions r"^(?:what|who|where|when|how|why|is|are|do|does|can|tell|describe|explain|show|analyze)\s+", # Identity / facts told to the robot r"^(?:my name is|i am|call me|that is|that person|note that|remember that)\s+", # Acknowledgements r"^(?:ok|okay|yes|no|good|nice|great|thanks|thank you|got it|understood|correct)\s*[!.]*$", ] _NAT_GOAL_RE = re.compile( r'^(?:keep\s+(?:turn|rotat|spin)\w*\s+\w+\s+until\s+(?:you\s+)?(?:see|find|spot)\s+.+' r'|stop\s+when\s+(?:you\s+)?(?:see|find|spot)\s+.+' r'|find\s+(?:a\s+|the\s+|me\s+a\s+)?\w.+' r'|look\s+for\s+(?:a\s+|the\s+)?\w.+' r'|search\s+for\s+(?:a\s+|the\s+)?\w.+)$', re.IGNORECASE ) # ══════════════════════════════════════════════════════════════════════════════ # INIT — called once by both run_marcus.py and marcus_server.py # ══════════════════════════════════════════════════════════════════════════════ def init_brain(): """Initialize all subsystems. Call once at startup from the parent process. Optional subsystems (lidar / voice / imgsearch / autonomous) are gated on `config_Brain.json::subsystems.`. Disabling the ones you don't need brings Marcus's boot time down from ~18 s to ~5-7 s. """ subsys = _cfg.get("subsystems", {}) or {} # Bind the ZMQ PUB socket before anything tries to publish on it. # This is now explicit (previously it happened as an import side effect, # which crashed every multiprocessing child that re-imported zmq_api). init_zmq() raw_frame, raw_lock = start_camera() init_yolo(raw_frame, raw_lock) from API.zmq_api import get_socket init_odometry(zmq_sock=get_socket()) init_memory() # LiDAR — optional if subsys.get("lidar", True): try: from API.lidar_api import init_lidar init_lidar() except Exception as e: print(f" [LiDAR] Init failed: {e} — continuing without LiDAR") else: print(" [LiDAR] disabled by config") # Image search — optional if subsys.get("imgsearch", False): init_imgsearch( get_frame_fn=get_frame, send_vel_fn=send_vel, gradual_stop_fn=gradual_stop, llava_fn=call_llava, yolo_sees_fn=yolo_sees, model=OLLAMA_MODEL, ) else: print(" [ImgSearch] disabled by config") # Autonomous exploration mode — optional if subsys.get("autonomous", True): from API.memory_api import mem as _mem_ref from API.llava_api import PATROL_PROMPT auto = AutonomousMode( get_frame_fn=get_frame, send_vel_fn=send_vel, gradual_stop_fn=gradual_stop, yolo_sees_fn=yolo_sees, yolo_summary_fn=yolo_summary, yolo_all_classes_fn=yolo_all_classes, yolo_closest_fn=yolo_closest, odom_fn=lambda: {"x": 0, "y": 0, "heading": 0}, call_llava_fn=call_llava, patrol_prompt=PATROL_PROMPT, mem=_mem_ref, ) from API.odometry_api import odom as _odom_ref, ODOM_AVAILABLE if _odom_ref and ODOM_AVAILABLE: auto._odom_pos = lambda: { "x": _odom_ref._x, "y": _odom_ref._y, "heading": _odom_ref._heading } init_autonomous(auto) else: print(" [Autonomous] disabled by config") send_cmd("start") time.sleep(0.5) send_cmd("walk") time.sleep(0.5) # Voice module — optional if subsys.get("voice", True): _init_voice() else: print(" [Voice] disabled by config") _log("Brain initialized", "info", "brain") _warmup_llava() # Global voice references _audio_api = None _voice_module = None def _init_voice(): """ Initialize the voice subsystem: G1 built-in mic + Whisper STT + G1 built-in TtsMaker for replies. Every transcribed command flows through process_command(), and the resulting `speak` string is sent to the G1 speaker. """ global _audio_api, _voice_module try: from API.audio_api import AudioAPI from Voice.marcus_voice import VoiceModule _audio_api = AudioAPI() def _on_command(text, lang): text = (text or "").strip() if not text: return print(f" [Voice] {text}") try: result = process_command(text) except Exception as e: print(f" [Brain] Error processing voice command: {e}") return if isinstance(result, dict): sp = (result.get("speak") or "").strip() if sp and _audio_api: _audio_api.speak(sp) _voice_module = VoiceModule(_audio_api, on_command=_on_command) _voice_module.start() print(" [Voice] Always listening (Whisper + G1 mic + TtsMaker)") except Exception as e: print(f" [Voice] Init failed: {e} — continuing without voice") _audio_api = None _voice_module = None # ══════════════════════════════════════════════════════════════════════════════ # PROCESS COMMAND — shared by terminal loop and WebSocket server # ══════════════════════════════════════════════════════════════════════════════ def process_command(cmd: str) -> dict: """ Process a single command through the full brain pipeline. Returns: {"type": str, "speak": str, "action": str, "elapsed": float} Used by both run_marcus.py (terminal) and marcus_server.py (WebSocket). """ cmd = cmd.strip() if not cmd: return {"type": "empty", "speak": "", "action": "NONE", "elapsed": 0} t0 = time.time() # ── YOLO status ────────────────────────────────────────────────────── if any(w in cmd.lower() for w in ("yolo", "what does yolo", "vision", "using yolo")): from API.yolo_api import YOLO_AVAILABLE as _ya status = "active" if _ya else "not loaded" speak = f"YOLO: {status} | {yolo_summary()} | {yolo_fps():.1f}fps" print(f" {speak}") log_cmd(cmd, speak) return {"type": "status", "speak": speak, "action": "YOLO", "elapsed": 0} # ── Image search ───────────────────────────────────────────────────── if cmd.lower().startswith("search/"): speak = _handle_search(cmd) return {"type": "search", "speak": speak, "action": "SEARCH", "elapsed": time.time() - t0} # ── Auto-detect natural language goals ─────────────────────────────── if _NAT_GOAL_RE.match(cmd) and not cmd.lower().startswith("goal/"): print(f" [Goal] Auto-detected: '{cmd}'") navigate_to_goal(cmd.strip()) elapsed = time.time() - t0 log_cmd(cmd, f"Goal navigation: {cmd}", elapsed) return {"type": "goal", "speak": f"Goal navigation: {cmd}", "action": "GOAL", "elapsed": elapsed} # ── Explicit goal/ ─────────────────────────────────────────────────── if cmd.lower().startswith("goal/"): goal = cmd[5:].strip() if goal: navigate_to_goal(goal) elapsed = time.time() - t0 log_cmd(cmd, f"Goal navigation: {goal}", elapsed) return {"type": "goal", "speak": f"Goal navigation: {goal}", "action": "GOAL", "elapsed": elapsed} return {"type": "error", "speak": "Usage: goal/ stop when you see a person", "action": "NONE", "elapsed": 0} # ── Autonomous patrol ──────────────────────────────────────────────── if cmd.lower().startswith("patrol"): mins = 5.0 if " " in cmd: try: mins = float(cmd.split()[-1]) except ValueError: pass patrol(duration_minutes=mins) elapsed = time.time() - t0 log_cmd(cmd, f"Patrol {mins}min", elapsed) return {"type": "patrol", "speak": f"Patrol {mins}min complete", "action": "PATROL", "elapsed": elapsed} # ── Local commands (place / odom / memory / help) ──────────────────── if try_local_command(cmd): log_cmd(cmd, "local command") return {"type": "local", "speak": "Done", "action": "LOCAL", "elapsed": time.time() - t0} # ── Talk-only (questions / acknowledgements) ───────────────────────── if any(re.match(p, cmd, re.IGNORECASE) for p in _TALK_PATTERNS): speak = _handle_talk(cmd) return {"type": "talk", "speak": speak, "action": "TALK", "elapsed": time.time() - t0} # ── Greeting ───────────────────────────────────────────────────────── if re.match(r"^(?:hi+|hey+|hello+|sup|yo+|greetings|good (?:morning|afternoon|evening))\s*[!.]*$", cmd, re.IGNORECASE): response = "Hello! I am Sanad. How can I help you?" print(f"Sanad: {response}") add_to_history(cmd, response) log_cmd(cmd, response) return {"type": "greeting", "speak": response, "action": "GREETING", "elapsed": 0} # ── "Come to me" shortcut ──────────────────────────────────────────── if re.match(r"^(?:come(?:\s+back)?(?:\s+to\s+me)?|come\s+here|get\s+closer|approach|move\s+closer)\s*[!.]*$", cmd, re.IGNORECASE): execute_action("forward", 2.0) resp = "Coming to you" print(f"Sanad: {resp}") add_to_history(cmd, resp) log_cmd(cmd, resp) return {"type": "move", "speak": resp, "action": "FORWARD 2.0s", "elapsed": 2.0} # ── Multi-step compound ────────────────────────────────────────────── _multi = re.match( r"turn\s+(right|left)\s*(\d+)?\s*(?:deg(?:rees?)?)?\s+(?:and\s+then|then|and)?\s+" r"(?:move\s+|go\s+|walk\s+|step\s+)?(back(?:ward)?|forward)\s*(\d+)?\s*(?:steps?|meter)?", cmd, re.IGNORECASE) if _multi: turn_dir = _multi.group(1).lower() turn_deg = float(_multi.group(2) or 90) walk_dir = "backward" if "back" in _multi.group(3).lower() else "forward" walk_dur = float(_multi.group(4) or 2) execute_action("right" if turn_dir == "right" else "left", turn_deg / 18.0) execute_action(walk_dir, walk_dur) resp = f"Turned {turn_dir} {int(turn_deg)} degrees then moved {walk_dir}" print(f"Sanad: {resp}") add_to_history(cmd, resp) log_cmd(cmd, resp) return {"type": "move", "speak": resp, "action": f"MULTI {turn_dir}+{walk_dir}", "elapsed": time.time() - t0} # ── Standard LLaVA command ─────────────────────────────────────────── return _handle_llava(cmd) # ══════════════════════════════════════════════════════════════════════════════ # HANDLERS (return speak text) # ══════════════════════════════════════════════════════════════════════════════ def _handle_search(cmd): args = cmd[7:].strip() if not args: print(" Usage: search/ /path/to/photo.jpg [hint]") return "Usage: search/ " searcher = get_searcher() if not searcher: print(" [Search] Image search not available") return "Image search not available" parts = args.split(None, 1) if parts and os.path.exists(parts[0]): img_path = parts[0] hint = parts[1].strip() if len(parts) > 1 else "" yolo_pre = "person" if not hint or "person" in hint.lower() else None log_cmd(cmd, f"Image search: {img_path}") result = searcher.search_from_file(img_path, hint=hint, yolo_prefilter=yolo_pre) return result.get("description", "Search complete") hint = args yolo_pre = "person" if any(w in hint.lower() for w in ("person", "guy", "man", "woman")) else None log_cmd(cmd, f"Image search: {hint}") result = searcher.search(ref_img_b64=None, hint=hint, yolo_prefilter=yolo_pre) return result.get("description", "Search complete") def _handle_talk(cmd): print("Thinking...") try: img = get_frame() facts_str = "" try: from API.llava_api import _facts if _facts: facts_str = "\nKnown facts: " + "; ".join(_facts) + "." except ImportError: pass d = ask_talk(cmd, img, facts=facts_str) sp = d.get("speak", "") print(f"Sanad: {sp}") log_cmd(cmd, sp) return sp except Exception as ex: print(f" Error: {ex}") return f"Error: {ex}" def _handle_llava(cmd): print("Thinking...") t0 = time.time() img = get_frame() # Poll up to 500 ms in 50 ms slices instead of blocking a full second. # Returns the moment a frame is available — most drops recover in <100 ms. if img is None: print(" Waiting for camera...") for _ in range(10): time.sleep(0.05) img = get_frame() if img is not None: break if img is None: print(" Camera not ready — command cancelled") log_cmd(cmd, "camera not ready") return {"type": "error", "speak": "Camera not ready", "action": "NONE", "elapsed": 0} d = ask(cmd, img) dur = time.time() - t0 print(f" ({dur:.1f}s) -> {json.dumps(d)}") resp = execute(d) log_cmd(cmd, resp or "", dur) from API.yolo_api import YOLO_AVAILABLE as _ya if _ya: for cls in yolo_all_classes(): det = yolo_closest(cls) if det: log_detection(cls, det.position, det.distance_estimate) action_str = d.get("actions", [{}])[0].get("move", "NONE") if d.get("actions") else "NONE" return {"type": "decision", "speak": resp or "", "action": action_str.upper(), "elapsed": dur, "raw": d} # ══════════════════════════════════════════════════════════════════════════════ # HELPERS # ══════════════════════════════════════════════════════════════════════════════ def _warmup_llava(): import ollama print(" Warming up LLaVA... (loading into GPU)") try: img = get_frame() ollama.chat( model=OLLAMA_MODEL, messages=[{"role": "user", "content": "hi", "images": [img] if img else []}], options={"temperature": 0.0, "num_predict": _cfg["warmup_num_predict"]} ) print(" LLaVA warm - first command will be fast\n") except Exception as e: print(f" Warmup failed ({e}) - first command may be slow\n") def get_brain_status() -> dict: """Return current brain status for server status message.""" from API.yolo_api import YOLO_AVAILABLE as _ya from API.odometry_api import ODOM_AVAILABLE as _oa from API.memory_api import MEMORY_AVAILABLE as _ma from API.camera_api import CAM_WIDTH, CAM_HEIGHT, CAM_FPS try: from API.lidar_api import LIDAR_AVAILABLE as _la, get_loc_state lidar_state = get_loc_state() if _la else "off" except ImportError: _la = False lidar_state = "off" return { "model": OLLAMA_MODEL, "yolo": _ya, "odometry": _oa, "memory": _ma, "lidar": _la, "lidar_state": lidar_state, "voice": _voice_module is not None and _voice_module.is_running, "camera": f"{CAM_WIDTH}x{CAM_HEIGHT}@{CAM_FPS}", } def shutdown(): """Clean shutdown of all subsystems.""" print("\nShutting down Marcus...") # Stop voice module if _voice_module and _voice_module.is_running: _voice_module.stop() # Stop autonomous mode if running from Brain.command_parser import _auto if _auto and _auto.is_enabled(): _auto.disable() stop_camera() gradual_stop() send_cmd("stop") from API.odometry_api import odom as _o if _o: _o.stop() from API.memory_api import mem as _m if _m: _m.end_session() try: from API.lidar_api import stop_lidar stop_lidar() except Exception: pass _log("Marcus stopped", "info", "brain") print("Marcus stopped.") # ══════════════════════════════════════════════════════════════════════════════ # TERMINAL MODE — used by run_marcus.py # ══════════════════════════════════════════════════════════════════════════════ def run_terminal(): """Run brain with terminal input loop.""" init_brain() status = get_brain_status() print() print("=" * 48) print(" SANAD AI BRAIN — READY") print("=" * 48) for k, v in status.items(): print(f" {k:<10}: {v}") print("=" * 48) print(" help | example | yolo | patrol | auto on/off | q") print() try: while True: try: cmd = input("Command: ").strip() except (EOFError, KeyboardInterrupt): break if not cmd: continue if cmd.lower() in ("q", "quit", "exit"): break if cmd.lower() in ("mute/", "unmute/"): # Route through the audio API so the action respects whichever # mic backend is active (BuiltinMic flushes the UDP buffer; # the legacy pactl path mutes PulseAudio source 3). if _audio_api is None: print(" Voice is not initialized") continue if cmd.lower() == "mute/": _audio_api._mute_mic() print(" Mic muted") else: _audio_api._unmute_mic() print(" Mic unmuted") continue result = process_command(cmd) sp = result.get("speak", "") if isinstance(result, dict) else "" if sp and _audio_api: _audio_api.speak(sp) except KeyboardInterrupt: pass shutdown() if __name__ == "__main__": run_terminal()