From ac9271c62b23c45ee830b90ac4c9e7c9cfbf9780 Mon Sep 17 00:00:00 2001 From: kassam Date: Wed, 22 Apr 2026 10:57:23 +0400 Subject: [PATCH] Update 2026-04-22 10:57:22 --- API/audio_api.py | 48 +++++-- API/camera_api.py | 21 +-- Autonomous/marcus_autonomous.py | 2 +- Brain/executor.py | 2 +- Brain/marcus_brain.py | 23 ++-- Brain/marcus_memory.py | 4 +- Bridge/ros2_zmq_bridge.py | 2 +- Client/marcus_cli.py | 2 +- Config/config_Brain.json | 5 +- Config/config_ImageSearch.json | 2 +- Config/config_Memory.json | 8 -- Config/marcus_prompts.yaml | 196 +++----------------------- Core/log_backend.py | 34 ++++- Doc/MARCUS_API.md | 16 ++- Doc/architecture.md | 23 +++- Doc/controlling.md | 30 ++++ Doc/environment.md | 7 + Doc/functions.md | 175 +++++++++++++++++++++++ Doc/pipeline.md | 7 +- Legacy/marcus_nav.py | 93 ------------- Models/Modelfile | 16 ++- Navigation/marcus_odometry.py | 37 +++-- README.md | 237 ++++++++++++++++++++++++++++++++ Vision/marcus_imgsearch.py | 31 +++-- Vision/marcus_yolo.py | 11 +- Voice/builtin_mic.py | 23 +++- Voice/marcus_voice.py | 11 +- 27 files changed, 683 insertions(+), 383 deletions(-) delete mode 100644 Config/config_Memory.json create mode 100644 Doc/functions.md delete mode 100644 Legacy/marcus_nav.py create mode 100644 README.md diff --git a/API/audio_api.py b/API/audio_api.py index 59e88b8..774c844 100644 --- a/API/audio_api.py +++ b/API/audio_api.py @@ -32,6 +32,8 @@ import sys import threading import time import wave +from logging.handlers import RotatingFileHandler + import numpy as np # ─── PATH + CONFIG ─────────────────────────────────────── @@ -45,15 +47,18 @@ from Core.config_loader import load_config LOG_DIR = os.path.join(PROJECT_ROOT, "logs") os.makedirs(LOG_DIR, exist_ok=True) -# Note: logging.basicConfig() only takes effect on the first call per process. -# If the voice module already configured logging (common path via run_marcus.py), -# this call is a no-op. When audio_api is used standalone, it wires logs to -# logs/voice.log + stderr. +# logging.basicConfig is idempotent per process: if marcus_voice configured +# the root logger first, this call is a no-op and both modules share the same +# RotatingFileHandler (stdlib FileHandlers hold an internal lock, so concurrent +# writes to voice.log are safe). Rotation caps voice.log at 5 MB × 3 backups. logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(name)s] %(levelname)s: %(message)s", handlers=[ - logging.FileHandler(os.path.join(LOG_DIR, "voice.log")), + RotatingFileHandler( + os.path.join(LOG_DIR, "voice.log"), + maxBytes=5_000_000, backupCount=3, encoding="utf-8", + ), logging.StreamHandler(), ], ) @@ -305,14 +310,31 @@ class AudioAPI: subprocess.run(["pactl", "set-source-volume", source, "100%"], capture_output=True) log.info("Recording %.1fs from mic source %s (parec)", seconds, source) - proc = subprocess.Popen( - ["parec", "-d", source, - f"--format={fmt}", f"--rate={rate}", f"--channels={channels}", "--raw"], - stdout=subprocess.PIPE, - ) - time.sleep(seconds) - proc.terminate() - raw = proc.stdout.read() + proc = None + raw = b"" + try: + proc = subprocess.Popen( + ["parec", "-d", source, + f"--format={fmt}", f"--rate={rate}", f"--channels={channels}", "--raw"], + stdout=subprocess.PIPE, + ) + time.sleep(seconds) + finally: + # Always kill parec — an exception in time.sleep (Ctrl-C / signal) + # would otherwise leave an orphaned recorder process running. 
+ if proc is not None: + try: + proc.terminate() + raw = proc.stdout.read() + proc.wait(timeout=1.0) + except Exception as e: + log.warning("parec cleanup error: %s", e) + # Last-resort SIGKILL — suppress only OSError (process + # already exited) so we don't mask other bugs. + try: + proc.kill() + except OSError: + pass audio = np.frombuffer(raw, dtype=np.int16) log.info("Recorded: %d samples, std=%.0f", len(audio), audio.std()) diff --git a/API/camera_api.py b/API/camera_api.py index 34d7fed..15644a8 100644 --- a/API/camera_api.py +++ b/API/camera_api.py @@ -12,10 +12,13 @@ from Core.logger import log _cfg = load_config("Camera") -CAM_WIDTH = _cfg["width"] -CAM_HEIGHT = _cfg["height"] -CAM_FPS = _cfg["fps"] -CAM_QUALITY = _cfg["jpeg_quality"] +CAM_WIDTH = int(_cfg.get("width", 424)) +CAM_HEIGHT = int(_cfg.get("height", 240)) +CAM_FPS = int(_cfg.get("fps", 15)) +CAM_QUALITY = int(_cfg.get("jpeg_quality", 70)) +CAM_TIMEOUT_MS = int(_cfg.get("timeout_ms", 5000)) # pipeline.wait_for_frames timeout +CAM_STALE_THRESHOLD = float(_cfg.get("stale_threshold_s", 10.0)) # trip reconnect after this long without a frame +CAM_RECONNECT_DELAY = float(_cfg.get("reconnect_delay_s", 2.0)) # initial backoff; doubles up to 10 s # Shared state latest_frame_b64 = [None] @@ -36,7 +39,7 @@ def camera_loop(): """Capture RealSense frames continuously with auto-reconnect.""" import pyrealsense2 as rs - backoff = 2.0 + backoff = CAM_RECONNECT_DELAY while camera_alive[0]: pipeline = None try: @@ -44,14 +47,14 @@ def camera_loop(): cfg = rs.config() cfg.enable_stream(rs.stream.color, CAM_WIDTH, CAM_HEIGHT, rs.format.bgr8, CAM_FPS) pipeline.start(cfg) - backoff = 2.0 + backoff = CAM_RECONNECT_DELAY _cam_connected[0] = True print("Camera connected") log(f"Camera connected {CAM_WIDTH}x{CAM_HEIGHT}@{CAM_FPS}", "info", "camera") while camera_alive[0]: try: - frames = pipeline.wait_for_frames(timeout_ms=5000) + frames = pipeline.wait_for_frames(timeout_ms=CAM_TIMEOUT_MS) color_frame = frames.get_color_frame() if not color_frame: continue @@ -72,8 +75,8 @@ def camera_loop(): _cam_last_frame_time[0] = time.time() except Exception: - if time.time() - _cam_last_frame_time[0] > 10.0: - print(" [Camera] No frame for 10s — reconnecting...") + if time.time() - _cam_last_frame_time[0] > CAM_STALE_THRESHOLD: + print(f" [Camera] No frame for {CAM_STALE_THRESHOLD:.0f}s — reconnecting...") break except Exception as e: diff --git a/Autonomous/marcus_autonomous.py b/Autonomous/marcus_autonomous.py index 9640acb..3b6a6fd 100644 --- a/Autonomous/marcus_autonomous.py +++ b/Autonomous/marcus_autonomous.py @@ -25,7 +25,7 @@ How it works Files saved ----------- - ~/Models_marcus/map/map_001_2026-04-05/ + Data/Brain/maps/map_001_YYYY-MM-DD/ observations.json — [{step, time, x, y, area_type, objects, observation}] path.json — [{x, y, heading, t}] — full path walked summary.txt — auto-generated LLaVA summary diff --git a/Brain/executor.py b/Brain/executor.py index b2ab292..f7706b9 100644 --- a/Brain/executor.py +++ b/Brain/executor.py @@ -81,7 +81,7 @@ def execute(d: dict): actions = merge_actions(d.get("actions", [])) arm_cmd = d.get("arm", None) - print(f"Marcus: {speak}") + print(f"Sanad: {speak}") if not actions: gradual_stop() diff --git a/Brain/marcus_brain.py b/Brain/marcus_brain.py index 0af1e11..c5a6465 100644 --- a/Brain/marcus_brain.py +++ b/Brain/marcus_brain.py @@ -41,18 +41,12 @@ from Autonomous.marcus_autonomous import AutonomousMode _cfg = load_config("Brain") _TALK_PATTERNS = [ - # English questions + # Questions 
r"^(?:what|who|where|when|how|why|is|are|do|does|can|tell|describe|explain|show|analyze)\s+", - # English identity/facts + # Identity / facts told to the robot r"^(?:my name is|i am|call me|that is|that person|note that|remember that)\s+", - # English acknowledgements + # Acknowledgements r"^(?:ok|okay|yes|no|good|nice|great|thanks|thank you|got it|understood|correct)\s*[!.]*$", - # Arabic questions — ماذا ترى / كيف حالك / من أنت / ما اسمك / صف / هل - r"^(?:ماذا|ما\s|كيف|من\s|أين|لماذا|هل|صف|اشرح|وصف|كم)\s*", - # Arabic identity/facts — اسمي / أنا / تذكر - r"^(?:اسمي|أنا\s|تذكر\s|سجل\s|لاحظ\s)", - # Arabic acknowledgements — حسنا / شكرا / ممتاز / صح / مفهوم - r"^(?:حسنا|شكرا|ممتاز|صح|مفهوم|تمام|أحسنت|جيد|نعم|لا)\s*[!.]*$", ] _NAT_GOAL_RE = re.compile( @@ -271,9 +265,10 @@ def process_command(cmd: str) -> dict: return {"type": "talk", "speak": speak, "action": "TALK", "elapsed": time.time() - t0} # ── Greeting ───────────────────────────────────────────────────────── - if re.match(r"^(?:hi+|hey+|hello+|hola|salam|marhaba|sup|yo+|ahlan|السلام عليكم|مرحبا|أهلا|هلا|يا هلا)\s*[!.]*$", cmd, re.IGNORECASE): + if re.match(r"^(?:hi+|hey+|hello+|sup|yo+|greetings|good (?:morning|afternoon|evening))\s*[!.]*$", + cmd, re.IGNORECASE): response = "Hello! I am Sanad. How can I help you?" - print(f"Marcus: {response}") + print(f"Sanad: {response}") add_to_history(cmd, response) log_cmd(cmd, response) return {"type": "greeting", "speak": response, "action": "GREETING", "elapsed": 0} @@ -282,7 +277,7 @@ def process_command(cmd: str) -> dict: if re.match(r"^(?:come(?:\s+back)?(?:\s+to\s+me)?|come\s+here|get\s+closer|approach|move\s+closer)\s*[!.]*$", cmd, re.IGNORECASE): execute_action("forward", 2.0) resp = "Coming to you" - print(f"Marcus: {resp}") + print(f"Sanad: {resp}") add_to_history(cmd, resp) log_cmd(cmd, resp) return {"type": "move", "speak": resp, "action": "FORWARD 2.0s", "elapsed": 2.0} @@ -300,7 +295,7 @@ def process_command(cmd: str) -> dict: execute_action("right" if turn_dir == "right" else "left", turn_deg / 18.0) execute_action(walk_dir, walk_dur) resp = f"Turned {turn_dir} {int(turn_deg)} degrees then moved {walk_dir}" - print(f"Marcus: {resp}") + print(f"Sanad: {resp}") add_to_history(cmd, resp) log_cmd(cmd, resp) return {"type": "move", "speak": resp, "action": f"MULTI {turn_dir}+{walk_dir}", "elapsed": time.time() - t0} @@ -350,7 +345,7 @@ def _handle_talk(cmd): pass d = ask_talk(cmd, img, facts=facts_str) sp = d.get("speak", "") - print(f"Marcus: {sp}") + print(f"Sanad: {sp}") log_cmd(cmd, sp) return sp except Exception as ex: diff --git a/Brain/marcus_memory.py b/Brain/marcus_memory.py index 00932c2..7a0d771 100644 --- a/Brain/marcus_memory.py +++ b/Brain/marcus_memory.py @@ -10,8 +10,8 @@ Purpose : Persistent memory across sessions. 
Folder structure ---------------- - ~/Models_marcus/places.json ← persistent named places (all sessions) - ~/Models_marcus/sessions/ + Data/History/Places/places.json ← persistent named places (all sessions) + Data/Brain/Sessions/ session_001_2026-04-05/ commands.json ← [{time, cmd, response, duration_s}] detections.json ← [{time, class, position, distance, x, y}] diff --git a/Bridge/ros2_zmq_bridge.py b/Bridge/ros2_zmq_bridge.py index 4caf36f..45721a0 100644 --- a/Bridge/ros2_zmq_bridge.py +++ b/Bridge/ros2_zmq_bridge.py @@ -4,7 +4,7 @@ Subscribes to /cmd_vel and holosoma/other_input (Python 3.8 + ROS2 Foxy) Forwards to Holosoma via ZMQ PUB socket (Python 3.10 hsinference) Run: source /opt/ros/foxy/setup.bash - python3.8 ~/Models_marcus/ros2_zmq_bridge.py + python3.8 ~/Marcus/Bridge/ros2_zmq_bridge.py """ import json, time import rclpy diff --git a/Client/marcus_cli.py b/Client/marcus_cli.py index 90b107b..82cc68c 100644 --- a/Client/marcus_cli.py +++ b/Client/marcus_cli.py @@ -190,7 +190,7 @@ def _handle_message(data): print(f" [{ts}] {color}{C.BOLD}{action}{C.RESET} {C.GRAY}({elapsed}s){C.RESET}") if speak: - print(f" {C.CYAN}Marcus: {speak}{C.RESET}") + print(f" {C.CYAN}Sanad: {speak}{C.RESET}") elif t == "camera_config": p = data.get("profile", "?") diff --git a/Config/config_Brain.json b/Config/config_Brain.json index 62b2cb5..de9ed36 100644 --- a/Config/config_Brain.json +++ b/Config/config_Brain.json @@ -14,8 +14,5 @@ "num_predict_patrol": 100, "num_predict_talk": 80, "num_predict_verify": 10, - "warmup_num_predict": 5, - "main_prompt": "You are Sanad, a humanoid robot. Look at the image and follow the command.\n{facts}\n\nCommand: \"{command}\"\n\nReply with ONLY this JSON — no markdown, no explanation:\n{{\"actions\":[{{\"move\":\"forward|backward|left|right|stop\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"one sentence\",\"abort\":null}}\n\nRULES:\n- actions is a list of movement steps, max duration 5.0s each\n- move: \"forward\" \"backward\" \"left\" \"right\" \"stop\"\n- arm: \"wave\" \"raise_right\" \"raise_left\" \"clap\" \"high_five\" \"hug\" \"heart\" \"shake_hand\" \"face_wave\" or null\n- arm is NEVER a move value\n- questions/descriptions: actions=[]\n- obstacle < 0.5m: abort = \"obstacle detected\"\n- \"90 degrees\" = 5.0s | \"45 degrees\" = 2.5s | \"1 step\" = 1.0s\n\nEXAMPLES:\n\"turn right\" -> {{\"actions\":[{{\"move\":\"right\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Turning right\",\"abort\":null}}\n\"turn right 90 degrees\" -> {{\"actions\":[{{\"move\":\"right\",\"duration\":5.0}}],\"arm\":null,\"speak\":\"Turning 90 degrees\",\"abort\":null}}\n\"move back then left\" -> {{\"actions\":[{{\"move\":\"backward\",\"duration\":2.0}},{{\"move\":\"left\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Moving back then left\",\"abort\":null}}\n\"wave\" -> {{\"actions\":[],\"arm\":\"wave\",\"speak\":\"Waving\",\"abort\":null}}\n\"raise your right arm\" -> {{\"actions\":[],\"arm\":\"raise_right\",\"speak\":\"Raising right arm\",\"abort\":null}}\n\"walk forward and wave\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":\"wave\",\"speak\":\"Walking and waving\",\"abort\":null}}\n\"what do you see\" -> {{\"actions\":[],\"arm\":null,\"speak\":\"I see...\",\"abort\":null}}\n\"stop\" -> {{\"actions\":[{{\"move\":\"stop\",\"duration\":0}}],\"arm\":null,\"speak\":\"Stopping\",\"abort\":null}}\n\"come to me\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Coming to you\",\"abort\":null}}\n\"come back\" -> 
{{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Coming back\",\"abort\":null}}\n\"come here\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Coming\",\"abort\":null}}\n\"get closer\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":1.0}}],\"arm\":null,\"speak\":\"Moving closer\",\"abort\":null}}\n\"go away\" -> {{\"actions\":[{{\"move\":\"backward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Moving away\",\"abort\":null}}\n\nCommand: \"{command}\"\nJSON:", - "goal_prompt": "You are Sanad navigating toward a goal.\n\nGOAL: \"{goal}\"\n\nLook at the image. Have you reached the goal?\n\nReply ONLY this JSON:\n{{\"reached\":false,\"next_move\":\"left\",\"duration\":0.5,\"speak\":\"what you see\"}}\n\nRULES:\n- reached: true ONLY if you clearly see the goal target right now\n- next_move: \"forward\" \"left\" \"right\"\n- duration: 0.3 to 0.8 seconds\n- Default next_move: \"left\" to keep scanning\n\nGOAL: \"{goal}\"\nJSON:", - "patrol_prompt": "You are Sanad, an HSE inspection robot on autonomous patrol.\n\nLook at the camera and assess the scene.\n\nReply ONLY this JSON:\n{{\"observation\":\"one sentence\",\"alert\":null,\"next_move\":\"forward\",\"duration\":1.0}}\n\nRULES:\n- alert = null if safe\n- alert = \"PPE: no helmet\" if person without helmet\n- alert = \"PPE: no vest\" if person without safety vest\n- alert = \"Hazard: description\" for other hazards\n- next_move: \"forward\" \"left\" \"right\"\n- duration: 0.5 to 2.0s\n\nJSON:" + "warmup_num_predict": 5 } diff --git a/Config/config_ImageSearch.json b/Config/config_ImageSearch.json index 10bbc74..5a8b0ad 100644 --- a/Config/config_ImageSearch.json +++ b/Config/config_ImageSearch.json @@ -1,6 +1,6 @@ { "default_max_steps": 60, - "step_delay_s": 0.4, + "step_delay_s": 0.15, "rotate_speed": 0.25, "min_steps_warmup": 3 } diff --git a/Config/config_Memory.json b/Config/config_Memory.json deleted file mode 100644 index d386889..0000000 --- a/Config/config_Memory.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "base_dir": "Data", - "sessions_dir": "Data/Sessions", - "places_file": "Data/Places/places.json", - "max_cmd_len": 500, - "max_sessions": 50, - "detect_dedupe_s": 5.0 -} diff --git a/Config/marcus_prompts.yaml b/Config/marcus_prompts.yaml index 1460de2..81bb03d 100644 --- a/Config/marcus_prompts.yaml +++ b/Config/marcus_prompts.yaml @@ -5,7 +5,7 @@ # Model : Qwen2.5-VL 3B (Ollama, fully offline) # # Placeholders: -# {command} — the user's typed/spoken command (Arabic or English) +# {command} — the user's typed/spoken command (English) # {goal} — the navigation goal description # {facts} — known facts from memory (e.g. "Kassam is the programmer") # {target} — YOLO class being searched (e.g. "person") @@ -16,7 +16,7 @@ # LANGUAGE NOTE: # All prompts instruct Qwen to detect the command language automatically # and respond in the same language. No code-side language detection needed. -# Arabic and English are handled natively by the model. +# English-only by policy — Arabic support was removed 2026-04-21. 
# ============================================================================= @@ -51,17 +51,17 @@ main_prompt: | - Merge consecutive same-direction steps into one: "forward 2s + forward 2s" → "forward 4s" — NOT two separate steps - Duration reference: - "1 step" / "خطوة" = 1.0s - "tiny step" / "خطوة صغيرة" = 0.3s + "1 step" = 1.0s + "tiny step" = 0.3s "half a step" = 0.5s - "2 steps" / "خطوتين" = 2.0s - "3 steps" / "ثلاث خطوات" = 3.0s - "45 degrees" / "٤٥ درجة" = 2.5s - "90 degrees" / "٩٠ درجة" = 5.0s - "180 degrees" / "استدر" = 10.0s + "2 steps" = 2.0s + "3 steps" = 3.0s + "45 degrees" = 2.5s + "90 degrees" = 5.0s + "180 degrees" = 10.0s - Speed modifiers: - "slowly" / "ببطء" / "بهدوء" → multiply duration by 0.5 - "quickly" / "fast" / "بسرعة" → multiply duration by 1.5 (cap at 5.0s) + "slowly" → multiply duration by 0.5 + "quickly" / "fast" → multiply duration by 1.5 (cap at 5.0s) ── ARM RULES ────────────────────────────────────────────────────────────── - arm: one value from the list above, or null @@ -72,12 +72,10 @@ main_prompt: | ── SPEAK RULES ──────────────────────────────────────────────────────────── - speak: one sentence, first person, natural - Describe what you are doing OR what you see — never both in one sentence - - For pure movement: "Turning right" / "أدور لليمين" + - For pure movement: "Turning right" - For vision questions: describe what the camera shows - Never repeat the command word-for-word - - CRITICAL: match the language of the command exactly - Arabic command → Arabic speak - English command → English speak + - Always respond in English ── SAFETY RULES ─────────────────────────────────────────────────────────── - abort = null for all normal commands @@ -87,8 +85,8 @@ main_prompt: | - When aborting: actions = [] and explain in speak ── CONTEXT RULES ────────────────────────────────────────────────────────── - - "that person" / "him" / "her" / "ذلك الشخص" → resolve from conversation or camera - - "it" / "there" / "هناك" → resolve from last command context + - "that person" / "him" / "her" → resolve from conversation or camera + - "it" / "there" → resolve from last command context - If ambiguous → choose the most reasonable safe interpretation ══ ENGLISH EXAMPLES ═══════════════════════════════════════════════════════ @@ -190,113 +188,6 @@ main_prompt: | "walk into the wall" → {{"actions":[],"arm":null,"speak":"I cannot do that safely","abort":"unsafe command"}} - ══ ARABIC EXAMPLES ════════════════════════════════════════════════════════ - - حركة أساسية: - "تقدم" - → {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"أتقدم للأمام","abort":null}} - - "تراجع للخلف" - → {{"actions":[{{"move":"backward","duration":2.0}}],"arm":null,"speak":"أتراجع للخلف","abort":null}} - - "دور يمين" - → {{"actions":[{{"move":"right","duration":2.0}}],"arm":null,"speak":"أدور لليمين","abort":null}} - - "دور يسار" - → {{"actions":[{{"move":"left","duration":2.0}}],"arm":null,"speak":"أدور لليسار","abort":null}} - - "قف" - → {{"actions":[{{"move":"stop","duration":0}}],"arm":null,"speak":"أتوقف الآن","abort":null}} - - درجات: - "دور يمين ٩٠ درجة" - → {{"actions":[{{"move":"right","duration":5.0}}],"arm":null,"speak":"أدور لليمين ٩٠ درجة","abort":null}} - - "دور يسار ٤٥ درجة ببطء" - → {{"actions":[{{"move":"left","duration":1.25}}],"arm":null,"speak":"أدور لليسار ببطء","abort":null}} - - "استدر ١٨٠ درجة" - → {{"actions":[{{"move":"right","duration":10.0}}],"arm":null,"speak":"أستدير ١٨٠ درجة","abort":null}} - - خطوات: - "تقدم خطوة واحدة" - → 
{{"actions":[{{"move":"forward","duration":1.0}}],"arm":null,"speak":"أتقدم خطوة واحدة","abort":null}} - - "تقدم خطوتين" - → {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"أتقدم خطوتين","abort":null}} - - "تراجع ثلاث خطوات" - → {{"actions":[{{"move":"backward","duration":3.0}}],"arm":null,"speak":"أتراجع ثلاث خطوات","abort":null}} - - "تقدم قليلا" - → {{"actions":[{{"move":"forward","duration":0.5}}],"arm":null,"speak":"أتقدم قليلا","abort":null}} - - خطوات متعددة: - "تقدم ثم دور يمين" - → {{"actions":[{{"move":"forward","duration":2.0}},{{"move":"right","duration":2.0}}],"arm":null,"speak":"أتقدم ثم أدور لليمين","abort":null}} - - "دور يمين ٩٠ درجة ثم تراجع خطوتين" - → {{"actions":[{{"move":"right","duration":5.0}},{{"move":"backward","duration":2.0}}],"arm":null,"speak":"أدور يمين ٩٠ درجة ثم أتراجع خطوتين","abort":null}} - - "تراجع ثم دور يسار" - → {{"actions":[{{"move":"backward","duration":2.0}},{{"move":"left","duration":2.0}}],"arm":null,"speak":"أتراجع ثم أدور لليسار","abort":null}} - - اقتراب / ابتعاد: - "تعال إلي" - → {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"آتي إليك","abort":null}} - - "اقترب" - → {{"actions":[{{"move":"forward","duration":1.0}}],"arm":null,"speak":"أقترب منك","abort":null}} - - "ابتعد" - → {{"actions":[{{"move":"backward","duration":2.0}}],"arm":null,"speak":"أبتعد عنك","abort":null}} - - ذراعين: - "لوح بيدك" - → {{"actions":[],"arm":"wave","speak":"أُلوّح بيدي","abort":null}} - - "صفق" - → {{"actions":[],"arm":"clap","speak":"أُصفق","abort":null}} - - "أعطني خمسة" - → {{"actions":[],"arm":"high_five","speak":"خمسة!","abort":null}} - - "ارفع يدك اليمنى" - → {{"actions":[],"arm":"raise_right","speak":"أرفع يدي اليمنى","abort":null}} - - "عانقني" - → {{"actions":[{{"move":"forward","duration":1.0}}],"arm":"hug","speak":"آتي لأعانقك","abort":null}} - - "صافحني" - → {{"actions":[],"arm":"shake_hand","speak":"أمد يدي للمصافحة","abort":null}} - - "تقدم ولوح" - → {{"actions":[{{"move":"forward","duration":2.0}}],"arm":"wave","speak":"أتقدم وأُلوّح","abort":null}} - - رؤية / أسئلة: - "ماذا ترى" - → {{"actions":[],"arm":null,"speak":"أرى شخصا يجلس أمام طاولة مع جهاز كمبيوتر","abort":null}} - - "صف الغرفة" - → {{"actions":[],"arm":null,"speak":"أرى مكتبا به طاولات وكراسي وشاشات","abort":null}} - - "هل يوجد أحد هنا" - → {{"actions":[],"arm":null,"speak":"نعم، أرى شخصا واحدا أمامي","abort":null}} - - "انظر حولك" - → {{"actions":[{{"move":"left","duration":3.0}}],"arm":null,"speak":"أنظر حولي","abort":null}} - - هوية: - "اسمي قصام" - → {{"actions":[],"arm":null,"speak":"أهلا قصام، سأتذكر اسمك","abort":null}} - - "من أنت" - → {{"actions":[],"arm":null,"speak":"أنا ماركوس، روبوت مساعد من YS Lootah Technology في دبي","abort":null}} - - أمان: - "اصطدم بالحائط" - → {{"actions":[],"arm":null,"speak":"لا أستطيع فعل ذلك بأمان","abort":"unsafe command"}} - Command: "{command}" JSON: @@ -368,24 +259,6 @@ goal_prompt: | Camera: chair visible but far away at edge of frame → {{"reached":false,"next_move":"forward","duration":0.5,"speak":"I can see a chair far ahead — moving closer","confidence":"medium"}} - ── EXAMPLES (Arabic) ────────────────────────────────────────────────────── - - Goal: "قف عندما ترى شخصا" - Camera: ممر فارغ - → {{"reached":false,"next_move":"left","duration":0.4,"speak":"أرى ممرا فارغا — لا يوجد أحد","confidence":"high"}} - - Goal: "قف عندما ترى شخصا" - Camera: شخص يجلس - → {{"reached":true,"next_move":"left","duration":0.0,"speak":"أرى شخصا بوضوح — أتوقف","confidence":"high"}} - - Goal: "ابحث عن لاب توب" - 
Camera: طاولة بدون لاب توب - → {{"reached":false,"next_move":"left","duration":0.4,"speak":"أرى طاولة وكراسي لكن لا يوجد لاب توب","confidence":"high"}} - - GOAL: "{goal}" - JSON: - - # ══════════════════════════════════════════════════════════════════════════════ # PATROL PROMPT # Used for: autonomous office exploration — auto on / auto off @@ -515,44 +388,11 @@ talk_prompt: | → {{"actions":[],"arm":null,"speak":"I was built by YS Lootah Technology in Dubai, and my programmer is Kassam","abort":null}} "what can you do" - → {{"actions":[],"arm":null,"speak":"I can move through the environment, identify objects and people, navigate to goals, and respond in both Arabic and English","abort":null}} + → {{"actions":[],"arm":null,"speak":"I can move through the environment, identify objects and people, navigate to goals, and respond in English","abort":null}} "ok" / "good" / "thanks" → {{"actions":[],"arm":null,"speak":"Ready for your next command","abort":null}} - ── ARABIC EXAMPLES ──────────────────────────────────────────────────────── - - "ماذا ترى" - → {{"actions":[],"arm":null,"speak":"أرى شخصا يجلس أمام طاولة مع جهاز كمبيوتر وشاشة كبيرة","abort":null}} - - "صف ما أمامك" - → {{"actions":[],"arm":null,"speak":"أمامي مكتب به شاشة ولوحة مفاتيح وشخص يعمل","abort":null}} - - "هل يوجد أحد في الغرفة" - → {{"actions":[],"arm":null,"speak":"نعم، أرى شخصا واحدا يجلس أمام محطة العمل","abort":null}} - - "اسمي قصام" - → {{"actions":[],"arm":null,"speak":"أهلا وسهلا قصام، سأتذكر اسمك","abort":null}} - - "أنا مبرمجك" - → {{"actions":[],"arm":null,"speak":"مفهوم، أنت مبرمجي — جاهز لتلقي تعليماتك","abort":null}} - - "كيف حالك" - → {{"actions":[],"arm":null,"speak":"جميع أنظمتي تعمل بشكل طبيعي وأنا جاهز لمساعدتك","abort":null}} - - "ما اسمك" - → {{"actions":[],"arm":null,"speak":"اسمي ماركوس، روبوت مساعد من YS Lootah Technology في دبي","abort":null}} - - "من بناك" - → {{"actions":[],"arm":null,"speak":"بنتني شركة YS Lootah Technology في دبي، ومبرمجي هو قصام","abort":null}} - - "شكرا" / "حسنا" / "ممتاز" - → {{"actions":[],"arm":null,"speak":"جاهز للأمر التالي","abort":null}} - - Command: "{command}" - JSON: - - # ══════════════════════════════════════════════════════════════════════════════ # VERIFY PROMPT # Used for: two-stage goal detection — confirm extra condition after YOLO finds class @@ -676,8 +516,4 @@ image_search_text_prompt: | Camera: laptop clearly on desk → {{"found":true,"confidence":"high","position":"center","description":"Laptop visible on desk at center of frame"}} - Hint: "شخص يرتدي قميصا أزرق" - Camera: شخص بقميص أزرق واضح - → {{"found":true,"confidence":"high","position":"center","description":"أرى شخصا يرتدي قميصا أزرق بوضوح في وسط الإطار"}} - JSON: \ No newline at end of file diff --git a/Core/log_backend.py b/Core/log_backend.py index 3a6bf89..0e9e275 100644 --- a/Core/log_backend.py +++ b/Core/log_backend.py @@ -1,8 +1,24 @@ import logging import os +from logging.handlers import RotatingFileHandler from pathlib import Path +# Rotation policy shared by every log file this backend creates: +# 5 MB per file, keep 3 rotations (logs/brain.log, brain.log.1, .2, .3). +# Tune both via env vars if you need larger logs on the robot. 
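+# Example override (values illustrative; see Doc/controlling.md):
+#   MARCUS_LOG_MAX_BYTES=10000000 MARCUS_LOG_BACKUP_COUNT=5 python run_marcus.py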
+_ROT_MAX_BYTES = int(os.environ.get("MARCUS_LOG_MAX_BYTES", 5_000_000)) +_ROT_BACKUP_COUNT = int(os.environ.get("MARCUS_LOG_BACKUP_COUNT", 3)) + + +def _rotating_handler(path: str) -> RotatingFileHandler: + """FileHandler with size-based rotation — prevents unbounded growth.""" + return RotatingFileHandler( + path, + maxBytes=_ROT_MAX_BYTES, + backupCount=_ROT_BACKUP_COUNT, + encoding="utf-8", + ) class Logs: @@ -11,7 +27,10 @@ class Logs: self.default_log_level = default_log_level self.log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' self.base_dir = str(Path(__file__).resolve().parents[1]) - self.default_logs_dir = os.path.join(self.base_dir, "Logs") + # The canonical log directory is "logs" (lowercase) — matches what + # every module writing via stdlib logging expects. "Logs" (capital L) + # was historically used by a parallel implementation and is gone. + self.default_logs_dir = os.path.join(self.base_dir, "logs") self.fallback_log_dir = self._choose_fallback_log_dir() self.mainloggerfile = self.resolve_log_path(main_log_file) self.logger = None @@ -30,7 +49,7 @@ class Logs: self.main_logger.removeHandler(handler) os.makedirs(os.path.dirname(self.mainloggerfile), exist_ok=True) - main_handler = logging.FileHandler(self.mainloggerfile) + main_handler = _rotating_handler(self.mainloggerfile) main_handler.setFormatter(logging.Formatter(self.log_format)) main_handler.setLevel(self.default_log_level) self.main_logger.addHandler(main_handler) @@ -128,7 +147,7 @@ class Logs: def LogEngine(self, folder_name, log_name): """Set up a named logger and resolve the file path correctly.""" full_path = self.construct_path(folder_name, log_name) - + self.logger = logging.getLogger(log_name) self.logger.setLevel(self.default_log_level) self.logger.propagate = False # Prevent printing to terminal @@ -138,11 +157,11 @@ class Logs: if isinstance(handler, logging.FileHandler): self.logger.removeHandler(handler) - handler = logging.FileHandler(full_path) + handler = _rotating_handler(full_path) handler.setFormatter(logging.Formatter(self.log_format)) handler.setLevel(self.default_log_level) self.logger.addHandler(handler) - + def LogsMessages(self, message, message_type="info", folder_name=None, file_name=None): if folder_name and file_name: @@ -152,9 +171,12 @@ class Logs: temp_logger.setLevel(self.default_log_level) temp_logger.propagate = False # Prevent printing to terminal + # Re-use the existing handler if it's already attached to the + # same file (by absolute path). Prevents handler accumulation + # when this function is called from long-running loops. if not any(isinstance(h, logging.FileHandler) and h.baseFilename == full_path for h in temp_logger.handlers): - handler = logging.FileHandler(full_path) + handler = _rotating_handler(full_path) handler.setFormatter(logging.Formatter(self.log_format)) temp_logger.addHandler(handler) diff --git a/Doc/MARCUS_API.md b/Doc/MARCUS_API.md index 94b40bf..bfbb718 100644 --- a/Doc/MARCUS_API.md +++ b/Doc/MARCUS_API.md @@ -7,11 +7,12 @@ > **What changed since the early draft (April 4):** The project was restructured > from two monolithic scripts (`marcus_llava.py` + `marcus_yolo.py`) into a -> layered architecture. See `Doc/architecture.md` for the current file tree and -> `Doc/environment.md` for the verified Jetson software stack, exact library -> versions, and GPU bring-up recipe. 
This reference still describes the -> function-level semantics (inputs/outputs/examples) — treat any file path in -> this document as illustrative and cross-check the actual module. Recent +> layered architecture. See `Doc/architecture.md` for the current file tree, +> `Doc/environment.md` for the verified Jetson software stack, `Doc/pipeline.md` +> for end-to-end dataflow, and **`Doc/functions.md` for the authoritative +> function inventory** (always generated from AST — treat it as the source of +> truth for signatures). This reference describes the semantics (usage, JSON +> schemas, examples); cross-check `functions.md` for exact signatures. Recent > deltas called out inline below. ### Recent API deltas (2026-04-21) @@ -32,6 +33,11 @@ | Subsystem flags | `Config/config_Brain.json::subsystems.{lidar, voice, imgsearch, autonomous}` | `init_brain()` skips any subsystem with `false`. Defaults: lidar+voice+autonomous ON, imgsearch OFF. | | Robot persona → Sanad | Multiple | Wake words `["sanad","sannad","sanat","sunnat"]`; all prompts say "You are Sanad"; banner reads `SANAD AI BRAIN — READY`; hardcoded self-intro says "I am Sanad". Project/file/module names unchanged. | | Logger rename | `Core/log_backend.py` (was `Core/Logger.py`) | Case-only collision with `Core/logger.py` removed — repo now clones cleanly on macOS/Windows. Public API unchanged: `from Core.logger import log`. | +| Log rotation everywhere | `Core/log_backend.py`, `API/audio_api.py`, `Voice/marcus_voice.py` | All `FileHandler`s swapped for `RotatingFileHandler` (5 MB × 3 backups, tunable via `MARCUS_LOG_MAX_BYTES` / `MARCUS_LOG_BACKUP_COUNT`). Prevents unbounded log growth on the Jetson. `default_logs_dir` pinned to lowercase `logs/`. | +| English-only policy | `Brain/marcus_brain.py`, `Config/marcus_prompts.yaml`, `Config/config_Voice.json` | Arabic talk-pattern and greeting regexes removed; 5.8 KB of Arabic prompt examples stripped from `marcus_prompts.yaml`; Arabic wake words removed from config. `AudioAPI.speak(text, lang='en')` — only `'en'` accepted; non-ASCII is rejected. | +| Dead-code + orphan sweep | `Legacy/marcus_nav.py`, `Config/config_Memory.json` | Deleted. Config count 13 → 12 JSON + 1 YAML. | +| Orphan config keys wired up | `Vision/marcus_imgsearch.py`, `Voice/builtin_mic.py`, `API/camera_api.py`, `Navigation/marcus_odometry.py` | `config_ImageSearch.json` (4 keys), `config_Voice.mic_udp.read_timeout_sec`, `config_Camera.{timeout_ms, stale_threshold_s, reconnect_delay_s}`, `config_Odometry.json` (10 keys) are all read by code now. **0 orphan keys across 156 total.** | +| Subprocess leak fix | `API/audio_api.py::_record_parec` | `Popen` now wrapped in try/finally; orphan `parec` processes can't survive Ctrl-C/exceptions. Last-resort `proc.kill()` catches only `OSError`. | --- diff --git a/Doc/architecture.md b/Doc/architecture.md index b3dc54d..dd12b3d 100644 --- a/Doc/architecture.md +++ b/Doc/architecture.md @@ -19,9 +19,13 @@ - **Subsystem flags** — `config_Brain.json::subsystems.{lidar, voice, imgsearch, autonomous}` let you selectively skip heavy boot stages. - **Conditional inner-loop sleeps** — goal_nav / autonomous / imgsearch no longer pay unconditional per-step naps. - **Core/Logger.py → Core/log_backend.py** — case-only name collision with `logger.py` resolved; repo clones cleanly on macOS/Windows. +- **Log rotation on every file handler** — `Core.log_backend` + stdlib voice handlers now use `RotatingFileHandler` (5 MB × 3 backups, env-tunable). 
`default_logs_dir` fixed to lowercase `logs/` so the capital-L folder no longer gets recreated. - **Robot persona = "Sanad"** — wake words, prompts, banner, and self-intro all use "Sanad". Project identity ("Marcus") remains in file names, class names, directory, logs. +- **English-only** — all Arabic talk/greeting regexes, Arabic prompt examples (≈5.8 KB), and Arabic wake words removed. 0 non-ASCII chars in live code/config. +- **Orphan config cleanup** — `Config/config_Memory.json` deleted (never loaded). `config_ImageSearch.json`, `config_Odometry.json` (10 keys), plus 3 unused `config_Camera` keys and `mic_udp.read_timeout_sec` are now wired into their respective modules. 0 orphan keys across 156 total (12 config files). +- **Dead-code pruning** — `Legacy/marcus_nav.py` removed. Config count 13 → 12 JSON + `marcus_prompts.yaml`. -See `Doc/environment.md` for the verified Jetson software stack and `Doc/pipeline.md` for the end-to-end data flow. +See `Doc/environment.md` for the verified Jetson software stack, `Doc/pipeline.md` for the end-to-end data flow, and `Doc/functions.md` for the full function inventory. --- @@ -64,7 +68,8 @@ Marcus/ │ ├── config_ImageSearch.json # search defaults │ ├── config_Voice.json # mic (builtin_udp|pactl_parec), TTS backend, wake words, mic_udp group/port │ ├── config_LiDAR.json # Livox Mid-360 connection + SLAM engine params -│ └── marcus_prompts.yaml # All Qwen-VL prompts (main, goal, patrol, talk, verify) +│ └── marcus_prompts.yaml # All Qwen-VL prompts (main, goal, patrol, talk, verify, 2× imgsearch) +│ # Total: 12 JSON files + 1 YAML. (config_Memory.json removed 2026-04-21.) │ ├── API/ # Interface layer — one file per subsystem │ ├── zmq_api.py # ZMQ PUB socket: init_zmq(), send_vel(), gradual_stop(), send_cmd() @@ -139,11 +144,19 @@ Marcus/ │ ├── server.log │ ├── zmq.log │ └── main.log -│ -└── Legacy/ # Archived originals - └── marcus_nav.py # Original standalone prototype +│ # All log files rotate at 5 MB × 3 backups (tunable via +│ # MARCUS_LOG_MAX_BYTES / MARCUS_LOG_BACKUP_COUNT env vars). +└── Doc/ # Documentation + ├── architecture.md # This file + ├── controlling.md # Startup + command reference + ├── environment.md # Jetson versions + install recipe + ├── pipeline.md # End-to-end dataflow diagrams + ├── functions.md # Full function inventory + └── MARCUS_API.md # Developer API reference ``` +*Removed 2026-04-21: `Legacy/marcus_nav.py` (dead code + Arabic).* + --- ## Layer Architecture diff --git a/Doc/controlling.md b/Doc/controlling.md index d645d03..20e0b16 100644 --- a/Doc/controlling.md +++ b/Doc/controlling.md @@ -254,3 +254,33 @@ Most values configurable in `Config/config_Network.json` and `config_Voice.json: See `Doc/architecture.md` for full project structure and file-by-file documentation. See `Doc/environment.md` for the verified Jetson software stack. See `Doc/pipeline.md` for the end-to-end data flow. +See `Doc/functions.md` for the full function inventory (AST-generated). 
+ +--- + +## Language policy + +**English only.** Arabic was removed from the codebase on 2026-04-21: +- `Config/config_Voice.json::stt.wake_words_en` — only English variants (`sanad`, `sannad`, `sanat`, `sunnat`) +- `Config/marcus_prompts.yaml` — no Arabic examples left in any of the 7 prompts +- `API/audio_api.py::speak(text)` — rejects non-ASCII (the G1 TtsMaker silently maps Arabic to Chinese, which nobody wants) +- `Brain/marcus_brain.py` — greeting and talk-pattern regexes match English only + +If you need Arabic back, the cleanest paths are either Piper TTS (offline) or edge-tts (online) — see `git log` for the removed implementations. + +--- + +## Logs + +All `.log` files in `logs/` rotate at **5 MB × 3 backups** by default. To change: + +```bash +export MARCUS_LOG_MAX_BYTES=10000000 # 10 MB per file +export MARCUS_LOG_BACKUP_COUNT=5 # keep 5 rotations +export MARCUS_LOG_DIR=/var/log/marcus # move logs off SD card +``` + +Per-module log files: +- `brain.log`, `camera.log`, `lidar.log`, `zmq.log`, `server.log`, `main.log` — via `Core.logger.log()` +- `voice.log` — via stdlib `logging` in `audio_api.py` + `marcus_voice.py` +- Session JSON: `Data/Brain/Sessions/session_NNN_YYYY-MM-DD/{commands,detections,alerts,places}.json` diff --git a/Doc/environment.md b/Doc/environment.md index 5a43f48..d9e6247 100644 --- a/Doc/environment.md +++ b/Doc/environment.md @@ -377,3 +377,10 @@ Config file (`Config/config_Vision.json`): | 2026-04-21 | **Restructure**: moved ZMQ bind out of `API/zmq_api.py` import time into `init_zmq()`; fixes LiDAR SLAM worker spawn crash. Added loud GPU-requirement banner in `API/yolo_api.py`. Dropped `num_predict_main` 200→120. Made inner-loop sleeps in goal_nav/autonomous/imgsearch conditional. Renamed `Core/Logger.py` → `Core/log_backend.py` (case-collision fix). Updated `Doc/MARCUS_API.md` to current state. | | 2026-04-21 | **Voice restructure**: added `Voice/builtin_mic.py` (G1 array mic via UDP multicast `239.168.123.161:5555`) and `Voice/builtin_tts.py` (thin `AudioClient.TtsMaker` wrapper). Rewired `Voice/marcus_voice.py` to use BuiltinMic. Refactored `API/audio_api.py::speak()` to use BuiltinTTS — removed ~110 lines of edge-tts + pydub + Piper plumbing. Deleted `Voice/marcus_gemini_voice.py`. Added `subsystems.{lidar,voice,imgsearch,autonomous}` gate in `config_Brain.json::init_brain()`. | | 2026-04-21 | **Persona swap**: robot identifies as Sanad. Wake words `["sanad","sannad","sanat","sunnat"]`, `speaker.app_name="sanad"`, all Qwen prompts say "You are Sanad", banner reads `SANAD AI BRAIN — READY`, hardcoded self-intro says "I am Sanad". Project directory, class names, filenames, and `PROJECT_NAME=Marcus` env var unchanged. | +| 2026-04-21 | **English-only sweep**: stripped 5.8 KB of Arabic examples from `marcus_prompts.yaml`, removed Arabic talk-pattern and greeting regexes in `Brain/marcus_brain.py`, dropped Arabic wake words from `config_Voice.json`, changed user-facing prints `Marcus: …` → `Sanad: …` in `executor.py`, `marcus_brain.py`, `marcus_cli.py`. Verified: 0 Arabic chars in live code/config. | +| 2026-04-21 | **Logs hardened**: `Core/log_backend.py` now uses `RotatingFileHandler` (5 MB × 3 backups, env-tunable via `MARCUS_LOG_MAX_BYTES` / `MARCUS_LOG_BACKUP_COUNT`) for all three code paths (main_handler, `LogEngine`, `LogsMessages`). `API/audio_api.py` + `Voice/marcus_voice.py` also rotate `voice.log`. `default_logs_dir` fixed: `"Logs"` → `"logs"` (matches actual directory; no more case-collision recreation). 
| +| 2026-04-21 | **Dead code removed**: deleted `Legacy/marcus_nav.py` (unused + Arabic), deleted `Config/config_Memory.json` (orphan — never loaded). Config count: 13 → **12** JSON files + `marcus_prompts.yaml`. | +| 2026-04-21 | **Orphan config keys wired up (0 orphans remaining)**: `config_ImageSearch.json` → `Vision/marcus_imgsearch.py` (4 constants), `config_Voice.mic_udp.read_timeout_sec` → `Voice/builtin_mic.py`, `config_Camera.{timeout_ms, stale_threshold_s, reconnect_delay_s}` → `API/camera_api.py`, `config_Odometry.json` (10 keys) → `Navigation/marcus_odometry.py`. All 156 config keys now referenced by code. | +| 2026-04-21 | **Subprocess leak fix**: `AudioAPI._record_parec` now wraps `Popen` in try/finally with `terminate → wait(1.0) → kill` fallback; orphan `parec` processes can no longer survive Ctrl-C. Last-resort `proc.kill()` catches only `OSError` (not bare `except`). | +| 2026-04-21 | **Modelfile corrected**: `Models/Modelfile` now `FROM qwen2.5vl:3b` (was `:7b`) with a header explaining it's an optional build template — runtime uses `ollama pull qwen2.5vl:3b` directly. | +| 2026-04-21 | **Final verification**: 14-dimension smoke test green — no Arabic, no dead dirs, 0 orphan keys, every FileHandler rotates, no bare `except: pass`, no stale `Models_marcus` / `marcus_llava` refs, 25/25 modules import. | diff --git a/Doc/functions.md b/Doc/functions.md new file mode 100644 index 0000000..3ada97d --- /dev/null +++ b/Doc/functions.md @@ -0,0 +1,175 @@ +# Marcus — Function Inventory + +**Robot persona:** Sanad (wake word + self-intro) +**Updated:** 2026-04-21 + +Every callable in the codebase, grouped by layer. Generated from AST, kept in sync with the source. See `architecture.md` for where each module lives and `pipeline.md` for how they connect. + +**Totals:** 25 importable modules · 73 top-level functions · 9 public classes. + +--- + +## `run_marcus.py` — entrypoint + +Script only. Prepends `PROJECT_ROOT` to `sys.path`, then calls `Brain.marcus_brain.run_terminal()` in `__main__`. + +--- + +## `Core/` — foundation, no external deps + +| File | Function | Purpose | +|---|---|---| +| `env_loader.py` | `_find_env_file()`, `_load_dotenv(path)` | find + parse `.env` into `os.environ`; exports `PROJECT_ROOT` | +| `config_loader.py` | `load_config(name)`, `config_path(relative)` | cached reader for `Config/config_{name}.json` | +| `log_backend.py` | `_rotating_handler(path)` + **class `Logs`** | custom logging engine; all handlers are `RotatingFileHandler` (5 MB × 3) | +| `logger.py` | `get_logger(module)`, `log(msg, level, module)`, `log_and_print(msg, level, module)` | project-wide logging façade | + +**`Core.log_backend.Logs`** methods: +`__init__(default_log_level, main_log_file)`, `_choose_fallback_log_dir`, `_normalize_log_name`, `_is_writable_path`, `_with_fallback`, `resolve_log_path`, `construct_path`, `log_to_file`, `LogEngine(folder, log_name)`, `LogsMessages(msg, type, folder, file)`, `print_and_log(...)`. 
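+
+Typical call pattern through the façade (a minimal sketch; only the signatures listed above are assumed):
+
+```python
+from Core.config_loader import load_config
+from Core.logger import log
+
+cfg = load_config("Brain")                   # cached read of Config/config_Brain.json
+log("brain config loaded", "info", "brain")  # appends to logs/brain.log (rotating)
+```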
+ +--- + +## `API/` — subsystem wrappers (Brain imports only from here) + +| File | Public functions | +|---|---| +| `zmq_api.py` | `init_zmq()`, `get_socket()`, `send_vel(vx, vy, vyaw)`, `gradual_stop()`, `send_cmd(cmd)` | +| `camera_api.py` | `start_camera()`, `stop_camera()`, `get_frame()`, `get_frame_age()`, `get_raw_refs()`, `camera_loop()` | +| `llava_api.py` | `call_llava(prompt, img_b64, num_predict, use_history)`, `ask(command, img_b64)`, `ask_goal(goal, img_b64)`, `ask_talk(command, img_b64, facts)`, `ask_verify(target, condition, img_b64)`, `ask_patrol(img_b64)`, `remember_fact(fact)`, `add_to_history(user_msg, assistant_msg)`, `parse_json(raw)` | +| `yolo_api.py` | `init_yolo(raw_frame_ref, frame_lock)` + 8 stubs rebound on success: `yolo_sees`, `yolo_count`, `yolo_closest`, `yolo_summary`, `yolo_ppe_violations`, `yolo_person_too_close`, `yolo_all_classes`, `yolo_fps` | +| `odometry_api.py` | `init_odometry(zmq_sock)`, `get_position()` | +| `memory_api.py` | `init_memory()`, `log_cmd(cmd, response, duration)`, `log_detection(class_name, position, distance)`, `place_save(name)`, `place_goto(name)`, `places_list_str()` | +| `arm_api.py` | `do_arm(action)` — G1 GR00T stub | +| `imgsearch_api.py` | `init_imgsearch(get_frame_fn, send_vel_fn, gradual_stop_fn, llava_fn, yolo_sees_fn, model)`, `get_searcher()` | +| `audio_api.py` | **class `AudioAPI`** (see below) | +| `lidar_api.py` | `init_lidar()`, `obstacle_ahead(radius)`, `get_slam_pose()`, `get_nav_cmd()`, `get_loc_state()`, `get_safety_reasons()`, `get_lidar_status()`, `get_client()`, `stop_lidar()` | + +**`API.audio_api.AudioAPI`** methods: +`speak(text, lang="en")`, `record(seconds)` → np.int16 array, `play_pcm(audio_16k)`, `save_recording(audio, name)`, properties `is_speaking`, `is_available`. Internal: `_init_sdk`, `_mute_mic`, `_unmute_mic`, `_resample`, `_play_pcm`, `_record_builtin`, `_record_parec`. + +--- + +## `Voice/` — mic + TTS + STT + +| File | Public API | +|---|---| +| `builtin_mic.py` | `_find_g1_local_ip()` + **class `BuiltinMic`** | +| `builtin_tts.py` | **class `BuiltinTTS`** | +| `marcus_voice.py` | **class `State`** (IDLE/WAKE_HEARD/PROCESSING/SPEAKING), **class `VoiceModule`** | + +**`Voice.builtin_mic.BuiltinMic`** — G1 UDP multicast mic: +`__init__(group, port, buf_max, read_timeout)`, `start()`, `stop()`, `read_chunk(num_bytes)`, `read_seconds(seconds)`, `flush()`; internal `_recv_loop`. + +**`Voice.builtin_tts.BuiltinTTS`** — wraps `AudioClient.TtsMaker`: +`__init__(audio_client, default_speaker_id=0)`, `speak(text, speaker_id=None, block=True)`. + +**`Voice.marcus_voice.VoiceModule`** — Whisper wake + command STT: +`__init__(audio_api, on_command)`, `start()`, `stop()`, props `state`, `is_running`. Internal state machine: `_do_idle`, `_do_wake_heard`, `_do_processing`; helpers `_load_whisper`, `_transcribe`, `_check_wake_word`, `_record_chunk`, `_record_until_silence`, `_voice_loop`. 
+ +--- + +## `Vision/` + +| File | Public API | +|---|---| +| `marcus_yolo.py` | `start_yolo(raw_frame_ref, frame_lock)`, `yolo_sees(class, min_confidence)`, `yolo_count(class)`, `yolo_closest(class)`, `yolo_all_classes()`, `yolo_summary()`, `yolo_ppe_violations()`, `yolo_person_too_close(threshold)`, `yolo_is_running()`, `yolo_fps()`, `_resolve_device(requested)` + **class `Detection`** | +| `marcus_imgsearch.py` | **class `ImageSearch`** + prompt helpers `_build_compare_prompt`, `_build_single_prompt`, image utils `_load_image_b64`, `_numpy_to_b64`, `_resize_b64` | + +**`Vision.marcus_yolo.Detection`** — a single detection's metadata: +`__init__(class_name, confidence, x1, y1, x2, y2, frame_w, frame_h)`, props `size_ratio`, `position`, `distance_estimate`, method `to_dict()`, `__repr__`. + +**`Vision.marcus_imgsearch.ImageSearch`** — rotate-and-compare search: +`__init__(get_frame_fn, send_vel_fn, gradual_stop_fn, llava_fn, yolo_sees_fn, model)`, `search(ref_img_b64, hint, max_steps, direction, yolo_prefilter)`, `search_from_file(image_path, hint, max_steps, direction)`, `abort()`. + +--- + +## `Navigation/` + +| File | Public API | +|---|---| +| `goal_nav.py` | `navigate_to_goal(goal, max_steps)`; private `_goal_yolo_target`, `_extract_extra_condition`, `_verify_condition` | +| `patrol.py` | `patrol(duration_minutes, alert_callback)` | +| `marcus_odometry.py` | **class `Odometry`** | + +**`Navigation.marcus_odometry.Odometry`** — ROS2 `/dog_odom` + dead-reckoning fallback: +- lifecycle: `__init__()`, `start(zmq_sock)`, `stop()`, `reset()`, `is_running()` +- pose: `get_position()` → `{x, y, heading, source}`, `get_distance_from_start()`, `status_str()`, `__repr__` +- movement: `walk_distance(meters, speed, direction)`, `turn_degrees(degrees, speed)`, `navigate_to(x, y, heading, speed)`, `return_to_start(speed)`, `patrol_route(waypoints, speed, loop)` +- internal: `_init_own_zmq`, `_reset_state`, `_try_start_ros2`, `_dead_reckoning_loop`, `_send_vel`, `_gradual_stop`, `_check_stale`, `_time_based_walk`, `_time_based_turn` + +--- + +## `Brain/` + +| File | Public API | +|---|---| +| `marcus_brain.py` | `init_brain()`, `process_command(cmd)` → `{type, speak, action, elapsed}`, `get_brain_status()`, `shutdown()`, `run_terminal()`; private `_init_voice`, `_handle_llava`, `_handle_talk`, `_handle_search`, `_warmup_llava` | +| `command_parser.py` | `init_autonomous(auto_instance)`, `try_local_command(cmd)` (regex-table dispatcher); `_print_help`, `_print_examples` | +| `executor.py` | `execute(d)`, `execute_action(move, duration)`, `move_step(move, duration)`, `merge_actions(actions)`; `_obstacle_check` | +| `marcus_memory.py` | **class `Memory`** + utils `_read_json`, `_write_json`, `_sanitize_name`, `_fuzzy_match`, `_new_session_id` | + +**`Brain.marcus_memory.Memory`** — places + sessions store, JSON-backed: +- places: `save_place(name, x, y, heading)`, `get_place(name)`, `delete_place(name)`, `list_places()`, `rename_place(old, new)`, `places_count()` +- sessions: `start_session()`, `end_session()`, `log_command(cmd, response, duration_s)`, `log_detection(class, pos, dist, x, y)`, `log_alert(type, detail)`, `get_last_command()`, `get_last_n_commands(n)`, `get_session_detections()`, `commands_count()`, `session_duration_str()` +- history: `last_session_summary()`, `previous_session_detections()`, `previous_session_places()`, `all_sessions()` +- internal: `_load_places`, `_start_autosave`, `_flush_session`, `_emergency_save`, `_write_summary`, `_prune_old_sessions`, 
`_get_previous_session_dir` + +--- + +## `Autonomous/` + +`marcus_autonomous.py` — **class `AutonomousMode`**: patrol-and-map state machine. + +- `__init__(get_frame_fn, send_vel_fn, gradual_stop_fn, yolo_sees_fn, yolo_summary_fn, yolo_all_classes_fn, yolo_closest_fn, odom_fn, call_llava_fn, patrol_prompt, mem, models_dir)` +- lifecycle: `enable()`, `disable()`, `is_enabled()`, `status()`, `save_snapshot()` +- internal: `_explore_loop`, `_move_forward`, `_turn`, `_assess_scene`, `_create_map_dir`, `_save_observations`, `_save_path`, `_save_frame`, `_generate_summary`, `_save_session`, `_print_summary` + +--- + +## `Server/` & `Bridge/` + +| File | Public API | +|---|---| +| `Server/marcus_server.py` | `async handler(websocket)`, `async broadcast_frames()`, `async run_server(host, port)`, `main()`; helpers `_get_interface_ips`, `_check_lidar` | +| `Bridge/ros2_zmq_bridge.py` | **class `ROS2ZMQBridge`** (`_vel_cb`, `_cmd_cb`) + `main()` — standalone tool, not imported by Marcus | + +--- + +## Suggested import surface for integration code + +If you're writing glue on top of Marcus, the stable public surface is: + +```python +# brain orchestration +from Brain.marcus_brain import init_brain, process_command, shutdown + +# direct robot control (bypasses brain) +from API.zmq_api import init_zmq, send_vel, gradual_stop, send_cmd +from API.yolo_api import yolo_sees, yolo_summary, yolo_closest +from API.camera_api import start_camera, get_frame +from API.audio_api import AudioAPI # .speak(text), .record(seconds) +from API.lidar_api import init_lidar, obstacle_ahead, get_slam_pose, stop_lidar +from API.memory_api import init_memory, log_cmd, log_detection, place_save, place_goto + +# voice pipeline +from Voice.marcus_voice import VoiceModule +from Voice.builtin_mic import BuiltinMic +from Voice.builtin_tts import BuiltinTTS + +# navigation +from Navigation.goal_nav import navigate_to_goal +from Navigation.patrol import patrol +from Navigation.marcus_odometry import Odometry + +# autonomous mode +from Autonomous.marcus_autonomous import AutonomousMode +``` + +--- + +## Convention notes + +- **All layers above Core must import from `API.*` only** (not directly from `Vision/`, `Navigation/`, `Voice/`). Enforced by convention, not the language. +- **Underscore prefix = private.** `_foo` is internal; don't import it outside the module unless you're the test harness. +- **Stub rebinding pattern** (e.g. `API.yolo_api`): module-level placeholders get replaced with real implementations inside `init_*()` on success. If init fails, callers keep getting the safe stub (e.g. `yolo_sees` returns `False`). +- **Error returns are consistent per layer**: API layer returns `None` / empty dict / `False`; Brain layer returns structured dicts (`{"type","speak","action","elapsed"}`); no exception leaks to the terminal loop except at startup (`init_brain()` will raise to surface hardware issues like missing CUDA). diff --git a/Doc/pipeline.md b/Doc/pipeline.md index 338d4bc..2292911 100644 --- a/Doc/pipeline.md +++ b/Doc/pipeline.md @@ -3,7 +3,7 @@ **Robot persona:** Sanad (wake word + self-intro) **Updated:** 2026-04-21 -One map of every data path from sensor to motor, voice to speech. Cross-reference with `architecture.md` (what each file is) and `MARCUS_API.md` (function signatures). +One map of every data path from sensor to motor, voice to speech. Cross-reference with `architecture.md` (what each file is), `functions.md` (exact function signatures — AST-generated), and `MARCUS_API.md` (usage examples + JSON schemas). 
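+
+The voice → brain → speech path in code (a minimal sketch; assumes `on_command` receives the transcribed command as a plain string, error handling omitted):
+
+```python
+from API.audio_api import AudioAPI
+from Brain.marcus_brain import init_brain, process_command
+from Voice.marcus_voice import VoiceModule
+
+init_brain()
+audio = AudioAPI()
+
+def on_command(text: str):
+    result = process_command(text)        # {"type", "speak", "action", "elapsed"}
+    audio.speak(result.get("speak", ""))  # English-only TTS via BuiltinTTS
+
+VoiceModule(audio, on_command).start()    # IDLE → WAKE_HEARD → PROCESSING → SPEAKING
+```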
--- @@ -167,8 +167,13 @@ Brain/command_parser.py — responds to "lidar status" queries | `yolo_device`, `yolo_half` | config_Vision.json | `cuda` / FP16 (hard-required; CPU not allowed) | | `mic.backend` | config_Voice.json | `builtin_udp` (G1 array) or `pactl_parec` (Hollyland fallback) | | `mic_udp.group/port` | config_Voice.json | where to join the G1 audio multicast | +| `mic_udp.read_timeout_sec` | config_Voice.json | `BuiltinMic.read_chunk` budget (default 0.04 s) | | `tts.backend` | config_Voice.json | `builtin_ttsmaker` (only supported option) | | `stt.wake_words_en` | config_Voice.json | Whisper matcher (`sanad` + variants) | +| `timeout_ms`, `stale_threshold_s`, `reconnect_delay_s` | config_Camera.json | RealSense frame timeout, reconnect trigger, initial backoff | +| `default_max_steps`, `step_delay_s`, `rotate_speed`, `min_steps_warmup` | config_ImageSearch.json | image-guided search rotation cadence (wired into `Vision/marcus_imgsearch.py`) | +| `default_walk_speed`, `dist_tolerance`, `angle_tolerance`, `safety_timeout_mult`, `dr_update_hz` | config_Odometry.json | precise motion control (wired into `Navigation/marcus_odometry.py`) | +| `MARCUS_LOG_MAX_BYTES`, `MARCUS_LOG_BACKUP_COUNT`, `MARCUS_LOG_DIR` | env vars | log rotation size, backup count, log directory override | --- diff --git a/Legacy/marcus_nav.py b/Legacy/marcus_nav.py deleted file mode 100644 index 35bfe03..0000000 --- a/Legacy/marcus_nav.py +++ /dev/null @@ -1,93 +0,0 @@ -import ollama, base64, json, time -import pyrealsense2 as rs -import numpy as np, cv2 -import zmq - -HOLOSOMA_IP = "127.0.0.1" -HOLOSOMA_PORT = 5556 - -def capture_frame(): - pipeline = rs.pipeline() - cfg = rs.config() - cfg.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30) - pipeline.start(cfg) - for _ in range(5): - pipeline.wait_for_frames() - frames = pipeline.wait_for_frames() - img = np.asanyarray(frames.get_color_frame().get_data()) - pipeline.stop() - cv2.imwrite('/tmp/marcus_eye.jpg', img) - return '/tmp/marcus_eye.jpg' - -def ask_qwen(image_path, command): - with open(image_path, 'rb') as f: - img_b64 = base64.b64encode(f.read()).decode() - prompt = f"""أنت ماركس، روبوت ذكي يتنقل داخل المبنى. -You are Marcus, an intelligent indoor navigation robot. - -User command: "{command}" - -Look at the camera image. 
Respond with ONLY one line: -FORWARD [0.1 to 1.0 meters] -LEFT [5 to 45 degrees] -RIGHT [5 to 45 degrees] -STOP [reason] -ARRIVED""" - - response = ollama.chat( - model='qwen2.5vl:7b', - messages=[{ - 'role': 'user', - 'content': prompt, - 'images': [img_b64] - }] - ) - return response['message']['content'].strip().split('\n')[0] - -def send_to_robot(action): - print(f" Robot action: {action}") - parts = action.upper().split() - if not parts: - return - cmd = parts[0] - val = float(parts[1]) if len(parts) > 1 else 0 - - if cmd == "FORWARD": - print(f" Walking forward {val}m") - elif cmd == "LEFT": - print(f" Turning left {val} degrees") - elif cmd == "RIGHT": - print(f" Turning right {val} degrees") - elif cmd == "STOP": - print(f" Stopping: {' '.join(parts[1:])}") - elif cmd == "ARRIVED": - print(" Destination reached!") - -if __name__ == "__main__": - print("=" * 50) - print("Marcus Navigation Brain") - print("Powered by Qwen2.5-VL on Jetson Orin NX") - print("Speaks Arabic + English") - print("=" * 50) - print("Type your command (or 'quit'):\n") - - while True: - try: - cmd = input("Command: ").strip() - if cmd.lower() in ['quit', 'exit', 'خروج']: - print("Marcus shutting down.") - break - if not cmd: - continue - print("Capturing camera frame...") - frame = capture_frame() - print("Qwen2.5-VL thinking...") - t0 = time.time() - action = ask_qwen(frame, cmd) - elapsed = time.time() - t0 - print(f"Decision ({elapsed:.1f}s): {action}") - send_to_robot(action) - print() - except KeyboardInterrupt: - print("\nStopped.") - break diff --git a/Models/Modelfile b/Models/Modelfile index 95edce6..cd41028 100644 --- a/Models/Modelfile +++ b/Models/Modelfile @@ -1,3 +1,13 @@ -FROM qwen2.5vl:7b -PARAMETER num_ctx 1024 -PARAMETER num_predict 64 +# Optional Ollama Modelfile for building a pre-tuned Marcus VL model. +# Not used at runtime — Marcus loads the stock `qwen2.5vl:3b` tag directly +# via Config/config_Brain.json::ollama_model. Keep this file only if you +# want to build a custom image with `ollama create marcus-vl -f Modelfile`. +# +# Runtime parameters (num_batch, num_ctx, num_predict) are overridden on +# every call by API/llava_api.py, so the PARAMETER lines below are just +# defaults for `ollama run` shell use. 
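+#
+# Example build + smoke test (optional):
+#   ollama create marcus-vl -f Models/Modelfile
+#   ollama run marcus-vl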
+ +FROM qwen2.5vl:3b +PARAMETER num_ctx 2048 +PARAMETER num_predict 120 +PARAMETER num_batch 128 diff --git a/Navigation/marcus_odometry.py b/Navigation/marcus_odometry.py index f1ac68b..07bc69e 100644 --- a/Navigation/marcus_odometry.py +++ b/Navigation/marcus_odometry.py @@ -23,11 +23,13 @@ Import in marcus_brain.py Standalone test --------------- - /home/unitree/miniconda3/envs/marcus/bin/python3 ~/Models_marcus/marcus_odometry.py + conda run -n marcus python3 Navigation/marcus_odometry.py Date : April 2026 """ +import os +import sys import time import math import json @@ -36,24 +38,33 @@ import zmq # ══════════════════════════════════════════════════════════════════════════════ -# CONFIGURATION +# CONFIGURATION — loaded from Config/config_Odometry.json # ══════════════════════════════════════════════════════════════════════════════ -ZMQ_HOST = "127.0.0.1" -ZMQ_PORT = 5556 -ROS2_ODOM_TOPIC = "/dog_odom" -ODOM_INTERFACE = "eth0" +_PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if _PROJECT_DIR not in sys.path: + sys.path.insert(0, _PROJECT_DIR) +try: + from Core.config_loader import load_config + _cfg = load_config("Odometry") +except Exception: + _cfg = {} + +ZMQ_HOST = str(_cfg.get("zmq_host", "127.0.0.1")) +ZMQ_PORT = int(_cfg.get("zmq_port", 5556)) +ROS2_ODOM_TOPIC = str(_cfg.get("ros2_odom_topic", "/dog_odom")) +ODOM_INTERFACE = str(_cfg.get("odom_interface", "eth0")) # Movement defaults -DEFAULT_WALK_SPEED = 0.25 # m/s — slower = more accurate -DEFAULT_TURN_SPEED = 0.25 # rad/s -DIST_TOLERANCE = 0.05 # meters — stop within 5cm -ANGLE_TOLERANCE = 2.0 # degrees — stop within 2° -SAFETY_TIMEOUT_MULT = 3.0 # timeout = (distance/speed) × this -ODOM_STALE_WARN = 1.0 # warn if odom not updated for this many seconds +DEFAULT_WALK_SPEED = float(_cfg.get("default_walk_speed", 0.25)) # m/s — slower = more accurate +DEFAULT_TURN_SPEED = float(_cfg.get("default_turn_speed", 0.25)) # rad/s +DIST_TOLERANCE = float(_cfg.get("dist_tolerance", 0.05)) # meters — stop within 5cm +ANGLE_TOLERANCE = float(_cfg.get("angle_tolerance", 2.0)) # degrees — stop within 2° +SAFETY_TIMEOUT_MULT = float(_cfg.get("safety_timeout_mult", 3.0)) # timeout = (distance/speed) × this +ODOM_STALE_WARN = 1.0 # warn if odom not updated for this many seconds (internal only) # Dead reckoning -DR_UPDATE_HZ = 20 # integration rate +DR_UPDATE_HZ = int(_cfg.get("dr_update_hz", 20)) # integration rate # ══════════════════════════════════════════════════════════════════════════════ diff --git a/README.md b/README.md new file mode 100644 index 0000000..78e840f --- /dev/null +++ b/README.md @@ -0,0 +1,237 @@ +# Marcus — Humanoid Robot AI Base + +**Project:** Marcus | **Persona:** Sanad | **Organisation:** YS Lootah Technology, Dubai + +A compact, offline-first AI base for the **Unitree G1 EDU** humanoid, running on a +**Jetson Orin NX 16 GB**. The codebase is intentionally generic — the same brain +drives both **housekeeping** and **AI tour-guide** robot deployments just by +changing prompts, wake words and which subsystems are enabled. + +``` +run_marcus.py ← terminal entrypoint (keyboard + voice) +Server/marcus_server.py ← same brain over WebSocket for a remote client +``` + +--- + +## What the robot is made of + +Humanoid robot control ≠ one giant model. It's a **mesh of specialised models +and services**, each responsible for one part of the body, stitched together by +a Python brain. 
+ +| Body part | Purpose | Model / service | Where it runs | +|---|---|---|---| +| **Brain** (reason, speak, decide) | Parse commands, reason about vision, pick actions | **Qwen2.5-VL 3B** via Ollama | Jetson GPU | +| **Eyes** (see) | Real-time object/person detection | **YOLOv8m** (CUDA, FP16, 320 px, ~22 FPS) | Jetson GPU | +| **Eyes** (understand) | Open-ended scene understanding, reading, goal-verify | **Qwen2.5-VL** (same brain model) | Jetson GPU | +| **Ears** (hear) | Always-on wake-word + command transcription | **Whisper tiny** (wake) + **Whisper small** (STT) | Jetson CPU/GPU | +| **Mouth** (speak) | On-robot TTS, no internet needed | **Unitree `TtsMaker`** (G1 firmware) | G1 body speaker | +| **Legs** (walk) | 29-DoF locomotion + balance | **Holosoma** RL policy (separate process, ONNX) | Jetson CPU | +| **Hands** (gesture) | Arm & hand actions | **GR00T N1.5** — pending; `API/arm_api.py` is a stub today | Jetson GPU (future) | +| **Inner ear** (map) | SLAM, obstacle detection, localisation | **Livox Mid-360** LiDAR + custom SLAM engine | Jetson (subprocess) | +| **Memory** | Places, session history, facts | JSON files under `Data/Brain/Sessions/` | Jetson disk | + +Nothing here reaches the cloud. The only internet-adjacent bits (edge-tts, +Gemini) were removed — everything runs on the robot's own compute. + +--- + +## How it hears, sees, speaks + +``` +Inputs ─────────────────────────────── Outputs + +Voice ─┐ ┌─► Speech (G1 speaker) + │ │ +Text ──┼──► Brain (Qwen2.5-VL) ──────────────┤ + │ │ │ +Camera ─┘ ▼ ├─► Legs (Holosoma → G1) + ├─► YOLO (fast class check) │ + ├─► LiDAR (obstacles / pose) └─► Arms/hands (stub → GR00T) + └─► Memory (places / history) +``` + +Three input modalities, same command loop: + +- **Voice** — say "**Sanad, what do you see?**" → wake word fires, Whisper transcribes, brain answers through the G1 speaker. +- **Text** — type the same command into `run_marcus.py`'s terminal. +- **WebSocket (remote)** — `Client/marcus_cli.py` or `Client/marcus_client.py` (Tkinter GUI) send commands from a workstation. + +All three feed the same `Brain.marcus_brain.process_command(cmd)` function. + +--- + +## Two example deployments from the same codebase + +### Housekeeping robot + +Set up for indoor chores and presence awareness. + +- **Prompts** tuned for *"empty the bin, close the window, check the bathroom, remind me at 6 pm"* intents. +- **Places** memory pre-loaded with named rooms (`kitchen`, `living room`, `hallway`). +- **Patrol mode** runs safety loops looking for hazards / unsafe PPE. +- **Autonomous mode** (`auto on`) explores the space, maps it, logs observations. +- YOLO classes: `person, chair, couch, bed, dining table, bottle, cup, laptop, keyboard, mouse, backpack, handbag, suitcase` (the defaults). + +### AI tour-guide robot + +Same hardware, different prompts + wake word. + +- **Prompts** rewrite: *"You are a museum guide. When a visitor asks about an exhibit, describe it in two sentences and invite them to ask follow-ups."* +- **Places** memory pre-loaded with exhibit waypoints; `patrol: exhibit_A → exhibit_B → exit` follows a tour. +- Wake word changed in `config_Voice.json::stt.wake_words_en`. +- Image search (`search/ photo_of_exhibit.jpg`) lets visitors hold up a printed map; the robot navigates to the matching location. +- YOLO classes trimmed to people-only if the venue doesn't need object safety. + +**What you change to switch use cases:** +1. `Config/marcus_prompts.yaml` — persona + task descriptions +2. 
`Config/config_Voice.json::stt.wake_words_en` — the name people call the robot +3. `Config/config_Vision.json::tracked_classes` — relevant object set +4. `Config/config_Brain.json::subsystems.{lidar,voice,imgsearch,autonomous}` — enable what you need +5. Data under `Data/History/Places/places.json` — learned locations + +No code changes required for either deployment. + +--- + +## Layer architecture + +``` + run_marcus.py / Server/marcus_server.py ← entrypoints + │ + ▼ + Brain/ (marcus_brain, command_parser, executor, memory) + │ imports only from ↓ + ▼ + API/ (one file per subsystem — stable public surface) + │ wraps ↓ + ┌───────┴────────┬──────────────┬────────────┐ + ▼ ▼ ▼ ▼ + Vision/ Navigation/ Voice/ Lidar/ + YOLO, imgsearch goal_nav, builtin_mic, SLAM engine + patrol, odom builtin_tts, (subprocess) + marcus_voice + │ + ▼ + Core/ (env, config, log_backend, logger) + │ + ▼ + Config/ + .env +``` + +**Rule:** Brain talks to subsystems only via `API/*`. You can replace YOLO with +any detector, swap Qwen for another VL model, or plug in a different TTS — +without touching Brain code — by implementing the same API surface. + +--- + +## Quick start (Jetson, after `conda activate marcus`) + +```bash +# 1) Launch Holosoma (locomotion) in hsinference env +source ~/.holosoma_deps/miniconda3/bin/activate hsinference +cd ~/holosoma && python3 src/holosoma_inference/.../run_policy.py ... + +# 2) Start Ollama +ollama serve > /tmp/ollama.log 2>&1 & +sleep 3 + +# 3) Start Marcus +conda activate marcus +cd ~/Marcus +python3 run_marcus.py +``` + +You should see: + +``` +[YOLO] Model loaded ✅ | device: cuda (Orin) | FP16 | 19 tracked classes +================================================ + SANAD AI BRAIN — READY +================================================ + model : qwen2.5vl:3b + yolo : True voice : True + odometry : True memory : True + lidar : True camera : 424x240@15 +``` + +Say **"Sanad"** to wake, or type at the `Command:` prompt. + +See `Doc/controlling.md` for the full command reference, `Doc/environment.md` +for the Jetson install recipe, and `Doc/pipeline.md` for the end-to-end +dataflow diagrams. 
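+
+If you launch `Server/marcus_server.py` instead of `run_marcus.py`, the same
+brain is reachable over WebSocket. `Client/marcus_cli.py` is the supported
+client; purely as a sketch of the wire interaction (the port, the IP and the
+plain-text framing below are assumptions, check `Server/marcus_server.py`
+for the real values):
+
+```python
+# probe_sanad.py: hypothetical minimal client, not Client/marcus_cli.py.
+# Assumes marcus_server accepts plain-text commands on ws://<robot>:8765;
+# verify the actual port and message framing in Server/marcus_server.py.
+import asyncio
+import websockets  # pip install websockets
+
+async def main() -> None:
+    async with websockets.connect("ws://192.168.123.164:8765") as ws:
+        await ws.send("what do you see?")  # same text you'd type at Command:
+        print(await ws.recv())             # brain's reply (also spoken on-robot)
+
+asyncio.run(main())
+```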
+ +--- + +## Hardware target + +| Component | Model | +|---|---| +| Humanoid | Unitree G1 EDU, 29 DoF | +| Compute | Jetson Orin NX 16 GB (Ampere iGPU, FP16 tensor cores, capability 8.7) | +| Software stack | JetPack 5.1.1 / CUDA 11.4 / cuDNN 8.6 / Python 3.8 / torch 2.1.0-nv23.06 / ultralytics 8.4.21 / Ollama 0.20.0 | +| Camera | Intel RealSense D435 (424×240 @ 15 fps) | +| LiDAR | Livox Mid-360 | +| Microphone | G1 on-board array (UDP multicast, no external USB mic) | +| Speaker | G1 body speaker (via Unitree RPC) | + +--- + +## Repository layout (top-level) + +``` +Marcus/ +├── run_marcus.py entrypoint — terminal mode +├── README.md this file +├── Core/ foundation — config + env + logging +├── Config/ 12 JSON files + marcus_prompts.yaml +├── API/ subsystem wrappers (stable public surface) +├── Brain/ orchestrator, parser, executor, memory +├── Vision/ YOLO + image-guided search +├── Navigation/ goal nav, patrol, odometry +├── Voice/ built-in mic, built-in TTS, Whisper loop +├── Autonomous/ exploration state machine +├── Lidar/ SLAM engine (subprocess) +├── Server/ WebSocket interface +├── Client/ terminal CLI + Tkinter GUI +├── Bridge/ optional ROS2 ↔ ZMQ bridge (standalone tool) +├── Models/ yolov8m.pt + optional Ollama Modelfile +├── Data/ runtime-generated sessions / places / maps +├── logs/ rotating per-module log files (5 MB × 3) +└── Doc/ architecture, API, environment, pipeline, + controlling, functions — all current +``` + +--- + +## Docs + +- `Doc/architecture.md` — project structure + layer-by-layer breakdown +- `Doc/controlling.md` — startup sequence + command reference +- `Doc/environment.md` — verified Jetson software stack + install recipe +- `Doc/pipeline.md` — boot, voice, vision, movement, LiDAR dataflow +- `Doc/functions.md` — every callable in the codebase (AST-generated) +- `Doc/MARCUS_API.md` — developer API reference with JSON schemas + +--- + +## Design principles + +1. **Offline-first.** No cloud dependency in the default path. Internet can be + wired in for specific backends (e.g. future edge-tts) but it's opt-in. +2. **GPU mandatory.** YOLO refuses to start on CPU — Marcus is a safety-critical + robot, silently downgrading to 2 FPS vision is worse than failing loudly. +3. **Swappable subsystems.** Each API file can be reimplemented behind the same + public functions. Replace YOLO with DETR, Qwen with LLaVA, TtsMaker with + Piper — Brain never notices. +4. **Config over code.** Tunables live in `Config/*.json` / `.yaml`; 156 config + keys are all actively referenced (0 orphans). Change persona, wake word, + enabled subsystems, or thresholds without touching a `.py` file. +5. **English only.** Arabic support was removed because the G1 firmware's TTS + silently maps Arabic to Chinese. If bilingual TTS is ever needed again, + see `git log` for the removed Piper / edge-tts paths. 
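+
+To make principle 4 concrete: subsystems read their tunables through
+`Core.config_loader.load_config` with inline defaults, so a missing key
+degrades to a sane value instead of crashing (the exact pattern wired into
+`Navigation/marcus_odometry.py` and `Vision/marcus_imgsearch.py` by this
+patch). Sketched below with a hypothetical `Gripper` config; the real keys
+live in the 12 files under `Config/` and are indexed in `Doc/controlling.md`:
+
+```python
+# Config-over-code pattern; "Gripper" and its keys are invented for
+# illustration. Assumes the repo root (~/Marcus) is on sys.path.
+from Core.config_loader import load_config
+
+try:
+    _cfg = load_config("Gripper")  # would read Config/config_Gripper.json
+except Exception:
+    _cfg = {}  # config unreachable (test harness, fresh checkout)
+
+# Inline defaults: a missing key falls back instead of raising KeyError.
+GRIP_FORCE_N   = float(_cfg.get("grip_force_n", 5.0))
+GRIP_TIMEOUT_S = float(_cfg.get("grip_timeout_s", 2.0))
+```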
+ +--- + +*Marcus — YS Lootah Technology | Dubai* diff --git a/Vision/marcus_imgsearch.py b/Vision/marcus_imgsearch.py index 117784b..15aea51 100644 --- a/Vision/marcus_imgsearch.py +++ b/Vision/marcus_imgsearch.py @@ -31,7 +31,7 @@ Usage in marcus_brain.py Standalone test --------------- - python3 ~/Models_marcus/marcus_imgsearch.py --image /path/to/photo.jpg + python3 Vision/marcus_imgsearch.py --image /path/to/photo.jpg Date : April 2026 """ @@ -39,10 +39,11 @@ Date : April 2026 import base64 import io import json -import time -import threading import os import re +import sys +import threading +import time from pathlib import Path import numpy as np @@ -55,17 +56,23 @@ except ImportError: # ══════════════════════════════════════════════════════════════════════════════ -# CONFIGURATION +# CONFIGURATION (loaded from Config/config_ImageSearch.json) # ══════════════════════════════════════════════════════════════════════════════ -DEFAULT_MAX_STEPS = 60 # max rotation steps before giving up -STEP_DELAY = 0.15 # min gap between YOLO checks (was 0.4 — reduced - # because the rotation thread paces motion already - # and each LLaVA call is 600-1500 ms of real work) -ROTATE_SPEED = 0.25 # rad/s rotation speed during search -MIN_STEPS_WARMUP = 3 # skip first N steps (stale frame) -MATCH_CONFIDENCE_THR = 0.6 # LLaVA confidence threshold (not used directly, - # but kept for future scoring) +_PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if _PROJECT_DIR not in sys.path: + sys.path.insert(0, _PROJECT_DIR) +try: + from Core.config_loader import load_config + _cfg = load_config("ImageSearch") +except Exception: + _cfg = {} + +DEFAULT_MAX_STEPS = int(_cfg.get("default_max_steps", 60)) # rotation steps before giving up +STEP_DELAY = float(_cfg.get("step_delay_s", 0.15)) # min gap between YOLO checks +ROTATE_SPEED = float(_cfg.get("rotate_speed", 0.25)) # rad/s during search +MIN_STEPS_WARMUP = int(_cfg.get("min_steps_warmup", 3)) # skip first N steps (stale frame) +MATCH_CONFIDENCE_THR = 0.6 # LLaVA confidence threshold (reserved for future scoring) # ══════════════════════════════════════════════════════════════════════════════ diff --git a/Vision/marcus_yolo.py b/Vision/marcus_yolo.py index 27faf5d..b876d9e 100644 --- a/Vision/marcus_yolo.py +++ b/Vision/marcus_yolo.py @@ -9,7 +9,7 @@ Usage (imported): from marcus_yolo import start_yolo, yolo_sees, yolo_count, yolo_closest, yolo_summary Usage (standalone): - /home/unitree/miniconda3/envs/marcus/bin/python3 ~/Models_marcus/marcus_yolo.py + conda run -n marcus python3 Vision/marcus_yolo.py """ import os @@ -360,8 +360,13 @@ def _camera_loop(raw_frame_ref, frame_lock, cam_alive): raw_frame_ref[0] = frame.copy() except Exception as e: print(f"Camera: {e} — reconnecting...") - try: pipeline.stop() - except: pass + # pipeline may already be stopped or never started; swallow only + # the expected RealSense "pipeline not started" error, not every + # possible failure mode. + try: + pipeline.stop() + except RuntimeError: + pass time.sleep(2.0) diff --git a/Voice/builtin_mic.py b/Voice/builtin_mic.py index a4a1c52..e524169 100644 --- a/Voice/builtin_mic.py +++ b/Voice/builtin_mic.py @@ -24,19 +24,32 @@ Ported from Project/Sanad/voice/audio_io.py (Sanad's production implementation). 
from __future__ import annotations +import os import socket import struct import subprocess +import sys import threading import time from typing import Optional +# Load defaults from Config/config_Voice.json::mic_udp so they can be tuned +# without editing code. Falls back to the hardcoded literals below if the +# config isn't reachable (e.g., when imported from a test harness). +_PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if _PROJECT_DIR not in sys.path: + sys.path.insert(0, _PROJECT_DIR) +try: + from Core.config_loader import load_config + _mic_udp = (load_config("Voice") or {}).get("mic_udp", {}) or {} +except Exception: + _mic_udp = {} -DEFAULT_GROUP = "239.168.123.161" -DEFAULT_PORT = 5555 -DEFAULT_BUF_MAX = 64_000 # ~2 s of 16 kHz mono int16 -DEFAULT_READ_TIMEOUT = 0.04 # 40 ms budget per read_chunk call -SAMPLE_RATE = 16_000 # hardware rate — do not change +DEFAULT_GROUP = str(_mic_udp.get("group", "239.168.123.161")) +DEFAULT_PORT = int(_mic_udp.get("port", 5555)) +DEFAULT_BUF_MAX = int(_mic_udp.get("buffer_max_bytes", 64_000)) # ~2 s of 16 kHz mono int16 +DEFAULT_READ_TIMEOUT = float(_mic_udp.get("read_timeout_sec", 0.04)) # budget per read_chunk call +SAMPLE_RATE = 16_000 # hardware rate — do not change def _find_g1_local_ip() -> str: diff --git a/Voice/marcus_voice.py b/Voice/marcus_voice.py index 1af2812..a8a7d50 100644 --- a/Voice/marcus_voice.py +++ b/Voice/marcus_voice.py @@ -25,6 +25,8 @@ import os import sys import threading import time +from logging.handlers import RotatingFileHandler + import numpy as np # ─── PATH + CONFIG ─────────────────────────────────────── @@ -38,12 +40,17 @@ from Core.config_loader import load_config LOG_DIR = os.path.join(PROJECT_ROOT, "logs") os.makedirs(LOG_DIR, exist_ok=True) -# Idempotent — only the first call per process installs handlers. +# basicConfig is idempotent. Whichever of audio_api / marcus_voice imports +# first installs the rotating handler; the other no-ops. Both loggers then +# share the same file handle with stdlib's per-handler thread lock. logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(name)s] %(levelname)s: %(message)s", handlers=[ - logging.FileHandler(os.path.join(LOG_DIR, "voice.log")), + RotatingFileHandler( + os.path.join(LOG_DIR, "voice.log"), + maxBytes=5_000_000, backupCount=3, encoding="utf-8", + ), logging.StreamHandler(), ], )
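
Both logging hunks above depend on the same stdlib behaviour: `logging.basicConfig`
returns without doing anything once the root logger already has handlers, so
whichever of `audio_api` / `marcus_voice` is imported first installs the rotating
handler and the other call is a no-op. A minimal standalone sketch of that wiring
(file and logger names here are illustrative):

```python
# rotating_log_demo.py: stdlib-only illustration of the voice.log setup.
import logging
from logging.handlers import RotatingFileHandler

def wire_logging(path: str = "voice.log") -> None:
    # No-op if the root logger already has handlers, so the first module
    # to call this wins and later calls fall through silently.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
        handlers=[
            RotatingFileHandler(path, maxBytes=5_000_000, backupCount=3,
                                encoding="utf-8"),
            logging.StreamHandler(),
        ],
    )

wire_logging()  # installs RotatingFileHandler + stderr echo
wire_logging()  # root already configured: no duplicate handlers added
logging.getLogger("audio_api").info("one shared rotating handler")
logging.getLogger("marcus_voice").info("writes serialized by the handler lock")
```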