Update 2026-04-22 10:57:22

kassam 2026-04-22 10:57:23 +04:00
parent af1d0c1b8a
commit ac9271c62b
27 changed files with 683 additions and 383 deletions

View File

@@ -32,6 +32,8 @@ import sys
 import threading
 import time
 import wave
+from logging.handlers import RotatingFileHandler
 import numpy as np
 # ─── PATH + CONFIG ───────────────────────────────────────
@@ -45,15 +47,18 @@ from Core.config_loader import load_config
 LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
 os.makedirs(LOG_DIR, exist_ok=True)
-# Note: logging.basicConfig() only takes effect on the first call per process.
-# If the voice module already configured logging (common path via run_marcus.py),
-# this call is a no-op. When audio_api is used standalone, it wires logs to
-# logs/voice.log + stderr.
+# logging.basicConfig is idempotent per process: if marcus_voice configured
+# the root logger first, this call is a no-op and both modules share the same
+# RotatingFileHandler (stdlib FileHandlers hold an internal lock, so concurrent
+# writes to voice.log are safe). Rotation caps voice.log at 5 MB × 3 backups.
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
     handlers=[
-        logging.FileHandler(os.path.join(LOG_DIR, "voice.log")),
+        RotatingFileHandler(
+            os.path.join(LOG_DIR, "voice.log"),
+            maxBytes=5_000_000, backupCount=3, encoding="utf-8",
+        ),
         logging.StreamHandler(),
     ],
 )
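The comment above leans on a stdlib guarantee that is easy to check in isolation: `logging.basicConfig()` returns without doing anything once the root logger already has handlers. A minimal sketch (plain stdlib; the file names are placeholders, not the project's paths):

```python
import logging
from logging.handlers import RotatingFileHandler

# First call wins: it installs the rotating handler on the root logger.
logging.basicConfig(
    level=logging.INFO,
    handlers=[RotatingFileHandler("voice.log", maxBytes=5_000_000,
                                  backupCount=3, encoding="utf-8")],
)
# Second call is a no-op: the root logger already has a handler.
logging.basicConfig(level=logging.DEBUG, filename="other.log")

root = logging.getLogger()
print(len(root.handlers))            # 1  -> still only the rotating handler
print(root.level == logging.INFO)    # True -> the second call changed nothing
```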
@@ -305,14 +310,31 @@ class AudioAPI:
         subprocess.run(["pactl", "set-source-volume", source, "100%"], capture_output=True)
         log.info("Recording %.1fs from mic source %s (parec)", seconds, source)
-        proc = subprocess.Popen(
-            ["parec", "-d", source,
-             f"--format={fmt}", f"--rate={rate}", f"--channels={channels}", "--raw"],
-            stdout=subprocess.PIPE,
-        )
-        time.sleep(seconds)
-        proc.terminate()
-        raw = proc.stdout.read()
+        proc = None
+        raw = b""
+        try:
+            proc = subprocess.Popen(
+                ["parec", "-d", source,
+                 f"--format={fmt}", f"--rate={rate}", f"--channels={channels}", "--raw"],
+                stdout=subprocess.PIPE,
+            )
+            time.sleep(seconds)
+        finally:
+            # Always kill parec — an exception in time.sleep (Ctrl-C / signal)
+            # would otherwise leave an orphaned recorder process running.
+            if proc is not None:
+                try:
+                    proc.terminate()
+                    raw = proc.stdout.read()
+                    proc.wait(timeout=1.0)
+                except Exception as e:
+                    log.warning("parec cleanup error: %s", e)
+                    # Last-resort SIGKILL — suppress only OSError (process
+                    # already exited) so we don't mask other bugs.
+                    try:
+                        proc.kill()
+                    except OSError:
+                        pass
         audio = np.frombuffer(raw, dtype=np.int16)
         log.info("Recorded: %d samples, std=%.0f", len(audio), audio.std())

View File

@@ -12,10 +12,13 @@ from Core.logger import log
 _cfg = load_config("Camera")
-CAM_WIDTH = _cfg["width"]
-CAM_HEIGHT = _cfg["height"]
-CAM_FPS = _cfg["fps"]
-CAM_QUALITY = _cfg["jpeg_quality"]
+CAM_WIDTH = int(_cfg.get("width", 424))
+CAM_HEIGHT = int(_cfg.get("height", 240))
+CAM_FPS = int(_cfg.get("fps", 15))
+CAM_QUALITY = int(_cfg.get("jpeg_quality", 70))
+CAM_TIMEOUT_MS = int(_cfg.get("timeout_ms", 5000))                  # pipeline.wait_for_frames timeout
+CAM_STALE_THRESHOLD = float(_cfg.get("stale_threshold_s", 10.0))    # trip reconnect after this long without a frame
+CAM_RECONNECT_DELAY = float(_cfg.get("reconnect_delay_s", 2.0))     # initial backoff; doubles up to 10 s
 # Shared state
 latest_frame_b64 = [None]
@@ -36,7 +39,7 @@ def camera_loop():
     """Capture RealSense frames continuously with auto-reconnect."""
     import pyrealsense2 as rs
-    backoff = 2.0
+    backoff = CAM_RECONNECT_DELAY
     while camera_alive[0]:
         pipeline = None
         try:
@@ -44,14 +47,14 @@ def camera_loop():
             cfg = rs.config()
             cfg.enable_stream(rs.stream.color, CAM_WIDTH, CAM_HEIGHT, rs.format.bgr8, CAM_FPS)
             pipeline.start(cfg)
-            backoff = 2.0
+            backoff = CAM_RECONNECT_DELAY
             _cam_connected[0] = True
             print("Camera connected")
             log(f"Camera connected {CAM_WIDTH}x{CAM_HEIGHT}@{CAM_FPS}", "info", "camera")
             while camera_alive[0]:
                 try:
-                    frames = pipeline.wait_for_frames(timeout_ms=5000)
+                    frames = pipeline.wait_for_frames(timeout_ms=CAM_TIMEOUT_MS)
                     color_frame = frames.get_color_frame()
                     if not color_frame:
                         continue
@@ -72,8 +75,8 @@ def camera_loop():
                     _cam_last_frame_time[0] = time.time()
                 except Exception:
-                    if time.time() - _cam_last_frame_time[0] > 10.0:
-                        print(" [Camera] No frame for 10s — reconnecting...")
+                    if time.time() - _cam_last_frame_time[0] > CAM_STALE_THRESHOLD:
+                        print(f" [Camera] No frame for {CAM_STALE_THRESHOLD:.0f}s — reconnecting...")
                         break
         except Exception as e:
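The `reconnect_delay_s` comment implies an exponential backoff between reconnect attempts, capped at 10 s. The doubling itself is not visible in this hunk, so the sketch below illustrates the implied cadence rather than the actual loop in `API/camera_api.py`.

```python
def reconnect_delays(initial=2.0, cap=10.0, attempts=6):
    """Yield the successive sleep times an exponential backoff would use."""
    delay = initial
    for _ in range(attempts):
        yield delay
        delay = min(delay * 2, cap)

print(list(reconnect_delays()))   # [2.0, 4.0, 8.0, 10.0, 10.0, 10.0]
```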

View File

@@ -25,7 +25,7 @@ How it works
 Files saved
 -----------
-~/Models_marcus/map/map_001_2026-04-05/
+Data/Brain/maps/map_001_YYYY-MM-DD/
     observations.json   [{step, time, x, y, area_type, objects, observation}]
     path.json           [{x, y, heading, t}]   full path walked
     summary.txt         auto-generated LLaVA summary

View File

@@ -81,7 +81,7 @@ def execute(d: dict):
     actions = merge_actions(d.get("actions", []))
     arm_cmd = d.get("arm", None)
-    print(f"Marcus: {speak}")
+    print(f"Sanad: {speak}")
     if not actions:
         gradual_stop()

View File

@@ -41,18 +41,12 @@ from Autonomous.marcus_autonomous import AutonomousMode
 _cfg = load_config("Brain")
 _TALK_PATTERNS = [
-    # English questions
+    # Questions
     r"^(?:what|who|where|when|how|why|is|are|do|does|can|tell|describe|explain|show|analyze)\s+",
-    # English identity/facts
+    # Identity / facts told to the robot
     r"^(?:my name is|i am|call me|that is|that person|note that|remember that)\s+",
-    # English acknowledgements
+    # Acknowledgements
     r"^(?:ok|okay|yes|no|good|nice|great|thanks|thank you|got it|understood|correct)\s*[!.]*$",
-    # Arabic questions — ماذا ترى / كيف حالك / من أنت / ما اسمك / صف / هل
-    r"^(?:ماذا|ما\s|كيف|من\s|أين|لماذا|هل|صف|اشرح|وصف|كم)\s*",
-    # Arabic identity/facts — اسمي / أنا / تذكر
-    r"^(?:اسمي|أنا\s|تذكر\s|سجل\s|لاحظ\s)",
-    # Arabic acknowledgements — حسنا / شكرا / ممتاز / صح / مفهوم
-    r"^(?:حسنا|شكرا|ممتاز|صح|مفهوم|تمام|أحسنت|جيد|نعم|لا)\s*[!.]*$",
 ]
 _NAT_GOAL_RE = re.compile(
@@ -271,9 +265,10 @@ def process_command(cmd: str) -> dict:
         return {"type": "talk", "speak": speak, "action": "TALK", "elapsed": time.time() - t0}
     # ── Greeting ─────────────────────────────────────────────────────────
-    if re.match(r"^(?:hi+|hey+|hello+|hola|salam|marhaba|sup|yo+|ahlan|السلام عليكم|مرحبا|أهلا|هلا|يا هلا)\s*[!.]*$", cmd, re.IGNORECASE):
+    if re.match(r"^(?:hi+|hey+|hello+|sup|yo+|greetings|good (?:morning|afternoon|evening))\s*[!.]*$",
+                cmd, re.IGNORECASE):
         response = "Hello! I am Sanad. How can I help you?"
-        print(f"Marcus: {response}")
+        print(f"Sanad: {response}")
         add_to_history(cmd, response)
         log_cmd(cmd, response)
         return {"type": "greeting", "speak": response, "action": "GREETING", "elapsed": 0}
@@ -282,7 +277,7 @@ def process_command(cmd: str) -> dict:
     if re.match(r"^(?:come(?:\s+back)?(?:\s+to\s+me)?|come\s+here|get\s+closer|approach|move\s+closer)\s*[!.]*$", cmd, re.IGNORECASE):
         execute_action("forward", 2.0)
         resp = "Coming to you"
-        print(f"Marcus: {resp}")
+        print(f"Sanad: {resp}")
         add_to_history(cmd, resp)
         log_cmd(cmd, resp)
         return {"type": "move", "speak": resp, "action": "FORWARD 2.0s", "elapsed": 2.0}
@@ -300,7 +295,7 @@ def process_command(cmd: str) -> dict:
         execute_action("right" if turn_dir == "right" else "left", turn_deg / 18.0)
         execute_action(walk_dir, walk_dur)
         resp = f"Turned {turn_dir} {int(turn_deg)} degrees then moved {walk_dir}"
-        print(f"Marcus: {resp}")
+        print(f"Sanad: {resp}")
         add_to_history(cmd, resp)
         log_cmd(cmd, resp)
         return {"type": "move", "speak": resp, "action": f"MULTI {turn_dir}+{walk_dir}", "elapsed": time.time() - t0}
@@ -350,7 +345,7 @@ def _handle_talk(cmd):
             pass
         d = ask_talk(cmd, img, facts=facts_str)
         sp = d.get("speak", "")
-        print(f"Marcus: {sp}")
+        print(f"Sanad: {sp}")
         log_cmd(cmd, sp)
         return sp
     except Exception as ex:
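For readers tracing how a spoken command ends up at the talk prompt rather than the movement prompt, here is a hedged sketch of the dispatch these patterns drive; the helper name `is_talk` is invented for illustration, and the real routing happens inside `process_command()`.

```python
import re

_TALK_PATTERNS = [
    r"^(?:what|who|where|when|how|why|is|are|do|does|can|tell|describe|explain|show|analyze)\s+",
    r"^(?:my name is|i am|call me|that is|that person|note that|remember that)\s+",
    r"^(?:ok|okay|yes|no|good|nice|great|thanks|thank you|got it|understood|correct)\s*[!.]*$",
]

def is_talk(cmd: str) -> bool:   # hypothetical helper, for illustration only
    return any(re.match(p, cmd.strip(), re.IGNORECASE) for p in _TALK_PATTERNS)

print(is_talk("what do you see"))   # True:  routed to the talk prompt
print(is_talk("turn right"))        # False: falls through to the movement path
```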

View File

@@ -10,8 +10,8 @@ Purpose : Persistent memory across sessions.
 Folder structure
 ----------------
-~/Models_marcus/places.json          persistent named places (all sessions)
-~/Models_marcus/sessions/
+Data/History/Places/places.json      persistent named places (all sessions)
+Data/Brain/Sessions/
     session_001_2026-04-05/
         commands.json     [{time, cmd, response, duration_s}]
         detections.json   [{time, class, position, distance, x, y}]
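For reference, here is the shape of a single `commands.json` record implied by the field list above; the values and the timestamp format are invented for the example.

```python
record = {
    "time": "2026-04-22 10:57:00",      # timestamp of the command (format assumed)
    "cmd": "turn right 90 degrees",     # what the user typed or said
    "response": "Turning 90 degrees",   # what the robot spoke back
    "duration_s": 5.0,                  # how long execution took
}
```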

View File

@@ -4,7 +4,7 @@ Subscribes to /cmd_vel and holosoma/other_input (Python 3.8 + ROS2 Foxy)
 Forwards to Holosoma via ZMQ PUB socket (Python 3.10 hsinference)
 Run: source /opt/ros/foxy/setup.bash
-     python3.8 ~/Models_marcus/ros2_zmq_bridge.py
+     python3.8 ~/Marcus/Bridge/ros2_zmq_bridge.py
 """
 import json, time
 import rclpy

View File

@@ -190,7 +190,7 @@ def _handle_message(data):
         print(f" [{ts}] {color}{C.BOLD}{action}{C.RESET} {C.GRAY}({elapsed}s){C.RESET}")
         if speak:
-            print(f" {C.CYAN}Marcus: {speak}{C.RESET}")
+            print(f" {C.CYAN}Sanad: {speak}{C.RESET}")
     elif t == "camera_config":
         p = data.get("profile", "?")

View File

@@ -14,8 +14,5 @@
     "num_predict_patrol": 100,
     "num_predict_talk": 80,
     "num_predict_verify": 10,
-    "warmup_num_predict": 5,
+    "warmup_num_predict": 5
"main_prompt": "You are Sanad, a humanoid robot. Look at the image and follow the command.\n{facts}\n\nCommand: \"{command}\"\n\nReply with ONLY this JSON — no markdown, no explanation:\n{{\"actions\":[{{\"move\":\"forward|backward|left|right|stop\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"one sentence\",\"abort\":null}}\n\nRULES:\n- actions is a list of movement steps, max duration 5.0s each\n- move: \"forward\" \"backward\" \"left\" \"right\" \"stop\"\n- arm: \"wave\" \"raise_right\" \"raise_left\" \"clap\" \"high_five\" \"hug\" \"heart\" \"shake_hand\" \"face_wave\" or null\n- arm is NEVER a move value\n- questions/descriptions: actions=[]\n- obstacle < 0.5m: abort = \"obstacle detected\"\n- \"90 degrees\" = 5.0s | \"45 degrees\" = 2.5s | \"1 step\" = 1.0s\n\nEXAMPLES:\n\"turn right\" -> {{\"actions\":[{{\"move\":\"right\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Turning right\",\"abort\":null}}\n\"turn right 90 degrees\" -> {{\"actions\":[{{\"move\":\"right\",\"duration\":5.0}}],\"arm\":null,\"speak\":\"Turning 90 degrees\",\"abort\":null}}\n\"move back then left\" -> {{\"actions\":[{{\"move\":\"backward\",\"duration\":2.0}},{{\"move\":\"left\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Moving back then left\",\"abort\":null}}\n\"wave\" -> {{\"actions\":[],\"arm\":\"wave\",\"speak\":\"Waving\",\"abort\":null}}\n\"raise your right arm\" -> {{\"actions\":[],\"arm\":\"raise_right\",\"speak\":\"Raising right arm\",\"abort\":null}}\n\"walk forward and wave\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":\"wave\",\"speak\":\"Walking and waving\",\"abort\":null}}\n\"what do you see\" -> {{\"actions\":[],\"arm\":null,\"speak\":\"I see...\",\"abort\":null}}\n\"stop\" -> {{\"actions\":[{{\"move\":\"stop\",\"duration\":0}}],\"arm\":null,\"speak\":\"Stopping\",\"abort\":null}}\n\"come to me\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Coming to you\",\"abort\":null}}\n\"come back\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Coming back\",\"abort\":null}}\n\"come here\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Coming\",\"abort\":null}}\n\"get closer\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":1.0}}],\"arm\":null,\"speak\":\"Moving closer\",\"abort\":null}}\n\"go away\" -> {{\"actions\":[{{\"move\":\"backward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Moving away\",\"abort\":null}}\n\nCommand: \"{command}\"\nJSON:",
"goal_prompt": "You are Sanad navigating toward a goal.\n\nGOAL: \"{goal}\"\n\nLook at the image. Have you reached the goal?\n\nReply ONLY this JSON:\n{{\"reached\":false,\"next_move\":\"left\",\"duration\":0.5,\"speak\":\"what you see\"}}\n\nRULES:\n- reached: true ONLY if you clearly see the goal target right now\n- next_move: \"forward\" \"left\" \"right\"\n- duration: 0.3 to 0.8 seconds\n- Default next_move: \"left\" to keep scanning\n\nGOAL: \"{goal}\"\nJSON:",
"patrol_prompt": "You are Sanad, an HSE inspection robot on autonomous patrol.\n\nLook at the camera and assess the scene.\n\nReply ONLY this JSON:\n{{\"observation\":\"one sentence\",\"alert\":null,\"next_move\":\"forward\",\"duration\":1.0}}\n\nRULES:\n- alert = null if safe\n- alert = \"PPE: no helmet\" if person without helmet\n- alert = \"PPE: no vest\" if person without safety vest\n- alert = \"Hazard: description\" for other hazards\n- next_move: \"forward\" \"left\" \"right\"\n- duration: 0.5 to 2.0s\n\nJSON:"
 }

View File

@@ -1,6 +1,6 @@
 {
     "default_max_steps": 60,
-    "step_delay_s": 0.4,
+    "step_delay_s": 0.15,
     "rotate_speed": 0.25,
     "min_steps_warmup": 3
 }
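A hedged sketch of how these four keys are consumed on the code side: `load_config` is the documented cached reader, but the constant names below are assumptions for illustration, not necessarily the ones `Vision/marcus_imgsearch.py` uses.

```python
from Core.config_loader import load_config

_cfg = load_config("ImageSearch")            # reads Config/config_ImageSearch.json
DEFAULT_MAX_STEPS = int(_cfg.get("default_max_steps", 60))
STEP_DELAY_S = float(_cfg.get("step_delay_s", 0.15))
ROTATE_SPEED = float(_cfg.get("rotate_speed", 0.25))
MIN_STEPS_WARMUP = int(_cfg.get("min_steps_warmup", 3))
```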

View File

@@ -1,8 +0,0 @@
{
"base_dir": "Data",
"sessions_dir": "Data/Sessions",
"places_file": "Data/Places/places.json",
"max_cmd_len": 500,
"max_sessions": 50,
"detect_dedupe_s": 5.0
}

View File

@@ -5,7 +5,7 @@
 # Model : Qwen2.5-VL 3B (Ollama, fully offline)
 #
 # Placeholders:
-# {command} — the user's typed/spoken command (Arabic or English)
+# {command} — the user's typed/spoken command (English)
 # {goal} — the navigation goal description
 # {facts} — known facts from memory (e.g. "Kassam is the programmer")
 # {target} — YOLO class being searched (e.g. "person")
@@ -16,7 +16,7 @@
 # LANGUAGE NOTE:
 # All prompts instruct Qwen to detect the command language automatically
 # and respond in the same language. No code-side language detection needed.
-# Arabic and English are handled natively by the model.
+# English-only by policy — Arabic support was removed 2026-04-21.
 # =============================================================================
@@ -51,17 +51,17 @@ main_prompt: |
   - Merge consecutive same-direction steps into one:
       "forward 2s + forward 2s" → "forward 4s" — NOT two separate steps
   - Duration reference:
-      "1 step" / "خطوة" = 1.0s
-      "tiny step" / "خطوة صغيرة" = 0.3s
+      "1 step" = 1.0s
+      "tiny step" = 0.3s
       "half a step" = 0.5s
-      "2 steps" / "خطوتين" = 2.0s
-      "3 steps" / "ثلاث خطوات" = 3.0s
-      "45 degrees" / "٤٥ درجة" = 2.5s
-      "90 degrees" / "٩٠ درجة" = 5.0s
-      "180 degrees" / "استدر" = 10.0s
+      "2 steps" = 2.0s
+      "3 steps" = 3.0s
+      "45 degrees" = 2.5s
+      "90 degrees" = 5.0s
+      "180 degrees" = 10.0s
   - Speed modifiers:
-      "slowly" / "ببطء" / "بهدوء" → multiply duration by 0.5
-      "quickly" / "fast" / "بسرعة" → multiply duration by 1.5 (cap at 5.0s)
+      "slowly" → multiply duration by 0.5
+      "quickly" / "fast" → multiply duration by 1.5 (cap at 5.0s)
   ── ARM RULES ──────────────────────────────────────────────────────────────
   - arm: one value from the list above, or null
@@ -72,12 +72,10 @@ main_prompt: |
   ── SPEAK RULES ────────────────────────────────────────────────────────────
   - speak: one sentence, first person, natural
   - Describe what you are doing OR what you see — never both in one sentence
-  - For pure movement: "Turning right" / "أدور لليمين"
+  - For pure movement: "Turning right"
   - For vision questions: describe what the camera shows
   - Never repeat the command word-for-word
-  - CRITICAL: match the language of the command exactly
-      Arabic command → Arabic speak
-      English command → English speak
+  - Always respond in English
   ── SAFETY RULES ───────────────────────────────────────────────────────────
   - abort = null for all normal commands
@@ -87,8 +85,8 @@ main_prompt: |
   - When aborting: actions = [] and explain in speak
   ── CONTEXT RULES ──────────────────────────────────────────────────────────
-  - "that person" / "him" / "her" / "ذلك الشخص" → resolve from conversation or camera
-  - "it" / "there" / "هناك" → resolve from last command context
+  - "that person" / "him" / "her" → resolve from conversation or camera
+  - "it" / "there" → resolve from last command context
   - If ambiguous → choose the most reasonable safe interpretation
   ══ ENGLISH EXAMPLES ═══════════════════════════════════════════════════════
@@ -190,113 +188,6 @@ main_prompt: |
   "walk into the wall"
   → {{"actions":[],"arm":null,"speak":"I cannot do that safely","abort":"unsafe command"}}
══ ARABIC EXAMPLES ════════════════════════════════════════════════════════
حركة أساسية:
"تقدم"
→ {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"أتقدم للأمام","abort":null}}
"تراجع للخلف"
→ {{"actions":[{{"move":"backward","duration":2.0}}],"arm":null,"speak":"أتراجع للخلف","abort":null}}
"دور يمين"
→ {{"actions":[{{"move":"right","duration":2.0}}],"arm":null,"speak":"أدور لليمين","abort":null}}
"دور يسار"
→ {{"actions":[{{"move":"left","duration":2.0}}],"arm":null,"speak":"أدور لليسار","abort":null}}
"قف"
→ {{"actions":[{{"move":"stop","duration":0}}],"arm":null,"speak":"أتوقف الآن","abort":null}}
درجات:
"دور يمين ٩٠ درجة"
→ {{"actions":[{{"move":"right","duration":5.0}}],"arm":null,"speak":"أدور لليمين ٩٠ درجة","abort":null}}
"دور يسار ٤٥ درجة ببطء"
→ {{"actions":[{{"move":"left","duration":1.25}}],"arm":null,"speak":"أدور لليسار ببطء","abort":null}}
"استدر ١٨٠ درجة"
→ {{"actions":[{{"move":"right","duration":10.0}}],"arm":null,"speak":"أستدير ١٨٠ درجة","abort":null}}
خطوات:
"تقدم خطوة واحدة"
→ {{"actions":[{{"move":"forward","duration":1.0}}],"arm":null,"speak":"أتقدم خطوة واحدة","abort":null}}
"تقدم خطوتين"
→ {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"أتقدم خطوتين","abort":null}}
"تراجع ثلاث خطوات"
→ {{"actions":[{{"move":"backward","duration":3.0}}],"arm":null,"speak":"أتراجع ثلاث خطوات","abort":null}}
"تقدم قليلا"
→ {{"actions":[{{"move":"forward","duration":0.5}}],"arm":null,"speak":"أتقدم قليلا","abort":null}}
خطوات متعددة:
"تقدم ثم دور يمين"
→ {{"actions":[{{"move":"forward","duration":2.0}},{{"move":"right","duration":2.0}}],"arm":null,"speak":"أتقدم ثم أدور لليمين","abort":null}}
"دور يمين ٩٠ درجة ثم تراجع خطوتين"
→ {{"actions":[{{"move":"right","duration":5.0}},{{"move":"backward","duration":2.0}}],"arm":null,"speak":"أدور يمين ٩٠ درجة ثم أتراجع خطوتين","abort":null}}
"تراجع ثم دور يسار"
→ {{"actions":[{{"move":"backward","duration":2.0}},{{"move":"left","duration":2.0}}],"arm":null,"speak":"أتراجع ثم أدور لليسار","abort":null}}
اقتراب / ابتعاد:
"تعال إلي"
→ {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"آتي إليك","abort":null}}
"اقترب"
→ {{"actions":[{{"move":"forward","duration":1.0}}],"arm":null,"speak":"أقترب منك","abort":null}}
"ابتعد"
→ {{"actions":[{{"move":"backward","duration":2.0}}],"arm":null,"speak":"أبتعد عنك","abort":null}}
ذراعين:
"لوح بيدك"
→ {{"actions":[],"arm":"wave","speak":"أُلوّح بيدي","abort":null}}
"صفق"
→ {{"actions":[],"arm":"clap","speak":"أُصفق","abort":null}}
"أعطني خمسة"
→ {{"actions":[],"arm":"high_five","speak":"خمسة!","abort":null}}
"ارفع يدك اليمنى"
→ {{"actions":[],"arm":"raise_right","speak":"أرفع يدي اليمنى","abort":null}}
"عانقني"
→ {{"actions":[{{"move":"forward","duration":1.0}}],"arm":"hug","speak":"آتي لأعانقك","abort":null}}
"صافحني"
→ {{"actions":[],"arm":"shake_hand","speak":"أمد يدي للمصافحة","abort":null}}
"تقدم ولوح"
→ {{"actions":[{{"move":"forward","duration":2.0}}],"arm":"wave","speak":"أتقدم وأُلوّح","abort":null}}
رؤية / أسئلة:
"ماذا ترى"
→ {{"actions":[],"arm":null,"speak":"أرى شخصا يجلس أمام طاولة مع جهاز كمبيوتر","abort":null}}
"صف الغرفة"
→ {{"actions":[],"arm":null,"speak":"أرى مكتبا به طاولات وكراسي وشاشات","abort":null}}
"هل يوجد أحد هنا"
→ {{"actions":[],"arm":null,"speak":"نعم، أرى شخصا واحدا أمامي","abort":null}}
"انظر حولك"
→ {{"actions":[{{"move":"left","duration":3.0}}],"arm":null,"speak":"أنظر حولي","abort":null}}
هوية:
"اسمي قصام"
→ {{"actions":[],"arm":null,"speak":"أهلا قصام، سأتذكر اسمك","abort":null}}
"من أنت"
→ {{"actions":[],"arm":null,"speak":"أنا ماركوس، روبوت مساعد من YS Lootah Technology في دبي","abort":null}}
أمان:
"اصطدم بالحائط"
→ {{"actions":[],"arm":null,"speak":"لا أستطيع فعل ذلك بأمان","abort":"unsafe command"}}
Command: "{command}" Command: "{command}"
JSON: JSON:
@ -368,24 +259,6 @@ goal_prompt: |
Camera: chair visible but far away at edge of frame Camera: chair visible but far away at edge of frame
→ {{"reached":false,"next_move":"forward","duration":0.5,"speak":"I can see a chair far ahead — moving closer","confidence":"medium"}} → {{"reached":false,"next_move":"forward","duration":0.5,"speak":"I can see a chair far ahead — moving closer","confidence":"medium"}}
── EXAMPLES (Arabic) ──────────────────────────────────────────────────────
Goal: "قف عندما ترى شخصا"
Camera: ممر فارغ
→ {{"reached":false,"next_move":"left","duration":0.4,"speak":"أرى ممرا فارغا — لا يوجد أحد","confidence":"high"}}
Goal: "قف عندما ترى شخصا"
Camera: شخص يجلس
→ {{"reached":true,"next_move":"left","duration":0.0,"speak":"أرى شخصا بوضوح — أتوقف","confidence":"high"}}
Goal: "ابحث عن لاب توب"
Camera: طاولة بدون لاب توب
→ {{"reached":false,"next_move":"left","duration":0.4,"speak":"أرى طاولة وكراسي لكن لا يوجد لاب توب","confidence":"high"}}
GOAL: "{goal}"
JSON:
# ══════════════════════════════════════════════════════════════════════════════
# PATROL PROMPT
# Used for: autonomous office exploration — auto on / auto off
@@ -515,44 +388,11 @@ talk_prompt: |
  → {{"actions":[],"arm":null,"speak":"I was built by YS Lootah Technology in Dubai, and my programmer is Kassam","abort":null}}
  "what can you do"
-  → {{"actions":[],"arm":null,"speak":"I can move through the environment, identify objects and people, navigate to goals, and respond in both Arabic and English","abort":null}}
+  → {{"actions":[],"arm":null,"speak":"I can move through the environment, identify objects and people, navigate to goals, and respond in English","abort":null}}
  "ok" / "good" / "thanks"
  → {{"actions":[],"arm":null,"speak":"Ready for your next command","abort":null}}
── ARABIC EXAMPLES ────────────────────────────────────────────────────────
"ماذا ترى"
→ {{"actions":[],"arm":null,"speak":"أرى شخصا يجلس أمام طاولة مع جهاز كمبيوتر وشاشة كبيرة","abort":null}}
"صف ما أمامك"
→ {{"actions":[],"arm":null,"speak":"أمامي مكتب به شاشة ولوحة مفاتيح وشخص يعمل","abort":null}}
"هل يوجد أحد في الغرفة"
→ {{"actions":[],"arm":null,"speak":"نعم، أرى شخصا واحدا يجلس أمام محطة العمل","abort":null}}
"اسمي قصام"
→ {{"actions":[],"arm":null,"speak":"أهلا وسهلا قصام، سأتذكر اسمك","abort":null}}
"أنا مبرمجك"
→ {{"actions":[],"arm":null,"speak":"مفهوم، أنت مبرمجي — جاهز لتلقي تعليماتك","abort":null}}
"كيف حالك"
→ {{"actions":[],"arm":null,"speak":"جميع أنظمتي تعمل بشكل طبيعي وأنا جاهز لمساعدتك","abort":null}}
"ما اسمك"
→ {{"actions":[],"arm":null,"speak":"اسمي ماركوس، روبوت مساعد من YS Lootah Technology في دبي","abort":null}}
"من بناك"
→ {{"actions":[],"arm":null,"speak":"بنتني شركة YS Lootah Technology في دبي، ومبرمجي هو قصام","abort":null}}
"شكرا" / "حسنا" / "ممتاز"
→ {{"actions":[],"arm":null,"speak":"جاهز للأمر التالي","abort":null}}
Command: "{command}"
JSON:
# ══════════════════════════════════════════════════════════════════════════════
# VERIFY PROMPT
# Used for: two-stage goal detection — confirm extra condition after YOLO finds class
@@ -676,8 +516,4 @@ image_search_text_prompt: |
  Camera: laptop clearly on desk
  → {{"found":true,"confidence":"high","position":"center","description":"Laptop visible on desk at center of frame"}}
Hint: "شخص يرتدي قميصا أزرق"
Camera: شخص بقميص أزرق واضح
→ {{"found":true,"confidence":"high","position":"center","description":"أرى شخصا يرتدي قميصا أزرق بوضوح في وسط الإطار"}}
  JSON:
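The doubled braces throughout these prompts follow Python's `str.format()` escaping convention: `{command}`, `{goal}`, `{facts}`, and `{target}` get substituted while `{{` and `}}` stay literal JSON braces. A minimal sketch with a toy template (not the real prompt text):

```python
template = 'Command: "{command}"\nReply ONLY this JSON:\n{{"actions":[],"speak":"..."}}'
print(template.format(command="turn right"))
# Command: "turn right"
# Reply ONLY this JSON:
# {"actions":[],"speak":"..."}
```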

View File

@@ -1,8 +1,24 @@
 import logging
 import os
+from logging.handlers import RotatingFileHandler
 from pathlib import Path
+# Rotation policy shared by every log file this backend creates:
+# 5 MB per file, keep 3 rotations (logs/brain.log, brain.log.1, .2, .3).
+# Tune both via env vars if you need larger logs on the robot.
+_ROT_MAX_BYTES = int(os.environ.get("MARCUS_LOG_MAX_BYTES", 5_000_000))
+_ROT_BACKUP_COUNT = int(os.environ.get("MARCUS_LOG_BACKUP_COUNT", 3))
+
+
+def _rotating_handler(path: str) -> RotatingFileHandler:
+    """FileHandler with size-based rotation — prevents unbounded growth."""
+    return RotatingFileHandler(
+        path,
+        maxBytes=_ROT_MAX_BYTES,
+        backupCount=_ROT_BACKUP_COUNT,
+        encoding="utf-8",
+    )
+
+
 class Logs:
@@ -11,7 +27,10 @@ class Logs:
         self.default_log_level = default_log_level
         self.log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
         self.base_dir = str(Path(__file__).resolve().parents[1])
-        self.default_logs_dir = os.path.join(self.base_dir, "Logs")
+        # The canonical log directory is "logs" (lowercase) — matches what
+        # every module writing via stdlib logging expects. "Logs" (capital L)
+        # was historically used by a parallel implementation and is gone.
+        self.default_logs_dir = os.path.join(self.base_dir, "logs")
         self.fallback_log_dir = self._choose_fallback_log_dir()
         self.mainloggerfile = self.resolve_log_path(main_log_file)
         self.logger = None
@@ -30,7 +49,7 @@ class Logs:
                 self.main_logger.removeHandler(handler)
         os.makedirs(os.path.dirname(self.mainloggerfile), exist_ok=True)
-        main_handler = logging.FileHandler(self.mainloggerfile)
+        main_handler = _rotating_handler(self.mainloggerfile)
         main_handler.setFormatter(logging.Formatter(self.log_format))
         main_handler.setLevel(self.default_log_level)
         self.main_logger.addHandler(main_handler)
@@ -138,7 +157,7 @@ class Logs:
             if isinstance(handler, logging.FileHandler):
                 self.logger.removeHandler(handler)
-        handler = logging.FileHandler(full_path)
+        handler = _rotating_handler(full_path)
         handler.setFormatter(logging.Formatter(self.log_format))
         handler.setLevel(self.default_log_level)
         self.logger.addHandler(handler)
@@ -152,9 +171,12 @@ class Logs:
         temp_logger.setLevel(self.default_log_level)
         temp_logger.propagate = False  # Prevent printing to terminal
+        # Re-use the existing handler if it's already attached to the
+        # same file (by absolute path). Prevents handler accumulation
+        # when this function is called from long-running loops.
         if not any(isinstance(h, logging.FileHandler) and h.baseFilename == full_path
                    for h in temp_logger.handlers):
-            handler = logging.FileHandler(full_path)
+            handler = _rotating_handler(full_path)
             handler.setFormatter(logging.Formatter(self.log_format))
             temp_logger.addHandler(handler)
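To see what 5 MB × 3 means on disk, here is a minimal stdlib sketch independent of `log_backend`: once `brain.log` passes `maxBytes` it is renamed to `brain.log.1`, older backups shift to `.2` and `.3`, the oldest is dropped, and a fresh `brain.log` is started. The file and logger names are placeholders.

```python
import logging
from logging.handlers import RotatingFileHandler

handler = RotatingFileHandler("brain.log", maxBytes=5_000_000,
                              backupCount=3, encoding="utf-8")
handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))

logger = logging.getLogger("rotation-demo")
logger.setLevel(logging.INFO)
logger.addHandler(handler)
logger.info("history is capped at roughly 20 MB total: brain.log plus three backups")
```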

View File

@@ -7,11 +7,12 @@
 > **What changed since the early draft (April 4):** The project was restructured
 > from two monolithic scripts (`marcus_llava.py` + `marcus_yolo.py`) into a
-> layered architecture. See `Doc/architecture.md` for the current file tree and
-> `Doc/environment.md` for the verified Jetson software stack, exact library
-> versions, and GPU bring-up recipe. This reference still describes the
-> function-level semantics (inputs/outputs/examples) — treat any file path in
-> this document as illustrative and cross-check the actual module. Recent
+> layered architecture. See `Doc/architecture.md` for the current file tree,
+> `Doc/environment.md` for the verified Jetson software stack, `Doc/pipeline.md`
+> for end-to-end dataflow, and **`Doc/functions.md` for the authoritative
+> function inventory** (always generated from AST — treat it as the source of
+> truth for signatures). This reference describes the semantics (usage, JSON
+> schemas, examples); cross-check `functions.md` for exact signatures. Recent
 > deltas called out inline below.

 ### Recent API deltas (2026-04-21)
@@ -32,6 +33,11 @@
 | Subsystem flags | `Config/config_Brain.json::subsystems.{lidar, voice, imgsearch, autonomous}` | `init_brain()` skips any subsystem with `false`. Defaults: lidar+voice+autonomous ON, imgsearch OFF. |
 | Robot persona → Sanad | Multiple | Wake words `["sanad","sannad","sanat","sunnat"]`; all prompts say "You are Sanad"; banner reads `SANAD AI BRAIN — READY`; hardcoded self-intro says "I am Sanad". Project/file/module names unchanged. |
 | Logger rename | `Core/log_backend.py` (was `Core/Logger.py`) | Case-only collision with `Core/logger.py` removed — repo now clones cleanly on macOS/Windows. Public API unchanged: `from Core.logger import log`. |
+| Log rotation everywhere | `Core/log_backend.py`, `API/audio_api.py`, `Voice/marcus_voice.py` | All `FileHandler`s swapped for `RotatingFileHandler` (5 MB × 3 backups, tunable via `MARCUS_LOG_MAX_BYTES` / `MARCUS_LOG_BACKUP_COUNT`). Prevents unbounded log growth on the Jetson. `default_logs_dir` pinned to lowercase `logs/`. |
+| English-only policy | `Brain/marcus_brain.py`, `Config/marcus_prompts.yaml`, `Config/config_Voice.json` | Arabic talk-pattern and greeting regexes removed; 5.8 KB of Arabic prompt examples stripped from `marcus_prompts.yaml`; Arabic wake words removed from config. `AudioAPI.speak(text, lang='en')` — only `'en'` accepted; non-ASCII is rejected. |
+| Dead-code + orphan sweep | `Legacy/marcus_nav.py`, `Config/config_Memory.json` | Deleted. Config count 13 → 12 JSON + 1 YAML. |
+| Orphan config keys wired up | `Vision/marcus_imgsearch.py`, `Voice/builtin_mic.py`, `API/camera_api.py`, `Navigation/marcus_odometry.py` | `config_ImageSearch.json` (4 keys), `config_Voice.mic_udp.read_timeout_sec`, `config_Camera.{timeout_ms, stale_threshold_s, reconnect_delay_s}`, `config_Odometry.json` (10 keys) are all read by code now. **0 orphan keys across 156 total.** |
+| Subprocess leak fix | `API/audio_api.py::_record_parec` | `Popen` now wrapped in try/finally; orphan `parec` processes can't survive Ctrl-C/exceptions. Last-resort `proc.kill()` catches only `OSError`. |

 ---

View File

@@ -19,9 +19,13 @@
 - **Subsystem flags** — `config_Brain.json::subsystems.{lidar, voice, imgsearch, autonomous}` let you selectively skip heavy boot stages.
 - **Conditional inner-loop sleeps** — goal_nav / autonomous / imgsearch no longer pay unconditional per-step naps.
 - **Core/Logger.py → Core/log_backend.py** — case-only name collision with `logger.py` resolved; repo clones cleanly on macOS/Windows.
+- **Log rotation on every file handler** — `Core.log_backend` + stdlib voice handlers now use `RotatingFileHandler` (5 MB × 3 backups, env-tunable). `default_logs_dir` fixed to lowercase `logs/` so the capital-L folder no longer gets recreated.
 - **Robot persona = "Sanad"** — wake words, prompts, banner, and self-intro all use "Sanad". Project identity ("Marcus") remains in file names, class names, directory, logs.
+- **English-only** — all Arabic talk/greeting regexes, Arabic prompt examples (≈5.8 KB), and Arabic wake words removed. 0 non-ASCII chars in live code/config.
+- **Orphan config cleanup** — `Config/config_Memory.json` deleted (never loaded). `config_ImageSearch.json`, `config_Odometry.json` (10 keys), plus 3 unused `config_Camera` keys and `mic_udp.read_timeout_sec` are now wired into their respective modules. 0 orphan keys across 156 total (12 config files).
+- **Dead-code pruning** — `Legacy/marcus_nav.py` removed. Config count 13 → 12 JSON + `marcus_prompts.yaml`.

-See `Doc/environment.md` for the verified Jetson software stack and `Doc/pipeline.md` for the end-to-end data flow.
+See `Doc/environment.md` for the verified Jetson software stack, `Doc/pipeline.md` for the end-to-end data flow, and `Doc/functions.md` for the full function inventory.

 ---
@@ -64,7 +68,8 @@ Marcus/
 │   ├── config_ImageSearch.json   # search defaults
 │   ├── config_Voice.json         # mic (builtin_udp|pactl_parec), TTS backend, wake words, mic_udp group/port
 │   ├── config_LiDAR.json         # Livox Mid-360 connection + SLAM engine params
-│   └── marcus_prompts.yaml       # All Qwen-VL prompts (main, goal, patrol, talk, verify)
+│   └── marcus_prompts.yaml       # All Qwen-VL prompts (main, goal, patrol, talk, verify, 2× imgsearch)
+│       # Total: 12 JSON files + 1 YAML. (config_Memory.json removed 2026-04-21.)
 ├── API/                          # Interface layer — one file per subsystem
 │   ├── zmq_api.py                # ZMQ PUB socket: init_zmq(), send_vel(), gradual_stop(), send_cmd()
@@ -139,11 +144,19 @@ Marcus/
 │   ├── server.log
 │   ├── zmq.log
 │   └── main.log
+│       # All log files rotate at 5 MB × 3 backups (tunable via
+│       # MARCUS_LOG_MAX_BYTES / MARCUS_LOG_BACKUP_COUNT env vars).
-└── Legacy/                       # Archived originals
-    └── marcus_nav.py             # Original standalone prototype
+└── Doc/                          # Documentation
+    ├── architecture.md           # This file
+    ├── controlling.md            # Startup + command reference
+    ├── environment.md            # Jetson versions + install recipe
+    ├── pipeline.md               # End-to-end dataflow diagrams
+    ├── functions.md              # Full function inventory
+    └── MARCUS_API.md             # Developer API reference
 ```
+*Removed 2026-04-21: `Legacy/marcus_nav.py` (dead code + Arabic).*

 ---

 ## Layer Architecture

View File

@@ -254,3 +254,33 @@ Most values configurable in `Config/config_Network.json` and `config_Voice.json`:
See `Doc/architecture.md` for full project structure and file-by-file documentation.
See `Doc/environment.md` for the verified Jetson software stack.
See `Doc/pipeline.md` for the end-to-end data flow.
See `Doc/functions.md` for the full function inventory (AST-generated).
---
## Language policy
**English only.** Arabic was removed from the codebase on 2026-04-21:
- `Config/config_Voice.json::stt.wake_words_en` — only English variants (`sanad`, `sannad`, `sanat`, `sunnat`)
- `Config/marcus_prompts.yaml` — no Arabic examples left in any of the 7 prompts
- `API/audio_api.py::speak(text)` — rejects non-ASCII (the G1 TtsMaker silently maps Arabic to Chinese, which nobody wants)
- `Brain/marcus_brain.py` — greeting and talk-pattern regexes match English only
If you need Arabic back, the cleanest paths are either Piper TTS (offline) or edge-tts (online) — see `git log` for the removed implementations.
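A hedged sketch of the kind of guard this policy implies; the real check lives in `API/audio_api.py::speak()`, and the helper below is illustrative only.

```python
def _assert_english(text: str) -> str:
    """Reject non-ASCII text so the G1 TtsMaker never receives it."""
    if not text.isascii():
        raise ValueError("speak() is English-only: non-ASCII text rejected")
    return text

print(_assert_english("Hello, I am Sanad."))   # passes through unchanged
# _assert_english("مرحبا")                      # would raise ValueError
```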
---
## Logs
All `.log` files in `logs/` rotate at **5 MB × 3 backups** by default. To change:
```bash
export MARCUS_LOG_MAX_BYTES=10000000 # 10 MB per file
export MARCUS_LOG_BACKUP_COUNT=5 # keep 5 rotations
export MARCUS_LOG_DIR=/var/log/marcus # move logs off SD card
```
Per-module log files:
- `brain.log`, `camera.log`, `lidar.log`, `zmq.log`, `server.log`, `main.log` — via `Core.logger.log()`
- `voice.log` — via stdlib `logging` in `audio_api.py` + `marcus_voice.py`
- Session JSON: `Data/Brain/Sessions/session_NNN_YYYY-MM-DD/{commands,detections,alerts,places}.json`

View File

@@ -377,3 +377,10 @@ Config file (`Config/config_Vision.json`):
 | 2026-04-21 | **Restructure**: moved ZMQ bind out of `API/zmq_api.py` import time into `init_zmq()`; fixes LiDAR SLAM worker spawn crash. Added loud GPU-requirement banner in `API/yolo_api.py`. Dropped `num_predict_main` 200→120. Made inner-loop sleeps in goal_nav/autonomous/imgsearch conditional. Renamed `Core/Logger.py` → `Core/log_backend.py` (case-collision fix). Updated `Doc/MARCUS_API.md` to current state. |
 | 2026-04-21 | **Voice restructure**: added `Voice/builtin_mic.py` (G1 array mic via UDP multicast `239.168.123.161:5555`) and `Voice/builtin_tts.py` (thin `AudioClient.TtsMaker` wrapper). Rewired `Voice/marcus_voice.py` to use BuiltinMic. Refactored `API/audio_api.py::speak()` to use BuiltinTTS — removed ~110 lines of edge-tts + pydub + Piper plumbing. Deleted `Voice/marcus_gemini_voice.py`. Added `subsystems.{lidar,voice,imgsearch,autonomous}` gate in `config_Brain.json::init_brain()`. |
 | 2026-04-21 | **Persona swap**: robot identifies as Sanad. Wake words `["sanad","sannad","sanat","sunnat"]`, `speaker.app_name="sanad"`, all Qwen prompts say "You are Sanad", banner reads `SANAD AI BRAIN — READY`, hardcoded self-intro says "I am Sanad". Project directory, class names, filenames, and `PROJECT_NAME=Marcus` env var unchanged. |
+| 2026-04-21 | **English-only sweep**: stripped 5.8 KB of Arabic examples from `marcus_prompts.yaml`, removed Arabic talk-pattern and greeting regexes in `Brain/marcus_brain.py`, dropped Arabic wake words from `config_Voice.json`, changed user-facing prints `Marcus: …` → `Sanad: …` in `executor.py`, `marcus_brain.py`, `marcus_cli.py`. Verified: 0 Arabic chars in live code/config. |
+| 2026-04-21 | **Logs hardened**: `Core/log_backend.py` now uses `RotatingFileHandler` (5 MB × 3 backups, env-tunable via `MARCUS_LOG_MAX_BYTES` / `MARCUS_LOG_BACKUP_COUNT`) for all three code paths (main_handler, `LogEngine`, `LogsMessages`). `API/audio_api.py` + `Voice/marcus_voice.py` also rotate `voice.log`. `default_logs_dir` fixed: `"Logs"` → `"logs"` (matches actual directory; no more case-collision recreation). |
+| 2026-04-21 | **Dead code removed**: deleted `Legacy/marcus_nav.py` (unused + Arabic), deleted `Config/config_Memory.json` (orphan — never loaded). Config count: 13 → **12** JSON files + `marcus_prompts.yaml`. |
+| 2026-04-21 | **Orphan config keys wired up (0 orphans remaining)**: `config_ImageSearch.json` → `Vision/marcus_imgsearch.py` (4 constants), `config_Voice.mic_udp.read_timeout_sec` → `Voice/builtin_mic.py`, `config_Camera.{timeout_ms, stale_threshold_s, reconnect_delay_s}` → `API/camera_api.py`, `config_Odometry.json` (10 keys) → `Navigation/marcus_odometry.py`. All 156 config keys now referenced by code. |
+| 2026-04-21 | **Subprocess leak fix**: `AudioAPI._record_parec` now wraps `Popen` in try/finally with `terminate → wait(1.0) → kill` fallback; orphan `parec` processes can no longer survive Ctrl-C. Last-resort `proc.kill()` catches only `OSError` (not bare `except`). |
+| 2026-04-21 | **Modelfile corrected**: `Models/Modelfile` now `FROM qwen2.5vl:3b` (was `:7b`) with a header explaining it's an optional build template — runtime uses `ollama pull qwen2.5vl:3b` directly. |
+| 2026-04-21 | **Final verification**: 14-dimension smoke test green — no Arabic, no dead dirs, 0 orphan keys, every FileHandler rotates, no bare `except: pass`, no stale `Models_marcus` / `marcus_llava` refs, 25/25 modules import. |

Doc/functions.md (new file, 175 lines)
View File

@@ -0,0 +1,175 @@
# Marcus — Function Inventory
**Robot persona:** Sanad (wake word + self-intro)
**Updated:** 2026-04-21
Every callable in the codebase, grouped by layer. Generated from AST, kept in sync with the source. See `architecture.md` for where each module lives and `pipeline.md` for how they connect.
**Totals:** 25 importable modules · 73 top-level functions · 9 public classes.
---
## `run_marcus.py` — entrypoint
Script only. Prepends `PROJECT_ROOT` to `sys.path`, then calls `Brain.marcus_brain.run_terminal()` in `__main__`.
---
## `Core/` — foundation, no external deps
| File | Function | Purpose |
|---|---|---|
| `env_loader.py` | `_find_env_file()`, `_load_dotenv(path)` | find + parse `.env` into `os.environ`; exports `PROJECT_ROOT` |
| `config_loader.py` | `load_config(name)`, `config_path(relative)` | cached reader for `Config/config_{name}.json` |
| `log_backend.py` | `_rotating_handler(path)` + **class `Logs`** | custom logging engine; all handlers are `RotatingFileHandler` (5 MB × 3) |
| `logger.py` | `get_logger(module)`, `log(msg, level, module)`, `log_and_print(msg, level, module)` | project-wide logging façade |
**`Core.log_backend.Logs`** methods:
`__init__(default_log_level, main_log_file)`, `_choose_fallback_log_dir`, `_normalize_log_name`, `_is_writable_path`, `_with_fallback`, `resolve_log_path`, `construct_path`, `log_to_file`, `LogEngine(folder, log_name)`, `LogsMessages(msg, type, folder, file)`, `print_and_log(...)`.
---
## `API/` — subsystem wrappers (Brain imports only from here)
| File | Public functions |
|---|---|
| `zmq_api.py` | `init_zmq()`, `get_socket()`, `send_vel(vx, vy, vyaw)`, `gradual_stop()`, `send_cmd(cmd)` |
| `camera_api.py` | `start_camera()`, `stop_camera()`, `get_frame()`, `get_frame_age()`, `get_raw_refs()`, `camera_loop()` |
| `llava_api.py` | `call_llava(prompt, img_b64, num_predict, use_history)`, `ask(command, img_b64)`, `ask_goal(goal, img_b64)`, `ask_talk(command, img_b64, facts)`, `ask_verify(target, condition, img_b64)`, `ask_patrol(img_b64)`, `remember_fact(fact)`, `add_to_history(user_msg, assistant_msg)`, `parse_json(raw)` |
| `yolo_api.py` | `init_yolo(raw_frame_ref, frame_lock)` + 8 stubs rebound on success: `yolo_sees`, `yolo_count`, `yolo_closest`, `yolo_summary`, `yolo_ppe_violations`, `yolo_person_too_close`, `yolo_all_classes`, `yolo_fps` |
| `odometry_api.py` | `init_odometry(zmq_sock)`, `get_position()` |
| `memory_api.py` | `init_memory()`, `log_cmd(cmd, response, duration)`, `log_detection(class_name, position, distance)`, `place_save(name)`, `place_goto(name)`, `places_list_str()` |
| `arm_api.py` | `do_arm(action)` — G1 GR00T stub |
| `imgsearch_api.py` | `init_imgsearch(get_frame_fn, send_vel_fn, gradual_stop_fn, llava_fn, yolo_sees_fn, model)`, `get_searcher()` |
| `audio_api.py` | **class `AudioAPI`** (see below) |
| `lidar_api.py` | `init_lidar()`, `obstacle_ahead(radius)`, `get_slam_pose()`, `get_nav_cmd()`, `get_loc_state()`, `get_safety_reasons()`, `get_lidar_status()`, `get_client()`, `stop_lidar()` |
**`API.audio_api.AudioAPI`** methods:
`speak(text, lang="en")`, `record(seconds)` → np.int16 array, `play_pcm(audio_16k)`, `save_recording(audio, name)`, properties `is_speaking`, `is_available`. Internal: `_init_sdk`, `_mute_mic`, `_unmute_mic`, `_resample`, `_play_pcm`, `_record_builtin`, `_record_parec`.
---
## `Voice/` — mic + TTS + STT
| File | Public API |
|---|---|
| `builtin_mic.py` | `_find_g1_local_ip()` + **class `BuiltinMic`** |
| `builtin_tts.py` | **class `BuiltinTTS`** |
| `marcus_voice.py` | **class `State`** (IDLE/WAKE_HEARD/PROCESSING/SPEAKING), **class `VoiceModule`** |
**`Voice.builtin_mic.BuiltinMic`** — G1 UDP multicast mic:
`__init__(group, port, buf_max, read_timeout)`, `start()`, `stop()`, `read_chunk(num_bytes)`, `read_seconds(seconds)`, `flush()`; internal `_recv_loop`.
**`Voice.builtin_tts.BuiltinTTS`** — wraps `AudioClient.TtsMaker`:
`__init__(audio_client, default_speaker_id=0)`, `speak(text, speaker_id=None, block=True)`.
**`Voice.marcus_voice.VoiceModule`** — Whisper wake + command STT:
`__init__(audio_api, on_command)`, `start()`, `stop()`, props `state`, `is_running`. Internal state machine: `_do_idle`, `_do_wake_heard`, `_do_processing`; helpers `_load_whisper`, `_transcribe`, `_check_wake_word`, `_record_chunk`, `_record_until_silence`, `_voice_loop`.
---
## `Vision/`
| File | Public API |
|---|---|
| `marcus_yolo.py` | `start_yolo(raw_frame_ref, frame_lock)`, `yolo_sees(class, min_confidence)`, `yolo_count(class)`, `yolo_closest(class)`, `yolo_all_classes()`, `yolo_summary()`, `yolo_ppe_violations()`, `yolo_person_too_close(threshold)`, `yolo_is_running()`, `yolo_fps()`, `_resolve_device(requested)` + **class `Detection`** |
| `marcus_imgsearch.py` | **class `ImageSearch`** + prompt helpers `_build_compare_prompt`, `_build_single_prompt`, image utils `_load_image_b64`, `_numpy_to_b64`, `_resize_b64` |
**`Vision.marcus_yolo.Detection`** — a single detection's metadata:
`__init__(class_name, confidence, x1, y1, x2, y2, frame_w, frame_h)`, props `size_ratio`, `position`, `distance_estimate`, method `to_dict()`, `__repr__`.
**`Vision.marcus_imgsearch.ImageSearch`** — rotate-and-compare search:
`__init__(get_frame_fn, send_vel_fn, gradual_stop_fn, llava_fn, yolo_sees_fn, model)`, `search(ref_img_b64, hint, max_steps, direction, yolo_prefilter)`, `search_from_file(image_path, hint, max_steps, direction)`, `abort()`.
---
## `Navigation/`
| File | Public API |
|---|---|
| `goal_nav.py` | `navigate_to_goal(goal, max_steps)`; private `_goal_yolo_target`, `_extract_extra_condition`, `_verify_condition` |
| `patrol.py` | `patrol(duration_minutes, alert_callback)` |
| `marcus_odometry.py` | **class `Odometry`** |
**`Navigation.marcus_odometry.Odometry`** — ROS2 `/dog_odom` + dead-reckoning fallback:
- lifecycle: `__init__()`, `start(zmq_sock)`, `stop()`, `reset()`, `is_running()`
- pose: `get_position()``{x, y, heading, source}`, `get_distance_from_start()`, `status_str()`, `__repr__`
- movement: `walk_distance(meters, speed, direction)`, `turn_degrees(degrees, speed)`, `navigate_to(x, y, heading, speed)`, `return_to_start(speed)`, `patrol_route(waypoints, speed, loop)`
- internal: `_init_own_zmq`, `_reset_state`, `_try_start_ros2`, `_dead_reckoning_loop`, `_send_vel`, `_gradual_stop`, `_check_stale`, `_time_based_walk`, `_time_based_turn`
---
## `Brain/`
| File | Public API |
|---|---|
| `marcus_brain.py` | `init_brain()`, `process_command(cmd)``{type, speak, action, elapsed}`, `get_brain_status()`, `shutdown()`, `run_terminal()`; private `_init_voice`, `_handle_llava`, `_handle_talk`, `_handle_search`, `_warmup_llava` |
| `command_parser.py` | `init_autonomous(auto_instance)`, `try_local_command(cmd)` (regex-table dispatcher); `_print_help`, `_print_examples` |
| `executor.py` | `execute(d)`, `execute_action(move, duration)`, `move_step(move, duration)`, `merge_actions(actions)`; `_obstacle_check` |
| `marcus_memory.py` | **class `Memory`** + utils `_read_json`, `_write_json`, `_sanitize_name`, `_fuzzy_match`, `_new_session_id` |
**`Brain.marcus_memory.Memory`** — places + sessions store, JSON-backed:
- places: `save_place(name, x, y, heading)`, `get_place(name)`, `delete_place(name)`, `list_places()`, `rename_place(old, new)`, `places_count()`
- sessions: `start_session()`, `end_session()`, `log_command(cmd, response, duration_s)`, `log_detection(class, pos, dist, x, y)`, `log_alert(type, detail)`, `get_last_command()`, `get_last_n_commands(n)`, `get_session_detections()`, `commands_count()`, `session_duration_str()`
- history: `last_session_summary()`, `previous_session_detections()`, `previous_session_places()`, `all_sessions()`
- internal: `_load_places`, `_start_autosave`, `_flush_session`, `_emergency_save`, `_write_summary`, `_prune_old_sessions`, `_get_previous_session_dir`
---
## `Autonomous/`
`marcus_autonomous.py`**class `AutonomousMode`**: patrol-and-map state machine.
- `__init__(get_frame_fn, send_vel_fn, gradual_stop_fn, yolo_sees_fn, yolo_summary_fn, yolo_all_classes_fn, yolo_closest_fn, odom_fn, call_llava_fn, patrol_prompt, mem, models_dir)`
- lifecycle: `enable()`, `disable()`, `is_enabled()`, `status()`, `save_snapshot()`
- internal: `_explore_loop`, `_move_forward`, `_turn`, `_assess_scene`, `_create_map_dir`, `_save_observations`, `_save_path`, `_save_frame`, `_generate_summary`, `_save_session`, `_print_summary`
---
## `Server/` & `Bridge/`
| File | Public API |
|---|---|
| `Server/marcus_server.py` | `async handler(websocket)`, `async broadcast_frames()`, `async run_server(host, port)`, `main()`; helpers `_get_interface_ips`, `_check_lidar` |
| `Bridge/ros2_zmq_bridge.py` | **class `ROS2ZMQBridge`** (`_vel_cb`, `_cmd_cb`) + `main()` — standalone tool, not imported by Marcus |
---
## Suggested import surface for integration code
If you're writing glue on top of Marcus, the stable public surface is:
```python
# brain orchestration
from Brain.marcus_brain import init_brain, process_command, shutdown
# direct robot control (bypasses brain)
from API.zmq_api import init_zmq, send_vel, gradual_stop, send_cmd
from API.yolo_api import yolo_sees, yolo_summary, yolo_closest
from API.camera_api import start_camera, get_frame
from API.audio_api import AudioAPI # .speak(text), .record(seconds)
from API.lidar_api import init_lidar, obstacle_ahead, get_slam_pose, stop_lidar
from API.memory_api import init_memory, log_cmd, log_detection, place_save, place_goto
# voice pipeline
from Voice.marcus_voice import VoiceModule
from Voice.builtin_mic import BuiltinMic
from Voice.builtin_tts import BuiltinTTS
# navigation
from Navigation.goal_nav import navigate_to_goal
from Navigation.patrol import patrol
from Navigation.marcus_odometry import Odometry
# autonomous mode
from Autonomous.marcus_autonomous import AutonomousMode
```
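For orientation, a minimal glue loop on top of that surface, assuming only the return-dict keys documented for `process_command` (`type`, `speak`, `action`, `elapsed`); treat it as a sketch, not a reference client.
```python
# Smallest possible integration sketch on top of the brain surface.
from Brain.marcus_brain import init_brain, process_command, shutdown

init_brain()                       # raises at startup on hardware issues (e.g. missing CUDA)
try:
    result = process_command("what do you see")
    # documented shape: {"type", "speak", "action", "elapsed"}
    print(result.get("speak"), f"({result.get('elapsed', 0):.1f}s)")
finally:
    shutdown()
```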
---
## Convention notes
- **Brain and everything above it must import from `API.*` only** (never directly from `Vision/`, `Navigation/`, `Voice/`). Enforced by convention, not by the language.
- **Underscore prefix = private.** `_foo` is internal; don't import it outside the module unless you're the test harness.
- **Stub rebinding pattern** (e.g. `API.yolo_api`): module-level placeholders get replaced with real implementations inside `init_*()` on success. If init fails, callers keep getting the safe stub (e.g. `yolo_sees` returns `False`). A minimal sketch follows this list.
- **Error returns are consistent per layer**: API layer returns `None` / empty dict / `False`; Brain layer returns structured dicts (`{"type","speak","action","elapsed"}`); no exception leaks to the terminal loop except at startup (`init_brain()` will raise to surface hardware issues like missing CUDA).
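A minimal sketch of that rebinding pattern, assuming hypothetical names (`model_loader`, `_yolo_sees_real`, `model.names`); the real `API/yolo_api.py` differs in detail, only the pattern itself (safe stub replaced inside `init_*()`) comes from the note above.
```python
# Sketch of the stub-rebinding convention: names here are hypothetical.
def yolo_sees(cls: str) -> bool:
    """Module-level placeholder: the safe stub callers get before init."""
    return False

def init_yolo(model_loader=None) -> bool:
    """Rebind yolo_sees to a real implementation if the detector loads."""
    global yolo_sees
    try:
        model = model_loader()                   # hypothetical loader; raises on failure
    except Exception:
        return False                             # init failed: callers keep the stub
    def _yolo_sees_real(cls: str) -> bool:       # hypothetical real implementation
        return cls in getattr(model, "names", [])
    yolo_sees = _yolo_sees_real                  # rebind the module-level name
    return True
```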

View File

@ -3,7 +3,7 @@
**Robot persona:** Sanad (wake word + self-intro) **Robot persona:** Sanad (wake word + self-intro)
**Updated:** 2026-04-21 **Updated:** 2026-04-21
One map of every data path from sensor to motor, voice to speech. Cross-reference with `architecture.md` (what each file is) and `MARCUS_API.md` (function signatures). One map of every data path from sensor to motor, voice to speech. Cross-reference with `architecture.md` (what each file is), `functions.md` (exact function signatures — AST-generated), and `MARCUS_API.md` (usage examples + JSON schemas).
--- ---
@ -167,8 +167,13 @@ Brain/command_parser.py — responds to "lidar status" queries
| `yolo_device`, `yolo_half` | config_Vision.json | `cuda` / FP16 (hard-required; CPU not allowed) | | `yolo_device`, `yolo_half` | config_Vision.json | `cuda` / FP16 (hard-required; CPU not allowed) |
| `mic.backend` | config_Voice.json | `builtin_udp` (G1 array) or `pactl_parec` (Hollyland fallback) | | `mic.backend` | config_Voice.json | `builtin_udp` (G1 array) or `pactl_parec` (Hollyland fallback) |
| `mic_udp.group/port` | config_Voice.json | where to join the G1 audio multicast | | `mic_udp.group/port` | config_Voice.json | where to join the G1 audio multicast |
| `mic_udp.read_timeout_sec` | config_Voice.json | `BuiltinMic.read_chunk` budget (default 0.04 s) |
| `tts.backend` | config_Voice.json | `builtin_ttsmaker` (only supported option) | | `tts.backend` | config_Voice.json | `builtin_ttsmaker` (only supported option) |
| `stt.wake_words_en` | config_Voice.json | Whisper matcher (`sanad` + variants) | | `stt.wake_words_en` | config_Voice.json | Whisper matcher (`sanad` + variants) |
| `timeout_ms`, `stale_threshold_s`, `reconnect_delay_s` | config_Camera.json | RealSense frame timeout, reconnect trigger, initial backoff |
| `default_max_steps`, `step_delay_s`, `rotate_speed`, `min_steps_warmup` | config_ImageSearch.json | image-guided search rotation cadence (wired into `Vision/marcus_imgsearch.py`) |
| `default_walk_speed`, `dist_tolerance`, `angle_tolerance`, `safety_timeout_mult`, `dr_update_hz` | config_Odometry.json | precise motion control (wired into `Navigation/marcus_odometry.py`) |
| `MARCUS_LOG_MAX_BYTES`, `MARCUS_LOG_BACKUP_COUNT`, `MARCUS_LOG_DIR` | env vars | log rotation size, backup count, log directory override |
--- ---

View File

@ -1,93 +0,0 @@
import ollama, base64, json, time
import pyrealsense2 as rs
import numpy as np, cv2
import zmq
HOLOSOMA_IP = "127.0.0.1"
HOLOSOMA_PORT = 5556
def capture_frame():
pipeline = rs.pipeline()
cfg = rs.config()
cfg.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
pipeline.start(cfg)
for _ in range(5):
pipeline.wait_for_frames()
frames = pipeline.wait_for_frames()
img = np.asanyarray(frames.get_color_frame().get_data())
pipeline.stop()
cv2.imwrite('/tmp/marcus_eye.jpg', img)
return '/tmp/marcus_eye.jpg'
def ask_qwen(image_path, command):
with open(image_path, 'rb') as f:
img_b64 = base64.b64encode(f.read()).decode()
prompt = f"""أنت ماركس، روبوت ذكي يتنقل داخل المبنى.
You are Marcus, an intelligent indoor navigation robot.
User command: "{command}"
Look at the camera image. Respond with ONLY one line:
FORWARD [0.1 to 1.0 meters]
LEFT [5 to 45 degrees]
RIGHT [5 to 45 degrees]
STOP [reason]
ARRIVED"""
response = ollama.chat(
model='qwen2.5vl:7b',
messages=[{
'role': 'user',
'content': prompt,
'images': [img_b64]
}]
)
return response['message']['content'].strip().split('\n')[0]
def send_to_robot(action):
print(f" Robot action: {action}")
parts = action.upper().split()
if not parts:
return
cmd = parts[0]
val = float(parts[1]) if len(parts) > 1 else 0
if cmd == "FORWARD":
print(f" Walking forward {val}m")
elif cmd == "LEFT":
print(f" Turning left {val} degrees")
elif cmd == "RIGHT":
print(f" Turning right {val} degrees")
elif cmd == "STOP":
print(f" Stopping: {' '.join(parts[1:])}")
elif cmd == "ARRIVED":
print(" Destination reached!")
if __name__ == "__main__":
print("=" * 50)
print("Marcus Navigation Brain")
print("Powered by Qwen2.5-VL on Jetson Orin NX")
print("Speaks Arabic + English")
print("=" * 50)
print("Type your command (or 'quit'):\n")
while True:
try:
cmd = input("Command: ").strip()
if cmd.lower() in ['quit', 'exit', 'خروج']:
print("Marcus shutting down.")
break
if not cmd:
continue
print("Capturing camera frame...")
frame = capture_frame()
print("Qwen2.5-VL thinking...")
t0 = time.time()
action = ask_qwen(frame, cmd)
elapsed = time.time() - t0
print(f"Decision ({elapsed:.1f}s): {action}")
send_to_robot(action)
print()
except KeyboardInterrupt:
print("\nStopped.")
break

View File

@ -1,3 +1,13 @@
FROM qwen2.5vl:7b # Optional Ollama Modelfile for building a pre-tuned Marcus VL model.
PARAMETER num_ctx 1024 # Not used at runtime — Marcus loads the stock `qwen2.5vl:3b` tag directly
PARAMETER num_predict 64 # via Config/config_Brain.json::ollama_model. Keep this file only if you
# want to build a custom image with `ollama create marcus-vl -f Modelfile`.
#
# Runtime parameters (num_batch, num_ctx, num_predict) are overridden on
# every call by API/llava_api.py, so the PARAMETER lines below are just
# defaults for `ollama run` shell use.
FROM qwen2.5vl:3b
PARAMETER num_ctx 2048
PARAMETER num_predict 120
PARAMETER num_batch 128

View File

@ -23,11 +23,13 @@ Import in marcus_brain.py
Standalone test Standalone test
--------------- ---------------
/home/unitree/miniconda3/envs/marcus/bin/python3 ~/Models_marcus/marcus_odometry.py conda run -n marcus python3 Navigation/marcus_odometry.py
Date : April 2026 Date : April 2026
""" """
import os
import sys
import time import time
import math import math
import json import json
@ -36,24 +38,33 @@ import zmq
# ══════════════════════════════════════════════════════════════════════════════ # ══════════════════════════════════════════════════════════════════════════════
# CONFIGURATION # CONFIGURATION — loaded from Config/config_Odometry.json
# ══════════════════════════════════════════════════════════════════════════════ # ══════════════════════════════════════════════════════════════════════════════
ZMQ_HOST = "127.0.0.1" _PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
ZMQ_PORT = 5556 if _PROJECT_DIR not in sys.path:
ROS2_ODOM_TOPIC = "/dog_odom" sys.path.insert(0, _PROJECT_DIR)
ODOM_INTERFACE = "eth0" try:
from Core.config_loader import load_config
_cfg = load_config("Odometry")
except Exception:
_cfg = {}
ZMQ_HOST = str(_cfg.get("zmq_host", "127.0.0.1"))
ZMQ_PORT = int(_cfg.get("zmq_port", 5556))
ROS2_ODOM_TOPIC = str(_cfg.get("ros2_odom_topic", "/dog_odom"))
ODOM_INTERFACE = str(_cfg.get("odom_interface", "eth0"))
# Movement defaults # Movement defaults
DEFAULT_WALK_SPEED = 0.25 # m/s — slower = more accurate DEFAULT_WALK_SPEED = float(_cfg.get("default_walk_speed", 0.25)) # m/s — slower = more accurate
DEFAULT_TURN_SPEED = 0.25 # rad/s DEFAULT_TURN_SPEED = float(_cfg.get("default_turn_speed", 0.25)) # rad/s
DIST_TOLERANCE = 0.05 # meters — stop within 5cm DIST_TOLERANCE = float(_cfg.get("dist_tolerance", 0.05)) # meters — stop within 5cm
ANGLE_TOLERANCE = 2.0 # degrees — stop within 2° ANGLE_TOLERANCE = float(_cfg.get("angle_tolerance", 2.0)) # degrees — stop within 2°
SAFETY_TIMEOUT_MULT = 3.0 # timeout = (distance/speed) × this SAFETY_TIMEOUT_MULT = float(_cfg.get("safety_timeout_mult", 3.0)) # timeout = (distance/speed) × this
ODOM_STALE_WARN = 1.0 # warn if odom not updated for this many seconds ODOM_STALE_WARN = 1.0 # warn if odom not updated for this many seconds (internal only)
# Dead reckoning # Dead reckoning
DR_UPDATE_HZ = 20 # integration rate DR_UPDATE_HZ = int(_cfg.get("dr_update_hz", 20)) # integration rate
# ══════════════════════════════════════════════════════════════════════════════ # ══════════════════════════════════════════════════════════════════════════════

237
README.md Normal file
View File

@ -0,0 +1,237 @@
# Marcus — Humanoid Robot AI Base
**Project:** Marcus | **Persona:** Sanad | **Organisation:** YS Lootah Technology, Dubai
A compact, offline-first AI base for the **Unitree G1 EDU** humanoid, running on a
**Jetson Orin NX 16 GB**. The codebase is intentionally generic — the same brain
drives both **housekeeping** and **AI tour-guide** robot deployments just by
changing prompts, wake words and which subsystems are enabled.
```
run_marcus.py ← terminal entrypoint (keyboard + voice)
Server/marcus_server.py ← same brain over WebSocket for a remote client
```
---
## What the robot is made of
Humanoid robot control ≠ one giant model. It's a **mesh of specialised models
and services**, each responsible for one part of the body, stitched together by
a Python brain.
| Body part | Purpose | Model / service | Where it runs |
|---|---|---|---|
| **Brain** (reason, speak, decide) | Parse commands, reason about vision, pick actions | **Qwen2.5-VL 3B** via Ollama | Jetson GPU |
| **Eyes** (see) | Real-time object/person detection | **YOLOv8m** (CUDA, FP16, 320 px, ~22 FPS) | Jetson GPU |
| **Eyes** (understand) | Open-ended scene understanding, reading, goal-verify | **Qwen2.5-VL** (same brain model) | Jetson GPU |
| **Ears** (hear) | Always-on wake-word + command transcription | **Whisper tiny** (wake) + **Whisper small** (STT) | Jetson CPU/GPU |
| **Mouth** (speak) | On-robot TTS, no internet needed | **Unitree `TtsMaker`** (G1 firmware) | G1 body speaker |
| **Legs** (walk) | 29-DoF locomotion + balance | **Holosoma** RL policy (separate process, ONNX) | Jetson CPU |
| **Hands** (gesture) | Arm & hand actions | **GR00T N1.5** — pending; `API/arm_api.py` is a stub today | Jetson GPU (future) |
| **Inner ear** (map) | SLAM, obstacle detection, localisation | **Livox Mid-360** LiDAR + custom SLAM engine | Jetson (subprocess) |
| **Memory** | Places, session history, facts | JSON files under `Data/Brain/Sessions/` | Jetson disk |
Nothing here reaches the cloud. The only internet-adjacent bits (edge-tts,
Gemini) were removed — everything runs on the robot's own compute.
---
## How it hears, sees, speaks
```
Inputs ─────────────────────────────── Outputs
Voice ─┐ ┌─► Speech (G1 speaker)
│ │
Text ──┼──► Brain (Qwen2.5-VL) ──────────────┤
│ │ │
Camera ─┘ ▼ ├─► Legs (Holosoma → G1)
├─► YOLO (fast class check) │
├─► LiDAR (obstacles / pose) └─► Arms/hands (stub → GR00T)
└─► Memory (places / history)
```
Three input modalities, same command loop:
- **Voice** — say "**Sanad, what do you see?**" → wake word fires, Whisper transcribes, brain answers through the G1 speaker.
- **Text** — type the same command into `run_marcus.py`'s terminal.
- **WebSocket (remote)** — `Client/marcus_cli.py` or `Client/marcus_client.py` (Tkinter GUI) sends commands from a workstation.
All three feed the same `Brain.marcus_brain.process_command(cmd)` function.
---
## Two example deployments from the same codebase
### Housekeeping robot
Set up for indoor chores and presence awareness.
- **Prompts** tuned for *"empty the bin, close the window, check the bathroom, remind me at 6 pm"* intents.
- **Places** memory pre-loaded with named rooms (`kitchen`, `living room`, `hallway`).
- **Patrol mode** runs safety loops looking for hazards / unsafe PPE.
- **Autonomous mode** (`auto on`) explores the space, maps it, logs observations.
- YOLO classes: `person, chair, couch, bed, dining table, bottle, cup, laptop, keyboard, mouse, backpack, handbag, suitcase` (the defaults).
### AI tour-guide robot
Same hardware, different prompts + wake word.
- **Prompts** rewrite: *"You are a museum guide. When a visitor asks about an exhibit, describe it in two sentences and invite them to ask follow-ups."*
- **Places** memory pre-loaded with exhibit waypoints; `patrol: exhibit_A → exhibit_B → exit` follows a tour.
- Wake word changed in `config_Voice.json::stt.wake_words_en`.
- Image search (`search/ photo_of_exhibit.jpg`) lets visitors hold up a printed map; the robot navigates to the matching location.
- YOLO classes trimmed to people-only if the venue doesn't need object safety.
**What you change to switch use cases:**
1. `Config/marcus_prompts.yaml` — persona + task descriptions
2. `Config/config_Voice.json::stt.wake_words_en` — the name people call the robot
3. `Config/config_Vision.json::tracked_classes` — relevant object set
4. `Config/config_Brain.json::subsystems.{lidar,voice,imgsearch,autonomous}` — enable what you need
5. Data under `Data/History/Places/places.json` — learned locations
No code changes required for either deployment.
---
## Layer architecture
```
run_marcus.py / Server/marcus_server.py ← entrypoints
Brain/ (marcus_brain, command_parser, executor, memory)
│ imports only from ↓
API/ (one file per subsystem — stable public surface)
│ wraps ↓
┌───────┴────────┬──────────────┬────────────┐
▼ ▼ ▼ ▼
Vision/ Navigation/ Voice/ Lidar/
YOLO, imgsearch goal_nav, builtin_mic, SLAM engine
patrol, odom builtin_tts, (subprocess)
marcus_voice
Core/ (env, config, log_backend, logger)
Config/ + .env
```
**Rule:** Brain talks to subsystems only via `API/*`. You can replace YOLO with
any detector, swap Qwen for another VL model, or plug in a different TTS —
without touching Brain code — by implementing the same API surface.
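As an illustration of that rule, a hedged sketch of a drop-in replacement module that keeps the three `API.yolo_api` names used elsewhere in this document; the shared `_detections` state, the record fields and the signatures are assumptions, only the public names come from the import surface.
```python
# Hypothetical replacement for API/yolo_api.py that keeps the same public
# names (yolo_sees, yolo_summary, yolo_closest) so Brain code never changes.
_detections = []   # assumption: filled by some other detector's capture loop

def yolo_sees(cls: str) -> bool:
    return any(d["class"] == cls for d in _detections)

def yolo_summary() -> str:
    return ", ".join(sorted({d["class"] for d in _detections})) or "nothing"

def yolo_closest(cls: str):
    hits = [d for d in _detections if d["class"] == cls]
    return min(hits, key=lambda d: d["dist"]) if hits else None
```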
---
## Quick start (Jetson, after `conda activate marcus`)
```bash
# 1) Launch Holosoma (locomotion) in hsinference env
source ~/.holosoma_deps/miniconda3/bin/activate hsinference
cd ~/holosoma && python3 src/holosoma_inference/.../run_policy.py ...
# 2) Start Ollama
ollama serve > /tmp/ollama.log 2>&1 &
sleep 3
# 3) Start Marcus
conda activate marcus
cd ~/Marcus
python3 run_marcus.py
```
You should see:
```
[YOLO] Model loaded ✅ | device: cuda (Orin) | FP16 | 19 tracked classes
================================================
SANAD AI BRAIN — READY
================================================
model : qwen2.5vl:3b
yolo : True voice : True
odometry : True memory : True
lidar : True camera : 424x240@15
```
Say **"Sanad"** to wake, or type at the `Command:` prompt.
See `Doc/controlling.md` for the full command reference, `Doc/environment.md`
for the Jetson install recipe, and `Doc/pipeline.md` for the end-to-end
dataflow diagrams.
---
## Hardware target
| Component | Model |
|---|---|
| Humanoid | Unitree G1 EDU, 29 DoF |
| Compute | Jetson Orin NX 16 GB (Ampere iGPU, FP16 tensor cores, capability 8.7) |
| Software stack | JetPack 5.1.1 / CUDA 11.4 / cuDNN 8.6 / Python 3.8 / torch 2.1.0-nv23.06 / ultralytics 8.4.21 / Ollama 0.20.0 |
| Camera | Intel RealSense D435 (424×240 @ 15 fps) |
| LiDAR | Livox Mid-360 |
| Microphone | G1 on-board array (UDP multicast, no external USB mic) |
| Speaker | G1 body speaker (via Unitree RPC) |
---
## Repository layout (top-level)
```
Marcus/
├── run_marcus.py entrypoint — terminal mode
├── README.md this file
├── Core/ foundation — config + env + logging
├── Config/ 12 JSON files + marcus_prompts.yaml
├── API/ subsystem wrappers (stable public surface)
├── Brain/ orchestrator, parser, executor, memory
├── Vision/ YOLO + image-guided search
├── Navigation/ goal nav, patrol, odometry
├── Voice/ built-in mic, built-in TTS, Whisper loop
├── Autonomous/ exploration state machine
├── Lidar/ SLAM engine (subprocess)
├── Server/ WebSocket interface
├── Client/ terminal CLI + Tkinter GUI
├── Bridge/ optional ROS2 ↔ ZMQ bridge (standalone tool)
├── Models/ yolov8m.pt + optional Ollama Modelfile
├── Data/ runtime-generated sessions / places / maps
├── logs/ rotating per-module log files (5 MB × 3)
└── Doc/ architecture, API, environment, pipeline,
controlling, functions — all current
```
---
## Docs
- `Doc/architecture.md` — project structure + layer-by-layer breakdown
- `Doc/controlling.md` — startup sequence + command reference
- `Doc/environment.md` — verified Jetson software stack + install recipe
- `Doc/pipeline.md` — boot, voice, vision, movement, LiDAR dataflow
- `Doc/functions.md` — every callable in the codebase (AST-generated)
- `Doc/MARCUS_API.md` — developer API reference with JSON schemas
---
## Design principles
1. **Offline-first.** No cloud dependency in the default path. Internet can be
wired in for specific backends (e.g. future edge-tts) but it's opt-in.
2. **GPU mandatory.** YOLO refuses to start on CPU — Marcus is a safety-critical
robot, and silently downgrading to 2 FPS vision is worse than failing loudly.
3. **Swappable subsystems.** Each API file can be reimplemented behind the same
public functions. Replace YOLO with DETR, Qwen with LLaVA, TtsMaker with
Piper — Brain never notices.
4. **Config over code.** Tunables live in `Config/*.json` / `.yaml`; 156 config
keys are all actively referenced (0 orphans). Change persona, wake word,
enabled subsystems, or thresholds without touching a `.py` file.
5. **English only.** Arabic support was removed because the G1 firmware's TTS
silently maps Arabic to Chinese. If bilingual TTS is ever needed again,
see `git log` for the removed Piper / edge-tts paths.
---
*Marcus — YS Lootah Technology | Dubai*

View File

@ -31,7 +31,7 @@ Usage in marcus_brain.py
Standalone test Standalone test
--------------- ---------------
python3 ~/Models_marcus/marcus_imgsearch.py --image /path/to/photo.jpg python3 Vision/marcus_imgsearch.py --image /path/to/photo.jpg
Date : April 2026 Date : April 2026
""" """
@ -39,10 +39,11 @@ Date : April 2026
import base64 import base64
import io import io
import json import json
import time
import threading
import os import os
import re import re
import sys
import threading
import time
from pathlib import Path from pathlib import Path
import numpy as np import numpy as np
@ -55,17 +56,23 @@ except ImportError:
# ══════════════════════════════════════════════════════════════════════════════ # ══════════════════════════════════════════════════════════════════════════════
# CONFIGURATION # CONFIGURATION (loaded from Config/config_ImageSearch.json)
# ══════════════════════════════════════════════════════════════════════════════ # ══════════════════════════════════════════════════════════════════════════════
DEFAULT_MAX_STEPS = 60 # max rotation steps before giving up _PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
STEP_DELAY = 0.15 # min gap between YOLO checks (was 0.4 — reduced if _PROJECT_DIR not in sys.path:
# because the rotation thread paces motion already sys.path.insert(0, _PROJECT_DIR)
# and each LLaVA call is 600-1500 ms of real work) try:
ROTATE_SPEED = 0.25 # rad/s rotation speed during search from Core.config_loader import load_config
MIN_STEPS_WARMUP = 3 # skip first N steps (stale frame) _cfg = load_config("ImageSearch")
MATCH_CONFIDENCE_THR = 0.6 # LLaVA confidence threshold (not used directly, except Exception:
# but kept for future scoring) _cfg = {}
DEFAULT_MAX_STEPS = int(_cfg.get("default_max_steps", 60)) # rotation steps before giving up
STEP_DELAY = float(_cfg.get("step_delay_s", 0.15)) # min gap between YOLO checks
ROTATE_SPEED = float(_cfg.get("rotate_speed", 0.25)) # rad/s during search
MIN_STEPS_WARMUP = int(_cfg.get("min_steps_warmup", 3)) # skip first N steps (stale frame)
MATCH_CONFIDENCE_THR = 0.6 # LLaVA confidence threshold (reserved for future scoring)
# ══════════════════════════════════════════════════════════════════════════════ # ══════════════════════════════════════════════════════════════════════════════

View File

@ -9,7 +9,7 @@ Usage (imported):
from marcus_yolo import start_yolo, yolo_sees, yolo_count, yolo_closest, yolo_summary from marcus_yolo import start_yolo, yolo_sees, yolo_count, yolo_closest, yolo_summary
Usage (standalone): Usage (standalone):
/home/unitree/miniconda3/envs/marcus/bin/python3 ~/Models_marcus/marcus_yolo.py conda run -n marcus python3 Vision/marcus_yolo.py
""" """
import os import os
@ -360,8 +360,13 @@ def _camera_loop(raw_frame_ref, frame_lock, cam_alive):
raw_frame_ref[0] = frame.copy() raw_frame_ref[0] = frame.copy()
except Exception as e: except Exception as e:
print(f"Camera: {e} — reconnecting...") print(f"Camera: {e} — reconnecting...")
try: pipeline.stop() # pipeline may already be stopped or never started; swallow only
except: pass # the expected RealSense "pipeline not started" error, not every
# possible failure mode.
try:
pipeline.stop()
except RuntimeError:
pass
time.sleep(2.0) time.sleep(2.0)

View File

@ -24,18 +24,31 @@ Ported from Project/Sanad/voice/audio_io.py (Sanad's production implementation).
from __future__ import annotations from __future__ import annotations
import os
import socket import socket
import struct import struct
import subprocess import subprocess
import sys
import threading import threading
import time import time
from typing import Optional from typing import Optional
# Load defaults from Config/config_Voice.json::mic_udp so they can be tuned
# without editing code. Falls back to the hardcoded literals below if the
# config isn't reachable (e.g., when imported from a test harness).
_PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _PROJECT_DIR not in sys.path:
sys.path.insert(0, _PROJECT_DIR)
try:
from Core.config_loader import load_config
_mic_udp = (load_config("Voice") or {}).get("mic_udp", {}) or {}
except Exception:
_mic_udp = {}
DEFAULT_GROUP = "239.168.123.161" DEFAULT_GROUP = str(_mic_udp.get("group", "239.168.123.161"))
DEFAULT_PORT = 5555 DEFAULT_PORT = int(_mic_udp.get("port", 5555))
DEFAULT_BUF_MAX = 64_000 # ~2 s of 16 kHz mono int16 DEFAULT_BUF_MAX = int(_mic_udp.get("buffer_max_bytes", 64_000)) # ~2 s of 16 kHz mono int16
DEFAULT_READ_TIMEOUT = 0.04 # 40 ms budget per read_chunk call DEFAULT_READ_TIMEOUT = float(_mic_udp.get("read_timeout_sec", 0.04)) # budget per read_chunk call
SAMPLE_RATE = 16_000 # hardware rate — do not change SAMPLE_RATE = 16_000 # hardware rate — do not change

View File

@ -25,6 +25,8 @@ import os
import sys import sys
import threading import threading
import time import time
from logging.handlers import RotatingFileHandler
import numpy as np import numpy as np
# ─── PATH + CONFIG ─────────────────────────────────────── # ─── PATH + CONFIG ───────────────────────────────────────
@ -38,12 +40,17 @@ from Core.config_loader import load_config
LOG_DIR = os.path.join(PROJECT_ROOT, "logs") LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
os.makedirs(LOG_DIR, exist_ok=True) os.makedirs(LOG_DIR, exist_ok=True)
# Idempotent — only the first call per process installs handlers. # basicConfig is idempotent. Whichever of audio_api / marcus_voice imports
# first installs the rotating handler; the other no-ops. Both loggers then
# share the same file handle with stdlib's per-handler thread lock.
logging.basicConfig( logging.basicConfig(
level=logging.INFO, level=logging.INFO,
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s", format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
handlers=[ handlers=[
logging.FileHandler(os.path.join(LOG_DIR, "voice.log")), RotatingFileHandler(
os.path.join(LOG_DIR, "voice.log"),
maxBytes=5_000_000, backupCount=3, encoding="utf-8",
),
logging.StreamHandler(), logging.StreamHandler(),
], ],
) )