Update 2026-04-22 10:57:22

This commit is contained in:
kassam 2026-04-22 10:57:23 +04:00
parent af1d0c1b8a
commit ac9271c62b
27 changed files with 683 additions and 383 deletions

View File

@ -32,6 +32,8 @@ import sys
import threading
import time
import wave
from logging.handlers import RotatingFileHandler
import numpy as np
# ─── PATH + CONFIG ───────────────────────────────────────
@ -45,15 +47,18 @@ from Core.config_loader import load_config
LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
os.makedirs(LOG_DIR, exist_ok=True)
# Note: logging.basicConfig() only takes effect on the first call per process.
# If the voice module already configured logging (common path via run_marcus.py),
# this call is a no-op. When audio_api is used standalone, it wires logs to
# logs/voice.log + stderr.
# logging.basicConfig is idempotent per process: if marcus_voice configured
# the root logger first, this call is a no-op and both modules share the same
# RotatingFileHandler (stdlib FileHandlers hold an internal lock, so concurrent
# writes to voice.log are safe). Rotation caps voice.log at 5 MB × 3 backups.
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
handlers=[
logging.FileHandler(os.path.join(LOG_DIR, "voice.log")),
RotatingFileHandler(
os.path.join(LOG_DIR, "voice.log"),
maxBytes=5_000_000, backupCount=3, encoding="utf-8",
),
logging.StreamHandler(),
],
)
@ -305,14 +310,31 @@ class AudioAPI:
subprocess.run(["pactl", "set-source-volume", source, "100%"], capture_output=True)
log.info("Recording %.1fs from mic source %s (parec)", seconds, source)
proc = None
raw = b""
try:
proc = subprocess.Popen(
["parec", "-d", source,
f"--format={fmt}", f"--rate={rate}", f"--channels={channels}", "--raw"],
stdout=subprocess.PIPE,
)
time.sleep(seconds)
finally:
# Always kill parec — an exception in time.sleep (Ctrl-C / signal)
# would otherwise leave an orphaned recorder process running.
if proc is not None:
try:
proc.terminate()
raw = proc.stdout.read()
proc.wait(timeout=1.0)
except Exception as e:
log.warning("parec cleanup error: %s", e)
# Last-resort SIGKILL — suppress only OSError (process
# already exited) so we don't mask other bugs.
try:
proc.kill()
except OSError:
pass
audio = np.frombuffer(raw, dtype=np.int16)
log.info("Recorded: %d samples, std=%.0f", len(audio), audio.std())

View File

@ -12,10 +12,13 @@ from Core.logger import log
_cfg = load_config("Camera")
CAM_WIDTH = _cfg["width"]
CAM_HEIGHT = _cfg["height"]
CAM_FPS = _cfg["fps"]
CAM_QUALITY = _cfg["jpeg_quality"]
CAM_WIDTH = int(_cfg.get("width", 424))
CAM_HEIGHT = int(_cfg.get("height", 240))
CAM_FPS = int(_cfg.get("fps", 15))
CAM_QUALITY = int(_cfg.get("jpeg_quality", 70))
CAM_TIMEOUT_MS = int(_cfg.get("timeout_ms", 5000)) # pipeline.wait_for_frames timeout
CAM_STALE_THRESHOLD = float(_cfg.get("stale_threshold_s", 10.0)) # trip reconnect after this long without a frame
CAM_RECONNECT_DELAY = float(_cfg.get("reconnect_delay_s", 2.0)) # initial backoff; doubles up to 10 s
# Shared state
latest_frame_b64 = [None]
@ -36,7 +39,7 @@ def camera_loop():
"""Capture RealSense frames continuously with auto-reconnect."""
import pyrealsense2 as rs
backoff = 2.0
backoff = CAM_RECONNECT_DELAY
while camera_alive[0]:
pipeline = None
try:
@ -44,14 +47,14 @@ def camera_loop():
cfg = rs.config()
cfg.enable_stream(rs.stream.color, CAM_WIDTH, CAM_HEIGHT, rs.format.bgr8, CAM_FPS)
pipeline.start(cfg)
backoff = 2.0
backoff = CAM_RECONNECT_DELAY
_cam_connected[0] = True
print("Camera connected")
log(f"Camera connected {CAM_WIDTH}x{CAM_HEIGHT}@{CAM_FPS}", "info", "camera")
while camera_alive[0]:
try:
frames = pipeline.wait_for_frames(timeout_ms=5000)
frames = pipeline.wait_for_frames(timeout_ms=CAM_TIMEOUT_MS)
color_frame = frames.get_color_frame()
if not color_frame:
continue
@ -72,8 +75,8 @@ def camera_loop():
_cam_last_frame_time[0] = time.time()
except Exception:
if time.time() - _cam_last_frame_time[0] > 10.0:
print(" [Camera] No frame for 10s — reconnecting...")
if time.time() - _cam_last_frame_time[0] > CAM_STALE_THRESHOLD:
print(f" [Camera] No frame for {CAM_STALE_THRESHOLD:.0f}s — reconnecting...")
break
except Exception as e:

View File

@ -25,7 +25,7 @@ How it works
Files saved
-----------
~/Models_marcus/map/map_001_2026-04-05/
Data/Brain/maps/map_001_YYYY-MM-DD/
observations.json [{step, time, x, y, area_type, objects, observation}]
path.json [{x, y, heading, t}] full path walked
summary.txt auto-generated LLaVA summary

View File

@ -81,7 +81,7 @@ def execute(d: dict):
actions = merge_actions(d.get("actions", []))
arm_cmd = d.get("arm", None)
print(f"Marcus: {speak}")
print(f"Sanad: {speak}")
if not actions:
gradual_stop()

View File

@ -41,18 +41,12 @@ from Autonomous.marcus_autonomous import AutonomousMode
_cfg = load_config("Brain")
_TALK_PATTERNS = [
# English questions
# Questions
r"^(?:what|who|where|when|how|why|is|are|do|does|can|tell|describe|explain|show|analyze)\s+",
# English identity/facts
# Identity / facts told to the robot
r"^(?:my name is|i am|call me|that is|that person|note that|remember that)\s+",
# English acknowledgements
# Acknowledgements
r"^(?:ok|okay|yes|no|good|nice|great|thanks|thank you|got it|understood|correct)\s*[!.]*$",
# Arabic questions — ماذا ترى / كيف حالك / من أنت / ما اسمك / صف / هل
r"^(?:ماذا|ما\s|كيف|من\s|أين|لماذا|هل|صف|اشرح|وصف|كم)\s*",
# Arabic identity/facts — اسمي / أنا / تذكر
r"^(?:اسمي|أنا\s|تذكر\s|سجل\s|لاحظ\s)",
# Arabic acknowledgements — حسنا / شكرا / ممتاز / صح / مفهوم
r"^(?:حسنا|شكرا|ممتاز|صح|مفهوم|تمام|أحسنت|جيد|نعم|لا)\s*[!.]*$",
]
_NAT_GOAL_RE = re.compile(
@ -271,9 +265,10 @@ def process_command(cmd: str) -> dict:
return {"type": "talk", "speak": speak, "action": "TALK", "elapsed": time.time() - t0}
# ── Greeting ─────────────────────────────────────────────────────────
if re.match(r"^(?:hi+|hey+|hello+|hola|salam|marhaba|sup|yo+|ahlan|السلام عليكم|مرحبا|أهلا|هلا|يا هلا)\s*[!.]*$", cmd, re.IGNORECASE):
if re.match(r"^(?:hi+|hey+|hello+|sup|yo+|greetings|good (?:morning|afternoon|evening))\s*[!.]*$",
cmd, re.IGNORECASE):
response = "Hello! I am Sanad. How can I help you?"
print(f"Marcus: {response}")
print(f"Sanad: {response}")
add_to_history(cmd, response)
log_cmd(cmd, response)
return {"type": "greeting", "speak": response, "action": "GREETING", "elapsed": 0}
@ -282,7 +277,7 @@ def process_command(cmd: str) -> dict:
if re.match(r"^(?:come(?:\s+back)?(?:\s+to\s+me)?|come\s+here|get\s+closer|approach|move\s+closer)\s*[!.]*$", cmd, re.IGNORECASE):
execute_action("forward", 2.0)
resp = "Coming to you"
print(f"Marcus: {resp}")
print(f"Sanad: {resp}")
add_to_history(cmd, resp)
log_cmd(cmd, resp)
return {"type": "move", "speak": resp, "action": "FORWARD 2.0s", "elapsed": 2.0}
@ -300,7 +295,7 @@ def process_command(cmd: str) -> dict:
execute_action("right" if turn_dir == "right" else "left", turn_deg / 18.0)
execute_action(walk_dir, walk_dur)
resp = f"Turned {turn_dir} {int(turn_deg)} degrees then moved {walk_dir}"
print(f"Marcus: {resp}")
print(f"Sanad: {resp}")
add_to_history(cmd, resp)
log_cmd(cmd, resp)
return {"type": "move", "speak": resp, "action": f"MULTI {turn_dir}+{walk_dir}", "elapsed": time.time() - t0}
@ -350,7 +345,7 @@ def _handle_talk(cmd):
pass
d = ask_talk(cmd, img, facts=facts_str)
sp = d.get("speak", "")
print(f"Marcus: {sp}")
print(f"Sanad: {sp}")
log_cmd(cmd, sp)
return sp
except Exception as ex:

View File

@ -10,8 +10,8 @@ Purpose : Persistent memory across sessions.
Folder structure
----------------
~/Models_marcus/places.json persistent named places (all sessions)
~/Models_marcus/sessions/
Data/History/Places/places.json persistent named places (all sessions)
Data/Brain/Sessions/
session_001_2026-04-05/
commands.json [{time, cmd, response, duration_s}]
detections.json [{time, class, position, distance, x, y}]

View File

@ -4,7 +4,7 @@ Subscribes to /cmd_vel and holosoma/other_input (Python 3.8 + ROS2 Foxy)
Forwards to Holosoma via ZMQ PUB socket (Python 3.10 hsinference)
Run: source /opt/ros/foxy/setup.bash
python3.8 ~/Models_marcus/ros2_zmq_bridge.py
python3.8 ~/Marcus/Bridge/ros2_zmq_bridge.py
"""
import json, time
import rclpy

View File

@ -190,7 +190,7 @@ def _handle_message(data):
print(f" [{ts}] {color}{C.BOLD}{action}{C.RESET} {C.GRAY}({elapsed}s){C.RESET}")
if speak:
print(f" {C.CYAN}Marcus: {speak}{C.RESET}")
print(f" {C.CYAN}Sanad: {speak}{C.RESET}")
elif t == "camera_config":
p = data.get("profile", "?")

View File

@ -14,8 +14,5 @@
"num_predict_patrol": 100,
"num_predict_talk": 80,
"num_predict_verify": 10,
"warmup_num_predict": 5,
"main_prompt": "You are Sanad, a humanoid robot. Look at the image and follow the command.\n{facts}\n\nCommand: \"{command}\"\n\nReply with ONLY this JSON — no markdown, no explanation:\n{{\"actions\":[{{\"move\":\"forward|backward|left|right|stop\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"one sentence\",\"abort\":null}}\n\nRULES:\n- actions is a list of movement steps, max duration 5.0s each\n- move: \"forward\" \"backward\" \"left\" \"right\" \"stop\"\n- arm: \"wave\" \"raise_right\" \"raise_left\" \"clap\" \"high_five\" \"hug\" \"heart\" \"shake_hand\" \"face_wave\" or null\n- arm is NEVER a move value\n- questions/descriptions: actions=[]\n- obstacle < 0.5m: abort = \"obstacle detected\"\n- \"90 degrees\" = 5.0s | \"45 degrees\" = 2.5s | \"1 step\" = 1.0s\n\nEXAMPLES:\n\"turn right\" -> {{\"actions\":[{{\"move\":\"right\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Turning right\",\"abort\":null}}\n\"turn right 90 degrees\" -> {{\"actions\":[{{\"move\":\"right\",\"duration\":5.0}}],\"arm\":null,\"speak\":\"Turning 90 degrees\",\"abort\":null}}\n\"move back then left\" -> {{\"actions\":[{{\"move\":\"backward\",\"duration\":2.0}},{{\"move\":\"left\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Moving back then left\",\"abort\":null}}\n\"wave\" -> {{\"actions\":[],\"arm\":\"wave\",\"speak\":\"Waving\",\"abort\":null}}\n\"raise your right arm\" -> {{\"actions\":[],\"arm\":\"raise_right\",\"speak\":\"Raising right arm\",\"abort\":null}}\n\"walk forward and wave\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":\"wave\",\"speak\":\"Walking and waving\",\"abort\":null}}\n\"what do you see\" -> {{\"actions\":[],\"arm\":null,\"speak\":\"I see...\",\"abort\":null}}\n\"stop\" -> {{\"actions\":[{{\"move\":\"stop\",\"duration\":0}}],\"arm\":null,\"speak\":\"Stopping\",\"abort\":null}}\n\"come to me\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Coming to you\",\"abort\":null}}\n\"come back\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Coming back\",\"abort\":null}}\n\"come here\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Coming\",\"abort\":null}}\n\"get closer\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":1.0}}],\"arm\":null,\"speak\":\"Moving closer\",\"abort\":null}}\n\"go away\" -> {{\"actions\":[{{\"move\":\"backward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Moving away\",\"abort\":null}}\n\nCommand: \"{command}\"\nJSON:",
"goal_prompt": "You are Sanad navigating toward a goal.\n\nGOAL: \"{goal}\"\n\nLook at the image. Have you reached the goal?\n\nReply ONLY this JSON:\n{{\"reached\":false,\"next_move\":\"left\",\"duration\":0.5,\"speak\":\"what you see\"}}\n\nRULES:\n- reached: true ONLY if you clearly see the goal target right now\n- next_move: \"forward\" \"left\" \"right\"\n- duration: 0.3 to 0.8 seconds\n- Default next_move: \"left\" to keep scanning\n\nGOAL: \"{goal}\"\nJSON:",
"patrol_prompt": "You are Sanad, an HSE inspection robot on autonomous patrol.\n\nLook at the camera and assess the scene.\n\nReply ONLY this JSON:\n{{\"observation\":\"one sentence\",\"alert\":null,\"next_move\":\"forward\",\"duration\":1.0}}\n\nRULES:\n- alert = null if safe\n- alert = \"PPE: no helmet\" if person without helmet\n- alert = \"PPE: no vest\" if person without safety vest\n- alert = \"Hazard: description\" for other hazards\n- next_move: \"forward\" \"left\" \"right\"\n- duration: 0.5 to 2.0s\n\nJSON:"
"warmup_num_predict": 5
}

View File

@ -1,6 +1,6 @@
{
"default_max_steps": 60,
"step_delay_s": 0.4,
"step_delay_s": 0.15,
"rotate_speed": 0.25,
"min_steps_warmup": 3
}

View File

@ -1,8 +0,0 @@
{
"base_dir": "Data",
"sessions_dir": "Data/Sessions",
"places_file": "Data/Places/places.json",
"max_cmd_len": 500,
"max_sessions": 50,
"detect_dedupe_s": 5.0
}

View File

@ -5,7 +5,7 @@
# Model : Qwen2.5-VL 3B (Ollama, fully offline)
#
# Placeholders:
# {command} — the user's typed/spoken command (Arabic or English)
# {command} — the user's typed/spoken command (English)
# {goal} — the navigation goal description
# {facts} — known facts from memory (e.g. "Kassam is the programmer")
# {target} — YOLO class being searched (e.g. "person")
@ -16,7 +16,7 @@
# LANGUAGE NOTE:
# All prompts instruct Qwen to detect the command language automatically
# and respond in the same language. No code-side language detection needed.
# Arabic and English are handled natively by the model.
# English-only by policy — Arabic support was removed 2026-04-21.
# =============================================================================
@ -51,17 +51,17 @@ main_prompt: |
- Merge consecutive same-direction steps into one:
"forward 2s + forward 2s" → "forward 4s" — NOT two separate steps
- Duration reference:
"1 step" / "خطوة" = 1.0s
"tiny step" / "خطوة صغيرة" = 0.3s
"1 step" = 1.0s
"tiny step" = 0.3s
"half a step" = 0.5s
"2 steps" / "خطوتين" = 2.0s
"3 steps" / "ثلاث خطوات" = 3.0s
"45 degrees" / "٤٥ درجة" = 2.5s
"90 degrees" / "٩٠ درجة" = 5.0s
"180 degrees" / "استدر" = 10.0s
"2 steps" = 2.0s
"3 steps" = 3.0s
"45 degrees" = 2.5s
"90 degrees" = 5.0s
"180 degrees" = 10.0s
- Speed modifiers:
"slowly" / "ببطء" / "بهدوء" → multiply duration by 0.5
"quickly" / "fast" / "بسرعة" → multiply duration by 1.5 (cap at 5.0s)
"slowly" → multiply duration by 0.5
"quickly" / "fast" → multiply duration by 1.5 (cap at 5.0s)
── ARM RULES ──────────────────────────────────────────────────────────────
- arm: one value from the list above, or null
@ -72,12 +72,10 @@ main_prompt: |
── SPEAK RULES ────────────────────────────────────────────────────────────
- speak: one sentence, first person, natural
- Describe what you are doing OR what you see — never both in one sentence
- For pure movement: "Turning right" / "أدور لليمين"
- For pure movement: "Turning right"
- For vision questions: describe what the camera shows
- Never repeat the command word-for-word
- CRITICAL: match the language of the command exactly
Arabic command → Arabic speak
English command → English speak
- Always respond in English
── SAFETY RULES ───────────────────────────────────────────────────────────
- abort = null for all normal commands
@ -87,8 +85,8 @@ main_prompt: |
- When aborting: actions = [] and explain in speak
── CONTEXT RULES ──────────────────────────────────────────────────────────
- "that person" / "him" / "her" / "ذلك الشخص" → resolve from conversation or camera
- "it" / "there" / "هناك" → resolve from last command context
- "that person" / "him" / "her" → resolve from conversation or camera
- "it" / "there" → resolve from last command context
- If ambiguous → choose the most reasonable safe interpretation
══ ENGLISH EXAMPLES ═══════════════════════════════════════════════════════
@ -190,113 +188,6 @@ main_prompt: |
"walk into the wall"
→ {{"actions":[],"arm":null,"speak":"I cannot do that safely","abort":"unsafe command"}}
══ ARABIC EXAMPLES ════════════════════════════════════════════════════════
حركة أساسية:
"تقدم"
→ {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"أتقدم للأمام","abort":null}}
"تراجع للخلف"
→ {{"actions":[{{"move":"backward","duration":2.0}}],"arm":null,"speak":"أتراجع للخلف","abort":null}}
"دور يمين"
→ {{"actions":[{{"move":"right","duration":2.0}}],"arm":null,"speak":"أدور لليمين","abort":null}}
"دور يسار"
→ {{"actions":[{{"move":"left","duration":2.0}}],"arm":null,"speak":"أدور لليسار","abort":null}}
"قف"
→ {{"actions":[{{"move":"stop","duration":0}}],"arm":null,"speak":"أتوقف الآن","abort":null}}
درجات:
"دور يمين ٩٠ درجة"
→ {{"actions":[{{"move":"right","duration":5.0}}],"arm":null,"speak":"أدور لليمين ٩٠ درجة","abort":null}}
"دور يسار ٤٥ درجة ببطء"
→ {{"actions":[{{"move":"left","duration":1.25}}],"arm":null,"speak":"أدور لليسار ببطء","abort":null}}
"استدر ١٨٠ درجة"
→ {{"actions":[{{"move":"right","duration":10.0}}],"arm":null,"speak":"أستدير ١٨٠ درجة","abort":null}}
خطوات:
"تقدم خطوة واحدة"
→ {{"actions":[{{"move":"forward","duration":1.0}}],"arm":null,"speak":"أتقدم خطوة واحدة","abort":null}}
"تقدم خطوتين"
→ {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"أتقدم خطوتين","abort":null}}
"تراجع ثلاث خطوات"
→ {{"actions":[{{"move":"backward","duration":3.0}}],"arm":null,"speak":"أتراجع ثلاث خطوات","abort":null}}
"تقدم قليلا"
→ {{"actions":[{{"move":"forward","duration":0.5}}],"arm":null,"speak":"أتقدم قليلا","abort":null}}
خطوات متعددة:
"تقدم ثم دور يمين"
→ {{"actions":[{{"move":"forward","duration":2.0}},{{"move":"right","duration":2.0}}],"arm":null,"speak":"أتقدم ثم أدور لليمين","abort":null}}
"دور يمين ٩٠ درجة ثم تراجع خطوتين"
→ {{"actions":[{{"move":"right","duration":5.0}},{{"move":"backward","duration":2.0}}],"arm":null,"speak":"أدور يمين ٩٠ درجة ثم أتراجع خطوتين","abort":null}}
"تراجع ثم دور يسار"
→ {{"actions":[{{"move":"backward","duration":2.0}},{{"move":"left","duration":2.0}}],"arm":null,"speak":"أتراجع ثم أدور لليسار","abort":null}}
اقتراب / ابتعاد:
"تعال إلي"
→ {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"آتي إليك","abort":null}}
"اقترب"
→ {{"actions":[{{"move":"forward","duration":1.0}}],"arm":null,"speak":"أقترب منك","abort":null}}
"ابتعد"
→ {{"actions":[{{"move":"backward","duration":2.0}}],"arm":null,"speak":"أبتعد عنك","abort":null}}
ذراعين:
"لوح بيدك"
→ {{"actions":[],"arm":"wave","speak":"أُلوّح بيدي","abort":null}}
"صفق"
→ {{"actions":[],"arm":"clap","speak":"أُصفق","abort":null}}
"أعطني خمسة"
→ {{"actions":[],"arm":"high_five","speak":"خمسة!","abort":null}}
"ارفع يدك اليمنى"
→ {{"actions":[],"arm":"raise_right","speak":"أرفع يدي اليمنى","abort":null}}
"عانقني"
→ {{"actions":[{{"move":"forward","duration":1.0}}],"arm":"hug","speak":"آتي لأعانقك","abort":null}}
"صافحني"
→ {{"actions":[],"arm":"shake_hand","speak":"أمد يدي للمصافحة","abort":null}}
"تقدم ولوح"
→ {{"actions":[{{"move":"forward","duration":2.0}}],"arm":"wave","speak":"أتقدم وأُلوّح","abort":null}}
رؤية / أسئلة:
"ماذا ترى"
→ {{"actions":[],"arm":null,"speak":"أرى شخصا يجلس أمام طاولة مع جهاز كمبيوتر","abort":null}}
"صف الغرفة"
→ {{"actions":[],"arm":null,"speak":"أرى مكتبا به طاولات وكراسي وشاشات","abort":null}}
"هل يوجد أحد هنا"
→ {{"actions":[],"arm":null,"speak":"نعم، أرى شخصا واحدا أمامي","abort":null}}
"انظر حولك"
→ {{"actions":[{{"move":"left","duration":3.0}}],"arm":null,"speak":"أنظر حولي","abort":null}}
هوية:
"اسمي قصام"
→ {{"actions":[],"arm":null,"speak":"أهلا قصام، سأتذكر اسمك","abort":null}}
"من أنت"
→ {{"actions":[],"arm":null,"speak":"أنا ماركوس، روبوت مساعد من YS Lootah Technology في دبي","abort":null}}
أمان:
"اصطدم بالحائط"
→ {{"actions":[],"arm":null,"speak":"لا أستطيع فعل ذلك بأمان","abort":"unsafe command"}}
Command: "{command}"
JSON:
@ -368,24 +259,6 @@ goal_prompt: |
Camera: chair visible but far away at edge of frame
→ {{"reached":false,"next_move":"forward","duration":0.5,"speak":"I can see a chair far ahead — moving closer","confidence":"medium"}}
── EXAMPLES (Arabic) ──────────────────────────────────────────────────────
Goal: "قف عندما ترى شخصا"
Camera: ممر فارغ
→ {{"reached":false,"next_move":"left","duration":0.4,"speak":"أرى ممرا فارغا — لا يوجد أحد","confidence":"high"}}
Goal: "قف عندما ترى شخصا"
Camera: شخص يجلس
→ {{"reached":true,"next_move":"left","duration":0.0,"speak":"أرى شخصا بوضوح — أتوقف","confidence":"high"}}
Goal: "ابحث عن لاب توب"
Camera: طاولة بدون لاب توب
→ {{"reached":false,"next_move":"left","duration":0.4,"speak":"أرى طاولة وكراسي لكن لا يوجد لاب توب","confidence":"high"}}
GOAL: "{goal}"
JSON:
# ══════════════════════════════════════════════════════════════════════════════
# PATROL PROMPT
# Used for: autonomous office exploration — auto on / auto off
@ -515,44 +388,11 @@ talk_prompt: |
→ {{"actions":[],"arm":null,"speak":"I was built by YS Lootah Technology in Dubai, and my programmer is Kassam","abort":null}}
"what can you do"
→ {{"actions":[],"arm":null,"speak":"I can move through the environment, identify objects and people, navigate to goals, and respond in both Arabic and English","abort":null}}
→ {{"actions":[],"arm":null,"speak":"I can move through the environment, identify objects and people, navigate to goals, and respond in English","abort":null}}
"ok" / "good" / "thanks"
→ {{"actions":[],"arm":null,"speak":"Ready for your next command","abort":null}}
── ARABIC EXAMPLES ────────────────────────────────────────────────────────
"ماذا ترى"
→ {{"actions":[],"arm":null,"speak":"أرى شخصا يجلس أمام طاولة مع جهاز كمبيوتر وشاشة كبيرة","abort":null}}
"صف ما أمامك"
→ {{"actions":[],"arm":null,"speak":"أمامي مكتب به شاشة ولوحة مفاتيح وشخص يعمل","abort":null}}
"هل يوجد أحد في الغرفة"
→ {{"actions":[],"arm":null,"speak":"نعم، أرى شخصا واحدا يجلس أمام محطة العمل","abort":null}}
"اسمي قصام"
→ {{"actions":[],"arm":null,"speak":"أهلا وسهلا قصام، سأتذكر اسمك","abort":null}}
"أنا مبرمجك"
→ {{"actions":[],"arm":null,"speak":"مفهوم، أنت مبرمجي — جاهز لتلقي تعليماتك","abort":null}}
"كيف حالك"
→ {{"actions":[],"arm":null,"speak":"جميع أنظمتي تعمل بشكل طبيعي وأنا جاهز لمساعدتك","abort":null}}
"ما اسمك"
→ {{"actions":[],"arm":null,"speak":"اسمي ماركوس، روبوت مساعد من YS Lootah Technology في دبي","abort":null}}
"من بناك"
→ {{"actions":[],"arm":null,"speak":"بنتني شركة YS Lootah Technology في دبي، ومبرمجي هو قصام","abort":null}}
"شكرا" / "حسنا" / "ممتاز"
→ {{"actions":[],"arm":null,"speak":"جاهز للأمر التالي","abort":null}}
Command: "{command}"
JSON:
# ══════════════════════════════════════════════════════════════════════════════
# VERIFY PROMPT
# Used for: two-stage goal detection — confirm extra condition after YOLO finds class
@ -676,8 +516,4 @@ image_search_text_prompt: |
Camera: laptop clearly on desk
→ {{"found":true,"confidence":"high","position":"center","description":"Laptop visible on desk at center of frame"}}
Hint: "شخص يرتدي قميصا أزرق"
Camera: شخص بقميص أزرق واضح
→ {{"found":true,"confidence":"high","position":"center","description":"أرى شخصا يرتدي قميصا أزرق بوضوح في وسط الإطار"}}
JSON:

View File

@ -1,8 +1,24 @@
import logging
import os
from logging.handlers import RotatingFileHandler
from pathlib import Path
# Rotation policy shared by every log file this backend creates:
# 5 MB per file, keep 3 rotations (logs/brain.log, brain.log.1, .2, .3).
# Tune both via env vars if you need larger logs on the robot.
_ROT_MAX_BYTES = int(os.environ.get("MARCUS_LOG_MAX_BYTES", 5_000_000))
_ROT_BACKUP_COUNT = int(os.environ.get("MARCUS_LOG_BACKUP_COUNT", 3))
def _rotating_handler(path: str) -> RotatingFileHandler:
"""FileHandler with size-based rotation — prevents unbounded growth."""
return RotatingFileHandler(
path,
maxBytes=_ROT_MAX_BYTES,
backupCount=_ROT_BACKUP_COUNT,
encoding="utf-8",
)
class Logs:
@ -11,7 +27,10 @@ class Logs:
self.default_log_level = default_log_level
self.log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
self.base_dir = str(Path(__file__).resolve().parents[1])
self.default_logs_dir = os.path.join(self.base_dir, "Logs")
# The canonical log directory is "logs" (lowercase) — matches what
# every module writing via stdlib logging expects. "Logs" (capital L)
# was historically used by a parallel implementation and is gone.
self.default_logs_dir = os.path.join(self.base_dir, "logs")
self.fallback_log_dir = self._choose_fallback_log_dir()
self.mainloggerfile = self.resolve_log_path(main_log_file)
self.logger = None
@ -30,7 +49,7 @@ class Logs:
self.main_logger.removeHandler(handler)
os.makedirs(os.path.dirname(self.mainloggerfile), exist_ok=True)
main_handler = logging.FileHandler(self.mainloggerfile)
main_handler = _rotating_handler(self.mainloggerfile)
main_handler.setFormatter(logging.Formatter(self.log_format))
main_handler.setLevel(self.default_log_level)
self.main_logger.addHandler(main_handler)
@ -138,7 +157,7 @@ class Logs:
if isinstance(handler, logging.FileHandler):
self.logger.removeHandler(handler)
handler = logging.FileHandler(full_path)
handler = _rotating_handler(full_path)
handler.setFormatter(logging.Formatter(self.log_format))
handler.setLevel(self.default_log_level)
self.logger.addHandler(handler)
@ -152,9 +171,12 @@ class Logs:
temp_logger.setLevel(self.default_log_level)
temp_logger.propagate = False # Prevent printing to terminal
# Re-use the existing handler if it's already attached to the
# same file (by absolute path). Prevents handler accumulation
# when this function is called from long-running loops.
if not any(isinstance(h, logging.FileHandler) and h.baseFilename == full_path
for h in temp_logger.handlers):
handler = logging.FileHandler(full_path)
handler = _rotating_handler(full_path)
handler.setFormatter(logging.Formatter(self.log_format))
temp_logger.addHandler(handler)

View File

@ -7,11 +7,12 @@
> **What changed since the early draft (April 4):** The project was restructured
> from two monolithic scripts (`marcus_llava.py` + `marcus_yolo.py`) into a
> layered architecture. See `Doc/architecture.md` for the current file tree and
> `Doc/environment.md` for the verified Jetson software stack, exact library
> versions, and GPU bring-up recipe. This reference still describes the
> function-level semantics (inputs/outputs/examples) — treat any file path in
> this document as illustrative and cross-check the actual module. Recent
> layered architecture. See `Doc/architecture.md` for the current file tree,
> `Doc/environment.md` for the verified Jetson software stack, `Doc/pipeline.md`
> for end-to-end dataflow, and **`Doc/functions.md` for the authoritative
> function inventory** (always generated from AST — treat it as the source of
> truth for signatures). This reference describes the semantics (usage, JSON
> schemas, examples); cross-check `functions.md` for exact signatures. Recent
> deltas called out inline below.
### Recent API deltas (2026-04-21)
@ -32,6 +33,11 @@
| Subsystem flags | `Config/config_Brain.json::subsystems.{lidar, voice, imgsearch, autonomous}` | `init_brain()` skips any subsystem with `false`. Defaults: lidar+voice+autonomous ON, imgsearch OFF. |
| Robot persona → Sanad | Multiple | Wake words `["sanad","sannad","sanat","sunnat"]`; all prompts say "You are Sanad"; banner reads `SANAD AI BRAIN — READY`; hardcoded self-intro says "I am Sanad". Project/file/module names unchanged. |
| Logger rename | `Core/log_backend.py` (was `Core/Logger.py`) | Case-only collision with `Core/logger.py` removed — repo now clones cleanly on macOS/Windows. Public API unchanged: `from Core.logger import log`. |
| Log rotation everywhere | `Core/log_backend.py`, `API/audio_api.py`, `Voice/marcus_voice.py` | All `FileHandler`s swapped for `RotatingFileHandler` (5 MB × 3 backups, tunable via `MARCUS_LOG_MAX_BYTES` / `MARCUS_LOG_BACKUP_COUNT`). Prevents unbounded log growth on the Jetson. `default_logs_dir` pinned to lowercase `logs/`. |
| English-only policy | `Brain/marcus_brain.py`, `Config/marcus_prompts.yaml`, `Config/config_Voice.json` | Arabic talk-pattern and greeting regexes removed; 5.8 KB of Arabic prompt examples stripped from `marcus_prompts.yaml`; Arabic wake words removed from config. `AudioAPI.speak(text, lang='en')` — only `'en'` accepted; non-ASCII is rejected. |
| Dead-code + orphan sweep | `Legacy/marcus_nav.py`, `Config/config_Memory.json` | Deleted. Config count 13 → 12 JSON + 1 YAML. |
| Orphan config keys wired up | `Vision/marcus_imgsearch.py`, `Voice/builtin_mic.py`, `API/camera_api.py`, `Navigation/marcus_odometry.py` | `config_ImageSearch.json` (4 keys), `config_Voice.mic_udp.read_timeout_sec`, `config_Camera.{timeout_ms, stale_threshold_s, reconnect_delay_s}`, `config_Odometry.json` (10 keys) are all read by code now. **0 orphan keys across 156 total.** |
| Subprocess leak fix | `API/audio_api.py::_record_parec` | `Popen` now wrapped in try/finally; orphan `parec` processes can't survive Ctrl-C/exceptions. Last-resort `proc.kill()` catches only `OSError`. |
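A minimal usage sketch of the English-only audio surface described above (assuming `AudioAPI()` needs no constructor arguments; check `API/audio_api.py` for the real initialisation):

```python
from API.audio_api import AudioAPI

audio = AudioAPI()                       # assumption: no required ctor args
audio.speak("Hello, I am Sanad.")        # lang defaults to "en"; non-ASCII text is rejected
clip = audio.record(seconds=3.0)         # np.int16 samples from the G1 mic or the parec fallback
audio.save_recording(clip, "mic_check")  # "mic_check" is an illustrative name
```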
---

View File

@ -19,9 +19,13 @@
- **Subsystem flags** — `config_Brain.json::subsystems.{lidar, voice, imgsearch, autonomous}` let you selectively skip heavy boot stages.
- **Conditional inner-loop sleeps** — goal_nav / autonomous / imgsearch no longer pay unconditional per-step naps.
- **Core/Logger.py → Core/log_backend.py** — case-only name collision with `logger.py` resolved; repo clones cleanly on macOS/Windows.
- **Log rotation on every file handler** — `Core.log_backend` + stdlib voice handlers now use `RotatingFileHandler` (5 MB × 3 backups, env-tunable). `default_logs_dir` fixed to lowercase `logs/` so the capital-L folder no longer gets recreated.
- **Robot persona = "Sanad"** — wake words, prompts, banner, and self-intro all use "Sanad". Project identity ("Marcus") remains in file names, class names, directory, logs.
- **English-only** — all Arabic talk/greeting regexes, Arabic prompt examples (≈5.8 KB), and Arabic wake words removed. 0 non-ASCII chars in live code/config.
- **Orphan config cleanup** — `Config/config_Memory.json` deleted (never loaded). `config_ImageSearch.json`, `config_Odometry.json` (10 keys), plus 3 unused `config_Camera` keys and `mic_udp.read_timeout_sec` are now wired into their respective modules. 0 orphan keys across 156 total (12 config files).
- **Dead-code pruning** — `Legacy/marcus_nav.py` removed. Config count 13 → 12 JSON + `marcus_prompts.yaml`.
See `Doc/environment.md` for the verified Jetson software stack and `Doc/pipeline.md` for the end-to-end data flow.
See `Doc/environment.md` for the verified Jetson software stack, `Doc/pipeline.md` for the end-to-end data flow, and `Doc/functions.md` for the full function inventory.
---
@ -64,7 +68,8 @@ Marcus/
│ ├── config_ImageSearch.json # search defaults
│ ├── config_Voice.json # mic (builtin_udp|pactl_parec), TTS backend, wake words, mic_udp group/port
│ ├── config_LiDAR.json # Livox Mid-360 connection + SLAM engine params
│ └── marcus_prompts.yaml # All Qwen-VL prompts (main, goal, patrol, talk, verify)
│ └── marcus_prompts.yaml # All Qwen-VL prompts (main, goal, patrol, talk, verify, 2× imgsearch)
│ # Total: 12 JSON files + 1 YAML. (config_Memory.json removed 2026-04-21.)
├── API/ # Interface layer — one file per subsystem
│ ├── zmq_api.py # ZMQ PUB socket: init_zmq(), send_vel(), gradual_stop(), send_cmd()
@ -139,11 +144,19 @@ Marcus/
│ ├── server.log
│ ├── zmq.log
│ └── main.log
└── Legacy/ # Archived originals
└── marcus_nav.py # Original standalone prototype
│ # All log files rotate at 5 MB × 3 backups (tunable via
│ # MARCUS_LOG_MAX_BYTES / MARCUS_LOG_BACKUP_COUNT env vars).
└── Doc/ # Documentation
├── architecture.md # This file
├── controlling.md # Startup + command reference
├── environment.md # Jetson versions + install recipe
├── pipeline.md # End-to-end dataflow diagrams
├── functions.md # Full function inventory
└── MARCUS_API.md # Developer API reference
```
*Removed 2026-04-21: `Legacy/marcus_nav.py` (dead code + Arabic).*
---
## Layer Architecture

View File

@ -254,3 +254,33 @@ Most values configurable in `Config/config_Network.json` and `config_Voice.json:
See `Doc/architecture.md` for full project structure and file-by-file documentation.
See `Doc/environment.md` for the verified Jetson software stack.
See `Doc/pipeline.md` for the end-to-end data flow.
See `Doc/functions.md` for the full function inventory (AST-generated).
---
## Language policy
**English only.** Arabic was removed from the codebase on 2026-04-21:
- `Config/config_Voice.json::stt.wake_words_en` — only English variants (`sanad`, `sannad`, `sanat`, `sunnat`)
- `Config/marcus_prompts.yaml` — no Arabic examples left in any of the 7 prompts
- `API/audio_api.py::speak(text)` — rejects non-ASCII (the G1 TtsMaker silently maps Arabic to Chinese, which nobody wants)
- `Brain/marcus_brain.py` — greeting and talk-pattern regexes match English only
If you need Arabic back, the cleanest paths are either Piper TTS (offline) or edge-tts (online) — see `git log` for the removed implementations.
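The guard itself is small. A sketch of what it amounts to (illustrative only; the real check lives inside `API/audio_api.py::speak`, which may log-and-skip rather than raise):

```python
def _require_english(text: str) -> str:
    # Hypothetical helper mirroring the policy: anything non-ASCII is refused
    # before it can reach the G1 TtsMaker.
    if not text.isascii():
        raise ValueError("English-only policy: non-ASCII text rejected")
    return text
```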
---
## Logs
All `.log` files in `logs/` rotate at **5 MB × 3 backups** by default. To change:
```bash
export MARCUS_LOG_MAX_BYTES=10000000 # 10 MB per file
export MARCUS_LOG_BACKUP_COUNT=5 # keep 5 rotations
export MARCUS_LOG_DIR=/var/log/marcus # move logs off SD card
```
Per-module log files:
- `brain.log`, `camera.log`, `lidar.log`, `zmq.log`, `server.log`, `main.log` — via `Core.logger.log()`
- `voice.log` — via stdlib `logging` in `audio_api.py` + `marcus_voice.py`
- Session JSON: `Data/Brain/Sessions/session_NNN_YYYY-MM-DD/{commands,detections,alerts,places}.json`
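For reference, modules write their per-module files through the `Core.logger` façade; a minimal example (the `"camera"` tag routes the line to `camera.log`):

```python
from Core.logger import log

# level is a string ("info", "warning", ...); the third argument selects the module log file
log("Camera connected 424x240@15", "info", "camera")
```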

View File

@ -377,3 +377,10 @@ Config file (`Config/config_Vision.json`):
| 2026-04-21 | **Restructure**: moved ZMQ bind out of `API/zmq_api.py` import time into `init_zmq()`; fixes LiDAR SLAM worker spawn crash. Added loud GPU-requirement banner in `API/yolo_api.py`. Dropped `num_predict_main` 200→120. Made inner-loop sleeps in goal_nav/autonomous/imgsearch conditional. Renamed `Core/Logger.py` → `Core/log_backend.py` (case-collision fix). Updated `Doc/MARCUS_API.md` to current state. |
| 2026-04-21 | **Voice restructure**: added `Voice/builtin_mic.py` (G1 array mic via UDP multicast `239.168.123.161:5555`) and `Voice/builtin_tts.py` (thin `AudioClient.TtsMaker` wrapper). Rewired `Voice/marcus_voice.py` to use BuiltinMic. Refactored `API/audio_api.py::speak()` to use BuiltinTTS — removed ~110 lines of edge-tts + pydub + Piper plumbing. Deleted `Voice/marcus_gemini_voice.py`. Added `subsystems.{lidar,voice,imgsearch,autonomous}` gate in `config_Brain.json::init_brain()`. |
| 2026-04-21 | **Persona swap**: robot identifies as Sanad. Wake words `["sanad","sannad","sanat","sunnat"]`, `speaker.app_name="sanad"`, all Qwen prompts say "You are Sanad", banner reads `SANAD AI BRAIN — READY`, hardcoded self-intro says "I am Sanad". Project directory, class names, filenames, and `PROJECT_NAME=Marcus` env var unchanged. |
| 2026-04-21 | **English-only sweep**: stripped 5.8 KB of Arabic examples from `marcus_prompts.yaml`, removed Arabic talk-pattern and greeting regexes in `Brain/marcus_brain.py`, dropped Arabic wake words from `config_Voice.json`, changed user-facing prints `Marcus: …` → `Sanad: …` in `executor.py`, `marcus_brain.py`, `marcus_cli.py`. Verified: 0 Arabic chars in live code/config. |
| 2026-04-21 | **Logs hardened**: `Core/log_backend.py` now uses `RotatingFileHandler` (5 MB × 3 backups, env-tunable via `MARCUS_LOG_MAX_BYTES` / `MARCUS_LOG_BACKUP_COUNT`) for all three code paths (main_handler, `LogEngine`, `LogsMessages`). `API/audio_api.py` + `Voice/marcus_voice.py` also rotate `voice.log`. `default_logs_dir` fixed: `"Logs"` → `"logs"` (matches actual directory; no more case-collision recreation). |
| 2026-04-21 | **Dead code removed**: deleted `Legacy/marcus_nav.py` (unused + Arabic), deleted `Config/config_Memory.json` (orphan — never loaded). Config count: 13 → **12** JSON files + `marcus_prompts.yaml`. |
| 2026-04-21 | **Orphan config keys wired up (0 orphans remaining)**: `config_ImageSearch.json` → `Vision/marcus_imgsearch.py` (4 constants), `config_Voice.mic_udp.read_timeout_sec` → `Voice/builtin_mic.py`, `config_Camera.{timeout_ms, stale_threshold_s, reconnect_delay_s}` → `API/camera_api.py`, `config_Odometry.json` (10 keys) → `Navigation/marcus_odometry.py`. All 156 config keys now referenced by code. |
| 2026-04-21 | **Subprocess leak fix**: `AudioAPI._record_parec` now wraps `Popen` in try/finally with `terminate → wait(1.0) → kill` fallback; orphan `parec` processes can no longer survive Ctrl-C. Last-resort `proc.kill()` catches only `OSError` (not bare `except`). |
| 2026-04-21 | **Modelfile corrected**: `Models/Modelfile` now `FROM qwen2.5vl:3b` (was `:7b`) with a header explaining it's an optional build template — runtime uses `ollama pull qwen2.5vl:3b` directly. |
| 2026-04-21 | **Final verification**: 14-dimension smoke test green — no Arabic, no dead dirs, 0 orphan keys, every FileHandler rotates, no bare `except: pass`, no stale `Models_marcus` / `marcus_llava` refs, 25/25 modules import. |

175
Doc/functions.md Normal file
View File

@ -0,0 +1,175 @@
# Marcus — Function Inventory
**Robot persona:** Sanad (wake word + self-intro)
**Updated:** 2026-04-21
Every callable in the codebase, grouped by layer. Generated from AST, kept in sync with the source. See `architecture.md` for where each module lives and `pipeline.md` for how they connect.
**Totals:** 25 importable modules · 73 top-level functions · 9 public classes.
---
## `run_marcus.py` — entrypoint
Script only. Prepends `PROJECT_ROOT` to `sys.path`, then calls `Brain.marcus_brain.run_terminal()` in `__main__`.
---
## `Core/` — foundation, no external deps
| File | Function | Purpose |
|---|---|---|
| `env_loader.py` | `_find_env_file()`, `_load_dotenv(path)` | find + parse `.env` into `os.environ`; exports `PROJECT_ROOT` |
| `config_loader.py` | `load_config(name)`, `config_path(relative)` | cached reader for `Config/config_{name}.json` |
| `log_backend.py` | `_rotating_handler(path)` + **class `Logs`** | custom logging engine; all handlers are `RotatingFileHandler` (5 MB × 3) |
| `logger.py` | `get_logger(module)`, `log(msg, level, module)`, `log_and_print(msg, level, module)` | project-wide logging façade |
**`Core.log_backend.Logs`** methods:
`__init__(default_log_level, main_log_file)`, `_choose_fallback_log_dir`, `_normalize_log_name`, `_is_writable_path`, `_with_fallback`, `resolve_log_path`, `construct_path`, `log_to_file`, `LogEngine(folder, log_name)`, `LogsMessages(msg, type, folder, file)`, `print_and_log(...)`.
---
## `API/` — subsystem wrappers (Brain imports only from here)
| File | Public functions |
|---|---|
| `zmq_api.py` | `init_zmq()`, `get_socket()`, `send_vel(vx, vy, vyaw)`, `gradual_stop()`, `send_cmd(cmd)` |
| `camera_api.py` | `start_camera()`, `stop_camera()`, `get_frame()`, `get_frame_age()`, `get_raw_refs()`, `camera_loop()` |
| `llava_api.py` | `call_llava(prompt, img_b64, num_predict, use_history)`, `ask(command, img_b64)`, `ask_goal(goal, img_b64)`, `ask_talk(command, img_b64, facts)`, `ask_verify(target, condition, img_b64)`, `ask_patrol(img_b64)`, `remember_fact(fact)`, `add_to_history(user_msg, assistant_msg)`, `parse_json(raw)` |
| `yolo_api.py` | `init_yolo(raw_frame_ref, frame_lock)` + 8 stubs rebound on success: `yolo_sees`, `yolo_count`, `yolo_closest`, `yolo_summary`, `yolo_ppe_violations`, `yolo_person_too_close`, `yolo_all_classes`, `yolo_fps` |
| `odometry_api.py` | `init_odometry(zmq_sock)`, `get_position()` |
| `memory_api.py` | `init_memory()`, `log_cmd(cmd, response, duration)`, `log_detection(class_name, position, distance)`, `place_save(name)`, `place_goto(name)`, `places_list_str()` |
| `arm_api.py` | `do_arm(action)` — G1 GR00T stub |
| `imgsearch_api.py` | `init_imgsearch(get_frame_fn, send_vel_fn, gradual_stop_fn, llava_fn, yolo_sees_fn, model)`, `get_searcher()` |
| `audio_api.py` | **class `AudioAPI`** (see below) |
| `lidar_api.py` | `init_lidar()`, `obstacle_ahead(radius)`, `get_slam_pose()`, `get_nav_cmd()`, `get_loc_state()`, `get_safety_reasons()`, `get_lidar_status()`, `get_client()`, `stop_lidar()` |
**`API.audio_api.AudioAPI`** methods:
`speak(text, lang="en")`, `record(seconds)` → np.int16 array, `play_pcm(audio_16k)`, `save_recording(audio, name)`, properties `is_speaking`, `is_available`. Internal: `_init_sdk`, `_mute_mic`, `_unmute_mic`, `_resample`, `_play_pcm`, `_record_builtin`, `_record_parec`.
---
## `Voice/` — mic + TTS + STT
| File | Public API |
|---|---|
| `builtin_mic.py` | `_find_g1_local_ip()` + **class `BuiltinMic`** |
| `builtin_tts.py` | **class `BuiltinTTS`** |
| `marcus_voice.py` | **class `State`** (IDLE/WAKE_HEARD/PROCESSING/SPEAKING), **class `VoiceModule`** |
**`Voice.builtin_mic.BuiltinMic`** — G1 UDP multicast mic:
`__init__(group, port, buf_max, read_timeout)`, `start()`, `stop()`, `read_chunk(num_bytes)`, `read_seconds(seconds)`, `flush()`; internal `_recv_loop`.
**`Voice.builtin_tts.BuiltinTTS`** — wraps `AudioClient.TtsMaker`:
`__init__(audio_client, default_speaker_id=0)`, `speak(text, speaker_id=None, block=True)`.
**`Voice.marcus_voice.VoiceModule`** — Whisper wake + command STT:
`__init__(audio_api, on_command)`, `start()`, `stop()`, props `state`, `is_running`. Internal state machine: `_do_idle`, `_do_wake_heard`, `_do_processing`; helpers `_load_whisper`, `_transcribe`, `_check_wake_word`, `_record_chunk`, `_record_until_silence`, `_voice_loop`.
---
## `Vision/`
| File | Public API |
|---|---|
| `marcus_yolo.py` | `start_yolo(raw_frame_ref, frame_lock)`, `yolo_sees(class, min_confidence)`, `yolo_count(class)`, `yolo_closest(class)`, `yolo_all_classes()`, `yolo_summary()`, `yolo_ppe_violations()`, `yolo_person_too_close(threshold)`, `yolo_is_running()`, `yolo_fps()`, `_resolve_device(requested)` + **class `Detection`** |
| `marcus_imgsearch.py` | **class `ImageSearch`** + prompt helpers `_build_compare_prompt`, `_build_single_prompt`, image utils `_load_image_b64`, `_numpy_to_b64`, `_resize_b64` |
**`Vision.marcus_yolo.Detection`** — a single detection's metadata:
`__init__(class_name, confidence, x1, y1, x2, y2, frame_w, frame_h)`, props `size_ratio`, `position`, `distance_estimate`, method `to_dict()`, `__repr__`.
**`Vision.marcus_imgsearch.ImageSearch`** — rotate-and-compare search:
`__init__(get_frame_fn, send_vel_fn, gradual_stop_fn, llava_fn, yolo_sees_fn, model)`, `search(ref_img_b64, hint, max_steps, direction, yolo_prefilter)`, `search_from_file(image_path, hint, max_steps, direction)`, `abort()`.
---
## `Navigation/`
| File | Public API |
|---|---|
| `goal_nav.py` | `navigate_to_goal(goal, max_steps)`; private `_goal_yolo_target`, `_extract_extra_condition`, `_verify_condition` |
| `patrol.py` | `patrol(duration_minutes, alert_callback)` |
| `marcus_odometry.py` | **class `Odometry`** |
**`Navigation.marcus_odometry.Odometry`** — ROS2 `/dog_odom` + dead-reckoning fallback:
- lifecycle: `__init__()`, `start(zmq_sock)`, `stop()`, `reset()`, `is_running()`
- pose: `get_position()` → `{x, y, heading, source}`, `get_distance_from_start()`, `status_str()`, `__repr__`
- movement: `walk_distance(meters, speed, direction)`, `turn_degrees(degrees, speed)`, `navigate_to(x, y, heading, speed)`, `return_to_start(speed)`, `patrol_route(waypoints, speed, loop)`
- internal: `_init_own_zmq`, `_reset_state`, `_try_start_ros2`, `_dead_reckoning_loop`, `_send_vel`, `_gradual_stop`, `_check_stale`, `_time_based_walk`, `_time_based_turn`
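A hedged usage sketch of the precise-motion surface (whether `start(None)` falls back to `_init_own_zmq` is an assumption; speeds and tolerances come from `config_Odometry.json`):

```python
from Navigation.marcus_odometry import Odometry

odom = Odometry()
odom.start(zmq_sock=None)      # assumption: None lets Odometry open its own ZMQ socket
odom.walk_distance(1.0)        # walk 1 m at the configured default speed
odom.turn_degrees(90)          # turn 90 degrees using odometry feedback
print(odom.get_position())     # {"x": ..., "y": ..., "heading": ..., "source": ...}
odom.return_to_start()
odom.stop()
```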
---
## `Brain/`
| File | Public API |
|---|---|
| `marcus_brain.py` | `init_brain()`, `process_command(cmd)` → `{type, speak, action, elapsed}`, `get_brain_status()`, `shutdown()`, `run_terminal()`; private `_init_voice`, `_handle_llava`, `_handle_talk`, `_handle_search`, `_warmup_llava` |
| `command_parser.py` | `init_autonomous(auto_instance)`, `try_local_command(cmd)` (regex-table dispatcher); `_print_help`, `_print_examples` |
| `executor.py` | `execute(d)`, `execute_action(move, duration)`, `move_step(move, duration)`, `merge_actions(actions)`; `_obstacle_check` |
| `marcus_memory.py` | **class `Memory`** + utils `_read_json`, `_write_json`, `_sanitize_name`, `_fuzzy_match`, `_new_session_id` |
**`Brain.marcus_memory.Memory`** — places + sessions store, JSON-backed:
- places: `save_place(name, x, y, heading)`, `get_place(name)`, `delete_place(name)`, `list_places()`, `rename_place(old, new)`, `places_count()`
- sessions: `start_session()`, `end_session()`, `log_command(cmd, response, duration_s)`, `log_detection(class, pos, dist, x, y)`, `log_alert(type, detail)`, `get_last_command()`, `get_last_n_commands(n)`, `get_session_detections()`, `commands_count()`, `session_duration_str()`
- history: `last_session_summary()`, `previous_session_detections()`, `previous_session_places()`, `all_sessions()`
- internal: `_load_places`, `_start_autosave`, `_flush_session`, `_emergency_save`, `_write_summary`, `_prune_old_sessions`, `_get_previous_session_dir`
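A short usage sketch, under the assumption that `Memory()` takes no required constructor arguments and resolves its `Data/` paths internally:

```python
from Brain.marcus_memory import Memory

mem = Memory()                 # assumption: no required ctor args
mem.start_session()
mem.save_place("kitchen", 1.2, 0.4, 90.0)
mem.log_command("go to the kitchen", "Heading to the kitchen", 3.1)
print(mem.list_places())
mem.end_session()
```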
---
## `Autonomous/`
`marcus_autonomous.py` — **class `AutonomousMode`**: patrol-and-map state machine.
- `__init__(get_frame_fn, send_vel_fn, gradual_stop_fn, yolo_sees_fn, yolo_summary_fn, yolo_all_classes_fn, yolo_closest_fn, odom_fn, call_llava_fn, patrol_prompt, mem, models_dir)`
- lifecycle: `enable()`, `disable()`, `is_enabled()`, `status()`, `save_snapshot()`
- internal: `_explore_loop`, `_move_forward`, `_turn`, `_assess_scene`, `_create_map_dir`, `_save_observations`, `_save_path`, `_save_frame`, `_generate_summary`, `_save_session`, `_print_summary`
---
## `Server/` & `Bridge/`
| File | Public API |
|---|---|
| `Server/marcus_server.py` | `async handler(websocket)`, `async broadcast_frames()`, `async run_server(host, port)`, `main()`; helpers `_get_interface_ips`, `_check_lidar` |
| `Bridge/ros2_zmq_bridge.py` | **class `ROS2ZMQBridge`** (`_vel_cb`, `_cmd_cb`) + `main()` — standalone tool, not imported by Marcus |
---
## Suggested import surface for integration code
If you're writing glue on top of Marcus, the stable public surface is:
```python
# brain orchestration
from Brain.marcus_brain import init_brain, process_command, shutdown
# direct robot control (bypasses brain)
from API.zmq_api import init_zmq, send_vel, gradual_stop, send_cmd
from API.yolo_api import yolo_sees, yolo_summary, yolo_closest
from API.camera_api import start_camera, get_frame
from API.audio_api import AudioAPI # .speak(text), .record(seconds)
from API.lidar_api import init_lidar, obstacle_ahead, get_slam_pose, stop_lidar
from API.memory_api import init_memory, log_cmd, log_detection, place_save, place_goto
# voice pipeline
from Voice.marcus_voice import VoiceModule
from Voice.builtin_mic import BuiltinMic
from Voice.builtin_tts import BuiltinTTS
# navigation
from Navigation.goal_nav import navigate_to_goal
from Navigation.patrol import patrol
from Navigation.marcus_odometry import Odometry
# autonomous mode
from Autonomous.marcus_autonomous import AutonomousMode
```
---
## Convention notes
- **All layers above Core must import from `API.*` only** (not directly from `Vision/`, `Navigation/`, `Voice/`). Enforced by convention, not the language.
- **Underscore prefix = private.** `_foo` is internal; don't import it outside the module unless you're the test harness.
- **Stub rebinding pattern** (e.g. `API.yolo_api`): module-level placeholders get replaced with real implementations inside `init_*()` on success. If init fails, callers keep getting the safe stub (e.g. `yolo_sees` returns `False`). See the sketch after this list.
- **Error returns are consistent per layer**: API layer returns `None` / empty dict / `False`; Brain layer returns structured dicts (`{"type","speak","action","elapsed"}`); no exception leaks to the terminal loop except at startup (`init_brain()` will raise to surface hardware issues like missing CUDA).
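A minimal sketch of the stub-rebinding pattern (illustrative, not the actual `API/yolo_api.py` body):

```python
# Safe module-level stub: callers can import and call this before init has run.
def yolo_sees(class_name, min_confidence=0.5):
    return False

def init_yolo(raw_frame_ref, frame_lock):
    """Rebind the stub to the real detector on success; keep the stub on failure."""
    global yolo_sees
    try:
        from Vision import marcus_yolo
        marcus_yolo.start_yolo(raw_frame_ref, frame_lock)
        yolo_sees = marcus_yolo.yolo_sees   # rebinding: later callers get the real function
        return True
    except Exception:
        return False                        # init failed; callers keep the safe stub
```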

View File

@ -3,7 +3,7 @@
**Robot persona:** Sanad (wake word + self-intro)
**Updated:** 2026-04-21
One map of every data path from sensor to motor, voice to speech. Cross-reference with `architecture.md` (what each file is) and `MARCUS_API.md` (function signatures).
One map of every data path from sensor to motor, voice to speech. Cross-reference with `architecture.md` (what each file is), `functions.md` (exact function signatures — AST-generated), and `MARCUS_API.md` (usage examples + JSON schemas).
---
@ -167,8 +167,13 @@ Brain/command_parser.py — responds to "lidar status" queries
| `yolo_device`, `yolo_half` | config_Vision.json | `cuda` / FP16 (hard-required; CPU not allowed) |
| `mic.backend` | config_Voice.json | `builtin_udp` (G1 array) or `pactl_parec` (Hollyland fallback) |
| `mic_udp.group/port` | config_Voice.json | where to join the G1 audio multicast |
| `mic_udp.read_timeout_sec` | config_Voice.json | `BuiltinMic.read_chunk` budget (default 0.04 s) |
| `tts.backend` | config_Voice.json | `builtin_ttsmaker` (only supported option) |
| `stt.wake_words_en` | config_Voice.json | Whisper matcher (`sanad` + variants) |
| `timeout_ms`, `stale_threshold_s`, `reconnect_delay_s` | config_Camera.json | RealSense frame timeout, reconnect trigger, initial backoff |
| `default_max_steps`, `step_delay_s`, `rotate_speed`, `min_steps_warmup` | config_ImageSearch.json | image-guided search rotation cadence (wired into `Vision/marcus_imgsearch.py`) |
| `default_walk_speed`, `dist_tolerance`, `angle_tolerance`, `safety_timeout_mult`, `dr_update_hz` | config_Odometry.json | precise motion control (wired into `Navigation/marcus_odometry.py`) |
| `MARCUS_LOG_MAX_BYTES`, `MARCUS_LOG_BACKUP_COUNT`, `MARCUS_LOG_DIR` | env vars | log rotation size, backup count, log directory override |
---

View File

@ -1,93 +0,0 @@
import ollama, base64, json, time
import pyrealsense2 as rs
import numpy as np, cv2
import zmq
HOLOSOMA_IP = "127.0.0.1"
HOLOSOMA_PORT = 5556
def capture_frame():
pipeline = rs.pipeline()
cfg = rs.config()
cfg.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
pipeline.start(cfg)
for _ in range(5):
pipeline.wait_for_frames()
frames = pipeline.wait_for_frames()
img = np.asanyarray(frames.get_color_frame().get_data())
pipeline.stop()
cv2.imwrite('/tmp/marcus_eye.jpg', img)
return '/tmp/marcus_eye.jpg'
def ask_qwen(image_path, command):
with open(image_path, 'rb') as f:
img_b64 = base64.b64encode(f.read()).decode()
prompt = f"""أنت ماركس، روبوت ذكي يتنقل داخل المبنى.
You are Marcus, an intelligent indoor navigation robot.
User command: "{command}"
Look at the camera image. Respond with ONLY one line:
FORWARD [0.1 to 1.0 meters]
LEFT [5 to 45 degrees]
RIGHT [5 to 45 degrees]
STOP [reason]
ARRIVED"""
response = ollama.chat(
model='qwen2.5vl:7b',
messages=[{
'role': 'user',
'content': prompt,
'images': [img_b64]
}]
)
return response['message']['content'].strip().split('\n')[0]
def send_to_robot(action):
print(f" Robot action: {action}")
parts = action.upper().split()
if not parts:
return
cmd = parts[0]
val = float(parts[1]) if len(parts) > 1 else 0
if cmd == "FORWARD":
print(f" Walking forward {val}m")
elif cmd == "LEFT":
print(f" Turning left {val} degrees")
elif cmd == "RIGHT":
print(f" Turning right {val} degrees")
elif cmd == "STOP":
print(f" Stopping: {' '.join(parts[1:])}")
elif cmd == "ARRIVED":
print(" Destination reached!")
if __name__ == "__main__":
print("=" * 50)
print("Marcus Navigation Brain")
print("Powered by Qwen2.5-VL on Jetson Orin NX")
print("Speaks Arabic + English")
print("=" * 50)
print("Type your command (or 'quit'):\n")
while True:
try:
cmd = input("Command: ").strip()
if cmd.lower() in ['quit', 'exit', 'خروج']:
print("Marcus shutting down.")
break
if not cmd:
continue
print("Capturing camera frame...")
frame = capture_frame()
print("Qwen2.5-VL thinking...")
t0 = time.time()
action = ask_qwen(frame, cmd)
elapsed = time.time() - t0
print(f"Decision ({elapsed:.1f}s): {action}")
send_to_robot(action)
print()
except KeyboardInterrupt:
print("\nStopped.")
break

View File

@ -1,3 +1,13 @@
FROM qwen2.5vl:7b
PARAMETER num_ctx 1024
PARAMETER num_predict 64
# Optional Ollama Modelfile for building a pre-tuned Marcus VL model.
# Not used at runtime — Marcus loads the stock `qwen2.5vl:3b` tag directly
# via Config/config_Brain.json::ollama_model. Keep this file only if you
# want to build a custom image with `ollama create marcus-vl -f Modelfile`.
#
# Runtime parameters (num_batch, num_ctx, num_predict) are overridden on
# every call by API/llava_api.py, so the PARAMETER lines below are just
# defaults for `ollama run` shell use.
FROM qwen2.5vl:3b
PARAMETER num_ctx 2048
PARAMETER num_predict 120
PARAMETER num_batch 128

View File

@ -23,11 +23,13 @@ Import in marcus_brain.py
Standalone test
---------------
/home/unitree/miniconda3/envs/marcus/bin/python3 ~/Models_marcus/marcus_odometry.py
conda run -n marcus python3 Navigation/marcus_odometry.py
Date : April 2026
"""
import os
import sys
import time
import math
import json
@ -36,24 +38,33 @@ import zmq
# ══════════════════════════════════════════════════════════════════════════════
# CONFIGURATION
# CONFIGURATION — loaded from Config/config_Odometry.json
# ══════════════════════════════════════════════════════════════════════════════
ZMQ_HOST = "127.0.0.1"
ZMQ_PORT = 5556
ROS2_ODOM_TOPIC = "/dog_odom"
ODOM_INTERFACE = "eth0"
_PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _PROJECT_DIR not in sys.path:
sys.path.insert(0, _PROJECT_DIR)
try:
from Core.config_loader import load_config
_cfg = load_config("Odometry")
except Exception:
_cfg = {}
ZMQ_HOST = str(_cfg.get("zmq_host", "127.0.0.1"))
ZMQ_PORT = int(_cfg.get("zmq_port", 5556))
ROS2_ODOM_TOPIC = str(_cfg.get("ros2_odom_topic", "/dog_odom"))
ODOM_INTERFACE = str(_cfg.get("odom_interface", "eth0"))
# Movement defaults
DEFAULT_WALK_SPEED = 0.25 # m/s — slower = more accurate
DEFAULT_TURN_SPEED = 0.25 # rad/s
DIST_TOLERANCE = 0.05 # meters — stop within 5cm
ANGLE_TOLERANCE = 2.0 # degrees — stop within 2°
SAFETY_TIMEOUT_MULT = 3.0 # timeout = (distance/speed) × this
ODOM_STALE_WARN = 1.0 # warn if odom not updated for this many seconds
DEFAULT_WALK_SPEED = float(_cfg.get("default_walk_speed", 0.25)) # m/s — slower = more accurate
DEFAULT_TURN_SPEED = float(_cfg.get("default_turn_speed", 0.25)) # rad/s
DIST_TOLERANCE = float(_cfg.get("dist_tolerance", 0.05)) # meters — stop within 5cm
ANGLE_TOLERANCE = float(_cfg.get("angle_tolerance", 2.0)) # degrees — stop within 2°
SAFETY_TIMEOUT_MULT = float(_cfg.get("safety_timeout_mult", 3.0)) # timeout = (distance/speed) × this
ODOM_STALE_WARN = 1.0 # warn if odom not updated for this many seconds (internal only)
# Dead reckoning
DR_UPDATE_HZ = 20 # integration rate
DR_UPDATE_HZ = int(_cfg.get("dr_update_hz", 20)) # integration rate
# ══════════════════════════════════════════════════════════════════════════════

237
README.md Normal file
View File

@ -0,0 +1,237 @@
# Marcus — Humanoid Robot AI Base
**Project:** Marcus | **Persona:** Sanad | **Organisation:** YS Lootah Technology, Dubai
A compact, offline-first AI base for the **Unitree G1 EDU** humanoid, running on a
**Jetson Orin NX 16 GB**. The codebase is intentionally generic — the same brain
drives both **housekeeping** and **AI tour-guide** robot deployments just by
changing prompts, wake words and which subsystems are enabled.
```
run_marcus.py ← terminal entrypoint (keyboard + voice)
Server/marcus_server.py ← same brain over WebSocket for a remote client
```
---
## What the robot is made of
Humanoid robot control ≠ one giant model. It's a **mesh of specialised models
and services**, each responsible for one part of the body, stitched together by
a Python brain.
| Body part | Purpose | Model / service | Where it runs |
|---|---|---|---|
| **Brain** (reason, speak, decide) | Parse commands, reason about vision, pick actions | **Qwen2.5-VL 3B** via Ollama | Jetson GPU |
| **Eyes** (see) | Real-time object/person detection | **YOLOv8m** (CUDA, FP16, 320 px, ~22 FPS) | Jetson GPU |
| **Eyes** (understand) | Open-ended scene understanding, reading, goal-verify | **Qwen2.5-VL** (same brain model) | Jetson GPU |
| **Ears** (hear) | Always-on wake-word + command transcription | **Whisper tiny** (wake) + **Whisper small** (STT) | Jetson CPU/GPU |
| **Mouth** (speak) | On-robot TTS, no internet needed | **Unitree `TtsMaker`** (G1 firmware) | G1 body speaker |
| **Legs** (walk) | 29-DoF locomotion + balance | **Holosoma** RL policy (separate process, ONNX) | Jetson CPU |
| **Hands** (gesture) | Arm & hand actions | **GR00T N1.5** — pending; `API/arm_api.py` is a stub today | Jetson GPU (future) |
| **Inner ear** (map) | SLAM, obstacle detection, localisation | **Livox Mid-360** LiDAR + custom SLAM engine | Jetson (subprocess) |
| **Memory** | Places, session history, facts | JSON files under `Data/Brain/Sessions/` | Jetson disk |
Nothing here reaches the cloud. The only internet-adjacent bits (edge-tts,
Gemini) were removed — everything runs on the robot's own compute.
---
## How it hears, sees, speaks
```
Inputs ──────────────────────────────────────── Outputs
Voice ──┐                                       ┌─► Speech (G1 speaker)
        │                                       │
Text ───┼──► Brain (Qwen2.5-VL) ────────────────┤
        │          │                            │
Camera ─┘          ▼                            ├─► Legs (Holosoma → G1)
                   ├─► YOLO (fast class check)  │
                   ├─► LiDAR (obstacles / pose) └─► Arms/hands (stub → GR00T)
                   └─► Memory (places / history)
```
Three input modalities, same command loop:
- **Voice** — say "**Sanad, what do you see?**" → wake word fires, Whisper transcribes, brain answers through the G1 speaker.
- **Text** — type the same command into `run_marcus.py`'s terminal.
- **WebSocket (remote)** — `Client/marcus_cli.py` or `Client/marcus_client.py` (Tkinter GUI) send commands from a workstation.
All three feed the same `Brain.marcus_brain.process_command(cmd)` function.
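A rough sketch of the terminal path (illustrative only; the real `run_marcus.py` also wires in wake-word handling and TTS, and the return type of `process_command` is an assumption here):
```python
from Brain.marcus_brain import process_command

def terminal_loop():
    while True:
        cmd = input("Command: ").strip()
        if cmd.lower() in ("quit", "exit"):
            break
        if cmd:
            # Same call the voice and WebSocket paths end up making.
            print(process_command(cmd))
```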
---
## Two example deployments from the same codebase
### Housekeeping robot
Set up for indoor chores and presence awareness.
- **Prompts** tuned for *"empty the bin, close the window, check the bathroom, remind me at 6 pm"* intents.
- **Places** memory pre-loaded with named rooms (`kitchen`, `living room`, `hallway`).
- **Patrol mode** runs safety loops looking for hazards / unsafe PPE.
- **Autonomous mode** (`auto on`) explores the space, maps it, logs observations.
- YOLO classes: `person, chair, couch, bed, dining table, bottle, cup, laptop, keyboard, mouse, backpack, handbag, suitcase` (the defaults).
### AI tour-guide robot
Same hardware, different prompts + wake word.
- **Prompts** rewrite: *"You are a museum guide. When a visitor asks about an exhibit, describe it in two sentences and invite them to ask follow-ups."*
- **Places** memory pre-loaded with exhibit waypoints; `patrol: exhibit_A → exhibit_B → exit` follows a tour.
- Wake word changed in `config_Voice.json::stt.wake_words_en`.
- Image search (`search/ photo_of_exhibit.jpg`) lets visitors hold up a printed map; the robot navigates to the matching location.
- YOLO classes trimmed to people-only if the venue doesn't need object safety.
**What you change to switch use cases:**
1. `Config/marcus_prompts.yaml` — persona + task descriptions
2. `Config/config_Voice.json::stt.wake_words_en` — the name people call the robot
3. `Config/config_Vision.json::tracked_classes` — relevant object set
4. `Config/config_Brain.json::subsystems.{lidar,voice,imgsearch,autonomous}` — enable what you need
5. Data under `Data/History/Places/places.json` — learned locations
No code changes required for either deployment.
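As a quick illustration, the deployment-specific knobs can be inspected with `Core.config_loader.load_config` (the exact key nesting is an assumption; the config files themselves are authoritative):
```python
from Core.config_loader import load_config

voice  = load_config("Voice")
vision = load_config("Vision")
brain  = load_config("Brain")

print("wake words  :", voice["stt"]["wake_words_en"])   # the name people call the robot
print("yolo classes:", vision["tracked_classes"])        # relevant object set
print("subsystems  :", brain["subsystems"])              # lidar / voice / imgsearch / autonomous
```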
---
## Layer architecture
```
run_marcus.py / Server/marcus_server.py ← entrypoints
Brain/ (marcus_brain, command_parser, executor, memory)
│ imports only from ↓
API/ (one file per subsystem — stable public surface)
│ wraps ↓
┌───────┴────────┬──────────────┬────────────┐
▼ ▼ ▼ ▼
Vision/ Navigation/ Voice/ Lidar/
YOLO, imgsearch goal_nav, builtin_mic, SLAM engine
patrol, odom builtin_tts, (subprocess)
marcus_voice
Core/ (env, config, log_backend, logger)
Config/ + .env
```
**Rule:** Brain talks to subsystems only via `API/*`. You can replace YOLO with
any detector, swap Qwen for another VL model, or plug in a different TTS —
without touching Brain code — by implementing the same API surface.
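A hypothetical `API/vision_api.py`, sketched only to show the pattern (the actual file and the argument shapes may differ):
```python
# Hypothetical wrapper: Brain imports these names, never Vision/ directly.
from Vision.marcus_yolo import yolo_sees, yolo_count, yolo_summary

def sees(label: str) -> bool:
    return yolo_sees(label)   # swap the detector by reimplementing this, not Brain

def count(label: str) -> int:
    return yolo_count(label)

def summary() -> str:
    return yolo_summary()
```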
---
## Quick start (Jetson, after `conda activate marcus`)
```bash
# 1) Launch Holosoma (locomotion) in hsinference env
source ~/.holosoma_deps/miniconda3/bin/activate hsinference
cd ~/holosoma && python3 src/holosoma_inference/.../run_policy.py ...
# 2) Start Ollama
ollama serve > /tmp/ollama.log 2>&1 &
sleep 3
# 3) Start Marcus
conda activate marcus
cd ~/Marcus
python3 run_marcus.py
```
You should see:
```
[YOLO] Model loaded ✅ | device: cuda (Orin) | FP16 | 19 tracked classes
================================================
SANAD AI BRAIN — READY
================================================
model : qwen2.5vl:3b
yolo : True voice : True
odometry : True memory : True
lidar : True camera : 424x240@15
```
Say **"Sanad"** to wake, or type at the `Command:` prompt.
See `Doc/controlling.md` for the full command reference, `Doc/environment.md`
for the Jetson install recipe, and `Doc/pipeline.md` for the end-to-end
dataflow diagrams.
---
## Hardware target
| Component | Model |
|---|---|
| Humanoid | Unitree G1 EDU, 29 DoF |
| Compute | Jetson Orin NX 16 GB (Ampere iGPU, FP16 tensor cores, capability 8.7) |
| Software stack | JetPack 5.1.1 / CUDA 11.4 / cuDNN 8.6 / Python 3.8 / torch 2.1.0-nv23.06 / ultralytics 8.4.21 / Ollama 0.20.0 |
| Camera | Intel RealSense D435 (424×240 @ 15 fps) |
| LiDAR | Livox Mid-360 |
| Microphone | G1 on-board array (UDP multicast, no external USB mic) |
| Speaker | G1 body speaker (via Unitree RPC) |
---
## Repository layout (top-level)
```
Marcus/
├── run_marcus.py entrypoint — terminal mode
├── README.md this file
├── Core/ foundation — config + env + logging
├── Config/ 12 JSON files + marcus_prompts.yaml
├── API/ subsystem wrappers (stable public surface)
├── Brain/ orchestrator, parser, executor, memory
├── Vision/ YOLO + image-guided search
├── Navigation/ goal nav, patrol, odometry
├── Voice/ built-in mic, built-in TTS, Whisper loop
├── Autonomous/ exploration state machine
├── Lidar/ SLAM engine (subprocess)
├── Server/ WebSocket interface
├── Client/ terminal CLI + Tkinter GUI
├── Bridge/ optional ROS2 ↔ ZMQ bridge (standalone tool)
├── Models/ yolov8m.pt + optional Ollama Modelfile
├── Data/ runtime-generated sessions / places / maps
├── logs/ rotating per-module log files (5 MB × 3)
└── Doc/ architecture, API, environment, pipeline,
controlling, functions — all current
```
---
## Docs
- `Doc/architecture.md` — project structure + layer-by-layer breakdown
- `Doc/controlling.md` — startup sequence + command reference
- `Doc/environment.md` — verified Jetson software stack + install recipe
- `Doc/pipeline.md` — boot, voice, vision, movement, LiDAR dataflow
- `Doc/functions.md` — every callable in the codebase (AST-generated)
- `Doc/MARCUS_API.md` — developer API reference with JSON schemas
---
## Design principles
1. **Offline-first.** No cloud dependency in the default path. Internet can be
wired in for specific backends (e.g. future edge-tts) but it's opt-in.
2. **GPU mandatory.** YOLO refuses to start on CPU — Marcus is a safety-critical
   robot; silently downgrading to 2 FPS vision is worse than failing loudly
   (a minimal guard is sketched after this list).
3. **Swappable subsystems.** Each API file can be reimplemented behind the same
public functions. Replace YOLO with DETR, Qwen with LLaVA, TtsMaker with
Piper — Brain never notices.
4. **Config over code.** Tunables live in `Config/*.json` / `.yaml`; 156 config
keys are all actively referenced (0 orphans). Change persona, wake word,
enabled subsystems, or thresholds without touching a `.py` file.
5. **English only.** Arabic support was removed because the G1 firmware's TTS
silently maps Arabic to Chinese. If bilingual TTS is ever needed again,
see `git log` for the removed Piper / edge-tts paths.
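A minimal sketch of the principle-2 guard (the real check in `Vision/marcus_yolo.py` is an assumption and likely reports more detail, such as the device name):
```python
import torch

if not torch.cuda.is_available():
    # Fail loudly rather than silently limping along at ~2 FPS on CPU.
    raise RuntimeError("YOLO requires CUDA; refusing to start on CPU")
```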
---
*Marcus — YS Lootah Technology | Dubai*

View File

@ -31,7 +31,7 @@ Usage in marcus_brain.py
Standalone test
---------------
python3 ~/Models_marcus/marcus_imgsearch.py --image /path/to/photo.jpg
python3 Vision/marcus_imgsearch.py --image /path/to/photo.jpg
Date : April 2026
"""
@ -39,10 +39,11 @@ Date : April 2026
import base64
import io
import json
import time
import threading
import os
import re
import sys
import threading
import time
from pathlib import Path
import numpy as np
@ -55,17 +56,23 @@ except ImportError:
# ══════════════════════════════════════════════════════════════════════════════
# CONFIGURATION
# CONFIGURATION (loaded from Config/config_ImageSearch.json)
# ══════════════════════════════════════════════════════════════════════════════
DEFAULT_MAX_STEPS = 60 # max rotation steps before giving up
STEP_DELAY = 0.15 # min gap between YOLO checks (was 0.4 — reduced
# because the rotation thread paces motion already
# and each LLaVA call is 600-1500 ms of real work)
ROTATE_SPEED = 0.25 # rad/s rotation speed during search
MIN_STEPS_WARMUP = 3 # skip first N steps (stale frame)
MATCH_CONFIDENCE_THR = 0.6 # LLaVA confidence threshold (not used directly,
# but kept for future scoring)
_PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _PROJECT_DIR not in sys.path:
sys.path.insert(0, _PROJECT_DIR)
try:
from Core.config_loader import load_config
_cfg = load_config("ImageSearch")
except Exception:
_cfg = {}
DEFAULT_MAX_STEPS = int(_cfg.get("default_max_steps", 60)) # rotation steps before giving up
STEP_DELAY = float(_cfg.get("step_delay_s", 0.15)) # min gap between YOLO checks
ROTATE_SPEED = float(_cfg.get("rotate_speed", 0.25)) # rad/s during search
MIN_STEPS_WARMUP = int(_cfg.get("min_steps_warmup", 3)) # skip first N steps (stale frame)
MATCH_CONFIDENCE_THR = 0.6 # LLaVA confidence threshold (reserved for future scoring)
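# Illustrative worst case (not part of the file): 60 steps at ~0.15 s pacing plus
# a 0.6-1.5 s VLM check per step is roughly 45-100 s before the search gives up.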
# ══════════════════════════════════════════════════════════════════════════════

View File

@ -9,7 +9,7 @@ Usage (imported):
from marcus_yolo import start_yolo, yolo_sees, yolo_count, yolo_closest, yolo_summary
Usage (standalone):
/home/unitree/miniconda3/envs/marcus/bin/python3 ~/Models_marcus/marcus_yolo.py
conda run -n marcus python3 Vision/marcus_yolo.py
"""
import os
@ -360,8 +360,13 @@ def _camera_loop(raw_frame_ref, frame_lock, cam_alive):
raw_frame_ref[0] = frame.copy()
except Exception as e:
print(f"Camera: {e} — reconnecting...")
try: pipeline.stop()
except: pass
# pipeline may already be stopped or never started; swallow only
# the expected RealSense "pipeline not started" error, not every
# possible failure mode.
try:
pipeline.stop()
except RuntimeError:
pass
time.sleep(2.0)

View File

@ -24,18 +24,31 @@ Ported from Project/Sanad/voice/audio_io.py (Sanad's production implementation).
from __future__ import annotations
import os
import socket
import struct
import subprocess
import sys
import threading
import time
from typing import Optional
# Load defaults from Config/config_Voice.json::mic_udp so they can be tuned
# without editing code. Falls back to the hardcoded literals below if the
# config isn't reachable (e.g., when imported from a test harness).
_PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _PROJECT_DIR not in sys.path:
sys.path.insert(0, _PROJECT_DIR)
try:
from Core.config_loader import load_config
_mic_udp = (load_config("Voice") or {}).get("mic_udp", {}) or {}
except Exception:
_mic_udp = {}
DEFAULT_GROUP = "239.168.123.161"
DEFAULT_PORT = 5555
DEFAULT_BUF_MAX = 64_000 # ~2 s of 16 kHz mono int16
DEFAULT_READ_TIMEOUT = 0.04 # 40 ms budget per read_chunk call
DEFAULT_GROUP = str(_mic_udp.get("group", "239.168.123.161"))
DEFAULT_PORT = int(_mic_udp.get("port", 5555))
DEFAULT_BUF_MAX = int(_mic_udp.get("buffer_max_bytes", 64_000)) # ~2 s of 16 kHz mono int16
DEFAULT_READ_TIMEOUT = float(_mic_udp.get("read_timeout_sec", 0.04)) # budget per read_chunk call
SAMPLE_RATE = 16_000 # hardware rate — do not change
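# Buffer arithmetic (illustrative): 16_000 samples/s * 2 bytes (int16) = 32_000 B/s,
# so the 64_000-byte cap holds roughly 2 s of mono audio, matching the comment above.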

View File

@ -25,6 +25,8 @@ import os
import sys
import threading
import time
from logging.handlers import RotatingFileHandler
import numpy as np
# ─── PATH + CONFIG ───────────────────────────────────────
@ -38,12 +40,17 @@ from Core.config_loader import load_config
LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
os.makedirs(LOG_DIR, exist_ok=True)
# Idempotent — only the first call per process installs handlers.
# basicConfig is idempotent. Whichever of audio_api / marcus_voice imports
# first installs the rotating handler; the other no-ops. Both loggers then
# share the same file handle with stdlib's per-handler thread lock.
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
handlers=[
logging.FileHandler(os.path.join(LOG_DIR, "voice.log")),
RotatingFileHandler(
os.path.join(LOG_DIR, "voice.log"),
maxBytes=5_000_000, backupCount=3, encoding="utf-8",
),
logging.StreamHandler(),
],
)