Initial project commit

kassam 2026-04-12 18:50:22 +04:00
commit 8491be7f1e
189 changed files with 21351 additions and 0 deletions

2
.env Normal file

@@ -0,0 +1,2 @@
PROJECT_BASE=/home/unitree
PROJECT_NAME=Marcus

4
.gitignore vendored Normal file

@@ -0,0 +1,4 @@
__pycache__/
*.pyc
Logs/
*.log

0
API/__init__.py Normal file

16
API/arm_api.py Normal file

@@ -0,0 +1,16 @@
"""
arm_api.py — Arm gesture control (GR00T N1.5 pending)
"""
from Core.config_loader import load_config
_cfg = load_config("Arm")
ARM_AVAILABLE = _cfg["arm_available"]
ARM_ACTIONS = _cfg["actions"]
ARM_ALIASES = dict(_cfg["aliases"])
ALL_ARM_NAMES = set(ARM_ALIASES.keys()) | set(ARM_ACTIONS.keys())
def do_arm(action):
"""Arm action stub — GR00T N1.5 integration pending."""
print(f" [Arm] GR00T not yet integrated — skipping: {action}")

419
API/audio_api.py Normal file

@@ -0,0 +1,419 @@
#!/usr/bin/env python3
"""
API/audio_api.py — Marcus Audio API Layer
==========================================
Provides speak() and record() for the Brain layer.
Brain imports ONLY from this API — never from the unitree SDK directly.
Speaker: _CallRequestWithParamAndBin (single call, full buffer)
Mic:     parec -d 3 (Hollyland wireless, PulseAudio source index from config)
TTS:     edge-tts for EN and AR (built-in TtsMaker as English fallback)
Piper:   ar_JO-kareem-medium → resample → G1 speaker (offline alternative)
Usage:
from API.audio_api import AudioAPI
audio = AudioAPI()
audio.speak("Hello", "en")
audio.speak("مرحبا", "ar")
recording = audio.record(seconds=5)
audio.play_pcm(recording)
"""
import json
import logging
import os
import subprocess
import threading
import time
import wave
import numpy as np
# ─── PATH CONFIG ─────────────────────────────────────────
from dotenv import load_dotenv
load_dotenv()
BASE_PATH = os.environ.get("PROJECT_BASE", "/home/unitree")
PROJECT_NAME = "Marcus"
PROJECT_ROOT = os.path.join(BASE_PATH, PROJECT_NAME)
LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
os.makedirs(LOG_DIR, exist_ok=True)
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
handlers=[
logging.FileHandler(os.path.join(LOG_DIR, "voice.log")),
logging.StreamHandler(),
],
)
log = logging.getLogger("audio_api")
def load_config(name: str) -> dict:
path = os.path.join(PROJECT_ROOT, "Config", f"config_{name}.json")
with open(path, "r") as f:
return json.load(f)
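# Illustrative config_Voice.json shape — keys are those read below; the
# values are placeholders, not the shipped config:
#   {
#     "tts":     {"target_sample_rate": 16000, "piper_voice_en": "...",
#                 "piper_timeout_sec": 30, "piper_sample_rate": 22050,
#                 "builtin_speaker_id": 1},
#     "mic":     {"source_index": "3", "rate": 16000, "channels": 1, "format": "s16le"},
#     "speaker": {"dds_interface": "eth0", "volume": 85, "app_name": "marcus"},
#     "audio":   {"data_dir": "Data/Voice/Recordings"},
#     "messages": {"ready": "...", "error_tts": "...", "error_mic": "..."}
#   }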
# ─── AUDIO API CLASS ─────────────────────────────────────
class AudioAPI:
"""Marcus audio interface — speak + record + play."""
def __init__(self):
self._config = load_config("Voice")
self._client = None
self._sdk_available = False
self._init_sdk()
# Config shortcuts
self._tts = self._config["tts"]
self._mic = self._config["mic"]
self._spk = self._config["speaker"]
self._target_rate = self._tts["target_sample_rate"]
# Data dir
data_dir = os.path.join(PROJECT_ROOT, self._config["audio"]["data_dir"])
os.makedirs(data_dir, exist_ok=True)
self._data_dir = data_dir
# Speaking lock — prevents mic from hearing TTS output
self._speaking = False
self._speak_lock = threading.Lock()
log.info(self._config["messages"]["ready"])
def _init_sdk(self):
"""Initialize Unitree AudioClient."""
try:
from unitree_sdk2py.core.channel import ChannelFactoryInitialize
from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient
dds_iface = self._config["speaker"]["dds_interface"]
ChannelFactoryInitialize(0, dds_iface)
self._client = AudioClient()
self._client.SetTimeout(10.0)
self._client.Init()
self._client.SetVolume(self._config["speaker"]["volume"])
self._sdk_available = True
log.info("AudioClient initialized on %s", dds_iface)
except Exception as e:
log.error("AudioClient init failed: %s", e)
self._sdk_available = False
# ─── SPEAK ────────────────────────────────────────────
def speak(self, text: str, lang: str = "auto"):
"""
Speak text in the given language.
Mutes mic during playback to prevent self-listening.
lang="en" built-in TtsMaker
lang="ar" Piper resample G1 speaker
lang="auto" detect from text
"""
if lang == "auto":
lang = self._detect_lang(text)
log.info("[%s] speak: %s", lang.upper(), text[:80])
with self._speak_lock:
self._speaking = True
self._mute_mic()
try:
if lang == "en":
self._speak_english(text)
elif lang == "ar":
self._speak_arabic(text)
else:
log.warning("Unknown lang '%s', falling back to English", lang)
self._speak_english(text)
except Exception as e:
log.error("%s: %s", self._config["messages"]["error_tts"], e)
finally:
# Small delay so speaker fully stops before mic reopens
time.sleep(0.3)
self._unmute_mic()
self._speaking = False
def _mute_mic(self):
"""Mute the wireless mic to prevent self-listening."""
source = self._mic["source_index"]
subprocess.run(
["pactl", "set-source-mute", source, "1"],
capture_output=True,
)
log.debug("Mic muted")
def _unmute_mic(self):
"""Unmute the wireless mic."""
source = self._mic["source_index"]
subprocess.run(
["pactl", "set-source-mute", source, "0"],
capture_output=True,
)
log.debug("Mic unmuted")
@property
def is_speaking(self) -> bool:
"""True while TTS is playing — voice module checks this."""
return self._speaking
def _speak_english(self, text: str):
"""English TTS via edge-tts."""
self._speak_edge_tts(text, "en")
def _speak_arabic(self, text: str):
"""Arabic TTS via edge-tts."""
self._speak_edge_tts(text, "ar")
def speak_piper_en(self, text: str):
"""Alternative: English via Piper instead of built-in."""
voice = self._tts["piper_voice_en"]
audio, rate = self._piper_synthesize(text, voice)
audio_16k = self._resample(audio, rate)
self._play_pcm(audio_16k)
# ─── PIPER TTS ────────────────────────────────────────
def _piper_synthesize(self, text: str, voice: str) -> tuple:
"""Run Piper CLI, return (audio_int16, sample_rate)."""
cmd = ["piper", "--model", voice, "--output_raw"]
timeout = self._tts["piper_timeout_sec"]
proc = subprocess.run(
cmd,
input=text.encode("utf-8"),
capture_output=True,
timeout=timeout,
)
if proc.returncode != 0:
stderr = proc.stderr.decode()[:300]
raise RuntimeError(f"Piper failed: {stderr}")
audio = np.frombuffer(proc.stdout, dtype=np.int16)
piper_rate = self._tts["piper_sample_rate"]
log.info("Piper: %d samples @ %dHz (%.1fs)", len(audio), piper_rate, len(audio) / piper_rate)
return audio, piper_rate
    # ─── EDGE-TTS ─────────────────────────────────────────
    def _speak_edge_tts(self, text: str, lang: str):
        """Generate speech via edge-tts and play on G1."""
        voice = "ar-AE-HamdanNeural" if lang == "ar" else "en-US-GuyNeural"
        ts = int(time.time() * 1000)
        mp3_path = f"/tmp/edge_{lang}_{ts}.mp3"
        wav_path = f"/tmp/edge_{lang}_{ts}.wav"
        safe_text = text.replace('"', '\\"')
        code = f'import edge_tts, asyncio; asyncio.run(edge_tts.Communicate("{safe_text}", voice="{voice}").save("{mp3_path}"))'
        result = subprocess.run(["python3", "-c", code], capture_output=True, text=True, timeout=30)
        if result.returncode != 0:
            log.error("edge-tts failed: %s", result.stderr[:200])
            # Fall back to the built-in TtsMaker for English
            if lang == "en" and self._sdk_available:
                self._client.TtsMaker(text, self._tts.get("builtin_speaker_id", 1))
                time.sleep(max(2.0, len(text) * 0.06))
            return
        try:
            from pydub import AudioSegment
            a = AudioSegment.from_mp3(mp3_path)
            a = a.set_frame_rate(16000).set_channels(1).set_sample_width(2)
            a.export(wav_path, format="wav")
            # wave and os are imported at module level
            with wave.open(wav_path, "rb") as wf:
                audio = np.frombuffer(wf.readframes(wf.getnframes()), dtype=np.int16)
            os.unlink(mp3_path)
            os.unlink(wav_path)
            self._play_pcm(audio)
        except Exception as e:
            log.error("edge-tts conversion error: %s", e)
            for p in (mp3_path, wav_path):
                try:
                    os.unlink(p)
                except OSError:
                    pass
    # ─── RESAMPLE ─────────────────────────────────────────
    def _resample(self, audio: np.ndarray, src_rate: int) -> np.ndarray:
"""Resample to target rate (16kHz)."""
if src_rate == self._target_rate:
return audio
tl = int(len(audio) * self._target_rate / src_rate)
return np.interp(
np.linspace(0, len(audio), tl, endpoint=False),
np.arange(len(audio)),
audio.astype(np.float64),
).astype(np.int16)
# ─── G1 SPEAKER PLAYBACK ─────────────────────────────
def _play_pcm(self, audio_16k: np.ndarray) -> float:
"""Play 16kHz mono int16 on G1 speaker. Returns duration."""
if not self._sdk_available:
log.warning("SDK not available, cannot play audio")
return 0.0
from unitree_sdk2py.g1.audio.g1_audio_api import (
ROBOT_API_ID_AUDIO_START_PLAY,
ROBOT_API_ID_AUDIO_STOP_PLAY,
)
app_name = self._spk["app_name"]
# Stop previous stream
self._client._Call(
ROBOT_API_ID_AUDIO_STOP_PLAY,
json.dumps({"app_name": app_name}),
)
time.sleep(0.3)
# Build params — unique stream_id every call
pcm = audio_16k.tobytes()
sid = f"s_{int(time.time() * 1000)}"
param = json.dumps({
"app_name": app_name,
"stream_id": sid,
"sample_rate": self._target_rate,
"channels": 1,
"bits_per_sample": 16,
})
# Single call — full buffer
self._client._CallRequestWithParamAndBin(
ROBOT_API_ID_AUDIO_START_PLAY, param, list(pcm)
)
duration = len(audio_16k) / self._target_rate
time.sleep(duration + 0.5)
self._client._Call(
ROBOT_API_ID_AUDIO_STOP_PLAY,
json.dumps({"app_name": app_name}),
)
log.info("Played: %.1fs", duration)
return duration
def play_pcm(self, audio_16k: np.ndarray) -> float:
"""Public wrapper for playing PCM audio."""
return self._play_pcm(audio_16k)
# ─── MIC RECORDING ───────────────────────────────────
def record(self, seconds: float = 5.0) -> np.ndarray:
"""Record from Hollyland wireless mic via parec. Returns int16 array."""
source = self._mic["source_index"]
rate = str(self._mic["rate"])
channels = str(self._mic["channels"])
fmt = self._mic["format"]
# Unmute mic
subprocess.run(
["pactl", "set-source-mute", source, "0"],
capture_output=True,
)
subprocess.run(
["pactl", "set-source-volume", source, "100%"],
capture_output=True,
)
log.info("Recording %.1fs from mic source %s", seconds, source)
        proc = subprocess.Popen(
            ["parec", "-d", source,
             f"--format={fmt}", f"--rate={rate}", f"--channels={channels}", "--raw"],
            stdout=subprocess.PIPE,
        )
        # Read continuously: parec blocks once the ~64 KiB pipe buffer fills,
        # so a single read() after terminate() would truncate the recording.
        chunks = []
        deadline = time.time() + seconds
        while time.time() < deadline:
            chunks.append(proc.stdout.read(4096))
        proc.terminate()
        proc.wait()
        raw = b"".join(chunks)
        audio = np.frombuffer(raw, dtype=np.int16)
log.info("Recorded: %d samples, std=%.0f", len(audio), audio.std())
if audio.std() < 50:
log.warning(self._config["messages"]["error_mic"] + " — mic may be silent")
return audio
def save_recording(self, audio: np.ndarray, name: str) -> str:
"""Save recording to Data/Voice/Recordings/."""
path = os.path.join(self._data_dir, f"{name}.wav")
        with wave.open(path, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(self._target_rate)
            wf.writeframes(audio.tobytes())
log.info("Saved: %s", path)
return path
# ─── LANGUAGE DETECTION ───────────────────────────────
@staticmethod
def _detect_lang(text: str) -> str:
"""Detect language from text — Arabic Unicode range check."""
for c in text:
if '\u0600' <= c <= '\u06FF':
return "ar"
return "en"
# ─── STATUS ───────────────────────────────────────────
@property
def is_available(self) -> bool:
return self._sdk_available
# ─── STANDALONE TEST ─────────────────────────────────────
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Marcus Audio API Test")
parser.add_argument("--test", action="store_true", help="Run speak tests")
parser.add_argument("--speak", type=str, help="Speak this text")
parser.add_argument("--lang", default="auto", help="Language: en, ar, auto")
parser.add_argument("--record", type=float, default=0, help="Record N seconds")
args = parser.parse_args()
api = AudioAPI()
if args.test:
print("\n--- English built-in ---")
api.speak("Hello, I am Marcus.", "en")
time.sleep(1)
print("\n--- Arabic Piper ---")
api.speak("مرحبا، أنا ماركوس", "ar")
time.sleep(1)
print("\n--- Auto-detect ---")
api.speak("How are you?")
time.sleep(1)
api.speak("كيف حالك؟")
time.sleep(1)
print("\n--- Record 3s + playback ---")
rec = api.record(3.0)
if rec.std() > 50:
api.play_pcm(rec)
print("\nDone.")
elif args.speak:
api.speak(args.speak, args.lang)
elif args.record > 0:
rec = api.record(args.record)
api.save_recording(rec, f"test_{int(time.time())}")
if rec.std() > 50:
api.play_pcm(rec)
else:
parser.print_help()

111
API/camera_api.py Normal file

@@ -0,0 +1,111 @@
"""
camera_api.py — RealSense D435i camera thread
"""
import base64
import io
import time
import threading
import numpy as np
from PIL import Image
from Core.config_loader import load_config
from Core.logger import log
_cfg = load_config("Camera")
CAM_WIDTH = _cfg["width"]
CAM_HEIGHT = _cfg["height"]
CAM_FPS = _cfg["fps"]
CAM_QUALITY = _cfg["jpeg_quality"]
# Shared state
latest_frame_b64 = [None]
_raw_frame = [None]
camera_lock = threading.Lock()
_raw_lock = threading.Lock()
camera_alive = [True]
_cam_last_frame_time = [0.0]
_cam_connected = [False]
def get_raw_refs():
"""Return (raw_frame_ref, raw_lock) for YOLO to share."""
return _raw_frame, _raw_lock
def camera_loop():
"""Capture RealSense frames continuously with auto-reconnect."""
import pyrealsense2 as rs
backoff = 2.0
while camera_alive[0]:
pipeline = None
try:
pipeline = rs.pipeline()
cfg = rs.config()
cfg.enable_stream(rs.stream.color, CAM_WIDTH, CAM_HEIGHT, rs.format.bgr8, CAM_FPS)
pipeline.start(cfg)
backoff = 2.0
_cam_connected[0] = True
print("Camera connected")
log(f"Camera connected {CAM_WIDTH}x{CAM_HEIGHT}@{CAM_FPS}", "info", "camera")
while camera_alive[0]:
try:
frames = pipeline.wait_for_frames(timeout_ms=5000)
color_frame = frames.get_color_frame()
if not color_frame:
continue
frame = np.asanyarray(color_frame.get_data())
if frame is None or frame.size == 0:
continue
with _raw_lock:
_raw_frame[0] = frame.copy()
img = Image.fromarray(frame[:, :, ::-1])
buf = io.BytesIO()
img.save(buf, format="JPEG", quality=CAM_QUALITY)
with camera_lock:
latest_frame_b64[0] = base64.b64encode(buf.getvalue()).decode()
_cam_last_frame_time[0] = time.time()
except Exception:
if time.time() - _cam_last_frame_time[0] > 10.0:
print(" [Camera] No frame for 10s — reconnecting...")
break
except Exception as e:
if _cam_connected[0]:
print(f" [Camera] Disconnected ({type(e).__name__}) — retrying in {backoff:.0f}s...")
_cam_connected[0] = False
try:
pipeline.stop()
except Exception:
pass
time.sleep(backoff)
backoff = min(backoff * 2, 10.0)
def start_camera():
"""Start camera thread. Returns (raw_frame_ref, raw_lock)."""
threading.Thread(target=camera_loop, daemon=True).start()
time.sleep(3.0)
return _raw_frame, _raw_lock
def stop_camera():
"""Stop camera thread."""
camera_alive[0] = False
def get_frame():
"""Return latest base64 JPEG frame for LLaVA. None if not ready."""
with camera_lock:
return latest_frame_b64[0]
def get_frame_age() -> float:
"""Return seconds since last camera frame."""
return time.time() - _cam_last_frame_time[0] if _cam_last_frame_time[0] > 0 else 999.0
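# Minimal usage sketch (names are this module's own; the 5s staleness
# threshold below is illustrative, not a project constant):
#   raw_ref, raw_lock = start_camera()   # spawns capture thread, waits ~3s
#   b64 = get_frame()                    # latest base64 JPEG, or None
#   if get_frame_age() > 5.0:            # stale-frame check before a LLaVA call
#       ...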

38
API/imgsearch_api.py Normal file

@@ -0,0 +1,38 @@
"""
imgsearch_api.py — Image-guided search interface
"""
import sys
import os
from Core.env_loader import PROJECT_ROOT
_img_searcher = None
def init_imgsearch(get_frame_fn, send_vel_fn, gradual_stop_fn,
llava_fn, yolo_sees_fn, model):
"""Wire up the ImageSearch module."""
global _img_searcher
vision_dir = os.path.join(PROJECT_ROOT, "Vision")
if vision_dir not in sys.path:
sys.path.insert(0, vision_dir)
try:
from marcus_imgsearch import ImageSearch
_img_searcher = ImageSearch(
get_frame_fn=get_frame_fn,
send_vel_fn=send_vel_fn,
gradual_stop_fn=gradual_stop_fn,
llava_fn=llava_fn,
yolo_sees_fn=yolo_sees_fn,
model=model,
)
print("Image search ready")
except ImportError:
print("marcus_imgsearch.py not found — image search disabled")
except Exception as e:
print(f"Image search error: {e}")
def get_searcher():
return _img_searcher

282
API/lidar_api.py Normal file

@@ -0,0 +1,282 @@
"""
lidar_api.py — Livox Mid-360 LiDAR interface via SlamEngineClient
==================================================================
Background thread polls SLAM data_q at 20Hz.
Exposes obstacle_ahead(), get_slam_pose(), get_nav_cmd(), get_loc_state().
The SLAM subprocess handles all point cloud processing, costmap building,
and safety evaluation internally. We just read the results.
"""
import math
import os
import sys
import threading
import time
import numpy as np
from Core.config_loader import load_config, config_path
from Core.logger import log
_cfg = load_config("LiDAR")
LIDAR_AVAILABLE = False
# ── Shared state (thread-safe) ───────────────────────────────────────────────
_lock = threading.Lock()
_latest_safety = [{}]
_latest_pose = [None] # 4x4 numpy or None
_latest_nav = [{}]
_latest_loc_state = ["IDLE"]
_latest_mode = ["IDLE"]
_latest_perf = [{}]
_lidar_alive = [False]
_client = [None]
_last_frame_time = [0.0]
# ══════════════════════════════════════════════════════════════════════════════
# INIT
# ══════════════════════════════════════════════════════════════════════════════
def init_lidar():
"""
Start SLAM subprocess + poll thread.
Returns True if LiDAR connected successfully.
"""
global LIDAR_AVAILABLE
if not _cfg.get("enabled", True):
print(" [LiDAR] Disabled in config")
return False
slam_dir = config_path(_cfg.get("slam_source_dir", "Lidar"))
if slam_dir not in sys.path:
sys.path.insert(0, slam_dir)
try:
from SLAM_engine import SlamEngineClient
except ImportError as e:
print(f" [LiDAR] SlamEngineClient not found: {e}")
log(f"LiDAR import failed: {e}", "error", "lidar")
return False
try:
client = SlamEngineClient()
_client[0] = client
print(" [LiDAR] SLAM engine created")
log("SLAM engine created", "info", "lidar")
except Exception as e:
print(f" [LiDAR] SLAM init error: {e}")
log(f"SLAM init error: {e}", "error", "lidar")
return False
# Start subprocess + connect to hardware
if _cfg.get("auto_connect", True):
try:
client.start_process()
client.connect()
print(" [LiDAR] Connected to Livox Mid-360")
log("Connected to LiDAR", "info", "lidar")
if _cfg.get("start_localize_only", False):
client.start_localize_only()
print(" [LiDAR] Localize-only mode")
elif _cfg.get("auto_start_mapping", False):
client.start_mapping()
print(" [LiDAR] Mapping started")
except Exception as e:
print(f" [LiDAR] Connect error: {e}")
log(f"LiDAR connect error: {e}", "error", "lidar")
return False
# Start poll thread
_lidar_alive[0] = True
threading.Thread(target=_poll_loop, daemon=True, name="lidar-poll").start()
# Wait for first FRAME (up to 5s)
deadline = time.time() + 5.0
while time.time() < deadline:
if _last_frame_time[0] > 0:
break
time.sleep(0.1)
if _last_frame_time[0] > 0:
LIDAR_AVAILABLE = True
print(" [LiDAR] Receiving data")
log("LiDAR available — receiving frames", "info", "lidar")
else:
print(" [LiDAR] No data yet (will keep trying)")
LIDAR_AVAILABLE = True # still mark available — data may arrive later
return True
# ══════════════════════════════════════════════════════════════════════════════
# POLL LOOP
# ══════════════════════════════════════════════════════════════════════════════
def _poll_loop():
"""Drain data_q, keep latest FRAME payload, update shared state."""
client = _client[0]
if client is None:
return
poll_interval = 1.0 / max(1, _cfg.get("poll_hz", 20))
while _lidar_alive[0]:
try:
# Drain data_q — keep only the latest FRAME
payload = None
while True:
try:
item = client.data_q.get_nowait()
if isinstance(item, tuple) and len(item) >= 2 and item[0] == "FRAME":
payload = item[1]
except Exception:
break
if payload is not None:
_update_state(payload)
# Drain status_q (just log, don't block)
while True:
try:
msg = client.status_q.get_nowait()
if isinstance(msg, tuple) and len(msg) >= 2:
level, text = str(msg[0]), str(msg[1])
if level == "ERROR":
log(f"SLAM: {text}", "error", "lidar")
elif level == "WARN":
log(f"SLAM: {text}", "warning", "lidar")
except Exception:
break
except Exception as e:
log(f"LiDAR poll error: {e}", "error", "lidar")
time.sleep(poll_interval)
def _update_state(payload):
"""Extract safety/pose/nav from FRAME payload into shared state."""
with _lock:
_latest_safety[0] = payload.get("safety", {})
_latest_pose[0] = payload.get("pose")
_latest_nav[0] = payload.get("nav", {})
_latest_loc_state[0] = str(payload.get("loc_state", "IDLE"))
_latest_mode[0] = str(payload.get("mode", "IDLE"))
_latest_perf[0] = payload.get("perf", {})
_last_frame_time[0] = time.time()
# ══════════════════════════════════════════════════════════════════════════════
# PUBLIC API
# ══════════════════════════════════════════════════════════════════════════════
def obstacle_ahead(radius=None):
"""
Returns True if the SLAM safety system detected an obstacle.
    Checks safety.emergency OR safety.hold from the latest FRAME.
    (radius is accepted for API compatibility and currently unused.)
This uses the SLAM's own costmap + SafetySupervisor — 0.50m collision zone,
localization loss detection, and 0.8s emergency hold.
Safe to call even when LiDAR is not available (returns False).
"""
if not LIDAR_AVAILABLE or _last_frame_time[0] == 0:
return False
with _lock:
safety = _latest_safety[0]
if not safety:
return False
return bool(safety.get("emergency", False)) or bool(safety.get("hold", False))
def get_slam_pose():
"""
    Returns {"x", "y", "heading", "source"} derived from the SLAM 4x4 pose matrix.
Returns None if no pose available.
More accurate than dead reckoning (±2cm vs ±10cm).
"""
with _lock:
pose = _latest_pose[0]
if pose is None:
return None
pose = np.asarray(pose, dtype=np.float64)
if pose.shape != (4, 4):
return None
x = float(pose[0, 3])
y = float(pose[1, 3])
yaw = math.degrees(math.atan2(float(pose[1, 0]), float(pose[0, 0])))
return {"x": round(x, 4), "y": round(y, 4), "heading": round(yaw, 2), "source": "slam"}
def get_nav_cmd():
"""
Returns the SLAM path planner's recommended velocity command.
{"linear_mps": float, "angular_rps": float, "blocked": bool, "goal_reached": bool}
Returns None if no nav data.
"""
with _lock:
nav = _latest_nav[0]
cmd = nav.get("cmd") if nav else None
if not cmd or not isinstance(cmd, dict):
return None
return cmd
def get_loc_state():
"""Returns 'TRACKING', 'DEGRADED', 'LOST', or 'RECOVERY'."""
with _lock:
return _latest_loc_state[0]
def get_safety_reasons():
"""Returns list of current safety trigger reasons, or empty list."""
with _lock:
safety = _latest_safety[0]
return safety.get("reasons", []) if safety else []
def get_lidar_status():
"""Full status dict for diagnostics."""
with _lock:
return {
"available": LIDAR_AVAILABLE,
"mode": _latest_mode[0],
"loc_state": _latest_loc_state[0],
"safety": dict(_latest_safety[0]) if _latest_safety[0] else {},
"perf": dict(_latest_perf[0]) if _latest_perf[0] else {},
"last_frame_age": round(time.time() - _last_frame_time[0], 1) if _last_frame_time[0] > 0 else -1,
"pose": get_slam_pose(),
}
def get_client():
"""Return the SlamEngineClient instance (for direct commands)."""
return _client[0]
def stop_lidar():
"""Clean shutdown."""
global LIDAR_AVAILABLE
_lidar_alive[0] = False
LIDAR_AVAILABLE = False
client = _client[0]
if client is not None:
try:
client.stop_process()
except Exception:
pass
log("LiDAR stopped", "info", "lidar")
print(" [LiDAR] Stopped")

157
API/llava_api.py Normal file

@@ -0,0 +1,157 @@
"""
llava_api.py — LLaVA / Qwen VL query interface
"""
import json
import ollama
import yaml
from pathlib import Path
from Core.config_loader import load_config
_cfg = load_config("Brain")
# Load prompts from YAML (the authoritative source — bilingual, complete)
_yaml_path = Path(__file__).resolve().parent.parent / "Config" / "marcus_prompts.yaml"
with open(_yaml_path, encoding="utf-8") as _f:
_prompts = yaml.safe_load(_f)
OLLAMA_MODEL = _cfg["ollama_model"]
MAX_HISTORY = _cfg["max_history"]
# Cap batch and context on every request. Without this, llama.cpp on Jetson
# Orin NX allocates a ~7.5 GiB compute graph (defaults: batch 512, ctx 4096)
# that SIGKILLs the runner when Marcus already holds ~2 GiB of unified memory
# for YOLO/camera/audio. Halving batch roughly quarters the compute graph.
NUM_BATCH = _cfg.get("num_batch", 128)
NUM_CTX = _cfg.get("num_ctx", 2048)
MAIN_PROMPT = _prompts["main_prompt"]
GOAL_PROMPT = _prompts["goal_prompt"]
PATROL_PROMPT = _prompts["patrol_prompt"]
TALK_PROMPT = _prompts["talk_prompt"]
VERIFY_PROMPT = _prompts["verify_prompt"]
# Conversation state
_conversation_history = []
_facts = []
def remember_fact(fact: str):
"""Store a fact told by the user for injection into LLaVA context."""
if fact and fact not in _facts:
_facts.append(fact)
print(f" [Memory] Fact stored: {fact}")
def add_to_history(user_msg: str, assistant_msg: str):
_conversation_history.append({"role": "user", "content": user_msg})
_conversation_history.append({"role": "assistant", "content": assistant_msg})
while len(_conversation_history) > MAX_HISTORY:
_conversation_history.pop(0)
def call_llava(prompt: str, img_b64, num_predict: int = 200, use_history: bool = False) -> str:
messages = []
if use_history and _conversation_history:
messages.extend(_conversation_history)
msg = {"role": "user", "content": prompt}
if img_b64:
msg["images"] = [img_b64]
messages.append(msg)
r = ollama.chat(model=OLLAMA_MODEL, messages=messages,
options={
"temperature": 0.0,
"num_predict": num_predict,
"num_batch": NUM_BATCH,
"num_ctx": NUM_CTX,
})
return r["message"]["content"].strip()
def parse_json(raw: str):
"""Extract and parse first JSON object from string."""
raw = raw.replace("```json", "").replace("```", "").strip()
s = raw.find("{")
e = raw.rfind("}") + 1
if s == -1 or e == 0:
return None
try:
return json.loads(raw[s:e])
except json.JSONDecodeError:
return None
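# Example (illustrative): parse_json('```json\n{"speak": "hi"}\n```')
# strips the fences, takes the span between the first "{" and the last "}",
# and returns {"speak": "hi"}; malformed JSON returns None so callers can
# fall back to the raw model text.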
def ask(command: str, img_b64) -> dict:
"""Send command + camera frame to LLaVA with conversation history."""
try:
facts_str = ("\nKnown facts: " + "; ".join(_facts) + ".") if _facts else ""
raw = call_llava(MAIN_PROMPT.format(command=command, facts=facts_str), img_b64,
num_predict=_cfg["num_predict_main"], use_history=True)
print(f" Raw: {raw}")
d = parse_json(raw)
speak = d.get("speak", raw) if d else raw
add_to_history(command, speak)
if d is None:
return {"actions": [], "arm": None, "speak": raw, "abort": None}
return d
except Exception as ex:
print(f" LLaVA error: {ex}")
return {"actions": [], "arm": None, "speak": "Error.", "abort": None}
def ask_goal(goal: str, img_b64) -> dict:
"""Ask LLaVA if goal is reached."""
try:
raw = call_llava(GOAL_PROMPT.format(goal=goal), img_b64,
num_predict=_cfg["num_predict_goal"])
print(f" LLaVA: {raw}")
d = parse_json(raw)
if d is None:
text = raw.lower()
reached = any(w in text for w in
["reached", "found", "i can see", "i see a person", "yes", "arrived"])
return {"reached": reached, "next_move": "left", "duration": 0.5, "speak": raw[:100]}
reached = d.get("reached", False)
if isinstance(reached, str):
reached = reached.lower() in ("true", "yes", "1")
d["reached"] = reached
return d
except Exception:
return {"reached": False, "next_move": "left", "duration": 0.5, "speak": "Continuing..."}
def ask_talk(command: str, img_b64, facts: str = "") -> dict:
"""Handle talk-only commands using the YAML talk_prompt."""
try:
prompt = TALK_PROMPT.format(command=command, facts=facts)
raw = call_llava(prompt, img_b64, num_predict=_cfg["num_predict_talk"],
use_history=True)
print(f" Raw: {raw}")
d = parse_json(raw)
if d is None:
return {"actions": [], "arm": None, "speak": raw[:100], "abort": None}
speak = d.get("speak", "")
add_to_history(command, speak)
return d
except Exception as ex:
print(f" Talk error: {ex}")
return {"actions": [], "arm": None, "speak": f"Error: {ex}", "abort": None}
def ask_verify(target: str, condition: str, img_b64) -> str:
"""Verify a condition on a detected target. Returns 'yes' or 'no'."""
try:
prompt = VERIFY_PROMPT.format(target=target, condition=condition)
raw = call_llava(prompt, img_b64, num_predict=_cfg["num_predict_verify"])
cleaned = raw.strip().lower().rstrip(".,!?")
first_word = cleaned.split()[0] if cleaned.split() else "no"
return first_word if first_word in ("yes", "no") else "no"
except Exception:
return "no"
def ask_patrol(img_b64) -> dict:
"""Ask LLaVA to assess scene during patrol."""
try:
raw = call_llava(PATROL_PROMPT, img_b64, num_predict=_cfg["num_predict_patrol"])
d = parse_json(raw)
return d or {"observation": raw[:80], "alert": None, "next_move": "forward", "duration": 1.0}
except Exception:
return {"observation": "Error", "alert": None, "next_move": "stop", "duration": 0}

109
API/memory_api.py Normal file

@@ -0,0 +1,109 @@
"""
memory_api.py — Session + place memory interface
"""
import sys
import os
from Core.env_loader import PROJECT_ROOT
from API.odometry_api import get_position
MEMORY_AVAILABLE = False
mem = None
def init_memory() -> bool:
"""Start memory subsystem. Returns True if successful."""
global MEMORY_AVAILABLE, mem
# marcus_memory.py lives in Brain/
brain_dir = os.path.join(PROJECT_ROOT, "Brain")
    if brain_dir not in sys.path:
        sys.path.insert(0, brain_dir)
try:
from marcus_memory import Memory
mem = Memory()
mem.start_session()
MEMORY_AVAILABLE = True
print("Memory started")
return True
except ImportError as e:
print(f"marcus_memory.py not found ({e}) — memory disabled")
return False
except Exception as e:
print(f"Memory error: {e} — memory disabled")
return False
def log_cmd(cmd: str, response: str = "", duration: float = 0.0):
"""Log command to session memory."""
if mem:
try:
mem.log_command(cmd, response, duration)
except Exception:
pass
def log_detection(class_name: str, position: str = "", distance: str = ""):
"""Log YOLO detection to session memory with current position."""
if mem and class_name:
pos = get_position()
try:
mem.log_detection(
class_name, position, distance,
x=pos["x"] if pos else None,
y=pos["y"] if pos else None,
)
except Exception:
pass
def place_save(name: str) -> bool:
"""Save current position as named place."""
if not mem:
print(" [Places] Memory not available — cannot save place")
return False
pos = get_position()
return mem.save_place(
name,
x=pos["x"] if pos else None,
y=pos["y"] if pos else None,
heading=pos["heading"] if pos else None,
)
def place_goto(name: str) -> bool:
"""Navigate to a named saved place."""
from API.odometry_api import odom, ODOM_AVAILABLE
if not mem:
print(" [Places] Memory not available")
return False
place = mem.get_place(name)
if place is None:
return False
if not place.get("has_odom"):
print(f" [Places] '{name}' was saved without odometry — no coordinates")
return False
if not odom or not ODOM_AVAILABLE:
print(" [Places] Odometry not running — cannot navigate")
return False
print(f" [Places] Navigating to '{name}' "
f"(x={place['x']:.2f}, y={place['y']:.2f}, h={place['heading']:.1f})")
return odom.navigate_to(place["x"], place["y"], place["heading"])
def places_list_str() -> str:
"""Return formatted string of all saved places."""
if not mem:
return "Memory not available"
places = mem.list_places()
if not places:
return "No places saved yet. Say 'remember this as <name>' to save one."
lines = [f" {'Name':<20} {'Coordinates':<25} {'Saved at'}"]
lines.append(" " + "-" * 60)
for p in places:
coord = (f"x={p['x']:.2f} y={p['y']:.2f} h={p['heading']:.1f}"
if p.get("has_odom") else "no coordinates")
lines.append(f" {p['name']:<20} {coord:<25} {p.get('saved_at', '')}")
return "\n".join(lines)

40
API/odometry_api.py Normal file

@@ -0,0 +1,40 @@
"""
odometry_api.py — Odometry interface wrapper
"""
import sys
import os
from Core.env_loader import PROJECT_ROOT
ODOM_AVAILABLE = False
odom = None
def init_odometry(zmq_sock=None) -> bool:
"""Start odometry tracking. Returns True if successful."""
global ODOM_AVAILABLE, odom
# marcus_odometry.py lives in Navigation/
nav_dir = os.path.join(PROJECT_ROOT, "Navigation")
    if nav_dir not in sys.path:
        sys.path.insert(0, nav_dir)
try:
from marcus_odometry import Odometry
odom = Odometry()
ODOM_AVAILABLE = odom.start(zmq_sock=zmq_sock)
print(f"Odometry {'started' if ODOM_AVAILABLE else 'failed'}")
return ODOM_AVAILABLE
except ImportError as e:
print(f"marcus_odometry.py not found ({e}) — precise distance disabled")
return False
except Exception as e:
print(f"Odometry error: {e} — precise distance disabled")
return False
def get_position():
"""Return current position dict or None."""
if odom and ODOM_AVAILABLE:
return odom.get_position()
return None

66
API/yolo_api.py Normal file

@@ -0,0 +1,66 @@
"""
yolo_api.py — YOLO detection interface with fallback stubs
"""
import sys
import os
from Core.config_loader import load_config
from Core.env_loader import PROJECT_ROOT
YOLO_AVAILABLE = False
# Fallback stubs
def _stub_sees(c, **kw): return False
def _stub_count(c): return 0
def _stub_closest(c="person"): return None
def _stub_summary(): return "YOLO not loaded"
def _stub_ppe(): return []
def _stub_too_close(**k): return False
def _stub_all(): return set()
def _stub_fps(): return 0.0
yolo_sees = _stub_sees
yolo_count = _stub_count
yolo_closest = _stub_closest
yolo_summary = _stub_summary
yolo_ppe_violations = _stub_ppe
yolo_person_too_close = _stub_too_close
yolo_all_classes = _stub_all
yolo_fps = _stub_fps
def init_yolo(raw_frame_ref, frame_lock) -> bool:
"""Start YOLO inference. Returns True if successful."""
global YOLO_AVAILABLE
global yolo_sees, yolo_count, yolo_closest, yolo_summary
global yolo_ppe_violations, yolo_person_too_close, yolo_all_classes, yolo_fps
# marcus_yolo.py lives in Vision/
    vision_dir = os.path.join(PROJECT_ROOT, "Vision")
    if vision_dir not in sys.path:
        sys.path.insert(0, vision_dir)
try:
from marcus_yolo import (
start_yolo, yolo_sees as _ys, yolo_count as _yc, yolo_closest as _ycl,
yolo_summary as _ysu, yolo_ppe_violations as _ypp,
yolo_person_too_close as _yptc, yolo_all_classes as _yac, yolo_fps as _yfps,
)
except ImportError as e:
print(f"marcus_yolo.py not found ({e})")
return False
# GPU is required — let RuntimeError from _resolve_device propagate so
# Marcus hard-fails at startup instead of silently running without vision.
ok = start_yolo(raw_frame_ref=raw_frame_ref, frame_lock=frame_lock)
if ok:
YOLO_AVAILABLE = True
yolo_sees = _ys
yolo_count = _yc
yolo_closest = _ycl
yolo_summary = _ysu
yolo_ppe_violations = _ypp
yolo_person_too_close = _yptc
yolo_all_classes = _yac
yolo_fps = _yfps
print(f"YOLO {'started' if ok else 'failed to start'}")
return ok
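# Typical wiring sketch (startup order assumed — camera first, then YOLO;
# names come from this module and API/camera_api.py):
#   from API.camera_api import start_camera
#   raw_ref, raw_lock = start_camera()
#   if init_yolo(raw_ref, raw_lock):
#       print(yolo_summary())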

49
API/zmq_api.py Normal file

@@ -0,0 +1,49 @@
"""
zmq_api.py — ZMQ velocity + command interface to Holosoma
"""
import json
import time
import zmq
from Core.config_loader import load_config
from Core.logger import log
_cfg = load_config("ZMQ")
ZMQ_HOST = _cfg["zmq_host"]
ZMQ_PORT = _cfg["zmq_port"]
STOP_ITERATIONS = _cfg["stop_iterations"]
STOP_DELAY = _cfg["stop_delay"]
STEP_PAUSE = _cfg["step_pause"]
ctx = zmq.Context()
sock = ctx.socket(zmq.PUB)
sock.bind(f"tcp://{ZMQ_HOST}:{ZMQ_PORT}")
time.sleep(0.5)
log(f"ZMQ PUB bound on tcp://{ZMQ_HOST}:{ZMQ_PORT}", "info", "zmq")
def get_socket():
"""Return the shared ZMQ PUB socket (for odometry to reuse)."""
return sock
def send_vel(vx: float = 0.0, vy: float = 0.0, vyaw: float = 0.0):
"""Send velocity to Holosoma. vx m/s | vy m/s | vyaw rad/s"""
sock.send_string(json.dumps({"vel": {"vx": vx, "vy": vy, "vyaw": vyaw}}))
def gradual_stop():
"""Smooth deceleration to zero over ~1 second."""
for _ in range(STOP_ITERATIONS):
send_vel(0.0, 0.0, 0.0)
time.sleep(STOP_DELAY)
def send_cmd(cmd: str):
"""Send Holosoma state command: start | walk | stand | stop"""
sock.send_string(json.dumps({"cmd": cmd}))
# Load MOVE_MAP from navigation config
_nav = load_config("Navigation")
MOVE_MAP = {k: tuple(v) for k, v in _nav["move_map"].items()}
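# Wire format on the PUB socket (as produced by send_vel / send_cmd above):
#   {"vel": {"vx": 0.2, "vy": 0.0, "vyaw": 0.0}}   velocities in m/s, rad/s
#   {"cmd": "walk"}                                 state command string
# MOVE_MAP entries are (vx, vy, vyaw) tuples keyed by move name; the exact
# names and values come from config_Navigation.json's "move_map".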

0
Autonomous/__init__.py Normal file

516
Autonomous/marcus_autonomous.py Normal file

@@ -0,0 +1,516 @@
"""
marcus_autonomous.py — Autonomous Exploration Mode
====================================================
Project : Marcus | YS Lootah Technology
Hardware : Unitree G1 EDU + Jetson Orin NX
Marcus moves freely through the office, identifies areas and objects,
builds a live map of what it sees, and saves everything to a session folder.
Commands
--------
auto on       start autonomous exploration
auto off      stop autonomous exploration
auto status   show current exploration state
auto save     save current map snapshot
auto summary  LLaVA summary of what was explored
How it works
------------
1. Marcus moves forward, scanning with YOLO every 0.4s
2. Every N steps: LLaVA assesses the scene (area type, objects, notes)
3. Odometry records position at each observation
4. All data saved to map/map_XXX_DATE/ folder
5. Robot avoids walls by turning when blocked
Files saved
-----------
~/Models_marcus/map/map_001_2026-04-05/
observations.json   [{step, time, x, y, area_type, objects, observation}]
path.json           [{x, y, heading, t}] — full path walked
summary.txt         auto-generated LLaVA summary
frames/             camera captures at interesting points
Import in marcus_llava.py
-------------------------
from marcus_autonomous import AutonomousMode
auto = AutonomousMode(
get_frame_fn=get_frame,
send_vel_fn=send_vel,
gradual_stop_fn=gradual_stop,
yolo_sees_fn=yolo_sees,
yolo_summary_fn=yolo_summary,
yolo_all_classes_fn=yolo_all_classes,
yolo_closest_fn=yolo_closest,
odom_fn=_odom_pos,
call_llava_fn=_call_llava,
patrol_prompt=PATROL_PROMPT,
mem=mem,
models_dir=MODELS_DIR,
)
auto.enable() # start exploration
auto.disable() # stop exploration
auto.status() # print current state
"""
import json
import time
import threading
import os
import base64
from datetime import datetime
from pathlib import Path
# ══════════════════════════════════════════════════════════════════════════════
# CONFIGURATION
# ══════════════════════════════════════════════════════════════════════════════
LLAVA_EVERY_N_STEPS = 5 # call LLaVA every N movement steps (saves GPU)
YOLO_CHECK_INTERVAL = 0.4 # seconds between YOLO checks
FORWARD_DURATION = 1.5 # seconds per forward step
TURN_DURATION = 2.0 # seconds to turn when obstacle detected
PERSON_STOP_DIST = 0.3 # stop if person closer than this (relative)
MAX_OBSERVATIONS = 500 # max observations before auto-stop
SAVE_FRAMES = True # save camera frames at interesting points
INTERESTING_COOLDOWN = 5.0 # seconds between saving "interesting" frames
class AutonomousMode:
"""
Autonomous office exploration.
Thread-safe. enable() starts a background thread. disable() stops it.
All observations saved to disk automatically.
State machine:
        IDLE → EXPLORING → IDLE
        Any state → STOPPING → IDLE (on disable())
"""
def __init__(self, get_frame_fn, send_vel_fn, gradual_stop_fn,
yolo_sees_fn, yolo_summary_fn, yolo_all_classes_fn,
yolo_closest_fn, odom_fn, call_llava_fn,
patrol_prompt: str, mem=None, models_dir: str = None):
self._get_frame = get_frame_fn
self._send_vel = send_vel_fn
self._gradual_stop = gradual_stop_fn
self._yolo_sees = yolo_sees_fn
self._yolo_summary = yolo_summary_fn
self._yolo_all_classes = yolo_all_classes_fn
self._yolo_closest = yolo_closest_fn
self._odom_pos = odom_fn
self._call_llava = call_llava_fn
self._patrol_prompt = patrol_prompt
self._mem = mem
if models_dir is None:
models_dir = str(Path(__file__).resolve().parent.parent.parent / "Data" / "Brain")
self._models_dir = Path(models_dir)
# State
self._enabled = False
self._thread = None
self._lock = threading.Lock()
# Current exploration session
self._map_dir = None
self._observations = []
self._path = []
self._step = 0
self._start_time = None
self._last_interesting = 0.0
# Turn tracking — alternate left/right when blocked
self._last_turn = "left"
# Stats
self._area_counts = {} # {area_type: count}
self._all_objects = set()
# ── PUBLIC API ─────────────────────────────────────────────────────────────
def enable(self):
"""Start autonomous exploration."""
with self._lock:
if self._enabled:
print(" [Auto] Already running — use 'auto off' to stop first")
return
self._enabled = True
self._step = 0
self._start_time = time.time()
self._observations = []
self._path = []
self._area_counts = {}
self._all_objects = set()
self._map_dir = self._create_map_dir()
self._thread = threading.Thread(
target=self._explore_loop,
daemon=True,
name="auto-explore"
)
self._thread.start()
print(f"\n [Auto] Exploration started")
print(f" [Auto] Map folder: {self._map_dir}")
print(f" [Auto] Type 'auto off' to stop\n")
def disable(self):
"""Stop autonomous exploration and save results."""
with self._lock:
if not self._enabled:
print(" [Auto] Not running")
return
self._enabled = False
print("\n [Auto] Stopping exploration...")
# Wait for thread to finish
if self._thread and self._thread.is_alive():
self._thread.join(timeout=5.0)
self._gradual_stop()
self._save_session()
self._print_summary()
def status(self):
"""Print current exploration state."""
with self._lock:
running = self._enabled
step = self._step
obs = len(self._observations)
if not running:
print(" [Auto] Status: IDLE")
if self._map_dir:
print(f" [Auto] Last map: {self._map_dir}")
return
elapsed = time.time() - (self._start_time or time.time())
mins = int(elapsed // 60)
secs = int(elapsed % 60)
print(f" [Auto] Status: EXPLORING")
print(f" [Auto] Duration: {mins}m {secs}s")
print(f" [Auto] Steps: {step} | Observations: {obs}")
if self._area_counts:
areas = ", ".join(f"{k}:{v}" for k, v in sorted(self._area_counts.items()))
print(f" [Auto] Areas seen: {areas}")
if self._all_objects:
print(f" [Auto] Objects found: {', '.join(sorted(self._all_objects))}")
pos = self._odom_pos()
if pos:
print(f" [Auto] Position: x={pos['x']:.2f} y={pos['y']:.2f} heading={pos['heading']:.1f}°")
def is_enabled(self) -> bool:
with self._lock:
return self._enabled
def save_snapshot(self):
"""Save current state to disk without stopping."""
self._save_observations()
self._save_path()
print(f" [Auto] Snapshot saved to {self._map_dir}")
# ── EXPLORATION LOOP ────────────────────────────────────────────────────────
def _explore_loop(self):
"""
Main autonomous exploration loop.
Steps:
1. Check for person in path (safety stop)
2. Check YOLO for interesting objects
3. Move forward
4. Every N steps: call LLaVA for scene assessment
5. On obstacle: turn and continue
6. Log position + observations
"""
consecutive_blocks = 0 # count consecutive blocked steps
while self._enabled:
self._step += 1
# ── Safety: stop if person too close ─────────────────────────────
if self._yolo_sees("person"):
closest = self._yolo_closest("person")
if closest and closest.distance_estimate == "very close":
print(f" [Auto] Person very close — pausing 2s")
self._gradual_stop()
time.sleep(2.0)
continue
# ── Record YOLO detections ────────────────────────────────────────
detected_classes = self._yolo_all_classes()
            self._all_objects.update(detected_classes)
# ── Record odometry path point ────────────────────────────────────
pos = self._odom_pos()
if pos:
self._path.append({
"step": self._step,
"t": round(time.time() - self._start_time, 1),
"x": pos["x"],
"y": pos["y"],
"h": pos["heading"],
})
# ── LLaVA scene assessment every N steps ─────────────────────────
if self._step % LLAVA_EVERY_N_STEPS == 0:
self._assess_scene(pos, detected_classes)
# ── Movement decision ─────────────────────────────────────────────
if consecutive_blocks >= 3:
                # Stuck — turn for twice the normal duration to break free
                print(f" [Auto] Stuck — extended {self._last_turn} turn")
self._turn(self._last_turn, TURN_DURATION * 2)
consecutive_blocks = 0
continue
# Move forward
blocked = self._move_forward()
if blocked:
consecutive_blocks += 1
# Alternate left/right turns to explore both directions
turn_dir = "left" if self._last_turn == "right" else "right"
self._last_turn = turn_dir
print(f" [Auto] Obstacle — turning {turn_dir}")
self._turn(turn_dir, TURN_DURATION)
else:
consecutive_blocks = 0
# ── Max observations check ────────────────────────────────────────
if len(self._observations) >= MAX_OBSERVATIONS:
print(f" [Auto] Max observations ({MAX_OBSERVATIONS}) reached — stopping")
self._enabled = False
break
time.sleep(YOLO_CHECK_INTERVAL)
# Clean up
self._gradual_stop()
def _move_forward(self) -> bool:
"""
Move forward for FORWARD_DURATION seconds.
Returns True if blocked (no actual movement detected).
"""
# TODO: integrate LiDAR when available for true obstacle detection
# For now: move and assume not blocked
t0 = time.time()
while time.time() - t0 < FORWARD_DURATION and self._enabled:
self._send_vel(vx=0.25)
time.sleep(0.05)
self._send_vel(0, 0, 0)
time.sleep(0.1)
return False # Not blocked — no LiDAR yet
def _turn(self, direction: str, duration: float):
"""Turn left or right for given duration."""
vyaw = 0.25 if direction == "left" else -0.25
t0 = time.time()
while time.time() - t0 < duration and self._enabled:
self._send_vel(vyaw=vyaw)
time.sleep(0.05)
self._send_vel(0, 0, 0)
time.sleep(0.2)
def _assess_scene(self, pos: dict, yolo_classes: set):
"""
Call LLaVA to understand the current scene.
Saves observation + optionally a camera frame.
"""
img = self._get_frame()
if img is None:
return
try:
raw = self._call_llava(self._patrol_prompt, img, num_predict=120)
raw_clean = raw.replace("```json", "").replace("```", "").strip()
s = raw_clean.find("{")
e = raw_clean.rfind("}") + 1
d = json.loads(raw_clean[s:e]) if s != -1 and e > 0 else None
if d is None:
return
area_type = d.get("area_type", "unknown")
observation = d.get("observation", "")
objects = d.get("objects", [])
interesting = d.get("interesting", False)
# Update area counts
self._area_counts[area_type] = self._area_counts.get(area_type, 0) + 1
# Add objects to global set
for obj in objects:
self._all_objects.add(obj)
# Build observation record
obs = {
"step": self._step,
"time": round(time.time() - self._start_time, 1),
"timestamp": datetime.now().strftime("%H:%M:%S"),
"x": pos["x"] if pos else None,
"y": pos["y"] if pos else None,
"heading": pos["heading"] if pos else None,
"area_type": area_type,
"objects": objects,
"yolo_classes": list(yolo_classes),
"observation": observation,
"interesting": interesting,
}
self._observations.append(obs)
print(f" [Auto] Step {self._step} | {area_type} | {observation[:60]}")
if objects:
print(f" [Auto] Objects: {', '.join(objects)}")
# Save frame if interesting
if interesting and SAVE_FRAMES:
now = time.time()
if now - self._last_interesting > INTERESTING_COOLDOWN:
self._save_frame(img, self._step)
self._last_interesting = now
# Log to session memory
if self._mem:
self._mem.log_detection(area_type, "center", "medium")
# Auto-flush observations every 20 steps
if self._step % 20 == 0:
self._save_observations()
self._save_path()
except Exception as e:
print(f" [Auto] LLaVA assess error: {e}")
# ── FILE I/O ────────────────────────────────────────────────────────────────
def _create_map_dir(self) -> Path:
"""Create a new map folder with incremented ID."""
maps_dir = self._models_dir / "map"
maps_dir.mkdir(parents=True, exist_ok=True)
existing = [d for d in maps_dir.iterdir() if d.is_dir() and d.name.startswith("map_")]
nums = []
for d in existing:
parts = d.name.split("_")
if len(parts) >= 2 and parts[1].isdigit():
nums.append(int(parts[1]))
next_num = max(nums) + 1 if nums else 1
date_str = datetime.now().strftime("%Y-%m-%d")
map_dir = maps_dir / f"map_{next_num:03d}_{date_str}"
map_dir.mkdir(parents=True, exist_ok=True)
(map_dir / "frames").mkdir(exist_ok=True)
return map_dir
def _save_observations(self):
if not self._map_dir or not self._observations:
return
try:
path = self._map_dir / "observations.json"
tmp = path.with_suffix(".tmp")
with open(tmp, "w", encoding="utf-8") as f:
json.dump(self._observations, f, indent=2, ensure_ascii=False)
tmp.replace(path)
except Exception as e:
print(f" [Auto] Save observations error: {e}")
def _save_path(self):
if not self._map_dir or not self._path:
return
try:
path = self._map_dir / "path.json"
tmp = path.with_suffix(".tmp")
with open(tmp, "w", encoding="utf-8") as f:
json.dump(self._path, f, indent=2)
tmp.replace(path)
except Exception as e:
print(f" [Auto] Save path error: {e}")
def _save_frame(self, img_b64: str, step: int):
"""Save a camera frame as JPEG."""
if not self._map_dir or not img_b64:
return
try:
frame_path = self._map_dir / "frames" / f"frame_{step:04d}.jpg"
with open(frame_path, "wb") as f:
                f.write(base64.b64decode(img_b64))
except Exception as e:
print(f" [Auto] Save frame error: {e}")
def _generate_summary(self) -> str:
"""Generate a text summary of the exploration session."""
elapsed = time.time() - (self._start_time or time.time())
mins = int(elapsed // 60)
secs = int(elapsed % 60)
lines = [
f"Autonomous Exploration Summary",
f"==============================",
f"Map: {self._map_dir.name if self._map_dir else 'unknown'}",
f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M')}",
f"Duration: {mins}m {secs}s",
f"Steps taken: {self._step}",
f"Observations: {len(self._observations)}",
f"",
f"Areas identified:",
]
for area, count in sorted(self._area_counts.items(), key=lambda x: -x[1]):
lines.append(f" {area:<20} {count} observations")
lines.append("")
lines.append(f"Objects detected:")
for obj in sorted(self._all_objects):
lines.append(f" - {obj}")
# Add notable observations
interesting = [o for o in self._observations if o.get("interesting")]
if interesting:
lines.append("")
lines.append(f"Notable observations ({len(interesting)}):")
for o in interesting[:10]:
lines.append(f" [{o['timestamp']}] {o['observation']}")
return "\n".join(lines)
def _save_session(self):
"""Save all data to disk."""
self._save_observations()
self._save_path()
if self._map_dir:
try:
summary = self._generate_summary()
with open(self._map_dir / "summary.txt", "w", encoding="utf-8") as f:
f.write(summary)
except Exception as e:
print(f" [Auto] Save summary error: {e}")
def _print_summary(self):
"""Print exploration summary to terminal."""
elapsed = time.time() - (self._start_time or time.time())
mins = int(elapsed // 60)
secs = int(elapsed % 60)
print(f"\n [Auto] Exploration complete")
print(f" [Auto] Duration: {mins}m {secs}s | Steps: {self._step}")
print(f" [Auto] Observations: {len(self._observations)}")
if self._area_counts:
print(f" [Auto] Areas: {dict(sorted(self._area_counts.items()))}")
if self._all_objects:
print(f" [Auto] Objects: {', '.join(sorted(self._all_objects))}")
if self._map_dir:
print(f" [Auto] Saved to: {self._map_dir}\n")

0
Brain/__init__.py Normal file
View File

360
Brain/command_parser.py Normal file

@@ -0,0 +1,360 @@
"""
command_parser.py — Local command regex patterns + dispatcher
Handles place memory, odometry, session recall, help, examples
"""
import re
import time
from API.zmq_api import send_vel, gradual_stop
from API.memory_api import mem, place_save, place_goto, places_list_str
from API.odometry_api import odom, ODOM_AVAILABLE
from API.camera_api import get_frame
from API.llava_api import ask
from Brain.executor import execute
# ── Compiled patterns ────────────────────────────────────────────────────────
_RE_REMEMBER = re.compile(
r"^(?:remember|save|mark|call|name|label)\s+(?:this|here|current|position)?\s*as\s+(.+)$", re.I)
_RE_GOTO = re.compile(
r"^(?:go\s+to|navigate\s+to|take\s+me\s+to|move\s+to|return\s+to|head\s+to)\s+(.+)$", re.I)
_RE_FORGET = re.compile(
r"^(?:forget|delete|remove)\s+(?:place\s+)?(.+)$", re.I)
_RE_RENAME = re.compile(
r"^rename\s+(.+?)\s+(?:to|as)\s+(.+)$", re.I)
_RE_WALK_DIST = re.compile(
r"^(?:walk|go|move)\s+(?:forward\s+)?(\d+(?:\.\d+)?)\s*m(?:eter(?:s)?)?$", re.I)
_RE_WALK_BACK = re.compile(
r"^(?:walk|go|move)\s+backward?\s+(\d+(?:\.\d+)?)\s*m(?:eter(?:s)?)?$", re.I)
_RE_TURN_DEG = re.compile(
r"^turn\s+(?:(left|right)\s+)?(\d+(?:\.\d+)?)\s*deg(?:ree(?:s)?)?$", re.I)
_RE_PATROL_RT = re.compile(
r"^patrol[/:]\s*(.+)$", re.I)
_RE_LAST_CMD = re.compile(
r"^(?:last\s+command|what\s+did\s+i\s+(?:say|type)\s+last|repeat\s+last)$", re.I)
_RE_DO_AGAIN = re.compile(
r"^(?:do\s+that\s+again|repeat|again|redo)$", re.I)
_RE_UNDO = re.compile(
r"^(?:undo|go\s+back\s+(?:to\s+)?(?:where|from\s+where)\s+(?:you|i)\s+(?:started|were|came)|reverse\s+last|turn\s+back\s+from).*$", re.I)
_RE_LAST_SESS = re.compile(
r"^(?:last\s+session|what\s+(?:did\s+you\s+do|happened)\s+last\s+(?:session|time)|previous\s+session)$", re.I)
_RE_WHERE = re.compile(
r"^(?:where\s+am\s+i|current\s+position|my\s+(?:location|position)|position)$", re.I)
_RE_GO_HOME = re.compile(
r"^(?:go\s+home|return\s+to\s+start|come\s+back\s+home|go\s+back\s+to\s+start)$", re.I)
_RE_SESSION_SUMMARY = re.compile(
r"^(?:session\s+summary|what\s+happened\s+today|session\s+report)$", re.I)
_RE_AUTO = re.compile(
r"^auto(?:nomous)?\s+(on|off|status|save|summary)$", re.I)
# Autonomous mode instance — set by init_autonomous()
_auto = None
def init_autonomous(auto_instance):
"""Wire in the AutonomousMode instance from marcus_brain."""
global _auto
_auto = auto_instance
def try_local_command(cmd: str) -> bool:
"""
Handle local commands (place, odom, memory, help).
Returns True if handled, False if not matched (send to LLaVA).
"""
# ── PLACE MEMORY ─────────────────────────────────────────────────────
m = _RE_REMEMBER.match(cmd)
if m:
place_save(m.group(1).strip())
return True
m = _RE_GOTO.match(cmd)
if m:
name = m.group(1).strip()
if name.lower() in ("start", "home", "beginning"):
if odom and ODOM_AVAILABLE:
odom.return_to_start()
else:
print(" [Places] Odometry not running — cannot return to start")
else:
place_goto(name)
return True
m = _RE_FORGET.match(cmd)
if m:
if mem:
mem.delete_place(m.group(1).strip())
else:
print(" [Places] Memory not available")
return True
m = _RE_RENAME.match(cmd)
if m:
if mem:
mem.rename_place(m.group(1).strip(), m.group(2).strip())
else:
print(" [Places] Memory not available")
return True
if re.match(r"^(?:places|list\s+places|what\s+places|show\s+(?:places|locations)|known\s+places)$", cmd, re.I):
print(places_list_str())
return True
# ── ODOMETRY MOVEMENT ────────────────────────────────────────────────
m = _RE_WALK_DIST.match(cmd)
if m:
meters = float(m.group(1))
if odom:
odom.walk_distance(meters)
else:
t0 = time.time()
while time.time() - t0 < meters / 0.3:
send_vel(vx=0.3)
time.sleep(0.05)
gradual_stop()
return True
m = _RE_WALK_BACK.match(cmd)
if m:
meters = float(m.group(1))
if odom:
odom.walk_distance(meters, direction="backward")
else:
t0 = time.time()
while time.time() - t0 < meters / 0.2:
send_vel(vx=-0.2)
time.sleep(0.05)
gradual_stop()
return True
m = _RE_TURN_DEG.match(cmd)
if m:
direction = m.group(1)
degrees = float(m.group(2))
if direction and direction.lower() == "right":
degrees = -degrees
if odom:
odom.turn_degrees(degrees)
else:
t0 = time.time()
vyaw = 0.3 if degrees > 0 else -0.3
            duration = abs(degrees) / 17.2  # 0.3 rad/s ≈ 17.2°/s of yaw
while time.time() - t0 < duration:
send_vel(vyaw=vyaw)
time.sleep(0.05)
gradual_stop()
return True
# ── NAMED PATROL ROUTE ───────────────────────────────────────────────
m = _RE_PATROL_RT.match(cmd)
if m:
raw_route = m.group(1)
names = re.split(r"[→,;]+|\s{2,}", raw_route)
names = [n.strip() for n in names if n.strip()]
if not names:
print(" Usage: patrol: door → desk → exit")
return True
if not mem:
print(" [Places] Memory not available")
return True
waypoints, missing = [], []
for name in names:
place = mem.get_place(name)
if place is None:
missing.append(name)
elif not place.get("has_odom"):
print(f" [Places] '{name}' has no coordinates — skipping")
else:
waypoints.append({"x": place["x"], "y": place["y"], "heading": place["heading"], "name": name})
if missing:
print(f" [Places] Unknown places: {', '.join(missing)}")
if not waypoints:
print(" [Places] No valid waypoints — patrol cancelled")
return True
if odom:
print(f" [Places] Named patrol: {''.join(n['name'] for n in waypoints)}")
odom.patrol_route(waypoints)
else:
print(" [Places] Odometry not running")
return True
# ── SESSION MEMORY RECALL ────────────────────────────────────────────
if _RE_LAST_CMD.match(cmd):
if mem:
last = mem.get_last_command()
print(f" Last command: '{last}'" if last else " No commands logged yet")
else:
print(" Memory not available")
return True
if _RE_UNDO.match(cmd):
if not mem:
print(" Memory not available — cannot undo")
return True
recent = mem.get_last_n_commands(5)
move_words = {"turn right": ("left", 1), "turn left": ("right", -1),
"walk forward": ("backward", 1), "move forward": ("backward", 1),
"move back": ("forward", 1), "walk backward": ("forward", 1)}
for c in reversed(recent):
cl = c.lower()
for phrase, (reverse_dir, _) in move_words.items():
if phrase in cl:
print(f" Undoing: '{c}' → reversing with '{reverse_dir}'")
dur, t0 = 2.0, time.time()
if reverse_dir in ("left", "right"):
vyaw = 0.3 if reverse_dir == "left" else -0.3
while time.time() - t0 < dur:
send_vel(vyaw=vyaw)
time.sleep(0.05)
else:
vx = 0.3 if reverse_dir == "forward" else -0.2
while time.time() - t0 < dur:
send_vel(vx=vx)
time.sleep(0.05)
gradual_stop()
return True
print(" No movement command to undo")
return True
if _RE_DO_AGAIN.match(cmd):
if not mem:
print(" Memory not available — cannot repeat")
return True
recent = mem.get_last_n_commands(5)
repeat = None
for c in reversed(recent):
if not _RE_DO_AGAIN.match(c) and not _RE_LAST_CMD.match(c):
repeat = c
break
if repeat:
print(f" Repeating: '{repeat}'")
if try_local_command(repeat):
return True
# Not a local command — send directly to LLaVA
print("Thinking...")
img = get_frame()
if img:
d = ask(repeat, img)
execute(d)
return True
else:
print(" No previous command to repeat")
return True
if _RE_LAST_SESS.match(cmd):
if mem:
print(mem.last_session_summary())
else:
print(" Memory not available")
return True
if _RE_SESSION_SUMMARY.match(cmd):
if mem:
print(f" Session: {mem._session_id}")
print(f" Duration: {mem.session_duration_str()}")
print(f" Commands: {mem.commands_count()}")
print(f" Places: {mem.places_count()}")
detections = mem.get_session_detections()
classes = {d.get("class") for d in detections}
print(f" Detected: {', '.join(classes) if classes else 'nothing yet'}")
else:
print(" Memory not available")
return True
if _RE_WHERE.match(cmd):
if odom and ODOM_AVAILABLE:
print(f" Position: {odom.status_str()}")
print(f" Distance from start: {odom.get_distance_from_start():.2f}m")
else:
print(" Odometry not running — position unknown")
return True
if _RE_GO_HOME.match(cmd):
if odom and ODOM_AVAILABLE:
odom.return_to_start()
else:
print(" Odometry not running — cannot navigate home")
return True
# ── AUTONOMOUS MODE ──────────────────────────────────────────────────
m = _RE_AUTO.match(cmd)
if m:
subcmd = m.group(1).lower()
if _auto is None:
print(" [Auto] Autonomous mode not initialized")
return True
if subcmd == "on":
_auto.enable()
elif subcmd == "off":
_auto.disable()
elif subcmd == "status":
_auto.status()
elif subcmd == "save":
_auto.save_snapshot()
elif subcmd == "summary":
if _auto.is_enabled():
_auto.status()
else:
print(" [Auto] Not running — use 'auto on' to start")
return True
# ── LIDAR STATUS ─────────────────────────────────────────────────────
if re.match(r"^(?:lidar|lidar\s+status|slam\s+status)$", cmd, re.I):
try:
from API.lidar_api import LIDAR_AVAILABLE, get_lidar_status
if not LIDAR_AVAILABLE:
print(" LiDAR: not available")
else:
s = get_lidar_status()
print(f" LiDAR: {s['mode']} | loc: {s['loc_state']} | "
f"frame age: {s['last_frame_age']}s")
if s.get("pose"):
p = s["pose"]
print(f" SLAM pose: x={p['x']:.2f} y={p['y']:.2f} h={p['heading']:.1f}")
safety = s.get("safety", {})
if safety.get("emergency"):
print(f" EMERGENCY: {safety.get('reasons', [])}")
perf = s.get("perf", {})
if perf:
print(f" FPS: {perf.get('input_fps', 0):.0f} in / "
f"ICP: {perf.get('icp_ms', 0):.1f}ms / "
f"CPU: {perf.get('cpu_percent', 0):.0f}%")
except ImportError:
print(" LiDAR: module not loaded")
return True
# ── HELP / EXAMPLES ──────────────────────────────────────────────────
if re.match(r"^(?:help[/]|help|commands|menu|[?][/]|[?])$", cmd, re.I):
_print_help()
return True
if re.match(r"^(?:example[/]|examples[/]|ex[/]|example|examples|ex|show examples)$", cmd, re.I):
_print_examples()
return True
return False
def _print_help():
print("""
MARCUS COMMAND HELP
Movement: turn left/right, walk forward/back, walk 1 meter, turn 90 degrees
Vision: what do you see, yolo
Goals: goal/ stop when you see a person
Places: remember this as door, go to door, places, forget door
Patrol: patrol, patrol: door desk exit
Session: last command, do that again, last session, session summary
Search: search/ /path/to/photo.jpg [hint], search/ person in blue
Auto: auto on, auto off, auto status, auto save, auto summary
LiDAR: lidar, lidar status
System: help, example, yolo, q""")
def _print_examples():
print("""
MARCUS USAGE EXAMPLES
turn left | turn right 90 degrees | walk forward | walk 1 meter
what do you see | describe what is in front of you
goal/ stop when you see a person | goal/ stop when you see a laptop
remember this as door | go to door | places | forget door
patrol | patrol: door desk window
last command | do that again | last session | session summary""")

99
Brain/executor.py Normal file
View File

@ -0,0 +1,99 @@
"""
executor.py Execute LLaVA movement decisions
With LiDAR obstacle interrupt stops immediately if obstacle detected.
"""
import time
import threading
from API.zmq_api import send_vel, gradual_stop, MOVE_MAP, STEP_PAUSE
from API.arm_api import ALL_ARM_NAMES, do_arm
def _obstacle_check():
"""Check LiDAR safety — returns True if obstacle detected. Safe if LiDAR unavailable."""
try:
from API.lidar_api import obstacle_ahead
return obstacle_ahead()
except ImportError:
return False
def execute_action(move: str, duration: float):
"""Execute a single movement step. Stops if LiDAR detects obstacle."""
if move in ALL_ARM_NAMES:
do_arm(move)
return
if move == "stop" or move is None:
gradual_stop()
return
if move in MOVE_MAP:
vx, vy, vyaw = MOVE_MAP[move]
t0 = time.time()
while time.time() - t0 < duration:
if _obstacle_check():
gradual_stop()
print(" [Safety] LiDAR obstacle — stopping")
return
send_vel(vx, vy, vyaw)
time.sleep(0.05)
gradual_stop()
time.sleep(STEP_PAUSE)
else:
print(f" Unknown move: '{move}' — skipping")
def move_step(move: str, duration: float):
"""Lightweight step for goal/patrol loops. Stops if LiDAR detects obstacle."""
if move in MOVE_MAP:
vx, vy, vyaw = MOVE_MAP[move]
t0 = time.time()
while time.time() - t0 < duration:
if _obstacle_check():
send_vel(0.0, 0.0, 0.0)
print(" [Safety] LiDAR obstacle — pausing step")
return
send_vel(vx, vy, vyaw)
time.sleep(0.05)
send_vel(0.0, 0.0, 0.0)
time.sleep(0.1)
def merge_actions(actions: list) -> list:
"""Merge consecutive same-direction steps into one smooth movement."""
if not actions:
return actions
merged = [dict(actions[0])]
for action in actions[1:]:
if action.get("move") == merged[-1].get("move"):
merged[-1]["duration"] = merged[-1].get("duration", 0) + action.get("duration", 0)
else:
merged.append(dict(action))
return merged
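# Worked example (illustrative input, not from a live run):
# merge_actions([{"move": "forward", "duration": 2.0},
# {"move": "forward", "duration": 1.5},
# {"move": "left", "duration": 2.0}])
# -> [{"move": "forward", "duration": 3.5}, {"move": "left", "duration": 2.0}]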
def execute(d: dict):
"""Run full LLaVA decision — movements in sequence, then arm in background."""
if d.get("abort"):
print(f" ABORT: {d['abort']}")
gradual_stop()
return
speak = d.get("speak", "")
actions = merge_actions(d.get("actions", []))
arm_cmd = d.get("arm", None)
print(f"Marcus: {speak}")
if not actions:
gradual_stop()
else:
for i, action in enumerate(actions):
move = action.get("move")
dur = float(action.get("duration", 2.0))
print(f" Step {i+1}/{len(actions)}: {move} for {dur}s")
execute_action(move, dur)
if arm_cmd:
print(f" Arm: {arm_cmd}")
threading.Thread(target=do_arm, args=(arm_cmd,), daemon=True).start()
return speak
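# Shape of the decision dict `d` (as defined by main_prompt in Config/config_Brain.json):
# {"actions": [{"move": "forward", "duration": 2.0}],
# "arm": "wave" or None, "speak": "one sentence", "abort": None or "reason"}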

503
Brain/marcus_brain.py Normal file
View File

@ -0,0 +1,503 @@
"""
marcus_brain.py Marcus AI Brain Orchestrator
================================================
Shared brain logic for both terminal (run_marcus.py) and server (marcus_server.py).
Usage:
Terminal: python3 run_marcus.py
Server: python3 -m Server.marcus_server (imports init_brain + process_command)
"""
import json
import os
import re
import time
import sys
PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if PROJECT_DIR not in sys.path:
sys.path.insert(0, PROJECT_DIR)
from API.zmq_api import send_vel, gradual_stop, send_cmd
from API.camera_api import start_camera, stop_camera, get_frame
from API.yolo_api import (
init_yolo, yolo_summary, yolo_fps,
yolo_all_classes, yolo_closest, yolo_sees,
)
from API.odometry_api import init_odometry
from API.memory_api import init_memory, log_cmd, log_detection
from API.llava_api import (
OLLAMA_MODEL, ask, ask_talk, call_llava, parse_json, add_to_history,
)
from API.imgsearch_api import init_imgsearch, get_searcher
from Core.config_loader import load_config
from Core.logger import log as _log
from Brain.command_parser import try_local_command, init_autonomous
from Brain.executor import execute, execute_action
from Navigation.goal_nav import navigate_to_goal
from Navigation.patrol import patrol
from Autonomous.marcus_autonomous import AutonomousMode
_cfg = load_config("Brain")
_TALK_PATTERNS = [
# English questions
r"^(?:what|who|where|when|how|why|is|are|do|does|can|tell|describe|explain|show|analyze)\s+",
# English identity/facts
r"^(?:my name is|i am|call me|that is|that person|note that|remember that)\s+",
# English acknowledgements
r"^(?:ok|okay|yes|no|good|nice|great|thanks|thank you|got it|understood|correct)\s*[!.]*$",
# Arabic questions — ماذا ترى / كيف حالك / من أنت / ما اسمك / صف / هل
r"^(?:ماذا|ما\s|كيف|من\s|أين|لماذا|هل|صف|اشرح|وصف|كم)\s*",
# Arabic identity/facts — اسمي / أنا / تذكر
r"^(?:اسمي|أنا\s|تذكر\s|سجل\s|لاحظ\s)",
# Arabic acknowledgements — حسنا / شكرا / ممتاز / صح / مفهوم
r"^(?:حسنا|شكرا|ممتاز|صح|مفهوم|تمام|أحسنت|جيد|نعم|لا)\s*[!.]*$",
]
_NAT_GOAL_RE = re.compile(
r'^(?:keep\s+(?:turn|rotat|spin)\w*\s+\w+\s+until\s+(?:you\s+)?(?:see|find|spot)\s+.+'
r'|stop\s+when\s+(?:you\s+)?(?:see|find|spot)\s+.+'
r'|find\s+(?:a\s+|the\s+|me\s+a\s+)?\w.+'
r'|look\s+for\s+(?:a\s+|the\s+)?\w.+'
r'|search\s+for\s+(?:a\s+|the\s+)?\w.+)$',
re.IGNORECASE
)
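# Commands this regex routes straight to goal navigation (illustrative):
# "keep turning left until you see a door"
# "stop when you see a person"
# "find a chair" / "look for the laptop" / "search for a bottle"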
# ══════════════════════════════════════════════════════════════════════════════
# INIT — called once by both run_marcus.py and marcus_server.py
# ══════════════════════════════════════════════════════════════════════════════
def init_brain():
"""Initialize all subsystems. Call once at startup."""
raw_frame, raw_lock = start_camera()
init_yolo(raw_frame, raw_lock)
from API.zmq_api import get_socket
init_odometry(zmq_sock=get_socket())
init_memory()
# LiDAR (optional — continues without it)
try:
from API.lidar_api import init_lidar
init_lidar()
except Exception as e:
print(f" [LiDAR] Init failed: {e} — continuing without LiDAR")
init_imgsearch(
get_frame_fn=get_frame,
send_vel_fn=send_vel,
gradual_stop_fn=gradual_stop,
llava_fn=call_llava,
yolo_sees_fn=yolo_sees,
model=OLLAMA_MODEL,
)
# Autonomous exploration mode
from API.memory_api import mem as _mem_ref
from API.llava_api import PATROL_PROMPT
auto = AutonomousMode(
get_frame_fn=get_frame,
send_vel_fn=send_vel,
gradual_stop_fn=gradual_stop,
yolo_sees_fn=yolo_sees,
yolo_summary_fn=yolo_summary,
yolo_all_classes_fn=yolo_all_classes,
yolo_closest_fn=yolo_closest,
odom_fn=lambda: {"x": 0, "y": 0, "heading": 0}, # fallback if no odom
call_llava_fn=call_llava,
patrol_prompt=PATROL_PROMPT,
mem=_mem_ref,
)
# Wire odometry if available
from API.odometry_api import odom as _odom_ref, ODOM_AVAILABLE
if _odom_ref and ODOM_AVAILABLE:
auto._odom_pos = lambda: {
"x": _odom_ref._x, "y": _odom_ref._y, "heading": _odom_ref._heading
}
init_autonomous(auto)
send_cmd("start")
time.sleep(0.5)
send_cmd("walk")
time.sleep(0.5)
# Voice module (optional — continues without it)
_init_voice()
_log("Brain initialized", "info", "brain")
_warmup_llava()
# Global voice references
_audio_api = None
_voice_module = None
def _init_voice():
"""Initialize voice module — runs in background, calls process_command on speech."""
global _audio_api, _voice_module
try:
from API.audio_api import AudioAPI
from Voice.marcus_gemini_voice import GeminiVoiceModule as VoiceModule
_audio_api = AudioAPI()
def _voice_callback(text, role):
"""Gemini voice callback."""
if role != "user" or not text.strip():
return
t = text.strip().lower()
act_kw = ["turn","move","go","walk","step","stop","come","wave","clap",
"high five","shake","hug","forward","backward","left","right",
"what do you see","what can you see","look","describe","patrol",
"دور","امشي","روح","تقدم","ارجع","وقف","قف","تعال",
"يمين","يسار","قدام","ورا","لوح","صفق","سلم",
"شو شايف","شو تشوف","ماذا ترى","شو قدامك","لف","خطوات"]
if any(kw in t for kw in act_kw):
print(f" [Brain] Action: {text.strip()}")
try:
result = process_command(text.strip())
if isinstance(result, dict):
sp = result.get("speak", "")
vis_kw = ["see","look","describe","شايف","تشوف","ترى","قدامك"]
if any(k in t for k in vis_kw) and sp and _audio_api:
print(f" [Brain] Vision: {sp}")
_audio_api.speak(sp)
except Exception as e:
print(f" [Brain] Error: {e}")
else:
print(f" [Chat] {text.strip()}")
_voice_module = VoiceModule(_audio_api, on_transcript=_voice_callback)
_voice_module.start()
print(f" [Voice] Always listening (Gemini voice)")
except Exception as e:
print(f" [Voice] Init failed: {e} — continuing without voice")
_audio_api = None
_voice_module = None
# ══════════════════════════════════════════════════════════════════════════════
# PROCESS COMMAND — shared by terminal loop and WebSocket server
# ══════════════════════════════════════════════════════════════════════════════
def process_command(cmd: str) -> dict:
"""
Process a single command through the full brain pipeline.
Returns: {"type": str, "speak": str, "action": str, "elapsed": float}
Used by both run_marcus.py (terminal) and marcus_server.py (WebSocket).
"""
cmd = cmd.strip()
if not cmd:
return {"type": "empty", "speak": "", "action": "NONE", "elapsed": 0}
t0 = time.time()
# ── YOLO status ──────────────────────────────────────────────────────
if any(w in cmd.lower() for w in ("yolo", "what does yolo", "vision", "using yolo")):
from API.yolo_api import YOLO_AVAILABLE as _ya
status = "active" if _ya else "not loaded"
speak = f"YOLO: {status} | {yolo_summary()} | {yolo_fps():.1f}fps"
print(f" {speak}")
log_cmd(cmd, speak)
return {"type": "status", "speak": speak, "action": "YOLO", "elapsed": 0}
# ── Image search ─────────────────────────────────────────────────────
if cmd.lower().startswith("search/"):
speak = _handle_search(cmd)
return {"type": "search", "speak": speak, "action": "SEARCH", "elapsed": time.time() - t0}
# ── Auto-detect natural language goals ───────────────────────────────
if _NAT_GOAL_RE.match(cmd) and not cmd.lower().startswith("goal/"):
print(f" [Goal] Auto-detected: '{cmd}'")
navigate_to_goal(cmd.strip())
elapsed = time.time() - t0
log_cmd(cmd, f"Goal navigation: {cmd}", elapsed)
return {"type": "goal", "speak": f"Goal navigation: {cmd}", "action": "GOAL", "elapsed": elapsed}
# ── Explicit goal/ ───────────────────────────────────────────────────
if cmd.lower().startswith("goal/"):
goal = cmd[5:].strip()
if goal:
navigate_to_goal(goal)
elapsed = time.time() - t0
log_cmd(cmd, f"Goal navigation: {goal}", elapsed)
return {"type": "goal", "speak": f"Goal navigation: {goal}", "action": "GOAL", "elapsed": elapsed}
return {"type": "error", "speak": "Usage: goal/ stop when you see a person", "action": "NONE", "elapsed": 0}
# ── Autonomous patrol ────────────────────────────────────────────────
if cmd.lower().startswith("patrol"):
mins = 5.0
if " " in cmd:
try:
mins = float(cmd.split()[-1])
except ValueError:
pass
patrol(duration_minutes=mins)
elapsed = time.time() - t0
log_cmd(cmd, f"Patrol {mins}min", elapsed)
return {"type": "patrol", "speak": f"Patrol {mins}min complete", "action": "PATROL", "elapsed": elapsed}
# ── Local commands (place / odom / memory / help) ────────────────────
if try_local_command(cmd):
log_cmd(cmd, "local command")
return {"type": "local", "speak": "Done", "action": "LOCAL", "elapsed": time.time() - t0}
# ── Talk-only (questions / acknowledgements) ─────────────────────────
if any(re.match(p, cmd, re.IGNORECASE) for p in _TALK_PATTERNS):
speak = _handle_talk(cmd)
return {"type": "talk", "speak": speak, "action": "TALK", "elapsed": time.time() - t0}
# ── Greeting ─────────────────────────────────────────────────────────
if re.match(r"^(?:hi+|hey+|hello+|hola|salam|marhaba|sup|yo+|ahlan|السلام عليكم|مرحبا|أهلا|هلا|يا هلا)\s*[!.]*$", cmd, re.IGNORECASE):
response = "Hello! I am Marcus. How can I help you?"
print(f"Marcus: {response}")
add_to_history(cmd, response)
log_cmd(cmd, response)
return {"type": "greeting", "speak": response, "action": "GREETING", "elapsed": 0}
# ── "Come to me" shortcut ────────────────────────────────────────────
if re.match(r"^(?:come(?:\s+back)?(?:\s+to\s+me)?|come\s+here|get\s+closer|approach|move\s+closer)\s*[!.]*$", cmd, re.IGNORECASE):
execute_action("forward", 2.0)
resp = "Coming to you"
print(f"Marcus: {resp}")
add_to_history(cmd, resp)
log_cmd(cmd, resp)
return {"type": "move", "speak": resp, "action": "FORWARD 2.0s", "elapsed": 2.0}
# ── Multi-step compound ──────────────────────────────────────────────
_multi = re.match(
r"turn\s+(right|left)\s*(\d+)?\s*(?:deg(?:rees?)?)?\s+(?:and\s+then|then|and)?\s+"
r"(?:move\s+|go\s+|walk\s+|step\s+)?(back(?:ward)?|forward)\s*(\d+)?\s*(?:steps?|meter)?",
cmd, re.IGNORECASE)
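# e.g. "turn left 45 then walk forward" -> left for 45/18.0 = 2.5 s, then forward
# for 2.0 s (group 2 defaults to 90°, group 4 to 2 s when omitted).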
if _multi:
turn_dir = _multi.group(1).lower()
turn_deg = float(_multi.group(2) or 90)
walk_dir = "backward" if "back" in _multi.group(3).lower() else "forward"
walk_dur = float(_multi.group(4) or 2)
execute_action("right" if turn_dir == "right" else "left", turn_deg / 18.0)
execute_action(walk_dir, walk_dur)
resp = f"Turned {turn_dir} {int(turn_deg)} degrees then moved {walk_dir}"
print(f"Marcus: {resp}")
add_to_history(cmd, resp)
log_cmd(cmd, resp)
return {"type": "move", "speak": resp, "action": f"MULTI {turn_dir}+{walk_dir}", "elapsed": time.time() - t0}
# ── Standard LLaVA command ───────────────────────────────────────────
return _handle_llava(cmd)
# ══════════════════════════════════════════════════════════════════════════════
# HANDLERS (return speak text)
# ══════════════════════════════════════════════════════════════════════════════
def _handle_search(cmd):
args = cmd[7:].strip()
if not args:
print(" Usage: search/ /path/to/photo.jpg [hint]")
return "Usage: search/ <path or hint>"
searcher = get_searcher()
if not searcher:
print(" [Search] Image search not available")
return "Image search not available"
parts = args.split(None, 1)
if parts and os.path.exists(parts[0]):
img_path = parts[0]
hint = parts[1].strip() if len(parts) > 1 else ""
yolo_pre = "person" if not hint or "person" in hint.lower() else None
log_cmd(cmd, f"Image search: {img_path}")
result = searcher.search_from_file(img_path, hint=hint, yolo_prefilter=yolo_pre)
return result.get("description", "Search complete")
hint = args
yolo_pre = "person" if any(w in hint.lower() for w in ("person", "guy", "man", "woman")) else None
log_cmd(cmd, f"Image search: {hint}")
result = searcher.search(ref_img_b64=None, hint=hint, yolo_prefilter=yolo_pre)
return result.get("description", "Search complete")
def _handle_talk(cmd):
print("Thinking...")
try:
img = get_frame()
facts_str = ""
try:
from API.llava_api import _facts
if _facts:
facts_str = "\nKnown facts: " + "; ".join(_facts) + "."
except ImportError:
pass
d = ask_talk(cmd, img, facts=facts_str)
sp = d.get("speak", "")
print(f"Marcus: {sp}")
log_cmd(cmd, sp)
return sp
except Exception as ex:
print(f" Error: {ex}")
return f"Error: {ex}"
def _handle_llava(cmd):
print("Thinking...")
t0 = time.time()
img = get_frame()
if img is None:
print(" Waiting for camera...")
time.sleep(1.0)
img = get_frame()
if img is None:
print(" Camera not ready — command cancelled")
log_cmd(cmd, "camera not ready")
return {"type": "error", "speak": "Camera not ready", "action": "NONE", "elapsed": 0}
d = ask(cmd, img)
dur = time.time() - t0
print(f" ({dur:.1f}s) -> {json.dumps(d)}")
resp = execute(d)
log_cmd(cmd, resp or "", dur)
from API.yolo_api import YOLO_AVAILABLE as _ya
if _ya:
for cls in yolo_all_classes():
det = yolo_closest(cls)
if det:
log_detection(cls, det.position, det.distance_estimate)
action_str = d.get("actions", [{}])[0].get("move", "NONE") if d.get("actions") else "NONE"
return {"type": "decision", "speak": resp or "", "action": action_str.upper(),
"elapsed": dur, "raw": d}
# ══════════════════════════════════════════════════════════════════════════════
# HELPERS
# ══════════════════════════════════════════════════════════════════════════════
def _warmup_llava():
import ollama
print(" Warming up LLaVA... (loading into GPU)")
try:
img = get_frame()
ollama.chat(
model=OLLAMA_MODEL,
messages=[{"role": "user", "content": "hi",
"images": [img] if img else []}],
options={"temperature": 0.0, "num_predict": _cfg["warmup_num_predict"]}
)
print(" LLaVA warm - first command will be fast\n")
except Exception as e:
print(f" Warmup failed ({e}) - first command may be slow\n")
def get_brain_status() -> dict:
"""Return current brain status for server status message."""
from API.yolo_api import YOLO_AVAILABLE as _ya
from API.odometry_api import ODOM_AVAILABLE as _oa
from API.memory_api import MEMORY_AVAILABLE as _ma
from API.camera_api import CAM_WIDTH, CAM_HEIGHT, CAM_FPS
try:
from API.lidar_api import LIDAR_AVAILABLE as _la, get_loc_state
lidar_state = get_loc_state() if _la else "off"
except ImportError:
_la = False
lidar_state = "off"
return {
"model": OLLAMA_MODEL,
"yolo": _ya,
"odometry": _oa,
"memory": _ma,
"lidar": _la,
"lidar_state": lidar_state,
"voice": _voice_module is not None and _voice_module.is_running,
"camera": f"{CAM_WIDTH}x{CAM_HEIGHT}@{CAM_FPS}",
}
def shutdown():
"""Clean shutdown of all subsystems."""
print("\nShutting down Marcus...")
# Stop voice module
if _voice_module and _voice_module.is_running:
_voice_module.stop()
# Stop autonomous mode if running
from Brain.command_parser import _auto
if _auto and _auto.is_enabled():
_auto.disable()
stop_camera()
gradual_stop()
send_cmd("stop")
from API.odometry_api import odom as _o
if _o:
_o.stop()
from API.memory_api import mem as _m
if _m:
_m.end_session()
try:
from API.lidar_api import stop_lidar
stop_lidar()
except Exception:
pass
_log("Marcus stopped", "info", "brain")
print("Marcus stopped.")
# ══════════════════════════════════════════════════════════════════════════════
# TERMINAL MODE — used by run_marcus.py
# ══════════════════════════════════════════════════════════════════════════════
def run_terminal():
"""Run brain with terminal input loop."""
init_brain()
status = get_brain_status()
print()
print("=" * 48)
print(" MARCUS AI BRAIN — READY")
print("=" * 48)
for k, v in status.items():
print(f" {k:<10}: {v}")
print("=" * 48)
print(" help | example | yolo | patrol | auto on/off | q")
print()
try:
while True:
try:
cmd = input("Command: ").strip()
except (EOFError, KeyboardInterrupt):
break
if not cmd:
continue
if cmd.lower() in ("q", "quit", "exit"):
break
if cmd.lower() == "mute/":
import subprocess
subprocess.run(["pactl", "set-source-mute", "0", "1"], capture_output=True)
print(" Mic muted")
continue
if cmd.lower() == "unmute/":
import subprocess
subprocess.run(["pactl", "set-source-mute", "0", "0"], capture_output=True)
print(" Mic unmuted")
continue
result = process_command(cmd)
sp = result.get("speak", "") if isinstance(result, dict) else ""
if sp and _audio_api:
_audio_api.speak(sp)
except KeyboardInterrupt:
pass
shutdown()
if __name__ == "__main__":
run_terminal()

817
Brain/marcus_memory.py Normal file
View File

@ -0,0 +1,817 @@
"""
marcus_memory.py Session & Place Memory
==========================================
Project : Marcus | YS Lootah Technology
Hardware : Unitree G1 EDU + Jetson Orin NX
Purpose : Persistent memory across sessions.
- Place memory: save named robot positions, navigate back
- Session memory: log all commands, detections, alerts per session
- Cross-session recall: "what did you see last session?"
Folder structure
----------------
~/Models_marcus/places.json persistent named places (all sessions)
~/Models_marcus/sessions/
session_001_2026-04-05/
commands.json [{time, cmd, response, duration_s}]
detections.json [{time, class, position, distance, x, y}]
places.json places saved THIS session
alerts.json [{time, type, detail}]
summary.txt auto-generated session summary
Import in marcus_llava.py
-------------------------
from marcus_memory import Memory
mem = Memory() → call once at startup
mem.start_session() → begins logging
mem.log_command(cmd, response) → after every command
mem.log_detection(class_name, pos, dist) → from YOLO loop
mem.save_place("door", x, y, heading) → when user says "remember this as door"
mem.get_place("door") → returns {x, y, heading} or None
mem.list_places() → sorted list of place names
mem.delete_place("door") → removes place
mem.last_session_summary() → text summary of previous session
mem.end_session() → saves everything, call on shutdown
Date : April 2026
"""
import os
import json
import time
import re
import threading
import shutil
import difflib
from datetime import datetime
from pathlib import Path
# ══════════════════════════════════════════════════════════════════════════════
# CONFIGURATION
# ══════════════════════════════════════════════════════════════════════════════
_PROJECT_ROOT = Path(__file__).resolve().parent.parent
BASE_DIR = _PROJECT_ROOT / "Data" / "Brain"
SESSIONS_DIR = BASE_DIR / "Sessions"
PLACES_FILE = _PROJECT_ROOT / "Data" / "History" / "Places" / "places.json"
MAX_CMD_LEN = 500 # truncate very long commands
MAX_SESSIONS = 50 # keep last N sessions — older ones auto-deleted
DETECT_DEDUPE = 5.0 # seconds — suppress duplicate YOLO detections
# ══════════════════════════════════════════════════════════════════════════════
# HELPER — SAFE JSON READ/WRITE
# ══════════════════════════════════════════════════════════════════════════════
def _read_json(path: Path, default):
"""
Read JSON file. Returns default if file missing, unreadable, or corrupt.
Backs up corrupt files before resetting.
"""
if not path.exists():
return default
try:
with open(path, "r", encoding="utf-8") as f:
return json.load(f)
except json.JSONDecodeError:
# Back up the corrupt file and return default
backup = path.with_suffix(".bak")
try:
shutil.copy(path, backup)
print(f" [Memory] ⚠️ Corrupt JSON at {path.name} — backed up as {backup.name}")
except Exception:
pass
return default
except OSError as e:
print(f" [Memory] ⚠️ Cannot read {path.name}: {e}")
return default
def _write_json(path: Path, data, lock: threading.Lock = None):
"""
Write JSON atomically using a temp file + rename.
Catches disk-full and permission errors without crashing.
Returns True on success.
"""
def _do_write():
try:
path.parent.mkdir(parents=True, exist_ok=True)
tmp = path.with_suffix(".tmp")
with open(tmp, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2, ensure_ascii=False)
tmp.replace(path) # atomic rename
return True
except OSError as e:
print(f" [Memory] ⚠️ Cannot write {path.name}: {e}")
return False
if lock:
with lock:
return _do_write()
return _do_write()
def _sanitize_name(name: str) -> str:
"""
Clean a place name: lowercase, strip whitespace, remove unsafe chars.
'Server Room!' → 'server_room'
"""
name = name.strip().lower()
name = re.sub(r"[^\w\s\-]", "", name) # keep word chars, spaces, hyphens
name = re.sub(r"\s+", "_", name) # spaces → underscores
name = name[:50] # max 50 chars
return name
def _fuzzy_match(query: str, choices: list, n: int = 3) -> list:
"""Return up to n closest matches from choices for query."""
if not choices:
return []
return difflib.get_close_matches(query, choices, n=n, cutoff=0.5)
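# e.g. _fuzzy_match("dooor", ["door", "desk_a", "window"]) -> ["door"]
# (difflib ratio of "dooor" vs "door" ≈ 0.89, above the 0.5 cutoff)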
def _new_session_id(sessions_dir: Path) -> str:
"""Generate next session ID: session_001, session_002..."""
existing = [
d.name for d in sessions_dir.iterdir()
if d.is_dir() and d.name.startswith("session_")
] if sessions_dir.exists() else []
nums = []
for name in existing:
parts = name.split("_")
if len(parts) >= 2 and parts[1].isdigit():
nums.append(int(parts[1]))
next_num = max(nums) + 1 if nums else 1
date_str = datetime.now().strftime("%Y-%m-%d")
return f"session_{next_num:03d}_{date_str}"
# ══════════════════════════════════════════════════════════════════════════════
# MEMORY CLASS
# ══════════════════════════════════════════════════════════════════════════════
class Memory:
"""
Persistent session and place memory for Marcus.
Thread-safe. All write operations use locks.
Saves automatically on shutdown via end_session().
Register with atexit for crash protection.
Usage:
mem = Memory()
mem.start_session()
# ... during operation ...
mem.log_command("turn left", "Turning left")
mem.save_place("door", x=1.2, y=0.5, heading=90.0)
# ... on shutdown ...
mem.end_session()
"""
def __init__(self):
self._places_lock = threading.Lock()
self._session_lock = threading.Lock()
self._session_dir = None
self._session_id = None
self._session_start = None
# In-memory buffers — flushed to disk on end_session + periodically
self._commands = [] # [{time, cmd, response, duration_s}]
self._detections = [] # [{time, class, position, distance, x, y}]
self._alerts = [] # [{time, type, detail}]
# Dedup tracking for YOLO detections
self._last_detection = {} # class → timestamp
# Persistent places (survives all sessions)
self._places = {} # {name: {x, y, heading, saved_at, session}}
# Ensure base dirs exist
BASE_DIR.mkdir(parents=True, exist_ok=True)
SESSIONS_DIR.mkdir(parents=True, exist_ok=True)
# Load persistent places
self._load_places()
# Register auto-save on crash
import atexit
atexit.register(self._emergency_save)
# ── PLACES ────────────────────────────────────────────────────────────────
def _load_places(self):
"""Load places.json from disk into memory."""
data = _read_json(PLACES_FILE, {})
if isinstance(data, dict):
self._places = data
print(f" [Memory] Places loaded: {len(self._places)} locations")
else:
print(" [Memory] ⚠️ places.json has wrong format — resetting")
self._places = {}
def save_place(self, name: str, x: float = None,
y: float = None, heading: float = None) -> bool:
"""
Save current robot position with a name.
Args:
name : human-readable name e.g. "door", "desk_a"
x : robot x position from odometry (None if not available)
y : robot y position from odometry
heading : robot heading in degrees
Returns:
True on success, False on invalid name or write error.
Edge cases handled:
- Empty name → rejected
- Name with special chars → sanitized
- Duplicate name → overwrites with notification
- Odometry not running (x/y/heading all None) → saved as landmark only
- Disk full → error logged, returns False
"""
# Validate name
if not name or not name.strip():
print(" [Memory] ⚠️ Place name cannot be empty")
return False
clean = _sanitize_name(name)
if not clean:
print(f" [Memory] ⚠️ Place name '{name}' has no valid characters")
return False
# Warn if overwriting
if clean in self._places:
old = self._places[clean]
print(f" [Memory] Overwriting '{clean}' (was saved at {old.get('saved_at','?')})")
# Warn if no odometry
if x is None and y is None:
print(" [Memory] ⚠️ Odometry not running — saving place as landmark only (no coordinates)")
print(" [Memory] 'go to' navigation will not be available for this place")
entry = {
"name": clean,
"x": round(x, 4) if x is not None else None,
"y": round(y, 4) if y is not None else None,
"heading": round(heading, 2) if heading is not None else None,
"has_odom": x is not None,
"saved_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"session": self._session_id or "unknown",
}
with self._places_lock:
self._places[clean] = entry
ok = _write_json(PLACES_FILE, self._places)
if ok:
coord_str = f"x={x:.2f} y={y:.2f} h={heading:.1f}°" if x is not None else "no coordinates"
print(f" [Memory] ✅ Saved place '{clean}'{coord_str}")
return True
return False
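# A resulting places.json entry looks like (illustrative values):
# "door": {"name": "door", "x": 1.2, "y": 0.5, "heading": 90.0,
# "has_odom": true, "saved_at": "2026-04-05 14:02:11",
# "session": "session_003_2026-04-05"}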
def get_place(self, name: str) -> dict:
"""
Get a saved place by name.
Returns:
dict with x, y, heading, has_odom or None if not found.
Suggests similar names if not found.
Edge cases:
- Exact match first
- Case-insensitive match
- Fuzzy match suggestion if no exact match
"""
clean = _sanitize_name(name)
with self._places_lock:
places = dict(self._places)
# Exact match
if clean in places:
return places[clean]
# Case-insensitive match
for k, v in places.items():
if k.lower() == clean.lower():
return v
# Fuzzy suggestions
suggestions = _fuzzy_match(clean, list(places.keys()))
if suggestions:
print(f" [Memory] ⚠️ Place '{name}' not found.")
print(f" [Memory] Did you mean: {', '.join(suggestions)} ?")
else:
print(f" [Memory] ⚠️ Place '{name}' not found.")
if places:
print(f" [Memory] Known places: {', '.join(sorted(places.keys()))}")
else:
print(" [Memory] No places saved yet. Say 'remember this as <name>'")
return None
def delete_place(self, name: str) -> bool:
"""
Remove a saved place.
Returns True if deleted, False if not found.
"""
clean = _sanitize_name(name)
with self._places_lock:
if clean not in self._places:
suggestions = _fuzzy_match(clean, list(self._places.keys()))
if suggestions:
print(f" [Memory] Place '{name}' not found. Did you mean: {', '.join(suggestions)}?")
else:
print(f" [Memory] Place '{name}' not found.")
return False
del self._places[clean]
ok = _write_json(PLACES_FILE, self._places)
if ok:
print(f" [Memory] ✅ Deleted place '{clean}'")
return True
# Restore on write failure
self._load_places()
return False
def list_places(self) -> list:
"""
Return sorted list of place name dicts.
Returns:
[{"name": str, "x": float|None, "y": float|None,
"heading": float|None, "has_odom": bool, "saved_at": str}]
"""
with self._places_lock:
places = list(self._places.values())
return sorted(places, key=lambda p: p.get("name", ""))
def rename_place(self, old_name: str, new_name: str) -> bool:
"""Rename a saved place."""
old_clean = _sanitize_name(old_name)
new_clean = _sanitize_name(new_name)
if not new_clean:
print(f" [Memory] ⚠️ New name '{new_name}' is invalid")
return False
with self._places_lock:
if old_clean not in self._places:
print(f" [Memory] ⚠️ Place '{old_name}' not found")
return False
if new_clean in self._places:
print(f" [Memory] ⚠️ Name '{new_clean}' already exists")
return False
entry = self._places.pop(old_clean)
entry["name"] = new_clean
self._places[new_clean] = entry
ok = _write_json(PLACES_FILE, self._places)
if ok:
print(f" [Memory] ✅ Renamed '{old_clean}''{new_clean}'")
return True
self._load_places()
return False
# ── SESSION ───────────────────────────────────────────────────────────────
def start_session(self):
"""
Begin a new session. Creates session folder, sets session ID.
Edge cases:
- sessions/ doesn't exist → created
- session ID collision (same date, same number) → incremented
- Can only call once → subsequent calls are no-ops with warning
"""
if self._session_id is not None:
print(f" [Memory] ⚠️ Session already running: {self._session_id}")
return
self._session_id = _new_session_id(SESSIONS_DIR)
self._session_dir = SESSIONS_DIR / self._session_id
self._session_start = time.time()
try:
self._session_dir.mkdir(parents=True, exist_ok=True)
except OSError as e:
print(f" [Memory] ⚠️ Cannot create session dir: {e}")
self._session_id = None
self._session_dir = None
return
# Load previous session summary for context
prev = self._get_previous_session_dir()
if prev:
print(f" [Memory] Previous session: {prev.name}")
else:
print(" [Memory] First session — no previous memory")
print(f" [Memory] ✅ Session started: {self._session_id}")
# Auto-flush every 60 seconds in background
self._start_autosave()
def _start_autosave(self):
"""Background thread — flush session to disk every 60s."""
def _loop():
while self._session_id is not None:
time.sleep(60)
self._flush_session()
threading.Thread(target=_loop, daemon=True).start()
def _flush_session(self):
"""Write current session buffers to disk without closing session."""
if self._session_dir is None:
return
with self._session_lock:
_write_json(self._session_dir / "commands.json", self._commands)
_write_json(self._session_dir / "detections.json", self._detections)
_write_json(self._session_dir / "alerts.json", self._alerts)
# Save copy of places as they were this session
with self._places_lock:
_write_json(self._session_dir / "places.json", self._places)
def log_command(self, cmd: str, response: str = "", duration_s: float = 0.0):
"""
Log a command and its response to the current session.
Args:
cmd : the command string typed/spoken
response : Marcus's spoken response
duration_s : how long LLaVA took to respond
Edge cases:
- No active session → logs to memory only, warns
- cmd too long → truncated at MAX_CMD_LEN
- Unicode (Arabic) → preserved via ensure_ascii=False
- Thread-safe
"""
if not cmd:
return
entry = {
"time": datetime.now().strftime("%H:%M:%S"),
"cmd": cmd[:MAX_CMD_LEN],
"response": response[:MAX_CMD_LEN] if response else "",
"duration_s": round(duration_s, 2),
}
with self._session_lock:
self._commands.append(entry)
if self._session_dir is None:
print(" [Memory] ⚠️ No active session — command logged in memory only")
def log_detection(self, class_name: str, position: str = "",
distance: str = "", x: float = None, y: float = None):
"""
Log a YOLO detection to the current session.
Deduplicates: same class within DETECT_DEDUPE seconds is suppressed.
Args:
class_name : e.g. "person"
position : "left" / "center" / "right"
distance : "close" / "medium" / "far"
x, y : robot position when detection occurred (from odometry)
"""
if not class_name:
return
# Deduplicate — don't log same class repeatedly within dedupe window
now = time.time()
last = self._last_detection.get(class_name, 0)
if now - last < DETECT_DEDUPE:
return
self._last_detection[class_name] = now
entry = {
"time": datetime.now().strftime("%H:%M:%S"),
"class": class_name,
"position": position,
"distance": distance,
"x": round(x, 3) if x is not None else None,
"y": round(y, 3) if y is not None else None,
}
with self._session_lock:
self._detections.append(entry)
def log_alert(self, alert_type: str, detail: str = ""):
"""
Log a PPE or hazard alert to the current session.
Args:
alert_type : "PPE", "Hazard", "obstacle", etc.
detail : e.g. "no helmet (left)", "fire extinguisher missing"
"""
entry = {
"time": datetime.now().strftime("%H:%M:%S"),
"type": alert_type,
"detail": detail[:200],
}
with self._session_lock:
self._alerts.append(entry)
print(f" [Memory] 🚨 Alert logged: {alert_type}{detail}")
def get_last_command(self) -> str:
"""Return the last command typed, or empty string."""
with self._session_lock:
if self._commands:
return self._commands[-1].get("cmd", "")
return ""
def get_last_n_commands(self, n: int = 5) -> list:
"""Return last N command strings for LLaVA context."""
with self._session_lock:
recent = self._commands[-n:] if len(self._commands) >= n else self._commands[:]
return [e.get("cmd", "") for e in recent]
def get_session_detections(self) -> list:
"""Return all YOLO detections this session."""
with self._session_lock:
return list(self._detections)
def end_session(self):
"""
Save everything and close the current session.
Call on clean shutdown or Ctrl+C.
Edge cases:
- No active session → no-op
- Disk full → logs error but doesn't crash
- Generates summary text file
- Cleans up old sessions if > MAX_SESSIONS
"""
if self._session_id is None:
return
print(f"\n [Memory] Saving session {self._session_id}...")
self._flush_session()
self._write_summary()
self._prune_old_sessions()
elapsed = time.time() - (self._session_start or time.time())
mins = int(elapsed // 60)
secs = int(elapsed % 60)
with self._session_lock:
n_cmds = len(self._commands)
n_dets = len(self._detections)
n_alerts= len(self._alerts)
print(f" [Memory] ✅ Session saved: {self._session_id}")
print(f" [Memory] Duration: {mins}m {secs}s")
print(f" [Memory] Commands: {n_cmds} | Detections: {n_dets} | Alerts: {n_alerts}")
# Reset state
self._session_id = None
self._session_dir = None
def _emergency_save(self):
"""Called by atexit on crash — saves partial session data."""
if self._session_id is None:
return
print("\n [Memory] Emergency save on exit...")
self._flush_session()
def _write_summary(self):
"""Generate a plain text summary of the session."""
if self._session_dir is None:
return
try:
with self._session_lock:
n_cmds = len(self._commands)
n_dets = len(self._detections)
n_alerts = len(self._alerts)
places = list(self._places.keys())
cmds = [e.get("cmd", "") for e in self._commands[:10]]
elapsed = time.time() - (self._session_start or time.time())
lines = [
f"Session: {self._session_id}",
f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M')}",
f"Duration: {int(elapsed//60)}m {int(elapsed%60)}s",
f"Commands: {n_cmds}",
f"YOLO detections: {n_dets}",
f"Alerts: {n_alerts}",
f"Known places: {', '.join(places) if places else 'none'}",
"",
"First commands:",
] + [f" - {c}" for c in cmds]
with open(self._session_dir / "summary.txt", "w", encoding="utf-8") as f:
f.write("\n".join(lines))
except Exception as e:
print(f" [Memory] ⚠️ Summary write failed: {e}")
def _prune_old_sessions(self):
"""Delete oldest sessions if total exceeds MAX_SESSIONS."""
try:
sessions = sorted(
[d for d in SESSIONS_DIR.iterdir() if d.is_dir()],
key=lambda d: d.stat().st_mtime
)
while len(sessions) > MAX_SESSIONS:
oldest = sessions.pop(0)
shutil.rmtree(oldest, ignore_errors=True)
print(f" [Memory] Pruned old session: {oldest.name}")
except Exception as e:
print(f" [Memory] ⚠️ Session pruning failed: {e}")
# ── PREVIOUS SESSION RECALL ───────────────────────────────────────────────
def _get_previous_session_dir(self) -> Path:
"""Return the most recent session dir that is NOT the current session."""
try:
dirs = sorted(
[d for d in SESSIONS_DIR.iterdir()
if d.is_dir() and d.name != self._session_id],
key=lambda d: d.stat().st_mtime,
reverse=True
)
return dirs[0] if dirs else None
except Exception:
return None
def last_session_summary(self) -> str:
"""
Return text summary of the most recent previous session.
Used when user says 'what did you do last session?'
Edge cases:
- No previous session → informative message
- Summary file missing → reconstruct from commands.json
- Commands file corrupt → return partial info
"""
prev = self._get_previous_session_dir()
if prev is None:
return "No previous session found. This appears to be the first session."
# Try summary.txt first
summary_file = prev / "summary.txt"
if summary_file.exists():
try:
return summary_file.read_text(encoding="utf-8")
except Exception:
pass
# Reconstruct from raw data
cmds = _read_json(prev / "commands.json", [])
dets = _read_json(prev / "detections.json", [])
pls = _read_json(prev / "places.json", {})
lines = [
f"Previous session: {prev.name}",
f"Commands logged: {len(cmds)}",
f"Objects detected: {len(dets)}",
f"Places saved: {', '.join(pls.keys()) if pls else 'none'}",
]
if cmds:
lines.append("Commands included:")
for c in cmds[:10]:
lines.append(f" - {c.get('cmd','')}")
return "\n".join(lines)
def previous_session_detections(self) -> list:
"""
Return list of unique object classes seen in previous session.
Used for 'what objects did you see last time?'
"""
prev = self._get_previous_session_dir()
if prev is None:
return []
dets = _read_json(prev / "detections.json", [])
if not isinstance(dets, list):
return []
return list({d.get("class", "") for d in dets if d.get("class")})
def previous_session_places(self) -> dict:
"""
Return places saved in the previous session.
"""
prev = self._get_previous_session_dir()
if prev is None:
return {}
data = _read_json(prev / "places.json", {})
return data if isinstance(data, dict) else {}
def all_sessions(self) -> list:
"""
Return list of all session IDs with basic stats.
"""
result = []
try:
for d in sorted(SESSIONS_DIR.iterdir(), reverse=True):
if not d.is_dir():
continue
cmds = _read_json(d / "commands.json", [])
result.append({
"id": d.name,
"commands": len(cmds) if isinstance(cmds, list) else 0,
"date": "_".join(d.name.split("_")[2:]) if "_" in d.name else "",
})
except Exception:
pass
return result
# ── QUICK LOOKUPS ─────────────────────────────────────────────────────────
def session_duration_str(self) -> str:
"""Return human-readable session duration e.g. '14m 22s'."""
if self._session_start is None:
return "0m 0s"
elapsed = time.time() - self._session_start
return f"{int(elapsed//60)}m {int(elapsed%60)}s"
def places_count(self) -> int:
with self._places_lock:
return len(self._places)
def commands_count(self) -> int:
with self._session_lock:
return len(self._commands)
def __repr__(self):
return (f"Memory(session={self._session_id}, "
f"places={self.places_count()}, "
f"commands={self.commands_count()})")
# ══════════════════════════════════════════════════════════════════════════════
# STANDALONE TEST
# ══════════════════════════════════════════════════════════════════════════════
if __name__ == "__main__":
print("Marcus Memory — Standalone Test")
print("=================================\n")
mem = Memory()
mem.start_session()
print("\n--- Place memory ---")
mem.save_place("door", x=0.0, y=0.0, heading=0.0)
mem.save_place("desk a", x=1.5, y=0.3, heading=45.0)
mem.save_place("window", x=3.0, y=0.0, heading=180.0)
mem.save_place("", x=0.0, y=0.0, heading=0.0) # empty name → rejected
mem.save_place("desk a", x=1.6, y=0.4, heading=50.0) # duplicate → overwrite
print("\n--- Get place ---")
p = mem.get_place("door")
print(f"door → {p}")
p = mem.get_place("dooor") # typo → fuzzy suggestion
p = mem.get_place("nonexistent") # not found + list all
print("\n--- List places ---")
for pl in mem.list_places():
odom = f"x={pl['x']} y={pl['y']}" if pl['has_odom'] else "no odom"
print(f" {pl['name']:20} {odom}")
print("\n--- Command logging ---")
mem.log_command("turn left", "Turning left", duration_s=6.2)
mem.log_command("what do you see", "I see a person at a desk", duration_s=7.1)
mem.log_command("go to door", "Navigating to door", duration_s=0.1)
print(f"Last command: '{mem.get_last_command()}'")
print(f"Last 3: {mem.get_last_n_commands(3)}")
print("\n--- Detection logging ---")
mem.log_detection("person", "center", "close", x=1.5, y=0.3)
mem.log_detection("person", "center", "close", x=1.5, y=0.3) # dedupe → suppressed
mem.log_detection("chair", "right", "medium", x=1.5, y=0.3)
print("\n--- Alert logging ---")
mem.log_alert("PPE", "no helmet (left)")
mem.log_alert("Hazard", "fire extinguisher missing")
print("\n--- Previous session ---")
print(mem.last_session_summary())
print("\n--- All sessions ---")
for s in mem.all_sessions():
print(f" {s['id']:40} {s['commands']} commands")
print(f"\n--- Session duration: {mem.session_duration_str()} ---")
print(f"--- {repr(mem)} ---\n")
print("\n--- Rename place ---")
mem.rename_place("window", "window_north")
print("\n--- Delete place ---")
mem.delete_place("door")
mem.delete_place("nonexistent")
print("\n--- End session ---")
mem.end_session()
print("\nDone.")

0
Bridge/__init__.py Normal file
View File

66
Bridge/ros2_zmq_bridge.py Normal file
View File

@ -0,0 +1,66 @@
"""
ROS2 ZMQ bridge
Subscribes to /cmd_vel and holosoma/other_input (Python 3.8 + ROS2 Foxy)
Forwards to Holosoma via ZMQ PUB socket (Python 3.10 hsinference)
Run: source /opt/ros/foxy/setup.bash
python3.8 ~/Models_marcus/ros2_zmq_bridge.py
"""
import json, time
import rclpy
from rclpy.node import Node
from geometry_msgs.msg import TwistStamped
from std_msgs.msg import String
import zmq
ZMQ_PORT = 5556
class ROS2ZMQBridge(Node):
def __init__(self):
super().__init__('marcus_zmq_bridge')
# ZMQ PUB socket
self._ctx = zmq.Context()
self._sock = self._ctx.socket(zmq.PUB)
self._sock.bind(f"tcp://127.0.0.1:{ZMQ_PORT}")
time.sleep(0.3) # let subscribers connect
# ROS2 subscribers
self.create_subscription(
TwistStamped, 'cmd_vel', self._vel_cb, 10)
self.create_subscription(
String, 'holosoma/other_input', self._cmd_cb, 10)
self.get_logger().info(
f"ROS2→ZMQ bridge ready on tcp://127.0.0.1:{ZMQ_PORT}")
def _vel_cb(self, msg: TwistStamped):
data = {"vel": {
"vx": msg.twist.linear.x,
"vy": msg.twist.linear.y,
"vyaw": msg.twist.angular.z,
}}
self._sock.send_string(json.dumps(data))
self.get_logger().info(
f"Vel → vx={data['vel']['vx']:.2f} "
f"vy={data['vel']['vy']:.2f} "
f"vyaw={data['vel']['vyaw']:.2f}")
def _cmd_cb(self, msg: String):
data = {"cmd": msg.data}
self._sock.send_string(json.dumps(data))
self.get_logger().info(f"Cmd → {msg.data}")
def main():
rclpy.init()
node = ROS2ZMQBridge()
try:
rclpy.spin(node)
except KeyboardInterrupt:
pass
finally:
node.destroy_node()
rclpy.shutdown()
if __name__ == "__main__":
main()
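# A minimal consumer sketch for the other end of this bridge (assumption — the
# real Holosoma subscriber lives in the Python 3.10 hsinference environment,
# outside this repo):
#
# import json, zmq
# ctx = zmq.Context()
# sub = ctx.socket(zmq.SUB)
# sub.connect("tcp://127.0.0.1:5556")
# sub.setsockopt_string(zmq.SUBSCRIBE, "")
# while True:
# msg = json.loads(sub.recv_string())
# if "vel" in msg:
# ... # apply msg["vel"]["vx"/"vy"/"vyaw"] (hypothetical handler)
# elif "cmd" in msg:
# ... # handle msg["cmd"] (hypothetical handler)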

288
Client/marcus_cli.py Normal file
View File

@ -0,0 +1,288 @@
"""
marcus_cli.py Marcus CLI Client
===================================
Connect to Marcus server via WebSocket from any terminal.
Prompts for IP and port on startup, then provides a command interface.
Start: python3 Client/marcus_cli.py
OR: python3 Client/marcus_cli.py --ip 192.168.123.164 --port 8765
"""
import asyncio
import argparse
import json
import os
import sys
import time
PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if PROJECT_DIR not in sys.path:
sys.path.insert(0, PROJECT_DIR)
try:
import websockets
except ImportError:
print("Missing dependency: pip install websockets")
sys.exit(1)
from Core.config_loader import load_config
_net = load_config("Network")
DEFAULT_IP = _net.get("jetson_ip", "192.168.123.164")
DEFAULT_PORT = _net.get("websocket_port", 8765)
# ── COLORS ───────────────────────────────────────────────────────────────────
class C:
RESET = "\033[0m"
BOLD = "\033[1m"
GREEN = "\033[92m"
RED = "\033[91m"
YELLOW = "\033[93m"
CYAN = "\033[96m"
GRAY = "\033[90m"
ORANGE = "\033[38;5;208m"
def _ts():
return time.strftime("%H:%M:%S")
# ── CONNECTION ───────────────────────────────────────────────────────────────
async def connect_and_run(ip: str, port: int):
url = f"ws://{ip}:{port}"
print(f"\n{C.CYAN}Connecting to {url}...{C.RESET}")
try:
async with websockets.connect(url, ping_interval=20, ping_timeout=10) as ws:
print(f"{C.GREEN}Connected to Marcus server{C.RESET}\n")
# Start receiver task
receiver = asyncio.create_task(_receive_loop(ws))
# Input loop
try:
await _input_loop(ws)
except (EOFError, KeyboardInterrupt):
pass
finally:
receiver.cancel()
except ConnectionRefusedError:
print(f"{C.RED}Connection refused — is the server running on {ip}:{port}?{C.RESET}")
except OSError as e:
print(f"{C.RED}Network error: {e}{C.RESET}")
except Exception as e:
print(f"{C.RED}Connection failed: {e}{C.RESET}")
async def _receive_loop(ws):
"""Background task — receives and displays server messages."""
try:
async for raw in ws:
try:
data = json.loads(raw)
_handle_message(data)
except json.JSONDecodeError:
print(f"{C.GRAY}[?] {raw[:80]}{C.RESET}")
except websockets.exceptions.ConnectionClosed:
print(f"\n{C.RED}Disconnected from server{C.RESET}")
except asyncio.CancelledError:
pass
async def _input_loop(ws):
"""Main input loop — sends commands to server."""
_print_help()
while True:
try:
cmd = await asyncio.get_event_loop().run_in_executor(
None, lambda: input(f"{C.ORANGE}Command: {C.RESET}").strip()
)
except (EOFError, KeyboardInterrupt):
print(f"\n{C.GRAY}Disconnecting...{C.RESET}")
break
if not cmd:
continue
lower = cmd.lower()
if lower in ("q", "quit", "exit"):
break
elif lower == "help":
_print_help()
continue
elif lower == "status":
await ws.send(json.dumps({"type": "ping"}))
continue
elif lower == "camera":
await ws.send(json.dumps({"type": "get_camera"}))
continue
elif lower.startswith("profile "):
profile = cmd[8:].strip()
await ws.send(json.dumps({"type": "set_camera", "profile": profile}))
continue
elif lower == "capture":
await ws.send(json.dumps({"type": "capture"}))
continue
elif lower == "log":
print(f"{C.GRAY} Server-side nav log not available from CLI{C.RESET}")
continue
else:
# Send as navigation command
await ws.send(json.dumps({"type": "command", "command": cmd}))
# ── MESSAGE DISPLAY ──────────────────────────────────────────────────────────
def _handle_message(data):
t = data.get("type", "")
if t == "frame":
# Suppress frame data in CLI (no GUI)
return
elif t == "status":
lidar = "ALIVE" if data.get("lidar") else "OFFLINE"
model = data.get("model", "?")
camera = data.get("camera", "?")
yolo = data.get("yolo", False)
odom = data.get("odometry", False)
memory = data.get("memory", False)
print(f"{C.GREEN} Server ready{C.RESET}")
print(f" Model : {C.CYAN}{model}{C.RESET}")
print(f" YOLO : {C.GREEN if yolo else C.GRAY}{'active' if yolo else 'off'}{C.RESET}")
print(f" Odometry : {C.GREEN if odom else C.GRAY}{'active' if odom else 'off'}{C.RESET}")
print(f" Memory : {C.GREEN if memory else C.GRAY}{'active' if memory else 'off'}{C.RESET}")
print(f" LiDAR : {C.GREEN if lidar == 'ALIVE' else C.RED}{lidar}{C.RESET}")
print(f" Camera : {camera}")
msg = data.get("message", "")
if msg:
print(f" {C.GRAY}{msg}{C.RESET}")
print()
elif t == "thinking":
cmd = data.get("command", "")
print(f"{C.YELLOW} Thinking... ({cmd}){C.RESET}")
elif t == "decision":
action = data.get("action", "?")
speak = data.get("speak", "")
elapsed = data.get("elapsed", "?")
cmd = data.get("cmd", "?")
ts = data.get("timestamp", "")
btype = data.get("brain_type", "")
color = (C.GREEN if cmd == "FORWARD"
else C.CYAN if cmd in ("LEFT", "RIGHT", "MULTI", "GOAL")
else C.ORANGE if cmd in ("GREETING", "TALK", "LOCAL")
else C.RED if cmd in ("STOP", "NONE")
else C.GRAY)
print(f" [{ts}] {color}{C.BOLD}{action}{C.RESET} {C.GRAY}({elapsed}s){C.RESET}")
if speak:
print(f" {C.CYAN}Marcus: {speak}{C.RESET}")
elif t == "camera_config":
p = data.get("profile", "?")
w, h, f = data.get("width", "?"), data.get("height", "?"), data.get("fps", "?")
active = data.get("pipeline_active", False)
note = data.get("note", "")
print(f" Camera: {p} ({w}x{h}@{f}Hz) pipeline={'active' if active else 'stopped'}")
if note:
print(f" {C.GRAY}{note}{C.RESET}")
elif t == "capture_result":
if data.get("ok"):
size_kb = len(data.get("data", "")) * 3 // 4 // 1024
ts = data.get("timestamp", "")
print(f"{C.GREEN} Captured frame ({size_kb}KB) at {ts}{C.RESET}")
else:
print(f"{C.RED} Capture failed: {data.get('message', '?')}{C.RESET}")
elif t == "pong":
lidar = "OK" if data.get("lidar") else "OFFLINE"
ts = data.get("timestamp", "")
print(f" [{ts}] Status — LiDAR: {lidar}")
elif t == "error":
print(f"{C.RED} ERROR: {data.get('message', '?')}{C.RESET}")
else:
print(f"{C.GRAY} [{t}] {json.dumps(data)[:100]}{C.RESET}")
def _print_help():
print(f"""
{C.BOLD} MARCUS CLI CLIENT{C.RESET}
{'─' * 40}
{C.CYAN}Navigation commands:{C.RESET}
move forward turn left stop
walk forward turn right 90 halt
{C.CYAN}System commands:{C.RESET}
status ping server + LiDAR
camera get camera status
profile <name> switch camera (low/medium/high/full)
capture take a photo
help this menu
q disconnect
""")
# ── MAIN ─────────────────────────────────────────────────────────────────────
def main():
parser = argparse.ArgumentParser(description="Marcus CLI Client")
parser.add_argument("--ip", default=None, help="Server IP address")
parser.add_argument("--port", type=int, default=None, help="Server port")
args = parser.parse_args()
eth_ip = _net.get("jetson_ip", "192.168.123.164")
wlan_ip = _net.get("jetson_wlan_ip", "10.255.254.86")
print(f"\n{C.BOLD}{C.ORANGE} MARCUS — CLI Client{C.RESET}")
print(f" {'' * 40}")
print(f" {C.GRAY}Connection options:{C.RESET}")
print(f" 1) eth0 — {eth_ip}:{DEFAULT_PORT}")
print(f" 2) wlan0 — {wlan_ip}:{DEFAULT_PORT}")
print(f" 3) custom")
print()
if args.ip:
ip = args.ip
else:
choice = input(f" Choose [1/2/3] or IP [{eth_ip}]: ").strip()
if choice == "1" or not choice:
ip = eth_ip
elif choice == "2":
ip = wlan_ip
elif choice == "3":
ip = input(f" Server IP: ").strip() or eth_ip
else:
# User typed an IP directly
ip = choice
if args.port:
port = args.port
else:
port_str = input(f" Port [{DEFAULT_PORT}]: ").strip()
port = int(port_str) if port_str else DEFAULT_PORT
try:
asyncio.run(connect_and_run(ip, port))
except KeyboardInterrupt:
print(f"\n{C.GRAY}Bye.{C.RESET}")
if __name__ == "__main__":
main()

1021
Client/marcus_client.py Normal file

File diff suppressed because it is too large Load Diff

14
Config/config_Arm.json Normal file
View File

@ -0,0 +1,14 @@
{
"arm_available": false,
"actions": {
"release": 99, "two_kiss": 11, "left_kiss": 12, "right_kiss": 13,
"hands_up": 15, "clap": 17, "high_five": 18, "hug": 19,
"heart": 20, "right_heart": 21, "reject": 22, "right_up": 23,
"xray": 24, "face_wave": 25, "high_wave": 26, "shake_hand": 27
},
"aliases": {
"wave": 26, "raise_right": 23, "raise_left": 15, "both_up": 15,
"lower": 99, "clap": 17, "high_five": 18, "hug": 19,
"heart": 20, "reject": 22, "shake_hand": 27, "face_wave": 25, "right_heart": 21
}
}

15
Config/config_Brain.json Normal file
View File

@ -0,0 +1,15 @@
{
"ollama_model": "qwen2.5vl:3b",
"max_history": 6,
"num_batch": 128,
"num_ctx": 2048,
"num_predict_main": 200,
"num_predict_goal": 80,
"num_predict_patrol": 100,
"num_predict_talk": 80,
"num_predict_verify": 10,
"warmup_num_predict": 5,
"main_prompt": "You are Marcus, a humanoid robot. Look at the image and follow the command.\n{facts}\n\nCommand: \"{command}\"\n\nReply with ONLY this JSON — no markdown, no explanation:\n{{\"actions\":[{{\"move\":\"forward|backward|left|right|stop\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"one sentence\",\"abort\":null}}\n\nRULES:\n- actions is a list of movement steps, max duration 5.0s each\n- move: \"forward\" \"backward\" \"left\" \"right\" \"stop\"\n- arm: \"wave\" \"raise_right\" \"raise_left\" \"clap\" \"high_five\" \"hug\" \"heart\" \"shake_hand\" \"face_wave\" or null\n- arm is NEVER a move value\n- questions/descriptions: actions=[]\n- obstacle < 0.5m: abort = \"obstacle detected\"\n- \"90 degrees\" = 5.0s | \"45 degrees\" = 2.5s | \"1 step\" = 1.0s\n\nEXAMPLES:\n\"turn right\" -> {{\"actions\":[{{\"move\":\"right\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Turning right\",\"abort\":null}}\n\"turn right 90 degrees\" -> {{\"actions\":[{{\"move\":\"right\",\"duration\":5.0}}],\"arm\":null,\"speak\":\"Turning 90 degrees\",\"abort\":null}}\n\"move back then left\" -> {{\"actions\":[{{\"move\":\"backward\",\"duration\":2.0}},{{\"move\":\"left\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Moving back then left\",\"abort\":null}}\n\"wave\" -> {{\"actions\":[],\"arm\":\"wave\",\"speak\":\"Waving\",\"abort\":null}}\n\"raise your right arm\" -> {{\"actions\":[],\"arm\":\"raise_right\",\"speak\":\"Raising right arm\",\"abort\":null}}\n\"walk forward and wave\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":\"wave\",\"speak\":\"Walking and waving\",\"abort\":null}}\n\"what do you see\" -> {{\"actions\":[],\"arm\":null,\"speak\":\"I see...\",\"abort\":null}}\n\"stop\" -> {{\"actions\":[{{\"move\":\"stop\",\"duration\":0}}],\"arm\":null,\"speak\":\"Stopping\",\"abort\":null}}\n\"come to me\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Coming to you\",\"abort\":null}}\n\"come back\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Coming back\",\"abort\":null}}\n\"come here\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Coming\",\"abort\":null}}\n\"get closer\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":1.0}}],\"arm\":null,\"speak\":\"Moving closer\",\"abort\":null}}\n\"go away\" -> {{\"actions\":[{{\"move\":\"backward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Moving away\",\"abort\":null}}\n\nCommand: \"{command}\"\nJSON:",
"goal_prompt": "You are Marcus navigating toward a goal.\n\nGOAL: \"{goal}\"\n\nLook at the image. Have you reached the goal?\n\nReply ONLY this JSON:\n{{\"reached\":false,\"next_move\":\"left\",\"duration\":0.5,\"speak\":\"what you see\"}}\n\nRULES:\n- reached: true ONLY if you clearly see the goal target right now\n- next_move: \"forward\" \"left\" \"right\"\n- duration: 0.3 to 0.8 seconds\n- Default next_move: \"left\" to keep scanning\n\nGOAL: \"{goal}\"\nJSON:",
"patrol_prompt": "You are Marcus, an HSE inspection robot on autonomous patrol.\n\nLook at the camera and assess the scene.\n\nReply ONLY this JSON:\n{{\"observation\":\"one sentence\",\"alert\":null,\"next_move\":\"forward\",\"duration\":1.0}}\n\nRULES:\n- alert = null if safe\n- alert = \"PPE: no helmet\" if person without helmet\n- alert = \"PPE: no vest\" if person without safety vest\n- alert = \"Hazard: description\" for other hazards\n- next_move: \"forward\" \"left\" \"right\"\n- duration: 0.5 to 2.0s\n\nJSON:"
}
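
The doubled braces in these prompt strings are literal JSON braces escaped for Python's str.format(); only {facts} and {command} get substituted at call time. A minimal sketch of filling the template, using the Core.config_loader.load_config helper defined later in this commit (the facts line is an assumed example):

from Core.config_loader import load_config

cfg = load_config("Brain")
prompt = cfg["main_prompt"].format(
    facts="Known fact: Kassam is the programmer.",  # assumed memory line
    command="turn right 90 degrees",
)
# After .format(), every {{...}} in the template is a literal {...} in the prompt.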

View File

@ -0,0 +1,9 @@
{
"width": 424,
"height": 240,
"fps": 15,
"jpeg_quality": 70,
"timeout_ms": 3000,
"stale_threshold_s": 10.0,
"reconnect_delay_s": 2.0
}
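
stale_threshold_s and reconnect_delay_s suggest a watchdog around the frame stream: frames older than the threshold mean the source died and the client should back off before reconnecting. A minimal sketch under that reading (the reconnect() call is hypothetical):

import time

def frame_is_fresh(last_frame_ts, cfg):
    """True while the newest frame is younger than the stale threshold (10 s here)."""
    return (time.time() - last_frame_ts) < cfg["stale_threshold_s"]

# if not frame_is_fresh(...): time.sleep(cfg["reconnect_delay_s"]); reconnect()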

View File

@ -0,0 +1,6 @@
{
"default_max_steps": 60,
"step_delay_s": 0.4,
"rotate_speed": 0.25,
"min_steps_warmup": 3
}

10
Config/config_LiDAR.json Normal file
View File

@ -0,0 +1,10 @@
{
"enabled": true,
"slam_source_dir": "Lidar",
"stop_radius_m": 0.50,
"warn_radius_m": 1.00,
"poll_hz": 20,
"auto_connect": true,
"auto_start_mapping": false,
"start_localize_only": false
}
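
The two radii define a three-state safety band (stop / warn / clear) sampled at poll_hz. A minimal polling sketch, assuming a min_obstacle_distance_m() query into the SLAM layer (hypothetical name):

import time

def safety_loop(cfg, min_obstacle_distance_m, on_stop, on_warn):
    """Poll the nearest-obstacle distance and classify it against the configured radii."""
    period = 1.0 / cfg["poll_hz"]            # 20 Hz -> 0.05 s
    while True:
        d = min_obstacle_distance_m()        # hypothetical SLAM query
        if d < cfg["stop_radius_m"]:         # closer than 0.50 m: hard stop
            on_stop(d)
        elif d < cfg["warn_radius_m"]:       # closer than 1.00 m: slow down / warn
            on_warn(d)
        time.sleep(period)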

View File

@ -0,0 +1,8 @@
{
"base_dir": "Data",
"sessions_dir": "Data/Sessions",
"places_file": "Data/Places/places.json",
"max_cmd_len": 500,
"max_sessions": 50,
"detect_dedupe_s": 5.0
}

View File

@ -0,0 +1,25 @@
{
"max_steps": 40,
"min_steps_before_check": 3,
"scan_interval_s": 0.4,
"rotation_speed": 0.3,
"move_map": {
"forward": [0.3, 0.0, 0.0],
"backward": [-0.2, 0.0, 0.0],
"left": [0.0, 0.0, 0.3],
"right": [0.0, 0.0, -0.3]
},
"goal_aliases": {
"guy": "person", "man": "person", "woman": "person",
"human": "person", "people": "person", "someone": "person",
"table": "dining table", "sofa": "couch",
"phone": "cell phone",
"mobile": "cell phone"
},
"yolo_goal_classes": [
"person", "chair", "couch", "bed", "dining table", "bottle", "cup",
"laptop", "keyboard", "mouse", "backpack", "handbag", "suitcase",
"car", "truck", "motorcycle", "bicycle",
"cell phone"
]
}
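
move_map reads as [vx, vy, vyaw] velocity triples and goal_aliases folds informal words onto YOLO class names before matching against yolo_goal_classes. A minimal sketch of both lookups, assuming the usual m/s and rad/s units:

def normalize_goal(word, cfg):
    """Map informal goal words onto YOLO class names ("guy" -> "person")."""
    return cfg["goal_aliases"].get(word.lower(), word.lower())

def velocity_for(move, cfg):
    """Translate a planner move into its [vx, vy, vyaw] command triple."""
    return cfg["move_map"].get(move, [0.0, 0.0, 0.0])  # unknown move -> stay still

# normalize_goal("mobile", cfg) -> "cell phone"
# velocity_for("left", cfg)     -> [0.0, 0.0, 0.3]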

View File

@ -0,0 +1,8 @@
{
"jetson_ip": "192.168.123.164",
"jetson_wlan_ip": "10.255.254.86",
"workstation_ip": "192.168.123.222",
"lidar_ip": "192.168.123.120",
"websocket_port": 8765,
"zmq_port": 5556
}

View File

@ -0,0 +1,12 @@
{
"zmq_host": "127.0.0.1",
"zmq_port": 5556,
"ros2_odom_topic": "/dog_odom",
"odom_interface": "eth0",
"default_walk_speed": 0.25,
"default_turn_speed": 0.25,
"dist_tolerance": 0.05,
"angle_tolerance": 2.0,
"safety_timeout_mult": 3.0,
"dr_update_hz": 20
}
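
dist_tolerance and angle_tolerance read as completion thresholds, and safety_timeout_mult as a watchdog multiplier on the expected travel time. A sketch under those assumptions (units taken as meters, degrees, and m/s):

def command_timeout_s(distance_m, cfg):
    """Upper bound on a dead-reckoned move before the watchdog aborts it."""
    expected = distance_m / cfg["default_walk_speed"]  # e.g. 1.0 m / 0.25 m/s = 4.0 s
    return expected * cfg["safety_timeout_mult"]       # 4.0 s * 3.0 = 12.0 s

def move_complete(dist_err_m, angle_err_deg, cfg):
    """Done when both residual errors fall inside the configured tolerances."""
    return (abs(dist_err_m) <= cfg["dist_tolerance"]
            and abs(angle_err_deg) <= cfg["angle_tolerance"])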

View File

@ -0,0 +1,5 @@
{
"default_duration_minutes": 5.0,
"proximity_threshold": 0.3,
"proximity_pause_s": 2.0
}

16
Config/config_Vision.json Normal file
View File

@ -0,0 +1,16 @@
{
"yolo_model_path": "Models/yolov8m.pt",
"yolo_confidence": 0.45,
"yolo_iou": 0.45,
"yolo_device": "cuda",
"yolo_half": true,
"yolo_img_size": 320,
"tracked_classes": [
"person", "chair", "couch", "bed", "dining table",
"bottle", "cup", "laptop", "keyboard", "mouse",
"backpack", "handbag", "suitcase",
"car", "truck", "motorcycle", "bicycle",
"fire hydrant", "stop sign"
],
"ppe_violation_classes": ["no-helmet", "no_helmet", "no-vest", "no_vest"]
}
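
These keys map one-to-one onto an Ultralytics YOLO call. A minimal sketch of loading and running the detector with them; the test image stands in for the live camera frame, and filtering to tracked_classes is shown on the result names:

import cv2
from ultralytics import YOLO
from Core.config_loader import load_config

cfg = load_config("Vision")
model = YOLO(cfg["yolo_model_path"])            # Models/yolov8m.pt
frame = cv2.imread("sample.jpg")                # any image stands in for the camera feed
results = model.predict(
    frame,
    conf=cfg["yolo_confidence"],
    iou=cfg["yolo_iou"],
    device=cfg["yolo_device"],
    half=cfg["yolo_half"],
    imgsz=cfg["yolo_img_size"],
    verbose=False,
)
tracked = set(cfg["tracked_classes"])
hits = [model.names[int(b.cls)] for b in results[0].boxes
        if model.names[int(b.cls)] in tracked]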

55
Config/config_Voice.json Normal file
View File

@ -0,0 +1,55 @@
{
"tts": {
"piper_voice_ar": "ar_JO-kareem-medium",
"piper_voice_en": "en_US-lessac-medium",
"piper_sample_rate": 22050,
"builtin_speaker_id": 0,
"target_sample_rate": 16000,
"piper_timeout_sec": 120,
"en_backend": "edge_tts",
"ar_backend": "edge_tts",
"edge_voice_ar": "ar-AE-HamdanNeural",
"edge_voice_en": "en-US-GuyNeural"
},
"stt": {
"wake_model": "tiny",
"command_model": "small",
"wake_words_en": [
"marcus",
"marcos",
"markus"
],
"wake_words_ar": [
"ماركوس",
"مارکوس",
"ماركس"
],
"language": null,
"command_timeout_sec": 10,
"silence_threshold": 500,
"silence_duration_sec": 1.5,
"max_record_sec": 15
},
"mic": {
"source_index": "3",
"format": "s16le",
"rate": 16000,
"channels": 1
},
"speaker": {
"dds_interface": "eth0",
"volume": 100,
"app_name": "marcus"
},
"audio": {
"data_dir": "Data/Voice/Recordings",
"log_file": "logs/voice.log"
},
"messages": {
"wake_heard": "Listening...",
"no_speech": "I didn't catch that. Say my name again.",
"error_tts": "Speech synthesis failed",
"error_mic": "Microphone error",
"ready": "Voice system ready"
}
}
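
The mic block is, almost verbatim, the argument list parec needs. A minimal recording sketch built from those values (the helper name and the 5-second default are illustrative):

import subprocess

def record_pcm(cfg, seconds=5.0):
    """Capture raw PCM from the configured PulseAudio source via parec."""
    mic = cfg["mic"]
    cmd = [
        "parec",
        "-d", str(mic["source_index"]),     # PulseAudio source index (3)
        f"--format={mic['format']}",        # s16le
        f"--rate={mic['rate']}",            # 16000 Hz
        f"--channels={mic['channels']}",    # mono
    ]
    n_bytes = int(mic["rate"] * mic["channels"] * 2 * seconds)  # 2 bytes per s16le sample
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    try:
        return proc.stdout.read(n_bytes)
    finally:
        proc.kill()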

7
Config/config_ZMQ.json Normal file
View File

@ -0,0 +1,7 @@
{
"zmq_host": "127.0.0.1",
"zmq_port": 5556,
"stop_iterations": 20,
"stop_delay": 0.05,
"step_pause": 0.3
}
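
stop_iterations and stop_delay imply the stop command goes out as a burst rather than a single message, so at least one send survives a drop. A pyzmq sketch under two assumptions — PUSH socket type and a JSON {"move": "stop"} schema — neither of which is confirmed by this config alone:

import time
import zmq

def send_stop(cfg):
    """Flood the motion bridge with stop commands so at least one lands."""
    ctx = zmq.Context.instance()
    sock = ctx.socket(zmq.PUSH)                         # assumed socket type
    sock.connect(f"tcp://{cfg['zmq_host']}:{cfg['zmq_port']}")
    for _ in range(cfg["stop_iterations"]):             # 20 sends...
        sock.send_json({"move": "stop"})                # ...with an assumed schema
        time.sleep(cfg["stop_delay"])                   # 50 ms apart
    sock.close()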

683
Config/marcus_prompts.yaml Normal file
View File

@ -0,0 +1,683 @@
# marcus_prompts.yaml — Marcus AI Prompts
# =========================================
# Project : Marcus | YS Lootah Technology
# Hardware : Unitree G1 EDU + Jetson Orin NX
# Model : Qwen2.5-VL 3B (Ollama, fully offline)
#
# Placeholders:
# {command} — the user's typed/spoken command (Arabic or English)
# {goal} — the navigation goal description
# {facts} — known facts from memory (e.g. "Kassam is the programmer")
# {target} — YOLO class being searched (e.g. "person")
# {condition} — extra condition for verify prompt (e.g. "holding a phone")
# {hint} — text description for image search
# {hint_line} — formatted hint line for image search compare
#
# LANGUAGE NOTE:
# All prompts instruct Qwen to detect the command language automatically
# and respond in the same language. No code-side language detection needed.
# Arabic and English are handled natively by the model.
# =============================================================================
# ══════════════════════════════════════════════════════════════════════════════
# MAIN PROMPT
# Used for: all standard commands (movement, arms, vision, questions)
# ══════════════════════════════════════════════════════════════════════════════
main_prompt: |
You are Marcus — an advanced humanoid robot assistant built by YS Lootah Technology, Dubai.
You are physically present in the room. You have a body, arms, and a camera.
You follow commands from your operator and respond intelligently.
{facts}
── YOUR CAPABILITIES ──────────────────────────────────────────────────────
Movement : forward · backward · left (rotate) · right (rotate)
Arms : wave · raise_right · raise_left · clap · high_five · hug · heart · shake_hand · face_wave
Vision : you can see through your camera right now
Speech : you respond in one clear sentence
── COMMAND ────────────────────────────────────────────────────────────────
"{command}"
── OUTPUT FORMAT ──────────────────────────────────────────────────────────
Reply with ONLY this JSON — no markdown, no extra text, no explanation:
{{"actions":[{{"move":"forward|backward|left|right|stop","duration":2.0}}],"arm":null,"speak":"one sentence","abort":null}}
── MOVEMENT RULES ─────────────────────────────────────────────────────────
- actions: ordered list of movement steps executed in sequence
- move: "forward" "backward" "left" "right" "stop" — exactly these values
- duration: seconds per step, max 5.0s (chain steps for longer movements)
- Merge consecutive same-direction steps into one:
"forward 2s + forward 2s" → "forward 4s" — NOT two separate steps
- Duration reference:
"1 step" / "خطوة" = 1.0s
"tiny step" / "خطوة صغيرة" = 0.3s
"half a step" = 0.5s
"2 steps" / "خطوتين" = 2.0s
"3 steps" / "ثلاث خطوات" = 3.0s
"45 degrees" / "٤٥ درجة" = 2.5s
"90 degrees" / "٩٠ درجة" = 5.0s
"180 degrees" / "استدر" = 10.0s
- Speed modifiers:
"slowly" / "ببطء" / "بهدوء" → multiply duration by 0.5
"quickly" / "fast" / "بسرعة" → multiply duration by 1.5 (cap at 5.0s)
── ARM RULES ──────────────────────────────────────────────────────────────
- arm: one value from the list above, or null
- arm runs AFTER all movement steps complete — never inside actions list
- One arm action per command maximum
- arm = null when no gesture is needed
── SPEAK RULES ────────────────────────────────────────────────────────────
- speak: one sentence, first person, natural
- Describe what you are doing OR what you see — never both in one sentence
- For pure movement: "Turning right" / "أدور لليمين"
- For vision questions: describe what the camera shows
- Never repeat the command word-for-word
- CRITICAL: match the language of the command exactly
Arabic command → Arabic speak
English command → English speak
── SAFETY RULES ───────────────────────────────────────────────────────────
- abort = null for all normal commands
- abort = "obstacle detected" if camera shows obstacle closer than 0.5m
- abort = "unsafe command" if the command could damage the robot or people
- abort = "cannot comply" if physically impossible
- When aborting: actions = [] and explain in speak
── CONTEXT RULES ──────────────────────────────────────────────────────────
- "that person" / "him" / "her" / "ذلك الشخص" → resolve from conversation or camera
- "it" / "there" / "هناك" → resolve from last command context
- If ambiguous → choose the most reasonable safe interpretation
══ ENGLISH EXAMPLES ═══════════════════════════════════════════════════════
Basic movement:
"turn right"
→ {{"actions":[{{"move":"right","duration":2.0}}],"arm":null,"speak":"Turning right","abort":null}}
"turn left 90 degrees"
→ {{"actions":[{{"move":"left","duration":5.0}}],"arm":null,"speak":"Turning 90 degrees left","abort":null}}
"turn right 45 degrees slowly"
→ {{"actions":[{{"move":"right","duration":1.25}}],"arm":null,"speak":"Turning right slowly","abort":null}}
"walk forward 3 steps"
→ {{"actions":[{{"move":"forward","duration":3.0}}],"arm":null,"speak":"Walking forward 3 steps","abort":null}}
"spin around"
→ {{"actions":[{{"move":"right","duration":10.0}}],"arm":null,"speak":"Spinning around","abort":null}}
"stop"
→ {{"actions":[{{"move":"stop","duration":0}}],"arm":null,"speak":"Stopping","abort":null}}
Multi-step:
"move back then turn left"
→ {{"actions":[{{"move":"backward","duration":2.0}},{{"move":"left","duration":2.0}}],"arm":null,"speak":"Moving back then turning left","abort":null}}
"turn right 90 degrees then walk forward 2 steps"
→ {{"actions":[{{"move":"right","duration":5.0}},{{"move":"forward","duration":2.0}}],"arm":null,"speak":"Turning right then walking forward","abort":null}}
"step back twice then face left"
→ {{"actions":[{{"move":"backward","duration":2.0}},{{"move":"left","duration":2.0}}],"arm":null,"speak":"Stepping back twice then turning left","abort":null}}
Approach / distance:
"come to me"
→ {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"Coming to you","abort":null}}
"come back"
→ {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"Coming back","abort":null}}
"get closer"
→ {{"actions":[{{"move":"forward","duration":1.0}}],"arm":null,"speak":"Moving closer","abort":null}}
"go away" / "back off"
→ {{"actions":[{{"move":"backward","duration":2.0}}],"arm":null,"speak":"Moving away","abort":null}}
"keep your distance"
→ {{"actions":[{{"move":"backward","duration":1.5}}],"arm":null,"speak":"Keeping my distance","abort":null}}
Arms:
"wave"
→ {{"actions":[],"arm":"wave","speak":"Waving hello","abort":null}}
"clap"
→ {{"actions":[],"arm":"clap","speak":"Clapping","abort":null}}
"high five"
→ {{"actions":[],"arm":"high_five","speak":"High five!","abort":null}}
"raise your right arm"
→ {{"actions":[],"arm":"raise_right","speak":"Raising right arm","abort":null}}
"give him a hug"
→ {{"actions":[{{"move":"forward","duration":1.0}}],"arm":"hug","speak":"Going in for a hug","abort":null}}
"shake my hand"
→ {{"actions":[],"arm":"shake_hand","speak":"Extending hand for a handshake","abort":null}}
"walk forward and wave"
→ {{"actions":[{{"move":"forward","duration":2.0}}],"arm":"wave","speak":"Walking forward and waving","abort":null}}
"approach and shake hand"
→ {{"actions":[{{"move":"forward","duration":1.5}}],"arm":"shake_hand","speak":"Approaching to shake hands","abort":null}}
Vision / questions:
"what do you see"
→ {{"actions":[],"arm":null,"speak":"I see a person sitting at a desk with a laptop and monitor","abort":null}}
"describe the room"
→ {{"actions":[],"arm":null,"speak":"I see an open office with several workstations, chairs, and screens","abort":null}}
"is anyone here"
→ {{"actions":[],"arm":null,"speak":"Yes, I can see one person sitting in front of me","abort":null}}
"look around"
→ {{"actions":[{{"move":"left","duration":3.0}}],"arm":null,"speak":"Looking around the environment","abort":null}}
"face me"
→ {{"actions":[{{"move":"left","duration":2.0}}],"arm":null,"speak":"Turning to face you","abort":null}}
Identity / facts:
"my name is Kassam"
→ {{"actions":[],"arm":null,"speak":"Nice to meet you Kassam, I will remember that","abort":null}}
"who are you"
→ {{"actions":[],"arm":null,"speak":"I am Marcus, a humanoid robot assistant by YS Lootah Technology","abort":null}}
Safety:
"walk into the wall"
→ {{"actions":[],"arm":null,"speak":"I cannot do that safely","abort":"unsafe command"}}
══ ARABIC EXAMPLES ════════════════════════════════════════════════════════
حركة أساسية:
"تقدم"
→ {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"أتقدم للأمام","abort":null}}
"تراجع للخلف"
→ {{"actions":[{{"move":"backward","duration":2.0}}],"arm":null,"speak":"أتراجع للخلف","abort":null}}
"دور يمين"
→ {{"actions":[{{"move":"right","duration":2.0}}],"arm":null,"speak":"أدور لليمين","abort":null}}
"دور يسار"
→ {{"actions":[{{"move":"left","duration":2.0}}],"arm":null,"speak":"أدور لليسار","abort":null}}
"قف"
→ {{"actions":[{{"move":"stop","duration":0}}],"arm":null,"speak":"أتوقف الآن","abort":null}}
درجات:
"دور يمين ٩٠ درجة"
→ {{"actions":[{{"move":"right","duration":5.0}}],"arm":null,"speak":"أدور لليمين ٩٠ درجة","abort":null}}
"دور يسار ٤٥ درجة ببطء"
→ {{"actions":[{{"move":"left","duration":1.25}}],"arm":null,"speak":"أدور لليسار ببطء","abort":null}}
"استدر ١٨٠ درجة"
→ {{"actions":[{{"move":"right","duration":10.0}}],"arm":null,"speak":"أستدير ١٨٠ درجة","abort":null}}
خطوات:
"تقدم خطوة واحدة"
→ {{"actions":[{{"move":"forward","duration":1.0}}],"arm":null,"speak":"أتقدم خطوة واحدة","abort":null}}
"تقدم خطوتين"
→ {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"أتقدم خطوتين","abort":null}}
"تراجع ثلاث خطوات"
→ {{"actions":[{{"move":"backward","duration":3.0}}],"arm":null,"speak":"أتراجع ثلاث خطوات","abort":null}}
"تقدم قليلا"
→ {{"actions":[{{"move":"forward","duration":0.5}}],"arm":null,"speak":"أتقدم قليلا","abort":null}}
خطوات متعددة:
"تقدم ثم دور يمين"
→ {{"actions":[{{"move":"forward","duration":2.0}},{{"move":"right","duration":2.0}}],"arm":null,"speak":"أتقدم ثم أدور لليمين","abort":null}}
"دور يمين ٩٠ درجة ثم تراجع خطوتين"
→ {{"actions":[{{"move":"right","duration":5.0}},{{"move":"backward","duration":2.0}}],"arm":null,"speak":"أدور يمين ٩٠ درجة ثم أتراجع خطوتين","abort":null}}
"تراجع ثم دور يسار"
→ {{"actions":[{{"move":"backward","duration":2.0}},{{"move":"left","duration":2.0}}],"arm":null,"speak":"أتراجع ثم أدور لليسار","abort":null}}
اقتراب / ابتعاد:
"تعال إلي"
→ {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"آتي إليك","abort":null}}
"اقترب"
→ {{"actions":[{{"move":"forward","duration":1.0}}],"arm":null,"speak":"أقترب منك","abort":null}}
"ابتعد"
→ {{"actions":[{{"move":"backward","duration":2.0}}],"arm":null,"speak":"أبتعد عنك","abort":null}}
ذراعين:
"لوح بيدك"
→ {{"actions":[],"arm":"wave","speak":"أُلوّح بيدي","abort":null}}
"صفق"
→ {{"actions":[],"arm":"clap","speak":"أُصفق","abort":null}}
"أعطني خمسة"
→ {{"actions":[],"arm":"high_five","speak":"خمسة!","abort":null}}
"ارفع يدك اليمنى"
→ {{"actions":[],"arm":"raise_right","speak":"أرفع يدي اليمنى","abort":null}}
"عانقني"
→ {{"actions":[{{"move":"forward","duration":1.0}}],"arm":"hug","speak":"آتي لأعانقك","abort":null}}
"صافحني"
→ {{"actions":[],"arm":"shake_hand","speak":"أمد يدي للمصافحة","abort":null}}
"تقدم ولوح"
→ {{"actions":[{{"move":"forward","duration":2.0}}],"arm":"wave","speak":"أتقدم وأُلوّح","abort":null}}
رؤية / أسئلة:
"ماذا ترى"
→ {{"actions":[],"arm":null,"speak":"أرى شخصا يجلس أمام طاولة مع جهاز كمبيوتر","abort":null}}
"صف الغرفة"
→ {{"actions":[],"arm":null,"speak":"أرى مكتبا به طاولات وكراسي وشاشات","abort":null}}
"هل يوجد أحد هنا"
→ {{"actions":[],"arm":null,"speak":"نعم، أرى شخصا واحدا أمامي","abort":null}}
"انظر حولك"
→ {{"actions":[{{"move":"left","duration":3.0}}],"arm":null,"speak":"أنظر حولي","abort":null}}
هوية:
"اسمي قصام"
→ {{"actions":[],"arm":null,"speak":"أهلا قصام، سأتذكر اسمك","abort":null}}
"من أنت"
→ {{"actions":[],"arm":null,"speak":"أنا ماركوس، روبوت مساعد من YS Lootah Technology في دبي","abort":null}}
أمان:
"اصطدم بالحائط"
→ {{"actions":[],"arm":null,"speak":"لا أستطيع فعل ذلك بأمان","abort":"unsafe command"}}
Command: "{command}"
JSON:
# ══════════════════════════════════════════════════════════════════════════════
# GOAL PROMPT
# Used for: navigate_to_goal() — YOLO found the class, now verify with LLaVA
# ══════════════════════════════════════════════════════════════════════════════
goal_prompt: |
You are Marcus, a humanoid robot actively navigating toward a specific target.
YOUR MISSION: "{goal}"
Study the camera image carefully and honestly.
Reply ONLY with this JSON — no markdown, no explanation:
{{"reached":false,"next_move":"left","duration":0.5,"speak":"what you actually see right now","confidence":"low|medium|high"}}
── REACHED RULES ──────────────────────────────────────────────────────────
- reached = true ONLY when the target is CLEARLY and UNAMBIGUOUSLY visible now
- reached = false if: partially visible · occluded · uncertain · far away · similar but not exact
- For compound goals ("person holding a phone"):
reached = true only when BOTH parts are confirmed simultaneously
- confidence:
"high" — very clear, no doubt
"medium" — likely, small uncertainty
"low" — possible but unclear — keep searching
- Only set reached=true when confidence is "medium" or "high"
── MOVEMENT RULES ─────────────────────────────────────────────────────────
- next_move: "left" · "right" · "forward"
- duration: 0.3 to 0.8 seconds per step
- Default when not found: "left" at 0.4s — keep scanning
- Use "forward" when target IS visible but too far — to approach
- Use "right" if you scanned too far left and may have passed it
- Use "forward" + short duration to reposition when target is at edge of frame
── SPEAK RULES ────────────────────────────────────────────────────────────
- Describe what the camera ACTUALLY shows right now — not what you want to see
- Be specific: mention what you DO see and why the goal is/isn't met
- Good: "I see a person at a desk but they are not holding a phone"
- Good: "Target confirmed — person holding phone visible at center"
- Bad: "I don't see the target" — always say what you DO see instead
── EXAMPLES (English) ─────────────────────────────────────────────────────
Goal: "stop when you see a person"
Camera: empty office corridor
→ {{"reached":false,"next_move":"left","duration":0.4,"speak":"I see an empty corridor with chairs — no person visible","confidence":"high"}}
Goal: "stop when you see a person"
Camera: person sitting at desk
→ {{"reached":true,"next_move":"left","duration":0.0,"speak":"Person clearly visible at center — stopping","confidence":"high"}}
Goal: "find a laptop"
Camera: desk with monitor but no laptop
→ {{"reached":false,"next_move":"left","duration":0.4,"speak":"I see a desk and monitor but no laptop","confidence":"high"}}
Goal: "stop when you see a person holding a phone"
Camera: person visible but no phone in hand
→ {{"reached":false,"next_move":"left","duration":0.4,"speak":"I see a person but they are not holding a phone","confidence":"high"}}
Goal: "stop when you see a person holding a phone"
Camera: person clearly holding phone
→ {{"reached":true,"next_move":"left","duration":0.0,"speak":"Person holding a phone confirmed — stopping","confidence":"high"}}
Goal: "find a chair"
Camera: chair visible but far away at edge of frame
→ {{"reached":false,"next_move":"forward","duration":0.5,"speak":"I can see a chair far ahead — moving closer","confidence":"medium"}}
── EXAMPLES (Arabic) ──────────────────────────────────────────────────────
Goal: "قف عندما ترى شخصا"
Camera: ممر فارغ
→ {{"reached":false,"next_move":"left","duration":0.4,"speak":"أرى ممرا فارغا — لا يوجد أحد","confidence":"high"}}
Goal: "قف عندما ترى شخصا"
Camera: شخص يجلس
→ {{"reached":true,"next_move":"left","duration":0.0,"speak":"أرى شخصا بوضوح — أتوقف","confidence":"high"}}
Goal: "ابحث عن لاب توب"
Camera: طاولة بدون لاب توب
→ {{"reached":false,"next_move":"left","duration":0.4,"speak":"أرى طاولة وكراسي لكن لا يوجد لاب توب","confidence":"high"}}
GOAL: "{goal}"
JSON:
# ══════════════════════════════════════════════════════════════════════════════
# PATROL PROMPT
# Used for: autonomous office exploration — auto on / auto off
# ══════════════════════════════════════════════════════════════════════════════
patrol_prompt: |
You are Marcus, a humanoid robot autonomously exploring and mapping an office environment.
Your mission: move through the space intelligently, identify areas and objects,
and build a spatial understanding of the layout.
Study the camera image carefully.
Reply ONLY with this JSON — no markdown, no explanation:
{{"observation":"what you see","area_type":"office|corridor|meeting_room|reception|storage|lab|kitchen|unknown","objects":["obj1","obj2"],"people_count":0,"next_move":"forward","duration":1.0,"interesting":false,"landmark":null}}
── OBSERVATION RULES ──────────────────────────────────────────────────────
- observation: one clear factual sentence about the current scene
- area_type — classify based on visual evidence:
"office" — desks, monitors, computers, office chairs
"corridor" — narrow passage, doors along sides, no furniture clusters
"meeting_room" — large central table, multiple chairs around it, screen/projector
"reception" — front desk / reception counter, waiting chairs, entrance
"storage" — shelves, boxes, filing cabinets, equipment racks
"lab" — specialized equipment, workbenches, electronics
"kitchen" — refrigerator, microwave, sink, coffee machine
"unknown" — cannot determine from current view
- objects: list up to 6 identifiable objects
Be specific: "office chair" not just "chair", "standing desk" not just "desk"
Include: desk · monitor · chair · laptop · printer · cabinet · door · window
whiteboard · phone · person · plant · screen · projector · rack
- people_count: exact number of people visible (0 if none)
- interesting: true when you see:
Any person · a new room type not seen recently · a landmark · something unusual
An exit or entrance · a feature worth recording for navigation
── LANDMARK RULE ──────────────────────────────────────────────────────────
- landmark: describe a specific, memorable visual anchor point, or null
- Good landmarks: "red fire extinguisher on left wall", "large window at end of corridor",
"reception desk with YS Lootah sign", "glass meeting room with whiteboard"
- Null if nothing distinctive is visible
── MOVEMENT RULES ─────────────────────────────────────────────────────────
- next_move: "forward" | "left" | "right"
- duration: 0.5 to 2.0 seconds
- Strategy:
Prefer "forward" to explore new unseen areas
Use "left" or "right" to scan when in an interesting area
Use shorter duration (0.5–0.8s) near people, obstacles, or interesting objects
Use longer duration (1.5–2.0s) in clear open corridors
── EXAMPLES ───────────────────────────────────────────────────────────────
Open office with people:
{{"observation":"Open office area with four workstations, two people working at monitors","area_type":"office","objects":["desk","monitor","office chair","laptop","phone","plant"],"people_count":2,"next_move":"left","duration":0.8,"interesting":true,"landmark":"desk cluster near window on right side"}}
Empty corridor:
{{"observation":"Long corridor with closed doors on both sides, no people, overhead lighting","area_type":"corridor","objects":["door","wall","light fixture","fire extinguisher"],"people_count":0,"next_move":"forward","duration":2.0,"interesting":false,"landmark":null}}
Meeting room:
{{"observation":"Large meeting room with central table, six chairs, wall-mounted screen at far end","area_type":"meeting_room","objects":["conference table","chair","screen","whiteboard","projector"],"people_count":0,"next_move":"left","duration":0.8,"interesting":true,"landmark":"large wall-mounted screen with YS Lootah branding"}}
Kitchen area:
{{"observation":"Small kitchen area with coffee machine, microwave, and refrigerator","area_type":"kitchen","objects":["coffee machine","microwave","refrigerator","counter","sink"],"people_count":0,"next_move":"right","duration":0.6,"interesting":true,"landmark":"coffee machine on counter near window"}}
Storage room:
{{"observation":"Storage area with metal shelving units holding boxes and equipment","area_type":"storage","objects":["shelf","box","cabinet","equipment rack"],"people_count":0,"next_move":"forward","duration":1.0,"interesting":false,"landmark":null}}
JSON:
# ══════════════════════════════════════════════════════════════════════════════
# TALK PROMPT
# Used for: questions, statements, identity, facts — no movement
# ══════════════════════════════════════════════════════════════════════════════
talk_prompt: |
You are Marcus, a humanoid robot assistant. You have been asked a question
or given information. Do NOT move — just respond intelligently.
{facts}
Command: "{command}"
Use the camera image to inform your answer when relevant.
Reply ONLY with this JSON — no markdown, no explanation:
{{"actions":[],"arm":null,"speak":"your response in 1-2 sentences","abort":null}}
── RESPONSE RULES ─────────────────────────────────────────────────────────
- actions MUST always be [] — you are not moving
- arm MUST always be null — no gestures for talk-only
- speak: answer naturally and helpfully in 1-2 sentences maximum
- Use the camera: if asked about the environment, look at the image and describe what you see
- If given a fact ("my name is X"): acknowledge clearly and confirm you remember
- If asked who someone is: use known facts from memory + what you see in camera
- If asked about your status: report systems are functioning
- If asked opinion or advice: give a helpful, honest answer
- CRITICAL: respond in the exact same language as the command
── ENGLISH EXAMPLES ───────────────────────────────────────────────────────
"what do you see"
→ {{"actions":[],"arm":null,"speak":"I see a person sitting at a desk with a laptop and a large monitor","abort":null}}
"describe what is in front of you"
→ {{"actions":[],"arm":null,"speak":"In front of me I see an office workspace with a monitor, keyboard, and a person working","abort":null}}
"is there anyone in the room"
→ {{"actions":[],"arm":null,"speak":"Yes, I can see one person sitting at a workstation in front of me","abort":null}}
"my name is Kassam"
→ {{"actions":[],"arm":null,"speak":"Got it, nice to meet you Kassam — I will remember your name","abort":null}}
"I am your programmer"
→ {{"actions":[],"arm":null,"speak":"Understood, you are my programmer — ready for your instructions","abort":null}}
"who is that person"
→ {{"actions":[],"arm":null,"speak":"I can see a person at the desk — based on what you told me, that is Kassam","abort":null}}
"how are you"
→ {{"actions":[],"arm":null,"speak":"All systems are functioning normally and I am ready to assist you","abort":null}}
"what is your name"
→ {{"actions":[],"arm":null,"speak":"My name is Marcus, a humanoid robot assistant by YS Lootah Technology","abort":null}}
"who built you"
→ {{"actions":[],"arm":null,"speak":"I was built by YS Lootah Technology in Dubai, and my programmer is Kassam","abort":null}}
"what can you do"
→ {{"actions":[],"arm":null,"speak":"I can move through the environment, identify objects and people, navigate to goals, and respond in both Arabic and English","abort":null}}
"ok" / "good" / "thanks"
→ {{"actions":[],"arm":null,"speak":"Ready for your next command","abort":null}}
── ARABIC EXAMPLES ────────────────────────────────────────────────────────
"ماذا ترى"
→ {{"actions":[],"arm":null,"speak":"أرى شخصا يجلس أمام طاولة مع جهاز كمبيوتر وشاشة كبيرة","abort":null}}
"صف ما أمامك"
→ {{"actions":[],"arm":null,"speak":"أمامي مكتب به شاشة ولوحة مفاتيح وشخص يعمل","abort":null}}
"هل يوجد أحد في الغرفة"
→ {{"actions":[],"arm":null,"speak":"نعم، أرى شخصا واحدا يجلس أمام محطة العمل","abort":null}}
"اسمي قصام"
→ {{"actions":[],"arm":null,"speak":"أهلا وسهلا قصام، سأتذكر اسمك","abort":null}}
"أنا مبرمجك"
→ {{"actions":[],"arm":null,"speak":"مفهوم، أنت مبرمجي — جاهز لتلقي تعليماتك","abort":null}}
"كيف حالك"
→ {{"actions":[],"arm":null,"speak":"جميع أنظمتي تعمل بشكل طبيعي وأنا جاهز لمساعدتك","abort":null}}
"ما اسمك"
→ {{"actions":[],"arm":null,"speak":"اسمي ماركوس، روبوت مساعد من YS Lootah Technology في دبي","abort":null}}
"من بناك"
→ {{"actions":[],"arm":null,"speak":"بنتني شركة YS Lootah Technology في دبي، ومبرمجي هو قصام","abort":null}}
"شكرا" / "حسنا" / "ممتاز"
→ {{"actions":[],"arm":null,"speak":"جاهز للأمر التالي","abort":null}}
Command: "{command}"
JSON:
# ══════════════════════════════════════════════════════════════════════════════
# VERIFY PROMPT
# Used for: two-stage goal detection — confirm extra condition after YOLO finds class
# ══════════════════════════════════════════════════════════════════════════════
verify_prompt: |
You are a visual verification system for a robot.
A {target} has been detected in the camera image by the object detector.
Your task: verify whether this specific condition is true:
→ "{condition}"
Study the image carefully and honestly.
Reply with ONLY one word: yes or no
Rules:
- "yes" only if the condition is CLEARLY and VISIBLY true in this image
- "no" if you are uncertain, cannot see clearly, or the condition is not met
- Do not infer or guess — only confirm what is visually obvious
- A partially held object, unclear position, or occlusion = "no"
# ══════════════════════════════════════════════════════════════════════════════
# IMAGE SEARCH — COMPARE (two images: reference photo vs current camera)
# Used for: search/ /path/photo.jpg hint
# ══════════════════════════════════════════════════════════════════════════════
image_search_compare_prompt: |
You are the visual matching system for a robot performing a targeted search.
IMAGE 1 — Reference photo: shows the specific target to find.
IMAGE 2 — Current camera: shows what the robot sees right now.
{hint_line}
Task: determine if the target from IMAGE 1 is visible in IMAGE 2.
Reply ONLY with this JSON — no markdown, no explanation:
{{"found":false,"confidence":"low|medium|high","position":"left|center|right|not visible","description":"one sentence"}}
── MATCHING RULES ─────────────────────────────────────────────────────────
- found = true only when you are confident it is the SAME specific target
- This is identity matching — same person or same object, not just same category
- For people: match clothing, hair, body shape, face features — not just "a person"
- For objects: match color, shape, size, distinctive features — not just "a bag"
- confidence levels:
"high" — very clear match, high certainty (same person clearly visible)
"medium" — likely the same, minor uncertainty (similar appearance, slightly occluded)
"low" — possible match but unclear — robot should keep searching
- Stop searching only when found=true AND confidence is "medium" or "high"
- position: where in IMAGE 2 the target appears
"left" · "center" · "right" · "not visible"
- description: one honest sentence about what you see in IMAGE 2 and your reasoning
Good: "Person in blue shirt visible at center, matches reference photo clothing"
Good: "I see a person but face is not clear enough to confirm identity"
Good: "No match — the person visible is wearing different clothing"
── EXAMPLES ───────────────────────────────────────────────────────────────
Clear match:
{{"found":true,"confidence":"high","position":"center","description":"Person in blue shirt and glasses visible at center, closely matches the reference photo"}}
Likely match:
{{"found":true,"confidence":"medium","position":"right","description":"Person with similar clothing visible on right, slight occlusion but likely the same individual"}}
No match:
{{"found":false,"confidence":"high","position":"not visible","description":"I see a different person — clothing and appearance do not match the reference"}}
Unclear:
{{"found":false,"confidence":"low","position":"left","description":"Someone visible on left but partially occluded, cannot confirm identity — continuing search"}}
JSON:
# ══════════════════════════════════════════════════════════════════════════════
# IMAGE SEARCH — TEXT ONLY (description-based search, no reference photo)
# Used for: search/ person in blue shirt
# ══════════════════════════════════════════════════════════════════════════════
image_search_text_prompt: |
You are the visual search system for a robot looking for a target by description.
Target description: "{hint}"
Study the current camera image carefully.
Reply ONLY with this JSON — no markdown, no explanation:
{{"found":false,"confidence":"low|medium|high","position":"left|center|right|not visible","description":"one sentence"}}
── SEARCH RULES ───────────────────────────────────────────────────────────
- found = true only when the camera shows something that clearly matches the description
- Be specific about the match — does the image actually show what was described?
- For people descriptions (color, clothing, activity): all mentioned attributes must match
- For object descriptions (color, type, location): all mentioned attributes must match
- confidence levels:
"high" — target clearly visible, all description elements confirmed
"medium" — target likely visible, minor uncertainty on one element
"low" — possible match but one or more elements unclear or missing
- Only report found=true at "medium" or "high" confidence
── EXAMPLES ───────────────────────────────────────────────────────────────
Hint: "person in blue shirt"
Camera: person in blue shirt clearly visible
→ {{"found":true,"confidence":"high","position":"center","description":"Person wearing a blue shirt clearly visible at center of frame"}}
Hint: "person in blue shirt"
Camera: person in white shirt
→ {{"found":false,"confidence":"high","position":"not visible","description":"I see a person but they are wearing white, not blue"}}
Hint: "red backpack near the door"
Camera: red backpack on a chair, no door visible
→ {{"found":false,"confidence":"medium","position":"left","description":"Red backpack visible on left but no door nearby — partial match"}}
Hint: "laptop on desk"
Camera: laptop clearly on desk
→ {{"found":true,"confidence":"high","position":"center","description":"Laptop visible on desk at center of frame"}}
Hint: "شخص يرتدي قميصا أزرق"
Camera: شخص بقميص أزرق واضح
→ {{"found":true,"confidence":"high","position":"center","description":"أرى شخصا يرتدي قميصا أزرق بوضوح في وسط الإطار"}}
JSON:
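
The templates load with any YAML parser, and the doubled braces survive as literal JSON until str.format() fills the placeholders listed in the file header. A minimal sketch, assuming PyYAML and the project layout used elsewhere in this commit:

import os
import yaml
from Core.env_loader import PROJECT_ROOT

with open(os.path.join(PROJECT_ROOT, "Config", "marcus_prompts.yaml")) as f:
    prompts = yaml.safe_load(f)

goal = prompts["goal_prompt"].format(goal="stop when you see a person")
verify = prompts["verify_prompt"].format(target="person", condition="holding a phone")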

186
Core/Logger.py Normal file
View File

@ -0,0 +1,186 @@
import logging
import os
from pathlib import Path
class Logs:
def __init__(self, default_log_level=logging.DEBUG, main_log_file="main.log"):
self.default_log_level = default_log_level
self.log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
self.base_dir = str(Path(__file__).resolve().parents[1])
self.default_logs_dir = os.path.join(self.base_dir, "Logs")
self.fallback_log_dir = self._choose_fallback_log_dir()
self.mainloggerfile = self.resolve_log_path(main_log_file)
self.logger = None
# Initialize the main logger
self.main_logger = logging.getLogger("MainLogger")
self.main_logger.setLevel(self.default_log_level)
self.main_logger.propagate = False # Prevent logging from printing to terminal
# Clear all existing handlers, including any StreamHandler (avoids console logs)
if self.main_logger.hasHandlers():
self.main_logger.handlers.clear()
os.makedirs(os.path.dirname(self.mainloggerfile), exist_ok=True)
main_handler = logging.FileHandler(self.mainloggerfile)
main_handler.setFormatter(logging.Formatter(self.log_format))
main_handler.setLevel(self.default_log_level)
self.main_logger.addHandler(main_handler)
def _choose_fallback_log_dir(self):
env_dir = os.environ.get("MARCUS_LOG_DIR", "").strip()
candidates = []
if env_dir:
candidates.append(env_dir)
candidates.extend(
[
self.default_logs_dir,
os.path.join(os.path.expanduser("~"), ".marcus_logs"),
"/tmp/marcus_logs",
]
)
for d in candidates:
try:
os.makedirs(d, exist_ok=True)
test = os.path.join(d, ".write_test")
with open(test, "a", encoding="utf-8"):
pass
try:
os.remove(test)
except Exception:
pass
return os.path.abspath(d)
except Exception:
continue
return os.path.abspath("/tmp")
@staticmethod
def _normalize_log_name(name):
base = os.path.basename(str(name or "").strip()) or "main"
while base.lower().endswith(".log.log"):
base = base[:-4]
if not base.lower().endswith(".log"):
base += ".log"
return base
def _is_writable_path(self, full_path):
parent = os.path.dirname(full_path)
try:
os.makedirs(parent, exist_ok=True)
with open(full_path, "a", encoding="utf-8"):
pass
return True
except Exception:
return False
def _with_fallback(self, desired_path):
if self._is_writable_path(desired_path):
return os.path.abspath(desired_path)
fallback_path = os.path.join(self.fallback_log_dir, os.path.basename(desired_path))
if self._is_writable_path(fallback_path):
return os.path.abspath(fallback_path)
return os.path.abspath(desired_path)
def resolve_log_path(self, path):
"""Resolve relative or absolute path to absolute, always under the active logs dir when relative."""
normalized_name = self._normalize_log_name(path)
if os.path.isabs(str(path)):
full_path = os.path.abspath(str(path))
else:
full_path = os.path.join(self.fallback_log_dir, normalized_name)
return self._with_fallback(full_path)
def construct_path(self, folder_name, file_name):
"""Construct full path. Relative folders are centralized under the active logs dir."""
normalized_name = self._normalize_log_name(file_name)
if os.path.isabs(folder_name):
full_path = os.path.join(folder_name, normalized_name)
else:
full_path = os.path.join(self.fallback_log_dir, normalized_name)
return self._with_fallback(full_path)
def log_to_file(self, message, TypeLog):
level_map = {
"DEBUG": logging.DEBUG,
"INFO": logging.INFO,
"WARNING": logging.WARNING,
"ERROR": logging.ERROR,
"CRITICAL": logging.CRITICAL
}
log_level = level_map.get(TypeLog.upper(), logging.WARNING)
self.main_logger.log(log_level, message)
def LogEngine(self, folder_name, log_name):
"""Set up a named logger and resolve the file path correctly."""
full_path = self.construct_path(folder_name, log_name)
self.logger = logging.getLogger(log_name)
self.logger.setLevel(self.default_log_level)
self.logger.propagate = False # Prevent printing to terminal
# Clear existing FileHandlers
for handler in self.logger.handlers[:]:
if isinstance(handler, logging.FileHandler):
self.logger.removeHandler(handler)
handler = logging.FileHandler(full_path)
handler.setFormatter(logging.Formatter(self.log_format))
handler.setLevel(self.default_log_level)
self.logger.addHandler(handler)
def LogsMessages(self, message, message_type="info", folder_name=None, file_name=None):
if folder_name and file_name:
full_path = self.construct_path(folder_name, file_name)
temp_logger = logging.getLogger(f"{folder_name}_{file_name}")
temp_logger.setLevel(self.default_log_level)
temp_logger.propagate = False # Prevent printing to terminal
if not any(isinstance(h, logging.FileHandler) and h.baseFilename == full_path
for h in temp_logger.handlers):
handler = logging.FileHandler(full_path)
handler.setFormatter(logging.Formatter(self.log_format))
temp_logger.addHandler(handler)
getattr(temp_logger, message_type.lower(), temp_logger.warning)(message)
elif self.logger:
log_method = getattr(self.logger, message_type.lower(), self.logger.warning)
log_method(message)
else:
self.log_to_file(message, message_type.upper())
def print_and_log(self, message, message_type="info", folder_name=None, file_name=None):
self.LogsMessages(message, message_type, folder_name, file_name)
print(message)
# ==============================
# Usage Example
# ==============================
if __name__ == "__main__":
logger = Logs()
logger.LogEngine("ExampleLogger", "ExampleLogger.log")
logger.LogsMessages("This is a hidden message")
logger.print_and_log("This is a test message.", message_type="info")
# You can also directly specify folder and file for a log message
logger.print_and_log("Direct log to folder", message_type="info", folder_name="CustomLogs", file_name="event.log")

0
Core/__init__.py Normal file
View File

30
Core/config_loader.py Normal file
View File

@ -0,0 +1,30 @@
"""
config_loader.py Load JSON config files from Config/
"""
import json
import os
from Core.env_loader import PROJECT_ROOT
_cache = {}
def load_config(name: str) -> dict:
"""
Load Config/config_{name}.json and cache it.
Usage:
cfg = load_config("Brain")
model = cfg["ollama_model"]
"""
if name in _cache:
return _cache[name]
path = os.path.join(PROJECT_ROOT, "Config", f"config_{name}.json")
with open(path, "r", encoding="utf-8") as f:
data = json.load(f)
_cache[name] = data
return data
def config_path(relative: str) -> str:
"""Resolve a config-relative path to absolute."""
return os.path.join(PROJECT_ROOT, relative)

34
Core/env_loader.py Normal file
View File

@ -0,0 +1,34 @@
"""
env_loader.py Resolve project root at runtime
"""
import os
from pathlib import Path
from typing import Optional
def _find_env_file() -> Optional[Path]:
"""Walk up from this file to find .env in project root."""
d = Path(__file__).resolve().parent.parent
env = d / ".env"
if env.exists():
return env
return None
def _load_dotenv(path: Path):
"""Minimal .env loader — no external dependency."""
if path is None or not path.exists():
return
with open(path) as f:
for line in f:
line = line.strip()
if not line or line.startswith("#") or "=" not in line:
continue
key, val = line.split("=", 1)
os.environ.setdefault(key.strip(), val.strip())
_load_dotenv(_find_env_file())
PROJECT_BASE = os.environ.get("PROJECT_BASE", "/home/unitree")
PROJECT_NAME = os.environ.get("PROJECT_NAME", "Marcus")
PROJECT_ROOT = Path(__file__).resolve().parent.parent # always the Marcus/ dir

51
Core/logger.py Normal file
View File

@ -0,0 +1,51 @@
"""
logger.py Project-wide logging via Logger.py
"""
import os
from Core.env_loader import PROJECT_ROOT
from Core.Logger import Logs
# Single shared instance — all modules use this
_logs = Logs(main_log_file=os.path.join(PROJECT_ROOT, "logs", "main.log"))
# Pre-configured module loggers
_engines = {}
def get_logger(module_name: str) -> Logs:
"""
Get a logger configured for a specific module.
Logs go to logs/{module_name}.log
Usage:
from Core.logger import get_logger
log = get_logger("brain")
log.LogsMessages("Marcus started", "info")
"""
if module_name not in _engines:
# Note: Logs is a single shared instance — LogEngine rebinds its active
# file handler, so the most recent get_logger() call wins for self.logger.
_logs.LogEngine(
os.path.join(PROJECT_ROOT, "logs"),
f"{module_name}.log"
)
_engines[module_name] = _logs
return _logs
def log(message: str, level: str = "info", module: str = "main"):
"""Quick log function — no setup needed."""
_logs.LogsMessages(
message,
message_type=level,
folder_name=os.path.join(PROJECT_ROOT, "logs"),
file_name=f"{module}.log"
)
def log_and_print(message: str, level: str = "info", module: str = "main"):
"""Log to file AND print to console."""
_logs.print_and_log(
message,
message_type=level,
folder_name=os.path.join(PROJECT_ROOT, "logs"),
file_name=f"{module}.log"
)

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,20 @@
[
{
"time": "16:42:30",
"cmd": "hi",
"response": "Hello!",
"duration_s": 5.98
},
{
"time": "16:43:00",
"cmd": "go back two step",
"response": "Going back two steps",
"duration_s": 10.01
},
{
"time": "16:43:23",
"cmd": "now com",
"response": "Coming to you",
"duration_s": 8.11
}
]

View File

@ -0,0 +1,66 @@
[
{
"time": "16:42:30",
"class": "mouse",
"position": "left",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "16:42:30",
"class": "chair",
"position": "center",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "16:42:30",
"class": "laptop",
"position": "left",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "16:42:30",
"class": "person",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:43:00",
"class": "laptop",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:43:00",
"class": "person",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:43:23",
"class": "chair",
"position": "center",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "16:43:23",
"class": "laptop",
"position": "left",
"distance": "medium",
"x": 0.0,
"y": 0.0
}
]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,14 @@
[
{
"time": "16:44:40",
"cmd": "what do you see",
"response": "I see a person sitting at a desk in an office environment. The individual appears to be engaged with a computer screen and is wearing a red shirt.",
"duration_s": 8.01
},
{
"time": "16:45:06",
"cmd": "that is me the one whos is programming you",
"response": "That is me, the one who's programming you",
"duration_s": 6.71
}
]

View File

@ -0,0 +1,42 @@
[
{
"time": "16:44:40",
"class": "person",
"position": "left",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:45:06",
"class": "mouse",
"position": "left",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "16:45:06",
"class": "chair",
"position": "center",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "16:45:06",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:45:06",
"class": "laptop",
"position": "left",
"distance": "medium",
"x": 0.0,
"y": 0.0
}
]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,14 @@
[
{
"time": "16:46:46",
"cmd": "hi",
"response": "Error.",
"duration_s": 0.0
},
{
"time": "16:46:49",
"cmd": "hiii",
"response": "Error.",
"duration_s": 0.0
}
]

View File

@ -0,0 +1,34 @@
[
{
"time": "16:46:46",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:46:46",
"class": "chair",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:46:46",
"class": "laptop",
"position": "left",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "16:46:49",
"class": "mouse",
"position": "left",
"distance": "far",
"x": 0.0,
"y": 0.0
}
]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,32 @@
[
{
"time": "16:48:15",
"cmd": "hii",
"response": "Greeting",
"duration_s": 7.93
},
{
"time": "16:48:32",
"cmd": "what do you see",
"response": "I see a person sitting at a desk in an office environment. The person appears to be working on a computer.",
"duration_s": 7.64
},
{
"time": "16:48:58",
"cmd": "that person is the guy who is programming you",
"response": "That person is the one who programmed me",
"duration_s": 6.76
},
{
"time": "16:49:18",
"cmd": "who is that person",
"response": "I don't have enough information to identify the person in the image.",
"duration_s": 7.1
},
{
"time": "16:50:03",
"cmd": "example/",
"response": "local command",
"duration_s": 0.0
}
]

View File

@ -0,0 +1,34 @@
[
{
"time": "16:48:15",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:48:32",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:48:58",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:49:18",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
}
]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,44 @@
[
{
"time": "16:51:08",
"cmd": "hiii",
"response": "Greeting",
"duration_s": 7.97
},
{
"time": "16:51:23",
"cmd": "hi",
"response": "Greeting",
"duration_s": 6.16
},
{
"time": "16:51:51",
"cmd": "my name is Kassam",
"response": "My name is Kassam",
"duration_s": 6.48
},
{
"time": "16:51:59",
"cmd": "that person is Kassam the programmer",
"response": "That person is Kassam the programmer",
"duration_s": 6.74
},
{
"time": "16:52:07",
"cmd": "who is that person",
"response": "That is a man",
"duration_s": 6.42
},
{
"time": "16:52:47",
"cmd": "hiigo back 1 step",
"response": "Backing up one step",
"duration_s": 8.51
},
{
"time": "16:53:24",
"cmd": "what can you do",
"response": "I am a humanoid robot and I can perform various tasks such as walking, talking, and interacting with objects. However, in this image, I see a person sitting at a desk using a computer. If you have any specific questions or requests for me to perform, please let me know!",
"duration_s": 10.6
}
]

View File

@ -0,0 +1,146 @@
[
{
"time": "16:51:08",
"class": "chair",
"position": "center",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:08",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:23",
"class": "chair",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:23",
"class": "person",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:23",
"class": "laptop",
"position": "left",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:51",
"class": "chair",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:51",
"class": "person",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:51",
"class": "laptop",
"position": "left",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:59",
"class": "chair",
"position": "center",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:59",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:59",
"class": "laptop",
"position": "left",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "16:52:07",
"class": "chair",
"position": "center",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "16:52:07",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:52:47",
"class": "chair",
"position": "center",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "16:52:47",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:52:47",
"class": "cup",
"position": "left",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "16:53:24",
"class": "chair",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:53:24",
"class": "person",
"position": "left",
"distance": "very close",
"x": 0.0,
"y": 0.0
}
]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,44 @@
[
{
"time": "17:43:52",
"cmd": "hi",
"response": "Hi",
"duration_s": 3.23
},
{
"time": "17:44:32",
"cmd": "step one step back",
"response": "Step one step back",
"duration_s": 3.44
},
{
"time": "17:44:43",
"cmd": "what do you see",
"response": "I see a person sitting at a desk with a laptop in front of them.",
"duration_s": 3.93
},
{
"time": "17:44:52",
"cmd": "hii",
"response": "Hi",
"duration_s": 3.31
},
{
"time": "17:45:27",
"cmd": "one step back",
"response": "One step back",
"duration_s": 3.46
},
{
"time": "17:45:38",
"cmd": "one more",
"response": "One more",
"duration_s": 3.43
},
{
"time": "17:46:09",
"cmd": "two step back",
"response": "Moving back two steps",
"duration_s": 3.51
}
]

View File

@ -0,0 +1,138 @@
[
{
"time": "17:43:52",
"class": "chair",
"position": "center",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "17:43:52",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:43:52",
"class": "laptop",
"position": "left",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:43:52",
"class": "cup",
"position": "left",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "17:44:32",
"class": "person",
"position": "right",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:44:32",
"class": "laptop",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:44:43",
"class": "person",
"position": "right",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:44:52",
"class": "chair",
"position": "right",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:44:52",
"class": "person",
"position": "right",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:45:27",
"class": "person",
"position": "right",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:45:27",
"class": "laptop",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:45:38",
"class": "chair",
"position": "right",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:45:38",
"class": "person",
"position": "right",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:45:38",
"class": "laptop",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:46:09",
"class": "chair",
"position": "right",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:46:09",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:46:09",
"class": "laptop",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
}
]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,14 @@
[
{
"time": "17:46:37",
"cmd": "hii",
"response": "Hii",
"duration_s": 3.27
},
{
"time": "17:46:54",
"cmd": "why did you moved",
"response": "I moved because I was instructed to do so.",
"duration_s": 3.71
}
]

View File

@ -0,0 +1,58 @@
[
{
"time": "17:46:37",
"class": "chair",
"position": "right",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:46:37",
"class": "person",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:46:37",
"class": "cup",
"position": "left",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "17:46:54",
"class": "mouse",
"position": "center",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "17:46:54",
"class": "person",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:46:54",
"class": "chair",
"position": "right",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:46:54",
"class": "laptop",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
}
]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,56 @@
[
{
"time": "17:49:16",
"cmd": "hii",
"response": "Hello! I am Marcus. How can I help you?",
"duration_s": 0.0
},
{
"time": "17:49:19",
"cmd": "hi",
"response": "Hello! I am Marcus. How can I help you?",
"duration_s": 0.0
},
{
"time": "17:49:20",
"cmd": "Hi",
"response": "Hello! I am Marcus. How can I help you?",
"duration_s": 0.0
},
{
"time": "17:49:37",
"cmd": "move one step back",
"response": "Moving back",
"duration_s": 3.47
},
{
"time": "17:49:50",
"cmd": "one tiny step back",
"response": "Moving back",
"duration_s": 3.44
},
{
"time": "17:50:05",
"cmd": "now come back to me",
"response": "Coming back to you",
"duration_s": 3.53
},
{
"time": "17:50:36",
"cmd": "turn right",
"response": "Turning right",
"duration_s": 3.4
},
{
"time": "17:51:01",
"cmd": "come back to me",
"response": "Coming back to you",
"duration_s": 3.5
},
{
"time": "17:51:22",
"cmd": "turn right 90deg and then back two steps",
"response": "Turning right",
"duration_s": 3.43
}
]

View File

@ -0,0 +1,106 @@
[
{
"time": "17:49:37",
"class": "chair",
"position": "right",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:49:37",
"class": "laptop",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:49:37",
"class": "person",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:49:50",
"class": "laptop",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:49:50",
"class": "person",
"position": "right",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:50:05",
"class": "chair",
"position": "right",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:50:05",
"class": "laptop",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:50:05",
"class": "person",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:50:36",
"class": "chair",
"position": "center",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:50:36",
"class": "person",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:51:01",
"class": "laptop",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:51:01",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:51:22",
"class": "person",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
}
]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,26 @@
[
{
"time": "17:53:36",
"cmd": "turn right 90deg and then move back 3 steps",
"response": "Turned right 90 degrees then moved backward",
"duration_s": 0.0
},
{
"time": "17:54:21",
"cmd": "keep turning left unitl you see a guy holding a phone",
"response": "Turning left",
"duration_s": 3.72
},
{
"time": "17:55:33",
"cmd": "goal/ keep turning left unitl you see a guy holding a phone",
"response": "Goal navigation: keep turning left unitl you see a guy holding a phone",
"duration_s": 5.47
},
{
"time": "17:55:53",
"cmd": "describe what he is doing",
"response": "Turning left",
"duration_s": 3.73
}
]

View File

@ -0,0 +1,50 @@
[
{
"time": "17:54:21",
"class": "suitcase",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:54:21",
"class": "backpack",
"position": "center",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:55:32",
"class": "person",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:55:53",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:55:53",
"class": "chair",
"position": "center",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:55:53",
"class": "laptop",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
}
]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,32 @@
[
{
"time": "17:57:56",
"cmd": "turn right 90 deg",
"response": "local command",
"duration_s": 0.0
},
{
"time": "17:58:22",
"cmd": "keep turning left until you see a guy holding a phone and describe what he is doing and his office",
"response": "Turning left",
"duration_s": 3.68
},
{
"time": "17:58:55",
"cmd": "goal/ keep turning left until you see a guy holding a phone and describe what he is doing and his office",
"response": "Goal navigation: keep turning left until you see a guy holding a phone and describe what he is doing and his office",
"duration_s": 25.79
},
{
"time": "17:59:13",
"cmd": "keep turning left until you see a guy holding a phone and describe what he is doing and his office",
"response": "Turning left",
"duration_s": 3.73
},
{
"time": "17:59:40",
"cmd": "goal/keep turning left until you see a guy holding a phone and describe what he is doing and his office",
"response": "Goal navigation: keep turning left until you see a guy holding a phone and describe what he is doing and his office",
"duration_s": 23.61
}
]
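
These two sessions make the routing convention visible: a bare sentence is matched against local motion primitives (duration_s of roughly 3.4-3.7 s per step, or 0.0 when resolved as a "local command" without a remote call), while the same sentence prefixed with "goal/" is handed to a goal-navigation loop, which is why its durations jump to 20-plus seconds. A minimal sketch of that prefix dispatch — route_command, do_local, and run_goal_navigation are hypothetical names, not this repository's API:

def route_command(text, do_local, run_goal_navigation):
    """Dispatch on an explicit 'goal/' prefix; everything else stays local."""
    text = text.strip()
    if text.lower().startswith("goal/"):
        goal = text[len("goal/"):].strip()
        return run_goal_navigation(goal)   # long-running loop, tens of seconds
    return do_local(text)                  # single primitive, a few seconds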

View File

@ -0,0 +1,18 @@
[
{
"time": "17:58:22",
"class": "suitcase",
"position": "right",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:58:22",
"class": "backpack",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
}
]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,26 @@
[
{
"time": "18:09:23",
"cmd": "what do you see",
"response": "I see a person sitting at a desk with a computer and a monitor in front of them.",
"duration_s": 4.48
},
{
"time": "18:09:47",
"cmd": "what do you see",
"response": "I see a person sitting at a desk with a computer and a monitor in front of them.",
"duration_s": 4.52
},
{
"time": "18:10:17",
"cmd": "example/",
"response": "local command",
"duration_s": 0.0
},
{
"time": "18:10:41",
"cmd": "is there anyone in the room",
"response": "I see a person sitting at a desk with a computer and a monitor in front of them.",
"duration_s": 4.57
}
]
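
The near-constant duration_s of about 4.5 s on these vision questions points to a full vision-language round trip per query — note that "is there anyone in the room" returns the same scene description as "what do you see", so the answer appears to come from a generic scene-description pass rather than question-specific reasoning. A small sketch of timing such a query the way these logs do (ask_vlm is a hypothetical callable standing in for the actual vision backend):

import time

def timed_vision_query(ask_vlm, question):
    """Run a vision-language query and return (answer, latency in seconds)."""
    t0 = time.time()
    answer = ask_vlm(question)         # ~4.5 s round trip in these logs
    return answer, round(time.time() - t0, 2)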

View File

@ -0,0 +1,58 @@
[
{
"time": "18:09:23",
"class": "mouse",
"position": "center",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "18:09:23",
"class": "chair",
"position": "right",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "18:09:23",
"class": "person",
"position": "right",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "18:09:23",
"class": "laptop",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "18:09:47",
"class": "person",
"position": "right",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "18:10:41",
"class": "person",
"position": "right",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "18:10:41",
"class": "laptop",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
}
]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1,13 @@
Session: session_014_2026-04-05
Date: 2026-04-05 18:10
Duration: 4m 48s
Commands: 4
YOLO detections: 7
Alerts: 0
Known places: none
First commands:
- what do you see
- what do you see
- example/
- is there anyone in the room
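
Each session closes with a plain-text summary like the one above: session id, date, duration, command and detection counts, alert count, known places, and the first few commands. A sketch of producing that block from the two JSON logs — the file-name arguments, the fixed "Alerts: 0" line, and the four-command cap are assumptions based only on what these logs show:

import json

def write_session_summary(session_id, date, duration, cmd_path, det_path, out_path):
    """Render the session summary block from the command and detection logs."""
    with open(cmd_path) as f:
        cmds = json.load(f)
    with open(det_path) as f:
        dets = json.load(f)
    lines = [
        f"Session: {session_id}",
        f"Date: {date}",
        f"Duration: {duration}",
        f"Commands: {len(cmds)}",
        f"YOLO detections: {len(dets)}",
        "Alerts: 0",            # assumption: no alert log present in this commit
        "Known places: none",
        "First commands:",
    ]
    lines += [f"- {c['cmd']}" for c in cmds[:4]]
    with open(out_path, "w") as f:
        f.write("\n".join(lines) + "\n")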

Some files were not shown because too many files have changed in this diff