commit 8491be7f1ef49da4acc9e48463ee952358cd5a49 Author: kassam Date: Sun Apr 12 18:50:22 2026 +0400 Initial project commit diff --git a/.env b/.env new file mode 100644 index 0000000..4e118b8 --- /dev/null +++ b/.env @@ -0,0 +1,2 @@ +PROJECT_BASE=/home/unitree +PROJECT_NAME=Marcus diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c02264b --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +__pycache__/ +*.pyc +Logs/ +*.log diff --git a/API/__init__.py b/API/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/API/arm_api.py b/API/arm_api.py new file mode 100644 index 0000000..5bd521c --- /dev/null +++ b/API/arm_api.py @@ -0,0 +1,16 @@ +""" +arm_api.py — Arm gesture control (GR00T N1.5 pending) +""" +from Core.config_loader import load_config + +_cfg = load_config("Arm") + +ARM_AVAILABLE = _cfg["arm_available"] +ARM_ACTIONS = _cfg["actions"] +ARM_ALIASES = {k: v for k, v in _cfg["aliases"].items()} +ALL_ARM_NAMES = set(ARM_ALIASES.keys()) | set(ARM_ACTIONS.keys()) + + +def do_arm(action): + """Arm action stub — GR00T N1.5 integration pending.""" + print(f" [Arm] GR00T not yet integrated — skipping: {action}") diff --git a/API/audio_api.py b/API/audio_api.py new file mode 100644 index 0000000..fd8256b --- /dev/null +++ b/API/audio_api.py @@ -0,0 +1,419 @@ +#!/usr/bin/env python3 +""" +API/audio_api.py — Marcus Audio API Layer +========================================== +Provides speak() and record_audio() for the Brain layer. +Brain imports ONLY from this API — never from unitree SDK directly. + +Speaker: _CallRequestWithParamAndBin (single call, full buffer) +Mic: parec -d 3 (Hollyland wireless, PulseAudio source index from config) +TTS EN: Unitree built-in TtsMaker +TTS AR: Piper ar_JO-kareem-medium → resample → G1 speaker + +Usage: + from API.audio_api import AudioAPI + audio = AudioAPI() + audio.speak("Hello", "en") + audio.speak("مرحبا", "ar") + recording = audio.record(seconds=5) + audio.play_pcm(recording) +""" + +import json +import logging +import os +import subprocess +import threading +import time +import wave +import numpy as np + +# ─── PATH CONFIG ───────────────────────────────────────── +from dotenv import load_dotenv +load_dotenv() + +BASE_PATH = os.environ.get("PROJECT_BASE", "/home/unitree") +PROJECT_NAME = "Marcus" +PROJECT_ROOT = os.path.join(BASE_PATH, PROJECT_NAME) + +LOG_DIR = os.path.join(PROJECT_ROOT, "logs") +os.makedirs(LOG_DIR, exist_ok=True) + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(name)s] %(levelname)s: %(message)s", + handlers=[ + logging.FileHandler(os.path.join(LOG_DIR, "voice.log")), + logging.StreamHandler(), + ], +) +log = logging.getLogger("audio_api") + + +def load_config(name: str) -> dict: + path = os.path.join(PROJECT_ROOT, "Config", f"config_{name}.json") + with open(path, "r") as f: + return json.load(f) + + +# ─── AUDIO API CLASS ───────────────────────────────────── + +class AudioAPI: + """Marcus audio interface — speak + record + play.""" + + def __init__(self): + self._config = load_config("Voice") + self._client = None + self._sdk_available = False + self._init_sdk() + + # Config shortcuts + self._tts = self._config["tts"] + self._mic = self._config["mic"] + self._spk = self._config["speaker"] + self._target_rate = self._tts["target_sample_rate"] + + # Data dir + data_dir = os.path.join(PROJECT_ROOT, self._config["audio"]["data_dir"]) + os.makedirs(data_dir, exist_ok=True) + self._data_dir = data_dir + + # Speaking lock — prevents mic from hearing TTS output + self._speaking = 
False + self._speak_lock = threading.Lock() + + log.info(self._config["messages"]["ready"]) + + def _init_sdk(self): + """Initialize Unitree AudioClient.""" + try: + from unitree_sdk2py.core.channel import ChannelFactoryInitialize + from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient + + dds_iface = self._config["speaker"]["dds_interface"] + ChannelFactoryInitialize(0, dds_iface) + + self._client = AudioClient() + self._client.SetTimeout(10.0) + self._client.Init() + self._client.SetVolume(self._config["speaker"]["volume"]) + self._sdk_available = True + log.info("AudioClient initialized on %s", dds_iface) + except Exception as e: + log.error("AudioClient init failed: %s", e) + self._sdk_available = False + + # ─── SPEAK ──────────────────────────────────────────── + + def speak(self, text: str, lang: str = "auto"): + """ + Speak text in the given language. + Mutes mic during playback to prevent self-listening. + lang="en" → built-in TtsMaker + lang="ar" → Piper → resample → G1 speaker + lang="auto" → detect from text + """ + if lang == "auto": + lang = self._detect_lang(text) + + log.info("[%s] speak: %s", lang.upper(), text[:80]) + + with self._speak_lock: + self._speaking = True + self._mute_mic() + + try: + if lang == "en": + self._speak_english(text) + elif lang == "ar": + self._speak_arabic(text) + else: + log.warning("Unknown lang '%s', falling back to English", lang) + self._speak_english(text) + except Exception as e: + log.error("%s: %s", self._config["messages"]["error_tts"], e) + finally: + # Small delay so speaker fully stops before mic reopens + time.sleep(0.3) + self._unmute_mic() + self._speaking = False + + def _mute_mic(self): + """Mute the wireless mic to prevent self-listening.""" + source = self._mic["source_index"] + subprocess.run( + ["pactl", "set-source-mute", source, "1"], + capture_output=True, + ) + log.debug("Mic muted") + + def _unmute_mic(self): + """Unmute the wireless mic.""" + source = self._mic["source_index"] + subprocess.run( + ["pactl", "set-source-mute", source, "0"], + capture_output=True, + ) + log.debug("Mic unmuted") + + @property + def is_speaking(self) -> bool: + """True while TTS is playing — voice module checks this.""" + return self._speaking + + def _speak_english(self, text: str): + """English TTS via edge-tts.""" + self._speak_edge_tts(text, "en") + + def _speak_arabic(self, text: str): + """Arabic TTS via edge-tts.""" + self._speak_edge_tts(text, "ar") + + def speak_piper_en(self, text: str): + """Alternative: English via Piper instead of built-in.""" + voice = self._tts["piper_voice_en"] + audio, rate = self._piper_synthesize(text, voice) + audio_16k = self._resample(audio, rate) + self._play_pcm(audio_16k) + + # ─── PIPER TTS ──────────────────────────────────────── + + def _piper_synthesize(self, text: str, voice: str) -> tuple: + """Run Piper CLI, return (audio_int16, sample_rate).""" + cmd = ["piper", "--model", voice, "--output_raw"] + timeout = self._tts["piper_timeout_sec"] + + proc = subprocess.run( + cmd, + input=text.encode("utf-8"), + capture_output=True, + timeout=timeout, + ) + + if proc.returncode != 0: + stderr = proc.stderr.decode()[:300] + raise RuntimeError(f"Piper failed: {stderr}") + + audio = np.frombuffer(proc.stdout, dtype=np.int16) + piper_rate = self._tts["piper_sample_rate"] + log.info("Piper: %d samples @ %dHz (%.1fs)", len(audio), piper_rate, len(audio) / piper_rate) + return audio, piper_rate + + # ─── RESAMPLE ───────────────────────────────────────── + + + def _speak_edge_tts(self, text: str, 
lang: str): + """Generate speech via edge-tts and play on G1.""" + import os as _os + voice = "ar-AE-HamdanNeural" if lang == "ar" else "en-US-GuyNeural" + ts = int(time.time() * 1000) + mp3_path = f"/tmp/edge_{lang}_{ts}.mp3" + wav_path = f"/tmp/edge_{lang}_{ts}.wav" + + safe_text = text.replace('"', '\\"') + code = f'import edge_tts, asyncio; asyncio.run(edge_tts.Communicate(\"{safe_text}\", voice=\"{voice}\").save(\"{mp3_path}\"))' + result = subprocess.run(["python3", "-c", code], capture_output=True, text=True, timeout=30) + + if result.returncode != 0: + log.error("edge-tts failed: %s", result.stderr[:200]) + if lang == "en" and self._sdk_available: + self._client.TtsMaker(text, self._tts.get("builtin_speaker_id", 1)) + time.sleep(max(2.0, len(text) * 0.06)) + return + + try: + from pydub import AudioSegment + a = AudioSegment.from_mp3(mp3_path) + a = a.set_frame_rate(16000).set_channels(1).set_sample_width(2) + a.export(wav_path, format="wav") + + import wave + with wave.open(wav_path, "rb") as wf: + audio = np.frombuffer(wf.readframes(wf.getnframes()), dtype=np.int16) + + _os.unlink(mp3_path) + _os.unlink(wav_path) + self._play_pcm(audio) + except Exception as e: + log.error("edge-tts conversion error: %s", e) + try: _os.unlink(mp3_path) + except: pass + try: _os.unlink(wav_path) + except: pass + + def _resample(self, audio: np.ndarray, src_rate: int) -> np.ndarray: + """Resample to target rate (16kHz).""" + if src_rate == self._target_rate: + return audio + tl = int(len(audio) * self._target_rate / src_rate) + return np.interp( + np.linspace(0, len(audio), tl, endpoint=False), + np.arange(len(audio)), + audio.astype(np.float64), + ).astype(np.int16) + + # ─── G1 SPEAKER PLAYBACK ───────────────────────────── + + def _play_pcm(self, audio_16k: np.ndarray) -> float: + """Play 16kHz mono int16 on G1 speaker. Returns duration.""" + if not self._sdk_available: + log.warning("SDK not available, cannot play audio") + return 0.0 + + from unitree_sdk2py.g1.audio.g1_audio_api import ( + ROBOT_API_ID_AUDIO_START_PLAY, + ROBOT_API_ID_AUDIO_STOP_PLAY, + ) + + app_name = self._spk["app_name"] + + # Stop previous stream + self._client._Call( + ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": app_name}), + ) + time.sleep(0.3) + + # Build params — unique stream_id every call + pcm = audio_16k.tobytes() + sid = f"s_{int(time.time() * 1000)}" + param = json.dumps({ + "app_name": app_name, + "stream_id": sid, + "sample_rate": self._target_rate, + "channels": 1, + "bits_per_sample": 16, + }) + + # Single call — full buffer + self._client._CallRequestWithParamAndBin( + ROBOT_API_ID_AUDIO_START_PLAY, param, list(pcm) + ) + + duration = len(audio_16k) / self._target_rate + time.sleep(duration + 0.5) + + self._client._Call( + ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": app_name}), + ) + + log.info("Played: %.1fs", duration) + return duration + + def play_pcm(self, audio_16k: np.ndarray) -> float: + """Public wrapper for playing PCM audio.""" + return self._play_pcm(audio_16k) + + # ─── MIC RECORDING ─────────────────────────────────── + + def record(self, seconds: float = 5.0) -> np.ndarray: + """Record from Hollyland wireless mic via parec. 
Returns int16 array.""" + source = self._mic["source_index"] + rate = str(self._mic["rate"]) + channels = str(self._mic["channels"]) + fmt = self._mic["format"] + + # Unmute mic + subprocess.run( + ["pactl", "set-source-mute", source, "0"], + capture_output=True, + ) + subprocess.run( + ["pactl", "set-source-volume", source, "100%"], + capture_output=True, + ) + + log.info("Recording %.1fs from mic source %s", seconds, source) + + proc = subprocess.Popen( + ["parec", "-d", source, + f"--format={fmt}", f"--rate={rate}", f"--channels={channels}", "--raw"], + stdout=subprocess.PIPE, + ) + time.sleep(seconds) + proc.terminate() + raw = proc.stdout.read() + + audio = np.frombuffer(raw, dtype=np.int16) + log.info("Recorded: %d samples, std=%.0f", len(audio), audio.std()) + + if audio.std() < 50: + log.warning(self._config["messages"]["error_mic"] + " — mic may be silent") + + return audio + + def save_recording(self, audio: np.ndarray, name: str) -> str: + """Save recording to Data/Voice/Recordings/.""" + path = os.path.join(self._data_dir, f"{name}.wav") + wf = wave.open(path, "wb") + wf.setnchannels(1) + wf.setsampwidth(2) + wf.setframerate(self._target_rate) + wf.writeframes(audio.tobytes()) + wf.close() + log.info("Saved: %s", path) + return path + + # ─── LANGUAGE DETECTION ─────────────────────────────── + + @staticmethod + def _detect_lang(text: str) -> str: + """Detect language from text — Arabic Unicode range check.""" + for c in text: + if '\u0600' <= c <= '\u06FF': + return "ar" + return "en" + + # ─── STATUS ─────────────────────────────────────────── + + @property + def is_available(self) -> bool: + return self._sdk_available + + +# ─── STANDALONE TEST ───────────────────────────────────── + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Marcus Audio API Test") + parser.add_argument("--test", action="store_true", help="Run speak tests") + parser.add_argument("--speak", type=str, help="Speak this text") + parser.add_argument("--lang", default="auto", help="Language: en, ar, auto") + parser.add_argument("--record", type=float, default=0, help="Record N seconds") + args = parser.parse_args() + + api = AudioAPI() + + if args.test: + print("\n--- English built-in ---") + api.speak("Hello, I am Marcus.", "en") + time.sleep(1) + + print("\n--- Arabic Piper ---") + api.speak("مرحبا، أنا ماركوس", "ar") + time.sleep(1) + + print("\n--- Auto-detect ---") + api.speak("How are you?") + time.sleep(1) + api.speak("كيف حالك؟") + time.sleep(1) + + print("\n--- Record 3s + playback ---") + rec = api.record(3.0) + if rec.std() > 50: + api.play_pcm(rec) + print("\nDone.") + + elif args.speak: + api.speak(args.speak, args.lang) + + elif args.record > 0: + rec = api.record(args.record) + api.save_recording(rec, f"test_{int(time.time())}") + if rec.std() > 50: + api.play_pcm(rec) + else: + parser.print_help() diff --git a/API/camera_api.py b/API/camera_api.py new file mode 100644 index 0000000..34d7fed --- /dev/null +++ b/API/camera_api.py @@ -0,0 +1,111 @@ +""" +camera_api.py — RealSense D435I camera thread +""" +import base64 +import io +import time +import threading +import numpy as np +from PIL import Image +from Core.config_loader import load_config +from Core.logger import log + +_cfg = load_config("Camera") + +CAM_WIDTH = _cfg["width"] +CAM_HEIGHT = _cfg["height"] +CAM_FPS = _cfg["fps"] +CAM_QUALITY = _cfg["jpeg_quality"] + +# Shared state +latest_frame_b64 = [None] +_raw_frame = [None] +camera_lock = threading.Lock() +_raw_lock = threading.Lock() 
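+# The single-element lists above and below are mutable cells shared between
+# the capture thread and its readers: the two frame buffers are only touched
+# under their locks, while the scalar flags (camera_alive,
+# _cam_last_frame_time, _cam_connected) are plain flags that tolerate a racy
+# read. Readers should copy frames under the lock, e.g.:
+#
+#     with _raw_lock:
+#         frame = None if _raw_frame[0] is None else _raw_frame[0].copy()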
+camera_alive = [True] +_cam_last_frame_time = [0.0] +_cam_connected = [False] + + +def get_raw_refs(): + """Return (raw_frame_ref, raw_lock) for YOLO to share.""" + return _raw_frame, _raw_lock + + +def camera_loop(): + """Capture RealSense frames continuously with auto-reconnect.""" + import pyrealsense2 as rs + + backoff = 2.0 + while camera_alive[0]: + pipeline = None + try: + pipeline = rs.pipeline() + cfg = rs.config() + cfg.enable_stream(rs.stream.color, CAM_WIDTH, CAM_HEIGHT, rs.format.bgr8, CAM_FPS) + pipeline.start(cfg) + backoff = 2.0 + _cam_connected[0] = True + print("Camera connected") + log(f"Camera connected {CAM_WIDTH}x{CAM_HEIGHT}@{CAM_FPS}", "info", "camera") + + while camera_alive[0]: + try: + frames = pipeline.wait_for_frames(timeout_ms=5000) + color_frame = frames.get_color_frame() + if not color_frame: + continue + + frame = np.asanyarray(color_frame.get_data()) + if frame is None or frame.size == 0: + continue + + with _raw_lock: + _raw_frame[0] = frame.copy() + + img = Image.fromarray(frame[:, :, ::-1]) + buf = io.BytesIO() + img.save(buf, format="JPEG", quality=CAM_QUALITY) + with camera_lock: + latest_frame_b64[0] = base64.b64encode(buf.getvalue()).decode() + + _cam_last_frame_time[0] = time.time() + + except Exception: + if time.time() - _cam_last_frame_time[0] > 10.0: + print(" [Camera] No frame for 10s — reconnecting...") + break + + except Exception as e: + if _cam_connected[0]: + print(f" [Camera] Disconnected ({type(e).__name__}) — retrying in {backoff:.0f}s...") + _cam_connected[0] = False + try: + pipeline.stop() + except Exception: + pass + time.sleep(backoff) + backoff = min(backoff * 2, 10.0) + + +def start_camera(): + """Start camera thread. Returns (raw_frame_ref, raw_lock).""" + threading.Thread(target=camera_loop, daemon=True).start() + time.sleep(3.0) + return _raw_frame, _raw_lock + + +def stop_camera(): + """Stop camera thread.""" + camera_alive[0] = False + + +def get_frame(): + """Return latest base64 JPEG frame for LLaVA. 
None if not ready.""" + with camera_lock: + return latest_frame_b64[0] + + +def get_frame_age() -> float: + """Return seconds since last camera frame.""" + return time.time() - _cam_last_frame_time[0] if _cam_last_frame_time[0] > 0 else 999.0 diff --git a/API/imgsearch_api.py b/API/imgsearch_api.py new file mode 100644 index 0000000..e47e3ef --- /dev/null +++ b/API/imgsearch_api.py @@ -0,0 +1,38 @@ +""" +imgsearch_api.py — Image-guided search interface +""" +import sys +import os +from Core.env_loader import PROJECT_ROOT + +_img_searcher = None + + +def init_imgsearch(get_frame_fn, send_vel_fn, gradual_stop_fn, + llava_fn, yolo_sees_fn, model): + """Wire up the ImageSearch module.""" + global _img_searcher + + vision_dir = os.path.join(PROJECT_ROOT, "Vision") + if vision_dir not in sys.path: + sys.path.insert(0, vision_dir) + + try: + from marcus_imgsearch import ImageSearch + _img_searcher = ImageSearch( + get_frame_fn=get_frame_fn, + send_vel_fn=send_vel_fn, + gradual_stop_fn=gradual_stop_fn, + llava_fn=llava_fn, + yolo_sees_fn=yolo_sees_fn, + model=model, + ) + print("Image search ready") + except ImportError: + print("marcus_imgsearch.py not found — image search disabled") + except Exception as e: + print(f"Image search error: {e}") + + +def get_searcher(): + return _img_searcher diff --git a/API/lidar_api.py b/API/lidar_api.py new file mode 100644 index 0000000..120733c --- /dev/null +++ b/API/lidar_api.py @@ -0,0 +1,282 @@ +""" +lidar_api.py — Livox Mid-360 LiDAR interface via SlamEngineClient +================================================================== +Background thread polls SLAM data_q at 20Hz. +Exposes obstacle_ahead(), get_slam_pose(), get_nav_cmd(), get_loc_state(). + +The SLAM subprocess handles all point cloud processing, costmap building, +and safety evaluation internally. We just read the results. +""" +import math +import os +import sys +import threading +import time + +import numpy as np + +from Core.config_loader import load_config, config_path +from Core.logger import log + +_cfg = load_config("LiDAR") + +LIDAR_AVAILABLE = False + +# ── Shared state (thread-safe) ─────────────────────────────────────────────── +_lock = threading.Lock() +_latest_safety = [{}] +_latest_pose = [None] # 4x4 numpy or None +_latest_nav = [{}] +_latest_loc_state = ["IDLE"] +_latest_mode = ["IDLE"] +_latest_perf = [{}] +_lidar_alive = [False] +_client = [None] +_last_frame_time = [0.0] + + +# ══════════════════════════════════════════════════════════════════════════════ +# INIT +# ══════════════════════════════════════════════════════════════════════════════ + +def init_lidar(): + """ + Start SLAM subprocess + poll thread. + Returns True if LiDAR connected successfully. 
+ """ + global LIDAR_AVAILABLE + + if not _cfg.get("enabled", True): + print(" [LiDAR] Disabled in config") + return False + + slam_dir = config_path(_cfg.get("slam_source_dir", "Lidar")) + if slam_dir not in sys.path: + sys.path.insert(0, slam_dir) + + try: + from SLAM_engine import SlamEngineClient + except ImportError as e: + print(f" [LiDAR] SlamEngineClient not found: {e}") + log(f"LiDAR import failed: {e}", "error", "lidar") + return False + + try: + client = SlamEngineClient() + _client[0] = client + print(" [LiDAR] SLAM engine created") + log("SLAM engine created", "info", "lidar") + except Exception as e: + print(f" [LiDAR] SLAM init error: {e}") + log(f"SLAM init error: {e}", "error", "lidar") + return False + + # Start subprocess + connect to hardware + if _cfg.get("auto_connect", True): + try: + client.start_process() + client.connect() + print(" [LiDAR] Connected to Livox Mid-360") + log("Connected to LiDAR", "info", "lidar") + + if _cfg.get("start_localize_only", False): + client.start_localize_only() + print(" [LiDAR] Localize-only mode") + elif _cfg.get("auto_start_mapping", False): + client.start_mapping() + print(" [LiDAR] Mapping started") + except Exception as e: + print(f" [LiDAR] Connect error: {e}") + log(f"LiDAR connect error: {e}", "error", "lidar") + return False + + # Start poll thread + _lidar_alive[0] = True + threading.Thread(target=_poll_loop, daemon=True, name="lidar-poll").start() + + # Wait for first FRAME (up to 5s) + deadline = time.time() + 5.0 + while time.time() < deadline: + if _last_frame_time[0] > 0: + break + time.sleep(0.1) + + if _last_frame_time[0] > 0: + LIDAR_AVAILABLE = True + print(" [LiDAR] Receiving data") + log("LiDAR available — receiving frames", "info", "lidar") + else: + print(" [LiDAR] No data yet (will keep trying)") + LIDAR_AVAILABLE = True # still mark available — data may arrive later + + return True + + +# ══════════════════════════════════════════════════════════════════════════════ +# POLL LOOP +# ══════════════════════════════════════════════════════════════════════════════ + +def _poll_loop(): + """Drain data_q, keep latest FRAME payload, update shared state.""" + client = _client[0] + if client is None: + return + + poll_interval = 1.0 / max(1, _cfg.get("poll_hz", 20)) + + while _lidar_alive[0]: + try: + # Drain data_q — keep only the latest FRAME + payload = None + while True: + try: + item = client.data_q.get_nowait() + if isinstance(item, tuple) and len(item) >= 2 and item[0] == "FRAME": + payload = item[1] + except Exception: + break + + if payload is not None: + _update_state(payload) + + # Drain status_q (just log, don't block) + while True: + try: + msg = client.status_q.get_nowait() + if isinstance(msg, tuple) and len(msg) >= 2: + level, text = str(msg[0]), str(msg[1]) + if level == "ERROR": + log(f"SLAM: {text}", "error", "lidar") + elif level == "WARN": + log(f"SLAM: {text}", "warning", "lidar") + except Exception: + break + + except Exception as e: + log(f"LiDAR poll error: {e}", "error", "lidar") + + time.sleep(poll_interval) + + +def _update_state(payload): + """Extract safety/pose/nav from FRAME payload into shared state.""" + with _lock: + _latest_safety[0] = payload.get("safety", {}) + _latest_pose[0] = payload.get("pose") + _latest_nav[0] = payload.get("nav", {}) + _latest_loc_state[0] = str(payload.get("loc_state", "IDLE")) + _latest_mode[0] = str(payload.get("mode", "IDLE")) + _latest_perf[0] = payload.get("perf", {}) + _last_frame_time[0] = time.time() + + +# 
══════════════════════════════════════════════════════════════════════════════
+# PUBLIC API
+# ══════════════════════════════════════════════════════════════════════════════
+
+def obstacle_ahead(radius=None):
+    """
+    Returns True if the SLAM safety system detected an obstacle.
+
+    Checks safety.emergency OR safety.hold from the latest FRAME.
+    This uses the SLAM's own costmap + SafetySupervisor — 0.50m collision zone,
+    localization loss detection, and 0.8s emergency hold.
+
+    Safe to call even when LiDAR is not available (returns False).
+    NOTE: radius is accepted for call compatibility but is currently unused.
+    """
+    if not LIDAR_AVAILABLE or _last_frame_time[0] == 0:
+        return False
+
+    with _lock:
+        safety = _latest_safety[0]
+
+    if not safety:
+        return False
+
+    return bool(safety.get("emergency", False)) or bool(safety.get("hold", False))
+
+
+def get_slam_pose():
+    """
+    Returns {"x", "y", "heading", "source"} built from the SLAM 4x4 pose
+    matrix — x/y in metres, heading in degrees. Returns None if no pose
+    is available.
+
+    More accurate than dead reckoning (±2cm vs ±10cm).
+    """
+    with _lock:
+        pose = _latest_pose[0]
+
+    if pose is None:
+        return None
+
+    pose = np.asarray(pose, dtype=np.float64)
+    if pose.shape != (4, 4):
+        return None
+
+    x = float(pose[0, 3])
+    y = float(pose[1, 3])
+    yaw = math.degrees(math.atan2(float(pose[1, 0]), float(pose[0, 0])))
+    return {"x": round(x, 4), "y": round(y, 4), "heading": round(yaw, 2), "source": "slam"}
+
+
+def get_nav_cmd():
+    """
+    Returns the SLAM path planner's recommended velocity command:
+    {"linear_mps": float, "angular_rps": float, "blocked": bool, "goal_reached": bool}
+    Returns None if no nav data.
+    """
+    with _lock:
+        nav = _latest_nav[0]
+
+    cmd = nav.get("cmd") if nav else None
+    if not cmd or not isinstance(cmd, dict):
+        return None
+    return cmd
+
+
+def get_loc_state():
+    """Returns 'TRACKING', 'DEGRADED', 'LOST', or 'RECOVERY'."""
+    with _lock:
+        return _latest_loc_state[0]
+
+
+def get_safety_reasons():
+    """Returns list of current safety trigger reasons, or empty list."""
+    with _lock:
+        safety = _latest_safety[0]
+    return safety.get("reasons", []) if safety else []
+
+
+def get_lidar_status():
+    """Full status dict for diagnostics."""
+    # get_slam_pose() acquires _lock itself and _lock is not reentrant, so
+    # fetch the pose *before* taking the lock here — calling it inside the
+    # `with _lock:` block would self-deadlock.
+    pose = get_slam_pose()
+    with _lock:
+        return {
+            "available": LIDAR_AVAILABLE,
+            "mode": _latest_mode[0],
+            "loc_state": _latest_loc_state[0],
+            "safety": dict(_latest_safety[0]) if _latest_safety[0] else {},
+            "perf": dict(_latest_perf[0]) if _latest_perf[0] else {},
+            "last_frame_age": round(time.time() - _last_frame_time[0], 1) if _last_frame_time[0] > 0 else -1,
+            "pose": pose,
+        }
+
+
+def get_client():
+    """Return the SlamEngineClient instance (for direct commands)."""
+    return _client[0]
+
+
+def stop_lidar():
+    """Clean shutdown."""
+    global LIDAR_AVAILABLE
+    _lidar_alive[0] = False
+    LIDAR_AVAILABLE = False
+
+    client = _client[0]
+    if client is not None:
+        try:
+            client.stop_process()
+        except Exception:
+            pass
+    log("LiDAR stopped", "info", "lidar")
+    print(" [LiDAR] Stopped")
diff --git a/API/llava_api.py b/API/llava_api.py
new file mode 100644
index 0000000..f431a65
--- /dev/null
+++ b/API/llava_api.py
@@ -0,0 +1,157 @@
+"""
+llava_api.py — LLaVA / Qwen VL query interface
+"""
+import json
+import ollama
+import yaml
+from pathlib import Path
+from Core.config_loader import load_config
+
+_cfg = load_config("Brain")
+
+# Load prompts from YAML (the authoritative source — bilingual, complete)
+_yaml_path = Path(__file__).resolve().parent.parent / "Config" / "marcus_prompts.yaml"
+with open(_yaml_path, encoding="utf-8") as _f:
+    _prompts = yaml.safe_load(_f)
+
+OLLAMA_MODEL = _cfg["ollama_model"]
+MAX_HISTORY =
_cfg["max_history"] +# Cap batch and context on every request. Without this, llama.cpp on Jetson +# Orin NX allocates a ~7.5 GiB compute graph (defaults: batch 512, ctx 4096) +# that SIGKILLs the runner when Marcus already holds ~2 GiB of unified memory +# for YOLO/camera/audio. Halving batch roughly quarters the compute graph. +NUM_BATCH = _cfg.get("num_batch", 128) +NUM_CTX = _cfg.get("num_ctx", 2048) +MAIN_PROMPT = _prompts["main_prompt"] +GOAL_PROMPT = _prompts["goal_prompt"] +PATROL_PROMPT = _prompts["patrol_prompt"] +TALK_PROMPT = _prompts["talk_prompt"] +VERIFY_PROMPT = _prompts["verify_prompt"] + +# Conversation state +_conversation_history = [] +_facts = [] + + +def remember_fact(fact: str): + """Store a fact told by the user for injection into LLaVA context.""" + if fact and fact not in _facts: + _facts.append(fact) + print(f" [Memory] Fact stored: {fact}") + + +def add_to_history(user_msg: str, assistant_msg: str): + _conversation_history.append({"role": "user", "content": user_msg}) + _conversation_history.append({"role": "assistant", "content": assistant_msg}) + while len(_conversation_history) > MAX_HISTORY: + _conversation_history.pop(0) + + +def call_llava(prompt: str, img_b64, num_predict: int = 200, use_history: bool = False) -> str: + messages = [] + if use_history and _conversation_history: + messages.extend(_conversation_history) + msg = {"role": "user", "content": prompt} + if img_b64: + msg["images"] = [img_b64] + messages.append(msg) + r = ollama.chat(model=OLLAMA_MODEL, messages=messages, + options={ + "temperature": 0.0, + "num_predict": num_predict, + "num_batch": NUM_BATCH, + "num_ctx": NUM_CTX, + }) + return r["message"]["content"].strip() + + +def parse_json(raw: str): + """Extract and parse first JSON object from string.""" + raw = raw.replace("```json", "").replace("```", "").strip() + s = raw.find("{") + e = raw.rfind("}") + 1 + if s == -1 or e == 0: + return None + try: + return json.loads(raw[s:e]) + except json.JSONDecodeError: + return None + + +def ask(command: str, img_b64) -> dict: + """Send command + camera frame to LLaVA with conversation history.""" + try: + facts_str = ("\nKnown facts: " + "; ".join(_facts) + ".") if _facts else "" + raw = call_llava(MAIN_PROMPT.format(command=command, facts=facts_str), img_b64, + num_predict=_cfg["num_predict_main"], use_history=True) + print(f" Raw: {raw}") + d = parse_json(raw) + speak = d.get("speak", raw) if d else raw + add_to_history(command, speak) + if d is None: + return {"actions": [], "arm": None, "speak": raw, "abort": None} + return d + except Exception as ex: + print(f" LLaVA error: {ex}") + return {"actions": [], "arm": None, "speak": "Error.", "abort": None} + + +def ask_goal(goal: str, img_b64) -> dict: + """Ask LLaVA if goal is reached.""" + try: + raw = call_llava(GOAL_PROMPT.format(goal=goal), img_b64, + num_predict=_cfg["num_predict_goal"]) + print(f" LLaVA: {raw}") + d = parse_json(raw) + if d is None: + text = raw.lower() + reached = any(w in text for w in + ["reached", "found", "i can see", "i see a person", "yes", "arrived"]) + return {"reached": reached, "next_move": "left", "duration": 0.5, "speak": raw[:100]} + reached = d.get("reached", False) + if isinstance(reached, str): + reached = reached.lower() in ("true", "yes", "1") + d["reached"] = reached + return d + except Exception: + return {"reached": False, "next_move": "left", "duration": 0.5, "speak": "Continuing..."} + + +def ask_talk(command: str, img_b64, facts: str = "") -> dict: + """Handle talk-only commands using the YAML 
talk_prompt.""" + try: + prompt = TALK_PROMPT.format(command=command, facts=facts) + raw = call_llava(prompt, img_b64, num_predict=_cfg["num_predict_talk"], + use_history=True) + print(f" Raw: {raw}") + d = parse_json(raw) + if d is None: + return {"actions": [], "arm": None, "speak": raw[:100], "abort": None} + speak = d.get("speak", "") + add_to_history(command, speak) + return d + except Exception as ex: + print(f" Talk error: {ex}") + return {"actions": [], "arm": None, "speak": f"Error: {ex}", "abort": None} + + +def ask_verify(target: str, condition: str, img_b64) -> str: + """Verify a condition on a detected target. Returns 'yes' or 'no'.""" + try: + prompt = VERIFY_PROMPT.format(target=target, condition=condition) + raw = call_llava(prompt, img_b64, num_predict=_cfg["num_predict_verify"]) + cleaned = raw.strip().lower().rstrip(".,!?") + first_word = cleaned.split()[0] if cleaned.split() else "no" + return first_word if first_word in ("yes", "no") else "no" + except Exception: + return "no" + + +def ask_patrol(img_b64) -> dict: + """Ask LLaVA to assess scene during patrol.""" + try: + raw = call_llava(PATROL_PROMPT, img_b64, num_predict=_cfg["num_predict_patrol"]) + d = parse_json(raw) + return d or {"observation": raw[:80], "alert": None, "next_move": "forward", "duration": 1.0} + except Exception: + return {"observation": "Error", "alert": None, "next_move": "stop", "duration": 0} diff --git a/API/memory_api.py b/API/memory_api.py new file mode 100644 index 0000000..577fb42 --- /dev/null +++ b/API/memory_api.py @@ -0,0 +1,109 @@ +""" +memory_api.py — Session + place memory interface +""" +import sys +import os +from Core.env_loader import PROJECT_ROOT +from API.odometry_api import get_position + +MEMORY_AVAILABLE = False +mem = None + + +def init_memory() -> bool: + """Start memory subsystem. 
Returns True if successful.""" + global MEMORY_AVAILABLE, mem + + # marcus_memory.py lives in Brain/ + brain_dir = os.path.join(PROJECT_ROOT, "Brain") + for d in (brain_dir,): + if d not in sys.path: + sys.path.insert(0, d) + + try: + from marcus_memory import Memory + mem = Memory() + mem.start_session() + MEMORY_AVAILABLE = True + print("Memory started") + return True + except ImportError as e: + print(f"marcus_memory.py not found ({e}) — memory disabled") + return False + except Exception as e: + print(f"Memory error: {e} — memory disabled") + return False + + +def log_cmd(cmd: str, response: str = "", duration: float = 0.0): + """Log command to session memory.""" + if mem: + try: + mem.log_command(cmd, response, duration) + except Exception: + pass + + +def log_detection(class_name: str, position: str = "", distance: str = ""): + """Log YOLO detection to session memory with current position.""" + if mem and class_name: + pos = get_position() + try: + mem.log_detection( + class_name, position, distance, + x=pos["x"] if pos else None, + y=pos["y"] if pos else None, + ) + except Exception: + pass + + +def place_save(name: str) -> bool: + """Save current position as named place.""" + if not mem: + print(" [Places] Memory not available — cannot save place") + return False + pos = get_position() + return mem.save_place( + name, + x=pos["x"] if pos else None, + y=pos["y"] if pos else None, + heading=pos["heading"] if pos else None, + ) + + +def place_goto(name: str) -> bool: + """Navigate to a named saved place.""" + from API.odometry_api import odom, ODOM_AVAILABLE + + if not mem: + print(" [Places] Memory not available") + return False + place = mem.get_place(name) + if place is None: + return False + if not place.get("has_odom"): + print(f" [Places] '{name}' was saved without odometry — no coordinates") + return False + if not odom or not ODOM_AVAILABLE: + print(" [Places] Odometry not running — cannot navigate") + return False + print(f" [Places] Navigating to '{name}' " + f"(x={place['x']:.2f}, y={place['y']:.2f}, h={place['heading']:.1f})") + return odom.navigate_to(place["x"], place["y"], place["heading"]) + + +def places_list_str() -> str: + """Return formatted string of all saved places.""" + if not mem: + return "Memory not available" + places = mem.list_places() + if not places: + return "No places saved yet. Say 'remember this as ' to save one." + lines = [f" {'Name':<20} {'Coordinates':<25} {'Saved at'}"] + lines.append(" " + "-" * 60) + for p in places: + coord = (f"x={p['x']:.2f} y={p['y']:.2f} h={p['heading']:.1f}" + if p.get("has_odom") else "no coordinates") + lines.append(f" {p['name']:<20} {coord:<25} {p.get('saved_at', '')}") + return "\n".join(lines) diff --git a/API/odometry_api.py b/API/odometry_api.py new file mode 100644 index 0000000..4db8dc5 --- /dev/null +++ b/API/odometry_api.py @@ -0,0 +1,40 @@ +""" +odometry_api.py — Odometry interface wrapper +""" +import sys +import os +from Core.env_loader import PROJECT_ROOT + +ODOM_AVAILABLE = False +odom = None + + +def init_odometry(zmq_sock=None) -> bool: + """Start odometry tracking. 
Returns True if successful.""" + global ODOM_AVAILABLE, odom + + # marcus_odometry.py lives in Navigation/ + nav_dir = os.path.join(PROJECT_ROOT, "Navigation") + for d in (nav_dir,): + if d not in sys.path: + sys.path.insert(0, d) + + try: + from marcus_odometry import Odometry + odom = Odometry() + ODOM_AVAILABLE = odom.start(zmq_sock=zmq_sock) + print(f"Odometry {'started' if ODOM_AVAILABLE else 'failed'}") + return ODOM_AVAILABLE + except ImportError as e: + print(f"marcus_odometry.py not found ({e}) — precise distance disabled") + return False + except Exception as e: + print(f"Odometry error: {e} — precise distance disabled") + return False + + +def get_position(): + """Return current position dict or None.""" + if odom and ODOM_AVAILABLE: + return odom.get_position() + return None diff --git a/API/yolo_api.py b/API/yolo_api.py new file mode 100644 index 0000000..1495c4c --- /dev/null +++ b/API/yolo_api.py @@ -0,0 +1,66 @@ +""" +yolo_api.py — YOLO detection interface with fallback stubs +""" +import sys +import os +from Core.config_loader import load_config +from Core.env_loader import PROJECT_ROOT + +YOLO_AVAILABLE = False + +# Fallback stubs +def _stub_sees(c, **kw): return False +def _stub_count(c): return 0 +def _stub_closest(c="person"): return None +def _stub_summary(): return "YOLO not loaded" +def _stub_ppe(): return [] +def _stub_too_close(**k): return False +def _stub_all(): return set() +def _stub_fps(): return 0.0 + +yolo_sees = _stub_sees +yolo_count = _stub_count +yolo_closest = _stub_closest +yolo_summary = _stub_summary +yolo_ppe_violations = _stub_ppe +yolo_person_too_close = _stub_too_close +yolo_all_classes = _stub_all +yolo_fps = _stub_fps + + +def init_yolo(raw_frame_ref, frame_lock) -> bool: + """Start YOLO inference. Returns True if successful.""" + global YOLO_AVAILABLE + global yolo_sees, yolo_count, yolo_closest, yolo_summary + global yolo_ppe_violations, yolo_person_too_close, yolo_all_classes, yolo_fps + + # marcus_yolo.py lives in Vision/ + models_dir = os.path.join(PROJECT_ROOT, "Vision") + if models_dir not in sys.path: + sys.path.insert(0, models_dir) + + try: + from marcus_yolo import ( + start_yolo, yolo_sees as _ys, yolo_count as _yc, yolo_closest as _ycl, + yolo_summary as _ysu, yolo_ppe_violations as _ypp, + yolo_person_too_close as _yptc, yolo_all_classes as _yac, yolo_fps as _yfps, + ) + except ImportError as e: + print(f"marcus_yolo.py not found ({e})") + return False + + # GPU is required — let RuntimeError from _resolve_device propagate so + # Marcus hard-fails at startup instead of silently running without vision. 
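+    # A minimal sketch of the assumed contract (marcus_yolo.py lives in
+    # Vision/ and is not part of this commit): _resolve_device is expected
+    # to raise rather than fall back to CPU, e.g.
+    #
+    #     def _resolve_device() -> str:          # hypothetical helper
+    #         import torch
+    #         if not torch.cuda.is_available():
+    #             raise RuntimeError("YOLO needs CUDA — no GPU found")
+    #         return "cuda:0"
+    #
+    # Only the import above is wrapped in try/except, so the call below is
+    # deliberately unguarded and a GPU failure aborts startup.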
+ ok = start_yolo(raw_frame_ref=raw_frame_ref, frame_lock=frame_lock) + if ok: + YOLO_AVAILABLE = True + yolo_sees = _ys + yolo_count = _yc + yolo_closest = _ycl + yolo_summary = _ysu + yolo_ppe_violations = _ypp + yolo_person_too_close = _yptc + yolo_all_classes = _yac + yolo_fps = _yfps + print(f"YOLO {'started' if ok else 'failed to start'}") + return ok diff --git a/API/zmq_api.py b/API/zmq_api.py new file mode 100644 index 0000000..de7a0c0 --- /dev/null +++ b/API/zmq_api.py @@ -0,0 +1,49 @@ +""" +zmq_api.py — ZMQ velocity + command interface to Holosoma +""" +import json +import time +import zmq +from Core.config_loader import load_config +from Core.logger import log + +_cfg = load_config("ZMQ") + +ZMQ_HOST = _cfg["zmq_host"] +ZMQ_PORT = _cfg["zmq_port"] +STOP_ITERATIONS = _cfg["stop_iterations"] +STOP_DELAY = _cfg["stop_delay"] +STEP_PAUSE = _cfg["step_pause"] + +ctx = zmq.Context() +sock = ctx.socket(zmq.PUB) +sock.bind(f"tcp://{ZMQ_HOST}:{ZMQ_PORT}") +time.sleep(0.5) +log(f"ZMQ PUB bound on tcp://{ZMQ_HOST}:{ZMQ_PORT}", "info", "zmq") + + +def get_socket(): + """Return the shared ZMQ PUB socket (for odometry to reuse).""" + return sock + + +def send_vel(vx: float = 0.0, vy: float = 0.0, vyaw: float = 0.0): + """Send velocity to Holosoma. vx m/s | vy m/s | vyaw rad/s""" + sock.send_string(json.dumps({"vel": {"vx": vx, "vy": vy, "vyaw": vyaw}})) + + +def gradual_stop(): + """Smooth deceleration to zero over ~1 second.""" + for _ in range(STOP_ITERATIONS): + send_vel(0.0, 0.0, 0.0) + time.sleep(STOP_DELAY) + + +def send_cmd(cmd: str): + """Send Holosoma state command: start | walk | stand | stop""" + sock.send_string(json.dumps({"cmd": cmd})) + + +# Load MOVE_MAP from navigation config +_nav = load_config("Navigation") +MOVE_MAP = {k: tuple(v) for k, v in _nav["move_map"].items()} diff --git a/Autonomous/__init__.py b/Autonomous/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Autonomous/marcus_autonomous.py b/Autonomous/marcus_autonomous.py new file mode 100644 index 0000000..f4c2849 --- /dev/null +++ b/Autonomous/marcus_autonomous.py @@ -0,0 +1,516 @@ +""" +marcus_autonomous.py — Autonomous Exploration Mode +==================================================== +Project : Marcus | YS Lootah Technology +Hardware : Unitree G1 EDU + Jetson Orin NX + +Marcus moves freely through the office, identifies areas and objects, +builds a live map of what it sees, and saves everything to a session folder. + +Commands +-------- + auto on — start autonomous exploration + auto off — stop autonomous exploration + auto status — show current exploration state + auto save — save current map snapshot + auto summary — LLaVA summary of what was explored + +How it works +------------ + 1. Marcus moves forward, scanning with YOLO every 0.4s + 2. Every N steps: LLaVA assesses the scene (area type, objects, notes) + 3. Odometry records position at each observation + 4. All data saved to map/map_XXX_DATE/ folder + 5. 
Robot avoids walls by turning when blocked + +Files saved +----------- + ~/Models_marcus/map/map_001_2026-04-05/ + observations.json — [{step, time, x, y, area_type, objects, observation}] + path.json — [{x, y, heading, t}] — full path walked + summary.txt — auto-generated LLaVA summary + frames/ — camera captures at interesting points + +Import in marcus_llava.py +------------------------- + from marcus_autonomous import AutonomousMode + auto = AutonomousMode( + get_frame_fn=get_frame, + send_vel_fn=send_vel, + gradual_stop_fn=gradual_stop, + yolo_sees_fn=yolo_sees, + yolo_summary_fn=yolo_summary, + yolo_all_classes_fn=yolo_all_classes, + yolo_closest_fn=yolo_closest, + odom_fn=_odom_pos, + call_llava_fn=_call_llava, + patrol_prompt=PATROL_PROMPT, + mem=mem, + models_dir=MODELS_DIR, + ) + auto.enable() # start exploration + auto.disable() # stop exploration + auto.status() # print current state +""" + +import json +import time +import threading +import os +import base64 +from datetime import datetime +from pathlib import Path + + +# ══════════════════════════════════════════════════════════════════════════════ +# CONFIGURATION +# ══════════════════════════════════════════════════════════════════════════════ + +LLAVA_EVERY_N_STEPS = 5 # call LLaVA every N movement steps (saves GPU) +YOLO_CHECK_INTERVAL = 0.4 # seconds between YOLO checks +FORWARD_DURATION = 1.5 # seconds per forward step +TURN_DURATION = 2.0 # seconds to turn when obstacle detected +PERSON_STOP_DIST = 0.3 # stop if person closer than this (relative) +MAX_OBSERVATIONS = 500 # max observations before auto-stop +SAVE_FRAMES = True # save camera frames at interesting points +INTERESTING_COOLDOWN = 5.0 # seconds between saving "interesting" frames + + +class AutonomousMode: + """ + Autonomous office exploration. + + Thread-safe. enable() starts a background thread. disable() stops it. + All observations saved to disk automatically. 
+ + State machine: + IDLE → EXPLORING → IDLE + Any state → STOPPING → IDLE (on disable()) + """ + + def __init__(self, get_frame_fn, send_vel_fn, gradual_stop_fn, + yolo_sees_fn, yolo_summary_fn, yolo_all_classes_fn, + yolo_closest_fn, odom_fn, call_llava_fn, + patrol_prompt: str, mem=None, models_dir: str = None): + + self._get_frame = get_frame_fn + self._send_vel = send_vel_fn + self._gradual_stop = gradual_stop_fn + self._yolo_sees = yolo_sees_fn + self._yolo_summary = yolo_summary_fn + self._yolo_all_classes = yolo_all_classes_fn + self._yolo_closest = yolo_closest_fn + self._odom_pos = odom_fn + self._call_llava = call_llava_fn + self._patrol_prompt = patrol_prompt + self._mem = mem + if models_dir is None: + models_dir = str(Path(__file__).resolve().parent.parent.parent / "Data" / "Brain") + self._models_dir = Path(models_dir) + + # State + self._enabled = False + self._thread = None + self._lock = threading.Lock() + + # Current exploration session + self._map_dir = None + self._observations = [] + self._path = [] + self._step = 0 + self._start_time = None + self._last_interesting = 0.0 + + # Turn tracking — alternate left/right when blocked + self._last_turn = "left" + + # Stats + self._area_counts = {} # {area_type: count} + self._all_objects = set() + + # ── PUBLIC API ───────────────────────────────────────────────────────────── + + def enable(self): + """Start autonomous exploration.""" + with self._lock: + if self._enabled: + print(" [Auto] Already running — use 'auto off' to stop first") + return + + self._enabled = True + self._step = 0 + self._start_time = time.time() + self._observations = [] + self._path = [] + self._area_counts = {} + self._all_objects = set() + self._map_dir = self._create_map_dir() + + self._thread = threading.Thread( + target=self._explore_loop, + daemon=True, + name="auto-explore" + ) + self._thread.start() + + print(f"\n [Auto] Exploration started") + print(f" [Auto] Map folder: {self._map_dir}") + print(f" [Auto] Type 'auto off' to stop\n") + + def disable(self): + """Stop autonomous exploration and save results.""" + with self._lock: + if not self._enabled: + print(" [Auto] Not running") + return + self._enabled = False + + print("\n [Auto] Stopping exploration...") + + # Wait for thread to finish + if self._thread and self._thread.is_alive(): + self._thread.join(timeout=5.0) + + self._gradual_stop() + self._save_session() + self._print_summary() + + def status(self): + """Print current exploration state.""" + with self._lock: + running = self._enabled + step = self._step + obs = len(self._observations) + + if not running: + print(" [Auto] Status: IDLE") + if self._map_dir: + print(f" [Auto] Last map: {self._map_dir}") + return + + elapsed = time.time() - (self._start_time or time.time()) + mins = int(elapsed // 60) + secs = int(elapsed % 60) + + print(f" [Auto] Status: EXPLORING") + print(f" [Auto] Duration: {mins}m {secs}s") + print(f" [Auto] Steps: {step} | Observations: {obs}") + + if self._area_counts: + areas = ", ".join(f"{k}:{v}" for k, v in sorted(self._area_counts.items())) + print(f" [Auto] Areas seen: {areas}") + + if self._all_objects: + print(f" [Auto] Objects found: {', '.join(sorted(self._all_objects))}") + + pos = self._odom_pos() + if pos: + print(f" [Auto] Position: x={pos['x']:.2f} y={pos['y']:.2f} heading={pos['heading']:.1f}°") + + def is_enabled(self) -> bool: + with self._lock: + return self._enabled + + def save_snapshot(self): + """Save current state to disk without stopping.""" + self._save_observations() + 
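+        # _save_observations() above and _save_path() below each write to a
+        # .tmp file and Path.replace() it into place (an atomic rename on
+        # POSIX), so a snapshot taken mid-run never truncates saved data.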
self._save_path() + print(f" [Auto] Snapshot saved to {self._map_dir}") + + # ── EXPLORATION LOOP ──────────────────────────────────────────────────────── + + def _explore_loop(self): + """ + Main autonomous exploration loop. + + Steps: + 1. Check for person in path (safety stop) + 2. Check YOLO for interesting objects + 3. Move forward + 4. Every N steps: call LLaVA for scene assessment + 5. On obstacle: turn and continue + 6. Log position + observations + """ + consecutive_blocks = 0 # count consecutive blocked steps + + while self._enabled: + self._step += 1 + + # ── Safety: stop if person too close ───────────────────────────── + if self._yolo_sees("person"): + closest = self._yolo_closest("person") + if closest and closest.distance_estimate == "very close": + print(f" [Auto] Person very close — pausing 2s") + self._gradual_stop() + time.sleep(2.0) + continue + + # ── Record YOLO detections ──────────────────────────────────────── + detected_classes = self._yolo_all_classes() + for cls in detected_classes: + self._all_objects.add(cls) + + # ── Record odometry path point ──────────────────────────────────── + pos = self._odom_pos() + if pos: + self._path.append({ + "step": self._step, + "t": round(time.time() - self._start_time, 1), + "x": pos["x"], + "y": pos["y"], + "h": pos["heading"], + }) + + # ── LLaVA scene assessment every N steps ───────────────────────── + if self._step % LLAVA_EVERY_N_STEPS == 0: + self._assess_scene(pos, detected_classes) + + # ── Movement decision ───────────────────────────────────────────── + if consecutive_blocks >= 3: + # Stuck — turn more aggressively + print(f" [Auto] Stuck — turning {self._last_turn} 180°") + self._turn(self._last_turn, TURN_DURATION * 2) + consecutive_blocks = 0 + continue + + # Move forward + blocked = self._move_forward() + + if blocked: + consecutive_blocks += 1 + # Alternate left/right turns to explore both directions + turn_dir = "left" if self._last_turn == "right" else "right" + self._last_turn = turn_dir + print(f" [Auto] Obstacle — turning {turn_dir}") + self._turn(turn_dir, TURN_DURATION) + else: + consecutive_blocks = 0 + + # ── Max observations check ──────────────────────────────────────── + if len(self._observations) >= MAX_OBSERVATIONS: + print(f" [Auto] Max observations ({MAX_OBSERVATIONS}) reached — stopping") + self._enabled = False + break + + time.sleep(YOLO_CHECK_INTERVAL) + + # Clean up + self._gradual_stop() + + def _move_forward(self) -> bool: + """ + Move forward for FORWARD_DURATION seconds. + Returns True if blocked (no actual movement detected). + """ + # TODO: integrate LiDAR when available for true obstacle detection + # For now: move and assume not blocked + t0 = time.time() + while time.time() - t0 < FORWARD_DURATION and self._enabled: + self._send_vel(vx=0.25) + time.sleep(0.05) + self._send_vel(0, 0, 0) + time.sleep(0.1) + return False # Not blocked — no LiDAR yet + + def _turn(self, direction: str, duration: float): + """Turn left or right for given duration.""" + vyaw = 0.25 if direction == "left" else -0.25 + t0 = time.time() + while time.time() - t0 < duration and self._enabled: + self._send_vel(vyaw=vyaw) + time.sleep(0.05) + self._send_vel(0, 0, 0) + time.sleep(0.2) + + def _assess_scene(self, pos: dict, yolo_classes: set): + """ + Call LLaVA to understand the current scene. + Saves observation + optionally a camera frame. 
+ """ + img = self._get_frame() + if img is None: + return + + try: + raw = self._call_llava(self._patrol_prompt, img, num_predict=120) + raw_clean = raw.replace("```json", "").replace("```", "").strip() + s = raw_clean.find("{") + e = raw_clean.rfind("}") + 1 + d = json.loads(raw_clean[s:e]) if s != -1 and e > 0 else None + + if d is None: + return + + area_type = d.get("area_type", "unknown") + observation = d.get("observation", "") + objects = d.get("objects", []) + interesting = d.get("interesting", False) + + # Update area counts + self._area_counts[area_type] = self._area_counts.get(area_type, 0) + 1 + + # Add objects to global set + for obj in objects: + self._all_objects.add(obj) + + # Build observation record + obs = { + "step": self._step, + "time": round(time.time() - self._start_time, 1), + "timestamp": datetime.now().strftime("%H:%M:%S"), + "x": pos["x"] if pos else None, + "y": pos["y"] if pos else None, + "heading": pos["heading"] if pos else None, + "area_type": area_type, + "objects": objects, + "yolo_classes": list(yolo_classes), + "observation": observation, + "interesting": interesting, + } + self._observations.append(obs) + + print(f" [Auto] Step {self._step} | {area_type} | {observation[:60]}") + if objects: + print(f" [Auto] Objects: {', '.join(objects)}") + + # Save frame if interesting + if interesting and SAVE_FRAMES: + now = time.time() + if now - self._last_interesting > INTERESTING_COOLDOWN: + self._save_frame(img, self._step) + self._last_interesting = now + + # Log to session memory + if self._mem: + self._mem.log_detection(area_type, "center", "medium") + + # Auto-flush observations every 20 steps + if self._step % 20 == 0: + self._save_observations() + self._save_path() + + except Exception as e: + print(f" [Auto] LLaVA assess error: {e}") + + # ── FILE I/O ──────────────────────────────────────────────────────────────── + + def _create_map_dir(self) -> Path: + """Create a new map folder with incremented ID.""" + maps_dir = self._models_dir / "map" + maps_dir.mkdir(parents=True, exist_ok=True) + + existing = [d for d in maps_dir.iterdir() if d.is_dir() and d.name.startswith("map_")] + nums = [] + for d in existing: + parts = d.name.split("_") + if len(parts) >= 2 and parts[1].isdigit(): + nums.append(int(parts[1])) + + next_num = max(nums) + 1 if nums else 1 + date_str = datetime.now().strftime("%Y-%m-%d") + map_dir = maps_dir / f"map_{next_num:03d}_{date_str}" + map_dir.mkdir(parents=True, exist_ok=True) + (map_dir / "frames").mkdir(exist_ok=True) + return map_dir + + def _save_observations(self): + if not self._map_dir or not self._observations: + return + try: + path = self._map_dir / "observations.json" + tmp = path.with_suffix(".tmp") + with open(tmp, "w", encoding="utf-8") as f: + json.dump(self._observations, f, indent=2, ensure_ascii=False) + tmp.replace(path) + except Exception as e: + print(f" [Auto] Save observations error: {e}") + + def _save_path(self): + if not self._map_dir or not self._path: + return + try: + path = self._map_dir / "path.json" + tmp = path.with_suffix(".tmp") + with open(tmp, "w", encoding="utf-8") as f: + json.dump(self._path, f, indent=2) + tmp.replace(path) + except Exception as e: + print(f" [Auto] Save path error: {e}") + + def _save_frame(self, img_b64: str, step: int): + """Save a camera frame as JPEG.""" + if not self._map_dir or not img_b64: + return + try: + frame_path = self._map_dir / "frames" / f"frame_{step:04d}.jpg" + with open(frame_path, "wb") as f: + f.write(__import__("base64").b64decode(img_b64)) + except 
Exception as e: + print(f" [Auto] Save frame error: {e}") + + def _generate_summary(self) -> str: + """Generate a text summary of the exploration session.""" + elapsed = time.time() - (self._start_time or time.time()) + mins = int(elapsed // 60) + secs = int(elapsed % 60) + + lines = [ + f"Autonomous Exploration Summary", + f"==============================", + f"Map: {self._map_dir.name if self._map_dir else 'unknown'}", + f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M')}", + f"Duration: {mins}m {secs}s", + f"Steps taken: {self._step}", + f"Observations: {len(self._observations)}", + f"", + f"Areas identified:", + ] + for area, count in sorted(self._area_counts.items(), key=lambda x: -x[1]): + lines.append(f" {area:<20} {count} observations") + + lines.append("") + lines.append(f"Objects detected:") + for obj in sorted(self._all_objects): + lines.append(f" - {obj}") + + # Add notable observations + interesting = [o for o in self._observations if o.get("interesting")] + if interesting: + lines.append("") + lines.append(f"Notable observations ({len(interesting)}):") + for o in interesting[:10]: + lines.append(f" [{o['timestamp']}] {o['observation']}") + + return "\n".join(lines) + + def _save_session(self): + """Save all data to disk.""" + self._save_observations() + self._save_path() + + if self._map_dir: + try: + summary = self._generate_summary() + with open(self._map_dir / "summary.txt", "w", encoding="utf-8") as f: + f.write(summary) + except Exception as e: + print(f" [Auto] Save summary error: {e}") + + def _print_summary(self): + """Print exploration summary to terminal.""" + elapsed = time.time() - (self._start_time or time.time()) + mins = int(elapsed // 60) + secs = int(elapsed % 60) + + print(f"\n [Auto] Exploration complete") + print(f" [Auto] Duration: {mins}m {secs}s | Steps: {self._step}") + print(f" [Auto] Observations: {len(self._observations)}") + + if self._area_counts: + print(f" [Auto] Areas: {dict(sorted(self._area_counts.items()))}") + + if self._all_objects: + print(f" [Auto] Objects: {', '.join(sorted(self._all_objects))}") + + if self._map_dir: + print(f" [Auto] Saved to: {self._map_dir}\n") diff --git a/Brain/__init__.py b/Brain/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Brain/command_parser.py b/Brain/command_parser.py new file mode 100644 index 0000000..f5eb517 --- /dev/null +++ b/Brain/command_parser.py @@ -0,0 +1,360 @@ +""" +command_parser.py — Local command regex patterns + dispatcher +Handles place memory, odometry, session recall, help, examples +""" +import re +import time +from API.zmq_api import send_vel, gradual_stop +from API.memory_api import mem, place_save, place_goto, places_list_str +from API.odometry_api import odom, ODOM_AVAILABLE +from API.camera_api import get_frame +from API.llava_api import ask +from Brain.executor import execute + +# ── Compiled patterns ──────────────────────────────────────────────────────── + +_RE_REMEMBER = re.compile( + r"^(?:remember|save|mark|call|name|label)\s+(?:this|here|current|position)?\s*as\s+(.+)$", re.I) +_RE_GOTO = re.compile( + r"^(?:go\s+to|navigate\s+to|take\s+me\s+to|move\s+to|return\s+to|head\s+to)\s+(.+)$", re.I) +_RE_FORGET = re.compile( + r"^(?:forget|delete|remove)\s+(?:place\s+)?(.+)$", re.I) +_RE_RENAME = re.compile( + r"^rename\s+(.+?)\s+(?:to|as)\s+(.+)$", re.I) +_RE_WALK_DIST = re.compile( + r"^(?:walk|go|move)\s+(?:forward\s+)?(\d+(?:\.\d+)?)\s*m(?:eter(?:s)?)?$", re.I) +_RE_WALK_BACK = re.compile( + 
r"^(?:walk|go|move)\s+backward?\s+(\d+(?:\.\d+)?)\s*m(?:eter(?:s)?)?$", re.I) +_RE_TURN_DEG = re.compile( + r"^turn\s+(?:(left|right)\s+)?(\d+(?:\.\d+)?)\s*deg(?:ree(?:s)?)?$", re.I) +_RE_PATROL_RT = re.compile( + r"^patrol[/:]\s*(.+)$", re.I) +_RE_LAST_CMD = re.compile( + r"^(?:last\s+command|what\s+did\s+i\s+(?:say|type)\s+last|repeat\s+last)$", re.I) +_RE_DO_AGAIN = re.compile( + r"^(?:do\s+that\s+again|repeat|again|redo)$", re.I) +_RE_UNDO = re.compile( + r"^(?:undo|go\s+back\s+(?:to\s+)?(?:where|from\s+where)\s+(?:you|i)\s+(?:started|were|came)|reverse\s+last|turn\s+back\s+from).*$", re.I) +_RE_LAST_SESS = re.compile( + r"^(?:last\s+session|what\s+(?:did\s+you\s+do|happened)\s+last\s+(?:session|time)|previous\s+session)$", re.I) +_RE_WHERE = re.compile( + r"^(?:where\s+am\s+i|current\s+position|my\s+(?:location|position)|position)$", re.I) +_RE_GO_HOME = re.compile( + r"^(?:go\s+home|return\s+to\s+start|come\s+back\s+home|go\s+back\s+to\s+start)$", re.I) +_RE_SESSION_SUMMARY = re.compile( + r"^(?:session\s+summary|what\s+happened\s+today|session\s+report)$", re.I) +_RE_AUTO = re.compile( + r"^auto(?:nomous)?\s+(on|off|status|save|summary)$", re.I) + +# Autonomous mode instance — set by init_autonomous() +_auto = None + + +def init_autonomous(auto_instance): + """Wire in the AutonomousMode instance from marcus_brain.""" + global _auto + _auto = auto_instance + + +def try_local_command(cmd: str) -> bool: + """ + Handle local commands (place, odom, memory, help). + Returns True if handled, False if not matched (send to LLaVA). + """ + # ── PLACE MEMORY ───────────────────────────────────────────────────── + m = _RE_REMEMBER.match(cmd) + if m: + place_save(m.group(1).strip()) + return True + + m = _RE_GOTO.match(cmd) + if m: + name = m.group(1).strip() + if name.lower() in ("start", "home", "beginning"): + if odom and ODOM_AVAILABLE: + odom.return_to_start() + else: + print(" [Places] Odometry not running — cannot return to start") + else: + place_goto(name) + return True + + m = _RE_FORGET.match(cmd) + if m: + if mem: + mem.delete_place(m.group(1).strip()) + else: + print(" [Places] Memory not available") + return True + + m = _RE_RENAME.match(cmd) + if m: + if mem: + mem.rename_place(m.group(1).strip(), m.group(2).strip()) + else: + print(" [Places] Memory not available") + return True + + if re.match(r"^(?:places|list\s+places|what\s+places|show\s+(?:places|locations)|known\s+places)$", cmd, re.I): + print(places_list_str()) + return True + + # ── ODOMETRY MOVEMENT ──────────────────────────────────────────────── + m = _RE_WALK_DIST.match(cmd) + if m: + meters = float(m.group(1)) + if odom: + odom.walk_distance(meters) + else: + t0 = time.time() + while time.time() - t0 < meters / 0.3: + send_vel(vx=0.3) + time.sleep(0.05) + gradual_stop() + return True + + m = _RE_WALK_BACK.match(cmd) + if m: + meters = float(m.group(1)) + if odom: + odom.walk_distance(meters, direction="backward") + else: + t0 = time.time() + while time.time() - t0 < meters / 0.2: + send_vel(vx=-0.2) + time.sleep(0.05) + gradual_stop() + return True + + m = _RE_TURN_DEG.match(cmd) + if m: + direction = m.group(1) + degrees = float(m.group(2)) + if direction and direction.lower() == "right": + degrees = -degrees + if odom: + odom.turn_degrees(degrees) + else: + t0 = time.time() + vyaw = 0.3 if degrees > 0 else -0.3 + duration = abs(degrees) / 17.2 + while time.time() - t0 < duration: + send_vel(vyaw=vyaw) + time.sleep(0.05) + gradual_stop() + return True + + # ── NAMED PATROL ROUTE 
─────────────────────────────────────────────── + m = _RE_PATROL_RT.match(cmd) + if m: + raw_route = m.group(1) + names = re.split(r"[→,;]+|\s{2,}", raw_route) + names = [n.strip() for n in names if n.strip()] + if not names: + print(" Usage: patrol: door → desk → exit") + return True + if not mem: + print(" [Places] Memory not available") + return True + waypoints, missing = [], [] + for name in names: + place = mem.get_place(name) + if place is None: + missing.append(name) + elif not place.get("has_odom"): + print(f" [Places] '{name}' has no coordinates — skipping") + else: + waypoints.append({"x": place["x"], "y": place["y"], "heading": place["heading"], "name": name}) + if missing: + print(f" [Places] Unknown places: {', '.join(missing)}") + if not waypoints: + print(" [Places] No valid waypoints — patrol cancelled") + return True + if odom: + print(f" [Places] Named patrol: {' → '.join(n['name'] for n in waypoints)}") + odom.patrol_route(waypoints) + else: + print(" [Places] Odometry not running") + return True + + # ── SESSION MEMORY RECALL ──────────────────────────────────────────── + if _RE_LAST_CMD.match(cmd): + if mem: + last = mem.get_last_command() + print(f" Last command: '{last}'" if last else " No commands logged yet") + else: + print(" Memory not available") + return True + + if _RE_UNDO.match(cmd): + if not mem: + print(" Memory not available — cannot undo") + return True + recent = mem.get_last_n_commands(5) + move_words = {"turn right": ("left", 1), "turn left": ("right", -1), + "walk forward": ("backward", 1), "move forward": ("backward", 1), + "move back": ("forward", 1), "walk backward": ("forward", 1)} + for c in reversed(recent): + cl = c.lower() + for phrase, (reverse_dir, _) in move_words.items(): + if phrase in cl: + print(f" Undoing: '{c}' → reversing with '{reverse_dir}'") + dur, t0 = 2.0, time.time() + if reverse_dir in ("left", "right"): + vyaw = 0.3 if reverse_dir == "left" else -0.3 + while time.time() - t0 < dur: + send_vel(vyaw=vyaw) + time.sleep(0.05) + else: + vx = 0.3 if reverse_dir == "forward" else -0.2 + while time.time() - t0 < dur: + send_vel(vx=vx) + time.sleep(0.05) + gradual_stop() + return True + print(" No movement command to undo") + return True + + if _RE_DO_AGAIN.match(cmd): + if not mem: + print(" Memory not available — cannot repeat") + return True + recent = mem.get_last_n_commands(5) + repeat = None + for c in reversed(recent): + if not _RE_DO_AGAIN.match(c) and not _RE_LAST_CMD.match(c): + repeat = c + break + if repeat: + print(f" Repeating: '{repeat}'") + if try_local_command(repeat): + return True + # Not a local command — send directly to LLaVA + print("Thinking...") + img = get_frame() + if img: + d = ask(repeat, img) + execute(d) + return True + else: + print(" No previous command to repeat") + return True + + if _RE_LAST_SESS.match(cmd): + if mem: + print(mem.last_session_summary()) + else: + print(" Memory not available") + return True + + if _RE_SESSION_SUMMARY.match(cmd): + if mem: + print(f" Session: {mem._session_id}") + print(f" Duration: {mem.session_duration_str()}") + print(f" Commands: {mem.commands_count()}") + print(f" Places: {mem.places_count()}") + detections = mem.get_session_detections() + classes = {d.get("class") for d in detections} + print(f" Detected: {', '.join(classes) if classes else 'nothing yet'}") + else: + print(" Memory not available") + return True + + if _RE_WHERE.match(cmd): + if odom and ODOM_AVAILABLE: + print(f" Position: {odom.status_str()}") + print(f" Distance from start: 
{odom.get_distance_from_start():.2f}m") + else: + print(" Odometry not running — position unknown") + return True + + if _RE_GO_HOME.match(cmd): + if odom and ODOM_AVAILABLE: + odom.return_to_start() + else: + print(" Odometry not running — cannot navigate home") + return True + + # ── AUTONOMOUS MODE ────────────────────────────────────────────────── + m = _RE_AUTO.match(cmd) + if m: + subcmd = m.group(1).lower() + if _auto is None: + print(" [Auto] Autonomous mode not initialized") + return True + if subcmd == "on": + _auto.enable() + elif subcmd == "off": + _auto.disable() + elif subcmd == "status": + _auto.status() + elif subcmd == "save": + _auto.save_snapshot() + elif subcmd == "summary": + if _auto.is_enabled(): + _auto.status() + else: + print(" [Auto] Not running — use 'auto on' to start") + return True + + # ── LIDAR STATUS ───────────────────────────────────────────────────── + if re.match(r"^(?:lidar|lidar\s+status|slam\s+status)$", cmd, re.I): + try: + from API.lidar_api import LIDAR_AVAILABLE, get_lidar_status + if not LIDAR_AVAILABLE: + print(" LiDAR: not available") + else: + s = get_lidar_status() + print(f" LiDAR: {s['mode']} | loc: {s['loc_state']} | " + f"frame age: {s['last_frame_age']}s") + if s.get("pose"): + p = s["pose"] + print(f" SLAM pose: x={p['x']:.2f} y={p['y']:.2f} h={p['heading']:.1f}") + safety = s.get("safety", {}) + if safety.get("emergency"): + print(f" EMERGENCY: {safety.get('reasons', [])}") + perf = s.get("perf", {}) + if perf: + print(f" FPS: {perf.get('input_fps', 0):.0f} in / " + f"ICP: {perf.get('icp_ms', 0):.1f}ms / " + f"CPU: {perf.get('cpu_percent', 0):.0f}%") + except ImportError: + print(" LiDAR: module not loaded") + return True + + # ── HELP / EXAMPLES ────────────────────────────────────────────────── + if re.match(r"^(?:help[/]|help|commands|menu|[?][/]|[?])$", cmd, re.I): + _print_help() + return True + + if re.match(r"^(?:example[/]|examples[/]|ex[/]|example|examples|ex|show examples)$", cmd, re.I): + _print_examples() + return True + + return False + + +def _print_help(): + print(""" + MARCUS — COMMAND HELP + Movement: turn left/right, walk forward/back, walk 1 meter, turn 90 degrees + Vision: what do you see, yolo + Goals: goal/ stop when you see a person + Places: remember this as door, go to door, places, forget door + Patrol: patrol, patrol: door → desk → exit + Session: last command, do that again, last session, session summary + Search: search/ /path/to/photo.jpg [hint], search/ person in blue + Auto: auto on, auto off, auto status, auto save, auto summary + LiDAR: lidar, lidar status + System: help, example, yolo, q""") + + +def _print_examples(): + print(""" + MARCUS — USAGE EXAMPLES + turn left | turn right 90 degrees | walk forward | walk 1 meter + what do you see | describe what is in front of you + goal/ stop when you see a person | goal/ stop when you see a laptop + remember this as door | go to door | places | forget door + patrol | patrol: door → desk → window + last command | do that again | last session | session summary""") diff --git a/Brain/executor.py b/Brain/executor.py new file mode 100644 index 0000000..b2ab292 --- /dev/null +++ b/Brain/executor.py @@ -0,0 +1,99 @@ +""" +executor.py — Execute LLaVA movement decisions +With LiDAR obstacle interrupt — stops immediately if obstacle detected. 
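+
+Example decision dict consumed by execute() (illustrative values — the keys
+"speak", "actions" [{"move", "duration"}], "arm", and optional "abort" are
+exactly the ones this module reads from the LLaVA response):
+    {"speak": "Heading to the door",
+     "actions": [{"move": "forward", "duration": 2.0},
+                 {"move": "forward", "duration": 1.0}],
+     "arm": "wave"}
+merge_actions() collapses the two forward steps into a single 3.0s movement.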
+""" +import time +import threading +from API.zmq_api import send_vel, gradual_stop, MOVE_MAP, STEP_PAUSE +from API.arm_api import ALL_ARM_NAMES, do_arm + + +def _obstacle_check(): + """Check LiDAR safety — returns True if obstacle detected. Safe if LiDAR unavailable.""" + try: + from API.lidar_api import obstacle_ahead + return obstacle_ahead() + except ImportError: + return False + + +def execute_action(move: str, duration: float): + """Execute a single movement step. Stops if LiDAR detects obstacle.""" + if move in ALL_ARM_NAMES: + do_arm(move) + return + if move == "stop" or move is None: + gradual_stop() + return + if move in MOVE_MAP: + vx, vy, vyaw = MOVE_MAP[move] + t0 = time.time() + while time.time() - t0 < duration: + if _obstacle_check(): + gradual_stop() + print(" [Safety] LiDAR obstacle — stopping") + return + send_vel(vx, vy, vyaw) + time.sleep(0.05) + gradual_stop() + time.sleep(STEP_PAUSE) + else: + print(f" Unknown move: '{move}' — skipping") + + +def move_step(move: str, duration: float): + """Lightweight step for goal/patrol loops. Stops if LiDAR detects obstacle.""" + if move in MOVE_MAP: + vx, vy, vyaw = MOVE_MAP[move] + t0 = time.time() + while time.time() - t0 < duration: + if _obstacle_check(): + send_vel(0.0, 0.0, 0.0) + print(" [Safety] LiDAR obstacle — pausing step") + return + send_vel(vx, vy, vyaw) + time.sleep(0.05) + send_vel(0.0, 0.0, 0.0) + time.sleep(0.1) + + +def merge_actions(actions: list) -> list: + """Merge consecutive same-direction steps into one smooth movement.""" + if not actions: + return actions + merged = [dict(actions[0])] + for action in actions[1:]: + if action.get("move") == merged[-1].get("move"): + merged[-1]["duration"] = merged[-1].get("duration", 0) + action.get("duration", 0) + else: + merged.append(dict(action)) + return merged + + +def execute(d: dict): + """Run full LLaVA decision — movements in sequence, then arm in background.""" + if d.get("abort"): + print(f" ABORT: {d['abort']}") + gradual_stop() + return + + speak = d.get("speak", "") + actions = merge_actions(d.get("actions", [])) + arm_cmd = d.get("arm", None) + + print(f"Marcus: {speak}") + + if not actions: + gradual_stop() + else: + for i, action in enumerate(actions): + move = action.get("move") + dur = float(action.get("duration", 2.0)) + print(f" Step {i+1}/{len(actions)}: {move} for {dur}s") + execute_action(move, dur) + + if arm_cmd: + print(f" Arm: {arm_cmd}") + threading.Thread(target=do_arm, args=(arm_cmd,), daemon=True).start() + + return speak diff --git a/Brain/marcus_brain.py b/Brain/marcus_brain.py new file mode 100644 index 0000000..27fd78e --- /dev/null +++ b/Brain/marcus_brain.py @@ -0,0 +1,503 @@ +""" +marcus_brain.py — Marcus AI Brain Orchestrator +================================================ +Shared brain logic for both terminal (run_marcus.py) and server (marcus_server.py). 
+ +Usage: + Terminal: python3 run_marcus.py + Server: python3 -m Server.marcus_server (imports init_brain + process_command) +""" +import json +import os +import re +import time +import sys + +PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if PROJECT_DIR not in sys.path: + sys.path.insert(0, PROJECT_DIR) + +from API.zmq_api import send_vel, gradual_stop, send_cmd +from API.camera_api import start_camera, stop_camera, get_frame +from API.yolo_api import ( + init_yolo, yolo_summary, yolo_fps, + yolo_all_classes, yolo_closest, yolo_sees, +) +from API.odometry_api import init_odometry +from API.memory_api import init_memory, log_cmd, log_detection +from API.llava_api import ( + OLLAMA_MODEL, ask, ask_talk, call_llava, parse_json, add_to_history, +) +from API.imgsearch_api import init_imgsearch, get_searcher +from Core.config_loader import load_config +from Core.logger import log as _log + +from Brain.command_parser import try_local_command, init_autonomous +from Brain.executor import execute, execute_action +from Navigation.goal_nav import navigate_to_goal +from Navigation.patrol import patrol +from Autonomous.marcus_autonomous import AutonomousMode + +_cfg = load_config("Brain") + +_TALK_PATTERNS = [ + # English questions + r"^(?:what|who|where|when|how|why|is|are|do|does|can|tell|describe|explain|show|analyze)\s+", + # English identity/facts + r"^(?:my name is|i am|call me|that is|that person|note that|remember that)\s+", + # English acknowledgements + r"^(?:ok|okay|yes|no|good|nice|great|thanks|thank you|got it|understood|correct)\s*[!.]*$", + # Arabic questions — ماذا ترى / كيف حالك / من أنت / ما اسمك / صف / هل + r"^(?:ماذا|ما\s|كيف|من\s|أين|لماذا|هل|صف|اشرح|وصف|كم)\s*", + # Arabic identity/facts — اسمي / أنا / تذكر + r"^(?:اسمي|أنا\s|تذكر\s|سجل\s|لاحظ\s)", + # Arabic acknowledgements — حسنا / شكرا / ممتاز / صح / مفهوم + r"^(?:حسنا|شكرا|ممتاز|صح|مفهوم|تمام|أحسنت|جيد|نعم|لا)\s*[!.]*$", +] + +_NAT_GOAL_RE = re.compile( + r'^(?:keep\s+(?:turn|rotat|spin)\w*\s+\w+\s+until\s+(?:you\s+)?(?:see|find|spot)\s+.+' + r'|stop\s+when\s+(?:you\s+)?(?:see|find|spot)\s+.+' + r'|find\s+(?:a\s+|the\s+|me\s+a\s+)?\w.+' + r'|look\s+for\s+(?:a\s+|the\s+)?\w.+' + r'|search\s+for\s+(?:a\s+|the\s+)?\w.+)$', + re.IGNORECASE +) + + +# ══════════════════════════════════════════════════════════════════════════════ +# INIT — called once by both run_marcus.py and marcus_server.py +# ══════════════════════════════════════════════════════════════════════════════ + +def init_brain(): + """Initialize all subsystems. 
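+    Startup order: camera → YOLO → odometry → memory → LiDAR (optional) →
+    image search → autonomous mode → locomotion start → voice (optional).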
Call once at startup."""
+    raw_frame, raw_lock = start_camera()
+    init_yolo(raw_frame, raw_lock)
+
+    from API.zmq_api import get_socket
+    init_odometry(zmq_sock=get_socket())
+
+    init_memory()
+
+    # LiDAR (optional — continues without it)
+    try:
+        from API.lidar_api import init_lidar
+        init_lidar()
+    except Exception as e:
+        print(f" [LiDAR] Init failed: {e} — continuing without LiDAR")
+
+    init_imgsearch(
+        get_frame_fn=get_frame,
+        send_vel_fn=send_vel,
+        gradual_stop_fn=gradual_stop,
+        llava_fn=call_llava,
+        yolo_sees_fn=yolo_sees,
+        model=OLLAMA_MODEL,
+    )
+
+    # Autonomous exploration mode
+    from API.memory_api import mem as _mem_ref
+    from API.llava_api import PATROL_PROMPT
+    auto = AutonomousMode(
+        get_frame_fn=get_frame,
+        send_vel_fn=send_vel,
+        gradual_stop_fn=gradual_stop,
+        yolo_sees_fn=yolo_sees,
+        yolo_summary_fn=yolo_summary,
+        yolo_all_classes_fn=yolo_all_classes,
+        yolo_closest_fn=yolo_closest,
+        odom_fn=lambda: {"x": 0, "y": 0, "heading": 0},  # fallback if no odom
+        call_llava_fn=call_llava,
+        patrol_prompt=PATROL_PROMPT,
+        mem=_mem_ref,
+    )
+    # Wire odometry if available
+    from API.odometry_api import odom as _odom_ref, ODOM_AVAILABLE
+    if _odom_ref and ODOM_AVAILABLE:
+        auto._odom_pos = lambda: {
+            "x": _odom_ref._x, "y": _odom_ref._y, "heading": _odom_ref._heading
+        }
+    init_autonomous(auto)
+
+    send_cmd("start")
+    time.sleep(0.5)
+    send_cmd("walk")
+    time.sleep(0.5)
+
+    # Voice module (optional — continues without it)
+    _init_voice()
+
+    _log("Brain initialized", "info", "brain")
+    _warmup_llava()
+
+
+# Global voice references
+_audio_api = None
+_voice_module = None
+
+
+def _init_voice():
+    """Initialize voice module — runs in background, calls process_command on speech."""
+    global _audio_api, _voice_module
+    try:
+        from API.audio_api import AudioAPI
+        from Voice.marcus_gemini_voice import GeminiVoiceModule as VoiceModule
+
+        _audio_api = AudioAPI()
+
+        def _voice_callback(text, role):
+            """Gemini voice callback — routes action phrases to process_command."""
+            if role != "user" or not text.strip():
+                return
+            t = text.strip().lower()
+            act_kw = ["turn","move","go","walk","step","stop","come","wave","clap",
+                      "high five","shake","hug","forward","backward","left","right",
+                      "what do you see","what can you see","look","describe","patrol",
+                      "دور","امشي","روح","تقدم","ارجع","وقف","قف","تعال",
+                      "يمين","يسار","قدام","ورا","لوح","صفق","سلم",
+                      "شو شايف","شو تشوف","ماذا ترى","شو قدامك","لف","خطوات"]
+            if any(kw in t for kw in act_kw):
+                print(f" [Brain] Action: {text.strip()}")
+                try:
+                    result = process_command(text.strip())
+                    if isinstance(result, dict):
+                        sp = result.get("speak", "")
+                        vis_kw = ["see","look","describe","شايف","تشوف","ترى","قدامك"]
+                        if any(k in t for k in vis_kw) and sp and _audio_api:
+                            print(f" [Brain] Vision: {sp}")
+                            _audio_api.speak(sp)
+                except Exception as e:
+                    print(f" [Brain] Error: {e}")
+            else:
+                print(f" [Chat] {text.strip()}")
+
+        _voice_module = VoiceModule(_audio_api, on_transcript=_voice_callback)
+        _voice_module.start()
+        print(" [Voice] Always listening (Gemini voice)")
+    except Exception as e:
+        print(f" [Voice] Init failed: {e} — continuing without voice")
+        _audio_api = None
+        _voice_module = None
+
+
+# ══════════════════════════════════════════════════════════════════════════════
+# PROCESS COMMAND — shared by terminal loop and WebSocket server
+# ══════════════════════════════════════════════════════════════════════════════
+
+def process_command(cmd: str) -> dict:
+    """
+    Process a single command through the full brain pipeline.
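+
+    Example (illustrative — actual values depend on runtime state):
+        result = process_command("walk 1 meter")
+        # → {"type": "local", "speak": "Done", "action": "LOCAL", "elapsed": 0.8}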
+ Returns: {"type": str, "speak": str, "action": str, "elapsed": float} + + Used by both run_marcus.py (terminal) and marcus_server.py (WebSocket). + """ + cmd = cmd.strip() + if not cmd: + return {"type": "empty", "speak": "", "action": "NONE", "elapsed": 0} + + t0 = time.time() + + # ── YOLO status ────────────────────────────────────────────────────── + if any(w in cmd.lower() for w in ("yolo", "what does yolo", "vision", "using yolo")): + from API.yolo_api import YOLO_AVAILABLE as _ya + status = "active" if _ya else "not loaded" + speak = f"YOLO: {status} | {yolo_summary()} | {yolo_fps():.1f}fps" + print(f" {speak}") + log_cmd(cmd, speak) + return {"type": "status", "speak": speak, "action": "YOLO", "elapsed": 0} + + # ── Image search ───────────────────────────────────────────────────── + if cmd.lower().startswith("search/"): + speak = _handle_search(cmd) + return {"type": "search", "speak": speak, "action": "SEARCH", "elapsed": time.time() - t0} + + # ── Auto-detect natural language goals ─────────────────────────────── + if _NAT_GOAL_RE.match(cmd) and not cmd.lower().startswith("goal/"): + print(f" [Goal] Auto-detected: '{cmd}'") + navigate_to_goal(cmd.strip()) + elapsed = time.time() - t0 + log_cmd(cmd, f"Goal navigation: {cmd}", elapsed) + return {"type": "goal", "speak": f"Goal navigation: {cmd}", "action": "GOAL", "elapsed": elapsed} + + # ── Explicit goal/ ─────────────────────────────────────────────────── + if cmd.lower().startswith("goal/"): + goal = cmd[5:].strip() + if goal: + navigate_to_goal(goal) + elapsed = time.time() - t0 + log_cmd(cmd, f"Goal navigation: {goal}", elapsed) + return {"type": "goal", "speak": f"Goal navigation: {goal}", "action": "GOAL", "elapsed": elapsed} + return {"type": "error", "speak": "Usage: goal/ stop when you see a person", "action": "NONE", "elapsed": 0} + + # ── Autonomous patrol ──────────────────────────────────────────────── + if cmd.lower().startswith("patrol"): + mins = 5.0 + if " " in cmd: + try: + mins = float(cmd.split()[-1]) + except ValueError: + pass + patrol(duration_minutes=mins) + elapsed = time.time() - t0 + log_cmd(cmd, f"Patrol {mins}min", elapsed) + return {"type": "patrol", "speak": f"Patrol {mins}min complete", "action": "PATROL", "elapsed": elapsed} + + # ── Local commands (place / odom / memory / help) ──────────────────── + if try_local_command(cmd): + log_cmd(cmd, "local command") + return {"type": "local", "speak": "Done", "action": "LOCAL", "elapsed": time.time() - t0} + + # ── Talk-only (questions / acknowledgements) ───────────────────────── + if any(re.match(p, cmd, re.IGNORECASE) for p in _TALK_PATTERNS): + speak = _handle_talk(cmd) + return {"type": "talk", "speak": speak, "action": "TALK", "elapsed": time.time() - t0} + + # ── Greeting ───────────────────────────────────────────────────────── + if re.match(r"^(?:hi+|hey+|hello+|hola|salam|marhaba|sup|yo+|ahlan|السلام عليكم|مرحبا|أهلا|هلا|يا هلا)\s*[!.]*$", cmd, re.IGNORECASE): + response = "Hello! I am Marcus. How can I help you?" 
+ print(f"Marcus: {response}") + add_to_history(cmd, response) + log_cmd(cmd, response) + return {"type": "greeting", "speak": response, "action": "GREETING", "elapsed": 0} + + # ── "Come to me" shortcut ──────────────────────────────────────────── + if re.match(r"^(?:come(?:\s+back)?(?:\s+to\s+me)?|come\s+here|get\s+closer|approach|move\s+closer)\s*[!.]*$", cmd, re.IGNORECASE): + execute_action("forward", 2.0) + resp = "Coming to you" + print(f"Marcus: {resp}") + add_to_history(cmd, resp) + log_cmd(cmd, resp) + return {"type": "move", "speak": resp, "action": "FORWARD 2.0s", "elapsed": 2.0} + + # ── Multi-step compound ────────────────────────────────────────────── + _multi = re.match( + r"turn\s+(right|left)\s*(\d+)?\s*(?:deg(?:rees?)?)?\s+(?:and\s+then|then|and)?\s+" + r"(?:move\s+|go\s+|walk\s+|step\s+)?(back(?:ward)?|forward)\s*(\d+)?\s*(?:steps?|meter)?", + cmd, re.IGNORECASE) + if _multi: + turn_dir = _multi.group(1).lower() + turn_deg = float(_multi.group(2) or 90) + walk_dir = "backward" if "back" in _multi.group(3).lower() else "forward" + walk_dur = float(_multi.group(4) or 2) + execute_action("right" if turn_dir == "right" else "left", turn_deg / 18.0) + execute_action(walk_dir, walk_dur) + resp = f"Turned {turn_dir} {int(turn_deg)} degrees then moved {walk_dir}" + print(f"Marcus: {resp}") + add_to_history(cmd, resp) + log_cmd(cmd, resp) + return {"type": "move", "speak": resp, "action": f"MULTI {turn_dir}+{walk_dir}", "elapsed": time.time() - t0} + + # ── Standard LLaVA command ─────────────────────────────────────────── + return _handle_llava(cmd) + + +# ══════════════════════════════════════════════════════════════════════════════ +# HANDLERS (return speak text) +# ══════════════════════════════════════════════════════════════════════════════ + +def _handle_search(cmd): + args = cmd[7:].strip() + if not args: + print(" Usage: search/ /path/to/photo.jpg [hint]") + return "Usage: search/ " + searcher = get_searcher() + if not searcher: + print(" [Search] Image search not available") + return "Image search not available" + parts = args.split(None, 1) + if parts and os.path.exists(parts[0]): + img_path = parts[0] + hint = parts[1].strip() if len(parts) > 1 else "" + yolo_pre = "person" if not hint or "person" in hint.lower() else None + log_cmd(cmd, f"Image search: {img_path}") + result = searcher.search_from_file(img_path, hint=hint, yolo_prefilter=yolo_pre) + return result.get("description", "Search complete") + hint = args + yolo_pre = "person" if any(w in hint.lower() for w in ("person", "guy", "man", "woman")) else None + log_cmd(cmd, f"Image search: {hint}") + result = searcher.search(ref_img_b64=None, hint=hint, yolo_prefilter=yolo_pre) + return result.get("description", "Search complete") + + +def _handle_talk(cmd): + print("Thinking...") + try: + img = get_frame() + facts_str = "" + try: + from API.llava_api import _facts + if _facts: + facts_str = "\nKnown facts: " + "; ".join(_facts) + "." 
+ except ImportError: + pass + d = ask_talk(cmd, img, facts=facts_str) + sp = d.get("speak", "") + print(f"Marcus: {sp}") + log_cmd(cmd, sp) + return sp + except Exception as ex: + print(f" Error: {ex}") + return f"Error: {ex}" + + +def _handle_llava(cmd): + print("Thinking...") + t0 = time.time() + img = get_frame() + + if img is None: + print(" Waiting for camera...") + time.sleep(1.0) + img = get_frame() + + if img is None: + print(" Camera not ready — command cancelled") + log_cmd(cmd, "camera not ready") + return {"type": "error", "speak": "Camera not ready", "action": "NONE", "elapsed": 0} + + d = ask(cmd, img) + dur = time.time() - t0 + print(f" ({dur:.1f}s) -> {json.dumps(d)}") + resp = execute(d) + log_cmd(cmd, resp or "", dur) + + from API.yolo_api import YOLO_AVAILABLE as _ya + if _ya: + for cls in yolo_all_classes(): + det = yolo_closest(cls) + if det: + log_detection(cls, det.position, det.distance_estimate) + + action_str = d.get("actions", [{}])[0].get("move", "NONE") if d.get("actions") else "NONE" + return {"type": "decision", "speak": resp or "", "action": action_str.upper(), + "elapsed": dur, "raw": d} + + +# ══════════════════════════════════════════════════════════════════════════════ +# HELPERS +# ══════════════════════════════════════════════════════════════════════════════ + +def _warmup_llava(): + import ollama + print(" Warming up LLaVA... (loading into GPU)") + try: + img = get_frame() + ollama.chat( + model=OLLAMA_MODEL, + messages=[{"role": "user", "content": "hi", + "images": [img] if img else []}], + options={"temperature": 0.0, "num_predict": _cfg["warmup_num_predict"]} + ) + print(" LLaVA warm - first command will be fast\n") + except Exception as e: + print(f" Warmup failed ({e}) - first command may be slow\n") + + +def get_brain_status() -> dict: + """Return current brain status for server status message.""" + from API.yolo_api import YOLO_AVAILABLE as _ya + from API.odometry_api import ODOM_AVAILABLE as _oa + from API.memory_api import MEMORY_AVAILABLE as _ma + from API.camera_api import CAM_WIDTH, CAM_HEIGHT, CAM_FPS + try: + from API.lidar_api import LIDAR_AVAILABLE as _la, get_loc_state + lidar_state = get_loc_state() if _la else "off" + except ImportError: + _la = False + lidar_state = "off" + return { + "model": OLLAMA_MODEL, + "yolo": _ya, + "odometry": _oa, + "memory": _ma, + "lidar": _la, + "lidar_state": lidar_state, + "voice": _voice_module is not None and _voice_module.is_running, + "camera": f"{CAM_WIDTH}x{CAM_HEIGHT}@{CAM_FPS}", + } + + +def shutdown(): + """Clean shutdown of all subsystems.""" + print("\nShutting down Marcus...") + # Stop voice module + if _voice_module and _voice_module.is_running: + _voice_module.stop() + # Stop autonomous mode if running + from Brain.command_parser import _auto + if _auto and _auto.is_enabled(): + _auto.disable() + stop_camera() + gradual_stop() + send_cmd("stop") + + from API.odometry_api import odom as _o + if _o: + _o.stop() + + from API.memory_api import mem as _m + if _m: + _m.end_session() + + try: + from API.lidar_api import stop_lidar + stop_lidar() + except Exception: + pass + + _log("Marcus stopped", "info", "brain") + print("Marcus stopped.") + + +# ══════════════════════════════════════════════════════════════════════════════ +# TERMINAL MODE — used by run_marcus.py +# ══════════════════════════════════════════════════════════════════════════════ + +def run_terminal(): + """Run brain with terminal input loop.""" + init_brain() + + status = get_brain_status() + print() + print("=" * 48) + 
print(" MARCUS AI BRAIN — READY") + print("=" * 48) + for k, v in status.items(): + print(f" {k:<10}: {v}") + print("=" * 48) + print(" help | example | yolo | patrol | auto on/off | q") + print() + + try: + while True: + try: + cmd = input("Command: ").strip() + except (EOFError, KeyboardInterrupt): + break + if not cmd: + continue + if cmd.lower() in ("q", "quit", "exit"): + break + if cmd.lower() == "mute/": + import subprocess + subprocess.run(["pactl", "set-source-mute", "0", "1"], capture_output=True) + print(" Mic muted") + continue + if cmd.lower() == "unmute/": + import subprocess + subprocess.run(["pactl", "set-source-mute", "0", "0"], capture_output=True) + print(" Mic unmuted") + continue + result = process_command(cmd) + sp = result.get("speak", "") if isinstance(result, dict) else "" + if sp and _audio_api: + _audio_api.speak(sp) + except KeyboardInterrupt: + pass + + shutdown() + + +if __name__ == "__main__": + run_terminal() diff --git a/Brain/marcus_memory.py b/Brain/marcus_memory.py new file mode 100644 index 0000000..77eff32 --- /dev/null +++ b/Brain/marcus_memory.py @@ -0,0 +1,817 @@ +""" +marcus_memory.py — Session & Place Memory +========================================== +Project : Marcus | YS Lootah Technology +Hardware : Unitree G1 EDU + Jetson Orin NX +Purpose : Persistent memory across sessions. + - Place memory: save named robot positions, navigate back + - Session memory: log all commands, detections, alerts per session + - Cross-session recall: "what did you see last session?" + +Folder structure +---------------- + ~/Models_marcus/places.json ← persistent named places (all sessions) + ~/Models_marcus/sessions/ + session_001_2026-04-05/ + commands.json ← [{time, cmd, response, duration_s}] + detections.json ← [{time, class, position, distance, x, y}] + places.json ← places saved THIS session + alerts.json ← [{time, type, detail}] + summary.txt ← auto-generated session summary + +Import in marcus_llava.py +------------------------- + from marcus_memory import Memory + mem = Memory() ← call once at startup + mem.start_session() ← begins logging + mem.log_command(cmd, response) ← after every command + mem.log_detection(class_name, pos, dist) ← from YOLO loop + mem.save_place("door", x, y, heading) ← when user says "remember this as door" + mem.get_place("door") ← returns {x, y, heading} or None + mem.list_places() ← sorted list of place names + mem.delete_place("door") ← removes place + mem.last_session_summary() ← text summary of previous session + mem.end_session() ← saves everything, call on shutdown + +Date : April 2026 +""" + +import os +import json +import time +import re +import threading +import shutil +import difflib +from datetime import datetime +from pathlib import Path + + +# ══════════════════════════════════════════════════════════════════════════════ +# CONFIGURATION +# ══════════════════════════════════════════════════════════════════════════════ + +_PROJECT_ROOT = Path(__file__).resolve().parent.parent +BASE_DIR = _PROJECT_ROOT / "Data" / "Brain" +SESSIONS_DIR = BASE_DIR / "Sessions" +PLACES_FILE = _PROJECT_ROOT / "Data" / "History" / "Places" / "places.json" +MAX_CMD_LEN = 500 # truncate very long commands +MAX_SESSIONS = 50 # keep last N sessions — older ones auto-deleted +DETECT_DEDUPE = 5.0 # seconds — suppress duplicate YOLO detections + + +# ══════════════════════════════════════════════════════════════════════════════ +# HELPER — SAFE JSON READ/WRITE +# ══════════════════════════════════════════════════════════════════════════════ + +def 
_read_json(path: Path, default): + """ + Read JSON file. Returns default if file missing, unreadable, or corrupt. + Backs up corrupt files before resetting. + """ + if not path.exists(): + return default + try: + with open(path, "r", encoding="utf-8") as f: + return json.load(f) + except json.JSONDecodeError: + # Back up the corrupt file and return default + backup = path.with_suffix(".bak") + try: + shutil.copy(path, backup) + print(f" [Memory] ⚠️ Corrupt JSON at {path.name} — backed up as {backup.name}") + except Exception: + pass + return default + except OSError as e: + print(f" [Memory] ⚠️ Cannot read {path.name}: {e}") + return default + + +def _write_json(path: Path, data, lock: threading.Lock = None): + """ + Write JSON atomically using a temp file + rename. + Catches disk-full and permission errors without crashing. + Returns True on success. + """ + def _do_write(): + try: + path.parent.mkdir(parents=True, exist_ok=True) + tmp = path.with_suffix(".tmp") + with open(tmp, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2, ensure_ascii=False) + tmp.replace(path) # atomic rename + return True + except OSError as e: + print(f" [Memory] ⚠️ Cannot write {path.name}: {e}") + return False + + if lock: + with lock: + return _do_write() + return _do_write() + + +def _sanitize_name(name: str) -> str: + """ + Clean a place name: lowercase, strip whitespace, remove unsafe chars. + 'Server Room!' → 'server_room' + """ + name = name.strip().lower() + name = re.sub(r"[^\w\s\-]", "", name) # keep word chars, spaces, hyphens + name = re.sub(r"\s+", "_", name) # spaces → underscores + name = name[:50] # max 50 chars + return name + + +def _fuzzy_match(query: str, choices: list, n: int = 3) -> list: + """Return up to n closest matches from choices for query.""" + if not choices: + return [] + return difflib.get_close_matches(query, choices, n=n, cutoff=0.5) + + +def _new_session_id(sessions_dir: Path) -> str: + """Generate next session ID: session_001, session_002...""" + existing = [ + d.name for d in sessions_dir.iterdir() + if d.is_dir() and d.name.startswith("session_") + ] if sessions_dir.exists() else [] + + nums = [] + for name in existing: + parts = name.split("_") + if len(parts) >= 2 and parts[1].isdigit(): + nums.append(int(parts[1])) + + next_num = max(nums) + 1 if nums else 1 + date_str = datetime.now().strftime("%Y-%m-%d") + return f"session_{next_num:03d}_{date_str}" + + +# ══════════════════════════════════════════════════════════════════════════════ +# MEMORY CLASS +# ══════════════════════════════════════════════════════════════════════════════ + +class Memory: + """ + Persistent session and place memory for Marcus. + + Thread-safe. All write operations use locks. + Saves automatically on shutdown via end_session(). + Register with atexit for crash protection. + + Usage: + mem = Memory() + mem.start_session() + # ... during operation ... + mem.log_command("turn left", "Turning left") + mem.save_place("door", x=1.2, y=0.5, heading=90.0) + # ... on shutdown ... 
+ mem.end_session() + """ + + def __init__(self): + self._places_lock = threading.Lock() + self._session_lock = threading.Lock() + self._session_dir = None + self._session_id = None + self._session_start = None + + # In-memory buffers — flushed to disk on end_session + periodically + self._commands = [] # [{time, cmd, response, duration_s}] + self._detections = [] # [{time, class, position, distance, x, y}] + self._alerts = [] # [{time, type, detail}] + + # Dedup tracking for YOLO detections + self._last_detection = {} # class → timestamp + + # Persistent places (survives all sessions) + self._places = {} # {name: {x, y, heading, saved_at, session}} + + # Ensure base dirs exist + BASE_DIR.mkdir(parents=True, exist_ok=True) + SESSIONS_DIR.mkdir(parents=True, exist_ok=True) + + # Load persistent places + self._load_places() + + # Register auto-save on crash + import atexit + atexit.register(self._emergency_save) + + # ── PLACES ──────────────────────────────────────────────────────────────── + + def _load_places(self): + """Load places.json from disk into memory.""" + data = _read_json(PLACES_FILE, {}) + if isinstance(data, dict): + self._places = data + print(f" [Memory] Places loaded: {len(self._places)} locations") + else: + print(" [Memory] ⚠️ places.json has wrong format — resetting") + self._places = {} + + def save_place(self, name: str, x: float = None, + y: float = None, heading: float = None) -> bool: + """ + Save current robot position with a name. + + Args: + name : human-readable name e.g. "door", "desk_a" + x : robot x position from odometry (None if not available) + y : robot y position from odometry + heading : robot heading in degrees + + Returns: + True on success, False on invalid name or write error. + + Edge cases handled: + - Empty name → rejected + - Name with special chars → sanitized + - Duplicate name → overwrites with notification + - Odometry not running (x/y/heading all None) → saved as landmark only + - Disk full → error logged, returns False + """ + # Validate name + if not name or not name.strip(): + print(" [Memory] ⚠️ Place name cannot be empty") + return False + + clean = _sanitize_name(name) + if not clean: + print(f" [Memory] ⚠️ Place name '{name}' has no valid characters") + return False + + # Warn if overwriting + if clean in self._places: + old = self._places[clean] + print(f" [Memory] Overwriting '{clean}' (was saved at {old.get('saved_at','?')})") + + # Warn if no odometry + if x is None and y is None: + print(" [Memory] ⚠️ Odometry not running — saving place as landmark only (no coordinates)") + print(" [Memory] 'go to' navigation will not be available for this place") + + entry = { + "name": clean, + "x": round(x, 4) if x is not None else None, + "y": round(y, 4) if y is not None else None, + "heading": round(heading, 2) if heading is not None else None, + "has_odom": x is not None, + "saved_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "session": self._session_id or "unknown", + } + + with self._places_lock: + self._places[clean] = entry + ok = _write_json(PLACES_FILE, self._places) + + if ok: + coord_str = f"x={x:.2f} y={y:.2f} h={heading:.1f}°" if x is not None else "no coordinates" + print(f" [Memory] ✅ Saved place '{clean}' — {coord_str}") + return True + return False + + def get_place(self, name: str) -> dict: + """ + Get a saved place by name. + + Returns: + dict with x, y, heading, has_odom — or None if not found. + Suggests similar names if not found. 
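+
+        Example (illustrative):
+            p = mem.get_place("door")
+            if p is not None and p["has_odom"]:
+                print(p["x"], p["y"], p["heading"])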
+ + Edge cases: + - Exact match first + - Case-insensitive match + - Fuzzy match suggestion if no exact match + """ + clean = _sanitize_name(name) + + with self._places_lock: + places = dict(self._places) + + # Exact match + if clean in places: + return places[clean] + + # Case-insensitive match + for k, v in places.items(): + if k.lower() == clean.lower(): + return v + + # Fuzzy suggestions + suggestions = _fuzzy_match(clean, list(places.keys())) + if suggestions: + print(f" [Memory] ⚠️ Place '{name}' not found.") + print(f" [Memory] Did you mean: {', '.join(suggestions)} ?") + else: + print(f" [Memory] ⚠️ Place '{name}' not found.") + if places: + print(f" [Memory] Known places: {', '.join(sorted(places.keys()))}") + else: + print(" [Memory] No places saved yet. Say 'remember this as '") + return None + + def delete_place(self, name: str) -> bool: + """ + Remove a saved place. + + Returns True if deleted, False if not found. + """ + clean = _sanitize_name(name) + + with self._places_lock: + if clean not in self._places: + suggestions = _fuzzy_match(clean, list(self._places.keys())) + if suggestions: + print(f" [Memory] Place '{name}' not found. Did you mean: {', '.join(suggestions)}?") + else: + print(f" [Memory] Place '{name}' not found.") + return False + + del self._places[clean] + ok = _write_json(PLACES_FILE, self._places) + + if ok: + print(f" [Memory] ✅ Deleted place '{clean}'") + return True + + # Restore on write failure + self._load_places() + return False + + def list_places(self) -> list: + """ + Return sorted list of place name dicts. + + Returns: + [{"name": str, "x": float|None, "y": float|None, + "heading": float|None, "has_odom": bool, "saved_at": str}] + """ + with self._places_lock: + places = list(self._places.values()) + return sorted(places, key=lambda p: p.get("name", "")) + + def rename_place(self, old_name: str, new_name: str) -> bool: + """Rename a saved place.""" + old_clean = _sanitize_name(old_name) + new_clean = _sanitize_name(new_name) + + if not new_clean: + print(f" [Memory] ⚠️ New name '{new_name}' is invalid") + return False + + with self._places_lock: + if old_clean not in self._places: + print(f" [Memory] ⚠️ Place '{old_name}' not found") + return False + if new_clean in self._places: + print(f" [Memory] ⚠️ Name '{new_clean}' already exists") + return False + + entry = self._places.pop(old_clean) + entry["name"] = new_clean + self._places[new_clean] = entry + ok = _write_json(PLACES_FILE, self._places) + + if ok: + print(f" [Memory] ✅ Renamed '{old_clean}' → '{new_clean}'") + return True + + self._load_places() + return False + + # ── SESSION ─────────────────────────────────────────────────────────────── + + def start_session(self): + """ + Begin a new session. Creates session folder, sets session ID. 
+ + Edge cases: + - sessions/ doesn't exist → created + - session ID collision (same date, same number) → incremented + - Can only call once — subsequent calls are no-ops with warning + """ + if self._session_id is not None: + print(f" [Memory] ⚠️ Session already running: {self._session_id}") + return + + self._session_id = _new_session_id(SESSIONS_DIR) + self._session_dir = SESSIONS_DIR / self._session_id + self._session_start = time.time() + + try: + self._session_dir.mkdir(parents=True, exist_ok=True) + except OSError as e: + print(f" [Memory] ⚠️ Cannot create session dir: {e}") + self._session_id = None + self._session_dir = None + return + + # Load previous session summary for context + prev = self._get_previous_session_dir() + if prev: + print(f" [Memory] Previous session: {prev.name}") + else: + print(" [Memory] First session — no previous memory") + + print(f" [Memory] ✅ Session started: {self._session_id}") + + # Auto-flush every 60 seconds in background + self._start_autosave() + + def _start_autosave(self): + """Background thread — flush session to disk every 60s.""" + def _loop(): + while self._session_id is not None: + time.sleep(60) + self._flush_session() + threading.Thread(target=_loop, daemon=True).start() + + def _flush_session(self): + """Write current session buffers to disk without closing session.""" + if self._session_dir is None: + return + with self._session_lock: + _write_json(self._session_dir / "commands.json", self._commands) + _write_json(self._session_dir / "detections.json", self._detections) + _write_json(self._session_dir / "alerts.json", self._alerts) + # Save copy of places as they were this session + with self._places_lock: + _write_json(self._session_dir / "places.json", self._places) + + def log_command(self, cmd: str, response: str = "", duration_s: float = 0.0): + """ + Log a command and its response to the current session. + + Args: + cmd : the command string typed/spoken + response : Marcus's spoken response + duration_s : how long LLaVA took to respond + + Edge cases: + - No active session → logs to memory only, warns + - cmd too long → truncated at MAX_CMD_LEN + - Unicode (Arabic) → preserved via ensure_ascii=False + - Thread-safe + """ + if not cmd: + return + + entry = { + "time": datetime.now().strftime("%H:%M:%S"), + "cmd": cmd[:MAX_CMD_LEN], + "response": response[:MAX_CMD_LEN] if response else "", + "duration_s": round(duration_s, 2), + } + + with self._session_lock: + self._commands.append(entry) + + if self._session_dir is None: + print(" [Memory] ⚠️ No active session — command logged in memory only") + + def log_detection(self, class_name: str, position: str = "", + distance: str = "", x: float = None, y: float = None): + """ + Log a YOLO detection to the current session. + + Deduplicates: same class within DETECT_DEDUPE seconds is suppressed. + + Args: + class_name : e.g. 
"person" + position : "left" / "center" / "right" + distance : "close" / "medium" / "far" + x, y : robot position when detection occurred (from odometry) + """ + if not class_name: + return + + # Deduplicate — don't log same class repeatedly within dedupe window + now = time.time() + last = self._last_detection.get(class_name, 0) + if now - last < DETECT_DEDUPE: + return + self._last_detection[class_name] = now + + entry = { + "time": datetime.now().strftime("%H:%M:%S"), + "class": class_name, + "position": position, + "distance": distance, + "x": round(x, 3) if x is not None else None, + "y": round(y, 3) if y is not None else None, + } + + with self._session_lock: + self._detections.append(entry) + + def log_alert(self, alert_type: str, detail: str = ""): + """ + Log a PPE or hazard alert to the current session. + + Args: + alert_type : "PPE", "Hazard", "obstacle", etc. + detail : e.g. "no helmet (left)", "fire extinguisher missing" + """ + entry = { + "time": datetime.now().strftime("%H:%M:%S"), + "type": alert_type, + "detail": detail[:200], + } + with self._session_lock: + self._alerts.append(entry) + print(f" [Memory] 🚨 Alert logged: {alert_type} — {detail}") + + def get_last_command(self) -> str: + """Return the last command typed, or empty string.""" + with self._session_lock: + if self._commands: + return self._commands[-1].get("cmd", "") + return "" + + def get_last_n_commands(self, n: int = 5) -> list: + """Return last N command strings for LLaVA context.""" + with self._session_lock: + recent = self._commands[-n:] if len(self._commands) >= n else self._commands[:] + return [e.get("cmd", "") for e in recent] + + def get_session_detections(self) -> list: + """Return all YOLO detections this session.""" + with self._session_lock: + return list(self._detections) + + def end_session(self): + """ + Save everything and close the current session. + Call on clean shutdown or Ctrl+C. 
+ + Edge cases: + - No active session → no-op + - Disk full → logs error but doesn't crash + - Generates summary text file + - Cleans up old sessions if > MAX_SESSIONS + """ + if self._session_id is None: + return + + print(f"\n [Memory] Saving session {self._session_id}...") + + self._flush_session() + self._write_summary() + self._prune_old_sessions() + + elapsed = time.time() - (self._session_start or time.time()) + mins = int(elapsed // 60) + secs = int(elapsed % 60) + + with self._session_lock: + n_cmds = len(self._commands) + n_dets = len(self._detections) + n_alerts= len(self._alerts) + + print(f" [Memory] ✅ Session saved: {self._session_id}") + print(f" [Memory] Duration: {mins}m {secs}s") + print(f" [Memory] Commands: {n_cmds} | Detections: {n_dets} | Alerts: {n_alerts}") + + # Reset state + self._session_id = None + self._session_dir = None + + def _emergency_save(self): + """Called by atexit on crash — saves partial session data.""" + if self._session_id is None: + return + print("\n [Memory] Emergency save on exit...") + self._flush_session() + + def _write_summary(self): + """Generate a plain text summary of the session.""" + if self._session_dir is None: + return + try: + with self._session_lock: + n_cmds = len(self._commands) + n_dets = len(self._detections) + n_alerts = len(self._alerts) + places = list(self._places.keys()) + cmds = [e.get("cmd", "") for e in self._commands[:10]] + + elapsed = time.time() - (self._session_start or time.time()) + lines = [ + f"Session: {self._session_id}", + f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M')}", + f"Duration: {int(elapsed//60)}m {int(elapsed%60)}s", + f"Commands: {n_cmds}", + f"YOLO detections: {n_dets}", + f"Alerts: {n_alerts}", + f"Known places: {', '.join(places) if places else 'none'}", + "", + "First commands:", + ] + [f" - {c}" for c in cmds] + + with open(self._session_dir / "summary.txt", "w", encoding="utf-8") as f: + f.write("\n".join(lines)) + except Exception as e: + print(f" [Memory] ⚠️ Summary write failed: {e}") + + def _prune_old_sessions(self): + """Delete oldest sessions if total exceeds MAX_SESSIONS.""" + try: + sessions = sorted( + [d for d in SESSIONS_DIR.iterdir() if d.is_dir()], + key=lambda d: d.stat().st_mtime + ) + while len(sessions) > MAX_SESSIONS: + oldest = sessions.pop(0) + shutil.rmtree(oldest, ignore_errors=True) + print(f" [Memory] Pruned old session: {oldest.name}") + except Exception as e: + print(f" [Memory] ⚠️ Session pruning failed: {e}") + + # ── PREVIOUS SESSION RECALL ─────────────────────────────────────────────── + + def _get_previous_session_dir(self) -> Path: + """Return the most recent session dir that is NOT the current session.""" + try: + dirs = sorted( + [d for d in SESSIONS_DIR.iterdir() + if d.is_dir() and d.name != self._session_id], + key=lambda d: d.stat().st_mtime, + reverse=True + ) + return dirs[0] if dirs else None + except Exception: + return None + + def last_session_summary(self) -> str: + """ + Return text summary of the most recent previous session. + Used when user says 'what did you do last session?' + + Edge cases: + - No previous session → informative message + - Summary file missing → reconstruct from commands.json + - Commands file corrupt → return partial info + """ + prev = self._get_previous_session_dir() + if prev is None: + return "No previous session found. This appears to be the first session." 
+ + # Try summary.txt first + summary_file = prev / "summary.txt" + if summary_file.exists(): + try: + return summary_file.read_text(encoding="utf-8") + except Exception: + pass + + # Reconstruct from raw data + cmds = _read_json(prev / "commands.json", []) + dets = _read_json(prev / "detections.json", []) + pls = _read_json(prev / "places.json", {}) + + lines = [ + f"Previous session: {prev.name}", + f"Commands logged: {len(cmds)}", + f"Objects detected: {len(dets)}", + f"Places saved: {', '.join(pls.keys()) if pls else 'none'}", + ] + if cmds: + lines.append("Commands included:") + for c in cmds[:10]: + lines.append(f" - {c.get('cmd','')}") + return "\n".join(lines) + + def previous_session_detections(self) -> list: + """ + Return list of unique object classes seen in previous session. + Used for 'what objects did you see last time?' + """ + prev = self._get_previous_session_dir() + if prev is None: + return [] + dets = _read_json(prev / "detections.json", []) + if not isinstance(dets, list): + return [] + return list({d.get("class", "") for d in dets if d.get("class")}) + + def previous_session_places(self) -> dict: + """ + Return places saved in the previous session. + """ + prev = self._get_previous_session_dir() + if prev is None: + return {} + data = _read_json(prev / "places.json", {}) + return data if isinstance(data, dict) else {} + + def all_sessions(self) -> list: + """ + Return list of all session IDs with basic stats. + """ + result = [] + try: + for d in sorted(SESSIONS_DIR.iterdir(), reverse=True): + if not d.is_dir(): + continue + cmds = _read_json(d / "commands.json", []) + result.append({ + "id": d.name, + "commands": len(cmds) if isinstance(cmds, list) else 0, + "date": "_".join(d.name.split("_")[2:]) if "_" in d.name else "", + }) + except Exception: + pass + return result + + # ── QUICK LOOKUPS ───────────────────────────────────────────────────────── + + def session_duration_str(self) -> str: + """Return human-readable session duration e.g. 
'14m 22s'.""" + if self._session_start is None: + return "0m 0s" + elapsed = time.time() - self._session_start + return f"{int(elapsed//60)}m {int(elapsed%60)}s" + + def places_count(self) -> int: + with self._places_lock: + return len(self._places) + + def commands_count(self) -> int: + with self._session_lock: + return len(self._commands) + + def __repr__(self): + return (f"Memory(session={self._session_id}, " + f"places={self.places_count()}, " + f"commands={self.commands_count()})") + + +# ══════════════════════════════════════════════════════════════════════════════ +# STANDALONE TEST +# ══════════════════════════════════════════════════════════════════════════════ + +if __name__ == "__main__": + print("Marcus Memory — Standalone Test") + print("=================================\n") + + mem = Memory() + mem.start_session() + + print("\n--- Place memory ---") + mem.save_place("door", x=0.0, y=0.0, heading=0.0) + mem.save_place("desk a", x=1.5, y=0.3, heading=45.0) + mem.save_place("window", x=3.0, y=0.0, heading=180.0) + mem.save_place("", x=0.0, y=0.0, heading=0.0) # empty name → rejected + mem.save_place("desk a", x=1.6, y=0.4, heading=50.0) # duplicate → overwrite + + print("\n--- Get place ---") + p = mem.get_place("door") + print(f"door → {p}") + + p = mem.get_place("dooor") # typo → fuzzy suggestion + p = mem.get_place("nonexistent") # not found + list all + + print("\n--- List places ---") + for pl in mem.list_places(): + odom = f"x={pl['x']} y={pl['y']}" if pl['has_odom'] else "no odom" + print(f" {pl['name']:20} {odom}") + + print("\n--- Command logging ---") + mem.log_command("turn left", "Turning left", duration_s=6.2) + mem.log_command("what do you see", "I see a person at a desk", duration_s=7.1) + mem.log_command("go to door", "Navigating to door", duration_s=0.1) + print(f"Last command: '{mem.get_last_command()}'") + print(f"Last 3: {mem.get_last_n_commands(3)}") + + print("\n--- Detection logging ---") + mem.log_detection("person", "center", "close", x=1.5, y=0.3) + mem.log_detection("person", "center", "close", x=1.5, y=0.3) # dedupe → suppressed + mem.log_detection("chair", "right", "medium", x=1.5, y=0.3) + + print("\n--- Alert logging ---") + mem.log_alert("PPE", "no helmet (left)") + mem.log_alert("Hazard", "fire extinguisher missing") + + print("\n--- Previous session ---") + print(mem.last_session_summary()) + + print("\n--- All sessions ---") + for s in mem.all_sessions(): + print(f" {s['id']:40} {s['commands']} commands") + + print(f"\n--- Session duration: {mem.session_duration_str()} ---") + print(f"--- {repr(mem)} ---\n") + + print("\n--- Rename place ---") + mem.rename_place("window", "window_north") + + print("\n--- Delete place ---") + mem.delete_place("door") + mem.delete_place("nonexistent") + + print("\n--- End session ---") + mem.end_session() + print("\nDone.") diff --git a/Bridge/__init__.py b/Bridge/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Bridge/ros2_zmq_bridge.py b/Bridge/ros2_zmq_bridge.py new file mode 100644 index 0000000..4caf36f --- /dev/null +++ b/Bridge/ros2_zmq_bridge.py @@ -0,0 +1,66 @@ +""" +ROS2 → ZMQ bridge +Subscribes to /cmd_vel and holosoma/other_input (Python 3.8 + ROS2 Foxy) +Forwards to Holosoma via ZMQ PUB socket (Python 3.10 hsinference) + +Run: source /opt/ros/foxy/setup.bash + python3.8 ~/Models_marcus/ros2_zmq_bridge.py +""" +import json, time +import rclpy +from rclpy.node import Node +from geometry_msgs.msg import TwistStamped +from std_msgs.msg import String +import zmq + +ZMQ_PORT = 5556 + 
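+# Illustrative only — minimal SUB-side sketch of the message format this
+# bridge publishes ({"vel": {...}} and {"cmd": ...}). The real consumer is
+# the Holosoma process; this helper is never called by the bridge itself.
+def _example_subscriber():
+    ctx = zmq.Context()
+    sub = ctx.socket(zmq.SUB)
+    sub.connect(f"tcp://127.0.0.1:{ZMQ_PORT}")
+    sub.setsockopt_string(zmq.SUBSCRIBE, "")  # subscribe to all messages
+    while True:
+        data = json.loads(sub.recv_string())
+        if "vel" in data:
+            print("velocity:", data["vel"])   # {"vx": ..., "vy": ..., "vyaw": ...}
+        elif "cmd" in data:
+            print("command:", data["cmd"])    # e.g. "start", "walk", "stop"
+
+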
+class ROS2ZMQBridge(Node): + def __init__(self): + super().__init__('marcus_zmq_bridge') + + # ZMQ PUB socket + self._ctx = zmq.Context() + self._sock = self._ctx.socket(zmq.PUB) + self._sock.bind(f"tcp://127.0.0.1:{ZMQ_PORT}") + time.sleep(0.3) # let subscribers connect + + # ROS2 subscribers + self.create_subscription( + TwistStamped, 'cmd_vel', self._vel_cb, 10) + self.create_subscription( + String, 'holosoma/other_input', self._cmd_cb, 10) + + self.get_logger().info( + f"ROS2→ZMQ bridge ready on tcp://127.0.0.1:{ZMQ_PORT}") + + def _vel_cb(self, msg: TwistStamped): + data = {"vel": { + "vx": msg.twist.linear.x, + "vy": msg.twist.linear.y, + "vyaw": msg.twist.angular.z, + }} + self._sock.send_string(json.dumps(data)) + self.get_logger().info( + f"Vel → vx={data['vel']['vx']:.2f} " + f"vy={data['vel']['vy']:.2f} " + f"vyaw={data['vel']['vyaw']:.2f}") + + def _cmd_cb(self, msg: String): + data = {"cmd": msg.data} + self._sock.send_string(json.dumps(data)) + self.get_logger().info(f"Cmd → {msg.data}") + +def main(): + rclpy.init() + node = ROS2ZMQBridge() + try: + rclpy.spin(node) + except KeyboardInterrupt: + pass + finally: + node.destroy_node() + rclpy.shutdown() + +if __name__ == "__main__": + main() diff --git a/Client/marcus_cli.py b/Client/marcus_cli.py new file mode 100644 index 0000000..90b107b --- /dev/null +++ b/Client/marcus_cli.py @@ -0,0 +1,288 @@ +""" +marcus_cli.py — Marcus CLI Client +=================================== +Connect to Marcus server via WebSocket from any terminal. +Prompts for IP and port on startup, then provides a command interface. + +Start: python3 Client/marcus_cli.py + OR: python3 Client/marcus_cli.py --ip 192.168.123.164 --port 8765 +""" +import asyncio +import argparse +import json +import os +import sys +import time + +PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if PROJECT_DIR not in sys.path: + sys.path.insert(0, PROJECT_DIR) + +try: + import websockets +except ImportError: + print("Missing dependency: pip install websockets") + sys.exit(1) + +from Core.config_loader import load_config + +_net = load_config("Network") +DEFAULT_IP = _net.get("jetson_ip", "192.168.123.164") +DEFAULT_PORT = _net.get("websocket_port", 8765) + + +# ── COLORS ─────────────────────────────────────────────────────────────────── +class C: + RESET = "\033[0m" + BOLD = "\033[1m" + GREEN = "\033[92m" + RED = "\033[91m" + YELLOW = "\033[93m" + CYAN = "\033[96m" + GRAY = "\033[90m" + ORANGE = "\033[38;5;208m" + + +def _ts(): + return time.strftime("%H:%M:%S") + + +# ── CONNECTION ─────────────────────────────────────────────────────────────── + +async def connect_and_run(ip: str, port: int): + url = f"ws://{ip}:{port}" + print(f"\n{C.CYAN}Connecting to {url}...{C.RESET}") + + try: + async with websockets.connect(url, ping_interval=20, ping_timeout=10) as ws: + print(f"{C.GREEN}Connected to Marcus server{C.RESET}\n") + + # Start receiver task + receiver = asyncio.create_task(_receive_loop(ws)) + + # Input loop + try: + await _input_loop(ws) + except (EOFError, KeyboardInterrupt): + pass + finally: + receiver.cancel() + + except ConnectionRefusedError: + print(f"{C.RED}Connection refused — is the server running on {ip}:{port}?{C.RESET}") + except OSError as e: + print(f"{C.RED}Network error: {e}{C.RESET}") + except Exception as e: + print(f"{C.RED}Connection failed: {e}{C.RESET}") + + +async def _receive_loop(ws): + """Background task — receives and displays server messages.""" + try: + async for raw in ws: + try: + data = json.loads(raw) + 
_handle_message(data) + except json.JSONDecodeError: + print(f"{C.GRAY}[?] {raw[:80]}{C.RESET}") + except websockets.exceptions.ConnectionClosed: + print(f"\n{C.RED}Disconnected from server{C.RESET}") + except asyncio.CancelledError: + pass + + +async def _input_loop(ws): + """Main input loop — sends commands to server.""" + _print_help() + + while True: + try: + cmd = await asyncio.get_event_loop().run_in_executor( + None, lambda: input(f"{C.ORANGE}Command: {C.RESET}").strip() + ) + except (EOFError, KeyboardInterrupt): + print(f"\n{C.GRAY}Disconnecting...{C.RESET}") + break + + if not cmd: + continue + + lower = cmd.lower() + + if lower in ("q", "quit", "exit"): + break + + elif lower == "help": + _print_help() + continue + + elif lower == "status": + await ws.send(json.dumps({"type": "ping"})) + continue + + elif lower == "camera": + await ws.send(json.dumps({"type": "get_camera"})) + continue + + elif lower.startswith("profile "): + profile = cmd[8:].strip() + await ws.send(json.dumps({"type": "set_camera", "profile": profile})) + continue + + elif lower == "capture": + await ws.send(json.dumps({"type": "capture"})) + continue + + elif lower == "log": + print(f"{C.GRAY} Server-side nav log not available from CLI{C.RESET}") + continue + + else: + # Send as navigation command + await ws.send(json.dumps({"type": "command", "command": cmd})) + + +# ── MESSAGE DISPLAY ────────────────────────────────────────────────────────── + +def _handle_message(data): + t = data.get("type", "") + + if t == "frame": + # Suppress frame data in CLI (no GUI) + return + + elif t == "status": + lidar = "ALIVE" if data.get("lidar") else "OFFLINE" + model = data.get("model", "?") + camera = data.get("camera", "?") + yolo = data.get("yolo", False) + odom = data.get("odometry", False) + memory = data.get("memory", False) + print(f"{C.GREEN} Server ready{C.RESET}") + print(f" Model : {C.CYAN}{model}{C.RESET}") + print(f" YOLO : {C.GREEN if yolo else C.GRAY}{'active' if yolo else 'off'}{C.RESET}") + print(f" Odometry : {C.GREEN if odom else C.GRAY}{'active' if odom else 'off'}{C.RESET}") + print(f" Memory : {C.GREEN if memory else C.GRAY}{'active' if memory else 'off'}{C.RESET}") + print(f" LiDAR : {C.GREEN if lidar == 'ALIVE' else C.RED}{lidar}{C.RESET}") + print(f" Camera : {camera}") + msg = data.get("message", "") + if msg: + print(f" {C.GRAY}{msg}{C.RESET}") + print() + + elif t == "thinking": + cmd = data.get("command", "") + print(f"{C.YELLOW} Thinking... 
({cmd}){C.RESET}") + + elif t == "decision": + action = data.get("action", "?") + speak = data.get("speak", "") + elapsed = data.get("elapsed", "?") + cmd = data.get("cmd", "?") + ts = data.get("timestamp", "") + btype = data.get("brain_type", "") + + color = (C.GREEN if cmd == "FORWARD" + else C.CYAN if cmd in ("LEFT", "RIGHT", "MULTI", "GOAL") + else C.ORANGE if cmd in ("GREETING", "TALK", "LOCAL") + else C.RED if cmd in ("STOP", "NONE") + else C.GRAY) + + print(f" [{ts}] {color}{C.BOLD}{action}{C.RESET} {C.GRAY}({elapsed}s){C.RESET}") + if speak: + print(f" {C.CYAN}Marcus: {speak}{C.RESET}") + + elif t == "camera_config": + p = data.get("profile", "?") + w, h, f = data.get("width", "?"), data.get("height", "?"), data.get("fps", "?") + active = data.get("pipeline_active", False) + note = data.get("note", "") + print(f" Camera: {p} ({w}x{h}@{f}Hz) pipeline={'active' if active else 'stopped'}") + if note: + print(f" {C.GRAY}{note}{C.RESET}") + + elif t == "capture_result": + if data.get("ok"): + size_kb = len(data.get("data", "")) * 3 // 4 // 1024 + ts = data.get("timestamp", "") + print(f"{C.GREEN} Captured frame ({size_kb}KB) at {ts}{C.RESET}") + else: + print(f"{C.RED} Capture failed: {data.get('message', '?')}{C.RESET}") + + elif t == "pong": + lidar = "OK" if data.get("lidar") else "OFFLINE" + ts = data.get("timestamp", "") + print(f" [{ts}] Status — LiDAR: {lidar}") + + elif t == "error": + print(f"{C.RED} ERROR: {data.get('message', '?')}{C.RESET}") + + else: + print(f"{C.GRAY} [{t}] {json.dumps(data)[:100]}{C.RESET}") + + +def _print_help(): + print(f""" +{C.BOLD} MARCUS CLI CLIENT{C.RESET} + {'─' * 40} + {C.CYAN}Navigation commands:{C.RESET} + move forward turn left stop + walk forward turn right 90 halt + + {C.CYAN}System commands:{C.RESET} + status — ping server + LiDAR + camera — get camera status + profile — switch camera (low/medium/high/full) + capture — take a photo + help — this menu + q — disconnect +""") + + +# ── MAIN ───────────────────────────────────────────────────────────────────── + +def main(): + parser = argparse.ArgumentParser(description="Marcus CLI Client") + parser.add_argument("--ip", default=None, help="Server IP address") + parser.add_argument("--port", type=int, default=None, help="Server port") + args = parser.parse_args() + + eth_ip = _net.get("jetson_ip", "192.168.123.164") + wlan_ip = _net.get("jetson_wlan_ip", "10.255.254.86") + + print(f"\n{C.BOLD}{C.ORANGE} MARCUS — CLI Client{C.RESET}") + print(f" {'═' * 40}") + print(f" {C.GRAY}Connection options:{C.RESET}") + print(f" 1) eth0 — {eth_ip}:{DEFAULT_PORT}") + print(f" 2) wlan0 — {wlan_ip}:{DEFAULT_PORT}") + print(f" 3) custom") + print() + + if args.ip: + ip = args.ip + else: + choice = input(f" Choose [1/2/3] or IP [{eth_ip}]: ").strip() + if choice == "1" or not choice: + ip = eth_ip + elif choice == "2": + ip = wlan_ip + elif choice == "3": + ip = input(f" Server IP: ").strip() or eth_ip + else: + # User typed an IP directly + ip = choice + + if args.port: + port = args.port + else: + port_str = input(f" Port [{DEFAULT_PORT}]: ").strip() + port = int(port_str) if port_str else DEFAULT_PORT + + try: + asyncio.run(connect_and_run(ip, port)) + except KeyboardInterrupt: + print(f"\n{C.GRAY}Bye.{C.RESET}") + + +if __name__ == "__main__": + main() diff --git a/Client/marcus_client.py b/Client/marcus_client.py new file mode 100644 index 0000000..0e51aef --- /dev/null +++ b/Client/marcus_client.py @@ -0,0 +1,1021 @@ +""" +Marcus Navigation GUI Client — runs on Workstation +Tabs: Navigation | Camera | 
LiDAR
+Start: python marcus_client.py
+Requires: pip install websockets pillow
+
+LiDAR/SLAM runs locally on this workstation (same as SLAM_GUI.py).
+"""
+import tkinter as tk
+from tkinter import ttk, scrolledtext, filedialog
+import asyncio, threading, json, base64, time, sys, os
+from PIL import Image, ImageTk
+import io
+import numpy as np
+import websockets
+
+JETSON_IP = "192.168.123.164"
+JETSON_PORT = 8765
+
+# ─── SLAM (runs locally on workstation) ───────────────────────────────────────
+SLAM_DIR = os.path.expanduser("~/Robotics_workspace/yslootahtech/G1_Lootah/Lidar")
+if SLAM_DIR not in sys.path:
+    sys.path.insert(0, SLAM_DIR)
+
+try:
+    from SLAM_engine import SlamEngineClient
+    SLAM_AVAILABLE = True
+except ImportError:
+    SlamEngineClient = None
+    SLAM_AVAILABLE = False
+    print("[SLAM] Not available — G1_Lootah/Lidar not found or dependencies missing")
+WS_URL = f"ws://{JETSON_IP}:{JETSON_PORT}"
+
+# ─── THEME ────────────────────────────────────────────────────────────────────
+BG = "#131C2E"
+BG2 = "#1C2942"
+BG3 = "#243352"
+CARD_BG = "#1E2E4A"
+ACCENT = "#E87722"
+TEAL = "#00838F"
+GREEN = "#2E7D32"
+GREEN_LT = "#66BB6A"
+RED = "#C62828"
+RED_LT = "#EF5350"
+BLUE = "#1565C0"
+PURPLE = "#6A1B9A"
+ORANGE = "#E65100"
+WHITE = "#ECEFF1"
+GRAY = "#78909C"
+DARK = "#0D1520"
+
+FONT = ("Segoe UI", 11)
+FONT_SM = ("Segoe UI", 10)
+FONT_LG = ("Segoe UI", 13, "bold")
+FONT_XL = ("Segoe UI", 16, "bold")
+MONO = ("Cascadia Code", 10)
+
+# ─── CAMERA ───────────────────────────────────────────────────────────────────
+RESOLUTION_PRESETS = [
+    ("1080p 15fps", 1920, 1080, 15),
+    ("720p 30fps", 1280, 720, 30),
+    ("720p 15fps", 1280, 720, 15),
+    ("540p 30fps", 960, 540, 30),
+    ("540p 15fps", 960, 540, 15),
+    ("480p 60fps", 848, 480, 60),
+    ("480p 30fps", 640, 480, 30),
+    ("480p 15fps", 640, 480, 15),
+    ("240p 30fps", 424, 240, 30),
+]
+
+CAMERA_PROFILES = {
+    "low": ("LOW", "424x240\n30Hz"),
+    "medium": ("MEDIUM", "640x480\n30Hz"),
+    "high": ("HIGH", "960x540\n15Hz"),
+    "full": ("FULL", "1280x720\n15Hz"),
+}
+
+
+# ─── SCROLLABLE FRAME HELPER ─────────────────────────────────────────────────
+class ScrollFrame(tk.Frame):
+    """A frame with a vertical scrollbar for content that overflows."""
+    def __init__(self, parent, bg=BG, **kw):
+        super().__init__(parent, bg=bg, **kw)
+        self._canvas = tk.Canvas(self, bg=bg, highlightthickness=0, bd=0)
+        self._scrollbar = ttk.Scrollbar(self, orient="vertical", command=self._canvas.yview)
+        self.inner = tk.Frame(self._canvas, bg=bg)
+
+        # Keep the scrollregion in sync with the inner frame's size
+        self.inner.bind("<Configure>", lambda e: self._canvas.configure(scrollregion=self._canvas.bbox("all")))
+        self._canvas_window = self._canvas.create_window((0, 0), window=self.inner, anchor="nw")
+        self._canvas.configure(yscrollcommand=self._scrollbar.set)
+
+        self._canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
+        self._scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
+
+        self._canvas.bind("<Configure>", self._on_canvas_resize)
+        # Grab the wheel only while the pointer is over this frame
+        self.inner.bind("<Enter>", self._bind_mousewheel)
+        self.inner.bind("<Leave>", self._unbind_mousewheel)
+
+    def _on_canvas_resize(self, event):
+        self._canvas.itemconfig(self._canvas_window, width=event.width)
+
+    def _bind_mousewheel(self, event):
+        # Windows/macOS deliver <MouseWheel>; X11 delivers Button-4/Button-5
+        self._canvas.bind_all("<MouseWheel>", self._on_mousewheel)
+        self._canvas.bind_all("<Button-4>", self._on_mousewheel)
+        self._canvas.bind_all("<Button-5>", self._on_mousewheel)
+
+    def _unbind_mousewheel(self, event):
+        self._canvas.unbind_all("<MouseWheel>")
+        self._canvas.unbind_all("<Button-4>")
+        self._canvas.unbind_all("<Button-5>")
+
+    def _on_mousewheel(self, event):
+        if event.num == 4:
+            self._canvas.yview_scroll(-1, "units")
+        elif event.num
== 5: + self._canvas.yview_scroll(1, "units") + else: + self._canvas.yview_scroll(int(-1 * (event.delta / 120)), "units") + + +# ─── STYLED BUTTON ─────────────────────────────────────────────────────────── +def btn(parent, text, bg_color=BG3, fg_color=WHITE, font=FONT_SM, command=None, **kw): + return tk.Button(parent, text=text, font=font, bg=bg_color, fg=fg_color, + activebackground=ACCENT, activeforeground=WHITE, + relief=tk.FLAT, cursor="hand2", command=command, **kw) + + +class MarcusGUI: + def __init__(self, root): + self.root = root + self.root.title("Marcus — Navigation Control") + self.root.configure(bg=BG) + self.root.geometry("1360x860") + self.root.minsize(1000, 650) + + self.ws = None + self.loop = None + self.connected = False + self.ws_thread = None + self.cmd_history = [] + self.history_idx = -1 + self._cam_profile = "medium" + self._cam_preview_on = True + + # SLAM (local) + self._slam = None # type: SlamEngineClient or None + self._slam_running = False + self._map_points = [] + self._map_robot_pos = None + self._map_goal = None + self._map_scale = 30.0 + + self._build_ui() + self._start_ws_thread() + self._start_slam_poll() + + # ═════════════════════════════════════════════════════════════════════════ + # TOP BAR + # ═════════════════════════════════════════════════════════════════════════ + def _build_ui(self): + topbar = tk.Frame(self.root, bg=BG, pady=10) + topbar.pack(fill=tk.X, padx=16) + + tk.Label(topbar, text="MARCUS", font=("Segoe UI", 20, "bold"), + fg=ACCENT, bg=BG).pack(side=tk.LEFT) + tk.Label(topbar, text=" Navigation Control System | YS Lootah Technology", + font=FONT, fg=GRAY, bg=BG).pack(side=tk.LEFT) + + self.lbl_conn = tk.Label(topbar, text=" DISCONNECTED", font=FONT_SM, fg=RED_LT, bg=BG) + self.lbl_conn.pack(side=tk.RIGHT, padx=10) + self.lbl_lidar = tk.Label(topbar, text="LiDAR: -", font=FONT_SM, fg=GRAY, bg=BG) + self.lbl_lidar.pack(side=tk.RIGHT, padx=10) + self.lbl_model = tk.Label(topbar, text="Model: -", font=FONT_SM, fg=GRAY, bg=BG) + self.lbl_model.pack(side=tk.RIGHT, padx=10) + + tk.Frame(self.root, bg=ACCENT, height=2).pack(fill=tk.X) + + # ── Notebook ── + style = ttk.Style() + style.theme_use("clam") + style.configure("M.TNotebook", background=BG, borderwidth=0) + style.configure("M.TNotebook.Tab", background=BG2, foreground=WHITE, + font=("Segoe UI", 12, "bold"), padding=[24, 10]) + style.map("M.TNotebook.Tab", + background=[("selected", TEAL)], foreground=[("selected", WHITE)]) + + self.notebook = ttk.Notebook(self.root, style="M.TNotebook") + self.notebook.pack(fill=tk.BOTH, expand=True) + + self.tab_nav = tk.Frame(self.notebook, bg=BG) + self.tab_camera = tk.Frame(self.notebook, bg=BG) + self.tab_lidar = tk.Frame(self.notebook, bg=BG) + + self.notebook.add(self.tab_nav, text=" Navigation ") + self.notebook.add(self.tab_camera, text=" Camera ") + self.notebook.add(self.tab_lidar, text=" LiDAR ") + + self._build_nav_tab() + self._build_camera_tab() + self._build_lidar_tab() + + # ── Bottom ── + botbar = tk.Frame(self.root, bg=DARK, pady=6) + botbar.pack(fill=tk.X) + self.lbl_status = tk.Label(botbar, text=f"Connecting to {JETSON_IP}:{JETSON_PORT}...", + font=FONT_SM, fg=GRAY, bg=DARK) + self.lbl_status.pack(side=tk.LEFT, padx=16) + + # ═════════════════════════════════════════════════════════════════════════ + # TAB 1: NAVIGATION + # ═════════════════════════════════════════════════════════════════════════ + def _build_nav_tab(self): + main = tk.Frame(self.tab_nav, bg=BG) + main.pack(fill=tk.BOTH, expand=True, padx=16, pady=10) + + # 
Left: camera
+        left = tk.Frame(main, bg=BG)
+        left.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
+
+        tk.Label(left, text="Live Camera - RealSense D435I",
+                 font=FONT_SM, fg=GRAY, bg=BG).pack(anchor=tk.W, pady=(0, 6))
+
+        self.canvas = tk.Label(left, bg=DARK, text="Waiting for stream...",
+                               fg=GRAY, font=FONT)
+        self.canvas.pack(fill=tk.BOTH, expand=True)
+
+        # Decision overlay
+        action_card = tk.Frame(left, bg=CARD_BG, pady=12, padx=16)
+        action_card.pack(fill=tk.X, pady=(10, 0))
+        tk.Label(action_card, text="Last Decision", font=FONT_SM, fg=GRAY, bg=CARD_BG).pack(anchor=tk.W)
+        self.lbl_action = tk.Label(action_card, text="-", font=FONT_XL, fg=ACCENT, bg=CARD_BG)
+        self.lbl_action.pack(anchor=tk.W)
+        self.lbl_elapsed = tk.Label(action_card, text="", font=FONT_SM, fg=GRAY, bg=CARD_BG)
+        self.lbl_elapsed.pack(anchor=tk.W)
+
+        # Right: controls
+        right = tk.Frame(main, bg=BG, width=360)
+        right.pack(side=tk.RIGHT, fill=tk.BOTH, padx=(16, 0))
+        right.pack_propagate(False)
+
+        self._section(right, "Navigation Command")
+
+        cmd_frame = tk.Frame(right, bg=BG)
+        cmd_frame.pack(fill=tk.X, pady=(0, 6))
+        self.cmd_entry = tk.Entry(cmd_frame, font=FONT, bg=BG3, fg=WHITE,
+                                  insertbackground=WHITE, relief=tk.FLAT, bd=8)
+        self.cmd_entry.pack(side=tk.LEFT, fill=tk.X, expand=True)
+        # Enter sends; Up/Down walk the command history
+        self.cmd_entry.bind("<Return>", lambda e: self._send_command())
+        self.cmd_entry.bind("<Up>", self._history_up)
+        self.cmd_entry.bind("<Down>", self._history_down)
+        btn(cmd_frame, "Send", ACCENT, command=self._send_command, padx=16, pady=6,
+            font=FONT).pack(side=tk.RIGHT, padx=(8, 0))
+
+        self._section(right, "Quick Commands")
+        qf = tk.Frame(right, bg=BG)
+        qf.pack(fill=tk.X, pady=(0, 10))
+        quick = [("Forward", "move forward slowly"), ("Left", "turn left 30 degrees"),
+                 ("Right", "turn right 30 degrees"), ("Stop", "stop"),
+                 ("Walk", "walk forward"), ("Halt", "halt and wait"),
+                 ("What?", "what do you see"), ("Status", "__status__")]
+        for i, (label, cmd) in enumerate(quick):
+            btn(qf, label, command=lambda c=cmd: self._quick_cmd(c), pady=6).grid(
+                row=i//4, column=i%4, padx=3, pady=3, sticky="nsew")
+        for c in range(4): qf.columnconfigure(c, weight=1)
+
+        self._section(right, "Decision Log")
+        self.log = scrolledtext.ScrolledText(right, font=MONO, bg=DARK, fg=WHITE,
+                                             insertbackground=WHITE, relief=tk.FLAT,
+                                             wrap=tk.WORD, state=tk.DISABLED)
+        self.log.pack(fill=tk.BOTH, expand=True)
+        for tag, color in [("cmd", ACCENT), ("forward", GREEN_LT), ("turn", "#42A5F5"),
+                           ("stop", RED_LT), ("arrived", ACCENT), ("system", GRAY),
+                           ("error", RED_LT), ("think", "#FFD54F")]:
+            self.log.tag_config(tag, foreground=color)
+        btn(right, "Clear Log", command=self._clear_log, pady=4).pack(anchor=tk.E, pady=(6, 0))
+
+    # ═════════════════════════════════════════════════════════════════════════
+    # TAB 2: CAMERA
+    # ═════════════════════════════════════════════════════════════════════════
+    def _build_camera_tab(self):
+        main = tk.Frame(self.tab_camera, bg=BG)
+        main.pack(fill=tk.BOTH, expand=True, padx=16, pady=10)
+
+        # Left: preview
+        left = tk.Frame(main, bg=BG)
+        left.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
+
+        tk.Label(left, text="Camera Preview", font=FONT_LG, fg=WHITE, bg=BG).pack(anchor=tk.W, pady=(0, 6))
+        self.cam_preview = tk.Label(left, bg=DARK, text="Camera preview", fg=GRAY, font=FONT)
+        self.cam_preview.pack(fill=tk.BOTH, expand=True)
+
+        info_bar = tk.Frame(left, bg=CARD_BG, pady=8, padx=16)
+        info_bar.pack(fill=tk.X, pady=(10, 0))
+        self.lbl_cam_info = tk.Label(info_bar, text="Profile: medium | 640x480 @ 30Hz | Pipeline: -",
font=MONO, fg=GRAY, bg=CARD_BG) + self.lbl_cam_info.pack(anchor=tk.W) + + # Right: scrollable controls + right_scroll = ScrollFrame(main, bg=BG) + right_scroll.pack(side=tk.RIGHT, fill=tk.BOTH, padx=(16, 0)) + right_scroll.configure(width=400) + right = right_scroll.inner + + # Profile buttons + self._section(right, "Camera Profile") + pf = tk.Frame(right, bg=BG) + pf.pack(fill=tk.X, pady=(0, 10)) + self.cam_profile_btns = {} + for i, (key, (label, desc)) in enumerate(CAMERA_PROFILES.items()): + b = btn(pf, f"{label}\n{desc}", bg_color=TEAL if key == "medium" else BG3, + pady=10, command=lambda k=key: self._set_cam_profile(k)) + b.grid(row=0, column=i, padx=4, pady=4, sticky="nsew") + self.cam_profile_btns[key] = b + for c in range(len(CAMERA_PROFILES)): pf.columnconfigure(c, weight=1) + + # Resolution + self._section(right, "Custom Resolution") + rf = tk.Frame(right, bg=BG) + rf.pack(fill=tk.X, pady=(0, 10)) + + tk.Label(rf, text="Preset:", font=FONT_SM, fg=GRAY, bg=BG).grid(row=0, column=0, sticky="w", padx=(0,4)) + self.cam_preset_var = tk.StringVar(value="640x480 @ 30fps") + ttk.Combobox(rf, textvariable=self.cam_preset_var, state="readonly", + values=[f"{w}x{h} @ {f}fps" for _, w, h, f in RESOLUTION_PRESETS], + width=22).grid(row=0, column=1, columnspan=3, padx=4, pady=4, sticky="ew") + + for r, (lbl, attr) in enumerate([(("W:"), "cam_w"), ("H:", "cam_h"), ("FPS:", "cam_fps")], 1): + tk.Label(rf, text=lbl, font=FONT_SM, fg=GRAY, bg=BG).grid(row=r, column=0, sticky="w", padx=(0,4)) + e = tk.Entry(rf, font=FONT_SM, bg=BG3, fg=WHITE, width=8, relief=tk.FLAT, bd=6) + e.grid(row=r, column=1, padx=4, pady=3, sticky="ew") + setattr(self, f"{attr}_entry", e) + + self.cam_w_entry.insert(0, "640") + self.cam_h_entry.insert(0, "480") + self.cam_fps_entry.insert(0, "30") + + btn(rf, "Apply", BLUE, command=self._apply_resolution, pady=6).grid( + row=3, column=2, columnspan=2, padx=4, pady=4, sticky="ew") + for c in range(4): rf.columnconfigure(c, weight=1) + + # Actions + self._section(right, "Camera Actions") + for label, color, cmd in [("Capture Photo", GREEN, self._cam_capture), + ("Toggle Preview", TEAL, self._cam_toggle_preview), + ("Refresh Status", BG3, self._cam_refresh)]: + btn(right, label, color, command=cmd, font=FONT, pady=10).pack(fill=tk.X, padx=4, pady=3) + + # Status log + self._section(right, "Camera Status") + self.cam_status_text = scrolledtext.ScrolledText(right, font=MONO, bg=DARK, fg=WHITE, + relief=tk.FLAT, wrap=tk.WORD, + state=tk.DISABLED, height=8) + self.cam_status_text.pack(fill=tk.X, padx=4, pady=(0, 10)) + + # ═════════════════════════════════════════════════════════════════════════ + # TAB 3: LiDAR — matches SLAM_GUI.py layout exactly + # ═════════════════════════════════════════════════════════════════════════ + def _build_lidar_tab(self): + # Colors matching SLAM_GUI.py + CBG = "#0a0a0a" # card bg (rgba(255,255,255,0.04) on dark) + CBDR = "#1a1a2e" # card border area + PILL = "#2b2b2b" # default pill button + SLAM_ACCENT = "#E87722" + + main = tk.PanedWindow(self.tab_lidar, orient=tk.HORIZONTAL, bg=BG, + sashwidth=4, sashrelief=tk.FLAT) + main.pack(fill=tk.BOTH, expand=True) + + # ── LEFT: Sidebar (controls) — matches SLAM_GUI sidebar ── + left_scroll = ScrollFrame(main, bg=CBDR) + main.add(left_scroll, width=360, minsize=300) + sb = left_scroll.inner + sb.configure(bg=CBDR) + + tk.Label(sb, text="SLAM COMMANDER", font=("Segoe UI", 18, "bold"), + fg=SLAM_ACCENT, bg=CBDR).pack(anchor=tk.W, padx=14, pady=(14, 10)) + + # --- Card: NETWORK SETTINGS --- + net = 
self._slam_card(sb, "NETWORK SETTINGS")
+        cf = tk.Frame(net, bg=CBG)
+        cf.pack(fill=tk.X, pady=4)
+        self.lidar_iface = self._slam_entry(cf, "enp3s0")
+        self.lidar_iface.pack(fill=tk.X, pady=2)
+        self.lidar_ip = self._slam_entry(cf, "192.168.123.222")
+        self.lidar_ip.pack(fill=tk.X, pady=2)
+
+        r1 = tk.Frame(net, bg=CBG)
+        r1.pack(fill=tk.X, pady=4)
+        self._pill(r1, "CONNECT", "#1677ff", lambda: self._lidar_cmd("connect")).pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0,3))
+        self._pill(r1, "START", "#21a453", lambda: self._lidar_cmd("start")).pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0,3))
+        self._pill(r1, "LOCALIZE-ONLY", "#7a4cff", lambda: self._lidar_cmd("localize_only")).pack(side=tk.LEFT, fill=tk.X, expand=True)
+
+        r2 = tk.Frame(net, bg=CBG)
+        r2.pack(fill=tk.X, pady=4)
+        self._pill(r2, "PAUSE", "#c78a12", lambda: self._lidar_cmd("pause")).pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0,3))
+        self._pill(r2, "STOP", "#d0302f", lambda: self._lidar_cmd("stop")).pack(side=tk.LEFT, fill=tk.X, expand=True)
+
+        self._pill(net, "RESET (NO SAVE)", "#3a3a3a", lambda: self._lidar_cmd("reset")).pack(fill=tk.X, pady=4)
+
+        # --- Card: WORKFLOW ---
+        wf = self._slam_card(sb, "WORKFLOW (BEFORE RUN)")
+        for lbl, cmd in [("MAP NEW PLACE", "workflow_map_new"),
+                         ("NAVIGATE IN MAPPED PLACE", "workflow_nav_map"),
+                         ("LIVE NAV WITH MAP", "workflow_live_nav"),
+                         ("LIVE NAV (NO MAP)", "workflow_live_nav_nomap"),
+                         ("QUICK DEMO", "workflow_demo")]:
+            self._pill(wf, lbl, PILL, lambda c=cmd: self._lidar_cmd(c)).pack(fill=tk.X, pady=2)
+
+        self.lbl_workflow = tk.Label(wf, text="Workflow: NOT_SELECTED", font=MONO, fg=WHITE, bg=CBG)
+        self.lbl_workflow.pack(anchor=tk.W, pady=(6, 0))
+
+        # --- Card: MAP CONTROLS ---
+        mc = self._slam_card(sb, "MAP CONTROLS")
+        self.lidar_map_name = self._slam_entry(mc, "Work")
+        self.lidar_map_name.pack(fill=tk.X, pady=4)
+
+        self._pill(mc, "SAVE NOW", PILL, lambda: self._lidar_cmd("save_map")).pack(fill=tk.X, pady=2)
+        self._pill(mc, "EXPORT NAV", PILL, lambda: self._lidar_cmd("export_nav")).pack(fill=tk.X, pady=2)
+
+        # Goal
+        # Tk colors carry no alpha channel, so the translucent whites from
+        # SLAM_GUI.py are approximated with opaque grays ("#999999").
+        tk.Label(mc, text="NAVIGATION GOAL", font=FONT_SM, fg="#999999", bg=CBG).pack(anchor=tk.W, pady=(10, 4))
+        gr = tk.Frame(mc, bg=CBG)
+        gr.pack(fill=tk.X, pady=2)
+        self.lidar_goal_x = self._slam_entry(gr, "0.0", width=8)
+        self.lidar_goal_x.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 3))
+        self.lidar_goal_y = self._slam_entry(gr, "0.0", width=8)
+        self.lidar_goal_y.pack(side=tk.LEFT, fill=tk.X, expand=True)
+        gb = tk.Frame(mc, bg=CBG)
+        gb.pack(fill=tk.X, pady=2)
+        self._pill(gb, "SET GOAL", PILL, self._lidar_set_goal).pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0,3))
+        self._pill(gb, "CLEAR", PILL, lambda: self._lidar_cmd("clear_goal")).pack(side=tk.LEFT, fill=tk.X, expand=True)
+
+        # Mission
+        tk.Label(mc, text="MISSION WAYPOINTS", font=FONT_SM, fg="#999999", bg=CBG).pack(anchor=tk.W, pady=(10, 4))
+        self.lidar_mission = self._slam_entry(mc, "1.0,0.0; 2.0,0.0")
+        self.lidar_mission.pack(fill=tk.X, pady=2)
+        mm = tk.Frame(mc, bg=CBG)
+        mm.pack(fill=tk.X, pady=2)
+        self._pill(mm, "MISSION START", PILL, lambda: self._lidar_cmd("mission_start")).pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0,2))
+        for lbl, cmd in [("PAUSE M", "mission_pause"), ("RESUME M", "mission_resume"), ("STOP M", "mission_stop")]:
+            self._pill(mm, lbl, PILL, lambda c=cmd: self._lidar_cmd(c)).pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(2,0))
+
+        # Density
+        tk.Label(mc, text="POINT DENSITY", font=FONT_SM, fg="#999999", bg=CBG).pack(anchor=tk.W, pady=(10, 4))
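+        # LOW / MEDIUM / HIGH send "density_low" / "density_medium" /
+        # "density_high" through _lidar_cmd to the local SLAM engine.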
+        dr = tk.Frame(mc, bg=CBG)
+        dr.pack(fill=tk.X, pady=2)
+        for lbl, val in [("LOW", "low"), ("MEDIUM", "medium"), ("HIGH", "high")]:
+            self._pill(dr, lbl, PILL, lambda v=val: self._lidar_cmd(f"density_{v}")).pack(side=tk.LEFT, fill=tk.X, expand=True, padx=1)
+
+        # --- Card: LOCALIZATION ---
+        loc = self._slam_card(sb, "LOCALIZATION")
+        self._pill(loc, "LOAD REF MAP", PILL, lambda: self._lidar_cmd("load_ref")).pack(fill=tk.X, pady=2)
+        self._pill(loc, "LOCALIZE NOW", PILL, lambda: self._lidar_cmd("localize_now")).pack(fill=tk.X, pady=2)
+        self._pill(loc, "CLEAR REF", PILL, lambda: self._lidar_cmd("clear_ref")).pack(fill=tk.X, pady=2)
+
+        # ── RIGHT: Map + Status (main view) ──
+        right = tk.Frame(main, bg=BG)
+        main.add(right, minsize=500)
+
+        # Map canvas
+        self.map_canvas = tk.Canvas(right, bg=DARK, highlightthickness=0)
+        self.map_canvas.pack(fill=tk.BOTH, expand=True, padx=8, pady=(8, 4))
+        self._map_points = []
+        self._map_robot_pos = None
+        self._map_goal = None
+        self._map_scale = 30.0
+        # Re-render the map whenever the canvas is resized
+        self.map_canvas.bind("<Configure>", lambda e: self._render_map(self._map_points, self._map_robot_pos, self._map_goal))
+
+        # Status strip
+        status_strip = tk.Frame(right, bg=BG)
+        status_strip.pack(fill=tk.X, padx=8, pady=(0, 4))
+
+        self.lidar_cards = {}
+        for i, (key, title, default, color) in enumerate([
+                ("status", "Status", "IDLE", GRAY), ("points", "Points", "0", GRAY),
+                ("pose", "Position", "0, 0, 0", GRAY), ("fps", "FPS", "- / -", GRAY)]):
+            card = tk.Frame(status_strip, bg=CARD_BG, padx=14, pady=8)
+            card.grid(row=0, column=i, padx=3, pady=3, sticky="nsew")
+            tk.Label(card, text=title, font=("Segoe UI", 9), fg=GRAY, bg=CARD_BG).pack(anchor=tk.W)
+            lbl = tk.Label(card, text=default, font=FONT_LG, fg=color, bg=CARD_BG)
+            lbl.pack(anchor=tk.W)
+            self.lidar_cards[key] = lbl
+            status_strip.columnconfigure(i, weight=1)
+
+        # Perf strip
+        perf_strip = tk.Frame(right, bg=BG)
+        perf_strip.pack(fill=tk.X, padx=8, pady=(0, 4))
+        self.lidar_perf = {}
+        for i, (key, title, default) in enumerate([
+                ("icp", "ICP Time", "- ms"), ("cpu", "CPU", "-%"),
+                ("queue", "Queue Lag", "-"), ("growth", "Growth", "- pts/s")]):
+            card = tk.Frame(perf_strip, bg=CARD_BG, padx=10, pady=6)
+            card.grid(row=0, column=i, padx=3, pady=2, sticky="nsew")
+            tk.Label(card, text=title, font=("Segoe UI", 8), fg=GRAY, bg=CARD_BG).pack(anchor=tk.W)
+            lbl = tk.Label(card, text=default, font=FONT_SM, fg=WHITE, bg=CARD_BG)
+            lbl.pack(anchor=tk.W)
+            self.lidar_perf[key] = lbl
+            perf_strip.columnconfigure(i, weight=1)
+
+        # Loc + Nav + Log row
+        bottom = tk.Frame(right, bg=BG)
+        bottom.pack(fill=tk.X, padx=8, pady=(0, 8))
+
+        loc_card = tk.Frame(bottom, bg=CARD_BG, padx=12, pady=8)
+        loc_card.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=(0, 3))
+        tk.Label(loc_card, text="Localization", font=FONT_SM, fg=GRAY, bg=CARD_BG).pack(anchor=tk.W)
+        self.lbl_loc_state = tk.Label(loc_card, text="State: -", font=FONT_SM, fg=GRAY, bg=CARD_BG)
+        self.lbl_loc_state.pack(anchor=tk.W)
+        self.lbl_loc_fitness = tk.Label(loc_card, text="Fitness: - | RMSE: -", font=("Segoe UI", 9), fg=GRAY, bg=CARD_BG)
+        self.lbl_loc_fitness.pack(anchor=tk.W)
+        self.lbl_loc_match = tk.Label(loc_card, text="Match: -", font=("Segoe UI", 9), fg=GRAY, bg=CARD_BG)
+        self.lbl_loc_match.pack(anchor=tk.W)
+
+        nav_card = tk.Frame(bottom, bg=CARD_BG, padx=12, pady=8)
+        nav_card.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=(3, 3))
+        tk.Label(nav_card, text="Navigation", font=FONT_SM, fg=GRAY, bg=CARD_BG).pack(anchor=tk.W)
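+        # Goal / Cmd below are refreshed from the "nav" dict of each SLAM
+        # FRAME payload in _drain_slam_data().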
text="Goal: -", font=FONT_SM, fg=GRAY, bg=CARD_BG) + self.lbl_nav_goal.pack(anchor=tk.W) + self.lbl_nav_cmd = tk.Label(nav_card, text="Cmd: -", font=("Segoe UI", 9), fg=GRAY, bg=CARD_BG) + self.lbl_nav_cmd.pack(anchor=tk.W) + + log_card = tk.Frame(bottom, bg=CARD_BG, padx=4, pady=4) + log_card.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=(3, 0)) + self.lidar_log = scrolledtext.ScrolledText(log_card, font=("Cascadia Code", 9), bg=DARK, fg=WHITE, + relief=tk.FLAT, wrap=tk.WORD, + state=tk.DISABLED, height=5, width=30) + self.lidar_log.pack(fill=tk.BOTH, expand=True) + for tag, c in [("ok", GREEN_LT), ("warn", "#c78a12"), ("err", RED_LT), ("info", GRAY)]: + self.lidar_log.tag_config(tag, foreground=c) + + # ─── SLAM_GUI-style widget helpers ───────────────────────────────────── + def _slam_card(self, parent, title): + """Card matching SLAM_GUI.py card_style().""" + CBG = "#0a0a0a" + card = tk.Frame(parent, bg=CBG, padx=14, pady=14, highlightbackground="#ffffff10", + highlightthickness=1) + card.pack(fill=tk.X, padx=14, pady=6) + tk.Label(card, text=title, font=("Segoe UI", 11, "bold"), fg="#ffffffc0", bg=CBG).pack(anchor=tk.W, pady=(0, 8)) + return card + + def _pill(self, parent, text, bg_color, command): + """Pill button matching SLAM_GUI.py pill_button().""" + return tk.Button(parent, text=text, font=("Segoe UI", 10, "bold"), + bg=bg_color, fg=WHITE, activebackground=ACCENT, activeforeground=WHITE, + relief=tk.FLAT, cursor="hand2", pady=8, command=command) + + def _slam_entry(self, parent, default="", width=0): + """Entry matching SLAM_GUI.py QLineEdit style.""" + kw = {"width": width} if width else {} + e = tk.Entry(parent, font=("Segoe UI", 10, "bold"), bg="#00000050", fg=WHITE, + insertbackground=WHITE, relief=tk.FLAT, bd=8, **kw) + e.insert(0, default) + return e + + # ═════════════════════════════════════════════════════════════════════════ + # HELPERS + # ═════════════════════════════════════════════════════════════════════════ + def _section(self, parent, title): + tk.Label(parent, text=title, font=FONT_LG, fg=WHITE, bg=BG).pack(anchor=tk.W, pady=(12, 2), padx=4) + tk.Frame(parent, bg=BG3, height=1).pack(fill=tk.X, padx=4, pady=(0, 6)) + + # ─── MAP DRAWING ────────────────────────────────────────────────────────── + def _draw_map_grid(self): + c = self.map_canvas + c.delete("grid") + w = c.winfo_width() or 600 + h = c.winfo_height() or 400 + cx, cy = w // 2, h // 2 + s = self._map_scale + # Draw grid lines every meter + grid_color = "#1A2A40" + for i in range(-20, 21): + x = cx + i * s + c.create_line(x, 0, x, h, fill=grid_color, tags="grid") + y = cy + i * s + c.create_line(0, y, w, y, fill=grid_color, tags="grid") + # Axes + c.create_line(cx, 0, cx, h, fill="#2A4060", width=1, tags="grid") + c.create_line(0, cy, w, cy, fill="#2A4060", width=1, tags="grid") + # Origin label + c.create_text(cx + 4, cy - 8, text="0,0", fill=GRAY, font=("Segoe UI", 8), anchor="w", tags="grid") + + def _render_map(self, points=None, robot_pos=None, goal=None): + """Render 2D top-down map with point cloud, robot, and goal.""" + c = self.map_canvas + c.update_idletasks() + w = c.winfo_width() or 600 + h = c.winfo_height() or 400 + cx, cy = w // 2, h // 2 + s = self._map_scale + + c.delete("all") + self._draw_map_grid() + + # Draw points + if points: + for px, py in points: + sx = cx + px * s + sy = cy - py * s # flip Y (up = positive) + if 0 <= sx <= w and 0 <= sy <= h: + c.create_rectangle(sx, sy, sx + 2, sy + 2, fill="#4FC3F7", outline="", tags="pts") + + # Draw goal + if goal: + gx = cx + 
goal[0] * s + gy = cy - goal[1] * s + c.create_oval(gx - 8, gy - 8, gx + 8, gy + 8, outline="#FF5722", width=2, tags="goal") + c.create_line(gx - 5, gy, gx + 5, gy, fill="#FF5722", width=2, tags="goal") + c.create_line(gx, gy - 5, gx, gy + 5, fill="#FF5722", width=2, tags="goal") + + # Draw robot + if robot_pos: + rx = cx + robot_pos[0] * s + ry = cy - robot_pos[1] * s + c.create_oval(rx - 6, ry - 6, rx + 6, ry + 6, fill="#66BB6A", outline=WHITE, width=2, tags="robot") + # Direction indicator (forward = up in robot frame) + c.create_line(rx, ry, rx, ry - 14, fill="#66BB6A", width=2, arrow=tk.LAST, tags="robot") + + # Scale indicator + bar_len = s # 1 meter + c.create_line(10, h - 15, 10 + bar_len, h - 15, fill=WHITE, width=2, tags="scale") + c.create_text(10 + bar_len / 2, h - 6, text="1m", fill=GRAY, font=("Segoe UI", 8), tags="scale") + + def _log(self, text, tag="system"): + self.log.configure(state=tk.NORMAL) + self.log.insert(tk.END, text + "\n", tag) + self.log.see(tk.END) + self.log.configure(state=tk.DISABLED) + + def _lidar_log(self, text, tag="info"): + self.lidar_log.configure(state=tk.NORMAL) + self.lidar_log.insert(tk.END, f"[{time.strftime('%H:%M:%S')}] {text}\n", tag) + self.lidar_log.see(tk.END) + self.lidar_log.configure(state=tk.DISABLED) + + def _cam_log(self, text): + self.cam_status_text.configure(state=tk.NORMAL) + self.cam_status_text.insert(tk.END, f"[{time.strftime('%H:%M:%S')}] {text}\n") + self.cam_status_text.see(tk.END) + self.cam_status_text.configure(state=tk.DISABLED) + + def _clear_log(self): + self.log.configure(state=tk.NORMAL) + self.log.delete("1.0", tk.END) + self.log.configure(state=tk.DISABLED) + + # ─── NAVIGATION ─────────────────────────────────────────────────────────── + def _send_command(self): + cmd = self.cmd_entry.get().strip() + if not cmd or not self.connected: return + self.cmd_entry.delete(0, tk.END) + self.cmd_history.insert(0, cmd) + self.history_idx = -1 + if cmd == "__status__": + asyncio.run_coroutine_threadsafe(self._ws_send({"type": "ping"}), self.loop) + return + self._log(f"\n[{time.strftime('%H:%M:%S')}] You: {cmd}", "cmd") + asyncio.run_coroutine_threadsafe( + self._ws_send({"type": "command", "command": cmd}), self.loop) + + def _quick_cmd(self, cmd): + self.cmd_entry.delete(0, tk.END) + self.cmd_entry.insert(0, cmd) + self._send_command() + + def _history_up(self, event): + if self.cmd_history and self.history_idx < len(self.cmd_history) - 1: + self.history_idx += 1 + self.cmd_entry.delete(0, tk.END) + self.cmd_entry.insert(0, self.cmd_history[self.history_idx]) + + def _history_down(self, event): + if self.history_idx > 0: + self.history_idx -= 1 + self.cmd_entry.delete(0, tk.END) + self.cmd_entry.insert(0, self.cmd_history[self.history_idx]) + elif self.history_idx == 0: + self.history_idx = -1 + self.cmd_entry.delete(0, tk.END) + + # ─── CAMERA ─────────────────────────────────────────────────────────────── + def _set_cam_profile(self, profile): + self._cam_profile = profile + for k, b in self.cam_profile_btns.items(): + b.configure(bg=TEAL if k == profile else BG3) + asyncio.run_coroutine_threadsafe( + self._ws_send({"type": "set_camera", "profile": profile}), self.loop) + self._cam_log(f"Switching to profile: {profile}") + + def _apply_resolution(self): + try: + w, h, f = int(self.cam_w_entry.get()), int(self.cam_h_entry.get()), int(self.cam_fps_entry.get()) + except ValueError: + self._cam_log("Invalid resolution values"); return + asyncio.run_coroutine_threadsafe( + self._ws_send({"type": "set_resolution", "width": w, 
"height": h, "fps": f}), self.loop) + self._cam_log(f"Applying: {w}x{h} @ {f}fps") + + def _cam_capture(self): + asyncio.run_coroutine_threadsafe(self._ws_send({"type": "capture"}), self.loop) + self._cam_log("Capture requested") + + def _cam_toggle_preview(self): + self._cam_preview_on = not self._cam_preview_on + if not self._cam_preview_on: + self.cam_preview.configure(image="", text="Preview paused") + self.cam_preview.image = None + self._cam_log(f"Preview {'ON' if self._cam_preview_on else 'OFF'}") + + def _cam_refresh(self): + asyncio.run_coroutine_threadsafe(self._ws_send({"type": "get_camera"}), self.loop) + self._cam_log("Refreshing...") + + # ─── LiDAR (local SLAM engine) ─────────────────────────────────────────── + def _ensure_slam(self) -> bool: + if not SLAM_AVAILABLE: + self._lidar_log("SLAM not available — check G1_Lootah/Lidar path", "err") + return False + if self._slam is None: + try: + self._slam = SlamEngineClient() + self._lidar_log("SLAM engine created", "ok") + except Exception as e: + self._lidar_log(f"SLAM init error: {e}", "err") + return False + return True + + def _lidar_cmd(self, cmd): + if not self._ensure_slam(): + return + try: + if cmd == "connect": + self._slam.start_process() + self._slam.connect() + self._slam_running = True + self._lidar_log("SLAM started and connected", "ok") + elif cmd == "start": + self._slam.start_mapping() + self._lidar_log("Mapping started", "ok") + elif cmd == "pause": + self._slam.pause_mapping() + self._lidar_log("Mapping paused", "warn") + elif cmd == "stop": + self._slam.stop_mapping() + self._lidar_log("Mapping stopped", "info") + elif cmd == "reset": + self._slam.stop_process() + self._slam_running = False + self._lidar_log("SLAM reset (no save)", "warn") + elif cmd == "localize_only": + self._slam.start_process() + self._slam.connect() + self._slam_running = True + self._lidar_log("Localize-only mode", "ok") + elif cmd == "save_map": + name = self.lidar_map_name.get().strip() or "Work" + self._slam.export_map(name) + self._lidar_log(f"Map saved: {name}", "ok") + elif cmd == "export_nav": + name = self.lidar_map_name.get().strip() or "nav" + self._slam.export_nav(name) + self._lidar_log(f"Nav exported: {name}", "ok") + elif cmd == "load_ref": + path = filedialog.askopenfilename(title="Select Reference Map", + filetypes=[("PLY files", "*.ply"), ("All", "*.*")]) + if not path: return + self._slam.load_ref_map(path) + self._lidar_log(f"Ref loaded: {path}", "ok") + elif cmd == "localize_now": + self._slam.localize_now() + self._lidar_log("Localization triggered", "info") + elif cmd == "clear_ref": + self._slam.clear_ref() + self._lidar_log("Ref cleared", "info") + elif cmd == "clear_goal": + self._slam.clear_nav_goal() + self._map_goal = None + self._lidar_log("Goal cleared", "info") + elif cmd == "mission_start": + waypoints = [] + for pair in self.lidar_mission.get().strip().split(";"): + parts = pair.strip().split(",") + if len(parts) == 2: + try: waypoints.append({"x": float(parts[0]), "y": float(parts[1])}) + except ValueError: pass + self._slam.mission_start(waypoints) + self._lidar_log(f"Mission started ({len(waypoints)} waypoints)", "ok") + elif cmd == "mission_pause": + self._slam.mission_pause() + self._lidar_log("Mission paused", "warn") + elif cmd == "mission_resume": + self._slam.mission_resume() + self._lidar_log("Mission resumed", "ok") + elif cmd == "mission_stop": + self._slam.mission_stop() + self._lidar_log("Mission stopped", "info") + elif cmd.startswith("density_"): + mode = cmd.replace("density_", "") + 
self._slam.set_density(mode) + self._lidar_log(f"Density: {mode}", "info") + elif cmd.startswith("workflow_"): + self._lidar_log(f"Workflow: {cmd}", "info") + else: + self._lidar_log(f"Unknown: {cmd}", "warn") + except Exception as e: + self._lidar_log(f"SLAM error: {e}", "err") + + def _lidar_set_goal(self): + if not self._ensure_slam(): return + try: + x, y = float(self.lidar_goal_x.get()), float(self.lidar_goal_y.get()) + except ValueError: + self._lidar_log("Invalid coordinates", "err"); return + try: + self._slam.set_nav_goal(x, y) + self._map_goal = (x, y) + self._lidar_log(f"Goal set: ({x}, {y})", "ok") + except Exception as e: + self._lidar_log(f"Goal error: {e}", "err") + + def _start_slam_poll(self): + """Poll SLAM data_q and status_q every 200ms to update map and status.""" + self._poll_slam() + + def _poll_slam(self): + if self._slam is not None and self._slam_running: + try: + self._drain_slam_data() + except Exception: + pass + self.root.after(200, self._poll_slam) + + def _drain_slam_data(self): + """Drain SLAM queues and update map + status displays.""" + slam = self._slam + if slam is None: + return + + # Drain data_q for frame payloads + payload = None + try: + while not slam.data_q.empty(): + item = slam.data_q.get_nowait() + if isinstance(item, tuple) and len(item) >= 2 and item[0] == "FRAME": + payload = item[1] + except Exception: + pass + + # Drain status_q + try: + while not slam.status_q.empty(): + msg = slam.status_q.get_nowait() + if isinstance(msg, tuple) and len(msg) >= 2: + key, val = msg[0], msg[1] + if isinstance(val, str): + self._lidar_log(f"{key}: {val}", "info") + elif key == "LOCALIZE" and isinstance(val, dict): + fit = val.get("fitness", 0) + rmse = val.get("rmse", 0) + self.lbl_loc_fitness.configure(text=f"Fitness: {fit:.3f} | RMSE: {rmse:.4f}") + elif key == "LOC_STATE": + ls = str(val) + c = GREEN_LT if ls == "TRACKING" else RED_LT if ls == "LOST" else ORANGE + self.lbl_loc_state.configure(text=f"State: {ls}", fg=c) + elif key == "ERROR": + self._lidar_log(f"ERROR: {val}", "err") + elif key == "INFO": + if isinstance(val, str): + self._lidar_log(val, "info") + except Exception: + pass + + if payload is None: + return + + # Update status cards + pts = payload.get("stable_points") + if pts is not None and isinstance(pts, np.ndarray): + self.lidar_cards["points"].configure(text=f"{len(pts):,}", fg=WHITE) + # Downsample for map + if len(pts) > 0: + stride = max(1, len(pts) // 2000) + self._map_points = pts[::stride, :2].tolist() + + pose = payload.get("pose") + if pose is not None and isinstance(pose, np.ndarray) and pose.shape == (4, 4): + x, y, z = float(pose[0, 3]), float(pose[1, 3]), float(pose[2, 3]) + self.lidar_cards["pose"].configure(text=f"{x:.2f}, {y:.2f}, {z:.2f}", fg=WHITE) + self._map_robot_pos = (x, y) + + mode = payload.get("mode", "") + if mode: + s = str(mode).upper() + self.lidar_cards["status"].configure(text=s, + fg=GREEN_LT if s == "MAPPING" else BLUE if "LOCALIZ" in s else ORANGE if s == "PAUSED" else GRAY) + + perf = payload.get("perf", {}) + if perf: + self.lidar_cards["fps"].configure( + text=f"{perf.get('input_fps',0):.0f} / {perf.get('publish_fps',0):.0f}", fg=WHITE) + self.lidar_perf["icp"].configure(text=f"{perf.get('icp_ms',0):.1f} ms") + self.lidar_perf["cpu"].configure(text=f"{perf.get('cpu_percent',0):.0f}%") + self.lidar_perf["queue"].configure(text=str(perf.get("queue_lag", 0))) + self.lidar_perf["growth"].configure(text=f"{perf.get('stable_growth_per_sec',0):.0f} pts/s") + + loc_state = payload.get("loc_state", "") + 
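+        # FRAME payloads may also carry "loc_state" (TRACKING / LOST / ...);
+        # recolor the Localization card with the same scheme used for
+        # LOC_STATE status messages above.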
if loc_state: + c = GREEN_LT if loc_state == "TRACKING" else RED_LT if loc_state == "LOST" else ORANGE + self.lbl_loc_state.configure(text=f"State: {loc_state}", fg=c) + + nav = payload.get("nav", {}) + if isinstance(nav, dict): + goal = nav.get("goal") + if goal and isinstance(goal, dict): + self.lbl_nav_goal.configure(text=f"Goal: ({goal.get('x',0):.2f}, {goal.get('y',0):.2f})", fg=WHITE) + self._map_goal = (goal.get("x", 0), goal.get("y", 0)) + cmd_info = nav.get("cmd") + if cmd_info and isinstance(cmd_info, dict): + self.lbl_nav_cmd.configure( + text=f"Cmd: lin={cmd_info.get('linear_mps','-')} ang={cmd_info.get('angular_rps','-')}") + + # Render map + self._render_map(self._map_points, self._map_robot_pos, self._map_goal) + + # ═════════════════════════════════════════════════════════════════════════ + # WEBSOCKET + # ═════════════════════════════════════════════════════════════════════════ + def _start_ws_thread(self): + self.loop = asyncio.new_event_loop() + self.ws_thread = threading.Thread(target=lambda: ( + asyncio.set_event_loop(self.loop), + self.loop.run_until_complete(self._ws_connect_loop())), daemon=True) + self.ws_thread.start() + + async def _ws_connect_loop(self): + while True: + try: + self._set_status("Connecting...", RED_LT) + async with websockets.connect(WS_URL) as ws: + self.ws, self.connected = ws, True + self._set_status(f"Connected to {JETSON_IP}", GREEN_LT) + self.root.after(0, lambda: self.lbl_conn.configure(text=" CONNECTED", fg=GREEN_LT)) + await self._ws_receive(ws) + except Exception as e: + self.connected, self.ws = False, None + self._set_status(f"Reconnecting... ({e})", RED_LT) + self.root.after(0, lambda: self.lbl_conn.configure(text=" DISCONNECTED", fg=RED_LT)) + await asyncio.sleep(3) + + async def _ws_receive(self, ws): + async for raw in ws: + try: + data = json.loads(raw) + t = data.get("type") + if t == "frame": + self.root.after(0, self._update_frame, data["data"]) + elif t == "status": + lidar = "ALIVE" if data.get("lidar") else "OFFLINE" + model = data.get("model", "?") + self.root.after(0, lambda l=lidar, m=model: ( + self.lbl_lidar.configure(text=f"LiDAR: {l}", fg=GREEN_LT if l == "ALIVE" else RED_LT), + self.lbl_model.configure(text=f"Model: {m}", fg=TEAL))) + self._log(f"[{data.get('timestamp','')}] {data.get('message','')}", "system") + if data.get("camera_config"): + self.root.after(0, self._update_cam_config, data["camera_config"]) + elif t == "thinking": + self._log(f" Thinking... 
('{data.get('command','')}')", "think") + self.root.after(0, lambda: self.lbl_action.configure(text="Thinking...", fg="#FFD54F")) + elif t == "decision": + a, e, c, ts = data.get("action",""), data.get("elapsed","?"), data.get("cmd",""), data.get("timestamp","") + tag = "forward" if c=="FORWARD" else "turn" if c in ("LEFT","RIGHT") else "stop" if c=="STOP" else "arrived" if c=="ARRIVED" else "system" + self._log(f" [{ts}] Decision ({e}s): {a}", tag) + color = GREEN_LT if c=="FORWARD" else "#42A5F5" if c in ("LEFT","RIGHT") else RED_LT if c=="STOP" else ACCENT + self.root.after(0, lambda a=a,cl=color,e=e: ( + self.lbl_action.configure(text=a, fg=cl), + self.lbl_elapsed.configure(text=f"Response: {e}s"))) + elif t == "camera_config": + self.root.after(0, self._update_cam_config, data) + elif t == "pong": + self._log(f" [{data.get('timestamp','')}] Status - LiDAR: {'OK' if data.get('lidar') else 'OFFLINE'}", "system") + elif t == "error": + self._log(f" ERROR: {data.get('message','')}", "error") + except Exception as e: + self._log(f" Parse error: {e}", "error") + + async def _ws_send(self, data): + if self.ws and self.connected: + try: await self.ws.send(json.dumps(data)) + except Exception as e: self._log(f" Send error: {e}", "error") + + # ─── UPDATES ────────────────────────────────────────────────────────────── + def _update_frame(self, b64): + try: + img = Image.open(io.BytesIO(base64.b64decode(b64))) + img_resized = img.resize((640, 480), Image.LANCZOS) + photo = ImageTk.PhotoImage(img_resized) + self.canvas.configure(image=photo, text="") + self.canvas.image = photo + if self._cam_preview_on: + p2 = ImageTk.PhotoImage(img_resized) + self.cam_preview.configure(image=p2, text="") + self.cam_preview.image = p2 + except Exception: pass + + def _update_cam_config(self, d): + p, w, h, f = d.get("profile","?"), d.get("width","?"), d.get("height","?"), d.get("fps","?") + active = d.get("pipeline_active", False) + self.lbl_cam_info.configure(text=f"Profile: {p} | {w}x{h} @ {f}Hz | Pipeline: {'active' if active else 'stopped'}") + self._cam_profile = p + for k, b in self.cam_profile_btns.items(): + b.configure(bg=TEAL if k == p else BG3) + self._cam_log(f"Camera: {p} ({w}x{h}@{f}) pipeline={'active' if active else 'stopped'}") + + def _set_status(self, msg, color=GRAY): + self.root.after(0, lambda: self.lbl_status.configure(text=msg, fg=color)) + + +if __name__ == "__main__": + root = tk.Tk() + MarcusGUI(root) + root.mainloop() diff --git a/Config/config_Arm.json b/Config/config_Arm.json new file mode 100644 index 0000000..271ac62 --- /dev/null +++ b/Config/config_Arm.json @@ -0,0 +1,14 @@ +{ + "arm_available": false, + "actions": { + "release": 99, "two_kiss": 11, "left_kiss": 12, "right_kiss": 13, + "hands_up": 15, "clap": 17, "high_five": 18, "hug": 19, + "heart": 20, "right_heart": 21, "reject": 22, "right_up": 23, + "xray": 24, "face_wave": 25, "high_wave": 26, "shake_hand": 27 + }, + "aliases": { + "wave": 26, "raise_right": 23, "raise_left": 15, "both_up": 15, + "lower": 99, "clap": 17, "high_five": 18, "hug": 19, + "heart": 20, "reject": 22, "shake_hand": 27, "face_wave": 25, "right_heart": 21 + } +} diff --git a/Config/config_Brain.json b/Config/config_Brain.json new file mode 100644 index 0000000..a7f208b --- /dev/null +++ b/Config/config_Brain.json @@ -0,0 +1,15 @@ +{ + "ollama_model": "qwen2.5vl:3b", + "max_history": 6, + "num_batch": 128, + "num_ctx": 2048, + "num_predict_main": 200, + "num_predict_goal": 80, + "num_predict_patrol": 100, + "num_predict_talk": 80, + 
"num_predict_verify": 10, + "warmup_num_predict": 5, + "main_prompt": "You are Marcus, a humanoid robot. Look at the image and follow the command.\n{facts}\n\nCommand: \"{command}\"\n\nReply with ONLY this JSON — no markdown, no explanation:\n{{\"actions\":[{{\"move\":\"forward|backward|left|right|stop\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"one sentence\",\"abort\":null}}\n\nRULES:\n- actions is a list of movement steps, max duration 5.0s each\n- move: \"forward\" \"backward\" \"left\" \"right\" \"stop\"\n- arm: \"wave\" \"raise_right\" \"raise_left\" \"clap\" \"high_five\" \"hug\" \"heart\" \"shake_hand\" \"face_wave\" or null\n- arm is NEVER a move value\n- questions/descriptions: actions=[]\n- obstacle < 0.5m: abort = \"obstacle detected\"\n- \"90 degrees\" = 5.0s | \"45 degrees\" = 2.5s | \"1 step\" = 1.0s\n\nEXAMPLES:\n\"turn right\" -> {{\"actions\":[{{\"move\":\"right\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Turning right\",\"abort\":null}}\n\"turn right 90 degrees\" -> {{\"actions\":[{{\"move\":\"right\",\"duration\":5.0}}],\"arm\":null,\"speak\":\"Turning 90 degrees\",\"abort\":null}}\n\"move back then left\" -> {{\"actions\":[{{\"move\":\"backward\",\"duration\":2.0}},{{\"move\":\"left\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Moving back then left\",\"abort\":null}}\n\"wave\" -> {{\"actions\":[],\"arm\":\"wave\",\"speak\":\"Waving\",\"abort\":null}}\n\"raise your right arm\" -> {{\"actions\":[],\"arm\":\"raise_right\",\"speak\":\"Raising right arm\",\"abort\":null}}\n\"walk forward and wave\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":\"wave\",\"speak\":\"Walking and waving\",\"abort\":null}}\n\"what do you see\" -> {{\"actions\":[],\"arm\":null,\"speak\":\"I see...\",\"abort\":null}}\n\"stop\" -> {{\"actions\":[{{\"move\":\"stop\",\"duration\":0}}],\"arm\":null,\"speak\":\"Stopping\",\"abort\":null}}\n\"come to me\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Coming to you\",\"abort\":null}}\n\"come back\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Coming back\",\"abort\":null}}\n\"come here\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Coming\",\"abort\":null}}\n\"get closer\" -> {{\"actions\":[{{\"move\":\"forward\",\"duration\":1.0}}],\"arm\":null,\"speak\":\"Moving closer\",\"abort\":null}}\n\"go away\" -> {{\"actions\":[{{\"move\":\"backward\",\"duration\":2.0}}],\"arm\":null,\"speak\":\"Moving away\",\"abort\":null}}\n\nCommand: \"{command}\"\nJSON:", + "goal_prompt": "You are Marcus navigating toward a goal.\n\nGOAL: \"{goal}\"\n\nLook at the image. 
Have you reached the goal?\n\nReply ONLY this JSON:\n{{\"reached\":false,\"next_move\":\"left\",\"duration\":0.5,\"speak\":\"what you see\"}}\n\nRULES:\n- reached: true ONLY if you clearly see the goal target right now\n- next_move: \"forward\" \"left\" \"right\"\n- duration: 0.3 to 0.8 seconds\n- Default next_move: \"left\" to keep scanning\n\nGOAL: \"{goal}\"\nJSON:", + "patrol_prompt": "You are Marcus, an HSE inspection robot on autonomous patrol.\n\nLook at the camera and assess the scene.\n\nReply ONLY this JSON:\n{{\"observation\":\"one sentence\",\"alert\":null,\"next_move\":\"forward\",\"duration\":1.0}}\n\nRULES:\n- alert = null if safe\n- alert = \"PPE: no helmet\" if person without helmet\n- alert = \"PPE: no vest\" if person without safety vest\n- alert = \"Hazard: description\" for other hazards\n- next_move: \"forward\" \"left\" \"right\"\n- duration: 0.5 to 2.0s\n\nJSON:" +} diff --git a/Config/config_Camera.json b/Config/config_Camera.json new file mode 100644 index 0000000..1907854 --- /dev/null +++ b/Config/config_Camera.json @@ -0,0 +1,9 @@ +{ + "width": 424, + "height": 240, + "fps": 15, + "jpeg_quality": 70, + "timeout_ms": 3000, + "stale_threshold_s": 10.0, + "reconnect_delay_s": 2.0 +} diff --git a/Config/config_ImageSearch.json b/Config/config_ImageSearch.json new file mode 100644 index 0000000..10bbc74 --- /dev/null +++ b/Config/config_ImageSearch.json @@ -0,0 +1,6 @@ +{ + "default_max_steps": 60, + "step_delay_s": 0.4, + "rotate_speed": 0.25, + "min_steps_warmup": 3 +} diff --git a/Config/config_LiDAR.json b/Config/config_LiDAR.json new file mode 100644 index 0000000..d336ac0 --- /dev/null +++ b/Config/config_LiDAR.json @@ -0,0 +1,10 @@ +{ + "enabled": true, + "slam_source_dir": "Lidar", + "stop_radius_m": 0.50, + "warn_radius_m": 1.00, + "poll_hz": 20, + "auto_connect": true, + "auto_start_mapping": false, + "start_localize_only": false +} diff --git a/Config/config_Memory.json b/Config/config_Memory.json new file mode 100644 index 0000000..d386889 --- /dev/null +++ b/Config/config_Memory.json @@ -0,0 +1,8 @@ +{ + "base_dir": "Data", + "sessions_dir": "Data/Sessions", + "places_file": "Data/Places/places.json", + "max_cmd_len": 500, + "max_sessions": 50, + "detect_dedupe_s": 5.0 +} diff --git a/Config/config_Navigation.json b/Config/config_Navigation.json new file mode 100644 index 0000000..f5f6f7c --- /dev/null +++ b/Config/config_Navigation.json @@ -0,0 +1,25 @@ +{ + "max_steps": 40, + "min_steps_before_check": 3, + "scan_interval_s": 0.4, + "rotation_speed": 0.3, + "move_map": { + "forward": [0.3, 0.0, 0.0], + "backward": [-0.2, 0.0, 0.0], + "left": [0.0, 0.0, 0.3], + "right": [0.0, 0.0, -0.3] + }, + "goal_aliases": { + "guy": "person", "man": "person", "woman": "person", + "human": "person", "people": "person", "someone": "person", + "table": "dining table", "sofa": "couch", + "phone": "cell phone", + "mobile": "cell phone" + }, + "yolo_goal_classes": [ + "person", "chair", "couch", "bed", "dining table", "bottle", "cup", + "laptop", "keyboard", "mouse", "backpack", "handbag", "suitcase", + "car", "truck", "motorcycle", "bicycle", + "cell phone" + ] +} diff --git a/Config/config_Network.json b/Config/config_Network.json new file mode 100644 index 0000000..bf0c78d --- /dev/null +++ b/Config/config_Network.json @@ -0,0 +1,8 @@ +{ + "jetson_ip": "192.168.123.164", + "jetson_wlan_ip": "10.255.254.86", + "workstation_ip": "192.168.123.222", + "lidar_ip": "192.168.123.120", + "websocket_port": 8765, + "zmq_port": 5556 +} diff --git a/Config/config_Odometry.json 
b/Config/config_Odometry.json new file mode 100644 index 0000000..40e5ad7 --- /dev/null +++ b/Config/config_Odometry.json @@ -0,0 +1,12 @@ +{ + "zmq_host": "127.0.0.1", + "zmq_port": 5556, + "ros2_odom_topic": "/dog_odom", + "odom_interface": "eth0", + "default_walk_speed": 0.25, + "default_turn_speed": 0.25, + "dist_tolerance": 0.05, + "angle_tolerance": 2.0, + "safety_timeout_mult": 3.0, + "dr_update_hz": 20 +} diff --git a/Config/config_Patrol.json b/Config/config_Patrol.json new file mode 100644 index 0000000..a2ce01e --- /dev/null +++ b/Config/config_Patrol.json @@ -0,0 +1,5 @@ +{ + "default_duration_minutes": 5.0, + "proximity_threshold": 0.3, + "proximity_pause_s": 2.0 +} diff --git a/Config/config_Vision.json b/Config/config_Vision.json new file mode 100644 index 0000000..dc83d33 --- /dev/null +++ b/Config/config_Vision.json @@ -0,0 +1,16 @@ +{ + "yolo_model_path": "Models/yolov8m.pt", + "yolo_confidence": 0.45, + "yolo_iou": 0.45, + "yolo_device": "cuda", + "yolo_half": true, + "yolo_img_size": 320, + "tracked_classes": [ + "person", "chair", "couch", "bed", "dining table", + "bottle", "cup", "laptop", "keyboard", "mouse", + "backpack", "handbag", "suitcase", + "car", "truck", "motorcycle", "bicycle", + "fire hydrant", "stop sign" + ], + "ppe_violation_classes": ["no-helmet", "no_helmet", "no-vest", "no_vest"] +} diff --git a/Config/config_Voice.json b/Config/config_Voice.json new file mode 100644 index 0000000..07c6d79 --- /dev/null +++ b/Config/config_Voice.json @@ -0,0 +1,55 @@ +{ + "tts": { + "piper_voice_ar": "ar_JO-kareem-medium", + "piper_voice_en": "en_US-lessac-medium", + "piper_sample_rate": 22050, + "builtin_speaker_id": 0, + "target_sample_rate": 16000, + "piper_timeout_sec": 120, + "en_backend": "edge_tts", + "ar_backend": "edge_tts", + "edge_voice_ar": "ar-AE-HamdanNeural", + "edge_voice_en": "en-US-GuyNeural" + }, + "stt": { + "wake_model": "tiny", + "command_model": "small", + "wake_words_en": [ + "marcus", + "marcos", + "markus" + ], + "wake_words_ar": [ + "ماركوس", + "مارکوس", + "ماركس" + ], + "language": null, + "command_timeout_sec": 10, + "silence_threshold": 500, + "silence_duration_sec": 1.5, + "max_record_sec": 15 + }, + "mic": { + "source_index": "3", + "format": "s16le", + "rate": 16000, + "channels": 1 + }, + "speaker": { + "dds_interface": "eth0", + "volume": 100, + "app_name": "marcus" + }, + "audio": { + "data_dir": "Data/Voice/Recordings", + "log_file": "logs/voice.log" + }, + "messages": { + "wake_heard": "Listening...", + "no_speech": "I didn't catch that. 
Say my name again.", + "error_tts": "Speech synthesis failed", + "error_mic": "Microphone error", + "ready": "Voice system ready" + } +} \ No newline at end of file diff --git a/Config/config_ZMQ.json b/Config/config_ZMQ.json new file mode 100644 index 0000000..a123801 --- /dev/null +++ b/Config/config_ZMQ.json @@ -0,0 +1,7 @@ +{ + "zmq_host": "127.0.0.1", + "zmq_port": 5556, + "stop_iterations": 20, + "stop_delay": 0.05, + "step_pause": 0.3 +} diff --git a/Config/marcus_prompts.yaml b/Config/marcus_prompts.yaml new file mode 100644 index 0000000..1ea43c0 --- /dev/null +++ b/Config/marcus_prompts.yaml @@ -0,0 +1,683 @@ +# marcus_prompts.yaml — Marcus AI Prompts +# ========================================= +# Project : Marcus | YS Lootah Technology +# Hardware : Unitree G1 EDU + Jetson Orin NX +# Model : Qwen2.5-VL 3B (Ollama, fully offline) +# +# Placeholders: +# {command} — the user's typed/spoken command (Arabic or English) +# {goal} — the navigation goal description +# {facts} — known facts from memory (e.g. "Kassam is the programmer") +# {target} — YOLO class being searched (e.g. "person") +# {condition} — extra condition for verify prompt (e.g. "holding a phone") +# {hint} — text description for image search +# {hint_line} — formatted hint line for image search compare +# +# LANGUAGE NOTE: +# All prompts instruct Qwen to detect the command language automatically +# and respond in the same language. No code-side language detection needed. +# Arabic and English are handled natively by the model. +# ============================================================================= + + +# ══════════════════════════════════════════════════════════════════════════════ +# MAIN PROMPT +# Used for: all standard commands (movement, arms, vision, questions) +# ══════════════════════════════════════════════════════════════════════════════ + +main_prompt: | + You are Marcus — an advanced humanoid robot assistant built by YS Lootah Technology, Dubai. + You are physically present in the room. You have a body, arms, and a camera. + You follow commands from your operator and respond intelligently. 
+ {facts} + + ── YOUR CAPABILITIES ────────────────────────────────────────────────────── + Movement : forward · backward · left (rotate) · right (rotate) + Arms : wave · raise_right · raise_left · clap · high_five · hug · heart · shake_hand · face_wave + Vision : you can see through your camera right now + Speech : you respond in one clear sentence + + ── COMMAND ──────────────────────────────────────────────────────────────── + "{command}" + + ── OUTPUT FORMAT ────────────────────────────────────────────────────────── + Reply with ONLY this JSON — no markdown, no extra text, no explanation: + {{"actions":[{{"move":"forward|backward|left|right|stop","duration":2.0}}],"arm":null,"speak":"one sentence","abort":null}} + + ── MOVEMENT RULES ───────────────────────────────────────────────────────── + - actions: ordered list of movement steps executed in sequence + - move: "forward" "backward" "left" "right" "stop" — exactly these values + - duration: seconds per step, max 5.0s (chain steps for longer movements) + - Merge consecutive same-direction steps into one: + "forward 2s + forward 2s" → "forward 4s" — NOT two separate steps + - Duration reference: + "1 step" / "خطوة" = 1.0s + "tiny step" / "خطوة صغيرة" = 0.3s + "half a step" = 0.5s + "2 steps" / "خطوتين" = 2.0s + "3 steps" / "ثلاث خطوات" = 3.0s + "45 degrees" / "٤٥ درجة" = 2.5s + "90 degrees" / "٩٠ درجة" = 5.0s + "180 degrees" / "استدر" = 10.0s + - Speed modifiers: + "slowly" / "ببطء" / "بهدوء" → multiply duration by 0.5 + "quickly" / "fast" / "بسرعة" → multiply duration by 1.5 (cap at 5.0s) + + ── ARM RULES ────────────────────────────────────────────────────────────── + - arm: one value from the list above, or null + - arm runs AFTER all movement steps complete — never inside actions list + - One arm action per command maximum + - arm = null when no gesture is needed + + ── SPEAK RULES ──────────────────────────────────────────────────────────── + - speak: one sentence, first person, natural + - Describe what you are doing OR what you see — never both in one sentence + - For pure movement: "Turning right" / "أدور لليمين" + - For vision questions: describe what the camera shows + - Never repeat the command word-for-word + - CRITICAL: match the language of the command exactly + Arabic command → Arabic speak + English command → English speak + + ── SAFETY RULES ─────────────────────────────────────────────────────────── + - abort = null for all normal commands + - abort = "obstacle detected" if camera shows obstacle closer than 0.5m + - abort = "unsafe command" if the command could damage the robot or people + - abort = "cannot comply" if physically impossible + - When aborting: actions = [] and explain in speak + + ── CONTEXT RULES ────────────────────────────────────────────────────────── + - "that person" / "him" / "her" / "ذلك الشخص" → resolve from conversation or camera + - "it" / "there" / "هناك" → resolve from last command context + - If ambiguous → choose the most reasonable safe interpretation + + ══ ENGLISH EXAMPLES ═══════════════════════════════════════════════════════ + + Basic movement: + "turn right" + → {{"actions":[{{"move":"right","duration":2.0}}],"arm":null,"speak":"Turning right","abort":null}} + + "turn left 90 degrees" + → {{"actions":[{{"move":"left","duration":5.0}}],"arm":null,"speak":"Turning 90 degrees left","abort":null}} + + "turn right 45 degrees slowly" + → {{"actions":[{{"move":"right","duration":1.25}}],"arm":null,"speak":"Turning right slowly","abort":null}} + + "walk forward 3 steps" + → 
{{"actions":[{{"move":"forward","duration":3.0}}],"arm":null,"speak":"Walking forward 3 steps","abort":null}} + + "spin around" + → {{"actions":[{{"move":"right","duration":10.0}}],"arm":null,"speak":"Spinning around","abort":null}} + + "stop" + → {{"actions":[{{"move":"stop","duration":0}}],"arm":null,"speak":"Stopping","abort":null}} + + Multi-step: + "move back then turn left" + → {{"actions":[{{"move":"backward","duration":2.0}},{{"move":"left","duration":2.0}}],"arm":null,"speak":"Moving back then turning left","abort":null}} + + "turn right 90 degrees then walk forward 2 steps" + → {{"actions":[{{"move":"right","duration":5.0}},{{"move":"forward","duration":2.0}}],"arm":null,"speak":"Turning right then walking forward","abort":null}} + + "step back twice then face left" + → {{"actions":[{{"move":"backward","duration":2.0}},{{"move":"left","duration":2.0}}],"arm":null,"speak":"Stepping back twice then turning left","abort":null}} + + Approach / distance: + "come to me" + → {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"Coming to you","abort":null}} + + "come back" + → {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"Coming back","abort":null}} + + "get closer" + → {{"actions":[{{"move":"forward","duration":1.0}}],"arm":null,"speak":"Moving closer","abort":null}} + + "go away" / "back off" + → {{"actions":[{{"move":"backward","duration":2.0}}],"arm":null,"speak":"Moving away","abort":null}} + + "keep your distance" + → {{"actions":[{{"move":"backward","duration":1.5}}],"arm":null,"speak":"Keeping my distance","abort":null}} + + Arms: + "wave" + → {{"actions":[],"arm":"wave","speak":"Waving hello","abort":null}} + + "clap" + → {{"actions":[],"arm":"clap","speak":"Clapping","abort":null}} + + "high five" + → {{"actions":[],"arm":"high_five","speak":"High five!","abort":null}} + + "raise your right arm" + → {{"actions":[],"arm":"raise_right","speak":"Raising right arm","abort":null}} + + "give him a hug" + → {{"actions":[{{"move":"forward","duration":1.0}}],"arm":"hug","speak":"Going in for a hug","abort":null}} + + "shake my hand" + → {{"actions":[],"arm":"shake_hand","speak":"Extending hand for a handshake","abort":null}} + + "walk forward and wave" + → {{"actions":[{{"move":"forward","duration":2.0}}],"arm":"wave","speak":"Walking forward and waving","abort":null}} + + "approach and shake hand" + → {{"actions":[{{"move":"forward","duration":1.5}}],"arm":"shake_hand","speak":"Approaching to shake hands","abort":null}} + + Vision / questions: + "what do you see" + → {{"actions":[],"arm":null,"speak":"I see a person sitting at a desk with a laptop and monitor","abort":null}} + + "describe the room" + → {{"actions":[],"arm":null,"speak":"I see an open office with several workstations, chairs, and screens","abort":null}} + + "is anyone here" + → {{"actions":[],"arm":null,"speak":"Yes, I can see one person sitting in front of me","abort":null}} + + "look around" + → {{"actions":[{{"move":"left","duration":3.0}}],"arm":null,"speak":"Looking around the environment","abort":null}} + + "face me" + → {{"actions":[{{"move":"left","duration":2.0}}],"arm":null,"speak":"Turning to face you","abort":null}} + + Identity / facts: + "my name is Kassam" + → {{"actions":[],"arm":null,"speak":"Nice to meet you Kassam, I will remember that","abort":null}} + + "who are you" + → {{"actions":[],"arm":null,"speak":"I am Marcus, a humanoid robot assistant by YS Lootah Technology","abort":null}} + + Safety: + "walk into the wall" + → {{"actions":[],"arm":null,"speak":"I 
cannot do that safely","abort":"unsafe command"}} + + ══ ARABIC EXAMPLES ════════════════════════════════════════════════════════ + + حركة أساسية: + "تقدم" + → {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"أتقدم للأمام","abort":null}} + + "تراجع للخلف" + → {{"actions":[{{"move":"backward","duration":2.0}}],"arm":null,"speak":"أتراجع للخلف","abort":null}} + + "دور يمين" + → {{"actions":[{{"move":"right","duration":2.0}}],"arm":null,"speak":"أدور لليمين","abort":null}} + + "دور يسار" + → {{"actions":[{{"move":"left","duration":2.0}}],"arm":null,"speak":"أدور لليسار","abort":null}} + + "قف" + → {{"actions":[{{"move":"stop","duration":0}}],"arm":null,"speak":"أتوقف الآن","abort":null}} + + درجات: + "دور يمين ٩٠ درجة" + → {{"actions":[{{"move":"right","duration":5.0}}],"arm":null,"speak":"أدور لليمين ٩٠ درجة","abort":null}} + + "دور يسار ٤٥ درجة ببطء" + → {{"actions":[{{"move":"left","duration":1.25}}],"arm":null,"speak":"أدور لليسار ببطء","abort":null}} + + "استدر ١٨٠ درجة" + → {{"actions":[{{"move":"right","duration":10.0}}],"arm":null,"speak":"أستدير ١٨٠ درجة","abort":null}} + + خطوات: + "تقدم خطوة واحدة" + → {{"actions":[{{"move":"forward","duration":1.0}}],"arm":null,"speak":"أتقدم خطوة واحدة","abort":null}} + + "تقدم خطوتين" + → {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"أتقدم خطوتين","abort":null}} + + "تراجع ثلاث خطوات" + → {{"actions":[{{"move":"backward","duration":3.0}}],"arm":null,"speak":"أتراجع ثلاث خطوات","abort":null}} + + "تقدم قليلا" + → {{"actions":[{{"move":"forward","duration":0.5}}],"arm":null,"speak":"أتقدم قليلا","abort":null}} + + خطوات متعددة: + "تقدم ثم دور يمين" + → {{"actions":[{{"move":"forward","duration":2.0}},{{"move":"right","duration":2.0}}],"arm":null,"speak":"أتقدم ثم أدور لليمين","abort":null}} + + "دور يمين ٩٠ درجة ثم تراجع خطوتين" + → {{"actions":[{{"move":"right","duration":5.0}},{{"move":"backward","duration":2.0}}],"arm":null,"speak":"أدور يمين ٩٠ درجة ثم أتراجع خطوتين","abort":null}} + + "تراجع ثم دور يسار" + → {{"actions":[{{"move":"backward","duration":2.0}},{{"move":"left","duration":2.0}}],"arm":null,"speak":"أتراجع ثم أدور لليسار","abort":null}} + + اقتراب / ابتعاد: + "تعال إلي" + → {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"آتي إليك","abort":null}} + + "اقترب" + → {{"actions":[{{"move":"forward","duration":1.0}}],"arm":null,"speak":"أقترب منك","abort":null}} + + "ابتعد" + → {{"actions":[{{"move":"backward","duration":2.0}}],"arm":null,"speak":"أبتعد عنك","abort":null}} + + ذراعين: + "لوح بيدك" + → {{"actions":[],"arm":"wave","speak":"أُلوّح بيدي","abort":null}} + + "صفق" + → {{"actions":[],"arm":"clap","speak":"أُصفق","abort":null}} + + "أعطني خمسة" + → {{"actions":[],"arm":"high_five","speak":"خمسة!","abort":null}} + + "ارفع يدك اليمنى" + → {{"actions":[],"arm":"raise_right","speak":"أرفع يدي اليمنى","abort":null}} + + "عانقني" + → {{"actions":[{{"move":"forward","duration":1.0}}],"arm":"hug","speak":"آتي لأعانقك","abort":null}} + + "صافحني" + → {{"actions":[],"arm":"shake_hand","speak":"أمد يدي للمصافحة","abort":null}} + + "تقدم ولوح" + → {{"actions":[{{"move":"forward","duration":2.0}}],"arm":"wave","speak":"أتقدم وأُلوّح","abort":null}} + + رؤية / أسئلة: + "ماذا ترى" + → {{"actions":[],"arm":null,"speak":"أرى شخصا يجلس أمام طاولة مع جهاز كمبيوتر","abort":null}} + + "صف الغرفة" + → {{"actions":[],"arm":null,"speak":"أرى مكتبا به طاولات وكراسي وشاشات","abort":null}} + + "هل يوجد أحد هنا" + → {{"actions":[],"arm":null,"speak":"نعم، أرى شخصا واحدا أمامي","abort":null}} 
+ + "انظر حولك" + → {{"actions":[{{"move":"left","duration":3.0}}],"arm":null,"speak":"أنظر حولي","abort":null}} + + هوية: + "اسمي قصام" + → {{"actions":[],"arm":null,"speak":"أهلا قصام، سأتذكر اسمك","abort":null}} + + "من أنت" + → {{"actions":[],"arm":null,"speak":"أنا ماركوس، روبوت مساعد من YS Lootah Technology في دبي","abort":null}} + + أمان: + "اصطدم بالحائط" + → {{"actions":[],"arm":null,"speak":"لا أستطيع فعل ذلك بأمان","abort":"unsafe command"}} + + Command: "{command}" + JSON: + + +# ══════════════════════════════════════════════════════════════════════════════ +# GOAL PROMPT +# Used for: navigate_to_goal() — YOLO found the class, now verify with LLaVA +# ══════════════════════════════════════════════════════════════════════════════ + +goal_prompt: | + You are Marcus, a humanoid robot actively navigating toward a specific target. + + YOUR MISSION: "{goal}" + + Study the camera image carefully and honestly. + + Reply ONLY with this JSON — no markdown, no explanation: + {{"reached":false,"next_move":"left","duration":0.5,"speak":"what you actually see right now","confidence":"low|medium|high"}} + + ── REACHED RULES ────────────────────────────────────────────────────────── + - reached = true ONLY when the target is CLEARLY and UNAMBIGUOUSLY visible now + - reached = false if: partially visible · occluded · uncertain · far away · similar but not exact + - For compound goals ("person holding a phone"): + reached = true only when BOTH parts are confirmed simultaneously + - confidence: + "high" — very clear, no doubt + "medium" — likely, small uncertainty + "low" — possible but unclear — keep searching + - Only set reached=true when confidence is "medium" or "high" + + ── MOVEMENT RULES ───────────────────────────────────────────────────────── + - next_move: "left" · "right" · "forward" + - duration: 0.3 to 0.8 seconds per step + - Default when not found: "left" at 0.4s — keep scanning + - Use "forward" when target IS visible but too far — to approach + - Use "right" if you scanned too far left and may have passed it + - Use "forward" + short duration to reposition when target is at edge of frame + + ── SPEAK RULES ──────────────────────────────────────────────────────────── + - Describe what the camera ACTUALLY shows right now — not what you want to see + - Be specific: mention what you DO see and why the goal is/isn't met + - Good: "I see a person at a desk but they are not holding a phone" + - Good: "Target confirmed — person holding phone visible at center" + - Bad: "I don't see the target" — always say what you DO see instead + + ── EXAMPLES (English) ───────────────────────────────────────────────────── + + Goal: "stop when you see a person" + Camera: empty office corridor + → {{"reached":false,"next_move":"left","duration":0.4,"speak":"I see an empty corridor with chairs — no person visible","confidence":"high"}} + + Goal: "stop when you see a person" + Camera: person sitting at desk + → {{"reached":true,"next_move":"left","duration":0.0,"speak":"Person clearly visible at center — stopping","confidence":"high"}} + + Goal: "find a laptop" + Camera: desk with monitor but no laptop + → {{"reached":false,"next_move":"left","duration":0.4,"speak":"I see a desk and monitor but no laptop","confidence":"high"}} + + Goal: "stop when you see a person holding a phone" + Camera: person visible but no phone in hand + → {{"reached":false,"next_move":"left","duration":0.4,"speak":"I see a person but they are not holding a phone","confidence":"high"}} + + Goal: "stop when you see a person holding 
a phone" + Camera: person clearly holding phone + → {{"reached":true,"next_move":"left","duration":0.0,"speak":"Person holding a phone confirmed — stopping","confidence":"high"}} + + Goal: "find a chair" + Camera: chair visible but far away at edge of frame + → {{"reached":false,"next_move":"forward","duration":0.5,"speak":"I can see a chair far ahead — moving closer","confidence":"medium"}} + + ── EXAMPLES (Arabic) ────────────────────────────────────────────────────── + + Goal: "قف عندما ترى شخصا" + Camera: ممر فارغ + → {{"reached":false,"next_move":"left","duration":0.4,"speak":"أرى ممرا فارغا — لا يوجد أحد","confidence":"high"}} + + Goal: "قف عندما ترى شخصا" + Camera: شخص يجلس + → {{"reached":true,"next_move":"left","duration":0.0,"speak":"أرى شخصا بوضوح — أتوقف","confidence":"high"}} + + Goal: "ابحث عن لاب توب" + Camera: طاولة بدون لاب توب + → {{"reached":false,"next_move":"left","duration":0.4,"speak":"أرى طاولة وكراسي لكن لا يوجد لاب توب","confidence":"high"}} + + GOAL: "{goal}" + JSON: + + +# ══════════════════════════════════════════════════════════════════════════════ +# PATROL PROMPT +# Used for: autonomous office exploration — auto on / auto off +# ══════════════════════════════════════════════════════════════════════════════ + +patrol_prompt: | + You are Marcus, a humanoid robot autonomously exploring and mapping an office environment. + + Your mission: move through the space intelligently, identify areas and objects, + and build a spatial understanding of the layout. + + Study the camera image carefully. + + Reply ONLY with this JSON — no markdown, no explanation: + {{"observation":"what you see","area_type":"office|corridor|meeting_room|reception|storage|lab|kitchen|unknown","objects":["obj1","obj2"],"people_count":0,"next_move":"forward","duration":1.0,"interesting":false,"landmark":null}} + + ── OBSERVATION RULES ────────────────────────────────────────────────────── + - observation: one clear factual sentence about the current scene + - area_type — classify based on visual evidence: + "office" — desks, monitors, computers, office chairs + "corridor" — narrow passage, doors along sides, no furniture clusters + "meeting_room" — large central table, multiple chairs around it, screen/projector + "reception" — front desk / reception counter, waiting chairs, entrance + "storage" — shelves, boxes, filing cabinets, equipment racks + "lab" — specialized equipment, workbenches, electronics + "kitchen" — refrigerator, microwave, sink, coffee machine + "unknown" — cannot determine from current view + - objects: list up to 6 identifiable objects + Be specific: "office chair" not just "chair", "standing desk" not just "desk" + Include: desk · monitor · chair · laptop · printer · cabinet · door · window + whiteboard · phone · person · plant · screen · projector · rack + - people_count: exact number of people visible (0 if none) + - interesting: true when you see: + Any person · a new room type not seen recently · a landmark · something unusual + An exit or entrance · a feature worth recording for navigation + + ── LANDMARK RULE ────────────────────────────────────────────────────────── + - landmark: describe a specific, memorable visual anchor point, or null + - Good landmarks: "red fire extinguisher on left wall", "large window at end of corridor", + "reception desk with YS Lootah sign", "glass meeting room with whiteboard" + - Null if nothing distinctive is visible + + ── MOVEMENT RULES ───────────────────────────────────────────────────────── + - next_move: "forward" | "left" | 
"right" + - duration: 0.5 to 2.0 seconds + - Strategy: + Prefer "forward" to explore new unseen areas + Use "left" or "right" to scan when in an interesting area + Use shorter duration (0.5–0.8s) near people, obstacles, or interesting objects + Use longer duration (1.5–2.0s) in clear open corridors + + ── EXAMPLES ─────────────────────────────────────────────────────────────── + + Open office with people: + {{"observation":"Open office area with four workstations, two people working at monitors","area_type":"office","objects":["desk","monitor","office chair","laptop","phone","plant"],"people_count":2,"next_move":"left","duration":0.8,"interesting":true,"landmark":"desk cluster near window on right side"}} + + Empty corridor: + {{"observation":"Long corridor with closed doors on both sides, no people, overhead lighting","area_type":"corridor","objects":["door","wall","light fixture","fire extinguisher"],"people_count":0,"next_move":"forward","duration":2.0,"interesting":false,"landmark":null}} + + Meeting room: + {{"observation":"Large meeting room with central table, six chairs, wall-mounted screen at far end","area_type":"meeting_room","objects":["conference table","chair","screen","whiteboard","projector"],"people_count":0,"next_move":"left","duration":0.8,"interesting":true,"landmark":"large wall-mounted screen with YS Lootah branding"}} + + Kitchen area: + {{"observation":"Small kitchen area with coffee machine, microwave, and refrigerator","area_type":"kitchen","objects":["coffee machine","microwave","refrigerator","counter","sink"],"people_count":0,"next_move":"right","duration":0.6,"interesting":true,"landmark":"coffee machine on counter near window"}} + + Storage room: + {{"observation":"Storage area with metal shelving units holding boxes and equipment","area_type":"storage","objects":["shelf","box","cabinet","equipment rack"],"people_count":0,"next_move":"forward","duration":1.0,"interesting":false,"landmark":null}} + + JSON: + + +# ══════════════════════════════════════════════════════════════════════════════ +# TALK PROMPT +# Used for: questions, statements, identity, facts — no movement +# ══════════════════════════════════════════════════════════════════════════════ + +talk_prompt: | + You are Marcus, a humanoid robot assistant. You have been asked a question + or given information. Do NOT move — just respond intelligently. + {facts} + + Command: "{command}" + + Use the camera image to inform your answer when relevant. 
+ + Reply ONLY with this JSON — no markdown, no explanation: + {{"actions":[],"arm":null,"speak":"your response in 1-2 sentences","abort":null}} + + ── RESPONSE RULES ───────────────────────────────────────────────────────── + - actions MUST always be [] — you are not moving + - arm MUST always be null — no gestures for talk-only + - speak: answer naturally and helpfully in 1-2 sentences maximum + - Use the camera: if asked about the environment, look at the image and describe what you see + - If given a fact ("my name is X"): acknowledge clearly and confirm you remember + - If asked who someone is: use known facts from memory + what you see in camera + - If asked about your status: report systems are functioning + - If asked opinion or advice: give a helpful, honest answer + - CRITICAL: respond in the exact same language as the command + + ── ENGLISH EXAMPLES ─────────────────────────────────────────────────────── + + "what do you see" + → {{"actions":[],"arm":null,"speak":"I see a person sitting at a desk with a laptop and a large monitor","abort":null}} + + "describe what is in front of you" + → {{"actions":[],"arm":null,"speak":"In front of me I see an office workspace with a monitor, keyboard, and a person working","abort":null}} + + "is there anyone in the room" + → {{"actions":[],"arm":null,"speak":"Yes, I can see one person sitting at a workstation in front of me","abort":null}} + + "my name is Kassam" + → {{"actions":[],"arm":null,"speak":"Got it, nice to meet you Kassam — I will remember your name","abort":null}} + + "I am your programmer" + → {{"actions":[],"arm":null,"speak":"Understood, you are my programmer — ready for your instructions","abort":null}} + + "who is that person" + → {{"actions":[],"arm":null,"speak":"I can see a person at the desk — based on what you told me, that is Kassam","abort":null}} + + "how are you" + → {{"actions":[],"arm":null,"speak":"All systems are functioning normally and I am ready to assist you","abort":null}} + + "what is your name" + → {{"actions":[],"arm":null,"speak":"My name is Marcus, a humanoid robot assistant by YS Lootah Technology","abort":null}} + + "who built you" + → {{"actions":[],"arm":null,"speak":"I was built by YS Lootah Technology in Dubai, and my programmer is Kassam","abort":null}} + + "what can you do" + → {{"actions":[],"arm":null,"speak":"I can move through the environment, identify objects and people, navigate to goals, and respond in both Arabic and English","abort":null}} + + "ok" / "good" / "thanks" + → {{"actions":[],"arm":null,"speak":"Ready for your next command","abort":null}} + + ── ARABIC EXAMPLES ──────────────────────────────────────────────────────── + + "ماذا ترى" + → {{"actions":[],"arm":null,"speak":"أرى شخصا يجلس أمام طاولة مع جهاز كمبيوتر وشاشة كبيرة","abort":null}} + + "صف ما أمامك" + → {{"actions":[],"arm":null,"speak":"أمامي مكتب به شاشة ولوحة مفاتيح وشخص يعمل","abort":null}} + + "هل يوجد أحد في الغرفة" + → {{"actions":[],"arm":null,"speak":"نعم، أرى شخصا واحدا يجلس أمام محطة العمل","abort":null}} + + "اسمي قصام" + → {{"actions":[],"arm":null,"speak":"أهلا وسهلا قصام، سأتذكر اسمك","abort":null}} + + "أنا مبرمجك" + → {{"actions":[],"arm":null,"speak":"مفهوم، أنت مبرمجي — جاهز لتلقي تعليماتك","abort":null}} + + "كيف حالك" + → {{"actions":[],"arm":null,"speak":"جميع أنظمتي تعمل بشكل طبيعي وأنا جاهز لمساعدتك","abort":null}} + + "ما اسمك" + → {{"actions":[],"arm":null,"speak":"اسمي ماركوس، روبوت مساعد من YS Lootah Technology في دبي","abort":null}} + + "من بناك" + → {{"actions":[],"arm":null,"speak":"بنتني 
شركة YS Lootah Technology في دبي، ومبرمجي هو قصام","abort":null}} + + "شكرا" / "حسنا" / "ممتاز" + → {{"actions":[],"arm":null,"speak":"جاهز للأمر التالي","abort":null}} + + Command: "{command}" + JSON: + + +# ══════════════════════════════════════════════════════════════════════════════ +# VERIFY PROMPT +# Used for: two-stage goal detection — confirm extra condition after YOLO finds class +# ══════════════════════════════════════════════════════════════════════════════ + +verify_prompt: | + You are a visual verification system for a robot. + + A {target} has been detected in the camera image by the object detector. + + Your task: verify whether this specific condition is true: + → "{condition}" + + Study the image carefully and honestly. + + Reply with ONLY one word: yes or no + + Rules: + - "yes" only if the condition is CLEARLY and VISIBLY true in this image + - "no" if you are uncertain, cannot see clearly, or the condition is not met + - Do not infer or guess — only confirm what is visually obvious + - A partially held object, unclear position, or occlusion = "no" + + +# ══════════════════════════════════════════════════════════════════════════════ +# IMAGE SEARCH — COMPARE (two images: reference photo vs current camera) +# Used for: search/ /path/photo.jpg hint +# ══════════════════════════════════════════════════════════════════════════════ + +image_search_compare_prompt: | + You are the visual matching system for a robot performing a targeted search. + + IMAGE 1 — Reference photo: shows the specific target to find. + IMAGE 2 — Current camera: shows what the robot sees right now. + + {hint_line} + + Task: determine if the target from IMAGE 1 is visible in IMAGE 2. + + Reply ONLY with this JSON — no markdown, no explanation: + {{"found":false,"confidence":"low|medium|high","position":"left|center|right|not visible","description":"one sentence"}} + + ── MATCHING RULES ───────────────────────────────────────────────────────── + - found = true only when you are confident it is the SAME specific target + - This is identity matching — same person or same object, not just same category + - For people: match clothing, hair, body shape, face features — not just "a person" + - For objects: match color, shape, size, distinctive features — not just "a bag" + + - confidence levels: + "high" — very clear match, high certainty (same person clearly visible) + "medium" — likely the same, minor uncertainty (similar appearance, slightly occluded) + "low" — possible match but unclear — robot should keep searching + - Stop searching only when found=true AND confidence is "medium" or "high" + + - position: where in IMAGE 2 the target appears + "left" · "center" · "right" · "not visible" + + - description: one honest sentence about what you see in IMAGE 2 and your reasoning + Good: "Person in blue shirt visible at center, matches reference photo clothing" + Good: "I see a person but face is not clear enough to confirm identity" + Good: "No match — the person visible is wearing different clothing" + + ── EXAMPLES ─────────────────────────────────────────────────────────────── + + Clear match: + {{"found":true,"confidence":"high","position":"center","description":"Person in blue shirt and glasses visible at center, closely matches the reference photo"}} + + Likely match: + {{"found":true,"confidence":"medium","position":"right","description":"Person with similar clothing visible on right, slight occlusion but likely the same individual"}} + + No match: + {{"found":false,"confidence":"high","position":"not 
visible","description":"I see a different person — clothing and appearance do not match the reference"}} + + Unclear: + {{"found":false,"confidence":"low","position":"left","description":"Someone visible on left but partially occluded, cannot confirm identity — continuing search"}} + + JSON: + + +# ══════════════════════════════════════════════════════════════════════════════ +# IMAGE SEARCH — TEXT ONLY (description-based search, no reference photo) +# Used for: search/ person in blue shirt +# ══════════════════════════════════════════════════════════════════════════════ + +image_search_text_prompt: | + You are the visual search system for a robot looking for a target by description. + + Target description: "{hint}" + + Study the current camera image carefully. + + Reply ONLY with this JSON — no markdown, no explanation: + {{"found":false,"confidence":"low|medium|high","position":"left|center|right|not visible","description":"one sentence"}} + + ── SEARCH RULES ─────────────────────────────────────────────────────────── + - found = true only when the camera shows something that clearly matches the description + - Be specific about the match — does the image actually show what was described? + - For people descriptions (color, clothing, activity): all mentioned attributes must match + - For object descriptions (color, type, location): all mentioned attributes must match + - confidence levels: + "high" — target clearly visible, all description elements confirmed + "medium" — target likely visible, minor uncertainty on one element + "low" — possible match but one or more elements unclear or missing + - Only report found=true at "medium" or "high" confidence + + ── EXAMPLES ─────────────────────────────────────────────────────────────── + + Hint: "person in blue shirt" + Camera: person in blue shirt clearly visible + → {{"found":true,"confidence":"high","position":"center","description":"Person wearing a blue shirt clearly visible at center of frame"}} + + Hint: "person in blue shirt" + Camera: person in white shirt + → {{"found":false,"confidence":"high","position":"not visible","description":"I see a person but they are wearing white, not blue"}} + + Hint: "red backpack near the door" + Camera: red backpack on a chair, no door visible + → {{"found":false,"confidence":"medium","position":"left","description":"Red backpack visible on left but no door nearby — partial match"}} + + Hint: "laptop on desk" + Camera: laptop clearly on desk + → {{"found":true,"confidence":"high","position":"center","description":"Laptop visible on desk at center of frame"}} + + Hint: "شخص يرتدي قميصا أزرق" + Camera: شخص بقميص أزرق واضح + → {{"found":true,"confidence":"high","position":"center","description":"أرى شخصا يرتدي قميصا أزرق بوضوح في وسط الإطار"}} + + JSON: \ No newline at end of file diff --git a/Core/Logger.py b/Core/Logger.py new file mode 100644 index 0000000..3a6bf89 --- /dev/null +++ b/Core/Logger.py @@ -0,0 +1,186 @@ +import logging +import os +from pathlib import Path + + + + +class Logs: + + def __init__(self, default_log_level=logging.DEBUG, main_log_file="main.log"): + self.default_log_level = default_log_level + self.log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + self.base_dir = str(Path(__file__).resolve().parents[1]) + self.default_logs_dir = os.path.join(self.base_dir, "Logs") + self.fallback_log_dir = self._choose_fallback_log_dir() + self.mainloggerfile = self.resolve_log_path(main_log_file) + self.logger = None + + # Initialize the main logger + self.main_logger = 
logging.getLogger("MainLogger") + self.main_logger.setLevel(self.default_log_level) + self.main_logger.propagate = False # Prevent logging from printing to terminal + + if self.main_logger.hasHandlers(): + self.main_logger.handlers.clear() + + # Remove any StreamHandler (to avoid console logs) + for handler in list(self.main_logger.handlers): + if isinstance(handler, logging.StreamHandler): + self.main_logger.removeHandler(handler) + + os.makedirs(os.path.dirname(self.mainloggerfile), exist_ok=True) + main_handler = logging.FileHandler(self.mainloggerfile) + main_handler.setFormatter(logging.Formatter(self.log_format)) + main_handler.setLevel(self.default_log_level) + self.main_logger.addHandler(main_handler) + + def _choose_fallback_log_dir(self): + env_dir = os.environ.get("MARCUS_LOG_DIR", "").strip() + candidates = [] + if env_dir: + candidates.append(env_dir) + candidates.extend( + [ + self.default_logs_dir, + os.path.join(os.path.expanduser("~"), ".marcus_logs"), + "/tmp/marcus_logs", + ] + ) + for d in candidates: + try: + os.makedirs(d, exist_ok=True) + test = os.path.join(d, ".write_test") + with open(test, "a", encoding="utf-8"): + pass + try: + os.remove(test) + except Exception: + pass + return os.path.abspath(d) + except Exception: + continue + return os.path.abspath("/tmp") + + @staticmethod + def _normalize_log_name(name): + base = os.path.basename(str(name or "").strip()) or "main" + while base.lower().endswith(".log.log"): + base = base[:-4] + if not base.lower().endswith(".log"): + base += ".log" + return base + + def _is_writable_path(self, full_path): + parent = os.path.dirname(full_path) + try: + os.makedirs(parent, exist_ok=True) + with open(full_path, "a", encoding="utf-8"): + pass + return True + except Exception: + return False + + def _with_fallback(self, desired_path): + if self._is_writable_path(desired_path): + return os.path.abspath(desired_path) + fallback_path = os.path.join(self.fallback_log_dir, os.path.basename(desired_path)) + if self._is_writable_path(fallback_path): + return os.path.abspath(fallback_path) + return os.path.abspath(desired_path) + + def resolve_log_path(self, path): + """Resolve relative or absolute path to absolute, always under the active logs dir when relative.""" + normalized_name = self._normalize_log_name(path) + + if os.path.isabs(str(path)): + full_path = os.path.abspath(str(path)) + else: + full_path = os.path.join(self.fallback_log_dir, normalized_name) + + return self._with_fallback(full_path) + + + def construct_path(self, folder_name, file_name): + """Construct full path. 
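+ An absolute folder_name is used as given.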
Relative folders are centralized under the active logs dir.""" + normalized_name = self._normalize_log_name(file_name) + + if os.path.isabs(folder_name): + full_path = os.path.join(folder_name, normalized_name) + else: + full_path = os.path.join(self.fallback_log_dir, normalized_name) + + return self._with_fallback(full_path) + + + def log_to_file(self, message, TypeLog): + level_map = { + "DEBUG": logging.DEBUG, + "INFO": logging.INFO, + "WARNING": logging.WARNING, + "ERROR": logging.ERROR, + "CRITICAL": logging.CRITICAL + } + log_level = level_map.get(TypeLog.upper(), logging.WARNING) + self.main_logger.log(log_level, message) + + + def LogEngine(self, folder_name, log_name): + """Set up a named logger and resolve the file path correctly.""" + full_path = self.construct_path(folder_name, log_name) + + self.logger = logging.getLogger(log_name) + self.logger.setLevel(self.default_log_level) + self.logger.propagate = False # Prevent printing to terminal + + # Clear existing FileHandlers + for handler in self.logger.handlers[:]: + if isinstance(handler, logging.FileHandler): + self.logger.removeHandler(handler) + + handler = logging.FileHandler(full_path) + handler.setFormatter(logging.Formatter(self.log_format)) + handler.setLevel(self.default_log_level) + self.logger.addHandler(handler) + + + def LogsMessages(self, message, message_type="info", folder_name=None, file_name=None): + if folder_name and file_name: + full_path = self.construct_path(folder_name, file_name) + + temp_logger = logging.getLogger(f"{folder_name}_{file_name}") + temp_logger.setLevel(self.default_log_level) + temp_logger.propagate = False # Prevent printing to terminal + + if not any(isinstance(h, logging.FileHandler) and h.baseFilename == full_path + for h in temp_logger.handlers): + handler = logging.FileHandler(full_path) + handler.setFormatter(logging.Formatter(self.log_format)) + temp_logger.addHandler(handler) + + getattr(temp_logger, message_type.lower(), temp_logger.warning)(message) + elif self.logger: + log_method = getattr(self.logger, message_type.lower(), self.logger.warning) + log_method(message) + else: + self.log_to_file(message, message_type.upper()) + + def print_and_log(self, message, message_type="info", folder_name=None, file_name=None): + self.LogsMessages(message, message_type, folder_name, file_name) + print(message) + + + + + + # ============================== + # Usage Example + # ============================== + if __name__ == "__main__": + logger = Logs() + logger.LogEngine("ExampleLogger", "ExampleLogger.log") + logger.LogsMessages("This is a hidden message") + logger.print_and_log("This is a test message.", message_type="info") + + # You can also directly specify folder and file for a log message + logger.print_and_log("Direct log to folder", message_type="info", folder_name="CustomLogs", file_name="event.log") diff --git a/Core/__init__.py b/Core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Core/config_loader.py b/Core/config_loader.py new file mode 100644 index 0000000..13297d4 --- /dev/null +++ b/Core/config_loader.py @@ -0,0 +1,30 @@ +""" +config_loader.py — Load JSON config files from Config/ +""" +import json +import os +from Core.env_loader import PROJECT_ROOT + +_cache = {} + + +def load_config(name: str) -> dict: + """ + Load Config/config_{name}.json and cache it.
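+ NOTE: repeated calls return the cached dict; a config file edited on disk is only re-read after a restart, since the module-level _cache is never invalidated.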
+ + Usage: + cfg = load_config("Brain") + model = cfg["ollama_model"] + """ + if name in _cache: + return _cache[name] + path = os.path.join(PROJECT_ROOT, "Config", f"config_{name}.json") + with open(path, "r", encoding="utf-8") as f: + data = json.load(f) + _cache[name] = data + return data + + + def config_path(relative: str) -> str: + """Resolve a config-relative path to absolute.""" + return os.path.join(PROJECT_ROOT, relative) diff --git a/Core/env_loader.py b/Core/env_loader.py new file mode 100644 index 0000000..fe6fe92 --- /dev/null +++ b/Core/env_loader.py @@ -0,0 +1,34 @@ +""" +env_loader.py — Resolve project root at runtime +""" +import os +from pathlib import Path +from typing import Optional + + +def _find_env_file() -> Optional[Path]: + """Look for .env in the project root (the parent of Core/).""" + d = Path(__file__).resolve().parent.parent + env = d / ".env" + if env.exists(): + return env + return None + + +def _load_dotenv(path: Optional[Path]): + """Minimal .env loader — no external dependency.""" + if path is None or not path.exists(): + return + with open(path) as f: + for line in f: + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, val = line.split("=", 1) + os.environ.setdefault(key.strip(), val.strip()) + + +_load_dotenv(_find_env_file()) + +PROJECT_BASE = os.environ.get("PROJECT_BASE", "/home/unitree") +PROJECT_NAME = os.environ.get("PROJECT_NAME", "Marcus") +PROJECT_ROOT = Path(__file__).resolve().parent.parent # always the Marcus/ dir diff --git a/Core/logger.py b/Core/logger.py new file mode 100644 index 0000000..cceda33 --- /dev/null +++ b/Core/logger.py @@ -0,0 +1,51 @@ +""" +logger.py — Project-wide logging via Logger.py +""" +import os +from Core.env_loader import PROJECT_ROOT +from Core.Logger import Logs + +# Single shared instance — all modules use this +_logs = Logs(main_log_file=os.path.join(PROJECT_ROOT, "logs", "main.log")) + +# Modules whose loggers have already been configured +_engines = {} + + +def get_logger(module_name: str) -> Logs: + """ + Get a logger configured for a specific module.
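+ Note: every call returns the same shared Logs instance; LogEngine just re-points its active file handler at the module's log file.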
+ Logs go to logs/{module_name}.log + + Usage: + from Core.logger import get_logger + log = get_logger("brain") + log.LogsMessages("Marcus started", "info") + """ + if module_name not in _engines: + _logs.LogEngine( + os.path.join(PROJECT_ROOT, "logs"), + f"{module_name}.log" + ) + _engines[module_name] = _logs + return _logs + + +def log(message: str, level: str = "info", module: str = "main"): + """Quick log function — no setup needed.""" + _logs.LogsMessages( + message, + message_type=level, + folder_name=os.path.join(PROJECT_ROOT, "logs"), + file_name=f"{module}.log" + ) + + +def log_and_print(message: str, level: str = "info", module: str = "main"): + """Log to file AND print to console.""" + _logs.print_and_log( + message, + message_type=level, + folder_name=os.path.join(PROJECT_ROOT, "logs"), + file_name=f"{module}.log" + ) diff --git a/Data/Brain/Sessions/session_001_2026-04-05/alerts.json b/Data/Brain/Sessions/session_001_2026-04-05/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_001_2026-04-05/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_001_2026-04-05/commands.json b/Data/Brain/Sessions/session_001_2026-04-05/commands.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_001_2026-04-05/commands.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_001_2026-04-05/detections.json b/Data/Brain/Sessions/session_001_2026-04-05/detections.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_001_2026-04-05/detections.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_001_2026-04-05/places.json b/Data/Brain/Sessions/session_001_2026-04-05/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_001_2026-04-05/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_002_2026-04-05/alerts.json b/Data/Brain/Sessions/session_002_2026-04-05/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_002_2026-04-05/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_002_2026-04-05/commands.json b/Data/Brain/Sessions/session_002_2026-04-05/commands.json new file mode 100644 index 0000000..0c65329 --- /dev/null +++ b/Data/Brain/Sessions/session_002_2026-04-05/commands.json @@ -0,0 +1,20 @@ +[ + { + "time": "16:42:30", + "cmd": "hi", + "response": "Hello!", + "duration_s": 5.98 + }, + { + "time": "16:43:00", + "cmd": "go back two step", + "response": "Going back two steps", + "duration_s": 10.01 + }, + { + "time": "16:43:23", + "cmd": "now com", + "response": "Coming to you", + "duration_s": 8.11 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_002_2026-04-05/detections.json b/Data/Brain/Sessions/session_002_2026-04-05/detections.json new file mode 100644 index 0000000..b8aa4c7 --- /dev/null +++ b/Data/Brain/Sessions/session_002_2026-04-05/detections.json @@ -0,0 +1,66 @@ +[ + { + "time": "16:42:30", + "class": "mouse", + "position": "left", + "distance": "far", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:42:30", + "class": "chair", + "position": "center", + "distance": "far", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:42:30", + "class": "laptop", + "position": "left", + "distance": "medium", + "x": 0.0, + "y": 0.0 + 
}, + { + "time": "16:42:30", + "class": "person", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:43:00", + "class": "laptop", + "position": "left", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:43:00", + "class": "person", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:43:23", + "class": "chair", + "position": "center", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:43:23", + "class": "laptop", + "position": "left", + "distance": "medium", + "x": 0.0, + "y": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_002_2026-04-05/places.json b/Data/Brain/Sessions/session_002_2026-04-05/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_002_2026-04-05/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_003_2026-04-05/alerts.json b/Data/Brain/Sessions/session_003_2026-04-05/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_003_2026-04-05/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_003_2026-04-05/commands.json b/Data/Brain/Sessions/session_003_2026-04-05/commands.json new file mode 100644 index 0000000..eef13de --- /dev/null +++ b/Data/Brain/Sessions/session_003_2026-04-05/commands.json @@ -0,0 +1,14 @@ +[ + { + "time": "16:44:40", + "cmd": "what do you see", + "response": "I see a person sitting at a desk in an office environment. The individual appears to be engaged with a computer screen and is wearing a red shirt.", + "duration_s": 8.01 + }, + { + "time": "16:45:06", + "cmd": "that is me the one whos is programming you", + "response": "That is me, the one who's programming you", + "duration_s": 6.71 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_003_2026-04-05/detections.json b/Data/Brain/Sessions/session_003_2026-04-05/detections.json new file mode 100644 index 0000000..b1c1f8e --- /dev/null +++ b/Data/Brain/Sessions/session_003_2026-04-05/detections.json @@ -0,0 +1,42 @@ +[ + { + "time": "16:44:40", + "class": "person", + "position": "left", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:45:06", + "class": "mouse", + "position": "left", + "distance": "far", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:45:06", + "class": "chair", + "position": "center", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:45:06", + "class": "person", + "position": "center", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:45:06", + "class": "laptop", + "position": "left", + "distance": "medium", + "x": 0.0, + "y": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_003_2026-04-05/places.json b/Data/Brain/Sessions/session_003_2026-04-05/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_003_2026-04-05/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_004_2026-04-05/alerts.json b/Data/Brain/Sessions/session_004_2026-04-05/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_004_2026-04-05/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_004_2026-04-05/commands.json 
b/Data/Brain/Sessions/session_004_2026-04-05/commands.json new file mode 100644 index 0000000..726ad18 --- /dev/null +++ b/Data/Brain/Sessions/session_004_2026-04-05/commands.json @@ -0,0 +1,14 @@ +[ + { + "time": "16:46:46", + "cmd": "hi", + "response": "Error.", + "duration_s": 0.0 + }, + { + "time": "16:46:49", + "cmd": "hiii", + "response": "Error.", + "duration_s": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_004_2026-04-05/detections.json b/Data/Brain/Sessions/session_004_2026-04-05/detections.json new file mode 100644 index 0000000..29c0847 --- /dev/null +++ b/Data/Brain/Sessions/session_004_2026-04-05/detections.json @@ -0,0 +1,34 @@ +[ + { + "time": "16:46:46", + "class": "person", + "position": "center", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:46:46", + "class": "chair", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:46:46", + "class": "laptop", + "position": "left", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:46:49", + "class": "mouse", + "position": "left", + "distance": "far", + "x": 0.0, + "y": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_004_2026-04-05/places.json b/Data/Brain/Sessions/session_004_2026-04-05/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_004_2026-04-05/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_005_2026-04-05/alerts.json b/Data/Brain/Sessions/session_005_2026-04-05/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_005_2026-04-05/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_005_2026-04-05/commands.json b/Data/Brain/Sessions/session_005_2026-04-05/commands.json new file mode 100644 index 0000000..63d7c1d --- /dev/null +++ b/Data/Brain/Sessions/session_005_2026-04-05/commands.json @@ -0,0 +1,32 @@ +[ + { + "time": "16:48:15", + "cmd": "hii", + "response": "Greeting", + "duration_s": 7.93 + }, + { + "time": "16:48:32", + "cmd": "what do you see", + "response": "I see a person sitting at a desk in an office environment. 
The person appears to be working on a computer.", + "duration_s": 7.64 + }, + { + "time": "16:48:58", + "cmd": "that person is the guy who is programming you", + "response": "That person is the one who programmed me", + "duration_s": 6.76 + }, + { + "time": "16:49:18", + "cmd": "who is that person", + "response": "I don't have enough information to identify the person in the image.", + "duration_s": 7.1 + }, + { + "time": "16:50:03", + "cmd": "example/", + "response": "local command", + "duration_s": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_005_2026-04-05/detections.json b/Data/Brain/Sessions/session_005_2026-04-05/detections.json new file mode 100644 index 0000000..3147b86 --- /dev/null +++ b/Data/Brain/Sessions/session_005_2026-04-05/detections.json @@ -0,0 +1,34 @@ +[ + { + "time": "16:48:15", + "class": "person", + "position": "center", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:48:32", + "class": "person", + "position": "center", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:48:58", + "class": "person", + "position": "center", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:49:18", + "class": "person", + "position": "center", + "distance": "very close", + "x": 0.0, + "y": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_005_2026-04-05/places.json b/Data/Brain/Sessions/session_005_2026-04-05/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_005_2026-04-05/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_006_2026-04-05/alerts.json b/Data/Brain/Sessions/session_006_2026-04-05/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_006_2026-04-05/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_006_2026-04-05/commands.json b/Data/Brain/Sessions/session_006_2026-04-05/commands.json new file mode 100644 index 0000000..fe549b3 --- /dev/null +++ b/Data/Brain/Sessions/session_006_2026-04-05/commands.json @@ -0,0 +1,44 @@ +[ + { + "time": "16:51:08", + "cmd": "hiii", + "response": "Greeting", + "duration_s": 7.97 + }, + { + "time": "16:51:23", + "cmd": "hi", + "response": "Greeting", + "duration_s": 6.16 + }, + { + "time": "16:51:51", + "cmd": "my name is Kassam", + "response": "My name is Kassam", + "duration_s": 6.48 + }, + { + "time": "16:51:59", + "cmd": "that person is Kassam the programmer", + "response": "That person is Kassam the programmer", + "duration_s": 6.74 + }, + { + "time": "16:52:07", + "cmd": "who is that person", + "response": "That is a man", + "duration_s": 6.42 + }, + { + "time": "16:52:47", + "cmd": "hiigo back 1 step", + "response": "Backing up one step", + "duration_s": 8.51 + }, + { + "time": "16:53:24", + "cmd": "what can you do", + "response": "I am a humanoid robot and I can perform various tasks such as walking, talking, and interacting with objects. However, in this image, I see a person sitting at a desk using a computer. 
If you have any specific questions or requests for me to perform, please let me know!", + "duration_s": 10.6 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_006_2026-04-05/detections.json b/Data/Brain/Sessions/session_006_2026-04-05/detections.json new file mode 100644 index 0000000..9f78631 --- /dev/null +++ b/Data/Brain/Sessions/session_006_2026-04-05/detections.json @@ -0,0 +1,146 @@ +[ + { + "time": "16:51:08", + "class": "chair", + "position": "center", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:51:08", + "class": "person", + "position": "center", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:51:23", + "class": "chair", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:51:23", + "class": "person", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:51:23", + "class": "laptop", + "position": "left", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:51:51", + "class": "chair", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:51:51", + "class": "person", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:51:51", + "class": "laptop", + "position": "left", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:51:59", + "class": "chair", + "position": "center", + "distance": "far", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:51:59", + "class": "person", + "position": "center", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:51:59", + "class": "laptop", + "position": "left", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:52:07", + "class": "chair", + "position": "center", + "distance": "far", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:52:07", + "class": "person", + "position": "center", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:52:47", + "class": "chair", + "position": "center", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:52:47", + "class": "person", + "position": "center", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:52:47", + "class": "cup", + "position": "left", + "distance": "far", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:53:24", + "class": "chair", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "16:53:24", + "class": "person", + "position": "left", + "distance": "very close", + "x": 0.0, + "y": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_006_2026-04-05/places.json b/Data/Brain/Sessions/session_006_2026-04-05/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_006_2026-04-05/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_007_2026-04-05/alerts.json b/Data/Brain/Sessions/session_007_2026-04-05/alerts.json new file mode 100644 index 0000000..fe51488 --- /dev/null +++ b/Data/Brain/Sessions/session_007_2026-04-05/alerts.json @@ -0,0 +1 @@ +[] diff --git a/Data/Brain/Sessions/session_007_2026-04-05/commands.json b/Data/Brain/Sessions/session_007_2026-04-05/commands.json new file mode 100644 index 0000000..fe51488 --- /dev/null +++ b/Data/Brain/Sessions/session_007_2026-04-05/commands.json @@ -0,0 +1 @@ +[] diff --git a/Data/Brain/Sessions/session_007_2026-04-05/detections.json 
b/Data/Brain/Sessions/session_007_2026-04-05/detections.json new file mode 100644 index 0000000..fe51488 --- /dev/null +++ b/Data/Brain/Sessions/session_007_2026-04-05/detections.json @@ -0,0 +1 @@ +[] diff --git a/Data/Brain/Sessions/session_007_2026-04-05/places.json b/Data/Brain/Sessions/session_007_2026-04-05/places.json new file mode 100644 index 0000000..fe51488 --- /dev/null +++ b/Data/Brain/Sessions/session_007_2026-04-05/places.json @@ -0,0 +1 @@ +[] diff --git a/Data/Brain/Sessions/session_008_2026-04-05/alerts.json b/Data/Brain/Sessions/session_008_2026-04-05/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_008_2026-04-05/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_008_2026-04-05/commands.json b/Data/Brain/Sessions/session_008_2026-04-05/commands.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_008_2026-04-05/commands.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_008_2026-04-05/detections.json b/Data/Brain/Sessions/session_008_2026-04-05/detections.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_008_2026-04-05/detections.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_008_2026-04-05/places.json b/Data/Brain/Sessions/session_008_2026-04-05/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_008_2026-04-05/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_009_2026-04-05/alerts.json b/Data/Brain/Sessions/session_009_2026-04-05/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_009_2026-04-05/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_009_2026-04-05/commands.json b/Data/Brain/Sessions/session_009_2026-04-05/commands.json new file mode 100644 index 0000000..adfa941 --- /dev/null +++ b/Data/Brain/Sessions/session_009_2026-04-05/commands.json @@ -0,0 +1,44 @@ +[ + { + "time": "17:43:52", + "cmd": "hi", + "response": "Hi", + "duration_s": 3.23 + }, + { + "time": "17:44:32", + "cmd": "step one step back", + "response": "Step one step back", + "duration_s": 3.44 + }, + { + "time": "17:44:43", + "cmd": "what do you see", + "response": "I see a person sitting at a desk with a laptop in front of them.", + "duration_s": 3.93 + }, + { + "time": "17:44:52", + "cmd": "hii", + "response": "Hi", + "duration_s": 3.31 + }, + { + "time": "17:45:27", + "cmd": "one step back", + "response": "One step back", + "duration_s": 3.46 + }, + { + "time": "17:45:38", + "cmd": "one more", + "response": "One more", + "duration_s": 3.43 + }, + { + "time": "17:46:09", + "cmd": "two step back", + "response": "Moving back two steps", + "duration_s": 3.51 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_009_2026-04-05/detections.json b/Data/Brain/Sessions/session_009_2026-04-05/detections.json new file mode 100644 index 0000000..5fee93e --- /dev/null +++ b/Data/Brain/Sessions/session_009_2026-04-05/detections.json @@ -0,0 +1,138 @@ +[ + { + "time": "17:43:52", + "class": "chair", + "position": "center", + "distance": "far", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:43:52", + "class": "person", + "position": "center", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": 
"17:43:52", + "class": "laptop", + "position": "left", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:43:52", + "class": "cup", + "position": "left", + "distance": "far", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:44:32", + "class": "person", + "position": "right", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:44:32", + "class": "laptop", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:44:43", + "class": "person", + "position": "right", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:44:52", + "class": "chair", + "position": "right", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:44:52", + "class": "person", + "position": "right", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:45:27", + "class": "person", + "position": "right", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:45:27", + "class": "laptop", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:45:38", + "class": "chair", + "position": "right", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:45:38", + "class": "person", + "position": "right", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:45:38", + "class": "laptop", + "position": "left", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:46:09", + "class": "chair", + "position": "right", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:46:09", + "class": "person", + "position": "center", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:46:09", + "class": "laptop", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_009_2026-04-05/places.json b/Data/Brain/Sessions/session_009_2026-04-05/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_009_2026-04-05/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_010_2026-04-05/alerts.json b/Data/Brain/Sessions/session_010_2026-04-05/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_010_2026-04-05/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_010_2026-04-05/commands.json b/Data/Brain/Sessions/session_010_2026-04-05/commands.json new file mode 100644 index 0000000..3041705 --- /dev/null +++ b/Data/Brain/Sessions/session_010_2026-04-05/commands.json @@ -0,0 +1,14 @@ +[ + { + "time": "17:46:37", + "cmd": "hii", + "response": "Hii", + "duration_s": 3.27 + }, + { + "time": "17:46:54", + "cmd": "why did you moved", + "response": "I moved because I was instructed to do so.", + "duration_s": 3.71 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_010_2026-04-05/detections.json b/Data/Brain/Sessions/session_010_2026-04-05/detections.json new file mode 100644 index 0000000..4040e21 --- /dev/null +++ b/Data/Brain/Sessions/session_010_2026-04-05/detections.json @@ -0,0 +1,58 @@ +[ + { + "time": "17:46:37", + "class": "chair", + "position": "right", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:46:37", + "class": "person", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:46:37", + "class": "cup", + "position": "left", + 
"distance": "far", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:46:54", + "class": "mouse", + "position": "center", + "distance": "far", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:46:54", + "class": "person", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:46:54", + "class": "chair", + "position": "right", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:46:54", + "class": "laptop", + "position": "left", + "distance": "close", + "x": 0.0, + "y": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_010_2026-04-05/places.json b/Data/Brain/Sessions/session_010_2026-04-05/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_010_2026-04-05/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_011_2026-04-05/alerts.json b/Data/Brain/Sessions/session_011_2026-04-05/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_011_2026-04-05/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_011_2026-04-05/commands.json b/Data/Brain/Sessions/session_011_2026-04-05/commands.json new file mode 100644 index 0000000..d6ae060 --- /dev/null +++ b/Data/Brain/Sessions/session_011_2026-04-05/commands.json @@ -0,0 +1,56 @@ +[ + { + "time": "17:49:16", + "cmd": "hii", + "response": "Hello! I am Marcus. How can I help you?", + "duration_s": 0.0 + }, + { + "time": "17:49:19", + "cmd": "hi", + "response": "Hello! I am Marcus. How can I help you?", + "duration_s": 0.0 + }, + { + "time": "17:49:20", + "cmd": "Hi", + "response": "Hello! I am Marcus. How can I help you?", + "duration_s": 0.0 + }, + { + "time": "17:49:37", + "cmd": "move one step back", + "response": "Moving back", + "duration_s": 3.47 + }, + { + "time": "17:49:50", + "cmd": "one tiny step back", + "response": "Moving back", + "duration_s": 3.44 + }, + { + "time": "17:50:05", + "cmd": "now come back to me", + "response": "Coming back to you", + "duration_s": 3.53 + }, + { + "time": "17:50:36", + "cmd": "turn right", + "response": "Turning right", + "duration_s": 3.4 + }, + { + "time": "17:51:01", + "cmd": "come back to me", + "response": "Coming back to you", + "duration_s": 3.5 + }, + { + "time": "17:51:22", + "cmd": "turn right 90deg and then back two steps", + "response": "Turning right", + "duration_s": 3.43 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_011_2026-04-05/detections.json b/Data/Brain/Sessions/session_011_2026-04-05/detections.json new file mode 100644 index 0000000..0c1ba53 --- /dev/null +++ b/Data/Brain/Sessions/session_011_2026-04-05/detections.json @@ -0,0 +1,106 @@ +[ + { + "time": "17:49:37", + "class": "chair", + "position": "right", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:49:37", + "class": "laptop", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:49:37", + "class": "person", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:49:50", + "class": "laptop", + "position": "left", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:49:50", + "class": "person", + "position": "right", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:50:05", + "class": "chair", + "position": "right", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:50:05", + "class": 
"laptop", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:50:05", + "class": "person", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:50:36", + "class": "chair", + "position": "center", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:50:36", + "class": "person", + "position": "left", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:51:01", + "class": "laptop", + "position": "left", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:51:01", + "class": "person", + "position": "center", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:51:22", + "class": "person", + "position": "left", + "distance": "close", + "x": 0.0, + "y": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_011_2026-04-05/places.json b/Data/Brain/Sessions/session_011_2026-04-05/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_011_2026-04-05/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_012_2026-04-05/alerts.json b/Data/Brain/Sessions/session_012_2026-04-05/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_012_2026-04-05/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_012_2026-04-05/commands.json b/Data/Brain/Sessions/session_012_2026-04-05/commands.json new file mode 100644 index 0000000..ff35f80 --- /dev/null +++ b/Data/Brain/Sessions/session_012_2026-04-05/commands.json @@ -0,0 +1,26 @@ +[ + { + "time": "17:53:36", + "cmd": "turn right 90deg and then move back 3 steps", + "response": "Turned right 90 degrees then moved backward", + "duration_s": 0.0 + }, + { + "time": "17:54:21", + "cmd": "keep turning left unitl you see a guy holding a phone", + "response": "Turning left", + "duration_s": 3.72 + }, + { + "time": "17:55:33", + "cmd": "goal/ keep turning left unitl you see a guy holding a phone", + "response": "Goal navigation: keep turning left unitl you see a guy holding a phone", + "duration_s": 5.47 + }, + { + "time": "17:55:53", + "cmd": "describe what he is doing", + "response": "Turning left", + "duration_s": 3.73 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_012_2026-04-05/detections.json b/Data/Brain/Sessions/session_012_2026-04-05/detections.json new file mode 100644 index 0000000..f2ea560 --- /dev/null +++ b/Data/Brain/Sessions/session_012_2026-04-05/detections.json @@ -0,0 +1,50 @@ +[ + { + "time": "17:54:21", + "class": "suitcase", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:54:21", + "class": "backpack", + "position": "center", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:55:32", + "class": "person", + "position": "left", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:55:53", + "class": "person", + "position": "center", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:55:53", + "class": "chair", + "position": "center", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:55:53", + "class": "laptop", + "position": "left", + "distance": "close", + "x": 0.0, + "y": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_012_2026-04-05/places.json b/Data/Brain/Sessions/session_012_2026-04-05/places.json new file 
mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_012_2026-04-05/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_013_2026-04-05/alerts.json b/Data/Brain/Sessions/session_013_2026-04-05/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_013_2026-04-05/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_013_2026-04-05/commands.json b/Data/Brain/Sessions/session_013_2026-04-05/commands.json new file mode 100644 index 0000000..f8e95c0 --- /dev/null +++ b/Data/Brain/Sessions/session_013_2026-04-05/commands.json @@ -0,0 +1,32 @@ +[ + { + "time": "17:57:56", + "cmd": "turn right 90 deg", + "response": "local command", + "duration_s": 0.0 + }, + { + "time": "17:58:22", + "cmd": "keep turning left until you see a guy holding a phone and describe what he is doing and his office", + "response": "Turning left", + "duration_s": 3.68 + }, + { + "time": "17:58:55", + "cmd": "goal/ keep turning left until you see a guy holding a phone and describe what he is doing and his office", + "response": "Goal navigation: keep turning left until you see a guy holding a phone and describe what he is doing and his office", + "duration_s": 25.79 + }, + { + "time": "17:59:13", + "cmd": "keep turning left until you see a guy holding a phone and describe what he is doing and his office", + "response": "Turning left", + "duration_s": 3.73 + }, + { + "time": "17:59:40", + "cmd": "goal/keep turning left until you see a guy holding a phone and describe what he is doing and his office", + "response": "Goal navigation: keep turning left until you see a guy holding a phone and describe what he is doing and his office", + "duration_s": 23.61 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_013_2026-04-05/detections.json b/Data/Brain/Sessions/session_013_2026-04-05/detections.json new file mode 100644 index 0000000..4b49f4d --- /dev/null +++ b/Data/Brain/Sessions/session_013_2026-04-05/detections.json @@ -0,0 +1,18 @@ +[ + { + "time": "17:58:22", + "class": "suitcase", + "position": "right", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "17:58:22", + "class": "backpack", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_013_2026-04-05/places.json b/Data/Brain/Sessions/session_013_2026-04-05/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_013_2026-04-05/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_014_2026-04-05/alerts.json b/Data/Brain/Sessions/session_014_2026-04-05/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_014_2026-04-05/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_014_2026-04-05/commands.json b/Data/Brain/Sessions/session_014_2026-04-05/commands.json new file mode 100644 index 0000000..7122adb --- /dev/null +++ b/Data/Brain/Sessions/session_014_2026-04-05/commands.json @@ -0,0 +1,26 @@ +[ + { + "time": "18:09:23", + "cmd": "what do you see", + "response": "I see a person sitting at a desk with a computer and a monitor in front of them.", + "duration_s": 4.48 + }, + { + "time": "18:09:47", + "cmd": "what do you see", + "response": "I see a person sitting at a desk with a computer and a 
monitor in front of them.", + "duration_s": 4.52 + }, + { + "time": "18:10:17", + "cmd": "example/", + "response": "local command", + "duration_s": 0.0 + }, + { + "time": "18:10:41", + "cmd": "is there anyone in the room", + "response": "I see a person sitting at a desk with a computer and a monitor in front of them.", + "duration_s": 4.57 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_014_2026-04-05/detections.json b/Data/Brain/Sessions/session_014_2026-04-05/detections.json new file mode 100644 index 0000000..3bbe6ec --- /dev/null +++ b/Data/Brain/Sessions/session_014_2026-04-05/detections.json @@ -0,0 +1,58 @@ +[ + { + "time": "18:09:23", + "class": "mouse", + "position": "center", + "distance": "far", + "x": 0.0, + "y": 0.0 + }, + { + "time": "18:09:23", + "class": "chair", + "position": "right", + "distance": "medium", + "x": 0.0, + "y": 0.0 + }, + { + "time": "18:09:23", + "class": "person", + "position": "right", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "18:09:23", + "class": "laptop", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "18:09:47", + "class": "person", + "position": "right", + "distance": "very close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "18:10:41", + "class": "person", + "position": "right", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "18:10:41", + "class": "laptop", + "position": "center", + "distance": "close", + "x": 0.0, + "y": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_014_2026-04-05/places.json b/Data/Brain/Sessions/session_014_2026-04-05/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_014_2026-04-05/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_014_2026-04-05/summary.txt b/Data/Brain/Sessions/session_014_2026-04-05/summary.txt new file mode 100644 index 0000000..a583d3b --- /dev/null +++ b/Data/Brain/Sessions/session_014_2026-04-05/summary.txt @@ -0,0 +1,13 @@ +Session: session_014_2026-04-05 +Date: 2026-04-05 18:10 +Duration: 4m 48s +Commands: 4 +YOLO detections: 7 +Alerts: 0 +Known places: none + +First commands: + - what do you see + - what do you see + - example/ + - is there anyone in the room \ No newline at end of file diff --git a/Data/Brain/Sessions/session_015_2026-04-05/alerts.json b/Data/Brain/Sessions/session_015_2026-04-05/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_015_2026-04-05/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_015_2026-04-05/commands.json b/Data/Brain/Sessions/session_015_2026-04-05/commands.json new file mode 100644 index 0000000..703b615 --- /dev/null +++ b/Data/Brain/Sessions/session_015_2026-04-05/commands.json @@ -0,0 +1,26 @@ +[ + { + "time": "18:12:31", + "cmd": "example/", + "response": "local command", + "duration_s": 0.0 + }, + { + "time": "18:12:49", + "cmd": "goal/ keep turning left until you see a guy", + "response": "Goal navigation: keep turning left until you see a guy", + "duration_s": 5.22 + }, + { + "time": "18:13:04", + "cmd": "goal/ keep turning left until you see a guy", + "response": "Goal navigation: keep turning left until you see a guy", + "duration_s": 3.22 + }, + { + "time": "18:13:25", + "cmd": "what do you see", + "response": "I see a man sitting in a chair, talking on a phone. 
There is a cardboard box and a backpack on a table next to him.", + "duration_s": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_015_2026-04-05/detections.json b/Data/Brain/Sessions/session_015_2026-04-05/detections.json new file mode 100644 index 0000000..8519b16 --- /dev/null +++ b/Data/Brain/Sessions/session_015_2026-04-05/detections.json @@ -0,0 +1,18 @@ +[ + { + "time": "18:12:48", + "class": "person", + "position": "left", + "distance": "close", + "x": 0.0, + "y": 0.0 + }, + { + "time": "18:13:03", + "class": "person", + "position": "left", + "distance": "close", + "x": 0.0, + "y": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_015_2026-04-05/places.json b/Data/Brain/Sessions/session_015_2026-04-05/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_015_2026-04-05/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_016_2026-04-09/alerts.json b/Data/Brain/Sessions/session_016_2026-04-09/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_016_2026-04-09/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_016_2026-04-09/commands.json b/Data/Brain/Sessions/session_016_2026-04-09/commands.json new file mode 100644 index 0000000..2979a4c --- /dev/null +++ b/Data/Brain/Sessions/session_016_2026-04-09/commands.json @@ -0,0 +1,8 @@ +[ + { + "time": "13:33:49", + "cmd": "hi", + "response": "Hello! I am Marcus. How can I help you?", + "duration_s": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_016_2026-04-09/detections.json b/Data/Brain/Sessions/session_016_2026-04-09/detections.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_016_2026-04-09/detections.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_016_2026-04-09/places.json b/Data/Brain/Sessions/session_016_2026-04-09/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_016_2026-04-09/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_017_2026-04-09/alerts.json b/Data/Brain/Sessions/session_017_2026-04-09/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_017_2026-04-09/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_017_2026-04-09/commands.json b/Data/Brain/Sessions/session_017_2026-04-09/commands.json new file mode 100644 index 0000000..9c7479b --- /dev/null +++ b/Data/Brain/Sessions/session_017_2026-04-09/commands.json @@ -0,0 +1,8 @@ +[ + { + "time": "13:35:59", + "cmd": "hi", + "response": "Hello! I am Marcus. 
How can I help you?", + "duration_s": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_017_2026-04-09/detections.json b/Data/Brain/Sessions/session_017_2026-04-09/detections.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_017_2026-04-09/detections.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_017_2026-04-09/places.json b/Data/Brain/Sessions/session_017_2026-04-09/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_017_2026-04-09/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_018_2026-04-09/alerts.json b/Data/Brain/Sessions/session_018_2026-04-09/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_018_2026-04-09/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_018_2026-04-09/commands.json b/Data/Brain/Sessions/session_018_2026-04-09/commands.json new file mode 100644 index 0000000..3844996 --- /dev/null +++ b/Data/Brain/Sessions/session_018_2026-04-09/commands.json @@ -0,0 +1,14 @@ +[ + { + "time": "13:43:47", + "cmd": "hi", + "response": "Hello! I am Marcus. How can I help you?", + "duration_s": 0.0 + }, + { + "time": "13:44:22", + "cmd": "turn left", + "response": "أدور لليسار", + "duration_s": 10.3 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_018_2026-04-09/detections.json b/Data/Brain/Sessions/session_018_2026-04-09/detections.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_018_2026-04-09/detections.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_018_2026-04-09/places.json b/Data/Brain/Sessions/session_018_2026-04-09/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_018_2026-04-09/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_019_2026-04-09/alerts.json b/Data/Brain/Sessions/session_019_2026-04-09/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_019_2026-04-09/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_019_2026-04-09/commands.json b/Data/Brain/Sessions/session_019_2026-04-09/commands.json new file mode 100644 index 0000000..b5cd2f2 --- /dev/null +++ b/Data/Brain/Sessions/session_019_2026-04-09/commands.json @@ -0,0 +1,8 @@ +[ + { + "time": "13:50:01", + "cmd": "what do you see", + "response": "I see a person sitting at a desk with a laptop and a large monitor", + "duration_s": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_019_2026-04-09/detections.json b/Data/Brain/Sessions/session_019_2026-04-09/detections.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_019_2026-04-09/detections.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_019_2026-04-09/places.json b/Data/Brain/Sessions/session_019_2026-04-09/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_019_2026-04-09/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_020_2026-04-09/alerts.json b/Data/Brain/Sessions/session_020_2026-04-09/alerts.json new file 
mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_020_2026-04-09/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_020_2026-04-09/commands.json b/Data/Brain/Sessions/session_020_2026-04-09/commands.json new file mode 100644 index 0000000..15b7b23 --- /dev/null +++ b/Data/Brain/Sessions/session_020_2026-04-09/commands.json @@ -0,0 +1,8 @@ +[ + { + "time": "13:58:53", + "cmd": "hi", + "response": "Hello! I am Marcus. How can I help you?", + "duration_s": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_020_2026-04-09/detections.json b/Data/Brain/Sessions/session_020_2026-04-09/detections.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_020_2026-04-09/detections.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_020_2026-04-09/places.json b/Data/Brain/Sessions/session_020_2026-04-09/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_020_2026-04-09/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_021_2026-04-09/alerts.json b/Data/Brain/Sessions/session_021_2026-04-09/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_021_2026-04-09/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_021_2026-04-09/commands.json b/Data/Brain/Sessions/session_021_2026-04-09/commands.json new file mode 100644 index 0000000..1fc33f6 --- /dev/null +++ b/Data/Brain/Sessions/session_021_2026-04-09/commands.json @@ -0,0 +1,20 @@ +[ + { + "time": "14:02:44", + "cmd": "help/", + "response": "local command", + "duration_s": 0.0 + }, + { + "time": "14:03:18", + "cmd": "turn right", + "response": "أدور لليمين", + "duration_s": 10.15 + }, + { + "time": "14:03:42", + "cmd": "wha do you see", + "response": "أرى شخصا يجلس أمام طاولة مع جهاز كمبيوتر", + "duration_s": 10.95 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_021_2026-04-09/detections.json b/Data/Brain/Sessions/session_021_2026-04-09/detections.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_021_2026-04-09/detections.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_021_2026-04-09/places.json b/Data/Brain/Sessions/session_021_2026-04-09/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_021_2026-04-09/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_022_2026-04-09/alerts.json b/Data/Brain/Sessions/session_022_2026-04-09/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_022_2026-04-09/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_022_2026-04-09/commands.json b/Data/Brain/Sessions/session_022_2026-04-09/commands.json new file mode 100644 index 0000000..897ae2f --- /dev/null +++ b/Data/Brain/Sessions/session_022_2026-04-09/commands.json @@ -0,0 +1,20 @@ +[ + { + "time": "14:06:11", + "cmd": "لف على اليسار", + "response": "ألف على اليسار", + "duration_s": 10.2 + }, + { + "time": "14:06:55", + "cmd": "turn right", + "response": "أدور لليمين", + "duration_s": 10.17 + }, + { + "time": "14:07:15", + "cmd": "what do", + "response": "أرى شخصا يجلس أمام 
طاولة مع جهاز كمبيوتر وشاشة كبيرة", + "duration_s": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_022_2026-04-09/detections.json b/Data/Brain/Sessions/session_022_2026-04-09/detections.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_022_2026-04-09/detections.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_022_2026-04-09/places.json b/Data/Brain/Sessions/session_022_2026-04-09/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_022_2026-04-09/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_023_2026-04-09/alerts.json b/Data/Brain/Sessions/session_023_2026-04-09/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_023_2026-04-09/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_023_2026-04-09/commands.json b/Data/Brain/Sessions/session_023_2026-04-09/commands.json new file mode 100644 index 0000000..ec5a0ee --- /dev/null +++ b/Data/Brain/Sessions/session_023_2026-04-09/commands.json @@ -0,0 +1,26 @@ +[ + { + "time": "14:10:51", + "cmd": "hi", + "response": "Hello! I am Marcus. How can I help you?", + "duration_s": 0.0 + }, + { + "time": "14:11:17", + "cmd": "turn right", + "response": "أدور لليمين", + "duration_s": 10.2 + }, + { + "time": "14:11:34", + "cmd": "help", + "response": "local command", + "duration_s": 0.0 + }, + { + "time": "14:11:50", + "cmd": "what do you see, yolo", + "response": "YOLO: active | YOLO not loaded | 0.0fps", + "duration_s": 0.0 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_023_2026-04-09/detections.json b/Data/Brain/Sessions/session_023_2026-04-09/detections.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_023_2026-04-09/detections.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_023_2026-04-09/places.json b/Data/Brain/Sessions/session_023_2026-04-09/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_023_2026-04-09/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_024_2026-04-09/alerts.json b/Data/Brain/Sessions/session_024_2026-04-09/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_024_2026-04-09/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_024_2026-04-09/commands.json b/Data/Brain/Sessions/session_024_2026-04-09/commands.json new file mode 100644 index 0000000..94868bc --- /dev/null +++ b/Data/Brain/Sessions/session_024_2026-04-09/commands.json @@ -0,0 +1,14 @@ +[ + { + "time": "15:34:13", + "cmd": "cd ~/Marcus", + "response": "Moved forward", + "duration_s": 9.91 + }, + { + "time": "15:34:32", + "cmd": "ython3 run_marcus.py", + "response": "Running Python script", + "duration_s": 10.09 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_024_2026-04-09/detections.json b/Data/Brain/Sessions/session_024_2026-04-09/detections.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_024_2026-04-09/detections.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_024_2026-04-09/places.json 
b/Data/Brain/Sessions/session_024_2026-04-09/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_024_2026-04-09/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Data/Brain/Sessions/session_026_2026-04-09/alerts.json b/Data/Brain/Sessions/session_026_2026-04-09/alerts.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_026_2026-04-09/alerts.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_026_2026-04-09/commands.json b/Data/Brain/Sessions/session_026_2026-04-09/commands.json new file mode 100644 index 0000000..c044f1f --- /dev/null +++ b/Data/Brain/Sessions/session_026_2026-04-09/commands.json @@ -0,0 +1,26 @@ +[ + { + "time": "15:48:45", + "cmd": "tu", + "response": "أتقدم للأمام", + "duration_s": 10.12 + }, + { + "time": "15:49:26", + "cmd": "go back", + "response": "أتراجع للخلف", + "duration_s": 10.13 + }, + { + "time": "15:49:50", + "cmd": "o turn right", + "response": "أدور لليمين", + "duration_s": 10.2 + }, + { + "time": "15:50:12", + "cmd": "tturn left until you see a person", + "response": "أدور لليمين حتى أرى شخصا", + "duration_s": 10.52 + } +] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_026_2026-04-09/detections.json b/Data/Brain/Sessions/session_026_2026-04-09/detections.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/Data/Brain/Sessions/session_026_2026-04-09/detections.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/Data/Brain/Sessions/session_026_2026-04-09/places.json b/Data/Brain/Sessions/session_026_2026-04-09/places.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/Data/Brain/Sessions/session_026_2026-04-09/places.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Doc/MARCUS_API.md b/Doc/MARCUS_API.md new file mode 100644 index 0000000..8c40865 --- /dev/null +++ b/Doc/MARCUS_API.md @@ -0,0 +1,707 @@ +# Marcus — Full API & Developer Reference + +**Project:** Marcus | YS Lootah Technology | Jetson Orin NX + G1 EDU +**Scripts:** `~/Models_marcus/marcus_llava.py` + `~/Models_marcus/marcus_yolo.py` +**Updated:** April 4, 2026 + +--- + +## Table of Contents + +1. [Configuration Variables](#1-configuration-variables) +2. [ZMQ — Holosoma Communication](#2-zmq--holosoma-communication) +3. [Camera Functions](#3-camera-functions) +4. [YOLO Vision Module](#4-yolo-vision-module) +5. [LLaVA AI Functions](#5-llava-ai-functions) +6. [Arm SDK](#6-arm-sdk) +7. [Movement Functions](#7-movement-functions) +8. [Prompt Engineering](#8-prompt-engineering) +9. [Goal Navigation](#9-goal-navigation) +10. [Autonomous Patrol](#10-autonomous-patrol) +11. [Main Loop](#11-main-loop) +12. [JSON Schema Reference](#12-json-schema-reference) +13. [Environment & Paths](#13-environment--paths) +14. [Quick Reference Card](#14-quick-reference-card) + +--- + +## 1. Configuration Variables + +Defined at the top of `marcus_llava.py`. Edit here to change global behavior. 
+ +| Variable | Default | Description | +|----------|---------|-------------| +| `ZMQ_HOST` | `"127.0.0.1"` | Holosoma ZMQ host | +| `ZMQ_PORT` | `5556` | Holosoma ZMQ port | +| `ZMQ_YOLO_PORT` | `5557` | YOLO ZMQ port (standalone mode) | +| `OLLAMA_MODEL` | `"llava:7b"` | LLaVA model via Ollama | +| `CAM_WIDTH` | `424` | Camera capture width (px) | +| `CAM_HEIGHT` | `240` | Camera capture height (px) | +| `CAM_FPS` | `15` | Camera frame rate | +| `CAM_QUALITY` | `70` | JPEG quality sent to LLaVA | +| `STOP_ITERATIONS` | `20` | gradual_stop message count | +| `STOP_DELAY` | `0.05` | seconds between stop messages | +| `STEP_PAUSE` | `0.3` | pause between consecutive action steps | +| `ARM_SDK_PATH` | `/home/unitree/unitree_sdk2_python` | Arm SDK path | +| `ARM_INTERFACE` | `"eth0"` | Network interface for arm SDK | + +Defined at top of `marcus_yolo.py`: + +| Variable | Default | Description | +|----------|---------|-------------| +| `YOLO_MODEL_PATH` | `.../Model/yolov8m.pt` | YOLO model path | +| `YOLO_CONFIDENCE` | `0.45` | Minimum detection confidence | +| `YOLO_IOU` | `0.45` | NMS IOU threshold | +| `YOLO_DEVICE` | `"cpu"` | Inference device ("cpu" or "cuda") | +| `YOLO_IMG_SIZE` | `320` | Inference image size (smaller = faster) | + +--- + +## 2. ZMQ — Holosoma Communication + +### Setup + +```python +ctx = zmq.Context() +sock = ctx.socket(zmq.PUB) +sock.bind("tcp://127.0.0.1:5556") +time.sleep(0.5) +``` + +### `send_vel(vx, vy, vyaw)` + +Send velocity command to Holosoma. + +```python +def send_vel(vx: float = 0.0, vy: float = 0.0, vyaw: float = 0.0): + sock.send_string(json.dumps({"vel": {"vx": vx, "vy": vy, "vyaw": vyaw}})) +``` + +| Parameter | Unit | Safe range | Effect | +|-----------|------|-----------|--------| +| `vx` | m/s | -0.2 to 0.4 | Forward (+) / Backward (-) | +| `vy` | m/s | -0.2 to 0.2 | Lateral | +| `vyaw` | rad/s | -0.3 to 0.3 | Turn left (+) / right (-) | + +```python +send_vel(vx=0.3) # walk forward +send_vel(vx=-0.2) # walk backward +send_vel(vyaw=0.3) # turn left +send_vel(vyaw=-0.3) # turn right +send_vel(0, 0, 0) # zero velocity (use gradual_stop() instead) +``` + +### `gradual_stop()` + +Smooth deceleration to zero over ~1 second. + +```python +def gradual_stop(): + for _ in range(STOP_ITERATIONS): # 20 iterations + send_vel(0.0, 0.0, 0.0) + time.sleep(STOP_DELAY) # 0.05s each = 1s total +``` + +**Always use this instead of a single zero-velocity message.** ZMQ PUB/SUB can drop messages — 20 guarantees delivery. + +### `send_cmd(cmd)` + +```python +def send_cmd(cmd: str): + sock.send_string(json.dumps({"cmd": cmd})) +``` + +| Command | Effect | +|---------|--------| +| `"start"` | Activate policy | +| `"walk"` | Switch to walking mode | +| `"stand"` | Return to standing | +| `"stop"` | Deactivate (only after gradual_stop) | + +**Startup sequence:** +```python +send_cmd("start"); time.sleep(0.5) +send_cmd("walk"); time.sleep(0.5) +# Now ready for velocity commands +``` + +--- + +## 3. Camera Functions + +### Architecture + +Two consumers share one camera feed: +- `latest_frame_b64[0]` — base64 JPEG for LLaVA +- `_raw_frame[0]` — raw BGR numpy array for YOLO + +Both protected by separate locks (`camera_lock`, `_raw_lock`). + +### `camera_loop()` + +Background thread — auto-reconnects on USB drops. 
+
+```python
+def camera_loop():
+    while camera_alive[0]:
+        pipeline = rs.pipeline()
+        cfg = rs.config()
+        cfg.enable_stream(rs.stream.color, 424, 240, rs.format.bgr8, 15)
+        pipeline.start(cfg)
+        while camera_alive[0]:
+            frames = pipeline.wait_for_frames(timeout_ms=3000)
+            frame = np.asanyarray(...)
+            with _raw_lock:
+                _raw_frame[0] = frame.copy()              # → YOLO
+            with camera_lock:
+                latest_frame_b64[0] = encode_jpeg(frame)  # → LLaVA
+```
+
+### `get_frame()`
+
+Returns latest base64 JPEG for LLaVA.
+
+```python
+def get_frame():
+    with camera_lock:
+        return latest_frame_b64[0]  # None if not ready
+```
+
+**Camera specs:**
+
+| Property | Value |
+|----------|-------|
+| Device | RealSense D435I (serial: 243622073459) |
+| Capture | 424×240 @ 15fps |
+| Format | BGR8 |
+| Encoding | JPEG quality 70, base64 UTF-8 |
+| Why 424×240 | Reduces USB bandwidth drops during Ollama GPU inference |
+
+---
+
+## 4. YOLO Vision Module
+
+### Import (in marcus_llava.py)
+
+```python
+from marcus_yolo import (
+    start_yolo,
+    yolo_sees, yolo_count, yolo_closest,
+    yolo_summary, yolo_ppe_violations,
+    yolo_person_too_close, yolo_all_classes, yolo_fps,
+)
+
+# Start YOLO sharing the camera frame
+YOLO_AVAILABLE = start_yolo(raw_frame_ref=_raw_frame, frame_lock=_raw_lock)
+```
+
+### `start_yolo(raw_frame_ref, frame_lock)`
+
+Loads YOLO model and starts inference background thread.
+
+```python
+def start_yolo(raw_frame_ref=None, frame_lock=None) -> bool:
+```
+
+Returns `True` on success, `False` if model fails to load.
+
+### `yolo_sees(class_name, min_confidence)`
+
+```python
+yolo_sees("person")      # True if person detected
+yolo_sees("chair", 0.6)  # True with stricter confidence
+```
+
+Returns `bool`. Instant — no LLaVA call.
+
+### `yolo_count(class_name)`
+
+```python
+n = yolo_count("person")  # 0, 1, 2...
+```
+
+### `yolo_closest(class_name)`
+
+Returns the `Detection` object with the largest bounding box (closest to robot).
+
+```python
+p = yolo_closest("person")
+if p:
+    print(p.position)           # "left" / "center" / "right"
+    print(p.distance_estimate)  # "very close" / "close" / "medium" / "far"
+    print(p.confidence)         # 0.0 to 1.0
+    print(p.size_ratio)         # fraction of frame area
+```
+
+### `yolo_summary()`
+
+```python
+yolo_summary()
+# → "1 person (center, close) | 2 chairs (right, medium) | 1 laptop (left, far)"
+```
+
+### `yolo_ppe_violations()`
+
+```python
+violations = yolo_ppe_violations()
+# → ["no helmet (left)", "no vest (center)"]
+# Requires custom PPE model — returns [] with yolov8m.pt
+```
+
+### `yolo_person_too_close(threshold)`
+
+```python
+if yolo_person_too_close(threshold=0.25):
+    gradual_stop()  # person covers >25% of frame
+```
+
+### `yolo_all_classes()`
+
+```python
+classes = yolo_all_classes()
+# → {"person", "chair", "laptop"}
+```
+
+### `yolo_fps()`
+
+```python
+print(f"{yolo_fps():.1f}fps")  # e.g. 4.4fps on CPU
+```
+
+### Detection class properties
+
+| Property | Type | Description |
+|----------|------|-------------|
+| `class_name` | str | e.g. "person" |
+| `confidence` | float | 0.0 to 1.0 |
+| `position` | str | "left" / "center" / "right" |
+| `distance_estimate` | str | "very close" / "close" / "medium" / "far" |
+| `size_ratio` | float | bbox area / frame area |
+| `cx`, `cy` | int | bbox center coordinates |
+| `x1, y1, x2, y2` | int | bounding box corners |
+
+---
+
+## 5. LLaVA AI Functions
+
+### `ask(command, img_b64)`
+
+Main command processor.
+ +```python +def ask(command: str, img_b64) -> dict: +``` + +| Parameter | Description | +|-----------|-------------| +| `command` | Natural language command | +| `img_b64` | Base64 JPEG camera frame | + +Returns dict with `actions`, `arm`, `speak`, `abort`. + +**Options:** +```python +options={"temperature": 0.0, "num_predict": 200} +``` + +**Response time:** 4-8s (14s first call warmup) + +### `ask_goal(goal, img_b64)` + +Used in goal navigation loop. + +```python +def ask_goal(goal: str, img_b64) -> dict: +``` + +Returns: `reached` (bool), `next_move` (str), `duration` (float), `speak` (str) + +### `ask_patrol(img_b64)` + +Used in autonomous patrol. + +Returns: `observation` (str), `alert` (str|None), `next_move` (str), `duration` (float) + +### `_call_llava(prompt, img_b64, num_predict)` + +Internal helper — sends to Ollama API. + +```python +r = ollama.chat( + model="llava:7b", + messages=[{"role": "user", "content": prompt, "images": [img_b64]}], + options={"temperature": 0.0, "num_predict": 200} +) +``` + +### `_parse_json(raw)` + +Extracts JSON from LLaVA response. Strips markdown fences automatically. + +```python +raw = '```json\n{"move": "left"}\n```' +d = _parse_json(raw) # → {"move": "left"} +``` + +--- + +## 6. Arm SDK + +**Class:** `G1ArmActionClient` (from `unitree_sdk2py.g1.arm.g1_arm_action_client`) +**Method:** `ExecuteAction(action_id: int) -> int` (returns 0 on success) + +### `do_arm(action)` + +```python +def do_arm(action): # action: str name or int ID +``` + +### Action ID Map + +| Friendly name | Action ID | Description | +|---------------|-----------|-------------| +| `wave` | 26 | High wave | +| `raise_right` | 23 | Right hand up | +| `raise_left` | 15 | Both hands up | +| `both_up` | 15 | Both hands up | +| `clap` | 17 | Clap hands | +| `high_five` | 18 | High five | +| `hug` | 19 | Hug pose | +| `heart` | 20 | Heart shape | +| `right_heart` | 21 | Right hand heart | +| `reject` | 22 | Reject gesture | +| `shake_hand` | 27 | Shake hand | +| `face_wave` | 25 | Wave at face level | +| `lower` | 99 | Release to default | + +### Notes + +- Runs in background thread — does not block movement +- Error 7404 = robot was moving during arm command — always `gradual_stop()` first +- `ALL_ARM_NAMES` set intercepts arm words that LLaVA puts in `actions` list + +--- + +## 7. Movement Functions + +### `execute_action(move, duration)` + +Executes a single movement step. + +```python +def execute_action(move: str, duration: float): +``` + +- Intercepts arm names → routes to `do_arm()` +- Calls `gradual_stop()` after each step +- Waits `STEP_PAUSE` (0.3s) between steps + +### `_merge_actions(actions)` + +Merges consecutive same-direction steps into one smooth movement. + +```python +# LLaVA returns: +[{"move":"right","duration":1.0}, {"move":"right","duration":1.0}, + {"move":"right","duration":1.0}, {"move":"right","duration":1.0}, + {"move":"right","duration":1.0}] + +# _merge_actions produces: +[{"move":"right","duration":5.0}] # one smooth 5-second rotation +``` + +### `execute(d)` + +Runs full LLaVA decision. + +```python +def execute(d: dict): + # 1. Check abort + # 2. _merge_actions() — smooth consecutive steps + # 3. execute_action() for each step in order + # 4. do_arm() in background thread +``` + +### `_move_step(move, duration)` + +Lightweight step for goal/patrol loops — no full `gradual_stop()` between checks. 
+ +```python +def _move_step(move: str, duration: float): + # send velocity for duration seconds + # single zero-vel + 0.1s pause — then immediately check YOLO again +``` + +### MOVE_MAP + +```python +MOVE_MAP = { + "forward": ( 0.3, 0.0, 0.0), # vx m/s + "backward": (-0.2, 0.0, 0.0), + "left": ( 0.0, 0.0, 0.3), # vyaw rad/s + "right": ( 0.0, 0.0, -0.3), +} +``` + +--- + +## 8. Prompt Engineering + +### MAIN_PROMPT + +Controls LLaVA's response format for all standard commands. + +Key rules embedded in prompt: +- `actions` is a list — one entry per step +- `arm` is never a move value +- `"90 degrees"` = 5.0s duration +- `"1 step"` = 1.0s duration + +**To add arm examples or change behavior — edit MAIN_PROMPT examples section.** + +### GOAL_PROMPT + +Used inside `navigate_to_goal()` as LLaVA fallback. +Forces `{"reached": bool, "next_move": str, "duration": float, "speak": str}`. + +### PATROL_PROMPT + +Used inside `patrol()` for scene assessment. +Forces `{"observation": str, "alert": str|null, "next_move": str, "duration": float}`. + +--- + +## 9. Goal Navigation + +### `navigate_to_goal(goal, max_steps)` + +```python +def navigate_to_goal(goal: str, max_steps: int = 40): +``` + +**Flow:** +1. Extract YOLO target from goal text (`_goal_yolo_target()`) +2. Move left 0.4s (lightweight step) +3. After `MIN_STEPS_BEFORE_CHECK` (3) steps — check YOLO every step +4. If `yolo_sees(target)` → `gradual_stop()` → print result → return +5. Falls back to LLaVA if class not in YOLO set + +**Why minimum steps?** Prevents false stop from stale camera frame when robot hasn't moved yet. + +### YOLO class aliases in goals + +```python +_GOAL_ALIASES = { + "guy": "person", "man": "person", "woman": "person", + "human": "person", "people": "person", "someone": "person", + "table": "dining table", "sofa": "couch", +} +``` + +### Examples + +```python +navigate_to_goal("stop when you see a person") +navigate_to_goal("keep turning left until you see a guy") +navigate_to_goal("find a chair and stop in front of it") +navigate_to_goal("stop when you are close to the laptop") +navigate_to_goal("stop at the end of the corridor") # LLaVA fallback +``` + +--- + +## 10. Autonomous Patrol + +### `patrol(duration_minutes, alert_callback)` + +```python +def patrol(duration_minutes: float = 5.0, alert_callback=None): +``` + +**Each patrol step:** +1. YOLO PPE violations check (instant) +2. `yolo_person_too_close()` safety check — pauses if True +3. LLaVA scene assessment → navigation decision +4. `_move_step()` to next position + +**Custom alert handler:** +```python +def my_alert(text: str): + print(f"SECURITY: {text}") + # send notification, sound alarm, etc. + +patrol(duration_minutes=10.0, alert_callback=my_alert) +``` + +--- + +## 11. Main Loop + +```python +while True: + cmd = input("Command: ").strip() + + if cmd.lower() in ("q", "quit", "exit"): + break + + # YOLO query — never sent to LLaVA + if any(w in cmd.lower() for w in ("yolo", "are you using yolo", "vision")): + print(f" YOLO: {yolo_summary()} | {yolo_fps():.1f}fps") + continue + + # Goal navigation + if cmd.lower().startswith("goal:"): + navigate_to_goal(cmd[5:].strip()) + continue + + # Patrol + if cmd.lower() == "patrol": + patrol(duration_minutes=...) + continue + + # Standard LLaVA command + img = get_frame() + d = ask(cmd, img) + execute(d) +``` + +--- + +## 12. 
JSON Schema Reference + +### Standard command response + +```json +{ + "actions": [ + {"move": "forward|backward|left|right|stop", "duration": 2.0}, + {"move": "right", "duration": 2.0} + ], + "arm": "wave|raise_right|raise_left|clap|high_five|hug|heart|shake_hand|face_wave|null", + "speak": "What Marcus says out loud", + "abort": null +} +``` + +### Goal navigation response + +```json +{ + "reached": false, + "next_move": "left", + "duration": 0.5, + "speak": "I see boxes but no person yet" +} +``` + +### Patrol assessment response + +```json +{ + "observation": "I see a person working at a desk", + "alert": null, + "next_move": "forward", + "duration": 1.0 +} +``` + +### Field definitions + +| Field | Type | Values | +|-------|------|--------| +| `move` | str\|null | "forward", "backward", "left", "right", "stop", null | +| `duration` | float | seconds (max 5.0 per step) | +| `arm` | str\|null | action name or null | +| `speak` | str | one sentence | +| `abort` | str\|null | reason string or null | +| `reached` | bool | true only if goal visually confirmed | + +--- + +## 13. Environment & Paths + +### Conda environments + +| Env | Python | Location | Purpose | +|-----|--------|----------|---------| +| `marcus` | 3.8 | `/home/unitree/miniconda3/envs/marcus` | Marcus brain + YOLO | +| `hsinference` | 3.10 | `~/.holosoma_deps/miniconda3/envs/hsinference` | Holosoma policy | + +**Always use full path:** +```bash +/home/unitree/miniconda3/envs/marcus/bin/python3 ~/Models_marcus/marcus_llava.py +``` + +### Key file paths + +| File | Path | +|------|------| +| Marcus brain | `~/Models_marcus/marcus_llava.py` | +| YOLO module | `~/Models_marcus/marcus_yolo.py` | +| YOLO model | `~/Models_marcus/Model/yolov8m.pt` | +| Loco model | `~/holosoma/.../models/loco/g1_29dof/fastsac_g1_29dof.onnx` | +| LLaVA weights | `~/.ollama/models/` | +| Arm SDK | `~/unitree_sdk2_python/` | + +### Python imports + +```python +import ollama # LLaVA via Ollama +import zmq # Holosoma communication +import json, time, base64, threading, sys, io +import numpy as np +import pyrealsense2 as rs +from PIL import Image +from marcus_yolo import start_yolo, yolo_sees, yolo_summary # YOLO +from unitree_sdk2py.g1.arm.g1_arm_action_client import G1ArmActionClient # Arm +``` + +--- + +## 14. 
Quick Reference Card + +``` +STARTUP: + Tab 1: source ~/.holosoma_deps/miniconda3/bin/activate hsinference + cd ~/holosoma && sudo jetson_clocks + python3 run_policy.py inference:g1-29dof-loco \ + --task.velocity-input zmq --task.state-input zmq --task.interface eth0 + + Tab 2: ollama serve & + /home/unitree/miniconda3/envs/marcus/bin/python3 ~/Models_marcus/marcus_llava.py + (YOLO starts automatically — no Tab 3 needed) + +COMMANDS: + walk forward · turn right · turn left · move back + turn right 90 degrees · turn left 3 steps + what do you see · inspect the office + wave · raise your right arm · clap · high five + goal: stop when you see a person + goal: keep turning left until you see a guy + patrol + are you using yolo + q + +VELOCITIES: + forward vx=+0.3 m/s backward vx=-0.2 m/s + left vyaw=+0.3 right vyaw=-0.3 + +KEY FUNCTIONS: + send_vel(vx, vy, vyaw) gradual_stop() send_cmd(str) + get_frame() → b64 ask(cmd, img) → dict execute(dict) + yolo_sees("person") yolo_summary() yolo_closest("person") + navigate_to_goal(goal) patrol(minutes) do_arm("wave") + +ARM IDs: + wave=26 raise_right=23 raise_left=15 clap=17 + high_five=18 hug=19 heart=20 reject=22 shake_hand=27 + +SAFETY: + gradual_stop() — always — never cut velocity abruptly + Never send_cmd("stop") while moving + camera_alive[0] = False — stops camera thread on exit + Error 7404 — robot was moving during arm command — stop first +``` + +--- + +*Marcus — YS Lootah Technology | Kassam | April 2026* diff --git a/Doc/MARCUS_progress.pdf b/Doc/MARCUS_progress.pdf new file mode 100644 index 0000000..b495b60 Binary files /dev/null and b/Doc/MARCUS_progress.pdf differ diff --git a/Doc/Marcus_Project.pdf b/Doc/Marcus_Project.pdf new file mode 100644 index 0000000..bf6a656 Binary files /dev/null and b/Doc/Marcus_Project.pdf differ diff --git a/Doc/architecture.md b/Doc/architecture.md new file mode 100644 index 0000000..e2258ab --- /dev/null +++ b/Doc/architecture.md @@ -0,0 +1,656 @@ +# Marcus — System Architecture + +**Project**: Marcus | YS Lootah Technology +**Hardware**: Unitree G1 EDU Humanoid (29 DOF) + Jetson Orin NX (16GB) +**Updated**: 2026-04-06 + +--- + +## Overview + +Marcus is a fully offline humanoid robot AI system. The brain runs on Jetson Orin NX with no cloud dependencies. It uses vision-language models (Qwen2.5-VL via Ollama) for understanding commands, YOLO for real-time object detection, dead reckoning for position tracking, and persistent memory across sessions. + +Two operating modes: +- **Terminal mode** (`run_marcus.py`) — direct keyboard control on the Jetson +- **Server mode** (`Server/marcus_server.py`) — WebSocket server allowing remote CLI or GUI clients + +Both modes use the **same brain** — identical command processing, same YOLO, same memory, same movement control. 
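+
+A minimal sketch of what sharing one brain looks like from a caller's point of view, assuming only the `Brain/marcus_brain.py` functions documented below (`init_brain()`, `process_command()`, `shutdown()`); the caller itself is hypothetical, since the real entrypoints differ only in where the command text comes from:
+
+```python
+# Hypothetical caller sketch: run_marcus.py (keyboard) and
+# Server/marcus_server.py (WebSocket) both reduce to this shape;
+# only the source of `cmd` and the destination of `result` differ.
+from Brain.marcus_brain import init_brain, process_command, shutdown
+
+init_brain()  # camera → YOLO → odometry → memory → image search → warmup
+try:
+    cmd = "what do you see"
+    result = process_command(cmd)  # {"type", "speak", "action", "elapsed"}
+    print(f"{result['speak']} ({result['elapsed']:.1f}s)")
+finally:
+    shutdown()  # clean stop of all subsystems
+```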
+ +--- + +## Project Structure + +``` +Marcus/ +├── run_marcus.py # Entrypoint — terminal mode +├── .env # Machine-specific: PROJECT_BASE, PROJECT_NAME +│ +├── Core/ # Foundation layer — no external deps +│ ├── env_loader.py # Reads .env, resolves PROJECT_ROOT +│ ├── config_loader.py # load_config(name) → reads Config/config_{name}.json +│ ├── Logger.py # Logging engine (file-based, no console output) +│ └── logger.py # Project wrapper: log(), log_and_print(), get_logger() +│ +├── Config/ # ALL configuration — one JSON per module +│ ├── config_ZMQ.json # ZMQ host, port, stop params +│ ├── config_Camera.json # RealSense resolution, fps, quality +│ ├── config_Brain.json # Ollama model, prompts, num_predict values +│ ├── config_Vision.json # YOLO model path, confidence, tracked classes +│ ├── config_Navigation.json # move_map, goal aliases, YOLO goal classes +│ ├── config_Patrol.json # patrol duration, proximity threshold +│ ├── config_Arm.json # arm actions, aliases, availability flag +│ ├── config_Odometry.json # speeds, tolerances, ROS2 topic +│ ├── config_Memory.json # session/places paths +│ ├── config_Network.json # Jetson IPs (eth0/wlan0), ports +│ ├── config_ImageSearch.json # search defaults +│ └── marcus_prompts.yaml # All LLaVA/Qwen prompts (main, goal, patrol, talk, verify) +│ +├── API/ # Interface layer — one file per subsystem +│ ├── zmq_api.py # ZMQ PUB socket: send_vel(), gradual_stop(), send_cmd() +│ ├── camera_api.py # RealSense thread: start/stop_camera(), get_frame() +│ ├── llava_api.py # LLaVA queries: call_llava(), ask(), ask_goal(), ask_patrol() +│ ├── yolo_api.py # YOLO interface: init_yolo(), yolo_sees(), yolo_summary()... +│ ├── odometry_api.py # Odometry wrapper: init_odometry(), get_position() +│ ├── memory_api.py # Memory wrapper: init_memory(), log_cmd(), place_save/goto() +│ ├── arm_api.py # Arm gestures: do_arm(), ARM_ACTIONS, ALL_ARM_NAMES +│ └── imgsearch_api.py # Image search wrapper: init_imgsearch(), get_searcher() +│ +├── Brain/ # Decision logic — imports ONLY from API/ +│ ├── marcus_brain.py # Orchestrator: init_brain(), process_command(), run_terminal() +│ ├── command_parser.py # 14 regex patterns + try_local_command() dispatcher +│ ├── executor.py # execute_action(), merge_actions(), execute() +│ └── marcus_memory.py # Session + place memory (Memory class, 817 lines) +│ +├── Navigation/ # Movement + position tracking +│ ├── goal_nav.py # navigate_to_goal() — YOLO+LLaVA hybrid visual search +│ ├── patrol.py # patrol() — autonomous HSE patrol with PPE detection +│ └── marcus_odometry.py # Odometry class — dead reckoning + ROS2 fallback +│ +├── Vision/ # Computer vision +│ ├── marcus_yolo.py # YOLO background inference: Detection class + query API +│ └── marcus_imgsearch.py # ImageSearch class — reference image comparison +│ +├── Server/ # WebSocket server (runs on Jetson) +│ └── marcus_server.py # Full brain over WebSocket — same as run_marcus.py +│ +├── Client/ # Remote clients (run on workstation) +│ ├── marcus_cli.py # Terminal CLI client with color output +│ └── marcus_client.py # Tkinter GUI client (3 tabs: Nav/Camera/LiDAR) +│ +├── Bridge/ # ROS2 integration +│ └── ros2_zmq_bridge.py # ROS2 /cmd_vel → ZMQ velocity bridge +│ +├── Autonomous/ # Autonomous exploration mode +│ └── marcus_autonomous.py # AutonomousMode — office exploration + mapping +│ +├── Models/ # AI model weights +│ ├── yolov8m.pt # YOLOv8 medium (50MB) +│ └── Modelfile # Ollama model definition (FROM qwen2.5vl:7b) +│ +├── Data/ # Runtime-generated data ONLY (no code) +│ ├── 
Brain/Sessions/ # session_{id}_{date}/ — commands, detections, alerts +│ ├── Brain/Exploration/ # Autonomous mode map data +│ ├── History/Places/ # places.json — persistent named locations +│ ├── History/Sessions/ # Session history +│ ├── History/Prompts/ # Prompt history +│ ├── Navigation/Maps/ # SLAM occupancy grids +│ ├── Navigation/Waypoints/ # Saved waypoint files +│ ├── Vision/Camera/ # Captured camera frames +│ ├── Vision/Videos/ # Recorded video clips +│ └── Vision/Frames/ # Detection snapshots +│ +├── Doc/ # Documentation +│ ├── architecture.md # This file +│ ├── controlling.md # Startup guide + command reference +│ ├── MARCUS_API.md # API reference +│ └── note.txt # Quick notes +│ +├── logs/ # Runtime logs (one per module) +│ ├── brain.log +│ ├── camera.log +│ ├── server.log +│ ├── zmq.log +│ └── main.log +│ +└── Legacy/ # Archived originals + └── marcus_nav.py # Original standalone prototype +``` + +--- + +## Layer Architecture + +``` +┌─────────────────────────────────────────────────┐ +│ Entrypoints │ +│ run_marcus.py (terminal) │ +│ Server/marcus_server.py (WebSocket) │ +└──────────────────┬──────────────────────────────┘ + │ +┌──────────────────▼──────────────────────────────┐ +│ Brain Layer │ +│ marcus_brain.py — init_brain() │ +│ — process_command(cmd) │ +│ command_parser.py — 14 regex local commands │ +│ executor.py — execute LLaVA decisions │ +│ marcus_memory.py — session + place memory │ +└──────────────────┬──────────────────────────────┘ + │ imports only from API/ +┌──────────────────▼──────────────────────────────┐ +│ API Layer │ +│ zmq_api camera_api llava_api │ +│ yolo_api odometry_api memory_api │ +│ arm_api imgsearch_api │ +└──────────────────┬──────────────────────────────┘ + │ wraps +┌──────────────────▼──────────────────────────────┐ +│ Navigation / Vision │ +│ goal_nav.py marcus_yolo.py │ +│ patrol.py marcus_imgsearch.py │ +│ marcus_odometry.py │ +└──────────────────┬──────────────────────────────┘ + │ +┌──────────────────▼──────────────────────────────┐ +│ Core Layer │ +│ env_loader.py config_loader.py │ +│ Logger.py logger.py │ +└──────────────────┬──────────────────────────────┘ + │ reads +┌──────────────────▼──────────────────────────────┐ +│ Config / .env │ +│ 11 JSON files + marcus_prompts.yaml │ +└─────────────────────────────────────────────────┘ +``` + +**Rule**: Brain never imports from Vision/ or Navigation/ directly. It goes through the API layer. + +--- + +## File-by-File Documentation + +### Core/ + +#### `env_loader.py` (34 lines) +Reads `.env` from the project root to resolve `PROJECT_ROOT`. Uses a minimal built-in parser (no `python-dotenv` dependency). Exports `PROJECT_ROOT` as a `Path` object resolved from `__file__`, so it works regardless of where the script is called from. Fallback default: `/home/unitree`. + +#### `config_loader.py` (30 lines) +`load_config(name)` reads `Config/config_{name}.json` and caches the result. All modules call this instead of hardcoding constants. Also provides `config_path(relative)` to resolve relative paths (e.g., `"Models/yolov8m.pt"`) to absolute paths from PROJECT_ROOT. + +#### `Logger.py` (186 lines) +Full logging engine from AI_Photographer. File-based only (no console output by default). Creates per-module log files in `logs/`. Handles write permission fallbacks, log name normalization, and corrupt log recovery. + +#### `logger.py` (51 lines) +Project wrapper around `Logger.py`. 
Provides: +- `log(message, level, module)` — write to `logs/{module}.log` +- `log_and_print(message, level, module)` — write + print +- `get_logger(module)` — get configured Logs instance + +--- + +### API/ + +Each API file wraps one subsystem. They read their own config via `load_config()`, handle import errors gracefully with fallback stubs, and export clean public functions. + +#### `zmq_api.py` (49 lines) +Creates a ZMQ PUB socket on startup (binds to `tcp://127.0.0.1:{zmq_port}`). Holosoma's RL policy connects to this socket as SUB and receives velocity commands at 50Hz. + +**Exports:** +- `send_vel(vx, vy, vyaw)` — send velocity to Holosoma +- `gradual_stop()` — 20 zero-velocity messages over 1 second +- `send_cmd(cmd)` — send state command: "start", "walk", "stand", "stop" +- `get_socket()` — return the shared PUB socket (for odometry to reuse) +- `MOVE_MAP` — direction-to-velocity lookup: `{"forward": (0.3, 0, 0), "left": (0, 0, 0.3), ...}` + +**Config:** `config_ZMQ.json` — host, port, stop_iterations, stop_delay, step_pause + +#### `camera_api.py` (111 lines) +Background thread captures RealSense D435I frames continuously. Stores both raw BGR (for YOLO) and base64 JPEG (for LLaVA). Auto-reconnects on USB drops with exponential backoff (2s → 4s → 8s, max 10s). + +**Exports:** +- `start_camera()` — starts thread, returns `(raw_frame_ref, raw_lock)` for YOLO +- `stop_camera()` — stops the thread +- `get_frame()` — returns latest base64 JPEG (or last known good frame) +- `get_frame_age()` — seconds since last successful frame +- `get_raw_refs()` — returns shared numpy frame + lock for YOLO + +**Config:** `config_Camera.json` — width (424), height (240), fps (15), jpeg_quality (70) + +#### `llava_api.py` (107 lines) +Interface to Ollama's vision-language model (Qwen2.5-VL 3B). Manages conversation history (6-turn sliding window) and user-told facts for context injection. + +**Exports:** +- `call_llava(prompt, img_b64, num_predict, use_history)` — raw LLM call +- `ask(command, img_b64)` — send command + image, get structured JSON response +- `ask_goal(goal, img_b64)` — check if goal reached during navigation +- `ask_patrol(img_b64)` — assess scene during autonomous patrol +- `parse_json(raw)` — extract JSON from LLM output +- `add_to_history(user_msg, assistant_msg)` — add to conversation context +- `remember_fact(fact)` — store persistent fact (e.g., "Kassam is the programmer") +- `OLLAMA_MODEL` — current model name from config + +**Config:** `config_Brain.json` — ollama_model, max_history, num_predict values, prompts + +#### `yolo_api.py` (66 lines) +Lazy-loads YOLO from `Vision/marcus_yolo.py`. If import fails, all functions return safe defaults (empty sets, False, 0). No crash on missing dependencies. + +**Exports:** +- `init_yolo(raw_frame_ref, frame_lock)` — start background inference +- `yolo_sees(class_name)` — is class currently detected? +- `yolo_count(class_name)` — how many instances? +- `yolo_closest(class_name)` — nearest Detection object +- `yolo_summary()` — human-readable summary: "2 persons (left, close) | 1 chair" +- `yolo_ppe_violations()` — list of PPE violations +- `yolo_person_too_close(threshold)` — safety proximity check +- `yolo_all_classes()` — set of all currently detected classes +- `yolo_fps()` — current inference rate +- `YOLO_AVAILABLE` — True if YOLO loaded successfully + +#### `odometry_api.py` (40 lines) +Wraps `Navigation/marcus_odometry.py`. Passes the shared ZMQ socket to avoid port conflicts. 
+ +**Exports:** +- `init_odometry(zmq_sock)` — start tracking, returns success bool +- `get_position()` — returns `{"x": float, "y": float, "heading": float, "source": str}` +- `odom` — the Odometry instance (or None) +- `ODOM_AVAILABLE` — True if running + +#### `memory_api.py` (109 lines) +Wraps `Brain/marcus_memory.py`. Also contains place memory functions that combine memory + odometry. + +**Exports:** +- `init_memory()` — start session, load places +- `log_cmd(cmd, response, duration)` — log command to session +- `log_detection(class_name, position, distance)` — log YOLO detection with odometry position +- `place_save(name)` — save current position as named place +- `place_goto(name)` — navigate to saved place using odometry +- `places_list_str()` — formatted table of all saved places +- `mem` — Memory instance (or None) +- `MEMORY_AVAILABLE` — True if running + +#### `arm_api.py` (16 lines) +Stub for GR00T N1.5 arm control. Currently prints a message. ARM_ACTIONS and ARM_ALIASES loaded from `config_Arm.json`. + +**Exports:** +- `do_arm(action)` — execute arm gesture (currently stub) +- `ARM_ACTIONS` — dict of action name → action ID +- `ARM_ALIASES` — dict of common names → action ID +- `ALL_ARM_NAMES` — set of all recognized arm command names +- `ARM_AVAILABLE` — False (pending GR00T integration) + +#### `imgsearch_api.py` (38 lines) +Wraps `Vision/marcus_imgsearch.py`. Wires camera, ZMQ, LLaVA, and YOLO into the ImageSearch class. + +**Exports:** +- `init_imgsearch(get_frame_fn, send_vel_fn, ...)` — wire dependencies +- `get_searcher()` — return ImageSearch instance (or None) + +--- + +### Brain/ + +#### `marcus_brain.py` (372 lines) +**The orchestrator.** Contains all the brain's public functions used by both terminal and server modes. + +**Key functions:** +- `init_brain()` — initializes all subsystems in order: camera → YOLO → odometry → memory → image search → Holosoma boot → LLaVA warmup +- `process_command(cmd) → dict` — routes a command through the full pipeline and returns `{"type", "speak", "action", "elapsed"}`. Pipeline order: + 1. YOLO status check + 2. Image search (`search/`) + 3. Natural language goal auto-detect ("find a person", "look for a bottle") + 4. Explicit goal (`goal/ ...`) + 5. Patrol (`patrol`) + 6. Local commands (place memory, odometry, help) via `command_parser.py` + 7. Talk-only questions (what/who/where/how) + 8. Greetings (hi/hello/salam) — instant, no AI + 9. "Come to me" shortcut — instant forward 2s + 10. Multi-step compound ("turn right then walk forward") + 11. Standard LLaVA command — full AI inference +- `run_terminal()` — terminal input loop (used by `run_marcus.py`) +- `get_brain_status()` — returns dict of all subsystem states +- `shutdown()` — clean stop of all subsystems + +#### `command_parser.py` (300 lines) +14 compiled regex patterns that intercept commands before they reach LLaVA. 
Handles: + +| Pattern | Example | Action | +|---------|---------|--------| +| `_RE_REMEMBER` | "remember this as door" | Save current position | +| `_RE_GOTO` | "go to door" | Navigate to saved place | +| `_RE_FORGET` | "forget door" | Delete saved place | +| `_RE_RENAME` | "rename door to entrance" | Rename place | +| `_RE_WALK_DIST` | "walk 1 meter" | Precise odometry walk | +| `_RE_WALK_BACK` | "walk backward 2 meters" | Precise backward walk | +| `_RE_TURN_DEG` | "turn right 90 degrees" | Precise odometry turn | +| `_RE_PATROL_RT` | "patrol: door → desk → exit" | Named waypoint patrol | +| `_RE_LAST_CMD` | "last command" | Recall from session | +| `_RE_DO_AGAIN` | "do that again" | Repeat last command | +| `_RE_UNDO` | "undo" | Reverse last movement | +| `_RE_LAST_SESS` | "last session" | Previous session summary | +| `_RE_WHERE` | "where am I" | Current odometry position | +| `_RE_GO_HOME` | "go home" | Return to start position | + +Also handles: session summary, help text, examples text. + +#### `executor.py` (81 lines) +Executes LLaVA movement decisions. Converts the JSON action list into sustained ZMQ velocity commands. + +**Functions:** +- `execute_action(move, duration)` — single movement step. Uses `MOVE_MAP` for velocities, intercepts arm names that LLaVA sometimes puts in the actions list +- `move_step(move, duration)` — lightweight version for goal/patrol loops (no full gradual_stop between steps) +- `merge_actions(actions)` — combines consecutive same-direction steps: 5x right 1.0s → 1x right 5.0s +- `execute(d)` — full decision execution: movements in sequence, arm gesture in background thread + +#### `marcus_memory.py` (817 lines) +Persistent session and place memory. Thread-safe with atomic JSON writes. + +**Place memory:** +- Save named positions with odometry coordinates +- Fuzzy name matching (typo tolerance) +- Name sanitization (special chars → underscores) +- Rename, delete, list operations + +**Session memory:** +- Per-session folders: `session_{id}_{date}/` +- Logs: commands.json, detections.json, alerts.json, summary.txt +- 60-second auto-flush in background thread +- Emergency save via `atexit` on crash +- YOLO detection deduplication (5-second window) +- Cross-session recall ("what did you do last session?") +- Auto-prune old sessions (keeps last 50) + +--- + +### Navigation/ + +#### `goal_nav.py` (154 lines) +Visual goal navigation. Robot rotates continuously while scanning for a target using YOLO (fast, 0.4s checks) with LLaVA fallback (slow but handles non-YOLO classes). + +**How it works:** +1. Parse goal to extract YOLO target class (via aliases: "guy" → "person", "sofa" → "couch") +2. Start continuous rotation in background thread +3. YOLO fast-check every 0.4s — if target class found: + - Extract compound condition ("holding a phone", "wearing red") + - If compound: ask LLaVA to verify ("Is the person holding a phone? yes/no") + - If verified (or no compound): stop and report +4. LLaVA fallback for non-YOLO classes: send goal_prompt with image, check if `reached: true` +5. Max steps limit (40 default), Ctrl+C to abort + +**Config:** `config_Navigation.json` — goal_aliases, yolo_goal_classes, max_steps, rotation_speed + +#### `patrol.py` (106 lines) +Autonomous HSE inspection patrol. Timed loop with YOLO PPE detection and LLaVA scene assessment. + +**How it works:** +1. YOLO checks for PPE violations (no helmet, no vest) and logs alerts +2. Safety: stop if person too close (size_ratio > 0.3) +3. 
LLaVA assesses scene: observation, alert, next_move, duration +4. Executes lightweight movement steps between checks +5. All detections and alerts logged to session memory + +**Config:** `config_Patrol.json` — default_duration_minutes, proximity_threshold + +#### `marcus_odometry.py` (808 lines) +Precise position tracking and movement control. + +**Dual source** (priority order): +1. ROS2 `/dog_odom` — joint encoder data, ±2cm accuracy (currently disabled due to DDS memory conflict) +2. Dead reckoning — velocity × time integration at 20Hz, ±10cm accuracy + +**Movement API:** +- `walk_distance(meters, speed, direction)` — odometry feedback loop, 5cm tolerance, safety timeout +- `turn_degrees(degrees, speed)` — heading feedback with 0°/360° wrap-around, 2° tolerance +- `navigate_to(x, y, heading)` — rotate to face target, walk straight, rotate to final heading +- `return_to_start()` — navigate back to where `start()` was called +- `patrol_route(waypoints, loop)` — walk through list of waypoints in order + +All movements have time-based fallbacks when odometry isn't running. Speed clamped at 0.4 m/s. KeyboardInterrupt handling with gradual stop. + +--- + +### Vision/ + +#### `marcus_yolo.py` (474 lines) +Background YOLO inference engine. Runs in a daemon thread, reads from the shared camera frame buffer. + +**Detection class:** Each detection has class_name, confidence, bbox, position (left/center/right), distance_estimate (very close/close/medium/far), size_ratio. + +**Public API:** +- `start_yolo(raw_frame_ref, frame_lock)` — start inference thread +- `yolo_sees(class_name, min_confidence)` — check if class detected +- `yolo_count(class_name)` — count instances +- `yolo_closest(class_name)` — largest bbox (closest object) +- `yolo_summary()` — "2 persons (left, close) | 1 chair (center, medium)" +- `yolo_ppe_violations()` — PPE-specific detections +- `yolo_person_too_close(threshold)` — safety proximity check + +**Config:** `config_Vision.json` — model path, confidence (0.45), 19 tracked COCO classes + +#### `marcus_imgsearch.py` (501 lines) +Image-guided search. User provides a reference photo; robot rotates and LLaVA compares camera frames to the reference. + +**How it works:** +1. Load reference image (resize to 336x336 for efficiency) +2. Start continuous rotation +3. Optional YOLO pre-filter (find "person" class before running LLaVA) +4. LLaVA comparison: sends [reference, current_frame] as two images +5. Parse JSON response: found, confidence (low/medium/high), position, description +6. Stop on medium/high confidence match + +Supports text-only search (no reference image) using hint description. + +--- + +### Server/ + +#### `marcus_server.py` (224 lines) +WebSocket server that wraps the full Marcus brain. Initializes all subsystems (camera, YOLO, odometry, memory, LLaVA) on startup, then accepts commands via WebSocket. 
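+
+A minimal client exchange, assuming the message shapes in the table below (a sketch, not the bundled clients):
+
+```python
+# Sends one command to the Marcus WebSocket server and waits for the
+# decision. IP/port are the eth0 defaults from config_Network.json.
+import asyncio
+import json
+
+import websockets  # third-party: pip install websockets
+
+async def main():
+    async with websockets.connect("ws://192.168.123.164:8765") as ws:
+        await ws.send(json.dumps({"type": "command", "command": "turn left"}))
+        while True:
+            raw = await ws.recv()
+            try:
+                msg = json.loads(raw)
+            except (TypeError, ValueError):
+                continue  # skip anything that isn't JSON (e.g. frame broadcasts)
+            if msg.get("type") == "decision":
+                print(msg.get("action"), msg.get("speak"))
+                break
+
+asyncio.run(main())
+```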
+ +**Architecture:** +- Calls `init_brain()` from `marcus_brain.py` — same init as terminal mode +- Each incoming `"command"` message runs `process_command(cmd)` in a thread pool +- Broadcasts camera frames to all clients at ~10Hz +- Auto-detects eth0 and wlan0 IPs for the connection banner + +**WebSocket message types:** +| Client sends | Server responds | +|---|---| +| `{"type": "command", "command": "turn left"}` | `{"type": "thinking"}` then `{"type": "decision", "action": "LEFT", "speak": "Turning left", ...}` | +| `{"type": "capture"}` | `{"type": "capture_result", "ok": true, "data": ""}` | +| `{"type": "ping"}` | `{"type": "pong", "lidar": true, "status": {...}}` | + +**Config:** `config_Network.json` — jetson_ip, jetson_wlan_ip, websocket_port + +--- + +### Client/ + +#### `marcus_cli.py` (288 lines) +Terminal CLI client for remote control. Connects to the server via WebSocket. + +**Features:** +- Connection menu: choose eth0 / wlan0 / custom IP +- Color-coded output: green=forward, cyan=turn, red=stop, orange=greeting/local +- Displays `Marcus: ` for every response +- System commands: `status`, `camera`, `profile `, `capture`, `help`, `q` +- Async receiver for real-time decision display while typing +- Command history (not persisted) + +#### `marcus_client.py` (1021 lines) +Tkinter GUI client with 3 tabs: +- **Navigation** — live camera view, command entry, quick buttons, decision log +- **Camera** — profile switcher, custom resolution, capture, preview toggle +- **LiDAR** — full SLAM Commander (runs locally via SlamEngineClient from G1_Lootah/Lidar) + +--- + +### Bridge/ + +#### `ros2_zmq_bridge.py` (66 lines) +ROS2 Foxy node that subscribes to `/cmd_vel` (TwistStamped) and `holosoma/other_input` (String), forwarding to the ZMQ PUB socket. Requires Python 3.8 + ROS2 sourced. Used when external ROS2 nodes need to send velocity commands to Holosoma. + +--- + +### Autonomous/ + +#### `marcus_autonomous.py` (516 lines) +Autonomous office exploration mode. Marcus moves freely, identifies areas and objects, builds a live map, saves everything to a session folder. + +**State machine:** IDLE → EXPLORING → IDLE + +**Exploration loop:** +1. Safety: stop if person too close +2. Record YOLO detections + odometry path point +3. Every 5 steps: LLaVA scene assessment (area_type, objects, observation) +4. Move forward; turn when blocked (alternates left/right) +5. Save interesting frames to disk +6. 
Auto-flush to disk every 20 steps + +**Output:** `Data/Brain/Exploration/map_{id}_{date}/` — observations.json, path.json, summary.txt, frames/ + +--- + +## Data Flow + +### Command: "turn right" + +``` +User types "turn right" + │ + ▼ +process_command("turn right") + │ (no regex match — falls through to LLaVA) + ▼ +llava_api.ask("turn right", camera_frame) + │ sends to Ollama qwen2.5vl:3b + ▼ +LLaVA returns: {"actions":[{"move":"right","duration":2.0}], "speak":"Turning right"} + │ + ▼ +executor.execute(d) + │ merge_actions → execute_action("right", 2.0) + ▼ +zmq_api.send_vel(vyaw=-0.3) × 40 times over 2.0 seconds + │ + ▼ +Holosoma RL policy receives velocity → robot turns right + │ + ▼ +zmq_api.gradual_stop() → 20 zero-velocity messages +``` + +### Command: "remember this as door" + +``` +User types "remember this as door" + │ + ▼ +process_command("remember this as door") + │ matches _RE_REMEMBER regex + ▼ +command_parser.try_local_command() + │ calls memory_api.place_save("door") + ▼ +odometry_api.get_position() → {"x": 1.2, "y": 0.5, "heading": 90.0} + │ + ▼ +marcus_memory.Memory.save_place("door", x=1.2, y=0.5, heading=90.0) + │ atomic write to Data/History/Places/places.json + ▼ +Returns: {"type": "local", "speak": "Done", "action": "LOCAL"} +``` + +### Command: "goal/ find a person" + +``` +User types "goal/ find a person" + │ + ▼ +process_command() → navigate_to_goal("find a person") + │ + ▼ +_goal_yolo_target("find a person") → "person" + │ YOLO mode (not LLaVA fallback) + ▼ +Start continuous rotation thread (vyaw=0.3) + │ + ▼ +Loop every 0.4s: + │ yolo_sees("person") → False → keep rotating + │ yolo_sees("person") → False → keep rotating + │ yolo_sees("person") → True! + │ ▼ + │ _extract_extra_condition() → None (no compound) + │ ▼ + │ gradual_stop() + │ yolo_closest("person") → Detection(center, close) + │ log_detection("person", "center", "close") + ▼ +Returns: {"type": "goal", "speak": "Goal navigation: find a person"} +``` + +--- + +## Hardware Stack + +``` +Unitree G1 EDU (29 DOF) + │ + ├── Jetson Orin NX (16GB VRAM) + │ ├── Holosoma RL policy (50Hz) — locomotion joints 0-11 + │ ├── Ollama + Qwen2.5-VL 3B — vision-language understanding + │ ├── YOLOv8m — real-time object detection (CPU, 320px) + │ └── Marcus Brain — this project + │ + ├── RealSense D435I — RGB camera (424x240 @ 15fps) + │ + ├── Livox Mid360 LiDAR — 3D point cloud (via SlamEngineClient) + │ + └── ZMQ PUB/SUB — velocity commands (tcp://127.0.0.1:5556) + ├── Marcus Brain PUB → Holosoma SUB + └── ROS2 Bridge PUB → Holosoma SUB (alternative) +``` + +--- + +## Startup Order + +1. **Holosoma** — must be running first (RL locomotion policy) +2. **Marcus Server** (`python3 -m Server.marcus_server`) — or Brain (`python3 run_marcus.py`) +3. **Client** (`python3 -m Client.marcus_cli`) — connects to server + +Cannot run Server and Brain simultaneously (both bind ZMQ port 5556). 
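+
+A quick way to reproduce the conflict outside Marcus (a sketch using plain pyzmq, not repo code):
+
+```python
+# Two PUB sockets binding the same TCP endpoint: the second bind raises
+# zmq.ZMQError ("Address already in use"), which is what happens when
+# Server and Brain are launched together.
+import zmq
+
+ctx = zmq.Context.instance()
+first = ctx.socket(zmq.PUB)
+first.bind("tcp://127.0.0.1:5556")
+
+second = ctx.socket(zmq.PUB)
+try:
+    second.bind("tcp://127.0.0.1:5556")
+except zmq.ZMQError as e:
+    print("second bind failed:", e)
+```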
+
+---
+
+## Config Reference
+
+| File | Key values |
+|------|-----------|
+| `config_ZMQ.json` | zmq_host: 127.0.0.1, zmq_port: 5556 |
+| `config_Camera.json` | 424x240 @ 15fps, JPEG quality 70 |
+| `config_Brain.json` | qwen2.5vl:3b, history 6 turns, prompts |
+| `config_Vision.json` | yolov8m.pt, confidence 0.45, 19 classes |
+| `config_Navigation.json` | move_map velocities, goal aliases |
+| `config_Network.json` | eth0: 192.168.123.164, wlan0: 10.255.254.86, port 8765 |
+| `config_Odometry.json` | walk 0.25 m/s, turn 0.25 rad/s, 5cm tolerance |
+| `config_Memory.json` | Data/Brain/Sessions, Data/History/Places |
+| `config_Patrol.json` | 5 min default, proximity 0.3 |
+| `config_Arm.json` | 16 gestures, arm_available: false (GR00T pending) |
+
+---
+
+## Line Count Summary
+
+| Layer | Files | Lines |
+|-------|-------|-------|
+| Core | 4 | 301 |
+| API | 8 | 536 |
+| Brain | 4 | 1,570 |
+| Navigation | 3 | 1,068 |
+| Vision | 2 | 975 |
+| Server | 1 | 224 |
+| Client | 2 | 1,309 |
+| Bridge | 1 | 66 |
+| Autonomous | 1 | 516 |
+| Entrypoint | 1 | 16 |
+| **Total** | **27** | **6,581** |
diff --git a/Doc/controlling.md b/Doc/controlling.md
new file mode 100644
index 0000000..a94a181
--- /dev/null
+++ b/Doc/controlling.md
@@ -0,0 +1,201 @@
+# Marcus — Control & Startup Guide
+
+**Updated**: 2026-04-06
+
+---
+
+## Quick Start
+
+### Prerequisites (Jetson Orin NX)
+
+```bash
+# Terminal 1 — Start Holosoma (locomotion policy)
+source ~/.holosoma_deps/miniconda3/bin/activate hsinference
+cd ~/holosoma
+~/.holosoma_deps/miniconda3/envs/hsinference/bin/python3 \
+  src/holosoma_inference/holosoma_inference/run_policy.py \
+  inference:g1-29dof-loco \
+  --task.model-path src/holosoma_inference/holosoma_inference/models/loco/g1_29dof/fastsac_g1_29dof.onnx \
+  --task.velocity-input zmq \
+  --task.state-input zmq \
+  --task.interface eth0
+```
+
+### Option A — Terminal Mode (on Jetson)
+
+```bash
+# Terminal 2 — Start Marcus Brain
+conda activate marcus
+ollama serve & sleep 3
+cd ~/Marcus
+python3 run_marcus.py
+```
+
+Direct keyboard control. All commands typed locally.
+ +### Option B — Server + Client (remote) + +```bash +# Terminal 2 (Jetson) — Start Server +cd ~/Marcus +python3 -m Server.marcus_server + +# Terminal 3 (Workstation) — Connect Client +cd ~/Robotics_workspace/yslootahtech/Project/Marcus +python3 -m Client.marcus_cli +``` + +Client prompts for connection: +``` + Connection options: + 1) eth0 — 192.168.123.164:8765 + 2) wlan0 — 10.255.254.86:8765 + 3) custom + Choose [1/2/3] or IP: +``` + +Or skip prompt: `python3 -m Client.marcus_cli --ip 192.168.123.164 --port 8765` + +--- + +## Command Reference + +### Movement +| Command | Action | +|---------|--------| +| `turn left` / `turn right` | Rotate (2s default) | +| `walk forward` / `move back` | Walk (2s default) | +| `walk 1 meter` | Precise odometry walk | +| `walk backward 2 meters` | Precise backward walk | +| `turn right 90 degrees` | Precise odometry turn | +| `turn right then walk forward` | Multi-step compound | +| `come to me` / `come here` | Forward 2s (instant, no AI) | +| `stop` | Gradual stop | + +### Vision +| Command | Action | +|---------|--------| +| `what do you see` | LLaVA describes camera view | +| `describe the room` | LLaVA scene description | +| `is anyone here` | LLaVA person check | +| `yolo` | Show YOLO detection status | + +### Goal Navigation +| Command | Action | +|---------|--------| +| `goal/ stop when you see a person` | YOLO fast search + stop | +| `goal/ find a laptop` | YOLO + LLaVA search | +| `goal/ stop when you see a guy holding a phone` | YOLO + LLaVA compound verification | +| `find a person` | Auto-detected as goal (no prefix needed) | +| `look for a bottle` | Auto-detected as goal | + +### Place Memory +| Command | Action | +|---------|--------| +| `remember this as door` | Save current position | +| `go to door` | Navigate to saved place | +| `places` | List all saved places | +| `forget door` | Delete place | +| `rename door to entrance` | Rename place | +| `where am I` | Show odometry position | +| `go home` | Return to start position | + +### Patrol +| Command | Action | +|---------|--------| +| `patrol` | Autonomous patrol (prompts for duration) | +| `patrol: door → desk → exit` | Named waypoint patrol | + +### Image Search +| Command | Action | +|---------|--------| +| `search/ /path/to/photo.jpg` | Find target from reference image | +| `search/ /path/to/photo.jpg person in blue shirt` | Image + hint | +| `search/ person in blue shirt` | Text-only search | + +### Session Memory +| Command | Action | +|---------|--------| +| `last command` | Show last typed command | +| `do that again` | Repeat last command | +| `undo` | Reverse last movement | +| `last session` | Previous session summary | +| `session summary` | Current session stats | + +### System +| Command | Action | +|---------|--------| +| `help` | Command reference | +| `example` | Usage examples | +| `q` / `quit` | Shutdown | + +### Client-Only Commands (CLI) +| Command | Action | +|---------|--------| +| `status` | Ping server + LiDAR status | +| `camera` | Get camera configuration | +| `profile low/medium/high/full` | Switch camera profile | +| `capture` | Take a photo | + +--- + +## Network Configuration + +| Interface | IP | Use | +|-----------|-----|------| +| `eth0` | 192.168.123.164 | Robot internal network (Jetson - G1 - LiDAR) | +| `wlan0` | 10.255.254.86 | Office WiFi (Jetson - Workstation) | + +| Service | Port | Protocol | +|---------|------|----------| +| Marcus WebSocket | 8765 | ws:// | +| ZMQ Velocity | 5556 | tcp:// (PUB/SUB) | +| Ollama API | 11434 | HTTP | +| LiDAR | 
192.168.123.120 | Livox Mid360 | + +All configurable in `Config/config_Network.json`. + +--- + +## Subsystem Status + +On startup, the server/brain shows: +``` +YOLO : active (19 tracked classes, CPU, yolov8m.pt) +Odometry : active (dead reckoning, +/-10cm) +Memory : active (session_016_2026-04-06) +Camera : 424x240@15 (RealSense D435I) +LiDAR : ALIVE (Livox Mid360 at 192.168.123.120) +Arms : pending (GR00T N1.5 not yet integrated) +``` + +--- + +## Troubleshooting + +| Issue | Cause | Fix | +|-------|-------|-----| +| `ModuleNotFoundError: No module named 'Server'` | Wrong directory | `cd ~/Marcus` then run | +| Robot doesn't move | Holosoma not running | Start Holosoma first (Terminal 1) | +| Robot doesn't move | ZMQ port conflict | Only run one of Server or Brain, not both | +| `Camera: {e} reconnecting` | USB bandwidth | Reduce to `low` profile | +| LLaVA slow (>10s) | GPU VRAM full | Kill other GPU processes, or use `qwen2.5vl:3b` | +| `YOLO not available` | ultralytics not installed | `pip install ultralytics` | +| Client can't connect | Wrong IP or server not running | Check `status` command, verify IP | + +--- + +## File Locations + +| What | Path | +|------|------| +| Brain code | `~/Marcus/Brain/` | +| Server | `~/Marcus/Server/marcus_server.py` | +| Config | `~/Marcus/Config/` | +| Prompts | `~/Marcus/Config/marcus_prompts.yaml` | +| YOLO model | `~/Marcus/Models/yolov8m.pt` | +| Session data | `~/Marcus/Data/Brain/Sessions/` | +| Places | `~/Marcus/Data/History/Places/places.json` | +| Logs | `~/Marcus/logs/` | + +See `Doc/architecture.md` for full project structure and file-by-file documentation. diff --git a/Doc/environment.md b/Doc/environment.md new file mode 100644 index 0000000..248b3dc --- /dev/null +++ b/Doc/environment.md @@ -0,0 +1,380 @@ +# Marcus — Environment & Version Reference + +**Project**: Marcus | YS Lootah Technology +**Hardware**: Unitree G1 EDU Humanoid (29 DOF) + Jetson Orin NX 16 GB +**Deployment host**: `unitree@192.168.123.164` (hostname `ubuntu`) +**Conda env**: `marcus` +**Captured**: 2026-04-12 + +This document is the canonical record of the verified GPU-accelerated software stack running on the Jetson Orin NX. It covers system software, Python environment, Marcus runtime dependencies, installation recipe, verification commands, and known quirks. Pair it with `architecture.md` (what the code does) and `controlling.md` (how to drive it). + +--- + +## 1. Hardware + +| Item | Value | +|---|---| +| Robot | Unitree G1 EDU humanoid, 29 DoF | +| Compute | Jetson Orin NX 16 GB (integrated Ampere GPU, 8.7 capability, tensor cores) | +| Camera | Intel RealSense D435 (424x240 @ 15 fps, BGR8) | +| LiDAR | (optional) loaded via `API/lidar_api.py` + `Lidar/SLAM_worker.py` | +| Network | `eth0` 192.168.123.164 (Holosoma + Marcus), `wlan0` 10.255.254.86 | + +--- + +## 2. System software (Jetson) + +| Layer | Version | Source of truth | +|---|---|---| +| Kernel | `Linux 5.10.104-tegra aarch64` | `uname -a` | +| OS | Ubuntu 20.04.6 LTS | `/etc/os-release` | +| L4T | R35.3.1 (2023-03-19 build, GCID 32827747) | `/etc/nv_tegra_release` | +| JetPack | **5.1.1** (derived from L4T R35.3.1) | `nvidia-l4t-core 35.3.1-20230319081403` | +| CUDA runtime | `11.4.19-1` | `dpkg -l cuda-runtime-11-4` | +| CUDA toolkit (nvcc) | `11.4.315` (Built 2022-10-23) | `nvcc --version` | +| cuDNN | `8.6.0.166-1+cuda11.4` | `dpkg -l libcudnn8` | +| CUDA install paths | `/usr/local/cuda`, `/usr/local/cuda-11`, `/usr/local/cuda-11.4` | `ls /usr/local` | + +--- + +## 3. 
Python environment + +| Layer | Version | +|---|---| +| Conda | `25.11.1` | +| Active env | `marcus` at `/home/unitree/miniconda3/envs/marcus` | +| Python | `3.8.20` (Jetson stock Python for JetPack 5) | +| pip | `25.0.1` (user site `~/.local/lib/python3.8/site-packages`) | +| `which python3` | `/home/unitree/miniconda3/envs/marcus/bin/python3` | + +Other conda envs on the machine (not used by Marcus): `base`, `gemini`, `gmr`, `marcus_tts`, `saqr`, `teleimager`, `tv`, `twist2`, `unitree_lerobot`, plus the Holosoma-side `hsinference` under `~/.holosoma_deps/miniconda3`. + +Note: Python 3.8 is EOL (Oct 2024). It is retained because JetPack 5 ships it and NVIDIA's pre-built Jetson torch wheels for this generation target cp38. Upgrading requires either JetPack 6 or a from-source torch build. + +--- + +## 4. PyTorch stack (GPU-critical) + +**The torch install must be the NVIDIA Jetson wheel, not the PyPI wheel.** PyPI `torch` for aarch64 is CPU-only; only NVIDIA's builds expose CUDA on Jetson. + +| Item | Expected | Verified | +|---|---|---| +| `torch.__version__` | `2.1.0a0+41361538.nv23.06` | yes | +| `torch.cuda.is_available()` | `True` | yes | +| `torch.version.cuda` | `11.4` | yes | +| `torch.backends.cudnn.version()` | `8600` (= cuDNN 8.6.0) | yes | +| `torch.cuda.get_device_name(0)` | `Orin` | yes | +| `torch.cuda.get_device_capability(0)` | `(8, 7)` (Ampere + tensor cores) | yes | +| `torchvision.__version__` | `0.16.1` (built from source against the Jetson torch) | yes | +| `torchvision.ops.nms(...).device` | `cuda:0` | yes | + +Capability 8.7 gives us FP16 tensor cores — the GPU-path FP16 kwarg in `Vision/marcus_yolo.py` is meaningful here, not placebo. + +--- + +## 5. Ultralytics / YOLO runtime + +| Item | Value | +|---|---| +| `ultralytics` | `8.4.21` | +| Weights | `Models/yolov8m.pt` (~50 MB, auto-fetched if missing) | +| `yolo checks` GPU line | `GPU: Orin, 15389MiB`, `CUDA: 11.4` | +| Marcus config device | `cuda` (hard-required — no CPU fallback) | +| Marcus config half | `true` (FP16) | +| Marcus config imgsz | `320` | +| First inference warmup | ~4–5 s (cuDNN kernel autotune) | +| **Steady-state FPS on Orin** | **~21.9 fps** at imgsz=320 FP16 | + +The 21.9 fps figure is measured via the smoke test in section 11 below. It comfortably exceeds the 15 fps camera stream, so YOLO is no longer the pipeline bottleneck. + +--- + +## 6. Ollama / vision-language model + +| Item | Value | +|---|---| +| Ollama CLI client | `0.20.0` | +| Ollama server | `0.20.0` (`curl http://localhost:11434/api/version`) | +| Python `ollama` package | `0.6.1` (no `__version__` attribute — use `pip show ollama`) | +| Models installed | `qwen2.5vl:3b` (3.2 GB), `llava:7b` (4.7 GB) | +| Marcus-configured model | `qwen2.5vl:3b` (`Config/config_Brain.json`) | +| Resident VRAM when loaded | ~11 GB (includes KV cache + vision projector) | +| Processor placement | **`100% GPU`** per `ollama ps` | + +Headroom note: with Qwen2.5-VL resident (~11 GB) + YOLO (~0.5 GB) + camera buffers + the ZMQ bridge, you have ~4 GB free on the 16 GB Orin NX. Comfortable but not unlimited — if image-search (which sends two images to Qwen at once) ever OOMs, enable quantized KV cache via `OLLAMA_KV_CACHE_TYPE=q8_0`. + +--- + +## 7. Marcus runtime Python dependencies + +Captured from `importlib` on 2026-04-12, `marcus` env on the Jetson. 
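+
+The table below was gathered with a loop of roughly this shape (a reconstruction, not the exact script):
+
+```python
+# Prints "<module> <version>" for the imports Marcus actually uses;
+# modules without __version__ (e.g. the ollama client) fall back gracefully.
+import importlib
+
+MODULES = ["numpy", "cv2", "PIL", "yaml", "zmq", "websockets",
+           "pyrealsense2", "ollama", "requests", "ultralytics",
+           "torch", "torchvision"]
+
+for name in MODULES:
+    try:
+        mod = importlib.import_module(name)
+        print(name, getattr(mod, "__version__", "(no __version__)"))
+    except ImportError as exc:
+        print(name, "MISSING:", exc)
+```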
+
+| Module | Version | Site |
+|---|---|---|
+| `numpy` | 1.24.4 | user |
+| `cv2` (opencv-python) | 4.13.0 | user |
+| `PIL` (Pillow) | 10.4.0 | user |
+| `yaml` (PyYAML) | 6.0.3 | user |
+| `zmq` (pyzmq) | 27.1.0 | user |
+| `websockets` | 13.1 | conda env |
+| `pyrealsense2` | 2.55.1.6486 | user |
+| `dotenv` | (no `__version__`) | user |
+| `ollama` (python client) | 0.6.1 | user |
+| `requests` | 2.32.4 | user |
+| `ultralytics` | 8.4.21 | user |
+| `torch` | 2.1.0a0+41361538.nv23.06 | user |
+| `torchvision` | 0.16.1 (egg) | user |
+| `matplotlib` | 3.7.5 | user (via ultralytics) |
+| `scipy` | 1.10.1 | user (via ultralytics) |
+| `psutil` | 7.2.2 | user (via ultralytics) |
+| `polars` | 1.8.2 | user (via ultralytics) |
+| `ultralytics-thop` | 2.0.18 | user |
+
+"user" = `~/.local/lib/python3.8/site-packages`. Most Marcus deps live here rather than in the conda env's site-packages because of how JetPack ships system libs with `--user` installs.
+
+---
+
+## 8. Marcus project modules — import status
+
+All 18 project modules import cleanly from the `marcus` env at `/home/unitree/Marcus`:
+
+```
+OK   Core.config_loader
+OK   Core.env_loader
+OK   Vision.marcus_yolo
+OK   Vision.marcus_imgsearch
+OK   API.llava_api
+OK   API.yolo_api
+OK   API.camera_api
+OK   API.zmq_api
+OK   API.imgsearch_api
+OK   API.odometry_api
+OK   API.memory_api
+OK   API.arm_api
+OK   Navigation.goal_nav
+OK   Navigation.patrol
+OK   Navigation.marcus_odometry
+OK   Brain.marcus_brain
+OK   Brain.marcus_memory
+OK   Autonomous.marcus_autonomous
+```
+
+---
+
+## 9. Installation recipe (reproducing this environment)
+
+Run these steps on a fresh `marcus` conda env, in order. They reproduce the exact stack above.
+
+### 9.1 Fix the Jetson clock if needed
+
+Jetsons have no RTC battery; after power cycles the clock may reset to 1970 and break TLS (cert validation fails with "issued certificate not yet valid"). `systemd-timesyncd` may refuse NTP — fall back to manual date:
+
+```bash
+sudo systemctl restart systemd-timesyncd
+sudo timedatectl set-ntp true   # may fail on Jetson
+# fallback:
+sudo date -s "YYYY-MM-DD HH:MM:SS"
+sudo hwclock --systohc
+```
+
+### 9.2 Remove CPU-only torch and install the NVIDIA Jetson wheel
+
+```bash
+conda activate marcus
+pip uninstall -y torch torchvision torchaudio
+
+cd ~
+wget https://developer.download.nvidia.com/compute/redist/jp/v512/pytorch/torch-2.1.0a0+41361538.nv23.06-cp38-cp38-linux_aarch64.whl
+# (JP 5.1.1 uses the same nv23.06 wheel; v511 returns 404 on this build)
+
+pip install ~/torch-2.1.0a0+41361538.nv23.06-cp38-cp38-linux_aarch64.whl
+```
+
+Verify before proceeding:
+
+```bash
+python3 -c "import torch; print(torch.__version__, torch.cuda.is_available(), torch.cuda.get_device_name(0))"
+# Expect: 2.1.0a0+41361538.nv23.06 True Orin
+```
+
+### 9.3 Build torchvision 0.16.1 from source (matches torch 2.1.0)
+
+```bash
+sudo apt install -y libjpeg-dev zlib1g-dev libpython3-dev \
+    libavcodec-dev libavformat-dev libswscale-dev
+
+cd ~
+git clone --branch v0.16.1 https://github.com/pytorch/vision torchvision
+cd torchvision
+export BUILD_VERSION=0.16.1
+python3 setup.py install --user
+```
+
+Build takes ~15–25 min on Orin NX.
Verify: + +```bash +python3 -c " +import torch, torchvision +x = torch.rand(5, 4).cuda(); s = torch.rand(5).cuda() +print(torchvision.__version__, torchvision.ops.nms(x, s, 0.5).device) +" +# Expect: 0.16.1 cuda:0 +``` + +### 9.4 Ollama server + model + +```bash +ollama serve > /tmp/ollama.log 2>&1 & +sleep 3 +ollama list # confirm qwen2.5vl:3b present +ollama pull qwen2.5vl:3b # if missing (~3 GB) +ollama run qwen2.5vl:3b "hi" # warm model into VRAM +ollama ps # PROCESSOR must say "100% GPU" +``` + +### 9.5 Other deps + +Already present via `pip install --user` from earlier setup — see section 7 for versions. No action needed unless reinstalling from scratch. + +--- + +## 10. Marcus launch sequence + +Full terminal-mode bring-up: + +```bash +# Terminal 1 — Holosoma locomotion policy (runs in hsinference env, not marcus) +source ~/.holosoma_deps/miniconda3/bin/activate hsinference +cd ~/holosoma +~/.holosoma_deps/miniconda3/envs/hsinference/bin/python3 \ + src/holosoma_inference/holosoma_inference/run_policy.py \ + inference:g1-29dof-loco \ + --task.model-path src/holosoma_inference/holosoma_inference/models/loco/g1_29dof/fastsac_g1_29dof.onnx \ + --task.velocity-input zmq --task.state-input zmq --task.interface eth0 + +# Terminal 2 — Ollama server (leave running) +ollama serve > /tmp/ollama.log 2>&1 & + +# Terminal 3 — Marcus brain +conda activate marcus +cd ~/Marcus +python3 run_marcus.py +``` + +Expected Marcus YOLO init line: + +``` +[YOLO] Model loaded ✅ | device: cuda (Orin) | FP16 | 19 tracked classes +``` + +If instead you get `RuntimeError: [YOLO] CUDA not available — torch.cuda.is_available() == False`, the torch install is wrong or was overwritten by a `pip install torch` somewhere — redo section 9.2. + +--- + +## 11. Verification commands (copy/paste) + +Full-stack version check: + +```bash +cat /etc/nv_tegra_release +dpkg -l | grep -E "nvidia-l4t-core|cuda-runtime|libcudnn8" | awk '{print $2, $3}' +nvcc --version | tail -n2 +python3 --version +python3 -c "import torch, torchvision; print('torch', torch.__version__, '| cuda', torch.cuda.is_available(), '| cudnn', torch.backends.cudnn.version(), '| gpu', torch.cuda.get_device_name(0), '| tv', torchvision.__version__)" +python3 -c "import ultralytics; print('ultralytics', ultralytics.__version__)" +ollama --version +curl -s http://localhost:11434/api/version +ollama list +ollama ps +``` + +YOLO warmup + steady-state FPS (the gold-standard GPU smoke test): + +```bash +cd ~/Marcus +python3 - <<'EOF' +import sys, os, time, threading +sys.path.insert(0, os.getcwd()) +os.environ.setdefault("PROJECT_BASE", "/home/unitree") +os.environ.setdefault("PROJECT_NAME", "Marcus") +import numpy as np +import Vision.marcus_yolo as my +from Vision.marcus_yolo import ( + start_yolo, yolo_fps, yolo_is_running, _resolve_device, YOLO_DEVICE +) + +dev, half = _resolve_device(YOLO_DEVICE) +print(f"[resolve] device={dev!r} half={half}") + +raw, lock = [None], threading.Lock() +assert start_yolo(raw_frame_ref=raw, frame_lock=lock) +raw[0] = np.random.randint(0, 255, (240, 424, 3), dtype=np.uint8) + +for i in range(15): + time.sleep(1) + print(f" t={i+1:2d}s fps={yolo_fps():.1f}") + +time.sleep(5) +print(f"[final] fps={yolo_fps():.1f}") +my._yolo_running[0] = False +time.sleep(0.3) +EOF +``` + +GPU live telemetry while Marcus runs: + +```bash +tegrastats --interval 500 | grep -oE "GR3D_FREQ [0-9]+%" +``` + +`nvidia-smi` is absent on Jetson — `tegrastats` is the equivalent. + +--- + +## 12. Known quirks + +1. 
**No RTC battery** — clock resets to 1970 on every full power cycle. Fix before any `wget`/`pip install` that hits HTTPS. See 9.1. +2. **`ollama` python lib has no `__version__`** — use `pip show ollama` instead of `ollama.__version__`. +3. **`nvidia-smi` not available** — normal on Jetson. Use `tegrastats` and `torch.cuda.*` APIs. +4. **Ollama server "could not connect" warning** on first `ollama list`/`ollama ps` just means the server isn't running yet. Start it with `ollama serve &` before Marcus. +5. **YOLO first inference ~4–5 s** — cuDNN kernel autotune + FP16 conversion on cold start. The first user command after `python3 run_marcus.py` will feel slow; subsequent commands are steady-state. A YOLO warmup pass in `init_brain()` would hide this — open item. +6. **Holosoma and Marcus share ZMQ port 5556** — `run_marcus.py` (terminal) and `Server/marcus_server.py` (websocket) cannot run simultaneously. Pick one. +7. **NVIDIA torch wheel is at `/jp/v512/`** on developer.download.nvidia.com even though this host is JetPack 5.1.1. The `nv23.06` wheel is shared across JP 5.1.x (same CUDA 11.4 + cuDNN 8.6 runtime). `/jp/v511/pytorch/` 404s — use `v512`. +8. **PyPI torch is CPU-only on aarch64** — any `pip install torch` with no wheel argument will silently replace the NVIDIA build with a CPU wheel and break Marcus startup (Marcus is now hard-configured to refuse CPU). If that happens, redo 9.2. + +--- + +## 13. GPU-only policy (enforced in code) + +As of 2026-04-12, `Vision/marcus_yolo.py::_resolve_device` raises `RuntimeError` instead of falling back to CPU when any of: + +- `Config/config_Vision.json` has `yolo_device: "cpu"` +- `torch` is not installed +- `torch.cuda.is_available()` returns False + +`API/yolo_api.py::init_yolo` was also updated to **propagate** that `RuntimeError` (previously it caught `Exception` and silently disabled YOLO, leaving Marcus running blind). The brain crashes at `init_brain()` with a clear message if the GPU is unreachable — preferred over silent degradation on a safety-sensitive robot. + +Config file (`Config/config_Vision.json`): + +```json +{ + "yolo_model_path": "Models/yolov8m.pt", + "yolo_confidence": 0.45, + "yolo_iou": 0.45, + "yolo_device": "cuda", + "yolo_half": true, + "yolo_img_size": 320, + "tracked_classes": [ ... ], + "ppe_violation_classes": [ "no-helmet", "no_helmet", "no-vest", "no_vest" ] +} +``` + +--- + +## 14. Change log + +| Date | Change | +|---|---| +| 2026-04-12 | Initial environment.md — full stack captured, GPU bring-up verified end to end. Steady-state YOLOv8m FPS on Orin NX measured at 21.9. Ollama Qwen2.5-VL verified at 100% GPU. | +| 2026-04-12 | `Vision/marcus_yolo.py` rewired to load `config_Vision.json`, added `_resolve_device()` with hard-fail on missing CUDA (GPU-only policy). `API/yolo_api.py` updated to propagate `RuntimeError`. `Config/config_Vision.json` set `yolo_device=cuda`, `yolo_half=true`. | +| 2026-04-12 | Installed NVIDIA Jetson torch `2.1.0a0+41361538.nv23.06` (replacing CPU-only PyPI `2.4.1`) + built torchvision `0.16.1` from source against it. Verified `nms device = cuda:0`. | diff --git a/Doc/note.txt b/Doc/note.txt new file mode 100644 index 0000000..6d7bf02 --- /dev/null +++ b/Doc/note.txt @@ -0,0 +1,90 @@ + + + + + + + +source ~/.holosoma_deps/miniconda3/bin/activate hsinference + +# 2. 
Navigate to holosoma +cd ~/holosoma + + + + + + + +~/.holosoma_deps/miniconda3/envs/hsinference/bin/python3 \ + src/holosoma_inference/holosoma_inference/run_policy.py \ + inference:g1-29dof-loco \ + --task.model-path src/holosoma_inference/holosoma_inference/models/loco/g1_29dof/fastsac_g1_29dof.onnx \ + --task.velocity-input zmq \ + --task.state-input zmq \ + --task.interface eth0 + + + + + +./build/bin/llama-quantize \ + ~/Robotics_workspace/yslootahtech/Project/Marcus_fine_tune/marcus-gguf/marcus-q8.gguf \ + ~/Robotics_workspace/yslootahtech/Project/Marcus_fine_tune/marcus-gguf/marcus-robotics-Q4_K_M.gguf \ + Q4_K_M + +# Delete Q8 after quantize succeeds +rm ~/Robotics_workspace/yslootahtech/Project/Marcus_fine_tune/marcus-gguf/marcus-q8.gguf + + + + +https://ingrid789.github.io/SkillMimic/ +https://github.com/wyhuai/SkillMimic + +https://vla-survey.github.io/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Legacy/marcus_nav.py b/Legacy/marcus_nav.py new file mode 100644 index 0000000..35bfe03 --- /dev/null +++ b/Legacy/marcus_nav.py @@ -0,0 +1,93 @@ +import ollama, base64, json, time +import pyrealsense2 as rs +import numpy as np, cv2 +import zmq + +HOLOSOMA_IP = "127.0.0.1" +HOLOSOMA_PORT = 5556 + +def capture_frame(): + pipeline = rs.pipeline() + cfg = rs.config() + cfg.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30) + pipeline.start(cfg) + for _ in range(5): + pipeline.wait_for_frames() + frames = pipeline.wait_for_frames() + img = np.asanyarray(frames.get_color_frame().get_data()) + pipeline.stop() + cv2.imwrite('/tmp/marcus_eye.jpg', img) + return '/tmp/marcus_eye.jpg' + +def ask_qwen(image_path, command): + with open(image_path, 'rb') as f: + img_b64 = base64.b64encode(f.read()).decode() + prompt = f"""أنت ماركس، روبوت ذكي يتنقل داخل المبنى. +You are Marcus, an intelligent indoor navigation robot. + +User command: "{command}" + +Look at the camera image. 
Respond with ONLY one line: +FORWARD [0.1 to 1.0 meters] +LEFT [5 to 45 degrees] +RIGHT [5 to 45 degrees] +STOP [reason] +ARRIVED""" + + response = ollama.chat( + model='qwen2.5vl:7b', + messages=[{ + 'role': 'user', + 'content': prompt, + 'images': [img_b64] + }] + ) + return response['message']['content'].strip().split('\n')[0] + +def send_to_robot(action): + print(f" Robot action: {action}") + parts = action.upper().split() + if not parts: + return + cmd = parts[0] + val = float(parts[1]) if len(parts) > 1 else 0 + + if cmd == "FORWARD": + print(f" Walking forward {val}m") + elif cmd == "LEFT": + print(f" Turning left {val} degrees") + elif cmd == "RIGHT": + print(f" Turning right {val} degrees") + elif cmd == "STOP": + print(f" Stopping: {' '.join(parts[1:])}") + elif cmd == "ARRIVED": + print(" Destination reached!") + +if __name__ == "__main__": + print("=" * 50) + print("Marcus Navigation Brain") + print("Powered by Qwen2.5-VL on Jetson Orin NX") + print("Speaks Arabic + English") + print("=" * 50) + print("Type your command (or 'quit'):\n") + + while True: + try: + cmd = input("Command: ").strip() + if cmd.lower() in ['quit', 'exit', 'خروج']: + print("Marcus shutting down.") + break + if not cmd: + continue + print("Capturing camera frame...") + frame = capture_frame() + print("Qwen2.5-VL thinking...") + t0 = time.time() + action = ask_qwen(frame, cmd) + elapsed = time.time() - t0 + print(f"Decision ({elapsed:.1f}s): {action}") + send_to_robot(action) + print() + except KeyboardInterrupt: + print("\nStopped.") + break diff --git a/Lidar/SLAM_Config.json b/Lidar/SLAM_Config.json new file mode 100644 index 0000000..6f0ce03 --- /dev/null +++ b/Lidar/SLAM_Config.json @@ -0,0 +1,385 @@ +{ + "app": { + "title": "Unitree G1 - SLAM Commander (PROD)", + "maps_dir": "DataMap" + }, + + "network": { + "default_interface": "enp3s0", + "default_host_ip": "192.168.123.222" + }, + + "livox": { + "config_file": "mid360_config.json" + }, + + "livox_debug": { + "enabled": false, + "print_every_n_frames": 20 + }, + + "slam": { + "max_range": 50.0, + "slam_voxel_size": 0.12, + + "pre_downsample_stride": 1, + + "keyframe": { + "enabled": false, + "min_translation_m": 0.10, + "min_rotation_deg": 5.0 + } + }, + + "filter": { + "voxel_size": 0.20, + "hits_threshold": 4, + + "window_sec": 1.0, + "strict_sec": 3.0, + "use_strict": false, + + "persistence": { + "decay_seconds": 3.0, + "max_voxels": 2000000 + } + }, + + "map": { + "display_voxel": 0.08, + "save_voxel": 0.05, + "min_points_to_save": 550, + "save_extension": ".ply" + }, + + "localization": { + "enabled": true, + + "period_sec": 2.0, + "live_period_sec": 0.55, + "min_new_points": 200, + "min_points_for_localize": 550, + + "voxel_localize": 0.20, + "max_corr_mult": 6.0, + "icp_max_iter": 40, + "use_point_to_plane": false, + "allow_point_to_plane_unsafe": false, + + "accept_fitness": 0.30, + "accept_rmse": 0.30, + + "tracking_window_enabled": true, + "tracking_window_radius_m": 8.0, + "tracking_window_min_ref_points": 450, + "tracking_window_max_ref_points": 140000, + + "global_reloc_anchor_voxel_m": 1.4, + "global_reloc_max_anchors": 24, + "global_reloc_yaw_step_deg": 60.0, + "global_reloc_corr_mult": 1.9, + "global_reloc_coarse_iter": 8, + "global_reloc_refine_iter": 20, + "global_reloc_min_corr": 25, + + "bidirectional_check_enabled": true, + "bidirectional_min_ratio": 0.22, + "bidirectional_max_rmse": 0.55, + + "approx_guess_ttl_sec": 60.0, + "approx_guess_crop_radius_m": 9.0, + "approx_guess_local_radius_m": 4.5, + "approx_guess_strict_local": 
true, + "approx_guess_global_fallback_after_failures": 10, + "approx_guess_yaw_step_deg": 30.0, + "approx_guess_max_start_offset_m": 6.0, + "approx_guess_bootstrap_min_hits": 2, + "approx_guess_bootstrap_min_fitness": 0.34, + "approx_guess_bootstrap_min_bidir": 0.24, + "live_track_strict": true, + "live_track_min_fitness": 0.46, + "live_track_min_bidir": 0.32, + "live_track_max_step_translation_m": 0.16, + "live_track_max_step_rotation_deg": 10.0, + "live_stack_enabled": true, + "live_stack_frames": 8, + "live_stack_voxel_m": 0.12, + "live_stack_max_points": 22000, + "live_stack_min_frames": 2, + "approx_guess_default_z_m": 0.0, + + "ref_display_voxel": 0.30 + }, + + "localization_visualization": { + "enabled": true, + "period_sec": 0.35, + "voxel_m": 0.24, + "max_points": 28000, + "local_radius_m": 12.0, + "match_dist_m": 0.30, + "near_dist_m": 0.60, + "confirm_frames": 3, + "decay_per_frame": 1 + }, + + "place_recognition": { + "enabled": true, + "backend": "geometric", + "models_dir": "model", + "apply_profiles": ["LOCALIZE_MAP", "LIVE_NAV_MAP"], + "anchor_voxel_m": 1.6, + "submap_radius_m": 4.8, + "min_points_per_anchor": 180, + "max_anchors": 140, + "candidate_top_k": 10, + "descriptor_voxel_m": 0.18, + "radial_bins": 8, + "angular_bins": 16, + "height_bins": 6, + "guess_bias_radius_m": 9.0, + "guess_score_weight": 0.18 + }, + + "loop_closure": { + "enabled": false, + "keyframe_every_n_frames": 30, + "min_keyframe_translation_m": 0.35, + "min_keyframe_rotation_deg": 8.0, + "loop_search_radius_m": 2.5, + "min_loop_frame_gap": 35, + "downsample_voxel_m": 0.22, + "icp_max_corr_m": 1.5, + "icp_max_iter": 35, + "accept_fitness": 0.28, + "accept_rmse": 0.40, + "optimize_every_n_keyframes": 10, + "max_keyframes": 500, + "max_correction_translation_m": 2.0, + "max_correction_rotation_deg": 20.0 + }, + + "mapping_guard": { + "enabled": true, + "max_frame_translation_m": 0.18, + "max_frame_rotation_deg": 12.0, + "reference_dt_sec": 0.06 + }, + + "mapping_lock": { + "enabled": true, + "period_sec": 1.0, + "voxel_m": 0.20, + "max_corr_m": 0.70, + "min_stable_points": 450, + "accept_fitness": 0.30, + "accept_rmse": 0.28, + "max_step_translation_m": 0.12, + "max_step_rotation_deg": 6.0, + "apply_damping": true, + "apply_alpha_translation": 0.42, + "apply_alpha_rotation": 0.50, + "apply_max_translation_m": 0.08, + "apply_max_rotation_deg": 5.0, + "window_radius_m": 8.0, + "min_live_points": 140, + "icp_max_iter": 12, + "block_on_reject": false, + "retry_after_reject_sec": 0.20, + "block_on_severe_reject": true, + "severe_fit": 0.10, + "severe_rmse": 0.55, + "severe_trans_m": 0.30, + "severe_rot_deg": 14.0 + }, + + "rotate_guard": { + "enabled": true, + "in_place_max_trans_m": 0.12, + "in_place_min_rot_deg": 4.0, + "pose_guard_max_rot_deg": 34.0, + "skip_continuity": true, + "freeze_mapping_while_rotating": true, + "freeze_min_stable_points": 220, + "map_lock_period_sec": 0.25, + "map_lock_accept_fitness": 0.30, + "map_lock_accept_rmse": 0.30, + "map_lock_max_step_rot_deg": 10.0, + "map_lock_apply_alpha_translation": 0.22, + "map_lock_apply_alpha_rotation": 0.35, + "map_lock_apply_max_translation_m": 0.03, + "map_lock_apply_max_rotation_deg": 3.0, + "block_on_reject": false, + "block_on_severe": true + }, + + "continuity_guard": { + "enabled": true, + "voxel_m": 0.24, + "min_points": 120, + "match_radius_m": 0.45, + "min_inlier_ratio": 0.25, + "max_median_m": 0.40, + "max_p90_m": 0.85, + "recover_after_rejects": 10 + }, + + "map_cleanup": { + "enabled": true, + "period_sec": 6.0, + "voxel_m": 0.16, + 
"cluster_radius_m": 0.44, + "min_cluster_points": 100, + "keep_largest_n": 2, + "strict_largest_only": false, + "min_total_points": 2500 + }, + + "autosave": { + "enabled": false, + "interval_sec": 90.0, + "base_name": "autosave_map" + }, + + "map_quality": { + "enabled": true, + "near_min_range_m": 0.20, + "ray_consistency_enabled": true, + "ray_bin_deg": 1.5, + "ray_elev_bin_deg": 4.0, + "ray_max_behind_m": 0.20, + "ray_keep_n": 1, + "world_z_clip_enabled": false, + "world_z_min_m": -2.0, + "world_z_max_m": 3.0, + "outlier_filter_enabled": false, + "outlier_voxel_m": 0.12, + "outlier_min_points": 2, + "body_exclusion_enabled": true, + "body_x_min_m": -0.20, + "body_x_max_m": 0.35, + "body_y_min_m": -0.25, + "body_y_max_m": 0.25, + "body_z_min_m": -1.30, + "body_z_max_m": 0.15 + }, + + "submap_mapping": { + "enabled": true, + "local_window_frames": 18, + "local_voxel_m": 0.08, + "global_voxel_m": 0.14, + "merge_period_sec": 0.8, + "merge_min_translation_m": 0.25, + "merge_min_rotation_deg": 6.0, + "max_global_points": 350000, + "display_max_points": 250000, + "apply_profiles": ["LOCALIZE_MAP", "LIVE_NAV_MAP"], + "checkpoint_enabled": true, + "checkpoint_interval_sec": 60.0 + }, + + "state_machine": { + "enabled": true, + "window_size": 12, + "degraded_fitness": 0.20, + "lost_fitness": 0.10, + "degraded_rmse": 0.45, + "lost_rmse": 0.80, + "max_bad_before_lost": 4, + "min_good_to_recover": 3, + "recovery_cooldown_sec": 2.0 + }, + + "session_memory": { + "enabled": true, + "filename": "SLAM_session_memory.json", + "max_entries": 200 + }, + + "navigation_export": { + "enabled": true, + "min_points": 550, + "resolution_m": 0.05, + "z_min_m": 0.05, + "z_max_m": 1.40, + "inflation_radius_m": 0.20, + "padding_m": 0.40 + }, + + "navigation_runtime": { + "enabled": true, + "resolution_m": 0.10, + "z_min_m": 0.05, + "z_max_m": 1.40, + "padding_m": 0.80, + "inflation_radius_m": 0.25, + "dynamic_decay_sec": 1.5, + "dynamic_min_hits": 2, + "blocked_cost": 220, + "plan_period_sec": 0.35 + }, + + "local_planner": { + "lookahead_m": 0.8, + "max_linear_mps": 0.6, + "max_angular_rps": 1.3, + "goal_tolerance_m": 0.30, + "collision_probe_m": 0.6 + }, + + "mission": { + "waypoint_tolerance_m": 0.55 + }, + + "safety": { + "enabled": true, + "stop_radius_m": 0.50, + "stale_localization_sec": 1.5, + "emergency_hold_sec": 0.8 + }, + + "replay": { + "max_record_frames": 6000 + }, + + "fusion": { + "enabled": true, + "max_age_sec": 0.15, + "max_timestamp_skew_sec": 0.30, + "lidar_weight": 1.0, + "imu_weight": 0.25, + "wheel_weight": 0.0, + "vision_weight": 0.0, + "use_translation": false, + "use_rotation": true, + "imu_complementary_alpha": 0.97 + }, + + "runtime": { + "publish_hz": 12.0, + + "queue": { + "frame_keep_latest": true, + "frame_queue_maxsize": 1, + "status_queue_maxsize": 50, + "cmd_queue_maxsize": 50 + } + }, + + "gui": { + "render": { + "max_draw_points": 250000, + "decimate_step": 3 + }, + + "view": { + "distance": 40, + "grid_size": 80 + }, + + "colors": { + "accent": "#00ff88" + } + } +} diff --git a/Lidar/SLAM_Diagnostics.py b/Lidar/SLAM_Diagnostics.py new file mode 100644 index 0000000..9f0c005 --- /dev/null +++ b/Lidar/SLAM_Diagnostics.py @@ -0,0 +1,81 @@ +from __future__ import annotations + +import faulthandler +import json +import time +import traceback +from pathlib import Path +from typing import Any, Dict, Optional + + +def _safe_jsonable(value: Any) -> Any: + if value is None or isinstance(value, (str, int, float, bool)): + return value + if isinstance(value, dict): + return {str(k): 
_safe_jsonable(v) for k, v in value.items()} + if isinstance(value, (list, tuple)): + return [_safe_jsonable(v) for v in value] + try: + import numpy as np + + if isinstance(value, np.ndarray): + if value.ndim == 0: + return float(value) + return { + "shape": list(value.shape), + "dtype": str(value.dtype), + } + if isinstance(value, (np.floating,)): + return float(value) + if isinstance(value, (np.integer,)): + return int(value) + if isinstance(value, (np.bool_,)): + return bool(value) + except Exception: + pass + return str(value) + + +class WorkerDiagnostics: + def __init__(self, log_path: Path): + self.log_path = Path(log_path) + self.log_path.parent.mkdir(parents=True, exist_ok=True) + self._fh = open(self.log_path, "a", encoding="utf-8") + self.note("worker start") + try: + faulthandler.enable(file=self._fh, all_threads=True) + except Exception: + pass + + def note(self, message: str) -> None: + ts = time.strftime("%Y-%m-%d %H:%M:%S") + self._fh.write(f"\n[{ts}] {message}\n") + self._fh.flush() + + def log_state(self, label: str, state: Optional[Dict[str, Any]]) -> None: + payload = _safe_jsonable(state or {}) + self.note(f"{label}: {json.dumps(payload, ensure_ascii=True, sort_keys=True)}") + + def log_exception(self, label: str, exc: BaseException, state: Optional[Dict[str, Any]] = None) -> None: + self.note(f"{label}: {type(exc).__name__}: {exc}") + if state: + self.log_state("state", state) + self._fh.write(traceback.format_exc()) + if not traceback.format_exc().endswith("\n"): + self._fh.write("\n") + self._fh.flush() + + def close(self) -> None: + try: + if faulthandler.is_enabled(): + faulthandler.disable() + except Exception: + pass + try: + self._fh.flush() + self._fh.close() + except Exception: + pass + + def __del__(self) -> None: + self.close() diff --git a/Lidar/SLAM_Filter.py b/Lidar/SLAM_Filter.py new file mode 100644 index 0000000..912d2f8 --- /dev/null +++ b/Lidar/SLAM_Filter.py @@ -0,0 +1,392 @@ +# SLAM_Filter.py +from __future__ import annotations + +import json +import os +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict +import numpy as np + + +def load_slam_config() -> dict: + import json, os + from pathlib import Path + + cfg_path = os.environ.get("SLAM_CONFIG", "").strip() + p = Path(cfg_path) if cfg_path else (Path(__file__).resolve().parent / "SLAM_Config.json") + + if not p.exists(): + raise FileNotFoundError(f"Missing config file: {p}") + + text = p.read_text(encoding="utf-8").strip() + if not text: + raise RuntimeError("SLAM_Config.json is empty.") + + return json.loads(text) + + + +@dataclass +class FilterConfig: + voxel_size: float + hit_threshold: int + decay_seconds: float + max_voxels: int + + @staticmethod + def from_config_file() -> "FilterConfig": + cfg = load_slam_config() + return FilterConfig( + voxel_size=float(cfg["filter"]["voxel_size"]), + hit_threshold=int(cfg["filter"]["hits_threshold"]), + decay_seconds=float(cfg["filter"]["persistence"]["decay_seconds"]), + max_voxels=int(cfg["filter"]["persistence"]["max_voxels"]), + ) + + +class VoxelPersistenceFilter: + """ + Voxel hit-count persistence filter. + No hardcoded params: everything comes from SLAM_Config.jsonl. 
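+
+    update() buckets incoming world-frame points into voxels of
+    `voxel_size`, bumping a per-voxel hit counter and a running centroid;
+    voxels not re-observed within `decay_seconds` are dropped, and
+    get_stable_points() returns the centroids of voxels hit at least
+    `hit_threshold` times.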
+ """ + + def __init__(self, cfg: FilterConfig | None = None): + self.cfg = cfg if cfg is not None else FilterConfig.from_config_file() + self._count: dict[tuple[int, int, int], int] = {} + self._last: dict[tuple[int, int, int], float] = {} + self._sum: dict[tuple[int, int, int], np.ndarray] = {} + self._n: dict[tuple[int, int, int], int] = {} + + def reset(self): + self._count.clear() + self._last.clear() + self._sum.clear() + self._n.clear() + + def _voxel_keys(self, pts: np.ndarray) -> np.ndarray: + vs = float(self.cfg.voxel_size) + return np.floor(pts / vs).astype(np.int32) + + def update(self, points_world: np.ndarray, now: float | None = None) -> None: + if points_world is None or len(points_world) == 0: + return + + now = time.time() if now is None else float(now) + keys = self._voxel_keys(points_world) + uniq, inv = np.unique(keys, axis=0, return_inverse=True) + + for i, k in enumerate(uniq): + k_t = (int(k[0]), int(k[1]), int(k[2])) + idx = np.where(inv == i)[0] + mean = points_world[idx].mean(axis=0) + + self._count[k_t] = self._count.get(k_t, 0) + 1 + self._last[k_t] = now + + if k_t not in self._sum: + self._sum[k_t] = mean.copy() + self._n[k_t] = 1 + else: + self._sum[k_t] += mean + self._n[k_t] += 1 + + self._decay(now) + + if len(self._count) > int(self.cfg.max_voxels): + self._aggressive_prune() + + def _decay(self, now: float) -> None: + ttl = float(self.cfg.decay_seconds) + to_del = [] + for k, t_last in self._last.items(): + if (now - t_last) > ttl: + to_del.append(k) + for k in to_del: + self._count.pop(k, None) + self._last.pop(k, None) + self._sum.pop(k, None) + self._n.pop(k, None) + + def _aggressive_prune(self) -> None: + items = sorted(self._count.items(), key=lambda kv: kv[1], reverse=True) + keep_n = int(int(self.cfg.max_voxels) * 0.8) + keep = set(k for k, _ in items[:keep_n]) + + for k in list(self._count.keys()): + if k not in keep: + self._count.pop(k, None) + self._last.pop(k, None) + self._sum.pop(k, None) + self._n.pop(k, None) + + def get_stable_points(self) -> np.ndarray: + thr = int(self.cfg.hit_threshold) + pts = [] + for k, c in self._count.items(): + if c >= thr: + s = self._sum.get(k) + n = self._n.get(k, 1) + if s is not None: + pts.append(s / max(n, 1)) + if not pts: + return np.zeros((0, 3), dtype=np.float32) + return np.asarray(pts, dtype=np.float32) + + def seed_stable_points( + self, + points_world: np.ndarray, + now: float | None = None, + hit_count: int | None = None, + clear_existing: bool = True, + ) -> None: + """ + Seed filter state from already-stable world-frame points. + Useful after global pose correction (loop-closure optimization). 
+ """ + if points_world is None or len(points_world) == 0: + if clear_existing: + self.reset() + return + if clear_existing: + self.reset() + + now_v = time.time() if now is None else float(now) + hc = int(hit_count if hit_count is not None else self.cfg.hit_threshold) + hc = max(1, hc) + + pts = np.asarray(points_world, dtype=np.float32) + keys = self._voxel_keys(pts) + uniq, inv = np.unique(keys, axis=0, return_inverse=True) + + for i, k in enumerate(uniq): + k_t = (int(k[0]), int(k[1]), int(k[2])) + idx = np.where(inv == i)[0] + mean = pts[idx].mean(axis=0) + + self._count[k_t] = max(self._count.get(k_t, 0), hc) + self._last[k_t] = now_v + self._sum[k_t] = mean.copy() + self._n[k_t] = 1 + + self._decay(now_v) + if len(self._count) > int(self.cfg.max_voxels): + self._aggressive_prune() + + def stats(self) -> dict: + thr = int(self.cfg.hit_threshold) + stable = sum(1 for c in self._count.values() if c >= thr) + return { + "voxels_total": len(self._count), + "voxels_stable": stable, + "hit_threshold": thr, + "voxel_size": float(self.cfg.voxel_size), + "decay_seconds": float(self.cfg.decay_seconds), + "max_voxels": int(self.cfg.max_voxels), + } + + +@dataclass +class IndoorMapQualityConfig: + enabled: bool = True + near_min_range_m: float = 0.15 + ray_consistency_enabled: bool = True + ray_bin_deg: float = 1.0 + ray_elev_bin_deg: float = 4.0 + ray_max_behind_m: float = 0.25 + ray_keep_n: int = 1 + world_z_clip_enabled: bool = False + world_z_min_m: float = -2.0 + world_z_max_m: float = 3.0 + outlier_filter_enabled: bool = False + outlier_voxel_m: float = 0.12 + outlier_min_points: int = 2 + # Robot body exclusion box (sensor frame) — removes self-returns from robot body. + # Tuned for Unitree G1 Edu with chest-mounted Livox MID-360. + body_exclusion_enabled: bool = False + body_x_min_m: float = -0.20 + body_x_max_m: float = 0.35 + body_y_min_m: float = -0.25 + body_y_max_m: float = 0.25 + body_z_min_m: float = -1.30 + body_z_max_m: float = 0.15 + + @staticmethod + def from_dict(d: Dict[str, Any] | None) -> "IndoorMapQualityConfig": + src = d or {} + return IndoorMapQualityConfig( + enabled=bool(src.get("enabled", True)), + near_min_range_m=max(0.0, float(src.get("near_min_range_m", 0.15))), + ray_consistency_enabled=bool(src.get("ray_consistency_enabled", True)), + ray_bin_deg=max(0.2, float(src.get("ray_bin_deg", 1.0))), + ray_elev_bin_deg=max(0.0, float(src.get("ray_elev_bin_deg", 4.0))), + ray_max_behind_m=max(0.0, float(src.get("ray_max_behind_m", 0.25))), + ray_keep_n=max(1, int(src.get("ray_keep_n", 1))), + world_z_clip_enabled=bool(src.get("world_z_clip_enabled", False)), + world_z_min_m=float(src.get("world_z_min_m", -2.0)), + world_z_max_m=float(src.get("world_z_max_m", 3.0)), + outlier_filter_enabled=bool(src.get("outlier_filter_enabled", False)), + outlier_voxel_m=max(0.02, float(src.get("outlier_voxel_m", 0.12))), + outlier_min_points=max(1, int(src.get("outlier_min_points", 2))), + body_exclusion_enabled=bool(src.get("body_exclusion_enabled", False)), + body_x_min_m=float(src.get("body_x_min_m", -0.20)), + body_x_max_m=float(src.get("body_x_max_m", 0.35)), + body_y_min_m=float(src.get("body_y_min_m", -0.25)), + body_y_max_m=float(src.get("body_y_max_m", 0.25)), + body_z_min_m=float(src.get("body_z_min_m", -1.30)), + body_z_max_m=float(src.get("body_z_max_m", 0.15)), + ) + + +class IndoorMapQualityFilter: + """ + Lightweight indoor map-quality filters intended for real-time use. 
+ """ + + def __init__(self, cfg: IndoorMapQualityConfig): + self.cfg = cfg + + @staticmethod + def _finite(points: np.ndarray) -> np.ndarray: + if points is None or len(points) == 0: + return np.zeros((0, 3), dtype=np.float32) + pts = np.asarray(points, dtype=np.float32) + m = np.isfinite(pts).all(axis=1) + return pts[m] + + def _remove_sparse_voxels(self, points: np.ndarray) -> np.ndarray: + if points is None or len(points) == 0: + return np.zeros((0, 3), dtype=np.float32) + voxel = float(self.cfg.outlier_voxel_m) + keys = np.floor(points / voxel).astype(np.int32) + uniq, inv, counts = np.unique(keys, axis=0, return_inverse=True, return_counts=True) + del uniq # only counts/inv are needed + keep = counts[inv] >= int(self.cfg.outlier_min_points) + return points[keep] + + def _ray_consistency_filter(self, points: np.ndarray) -> np.ndarray: + """ + Keep nearest returns per azimuth bin (+small behind margin) to suppress + echoes and duplicated points that appear behind wall lines. + Uses a vectorized fast path for keep_n=1 to keep CPU usage low. + """ + if points is None or len(points) == 0: + return np.zeros((0, 3), dtype=np.float32) + if not self.cfg.ray_consistency_enabled: + return points + + pts = np.asarray(points, dtype=np.float32) + if len(pts) < 80: + return pts + + xy = pts[:, :2] + r = np.linalg.norm(xy, axis=1) + valid = np.isfinite(r) & (r > 1e-4) + if not np.all(valid): + pts = pts[valid] + r = r[valid] + if len(pts) < 40: + return pts + + bin_rad = np.deg2rad(float(self.cfg.ray_bin_deg)) + az = np.arctan2(pts[:, 1], pts[:, 0]) + bins_az = np.floor((az + np.pi) / max(1e-6, bin_rad)).astype(np.int32) + elev_bin_deg = float(self.cfg.ray_elev_bin_deg) + if elev_bin_deg > 0.0: + elev_rad = np.deg2rad(max(0.2, elev_bin_deg)) + elev = np.arctan2(pts[:, 2], np.maximum(r, 1e-4)) + bins_el = np.floor((elev + 0.5 * np.pi) / elev_rad).astype(np.int32) + n_az = int(np.max(bins_az)) + 1 if len(bins_az) > 0 else 1 + bins = bins_az + (np.maximum(0, bins_el) * max(1, n_az)) + else: + bins = bins_az + margin = float(self.cfg.ray_max_behind_m) + keep_n = int(self.cfg.ray_keep_n) + n_bins = int(np.max(bins)) + 1 if len(bins) > 0 else 0 + if n_bins <= 0: + return pts + + if keep_n <= 1: + # Fast vectorized mode: nearest hit per bin + margin. + min_r = np.full((n_bins,), np.inf, dtype=np.float32) + np.minimum.at(min_r, bins, r.astype(np.float32, copy=False)) + keep = r <= (min_r[bins] + margin) + else: + # Fallback for multi-return mode (rare; higher CPU). + order = np.lexsort((r, bins)) + bins_s = bins[order] + r_s = r[order] + keep_sorted = np.zeros((len(order),), dtype=bool) + i = 0 + n = int(len(order)) + while i < n: + j = i + 1 + b = int(bins_s[i]) + while j < n and int(bins_s[j]) == b: + j += 1 + + rr = r_s[i:j] + if len(rr) <= keep_n: + keep_sorted[i:j] = True + else: + base = float(rr[min(len(rr) - 1, keep_n - 1)]) + cutoff = base + margin + local = rr <= cutoff + local[:keep_n] = True + keep_sorted[i:j] = local + i = j + keep = np.zeros((len(pts),), dtype=bool) + keep[order] = keep_sorted + + out = pts[keep] + # Safety fallback: if filter is too aggressive, keep original frame. + if len(out) < max(40, int(0.15 * len(pts))): + return pts + return out + + def _exclude_body_box(self, points: np.ndarray) -> np.ndarray: + """Remove points inside the robot body bounding box (sensor frame). + Prevents G1 arm/torso self-returns from polluting the map. 
+ """ + inside = ( + (points[:, 0] >= self.cfg.body_x_min_m) & (points[:, 0] <= self.cfg.body_x_max_m) & + (points[:, 1] >= self.cfg.body_y_min_m) & (points[:, 1] <= self.cfg.body_y_max_m) & + (points[:, 2] >= self.cfg.body_z_min_m) & (points[:, 2] <= self.cfg.body_z_max_m) + ) + return points[~inside] + + def apply_sensor(self, points_sensor: np.ndarray) -> np.ndarray: + pts = self._finite(points_sensor) + if not self.cfg.enabled or len(pts) == 0: + return pts + + r2 = np.einsum("ij,ij->i", pts, pts) + min_r2 = float(self.cfg.near_min_range_m) ** 2 + pts = pts[r2 >= min_r2] + if len(pts) == 0: + return pts + + if self.cfg.body_exclusion_enabled: + pts = self._exclude_body_box(pts) + if len(pts) == 0: + return pts + + pts = self._ray_consistency_filter(pts) + if len(pts) == 0: + return pts + + if self.cfg.outlier_filter_enabled: + pts = self._remove_sparse_voxels(pts) + return pts + + def apply_world(self, points_world: np.ndarray) -> np.ndarray: + pts = self._finite(points_world) + if not self.cfg.enabled or len(pts) == 0: + return pts + if self.cfg.world_z_clip_enabled: + zmin = float(self.cfg.world_z_min_m) + zmax = float(self.cfg.world_z_max_m) + if zmax > zmin: + pts = pts[(pts[:, 2] >= zmin) & (pts[:, 2] <= zmax)] + return pts diff --git a/Lidar/SLAM_Fusion.py b/Lidar/SLAM_Fusion.py new file mode 100644 index 0000000..e841734 --- /dev/null +++ b/Lidar/SLAM_Fusion.py @@ -0,0 +1,205 @@ +from __future__ import annotations + +import time +from dataclasses import dataclass +from typing import Any, Dict, Optional, Tuple + +import numpy as np + + +def _safe_float(v: Any, default: float) -> float: + try: + return float(v) + except Exception: + return float(default) + + +def _as_bool(v: Any, default: bool) -> bool: + if v is None: + return default + if isinstance(v, bool): + return v + if isinstance(v, (int, float)): + return bool(v) + return str(v).strip().lower() in ("1", "true", "yes", "on") + + +@dataclass +class FusionConfig: + enabled: bool = False + max_age_sec: float = 0.30 + max_future_sec: float = 0.10 + max_timestamp_skew_sec: float = 0.50 + lidar_weight: float = 1.0 + imu_weight: float = 0.35 + wheel_weight: float = 0.45 + vision_weight: float = 0.70 + use_translation: bool = True + use_rotation: bool = True + + @staticmethod + def from_dict(d: Dict[str, Any] | None) -> "FusionConfig": + src = d or {} + return FusionConfig( + enabled=_as_bool(src.get("enabled", False), False), + max_age_sec=max(0.01, _safe_float(src.get("max_age_sec", 0.30), 0.30)), + max_future_sec=max(0.0, _safe_float(src.get("max_future_sec", 0.10), 0.10)), + max_timestamp_skew_sec=max(0.01, _safe_float(src.get("max_timestamp_skew_sec", 0.50), 0.50)), + lidar_weight=max(0.0, _safe_float(src.get("lidar_weight", 1.0), 1.0)), + imu_weight=max(0.0, _safe_float(src.get("imu_weight", 0.35), 0.35)), + wheel_weight=max(0.0, _safe_float(src.get("wheel_weight", 0.45), 0.45)), + vision_weight=max(0.0, _safe_float(src.get("vision_weight", 0.70), 0.70)), + use_translation=_as_bool(src.get("use_translation", True), True), + use_rotation=_as_bool(src.get("use_rotation", True), True), + ) + + +def _rot_to_quat(r: np.ndarray) -> np.ndarray: + tr = float(np.trace(r)) + if tr > 0.0: + s = np.sqrt(tr + 1.0) * 2.0 + w = 0.25 * s + x = (r[2, 1] - r[1, 2]) / s + y = (r[0, 2] - r[2, 0]) / s + z = (r[1, 0] - r[0, 1]) / s + elif (r[0, 0] > r[1, 1]) and (r[0, 0] > r[2, 2]): + s = np.sqrt(1.0 + r[0, 0] - r[1, 1] - r[2, 2]) * 2.0 + w = (r[2, 1] - r[1, 2]) / s + x = 0.25 * s + y = (r[0, 1] + r[1, 0]) / s + z = (r[0, 2] + r[2, 0]) / s + elif 
r[1, 1] > r[2, 2]: + s = np.sqrt(1.0 + r[1, 1] - r[0, 0] - r[2, 2]) * 2.0 + w = (r[0, 2] - r[2, 0]) / s + x = (r[0, 1] + r[1, 0]) / s + y = 0.25 * s + z = (r[1, 2] + r[2, 1]) / s + else: + s = np.sqrt(1.0 + r[2, 2] - r[0, 0] - r[1, 1]) * 2.0 + w = (r[1, 0] - r[0, 1]) / s + x = (r[0, 2] + r[2, 0]) / s + y = (r[1, 2] + r[2, 1]) / s + z = 0.25 * s + q = np.array([w, x, y, z], dtype=np.float64) + n = float(np.linalg.norm(q)) + if n <= 1e-12: + return np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float64) + return q / n + + +def _quat_to_rot(q: np.ndarray) -> np.ndarray: + w, x, y, z = q + return np.array( + [ + [1 - 2 * (y * y + z * z), 2 * (x * y - z * w), 2 * (x * z + y * w)], + [2 * (x * y + z * w), 1 - 2 * (x * x + z * z), 2 * (y * z - x * w)], + [2 * (x * z - y * w), 2 * (y * z + x * w), 1 - 2 * (x * x + y * y)], + ], + dtype=np.float64, + ) + + +class SensorPoseFusion: + def __init__(self, cfg: FusionConfig): + self.cfg = cfg + self._priors: Dict[str, Dict[str, Any]] = {} + self._last_lidar_t = 0.0 + + def reset(self) -> None: + self._priors.clear() + self._last_lidar_t = 0.0 + + def _sensor_weight(self, sensor: str) -> float: + s = str(sensor).lower().strip() + if s == "imu": + return float(self.cfg.imu_weight) + if s in ("wheel", "odom", "odometry"): + return float(self.cfg.wheel_weight) + if s in ("vision", "camera", "vio"): + return float(self.cfg.vision_weight) + return 0.0 + + def update_prior(self, sensor: str, pose: Any, confidence: float = 1.0, timestamp: Optional[float] = None) -> bool: + arr = np.asarray(pose, dtype=np.float64) + if arr.shape != (4, 4): + return False + t = float(time.time()) if timestamp is None else float(timestamp) + if (t - float(time.time())) > float(self.cfg.max_future_sec): + return False + conf = float(np.clip(confidence, 0.0, 1.0)) + self._priors[str(sensor).lower().strip()] = { + "pose": arr, + "confidence": conf, + "timestamp": t, + } + return True + + def fuse_pose(self, lidar_pose: np.ndarray, now: Optional[float] = None) -> Tuple[np.ndarray, Dict[str, Any]]: + pose_lidar = np.asarray(lidar_pose, dtype=np.float64) + if pose_lidar.shape != (4, 4): + return pose_lidar, {"used": [], "enabled": bool(self.cfg.enabled)} + t_now = float(time.time()) if now is None else float(now) + + if not self.cfg.enabled: + self._last_lidar_t = t_now + return pose_lidar, {"used": [], "enabled": False} + + used = [] + weights = [float(self.cfg.lidar_weight)] + poses = [pose_lidar] + rejected = [] + + for sensor, ent in self._priors.items(): + ts = float(ent.get("timestamp", 0.0)) + age = t_now - ts + if age < -float(self.cfg.max_future_sec): + rejected.append(f"{sensor}:future") + continue + if self._last_lidar_t > 0.0 and abs(ts - self._last_lidar_t) > float(self.cfg.max_timestamp_skew_sec): + rejected.append(f"{sensor}:skew") + continue + if age > float(self.cfg.max_age_sec): + rejected.append(f"{sensor}:stale") + continue + w = self._sensor_weight(sensor) * float(np.clip(ent.get("confidence", 1.0), 0.0, 1.0)) + if w <= 0.0: + rejected.append(f"{sensor}:weight") + continue + p = np.asarray(ent.get("pose"), dtype=np.float64) + if p.shape != (4, 4): + rejected.append(f"{sensor}:pose") + continue + poses.append(p) + weights.append(w) + used.append(sensor) + + if len(poses) <= 1: + self._last_lidar_t = t_now + return pose_lidar, {"used": [], "rejected": rejected, "enabled": True} + + w_arr = np.asarray(weights, dtype=np.float64) + w_arr = w_arr / max(np.sum(w_arr), 1e-9) + + out = np.array(pose_lidar, dtype=np.float64, copy=True) + + if self.cfg.use_translation: + t = 
np.zeros((3,), dtype=np.float64) + for i, p in enumerate(poses): + t += w_arr[i] * p[:3, 3] + out[:3, 3] = t + + if self.cfg.use_rotation: + q_ref = _rot_to_quat(poses[0][:3, :3]) + q_sum = np.zeros((4,), dtype=np.float64) + for i, p in enumerate(poses): + q = _rot_to_quat(p[:3, :3]) + if np.dot(q, q_ref) < 0: + q = -q + q_sum += w_arr[i] * q + qn = float(np.linalg.norm(q_sum)) + if qn > 1e-12: + q_sum /= qn + out[:3, :3] = _quat_to_rot(q_sum) + + self._last_lidar_t = t_now + return out, {"used": used, "rejected": rejected, "enabled": True} diff --git a/Lidar/SLAM_LocalizationService.py b/Lidar/SLAM_LocalizationService.py new file mode 100644 index 0000000..bca76a9 --- /dev/null +++ b/Lidar/SLAM_LocalizationService.py @@ -0,0 +1,87 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Dict, Optional + +import numpy as np + + +@dataclass +class LocalizationFrameState: + odom_to_map: np.ndarray = field(default_factory=lambda: np.eye(4, dtype=np.float64)) + odom_to_ref: np.ndarray = field(default_factory=lambda: np.eye(4, dtype=np.float64)) + last_alignment: np.ndarray = field(default_factory=lambda: np.eye(4, dtype=np.float64)) + confidence: float = 0.0 + ref_valid: bool = False + + def reset(self) -> None: + self.odom_to_map = np.eye(4, dtype=np.float64) + self.odom_to_ref = np.eye(4, dtype=np.float64) + self.last_alignment = np.eye(4, dtype=np.float64) + self.confidence = 0.0 + self.ref_valid = False + + def reset_reference(self) -> None: + self.odom_to_ref = np.eye(4, dtype=np.float64) + self.last_alignment = np.eye(4, dtype=np.float64) + self.confidence = 0.0 + self.ref_valid = False + + def set_reference_alignment(self, odom_to_ref: np.ndarray, confidence: float) -> None: + tf = np.asarray(odom_to_ref, dtype=np.float64) + if tf.shape != (4, 4): + return + self.odom_to_ref = np.array(tf, dtype=np.float64, copy=True) + self.last_alignment = np.array(tf, dtype=np.float64, copy=True) + self.confidence = float(np.clip(confidence, 0.0, 1.0)) + self.ref_valid = True + + def update_confidence(self, confidence: float) -> None: + self.confidence = float(np.clip(confidence, 0.0, 1.0)) + + def invalidate_reference(self) -> None: + self.ref_valid = False + self.confidence = 0.0 + + def apply_map_correction(self, correction: np.ndarray) -> None: + corr = np.asarray(correction, dtype=np.float64) + if corr.shape != (4, 4): + return + self.odom_to_map = corr @ np.asarray(self.odom_to_map, dtype=np.float64) + + def apply_loop_correction(self, correction: np.ndarray, update_reference: bool = True) -> None: + corr = np.asarray(correction, dtype=np.float64) + if corr.shape != (4, 4): + return + self.odom_to_map = corr @ np.asarray(self.odom_to_map, dtype=np.float64) + if update_reference and self.ref_valid: + try: + inv_corr = np.linalg.inv(corr) + self.odom_to_ref = np.asarray(self.odom_to_ref, dtype=np.float64) @ inv_corr + self.last_alignment = np.array(self.odom_to_ref, dtype=np.float64, copy=True) + except Exception: + pass + + def map_pose(self, odom_pose: Optional[np.ndarray]) -> Optional[np.ndarray]: + if odom_pose is None: + return None + pose = np.asarray(odom_pose, dtype=np.float64) + if pose.shape != (4, 4): + return None + return np.asarray(self.odom_to_map, dtype=np.float64) @ pose + + def ref_pose(self, odom_pose: Optional[np.ndarray]) -> Optional[np.ndarray]: + if odom_pose is None or not self.ref_valid: + return None + pose = np.asarray(odom_pose, dtype=np.float64) + if pose.shape != (4, 4): + return None + return 
np.asarray(self.odom_to_ref, dtype=np.float64) @ pose + + def snapshot(self) -> Dict[str, Any]: + return { + "ref_valid": bool(self.ref_valid), + "confidence": float(self.confidence), + "odom_to_map_t": np.asarray(self.odom_to_map[:3, 3], dtype=np.float64).tolist(), + "odom_to_ref_t": np.asarray(self.odom_to_ref[:3, 3], dtype=np.float64).tolist(), + } diff --git a/Lidar/SLAM_LoopClosure.py b/Lidar/SLAM_LoopClosure.py new file mode 100644 index 0000000..f4dfe2a --- /dev/null +++ b/Lidar/SLAM_LoopClosure.py @@ -0,0 +1,418 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional + +import numpy as np + + +def _as_bool(v: Any, default: bool) -> bool: + if v is None: + return default + if isinstance(v, bool): + return v + if isinstance(v, (int, float)): + return bool(v) + return str(v).strip().lower() in ("1", "true", "yes", "on") + + +def _safe_float(v: Any, default: float) -> float: + try: + return float(v) + except Exception: + return float(default) + + +def _safe_int(v: Any, default: int) -> int: + try: + return int(v) + except Exception: + return int(default) + + +def _pose_delta(pose_a: np.ndarray, pose_b: np.ndarray) -> tuple[float, float]: + dp = pose_a[:3, 3] - pose_b[:3, 3] + trans = float(np.linalg.norm(dp)) + r = pose_a[:3, :3] @ pose_b[:3, :3].T + ang = float(np.degrees(np.arccos(np.clip((np.trace(r) - 1.0) * 0.5, -1.0, 1.0)))) + return trans, ang + + +def _voxel_downsample_numpy(points: np.ndarray, voxel: float) -> np.ndarray: + if points is None or len(points) == 0: + return np.zeros((0, 3), dtype=np.float32) + if voxel <= 0: + return points.astype(np.float32, copy=False) + keys = np.floor(points / voxel).astype(np.int32) + uniq, inv = np.unique(keys, axis=0, return_inverse=True) + out = np.zeros((len(uniq), 3), dtype=np.float64) + cnt = np.zeros((len(uniq),), dtype=np.int64) + np.add.at(out, inv, points.astype(np.float64, copy=False)) + np.add.at(cnt, inv, 1) + out /= np.maximum(cnt[:, None], 1) + return out.astype(np.float32) + + +def _rigid_fit_svd(src_pts: np.ndarray, dst_pts: np.ndarray) -> Optional[np.ndarray]: + if src_pts is None or dst_pts is None: + return None + if len(src_pts) < 3 or len(dst_pts) < 3: + return None + a = np.asarray(src_pts, dtype=np.float64) + b = np.asarray(dst_pts, dtype=np.float64) + if a.shape != b.shape or a.ndim != 2 or a.shape[1] != 3: + return None + c_a = np.mean(a, axis=0) + c_b = np.mean(b, axis=0) + aa = a - c_a + bb = b - c_b + H = aa.T @ bb + try: + U, _, Vt = np.linalg.svd(H, full_matrices=False) + except Exception: + return None + R = Vt.T @ U.T + if np.linalg.det(R) < 0: + Vt[2, :] *= -1.0 + R = Vt.T @ U.T + t = c_b - (R @ c_a) + T = np.eye(4, dtype=np.float64) + T[:3, :3] = R + T[:3, 3] = t + return T + + +@dataclass +class LoopClosureConfig: + enabled: bool = False + keyframe_every_n_frames: int = 25 + min_keyframe_translation_m: float = 0.30 + min_keyframe_rotation_deg: float = 8.0 + loop_search_radius_m: float = 2.0 + min_loop_frame_gap: int = 30 + downsample_voxel_m: float = 0.20 + icp_max_corr_m: float = 1.2 + icp_max_iter: int = 40 + accept_fitness: float = 0.30 + accept_rmse: float = 0.35 + optimize_every_n_keyframes: int = 8 + max_keyframes: int = 500 + max_correction_translation_m: float = 2.0 + max_correction_rotation_deg: float = 20.0 + + @staticmethod + def from_dict(d: Dict[str, Any] | None) -> "LoopClosureConfig": + src = d or {} + return LoopClosureConfig( + enabled=_as_bool(src.get("enabled", False), False), + keyframe_every_n_frames=max(1, 
_safe_int(src.get("keyframe_every_n_frames", 25), 25)), + min_keyframe_translation_m=max(0.01, _safe_float(src.get("min_keyframe_translation_m", 0.30), 0.30)), + min_keyframe_rotation_deg=max(0.1, _safe_float(src.get("min_keyframe_rotation_deg", 8.0), 8.0)), + loop_search_radius_m=max(0.2, _safe_float(src.get("loop_search_radius_m", 2.0), 2.0)), + min_loop_frame_gap=max(5, _safe_int(src.get("min_loop_frame_gap", 30), 30)), + downsample_voxel_m=max(0.01, _safe_float(src.get("downsample_voxel_m", 0.20), 0.20)), + icp_max_corr_m=max(0.2, _safe_float(src.get("icp_max_corr_m", 1.2), 1.2)), + icp_max_iter=max(10, _safe_int(src.get("icp_max_iter", 40), 40)), + accept_fitness=np.clip(_safe_float(src.get("accept_fitness", 0.30), 0.30), 0.0, 1.0), + accept_rmse=max(0.01, _safe_float(src.get("accept_rmse", 0.35), 0.35)), + optimize_every_n_keyframes=max(2, _safe_int(src.get("optimize_every_n_keyframes", 8), 8)), + max_keyframes=max(50, _safe_int(src.get("max_keyframes", 500), 500)), + max_correction_translation_m=max( + 0.01, _safe_float(src.get("max_correction_translation_m", 2.0), 2.0) + ), + max_correction_rotation_deg=max( + 0.1, _safe_float(src.get("max_correction_rotation_deg", 20.0), 20.0) + ), + ) + + +@dataclass +class LoopClosureResult: + keyframe_added: bool = False + loop_detected: bool = False + optimized: bool = False + correction: np.ndarray = field(default_factory=lambda: np.eye(4, dtype=np.float64)) + info: Dict[str, Any] = field(default_factory=dict) + + +class LoopClosureBackend: + """ + Lightweight loop-closure backend using keyframes + pose-graph optimization. + """ + + def __init__(self, cfg: LoopClosureConfig): + self.cfg = cfg + self.frame_index = 0 + self.last_keyframe_frame = -10**9 + self.keyframe_poses: List[np.ndarray] = [] # world_T_sensor + self.keyframe_clouds: List[np.ndarray] = [] # sensor-frame clouds + self._o3d = None + self._pg = None + self._loop_count = 0 + self._opt_count = 0 + self._last_opt_reject: Optional[Dict[str, Any]] = None + self._import_open3d() + + def _import_open3d(self) -> None: + # SAFE mode: avoid Open3D pose-graph path (native crashes observed). 
+ self._o3d = None + self._pg = None + + def reset(self) -> None: + self.frame_index = 0 + self.last_keyframe_frame = -10**9 + self.keyframe_poses.clear() + self.keyframe_clouds.clear() + self._loop_count = 0 + self._opt_count = 0 + self._last_opt_reject = None + self._pg = None + + def _should_add_keyframe(self, pose_world: np.ndarray) -> bool: + if len(self.keyframe_poses) == 0: + return True + if (self.frame_index - self.last_keyframe_frame) >= int(self.cfg.keyframe_every_n_frames): + return True + trans, ang = _pose_delta(pose_world, self.keyframe_poses[-1]) + return (trans >= float(self.cfg.min_keyframe_translation_m)) or (ang >= float(self.cfg.min_keyframe_rotation_deg)) + + def _to_o3d_cloud(self, pts: np.ndarray): + o3d = self._o3d + if o3d is None: + return None + pc = o3d.geometry.PointCloud() + pc.points = o3d.utility.Vector3dVector(pts.astype(np.float64, copy=False)) + return pc + + def _relative_source_to_target(self, pose_source: np.ndarray, pose_target: np.ndarray) -> np.ndarray: + # T_source_to_target + return np.linalg.inv(pose_target) @ pose_source + + def _find_candidate(self, idx_cur: int) -> Optional[int]: + if idx_cur <= int(self.cfg.min_loop_frame_gap): + return None + cur_t = self.keyframe_poses[idx_cur][:3, 3] + best = None + best_d = float("inf") + for i in range(0, idx_cur - int(self.cfg.min_loop_frame_gap)): + d = float(np.linalg.norm(cur_t - self.keyframe_poses[i][:3, 3])) + if d <= float(self.cfg.loop_search_radius_m) and d < best_d: + best = i + best_d = d + return best + + def _run_icp(self, src_pts: np.ndarray, tgt_pts: np.ndarray, init: np.ndarray, max_corr: float) -> Optional[Dict[str, Any]]: + src0 = np.asarray(src_pts, dtype=np.float64) + tgt = np.asarray(tgt_pts, dtype=np.float64) + if src0.ndim != 2 or tgt.ndim != 2 or src0.shape[1] != 3 or tgt.shape[1] != 3: + return None + if len(src0) < 3 or len(tgt) < 3: + return None + try: + from scipy.spatial import cKDTree + except Exception: + return None + + T = np.asarray(init, dtype=np.float64).copy() if np.asarray(init).shape == (4, 4) else np.eye(4, dtype=np.float64) + tree = cKDTree(tgt) + max_corr_m = float(max(0.05, max_corr)) + min_corr = max(20, int(0.12 * len(src0))) + last_rmse = 9e9 + last_inliers = 0 + + for _ in range(int(max(6, self.cfg.icp_max_iter))): + src_w = (src0 @ T[:3, :3].T) + T[:3, 3] + d, idx = tree.query(src_w, k=1) + keep = np.isfinite(d) & (d <= max_corr_m) + n_in = int(np.count_nonzero(keep)) + if n_in < min_corr: + break + src_corr = src_w[keep] + tgt_corr = tgt[np.asarray(idx[keep], dtype=np.int32)] + delta = _rigid_fit_svd(src_corr, tgt_corr) + if delta is None: + break + T = np.asarray(delta, dtype=np.float64) @ T + prev_rmse = float(last_rmse) + rmse = float(np.sqrt(np.mean(np.square(d[keep])))) + last_rmse = rmse + last_inliers = n_in + if abs(prev_rmse - rmse) < 1e-5: + break + + fitness = float(last_inliers) / max(1.0, float(len(src0))) + return { + "fitness": float(fitness), + "inlier_rmse": float(last_rmse), + "transformation": np.asarray(T, dtype=np.float64), + "inliers": int(last_inliers), + } + + def _rebuild_graph_odometry_only(self) -> None: + self._pg = None + + def _maybe_trim(self) -> None: + kmax = int(self.cfg.max_keyframes) + if len(self.keyframe_poses) <= kmax: + return + drop = len(self.keyframe_poses) - kmax + self.keyframe_poses = self.keyframe_poses[drop:] + self.keyframe_clouds = self.keyframe_clouds[drop:] + self._rebuild_graph_odometry_only() + + @staticmethod + def _slerp_rotation(R0: np.ndarray, R1: np.ndarray, t: float) -> np.ndarray: + 
"""Spherical linear interpolation between two rotation matrices.""" + dR = R0.T @ R1 + cos_angle = np.clip((np.trace(dR) - 1.0) * 0.5, -1.0, 1.0) + angle = float(np.arccos(cos_angle)) + if abs(angle) < 1e-8: + return R0.copy() + sin_a = np.sin(angle) + axis = np.array([ + dR[2, 1] - dR[1, 2], + dR[0, 2] - dR[2, 0], + dR[1, 0] - dR[0, 1], + ]) / (2.0 * sin_a) + angle_t = angle * float(t) + K = np.array([ + [0.0, -axis[2], axis[1]], + [axis[2], 0.0, -axis[0]], + [-axis[1], axis[0], 0.0], + ]) + dRt = np.eye(3) + np.sin(angle_t) * K + (1.0 - np.cos(angle_t)) * (K @ K) + return R0 @ dRt + + @staticmethod + def _interpolate_se3(T0: np.ndarray, T1: np.ndarray, alpha: float) -> np.ndarray: + """Interpolate between two SE3 transforms; alpha=0 → T0, alpha=1 → T1.""" + a = float(np.clip(alpha, 0.0, 1.0)) + out = np.eye(4, dtype=np.float64) + out[:3, :3] = LoopClosureBackend._slerp_rotation(T0[:3, :3], T1[:3, :3], a) + out[:3, 3] = (1.0 - a) * T0[:3, 3] + a * T1[:3, 3] + return out + + def _optimize(self, src_idx: int, dst_idx: int, T_meas: np.ndarray) -> Optional[List[np.ndarray]]: + """ + Distribute loop-closure error linearly across the keyframe chain. + + Poses from dst_idx+1 to src_idx receive a fraction of the correction + proportional to their distance from dst_idx. All poses after src_idx + receive the full correction. Poses at or before dst_idx are unchanged. + + Parameters + ---------- + src_idx : index of the current (drifted) keyframe + dst_idx : index of the matching earlier keyframe + T_meas : 4×4 relative transform — src expressed in dst's frame + (output of ICP; transforms src points into dst frame) + + Returns + ------- + List of updated 4×4 pose matrices, or None if inputs are invalid. + """ + n = len(self.keyframe_poses) + if n < 2: + return None + if not (0 <= dst_idx < src_idx < n): + return None + + poses = [np.array(p, dtype=np.float64) for p in self.keyframe_poses] + + # Where src *should* be in world frame if there were no drift: + T_src_expected = poses[dst_idx] @ T_meas + T_src_actual = poses[src_idx] + + # Full correction needed to move src_actual → src_expected: + try: + T_correction = T_src_expected @ np.linalg.inv(T_src_actual) + except np.linalg.LinAlgError: + return None + + # Sanity-check the correction magnitude + corr_t = float(np.linalg.norm(T_correction[:3, 3])) + _, corr_deg = _pose_delta(T_correction, np.eye(4, dtype=np.float64)) + if corr_t > float(self.cfg.max_correction_translation_m) or corr_deg > float(self.cfg.max_correction_rotation_deg): + return None + + T_identity = np.eye(4, dtype=np.float64) + chain_len = src_idx - dst_idx + + # Linearly distribute from dst_idx+1 (alpha≈0) to src_idx (alpha=1) + for i in range(dst_idx + 1, src_idx + 1): + alpha = float(i - dst_idx) / float(chain_len) + T_partial = self._interpolate_se3(T_identity, T_correction, alpha) + poses[i] = T_partial @ poses[i] + + # Propagate full correction to all poses after src_idx + for i in range(src_idx + 1, n): + poses[i] = T_correction @ poses[i] + + return poses + + def process_frame(self, points_sensor: np.ndarray, pose_world: np.ndarray) -> LoopClosureResult: + self.frame_index += 1 + result = LoopClosureResult() + + if not self.cfg.enabled: + return result + if points_sensor is None or len(points_sensor) < 40: + return result + if pose_world is None or np.asarray(pose_world).shape != (4, 4): + return result + + pose = np.asarray(pose_world, dtype=np.float64) + if not self._should_add_keyframe(pose): + return result + + ds = _voxel_downsample_numpy(points_sensor.astype(np.float32, 
copy=False), float(self.cfg.downsample_voxel_m))
+        if len(ds) < 40:
+            return result
+
+        self.keyframe_clouds.append(ds)
+        self.keyframe_poses.append(pose.copy())
+        self.last_keyframe_frame = self.frame_index
+        result.keyframe_added = True
+
+        idx = len(self.keyframe_poses) - 1
+        cand = self._find_candidate(idx)
+        if cand is not None:
+            init = self._relative_source_to_target(self.keyframe_poses[idx], self.keyframe_poses[cand])
+            reg = self._run_icp(
+                self.keyframe_clouds[idx],
+                self.keyframe_clouds[cand],
+                init=init,
+                max_corr=float(self.cfg.icp_max_corr_m),
+            )
+            if reg is not None:
+                fit = float(reg.get("fitness", 0.0))
+                rmse = float(reg.get("inlier_rmse", 9e9))
+                ok = (fit >= float(self.cfg.accept_fitness)) and (rmse <= float(self.cfg.accept_rmse))
+                result.info["loop_candidate"] = int(cand)
+                result.info["loop_fitness"] = fit
+                result.info["loop_rmse"] = rmse
+                if ok:
+                    result.loop_detected = True
+                    self._loop_count += 1
+                    t_meas = np.asarray(reg.get("transformation", np.eye(4, dtype=np.float64)), dtype=np.float64)
+                    optimized_poses = self._optimize(idx, cand, t_meas)
+                    if optimized_poses is None:
+                        self._last_opt_reject = {
+                            "reason": "optimize_rejected",
+                            "src_idx": int(idx),
+                            "dst_idx": int(cand),
+                        }
+                        result.info["rejected"] = dict(self._last_opt_reject)
+                    else:
+                        # Correction seen at src_idx (for callers that need a global correction estimate)
+                        pose_cur = np.asarray(self.keyframe_poses[idx], dtype=np.float64)
+                        correction = optimized_poses[idx] @ np.linalg.inv(pose_cur)
+                        result.optimized = True
+                        result.correction = np.asarray(correction, dtype=np.float64)
+                        self._opt_count += 1
+                        self.keyframe_poses = optimized_poses
+                    result.info["loop_count"] = int(self._loop_count)
+                    result.info["opt_count"] = int(self._opt_count)
+
+        self._maybe_trim()
+        return result
diff --git a/Lidar/SLAM_MAP.py b/Lidar/SLAM_MAP.py
new file mode 100644
index 0000000..b3570de
--- /dev/null
+++ b/Lidar/SLAM_MAP.py
@@ -0,0 +1,115 @@
+# SLAM_MAP.py
+from __future__ import annotations
+
+import json
+import os
+from dataclasses import dataclass
+from pathlib import Path
+import numpy as np
+
+
+def load_slam_config() -> dict:
+    cfg_path = os.environ.get("SLAM_CONFIG", "").strip()
+    p = Path(cfg_path) if cfg_path else (Path(__file__).resolve().parent / "SLAM_Config.json")
+
+    if not p.exists():
+        raise FileNotFoundError(f"Missing config file: {p}")
+
+    text = p.read_text(encoding="utf-8").strip()
+    if not text:
+        raise RuntimeError("SLAM_Config.json is empty.")
+
+    return json.loads(text)
+
+
+@dataclass
+class MapConfig:
+    display_voxel: float
+    save_voxel: float
+    data_folder: str
+    save_extension: str
+
+    @staticmethod
+    def from_config_file() -> "MapConfig":
+        cfg = load_slam_config()
+        maps_dir = str(cfg["app"]["maps_dir"])
+        return MapConfig(
+            display_voxel=float(cfg["map"]["display_voxel"]),
+            save_voxel=float(cfg["map"]["save_voxel"]),
+            data_folder=maps_dir,
+            save_extension=str(cfg["map"]["save_extension"]),
+        )
+
+
+def _voxel_downsample_numpy(points: np.ndarray, voxel: float) -> np.ndarray:
+    if points is None or len(points) == 0:
+        return np.zeros((0, 3), dtype=np.float32)
+    if voxel <= 0:
+        return points.astype(np.float32, copy=False)
+
+    keys = np.floor(points / voxel).astype(np.int32)
+    uniq, inv = np.unique(keys, axis=0, return_inverse=True)
+
+    out = np.zeros((len(uniq), 3), dtype=np.float64)
+    cnt = np.zeros((len(uniq),), dtype=np.int64)
+    np.add.at(out, inv, points.astype(np.float64, copy=False))
+    np.add.at(cnt, inv, 1)
+    out /= 
np.maximum(cnt[:, None], 1) + return out.astype(np.float32) + + +class StableMapLayer: + def __init__(self, cfg: MapConfig | None = None): + self.cfg = cfg if cfg is not None else MapConfig.from_config_file() + Path(self.cfg.data_folder).mkdir(parents=True, exist_ok=True) + self._points = np.zeros((0, 3), dtype=np.float32) + + def reset(self): + self._points = np.zeros((0, 3), dtype=np.float32) + + def set_points(self, pts: np.ndarray): + self._points = pts.astype(np.float32, copy=True) if pts is not None else np.zeros((0, 3), dtype=np.float32) + + def get_points(self) -> np.ndarray: + return self._points + + def get_display_points(self) -> np.ndarray: + return _voxel_downsample_numpy(self._points, float(self.cfg.display_voxel)) + + def get_save_points(self) -> np.ndarray: + return _voxel_downsample_numpy(self._points, float(self.cfg.save_voxel)) + + def export_map(self, base_name: str) -> str: + """ + Save stable map into maps_dir (no subfolders). + """ + import open3d as o3d + + pts = self.get_save_points() + if pts is None or len(pts) < 20: + raise RuntimeError("Not enough stable points to save.") + + base = (base_name or "").strip() or "map_robot" + ext = self.cfg.save_extension.strip() or ".ply" + if base.endswith(ext): + base = base[: -len(ext)] + + filename = f"{base}{ext}" + n = 1 + folder = Path(self.cfg.data_folder) + while (folder / filename).exists(): + filename = f"{base}({n}){ext}" + n += 1 + + full = str(folder / filename) + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(pts.astype(np.float64, copy=False)) + o3d.io.write_point_cloud(full, pcd) + return full + + def load_map(self, filepath: str) -> np.ndarray: + import open3d as o3d + pcd = o3d.io.read_point_cloud(filepath) + pts = np.asarray(pcd.points).astype(np.float32) + self.set_points(pts) + return pts diff --git a/Lidar/SLAM_Mission.py b/Lidar/SLAM_Mission.py new file mode 100644 index 0000000..8669b11 --- /dev/null +++ b/Lidar/SLAM_Mission.py @@ -0,0 +1,96 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Dict, List, Optional, Tuple + +import numpy as np + + +def _safe_float(v: Any, default: float) -> float: + try: + return float(v) + except Exception: + return float(default) + + +@dataclass +class MissionConfig: + waypoint_tolerance_m: float = 0.55 # G1 Edu stride ~0.3 m; 0.55 m gives reliable completion + + @staticmethod + def from_dict(d: Dict[str, Any] | None) -> "MissionConfig": + src = d or {} + return MissionConfig( + waypoint_tolerance_m=max(0.05, _safe_float(src.get("waypoint_tolerance_m", 0.55), 0.55)), + ) + + +class WaypointMissionManager: + def __init__(self, cfg: MissionConfig): + self.cfg = cfg + self.reset() + + def reset(self) -> None: + self._waypoints: List[Tuple[float, float]] = [] + self._idx = 0 + self._active = False + self._paused = False + self._completed = False + + def set_waypoints(self, waypoints: List[Tuple[float, float]]) -> None: + self._waypoints = [(float(x), float(y)) for x, y in waypoints] + self._idx = 0 + self._completed = len(self._waypoints) == 0 + + def start(self, waypoints: Optional[List[Tuple[float, float]]] = None) -> None: + if waypoints is not None: + self.set_waypoints(waypoints) + self._active = len(self._waypoints) > 0 + self._paused = False + self._completed = not self._active + + def pause(self) -> None: + if self._active: + self._paused = True + + def resume(self) -> None: + if self._active: + self._paused = False + + def stop(self) -> None: + self._active = False + self._paused = False + 
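+    # Typical lifecycle (illustrative values):
+    #   mgr = WaypointMissionManager(MissionConfig.from_dict(cfg))
+    #   mgr.start([(1.0, 0.0), (1.0, 2.0)])
+    #   state = mgr.update_pose(x, y)   # advances the index within tolerance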
+ def current_goal(self) -> Optional[Tuple[float, float]]: + if not self._active or self._paused or self._completed: + return None + if self._idx < 0 or self._idx >= len(self._waypoints): + return None + return self._waypoints[self._idx] + + def update_pose(self, x: float, y: float) -> Dict[str, Any]: + goal = self.current_goal() + if goal is None: + return self.snapshot() + + dx = float(goal[0]) - float(x) + dy = float(goal[1]) - float(y) + dist = float(np.hypot(dx, dy)) + if dist <= float(self.cfg.waypoint_tolerance_m): + self._idx += 1 + if self._idx >= len(self._waypoints): + self._completed = True + self._active = False + self._paused = False + return self.snapshot() + + def snapshot(self) -> Dict[str, Any]: + goal = self.current_goal() + return { + "active": bool(self._active), + "paused": bool(self._paused), + "completed": bool(self._completed), + "index": int(self._idx), + "total": int(len(self._waypoints)), + "goal": [float(goal[0]), float(goal[1])] if goal is not None else None, + } diff --git a/Lidar/SLAM_NavRuntime.py b/Lidar/SLAM_NavRuntime.py new file mode 100644 index 0000000..4bae5c7 --- /dev/null +++ b/Lidar/SLAM_NavRuntime.py @@ -0,0 +1,514 @@ +from __future__ import annotations + +import heapq +import math +from dataclasses import dataclass +from typing import Any, Dict, Iterable, List, Optional, Tuple + +import numpy as np + + +def _safe_float(v: Any, default: float) -> float: + try: + return float(v) + except Exception: + return float(default) + + +def _safe_int(v: Any, default: int) -> int: + try: + return int(v) + except Exception: + return int(default) + + +def _as_bool(v: Any, default: bool) -> bool: + if v is None: + return default + if isinstance(v, bool): + return v + if isinstance(v, (int, float)): + return bool(v) + return str(v).strip().lower() in ("1", "true", "yes", "on") + + +def _inflate_binary(mask: np.ndarray, radius_cells: int) -> np.ndarray: + if radius_cells <= 0: + return mask.copy() + h, w = mask.shape + out = mask.copy() + rr = int(radius_cells) * int(radius_cells) + offsets: List[Tuple[int, int]] = [] + for dy in range(-radius_cells, radius_cells + 1): + for dx in range(-radius_cells, radius_cells + 1): + if (dy * dy + dx * dx) <= rr: + offsets.append((dy, dx)) + for dy, dx in offsets: + ys = max(0, -dy) + ye = min(h, h - dy) + xs = max(0, -dx) + xe = min(w, w - dx) + yd = max(0, dy) + xd = max(0, dx) + out[yd : yd + (ye - ys), xd : xd + (xe - xs)] |= mask[ys:ye, xs:xe] + return out + + +@dataclass +class LiveCostmapConfig: + enabled: bool = True + resolution_m: float = 0.10 + z_min_m: float = -0.40 + z_max_m: float = 1.20 + padding_m: float = 0.80 + inflation_radius_m: float = 0.25 + dynamic_decay_sec: float = 1.5 + dynamic_min_hits: int = 2 + blocked_cost: int = 220 + max_width_cells: int = 900 + max_height_cells: int = 900 + + @staticmethod + def from_dict(d: Dict[str, Any] | None) -> "LiveCostmapConfig": + src = d or {} + return LiveCostmapConfig( + enabled=_as_bool(src.get("enabled", True), True), + resolution_m=max(0.02, _safe_float(src.get("resolution_m", 0.10), 0.10)), + z_min_m=_safe_float(src.get("z_min_m", -0.40), -0.40), + z_max_m=_safe_float(src.get("z_max_m", 1.20), 1.20), + padding_m=max(0.0, _safe_float(src.get("padding_m", 0.80), 0.80)), + inflation_radius_m=max(0.0, _safe_float(src.get("inflation_radius_m", 0.25), 0.25)), + dynamic_decay_sec=max(0.2, _safe_float(src.get("dynamic_decay_sec", 1.5), 1.5)), + dynamic_min_hits=max(1, _safe_int(src.get("dynamic_min_hits", 2), 2)), + 
blocked_cost=int(np.clip(_safe_int(src.get("blocked_cost", 220), 220), 100, 255)), + max_width_cells=max(100, _safe_int(src.get("max_width_cells", 900), 900)), + max_height_cells=max(100, _safe_int(src.get("max_height_cells", 900), 900)), + ) + + def patched(self, patch: Dict[str, Any] | None) -> "LiveCostmapConfig": + src = patch or {} + return LiveCostmapConfig.from_dict( + { + "enabled": src.get("enabled", self.enabled), + "resolution_m": src.get("resolution_m", self.resolution_m), + "z_min_m": src.get("z_min_m", self.z_min_m), + "z_max_m": src.get("z_max_m", self.z_max_m), + "padding_m": src.get("padding_m", self.padding_m), + "inflation_radius_m": src.get("inflation_radius_m", self.inflation_radius_m), + "dynamic_decay_sec": src.get("dynamic_decay_sec", self.dynamic_decay_sec), + "dynamic_min_hits": src.get("dynamic_min_hits", self.dynamic_min_hits), + "blocked_cost": src.get("blocked_cost", self.blocked_cost), + "max_width_cells": src.get("max_width_cells", self.max_width_cells), + "max_height_cells": src.get("max_height_cells", self.max_height_cells), + } + ) + + +class LiveCostmapRuntime: + """ + Maintains real-time static + dynamic + inflated obstacle layers. + Static layer comes from SLAM stable points; dynamic layer comes from live points. + """ + + def __init__(self, cfg: LiveCostmapConfig): + self.cfg = cfg + self._dynamic_cells: Dict[Tuple[int, int], Tuple[int, float]] = {} + self._grid: Optional[Dict[str, Any]] = None + + def reset(self) -> None: + self._dynamic_cells.clear() + self._grid = None + + def _clip_nav_band(self, points: Optional[np.ndarray]) -> np.ndarray: + if points is None: + return np.zeros((0, 3), dtype=np.float32) + pts = np.asarray(points, dtype=np.float32) + if pts.ndim != 2 or pts.shape[1] < 3 or len(pts) == 0: + return np.zeros((0, 3), dtype=np.float32) + zmin = float(self.cfg.z_min_m) + zmax = float(self.cfg.z_max_m) + m = (pts[:, 2] >= zmin) & (pts[:, 2] <= zmax) + return pts[m, :3] + + @staticmethod + def _grid_indices(xy: np.ndarray, min_xy: np.ndarray, res: float, w: int, h: int) -> Tuple[np.ndarray, np.ndarray]: + gx = np.floor((xy[:, 0] - min_xy[0]) / res).astype(np.int32) + gy = np.floor((xy[:, 1] - min_xy[1]) / res).astype(np.int32) + gx = np.clip(gx, 0, w - 1) + gy = np.clip(gy, 0, h - 1) + return gx, gy + + def world_to_grid(self, x: float, y: float) -> Optional[Tuple[int, int]]: + if self._grid is None: + return None + origin = np.asarray(self._grid["origin_xy"], dtype=np.float64) + res = float(self._grid["resolution_m"]) + shape = tuple(self._grid["shape_hw"]) + h, w = int(shape[0]), int(shape[1]) + gx = int(math.floor((float(x) - origin[0]) / res)) + gy = int(math.floor((float(y) - origin[1]) / res)) + if gx < 0 or gy < 0 or gx >= w or gy >= h: + return None + return gx, gy + + def grid_to_world(self, gx: int, gy: int) -> Optional[Tuple[float, float]]: + if self._grid is None: + return None + origin = np.asarray(self._grid["origin_xy"], dtype=np.float64) + res = float(self._grid["resolution_m"]) + x = origin[0] + (float(gx) + 0.5) * res + y = origin[1] + (float(gy) + 0.5) * res + return float(x), float(y) + + def cost_at_world(self, x: float, y: float) -> int: + if self._grid is None: + return 0 + idx = self.world_to_grid(x, y) + if idx is None: + return 255 + gx, gy = idx + cost = np.asarray(self._grid["costmap"], dtype=np.uint8) + return int(cost[gy, gx]) + + def occupied_near(self, x: float, y: float, radius_m: float, cost_thresh: Optional[int] = None) -> bool: + if self._grid is None: + return False + thresh = int(self.cfg.blocked_cost 
if cost_thresh is None else cost_thresh) + idx = self.world_to_grid(x, y) + if idx is None: + return True + gx, gy = idx + cost = np.asarray(self._grid["costmap"], dtype=np.uint8) + h, w = cost.shape + rc = int(math.ceil(max(0.0, float(radius_m)) / float(self._grid["resolution_m"]))) + ys = max(0, gy - rc) + ye = min(h, gy + rc + 1) + xs = max(0, gx - rc) + xe = min(w, gx + rc + 1) + return bool(np.any(cost[ys:ye, xs:xe] >= thresh)) + + def _compute_bounds(self, static_pts: np.ndarray, live_pts: np.ndarray) -> Optional[Tuple[np.ndarray, np.ndarray]]: + has_static = len(static_pts) > 0 + has_live = len(live_pts) > 0 + if not has_static and not has_live: + return None + if has_static and has_live: + all_xy = np.vstack((static_pts[:, :2], live_pts[:, :2])) + elif has_static: + all_xy = static_pts[:, :2] + else: + all_xy = live_pts[:, :2] + pad = float(self.cfg.padding_m) + mn = all_xy.min(axis=0) - pad + mx = all_xy.max(axis=0) + pad + return mn.astype(np.float64), mx.astype(np.float64) + + def _clamp_grid_size(self, mn: np.ndarray, mx: np.ndarray, res: float) -> Tuple[np.ndarray, np.ndarray, float, int, int]: + span = np.maximum(mx - mn, res) + w = int(np.ceil(span[0] / res)) + 1 + h = int(np.ceil(span[1] / res)) + 1 + res_out = float(res) + if w > int(self.cfg.max_width_cells) or h > int(self.cfg.max_height_cells): + sx = float(w) / float(self.cfg.max_width_cells) + sy = float(h) / float(self.cfg.max_height_cells) + scale = max(sx, sy) + res_out = res_out * scale + span = np.maximum(mx - mn, res_out) + w = int(np.ceil(span[0] / res_out)) + 1 + h = int(np.ceil(span[1] / res_out)) + 1 + return mn, mx, float(res_out), int(w), int(h) + + def update( + self, + stable_points_world: Optional[np.ndarray], + live_points_world: Optional[np.ndarray], + now: float, + ) -> Optional[Dict[str, Any]]: + if not self.cfg.enabled: + return None + + static_pts = self._clip_nav_band(stable_points_world) + live_pts = self._clip_nav_band(live_points_world) + + bounds = self._compute_bounds(static_pts, live_pts) + if bounds is None: + # decay dynamic memory even when there is no input + self._dynamic_cells = { + k: v + for k, v in self._dynamic_cells.items() + if (float(now) - float(v[1])) <= float(self.cfg.dynamic_decay_sec) + } + return None + + mn, mx = bounds + mn, mx, res, w, h = self._clamp_grid_size(mn, mx, float(self.cfg.resolution_m)) + + static_occ = np.zeros((h, w), dtype=bool) + if len(static_pts) > 0: + gx, gy = self._grid_indices(static_pts[:, :2], mn, res, w, h) + static_occ[gy, gx] = True + + # age out dynamic memory first + max_age = float(self.cfg.dynamic_decay_sec) + new_dyn: Dict[Tuple[int, int], Tuple[int, float]] = {} + for (cx, cy), (hits, ts) in self._dynamic_cells.items(): + if (float(now) - float(ts)) <= max_age and 0 <= cx < w and 0 <= cy < h: + new_dyn[(cx, cy)] = (int(hits), float(ts)) + self._dynamic_cells = new_dyn + + if len(live_pts) > 0: + gx_l, gy_l = self._grid_indices(live_pts[:, :2], mn, res, w, h) + for gx, gy in zip(gx_l.tolist(), gy_l.tolist()): + if static_occ[gy, gx]: + continue + k = (int(gx), int(gy)) + old_hits, _ = self._dynamic_cells.get(k, (0, float(now))) + self._dynamic_cells[k] = (int(old_hits) + 1, float(now)) + + dynamic_occ = np.zeros((h, w), dtype=bool) + min_hits = int(self.cfg.dynamic_min_hits) + for (gx, gy), (hits, ts) in self._dynamic_cells.items(): + if hits >= min_hits and (float(now) - float(ts)) <= max_age: + if 0 <= gx < w and 0 <= gy < h: + dynamic_occ[gy, gx] = True + + occ = static_occ | dynamic_occ + inf_cells = 
int(np.ceil(float(self.cfg.inflation_radius_m) / res)) + inflated = _inflate_binary(occ, inf_cells) + + cost = np.zeros((h, w), dtype=np.uint8) + cost[inflated] = 180 + cost[dynamic_occ] = 220 + cost[static_occ] = 255 + + out = { + "costmap": cost, + "static_mask": static_occ, + "dynamic_mask": dynamic_occ, + "inflated_mask": inflated, + "origin_xy": mn.astype(np.float32), + "resolution_m": float(res), + "shape_hw": [int(h), int(w)], + "static_cells": int(np.count_nonzero(static_occ)), + "dynamic_cells": int(np.count_nonzero(dynamic_occ)), + "inflated_cells": int(np.count_nonzero(inflated)), + } + self._grid = out + return { + "shape": [int(h), int(w)], + "resolution_m": float(res), + "origin_xy": [float(mn[0]), float(mn[1])], + "static_cells": int(out["static_cells"]), + "dynamic_cells": int(out["dynamic_cells"]), + "inflated_cells": int(out["inflated_cells"]), + } + + @property + def grid(self) -> Optional[Dict[str, Any]]: + return self._grid + + +class GlobalAStarPlanner: + def __init__(self, blocked_cost: int = 220): + self.blocked_cost = int(np.clip(int(blocked_cost), 100, 255)) + + @staticmethod + def _heur(a: Tuple[int, int], b: Tuple[int, int]) -> float: + return float(math.hypot(float(a[0] - b[0]), float(a[1] - b[1]))) + + @staticmethod + def _neighbors(x: int, y: int, w: int, h: int) -> Iterable[Tuple[int, int, float]]: + for dy in (-1, 0, 1): + for dx in (-1, 0, 1): + if dx == 0 and dy == 0: + continue + nx = x + dx + ny = y + dy + if 0 <= nx < w and 0 <= ny < h: + step = 1.41421356 if (dx != 0 and dy != 0) else 1.0 + yield nx, ny, step + + def plan( + self, + costmap: np.ndarray, + origin_xy: np.ndarray, + resolution_m: float, + start_xy: Tuple[float, float], + goal_xy: Tuple[float, float], + max_expansions: int = 120000, + ) -> List[Tuple[float, float]]: + cost = np.asarray(costmap, dtype=np.uint8) + if cost.ndim != 2: + return [] + h, w = cost.shape + origin = np.asarray(origin_xy, dtype=np.float64) + res = float(resolution_m) + + def to_grid(x: float, y: float) -> Optional[Tuple[int, int]]: + gx = int(math.floor((float(x) - origin[0]) / res)) + gy = int(math.floor((float(y) - origin[1]) / res)) + if gx < 0 or gy < 0 or gx >= w or gy >= h: + return None + return gx, gy + + def to_world(gx: int, gy: int) -> Tuple[float, float]: + return ( + float(origin[0] + (float(gx) + 0.5) * res), + float(origin[1] + (float(gy) + 0.5) * res), + ) + + s = to_grid(float(start_xy[0]), float(start_xy[1])) + g = to_grid(float(goal_xy[0]), float(goal_xy[1])) + if s is None or g is None: + return [] + if int(cost[s[1], s[0]]) >= self.blocked_cost or int(cost[g[1], g[0]]) >= self.blocked_cost: + return [] + + open_heap: List[Tuple[float, float, Tuple[int, int]]] = [] + heapq.heappush(open_heap, (self._heur(s, g), 0.0, s)) + came_from: Dict[Tuple[int, int], Tuple[int, int]] = {} + g_score: Dict[Tuple[int, int], float] = {s: 0.0} + closed: Dict[Tuple[int, int], bool] = {} + + expansions = 0 + while open_heap: + _, cur_g, cur = heapq.heappop(open_heap) + if closed.get(cur, False): + continue + closed[cur] = True + if cur == g: + break + expansions += 1 + if expansions >= int(max_expansions): + import sys + print( + f"[A*] expansion limit {max_expansions} reached; no path found " + f"from {start_xy} to {goal_xy}", + file=sys.stderr, + ) + return [] + + cx, cy = cur + for nx, ny, step in self._neighbors(cx, cy, w, h): + if int(cost[ny, nx]) >= self.blocked_cost: + continue + ng = float(cur_g + step + (float(cost[ny, nx]) / 255.0) * 2.0) + node = (nx, ny) + if ng < float(g_score.get(node, 1e18)): + 
g_score[node] = ng + came_from[node] = cur + f = ng + self._heur(node, g) + heapq.heappush(open_heap, (f, ng, node)) + + if g not in came_from and g != s: + return [] + + path_cells: List[Tuple[int, int]] = [g] + cur = g + visited_back: set = {g} + while cur != s: + nxt = came_from.get(cur) + if nxt is None or nxt in visited_back: + return [] + visited_back.add(nxt) + cur = nxt + path_cells.append(cur) + path_cells.reverse() + return [to_world(px, py) for (px, py) in path_cells] + + +@dataclass +class LocalPlannerConfig: + lookahead_m: float = 0.8 + max_linear_mps: float = 0.6 + max_angular_rps: float = 1.3 + goal_tolerance_m: float = 0.30 + collision_probe_m: float = 0.6 + + @staticmethod + def from_dict(d: Dict[str, Any] | None) -> "LocalPlannerConfig": + src = d or {} + return LocalPlannerConfig( + lookahead_m=max(0.1, _safe_float(src.get("lookahead_m", 0.8), 0.8)), + max_linear_mps=max(0.05, _safe_float(src.get("max_linear_mps", 0.6), 0.6)), + max_angular_rps=max(0.1, _safe_float(src.get("max_angular_rps", 1.3), 1.3)), + goal_tolerance_m=max(0.05, _safe_float(src.get("goal_tolerance_m", 0.30), 0.30)), + collision_probe_m=max(0.1, _safe_float(src.get("collision_probe_m", 0.6), 0.6)), + ) + + +class LocalReactivePlanner: + def __init__(self, cfg: LocalPlannerConfig): + self.cfg = cfg + + @staticmethod + def _yaw_from_pose(pose: np.ndarray) -> float: + # ZYX yaw from rotation matrix + return float(math.atan2(float(pose[1, 0]), float(pose[0, 0]))) + + @staticmethod + def _wrap_pi(a: float) -> float: + while a > math.pi: + a -= 2.0 * math.pi + while a < -math.pi: + a += 2.0 * math.pi + return a + + def compute_command( + self, + pose_world: np.ndarray, + path_world: List[Tuple[float, float]], + runtime: LiveCostmapRuntime, + ) -> Dict[str, Any]: + cmd = { + "linear_mps": 0.0, + "angular_rps": 0.0, + "goal_reached": False, + "blocked": False, + } + if pose_world is None or np.asarray(pose_world).shape != (4, 4): + return cmd + if not path_world: + return cmd + + p = np.asarray(pose_world, dtype=np.float64) + x = float(p[0, 3]) + y = float(p[1, 3]) + yaw = self._yaw_from_pose(p) + + goal_x, goal_y = float(path_world[-1][0]), float(path_world[-1][1]) + dist_goal = float(math.hypot(goal_x - x, goal_y - y)) + if dist_goal <= float(self.cfg.goal_tolerance_m): + cmd["goal_reached"] = True + return cmd + + target = path_world[-1] + for wx, wy in path_world: + d = float(math.hypot(float(wx) - x, float(wy) - y)) + if d >= float(self.cfg.lookahead_m): + target = (float(wx), float(wy)) + break + + dx = float(target[0] - x) + dy = float(target[1] - y) + target_yaw = float(math.atan2(dy, dx)) + yaw_err = self._wrap_pi(target_yaw - yaw) + + ang = float(np.clip(1.5 * yaw_err, -float(self.cfg.max_angular_rps), float(self.cfg.max_angular_rps))) + lin_scale = max(0.0, 1.0 - min(1.0, abs(yaw_err) / math.pi)) + lin = float(self.cfg.max_linear_mps) * lin_scale + + # Probe short horizon for immediate collision. 
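+        # The probe walks collision_probe_m ahead along the current heading in
+        # n_probe equal steps; any cell at or above blocked_cost zeroes the
+        # linear speed and flags the command as blocked.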
+        n_probe = 6
+        step = float(self.cfg.collision_probe_m) / float(n_probe)
+        for i in range(1, n_probe + 1):
+            px = x + float(i) * step * math.cos(yaw)
+            py = y + float(i) * step * math.sin(yaw)
+            if runtime.cost_at_world(px, py) >= int(runtime.cfg.blocked_cost):
+                lin = 0.0
+                cmd["blocked"] = True
+                break
+
+        cmd["linear_mps"] = float(lin)
+        cmd["angular_rps"] = float(ang)
+        return cmd
diff --git a/Lidar/SLAM_Navigation.py b/Lidar/SLAM_Navigation.py
new file mode 100644
index 0000000..887e3b5
--- /dev/null
+++ b/Lidar/SLAM_Navigation.py
@@ -0,0 +1,207 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Dict, Tuple
+
+import numpy as np
+
+
+def _safe_float(v: Any, default: float) -> float:
+    try:
+        return float(v)
+    except Exception:
+        return float(default)
+
+
+def _safe_int(v: Any, default: int) -> int:
+    try:
+        return int(v)
+    except Exception:
+        return int(default)
+
+
+def _as_bool(v: Any, default: bool) -> bool:
+    if v is None:
+        return default
+    if isinstance(v, bool):
+        return v
+    if isinstance(v, (int, float)):
+        return bool(v)
+    return str(v).strip().lower() in ("1", "true", "yes", "on")
+
+
+@dataclass
+class NavigationExportConfig:
+    enabled: bool = True
+    min_points: int = 400
+    resolution_m: float = 0.05
+    z_min_m: float = 0.05  # 5 cm above floor — avoids ground returns, captures low obstacles
+    z_max_m: float = 1.40  # 1.4 m — covers G1 Edu body (1.25 m) with margin
+    inflation_radius_m: float = 0.20
+    padding_m: float = 0.40
+
+    @staticmethod
+    def from_dict(d: Dict[str, Any] | None) -> "NavigationExportConfig":
+        src = d or {}
+        return NavigationExportConfig(
+            enabled=_as_bool(src.get("enabled", True), True),
+            min_points=max(50, _safe_int(src.get("min_points", 400), 400)),
+            resolution_m=max(0.01, _safe_float(src.get("resolution_m", 0.05), 0.05)),
+            z_min_m=_safe_float(src.get("z_min_m", 0.05), 0.05),
+            z_max_m=_safe_float(src.get("z_max_m", 1.40), 1.40),
+            inflation_radius_m=max(0.0, _safe_float(src.get("inflation_radius_m", 0.20), 0.20)),
+            padding_m=max(0.0, _safe_float(src.get("padding_m", 0.40), 0.40)),
+        )
+
+    def patched(self, patch: Dict[str, Any] | None) -> "NavigationExportConfig":
+        src = patch or {}
+        cur = self
+        return NavigationExportConfig(
+            enabled=_as_bool(src.get("enabled", cur.enabled), cur.enabled),
+            min_points=max(50, _safe_int(src.get("min_points", cur.min_points), cur.min_points)),
+            resolution_m=max(0.01, _safe_float(src.get("resolution_m", cur.resolution_m), cur.resolution_m)),
+            z_min_m=_safe_float(src.get("z_min_m", cur.z_min_m), cur.z_min_m),
+            z_max_m=_safe_float(src.get("z_max_m", cur.z_max_m), cur.z_max_m),
+            inflation_radius_m=max(
+                0.0,
+                _safe_float(src.get("inflation_radius_m", cur.inflation_radius_m), cur.inflation_radius_m),
+            ),
+            padding_m=max(0.0, _safe_float(src.get("padding_m", cur.padding_m), cur.padding_m)),
+        )
+
+
+def _inflate_binary(mask: np.ndarray, radius_cells: int) -> np.ndarray:
+    if radius_cells <= 0:
+        return mask.copy()
+    h, w = mask.shape
+    out = mask.copy()
+    offsets = []
+    rr = radius_cells * radius_cells
+    for dy in range(-radius_cells, radius_cells + 1):
+        for dx in range(-radius_cells, radius_cells + 1):
+            if (dy * dy + dx * dx) <= rr:
+                offsets.append((dy, dx))
+    for dy, dx in offsets:
+        ys = max(0, -dy)
+        ye = min(h, h - dy)
+        xs = max(0, -dx)
+        xe = min(w, w - dx)
+        yd = max(0, dy)
+        xd = max(0, dx)
+        out[yd : yd + (ye - ys), xd : xd + (xe - xs)] |= mask[ys:ye, xs:xe]
+    return out
+
+
+def _sanitize_basename(name: str) -> str:
+    
base = (name or "").strip() or "map" + for bad in ("/", "\\", ":", "*", "?", "\"", "<", ">", "|"): + base = base.replace(bad, "_") + return base + + +class NavigationExporter: + def __init__(self, cfg: NavigationExportConfig, data_folder: str): + self.cfg = cfg + self.data_folder = Path(data_folder) + self.data_folder.mkdir(parents=True, exist_ok=True) + + def update_config(self, cfg: NavigationExportConfig) -> None: + self.cfg = cfg + + def _filter_nav_points(self, points: np.ndarray) -> np.ndarray: + pts = np.asarray(points, dtype=np.float32) + if pts.ndim != 2 or pts.shape[1] < 3: + raise RuntimeError("Invalid point array for navigation export.") + zmin, zmax = float(self.cfg.z_min_m), float(self.cfg.z_max_m) + m = (pts[:, 2] >= zmin) & (pts[:, 2] <= zmax) + return pts[m] + + def count_nav_points(self, points: np.ndarray) -> int: + try: + return int(len(self._filter_nav_points(points))) + except Exception: + return 0 + + def _build_grid(self, points: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + pts = self._filter_nav_points(points) + if len(pts) < int(self.cfg.min_points): + raise RuntimeError(f"Not enough points for nav export ({len(pts)}).") + + res = float(self.cfg.resolution_m) + pad = float(self.cfg.padding_m) + + min_xy = pts[:, :2].min(axis=0) - pad + max_xy = pts[:, :2].max(axis=0) + pad + span = np.maximum(max_xy - min_xy, res) + w = int(np.ceil(span[0] / res)) + 1 + h = int(np.ceil(span[1] / res)) + 1 + + occ = np.zeros((h, w), dtype=bool) + gx = np.clip(((pts[:, 0] - min_xy[0]) / res).astype(np.int32), 0, w - 1) + gy = np.clip(((pts[:, 1] - min_xy[1]) / res).astype(np.int32), 0, h - 1) + occ[gy, gx] = True + + rad_cells = int(np.ceil(float(self.cfg.inflation_radius_m) / res)) + inf = _inflate_binary(occ, rad_cells) + + return occ, inf, min_xy + + def export(self, base_name: str, points: np.ndarray) -> Dict[str, Any]: + if not self.cfg.enabled: + raise RuntimeError("Navigation export is disabled in config.") + + occ, inf, min_xy = self._build_grid(points) + base = _sanitize_basename(base_name) + folder = self.data_folder + + pgm_name = f"{base}_nav.pgm" + yaml_name = f"{base}_nav.yaml" + cost_name = f"{base}_costmap.npy" + n = 1 + while (folder / pgm_name).exists() or (folder / yaml_name).exists() or (folder / cost_name).exists(): + pgm_name = f"{base}_nav({n}).pgm" + yaml_name = f"{base}_nav({n}).yaml" + cost_name = f"{base}_costmap({n}).npy" + n += 1 + + # ROS map image: 0=occupied, 254=free + img = np.where(inf, 0, 254).astype(np.uint8) + img = np.flipud(img) # image origin top-left, map origin bottom-left + + pgm_path = folder / pgm_name + with open(pgm_path, "wb") as f: + h, w = img.shape + f.write(f"P5\n{w} {h}\n255\n".encode("ascii")) + f.write(img.tobytes()) + + yaml_path = folder / yaml_name + # Write proper YAML — NOT json.dumps (quoted keys break ROS2 Nav2 map_server) + ox, oy = float(min_xy[0]), float(min_xy[1]) + yaml_lines = [ + f"image: {pgm_name}", + f"resolution: {float(self.cfg.resolution_m):.6f}", + f"origin: [{ox:.6f}, {oy:.6f}, 0.000000]", + "negate: 0", + "occupied_thresh: 0.65", + "free_thresh: 0.196", + "mode: trinary", # required by Nav2 map_server + ] + yaml_path.write_text("\n".join(yaml_lines) + "\n", encoding="utf-8") + + cost = np.zeros_like(img, dtype=np.uint8) + occ_img = np.flipud(occ) + inf_img = np.flipud(inf) + cost[inf_img] = 180 + cost[occ_img] = 255 + cost_path = folder / cost_name + np.save(cost_path, cost) + + return { + "pgm": str(pgm_path), + "yaml": str(yaml_path), + "costmap": str(cost_path), + "shape": 
[int(img.shape[0]), int(img.shape[1])], + "occupied_cells": int(np.count_nonzero(occ)), + "inflated_cells": int(np.count_nonzero(inf)), + } diff --git a/Lidar/SLAM_PlaceRecognition.py b/Lidar/SLAM_PlaceRecognition.py new file mode 100644 index 0000000..fffdf4b --- /dev/null +++ b/Lidar/SLAM_PlaceRecognition.py @@ -0,0 +1,303 @@ +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +import numpy as np + + +def _as_bool(v: Any, default: bool = False) -> bool: + if v is None: + return default + if isinstance(v, bool): + return v + if isinstance(v, (int, float)): + return bool(v) + return str(v).strip().lower() in ("1", "true", "yes", "on") + + +def _safe_float(v: Any, default: float) -> float: + try: + return float(v) + except Exception: + return float(default) + + +def _safe_int(v: Any, default: int) -> int: + try: + return int(v) + except Exception: + return int(default) + + +def _safe_profiles(v: Any, default: Tuple[str, ...]) -> Tuple[str, ...]: + if isinstance(v, (list, tuple)): + out = [] + for item in v: + s = str(item).upper().strip() + if s: + out.append(s) + if out: + return tuple(out) + return tuple(default) + + +def _voxel_downsample(points: np.ndarray, voxel_m: float) -> np.ndarray: + pts = np.asarray(points, dtype=np.float32) + if pts.ndim != 2 or pts.shape[1] != 3 or len(pts) == 0: + return np.zeros((0, 3), dtype=np.float32) + if float(voxel_m) <= 0.0: + return pts + keys = np.floor(pts / float(voxel_m)).astype(np.int32) + _, idx = np.unique(keys, axis=0, return_index=True) + return pts[np.sort(idx)] + + +@dataclass +class PlaceRecognitionConfig: + enabled: bool = True + backend: str = "geometric" + models_dir: str = "model" + apply_profiles: Tuple[str, ...] 
= ("LOCALIZE_MAP", "LIVE_NAV_MAP") + anchor_voxel_m: float = 1.6 + submap_radius_m: float = 4.8 + min_points_per_anchor: int = 180 + max_anchors: int = 140 + candidate_top_k: int = 10 + descriptor_voxel_m: float = 0.18 + radial_bins: int = 8 + angular_bins: int = 16 + height_bins: int = 6 + guess_bias_radius_m: float = 9.0 + guess_score_weight: float = 0.18 + + @staticmethod + def from_dict(d: Dict[str, Any] | None) -> "PlaceRecognitionConfig": + src = d or {} + return PlaceRecognitionConfig( + enabled=_as_bool(src.get("enabled", True), True), + backend=str(src.get("backend", "geometric")).strip().lower() or "geometric", + models_dir=str(src.get("models_dir", "model")).strip() or "model", + apply_profiles=_safe_profiles( + src.get("apply_profiles", ("LOCALIZE_MAP", "LIVE_NAV_MAP")), + ("LOCALIZE_MAP", "LIVE_NAV_MAP"), + ), + anchor_voxel_m=max(0.4, _safe_float(src.get("anchor_voxel_m", 1.6), 1.6)), + submap_radius_m=max(1.0, _safe_float(src.get("submap_radius_m", 4.8), 4.8)), + min_points_per_anchor=max(40, _safe_int(src.get("min_points_per_anchor", 180), 180)), + max_anchors=max(8, _safe_int(src.get("max_anchors", 140), 140)), + candidate_top_k=max(1, _safe_int(src.get("candidate_top_k", 10), 10)), + descriptor_voxel_m=max(0.04, _safe_float(src.get("descriptor_voxel_m", 0.18), 0.18)), + radial_bins=max(4, _safe_int(src.get("radial_bins", 8), 8)), + angular_bins=max(8, _safe_int(src.get("angular_bins", 16), 16)), + height_bins=max(3, _safe_int(src.get("height_bins", 6), 6)), + guess_bias_radius_m=max(0.5, _safe_float(src.get("guess_bias_radius_m", 9.0), 9.0)), + guess_score_weight=float(np.clip(_safe_float(src.get("guess_score_weight", 0.18), 0.18), 0.0, 1.0)), + ) + + +class PlaceRecognitionIndex: + def __init__(self, cfg: PlaceRecognitionConfig, base_dir: str | Path): + self.cfg = cfg + self.base_dir = Path(base_dir).resolve() + self.models_dir = (self.base_dir / self.cfg.models_dir).resolve() + self.reset() + + def reset(self) -> None: + self.ref_path: Optional[str] = None + self.anchors = np.zeros((0, 3), dtype=np.float32) + self.descriptors = np.zeros((0, 1), dtype=np.float32) + self.anchor_sizes = np.zeros((0,), dtype=np.int32) + self.index_ready = False + self.model_files: List[str] = [] + self.backend_active = "disabled" + self.last_query: Dict[str, Any] = {} + + def profile_allowed(self, profile: str) -> bool: + p = str(profile).upper().strip() + return p in {str(x).upper().strip() for x in tuple(self.cfg.apply_profiles)} + + def _refresh_model_files(self) -> None: + try: + self.models_dir.mkdir(parents=True, exist_ok=True) + self.model_files = sorted( + str(p.name) + for p in self.models_dir.iterdir() + if p.is_file() and p.suffix.lower() in (".onnx", ".pt", ".pth", ".engine") + ) + except Exception: + self.model_files = [] + + def _descriptor(self, points: np.ndarray) -> np.ndarray: + pts = np.asarray(points, dtype=np.float32) + if pts.ndim != 2 or pts.shape[1] != 3 or len(pts) == 0: + n = int(self.cfg.radial_bins + self.cfg.angular_bins + self.cfg.height_bins + 7) + return np.zeros((n,), dtype=np.float32) + + center = np.mean(pts, axis=0, dtype=np.float64) + rel = np.asarray(pts, dtype=np.float64) - center.reshape((1, 3)) + xy = rel[:, :2] + r = np.linalg.norm(xy, axis=1) + theta = np.arctan2(xy[:, 1], xy[:, 0]) + z = rel[:, 2] + + max_r = max(0.5, float(self.cfg.submap_radius_m)) + r_clip = np.clip(r / max_r, 0.0, 0.999999) + theta_norm = (theta + np.pi) / (2.0 * np.pi) + z_clip = np.clip((z + 2.5) / 5.0, 0.0, 0.999999) + + h_r, _ = np.histogram(r_clip, 
bins=int(self.cfg.radial_bins), range=(0.0, 1.0)) + h_t, _ = np.histogram(theta_norm, bins=int(self.cfg.angular_bins), range=(0.0, 1.0)) + h_z, _ = np.histogram(z_clip, bins=int(self.cfg.height_bins), range=(0.0, 1.0)) + + cov = np.cov(rel.T) if len(rel) >= 3 else np.eye(3, dtype=np.float64) + eig = np.sort(np.linalg.eigvalsh(cov).astype(np.float64)) + eig = eig / max(1e-6, float(np.sum(np.abs(eig)))) + + ext_xy = np.percentile(np.abs(xy), 90.0, axis=0) if len(xy) > 0 else np.zeros((2,), dtype=np.float64) + ext_z = np.percentile(np.abs(z), 90.0) if len(z) > 0 else 0.0 + density = float(len(pts)) / max(1.0, float(np.pi * max_r * max_r)) + + desc = np.concatenate( + [ + np.asarray(h_r, dtype=np.float32), + np.asarray(h_t, dtype=np.float32), + np.asarray(h_z, dtype=np.float32), + np.asarray( + [ + float(ext_xy[0]) / max_r, + float(ext_xy[1]) / max_r, + float(ext_z) / 2.5, + float(density) / 200.0, + ], + dtype=np.float32, + ), + np.asarray(eig, dtype=np.float32), + ], + axis=0, + ).astype(np.float32) + + norm = float(np.linalg.norm(desc)) + if norm > 1e-6: + desc /= norm + return desc + + def build(self, ref_points: np.ndarray, ref_path: Optional[str] = None) -> Dict[str, Any]: + self._refresh_model_files() + pts = _voxel_downsample(np.asarray(ref_points, dtype=np.float32), float(self.cfg.descriptor_voxel_m)) + self.ref_path = None if ref_path is None else str(ref_path) + self.index_ready = False + self.anchors = np.zeros((0, 3), dtype=np.float32) + self.descriptors = np.zeros((0, 1), dtype=np.float32) + self.anchor_sizes = np.zeros((0,), dtype=np.int32) + + if not self.cfg.enabled or len(pts) < int(self.cfg.min_points_per_anchor): + self.backend_active = "disabled" if not self.cfg.enabled else "geometric_empty" + return self.snapshot() + + xy_keys = np.floor(pts[:, :2] / float(self.cfg.anchor_voxel_m)).astype(np.int32) + uniq, inverse, counts = np.unique(xy_keys, axis=0, return_inverse=True, return_counts=True) + order = np.argsort(counts)[::-1] + if len(order) > int(self.cfg.max_anchors): + order = order[: int(self.cfg.max_anchors)] + + anchors: List[np.ndarray] = [] + descs: List[np.ndarray] = [] + sizes: List[int] = [] + r2 = float(self.cfg.submap_radius_m) * float(self.cfg.submap_radius_m) + for idx in order: + base_mask = inverse == int(idx) + if not np.any(base_mask): + continue + center = np.mean(pts[base_mask], axis=0, dtype=np.float64) + rel = pts - center.reshape((1, 3)) + d2 = np.einsum("ij,ij->i", rel[:, :2], rel[:, :2]) + keep = d2 <= r2 + n_keep = int(np.count_nonzero(keep)) + if n_keep < int(self.cfg.min_points_per_anchor): + continue + local_pts = pts[keep] + anchors.append(np.asarray(center, dtype=np.float32)) + descs.append(self._descriptor(local_pts)) + sizes.append(n_keep) + + if descs: + self.anchors = np.asarray(anchors, dtype=np.float32).reshape((-1, 3)) + self.descriptors = np.asarray(descs, dtype=np.float32) + self.anchor_sizes = np.asarray(sizes, dtype=np.int32) + self.index_ready = True + self.backend_active = "ai_pending" if (self.cfg.backend == "ai" and self.model_files) else "geometric" + else: + self.backend_active = "geometric_empty" + return self.snapshot() + + def query( + self, + live_points: np.ndarray, + approx_center: Optional[np.ndarray] = None, + top_k: Optional[int] = None, + ) -> Dict[str, Any]: + live = _voxel_downsample(np.asarray(live_points, dtype=np.float32), float(self.cfg.descriptor_voxel_m)) + top_n = int(self.cfg.candidate_top_k if top_k is None else max(1, int(top_k))) + if not self.index_ready or len(live) < 40 or len(self.anchors) == 0: + 
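+            # Fewer than 40 downsampled live points is treated as too sparse
+            # for a stable descriptor (the same floor used for
+            # min_points_per_anchor in from_dict).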
out = { + "ok": False, + "reason": "index_not_ready" if not self.index_ready else "live_too_few", + "backend": str(self.backend_active), + "candidates": [], + } + self.last_query = dict(out) + return out + + q = self._descriptor(live) + scores = np.asarray(self.descriptors @ q.reshape((-1, 1)), dtype=np.float32).reshape((-1,)) + + if approx_center is not None: + try: + guess = np.asarray(approx_center, dtype=np.float32).reshape((3,)) + d = np.linalg.norm(self.anchors[:, :2] - guess[:2].reshape((1, 2)), axis=1) + bias = 1.0 - np.clip(d / max(1e-3, float(self.cfg.guess_bias_radius_m)), 0.0, 1.0) + scores = (1.0 - float(self.cfg.guess_score_weight)) * scores + float(self.cfg.guess_score_weight) * bias + except Exception: + pass + + order = np.argsort(scores)[::-1][:top_n] + candidates: List[Dict[str, Any]] = [] + for idx in order: + center = np.asarray(self.anchors[int(idx)], dtype=np.float32) + item = { + "center": [float(center[0]), float(center[1]), float(center[2])], + "score": float(scores[int(idx)]), + "support_points": int(self.anchor_sizes[int(idx)]) if len(self.anchor_sizes) > int(idx) else 0, + } + if approx_center is not None: + try: + guess = np.asarray(approx_center, dtype=np.float32).reshape((3,)) + item["guess_dist_m"] = float(np.linalg.norm(center[:2] - guess[:2])) + except Exception: + pass + candidates.append(item) + + out = { + "ok": bool(len(candidates) > 0), + "reason": "ok" if candidates else "no_candidates", + "backend": str(self.backend_active), + "top_score": float(candidates[0]["score"]) if candidates else 0.0, + "candidates": candidates, + } + self.last_query = dict(out) + return out + + def snapshot(self) -> Dict[str, Any]: + return { + "enabled": bool(self.cfg.enabled), + "backend": str(self.cfg.backend), + "backend_active": str(self.backend_active), + "models_dir": str(self.models_dir), + "model_files": list(self.model_files), + "index_ready": bool(self.index_ready), + "anchors": int(len(self.anchors)), + "ref_path": self.ref_path, + "last_query": dict(self.last_query) if isinstance(self.last_query, dict) else {}, + } diff --git a/Lidar/SLAM_Safety.py b/Lidar/SLAM_Safety.py new file mode 100644 index 0000000..210f983 --- /dev/null +++ b/Lidar/SLAM_Safety.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Dict, Optional + +import numpy as np + + + +def _safe_float(v: Any, default: float) -> float: + try: + return float(v) + except Exception: + return float(default) + + +@dataclass +class SafetyConfig: + enabled: bool = True + stop_radius_m: float = 0.50 # G1 Edu shoulder width ~0.45 m; 0.50 m gives safe margin + stale_localization_sec: float = 1.5 + emergency_hold_sec: float = 0.8 + + @staticmethod + def from_dict(d: Dict[str, Any] | None) -> "SafetyConfig": + src = d or {} + return SafetyConfig( + enabled=bool(src.get("enabled", True)), + stop_radius_m=max(0.05, _safe_float(src.get("stop_radius_m", 0.50), 0.50)), + stale_localization_sec=max(0.2, _safe_float(src.get("stale_localization_sec", 1.5), 1.5)), + emergency_hold_sec=max(0.0, _safe_float(src.get("emergency_hold_sec", 0.8), 0.8)), + ) + + +class SafetySupervisor: + def __init__(self, cfg: SafetyConfig): + self.cfg = cfg + self._last_localize_ok_t = 0.0 + self._last_emergency_t = -1e9 + + def mark_localization(self, ok: bool, now: float) -> None: + if ok: + self._last_localize_ok_t = float(now) + + def evaluate( + self, + now: float, + pose_world: Optional[np.ndarray], + loc_state: str, + nav_runtime: Any, + nav_cmd: Dict[str, Any], + ) -> 
Dict[str, Any]: + if not self.cfg.enabled: + return { + "enabled": False, + "emergency": False, + "reasons": [], + "cmd": dict(nav_cmd), + } + + reasons = [] + motion_requested = ( + abs(float(nav_cmd.get("linear_mps", 0.0))) > 1e-3 + or abs(float(nav_cmd.get("angular_rps", 0.0))) > 1e-3 + ) + lost = str(loc_state).upper().strip() == "LOST" + if lost and motion_requested: + reasons.append("localization_lost") + + stale = False + if self._last_localize_ok_t > 0.0: + stale = (float(now) - float(self._last_localize_ok_t)) > float(self.cfg.stale_localization_sec) + if stale and motion_requested: + reasons.append("localization_stale") + + collision = False + if pose_world is None or np.asarray(pose_world).shape != (4, 4): + if motion_requested: + collision = True + reasons.append("pose_invalid") + else: + x = float(pose_world[0, 3]) + y = float(pose_world[1, 3]) + try: + collision = bool(nav_runtime.occupied_near(x, y, float(self.cfg.stop_radius_m))) if motion_requested else False + except Exception: + collision = False + if collision: + reasons.append("collision_zone") + + emergency = bool((lost and motion_requested) or (stale and motion_requested) or collision) + if emergency: + self._last_emergency_t = float(now) + + hold_active = (float(now) - float(self._last_emergency_t)) <= float(self.cfg.emergency_hold_sec) + cmd_out = dict(nav_cmd) + if emergency or hold_active: + cmd_out["linear_mps"] = 0.0 + cmd_out["angular_rps"] = 0.0 + cmd_out["blocked"] = True + + return { + "enabled": True, + "emergency": bool(emergency), + "hold": bool(hold_active), + "reasons": reasons, + "cmd": cmd_out, + } diff --git a/Lidar/SLAM_Session.py b/Lidar/SLAM_Session.py new file mode 100644 index 0000000..9f06a48 --- /dev/null +++ b/Lidar/SLAM_Session.py @@ -0,0 +1,125 @@ +from __future__ import annotations + +import json +import os +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, Optional + +import numpy as np + + +def _safe_int(v: Any, default: int) -> int: + try: + return int(v) + except Exception: + return int(default) + + +def _as_bool(v: Any, default: bool) -> bool: + if v is None: + return default + if isinstance(v, bool): + return v + if isinstance(v, (int, float)): + return bool(v) + return str(v).strip().lower() in ("1", "true", "yes", "on") + + +@dataclass +class SessionMemoryConfig: + enabled: bool = True + filename: str = "SLAM_session_memory.json" + max_entries: int = 200 + + @staticmethod + def from_dict(d: Dict[str, Any] | None) -> "SessionMemoryConfig": + src = d or {} + return SessionMemoryConfig( + enabled=_as_bool(src.get("enabled", True), True), + filename=str(src.get("filename", "SLAM_session_memory.json")), + max_entries=max(20, _safe_int(src.get("max_entries", 200), 200)), + ) + + +class SessionTransformStore: + def __init__(self, data_folder: str, cfg: SessionMemoryConfig): + self.cfg = cfg + self.data_folder = Path(data_folder) + self.data_folder.mkdir(parents=True, exist_ok=True) + self.path = self.data_folder / self.cfg.filename + self._db: Dict[str, Any] = {"entries": {}} + self._load() + + def _load(self) -> None: + if not self.cfg.enabled: + return + try: + if not self.path.exists(): + return + obj = json.loads(self.path.read_text(encoding="utf-8")) + if isinstance(obj, dict) and isinstance(obj.get("entries"), dict): + self._db = obj + except Exception: + self._db = {"entries": {}} + + def _save(self) -> None: + if not self.cfg.enabled: + return + try: + self.path.write_text(json.dumps(self._db, indent=2), encoding="utf-8") + 
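+            # Not atomic (no tmp-file + os.replace, unlike SubmapCheckpointer
+            # in SLAM_Submap.py): a crash mid-write can truncate the JSON,
+            # which _load() tolerates by resetting to {"entries": {}}.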
except Exception: + pass + + def _key(self, ref_map_path: str) -> str: + try: + # os.path.realpath resolves symlinks reliably even for non-existent paths + return os.path.realpath(os.path.expanduser(str(ref_map_path))) + except Exception: + return str(ref_map_path) + + def get_transform(self, ref_map_path: str) -> Optional[np.ndarray]: + if not self.cfg.enabled: + return None + key = self._key(ref_map_path) + ent = self._db.get("entries", {}).get(key) + if not isinstance(ent, dict): + return None + mat = ent.get("transform") + if not isinstance(mat, list): + return None + try: + arr = np.asarray(mat, dtype=np.float64) + if arr.shape != (4, 4): + return None + return arr + except Exception: + return None + + def record_success(self, ref_map_path: str, transform: np.ndarray, fitness: float, rmse: float) -> None: + if not self.cfg.enabled: + return + key = self._key(ref_map_path) + try: + tf = np.asarray(transform, dtype=np.float64) + if tf.shape != (4, 4): + return + entries = self._db.setdefault("entries", {}) + entries[key] = { + "ref_map": key, + "timestamp": float(time.time()), + "fitness": float(fitness), + "rmse": float(rmse), + "transform": tf.tolist(), + } + + # Keep latest-N entries by timestamp + if len(entries) > int(self.cfg.max_entries): + items = sorted(entries.items(), key=lambda kv: float(kv[1].get("timestamp", 0.0)), reverse=True) + keep = dict(items[: int(self.cfg.max_entries)]) + self._db["entries"] = keep + + self._save() + except Exception: + return diff --git a/Lidar/SLAM_StateMachine.py b/Lidar/SLAM_StateMachine.py new file mode 100644 index 0000000..3539769 --- /dev/null +++ b/Lidar/SLAM_StateMachine.py @@ -0,0 +1,167 @@ +from __future__ import annotations + +from collections import deque +from dataclasses import dataclass +from typing import Any, Deque, Dict, Optional + + +def _safe_float(v: Any, default: float) -> float: + try: + return float(v) + except Exception: + return float(default) + + +def _safe_int(v: Any, default: int) -> int: + try: + return int(v) + except Exception: + return int(default) + + +def _as_bool(v: Any, default: bool) -> bool: + if v is None: + return default + if isinstance(v, bool): + return v + if isinstance(v, (int, float)): + return bool(v) + return str(v).strip().lower() in ("1", "true", "yes", "on") + + +@dataclass +class LocalizationStateConfig: + enabled: bool = True + window_size: int = 12 + degraded_fitness: float = 0.20 + lost_fitness: float = 0.10 + degraded_rmse: float = 0.45 + lost_rmse: float = 0.80 + max_bad_before_lost: int = 4 + min_good_to_recover: int = 3 + recovery_cooldown_sec: float = 2.0 + + @staticmethod + def from_dict(d: Dict[str, Any] | None) -> "LocalizationStateConfig": + src = d or {} + return LocalizationStateConfig( + enabled=_as_bool(src.get("enabled", True), True), + window_size=max(3, _safe_int(src.get("window_size", 12), 12)), + degraded_fitness=max(0.0, min(1.0, _safe_float(src.get("degraded_fitness", 0.20), 0.20))), + lost_fitness=max(0.0, min(1.0, _safe_float(src.get("lost_fitness", 0.10), 0.10))), + degraded_rmse=max(0.01, _safe_float(src.get("degraded_rmse", 0.45), 0.45)), + lost_rmse=max(0.02, _safe_float(src.get("lost_rmse", 0.80), 0.80)), + max_bad_before_lost=max(2, _safe_int(src.get("max_bad_before_lost", 4), 4)), + min_good_to_recover=max(1, _safe_int(src.get("min_good_to_recover", 3), 3)), + recovery_cooldown_sec=max(0.1, _safe_float(src.get("recovery_cooldown_sec", 2.0), 2.0)), + ) + + +class LocalizationStateMachine: + def __init__(self, cfg: LocalizationStateConfig): + self.cfg = cfg + 
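+        # States: TRACKING, DEGRADED, LOST, RECOVERY (driven by update()).
+        # Worked example with the defaults above and a hypothetical ICP stream:
+        #   4x {fitness: 0.05, rmse: 1.0} -> DEGRADED after frame 1, LOST at 4
+        #      (lost_fitness=0.10, max_bad_before_lost=4)
+        #   3x {accepted: True}           -> back to TRACKING
+        #      (min_good_to_recover=3)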
self.state = "TRACKING" + self._history: Deque[bool] = deque(maxlen=int(self.cfg.window_size)) + self._consecutive_bad = 0 + self._consecutive_good = 0 + self._last_update_t = 0.0 + self._last_recovery_t = 0.0 + + def reset(self) -> None: + self.state = "TRACKING" + self._history.clear() + self._consecutive_bad = 0 + self._consecutive_good = 0 + self._last_update_t = 0.0 + self._last_recovery_t = 0.0 + + def set_enabled(self, enabled: bool) -> None: + self.cfg.enabled = bool(enabled) + if not self.cfg.enabled: + self.reset() + self.state = "TRACKING" + + def enter_recovery(self, now: float) -> None: + if not self.cfg.enabled: + return + self.state = "RECOVERY" + self._last_recovery_t = float(now) + + def _is_good(self, result: Optional[Dict[str, Any]]) -> bool: + if result is None: + return False + if bool(result.get("accepted", False)): + return True + fit = float(result.get("fitness", 0.0)) + rmse = float(result.get("rmse", 9e9)) + if fit >= float(self.cfg.degraded_fitness) and rmse <= float(self.cfg.degraded_rmse): + return True + return False + + def _is_lost_like(self, result: Optional[Dict[str, Any]]) -> bool: + if result is None: + return True + fit = float(result.get("fitness", 0.0)) + rmse = float(result.get("rmse", 9e9)) + return (fit < float(self.cfg.lost_fitness)) or (rmse > float(self.cfg.lost_rmse)) + + def update(self, result: Optional[Dict[str, Any]], now: float) -> str: + if not self.cfg.enabled: + self.state = "TRACKING" + return self.state + + good = self._is_good(result) + self._history.append(good) + self._last_update_t = float(now) + + if self.state == "RECOVERY": + if good: + self._consecutive_good += 1 + self._consecutive_bad = 0 + if self._consecutive_good >= int(self.cfg.min_good_to_recover): + self.state = "TRACKING" + else: + self._consecutive_bad += 1 + self._consecutive_good = 0 + self.state = "LOST" + return self.state + + if good: + self._consecutive_good += 1 + self._consecutive_bad = 0 + if self.state in ("DEGRADED", "LOST") and self._consecutive_good >= int(self.cfg.min_good_to_recover): + self.state = "TRACKING" + else: + self._consecutive_bad += 1 + self._consecutive_good = 0 + if self._is_lost_like(result) and self._consecutive_bad >= int(self.cfg.max_bad_before_lost): + self.state = "LOST" + elif self.state == "TRACKING": + self.state = "DEGRADED" + elif self.state == "LOST": + self.state = "LOST" + else: + self.state = "DEGRADED" + + return self.state + + def should_recover(self, now: float) -> bool: + if self.state not in ("LOST", "RECOVERY"): + return False + return (float(now) - float(self._last_recovery_t)) >= float(self.cfg.recovery_cooldown_sec) + + def mark_recovery_attempt(self, now: float) -> None: + self._last_recovery_t = float(now) + + def snapshot(self) -> Dict[str, Any]: + good_ratio = 0.0 + if len(self._history) > 0: + good_ratio = float(sum(1 for x in self._history if x)) / float(len(self._history)) + return { + "enabled": bool(self.cfg.enabled), + "state": self.state, + "history_len": int(len(self._history)), + "good_ratio": float(good_ratio), + "consecutive_bad": int(self._consecutive_bad), + "consecutive_good": int(self._consecutive_good), + } diff --git a/Lidar/SLAM_Submap.py b/Lidar/SLAM_Submap.py new file mode 100644 index 0000000..bf15e0f --- /dev/null +++ b/Lidar/SLAM_Submap.py @@ -0,0 +1,413 @@ +from __future__ import annotations + +import os +import pickle +import sys +import threading +from collections import deque +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Deque, Dict, Iterable, 
Optional, Tuple + +import numpy as np + + +def _as_bool(v: Any, default: bool) -> bool: + if v is None: + return default + if isinstance(v, bool): + return v + if isinstance(v, (int, float)): + return bool(v) + return str(v).strip().lower() in ("1", "true", "yes", "on") + + +def _safe_int(v: Any, default: int) -> int: + try: + return int(v) + except Exception: + return int(default) + + +def _safe_float(v: Any, default: float) -> float: + try: + return float(v) + except Exception: + return float(default) + + +def _safe_profiles(v: Any, default: Tuple[str, ...]) -> Tuple[str, ...]: + if isinstance(v, (list, tuple)): + out = [] + for item in v: + s = str(item).upper().strip() + if s: + out.append(s) + if out: + return tuple(out) + return tuple(default) + + +def _voxel_downsample(points: np.ndarray, voxel_m: float) -> np.ndarray: + pts = np.asarray(points, dtype=np.float32) + if pts.ndim != 2 or pts.shape[1] != 3 or len(pts) == 0: + return np.zeros((0, 3), dtype=np.float32) + v = float(voxel_m) + if v <= 0.0: + return pts + keys = np.floor(pts / v).astype(np.int32) + _, idx = np.unique(keys, axis=0, return_index=True) + return pts[np.sort(idx)] + + +def _pose_delta(pose_a: np.ndarray, pose_b: np.ndarray) -> Tuple[float, float]: + dp = pose_a[:3, 3] - pose_b[:3, 3] + trans = float(np.linalg.norm(dp)) + r = pose_a[:3, :3] @ pose_b[:3, :3].T + ang = float(np.degrees(np.arccos(np.clip((np.trace(r) - 1.0) * 0.5, -1.0, 1.0)))) + return trans, ang + + +@dataclass +class SubmapConfig: + enabled: bool = True + local_window_frames: int = 18 + local_voxel_m: float = 0.08 + global_voxel_m: float = 0.14 + merge_period_sec: float = 0.8 + merge_min_translation_m: float = 0.25 + merge_min_rotation_deg: float = 6.0 + max_global_points: int = 350000 + display_max_points: int = 250000 + apply_profiles: Tuple[str, ...] = ("LOCALIZE_MAP", "LIVE_NAV_MAP") + + @staticmethod + def from_dict(d: Dict[str, Any] | None) -> "SubmapConfig": + src = d or {} + return SubmapConfig( + enabled=_as_bool(src.get("enabled", True), True), + local_window_frames=max(4, _safe_int(src.get("local_window_frames", 18), 18)), + local_voxel_m=max(0.02, _safe_float(src.get("local_voxel_m", 0.08), 0.08)), + global_voxel_m=max(0.02, _safe_float(src.get("global_voxel_m", 0.14), 0.14)), + merge_period_sec=max(0.1, _safe_float(src.get("merge_period_sec", 0.8), 0.8)), + merge_min_translation_m=max( + 0.01, _safe_float(src.get("merge_min_translation_m", 0.25), 0.25) + ), + merge_min_rotation_deg=max( + 0.1, _safe_float(src.get("merge_min_rotation_deg", 6.0), 6.0) + ), + max_global_points=max(20000, _safe_int(src.get("max_global_points", 350000), 350000)), + display_max_points=max(10000, _safe_int(src.get("display_max_points", 250000), 250000)), + apply_profiles=_safe_profiles( + src.get("apply_profiles", ("LOCALIZE_MAP", "LIVE_NAV_MAP")), + ("LOCALIZE_MAP", "LIVE_NAV_MAP"), + ), + ) + + +class SubmapCheckpointer: + """ + Periodically saves LocalGlobalSubmapMapper state to disk so the accumulated + map survives restarts and crashes. 
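+
+    Saves are atomic: the payload is pickled to a .tmp file and then
+    os.replace()'d over the previous checkpoint, so a crash mid-save cannot
+    leave a corrupt file behind (see _write below).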
+ + Usage + ----- + ckpt = SubmapCheckpointer(save_dir, interval_s=60.0) + # in your main loop: + ckpt.maybe_save(mapper) + # on startup: + ckpt.load_into(mapper) # returns True if data was restored + # on shutdown: + ckpt.stop() + """ + + _FILENAME = "submap_checkpoint.pkl" + _PROTO = 5 # pickle protocol (requires Python ≥ 3.8) + + def __init__(self, save_dir: str, interval_s: float = 60.0) -> None: + self._dir = Path(save_dir) + self._dir.mkdir(parents=True, exist_ok=True) + self._interval = max(5.0, float(interval_s)) + self._dst = self._dir / self._FILENAME + self._tmp = self._dir / (self._FILENAME + ".tmp") + self._last_save_t: float = 0.0 + self._lock = threading.Lock() + + def _payload(self, mapper: "LocalGlobalSubmapMapper") -> dict: + """Snapshot all persistent fields from the mapper.""" + return { + "global_pts": np.array(mapper._global_pts, dtype=np.float32, copy=True), + "frames": [np.array(f, dtype=np.float32, copy=True) for f in mapper._frames], + "last_merge_t": float(mapper._last_merge_t), + "last_merge_pose": ( + np.array(mapper._last_merge_pose, dtype=np.float64, copy=True) + if mapper._last_merge_pose is not None + else None + ), + "merge_count": int(mapper._merge_count), + "insert_count": int(mapper._insert_count), + } + + def maybe_save(self, mapper: "LocalGlobalSubmapMapper") -> bool: + """Save if the configured interval has elapsed. Thread-safe. Returns True on save.""" + import time + now = time.time() + with self._lock: + if (now - self._last_save_t) < self._interval: + return False + self._last_save_t = now + + return self._write(self._payload(mapper)) + + def save(self, mapper: "LocalGlobalSubmapMapper") -> bool: + """Force an immediate save. Returns True on success.""" + return self._write(self._payload(mapper)) + + def _write(self, payload: dict) -> bool: + """Atomic write: pickle to .tmp then rename to avoid corrupt checkpoints.""" + try: + with open(self._tmp, "wb") as f: + pickle.dump(payload, f, protocol=self._PROTO) + os.replace(self._tmp, self._dst) # atomic on POSIX + return True + except Exception as exc: + print(f"[SubmapCheckpointer] save failed: {exc}", file=sys.stderr) + return False + + def load_into(self, mapper: "LocalGlobalSubmapMapper") -> bool: + """ + Restore checkpoint into mapper. Returns True if data was loaded. + Must be called before the mapper receives any frames. 
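+
+        A minimal startup sketch, mirroring the class docstring (save_dir is
+        whatever directory you checkpoint into):
+
+            mapper = LocalGlobalSubmapMapper(SubmapConfig())
+            ckpt = SubmapCheckpointer(save_dir, interval_s=60.0)
+            restored = ckpt.load_into(mapper)  # before the first integrate()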
+ """ + if not self._dst.exists(): + return False + try: + with open(self._dst, "rb") as f: + payload = pickle.load(f) + global_pts = np.asarray(payload["global_pts"], dtype=np.float32) + frames_raw = payload.get("frames", []) + mapper._global_pts = global_pts + mapper._frames = deque( + [np.asarray(fr, dtype=np.float32) for fr in frames_raw], + maxlen=int(mapper._frames.maxlen or mapper.cfg.local_window_frames), + ) + mapper._last_merge_t = float(payload.get("last_merge_t", 0.0)) + lmp = payload.get("last_merge_pose") + mapper._last_merge_pose = ( + np.asarray(lmp, dtype=np.float64) if lmp is not None else None + ) + mapper._merge_count = int(payload.get("merge_count", 0)) + mapper._insert_count = int(payload.get("insert_count", 0)) + mapper._rebuild_local() + mapper._global_pts = mapper._trim(mapper._global_pts, int(mapper.cfg.max_global_points)) + return True + except Exception as exc: + print(f"[SubmapCheckpointer] load failed (starting fresh): {exc}", file=sys.stderr) + return False + + def delete(self) -> None: + """Remove the checkpoint file (e.g., after a deliberate RESET).""" + try: + if self._dst.exists(): + self._dst.unlink() + if self._tmp.exists(): + self._tmp.unlink() + except Exception: + pass + + def stop(self) -> None: + """No-op — kept for API symmetry; saves are driven by maybe_save() calls.""" + pass + + +class LocalGlobalSubmapMapper: + """ + Maintains a short-horizon local submap and periodically merges it into a + long-horizon global submap. + """ + + def __init__(self, cfg: SubmapConfig): + self.cfg = cfg + self._frames: Deque[np.ndarray] = deque(maxlen=int(cfg.local_window_frames)) + self._local_pts = np.zeros((0, 3), dtype=np.float32) + self._global_pts = np.zeros((0, 3), dtype=np.float32) + self._last_merge_t = 0.0 + self._last_merge_pose: Optional[np.ndarray] = None + self._merge_count = 0 + self._insert_count = 0 + + def set_config(self, cfg: SubmapConfig, keep_points: bool = True) -> None: + old_frames = list(self._frames) if keep_points else [] + old_global = np.array(self._global_pts, dtype=np.float32, copy=True) if keep_points else np.zeros((0, 3), dtype=np.float32) + old_last_pose = np.array(self._last_merge_pose, dtype=np.float64, copy=True) if (keep_points and self._last_merge_pose is not None) else None + old_last_merge_t = float(self._last_merge_t) if keep_points else 0.0 + old_merge_count = int(self._merge_count) if keep_points else 0 + old_insert_count = int(self._insert_count) if keep_points else 0 + + self.cfg = cfg + self._frames = deque(maxlen=int(cfg.local_window_frames)) + if keep_points and old_frames: + for fr in old_frames[-int(cfg.local_window_frames):]: + self._frames.append(np.asarray(fr, dtype=np.float32)) + self._local_pts = np.zeros((0, 3), dtype=np.float32) + self._global_pts = np.asarray(old_global, dtype=np.float32) if keep_points else np.zeros((0, 3), dtype=np.float32) + self._last_merge_pose = old_last_pose + self._last_merge_t = old_last_merge_t + self._merge_count = old_merge_count + self._insert_count = old_insert_count + self._rebuild_local() + self._global_pts = self._trim(self._global_pts, int(self.cfg.max_global_points)) + + def reset(self) -> None: + self._frames.clear() + self._local_pts = np.zeros((0, 3), dtype=np.float32) + self._global_pts = np.zeros((0, 3), dtype=np.float32) + self._last_merge_t = 0.0 + self._last_merge_pose = None + self._merge_count = 0 + self._insert_count = 0 + + def apply_correction(self, transform: np.ndarray) -> bool: + """ + Apply a rigid transform to all currently held submap data. 
+ Useful when localization updates the global frame estimate and we + want old submap points to remain consistent with new incoming points. + """ + if not self.has_points: + return False + tf = np.asarray(transform, dtype=np.float64) + if tf.shape != (4, 4): + return False + R = np.asarray(tf[:3, :3], dtype=np.float64) + t = np.asarray(tf[:3, 3], dtype=np.float64) + try: + if len(self._global_pts) > 0: + self._global_pts = np.asarray((self._global_pts @ R.T) + t, dtype=np.float32) + if len(self._frames) > 0: + frames_tx = deque(maxlen=int(self._frames.maxlen or len(self._frames))) + for fr in self._frames: + fr_np = np.asarray(fr, dtype=np.float32) + if len(fr_np) == 0: + frames_tx.append(fr_np) + else: + frames_tx.append(np.asarray((fr_np @ R.T) + t, dtype=np.float32)) + self._frames = frames_tx + if self._last_merge_pose is not None and np.asarray(self._last_merge_pose).shape == (4, 4): + self._last_merge_pose = tf @ np.asarray(self._last_merge_pose, dtype=np.float64) + self._rebuild_local() + self._global_pts = self._trim(self._global_pts, int(self.cfg.max_global_points)) + return True + except Exception: + return False + + @property + def has_points(self) -> bool: + return bool(len(self._local_pts) > 0 or len(self._global_pts) > 0) + + def _trim(self, pts: np.ndarray, max_n: int) -> np.ndarray: + n = int(len(pts)) + if n <= int(max_n): + return pts + stride = max(2, int(np.ceil(float(n) / float(max_n)))) + out = pts[::stride] + if len(out) > int(max_n): + out = out[: int(max_n)] + return np.asarray(out, dtype=np.float32) + + def _rebuild_local(self) -> None: + if len(self._frames) == 0: + self._local_pts = np.zeros((0, 3), dtype=np.float32) + return + cat = np.concatenate(list(self._frames), axis=0) + ds = _voxel_downsample(cat, float(self.cfg.local_voxel_m)) + self._local_pts = self._trim(ds, max(5000, int(self.cfg.display_max_points))) + + def _should_merge(self, now: float, pose_world: Optional[np.ndarray]) -> bool: + if len(self._local_pts) == 0: + return False + if len(self._global_pts) == 0: + return True + if (float(now) - float(self._last_merge_t)) >= float(self.cfg.merge_period_sec): + return True + if pose_world is None or self._last_merge_pose is None: + return False + try: + pose = np.asarray(pose_world, dtype=np.float64) + if pose.shape != (4, 4): + return False + trans, ang = _pose_delta(pose, self._last_merge_pose) + return bool( + trans >= float(self.cfg.merge_min_translation_m) + or ang >= float(self.cfg.merge_min_rotation_deg) + ) + except Exception: + return False + + def integrate( + self, + points_world: np.ndarray, + now: float, + pose_world: Optional[np.ndarray] = None, + ) -> Dict[str, Any]: + pts = np.asarray(points_world, dtype=np.float32) + if pts.ndim != 2 or pts.shape[1] != 3 or len(pts) == 0: + return self.status(active=False, reason="empty") + + ds = _voxel_downsample(pts, float(self.cfg.local_voxel_m)) + if len(ds) == 0: + return self.status(active=False, reason="empty_downsample") + + self._frames.append(ds) + self._insert_count += 1 + self._rebuild_local() + + merged = False + if self._should_merge(float(now), pose_world): + if len(self._global_pts) == 0: + combo = np.array(self._local_pts, dtype=np.float32, copy=True) + else: + combo = np.concatenate([self._global_pts, self._local_pts], axis=0) + self._global_pts = _voxel_downsample(combo, float(self.cfg.global_voxel_m)) + self._global_pts = self._trim(self._global_pts, int(self.cfg.max_global_points)) + self._last_merge_t = float(now) + if pose_world is not None: + pose = np.asarray(pose_world, 
dtype=np.float64) + if pose.shape == (4, 4): + self._last_merge_pose = np.array(pose, dtype=np.float64, copy=True) + merged = True + self._merge_count += 1 + + out = self.status(active=True, reason="ok") + out["merged"] = bool(merged) + return out + + def get_display_points(self) -> np.ndarray: + if len(self._global_pts) == 0 and len(self._local_pts) == 0: + return np.zeros((0, 3), dtype=np.float32) + if len(self._global_pts) == 0: + return np.asarray(self._local_pts, dtype=np.float32) + if len(self._local_pts) == 0: + return self._trim(np.asarray(self._global_pts, dtype=np.float32), int(self.cfg.display_max_points)) + combo = np.concatenate([self._global_pts, self._local_pts], axis=0) + disp = _voxel_downsample(combo, float(self.cfg.local_voxel_m)) + return self._trim(disp, int(self.cfg.display_max_points)) + + def status( + self, + active: bool, + reason: str = "ok", + profile: str = "", + profile_allowed: bool = True, + ) -> Dict[str, Any]: + return { + "enabled": bool(self.cfg.enabled), + "active": bool(active), + "reason": str(reason), + "profile": str(profile).upper().strip(), + "profile_allowed": bool(profile_allowed), + "local_points": int(len(self._local_pts)), + "global_points": int(len(self._global_pts)), + "window_frames": int(len(self._frames)), + "merge_count": int(self._merge_count), + "insert_count": int(self._insert_count), + } diff --git a/Lidar/SLAM_Transforms.py b/Lidar/SLAM_Transforms.py new file mode 100644 index 0000000..4d3cdab --- /dev/null +++ b/Lidar/SLAM_Transforms.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +from typing import Optional, Tuple + +import numpy as np + + +def to_world_points(points_sensor: np.ndarray, pose: Optional[np.ndarray]) -> np.ndarray: + """ + Convert sensor-frame points to world frame using pose (world_T_sensor). + Falls back to input points when pose is unavailable/invalid. 
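+
+    Equivalent per-point math (vectorized below as (pts @ rot.T) + trans):
+
+        p_world = pose[:3, :3] @ p_sensor + pose[:3, 3]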
+ """ + if pose is None: + return np.asarray(points_sensor, dtype=np.float32) + try: + pts = np.asarray(points_sensor, dtype=np.float32) + pose_np = np.asarray(pose, dtype=np.float32) + if pts.ndim != 2 or pts.shape[1] != 3 or pose_np.shape != (4, 4): + return pts + rot = pose_np[:3, :3] + trans = pose_np[:3, 3] + return (pts @ rot.T) + trans + except Exception: + return np.asarray(points_sensor, dtype=np.float32) + + +def apply_transform_points(points: Optional[np.ndarray], transform: np.ndarray) -> Optional[np.ndarray]: + if points is None: + return None + pts = np.asarray(points, dtype=np.float32) + if len(pts) == 0: + return pts + try: + tf = np.asarray(transform, dtype=np.float32) + if tf.shape != (4, 4): + return pts + rot = tf[:3, :3] + trans = tf[:3, 3] + return (pts @ rot.T) + trans + except Exception: + return pts + + +def tf_delta(prev_tf: np.ndarray, new_tf: np.ndarray) -> Tuple[float, float]: + try: + a = np.asarray(prev_tf, dtype=np.float64) + b = np.asarray(new_tf, dtype=np.float64) + if a.shape != (4, 4) or b.shape != (4, 4): + return 0.0, 0.0 + dp = b[:3, 3] - a[:3, 3] + trans = float(np.linalg.norm(dp)) + r = b[:3, :3] @ a[:3, :3].T + ang = float(np.degrees(np.arccos(np.clip((np.trace(r) - 1.0) * 0.5, -1.0, 1.0)))) + return trans, ang + except Exception: + return 0.0, 0.0 + + +def blend_rigid_tf(prev_tf: np.ndarray, new_tf: np.ndarray, alpha_t: float, alpha_r: float) -> np.ndarray: + try: + a = np.asarray(prev_tf, dtype=np.float64) + b = np.asarray(new_tf, dtype=np.float64) + if a.shape != (4, 4) or b.shape != (4, 4): + return np.asarray(new_tf, dtype=np.float64) + at = float(np.clip(alpha_t, 0.0, 1.0)) + ar = float(np.clip(alpha_r, 0.0, 1.0)) + out = np.eye(4, dtype=np.float64) + out[:3, 3] = ((1.0 - at) * a[:3, 3]) + (at * b[:3, 3]) + rm = ((1.0 - ar) * a[:3, :3]) + (ar * b[:3, :3]) + u, _, vt = np.linalg.svd(rm, full_matrices=False) + r = u @ vt + if np.linalg.det(r) < 0: + u[:, -1] *= -1.0 + r = u @ vt + out[:3, :3] = r + return out + except Exception: + return np.asarray(new_tf, dtype=np.float64) + + +def yaw_deg_from_tf(tf: np.ndarray) -> float: + try: + m = np.asarray(tf, dtype=np.float64) + if m.shape != (4, 4): + return 0.0 + yaw = np.degrees(np.arctan2(float(m[1, 0]), float(m[0, 0]))) + return float(yaw) + except Exception: + return 0.0 + + +def tf_from_xyzyaw(x: float, y: float, z: float, yaw_deg: float) -> np.ndarray: + yaw = np.deg2rad(float(yaw_deg)) + cy = float(np.cos(yaw)) + sy = float(np.sin(yaw)) + tf = np.eye(4, dtype=np.float64) + tf[:3, :3] = np.array( + [[cy, -sy, 0.0], [sy, cy, 0.0], [0.0, 0.0, 1.0]], + dtype=np.float64, + ) + tf[:3, 3] = np.array([float(x), float(y), float(z)], dtype=np.float64) + return tf diff --git a/Lidar/SLAM_Validation.py b/Lidar/SLAM_Validation.py new file mode 100644 index 0000000..a5a9081 --- /dev/null +++ b/Lidar/SLAM_Validation.py @@ -0,0 +1,156 @@ +from __future__ import annotations + +import json +import os +from pathlib import Path +from typing import Any, Dict, List + + +def _safe_int(v: Any, default: int) -> int: + try: + return int(v) + except Exception: + return int(default) + + +def _safe_float(v: Any, default: float) -> float: + try: + return float(v) + except Exception: + return float(default) + + +def _resolve_path(base_dir: Path, value: str | None) -> Path: + raw = (value or "").strip() + p = Path(raw).expanduser() + if p.is_absolute(): + return p + return (base_dir / p).resolve() + + +def run_startup_self_check(cfg: Dict[str, Any], base_dir: Path | None = None) -> Dict[str, Any]: + """ + Validate 
runtime-critical config and paths. + Returns a report dict suitable for GUI popup / worker status. + """ + if base_dir is None: + base_dir = Path(__file__).resolve().parent + base = Path(base_dir).resolve() + + errors: List[str] = [] + warnings: List[str] = [] + info: List[str] = [] + + app_cfg = cfg.get("app", {}) or {} + livox_cfg = cfg.get("livox", {}) or {} + map_cfg = cfg.get("map", {}) or {} + loc_cfg = cfg.get("localization", {}) or {} + nav_cfg = cfg.get("navigation_export", {}) or {} + nav_rt_cfg = cfg.get("navigation_runtime", {}) or {} + loop_cfg = cfg.get("loop_closure", {}) or {} + autosave_cfg = cfg.get("autosave", {}) or {} + + maps_dir = _resolve_path(base, str(app_cfg.get("maps_dir", "DataMap"))) + livox_file = _resolve_path(base, str(livox_cfg.get("config_file", "mid360_config.json"))) + + try: + maps_dir.mkdir(parents=True, exist_ok=True) + except Exception as e: + errors.append(f"maps_dir is not creatable: {maps_dir} ({e})") + else: + if not maps_dir.is_dir(): + errors.append(f"maps_dir is not a directory: {maps_dir}") + else: + if not maps_dir.exists(): + errors.append(f"maps_dir missing: {maps_dir}") + if not os.access(maps_dir, os.W_OK): + warnings.append(f"maps_dir is not writable: {maps_dir}") + info.append(f"maps_dir: {maps_dir}") + + if not livox_file.exists(): + errors.append(f"livox.config_file missing: {livox_file}") + else: + info.append(f"livox config: {livox_file.name}") + try: + livox_data = json.loads(livox_file.read_text()) + lidar_cfgs = livox_data.get("lidar_configs", []) or [] + ext = {} + if lidar_cfgs and isinstance(lidar_cfgs[0], dict): + ext = lidar_cfgs[0].get("extrinsic_parameter", {}) or {} + ext_vals = [ + _safe_float(ext.get("roll", 0.0), 0.0), + _safe_float(ext.get("pitch", 0.0), 0.0), + _safe_float(ext.get("yaw", 0.0), 0.0), + _safe_float(ext.get("x", 0.0), 0.0), + _safe_float(ext.get("y", 0.0), 0.0), + _safe_float(ext.get("z", 0.0), 0.0), + ] + if all(abs(v) <= 1e-6 for v in ext_vals): + warnings.append( + "MID360 extrinsics are all zero in mid360_config.json; " + "verify G1 upside-down mounting correction before trusting localization." + ) + else: + info.append( + "MID360 extrinsics: " + f"rpy=({ext_vals[0]:.2f}, {ext_vals[1]:.2f}, {ext_vals[2]:.2f}) " + f"xyz=({ext_vals[3]:.3f}, {ext_vals[4]:.3f}, {ext_vals[5]:.3f})" + ) + except Exception as e: + warnings.append(f"Could not parse MID360 extrinsics from {livox_file.name}: {e}") + + min_map = _safe_int(map_cfg.get("min_points_to_save", 0), 0) + min_loc = _safe_int(loc_cfg.get("min_points_for_localize", 0), 0) + min_nav = _safe_int(nav_cfg.get("min_points", 0), 0) + if min_map <= 0 or min_loc <= 0 or min_nav <= 0: + errors.append("min points must be > 0 for map/localization/navigation.") + if not (min_map == min_loc == min_nav): + warnings.append( + f"min points mismatch (map/localize/nav): {min_map}/{min_loc}/{min_nav}. " + "This can confuse export/localization behavior." 
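+            # Hypothetical example: map=400 / localize=300 / nav=500 would
+            # allow saving a 450-point map that the nav exporter then refuses.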
+        )
+    else:
+        info.append(f"min points unified: {min_map}")
+
+    z_min = _safe_float(nav_cfg.get("z_min_m", 0.05), 0.05)
+    z_max = _safe_float(nav_cfg.get("z_max_m", 1.40), 1.40)
+    if z_min >= z_max:
+        errors.append(f"navigation_export z range invalid: z_min({z_min}) >= z_max({z_max})")
+
+    rt_z_min = _safe_float(nav_rt_cfg.get("z_min_m", z_min), z_min)
+    rt_z_max = _safe_float(nav_rt_cfg.get("z_max_m", z_max), z_max)
+    if rt_z_min >= rt_z_max:
+        errors.append(f"navigation_runtime z range invalid: z_min({rt_z_min}) >= z_max({rt_z_max})")
+
+    loc_use_p2pl = bool(loc_cfg.get("use_point_to_plane", False))
+    loc_unsafe = bool(loc_cfg.get("allow_point_to_plane_unsafe", False))
+    if loc_use_p2pl and not loc_unsafe:
+        warnings.append(
+            "localization.use_point_to_plane=true but allow_point_to_plane_unsafe=false. "
+            "Worker will use point-to-point for stability."
+        )
+
+    if bool(loop_cfg.get("enabled", False)):
+        max_corr_t = _safe_float(loop_cfg.get("max_correction_translation_m", 2.0), 2.0)
+        max_corr_r = _safe_float(loop_cfg.get("max_correction_rotation_deg", 20.0), 20.0)
+        if max_corr_t <= 0.0 or max_corr_r <= 0.0:
+            warnings.append("loop_closure enabled but safe correction limits are non-positive.")
+        else:
+            info.append(f"loop safe limits: {max_corr_t:.2f}m / {max_corr_r:.1f}deg")
+
+    if bool(autosave_cfg.get("enabled", False)):
+        iv = _safe_float(autosave_cfg.get("interval_sec", 90.0), 90.0)
+        if iv < 5.0:
+            warnings.append(f"autosave interval is very low ({iv:.1f}s).")
+
+    status = "ok"
+    if errors:
+        status = "error"
+    elif warnings:
+        status = "warn"
+    return {
+        "status": status,
+        "errors": errors,
+        "warnings": warnings,
+        "info": info,
+    }
diff --git a/Lidar/SLAM_engine.py b/Lidar/SLAM_engine.py
new file mode 100644
index 0000000..4c2e665
--- /dev/null
+++ b/Lidar/SLAM_engine.py
@@ -0,0 +1,315 @@
+# SLAM_engine.py
+from __future__ import annotations
+
+import json
+import os
+import time
+import multiprocessing as mp
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Optional
+
+from SLAM_Validation import run_startup_self_check
+
+# ------------------------- config loader (json) -------------------------
+def load_slam_config() -> dict:
+    """
+    Loads config from:
+      1) env SLAM_CONFIG (full path)
+      2) ./SLAM_Config.json (same folder as this script)
+    """
+    cfg_path = os.environ.get("SLAM_CONFIG", "").strip()
+    if cfg_path:
+        p = Path(cfg_path)
+    else:
+        p = Path(__file__).resolve().parent / "SLAM_Config.json"
+
+    if not p.exists():
+        raise FileNotFoundError(f"Missing config file: {p}")
+
+    text = p.read_text(encoding="utf-8").strip()
+    if not text:
+        raise RuntimeError("SLAM_Config.json is empty.")
+
+    return json.loads(text)
+
+
+def _config_base_dir() -> Path:
+    cfg_path = os.environ.get("SLAM_CONFIG", "").strip()
+    if cfg_path:
+        return Path(cfg_path).expanduser().resolve().parent
+    return Path(__file__).resolve().parent
+
+
+def _resolve_from_config_dir(path_value: str | os.PathLike[str]) -> str:
+    p = Path(path_value).expanduser()
+    if p.is_absolute():
+        return str(p)
+    return str((_config_base_dir() / p).resolve())
+
+
+# ------------------------- dataclasses -------------------------
+@dataclass
+class EngineConfig:
+    config_file: str
+    host_ip: str
+    max_range: float
+    slam_voxel_size: float
+    pre_downsample_stride: int
+    keyframe_enabled: bool
+    keyframe_min_translation_m: float
+    keyframe_min_rotation_deg: float
+
+
+@dataclass
+class FilterConfig:
+    voxel_size: float
+
hits_threshold: int + window_sec: float + strict_sec: float + use_strict: bool + decay_seconds: float + max_voxels: int + + +@dataclass +class MapConfig: + data_folder: str + display_voxel: float + save_voxel: float + min_points_to_save: int + save_extension: str + + +@dataclass +class LocalizationConfig: + enabled: bool + period_sec: float + min_new_points: int + min_points_for_localize: int + voxel_localize: float + max_corr_mult: float + icp_max_iter: int + accept_fitness: float + accept_rmse: float + ref_display_voxel: float + + +@dataclass +class RuntimeConfig: + publish_hz: float + frame_keep_latest: bool + frame_queue_maxsize: int + status_queue_maxsize: int + cmd_queue_maxsize: int + + +# ------------------------- helpers ------------------------- +def _drain_keep_latest(q: mp.Queue) -> None: + try: + while True: + q.get_nowait() + except Exception: + return + + +def _safe_put(q: mp.Queue, item: Any, keep_latest: bool = False) -> None: + try: + if keep_latest: + _drain_keep_latest(q) + q.put_nowait(item) + except Exception: + pass + + +def build_configs_from_json(cfg: dict) -> tuple[EngineConfig, FilterConfig, MapConfig, LocalizationConfig, RuntimeConfig]: + maps_dir = _resolve_from_config_dir(str(cfg["app"]["maps_dir"])) + + eng = EngineConfig( + config_file=_resolve_from_config_dir(str(cfg["livox"]["config_file"])), + host_ip=cfg["network"]["default_host_ip"], + max_range=float(cfg["slam"]["max_range"]), + slam_voxel_size=float(cfg["slam"]["slam_voxel_size"]), + pre_downsample_stride=int(cfg["slam"]["pre_downsample_stride"]), + keyframe_enabled=bool(cfg["slam"]["keyframe"]["enabled"]), + keyframe_min_translation_m=float(cfg["slam"]["keyframe"]["min_translation_m"]), + keyframe_min_rotation_deg=float(cfg["slam"]["keyframe"]["min_rotation_deg"]), + ) + + filt = FilterConfig( + voxel_size=float(cfg["filter"]["voxel_size"]), + hits_threshold=int(cfg["filter"]["hits_threshold"]), + window_sec=float(cfg["filter"]["window_sec"]), + strict_sec=float(cfg["filter"]["strict_sec"]), + use_strict=bool(cfg["filter"]["use_strict"]), + decay_seconds=float(cfg["filter"]["persistence"]["decay_seconds"]), + max_voxels=int(cfg["filter"]["persistence"]["max_voxels"]), + ) + + mp_cfg = MapConfig( + data_folder=str(maps_dir), + display_voxel=float(cfg["map"]["display_voxel"]), + save_voxel=float(cfg["map"]["save_voxel"]), + min_points_to_save=int(cfg["map"]["min_points_to_save"]), + save_extension=str(cfg["map"]["save_extension"]), + ) + + loc = LocalizationConfig( + enabled=bool(cfg["localization"]["enabled"]), + period_sec=float(cfg["localization"]["period_sec"]), + min_new_points=int(cfg["localization"]["min_new_points"]), + min_points_for_localize=int(cfg["localization"]["min_points_for_localize"]), + voxel_localize=float(cfg["localization"]["voxel_localize"]), + max_corr_mult=float(cfg["localization"]["max_corr_mult"]), + icp_max_iter=int(cfg["localization"]["icp_max_iter"]), + accept_fitness=float(cfg["localization"]["accept_fitness"]), + accept_rmse=float(cfg["localization"]["accept_rmse"]), + ref_display_voxel=float(cfg["localization"]["ref_display_voxel"]), + ) + + run = RuntimeConfig( + publish_hz=float(cfg["runtime"]["publish_hz"]), + frame_keep_latest=bool(cfg["runtime"]["queue"]["frame_keep_latest"]), + frame_queue_maxsize=int(cfg["runtime"]["queue"]["frame_queue_maxsize"]), + status_queue_maxsize=int(cfg["runtime"]["queue"]["status_queue_maxsize"]), + cmd_queue_maxsize=int(cfg["runtime"]["queue"]["cmd_queue_maxsize"]), + ) + return eng, filt, mp_cfg, loc, run + + +# 
------------------------- client ------------------------- +class SlamEngineClient: + def __init__(self): + self.cfg_json = load_slam_config() + self.self_check = run_startup_self_check(self.cfg_json, _config_base_dir()) + self.eng_cfg, self.filt_cfg, self.map_cfg, self.loc_cfg, self.run_cfg = build_configs_from_json(self.cfg_json) + + ctx = mp.get_context("spawn") + self.data_q: mp.Queue = ctx.Queue(maxsize=self.run_cfg.frame_queue_maxsize) + self.status_q: mp.Queue = ctx.Queue(maxsize=self.run_cfg.status_queue_maxsize) + self.cmd_q: mp.Queue = ctx.Queue(maxsize=self.run_cfg.cmd_queue_maxsize) + self.proc: Optional[mp.Process] = None + + def start_process(self): + if self.proc is not None and self.proc.is_alive(): + return + + ctx = mp.get_context("spawn") + self.data_q = ctx.Queue(maxsize=self.run_cfg.frame_queue_maxsize) + self.status_q = ctx.Queue(maxsize=self.run_cfg.status_queue_maxsize) + self.cmd_q = ctx.Queue(maxsize=self.run_cfg.cmd_queue_maxsize) + + from SLAM_worker import slam_worker + + self.proc = ctx.Process( + target=slam_worker, + args=(self.data_q, self.status_q, self.cmd_q, self.eng_cfg, self.filt_cfg, self.map_cfg, self.loc_cfg, self.run_cfg), + daemon=True, + ) + self.proc.start() + + def stop_process(self): + try: + self.send("SHUTDOWN") + except Exception: + pass + if self.proc is not None: + self.proc.join(timeout=1.0) + if self.proc.is_alive(): + self.proc.terminate() + self.proc.join(timeout=1.0) + + def send(self, cmd: Any): + if self.proc is None or not self.proc.is_alive(): + self.start_process() + self.cmd_q.put(cmd) + + def connect(self): self.send("CONNECT") + def start_mapping(self): self.send("START") + def pause_mapping(self): self.send("PAUSE") + def stop_mapping(self): self.send("STOP") + def reset_mapping(self): self.send("RESET") + def export_map(self, filename_base: str): self.send(("EXPORT", filename_base)) + def export_nav(self, filename_base: str): self.send(("EXPORT_NAV", filename_base)) + def load_ref_map(self, path: str): self.send(("LOAD_REF", path)) + def localize_now(self): self.send("LOCALIZE") + def clear_ref(self): self.send("CLEAR_REF") + def set_density(self, mode: str): self.send(("SET_DENSITY", str(mode))) + def set_min_stable_points(self, value: int): self.send(("SET_MIN_STABLE_POINTS", int(value))) + def set_loop_closure(self, enabled: bool): self.send(("SET_LOOP_CLOSURE", bool(enabled))) + def set_loc_state_machine(self, enabled: bool): self.send(("SET_LOC_STATE_MACHINE", bool(enabled))) + def set_submap_mode(self, enabled: bool, cfg_patch: Optional[dict] = None): + payload = {"enabled": bool(enabled)} + if isinstance(cfg_patch, dict) and cfg_patch: + payload.update(dict(cfg_patch)) + self.send(("SET_SUBMAP_MODE", payload)) + def set_approx_pose(self, x: float, y: float, z: float = 0.0, yaw_deg: Optional[float] = None): + payload = {"x": float(x), "y": float(y), "z": float(z)} + if yaw_deg is not None: + payload["yaw_deg"] = float(yaw_deg) + self.send(("SET_APPROX_POSE", payload)) + def clear_approx_pose(self): + self.send("CLEAR_APPROX_POSE") + def set_autosave(self, enabled: bool, interval_sec: float, base_name: str): + self.send( + ( + "SET_AUTOSAVE", + { + "enabled": bool(enabled), + "interval_sec": float(interval_sec), + "base_name": str(base_name), + }, + ) + ) + def set_nav_export_cfg(self, cfg_patch: dict): + self.send(("SET_NAV_CONFIG", dict(cfg_patch))) + def set_nav_runtime_cfg(self, cfg_patch: dict): + self.send(("SET_NAV_RUNTIME", dict(cfg_patch))) + def set_map_quality_cfg(self, cfg_patch: dict): + 
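+        # Like the other *_cfg setters, this forwards a partial dict; the
+        # worker side is expected to merge it over current values (compare
+        # NavigationExportConfig.patched()). Hypothetical example:
+        #   client.set_map_quality_cfg({"voxel_size": 0.10})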
self.send(("SET_MAP_QUALITY", dict(cfg_patch))) + def set_filter_tuning(self, cfg_patch: dict): + self.send(("SET_FILTER_TUNING", dict(cfg_patch))) + def set_stability_profile(self, profile: str): + self.send(("SET_STABILITY_PROFILE", str(profile))) + def set_nav_goal(self, x: float, y: float): + self.send(("SET_NAV_GOAL", {"x": float(x), "y": float(y)})) + def clear_nav_goal(self): + self.send("CLEAR_NAV_GOAL") + def start_localize_only(self): + self.send("START_LOCALIZE_ONLY") + def stop_localize_only(self): + self.send("STOP_LOCALIZE_ONLY") + def record_start(self, base_name: str): + self.send(("RECORD_START", {"base_name": str(base_name)})) + def record_stop(self, save: bool = True, base_name: str = ""): + payload = {"save": bool(save)} + if base_name: + payload["base_name"] = str(base_name) + self.send(("RECORD_STOP", payload)) + def mission_start(self, waypoints: list[dict]): + self.send(("MISSION_START", {"waypoints": list(waypoints)})) + def mission_pause(self): + self.send("MISSION_PAUSE") + def mission_resume(self): + self.send("MISSION_RESUME") + def mission_stop(self): + self.send("MISSION_STOP") + def get_self_check(self) -> dict: + return dict(self.self_check) + def sensor_prior(self, sensor: str, pose: Any, confidence: float = 1.0, timestamp: Optional[float] = None): + self.send( + ( + "SENSOR_PRIOR", + { + "sensor": str(sensor), + "pose": pose, + "confidence": float(confidence), + "timestamp": float(time.time()) if timestamp is None else float(timestamp), + }, + ) + ) diff --git a/Lidar/SLAM_worker.py b/Lidar/SLAM_worker.py new file mode 100644 index 0000000..0b82394 --- /dev/null +++ b/Lidar/SLAM_worker.py @@ -0,0 +1,3777 @@ +# SLAM_worker.py +from __future__ import annotations + +import time +import queue +import traceback +import threading +import multiprocessing as mp +from collections import deque +from pathlib import Path +from typing import Optional, Any, Dict, List, Tuple + +import numpy as np + +from SLAM_engine import ( + EngineConfig, + FilterConfig, + MapConfig, + LocalizationConfig, + RuntimeConfig, + load_slam_config, +) +from SLAM_Validation import run_startup_self_check + + +def _now() -> float: + return time.time() + + +def _as_bool(v: Any, default: bool = False) -> bool: + if v is None: + return default + if isinstance(v, bool): + return v + if isinstance(v, (int, float)): + return bool(v) + return str(v).strip().lower() in ("1", "true", "yes", "on") + + +def _drain_keep_latest(q: mp.Queue) -> None: + try: + while True: + q.get_nowait() + except Exception: + return + + +def _safe_put(q: mp.Queue, item: Any, keep_latest: bool = False) -> None: + try: + if keep_latest: + _drain_keep_latest(q) + q.put_nowait(item) + except Exception: + pass + + +def slam_worker( + data_q: mp.Queue, + status_q: mp.Queue, + cmd_q: mp.Queue, + eng_cfg: EngineConfig, + filt_cfg: FilterConfig, + map_cfg: MapConfig, + loc_cfg: LocalizationConfig, + run_cfg: RuntimeConfig, +): + # suppress noisy warning from ctypes->numpy conversion + import warnings + warnings.filterwarnings( + "ignore", + message="A builtin ctypes object gave a PEP3118 format string", + category=RuntimeWarning, + ) + + try: + from livox2_python import Livox2 + from kiss_icp.config import load_config + from kiss_icp.pipeline import KissICP + from SLAM_Diagnostics import WorkerDiagnostics + from SLAM_LocalizationService import LocalizationFrameState + from SLAM_Transforms import ( + apply_transform_points as _apply_transform_points, + blend_rigid_tf as _blend_rigid_tf_impl, + tf_delta as _tf_delta_impl, + 
tf_from_xyzyaw as _tf_from_xyzyaw_impl, + to_world_points as _to_world_points, + yaw_deg_from_tf as _yaw_deg_from_tf_impl, + ) + + from SLAM_Filter import VoxelPersistenceFilter + from SLAM_Filter import FilterConfig as VFilterCfg + from SLAM_Filter import IndoorMapQualityConfig, IndoorMapQualityFilter + + from SLAM_MAP import StableMapLayer + from SLAM_MAP import MapConfig as StableMapCfg + + from SLAM_LoopClosure import LoopClosureConfig, LoopClosureBackend + from SLAM_PlaceRecognition import PlaceRecognitionConfig, PlaceRecognitionIndex + from SLAM_StateMachine import LocalizationStateConfig, LocalizationStateMachine + from SLAM_Submap import SubmapConfig, LocalGlobalSubmapMapper, SubmapCheckpointer + from SLAM_Session import SessionMemoryConfig, SessionTransformStore + from SLAM_Navigation import NavigationExportConfig, NavigationExporter + from SLAM_Fusion import FusionConfig as SensorFusionConfig, SensorPoseFusion + from SLAM_NavRuntime import ( + LiveCostmapConfig, + LiveCostmapRuntime, + GlobalAStarPlanner, + LocalPlannerConfig, + LocalReactivePlanner, + ) + from SLAM_Mission import MissionConfig, WaypointMissionManager + from SLAM_Safety import SafetyConfig, SafetySupervisor + + except Exception as e: + _safe_put(status_q, ("ERROR", f"Import failed: {e}")) + return + + def st(level: str, msg: Any): + _safe_put(status_q, (level, msg)) + + Path(map_cfg.data_folder).mkdir(parents=True, exist_ok=True) + diagnostics = None + try: + fault_log_path = Path(map_cfg.data_folder) / "SLAM_worker_fault.log" + diagnostics = WorkerDiagnostics(fault_log_path) + except Exception as e: + diagnostics = None + _safe_put(status_q, ("WARN", f"Diagnostics disabled: {e}")) + try: + full_cfg = load_slam_config() + except Exception: + full_cfg = {} + loc_cfg_raw = full_cfg.get("localization", {}) if isinstance(full_cfg, dict) else {} + guard_cfg = full_cfg.get("mapping_guard", {}) if isinstance(full_cfg, dict) else {} + loc_pt2plane_unsafe = _as_bool( + (loc_cfg_raw or {}).get("allow_point_to_plane_unsafe", False), + False, + ) + loc_use_point_to_plane = _as_bool( + (loc_cfg_raw or {}).get("use_point_to_plane", False), + False, + ) and bool(loc_pt2plane_unsafe) + if _as_bool((loc_cfg_raw or {}).get("use_point_to_plane", False), False) and not loc_pt2plane_unsafe: + _safe_put( + status_q, + ( + "WARN", + "Localization point-to-plane disabled for stability. 
Set localization.allow_point_to_plane_unsafe=true to force it.", + ), + ) + loc_live_period_sec = max(0.25, float((loc_cfg_raw or {}).get("live_period_sec", 0.55))) + loc_tracking_window_enabled = _as_bool( + (loc_cfg_raw or {}).get("tracking_window_enabled", True), + True, + ) + loc_tracking_window_radius_m = max( + 2.0, + float((loc_cfg_raw or {}).get("tracking_window_radius_m", 8.0)), + ) + loc_tracking_window_min_ref_points = max( + 120, + int((loc_cfg_raw or {}).get("tracking_window_min_ref_points", 450)), + ) + loc_tracking_window_max_ref_points = max( + int(loc_tracking_window_min_ref_points), + int((loc_cfg_raw or {}).get("tracking_window_max_ref_points", 140000)), + ) + loc_global_reloc_anchor_voxel_m = max( + 0.4, + float((loc_cfg_raw or {}).get("global_reloc_anchor_voxel_m", 1.4)), + ) + loc_global_reloc_max_anchors = max( + 8, + int((loc_cfg_raw or {}).get("global_reloc_max_anchors", 24)), + ) + loc_global_reloc_yaw_step_deg = float( + np.clip( + float((loc_cfg_raw or {}).get("global_reloc_yaw_step_deg", 60.0)), + 15.0, + 120.0, + ) + ) + loc_global_reloc_corr_mult = max( + 1.2, + float((loc_cfg_raw or {}).get("global_reloc_corr_mult", 1.9)), + ) + loc_global_reloc_coarse_iter = max( + 4, + int((loc_cfg_raw or {}).get("global_reloc_coarse_iter", 8)), + ) + loc_global_reloc_refine_iter = max( + 8, + int((loc_cfg_raw or {}).get("global_reloc_refine_iter", 20)), + ) + loc_global_reloc_min_corr = max( + 15, + int((loc_cfg_raw or {}).get("global_reloc_min_corr", 25)), + ) + loc_bidir_enabled = _as_bool((loc_cfg_raw or {}).get("bidirectional_check_enabled", True), True) + loc_bidir_min_ratio = float( + np.clip(float((loc_cfg_raw or {}).get("bidirectional_min_ratio", 0.18)), 0.01, 1.0) + ) + loc_bidir_max_rmse = max(0.05, float((loc_cfg_raw or {}).get("bidirectional_max_rmse", 0.55))) + loc_guess_ttl_sec = max(2.0, float((loc_cfg_raw or {}).get("approx_guess_ttl_sec", 60.0))) + loc_guess_crop_radius_m = max( + 2.0, + float((loc_cfg_raw or {}).get("approx_guess_crop_radius_m", 9.0)), + ) + loc_guess_default_z_m = float((loc_cfg_raw or {}).get("approx_guess_default_z_m", 0.0)) + loc_guess_strict_local = _as_bool( + (loc_cfg_raw or {}).get("approx_guess_strict_local", True), + True, + ) + loc_guess_local_radius_m = max( + 1.0, + min( + float(loc_guess_crop_radius_m), + float((loc_cfg_raw or {}).get("approx_guess_local_radius_m", 4.5)), + ), + ) + loc_guess_global_fallback_after_failures = max( + 1, + int((loc_cfg_raw or {}).get("approx_guess_global_fallback_after_failures", 6)), + ) + loc_guess_yaw_step_deg = float( + np.clip( + float((loc_cfg_raw or {}).get("approx_guess_yaw_step_deg", 30.0)), + 10.0, + 120.0, + ) + ) + loc_guess_max_start_offset_m = max( + 1.0, + float((loc_cfg_raw or {}).get("approx_guess_max_start_offset_m", 6.0)), + ) + loc_guess_bootstrap_min_hits = max( + 1, + int((loc_cfg_raw or {}).get("approx_guess_bootstrap_min_hits", 2)), + ) + loc_guess_bootstrap_min_fitness = float( + np.clip( + float((loc_cfg_raw or {}).get("approx_guess_bootstrap_min_fitness", 0.34)), + 0.0, + 1.0, + ) + ) + loc_guess_bootstrap_min_bidir = float( + np.clip( + float((loc_cfg_raw or {}).get("approx_guess_bootstrap_min_bidir", 0.24)), + 0.0, + 1.0, + ) + ) + loc_live_track_strict = _as_bool( + (loc_cfg_raw or {}).get("live_track_strict", True), + True, + ) + loc_live_track_min_fit = float( + np.clip( + float((loc_cfg_raw or {}).get("live_track_min_fitness", 0.42)), + 0.0, + 1.0, + ) + ) + loc_live_track_min_bidir = float( + np.clip( + float((loc_cfg_raw or {}).get("live_track_min_bidir", 
0.28)), + 0.0, + 1.0, + ) + ) + loc_live_track_max_step_trans_m = max( + 0.02, + float((loc_cfg_raw or {}).get("live_track_max_step_translation_m", 0.20)), + ) + loc_live_track_max_step_rot_deg = max( + 0.5, + float((loc_cfg_raw or {}).get("live_track_max_step_rotation_deg", 12.0)), + ) + loc_live_stack_enabled = _as_bool((loc_cfg_raw or {}).get("live_stack_enabled", True), True) + loc_live_stack_frames = max(1, int((loc_cfg_raw or {}).get("live_stack_frames", 8))) + loc_live_stack_voxel_m = max( + 0.04, + float((loc_cfg_raw or {}).get("live_stack_voxel_m", max(0.06, float(loc_cfg.voxel_localize) * 0.7))), + ) + loc_live_stack_max_points = max(800, int((loc_cfg_raw or {}).get("live_stack_max_points", 22000))) + loc_live_stack_min_frames = max(1, int((loc_cfg_raw or {}).get("live_stack_min_frames", 2))) + + vis_cfg = full_cfg.get("localization_visualization", {}) if isinstance(full_cfg, dict) else {} + loc_vis_enabled = _as_bool((vis_cfg or {}).get("enabled", True), True) + loc_vis_period_sec = max(0.10, float((vis_cfg or {}).get("period_sec", 0.35))) + loc_vis_voxel_m = max(0.08, float((vis_cfg or {}).get("voxel_m", 0.24))) + loc_vis_max_points = max(4000, int((vis_cfg or {}).get("max_points", 28000))) + loc_vis_local_radius_m = max(2.0, float((vis_cfg or {}).get("local_radius_m", 12.0))) + loc_vis_match_dist_m = max(0.03, float((vis_cfg or {}).get("match_dist_m", 0.30))) + loc_vis_near_dist_m = max(loc_vis_match_dist_m + 0.02, float((vis_cfg or {}).get("near_dist_m", 0.60))) + loc_vis_confirm_frames = max(1, int((vis_cfg or {}).get("confirm_frames", 3))) + loc_vis_decay_per_frame = max(1, int((vis_cfg or {}).get("decay_per_frame", 1))) + + lidar = None + slam = None + filt = None + stable = None + + loop_cfg = LoopClosureConfig.from_dict(full_cfg.get("loop_closure", {})) + loop_backend = LoopClosureBackend(loop_cfg) + + loc_state_cfg = LocalizationStateConfig.from_dict(full_cfg.get("state_machine", {})) + loc_state = LocalizationStateMachine(loc_state_cfg) + + session_cfg = SessionMemoryConfig.from_dict(full_cfg.get("session_memory", {})) + session_store = SessionTransformStore(map_cfg.data_folder, session_cfg) + + nav_cfg = NavigationExportConfig.from_dict(full_cfg.get("navigation_export", {})) + nav_exporter = NavigationExporter(nav_cfg, map_cfg.data_folder) + nav_rt_cfg = LiveCostmapConfig.from_dict(full_cfg.get("navigation_runtime", full_cfg.get("navigation_export", {}))) + nav_runtime = LiveCostmapRuntime(nav_rt_cfg) + nav_global_planner = GlobalAStarPlanner(blocked_cost=int(nav_rt_cfg.blocked_cost)) + nav_local_planner = LocalReactivePlanner(LocalPlannerConfig.from_dict(full_cfg.get("local_planner", {}))) + + fusion_cfg = SensorFusionConfig.from_dict(full_cfg.get("fusion", {})) + sensor_fusion = SensorPoseFusion(fusion_cfg) + + mission_cfg = MissionConfig.from_dict(full_cfg.get("mission", {})) + mission = WaypointMissionManager(mission_cfg) + + safety_cfg = SafetyConfig.from_dict(full_cfg.get("safety", {})) + safety = SafetySupervisor(safety_cfg) + + map_quality_cfg = IndoorMapQualityConfig.from_dict(full_cfg.get("map_quality", {})) + map_quality = IndoorMapQualityFilter(map_quality_cfg) + + submap_cfg = SubmapConfig.from_dict(full_cfg.get("submap_mapping", {})) + submap_mapper = LocalGlobalSubmapMapper(submap_cfg) + submap_mode_enabled = bool(submap_cfg.enabled) + _submap_raw = full_cfg.get("submap_mapping", {}) or {} + _ckpt_enabled = _as_bool(_submap_raw.get("checkpoint_enabled", True), True) + _ckpt_interval = max(10.0, float(_submap_raw.get("checkpoint_interval_sec", 60.0))) + 
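Nearly every tunable above follows the same defensive pattern: read from a possibly-absent config dict, coerce the type, then clamp into a safe range so one bad JSON value cannot destabilize the worker. A minimal sketch of that idiom (cfg_float is a hypothetical helper, not part of this diff):

def cfg_float(section, key, default, lo=None, hi=None):
    """Read a float tunable from a possibly-missing dict, clamped to [lo, hi]."""
    try:
        v = float((section or {}).get(key, default))
    except (TypeError, ValueError):
        v = float(default)
    if lo is not None:
        v = max(float(lo), v)
    if hi is not None:
        v = min(float(hi), v)
    return v

# equivalent of: max(0.25, float((loc_cfg_raw or {}).get("live_period_sec", 0.55)))
assert cfg_float(None, "live_period_sec", 0.55, lo=0.25) == 0.55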
submap_ckpt = SubmapCheckpointer(str(map_cfg.data_folder), interval_s=_ckpt_interval) if _ckpt_enabled else None + if submap_ckpt is not None and submap_mode_enabled: + if submap_ckpt.load_into(submap_mapper): + st("INFO", f"Submap checkpoint restored from {map_cfg.data_folder}") + place_cfg = PlaceRecognitionConfig.from_dict(full_cfg.get("place_recognition", {})) + place_recog = PlaceRecognitionIndex(place_cfg, Path(__file__).resolve().parent) + + livox_dbg_cfg = full_cfg.get("livox_debug", {}) or {} + livox_debug_enabled = _as_bool(livox_dbg_cfg.get("enabled", False), False) + livox_print_every_n = max(1, int(livox_dbg_cfg.get("print_every_n_frames", 20))) + + pose_guard_enabled = _as_bool((guard_cfg or {}).get("enabled", True), True) + pose_guard_max_trans = max(0.08, float((guard_cfg or {}).get("max_frame_translation_m", 0.45))) + pose_guard_max_rot_deg = max(2.0, float((guard_cfg or {}).get("max_frame_rotation_deg", 30.0))) + pose_guard_ref_dt = max(0.01, float((guard_cfg or {}).get("reference_dt_sec", 0.06))) + + map_lock_cfg = full_cfg.get("mapping_lock", {}) if isinstance(full_cfg, dict) else {} + map_lock_enabled = _as_bool((map_lock_cfg or {}).get("enabled", True), True) + map_lock_period_sec = max(0.5, float((map_lock_cfg or {}).get("period_sec", 1.5))) + map_lock_voxel_m = max(0.05, float((map_lock_cfg or {}).get("voxel_m", 0.22))) + map_lock_max_corr_m = max(0.3, float((map_lock_cfg or {}).get("max_corr_m", 1.2))) + map_lock_min_stable_pts = max(200, int((map_lock_cfg or {}).get("min_stable_points", 700))) + map_lock_accept_fitness = float(np.clip(float((map_lock_cfg or {}).get("accept_fitness", 0.28)), 0.0, 1.0)) + map_lock_accept_rmse = max(0.05, float((map_lock_cfg or {}).get("accept_rmse", 0.40))) + map_lock_max_trans_m = max(0.02, float((map_lock_cfg or {}).get("max_step_translation_m", 0.35))) + map_lock_max_rot_deg = max(1.0, float((map_lock_cfg or {}).get("max_step_rotation_deg", 15.0))) + map_lock_apply_damping = _as_bool((map_lock_cfg or {}).get("apply_damping", True), True) + map_lock_apply_alpha_t = float(np.clip(float((map_lock_cfg or {}).get("apply_alpha_translation", 0.42)), 0.05, 1.0)) + map_lock_apply_alpha_r = float(np.clip(float((map_lock_cfg or {}).get("apply_alpha_rotation", 0.50)), 0.05, 1.0)) + map_lock_apply_max_trans_m = max(0.01, float((map_lock_cfg or {}).get("apply_max_translation_m", 0.08))) + map_lock_apply_max_rot_deg = max(0.5, float((map_lock_cfg or {}).get("apply_max_rotation_deg", 5.0))) + map_lock_window_radius_m = max(0.0, float((map_lock_cfg or {}).get("window_radius_m", 8.0))) + map_lock_min_live_pts = max(60, int((map_lock_cfg or {}).get("min_live_points", 140))) + map_lock_icp_max_iter = max(5, int((map_lock_cfg or {}).get("icp_max_iter", 20))) + map_lock_block_on_reject = _as_bool((map_lock_cfg or {}).get("block_on_reject", True), True) + map_lock_retry_after_sec = float((map_lock_cfg or {}).get("retry_after_reject_sec", 0.20)) + map_lock_retry_after_sec = float(np.clip(map_lock_retry_after_sec, 0.05, max(0.05, map_lock_period_sec))) + map_lock_block_on_severe = _as_bool((map_lock_cfg or {}).get("block_on_severe_reject", True), True) + map_lock_severe_fit = float(np.clip(float((map_lock_cfg or {}).get("severe_fit", 0.14)), 0.0, 1.0)) + map_lock_severe_rmse = max(0.10, float((map_lock_cfg or {}).get("severe_rmse", 0.75))) + map_lock_severe_trans_m = max( + float(map_lock_max_trans_m), + float((map_lock_cfg or {}).get("severe_trans_m", 0.45)), + ) + map_lock_severe_rot_deg = max( + float(map_lock_max_rot_deg), + float((map_lock_cfg or 
{}).get("severe_rot_deg", 22.0)), + ) + rotate_cfg = full_cfg.get("rotate_guard", {}) if isinstance(full_cfg, dict) else {} + rotate_mode_enabled = _as_bool((rotate_cfg or {}).get("enabled", True), True) + rotate_in_place_max_trans_m = max(0.01, float((rotate_cfg or {}).get("in_place_max_trans_m", 0.05))) + rotate_in_place_min_rot_deg = max(0.5, float((rotate_cfg or {}).get("in_place_min_rot_deg", 4.0))) + rotate_pose_guard_max_rot_deg = max( + float(pose_guard_max_rot_deg), + float((rotate_cfg or {}).get("pose_guard_max_rot_deg", 34.0)), + ) + rotate_skip_continuity = _as_bool((rotate_cfg or {}).get("skip_continuity", True), True) + rotate_map_lock_period_sec = max(0.05, float((rotate_cfg or {}).get("map_lock_period_sec", 0.25))) + rotate_map_lock_accept_fitness = float( + np.clip(float((rotate_cfg or {}).get("map_lock_accept_fitness", max(0.05, map_lock_accept_fitness - 0.06))), 0.0, 1.0) + ) + rotate_map_lock_accept_rmse = max( + float(map_lock_accept_rmse), + float((rotate_cfg or {}).get("map_lock_accept_rmse", map_lock_accept_rmse + 0.10)), + ) + rotate_map_lock_max_step_rot_deg = max( + float(map_lock_max_rot_deg), + float((rotate_cfg or {}).get("map_lock_max_step_rot_deg", 14.0)), + ) + rotate_map_lock_apply_alpha_t = float( + np.clip( + float((rotate_cfg or {}).get("map_lock_apply_alpha_translation", max(0.05, map_lock_apply_alpha_t * 0.55))), + 0.03, + 1.0, + ) + ) + rotate_map_lock_apply_alpha_r = float( + np.clip( + float((rotate_cfg or {}).get("map_lock_apply_alpha_rotation", max(0.05, map_lock_apply_alpha_r * 0.70))), + 0.03, + 1.0, + ) + ) + rotate_map_lock_apply_max_trans_m = max( + 0.005, + float((rotate_cfg or {}).get("map_lock_apply_max_translation_m", max(0.01, map_lock_apply_max_trans_m * 0.45))), + ) + rotate_map_lock_apply_max_rot_deg = max( + 0.5, + float((rotate_cfg or {}).get("map_lock_apply_max_rotation_deg", max(1.0, map_lock_apply_max_rot_deg * 0.70))), + ) + rotate_freeze_mapping = _as_bool((rotate_cfg or {}).get("freeze_mapping_while_rotating", True), True) + rotate_freeze_min_stable_pts = max(0, int((rotate_cfg or {}).get("freeze_min_stable_points", 250))) + rotate_map_lock_block_on_reject = _as_bool((rotate_cfg or {}).get("block_on_reject", False), False) + rotate_map_lock_block_on_severe = _as_bool((rotate_cfg or {}).get("block_on_severe", True), True) + continuity_cfg = full_cfg.get("continuity_guard", {}) if isinstance(full_cfg, dict) else {} + continuity_enabled = _as_bool((continuity_cfg or {}).get("enabled", True), True) + continuity_voxel_m = max(0.05, float((continuity_cfg or {}).get("voxel_m", 0.24))) + continuity_min_points = max(40, int((continuity_cfg or {}).get("min_points", 120))) + continuity_match_radius_m = max(0.08, float((continuity_cfg or {}).get("match_radius_m", 0.45))) + continuity_min_inlier_ratio = float(np.clip(float((continuity_cfg or {}).get("min_inlier_ratio", 0.35)), 0.01, 1.0)) + continuity_max_median_m = max(0.05, float((continuity_cfg or {}).get("max_median_m", 0.30))) + continuity_max_p90_m = max(0.08, float((continuity_cfg or {}).get("max_p90_m", 0.60))) + continuity_recover_after_rejects = max(2, int((continuity_cfg or {}).get("recover_after_rejects", 10))) + cleanup_cfg = full_cfg.get("map_cleanup", {}) if isinstance(full_cfg, dict) else {} + cleanup_enabled = _as_bool((cleanup_cfg or {}).get("enabled", True), True) + cleanup_period_sec = max(0.5, float((cleanup_cfg or {}).get("period_sec", 1.8))) + cleanup_voxel_m = max(0.05, float((cleanup_cfg or {}).get("voxel_m", 0.16))) + cleanup_cluster_radius_m = max(0.12, 
float((cleanup_cfg or {}).get("cluster_radius_m", 0.42))) + cleanup_min_cluster_points = max(20, int((cleanup_cfg or {}).get("min_cluster_points", 120))) + cleanup_keep_largest_n = max(1, int((cleanup_cfg or {}).get("keep_largest_n", 1))) + cleanup_strict_largest_only = _as_bool((cleanup_cfg or {}).get("strict_largest_only", True), True) + cleanup_min_total_points = max(150, int((cleanup_cfg or {}).get("min_total_points", 800))) + + autosave_cfg = full_cfg.get("autosave", {}) or {} + autosave_enabled = _as_bool(autosave_cfg.get("enabled", False), False) + autosave_interval_sec = max(5.0, float(autosave_cfg.get("interval_sec", 90.0))) + autosave_base_name = str(autosave_cfg.get("base_name", "autosave_map")) + last_autosave_t = 0.0 + + self_check_report = run_startup_self_check(full_cfg, Path(__file__).resolve().parent) + + connected = False + mapping_enabled = False + localize_only_enabled = False + worker_mode = "IDLE" + running = True + state_lock = threading.RLock() + + # localization + ref_map_path: Optional[str] = None + + # publish control + pub_hz = float(run_cfg.publish_hz) + last_pub = 0.0 + + # keyframe gating + last_key_pose = None + last_frame_t = 0.0 + last_guard_pose = None + last_guard_t = 0.0 + guard_reject_count = 0 + guard_last_warn_t = 0.0 + odom_to_map_transform = np.eye(4, dtype=np.float64) + map_lock_last_t = 0.0 + map_lock_accept_count = 0 + map_lock_reject_count = 0 + map_lock_last_warn_t = 0.0 + continuity_prev_pts = None + continuity_reject_count = 0 + continuity_consecutive_rejects = 0 + continuity_last_warn_t = 0.0 + rotate_freeze_count = 0 + rotate_last_warn_t = 0.0 + cleanup_last_t = 0.0 + cleanup_removed_total = 0 + cleanup_last_info: Dict[str, Any] = {"status": "na"} + + # localization cadence/state + last_localize_t = 0.0 + last_localize_points = 0 + last_localize_transform = np.eye(4, dtype=np.float64) + last_localize_confidence = 0.0 + ref_pcd_cache = None + ref_points_cache: Optional[np.ndarray] = None + ref_nav_points_cache: Optional[np.ndarray] = None + ref_nav_cache_src_n: int = 0 + ref_cache_path: Optional[str] = None + slam_to_ref_transform = np.eye(4, dtype=np.float64) + slam_to_ref_valid = False + approx_pose_guess_tf: Optional[np.ndarray] = None + approx_pose_guess_until_t: float = 0.0 + approx_pose_guess_fail_count: int = 0 + approx_pose_guess_success_count: int = 0 + ref_match_points_cache: Optional[np.ndarray] = None + ref_match_confirm_hits: Optional[np.ndarray] = None + ref_match_last_pub_t: float = 0.0 + ref_match_last_stats: Dict[str, Any] = {} + loc_live_stack: deque[np.ndarray] = deque(maxlen=int(loc_live_stack_frames)) + loc_frames = LocalizationFrameState() + + # runtime point-density control (LOW/MEDIUM/HIGH) + base_stride = max(1, int(eng_cfg.pre_downsample_stride)) + density_mode = "MEDIUM" + live_stride = base_stride + runtime_filter_voxel = float(filt_cfg.voxel_size) + runtime_filter_hit_threshold = int(max(1, filt_cfg.hits_threshold)) + runtime_filter_decay = float(filt_cfg.strict_sec if filt_cfg.use_strict else filt_cfg.window_sec) + stability_profile = "BALANCED" + + # telemetry/performance + perf_input_count = 0 + perf_publish_count = 0 + perf_last_rate_t = _now() + perf_last_cpu_wall_t = _now() + perf_last_cpu_proc_t = time.process_time() + perf_input_fps = 0.0 + perf_publish_fps = 0.0 + perf_icp_ms = 0.0 + perf_cpu_pct = 0.0 + perf_queue_lag = 0 + perf_stable_growth = 0.0 + perf_prev_stable_count = 0 + perf_prev_stable_t = _now() + + # recording / replay capture + recording_enabled = False + recording_frames: List[np.ndarray] 
= [] + recording_poses: List[np.ndarray] = [] + recording_base_name = "slam_recording" + recording_max_frames = max(100, int((full_cfg.get("replay", {}) or {}).get("max_record_frames", 6000))) + recording_drop_count = 0 + + # nav runtime / planner / mission + nav_goal_xy: Optional[Tuple[float, float]] = None + nav_last_update_t = 0.0 + nav_last_plan_t = 0.0 + nav_plan_period_sec = max(0.15, float((full_cfg.get("navigation_runtime", {}) or {}).get("plan_period_sec", 0.35))) + nav_path_world: List[Tuple[float, float]] = [] + latest_live_pts: Optional[np.ndarray] = None + latest_live_localize_pts: Optional[np.ndarray] = None + raw_points_q: queue.Queue = queue.Queue(maxsize=max(2, int(run_cfg.frame_queue_maxsize))) + + def _sync_loc_vars_from_service() -> None: + nonlocal odom_to_map_transform, last_localize_transform, last_localize_confidence + nonlocal slam_to_ref_transform, slam_to_ref_valid + odom_to_map_transform = np.array(loc_frames.odom_to_map, dtype=np.float64, copy=True) + last_localize_transform = np.array(loc_frames.last_alignment, dtype=np.float64, copy=True) + last_localize_confidence = float(loc_frames.confidence) + slam_to_ref_transform = np.array(loc_frames.odom_to_ref, dtype=np.float64, copy=True) + slam_to_ref_valid = bool(loc_frames.ref_valid) + + def _worker_diag_state() -> Dict[str, Any]: + return { + "connected": bool(connected), + "mapping_enabled": bool(mapping_enabled), + "localize_only_enabled": bool(localize_only_enabled), + "worker_mode": str(worker_mode), + "ref_map_path": str(ref_map_path) if ref_map_path else "", + "last_localize_points": int(last_localize_points), + "last_localize_t": float(last_localize_t), + "density_mode": str(density_mode), + "stability_profile": str(stability_profile), + "submap_enabled": bool(submap_mode_enabled), + "loop_closure_enabled": bool(loop_cfg.enabled), + "loc_frames": loc_frames.snapshot(), + } + + def _clear_raw_points_queue() -> None: + try: + while True: + raw_points_q.get_nowait() + except Exception: + return + + # ---------------- init stack ---------------- + def init_slam_stack(): + nonlocal slam, filt, stable + + cfg = load_config(config_file=None, max_range=float(eng_cfg.max_range)) + cfg.mapping.voxel_size = float(eng_cfg.slam_voxel_size) + slam = KissICP(cfg) + + vf_cfg = VFilterCfg( + voxel_size=float(runtime_filter_voxel), + hit_threshold=int(runtime_filter_hit_threshold), + decay_seconds=float(runtime_filter_decay), + max_voxels=int(filt_cfg.max_voxels), + ) + filt = VoxelPersistenceFilter(vf_cfg) + + sm_cfg = StableMapCfg( + display_voxel=float(map_cfg.display_voxel), + save_voxel=float(map_cfg.save_voxel), + data_folder=str(map_cfg.data_folder), + save_extension=str(map_cfg.save_extension), + ) + stable = StableMapLayer(sm_cfg) + + def _run_locked(fn, *args, **kwargs): + with state_lock: + return fn(*args, **kwargs) + + def reset_all(): + nonlocal mapping_enabled, localize_only_enabled, worker_mode, last_key_pose + nonlocal last_guard_pose, last_guard_t, guard_reject_count, guard_last_warn_t + nonlocal last_frame_t + nonlocal odom_to_map_transform, map_lock_last_t, map_lock_accept_count, map_lock_reject_count, map_lock_last_warn_t + nonlocal continuity_prev_pts, continuity_reject_count, continuity_consecutive_rejects, continuity_last_warn_t + nonlocal rotate_freeze_count, rotate_last_warn_t + nonlocal cleanup_last_t, cleanup_removed_total, cleanup_last_info + nonlocal last_localize_t, last_localize_points, last_localize_transform, last_localize_confidence + nonlocal ref_pcd_cache, ref_points_cache, 
ref_nav_points_cache, ref_nav_cache_src_n, ref_cache_path, slam_to_ref_transform, slam_to_ref_valid + nonlocal approx_pose_guess_tf, approx_pose_guess_until_t, approx_pose_guess_fail_count, approx_pose_guess_success_count + nonlocal ref_match_points_cache, ref_match_confirm_hits, ref_match_last_pub_t, ref_match_last_stats + nonlocal loc_live_stack + nonlocal last_autosave_t + nonlocal perf_input_count, perf_publish_count, perf_last_rate_t + nonlocal perf_last_cpu_wall_t, perf_last_cpu_proc_t + nonlocal perf_input_fps, perf_publish_fps, perf_icp_ms, perf_cpu_pct + nonlocal perf_queue_lag, perf_stable_growth, perf_prev_stable_count, perf_prev_stable_t + nonlocal recording_enabled, recording_frames, recording_poses, recording_drop_count + nonlocal nav_goal_xy, nav_last_update_t, nav_last_plan_t, nav_path_world, latest_live_pts, latest_live_localize_pts + init_slam_stack() + mapping_enabled = False + localize_only_enabled = False + worker_mode = "IDLE" + last_frame_t = 0.0 + last_key_pose = None + last_guard_pose = None + last_guard_t = 0.0 + guard_reject_count = 0 + guard_last_warn_t = 0.0 + loc_frames.reset() + _sync_loc_vars_from_service() + map_lock_last_t = 0.0 + map_lock_accept_count = 0 + map_lock_reject_count = 0 + map_lock_last_warn_t = 0.0 + continuity_prev_pts = None + continuity_reject_count = 0 + continuity_consecutive_rejects = 0 + continuity_last_warn_t = 0.0 + rotate_freeze_count = 0 + rotate_last_warn_t = 0.0 + cleanup_last_t = 0.0 + cleanup_removed_total = 0 + cleanup_last_info = {"status": "na"} + last_localize_t = 0.0 + last_localize_points = 0 + ref_pcd_cache = None + ref_points_cache = None + ref_nav_points_cache = None + ref_nav_cache_src_n = 0 + ref_cache_path = None + approx_pose_guess_tf = None + approx_pose_guess_until_t = 0.0 + approx_pose_guess_fail_count = 0 + approx_pose_guess_success_count = 0 + ref_match_points_cache = None + ref_match_confirm_hits = None + ref_match_last_pub_t = 0.0 + ref_match_last_stats = {} + loc_live_stack.clear() + loop_backend.reset() + loc_state.reset() + submap_mapper.reset() + if submap_ckpt is not None: + submap_ckpt.delete() + sensor_fusion.reset() + _imu_integrator.reset() + mission.reset() + nav_runtime.reset() + last_autosave_t = 0.0 + perf_input_count = 0 + perf_publish_count = 0 + perf_last_rate_t = _now() + perf_last_cpu_wall_t = _now() + perf_last_cpu_proc_t = time.process_time() + perf_input_fps = 0.0 + perf_publish_fps = 0.0 + perf_icp_ms = 0.0 + perf_cpu_pct = 0.0 + perf_queue_lag = 0 + perf_stable_growth = 0.0 + perf_prev_stable_count = 0 + perf_prev_stable_t = _now() + recording_enabled = False + recording_frames = [] + recording_poses = [] + recording_drop_count = 0 + nav_goal_xy = None + nav_last_update_t = 0.0 + nav_last_plan_t = 0.0 + nav_path_world = [] + latest_live_pts = None + latest_live_localize_pts = None + _clear_raw_points_queue() + st("INFO", "RESET done: SLAM + Filter + Stable map cleared. 
(no save)") + + init_slam_stack() + + # ---------------- helpers ---------------- + def height_colors_fast(pts: np.ndarray, alpha: float = 0.9) -> np.ndarray: + if pts is None or len(pts) == 0: + return np.zeros((0, 4), dtype=np.float32) + z = pts[:, 2] + colors = np.zeros((len(pts), 4), dtype=np.float32) + colors[:, 3] = alpha + norm = np.clip((z + 1.5) / 3.0, 0, 1) + colors[:, 0] = norm + colors[:, 1] = 1.0 - np.abs(norm - 0.5) * 2.0 + colors[:, 2] = 1.0 - norm + return colors + + def pose_delta_ok(pose, last_pose) -> bool: + if last_pose is None: + return True + dp = pose[:3, 3] - last_pose[:3, 3] + trans = float(np.linalg.norm(dp)) + R = pose[:3, :3] @ last_pose[:3, :3].T + ang = float(np.degrees(np.arccos(np.clip((np.trace(R) - 1) / 2, -1, 1)))) + return (trans >= float(eng_cfg.keyframe_min_translation_m)) or (ang >= float(eng_cfg.keyframe_min_rotation_deg)) + + def to_world_points(points_sensor: np.ndarray, pose: Optional[np.ndarray]) -> np.ndarray: + """ + Convert sensor-frame points to world frame using pose (world_T_sensor). + Falls back to input points when pose is unavailable/invalid. + """ + return _to_world_points(points_sensor, pose) + + def apply_transform_points(points: Optional[np.ndarray], transform: np.ndarray) -> Optional[np.ndarray]: + return _apply_transform_points(points, transform) + + def _tf_delta(prev_tf: np.ndarray, new_tf: np.ndarray) -> Tuple[float, float]: + return _tf_delta_impl(prev_tf, new_tf) + + def _blend_rigid_tf(prev_tf: np.ndarray, new_tf: np.ndarray, alpha_t: float, alpha_r: float) -> np.ndarray: + return _blend_rigid_tf_impl(prev_tf, new_tf, alpha_t, alpha_r) + + def _yaw_deg_from_tf(tf: np.ndarray) -> float: + return _yaw_deg_from_tf_impl(tf) + + def _tf_from_xyzyaw(x: float, y: float, z: float, yaw_deg: float) -> np.ndarray: + return _tf_from_xyzyaw_impl(x, y, z, yaw_deg) + + def voxel_downsample_np(points: np.ndarray, voxel_m: float) -> np.ndarray: + pts = np.asarray(points, dtype=np.float32) + if pts.ndim != 2 or pts.shape[1] != 3 or len(pts) == 0: + return np.zeros((0, 3), dtype=np.float32) + v = float(voxel_m) + if v <= 0.0: + return pts + keys = np.floor(pts / v).astype(np.int32) + _, first_idx = np.unique(keys, axis=0, return_index=True) + return pts[np.sort(first_idx)] + + def _rigid_fit_svd(src_pts: np.ndarray, dst_pts: np.ndarray) -> Optional[np.ndarray]: + """Estimate rigid transform (dst = T * src) with SVD.""" + if src_pts is None or dst_pts is None: + return None + if len(src_pts) < 3 or len(dst_pts) < 3: + return None + a = np.asarray(src_pts, dtype=np.float64) + b = np.asarray(dst_pts, dtype=np.float64) + if a.shape != b.shape or a.ndim != 2 or a.shape[1] != 3: + return None + c_a = np.mean(a, axis=0) + c_b = np.mean(b, axis=0) + aa = a - c_a + bb = b - c_b + H = aa.T @ bb + try: + U, _, Vt = np.linalg.svd(H, full_matrices=False) + except Exception: + return None + R = Vt.T @ U.T + if np.linalg.det(R) < 0: + Vt[2, :] *= -1.0 + R = Vt.T @ U.T + t = c_b - (R @ c_a) + T = np.eye(4, dtype=np.float64) + T[:3, :3] = R + T[:3, 3] = t + return T + + def _icp_point_to_point_np( + src_pts: np.ndarray, + tgt_pts: np.ndarray, + init_tf: Optional[np.ndarray], + max_corr_m: float, + max_iter: int, + min_corr: int = 30, + ) -> Dict[str, Any]: + """ + Pure NumPy/SciPy point-to-point ICP to avoid Open3D native crashes. + Returns {transform, fitness, rmse, inliers}. 
+ """ + src0 = np.asarray(src_pts, dtype=np.float64) + tgt = np.asarray(tgt_pts, dtype=np.float64) + if src0.ndim != 2 or tgt.ndim != 2 or src0.shape[1] != 3 or tgt.shape[1] != 3: + return {"transform": np.eye(4, dtype=np.float64), "fitness": 0.0, "rmse": 9e9, "inliers": 0} + if len(src0) < 3 or len(tgt) < 3: + return {"transform": np.eye(4, dtype=np.float64), "fitness": 0.0, "rmse": 9e9, "inliers": 0} + + try: + from scipy.spatial import cKDTree + except Exception: + return {"transform": np.eye(4, dtype=np.float64), "fitness": 0.0, "rmse": 9e9, "inliers": 0} + + T = np.eye(4, dtype=np.float64) if init_tf is None else np.asarray(init_tf, dtype=np.float64).copy() + if T.shape != (4, 4): + T = np.eye(4, dtype=np.float64) + tree = cKDTree(tgt) + max_corr = float(max(0.05, max_corr_m)) + min_corr_n = int(max(10, min_corr)) + last_rmse = 9e9 + last_inliers = 0 + + for _ in range(int(max(1, max_iter))): + src_w = (src0 @ T[:3, :3].T) + T[:3, 3] + d, idx = tree.query(src_w, k=1) + keep = np.isfinite(d) & (d <= max_corr) + n_in = int(np.count_nonzero(keep)) + if n_in < min_corr_n: + break + src_corr = src_w[keep] + tgt_corr = tgt[np.asarray(idx[keep], dtype=np.int32)] + delta = _rigid_fit_svd(src_corr, tgt_corr) + if delta is None: + break + T = np.asarray(delta, dtype=np.float64) @ T + prev_rmse = float(last_rmse) + rmse = float(np.sqrt(np.mean(np.square(d[keep])))) + last_rmse = rmse + last_inliers = n_in + # Tiny improvement => converged. + if abs(prev_rmse - rmse) < 1e-5: + break + + fitness = float(last_inliers) / max(1.0, float(len(src0))) + return {"transform": T, "fitness": float(fitness), "rmse": float(last_rmse), "inliers": int(last_inliers)} + + def _bidir_match_stats( + src_pts: np.ndarray, + tgt_pts: np.ndarray, + transform: np.ndarray, + max_corr_m: float, + ) -> Dict[str, Any]: + stats = { + "ok": False, + "live_to_ref_inlier": 0.0, + "ref_to_live_inlier": 0.0, + "bidir_inlier": 0.0, + "bidir_rmse": 9e9, + } + try: + from scipy.spatial import cKDTree + except Exception: + return stats + try: + src = np.asarray(src_pts, dtype=np.float64) + tgt = np.asarray(tgt_pts, dtype=np.float64) + tf = np.asarray(transform, dtype=np.float64) + if src.ndim != 2 or tgt.ndim != 2 or src.shape[1] != 3 or tgt.shape[1] != 3: + return stats + if len(src) < 8 or len(tgt) < 8 or tf.shape != (4, 4): + return stats + src_w = (src @ tf[:3, :3].T) + tf[:3, 3] + max_corr = max(0.05, float(max_corr_m)) + tree_t = cKDTree(tgt) + d_st, _ = tree_t.query(src_w, k=1) + in_st = np.isfinite(d_st) & (d_st <= max_corr) + in_st_ratio = float(np.count_nonzero(in_st)) / max(1.0, float(len(src_w))) + + tree_s = cKDTree(src_w) + d_ts, _ = tree_s.query(tgt, k=1) + in_ts = np.isfinite(d_ts) & (d_ts <= max_corr) + in_ts_ratio = float(np.count_nonzero(in_ts)) / max(1.0, float(len(tgt))) + + inlier_mix = np.concatenate([d_st[in_st], d_ts[in_ts]], axis=0) + rmse = float(np.sqrt(np.mean(np.square(inlier_mix)))) if len(inlier_mix) > 0 else 9e9 + + stats.update( + { + "ok": True, + "live_to_ref_inlier": float(in_st_ratio), + "ref_to_live_inlier": float(in_ts_ratio), + "bidir_inlier": float(min(in_st_ratio, in_ts_ratio)), + "bidir_rmse": float(rmse), + } + ) + return stats + except Exception: + return stats + + def _build_ref_match_visual( + live_ref_pts: Optional[np.ndarray], + pose_ref: Optional[np.ndarray], + now_t: float, + ) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], Dict[str, Any]]: + nonlocal ref_match_points_cache, ref_match_confirm_hits, ref_match_last_pub_t, ref_match_last_stats + stats: Dict[str, Any] = 
{"enabled": bool(loc_vis_enabled), "updated": False} + if not loc_vis_enabled: + ref_match_last_stats = stats + return None, None, stats + if ref_points_cache is None or len(ref_points_cache) == 0: + stats["reason"] = "no_ref" + ref_match_last_stats = stats + return None, None, stats + if (float(now_t) - float(ref_match_last_pub_t)) < float(loc_vis_period_sec): + return None, None, dict(ref_match_last_stats) + + ref_match_last_pub_t = float(now_t) + if ref_match_points_cache is None or ref_match_confirm_hits is None: + ref_ds = voxel_downsample_np(np.asarray(ref_points_cache, dtype=np.float32), float(loc_vis_voxel_m)) + if len(ref_ds) > int(loc_vis_max_points): + step = max(2, int(np.ceil(float(len(ref_ds)) / float(loc_vis_max_points)))) + ref_ds = ref_ds[::step] + ref_match_points_cache = np.asarray(ref_ds, dtype=np.float32) + ref_match_confirm_hits = np.zeros((len(ref_match_points_cache),), dtype=np.int16) + + pts_ref = np.asarray(ref_match_points_cache, dtype=np.float32) + if len(pts_ref) == 0: + stats["reason"] = "empty_ref_sample" + ref_match_last_stats = stats + return pts_ref, np.zeros((0, 4), dtype=np.float32), stats + + local_mask = np.ones((len(pts_ref),), dtype=bool) + center = None + if pose_ref is not None: + pose_np = np.asarray(pose_ref, dtype=np.float64) + if pose_np.shape == (4, 4): + center = np.asarray(pose_np[:3, 3], dtype=np.float32) + if center is not None and np.isfinite(center).all(): + rel = pts_ref - center.reshape((1, 3)) + d2 = np.einsum("ij,ij->i", rel, rel) + local_mask = d2 <= (float(loc_vis_local_radius_m) * float(loc_vis_local_radius_m)) + + colors = np.zeros((len(pts_ref), 4), dtype=np.float32) + colors[:, :3] = np.array([0.45, 0.45, 0.45], dtype=np.float32) + colors[:, 3] = 0.16 + + live = None if live_ref_pts is None else np.asarray(live_ref_pts, dtype=np.float32) + matched = np.zeros((len(pts_ref),), dtype=bool) + near = np.zeros((len(pts_ref),), dtype=bool) + unmatched = np.zeros((len(pts_ref),), dtype=bool) + try: + if live is not None and live.ndim == 2 and live.shape[1] == 3 and len(live) >= 20 and np.any(local_mask): + from scipy.spatial import cKDTree + tree = cKDTree(np.asarray(live, dtype=np.float64)) + idx = np.where(local_mask)[0] + d, _ = tree.query(np.asarray(pts_ref[idx], dtype=np.float64), k=1) + d = np.asarray(d, dtype=np.float64) + match_idx = idx[d <= float(loc_vis_match_dist_m)] + near_idx = idx[(d > float(loc_vis_match_dist_m)) & (d <= float(loc_vis_near_dist_m))] + unmatch_idx = idx[d > float(loc_vis_near_dist_m)] + matched[match_idx] = True + near[near_idx] = True + unmatched[unmatch_idx] = True + except Exception as _vis_exc: + st("WARN", f"[loc_vis] match visual update failed: {_vis_exc}") + + if ref_match_confirm_hits is None or len(ref_match_confirm_hits) != len(pts_ref): + ref_match_confirm_hits = np.zeros((len(pts_ref),), dtype=np.int16) + hits = np.asarray(ref_match_confirm_hits, dtype=np.int16) + hits[matched] = np.clip(hits[matched] + 1, 0, 32000) + decay_mask = local_mask & (~matched) + if np.any(decay_mask): + hits[decay_mask] = np.maximum(0, hits[decay_mask] - int(loc_vis_decay_per_frame)) + confirmed = local_mask & (hits >= int(loc_vis_confirm_frames)) + ref_match_confirm_hits = hits + + colors[unmatched, :3] = np.array([0.95, 0.15, 0.15], dtype=np.float32) + colors[unmatched, 3] = 0.22 + colors[near, :3] = np.array([0.96, 0.86, 0.16], dtype=np.float32) + colors[near, 3] = 0.24 + colors[matched, :3] = np.array([0.15, 0.85, 0.20], dtype=np.float32) + colors[matched, 3] = 0.24 + colors[confirmed, :3] = np.array([0.05, 
1.00, 0.28], dtype=np.float32) + colors[confirmed, 3] = 0.34 + + local_n = int(np.count_nonzero(local_mask)) + stats.update( + { + "updated": True, + "ref_points": int(len(pts_ref)), + "local_points": int(local_n), + "matched": int(np.count_nonzero(matched)), + "near": int(np.count_nonzero(near)), + "unmatched": int(np.count_nonzero(unmatched)), + "confirmed": int(np.count_nonzero(confirmed)), + "matched_ratio": float(np.count_nonzero(matched)) / max(1.0, float(local_n)), + } + ) + ref_match_last_stats = dict(stats) + return pts_ref, colors, stats + + def compute_map_lock_correction( + points_sensor: np.ndarray, + pose_map: Optional[np.ndarray], + now_t: float, + overrides: Optional[Dict[str, Any]] = None, + ) -> Tuple[Optional[np.ndarray], Dict[str, Any]]: + nonlocal map_lock_last_t + info: Dict[str, Any] = {"status": "skip", "ran": False} + ov = overrides if isinstance(overrides, dict) else {} + period_sec_local = max(0.05, float(ov.get("period_sec", map_lock_period_sec))) + max_corr_local = max(0.20, float(ov.get("max_corr_m", map_lock_max_corr_m))) + accept_fit_local = float(np.clip(float(ov.get("accept_fitness", map_lock_accept_fitness)), 0.0, 1.0)) + accept_rmse_local = max(0.05, float(ov.get("accept_rmse", map_lock_accept_rmse))) + max_step_trans_local = max(0.01, float(ov.get("max_step_translation_m", map_lock_max_trans_m))) + max_step_rot_local = max(0.5, float(ov.get("max_step_rotation_deg", map_lock_max_rot_deg))) + icp_iter_local = max(5, int(ov.get("icp_max_iter", map_lock_icp_max_iter))) + + if (not map_lock_enabled) or (pose_map is None): + return None, info + if (now_t - float(map_lock_last_t)) < float(period_sec_local): + return None, info + map_lock_last_t = float(now_t) + + try: + stable_all = stable.get_points() + except Exception: + stable_all = None + n_stable = 0 if stable_all is None else int(len(stable_all)) + if stable_all is None or n_stable < int(map_lock_min_stable_pts): + info.update({"status": "skip", "reason": "few_stable", "stable_points": int(n_stable)}) + return None, info + + src_sensor = np.asarray(points_sensor, dtype=np.float32) + if src_sensor.ndim != 2 or src_sensor.shape[1] != 3 or len(src_sensor) < int(map_lock_min_live_pts): + info.update({"status": "skip", "reason": "few_live", "live_points": int(len(src_sensor))}) + return None, info + + pose_np = np.asarray(pose_map, dtype=np.float64) + if pose_np.shape != (4, 4): + info.update({"status": "skip", "reason": "bad_pose"}) + return None, info + + src_world = to_world_points(src_sensor, pose_np) + tgt_world = np.asarray(stable_all, dtype=np.float32) + + if float(map_lock_window_radius_m) > 0.0 and len(tgt_world) > 0: + center = np.asarray(pose_np[:3, 3], dtype=np.float32) + rel = tgt_world - center + d2 = np.einsum("ij,ij->i", rel, rel) + win2 = float(map_lock_window_radius_m) * float(map_lock_window_radius_m) + keep = d2 <= win2 + min_local = max(120, int(map_lock_min_stable_pts) // 3) + if int(np.count_nonzero(keep)) >= min_local: + tgt_world = tgt_world[keep] + + src_ds = voxel_downsample_np(src_world, float(map_lock_voxel_m)) + tgt_ds = voxel_downsample_np(tgt_world, float(map_lock_voxel_m)) + if len(src_ds) < 60 or len(tgt_ds) < 120: + info.update( + { + "status": "skip", + "reason": "few_downsampled", + "src_points": int(len(src_ds)), + "tgt_points": int(len(tgt_ds)), + } + ) + return None, info + + # Robust in-process point-to-point ICP (avoids Open3D native crashes seen in map-lock path). 
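The `_rigid_fit` defined just below (like `_rigid_fit_svd` earlier in this file) solves the orthogonal Procrustes problem: with centroids removed, H = A0^T B0, SVD gives H = U S V^T, the rotation is R = V U^T (with a sign flip on the last row of V^T when det(R) < 0, to reject reflections), and t = mean(b) - R @ mean(a). A standalone self-check of that closed form, independent of the worker code:

import numpy as np

rng = np.random.default_rng(0)
a = rng.normal(size=(50, 3))
yaw = np.deg2rad(30.0)
R_true = np.array([[np.cos(yaw), -np.sin(yaw), 0.0],
                   [np.sin(yaw),  np.cos(yaw), 0.0],
                   [0.0,          0.0,         1.0]])
b = a @ R_true.T + np.array([1.0, -2.0, 0.5])   # dst = R_true * src + t_true

H = (a - a.mean(0)).T @ (b - b.mean(0))
U, _, Vt = np.linalg.svd(H)
R = Vt.T @ U.T
if np.linalg.det(R) < 0:                         # reflection guard, as in _rigid_fit
    Vt[-1, :] *= -1.0
    R = Vt.T @ U.T
t = b.mean(0) - R @ a.mean(0)
assert np.allclose(R, R_true, atol=1e-8)
assert np.allclose(t, [1.0, -2.0, 0.5], atol=1e-8)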
+ try: + from scipy.spatial import cKDTree + except Exception: + info.update({"status": "skip", "reason": "scipy_missing"}) + return None, info + + src_icp = np.asarray(src_ds, dtype=np.float64) + tgt_icp = np.asarray(tgt_ds, dtype=np.float64) + tree = cKDTree(tgt_icp) + max_corr = float(max_corr_local) + min_corr = max(40, int(0.15 * len(src_icp))) + + def _rigid_fit(src_pts: np.ndarray, dst_pts: np.ndarray) -> Optional[np.ndarray]: + if len(src_pts) < 3 or len(dst_pts) < 3: + return None + c_src = np.mean(src_pts, axis=0) + c_dst = np.mean(dst_pts, axis=0) + src0 = src_pts - c_src + dst0 = dst_pts - c_dst + H = src0.T @ dst0 + try: + U, _, Vt = np.linalg.svd(H, full_matrices=False) + except Exception: + return None + R = Vt.T @ U.T + if np.linalg.det(R) < 0: + Vt[-1, :] *= -1.0 + R = Vt.T @ U.T + t = c_dst - (R @ c_src) + T = np.eye(4, dtype=np.float64) + T[:3, :3] = R + T[:3, 3] = t + return T + + corr = np.eye(4, dtype=np.float64) + converged = False + for _ in range(int(icp_iter_local)): + src_w = (src_icp @ corr[:3, :3].T) + corr[:3, 3] + dist, idx = tree.query(src_w, k=1) + mask = np.isfinite(dist) & (dist <= max_corr) + if int(np.count_nonzero(mask)) < min_corr: + break + src_in = src_w[mask] + dst_in = tgt_icp[idx[mask]] + delta = _rigid_fit(src_in, dst_in) + if delta is None: + break + corr = delta @ corr + step_t = float(np.linalg.norm(delta[:3, 3])) + step_r = float( + np.degrees( + np.arccos(np.clip((np.trace(delta[:3, :3]) - 1.0) * 0.5, -1.0, 1.0)) + ) + ) + if step_t < 0.001 and step_r < 0.08: + converged = True + break + + src_final = (src_icp @ corr[:3, :3].T) + corr[:3, 3] + dist_f, _ = tree.query(src_final, k=1) + mask_f = np.isfinite(dist_f) & (dist_f <= max_corr) + inliers = int(np.count_nonzero(mask_f)) + if inliers < min_corr: + info.update( + { + "ran": True, + "status": "rejected", + "reason": "few_inliers", + "src_points": int(len(src_icp)), + "tgt_points": int(len(tgt_icp)), + } + ) + return None, info + fit = float(inliers) / float(max(1, len(src_icp))) + rmse = float(np.sqrt(np.mean((dist_f[mask_f]) ** 2))) + dpos = float(np.linalg.norm(corr[:3, 3])) + drot = float(np.degrees(np.arccos(np.clip((np.trace(corr[:3, :3]) - 1.0) * 0.5, -1.0, 1.0)))) + + ok = bool( + fit >= float(accept_fit_local) + and rmse <= float(accept_rmse_local) + and dpos <= float(max_step_trans_local) + and drot <= float(max_step_rot_local) + ) + info.update( + { + "ran": True, + "status": "accepted" if ok else "rejected", + "fitness": fit, + "rmse": rmse, + "dpos_m": dpos, + "drot_deg": drot, + "src_points": int(len(src_ds)), + "tgt_points": int(len(tgt_ds)), + "converged": bool(converged), + "inliers": int(inliers), + "period_sec": float(period_sec_local), + "accept_fit": float(accept_fit_local), + "accept_rmse": float(accept_rmse_local), + } + ) + return (corr if ok else None), info + + def check_frame_continuity(points_world: np.ndarray) -> Tuple[bool, Dict[str, Any], Optional[np.ndarray]]: + info: Dict[str, Any] = {"enabled": bool(continuity_enabled), "status": "skip"} + pts = np.asarray(points_world, dtype=np.float32) + pts_ds = voxel_downsample_np(pts, float(continuity_voxel_m)) + if (not continuity_enabled) or pts_ds is None or len(pts_ds) < int(continuity_min_points): + info.update({"status": "skip", "reason": "disabled_or_few_points", "points": int(len(pts_ds))}) + return True, info, pts_ds + if continuity_prev_pts is None or len(continuity_prev_pts) < int(continuity_min_points): + info.update({"status": "bootstrap", "points": int(len(pts_ds))}) + return True, info, pts_ds + + try: 
+ from scipy.spatial import cKDTree + tree = cKDTree(np.asarray(continuity_prev_pts, dtype=np.float64)) + dist, _ = tree.query(np.asarray(pts_ds, dtype=np.float64), k=1) + except Exception as e: + info.update({"status": "skip", "reason": f"tree_error:{e}"}) + return True, info, pts_ds + + mask = np.isfinite(dist) + if int(np.count_nonzero(mask)) < int(continuity_min_points): + info.update({"status": "skip", "reason": "few_matches"}) + return True, info, pts_ds + d = dist[mask] + inlier_ratio = float(np.mean(d <= float(continuity_match_radius_m))) + median_d = float(np.median(d)) + p90_d = float(np.percentile(d, 90.0)) + ok = bool( + inlier_ratio >= float(continuity_min_inlier_ratio) + and median_d <= float(continuity_max_median_m) + and p90_d <= float(continuity_max_p90_m) + ) + info.update( + { + "status": "ok" if ok else "reject", + "points": int(len(pts_ds)), + "inlier_ratio": float(inlier_ratio), + "median_m": float(median_d), + "p90_m": float(p90_d), + } + ) + return ok, info, pts_ds + + def cleanup_map_islands(points_world: np.ndarray) -> Tuple[np.ndarray, Dict[str, Any]]: + pts = np.asarray(points_world, dtype=np.float32) + info: Dict[str, Any] = { + "enabled": bool(cleanup_enabled), + "status": "skip", + "before": int(len(pts)), + "after": int(len(pts)), + "removed": 0, + } + if (not cleanup_enabled) or len(pts) < int(cleanup_min_total_points): + info["reason"] = "disabled_or_few_points" + return pts, info + + pts_ds = voxel_downsample_np(pts, float(cleanup_voxel_m)) + if len(pts_ds) < int(cleanup_min_cluster_points): + info["reason"] = "few_downsampled" + return pts, info + + try: + from scipy.spatial import cKDTree + tree = cKDTree(np.asarray(pts_ds, dtype=np.float64)) + except Exception as e: + info["reason"] = f"tree_error:{e}" + return pts, info + + n = int(len(pts_ds)) + visited = np.zeros((n,), dtype=bool) + comp_id = np.full((n,), -1, dtype=np.int32) + sizes: List[int] = [] + + for i in range(n): + if visited[i]: + continue + q = [i] + visited[i] = True + members: List[int] = [] + while q: + j = int(q.pop()) + members.append(j) + neigh = tree.query_ball_point(pts_ds[j], r=float(cleanup_cluster_radius_m)) + for k in neigh: + kk = int(k) + if not visited[kk]: + visited[kk] = True + q.append(kk) + cid = len(sizes) + sizes.append(len(members)) + for m in members: + comp_id[m] = int(cid) + + if not sizes: + info["reason"] = "no_components" + return pts, info + + order = sorted(range(len(sizes)), key=lambda c: int(sizes[c]), reverse=True) + keep_ids = set(order[: int(cleanup_keep_largest_n)]) + if not cleanup_strict_largest_only: + for cid, sz in enumerate(sizes): + if int(sz) >= int(cleanup_min_cluster_points): + keep_ids.add(int(cid)) + if len(keep_ids) == len(sizes): + info.update({"status": "ok", "components": int(len(sizes)), "kept_components": int(len(keep_ids))}) + return pts, info + + # Map original points to nearest downsampled component and keep only selected components. 
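The island cleanup above is radius-based connected components: a BFS over the downsampled cloud using query_ball_point defines clusters, the largest cluster(s) are kept, and (per the comment below) every original point then inherits the component of its nearest downsampled neighbor. A standalone toy version of the same clustering idea, not the worker's code:

import numpy as np
from scipy.spatial import cKDTree

pts = np.vstack([np.random.default_rng(1).normal(0.0, 0.2, (200, 3)),    # main room
                 np.random.default_rng(2).normal(15.0, 0.2, (12, 3))])   # stray island
tree = cKDTree(pts)
comp = np.full(len(pts), -1)
cid = 0
for i in range(len(pts)):
    if comp[i] >= 0:
        continue
    stack = [i]
    comp[i] = cid
    while stack:                                  # flood-fill one component
        j = stack.pop()
        for k in tree.query_ball_point(pts[j], r=0.5):
            if comp[k] < 0:
                comp[k] = cid
                stack.append(k)
    cid += 1
largest = np.argmax(np.bincount(comp))
cleaned = pts[comp == largest]                    # the 12-point island is dropped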
+ _, nn = tree.query(np.asarray(pts, dtype=np.float64), k=1) + keep_mask = np.isin(comp_id[np.asarray(nn, dtype=np.int32)], np.asarray(list(keep_ids), dtype=np.int32)) + cleaned = pts[keep_mask] + removed = int(len(pts) - len(cleaned)) + info.update( + { + "status": "ok", + "components": int(len(sizes)), + "kept_components": int(len(keep_ids)), + "before": int(len(pts)), + "after": int(len(cleaned)), + "removed": int(removed), + } + ) + return cleaned, info + + def push_loc_state() -> None: + st("INFO", {"LOC_STATE": loc_state.snapshot()}) + if diagnostics is not None: + diagnostics.log_state("LOC_STATE", _worker_diag_state()) + + def push_worker_mode() -> None: + st( + "INFO", + { + "MODE": { + "mode": str(worker_mode), + "mapping": bool(mapping_enabled), + "localize_only": bool(localize_only_enabled), + } + }, + ) + if diagnostics is not None: + diagnostics.log_state("MODE", _worker_diag_state()) + + def _place_profile_allowed() -> bool: + profile = str(stability_profile).upper().strip() + return bool(place_recog.profile_allowed(profile)) + + # ---------------- localization ---------------- + def do_localize( + force: bool = False, + source: str = "MANUAL", + src_override: Optional[np.ndarray] = None, + ) -> Optional[Dict[str, Any]]: + nonlocal ref_map_path, last_localize_t, last_localize_points, last_localize_transform, last_localize_confidence + nonlocal ref_pcd_cache, ref_points_cache, ref_nav_points_cache, ref_nav_cache_src_n, ref_cache_path + nonlocal slam_to_ref_transform, slam_to_ref_valid + nonlocal approx_pose_guess_tf, approx_pose_guess_until_t, approx_pose_guess_fail_count, approx_pose_guess_success_count + now = _now() + src_tag = str(source).upper().strip() + use_live_src = src_override is not None + if not loc_cfg.enabled: + if force: + st("ERROR", "LOCALIZE disabled in config.") + loc_state.update(None, now) + push_loc_state() + return None + if not ref_map_path: + if force: + st("ERROR", "LOCALIZE: load a reference map first.") + loc_state.update(None, now) + push_loc_state() + return None + + try: + period = max(0.1, float(loc_cfg.period_sec)) + if use_live_src: + # Live relocalization should be frequent but not every frame to avoid jitter. + period = min(period, float(loc_live_period_sec)) + if (not force) and ((now - last_localize_t) < period): + return None + + req_loc = int(loc_cfg.min_points_for_localize) + if use_live_src: + cur = np.asarray(src_override, dtype=np.float32) + cur_n = int(len(cur)) + req_live = max(80, min(req_loc, 1000)) + if cur_n < req_live: + if force: + st("ERROR", f"LOCALIZE: not enough live points ({cur_n}/{req_live}).") + loc_state.update(None, now) + push_loc_state() + return None + else: + cur = stable.get_save_points() + cur_n = 0 if cur is None else int(len(cur)) + if cur is None or cur_n < req_loc: + if force: + st("ERROR", f"LOCALIZE: not enough stable points ({cur_n}/{req_loc}). 
Start mapping first.") + loc_state.update(None, now) + push_loc_state() + return None + if (not force) and ((cur_n - int(last_localize_points)) < int(loc_cfg.min_new_points)): + return None + + import open3d as o3d + + if ref_pcd_cache is None or ref_cache_path != ref_map_path: + ref_pcd_cache = o3d.io.read_point_cloud(ref_map_path) + if hasattr(ref_pcd_cache, "points"): + ref_points_cache = np.asarray(ref_pcd_cache.points, dtype=np.float32).copy() + else: + ref_points_cache = np.asarray(ref_pcd_cache, dtype=np.float32).reshape((-1, 3)).copy() + ref_nav_points_cache = None + ref_nav_cache_src_n = 0 + ref_cache_path = ref_map_path + if ref_points_cache is None: + if hasattr(ref_pcd_cache, "points"): + ref_points_cache = np.asarray(ref_pcd_cache.points, dtype=np.float32).copy() + else: + ref_points_cache = np.asarray(ref_pcd_cache, dtype=np.float32).reshape((-1, 3)).copy() + ref_nav_points_cache = None + ref_nav_cache_src_n = 0 + ref_np_full = np.asarray(ref_points_cache, dtype=np.float32) + if len(ref_np_full) < int(loc_cfg.min_points_for_localize): + if force: + st("ERROR", "LOCALIZE: reference map has too few points.") + loc_state.update(None, now) + push_loc_state() + return None + if bool(place_cfg.enabled) and _place_profile_allowed(): + if (not bool(place_recog.index_ready)) or (str(place_recog.ref_path or "") != str(ref_map_path)): + pr_info = place_recog.build(ref_np_full, ref_map_path) + st("INFO", {"PLACE_RECOG": pr_info}) + + cur_np_full = np.asarray(cur, dtype=np.float32) + if cur_np_full.ndim != 2 or cur_np_full.shape[1] != 3: + loc_state.update(None, now) + push_loc_state() + return None + + # Once localized, keep tracking in a local window around the last pose + # to avoid jumping to similar structures elsewhere in a large map. + tracking_locked = bool( + use_live_src + and slam_to_ref_valid + and src_tag not in ("ON_LOAD", "RECOVERY") + and str(loc_state.state).upper().strip() in ("TRACKING", "DEGRADED") + ) + ref_np_track = np.asarray(ref_np_full, dtype=np.float32) + tracking_window_used = False + tracking_window_reason = "full_ref" + if tracking_locked and loc_tracking_window_enabled and len(ref_np_track) > 0: + try: + center = np.asarray(last_localize_transform[:3, 3], dtype=np.float32) + if np.isfinite(center).all(): + rel = ref_np_track - center + d2 = np.einsum("ij,ij->i", rel, rel) + win2 = float(loc_tracking_window_radius_m) * float(loc_tracking_window_radius_m) + keep = d2 <= win2 + n_keep = int(np.count_nonzero(keep)) + if n_keep >= int(loc_tracking_window_min_ref_points): + ref_np_track = ref_np_track[keep] + tracking_window_used = True + tracking_window_reason = "window" + else: + tracking_window_reason = "window_too_few" + except Exception: + tracking_window_reason = "window_error" + if len(ref_np_track) > int(loc_tracking_window_max_ref_points): + step = max(2, int(np.ceil(float(len(ref_np_track)) / float(loc_tracking_window_max_ref_points)))) + ref_np_track = ref_np_track[::step] + if tracking_window_used: + tracking_window_reason = "window_decimated" + else: + tracking_window_reason = "full_ref_decimated" + + voxel = float(loc_cfg.voxel_localize) + if use_live_src: + voxel = min(voxel, 0.16) + max_corr = max(0.5, float(loc_cfg.max_corr_mult) * voxel) + if use_live_src: + max_corr = min(max_corr, 0.95) + accept_fit_target = float(loc_cfg.accept_fitness) + accept_rmse_target = float(loc_cfg.accept_rmse) + guess_bootstrap = bool( + approx_pose_guess_tf is not None + and float(now) <= float(approx_pose_guess_until_t) + and (not slam_to_ref_valid) + ) + 
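Once tracking is locked, the reference cloud above is cropped to a sphere around the last accepted pose so ICP cannot snap to a look-alike structure elsewhere in a large map, and the crop is only applied when enough reference points survive it. Reduced to its core (a sketch; the helper name is hypothetical):

import numpy as np

def crop_tracking_window(ref_pts, center, radius_m, min_keep):
    """Return (points, window_used): crop to a sphere, else fall back to the full cloud."""
    rel = np.asarray(ref_pts, dtype=np.float32) - np.asarray(center, dtype=np.float32).reshape(1, 3)
    keep = np.einsum("ij,ij->i", rel, rel) <= float(radius_m) ** 2
    if int(np.count_nonzero(keep)) >= int(min_keep):
        return ref_pts[keep], True
    return ref_pts, False

ref = np.random.default_rng(0).uniform(-20, 20, (50000, 3)).astype(np.float32)
win, used = crop_tracking_window(ref, center=np.zeros(3), radius_m=8.0, min_keep=450)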
guess_strict_local_active = bool( + guess_bootstrap and (not slam_to_ref_valid) and bool(loc_guess_strict_local) + ) + if use_live_src: + # Slightly stricter acceptance in live mode to reduce wobble while turning. + accept_fit_target = max(0.34, min(0.98, accept_fit_target + 0.08)) + accept_rmse_target = min(accept_rmse_target, 0.26) + if guess_bootstrap: + # If user provided approximate location, allow slight environmental changes. + max_corr = min(1.15, max_corr * 1.15) + accept_fit_target = max(0.22, accept_fit_target - 0.04) + accept_rmse_target = max(accept_rmse_target, 0.34) + + init = np.array(last_localize_transform, dtype=np.float64, copy=True) + approx_guess_used = False + approx_guess_active = bool(guess_bootstrap) + if approx_guess_active and not slam_to_ref_valid: + init = np.asarray(approx_pose_guess_tf, dtype=np.float64).copy() + approx_guess_used = True + # Bootstrap with local reference crop around approximate click. + try: + center_guess = np.asarray(init[:3, 3], dtype=np.float32) + rel = ref_np_track - center_guess.reshape((1, 3)) + d2 = np.einsum("ij,ij->i", rel, rel) + local_r2 = float(loc_guess_local_radius_m) * float(loc_guess_local_radius_m) + wide_r2 = float(loc_guess_crop_radius_m) * float(loc_guess_crop_radius_m) + keep_local = d2 <= local_r2 + n_local = int(np.count_nonzero(keep_local)) + min_local_n = int(loc_tracking_window_min_ref_points) + if n_local >= min_local_n: + ref_np_track = ref_np_track[keep_local] + tracking_window_used = True + tracking_window_reason = "approx_local_crop" + else: + keep_wide = d2 <= wide_r2 + n_wide = int(np.count_nonzero(keep_wide)) + min_wide_n = max(120, min_local_n // 2) if guess_strict_local_active else min_local_n + if n_wide >= min_wide_n: + ref_np_track = ref_np_track[keep_wide] + tracking_window_used = True + tracking_window_reason = "approx_wide_crop" + else: + tracking_window_reason = "approx_crop_too_few" + except Exception: + tracking_window_reason = "approx_crop_error" + + src_d = voxel_downsample_np(cur_np_full, voxel) + tgt_d = voxel_downsample_np(ref_np_track, voxel) + if len(src_d) < 40 or len(tgt_d) < 40: + if force: + st("ERROR", "LOCALIZE: not enough downsampled points for ICP.") + loc_state.update(None, now) + push_loc_state() + return None + + # Coarse-to-fine ICP improves convergence when the initial offset is large. 
+ coarse_voxel = max(voxel, 0.35) + src_c = voxel_downsample_np(cur_np_full, coarse_voxel) + coarse_tgt_src = ref_np_track if (tracking_locked or approx_guess_used) else ref_np_full + tgt_c = voxel_downsample_np(coarse_tgt_src, coarse_voxel) + def _run_local_icp_chain(init_tf: np.ndarray) -> Dict[str, Any]: + seed = np.asarray(init_tf, dtype=np.float64).copy() + if len(src_c) > 40 and len(tgt_c) > 40: + coarse = _icp_point_to_point_np( + src_c, + tgt_c, + seed, + max_corr_m=max_corr * 1.6, + max_iter=max(15, int(loc_cfg.icp_max_iter) // 2), + min_corr=30, + ) + seed = np.array(coarse.get("transform", seed), dtype=np.float64, copy=True) + return _icp_point_to_point_np( + src_d, + tgt_d, + seed, + max_corr_m=max_corr, + max_iter=max(10, int(loc_cfg.icp_max_iter)), + min_corr=35, + ) + + reg = _run_local_icp_chain(init) + yaw_hypotheses = 0 + place_query: Dict[str, Any] = {"ok": False, "reason": "disabled"} + if approx_guess_active and not slam_to_ref_valid and float(loc_guess_yaw_step_deg) < 180.0: + base_yaw = float(_yaw_deg_from_tf(init)) + yaw_offsets = np.arange(-180.0, 180.0, float(loc_guess_yaw_step_deg), dtype=np.float64) + best_reg = dict(reg) + for yaw_off in yaw_offsets: + if abs(float(yaw_off)) < 1e-6: + continue + cand = np.asarray(init, dtype=np.float64).copy() + rz = _tf_from_xyzyaw(0.0, 0.0, 0.0, base_yaw + float(yaw_off)) + cand[:3, :3] = np.asarray(rz[:3, :3], dtype=np.float64) + rr = _run_local_icp_chain(cand) + yaw_hypotheses += 1 + better = (float(rr.get("fitness", 0.0)) > float(best_reg.get("fitness", 0.0))) or ( + float(rr.get("fitness", 0.0)) == float(best_reg.get("fitness", 0.0)) + and float(rr.get("rmse", 9e9)) < float(best_reg.get("rmse", 9e9)) + ) + if better: + best_reg = rr + reg = dict(best_reg) + used_recovery = False + + def accepted(rr: Dict[str, Any]) -> bool: + return bool( + float(rr.get("fitness", 0.0)) >= float(accept_fit_target) + and float(rr.get("rmse", 9e9)) <= float(accept_rmse_target) + ) + + # Global relocalization fallback: + # only for bootstrap/lost states, not for normal LIVE tracking. + allow_global_reloc = bool( + (not slam_to_ref_valid) + or src_tag in ("ON_LOAD", "RECOVERY") + or str(loc_state.state).upper().strip() in ("LOST", "RECOVERY") + ) + global_reloc_blocked_by_guess = False + if guess_strict_local_active and (not slam_to_ref_valid): + if int(approx_pose_guess_fail_count) < int(loc_guess_global_fallback_after_failures): + allow_global_reloc = False + global_reloc_blocked_by_guess = True + if (not accepted(reg)) and allow_global_reloc: + src_np = np.asarray(src_d, dtype=np.float64) + # Search against the full reference map to recover from wrong initial position. 
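Every global-relocalization hypothesis in the loop below is built the same way: pick a map anchor, pick a trial yaw, and place the live scan's centroid on that anchor at that heading. Condensed into a helper (a hypothetical factoring of the code that follows, shown for clarity):

import numpy as np

def hypothesis_tf(anchor, yaw_deg, c_src):
    """4x4 seed transform: rotate the live cloud by yaw, move its centroid onto `anchor`."""
    yaw = np.deg2rad(float(yaw_deg))
    cy, sy = np.cos(yaw), np.sin(yaw)
    rz = np.array([[cy, -sy, 0.0], [sy, cy, 0.0], [0.0, 0.0, 1.0]])
    T = np.eye(4)
    T[:3, :3] = rz
    T[:3, 3] = np.asarray(anchor, dtype=np.float64) - rz @ np.asarray(c_src, dtype=np.float64)
    return T

# one coarse-ICP seed per (anchor, yaw) pair, e.g. 24 anchors x 6 yaw steps
c_src = np.zeros(3)
anchors = np.array([[0.0, 0.0, 0.0], [5.0, 2.0, 0.0]])
seeds = [hypothesis_tf(a, y, c_src) for a in anchors for y in np.arange(-180.0, 180.0, 60.0)]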
+ tgt_global = voxel_downsample_np(ref_np_full, max(float(voxel), 0.24)) + tgt_np = np.asarray(tgt_global, dtype=np.float64) + if len(src_np) > 20 and len(tgt_np) > 40: + c_src = np.mean(src_np, axis=0) + anchors = np.zeros((0, 3), dtype=np.float64) + use_place_retrieval = bool(place_cfg.enabled and _place_profile_allowed()) + if use_place_retrieval: + approx_center = None + if approx_pose_guess_tf is not None: + try: + approx_center = np.asarray(approx_pose_guess_tf[:3, 3], dtype=np.float32) + except Exception: + approx_center = None + place_query = place_recog.query(cur_np_full, approx_center=approx_center) + if bool(place_query.get("ok", False)): + anchors = np.asarray( + [c.get("center", [0.0, 0.0, 0.0]) for c in place_query.get("candidates", [])], + dtype=np.float64, + ).reshape((-1, 3)) + if len(anchors) == 0: + anchors = voxel_downsample_np( + np.asarray(tgt_np, dtype=np.float32), + max(float(loc_global_reloc_anchor_voxel_m), float(voxel) * 3.0), + ) + anchors = np.asarray(anchors, dtype=np.float64) + if len(anchors) == 0: + anchors = np.asarray([np.mean(tgt_np, axis=0)], dtype=np.float64) + if len(anchors) > int(loc_global_reloc_max_anchors): + step = max(1, int(np.ceil(float(len(anchors)) / float(loc_global_reloc_max_anchors)))) + anchors = anchors[::step][: int(loc_global_reloc_max_anchors)] + + yaw_step = float(loc_global_reloc_yaw_step_deg) + yaw_candidates = list(np.arange(-180.0, 180.0, yaw_step, dtype=np.float64)) + if 0.0 not in yaw_candidates: + yaw_candidates.append(0.0) + + best = dict(reg) + for anchor in anchors: + anchor_v = np.asarray(anchor, dtype=np.float64) + for yaw_deg in yaw_candidates: + yaw = np.deg2rad(float(yaw_deg)) + cy = float(np.cos(yaw)) + sy = float(np.sin(yaw)) + rz = np.array( + [[cy, -sy, 0.0], [sy, cy, 0.0], [0.0, 0.0, 1.0]], + dtype=np.float64, + ) + init_global = np.eye(4, dtype=np.float64) + init_global[:3, :3] = rz + init_global[:3, 3] = anchor_v - (rz @ c_src) + rr = _icp_point_to_point_np( + src_np, + tgt_np, + init_global, + max_corr_m=float(max_corr) * float(loc_global_reloc_corr_mult), + max_iter=int(loc_global_reloc_coarse_iter), + min_corr=int(loc_global_reloc_min_corr), + ) + better = (float(rr.get("fitness", 0.0)) > float(best.get("fitness", 0.0))) or ( + float(rr.get("fitness", 0.0)) == float(best.get("fitness", 0.0)) + and float(rr.get("rmse", 9e9)) < float(best.get("rmse", 9e9)) + ) + if better: + best = rr + + # Refine the best global candidate. 
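+ # The refinement pass re-runs ICP from the winning coarse pose with a
+ # tighter correspondence gate and a larger iteration budget; the result is
+ # kept only if it improves on (fitness, then rmse), so a refinement that
+ # drifts away from the coarse optimum is simply discarded.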
+ best_tf = np.asarray(best.get("transform", np.eye(4, dtype=np.float64)), dtype=np.float64) + best_refined = _icp_point_to_point_np( + src_np, + tgt_np, + best_tf, + max_corr_m=float(max_corr) * max(1.2, float(loc_global_reloc_corr_mult) * 0.8), + max_iter=int(loc_global_reloc_refine_iter), + min_corr=max(20, int(loc_global_reloc_min_corr)), + ) + if (float(best_refined.get("fitness", 0.0)) > float(best.get("fitness", 0.0))) or ( + float(best_refined.get("fitness", 0.0)) == float(best.get("fitness", 0.0)) + and float(best_refined.get("rmse", 9e9)) < float(best.get("rmse", 9e9)) + ): + best = best_refined + + if (float(best.get("fitness", 0.0)) > float(reg.get("fitness", 0.0))) or ( + float(best.get("fitness", 0.0)) == float(reg.get("fitness", 0.0)) + and float(best.get("rmse", 9e9)) < float(reg.get("rmse", 9e9)) + ): + reg = best + used_recovery = True + + reg_tf = np.asarray(reg.get("transform", np.eye(4, dtype=np.float64)), dtype=np.float64) + fit = float(reg.get("fitness", 0.0)) + rmse = float(reg.get("rmse", 9e9)) + acc = accepted(reg) + temporal_guard_reject = False + bootstrap_pending = False + bootstrap_ok = False + guess_offset_m = -1.0 + if approx_guess_active and (approx_pose_guess_tf is not None): + try: + guess_center = np.asarray(approx_pose_guess_tf[:3, 3], dtype=np.float64) + guess_offset_m = float(np.linalg.norm(np.asarray(reg_tf[:3, 3], dtype=np.float64) - guess_center)) + except Exception: + guess_offset_m = -1.0 + if guess_strict_local_active and guess_offset_m >= 0.0: + if guess_offset_m > float(loc_guess_max_start_offset_m): + acc = False + bidir = _bidir_match_stats(src_d, tgt_d, reg_tf, max_corr_m=max_corr) + if bool(loc_bidir_enabled) and bool(bidir.get("ok", False)): + if ( + float(bidir.get("bidir_inlier", 0.0)) < float(loc_bidir_min_ratio) + or float(bidir.get("bidir_rmse", 9e9)) > float(loc_bidir_max_rmse) + ): + acc = False + if use_live_src and slam_to_ref_valid and src_tag not in ("RECOVERY", "ON_LOAD"): + dtr_m, drot_deg = _tf_delta(last_localize_transform, reg_tf) + max_step_t = float(loc_live_track_max_step_trans_m) if (localize_only_enabled and loc_live_track_strict) else 0.35 + max_step_r = float(loc_live_track_max_step_rot_deg) if (localize_only_enabled and loc_live_track_strict) else 22.0 + if (dtr_m > max_step_t or drot_deg > max_step_r) and (fit < (accept_fit_target + 0.10)): + acc = False + temporal_guard_reject = True + if use_live_src and slam_to_ref_valid and src_tag not in ("RECOVERY", "ON_LOAD") and acc: + # Keep tracking lock conservative to prevent map-frame drift. 
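+ # Rationale: in live mode the accepted transform is blended into the
+ # published map frame every cycle, so a long run of marginal fits would
+ # accumulate into visible drift. The checks below therefore demand a higher
+ # fitness (and bidirectional inlier ratio) than plain acceptance and route
+ # failures through the temporal guard instead.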
+ if localize_only_enabled and loc_live_track_strict: + live_fit_min = max(float(loc_live_track_min_fit), float(accept_fit_target) + 0.02) + else: + live_fit_min = max(0.38, float(accept_fit_target) + 0.04) + if fit < live_fit_min: + acc = False + temporal_guard_reject = True + elif bool(loc_bidir_enabled) and bool(bidir.get("ok", False)): + if localize_only_enabled and loc_live_track_strict: + live_bidir_min = max(float(loc_live_track_min_bidir), float(loc_bidir_min_ratio)) + else: + live_bidir_min = max(0.24, float(loc_bidir_min_ratio) + 0.06) + if float(bidir.get("bidir_inlier", 0.0)) < live_bidir_min: + acc = False + temporal_guard_reject = True + if approx_guess_active and (not slam_to_ref_valid): + bootstrap_ok = bool(acc) + if bootstrap_ok and (fit < float(loc_guess_bootstrap_min_fitness)): + bootstrap_ok = False + if bootstrap_ok and bool(bidir.get("ok", False)): + if float(bidir.get("bidir_inlier", 0.0)) < float(loc_guess_bootstrap_min_bidir): + bootstrap_ok = False + if bootstrap_ok and guess_offset_m >= 0.0: + if guess_offset_m > float(loc_guess_max_start_offset_m): + bootstrap_ok = False + if bootstrap_ok: + approx_pose_guess_success_count += 1 + # Seed next bootstrap attempt from this best candidate. + loc_frames.last_alignment = np.array(reg_tf, dtype=np.float64, copy=True) + _sync_loc_vars_from_service() + if int(approx_pose_guess_success_count) < int(loc_guess_bootstrap_min_hits): + bootstrap_pending = True + acc = False + else: + approx_pose_guess_success_count = 0 + fit_floor = max(1e-6, float(loc_state_cfg.lost_fitness)) + fit_span = max(1e-6, float(accept_fit_target) - fit_floor) + fit_score = float(np.clip((fit - fit_floor) / fit_span, 0.0, 1.0)) + rmse_cap = max(1e-6, float(accept_rmse_target) * 1.5) + rmse_score = float(np.clip((rmse_cap - rmse) / rmse_cap, 0.0, 1.0)) + confidence = float(np.clip(0.62 * fit_score + 0.38 * rmse_score, 0.0, 1.0)) + if acc: + confidence = max(confidence, 0.75) + if acc and use_live_src and slam_to_ref_valid and src_tag != "RECOVERY": + # Low-pass localization transform in live mode to reduce visual map shake. 
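+ # _blend_rigid_tf (defined elsewhere in this file) is assumed to interpolate
+ # translation linearly and rotation on the manifold toward the new estimate,
+ # i.e. a one-pole low-pass T_pub = blend(T_prev, T_new, alpha): alpha=0.45
+ # when the fit is comfortably above target, 0.30 for borderline fits.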
+ alpha = 0.45 if fit >= (accept_fit_target + 0.08) else 0.30 + reg_tf = _blend_rigid_tf(last_localize_transform, reg_tf, alpha_t=alpha, alpha_r=alpha) + + sigma_xy = float(max(0.02, rmse * (1.3 - fit_score))) + sigma_z = float(max(0.03, rmse * (1.6 - fit_score))) + sigma_yaw = float(max(0.02, (1.0 - fit_score) * 0.35)) + + result: Dict[str, Any] = { + "fitness": fit, + "rmse": rmse, + "max_corr": float(max_corr), + "accepted": acc, + "transform": np.array(reg_tf, dtype=np.float64), + "source": src_tag, + "src_points": int(len(src_d)), + "ref_points": int(len(tgt_d)), + "ref_points_full": int(len(ref_np_full)), + "tracking_window_used": bool(tracking_window_used), + "tracking_window_reason": str(tracking_window_reason), + "recovery_used": bool(used_recovery), + "temporal_guard_reject": bool(temporal_guard_reject), + "yaw_hypotheses": int(yaw_hypotheses), + "confidence": float(confidence), + "covariance_diag": [sigma_xy, sigma_xy, sigma_z, sigma_yaw, sigma_yaw, sigma_yaw], + "live_only": bool(use_live_src), + "global_reloc_allowed": bool(allow_global_reloc), + "global_reloc_blocked_by_guess": bool(global_reloc_blocked_by_guess), + "bidir_ok": bool(bidir.get("ok", False)), + "inlier_live_to_ref": float(bidir.get("live_to_ref_inlier", 0.0)), + "inlier_ref_to_live": float(bidir.get("ref_to_live_inlier", 0.0)), + "inlier_bidir": float(bidir.get("bidir_inlier", 0.0)), + "bidir_rmse": float(bidir.get("bidir_rmse", 9e9)), + "approx_guess_used": bool(approx_guess_used), + "approx_guess_fail_count": int(approx_pose_guess_fail_count), + "approx_guess_success_count": int(approx_pose_guess_success_count), + "approx_guess_offset_m": float(guess_offset_m), + "bootstrap_pending": bool(bootstrap_pending), + "place_retrieval_ok": bool(place_query.get("ok", False)), + "place_retrieval_reason": str(place_query.get("reason", "na")), + "place_retrieval_backend": str(place_query.get("backend", place_recog.backend_active)), + "place_retrieval_top_score": float(place_query.get("top_score", 0.0)), + "place_retrieval_candidates": int(len(place_query.get("candidates", []))), + } + + last_localize_t = now + if not use_live_src: + last_localize_points = cur_n + if result["accepted"]: + prev_tf = np.array(slam_to_ref_transform, dtype=np.float64, copy=True) if slam_to_ref_valid else None + loc_frames.set_reference_alignment(reg_tf, confidence) + _sync_loc_vars_from_service() + if use_live_src and prev_tf is not None and submap_mode_enabled and submap_mapper.has_points: + try: + delta_tf = np.asarray(slam_to_ref_transform, dtype=np.float64) @ np.linalg.inv(prev_tf) + dt_m, dr_deg = _tf_delta(np.eye(4, dtype=np.float64), delta_tf) + if dt_m > 1.20 or dr_deg > 45.0: + # Large relocalization jump: reset local/global buffer to avoid ghost structures. 
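+ # Corrections under ~1.20 m / 45 deg are applied to the existing submap as a
+ # rigid transform (apply_correction below), which keeps the local buffer
+ # usable; anything larger means the buffer was accumulated in the wrong
+ # place, and transforming it would smear stale geometry over the new pose.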
+ submap_mapper.reset() + elif dt_m > 1e-4 or dr_deg > 1e-3: + submap_mapper.apply_correction(delta_tf) + except Exception as _sub_exc: + st("WARN", f"[submap] correction after relocalization failed: {_sub_exc}") + approx_pose_guess_tf = None + approx_pose_guess_until_t = 0.0 + approx_pose_guess_fail_count = 0 + approx_pose_guess_success_count = 0 + session_store.record_success( + ref_map_path, + last_localize_transform, + float(result["fitness"]), + float(result["rmse"]), + ) + else: + if approx_guess_active and (not slam_to_ref_valid): + if not bootstrap_pending: + approx_pose_guess_fail_count += 1 + loc_frames.update_confidence(max(0.0, min(1.0, loc_frames.confidence * 0.85))) + _sync_loc_vars_from_service() + safety.mark_localization(bool(result["accepted"]), now) + loc_state.update(result, now) + result["state"] = str(loc_state.state) + st("INFO", {"LOCALIZE": result}) + push_loc_state() + return result + + except Exception as e: + safety.mark_localization(False, now) + loc_state.update(None, now) + push_loc_state() + if diagnostics is not None: + diagnostics.log_exception("LOCALIZE failed", e, _worker_diag_state()) + st("ERROR", f"LOCALIZE failed: {e}") + return None + + def maybe_auto_localize(src_live: Optional[np.ndarray] = None): + if not loc_cfg.enabled: + return + if not ref_map_path: + return + if src_live is not None and localize_only_enabled: + do_localize(force=False, source="LIVE_ONLY", src_override=src_live) + else: + do_localize(force=False, source="AUTO") + t = _now() + if loc_state.should_recover(t): + loc_state.enter_recovery(t) + push_loc_state() + do_localize(force=True, source="RECOVERY", src_override=src_live if localize_only_enabled else None) + + def apply_density_mode(mode: str): + nonlocal density_mode, live_stride + m = str(mode).upper().strip() + if m == "LOW": + density_mode = "LOW" + live_stride = max(2, base_stride * 4) + elif m == "HIGH": + density_mode = "HIGH" + live_stride = 1 + else: + density_mode = "MEDIUM" + live_stride = base_stride + st("INFO", {"DENSITY": {"mode": density_mode, "stride": int(live_stride)}}) + + def _rebuild_filter_runtime(keep_existing: bool = True) -> None: + nonlocal filt + seed_pts = None + if keep_existing and stable is not None: + try: + seed_pts = stable.get_points() + except Exception: + seed_pts = None + + vf_cfg = VFilterCfg( + voxel_size=float(runtime_filter_voxel), + hit_threshold=int(runtime_filter_hit_threshold), + decay_seconds=float(runtime_filter_decay), + max_voxels=int(filt_cfg.max_voxels), + ) + new_f = VoxelPersistenceFilter(vf_cfg) + if seed_pts is not None and len(seed_pts) > 0 and hasattr(new_f, "seed_stable_points"): + try: + new_f.seed_stable_points( + seed_pts, + now=_now(), + hit_count=int(runtime_filter_hit_threshold), + clear_existing=True, + ) + except Exception: + pass + filt = new_f + + def apply_filter_tuning(payload: Any) -> None: + nonlocal runtime_filter_voxel, runtime_filter_hit_threshold, runtime_filter_decay + if not isinstance(payload, dict): + st("WARN", "SET_FILTER_TUNING ignored: payload must be dict.") + return + try: + runtime_filter_voxel = max(0.02, float(payload.get("voxel_size", runtime_filter_voxel))) + except Exception: + pass + try: + runtime_filter_hit_threshold = max(1, int(payload.get("hit_threshold", runtime_filter_hit_threshold))) + except Exception: + pass + try: + runtime_filter_decay = max(0.5, float(payload.get("decay_seconds", runtime_filter_decay))) + except Exception: + pass + + _rebuild_filter_runtime(keep_existing=True) + st( + "INFO", + { + "FILTER_TUNING": { + 
"voxel_size": float(runtime_filter_voxel), + "hit_threshold": int(runtime_filter_hit_threshold), + "decay_seconds": float(runtime_filter_decay), + "profile": str(stability_profile), + } + }, + ) + + def apply_stability_profile(profile_name: Any) -> None: + nonlocal stability_profile + p = str(profile_name or "BALANCED").upper().strip() + presets = { + # Keep map history much longer during exploration so points do not fade while moving away. + "MAP_NEW": {"hit_threshold": 2, "decay_seconds": 1800.0, "voxel_size": 0.10, "recommended_density": "MEDIUM"}, + "LOCALIZE_MAP": {"hit_threshold": 3, "decay_seconds": 45.0, "voxel_size": 0.20, "recommended_density": "MEDIUM"}, + "LIVE_NAV_MAP": {"hit_threshold": 2, "decay_seconds": 18.0, "voxel_size": 0.20, "recommended_density": "MEDIUM"}, + "LIVE_NAV_NO_MAP": {"hit_threshold": 2, "decay_seconds": 14.0, "voxel_size": 0.18, "recommended_density": "HIGH"}, + "QUICK_DEMO": {"hit_threshold": 2, "decay_seconds": 8.0, "voxel_size": 0.20, "recommended_density": "HIGH"}, + "BALANCED": { + "voxel_size": float(max(0.02, filt_cfg.voxel_size)), + "hit_threshold": int(max(1, filt_cfg.hits_threshold)), + "decay_seconds": float(max(0.5, filt_cfg.strict_sec if filt_cfg.use_strict else filt_cfg.window_sec)), + "recommended_density": "MEDIUM", + }, + } + if p not in presets: + st("WARN", f"Unknown stability profile: {p}.") + return + stability_profile = p + preset = presets[p] + apply_filter_tuning( + { + "voxel_size": float(preset.get("voxel_size", runtime_filter_voxel)), + "hit_threshold": int(preset["hit_threshold"]), + "decay_seconds": float(preset["decay_seconds"]), + } + ) + apply_density_mode(str(preset.get("recommended_density", density_mode))) + st( + "INFO", + { + "WORKFLOW_PROFILE": { + "name": str(stability_profile), + "recommended_density": str(preset.get("recommended_density", density_mode)), + } + }, + ) + + def _submap_profile_allowed() -> bool: + profile = str(stability_profile).upper().strip() + allowed = {str(x).upper().strip() for x in tuple(submap_cfg.apply_profiles)} + return profile in allowed + + def apply_min_stable_points(value: Any) -> None: + try: + v = int(value) + except Exception: + st("WARN", f"SET_MIN_STABLE_POINTS ignored: invalid value {value!r}") + return + v = int(np.clip(v, 50, 5_000_000)) + map_cfg.min_points_to_save = v + loc_cfg.min_points_for_localize = v + nav_cfg.min_points = v + try: + nav_exporter.update_config(nav_cfg) + except Exception: + pass + st("INFO", {"MIN_STABLE_POINTS": {"value": int(v)}}) + + def apply_loop_closure_mode(enabled: Any) -> None: + e = _as_bool(enabled, False) + loop_cfg.enabled = bool(e) + loop_backend.cfg.enabled = bool(e) + st("INFO", {"LOOP_MODE": {"enabled": bool(e)}}) + + def apply_loc_state_machine_mode(enabled: Any) -> None: + e = _as_bool(enabled, True) + loc_state_cfg.enabled = bool(e) + loc_state.set_enabled(bool(e)) + st("INFO", {"LOC_MACHINE": {"enabled": bool(e)}}) + push_loc_state() + + def apply_submap_mode(payload: Any) -> None: + nonlocal submap_cfg, submap_mapper, submap_mode_enabled + merged = { + "enabled": bool(submap_mode_enabled), + "local_window_frames": int(submap_cfg.local_window_frames), + "local_voxel_m": float(submap_cfg.local_voxel_m), + "global_voxel_m": float(submap_cfg.global_voxel_m), + "merge_period_sec": float(submap_cfg.merge_period_sec), + "merge_min_translation_m": float(submap_cfg.merge_min_translation_m), + "merge_min_rotation_deg": float(submap_cfg.merge_min_rotation_deg), + "max_global_points": int(submap_cfg.max_global_points), + "display_max_points": 
int(submap_cfg.display_max_points), + "apply_profiles": list(submap_cfg.apply_profiles), + } + if isinstance(payload, dict): + merged.update(payload) + elif payload is not None: + merged["enabled"] = _as_bool(payload, bool(submap_mode_enabled)) + new_cfg = SubmapConfig.from_dict(merged) + keep_pts = True + if isinstance(payload, dict): + keep_pts = _as_bool(payload.get("keep_points", True), True) + submap_cfg = new_cfg + submap_mode_enabled = bool(new_cfg.enabled) + submap_mapper.set_config(new_cfg, keep_points=bool(keep_pts and submap_mode_enabled)) + if not submap_mode_enabled: + submap_mapper.reset() + st( + "INFO", + { + "SUBMAP_MODE": { + "enabled": bool(submap_mode_enabled), + "profile_allowed": bool(_submap_profile_allowed()), + "apply_profiles": list(submap_cfg.apply_profiles), + "local_window_frames": int(submap_cfg.local_window_frames), + "local_voxel_m": float(submap_cfg.local_voxel_m), + "global_voxel_m": float(submap_cfg.global_voxel_m), + } + }, + ) + + def apply_approx_pose(payload: Any) -> None: + nonlocal approx_pose_guess_tf, approx_pose_guess_until_t + nonlocal approx_pose_guess_fail_count, approx_pose_guess_success_count + nonlocal last_localize_transform + if payload is None: + approx_pose_guess_tf = None + approx_pose_guess_until_t = 0.0 + approx_pose_guess_fail_count = 0 + approx_pose_guess_success_count = 0 + st("INFO", {"APPROX_POSE": {"set": False}}) + return + if not isinstance(payload, dict): + st("WARN", "SET_APPROX_POSE ignored: payload must be dict.") + return + try: + x = float(payload.get("x")) + y = float(payload.get("y")) + except Exception: + st("WARN", "SET_APPROX_POSE ignored: x,y are required.") + return + z = float(payload.get("z", loc_guess_default_z_m)) + yaw_deg = payload.get("yaw_deg", None) + if yaw_deg is None: + yaw_deg = _yaw_deg_from_tf(last_localize_transform) + else: + yaw_deg = float(yaw_deg) + tf = _tf_from_xyzyaw(x, y, z, yaw_deg) + approx_pose_guess_tf = np.asarray(tf, dtype=np.float64) + approx_pose_guess_until_t = float(_now() + float(loc_guess_ttl_sec)) + approx_pose_guess_fail_count = 0 + approx_pose_guess_success_count = 0 + st( + "INFO", + { + "APPROX_POSE": { + "set": True, + "x": float(x), + "y": float(y), + "z": float(z), + "yaw_deg": float(yaw_deg), + "ttl_sec": float(loc_guess_ttl_sec), + } + }, + ) + + def apply_autosave(payload: Any) -> None: + nonlocal autosave_enabled, autosave_interval_sec, autosave_base_name + if not isinstance(payload, dict): + st("WARN", "SET_AUTOSAVE ignored: payload must be dict.") + return + autosave_enabled = _as_bool(payload.get("enabled", autosave_enabled), autosave_enabled) + try: + autosave_interval_sec = max(5.0, float(payload.get("interval_sec", autosave_interval_sec))) + except Exception: + pass + base_in = str(payload.get("base_name", autosave_base_name)).strip() + if base_in: + autosave_base_name = base_in + st( + "INFO", + { + "AUTOSAVE": { + "enabled": bool(autosave_enabled), + "interval_sec": float(autosave_interval_sec), + "base_name": str(autosave_base_name), + } + }, + ) + + def apply_nav_config(payload: Any) -> None: + nonlocal nav_cfg, nav_rt_cfg, nav_runtime, nav_global_planner + if not isinstance(payload, dict): + st("WARN", "SET_NAV_CONFIG ignored: payload must be dict.") + return + candidate = nav_cfg.patched(payload) + if float(candidate.z_max_m) <= float(candidate.z_min_m): + st("WARN", "SET_NAV_CONFIG ignored: z_max_m must be greater than z_min_m.") + return + nav_cfg = candidate + nav_exporter.update_config(nav_cfg) + nav_rt_cfg = nav_rt_cfg.patched( + { + "resolution_m": 
float(nav_cfg.resolution_m), + "z_min_m": float(nav_cfg.z_min_m), + "z_max_m": float(nav_cfg.z_max_m), + "padding_m": float(nav_cfg.padding_m), + "inflation_radius_m": float(nav_cfg.inflation_radius_m), + } + ) + nav_runtime = LiveCostmapRuntime(nav_rt_cfg) + nav_global_planner = GlobalAStarPlanner(blocked_cost=int(nav_rt_cfg.blocked_cost)) + st( + "INFO", + { + "NAV_CONFIG": { + "min_points": int(nav_cfg.min_points), + "resolution_m": float(nav_cfg.resolution_m), + "z_min_m": float(nav_cfg.z_min_m), + "z_max_m": float(nav_cfg.z_max_m), + "inflation_radius_m": float(nav_cfg.inflation_radius_m), + "padding_m": float(nav_cfg.padding_m), + } + }, + ) + + def apply_map_quality_config(payload: Any) -> None: + nonlocal map_quality_cfg, map_quality + if not isinstance(payload, dict): + st("WARN", "SET_MAP_QUALITY ignored: payload must be dict.") + return + merged = { + "enabled": map_quality_cfg.enabled, + "near_min_range_m": map_quality_cfg.near_min_range_m, + "ray_consistency_enabled": map_quality_cfg.ray_consistency_enabled, + "ray_bin_deg": map_quality_cfg.ray_bin_deg, + "ray_elev_bin_deg": map_quality_cfg.ray_elev_bin_deg, + "ray_max_behind_m": map_quality_cfg.ray_max_behind_m, + "ray_keep_n": map_quality_cfg.ray_keep_n, + "world_z_clip_enabled": map_quality_cfg.world_z_clip_enabled, + "world_z_min_m": map_quality_cfg.world_z_min_m, + "world_z_max_m": map_quality_cfg.world_z_max_m, + "outlier_filter_enabled": map_quality_cfg.outlier_filter_enabled, + "outlier_voxel_m": map_quality_cfg.outlier_voxel_m, + "outlier_min_points": map_quality_cfg.outlier_min_points, + } + merged.update(payload) + map_quality_cfg = IndoorMapQualityConfig.from_dict(merged) + map_quality = IndoorMapQualityFilter(map_quality_cfg) + st( + "INFO", + { + "MAP_QUALITY": { + "enabled": bool(map_quality_cfg.enabled), + "near_min_range_m": float(map_quality_cfg.near_min_range_m), + "ray_consistency_enabled": bool(map_quality_cfg.ray_consistency_enabled), + "ray_bin_deg": float(map_quality_cfg.ray_bin_deg), + "ray_elev_bin_deg": float(map_quality_cfg.ray_elev_bin_deg), + "ray_max_behind_m": float(map_quality_cfg.ray_max_behind_m), + "ray_keep_n": int(map_quality_cfg.ray_keep_n), + "world_z_clip_enabled": bool(map_quality_cfg.world_z_clip_enabled), + "world_z_min_m": float(map_quality_cfg.world_z_min_m), + "world_z_max_m": float(map_quality_cfg.world_z_max_m), + "outlier_filter_enabled": bool(map_quality_cfg.outlier_filter_enabled), + "outlier_voxel_m": float(map_quality_cfg.outlier_voxel_m), + "outlier_min_points": int(map_quality_cfg.outlier_min_points), + } + }, + ) + + def apply_nav_runtime_config(payload: Any) -> None: + nonlocal nav_rt_cfg, nav_runtime, nav_global_planner, nav_path_world, nav_last_plan_t, nav_last_update_t + if not isinstance(payload, dict): + st("WARN", "SET_NAV_RUNTIME ignored: payload must be dict.") + return + nav_rt_cfg = nav_rt_cfg.patched(payload) + nav_runtime = LiveCostmapRuntime(nav_rt_cfg) + nav_global_planner = GlobalAStarPlanner(blocked_cost=int(nav_rt_cfg.blocked_cost)) + nav_path_world = [] + nav_last_plan_t = 0.0 + nav_last_update_t = 0.0 + st( + "INFO", + { + "NAV_RUNTIME": { + "enabled": bool(nav_rt_cfg.enabled), + "resolution_m": float(nav_rt_cfg.resolution_m), + "z_min_m": float(nav_rt_cfg.z_min_m), + "z_max_m": float(nav_rt_cfg.z_max_m), + "inflation_radius_m": float(nav_rt_cfg.inflation_radius_m), + "dynamic_decay_sec": float(nav_rt_cfg.dynamic_decay_sec), + "dynamic_min_hits": int(nav_rt_cfg.dynamic_min_hits), + "blocked_cost": int(nav_rt_cfg.blocked_cost), + } + }, + ) + + def 
apply_nav_goal(payload: Any) -> None: + nonlocal nav_goal_xy, nav_path_world, nav_last_plan_t + if not isinstance(payload, dict): + st("WARN", "SET_NAV_GOAL ignored: payload must be dict.") + return + try: + x = float(payload.get("x")) + y = float(payload.get("y")) + except Exception: + st("WARN", "SET_NAV_GOAL ignored: x/y required.") + return + nav_goal_xy = (x, y) + nav_path_world = [] + nav_last_plan_t = 0.0 + st("INFO", {"NAV_GOAL": {"x": float(x), "y": float(y)}}) + + def clear_nav_goal() -> None: + nonlocal nav_goal_xy, nav_path_world + nav_goal_xy = None + nav_path_world = [] + st("INFO", {"NAV_GOAL": None}) + + def mission_start(payload: Any) -> None: + nonlocal nav_goal_xy, nav_path_world, nav_last_plan_t + if not isinstance(payload, dict): + st("WARN", "MISSION_START ignored: payload must be dict.") + return + raw = payload.get("waypoints", []) + waypoints: List[Tuple[float, float]] = [] + if isinstance(raw, list): + for wp in raw: + if isinstance(wp, dict): + try: + waypoints.append((float(wp.get("x")), float(wp.get("y")))) + except Exception: + continue + elif isinstance(wp, (list, tuple)) and len(wp) >= 2: + try: + waypoints.append((float(wp[0]), float(wp[1]))) + except Exception: + continue + mission.start(waypoints=waypoints) + goal = mission.current_goal() + nav_goal_xy = None if goal is None else (float(goal[0]), float(goal[1])) + nav_path_world = [] + nav_last_plan_t = 0.0 + st("INFO", {"MISSION": mission.snapshot()}) + + def mission_pause() -> None: + mission.pause() + st("INFO", {"MISSION": mission.snapshot()}) + + def mission_resume() -> None: + mission.resume() + st("INFO", {"MISSION": mission.snapshot()}) + + def mission_stop() -> None: + nonlocal nav_goal_xy, nav_path_world + mission.stop() + nav_goal_xy = None + nav_path_world = [] + st("INFO", {"MISSION": mission.snapshot()}) + + def _safe_record_base(name: str) -> str: + base = (name or "").strip() or "slam_recording" + for bad in ("/", "\\", ":", "*", "?", "\"", "<", ">", "|"): + base = base.replace(bad, "_") + return base + + def record_start(payload: Any) -> None: + nonlocal recording_enabled, recording_frames, recording_poses, recording_base_name, recording_drop_count + base = recording_base_name + if isinstance(payload, dict): + b = str(payload.get("base_name", "")).strip() + if b: + base = b + recording_base_name = _safe_record_base(base) + recording_frames = [] + recording_poses = [] + recording_drop_count = 0 + recording_enabled = True + st( + "INFO", + { + "RECORDING": { + "enabled": True, + "base_name": str(recording_base_name), + "max_frames": int(recording_max_frames), + } + }, + ) + + def record_stop(payload: Any) -> None: + nonlocal recording_enabled, recording_frames, recording_poses, recording_base_name + save = True + base = recording_base_name + if isinstance(payload, dict): + save = _as_bool(payload.get("save", True), True) + b = str(payload.get("base_name", "")).strip() + if b: + base = b + recording_enabled = False + if (not save) or len(recording_frames) == 0: + st( + "INFO", + { + "RECORDING": { + "enabled": False, + "frames": int(len(recording_frames)), + "saved": False, + } + }, + ) + recording_frames = [] + recording_poses = [] + return + base = _safe_record_base(base) + stamp = time.strftime("%Y%m%d_%H%M%S", time.localtime()) + out_path = Path(map_cfg.data_folder) / f"{base}_{stamp}.npz" + try: + frames_obj = np.empty((len(recording_frames),), dtype=object) + for i, fr in enumerate(recording_frames): + frames_obj[i] = np.asarray(fr, dtype=np.float32) + if len(recording_poses) == 
len(recording_frames) and len(recording_poses) > 0: + poses_np = np.stack(recording_poses, axis=0).astype(np.float32) + np.savez_compressed(out_path, frames=frames_obj, poses=poses_np) + else: + np.savez_compressed(out_path, frames=frames_obj) + st( + "INFO", + { + "RECORD_SAVED": { + "path": str(out_path), + "frames": int(len(recording_frames)), + "dropped": int(recording_drop_count), + } + }, + ) + except Exception as e: + st("ERROR", f"RECORD save failed: {e}") + recording_frames = [] + recording_poses = [] + + def _queue_lag() -> int: + try: + return int(data_q.qsize()) + except Exception: + return -1 + + def _update_perf_rollup(now_t: float, stable_count: int) -> None: + nonlocal perf_input_count, perf_publish_count, perf_last_rate_t + nonlocal perf_last_cpu_wall_t, perf_last_cpu_proc_t + nonlocal perf_input_fps, perf_publish_fps, perf_cpu_pct + nonlocal perf_queue_lag, perf_stable_growth, perf_prev_stable_count, perf_prev_stable_t + + dt_rates = float(now_t - perf_last_rate_t) + if dt_rates >= 0.8: + perf_input_fps = float(perf_input_count) / max(dt_rates, 1e-6) + perf_publish_fps = float(perf_publish_count) / max(dt_rates, 1e-6) + perf_input_count = 0 + perf_publish_count = 0 + perf_last_rate_t = now_t + + dt_cpu = float(now_t - perf_last_cpu_wall_t) + if dt_cpu >= 0.8: + cpu_now = float(time.process_time()) + perf_cpu_pct = 100.0 * float(cpu_now - perf_last_cpu_proc_t) / max(dt_cpu, 1e-6) + perf_last_cpu_proc_t = cpu_now + perf_last_cpu_wall_t = now_t + + dt_growth = float(now_t - perf_prev_stable_t) + if dt_growth >= 0.8: + perf_stable_growth = float(stable_count - perf_prev_stable_count) / max(dt_growth, 1e-6) + perf_prev_stable_count = int(stable_count) + perf_prev_stable_t = now_t + + perf_queue_lag = _queue_lag() + + def maybe_autosave(now_t: float) -> None: + nonlocal last_autosave_t + if not autosave_enabled or not mapping_enabled: + return + if (now_t - float(last_autosave_t)) < float(autosave_interval_sec): + return + try: + if cleanup_enabled: + full_pts = stable.get_points() + if full_pts is not None and len(full_pts) >= int(cleanup_min_total_points): + cleaned, _ = cleanup_map_islands(full_pts) + if cleaned is not None and len(cleaned) >= int(map_cfg.min_points_to_save): + stable.set_points(cleaned) + pts = stable.get_save_points() + n = 0 if pts is None else int(len(pts)) + req = int(map_cfg.min_points_to_save) + if n < req: + last_autosave_t = now_t + return + out_path = stable.export_map(str(autosave_base_name)) + last_autosave_t = now_t + st("INFO", {"AUTOSAVED": {"path": str(out_path), "points": int(n)}}) + except Exception as e: + last_autosave_t = now_t + st("WARN", f"AUTOSAVE failed: {e}") + + # Checkpoint the submap alongside autosave + if submap_ckpt is not None and submap_mode_enabled: + submap_ckpt.save(submap_mapper) + + # ---------------- lidar frame processor ---------------- + def process_points(points: np.ndarray): + nonlocal last_pub, mapping_enabled, localize_only_enabled, worker_mode, last_key_pose, last_frame_t + nonlocal last_guard_pose, last_guard_t, guard_reject_count, guard_last_warn_t + nonlocal odom_to_map_transform, map_lock_last_t, map_lock_accept_count, map_lock_reject_count, map_lock_last_warn_t + nonlocal continuity_prev_pts, continuity_reject_count, continuity_consecutive_rejects, continuity_last_warn_t + nonlocal rotate_freeze_count, rotate_last_warn_t + nonlocal cleanup_last_t, cleanup_removed_total, cleanup_last_info + nonlocal slam_to_ref_transform, slam_to_ref_valid, last_localize_transform + nonlocal perf_input_count, 
perf_publish_count, perf_icp_ms + nonlocal recording_enabled, recording_frames, recording_poses, recording_drop_count + nonlocal nav_goal_xy, nav_last_update_t, nav_last_plan_t, nav_path_world + nonlocal latest_live_pts, latest_live_localize_pts + nonlocal ref_points_cache, ref_nav_points_cache, ref_nav_cache_src_n + + if points is None or len(points) == 0: + return + + perf_input_count += 1 + + pts = points.astype(np.float32, copy=True) + pts[:, 1] *= -1 + pts[:, 2] *= -1 + + # Reject invalid values and out-of-range returns before SLAM. + finite_mask = np.isfinite(pts).all(axis=1) + if not np.all(finite_mask): + pts = pts[finite_mask] + if len(pts) == 0: + return + + r2 = np.einsum("ij,ij->i", pts, pts) + min_r = float(map_quality_cfg.near_min_range_m if map_quality_cfg.enabled else 0.15) + min_r2 = min_r * min_r + max_r = float(eng_cfg.max_range) + max_r2 = max_r * max_r if max_r > 0 else np.inf + range_mask = (r2 >= min_r2) & (r2 <= max_r2) + if not np.all(range_mask): + pts = pts[range_mask] + if len(pts) < 40: + return + + # pre-downsample stride from config + stride = int(live_stride) + if stride > 1: + pts = pts[::stride] + if map_quality_cfg.enabled: + pts = map_quality.apply_sensor(pts) + if len(pts) < 40: + return + latest_live_pts = np.array(pts, dtype=np.float32, copy=True) + latest_live_localize_pts = None + + pose = None + stable_pts_display = None + stable_cols = None + fusion_info: Dict[str, Any] = {} + nav_runtime_info = None + nav_info: Dict[str, Any] = {} + safety_info: Dict[str, Any] = {} + map_lock_info: Dict[str, Any] = {"status": "skip", "ran": False} + continuity_info: Dict[str, Any] = {"status": "na"} + cleanup_info: Dict[str, Any] = dict(cleanup_last_info) if isinstance(cleanup_last_info, dict) else {"status": "na"} + rotate_freeze_active = False + submap_info: Dict[str, Any] = submap_mapper.status( + active=False, + reason="idle", + profile=str(stability_profile), + profile_allowed=_submap_profile_allowed(), + ) + ref_match_points_pub = None + ref_match_colors_pub = None + ref_match_stats: Dict[str, Any] = {} + stable_points_in_global_frame = False + mode_now = str(worker_mode) + + if mapping_enabled: + now_t = _now() + dt = 0.1 if last_frame_t <= 0 else float(np.clip(now_t - last_frame_t, 0.02, 0.25)) + last_frame_t = now_t + ts = np.linspace(0.0, dt, len(pts), dtype=np.float64) + + t_icp0 = _now() + slam.register_frame(pts, ts) + perf_icp_ms = 1000.0 * (_now() - t_icp0) + odom_pose = slam.last_pose if hasattr(slam, "last_pose") else getattr(slam, "pose", None) + if odom_pose is not None: + odom_pose, fusion_info = sensor_fusion.fuse_pose(np.asarray(odom_pose, dtype=np.float64), now=now_t) + try: + pose = np.asarray(odom_to_map_transform, dtype=np.float64) @ np.asarray(odom_pose, dtype=np.float64) + except Exception: + pose = np.asarray(odom_pose, dtype=np.float64) + + # Reject obviously unstable jumps to avoid "wall on wall" artifacts. 
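+ # Step limits scale with the actual frame interval so slow frames are not
+ # falsely rejected:
+ #   scale = max(1, dt / pose_guard_ref_dt)
+ #   max_t = pose_guard_max_trans * scale,  max_r = pose_guard_max_rot_deg * scale
+ # and the rotation step is recovered from the relative rotation matrix via
+ #   drot = degrees(arccos((trace(R_rel) - 1) / 2)).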
+ unstable_jump = False + trans_step_for_mode = 0.0 + rot_step_for_mode = 0.0 + rotating_in_place = False + if pose_guard_enabled and pose is not None: + pose_np = np.asarray(pose, dtype=np.float64) + if last_guard_pose is not None: + dpos = pose_np[:3, 3] - last_guard_pose[:3, 3] + trans_step = float(np.linalg.norm(dpos)) + rrel = pose_np[:3, :3] @ last_guard_pose[:3, :3].T + rot_step = float(np.degrees(np.arccos(np.clip((np.trace(rrel) - 1.0) * 0.5, -1.0, 1.0)))) + trans_step_for_mode = float(trans_step) + rot_step_for_mode = float(rot_step) + if rotate_mode_enabled and trans_step_for_mode <= float(rotate_in_place_max_trans_m) and rot_step_for_mode >= float(rotate_in_place_min_rot_deg): + rotating_in_place = True + scale = max(1.0, float(dt) / max(1e-3, float(pose_guard_ref_dt))) + max_t = float(pose_guard_max_trans) * scale + max_r = float(pose_guard_max_rot_deg) * scale + if rotating_in_place: + max_r = max(max_r, float(rotate_pose_guard_max_rot_deg) * scale) + if trans_step > max_t or rot_step > max_r: + unstable_jump = True + guard_reject_count += 1 + pose = np.array(last_guard_pose, dtype=np.float64, copy=True) + stable_pts_display = stable.get_display_points() + if (now_t - guard_last_warn_t) > 1.0: + guard_last_warn_t = now_t + st( + "WARN", + ( + "MOTION_GUARD rejected frame: " + f"dpos={trans_step:.2f}m>{max_t:.2f}m or drot={rot_step:.1f}deg>{max_r:.1f}deg " + f"(count={guard_reject_count})" + ), + ) + else: + last_guard_pose = np.array(pose_np, dtype=np.float64, copy=True) + last_guard_t = now_t + else: + last_guard_pose = np.array(pose_np, dtype=np.float64, copy=True) + last_guard_t = now_t + elif pose is not None and rotate_mode_enabled and last_key_pose is not None: + # Fallback rotate-in-place detector when pose-guard is disabled. + try: + dpos = np.asarray(pose, dtype=np.float64)[:3, 3] - np.asarray(last_key_pose, dtype=np.float64)[:3, 3] + trans_step_for_mode = float(np.linalg.norm(dpos)) + rrel = np.asarray(pose, dtype=np.float64)[:3, :3] @ np.asarray(last_key_pose, dtype=np.float64)[:3, :3].T + rot_step_for_mode = float(np.degrees(np.arccos(np.clip((np.trace(rrel) - 1.0) * 0.5, -1.0, 1.0)))) + rotating_in_place = bool( + trans_step_for_mode <= float(rotate_in_place_max_trans_m) + and rot_step_for_mode >= float(rotate_in_place_min_rot_deg) + ) + except Exception: + rotating_in_place = False + + if not unstable_jump: + map_lock_block_frame = False + # Periodic scan-to-map correction keeps the accumulated map anchored + # and reduces duplicated structures when traversing long paths. 
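+ # compute_map_lock_correction (defined earlier in this worker) is assumed to
+ # run a scan-to-map ICP of the current frame against the stable map and to
+ # return a small corrective transform, or None on reject. Before being
+ # applied, the correction is damped: blended toward identity and clipped to
+ # max translation / rotation steps, so one noisy lock cannot yank the pose.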
+ if pose is not None: + ml_overrides = None + if rotating_in_place and rotate_mode_enabled: + ml_overrides = { + "period_sec": float(rotate_map_lock_period_sec), + "accept_fitness": float(rotate_map_lock_accept_fitness), + "accept_rmse": float(rotate_map_lock_accept_rmse), + "max_step_rotation_deg": float(rotate_map_lock_max_step_rot_deg), + } + corr_lock, map_lock_info = compute_map_lock_correction(pts, pose, now_t, overrides=ml_overrides) + if corr_lock is not None: + corr_np = np.asarray(corr_lock, dtype=np.float64) + corr_apply = np.asarray(corr_np, dtype=np.float64, copy=True) + if map_lock_apply_damping: + alpha_t = float(map_lock_apply_alpha_t) + alpha_r = float(map_lock_apply_alpha_r) + max_t_apply = float(map_lock_apply_max_trans_m) + max_r_apply = float(map_lock_apply_max_rot_deg) + if rotating_in_place and rotate_mode_enabled: + alpha_t = float(rotate_map_lock_apply_alpha_t) + alpha_r = float(rotate_map_lock_apply_alpha_r) + max_t_apply = float(rotate_map_lock_apply_max_trans_m) + max_r_apply = float(rotate_map_lock_apply_max_rot_deg) + corr_apply = _blend_rigid_tf( + np.eye(4, dtype=np.float64), + corr_apply, + alpha_t=float(alpha_t), + alpha_r=float(alpha_r), + ) + tvec = np.asarray(corr_apply[:3, 3], dtype=np.float64) + tnorm = float(np.linalg.norm(tvec)) + if tnorm > float(max_t_apply) and tnorm > 1e-9: + corr_apply[:3, 3] = tvec * (float(max_t_apply) / tnorm) + dpos_apply, drot_apply = _tf_delta(np.eye(4, dtype=np.float64), corr_apply) + if drot_apply > float(max_r_apply) and drot_apply > 1e-6: + corr_apply = _blend_rigid_tf( + np.eye(4, dtype=np.float64), + corr_apply, + alpha_t=1.0, + alpha_r=float(max_r_apply) / float(drot_apply), + ) + else: + dpos_apply, drot_apply = _tf_delta(np.eye(4, dtype=np.float64), corr_apply) + map_lock_info["applied_dpos_m"] = float(dpos_apply) + map_lock_info["applied_drot_deg"] = float(drot_apply) + map_lock_info["damped"] = bool(map_lock_apply_damping) + pose = corr_apply @ np.asarray(pose, dtype=np.float64) + loc_frames.apply_map_correction(corr_apply) + _sync_loc_vars_from_service() + if last_guard_pose is not None: + last_guard_pose = corr_apply @ np.asarray(last_guard_pose, dtype=np.float64) + if last_key_pose is not None: + last_key_pose = corr_apply @ np.asarray(last_key_pose, dtype=np.float64) + map_lock_accept_count += 1 + elif bool(map_lock_info.get("ran", False)): + map_lock_reject_count += 1 + block_on_reject_now = bool(map_lock_block_on_reject) + if rotating_in_place and rotate_mode_enabled: + block_on_reject_now = bool(rotate_map_lock_block_on_reject) + if block_on_reject_now: + map_lock_block_frame = True + # Retry map-lock quickly after a reject to recover alignment faster. 
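+ # Back-dating last_t by (period - retry_after) makes the next attempt due
+ # after retry_after_sec instead of a full period. With hypothetical values
+ # period=6.0 and retry_after=1.5, last_t = now - 4.5, so the periodic check
+ # (now' - last_t) >= 6.0 fires 1.5 s from now.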
+ map_lock_last_t = float(now_t - map_lock_period_sec + map_lock_retry_after_sec) + if (now_t - map_lock_last_warn_t) > 1.0: + map_lock_last_warn_t = now_t + fit_r = float(map_lock_info.get("fitness", 0.0)) + rmse_r = float(map_lock_info.get("rmse", 9e9)) + dpos_r = float(map_lock_info.get("dpos_m", 0.0)) + drot_r = float(map_lock_info.get("drot_deg", 0.0)) + st( + "WARN", + ( + "MAP_LOCK rejected; skipping frame integration: " + f"fit={fit_r:.2f}, rmse={rmse_r:.2f}, " + f"dpos={dpos_r:.2f}m, drot={drot_r:.1f}deg" + ), + ) + block_on_severe_now = bool(map_lock_block_on_severe) + if rotating_in_place and rotate_mode_enabled: + block_on_severe_now = bool(rotate_map_lock_block_on_severe) + if block_on_severe_now: + fit_r = float(map_lock_info.get("fitness", 0.0)) + rmse_r = float(map_lock_info.get("rmse", 9e9)) + dpos_r = float(map_lock_info.get("dpos_m", 0.0)) + drot_r = float(map_lock_info.get("drot_deg", 0.0)) + severe_reject = bool( + fit_r <= float(map_lock_severe_fit) + or rmse_r >= float(map_lock_severe_rmse) + or dpos_r >= float(map_lock_severe_trans_m) + or drot_r >= float(map_lock_severe_rot_deg) + ) + if severe_reject: + map_lock_block_frame = True + if (now_t - map_lock_last_warn_t) > 1.0: + map_lock_last_warn_t = now_t + st( + "WARN", + ( + "MAP_LOCK severe reject; skipping frame integration: " + f"fit={fit_r:.2f}, rmse={rmse_r:.2f}, " + f"dpos={dpos_r:.2f}m, drot={drot_r:.1f}deg" + ), + ) + + # keyframe gating from config + if map_lock_block_frame: + stable_pts_display = stable.get_display_points() + elif eng_cfg.keyframe_enabled and pose is not None and not pose_delta_ok(pose, last_key_pose): + # skip filter/map update + stable_pts_display = stable.get_display_points() + else: + if pose is not None: + last_key_pose = pose + + points_world = to_world_points(pts, pose) + stable_now = stable.get_points() + stable_now_n = 0 if stable_now is None else int(len(stable_now)) + rotate_freeze_active = bool( + rotate_mode_enabled + and rotate_freeze_mapping + and rotating_in_place + and stable_now_n >= int(rotate_freeze_min_stable_pts) + ) + if rotate_freeze_active: + rotate_freeze_count += 1 + continuity_consecutive_rejects = 0 + continuity_pts_ds = voxel_downsample_np(points_world, float(continuity_voxel_m)) + continuity_prev_pts = ( + np.array(continuity_pts_ds, dtype=np.float32, copy=True) + if continuity_pts_ds is not None and len(continuity_pts_ds) > 0 + else continuity_prev_pts + ) + continuity_info = { + "status": "skip_rotate_freeze", + "trans_step_m": float(trans_step_for_mode), + "rot_step_deg": float(rot_step_for_mode), + "stable_points": int(stable_now_n), + "freeze_count": int(rotate_freeze_count), + } + stable_pts_display = stable.get_display_points() + if (now_t - rotate_last_warn_t) > 1.0: + rotate_last_warn_t = now_t + st( + "INFO", + ( + "ROTATE_FREEZE active; skipping map integration during in-place turn " + f"(stable={stable_now_n}, dpos={trans_step_for_mode:.3f}m, drot={rot_step_for_mode:.1f}deg)" + ), + ) + else: + if rotating_in_place and rotate_mode_enabled and rotate_skip_continuity: + continuity_ok = True + continuity_pts_ds = voxel_downsample_np(points_world, float(continuity_voxel_m)) + continuity_info = { + "status": "skip_rotating_in_place", + "trans_step_m": float(trans_step_for_mode), + "rot_step_deg": float(rot_step_for_mode), + } + else: + continuity_ok, continuity_info, continuity_pts_ds = check_frame_continuity(points_world) + if not continuity_ok: + continuity_reject_count += 1 + continuity_consecutive_rejects += 1 + if continuity_consecutive_rejects 
>= int(continuity_recover_after_rejects): + continuity_ok = True + continuity_consecutive_rejects = 0 + continuity_info = { + "status": "resync_after_rejects", + "reject_streak": int(continuity_recover_after_rejects), + "trans_step_m": float(trans_step_for_mode), + "rot_step_deg": float(rot_step_for_mode), + } + continuity_prev_pts = ( + np.array(continuity_pts_ds, dtype=np.float32, copy=True) + if continuity_pts_ds is not None and len(continuity_pts_ds) > 0 + else continuity_prev_pts + ) + st( + "WARN", + ( + "CONTINUITY auto-resync after repeated rejects; " + f"recover_after={int(continuity_recover_after_rejects)}" + ), + ) + if not continuity_ok: + stable_pts_display = stable.get_display_points() + # Trigger a quick map-lock retry after continuity rejection. + map_lock_last_t = float(now_t - map_lock_period_sec + map_lock_retry_after_sec) + if (now_t - continuity_last_warn_t) > 1.0: + continuity_last_warn_t = now_t + st( + "WARN", + ( + "CONTINUITY rejected; skipping frame integration: " + f"inlier={float(continuity_info.get('inlier_ratio', 0.0)):.2f}, " + f"med={float(continuity_info.get('median_m', 0.0)):.2f}m, " + f"p90={float(continuity_info.get('p90_m', 0.0)):.2f}m " + f"(count={int(continuity_reject_count)})" + ), + ) + else: + continuity_consecutive_rejects = 0 + continuity_prev_pts = ( + np.array(continuity_pts_ds, dtype=np.float32, copy=True) + if continuity_pts_ds is not None and len(continuity_pts_ds) > 0 + else None + ) + if map_quality_cfg.enabled: + points_world = map_quality.apply_world(points_world) + filt.update(points_world) + stable_pts = filt.get_stable_points() + stable.set_points(stable_pts) + if ( + cleanup_enabled + and stable_pts is not None + and len(stable_pts) >= int(cleanup_min_total_points) + and (now_t - float(cleanup_last_t)) >= float(cleanup_period_sec) + ): + cleanup_last_t = float(now_t) + cleaned_pts, cleanup_info = cleanup_map_islands(stable_pts) + cleanup_last_info = dict(cleanup_info) + removed_now = int(cleanup_info.get("removed", 0)) + if removed_now > 0: + cleanup_removed_total += int(removed_now) + stable.set_points(cleaned_pts) + if hasattr(filt, "seed_stable_points"): + try: + filt.seed_stable_points( + cleaned_pts, + now=now_t, + hit_count=int(runtime_filter_hit_threshold), + clear_existing=True, + ) + except Exception as _cln_exc: + st("WARN", f"[cleanup] filter re-seed after cleanup failed: {_cln_exc}") + st( + "INFO", + { + "MAP_CLEANUP": { + **cleanup_info, + "removed_total": int(cleanup_removed_total), + } + }, + ) + else: + cleanup_info = dict(cleanup_last_info) if isinstance(cleanup_last_info, dict) else {"status": "na"} + stable_pts_display = stable.get_display_points() + + # Lightweight loop closure + pose-graph optimization. + if pose is not None and loop_cfg.enabled: + loop_res = loop_backend.process_frame(pts, np.asarray(pose, dtype=np.float64)) + if loop_res.loop_detected or loop_res.optimized or ("rejected" in loop_res.info): + st( + "INFO", + { + "LOOP": { + "detected": bool(loop_res.loop_detected), + "optimized": bool(loop_res.optimized), + **loop_res.info, + } + }, + ) + if loop_res.optimized: + corr = np.asarray(loop_res.correction, dtype=np.float64) + # Apply correction to map and reseed filter for consistency. 
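+ # The whole stable map is moved by the optimizer's correction, then the
+ # persistence filter is re-seeded from the corrected points with hit counts
+ # already at threshold, so voxel keys computed in the pre-correction frame
+ # cannot outvote the corrected geometry on the next few frames.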
+ stable_all = stable.get_points() + corrected = apply_transform_points(stable_all, corr) + if corrected is not None: + stable.set_points(corrected) + if hasattr(filt, "seed_stable_points"): + filt.seed_stable_points( + corrected, + now=_now(), + hit_count=int(filt_cfg.hits_threshold), + clear_existing=True, + ) + stable_pts_display = stable.get_display_points() + loc_frames.apply_loop_correction(corr, update_reference=True) + _sync_loc_vars_from_service() + if last_guard_pose is not None: + last_guard_pose = corr @ np.asarray(last_guard_pose, dtype=np.float64) + if last_key_pose is not None: + last_key_pose = corr @ np.asarray(last_key_pose, dtype=np.float64) + pose = corr @ np.asarray(pose, dtype=np.float64) + maybe_auto_localize() + maybe_autosave(now_t) + if submap_ckpt is not None and submap_mode_enabled: + submap_ckpt.maybe_save(submap_mapper) + elif localize_only_enabled: + now_t = _now() + dt = 0.1 if last_frame_t <= 0 else float(np.clip(now_t - last_frame_t, 0.02, 0.25)) + last_frame_t = now_t + ts = np.linspace(0.0, dt, len(pts), dtype=np.float64) + t_icp0 = _now() + slam.register_frame(pts, ts) + perf_icp_ms = 1000.0 * (_now() - t_icp0) + odom_pose = slam.last_pose if hasattr(slam, "last_pose") else getattr(slam, "pose", None) + if odom_pose is not None: + odom_pose, fusion_info = sensor_fusion.fuse_pose(np.asarray(odom_pose, dtype=np.float64), now=now_t) + pose = np.asarray(odom_pose, dtype=np.float64) + else: + pose = None + + live_for_localize = np.asarray(pts, dtype=np.float32) + if pose is not None: + live_for_localize = to_world_points(pts, pose) + + if loc_live_stack_enabled: + frame_ds = voxel_downsample_np(live_for_localize, float(loc_live_stack_voxel_m)) + if frame_ds is not None and len(frame_ds) > 0: + loc_live_stack.append(np.asarray(frame_ds, dtype=np.float32)) + if len(loc_live_stack) >= int(loc_live_stack_min_frames): + cat = np.concatenate(list(loc_live_stack), axis=0) + stack_pts = voxel_downsample_np(cat, float(loc_live_stack_voxel_m)) + if len(stack_pts) > int(loc_live_stack_max_points): + step = max(2, int(np.ceil(float(len(stack_pts)) / float(loc_live_stack_max_points)))) + stack_pts = stack_pts[::step][: int(loc_live_stack_max_points)] + if stack_pts is not None and len(stack_pts) > 0: + live_for_localize = np.asarray(stack_pts, dtype=np.float32) + + latest_live_localize_pts = np.array(live_for_localize, dtype=np.float32, copy=True) + maybe_auto_localize(src_live=live_for_localize) + loc_state_name = str(loc_state.state).upper().strip() + submap_active = bool( + submap_mode_enabled + and _submap_profile_allowed() + and slam_to_ref_valid + and loc_state_name in ("TRACKING", "DEGRADED") + and float(last_localize_confidence) >= 0.45 + ) + if submap_active: + if pose is not None and np.asarray(pose).shape == (4, 4): + pose_ref = np.asarray(slam_to_ref_transform, dtype=np.float64) @ np.asarray(pose, dtype=np.float64) + pts_ref = to_world_points(pts, pose_ref) + else: + pts_ref = apply_transform_points(pts, slam_to_ref_transform) + pose_ref = ( + np.asarray(slam_to_ref_transform, dtype=np.float64) + if np.asarray(slam_to_ref_transform).shape == (4, 4) + else None + ) + if pts_ref is not None and len(pts_ref) > 0: + submap_info = submap_mapper.integrate(pts_ref, now=now_t, pose_world=pose_ref) + stable_pts_display = submap_mapper.get_display_points() + stable_points_in_global_frame = True + else: + stable_pts_display = live_for_localize + submap_info = submap_mapper.status( + active=False, + reason="empty_live", + profile=str(stability_profile), + 
profile_allowed=True, + ) + else: + if (not submap_mode_enabled) or (not _submap_profile_allowed()) or (not slam_to_ref_valid): + if submap_mapper.has_points and ((not _submap_profile_allowed()) or (not submap_mode_enabled) or (not slam_to_ref_valid)): + submap_mapper.reset() + stable_pts_display = live_for_localize + submap_info = submap_mapper.status( + active=False, + reason="disabled_or_not_localized", + profile=str(stability_profile), + profile_allowed=_submap_profile_allowed(), + ) + # Keep pose in odom frame; publish path maps it to reference when localized. + else: + stable_pts_display = stable.get_display_points() + + if recording_enabled: + if len(recording_frames) < int(recording_max_frames): + recording_frames.append(np.array(pts, dtype=np.float32, copy=True)) + if pose is not None and np.asarray(pose).shape == (4, 4): + recording_poses.append(np.array(pose, dtype=np.float32, copy=True)) + else: + recording_drop_count += 1 + recording_enabled = False + st( + "WARN", + ( + "RECORDING auto-stopped: reached max frames " + f"({int(recording_max_frames)})." + ), + ) + + # publish throttling + t = _now() + if pub_hz > 0: + if (t - last_pub) < (1.0 / pub_hz): + return + last_pub = t + + stable_pts_pub = stable_pts_display + pose_pub = pose + if slam_to_ref_valid and not stable_points_in_global_frame: + stable_pts_pub = apply_transform_points(stable_pts_display, slam_to_ref_transform) + if pose is not None: + try: + pose_pub = slam_to_ref_transform @ np.asarray(pose, dtype=np.float64) + except Exception: + pose_pub = pose + + if stable_pts_pub is not None and len(stable_pts_pub) > 0: + if localize_only_enabled: + # Blue live overlay for map localization workflows. + stable_cols = np.zeros((len(stable_pts_pub), 4), dtype=np.float32) + stable_cols[:, 0] = 0.18 + stable_cols[:, 1] = 0.58 + stable_cols[:, 2] = 1.00 + stable_cols[:, 3] = 0.72 + else: + alpha_live = 0.90 + stable_cols = height_colors_fast(stable_pts_pub, alpha=float(alpha_live)) + + live_world_for_nav = None + if localize_only_enabled: + if slam_to_ref_valid: + if pose is not None and np.asarray(pose).shape == (4, 4): + try: + pose_ref_live = np.asarray(slam_to_ref_transform, dtype=np.float64) @ np.asarray(pose, dtype=np.float64) + live_world_for_nav = to_world_points(pts, pose_ref_live) + except Exception: + live_world_for_nav = apply_transform_points(pts, slam_to_ref_transform) + else: + live_world_for_nav = apply_transform_points(pts, slam_to_ref_transform) + else: + if pose_pub is not None: + live_world_for_nav = to_world_points(pts, np.asarray(pose_pub, dtype=np.float64)) + elif slam_to_ref_valid: + live_world_for_nav = apply_transform_points(pts, slam_to_ref_transform) + + if localize_only_enabled: + pose_ref_for_vis = None + if slam_to_ref_valid and pose_pub is not None and np.asarray(pose_pub).shape == (4, 4): + pose_ref_for_vis = np.asarray(pose_pub, dtype=np.float64) + elif slam_to_ref_valid: + pose_ref_for_vis = np.asarray(slam_to_ref_transform, dtype=np.float64) + ref_match_points_pub, ref_match_colors_pub, ref_match_stats = _build_ref_match_visual( + live_world_for_nav, + pose_ref_for_vis, + _now(), + ) + else: + ref_match_stats = {"enabled": bool(loc_vis_enabled), "updated": False, "reason": "not_localize_mode"} + + nav_static_points = stable_pts_pub + if localize_only_enabled and ref_points_cache is not None and len(ref_points_cache) > 0: + try: + src_n = int(len(ref_points_cache)) + if ref_nav_points_cache is None or int(ref_nav_cache_src_n) != src_n: + nav_voxel = max(0.08, float(nav_rt_cfg.resolution_m) * 
1.25) + nav_pts = voxel_downsample_np(np.asarray(ref_points_cache, dtype=np.float32), float(nav_voxel)) + if nav_pts is None: + nav_pts = np.zeros((0, 3), dtype=np.float32) + if len(nav_pts) > 300000: + step = max(2, int(np.ceil(float(len(nav_pts)) / 300000.0))) + nav_pts = nav_pts[::step] + ref_nav_points_cache = np.asarray(nav_pts, dtype=np.float32) + ref_nav_cache_src_n = src_n + if ref_nav_points_cache is not None and len(ref_nav_points_cache) > 0: + nav_static_points = ref_nav_points_cache + except Exception as _nav_exc: + st("WARN", f"[nav] static point cache update failed: {_nav_exc}") + + if (t - nav_last_update_t) >= 0.10: + nav_runtime_info = nav_runtime.update(nav_static_points, live_world_for_nav, t) + nav_last_update_t = t + + mission_snapshot = mission.snapshot() + if pose_pub is not None and np.asarray(pose_pub).shape == (4, 4): + mission_snapshot = mission.update_pose(float(pose_pub[0, 3]), float(pose_pub[1, 3])) + goal_from_mission = mission.current_goal() + if goal_from_mission is not None: + nav_goal_xy = (float(goal_from_mission[0]), float(goal_from_mission[1])) + elif mission_snapshot.get("completed", False): + nav_goal_xy = None + nav_path_world = [] + + if nav_goal_xy is not None and pose_pub is not None and nav_runtime.grid is not None: + if (t - nav_last_plan_t) >= float(nav_plan_period_sec) or len(nav_path_world) <= 2: + grid = nav_runtime.grid + try: + nav_path_world = nav_global_planner.plan( + np.asarray(grid["costmap"], dtype=np.uint8), + np.asarray(grid["origin_xy"], dtype=np.float32), + float(grid["resolution_m"]), + (float(pose_pub[0, 3]), float(pose_pub[1, 3])), + (float(nav_goal_xy[0]), float(nav_goal_xy[1])), + ) + except Exception as _astar_exc: + st("WARN", f"[nav] A* planning failed: {_astar_exc}") + nav_path_world = [] + nav_last_plan_t = t + elif mission.current_goal() is None: + nav_path_world = [] + + nav_cmd = {"linear_mps": 0.0, "angular_rps": 0.0, "goal_reached": False, "blocked": False} + if pose_pub is not None and len(nav_path_world) > 0: + nav_cmd = nav_local_planner.compute_command(np.asarray(pose_pub, dtype=np.float64), nav_path_world, nav_runtime) + if bool(nav_cmd.get("goal_reached", False)): + mission_snapshot = mission.update_pose(float(pose_pub[0, 3]), float(pose_pub[1, 3])) + next_goal = mission.current_goal() + nav_goal_xy = None if next_goal is None else (float(next_goal[0]), float(next_goal[1])) + nav_path_world = [] + + safety_info = safety.evaluate(t, pose_pub, str(loc_state.state), nav_runtime, nav_cmd) + + grid_now = nav_runtime.grid + nav_info = { + "mode": mode_now, + "goal": [float(nav_goal_xy[0]), float(nav_goal_xy[1])] if nav_goal_xy is not None else None, + "path_points": int(len(nav_path_world)), + "path_preview": ( + [[float(x), float(y)] for (x, y) in nav_path_world[:: max(1, len(nav_path_world) // 60)]] + if len(nav_path_world) > 0 + else [] + ), + "cmd": dict(safety_info.get("cmd", nav_cmd)), + "mission": mission_snapshot, + "costmap": ( + { + "shape": [int(grid_now["shape_hw"][0]), int(grid_now["shape_hw"][1])], + "resolution_m": float(grid_now["resolution_m"]), + "origin_xy": [float(grid_now["origin_xy"][0]), float(grid_now["origin_xy"][1])], + "static_cells": int(grid_now["static_cells"]), + "dynamic_cells": int(grid_now["dynamic_cells"]), + "inflated_cells": int(grid_now["inflated_cells"]), + } + if isinstance(grid_now, dict) + else nav_runtime_info + ), + } + + stable_count = 0 if stable_pts_pub is None else int(len(stable_pts_pub)) + _update_perf_rollup(t, stable_count) + perf_payload = { + "input_fps": 
float(perf_input_fps), + "publish_fps": float(perf_publish_fps), + "icp_ms": float(perf_icp_ms), + "queue_lag": int(perf_queue_lag), + "stable_growth_per_sec": float(perf_stable_growth), + "cpu_percent": float(perf_cpu_pct), + } + + payload = { + "stable_points": stable_pts_pub, + "stable_colors": stable_cols, + "pose": pose_pub, + "filter": filt.stats() if hasattr(filt, "stats") else {}, + "workflow_profile": str(stability_profile), + "map_lock": { + "enabled": bool(map_lock_enabled), + "accept_count": int(map_lock_accept_count), + "reject_count": int(map_lock_reject_count), + "rotating_in_place": bool(rotating_in_place) if mapping_enabled else False, + "freeze_active": bool(rotate_freeze_active) if mapping_enabled else False, + "freeze_count": int(rotate_freeze_count), + "last": dict(map_lock_info) if isinstance(map_lock_info, dict) else {}, + }, + "continuity": { + "enabled": bool(continuity_enabled), + "reject_count": int(continuity_reject_count), + "reject_streak": int(continuity_consecutive_rejects), + "recover_after_rejects": int(continuity_recover_after_rejects), + "last": dict(continuity_info) if isinstance(continuity_info, dict) else {}, + }, + "submap": dict(submap_info) if isinstance(submap_info, dict) else {}, + "ref_match_points": ref_match_points_pub, + "ref_match_colors": ref_match_colors_pub, + "ref_match": dict(ref_match_stats) if isinstance(ref_match_stats, dict) else {}, + "cleanup": { + "enabled": bool(cleanup_enabled), + "removed_total": int(cleanup_removed_total), + "last": dict(cleanup_info) if isinstance(cleanup_info, dict) else {}, + }, + "localized": bool(slam_to_ref_valid), + "loc_state": str(loc_state.state), + "loc_confidence": float(last_localize_confidence), + "fusion": fusion_info, + "perf": perf_payload, + "mode": str(worker_mode), + "nav": nav_info, + "safety": safety_info, + "recording": { + "enabled": bool(recording_enabled), + "frames": int(len(recording_frames)), + "dropped": int(recording_drop_count), + }, + } + perf_publish_count += 1 + _safe_put(data_q, ("FRAME", payload), keep_latest=bool(run_cfg.frame_keep_latest)) + + # ---------------- lidar callback (lightweight) ---------------- + # Keep SDK callback minimal: enqueue latest frame and process in worker loop. + # ---------------- IMU orientation integrator ---------------- + # Complementary filter: gyro integrates at full 200 Hz; accelerometer + # corrects roll/pitch drift at a low gain (alpha ≈ 0.97). + # Yaw is gyro-only (no magnetometer) — drifts slowly, but far less than + # KISS-ICP yaw on fast rotations. + # Only used when fusion.enabled = true in config. + + class _ImuIntegrator: + __slots__ = ("R", "last_t", "alpha", "last_pose_translation") + + def __init__(self, alpha: float = 0.97) -> None: + self.R = np.eye(3, dtype=np.float64) + self.last_t: float = 0.0 + self.alpha = float(np.clip(alpha, 0.80, 0.999)) + self.last_pose_translation = np.zeros(3, dtype=np.float64) + + def reset(self) -> None: + self.R = np.eye(3, dtype=np.float64) + self.last_t = 0.0 + self.last_pose_translation[:] = 0.0 + + def update(self, gyro: np.ndarray, acc: np.ndarray, t: float) -> np.ndarray: + """Returns a 4×4 pose. 
Translation comes from last known LiDAR pose.""" + if self.last_t <= 0.0: + self.last_t = t + return np.eye(4, dtype=np.float64) + dt = float(t - self.last_t) + if dt <= 0.0 or dt > 0.5: + self.last_t = t + return np.eye(4, dtype=np.float64) + self.last_t = t + + # 1 — Gyro integration via Rodrigues + omega = np.asarray(gyro, dtype=np.float64) + angle = float(np.linalg.norm(omega)) * dt + if angle > 1e-9: + axis = omega / (float(np.linalg.norm(omega)) + 1e-15) + K = np.array([[0.0, -axis[2], axis[1]], + [axis[2], 0.0, -axis[0]], + [-axis[1], axis[0], 0.0]], dtype=np.float64) + dR = np.eye(3, dtype=np.float64) + np.sin(angle) * K + (1.0 - np.cos(angle)) * (K @ K) + self.R = self.R @ dR + + # 2 — Accelerometer correction for roll/pitch (complementary filter) + a = np.asarray(acc, dtype=np.float64) + a_norm = float(np.linalg.norm(a)) + if 7.5 < a_norm < 12.5: # only trust near 1 g; reject dynamic spikes + a_unit = a / a_norm + g_in_sensor = self.R.T @ np.array([0.0, 0.0, -1.0]) # expected gravity direction + cross = np.cross(a_unit, g_in_sensor) + cross_norm = float(np.linalg.norm(cross)) + if cross_norm > 1e-6: + corr_angle = float(np.arcsin(np.clip(cross_norm, -1.0, 1.0))) * (1.0 - self.alpha) + axis_c = cross / cross_norm + Kc = np.array([[0.0, -axis_c[2], axis_c[1]], + [axis_c[2], 0.0, -axis_c[0]], + [-axis_c[1], axis_c[0], 0.0]], dtype=np.float64) + dR_c = np.eye(3, dtype=np.float64) + np.sin(corr_angle) * Kc + (1.0 - np.cos(corr_angle)) * (Kc @ Kc) + self.R = dR_c @ self.R + + # Re-orthogonalise to prevent numerical drift + try: + U, _, Vt = np.linalg.svd(self.R, full_matrices=False) + self.R = U @ Vt + except np.linalg.LinAlgError: + self.R = np.eye(3, dtype=np.float64) + + T = np.eye(4, dtype=np.float64) + T[:3, :3] = self.R + T[:3, 3] = self.last_pose_translation # inject LiDAR translation so fusion doesn't shift position + return T + + _imu_integrator = _ImuIntegrator(alpha=float( + ((full_cfg.get("fusion", {}) or {}).get("imu_complementary_alpha", 0.97)) + )) + + def on_imu(gyro: np.ndarray, acc: np.ndarray, timestamp: float) -> None: + if not fusion_cfg.enabled: + return + try: + # Keep the integrator's translation in sync with the latest LiDAR pose + with state_lock: + cur_tf = np.array(odom_to_map_transform, dtype=np.float64, copy=True) + _imu_integrator.last_pose_translation = cur_tf[:3, 3].copy() + imu_pose = _imu_integrator.update(gyro, acc, timestamp) + sensor_fusion.update_prior( + sensor="imu", + pose=imu_pose, + confidence=0.85, + timestamp=timestamp, + ) + except Exception as _imu_exc: + pass # IMU is advisory; never let it crash the worker + + # This avoids running heavy ICP/Open3D inside the Livox SDK thread. 
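The worker's _safe_put helper is defined outside this hunk, so its exact shape is an assumption; the keep-latest pattern that raw_points_q and data_q rely on reduces to a sketch like the following (name and signature illustrative only):

import queue

def safe_put_latest(q: queue.Queue, item, keep_latest: bool = True) -> bool:
    # Evict the stale entry first so the consumer always sees the newest
    # frame; with a maxsize=1 queue this bounds worker lag to one frame.
    if keep_latest:
        try:
            q.get_nowait()
        except queue.Empty:
            pass
    try:
        q.put_nowait(item)
        return True
    except queue.Full:
        return False  # caller counts this as a dropped frame

Dropping stale frames instead of blocking keeps the SDK thread lightweight and should keep perf_queue_lag small even when ICP runs slower than the sensor.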
+ def on_points(points: np.ndarray): + if points is None or len(points) == 0: + return + try: + pts = np.asarray(points, dtype=np.float32) + if pts.ndim != 2 or pts.shape[1] != 3: + return + except Exception: + return + _safe_put(raw_points_q, np.array(pts, dtype=np.float32, copy=True), keep_latest=True) + + # ---------------- commands ---------------- + def do_connect(): + nonlocal lidar, connected + if connected: + st("INFO", "Already connected.") + return + + cfg_path = Path(eng_cfg.config_file) + if not cfg_path.exists(): + st("ERROR", f"Config missing: {cfg_path}") + return + + try: + lidar = Livox2( + str(cfg_path), + host_ip=str(eng_cfg.host_ip), + debug=bool(livox_debug_enabled), + print_every_n_frames=int(livox_print_every_n), + ) + lidar.handle_points = on_points + lidar.handle_imu = on_imu + connected = True + st("INFO", f"CONNECTED to Livox (host_ip={eng_cfg.host_ip})") + except Exception as e: + connected = False + err = str(e) + up = err.upper() + if ("SDKINIT" in up) or ("BIND" in up) or ("SOCKET" in up) or ("ADDRESS ALREADY IN USE" in up): + err = ( + f"{err}\n" + "Hint: likely UDP port conflict or wrong host IP.\n" + "1) Close other Livox/SLAM apps using ports 56101-56501.\n" + "2) Ensure GUI host IP matches this PC NIC on LiDAR network.\n" + "3) Verify mid360_config.json host_net_info *_ip and *_port values." + ) + st("ERROR", f"Connect failed: {err}") + + def do_start(): + nonlocal mapping_enabled, localize_only_enabled, worker_mode + nonlocal last_guard_pose, last_guard_t, guard_reject_count + nonlocal continuity_prev_pts, continuity_reject_count, continuity_consecutive_rejects, continuity_last_warn_t + nonlocal rotate_freeze_count, rotate_last_warn_t + nonlocal cleanup_last_t, cleanup_last_info + nonlocal last_frame_t, loc_live_stack, latest_live_localize_pts + if not connected: + st("ERROR", "START denied: not connected.") + return + if self_check_report.get("errors"): + st("ERROR", "START denied: startup self-check has errors. Fix config paths first.") + return + mapping_enabled = True + localize_only_enabled = False + last_frame_t = 0.0 + loc_live_stack.clear() + latest_live_localize_pts = None + last_guard_pose = None + last_guard_t = 0.0 + guard_reject_count = 0 + continuity_prev_pts = None + continuity_reject_count = 0 + continuity_consecutive_rejects = 0 + continuity_last_warn_t = 0.0 + rotate_freeze_count = 0 + rotate_last_warn_t = 0.0 + cleanup_last_t = 0.0 + cleanup_last_info = {"status": "na"} + if not _submap_profile_allowed(): + submap_mapper.reset() + worker_mode = "MAPPING" + st("INFO", "MAPPING STARTED.") + push_worker_mode() + + def do_start_localize_only(): + nonlocal mapping_enabled, localize_only_enabled, worker_mode, last_localize_t, last_localize_confidence + nonlocal last_key_pose, last_guard_pose, last_guard_t, guard_reject_count, last_frame_t + nonlocal odom_to_map_transform, continuity_prev_pts, continuity_consecutive_rejects + nonlocal continuity_reject_count, rotate_freeze_count, loc_live_stack + nonlocal slam_to_ref_valid, latest_live_localize_pts + if not connected: + st("ERROR", "LOCALIZE_ONLY denied: not connected.") + return + if not ref_map_path: + st("ERROR", "LOCALIZE_ONLY denied: load reference map first.") + return + # Start localize-only from a fresh odometry frame to avoid stale-map offsets. 
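Returning to the CONNECT failure hint above: it points the operator at UDP port conflicts. If an automated preflight were wanted, a bind probe against the host-side ports from mid360_config.json would be enough (a sketch, not part of the worker):

import socket

def udp_port_free(host_ip: str, port: int) -> bool:
    # A failed bind usually means another Livox/SLAM process still holds
    # the port, which is exactly what the CONNECT hint describes.
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        s.bind((host_ip, port))
        return True
    except OSError:
        return False
    finally:
        s.close()

# Example: busy = [p for p in (56101, 56201, 56301, 56401, 56501)
#                  if not udp_port_free("192.168.123.222", p)]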
+ init_slam_stack() + mapping_enabled = False + localize_only_enabled = True + last_frame_t = 0.0 + loc_frames.reset() + _sync_loc_vars_from_service() + last_key_pose = None + last_guard_pose = None + last_guard_t = 0.0 + guard_reject_count = 0 + continuity_prev_pts = None + continuity_consecutive_rejects = 0 + continuity_reject_count = 0 + rotate_freeze_count = 0 + loc_live_stack.clear() + latest_live_localize_pts = None + submap_mapper.reset() + worker_mode = "LOCALIZE_ONLY" + last_localize_t = 0.0 + st("INFO", "LOCALIZE_ONLY STARTED.") + push_worker_mode() + + def do_pause(): + nonlocal mapping_enabled, localize_only_enabled, worker_mode + nonlocal loc_live_stack, latest_live_localize_pts + mapping_enabled = False + localize_only_enabled = False + loc_live_stack.clear() + latest_live_localize_pts = None + worker_mode = "PAUSED" + st("INFO", "MAPPING PAUSED.") + push_worker_mode() + + def do_stop(): + nonlocal mapping_enabled, localize_only_enabled, worker_mode + nonlocal loc_live_stack, latest_live_localize_pts + mapping_enabled = False + localize_only_enabled = False + loc_live_stack.clear() + latest_live_localize_pts = None + worker_mode = "STOPPED" + st("INFO", "MAPPING STOPPED.") + push_worker_mode() + + def do_export(filename_base: str): + try: + # Final cleanup pass: flush any stray islands that arrived since the last + # periodic cleanup (which runs every cleanup_period_sec seconds). + if cleanup_enabled: + full_pts = stable.get_points() + if full_pts is not None and len(full_pts) >= int(cleanup_min_total_points): + cleaned, _ = cleanup_map_islands(full_pts) + if cleaned is not None and len(cleaned) >= int(map_cfg.min_points_to_save): + stable.set_points(cleaned) + pts = stable.get_save_points() + n = 0 if pts is None else len(pts) + req = int(map_cfg.min_points_to_save) + if pts is None or n < req: + st("ERROR", f"EXPORT refused: stable points {n}/{req}. Keep mapping and try again.") + return + out_path = stable.export_map(filename_base) + st("INFO", f"SAVED: {out_path}") + except Exception as e: + st("ERROR", f"EXPORT failed: {e}") + + def do_export_nav(filename_base: str): + try: + pts = stable.get_save_points() + n_total = 0 if pts is None else len(pts) + req = int(nav_cfg.min_points) + if pts is None or n_total < req: + st("ERROR", f"EXPORT_NAV refused: stable points {n_total}/{req}.") + return + n_band = int(nav_exporter.count_nav_points(pts)) + if n_band < req: + st( + "ERROR", + ( + "EXPORT_NAV refused: nav-band points " + f"{n_band}/{req} in z[{float(nav_cfg.z_min_m):.2f}, {float(nav_cfg.z_max_m):.2f}] m." 
+ ), + ) + return + result = nav_exporter.export(filename_base, pts) + st("INFO", {"NAV_EXPORTED": result}) + except Exception as e: + st("ERROR", f"EXPORT_NAV failed: {e}") + + def do_load_ref(path_value: str): + nonlocal ref_map_path, ref_pcd_cache, ref_points_cache, ref_nav_points_cache, ref_nav_cache_src_n, ref_cache_path + nonlocal last_localize_t, last_localize_points, last_localize_transform, last_localize_confidence + nonlocal slam_to_ref_transform, slam_to_ref_valid + nonlocal approx_pose_guess_tf, approx_pose_guess_until_t + nonlocal approx_pose_guess_fail_count, approx_pose_guess_success_count + nonlocal ref_match_points_cache, ref_match_confirm_hits, ref_match_last_pub_t, ref_match_last_stats + nonlocal loc_live_stack, last_frame_t, latest_live_localize_pts + ref_map_path = str(path_value) + ref_pcd_cache = None + ref_points_cache = None + ref_nav_points_cache = None + ref_nav_cache_src_n = 0 + ref_cache_path = None + ref_match_points_cache = None + ref_match_confirm_hits = None + ref_match_last_pub_t = 0.0 + ref_match_last_stats = {} + place_recog.reset() + last_frame_t = 0.0 + loc_live_stack.clear() + latest_live_localize_pts = None + submap_mapper.reset() + last_localize_t = 0.0 + last_localize_points = 0 + loc_frames.reset_reference() + _sync_loc_vars_from_service() + approx_pose_guess_tf = None + approx_pose_guess_until_t = 0.0 + approx_pose_guess_fail_count = 0 + approx_pose_guess_success_count = 0 + loc_state.reset() + st("INFO", f"REF loaded: {Path(ref_map_path).name}") + st("INFO", {"PLACE_RECOG": place_recog.snapshot()}) + prior_tf = session_store.get_transform(ref_map_path) + if prior_tf is not None: + loc_frames.last_alignment = np.array(prior_tf, dtype=np.float64, copy=True) + _sync_loc_vars_from_service() + st("INFO", {"SESSION": {"prior_loaded": True, "ref": Path(ref_map_path).name}}) + # In map-localization workflows we wait for fresh live scans after START, + # instead of trying to localize immediately from stale/non-live points. 
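Stepping back to the EXPORT_NAV guard above: nav_exporter.count_nav_points is defined outside this hunk, but judging from the refusal message it plausibly reduces to a z-band mask over the stable cloud, as in this sketch (helper name hypothetical):

import numpy as np

def count_nav_band(pts: np.ndarray, z_min_m: float, z_max_m: float) -> int:
    # Count the points inside the horizontal slice that feeds the 2-D nav grid.
    z = pts[:, 2]
    return int(np.count_nonzero((z >= z_min_m) & (z <= z_max_m)))

A large map can therefore still refuse EXPORT_NAV when most of its points are floor or ceiling outside the configured z window.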
+ if mapping_enabled and (not localize_only_enabled): + do_localize(force=False, source="ON_LOAD") + push_loc_state() + + def do_manual_localize(): + src_live = latest_live_localize_pts if localize_only_enabled else None + if localize_only_enabled and (src_live is None or int(len(src_live)) < 80): + st("INFO", "LOCALIZE: waiting for live scan...") + return + do_localize(force=True, source="MANUAL", src_override=src_live) + + def do_clear_ref(): + nonlocal ref_map_path, ref_pcd_cache, ref_points_cache, ref_nav_points_cache, ref_nav_cache_src_n, ref_cache_path + nonlocal last_localize_t, last_localize_points, last_localize_transform, last_localize_confidence + nonlocal slam_to_ref_transform, slam_to_ref_valid + nonlocal localize_only_enabled, worker_mode + nonlocal approx_pose_guess_tf, approx_pose_guess_until_t + nonlocal approx_pose_guess_fail_count, approx_pose_guess_success_count + nonlocal ref_match_points_cache, ref_match_confirm_hits, ref_match_last_pub_t, ref_match_last_stats + nonlocal loc_live_stack, last_frame_t, latest_live_localize_pts + ref_map_path = None + ref_pcd_cache = None + ref_points_cache = None + ref_nav_points_cache = None + ref_nav_cache_src_n = 0 + ref_cache_path = None + ref_match_points_cache = None + ref_match_confirm_hits = None + ref_match_last_pub_t = 0.0 + ref_match_last_stats = {} + place_recog.reset() + last_frame_t = 0.0 + loc_live_stack.clear() + latest_live_localize_pts = None + submap_mapper.reset() + last_localize_t = 0.0 + last_localize_points = 0 + loc_frames.reset_reference() + _sync_loc_vars_from_service() + approx_pose_guess_tf = None + approx_pose_guess_until_t = 0.0 + approx_pose_guess_fail_count = 0 + approx_pose_guess_success_count = 0 + localize_only_enabled = False + if not mapping_enabled: + worker_mode = "IDLE" + loc_state.reset() + st("INFO", "REF cleared.") + push_worker_mode() + push_loc_state() + + st("INFO", "Worker ready. 
CONNECT then START.") + st("INFO", {"SELF_CHECK": self_check_report, "source": "worker"}) + apply_density_mode("MEDIUM") + apply_stability_profile("BALANCED") + apply_min_stable_points(map_cfg.min_points_to_save) + apply_loop_closure_mode(loop_cfg.enabled) + apply_loc_state_machine_mode(loc_state_cfg.enabled) + apply_submap_mode( + { + "enabled": submap_mode_enabled, + "local_window_frames": submap_cfg.local_window_frames, + "local_voxel_m": submap_cfg.local_voxel_m, + "global_voxel_m": submap_cfg.global_voxel_m, + "merge_period_sec": submap_cfg.merge_period_sec, + "merge_min_translation_m": submap_cfg.merge_min_translation_m, + "merge_min_rotation_deg": submap_cfg.merge_min_rotation_deg, + "max_global_points": submap_cfg.max_global_points, + "display_max_points": submap_cfg.display_max_points, + "apply_profiles": list(submap_cfg.apply_profiles), + "keep_points": False, + } + ) + apply_autosave( + { + "enabled": autosave_enabled, + "interval_sec": autosave_interval_sec, + "base_name": autosave_base_name, + } + ) + apply_nav_config( + { + "min_points": nav_cfg.min_points, + "resolution_m": nav_cfg.resolution_m, + "z_min_m": nav_cfg.z_min_m, + "z_max_m": nav_cfg.z_max_m, + "inflation_radius_m": nav_cfg.inflation_radius_m, + "padding_m": nav_cfg.padding_m, + } + ) + apply_map_quality_config( + { + "enabled": map_quality_cfg.enabled, + "near_min_range_m": map_quality_cfg.near_min_range_m, + "ray_consistency_enabled": map_quality_cfg.ray_consistency_enabled, + "ray_bin_deg": map_quality_cfg.ray_bin_deg, + "ray_elev_bin_deg": map_quality_cfg.ray_elev_bin_deg, + "ray_max_behind_m": map_quality_cfg.ray_max_behind_m, + "ray_keep_n": map_quality_cfg.ray_keep_n, + "world_z_clip_enabled": map_quality_cfg.world_z_clip_enabled, + "world_z_min_m": map_quality_cfg.world_z_min_m, + "world_z_max_m": map_quality_cfg.world_z_max_m, + "outlier_filter_enabled": map_quality_cfg.outlier_filter_enabled, + "outlier_voxel_m": map_quality_cfg.outlier_voxel_m, + "outlier_min_points": map_quality_cfg.outlier_min_points, + } + ) + apply_nav_runtime_config( + { + "enabled": nav_rt_cfg.enabled, + "resolution_m": nav_rt_cfg.resolution_m, + "z_min_m": nav_rt_cfg.z_min_m, + "z_max_m": nav_rt_cfg.z_max_m, + "padding_m": nav_rt_cfg.padding_m, + "inflation_radius_m": nav_rt_cfg.inflation_radius_m, + "dynamic_decay_sec": nav_rt_cfg.dynamic_decay_sec, + "dynamic_min_hits": nav_rt_cfg.dynamic_min_hits, + "blocked_cost": nav_rt_cfg.blocked_cost, + } + ) + st("INFO", {"MISSION": mission.snapshot()}) + push_loc_state() + push_worker_mode() + + while running: + try: + cmd = None + try: + cmd = cmd_q.get(timeout=0.02) + except queue.Empty: + cmd = None + + if cmd: + if isinstance(cmd, tuple): + name = str(cmd[0]).upper().strip() + payload = cmd[1] if len(cmd) > 1 else None + else: + name = str(cmd).upper().strip() + payload = None + + if name == "CONNECT": + _run_locked(do_connect) + elif name == "START": + _run_locked(do_start) + elif name == "START_LOCALIZE_ONLY": + _run_locked(do_start_localize_only) + elif name == "STOP_LOCALIZE_ONLY": + _run_locked(do_stop) + elif name == "PAUSE": + _run_locked(do_pause) + elif name == "STOP": + _run_locked(do_stop) + elif name == "RESET": + _run_locked(reset_all) + elif name == "EXPORT": + _run_locked(do_export, str(payload or "map_robot")) + elif name == "EXPORT_NAV": + _run_locked(do_export_nav, str(payload or "map_robot")) + elif name == "LOAD_REF": + _run_locked(do_load_ref, str(payload)) + elif name == "LOCALIZE": + _run_locked(do_manual_localize) + elif name == "CLEAR_REF": + 
_run_locked(do_clear_ref) + elif name == "SET_DENSITY": + _run_locked(apply_density_mode, str(payload or "MEDIUM")) + elif name == "SET_MIN_STABLE_POINTS": + _run_locked(apply_min_stable_points, payload) + elif name == "SET_LOOP_CLOSURE": + _run_locked(apply_loop_closure_mode, payload) + elif name == "SET_LOC_STATE_MACHINE": + _run_locked(apply_loc_state_machine_mode, payload) + elif name == "SET_SUBMAP_MODE": + _run_locked(apply_submap_mode, payload) + elif name == "SET_APPROX_POSE": + _run_locked(apply_approx_pose, payload if isinstance(payload, dict) else None) + elif name == "CLEAR_APPROX_POSE": + _run_locked(apply_approx_pose, None) + elif name == "SET_AUTOSAVE": + _run_locked(apply_autosave, payload if isinstance(payload, dict) else {}) + elif name == "SET_NAV_CONFIG": + _run_locked(apply_nav_config, payload if isinstance(payload, dict) else {}) + elif name == "SET_NAV_RUNTIME": + _run_locked(apply_nav_runtime_config, payload if isinstance(payload, dict) else {}) + elif name == "SET_NAV_GOAL": + _run_locked(apply_nav_goal, payload if isinstance(payload, dict) else {}) + elif name == "CLEAR_NAV_GOAL": + _run_locked(clear_nav_goal) + elif name == "SET_MAP_QUALITY": + _run_locked(apply_map_quality_config, payload if isinstance(payload, dict) else {}) + elif name == "SET_FILTER_TUNING": + _run_locked(apply_filter_tuning, payload if isinstance(payload, dict) else {}) + elif name == "SET_STABILITY_PROFILE": + _run_locked(apply_stability_profile, payload) + elif name == "RECORD_START": + _run_locked(record_start, payload if isinstance(payload, dict) else {}) + elif name == "RECORD_STOP": + _run_locked(record_stop, payload if isinstance(payload, dict) else {}) + elif name == "MISSION_START": + _run_locked(mission_start, payload if isinstance(payload, dict) else {}) + elif name == "MISSION_PAUSE": + _run_locked(mission_pause) + elif name == "MISSION_RESUME": + _run_locked(mission_resume) + elif name == "MISSION_STOP": + _run_locked(mission_stop) + elif name == "SENSOR_PRIOR": + if isinstance(payload, dict): + def _apply_sensor_prior(): + ok = sensor_fusion.update_prior( + sensor=str(payload.get("sensor", "unknown")), + pose=payload.get("pose"), + confidence=float(payload.get("confidence", 1.0)), + timestamp=float(payload.get("timestamp", _now())), + ) + if not ok: + st("WARN", "SENSOR_PRIOR ignored: invalid payload.") + _run_locked(_apply_sensor_prior) + else: + st("WARN", "SENSOR_PRIOR ignored: payload must be dict.") + elif name == "SHUTDOWN": + running = False + else: + st("WARN", f"Unknown command: {name}") + + latest_pts = None + try: + while True: + latest_pts = raw_points_q.get_nowait() + except queue.Empty: + pass + + if latest_pts is not None: + _run_locked(process_points, latest_pts) + + except KeyboardInterrupt: + break + except Exception as e: + if diagnostics is not None: + diagnostics.log_exception("Worker loop error", e, _worker_diag_state()) + st("ERROR", f"Loop error: {e}\n{traceback.format_exc()}") + + try: + if lidar is not None and hasattr(lidar, "shutdown"): + lidar.shutdown() + except Exception: + pass + + try: + if recording_enabled and len(recording_frames) > 0: + record_stop({"save": True, "base_name": recording_base_name}) + except Exception: + pass + + try: + if diagnostics is not None: + diagnostics.close() + except Exception: + pass + + st("INFO", "Worker shutdown.") diff --git a/Lidar/__init__.py b/Lidar/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Lidar/livox2_python.py b/Lidar/livox2_python.py new file mode 100644 index 0000000..9ab8d3b 
--- /dev/null +++ b/Lidar/livox2_python.py @@ -0,0 +1,354 @@ +"""Wrapper for Livox-SDK **2** (push-mode, no broadcast). + +Tested against Livox-SDK2 1.2.x – build it first: + + git clone https://github.com/Livox-SDK/Livox-SDK2.git + cd Livox-SDK2 && mkdir build && cd build + cmake .. -DCMAKE_BUILD_TYPE=Release && make -j$(nproc) + sudo make install # installs liblivox_lidar_sdk.so → /usr/local/lib + +Create a JSON config (see livox_lidar_quick_start/mid360_config.json) that +points the LiDAR to *your* host-IP (192.168.123.222) and save it e.g. +as ``mid360_config.json`` in this repo. Pass that path to ``Livox2``. +""" + +from __future__ import annotations + +import ctypes as _C +import json +import os +import sys +import threading +import time +from ctypes import ( + POINTER, + c_char_p, + c_uint8, + c_uint16, + c_uint32, + c_float, + c_bool, +) +from pathlib import Path +from typing import Optional + +import numpy as np + +# ---------------- dynamic library ------------------------------------------------ + + +def _load_lib(): + for name in ( + "liblivox_lidar_sdk_shared.so", + "liblivox_lidar_sdk.so", + "livox_lidar_sdk.dll", # Windows + ): + try: + return _C.cdll.LoadLibrary(name) + except OSError: + continue + raise OSError( + "liblivox_lidar_sdk shared library not found. Build & install " + "Livox-SDK2 first (see wrapper docstring)." + ) + + +_lib = _load_lib() + +# ---------------- ctypes mapping -------------------------------------------------- + + +class _LivoxLidarEthernetPacket(_C.Structure): + _pack_ = 1 + _fields_ = [ + ("version", c_uint8), + ("length", c_uint16), + ("time_interval", c_uint16), + ("dot_num", c_uint16), + ("udp_cnt", c_uint16), + ("frame_cnt", c_uint8), + ("data_type", c_uint8), + ("time_type", c_uint8), + ("rsvd", c_uint8 * 12), + ("crc32", c_uint32), + ("timestamp", c_uint8 * 8), + ("data", c_uint8 * 1), + ] + + +class _ImuRawPoint(_C.Structure): + """MID-360 IMU sample (data_type == 0). 
200 Hz, 6-DOF BMI088.""" + _pack_ = 1 + _fields_ = [ + ("gyro_x", c_float), # rad/s + ("gyro_y", c_float), + ("gyro_z", c_float), + ("acc_x", c_float), # m/s² + ("acc_y", c_float), + ("acc_z", c_float), + ] + + +class _CartesianHighPoint(_C.Structure): + _pack_ = 1 + _fields_ = [ + ("x", _C.c_int32), + ("y", _C.c_int32), + ("z", _C.c_int32), + ("reflectivity", c_uint8), + ("tag", c_uint8), + ] + + +# Callback typedef +_PointCb = _C.CFUNCTYPE(None, c_uint32, c_uint8, POINTER(_LivoxLidarEthernetPacket), _C.c_void_p) + +# Info change callback +class _LivoxLidarInfo(_C.Structure): + _fields_ = [ + ("dev_type", c_uint8), + ("sn", _C.c_char * 16), + ("lidar_ip", _C.c_char * 16), + ] + + +_InfoChangeCb = _C.CFUNCTYPE(None, c_uint32, POINTER(_LivoxLidarInfo), _C.c_void_p) + +# --------------------------------------------------------------------------- +# Additional API we use for push-mode +# --------------------------------------------------------------------------- + + +_lib.SetLivoxLidarInfoChangeCallback.argtypes = (_InfoChangeCb, _C.c_void_p) + +_lib.SetLivoxLidarWorkMode.argtypes = (c_uint32, c_uint8, _C.c_void_p, _C.c_void_p) +_lib.SetLivoxLidarWorkMode.restype = c_uint32 + +_lib.EnableLivoxLidarPointSend.argtypes = (c_uint32, _C.c_void_p, _C.c_void_p) +_lib.EnableLivoxLidarPointSend.restype = c_uint32 + +_lib.SetLivoxLidarPclDataType.argtypes = (c_uint32, c_uint8, _C.c_void_p, _C.c_void_p) + +# Point-cloud observer (interface side; lets SDK join multicast) +_lib.LivoxLidarAddPointCloudObserver.argtypes = (_PointCb, _C.c_void_p) +_lib.LivoxLidarAddPointCloudObserver.restype = c_uint16 + +# ---------------- function prototypes ------------------------------------------- + + +_lib.LivoxLidarSdkInit.argtypes = (c_char_p, c_char_p, _C.c_void_p) +_lib.LivoxLidarSdkInit.restype = c_bool + +_lib.LivoxLidarSdkStart.argtypes = () +_lib.LivoxLidarSdkStart.restype = c_bool + +_lib.LivoxLidarSdkUninit.argtypes = () +_lib.LivoxLidarSdkUninit.restype = None + +_lib.SetLivoxLidarPointCloudCallBack.argtypes = (_PointCb, _C.c_void_p) + +# ---------------- Pythonic wrapper ---------------------------------------------- + + +class Livox2: + """Minimal wrapper around Livox-SDK2 push-mode pipeline.""" + + def __init__(self, config_path: str | Path, host_ip: str, + *, frame_time: float = 0.20, frame_packets: int = 120, + debug: bool = True, print_every_n_frames: int = 1): + self._config_path = os.fspath(config_path).encode() + + if not _lib.LivoxLidarSdkInit(self._config_path, host_ip.encode(), None): + raise RuntimeError("LivoxLidarSdkInit failed – check config path & JSON") + + # Register callback *before* starting threads (matches vendor sample) + self._cb = _PointCb(self._on_packet) + _lib.SetLivoxLidarPointCloudCallBack(self._cb, None) + + # start SDK threads + _lib.LivoxLidarSdkStart() + + # Register info-change callback to learn lidar handle once, then start it. 
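A note on the parsing pattern used by _on_packet below: it views ctypes struct arrays as numpy structured arrays without copying, via np.ctypeslib.as_array. A self-contained miniature (struct and values invented for illustration):

import ctypes as C
import numpy as np

class _Pt(C.Structure):  # miniature stand-in for _CartesianHighPoint
    _pack_ = 1
    _fields_ = [("x", C.c_int32), ("y", C.c_int32), ("z", C.c_int32),
                ("reflectivity", C.c_uint8), ("tag", C.c_uint8)]

raw = (_Pt * 2)(_Pt(1000, 0, 500, 30, 0), _Pt(0, -2000, 250, 15, 0))
arr = np.ctypeslib.as_array(raw)  # structured view over the ctypes buffer
xyz = np.stack((arr["x"], arr["y"], arr["z"]), axis=1).astype(np.float32) / 1000.0
print(xyz)  # mm to m: [[1.0, 0.0, 0.5], [0.0, -2.0, 0.25]]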
+ self._info_cb = _InfoChangeCb(self._on_info_change) + _lib.SetLivoxLidarInfoChangeCallback(self._info_cb, None) + + self._running = True + + # Aggregation parameters for pseudo-frames + self._frame_time = float(frame_time) + self._frame_packets = int(frame_packets) + self._debug = bool(debug) + self._print_every_n_frames = max(1, int(print_every_n_frames)) + self._frame_print_counter = 0 + self._frame_state: dict = {} + self._frame_lock = threading.Lock() + + # ------------------------------------------------------------------ + def spin(self): + try: + while self._running: + time.sleep(0.01) + except KeyboardInterrupt: + pass + finally: + self.shutdown() + + def shutdown(self): + if self._running: + _lib.LivoxLidarSdkUninit() + self._running = False + + # ------------------------------------------------------------------ + def handle_points(self, xyz: np.ndarray): # noqa: D401 + if self._debug: + print(f"frame {len(xyz)} pts") + + def handle_imu(self, gyro: np.ndarray, acc: np.ndarray, timestamp: float) -> None: # noqa: D401 + """Override to receive IMU data. + + Parameters + ---------- + gyro : (3,) float32 – angular velocity in rad/s (x, y, z) + acc : (3,) float32 – linear acceleration in m/s² (x, y, z) + timestamp : float – time.time() when packet was received + """ + + # ------------------------------------------------------------------ + def _on_packet(self, handle: int, dev_type: int, pkt_ptr, _client): + pkt = pkt_ptr.contents + n = pkt.dot_num + if n == 0: + return + + if pkt.data_type == 0: # IMU samples (200 Hz, BMI088) + _ArrI = _ImuRawPoint * n + samples = _C.cast(pkt.data, POINTER(_ArrI)).contents + arr = np.ctypeslib.as_array(samples) + # Average all samples in the packet (typically 1–5 at 200 Hz) + gyro = np.array( + [arr["gyro_x"].mean(), arr["gyro_y"].mean(), arr["gyro_z"].mean()], + dtype=np.float32, + ) + acc = np.array( + [arr["acc_x"].mean(), arr["acc_y"].mean(), arr["acc_z"].mean()], + dtype=np.float32, + ) + try: + self.handle_imu(gyro, acc, time.time()) + except Exception as exc: + print(f"[Livox2] Exception in handle_imu: {exc}", file=sys.stderr) + return + + if pkt.data_type == 1: # Cartesian High + _Arr = _CartesianHighPoint * n + points = _C.cast(pkt.data, POINTER(_Arr)).contents + arr = np.ctypeslib.as_array(points) + xyz = np.stack((arr["x"], arr["y"], arr["z"]), axis=1).astype(np.float32) / 1000.0 + elif pkt.data_type == 2: # Cartesian Low (int16, cm) + class _LowPoint(_C.Structure): + _fields_ = [ + ("x", _C.c_int16), + ("y", _C.c_int16), + ("z", _C.c_int16), + ("reflectivity", c_uint8), + ("tag", c_uint8), + ] + + _ArrL = _LowPoint * n + pts = _C.cast(pkt.data, POINTER(_ArrL)).contents + arr = np.ctypeslib.as_array(pts) + xyz = np.stack((arr["x"], arr["y"], arr["z"]), axis=1).astype(np.float32) / 100.0 + else: + return + + # -------------------------------------------------------------- + # Aggregate packets belonging to the same "frame" (full 360°) + # -------------------------------------------------------------- + # Each UDP packet contains only a tiny slice of a full scan – for the + # MID-360 that's merely 96 points. Feeding such sparse subsets into a + # SLAM backend like KISS-ICP is ineffective and typically produces an + # empty map. The packet header provides a monotonically increasing + # `frame_cnt` field which we can use to group packets that belong to + # the same rotation. We buffer points until the counter changes, then + # emit the *previous* frame in one batch via ``handle_points``. 
+        #
+        # A small dictionary maps handle → current frame accumulator so
+        # that multi-lidar setups would still work (although untested).
+        # --------------------------------------------------------------
+
+        # ------------------------------------------------------------------
+        # Aggregate packets into one pseudo-frame: flush after ~0.2 s
+        # (≈4 rotations @ 20 Hz) or ~120 packets; see frame_time / frame_packets.
+        # Lock only protects buffer state; handle_points is called outside.
+        # ------------------------------------------------------------------
+        frame_xyz = None
+        elapsed = 0.0
+        with self._frame_lock:
+            buf, last_t = self._frame_state.get(handle, ([], time.time()))
+            buf.append(xyz)
+            now = time.time()
+            elapsed = now - last_t
+            # Heuristic flush conditions: either 0.2 s have passed (≈4 full scans
+            # at 20 Hz) *or* we already gathered ≥ 120 packets (~12 k points).
+            if elapsed >= self._frame_time or len(buf) >= self._frame_packets:
+                frame_xyz = np.concatenate(buf, axis=0)
+                self._frame_state[handle] = ([], now)
+            else:
+                self._frame_state[handle] = (buf, last_t)
+
+        if frame_xyz is not None:
+            self._frame_print_counter += 1
+            if self._debug and (self._frame_print_counter % self._print_every_n_frames == 0):
+                print(f"[Livox2] frame {frame_xyz.shape[0]} pts (Δt={elapsed*1000:.1f} ms)")
+            try:
+                self.handle_points(frame_xyz)
+            except Exception as exc:
+                print("Exception in handle_points:", exc, file=sys.stderr)
+
+    # ------------------------------------------------------------------
+    def _on_info_change(self, handle: int, info_ptr, _client):
+        if self._debug:
+            print(f"[Livox2] InfoChange handle={handle}")
+
+        # Set work mode to NORMAL (1) to begin emitting points.
+        kNormal = 1
+        _lib.SetLivoxLidarWorkMode(handle, kNormal, None, None)
+
+        # Ensure point-cloud sending is enabled
+        _lib.EnableLivoxLidarPointSend(handle, None, None)
+
+        # Ensure data type is Cartesian High (1)
+        _lib.SetLivoxLidarPclDataType(handle, 1, None, None)
+
+
+if __name__ == "__main__":
+    cfg = Path("mid360_config.json")
+    if not cfg.exists():
+        # generate a bare-bones config for 192.168.123.222
+        host_ip = os.environ.get("HOST_IP", "192.168.123.222")
+        data = {
+            "MID360": {
+                "lidar_net_info": {
+                    "cmd_data_port": 56100,
+                    "push_msg_port": 56200,
+                    "point_data_port": 56300,
+                    "imu_data_port": 56400,
+                    "log_data_port": 56500,
+                },
+                "host_net_info": [
+                    {
+                        "host_ip": host_ip,
+                        "multicast_ip": "224.1.1.5",
+                        "cmd_data_port": 56101,
+                        "push_msg_port": 56201,
+                        "point_data_port": 56301,
+                        "imu_data_port": 56401,
+                        "log_data_port": 56501,
+                    }
+                ],
+            }
+        }
+        cfg.write_text(json.dumps(data, indent=2))
+        print("[Livox2] Wrote default mid360_config.json with host_ip", host_ip)
+
+    lidar = Livox2(cfg, host_ip="192.168.123.222")
+    lidar.spin()
diff --git a/Lidar/mid360_config.json b/Lidar/mid360_config.json
new file mode 100644
index 0000000..cbe3efd
--- /dev/null
+++ b/Lidar/mid360_config.json
@@ -0,0 +1,41 @@
+{
+  "lidar_summary_info" : {
+    "lidar_type": 8
+  },
+  "MID360": {
+    "lidar_net_info" : {
+      "cmd_data_port": 56100,
+      "push_msg_port": 56200,
+      "point_data_port": 56300,
+      "imu_data_port": 56400,
+      "log_data_port": 56500
+    },
+    "host_net_info" : {
+      "cmd_data_ip" : "192.168.123.222",
+      "cmd_data_port": 56101,
+      "push_msg_ip": "192.168.123.222",
+      "push_msg_port": 56201,
+      "point_data_ip": "192.168.123.222",
+      "point_data_port": 56301,
+      "imu_data_ip" : "192.168.123.222",
+      "imu_data_port": 56401,
+      "log_data_ip" : "",
+      "log_data_port": 56501
+    }
+  },
+  "lidar_configs" : [
+    {
+      "ip" : "192.168.123.120",
+      "pcl_data_type" : 1,
+      "pattern_mode" : 0,
+      "extrinsic_parameter" : {
+        "roll": 0.0,
"pitch": -10.0, + "yaw": 0.0, + "x": 0.05, + "y": 0.0, + "z": 0.45 + } + } + ] + } diff --git a/Models/Modelfile b/Models/Modelfile new file mode 100644 index 0000000..95edce6 --- /dev/null +++ b/Models/Modelfile @@ -0,0 +1,3 @@ +FROM qwen2.5vl:7b +PARAMETER num_ctx 1024 +PARAMETER num_predict 64 diff --git a/Models/yolov8m.pt b/Models/yolov8m.pt new file mode 100644 index 0000000..f764f63 Binary files /dev/null and b/Models/yolov8m.pt differ diff --git a/Navigation/__init__.py b/Navigation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Navigation/goal_nav.py b/Navigation/goal_nav.py new file mode 100644 index 0000000..8341880 --- /dev/null +++ b/Navigation/goal_nav.py @@ -0,0 +1,165 @@ +""" +goal_nav.py — Goal-oriented navigation using YOLO fast-check + LLaVA fallback +""" +import re +import time +import threading + +from API.zmq_api import send_vel, gradual_stop +from API.camera_api import get_frame +from API.yolo_api import yolo_sees, yolo_closest, yolo_all_classes +from API.llava_api import call_llava, ask_goal, OLLAMA_MODEL +from API.memory_api import log_detection +from Core.config_loader import load_config + +_cfg = load_config("Navigation") + +GOAL_ALIASES = _cfg["goal_aliases"] +YOLO_CLASSES = _cfg["yolo_goal_classes"] +MAX_STEPS = _cfg["max_steps"] +MIN_STEPS = _cfg["min_steps_before_check"] +SCAN_INTERVAL = _cfg["scan_interval_s"] +ROTATION_SPEED = _cfg["rotation_speed"] + + +# --------------------------------------------------------------------------- +# helpers +# --------------------------------------------------------------------------- + +def _goal_yolo_target(goal: str): + """Map a natural-language goal to a YOLO class name, or None.""" + gl = goal.lower() + for alias, cls in GOAL_ALIASES.items(): + if alias in gl: + return cls + for cls in YOLO_CLASSES: + if cls in gl: + return cls + return None + + +def _extract_extra_condition(goal: str, yolo_target: str): + """ + Pull a compound condition out of a goal string. + e.g. 'find a person wearing a red shirt' -> 'wearing a red shirt' + Returns the extra condition text, or None if there is no qualifier. + """ + gl = goal.lower() + # strip the core target noun from the goal to isolate the qualifier + if yolo_target: + # remove everything up to and including the target noun + pattern = re.compile( + r"(?:find|go to|navigate to|look for|reach|head to)\s+" + r"(?:a |an |the |some )?" + re.escape(yolo_target), + re.IGNORECASE, + ) + remainder = pattern.sub("", gl).strip() + # clean leftover noise + remainder = re.sub(r"^[,\s]+", "", remainder) + if remainder and len(remainder) > 3: + return remainder + return None + + +def _verify_condition(yolo_target: str, condition: str, img_b64) -> bool: + """Use LLaVA to verify a compound condition (e.g. 'wearing red shirt').""" + if not condition or not img_b64: + return True # no extra condition — YOLO match is enough + prompt = ( + f"You can see a {yolo_target} in this image. " + f"Is the following also true: '{condition}'? " + "Answer ONLY 'yes' or 'no'." + ) + answer = call_llava(prompt, img_b64, num_predict=10) + return "yes" in answer.lower() + + +# --------------------------------------------------------------------------- +# main loop +# --------------------------------------------------------------------------- + +def navigate_to_goal(goal: str, max_steps: int = 0): + """ + Rotate-and-scan loop. + 1. A background thread keeps the robot rotating slowly. + 2. Every SCAN_INTERVAL seconds the fast YOLO check fires. + 3. 
If YOLO spots the target, optionally verify the compound condition. + 4. If no YOLO match, fall back to LLaVA for guidance. + """ + if max_steps <= 0: + max_steps = MAX_STEPS + + yolo_target = _goal_yolo_target(goal) + condition = _extract_extra_condition(goal, yolo_target) if yolo_target else None + + if yolo_target: + print(f" [GoalNav] YOLO target: '{yolo_target}'" + f"{f' condition: {condition}' if condition else ''}") + else: + print(f" [GoalNav] No YOLO shortcut — relying on LLaVA") + + # --- continuous rotation thread (with LiDAR obstacle pause) --- + rotating = [True] + + def _obstacle_check(): + try: + from API.lidar_api import obstacle_ahead + return obstacle_ahead() + except ImportError: + return False + + def _rotate(): + while rotating[0]: + if _obstacle_check(): + send_vel(0.0, 0.0, 0.0) + time.sleep(0.2) + else: + send_vel(0.0, 0.0, ROTATION_SPEED) + time.sleep(0.05) + + rot_thread = threading.Thread(target=_rotate, daemon=True) + rot_thread.start() + + reached = False + try: + for step in range(1, max_steps + 1): + time.sleep(SCAN_INTERVAL) + + # --- YOLO fast check --- + if yolo_target and yolo_sees(yolo_target): + img_b64 = get_frame() + if condition: + if not _verify_condition(yolo_target, condition, img_b64): + print(f" [GoalNav] YOLO sees {yolo_target} but condition " + f"'{condition}' not met — continuing") + continue + + print(f" [GoalNav] YOLO confirmed '{yolo_target}' at step {step}") + log_detection(yolo_target, position="goal", distance="close") + reached = True + break + + # --- LLaVA fallback (less frequent — every few steps) --- + if step >= MIN_STEPS and step % MIN_STEPS == 0: + img_b64 = get_frame() + if img_b64: + d = ask_goal(goal, img_b64) + if d.get("reached"): + print(f" [GoalNav] LLaVA says goal reached at step {step}") + reached = True + break + speak = d.get("speak", "") + if speak: + print(f" [GoalNav] LLaVA: {speak}") + + finally: + rotating[0] = False + rot_thread.join(timeout=1.0) + gradual_stop() + + if reached: + print(f" [GoalNav] Arrived: '{goal}'") + else: + print(f" [GoalNav] Could not reach '{goal}' within {max_steps} steps") + + return reached diff --git a/Navigation/marcus_odometry.py b/Navigation/marcus_odometry.py new file mode 100644 index 0000000..d0e097a --- /dev/null +++ b/Navigation/marcus_odometry.py @@ -0,0 +1,808 @@ +""" +marcus_odometry.py — Precise Distance & Position Module +========================================================= +Project : Marcus | YS Lootah Technology +Hardware : Unitree G1 EDU + Jetson Orin NX +Purpose : Track robot position precisely and execute accurate movements. + +Two sources (priority order): + 1. ROS2 /dog_odom — joint encoder data → ±2cm accuracy + 2. 
Dead reckoning — velocity × time integration → ±10cm accuracy + +Import in marcus_llava.py +-------------------------- + from marcus_odometry import Odometry + odom = Odometry() + odom.start(zmq_sock=sock) ← pass existing ZMQ socket + + odom.walk_distance(1.0) ← walk exactly 1 meter + odom.turn_degrees(90) ← turn exactly 90° left + odom.turn_degrees(-90) ← turn exactly 90° right + odom.get_position() ← {x, y, heading, source} + odom.reset() ← zero at current location + +Standalone test +--------------- + /home/unitree/miniconda3/envs/marcus/bin/python3 ~/Models_marcus/marcus_odometry.py + +Date : April 2026 +""" + +import time +import math +import json +import threading +import zmq + + +# ══════════════════════════════════════════════════════════════════════════════ +# CONFIGURATION +# ══════════════════════════════════════════════════════════════════════════════ + +ZMQ_HOST = "127.0.0.1" +ZMQ_PORT = 5556 +ROS2_ODOM_TOPIC = "/dog_odom" +ODOM_INTERFACE = "eth0" + +# Movement defaults +DEFAULT_WALK_SPEED = 0.25 # m/s — slower = more accurate +DEFAULT_TURN_SPEED = 0.25 # rad/s +DIST_TOLERANCE = 0.05 # meters — stop within 5cm +ANGLE_TOLERANCE = 2.0 # degrees — stop within 2° +SAFETY_TIMEOUT_MULT = 3.0 # timeout = (distance/speed) × this +ODOM_STALE_WARN = 1.0 # warn if odom not updated for this many seconds + +# Dead reckoning +DR_UPDATE_HZ = 20 # integration rate + + +# ══════════════════════════════════════════════════════════════════════════════ +# ODOMETRY CLASS +# ══════════════════════════════════════════════════════════════════════════════ + +class Odometry: + """ + Precise robot position tracking and movement control. + + Thread-safe. Start with odom.start(zmq_sock). + If ROS2 is not available, falls back to dead reckoning automatically. + + Usage: + odom = Odometry() + odom.start(zmq_sock=sock) + odom.walk_distance(1.0) + odom.turn_degrees(90) + pos = odom.get_position() + """ + + def __init__(self): + self._lock = threading.Lock() + self._alive = [False] + self._sock = None # ZMQ PUB socket (shared from marcus_llava) + + # Position state + self._x = 0.0 # meters from start + self._y = 0.0 # meters from start + self._heading = 0.0 # degrees — 0=start, +left, -right + self._vx = 0.0 # current velocity (for dead reckoning) + self._vy = 0.0 + self._vyaw = 0.0 + self._source = "none" # "ros2" or "dead_reckoning" + self._last_update = 0.0 # timestamp of last position update + + # Start position snapshot (for return_to_start) + self._start_x = 0.0 + self._start_y = 0.0 + self._start_heading = 0.0 + + # Running flag + self._running = False + + # ── STARTUP ─────────────────────────────────────────────────────────────── + + def start(self, zmq_sock=None) -> bool: + """ + Start odometry tracking in background thread. + + Args: + zmq_sock : existing ZMQ PUB socket from marcus_llava.py. + If None, creates its own socket on ZMQ_PORT. + Passing the shared socket avoids port conflicts. + + Returns: + True always — runs in either ROS2 or dead reckoning mode. 
+ + Edge cases: + - ZMQ port already in use → catches and warns + - Called twice → second call is no-op with warning + - ROS2 not sourced → falls back to dead reckoning automatically + """ + if self._running: + print(" [Odom] ⚠️ Already running — ignoring duplicate start()") + return True + + # Wire up ZMQ socket + if zmq_sock is not None: + self._sock = zmq_sock + else: + self._init_own_zmq() + + if self._sock is None: + print(" [Odom] ⚠️ No ZMQ socket — movement functions disabled") + return False + + # Reset position + self._reset_state() + self._alive[0] = True + self._running = True + + # Try ROS2 first + ros2_ok = self._try_start_ros2() + if not ros2_ok: + # Fall back to dead reckoning + threading.Thread( + target=self._dead_reckoning_loop, + daemon=True, + name="odom-dr" + ).start() + print(" [Odom] Dead reckoning mode — accuracy ±10cm") + else: + print(" [Odom] ROS2 /dog_odom mode — accuracy ±2cm") + + time.sleep(0.5) + return True + + def stop(self): + """Stop odometry tracking.""" + self._alive[0] = False + self._running = False + print(" [Odom] Stopped") + + def _init_own_zmq(self): + """Create own ZMQ socket if no shared socket provided.""" + try: + ctx = zmq.Context() + sock = ctx.socket(zmq.PUB) + sock.bind(f"tcp://{ZMQ_HOST}:{ZMQ_PORT}") + self._sock = sock + time.sleep(0.5) + except zmq.ZMQError as e: + if "Address already in use" in str(e): + print(f" [Odom] ⚠️ ZMQ port {ZMQ_PORT} already in use.") + print(" [Odom] Pass zmq_sock=sock from marcus_llava.py instead.") + else: + print(f" [Odom] ⚠️ ZMQ error: {e}") + self._sock = None + + def _reset_state(self): + """Reset position to zero.""" + with self._lock: + self._x = self._y = self._heading = 0.0 + self._vx = self._vy = self._vyaw = 0.0 + self._start_x = self._start_y = self._start_heading = 0.0 + self._last_update = time.time() + + # ── ROS2 ODOMETRY ───────────────────────────────────────────────────────── + + def _try_start_ros2(self) -> bool: + """ + Try to subscribe to /dog_odom via ROS2. + Returns True if ROS2 is available, False otherwise. + + Edge cases: + - ROS2 not installed → ImportError caught + - ROS2 not sourced → caught + - Topic not publishing → timeout after 2s + """ + try: + import rclpy + # Skip ROS2 on this system — DDS init causes bad_alloc + # due to memory conflict with Holosoma + YOLO + # Dead reckoning fallback is used instead + return False + rclpy.init() + except Exception: + return False + + # Quick check: can we create a node? 
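The node created in that check recovers heading from the odometry quaternion with the standard ZYX yaw extraction; the same math as a standalone, testable helper:

import math

def quat_to_yaw_deg(w: float, x: float, y: float, z: float) -> float:
    # Mirrors the _callback conversion: yaw = atan2(2*(w*z + x*y), 1 - 2*(y*y + z*z))
    siny = 2.0 * (w * z + x * y)
    cosy = 1.0 - 2.0 * (y * y + z * z)
    return math.degrees(math.atan2(siny, cosy))

# Identity orientation gives 0 degrees; a quarter turn about +z gives +90.
assert quat_to_yaw_deg(1.0, 0.0, 0.0, 0.0) == 0.0
assert abs(quat_to_yaw_deg(math.cos(math.pi / 4), 0.0, 0.0, math.sin(math.pi / 4)) - 90.0) < 1e-9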
+ try: + from rclpy.node import Node + from nav_msgs.msg import Odometry as OdomMsg + + class _OdomNode(Node): + def __init__(self, parent): + super().__init__("marcus_odom") + self._parent = parent + self.create_subscription( + OdomMsg, ROS2_ODOM_TOPIC, + self._callback, 10 + ) + + def _callback(self, msg): + pos = msg.pose.pose.position + ori = msg.pose.pose.orientation + twist = msg.twist.twist + + # Quaternion → yaw + siny = 2.0 * (ori.w * ori.z + ori.x * ori.y) + cosy = 1.0 - 2.0 * (ori.y**2 + ori.z**2) + yaw = math.degrees(math.atan2(siny, cosy)) + + with self._parent._lock: + self._parent._x = pos.x + self._parent._y = pos.y + self._parent._heading = yaw + self._parent._vx = twist.linear.x + self._parent._vy = twist.linear.y + self._parent._vyaw = twist.angular.z + self._parent._source = "ros2" + self._parent._last_update = time.time() + + node = _OdomNode(self) + + def _spin(): + while self._alive[0]: + rclpy.spin_once(node, timeout_sec=0.02) + + # Start spin thread and wait 2s for first message + t = threading.Thread(target=_spin, daemon=True, name="odom-ros2") + t.start() + + deadline = time.time() + 2.0 + while time.time() < deadline: + time.sleep(0.1) + if self._source == "ros2": + return True + + # No message received — ROS2 available but topic silent + print(" [Odom] ⚠️ /dog_odom topic not publishing — using dead reckoning") + return False + + except Exception as e: + return False + + # ── DEAD RECKONING ──────────────────────────────────────────────────────── + + def _dead_reckoning_loop(self): + """ + Estimate position by integrating velocity × time. + Updates position at DR_UPDATE_HZ. + + Edge cases: + - Very large dt (system sleep) → capped at 0.5s to avoid jumps + - Heading wrap-around at 360°/0° → handled by modulo + """ + dt_target = 1.0 / DR_UPDATE_HZ + t_last = time.time() + + while self._alive[0]: + time.sleep(dt_target) + now = time.time() + dt = min(now - t_last, 0.5) # cap at 0.5s to prevent position jumps + t_last = now + + with self._lock: + heading_rad = math.radians(self._heading) + vx = self._vx + vy = self._vy + vyaw = self._vyaw + + # World-frame position update + self._x += (vx * math.cos(heading_rad) - vy * math.sin(heading_rad)) * dt + self._y += (vx * math.sin(heading_rad) + vy * math.cos(heading_rad)) * dt + self._heading = (self._heading + math.degrees(vyaw) * dt) % 360 + + self._source = "dead_reckoning" + self._last_update = now + + # ── VELOCITY CONTROL ────────────────────────────────────────────────────── + + def _send_vel(self, vx: float = 0.0, vy: float = 0.0, vyaw: float = 0.0): + """Send velocity to Holosoma. Also updates dead reckoning velocity state.""" + if self._sock: + try: + self._sock.send_string(json.dumps({"vel": {"vx": vx, "vy": vy, "vyaw": vyaw}})) + except Exception as e: + print(f" [Odom] ⚠️ ZMQ send error: {e}") + + # Update dead reckoning state + with self._lock: + self._vx = vx + self._vy = vy + self._vyaw = vyaw + + def _gradual_stop(self, steps: int = 20): + """Smooth stop — 20 zero-velocity messages over ~1 second.""" + for _ in range(steps): + self._send_vel(0.0, 0.0, 0.0) + time.sleep(0.05) + + def _check_stale(self): + """Warn if odometry hasn't been updated recently.""" + if time.time() - self._last_update > ODOM_STALE_WARN: + print(f" [Odom] ⚠️ Position data may be stale (>{ODOM_STALE_WARN:.0f}s since last update)") + + # ── PUBLIC MOVEMENT API ─────────────────────────────────────────────────── + + def get_position(self) -> dict: + """ + Return current robot position. 
+ + Returns: + {"x": float, "y": float, "heading": float, + "source": "ros2"|"dead_reckoning"|"none"} + + x, y in meters from start position. + heading in degrees (0 = start direction, +left, -right, 0-360). + """ + with self._lock: + return { + "x": round(self._x, 4), + "y": round(self._y, 4), + "heading": round(self._heading, 2), + "source": self._source, + } + + def get_distance_from_start(self) -> float: + """Return Euclidean distance in meters from the starting position.""" + with self._lock: + dx = self._x - self._start_x + dy = self._y - self._start_y + return round(math.sqrt(dx**2 + dy**2), 3) + + def reset(self): + """ + Reset odometry to zero at current physical location. + Use when robot is at a known reference point. + """ + with self._lock: + self._x = self._y = self._heading = 0.0 + self._start_x = self._start_y = self._start_heading = 0.0 + print(" [Odom] ✅ Reset to (0, 0, 0°)") + + def is_running(self) -> bool: + return self._running + + def status_str(self) -> str: + """Return formatted status string for display.""" + pos = self.get_position() + return (f"x={pos['x']:.2f}m y={pos['y']:.2f}m " + f"heading={pos['heading']:.1f}° source={pos['source']}") + + def walk_distance(self, meters: float, speed: float = None, + direction: str = "forward") -> bool: + """ + Walk a precise distance using odometry feedback. + + Args: + meters : distance in meters (must be > 0) + speed : m/s (default DEFAULT_WALK_SPEED = 0.25) + direction : "forward" or "backward" + + Returns: + True if target reached within tolerance. + False if timeout, invalid input, or odometry not running. + + Edge cases: + - meters <= 0 → rejected + - speed <= 0 or > 0.5 → clamped with warning + - Odometry not running → time-based fallback with warning + - Safety timeout = (meters / speed) × SAFETY_TIMEOUT_MULT + - Keyboard interrupt → gradual_stop then re-raise + """ + # Validate inputs + if meters <= 0: + print(f" [Odom] ⚠️ Invalid distance: {meters}m — must be positive") + return False + + if speed is None: + speed = DEFAULT_WALK_SPEED + + if speed <= 0: + print(f" [Odom] ⚠️ Speed must be positive — using default {DEFAULT_WALK_SPEED}") + speed = DEFAULT_WALK_SPEED + elif speed > 0.5: + print(f" [Odom] ⚠️ Speed {speed} exceeds safe limit — clamping to 0.4 m/s") + speed = 0.4 + + if not self._running: + print(" [Odom] ⚠️ Odometry not started — using time-based fallback") + return self._time_based_walk(meters, speed, direction) + + self._check_stale() + + vx = speed if direction == "forward" else -speed + + with self._lock: + start_x = self._x + start_y = self._y + + timeout = (meters / speed) * SAFETY_TIMEOUT_MULT + t0 = time.time() + + print(f" [Odom] Walking {meters:.2f}m {direction} at {speed:.2f} m/s " + f"(timeout: {timeout:.1f}s)") + + try: + while time.time() - t0 < timeout: + with self._lock: + dx = self._x - start_x + dy = self._y - start_y + covered = math.sqrt(dx**2 + dy**2) + + if covered >= meters - DIST_TOLERANCE: + self._gradual_stop() + print(f" [Odom] ✅ Walked {covered:.3f}m (target {meters:.2f}m)") + return True + + self._send_vel(vx=vx) + time.sleep(0.05) + + except KeyboardInterrupt: + print(" [Odom] Walk interrupted by user") + self._gradual_stop() + raise + + self._gradual_stop() + with self._lock: + dx = self._x - start_x + dy = self._y - start_y + covered = math.sqrt(dx**2 + dy**2) + print(f" [Odom] ⚠️ Timeout — walked {covered:.3f}m of {meters:.2f}m") + return False + + def _time_based_walk(self, meters: float, speed: float, + direction: str = "forward") -> bool: + """ + Time-based fallback 
when odometry is not running. + Accuracy ~±10cm. + """ + print(f" [Odom] Time-based fallback: {meters:.2f}m ≈ {meters/speed:.1f}s") + vx = speed if direction == "forward" else -speed + duration = meters / speed + t0 = time.time() + try: + while time.time() - t0 < duration: + self._send_vel(vx=vx) + time.sleep(0.05) + except KeyboardInterrupt: + self._gradual_stop() + raise + self._gradual_stop() + print(f" [Odom] ✅ Time-based walk complete (~{meters:.2f}m)") + return True + + def turn_degrees(self, degrees: float, speed: float = None) -> bool: + """ + Turn a precise number of degrees using odometry feedback. + + Args: + degrees : degrees to turn. + Positive = left (counter-clockwise) + Negative = right (clockwise) + speed : angular speed in rad/s (default DEFAULT_TURN_SPEED) + + Returns: + True if reached target within tolerance. + False if invalid input, timeout, or odometry not running. + + Edge cases: + - degrees == 0 → no-op + - |degrees| > 360 → clamped to 360 with warning + - speed <= 0 → default + - speed > 0.5 → clamped + - Handles 0°/360° wrap-around correctly + - Keyboard interrupt → graceful stop + """ + if degrees == 0: + print(" [Odom] ⚠️ Turn 0° — nothing to do") + return True + + if abs(degrees) > 360: + print(f" [Odom] ⚠️ Turn {degrees}° > 360 — clamping to {360 * (1 if degrees > 0 else -1)}°") + degrees = 360.0 * (1 if degrees > 0 else -1) + + if speed is None: + speed = DEFAULT_TURN_SPEED + + if speed <= 0: + speed = DEFAULT_TURN_SPEED + elif speed > 0.5: + print(f" [Odom] ⚠️ Turn speed clamped to 0.4 rad/s") + speed = 0.4 + + if not self._running: + print(" [Odom] ⚠️ Odometry not started — using time-based fallback") + return self._time_based_turn(degrees, speed) + + self._check_stale() + + vyaw = speed if degrees > 0 else -speed + target_abs = abs(degrees) + + with self._lock: + start_heading = self._heading + + timeout = (target_abs / math.degrees(speed)) * SAFETY_TIMEOUT_MULT + t0 = time.time() + + direction = "left" if degrees > 0 else "right" + print(f" [Odom] Turning {degrees:.1f}° {direction} at {math.degrees(speed):.1f}°/s " + f"(timeout: {timeout:.1f}s)") + + try: + while time.time() - t0 < timeout: + with self._lock: + current = self._heading + + # Compute angular delta — handle wrap-around + delta = abs(current - start_heading) + if delta > 180: + delta = 360 - delta + + if delta >= target_abs - ANGLE_TOLERANCE: + self._gradual_stop() + print(f" [Odom] ✅ Turned {delta:.1f}° (target {target_abs:.1f}°)") + return True + + self._send_vel(vyaw=vyaw) + time.sleep(0.05) + + except KeyboardInterrupt: + print(" [Odom] Turn interrupted by user") + self._gradual_stop() + raise + + self._gradual_stop() + print(f" [Odom] ⚠️ Timeout on turn") + return False + + def _time_based_turn(self, degrees: float, speed: float) -> bool: + """Time-based turn fallback. ~±5° accuracy.""" + target_abs = abs(degrees) + vyaw = speed if degrees > 0 else -speed + duration = target_abs / math.degrees(speed) + print(f" [Odom] Time-based fallback: {degrees:.1f}° ≈ {duration:.1f}s") + t0 = time.time() + try: + while time.time() - t0 < duration: + self._send_vel(vyaw=vyaw) + time.sleep(0.05) + except KeyboardInterrupt: + self._gradual_stop() + raise + self._gradual_stop() + print(f" [Odom] ✅ Time-based turn complete (~{degrees:.1f}°)") + return True + + def navigate_to(self, target_x: float, target_y: float, + target_heading: float = None, + speed: float = None) -> bool: + """ + Navigate to absolute coordinates. + + Steps: + 1. Rotate to face target direction + 2. 
Walk straight-line distance to target + 3. Optionally rotate to target_heading + + Args: + target_x, target_y : destination in meters (same frame as odometry) + target_heading : final heading in degrees (None = don't adjust) + speed : walk speed in m/s + + Edge cases: + - Already at target → skip walk, only rotate if needed + - Odometry not running → time-based fallback (imprecise) + - target_heading None → skip final rotation + """ + with self._lock: + curr_x = self._x + curr_y = self._y + curr_h = self._heading + + dx = target_x - curr_x + dy = target_y - curr_y + dist = math.sqrt(dx**2 + dy**2) + + print(f" [Odom] Navigate to ({target_x:.2f}, {target_y:.2f}) — " + f"dist={dist:.2f}m") + + if dist < DIST_TOLERANCE: + print(" [Odom] Already at target position") + else: + # Rotate to face target + angle_to_target = math.degrees(math.atan2(dy, dx)) + rotation_needed = angle_to_target - curr_h + # Normalize to -180..180 + while rotation_needed > 180: rotation_needed -= 360 + while rotation_needed < -180: rotation_needed += 360 + + if abs(rotation_needed) > ANGLE_TOLERANCE: + self.turn_degrees(rotation_needed, speed=DEFAULT_TURN_SPEED) + time.sleep(0.3) + + self.walk_distance(dist, speed=speed) + time.sleep(0.3) + + # Rotate to final heading if specified + if target_heading is not None: + with self._lock: + curr_h = self._heading + final_rot = target_heading - curr_h + while final_rot > 180: final_rot -= 360 + while final_rot < -180: final_rot += 360 + if abs(final_rot) > ANGLE_TOLERANCE: + self.turn_degrees(final_rot) + + print(f" [Odom] ✅ Arrived at ({target_x:.2f}, {target_y:.2f})") + return True + + def return_to_start(self, speed: float = None) -> bool: + """ + Navigate back to the position where start() was called. + + Edge cases: + - Already at start → no movement + - Odometry not running → cannot navigate (warn) + """ + if not self._running: + print(" [Odom] ⚠️ Cannot return to start — odometry not running") + return False + print(" [Odom] Returning to start position...") + return self.navigate_to( + self._start_x, self._start_y, + self._start_heading, speed=speed + ) + + def patrol_route(self, waypoints: list, + speed: float = None, + loop: bool = False) -> bool: + """ + Walk through a list of waypoints in order. + + Args: + waypoints : list of dicts with keys x, y, heading (all optional) + e.g. [{"x": 1.0, "y": 0.0, "heading": 90}] + speed : travel speed in m/s + loop : if True, return to first waypoint after last + + Returns: + True if all waypoints reached. 
+
+        Edge cases:
+          - Empty waypoints → warns
+          - Waypoint missing x/y → skipped with warning
+          - Keyboard interrupt → stops gracefully at current position
+          - loop=True appends waypoint[0] to end of route
+        """
+        if not waypoints:
+            print(" [Odom] ⚠️ patrol_route: empty waypoints list")
+            return False
+
+        route = list(waypoints)
+        if loop:
+            route.append(waypoints[0])
+
+        print(f" [Odom] Patrol route: {len(route)} waypoints | loop={loop}")
+
+        try:
+            for i, wp in enumerate(route, 1):
+                if "x" not in wp or "y" not in wp:
+                    print(f" [Odom] ⚠️ Waypoint {i} missing x/y — skipping")
+                    continue
+
+                print(f" [Odom] Waypoint {i}/{len(route)}: ({wp['x']:.2f}, {wp['y']:.2f})")
+                heading = wp.get("heading", None)
+                self.navigate_to(wp["x"], wp["y"], heading, speed=speed)
+                time.sleep(0.5)
+
+        except KeyboardInterrupt:
+            print("\n [Odom] Patrol route interrupted by user")
+            self._gradual_stop()
+            return False
+
+        print(" [Odom] ✅ Patrol route complete")
+        return True
+
+    def __repr__(self):
+        pos = self.get_position()
+        return (f"Odometry(x={pos['x']:.2f}, y={pos['y']:.2f}, "
+                f"h={pos['heading']:.1f}°, source={pos['source']}, "
+                f"running={self._running})")
+
+
+# ══════════════════════════════════════════════════════════════════════════════
+# STANDALONE TEST
+# ══════════════════════════════════════════════════════════════════════════════
+
+if __name__ == "__main__":
+    import sys
+
+    print("Marcus Odometry — Standalone Test")
+    print("===================================\n")
+
+    odom = Odometry()
+    ok = odom.start()
+    time.sleep(1.5)
+
+    print(f"\nInitial state: {odom}")
+    print()
+    print("Commands:")
+    print(" walk <meters> — walk precisely N meters")
+    print(" back <meters> — walk backward N meters")
+    print(" turn <degrees> — turn precisely N degrees (+left, -right)")
+    print(" status — current position")
+    print(" reset — zero position")
+    print(" goto <x> <y> — navigate to coordinates")
+    print(" home — return to start")
+    print(" square — walk 1m×1m square")
+    print(" q — quit")
+    print()
+
+    while True:
+        try:
+            raw = input("Odom: ").strip().lower()
+        except (EOFError, KeyboardInterrupt):
+            break
+
+        if not raw:
+            continue
+
+        parts = raw.split()
+        cmd = parts[0]
+
+        if cmd == "q":
+            break
+
+        elif cmd == "status":
+            print(f" {odom.status_str()}")
+            print(f" Distance from start: {odom.get_distance_from_start():.3f}m")
+
+        elif cmd == "reset":
+            odom.reset()
+
+        elif cmd == "walk" and len(parts) >= 2:
+            try:
+                odom.walk_distance(float(parts[1]))
+            except ValueError:
+                print(" Usage: walk <meters>")
+
+        elif cmd == "back" and len(parts) >= 2:
+            try:
+                odom.walk_distance(float(parts[1]), direction="backward")
+            except ValueError:
+                print(" Usage: back <meters>")
+
+        elif cmd == "turn" and len(parts) >= 2:
+            try:
+                odom.turn_degrees(float(parts[1]))
+            except ValueError:
+                print(" Usage: turn <degrees> (+left, -right)")
+
+        elif cmd == "goto" and len(parts) >= 3:
+            try:
+                odom.navigate_to(float(parts[1]), float(parts[2]))
+            except ValueError:
+                print(" Usage: goto <x> <y>")
+
+        elif cmd == "home":
+            odom.return_to_start()
+
+        elif cmd == "square":
+            print(" Walking 1m × 1m square...")
+            odom.walk_distance(1.0)
+            odom.turn_degrees(90)
+            odom.walk_distance(1.0)
+            odom.turn_degrees(90)
+            odom.walk_distance(1.0)
+            odom.turn_degrees(90)
+            odom.walk_distance(1.0)
+            odom.turn_degrees(90)
+            print(" ✅ Square complete")
+
+        else:
+            print(f" Unknown: {raw}")
+
+    odom.stop()
+    print("Odometry stopped.")
diff --git a/Navigation/patrol.py b/Navigation/patrol.py
new file mode 100644
index 0000000..f647d8a
--- /dev/null
+++ b/Navigation/patrol.py
@@ -0,0 +1,114 @@
+"""
+patrol.py — Autonomous patrol loop with YOLO PPE detection + LLaVA scene assessment +""" +import time + +from API.zmq_api import gradual_stop +from API.camera_api import get_frame +from API.yolo_api import yolo_ppe_violations, yolo_person_too_close, yolo_summary +from API.llava_api import ask_patrol +from API.memory_api import mem, log_detection +from Brain.executor import move_step +from Core.config_loader import load_config + +_cfg = load_config("Patrol") + +DEFAULT_DURATION = _cfg["default_duration_minutes"] +PROXIMITY_THRESH = _cfg["proximity_threshold"] +PROXIMITY_PAUSE = _cfg["proximity_pause_s"] + + +def patrol(duration_minutes: float = 0.0, alert_callback=None): + """ + Timed patrol loop. + + Each iteration: + 1. YOLO PPE violation check -> log + alert + 2. LLaVA scene assessment -> decide next move + 3. Proximity safety stop -> pause if someone is too close + 4. Execute move_step + + Parameters + ---------- + duration_minutes : float + How long to patrol (default from config). + alert_callback : callable, optional + Called with (alert_text: str) whenever an alert fires. + """ + if duration_minutes <= 0: + duration_minutes = DEFAULT_DURATION + + end_time = time.time() + duration_minutes * 60.0 + step = 0 + + print(f" [Patrol] Starting {duration_minutes:.1f}-minute patrol") + + try: + while time.time() < end_time: + step += 1 + + # ----- 1. YOLO PPE violations ----- + violations = yolo_ppe_violations() + if violations: + for v in violations: + alert_text = f"PPE violation: {v}" + print(f" [Patrol] {alert_text}") + log_detection(v, position="patrol", distance="") + if alert_callback: + alert_callback(alert_text) + + # ----- 2. LLaVA scene assessment ----- + img_b64 = get_frame() + if img_b64: + d = ask_patrol(img_b64) + else: + d = {"observation": "no frame", "alert": None, + "next_move": "forward", "duration": 1.0} + + observation = d.get("observation", "") + alert = d.get("alert") + next_move = d.get("next_move", "forward") + duration = float(d.get("duration", 1.0)) + + if observation: + print(f" [Patrol] step {step}: {observation}") + + if alert: + alert_text = f"Alert: {alert}" + print(f" [Patrol] {alert_text}") + if alert_callback: + alert_callback(alert_text) + + # Log interesting detections to memory + if mem and observation and "person" in observation.lower(): + log_detection("person", position="patrol", distance="") + + # ----- 3. Proximity safety stop (YOLO + LiDAR) ----- + lidar_blocked = False + try: + from API.lidar_api import obstacle_ahead + lidar_blocked = obstacle_ahead() + except ImportError: + pass + + if yolo_person_too_close(threshold=PROXIMITY_THRESH) or lidar_blocked: + reason = "LiDAR obstacle" if lidar_blocked else "Person too close" + print(f" [Patrol] {reason} — pausing {PROXIMITY_PAUSE}s") + gradual_stop() + time.sleep(PROXIMITY_PAUSE) + continue + + # ----- 4. 
Execute movement ----- + if next_move and next_move != "stop": + move_step(next_move, duration) + else: + gradual_stop() + time.sleep(0.5) + + except KeyboardInterrupt: + print(" [Patrol] Interrupted") + finally: + gradual_stop() + + print(f" [Patrol] Finished after {step} steps") + return step diff --git a/Server/__init__.py b/Server/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Server/marcus_server.py b/Server/marcus_server.py new file mode 100644 index 0000000..9b32854 --- /dev/null +++ b/Server/marcus_server.py @@ -0,0 +1,224 @@ +""" +Marcus Server — WebSocket interface to the full Marcus Brain +============================================================= +Runs on Jetson Orin NX. Uses the SAME brain as run_marcus.py +but accepts commands via WebSocket instead of terminal input. + +Start: python3 -m Server.marcus_server + OR: python3 -m Server.marcus_server --host 0.0.0.0 --port 8765 +""" +import asyncio +import argparse +import json +import os +import subprocess +import sys +import threading +import time + +PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if PROJECT_DIR not in sys.path: + sys.path.insert(0, PROJECT_DIR) + +import websockets + +from Core.config_loader import load_config +from Core.logger import log + +from Brain.marcus_brain import init_brain, process_command, get_brain_status, shutdown +from API.camera_api import get_frame + +_net = load_config("Network") + +HOST = "0.0.0.0" +PORT = _net.get("websocket_port", 8765) + +connected_clients = set() + + +def _get_interface_ips(): + """Get IP addresses for eth0 and wlan0.""" + import socket + ips = {} + for iface in ("eth0", "wlan0"): + try: + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.settimeout(0.1) + # This doesn't actually send — just resolves the interface IP + import fcntl + import struct + ip = socket.inet_ntoa(fcntl.ioctl( + s.fileno(), 0x8915, + struct.pack('256s', iface.encode('utf-8')) + )[20:24]) + ips[iface] = ip + s.close() + except Exception: + pass + return ips + + +# ─── WEBSOCKET HANDLER ──────────────────────────────────────────────────────── +async def handler(websocket): + connected_clients.add(websocket) + client = websocket.remote_address + print(f"[Server] Client connected: {client}") + log(f"Client connected: {client}", "info", "server") + + # Send status + status = get_brain_status() + lidar_ok = _check_lidar() + await websocket.send(json.dumps({ + "type": "status", + "model": status["model"], + "yolo": status["yolo"], + "odometry": status["odometry"], + "memory": status["memory"], + "camera": status["camera"], + "lidar": lidar_ok, + "message": "Marcus server ready (full brain)", + })) + + try: + async for message in websocket: + data = json.loads(message) + msg_type = data.get("type") + + if msg_type == "command": + command = data.get("command", "").strip() + if not command: + continue + + ts = time.strftime("%H:%M:%S") + print(f"[Nav] Command: '{command}'") + + # Tell client we're thinking + await websocket.send(json.dumps({ + "type": "thinking", "command": command, "timestamp": ts, + })) + + # Run command through the full brain (same as run_marcus.py) + # This runs in a thread to not block the event loop + loop = asyncio.get_event_loop() + result = await loop.run_in_executor(None, process_command, command) + + # Send result back + entry = { + "type": "decision", + "command": command, + "action": result.get("action", "NONE"), + "speak": result.get("speak", ""), + "cmd": result.get("action", "NONE"), + "value": "", + "elapsed": 
round(result.get("elapsed", 0), 1), + "timestamp": ts, + "brain_type": result.get("type", "unknown"), + } + print(f"[Nav] Result ({entry['elapsed']}s): {entry['action']} — {entry['speak'][:60]}") + await websocket.send(json.dumps(entry)) + + elif msg_type == "capture": + frame = get_frame() + if frame: + await websocket.send(json.dumps({ + "type": "capture_result", "ok": True, "data": frame, + "timestamp": time.strftime("%H:%M:%S"), + })) + else: + await websocket.send(json.dumps({ + "type": "capture_result", "ok": False, + "message": "No frame available", + })) + + elif msg_type == "ping": + status = get_brain_status() + await websocket.send(json.dumps({ + "type": "pong", + "lidar": _check_lidar(), + "status": status, + "timestamp": time.strftime("%H:%M:%S"), + })) + + except websockets.exceptions.ConnectionClosed: + print(f"[Server] Client disconnected: {client}") + finally: + connected_clients.discard(websocket) + + +# ─── FRAME BROADCAST ───────────────────────────────────────────────────────── +async def broadcast_frames(): + """Push camera frames to all connected clients at ~10Hz.""" + global connected_clients + while True: + if connected_clients: + frame = get_frame() + if frame: + msg = json.dumps({"type": "frame", "data": frame}) + dead = set() + for ws in connected_clients.copy(): + try: + await ws.send(msg) + except Exception: + dead.add(ws) + connected_clients -= dead + await asyncio.sleep(0.1) + + +def _check_lidar(): + lidar_ip = _net.get("lidar_ip", "192.168.123.120") + try: + r = subprocess.run(["ping", "-c", "1", "-W", "1", lidar_ip], capture_output=True) + return r.returncode == 0 + except Exception: + return False + + +# ─── MAIN ───────────────────────────────────────────────────────────────────── +async def run_server(host: str, port: int): + # Initialize the full brain (camera, YOLO, odometry, memory, LLaVA) + init_brain() + + status = get_brain_status() + ips = _get_interface_ips() + eth_ip = ips.get("eth0", "—") + wlan_ip = ips.get("wlan0", "—") + + print() + print("=" * 56) + print(" Marcus Server (Full Brain)") + print(f" Bind : ws://{host}:{port}") + if eth_ip != "—": + print(f" eth0 : ws://{eth_ip}:{port}") + if wlan_ip != "—": + print(f" wlan0 : ws://{wlan_ip}:{port}") + print(f" Model : {status['model']}") + print(f" YOLO : {'active' if status['yolo'] else 'off'}") + print(f" Odometry : {'active' if status['odometry'] else 'off'}") + print(f" Memory : {'active' if status['memory'] else 'off'}") + print(f" Camera : {status['camera']}") + print("=" * 56) + + log(f"Server starting on ws://{host}:{port}", "info", "server") + + async with websockets.serve(handler, host, port): + print(f"[Server] Listening on ws://{host}:{port}") + print("[Server] Waiting for client to connect...") + await broadcast_frames() + + +def main(): + parser = argparse.ArgumentParser(description="Marcus Server (Full Brain)") + parser.add_argument("--host", default=HOST, help=f"Bind address (default: {HOST})") + parser.add_argument("--port", type=int, default=PORT, help=f"Port (default: {PORT})") + args = parser.parse_args() + + try: + asyncio.run(run_server(args.host, args.port)) + except KeyboardInterrupt: + pass + finally: + shutdown() + + +if __name__ == "__main__": + main() diff --git a/Vision/__init__.py b/Vision/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Vision/marcus_imgsearch.py b/Vision/marcus_imgsearch.py new file mode 100644 index 0000000..9d51fa3 --- /dev/null +++ b/Vision/marcus_imgsearch.py @@ -0,0 +1,510 @@ +""" +marcus_imgsearch.py — Image-Guided 
Search +========================================== +Project : Marcus | YS Lootah Technology +Hardware : Unitree G1 EDU + Jetson Orin NX +Purpose : Find a specific person or object by comparing camera frames + to a reference image provided by the user. + +How it works +------------ +1. User provides a reference image (photo of person or object) +2. Marcus starts rotating while scanning camera frames +3. Every step: LLaVA compares current frame to reference image +4. When match found: robot stops, reports location +5. Optional: YOLO pre-filter speeds up search (find person class first, + then LLaVA verifies it's the right person) + +Usage in marcus_llava.py +------------------------ + from marcus_imgsearch import ImageSearch + searcher = ImageSearch(get_frame_fn=get_frame, send_vel_fn=send_vel, + gradual_stop_fn=gradual_stop, llava_fn=_call_llava, + yolo_sees_fn=yolo_sees, model=OLLAMA_MODEL) + + # Start search with a reference image (base64 JPEG) + result = searcher.search(ref_img_b64, hint="person in blue shirt", max_steps=60) + # result: {"found": True, "position": "center", "steps": 12, "description": "..."} + + # Or from a file path + result = searcher.search_from_file("/tmp/target.jpg", hint="kassam") + +Standalone test +--------------- + python3 ~/Marcus/Vision/marcus_imgsearch.py --image /path/to/photo.jpg + +Date : April 2026 +""" + +import base64 +import io +import json +import time +import threading +import os +import re +from pathlib import Path + +import numpy as np + +try: + from PIL import Image + PIL_AVAILABLE = True +except ImportError: + PIL_AVAILABLE = False + + +# ══════════════════════════════════════════════════════════════════════════════ +# CONFIGURATION +# ══════════════════════════════════════════════════════════════════════════════ + +DEFAULT_MAX_STEPS = 60 # max rotation steps before giving up +STEP_DELAY = 0.4 # seconds between scan steps +ROTATE_SPEED = 0.25 # rad/s rotation speed during search +MIN_STEPS_WARMUP = 3 # skip first N steps (stale frame) +MATCH_CONFIDENCE_THR = 0.6 # LLaVA confidence threshold (not used directly, + # but kept for future scoring) + + +# ══════════════════════════════════════════════════════════════════════════════ +# IMAGE UTILITIES +# ══════════════════════════════════════════════════════════════════════════════ + +def _load_image_b64(path: str) -> str: + """ + Load an image file and return as base64 JPEG string. + + Handles: JPEG, PNG, BMP, WEBP. + Resizes to 336x336 max for LLaVA efficiency. + Returns None if file not found or unreadable. 
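+ + Example (path is illustrative): + b64 = _load_image_b64("/tmp/target.jpg") # base64 str, or None on failure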
+ """ + if not PIL_AVAILABLE: + print(" [ImgSearch] PIL not available — pip install Pillow") + return None + + try: + path = Path(path) + if not path.exists(): + print(f" [ImgSearch] File not found: {path}") + return None + + img = Image.open(path).convert("RGB") + + # Resize to max 336x336 keeping aspect ratio + img.thumbnail((336, 336), Image.LANCZOS) + + buf = io.BytesIO() + img.save(buf, format="JPEG", quality=85) + return base64.b64encode(buf.getvalue()).decode() + + except Exception as e: + print(f" [ImgSearch] Cannot load image: {e}") + return None + + +def _numpy_to_b64(frame: np.ndarray, quality: int = 80) -> str: + """Convert a BGR numpy frame to base64 JPEG.""" + if not PIL_AVAILABLE or frame is None: + return None + try: + img = Image.fromarray(frame[:, :, ::-1]) # BGR → RGB + buf = io.BytesIO() + img.save(buf, format="JPEG", quality=quality) + return base64.b64encode(buf.getvalue()).decode() + except Exception: + return None + + +def _resize_b64(img_b64: str, max_size: int = 336) -> str: + """Resize a base64 image to max_size × max_size.""" + if not PIL_AVAILABLE or not img_b64: + return img_b64 + try: + raw = base64.b64decode(img_b64) + img = Image.open(io.BytesIO(raw)).convert("RGB") + img.thumbnail((max_size, max_size), Image.LANCZOS) + buf = io.BytesIO() + img.save(buf, format="JPEG", quality=85) + return base64.b64encode(buf.getvalue()).decode() + except Exception: + return img_b64 + + +# ══════════════════════════════════════════════════════════════════════════════ +# COMPARISON PROMPTS +# ══════════════════════════════════════════════════════════════════════════════ + +def _build_compare_prompt(hint: str = "") -> str: + """ + Build a LLaVA prompt that compares a reference image to the current camera frame. + + The model receives TWO images: [reference, current_frame]. + It must answer yes/no whether the target from the reference is visible + in the current frame. + """ + hint_line = f'The target is: "{hint}".' if hint else "" + return f"""You are helping a robot find a specific target. + +IMAGE 1 (reference): Shows the target to find. +IMAGE 2 (current camera): Shows what the robot sees right now. + +{hint_line} + +Is the target from IMAGE 1 visible in IMAGE 2? + +Answer ONLY with this JSON: +{{"found": false, "confidence": "low|medium|high", "position": "left|center|right|not visible", "description": "one sentence"}} + +JSON:""" + + +def _build_single_prompt(hint: str) -> str: + """ + Prompt for when only current frame is available (no reference image). + Uses the hint description to search. + """ + return f"""You are a robot scanning for a target. + +Target description: "{hint}" + +Look at the camera image. Is the target visible? + +Answer ONLY with this JSON: +{{"found": false, "confidence": "low|medium|high", "position": "left|center|right|not visible", "description": "one sentence"}} + +JSON:""" + + +# ══════════════════════════════════════════════════════════════════════════════ +# IMAGE SEARCH CLASS +# ══════════════════════════════════════════════════════════════════════════════ + +class ImageSearch: + """ + Image-guided robot search. + + Rotates Marcus while comparing camera frames to a reference image. + Uses LLaVA for visual matching, YOLO as optional pre-filter. + + Thread-safe. Supports abort via Ctrl+C. 
+ + Usage: + searcher = ImageSearch( + get_frame_fn=get_frame, + send_vel_fn=send_vel, + gradual_stop_fn=gradual_stop, + llava_fn=_call_llava, + yolo_sees_fn=yolo_sees, + model="qwen2.5vl:3b" + ) + result = searcher.search(ref_img_b64, hint="person in blue shirt") + """ + + def __init__(self, get_frame_fn, send_vel_fn, gradual_stop_fn, + llava_fn, yolo_sees_fn=None, model="qwen2.5vl:3b"): + self._get_frame = get_frame_fn + self._send_vel = send_vel_fn + self._gradual_stop = gradual_stop_fn + self._call_llava = llava_fn + self._yolo_sees = yolo_sees_fn + self._model = model + self._abort = [False] + + def abort(self): + """Signal the current search to stop.""" + self._abort[0] = True + + def search_from_file(self, image_path: str, hint: str = "", + max_steps: int = DEFAULT_MAX_STEPS, + direction: str = "left") -> dict: + """ + Search for a target shown in an image file. + + Args: + image_path : path to JPEG/PNG reference image + hint : text description of the target (optional, improves accuracy) + max_steps : max rotation steps + direction : "left" or "right" + + Returns: + {"found": bool, "position": str, "steps": int, + "description": str, "confidence": str} + """ + ref_b64 = _load_image_b64(image_path) + if ref_b64 is None: + return {"found": False, "position": "error", + "steps": 0, "description": "Could not load reference image", + "confidence": "none"} + return self.search(ref_b64, hint=hint, max_steps=max_steps, direction=direction) + + def search(self, ref_img_b64: str = None, hint: str = "", + max_steps: int = DEFAULT_MAX_STEPS, + direction: str = "left", + yolo_prefilter: str = None) -> dict: + """ + Search for a target by rotating and comparing camera frames. + + Args: + ref_img_b64 : reference image as base64 JPEG (None = use hint only) + hint : text description e.g. "person in blue shirt", "red backpack" + max_steps : max steps before giving up (each step ~0.4s) + direction : "left" or "right" rotation + yolo_prefilter: YOLO class to pre-filter (e.g. 
"person") before LLaVA check + None = always use LLaVA on every step + + Returns: + dict with keys: found, position, steps, description, confidence + + Edge cases: + - ref_img_b64 None + hint empty → warns, returns not found + - Camera not ready → waits up to 5s, then skips frame + - Keyboard interrupt → graceful stop + - YOLO not available → skips pre-filter, uses LLaVA only + - LLaVA error → logs and continues + """ + self._abort[0] = False + + # Validate inputs + if ref_img_b64 is None and not hint: + print(" [ImgSearch] ⚠️ No reference image and no hint — cannot search") + return {"found": False, "position": "error", "steps": 0, + "description": "No reference image or hint provided", "confidence": "none"} + + has_ref = ref_img_b64 is not None + + # Choose prompt builder + if has_ref: + prompt = _build_compare_prompt(hint) + print(f"\n [ImgSearch] Reference image provided") + else: + prompt = _build_single_prompt(hint) + print(f"\n [ImgSearch] Text-only search: '{hint}'") + + print(f" [ImgSearch] Direction: {direction} | Max steps: {max_steps}") + if yolo_prefilter: + print(f" [ImgSearch] YOLO pre-filter: '{yolo_prefilter}'") + print(f" [ImgSearch] Starting rotation...\n") + + # Start continuous rotation + _keep_rotating = [True] + vyaw = ROTATE_SPEED if direction == "left" else -ROTATE_SPEED + + def _rotate(): + while _keep_rotating[0]: + self._send_vel(vyaw=vyaw) + time.sleep(0.05) + + rot_thread = threading.Thread(target=_rotate, daemon=True) + rot_thread.start() + + result = {"found": False, "position": "not visible", + "steps": 0, "description": "Not found", "confidence": "none"} + + try: + for step in range(1, max_steps + 1): + + if self._abort[0]: + print(" [ImgSearch] Aborted by user") + break + + result["steps"] = step + time.sleep(STEP_DELAY) + + # Warmup — skip first frames (stale) + if step <= MIN_STEPS_WARMUP: + print(f" [{step}/{max_steps}] Warming up...") + continue + + # ── YOLO pre-filter ─────────────────────────────────────────── + if yolo_prefilter and self._yolo_sees: + if not self._yolo_sees(yolo_prefilter): + print(f" [{step}/{max_steps}] YOLO: no {yolo_prefilter} — skip LLaVA") + continue + print(f" [{step}/{max_steps}] YOLO: {yolo_prefilter} found — running LLaVA comparison") + + # ── Get current frame ───────────────────────────────────────── + current_frame = self._get_frame() + if current_frame is None: + print(f" [{step}/{max_steps}] ⏳ Camera not ready — skipping") + # Wait up to 3s for camera + for _ in range(6): + time.sleep(0.5) + current_frame = self._get_frame() + if current_frame: + break + if current_frame is None: + continue + + # ── LLaVA comparison ───────────────────────────────────────── + print(f" [{step}/{max_steps}] 🔍 LLaVA comparing...") + + try: + if has_ref: + # Pass BOTH images: [reference, current_frame] + # num_batch/num_ctx mirror llava_api.py — without these + # caps the compute graph OOMs the runner on Jetson. 
+ import ollama as _ollama + from API.llava_api import NUM_BATCH, NUM_CTX + r = _ollama.chat( + model=self._model, + messages=[{ + "role": "user", + "content": prompt, + "images": [ref_img_b64, current_frame] + }], + options={ + "temperature": 0.0, + "num_predict": 60, + "num_batch": NUM_BATCH, + "num_ctx": NUM_CTX, + } + ) + raw = r["message"]["content"].strip() + else: + # Text-only description search + raw = self._call_llava(prompt, current_frame, num_predict=60) + + # Parse response + raw_clean = raw.replace("```json", "").replace("```", "").strip() + s = raw_clean.find("{"); e = raw_clean.rfind("}") + 1 + d = json.loads(raw_clean[s:e]) if s != -1 and e > 0 else None + + if d is None: + print(f" [{step}/{max_steps}] ⚠️ Bad JSON: {raw[:60]}") + continue + + found = d.get("found", False) + confidence = d.get("confidence", "low") + position = d.get("position", "not visible") + description= d.get("description", "") + + if isinstance(found, str): + found = found.lower() in ("true", "yes", "1") + + print(f" [{step}/{max_steps}] {'✅ MATCH' if found else '❌ no match'} " + f"| conf={confidence} | pos={position} | {description[:60]}") + + if found and confidence in ("medium", "high"): + result = { + "found": True, + "position": position, + "steps": step, + "description": description, + "confidence": confidence, + } + break + + except json.JSONDecodeError: + print(f" [{step}/{max_steps}] ⚠️ JSON parse error") + except Exception as ex: + print(f" [{step}/{max_steps}] ⚠️ LLaVA error: {ex}") + + except KeyboardInterrupt: + print("\n [ImgSearch] Interrupted by user") + + finally: + _keep_rotating[0] = False + self._gradual_stop() + + # Print summary + if result["found"]: + print(f"\n ✅ Target found at step {result['steps']}!") + print(f" 📍 Position: {result['position']}") + print(f" 📝 {result['description']}\n") + else: + print(f"\n ❌ Target not found after {result['steps']} steps\n") + + return result + + +# ══════════════════════════════════════════════════════════════════════════════ +# WIRE INTO marcus_llava.py — add to main loop +# ══════════════════════════════════════════════════════════════════════════════ + +""" +Add to marcus_llava.py imports: + from marcus_imgsearch import ImageSearch + +Add after Memory init: + _img_searcher = ImageSearch( + get_frame_fn = get_frame, + send_vel_fn = send_vel, + gradual_stop_fn = gradual_stop, + llava_fn = _call_llava, + yolo_sees_fn = yolo_sees, + model = OLLAMA_MODEL, + ) + +Add to main loop (before standard LLaVA command): + + # Image search — "search/ path/to/photo.jpg [hint]" + # or "search/ hint text only" + if cmd.lower().startswith("search/"): + args = cmd[7:].strip() + # Check if first arg is a file path + parts = args.split(None, 1) + if parts and os.path.exists(parts[0]): + img_path = parts[0] + hint = parts[1] if len(parts) > 1 else "" + print(f" [Search] Reference image: {img_path}") + print(f" [Search] Hint: '{hint}'") + _img_searcher.search_from_file( + img_path, hint=hint, yolo_prefilter="person" if "person" in hint or not hint else None + ) + else: + # Text description only + hint = args + print(f" [Search] Text search: '{hint}'") + _img_searcher.search(ref_img_b64=None, hint=hint) + continue +""" + + +# ══════════════════════════════════════════════════════════════════════════════ +# STANDALONE TEST +# ══════════════════════════════════════════════════════════════════════════════ + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Marcus Image Search — standalone test") + 
parser.add_argument("--image", type=str, default=None, help="Reference image path") + parser.add_argument("--hint", type=str, default="", help="Text description of target") + parser.add_argument("--steps", type=int, default=20, help="Max search steps") + parser.add_argument("--test", action="store_true", help="Run self-test without robot") + args = parser.parse_args() + + if args.test: + # Test image loading and prompt building + print("=== Self-test (no robot) ===\n") + + print("1. Prompt builder test:") + p = _build_compare_prompt("person in blue shirt") + print(f" With hint: {p[:80]}...") + p2 = _build_compare_prompt() + print(f" No hint: {p2[:80]}...") + + print("\n2. Single prompt test:") + p3 = _build_single_prompt("red backpack near the door") + print(f" {p3[:80]}...") + + if args.image: + print("\n3. Image loading test:") + b64 = _load_image_b64(args.image) + if b64: + size = len(b64) * 3 // 4 // 1024 + print(f" Loaded: ~{size}KB") + else: + print(" Failed to load") + + print("\nSelf-test complete.") + + elif args.image or args.hint: + # Real search — needs robot hardware + print("Real search requires robot hardware.") + print("Import ImageSearch into marcus_llava.py instead.") + print(f" image: {args.image}") + print(f" hint: {args.hint}") + + else: + parser.print_help() \ No newline at end of file diff --git a/Vision/marcus_yolo.py b/Vision/marcus_yolo.py new file mode 100644 index 0000000..d608b5a --- /dev/null +++ b/Vision/marcus_yolo.py @@ -0,0 +1,543 @@ +""" +marcus_yolo.py — Marcus Vision Module +======================================= +Project : Marcus | YS Lootah Technology +Purpose : YOLO-based person + object detection + Import this module in marcus_llava.py — runs as background thread + +Usage (imported): + from marcus_yolo import start_yolo, yolo_sees, yolo_count, yolo_closest, yolo_summary + +Usage (standalone): + /home/unitree/miniconda3/envs/marcus/bin/python3 ~/Models_marcus/marcus_yolo.py +""" + +import os +import sys +import time +import threading +import json +import numpy as np + +from collections import defaultdict + +# ── Configuration ───────────────────────────────────────────────────────────── + +_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if _PROJECT_ROOT not in sys.path: + sys.path.insert(0, _PROJECT_ROOT) + +try: + from Core.config_loader import load_config + _cfg = load_config("Vision") +except Exception as _e: + print(f" [YOLO] config_Vision.json not loaded ({_e}) — using defaults") + _cfg = {} + +YOLO_MODEL_PATH = os.path.join(_PROJECT_ROOT, _cfg.get("yolo_model_path", "Models/yolov8m.pt")) +YOLO_CONFIDENCE = float(_cfg.get("yolo_confidence", 0.45)) +YOLO_IOU = float(_cfg.get("yolo_iou", 0.45)) +YOLO_DEVICE = _cfg.get("yolo_device", "cuda") # "cuda" | "0" | "cuda:N" +YOLO_IMG_SIZE = int(_cfg.get("yolo_img_size", 320)) +YOLO_HALF = bool(_cfg.get("yolo_half", True)) # FP16 on GPU + + +def _resolve_device(requested: str) -> tuple: + """ + Resolve the inference device. GPU is required — no CPU fallback. + + Returns (device_str, use_half). Raises RuntimeError if CUDA is unavailable + or if the config requests CPU. Marcus must run on the Jetson Orin NX GPU. + """ + req = (requested or "cuda").lower() + + if req == "cpu": + raise RuntimeError( + "[YOLO] yolo_device='cpu' in config — Marcus requires GPU. " + "Set yolo_device to 'cuda' in Config/config_Vision.json." + ) + + try: + import torch + except ImportError as e: + raise RuntimeError( + "[YOLO] PyTorch not installed — cannot run on GPU. 
" + "Install CUDA-enabled torch on the Jetson." + ) from e + + if not torch.cuda.is_available(): + raise RuntimeError( + "[YOLO] CUDA not available — torch.cuda.is_available() == False. " + "Check nvidia driver / JetPack CUDA runtime on the Jetson " + "(try `nvidia-smi` or `tegrastats`)." + ) + + dev = req if (req.startswith("cuda") or req == "0") else "cuda" + return dev, YOLO_HALF + +# COCO classes to track (ignore everything else) +TRACKED_CLASSES = { + "person", "chair", "couch", "bed", "dining table", + "bottle", "cup", "laptop", "keyboard", "mouse", + "backpack", "handbag", "suitcase", + "car", "truck", "motorcycle", "bicycle", + "fire hydrant", "stop sign", +} + +# PPE classes — active when custom model loaded +PPE_VIOLATION_CLASSES = {"no-helmet", "no_helmet", "no-vest", "no_vest"} + +# ── Shared state ────────────────────────────────────────────────────────────── + +_detections_lock = threading.Lock() +_latest_detections = [] # list of dicts +_yolo_running = [False] +_yolo_fps = [0.0] + + +# ── Detection class ─────────────────────────────────────────────────────────── + +class Detection: + """Single YOLO detection result.""" + + def __init__(self, class_name, confidence, x1, y1, x2, y2, frame_w, frame_h): + self.class_name = class_name + self.confidence = confidence + self.x1, self.y1 = x1, y1 + self.x2, self.y2 = x2, y2 + self.cx = (x1 + x2) // 2 + self.cy = (y1 + y2) // 2 + self.width = x2 - x1 + self.height = y2 - y1 + self.area = self.width * self.height + self.frame_w = frame_w + self.frame_h = frame_h + + @property + def size_ratio(self) -> float: + """Fraction of frame covered — larger = closer.""" + return self.area / max(self.frame_w * self.frame_h, 1) + + @property + def position(self) -> str: + """left / center / right based on bbox center.""" + third = self.frame_w // 3 + if self.cx < third: + return "left" + elif self.cx > third * 2: + return "right" + return "center" + + @property + def distance_estimate(self) -> str: + """Rough distance from size ratio.""" + r = self.size_ratio + if r > 0.30: return "very close" + if r > 0.10: return "close" + if r > 0.03: return "medium" + return "far" + + def to_dict(self) -> dict: + return { + "class": self.class_name, + "confidence": round(self.confidence, 2), + "position": self.position, + "distance": self.distance_estimate, + "size_ratio": round(self.size_ratio, 4), + "bbox": [self.x1, self.y1, self.x2, self.y2], + "center": [self.cx, self.cy], + } + + def __repr__(self): + return (f"Detection({self.class_name} {self.confidence:.0%} " + f"@ {self.position} {self.distance_estimate})") + + +# ── Public query API ────────────────────────────────────────────────────────── + +def yolo_sees(class_name: str, min_confidence: float = 0.45) -> bool: + """ + Check if YOLO currently detects a specific class. + + Args: + class_name : COCO class e.g. "person", "chair", "bottle" + min_confidence : minimum confidence threshold (default 0.45) + + Returns: + True if detected, False otherwise. + + Example: + if yolo_sees("person"): + gradual_stop() + """ + with _detections_lock: + return any( + d.class_name.lower() == class_name.lower() + and d.confidence >= min_confidence + for d in _latest_detections + ) + + +def yolo_count(class_name: str) -> int: + """ + Return number of detected instances of class_name. 
+ + Example: + n = yolo_count("person") + print(f"Detected {n} people") + """ + with _detections_lock: + return sum( + 1 for d in _latest_detections + if d.class_name.lower() == class_name.lower() + ) + + +def yolo_closest(class_name: str = "person"): + """ + Return the closest Detection instance of class_name (largest bbox). + + Returns: + Detection object or None. + + Example: + p = yolo_closest("person") + if p: + print(p.position, p.distance_estimate) + """ + with _detections_lock: + matches = [d for d in _latest_detections + if d.class_name.lower() == class_name.lower()] + if not matches: + return None + return max(matches, key=lambda d: d.size_ratio) + + +def yolo_all_classes() -> set: + """Return set of all currently detected class names.""" + with _detections_lock: + return {d.class_name for d in _latest_detections} + + +def yolo_summary() -> str: + """ + Return human-readable summary of current detections. + + Returns: + e.g. "2 persons (left, close) | 1 chair (center, medium)" + """ + with _detections_lock: + dets = list(_latest_detections) + if not dets: + return "nothing detected" + counts = defaultdict(list) + for d in dets: + counts[d.class_name].append(d) + parts = [] + for cls, items in counts.items(): + n = len(items) + name = f"{n} {cls}{'s' if n > 1 else ''}" + locs = list(set(d.position for d in items)) + dist = items[0].distance_estimate + parts.append(f"{name} ({', '.join(locs)}, {dist})") + return " | ".join(parts) + + +def yolo_ppe_violations() -> list: + """ + Return list of detected PPE violations. + Requires custom PPE model loaded (not default yolov8m). + + Returns: + List of violation strings e.g. ["no helmet (left)", "no vest (center)"] + """ + violations = [] + with _detections_lock: + for d in _latest_detections: + cls = d.class_name.lower() + if cls in ("no-helmet", "no_helmet"): + violations.append(f"no helmet ({d.position})") + elif cls in ("no-vest", "no_vest"): + violations.append(f"no vest ({d.position})") + return violations + + +def yolo_person_too_close(threshold: float = 0.25) -> bool: + """ + Return True if a person occupies more than threshold of the frame. + Use for safety stop — person is dangerously close. + + Args: + threshold : size_ratio above which = too close (default 0.25) + """ + p = yolo_closest("person") + return p is not None and p.size_ratio > threshold + + +def yolo_is_running() -> bool: + """Return True if YOLO inference loop is active.""" + return _yolo_running[0] + + +def yolo_fps() -> float: + """Return current YOLO inference FPS.""" + return _yolo_fps[0] + + +# ── YOLO inference loop ─────────────────────────────────────────────────────── + +def _inference_loop(model, is_ppe: bool, raw_frame_ref, frame_lock, + device: str, use_half: bool): + """Background inference loop. 
Reads frames, updates _latest_detections.""" + frame_count = 0 + t_fps = time.time() + + while _yolo_running[0]: + with frame_lock: + frame = raw_frame_ref[0] + if frame is None: + time.sleep(0.05) + continue + + try: + results = model( + frame, + imgsz=YOLO_IMG_SIZE, + conf=YOLO_CONFIDENCE, + iou=YOLO_IOU, + device=device, + half=use_half, + verbose=False + )[0] + except Exception as e: + print(f" [YOLO] Inference error: {e}") + time.sleep(0.2) + continue + + h, w = frame.shape[:2] + dets = [] + for box in results.boxes: + cls_id = int(box.cls[0]) + class_name = model.names[cls_id] + confidence = float(box.conf[0]) + if not is_ppe and class_name not in TRACKED_CLASSES: + continue + x1, y1, x2, y2 = map(int, box.xyxy[0]) + dets.append(Detection(class_name, confidence, x1, y1, x2, y2, w, h)) + + with _detections_lock: + _latest_detections.clear() + _latest_detections.extend(dets) + + frame_count += 1 + elapsed = time.time() - t_fps + if elapsed >= 1.0: + _yolo_fps[0] = round(frame_count / elapsed, 1) + frame_count = 0 + t_fps = time.time() + + time.sleep(0.02) + + +# ── Camera loop for standalone mode ────────────────────────────────────────── + +def _camera_loop(raw_frame_ref, frame_lock, cam_alive): + """Capture RealSense frames when running standalone.""" + import pyrealsense2 as rs + + while cam_alive[0]: + pipeline = None + try: + pipeline = rs.pipeline() + cfg = rs.config() + cfg.enable_stream(rs.stream.color, 424, 240, rs.format.bgr8, 15) + pipeline.start(cfg) + print("Camera connected ✅") + while cam_alive[0]: + frames = pipeline.wait_for_frames(timeout_ms=3000) + frame = np.asanyarray(frames.get_color_frame().get_data()) + with frame_lock: + raw_frame_ref[0] = frame.copy() + except Exception as e: + print(f"Camera: {e} — reconnecting...") + try: pipeline.stop() + except: pass + time.sleep(2.0) + + +# ── Start function — called by marcus_llava.py ──────────────────────────────── + +def start_yolo(raw_frame_ref=None, frame_lock=None): + """ + Start YOLO inference in background thread. + + Called automatically by marcus_llava.py during startup. + Shares the camera frame reference from marcus_llava's camera thread. + + Args: + raw_frame_ref : list[np.ndarray|None] — shared raw BGR frame + frame_lock : threading.Lock protecting raw_frame_ref + + Example (in marcus_llava.py): + from marcus_yolo import start_yolo, yolo_sees, yolo_summary + start_yolo(raw_frame_ref=_raw_frame, frame_lock=_raw_lock) + """ + try: + from ultralytics import YOLO + except ImportError: + print(" [YOLO] ultralytics not installed — pip install ultralytics") + return False + + print(f" [YOLO] Loading model: {YOLO_MODEL_PATH}") + try: + model = YOLO(YOLO_MODEL_PATH) + except Exception as e: + print(f" [YOLO] Failed to load model: {e}") + return False + + names = set(model.names.values()) + is_ppe = bool(names & PPE_VIOLATION_CLASSES) + + device, use_half = _resolve_device(YOLO_DEVICE) + + # Move weights onto the target device once so inferences don't pay a + # CPU→GPU copy every call. Ultralytics handles FP16 casting via the + # `half=True` predict kwarg — don't call `.half()` on the inner module, + # it conflicts with ultralytics' own input dtype preprocess. 
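+ # FP16 therefore stays a per-call request: _inference_loop passes half=use_half + # into every model(...) call rather than converting the weights here.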
+ try: + model.to(device) + except Exception as e: + print(f" [YOLO] Could not move model to {device} ({e}) — continuing") + + gpu_info = "" + if device != "cpu": + try: + import torch + gpu_info = f" ({torch.cuda.get_device_name(0)})" + except Exception: + pass + + print(f" [YOLO] Model loaded ✅ | device: {device}{gpu_info}" + f"{' | FP16' if use_half else ''} | " + f"{'PPE model' if is_ppe else f'{len(TRACKED_CLASSES & names)} tracked classes'}") + + _yolo_running[0] = True + + threading.Thread( + target=_inference_loop, + args=(model, is_ppe, raw_frame_ref, frame_lock, device, use_half), + daemon=True + ).start() + + return True + + +# ══════════════════════════════════════════════════════════════════════════════ +# STANDALONE MODE — run directly for testing +# ══════════════════════════════════════════════════════════════════════════════ + +if __name__ == "__main__": + import pyrealsense2 as rs + + raw_frame_ref = [None] + frame_lock = threading.Lock() + cam_alive = [True] + + # Start camera + threading.Thread( + target=_camera_loop, + args=(raw_frame_ref, frame_lock, cam_alive), + daemon=True + ).start() + time.sleep(3.0) + + # Start YOLO + ok = start_yolo(raw_frame_ref=raw_frame_ref, frame_lock=frame_lock) + if not ok: + print("YOLO failed to start. Exiting.") + exit(1) + + time.sleep(2.0) + + print() + print("╔══════════════════════════════════════════════╗") + print("║ MARCUS VISION — YOLO ACTIVE ║") + print("╠══════════════════════════════════════════════╣") + print(f"║ Model : {YOLO_MODEL_PATH[-36:]:<36}║") + print(f"║ Conf : {YOLO_CONFIDENCE:<36}║") + _dev, _half = _resolve_device(YOLO_DEVICE) + _dev_label = f"{_dev}{' FP16' if _half else ''}" + print(f"║ Device: {_dev_label:<36}║") + print("╠══════════════════════════════════════════════╣") + print("║ what — describe scene ║") + print("║ person — detect people ║") + print("║ ppe — check PPE violations ║") + print("║ count — count instances ║") + print("║ closest — closest instance info ║") + print("║ all — all detections ║") + print("║ fps — inference speed ║") + print("║ q — quit ║") + print("╚══════════════════════════════════════════════╝") + print() + + while True: + try: + cmd = input("Vision: ").strip().lower() + except (EOFError, KeyboardInterrupt): + break + + if not cmd: + continue + + if cmd == "q": + break + + elif cmd == "what": + print(f" {yolo_summary()}") + + elif cmd == "person": + n = yolo_count("person") + if n == 0: + print(" No person detected") + else: + for i, p in enumerate( + [d for d in _latest_detections if d.class_name == "person"], 1 + ): + print(f" Person {i}: {p.position}, {p.distance_estimate} " + f"({p.confidence:.0%})") + + elif cmd == "ppe": + v = yolo_ppe_violations() + print(f" {'No violations' if not v else chr(10).join(v)}") + + elif cmd.startswith("count "): + cls = cmd[6:].strip() + print(f" {yolo_count(cls)} {cls}(s) detected") + + elif cmd.startswith("closest "): + cls = cmd[8:].strip() + d = yolo_closest(cls) + if d: + print(f" Closest {cls}: {d.position}, {d.distance_estimate} " + f"({d.confidence:.0%})") + else: + print(f" No {cls} detected") + + elif cmd == "all": + with _detections_lock: + dets = list(_latest_detections) + if not dets: + print(" Nothing detected") + else: + for d in dets: + print(f" {d}") + + elif cmd == "fps": + print(f" {yolo_fps():.1f} fps") + + else: + print(f" Unknown: {cmd}") + + cam_alive[0] = False + _yolo_running[0] = False + print("Marcus Vision stopped.") diff --git a/Voice/__init__.py b/Voice/__init__.py new file mode 100644 index 0000000..e69de29 diff 
--git a/Voice/marcus_gemini_voice.py b/Voice/marcus_gemini_voice.py new file mode 100644 index 0000000..f02495a --- /dev/null +++ b/Voice/marcus_gemini_voice.py @@ -0,0 +1,608 @@ +#!/usr/bin/env python3 +""" +Voice/marcus_gemini_voice.py — Marcus Gemini Live Voice Module v2 +================================================================== +Real-time bidirectional voice conversation using Gemini 2.5 Flash native audio. +Uses G1 built-in speaker + Hollyland wireless mic. + +Based on SanadVoice/gemini_interact architecture: +- PyAudio for mic (not parec) +- Echo suppression (silence when speaking) +- Gemini VAD (automatic activity detection) +- thinkingBudget=0 (no thinking text) +- ASR buffering for full sentences +- Vision routed to brain's Qwen camera + +Usage: + from Voice.marcus_gemini_voice import GeminiVoiceModule + voice = GeminiVoiceModule(audio_api, on_transcript=callback) + voice.start() +""" + +import array +import asyncio +import base64 +import json +import logging +import os +import subprocess +import threading +import time +import numpy as np + +from dotenv import load_dotenv +load_dotenv() + +BASE_PATH = os.environ.get("PROJECT_BASE", "/home/unitree") +PROJECT_NAME = "Marcus" +PROJECT_ROOT = os.path.join(BASE_PATH, PROJECT_NAME) + +LOG_DIR = os.path.join(PROJECT_ROOT, "logs") +os.makedirs(LOG_DIR, exist_ok=True) + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(name)s] %(levelname)s: %(message)s", + handlers=[ + logging.FileHandler(os.path.join(LOG_DIR, "voice.log")), + logging.StreamHandler(), + ], +) +log = logging.getLogger("gemini_voice") + + +def load_config(name: str) -> dict: + path = os.path.join(PROJECT_ROOT, "Config", f"config_{name}.json") + with open(path, "r") as f: + return json.load(f) + + +# ─── CONFIGURATION ──────────────────────────────────────── + +# API key is read from the environment (loaded via .env above) rather than +# committed to the repo; GEMINI_API_KEY is an assumed variable name. +API_KEY = os.environ.get("GEMINI_API_KEY", "") +MODEL = "models/gemini-2.5-flash-native-audio-preview-12-2025" +URI = ( + "wss://generativelanguage.googleapis.com/ws/" + "google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent" + f"?key={API_KEY}" +) + +VOICE_NAME = "Charon" +SEND_RATE = 16000 +RECEIVE_RATE = 24000 +CHUNK_SIZE = 512 +CHANNELS = 1 + + +def load_system_prompt(): + paths = [ + os.path.join(PROJECT_ROOT, "Config", "marcus_voice_prompt.txt"), + ] + for p in paths: + if os.path.exists(p): + with open(p, "r", encoding="utf-8-sig") as f: + return f.read().strip() + return ( + "You are Marcus, a humanoid robot by YS Lootah Technology, Dubai. " + "Speak Arabic (UAE dialect) and English. Be concise — 1-2 sentences max." 
+ ) + + +# ─── AUDIO HELPERS ──────────────────────────────────────── + +def audio_energy(pcm: bytes) -> int: + try: + samples = array.array("h", pcm) + if not samples: + return 0 + return sum(abs(s) for s in samples) // len(samples) + except Exception: + return 0 + + +SILENCE_PCM = b'\x00' * (CHUNK_SIZE * 2) + + +# ─── GEMINI VOICE MODULE ───────────────────────────────── + +class GeminiVoiceModule: + """Real-time voice conversation via Gemini, using G1 speaker + Hollyland mic.""" + + def __init__(self, audio_api, on_transcript=None): + self._audio = audio_api + self._on_transcript = on_transcript + self._config = load_config("Voice") + self._mic_source = getattr(audio_api, '_mic_source', + self._config["mic"].get("source_index", "0")) + + # State + self.speaking = False + self.interrupted = False + self._running = False + self._thread = None + self._audio_queue = None # Created in async context + + # Tuning + self.MIN_THRESHOLD = 3000 + self.barge_in_threshold = self.MIN_THRESHOLD + self.REQUIRED_LOUD_CHUNKS = 10 + self.PREBUFFER_CHUNKS = 2 + self.PLAYBACK_TIMEOUT = 0.25 + self.BARGE_IN_COOLDOWN = 0.7 + self.AI_SPEAK_GRACE = 0.20 + self.ECHO_GUARD_SEC = 0.8 + self.SPEAKING_ENERGY_GATE = 0.85 + self.SEND_SILENCE_WHEN_SPEAKING = True + + # Timing + self._ai_speaking_since = 0.0 + self._last_ai_audio_time = 0.0 + self._barge_in_block_until = 0.0 + self._ignore_input_until = 0.0 + + # ASR buffer + self._asr_buf = "" + self._asr_last_time = 0.0 + self.ASR_WINDOW_SEC = 2.0 + + # Find Hollyland mic PyAudio device index + self._mic_device_idx = self._find_mic_device() + + log.info("GeminiVoiceModule v2 initialized") + + # ─── MIC DEVICE DETECTION ───────────────────────────── + + def _find_mic_device(self) -> int: + """Find Hollyland wireless mic in PyAudio devices. 
Returns device index.""" + import pyaudio + import ctypes + ERROR_HANDLER_FUNC = ctypes.CFUNCTYPE(None, ctypes.c_char_p, ctypes.c_int, ctypes.c_char_p, ctypes.c_int, ctypes.c_char_p) + def _alsa_error_handler(filename, line, function, err, fmt): + pass # suppress + c_error_handler = ERROR_HANDLER_FUNC(_alsa_error_handler) + try: + asound = ctypes.cdll.LoadLibrary("libasound.so.2") + asound.snd_lib_error_set_handler(c_error_handler) + except: pass # ALSA_suppress + pa = pyaudio.PyAudio() + try: + # First: set PulseAudio default source to Hollyland + subprocess.run(["pactl", "set-source-mute", self._mic_source, "0"], capture_output=True) + subprocess.run(["pactl", "set-source-volume", self._mic_source, "100%"], capture_output=True) + + # Search for wireless mic by name + for i in range(pa.get_device_count()): + info = pa.get_device_info_by_index(i) + name = info.get("name", "").lower() + if info["maxInputChannels"] > 0 and ("wireless" in name or "hollyland" in name): + log.info("Mic found: [%d] %s (%dHz)", i, info["name"], int(info["defaultSampleRate"])) + return i + + # Fallback to 'default' or 'pulse' device + for i in range(pa.get_device_count()): + info = pa.get_device_info_by_index(i) + if info["maxInputChannels"] > 0 and info.get("name", "") in ("default", "pulse"): + log.info("Mic fallback: [%d] %s", i, info["name"]) + return i + + log.warning("No mic found, using device 0") + return 0 + finally: + pa.terminate() + + # ─── MIC CALIBRATION ────────────────────────────────── + + def _calibrate_mic(self): + """Calibrate barge-in threshold from ambient noise.""" + import pyaudio + import ctypes + ERROR_HANDLER_FUNC = ctypes.CFUNCTYPE(None, ctypes.c_char_p, ctypes.c_int, ctypes.c_char_p, ctypes.c_int, ctypes.c_char_p) + def _alsa_error_handler(filename, line, function, err, fmt): + pass # suppress + c_error_handler = ERROR_HANDLER_FUNC(_alsa_error_handler) + try: + asound = ctypes.cdll.LoadLibrary("libasound.so.2") + asound.snd_lib_error_set_handler(c_error_handler) + except: pass # ALSA_suppress + pa = pyaudio.PyAudio() + mic_info = pa.get_device_info_by_index(self._mic_device_idx) + mic_rate = int(mic_info["defaultSampleRate"]) + mic_channels = 1 + try: + stream = pa.open(format=pyaudio.paInt16, channels=mic_channels, + rate=mic_rate, input=True, + input_device_index=self._mic_device_idx, + frames_per_buffer=CHUNK_SIZE) + values = [] + for _ in range(40): + data = stream.read(CHUNK_SIZE, exception_on_overflow=False) + values.append(audio_energy(data)) + stream.stop_stream() + stream.close() + avg_noise = sum(values) / len(values) if values else 0 + self.barge_in_threshold = max(self.MIN_THRESHOLD, avg_noise * 3.0) + log.info("Mic calibrated: noise=%.0f, threshold=%.0f", avg_noise, self.barge_in_threshold) + except Exception as e: + log.warning("Calibration failed: %s", e) + finally: + pa.terminate() + + # ─── G1 SPEAKER PLAYBACK ───────────────────────────── + + def _play_buffer_on_g1(self, pcm_24k: np.ndarray): + """Play 24kHz audio on G1 speaker (resample to 16kHz, single call).""" + if len(pcm_24k) < 100: + return + + # Resample 24kHz → 16kHz + tl = int(len(pcm_24k) * 16000 / 24000) + audio_16k = np.interp( + np.linspace(0, len(pcm_24k), tl, endpoint=False), + np.arange(len(pcm_24k)), + pcm_24k.astype(np.float64), + ).astype(np.int16) + + from unitree_sdk2py.g1.audio.g1_audio_api import ( + ROBOT_API_ID_AUDIO_START_PLAY, + ROBOT_API_ID_AUDIO_STOP_PLAY, + ) + + client = self._audio._client + if not client: + return + + app_name = "gemini" + client._Call(ROBOT_API_ID_AUDIO_STOP_PLAY, 
json.dumps({"app_name": app_name})) + time.sleep(0.1) + + pcm = audio_16k.tobytes() + sid = f"s_{int(time.time() * 1000)}" + param = json.dumps({ + "app_name": app_name, + "stream_id": sid, + "sample_rate": 16000, + "channels": 1, + "bits_per_sample": 16, + }) + client._CallRequestWithParamAndBin(ROBOT_API_ID_AUDIO_START_PLAY, param, list(pcm)) + + duration = len(audio_16k) / 16000 + time.sleep(duration + 0.3) + client._Call(ROBOT_API_ID_AUDIO_STOP_PLAY, json.dumps({"app_name": app_name})) + + # ─── WEBSOCKET TASKS ───────────────────────────────── + + async def _capture_mic(self, ws): + """Continuously capture mic via PyAudio and send to Gemini.""" + import pyaudio + import ctypes + ERROR_HANDLER_FUNC = ctypes.CFUNCTYPE(None, ctypes.c_char_p, ctypes.c_int, ctypes.c_char_p, ctypes.c_int, ctypes.c_char_p) + def _alsa_error_handler(filename, line, function, err, fmt): + pass # suppress + c_error_handler = ERROR_HANDLER_FUNC(_alsa_error_handler) + try: + asound = ctypes.cdll.LoadLibrary("libasound.so.2") + asound.snd_lib_error_set_handler(c_error_handler) + except: pass # ALSA_suppress + pa = pyaudio.PyAudio() + + mic_info = pa.get_device_info_by_index(self._mic_device_idx) + mic_rate = int(mic_info["defaultSampleRate"]) + mic_channels = 1 + + # Open mic at native rate/channels + stream = pa.open(format=pyaudio.paInt16, channels=mic_channels, + rate=mic_rate, input=True, + input_device_index=self._mic_device_idx, + frames_per_buffer=CHUNK_SIZE) + + log.info("Mic stream: device=%d, rate=%d, ch=%d", self._mic_device_idx, mic_rate, mic_channels) + + loud_chunks = 0 + loop = asyncio.get_event_loop() + needs_resample = mic_rate != SEND_RATE or mic_channels != 1 + + try: + while self._running: + data = await loop.run_in_executor( + None, lambda: stream.read(CHUNK_SIZE, exception_on_overflow=False)) + + # Convert to mono 16kHz if needed + if needs_resample: + audio = np.frombuffer(data, dtype=np.int16) + # Stereo to mono + if mic_channels == 2: + audio = audio.reshape(-1, 2).mean(axis=1).astype(np.int16) + # Resample to 16kHz + if mic_rate != SEND_RATE: + tl = int(len(audio) * SEND_RATE / mic_rate) + if tl > 0: + audio = np.interp( + np.linspace(0, len(audio), tl, endpoint=False), + np.arange(len(audio)), + audio.astype(np.float64), + ).astype(np.int16) + data = audio.tobytes() + + energy = audio_energy(data) + now = time.time() + + # Barge-in detection + if self.speaking and now >= self._barge_in_block_until: + if (now - self._ai_speaking_since) >= self.AI_SPEAK_GRACE: + if energy > self.barge_in_threshold: + loud_chunks += 1 + else: + loud_chunks = 0 + if loud_chunks > self.REQUIRED_LOUD_CHUNKS: + log.info("Barge-in detected!") + self.interrupted = True + self.speaking = False + while not self._audio_queue.empty(): + try: self._audio_queue.get_nowait() + except: break + loud_chunks = 0 + self._barge_in_block_until = now + self.BARGE_IN_COOLDOWN + + # Echo suppression: send silence while speaking + data_to_send = data + if self.SEND_SILENCE_WHEN_SPEAKING and self.speaking: + gate = self.barge_in_threshold * self.SPEAKING_ENERGY_GATE + if energy < gate: + data_to_send = SILENCE_PCM + + # Send to Gemini + b64 = base64.b64encode(data_to_send).decode() + msg = { + "realtime_input": { + "media_chunks": [ + {"mime_type": f"audio/pcm;rate={SEND_RATE}", "data": b64} + ] + } + } + await ws.send(json.dumps(msg)) + + except Exception as e: + if self._running: + log.error("Mic error: %s", e) + finally: + stream.stop_stream() + stream.close() + pa.terminate() + + async def _receive_audio(self, ws): + """Receive 
audio responses and transcriptions from Gemini.""" + async for msg in ws: + if not self._running: + break + try: + response = json.loads(msg) + server_content = response.get("serverContent", {}) + + if server_content.get("interrupted"): + self.interrupted = False + + # User transcription (partial/streaming) + input_tr = ( + server_content.get("inputTranscription") + or server_content.get("input_transcription") + or server_content.get("inputAudioTranscription") + or server_content.get("input_audio_transcription") + ) + if isinstance(input_tr, dict): + text = (input_tr.get("text") or "").strip() + now = time.time() + if text and now >= self._ignore_input_until and not self.speaking: + # Buffer ASR text + if now - self._asr_last_time > self.ASR_WINDOW_SEC: + self._asr_buf = "" + self._asr_buf = text # Gemini sends cumulative transcription + self._asr_last_time = now + + if self.interrupted: + continue + + # Audio from Gemini + model_turn = server_content.get("modelTurn") + if model_turn: + for part in model_turn.get("parts", []): + inline_data = part.get("inlineData") + if inline_data: + audio_b64 = inline_data.get("data") + if audio_b64: + now = time.time() + if not self.speaking: + self._ai_speaking_since = now + # Gemini started responding — fire transcript callback + if self._asr_buf and self._on_transcript: + self._on_transcript(self._asr_buf, "user") + self.speaking = True + self._last_ai_audio_time = now + self._ignore_input_until = now + self.ECHO_GUARD_SEC + audio_bytes = base64.b64decode(audio_b64) + await self._audio_queue.put(audio_bytes) + + # Text from Gemini (thinking/response text) + text_part = part.get("text", "").strip() + if text_part and self._on_transcript: + self._on_transcript(text_part, "marcus") + + # Turn complete — Gemini finished speaking + turn_complete = server_content.get("turnComplete") + if turn_complete: + # Clear ASR buffer after turn + self._asr_buf = "" + + except Exception as e: + log.error("Receive error: %s", e) + + async def _play_audio(self): + """Collect Gemini audio chunks and play on G1 speaker.""" + while self._running: + try: + if not self.speaking: + await asyncio.sleep(0.05) + continue + + # Pre-buffer + buffered = False + while self.speaking and not buffered: + if self._audio_queue.qsize() >= self.PREBUFFER_CHUNKS: + buffered = True + else: + await asyncio.sleep(0.01) + + # Collect all audio chunks + buffer_chunks = [] + while self.speaking: + try: + data = await asyncio.wait_for( + self._audio_queue.get(), timeout=self.PLAYBACK_TIMEOUT) + audio = np.frombuffer(data, dtype=np.int16) + buffer_chunks.append(audio) + self._last_ai_audio_time = time.time() + except asyncio.TimeoutError: + if self._audio_queue.empty(): + if time.time() - self._last_ai_audio_time > 0.3: + break + + # Play on G1 speaker + if buffer_chunks: + full_audio = np.concatenate(buffer_chunks) + duration = len(full_audio) / RECEIVE_RATE + log.info("Playing %.1fs on G1", duration) + + await asyncio.get_event_loop().run_in_executor( + None, self._play_buffer_on_g1, full_audio) + + self.speaking = False + + except Exception as e: + log.error("Play error: %s", e) + self.speaking = False + + # ─── MAIN LOOP ──────────────────────────────────────── + + async def _run_async(self): + import websockets + import inspect + + system_prompt = load_system_prompt() + + # Unmute mic + subprocess.run(["pactl", "set-source-mute", self._mic_source, "0"], capture_output=True) + subprocess.run(["pactl", "set-source-volume", self._mic_source, "100%"], capture_output=True) + + # Calibrate + 
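# (sets barge_in_threshold = max(MIN_THRESHOLD, 3 × measured ambient energy); see _calibrate_mic) + 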
self._calibrate_mic() + + ws_kwargs = {"max_size": None} + try: + sig = inspect.signature(websockets.connect) + if "extra_headers" in sig.parameters: + ws_kwargs["extra_headers"] = {"Content-Type": "application/json"} + else: + ws_kwargs["additional_headers"] = {"Content-Type": "application/json"} + except Exception: + ws_kwargs["extra_headers"] = {"Content-Type": "application/json"} + + while self._running: + try: + log.info("Connecting to Gemini...") + async with websockets.connect(URI, **ws_kwargs) as ws: + setup_msg = { + "setup": { + "model": MODEL, + "generationConfig": { + "responseModalities": ["AUDIO"], + "thinkingConfig": {"thinkingBudget": 0}, + "speechConfig": { + "voiceConfig": { + "prebuiltVoiceConfig": {"voiceName": VOICE_NAME} + } + }, + }, + "realtimeInputConfig": { + "automaticActivityDetection": { + "startOfSpeechSensitivity": "START_SENSITIVITY_HIGH", + "prefixPaddingMs": 40, + "endOfSpeechSensitivity": "END_SENSITIVITY_HIGH", + "silenceDurationMs": 250, + } + }, + "inputAudioTranscription": {}, + "systemInstruction": {"parts": [{"text": system_prompt}]}, + } + } + await ws.send(json.dumps(setup_msg)) + await ws.recv() + log.info("Connected! Always listening...") + + self._audio_queue = asyncio.Queue() + + await asyncio.gather( + self._capture_mic(ws), + self._receive_audio(ws), + self._play_audio(), + ) + + except Exception as e: + if self._running: + log.error("Connection error: %s — reconnecting in 3s", e) + await asyncio.sleep(3) + + def _voice_thread(self): + asyncio.run(self._run_async()) + + # ─── START / STOP ───────────────────────────────────── + + def start(self): + if self._running: + return + self._running = True + self._thread = threading.Thread(target=self._voice_thread, daemon=True, name="gemini_voice") + self._thread.start() + log.info("Gemini voice module started") + + def stop(self): + self._running = False + if self._thread: + self._thread.join(timeout=5) + self._thread = None + log.info("Gemini voice module stopped") + + @property + def is_running(self) -> bool: + return self._running + + @property + def state(self) -> str: + return "LISTENING" if self._running else "STOPPED" + + @property + def is_speaking(self) -> bool: + return self.speaking + + +# ─── STANDALONE TEST ───────────────────────────────────── + +if __name__ == "__main__": + import sys + sys.path.insert(0, PROJECT_ROOT) + from API.audio_api import AudioAPI + + def on_transcript(text, role): + print(f" [{role.upper()}] {text}") + + audio = AudioAPI() + voice = GeminiVoiceModule(audio, on_transcript=on_transcript) + + print("Gemini voice v2 — speak anytime. 
Ctrl+C to stop.\n")
+    voice.start()
+
+    try:
+        while voice.is_running:
+            time.sleep(0.5)
+    except KeyboardInterrupt:
+        print("\nStopping...")
+        voice.stop()
diff --git a/Voice/marcus_voice.py b/Voice/marcus_voice.py
new file mode 100644
index 0000000..1ccb56c
--- /dev/null
+++ b/Voice/marcus_voice.py
@@ -0,0 +1,385 @@
+#!/usr/bin/env python3
+"""
+Voice/marcus_voice.py — Marcus Always-Listening Voice Module
+======================================================================
+State machine:
+    IDLE → (wake word detected) → WAKE_HEARD
+    WAKE_HEARD → (record command) → PROCESSING
+    PROCESSING → (Whisper transcribe) → send to brain → SPEAKING
+    SPEAKING → (TTS done) → IDLE
+
+Wake word: "Marcus" / "ماركوس" (detected by Whisper tiny)
+Commands: Transcribed by Whisper small
+TTS: Handled by API/audio_api.py
+
+Usage:
+    from Voice.marcus_voice import VoiceModule
+    voice = VoiceModule(audio_api, on_command=brain.handle_voice_command)
+    voice.start()  # background thread
+    voice.stop()
+"""
+
+import json
+import logging
+import os
+import subprocess
+import threading
+import time
+import numpy as np
+
+# ─── PATH CONFIG ─────────────────────────────────────────
+from dotenv import load_dotenv
+load_dotenv()
+
+BASE_PATH = os.environ.get("PROJECT_BASE", "/home/unitree")
+PROJECT_NAME = "Marcus"
+PROJECT_ROOT = os.path.join(BASE_PATH, PROJECT_NAME)
+
+LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
+os.makedirs(LOG_DIR, exist_ok=True)
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
+    handlers=[
+        logging.FileHandler(os.path.join(LOG_DIR, "voice.log")),
+        logging.StreamHandler(),
+    ],
+)
+log = logging.getLogger("marcus_voice")
+
+
+def load_config(name: str) -> dict:
+    path = os.path.join(PROJECT_ROOT, "Config", f"config_{name}.json")
+    with open(path, "r") as f:
+        return json.load(f)
+
+
+# ─── STATE ENUM ──────────────────────────────────────────
+
+class State:
+    IDLE = "IDLE"
+    WAKE_HEARD = "WAKE_HEARD"
+    PROCESSING = "PROCESSING"
+    SPEAKING = "SPEAKING"
+
+
+# ─── VOICE MODULE ────────────────────────────────────────
+
+class VoiceModule:
+    """Always-listening voice interface for Marcus."""
+
+    def __init__(self, audio_api, on_command=None):
+        """
+        Args:
+            audio_api: AudioAPI instance (from API/audio_api.py)
+            on_command: callback(text: str, lang: str) — called when command is transcribed
+        """
+        self._audio = audio_api
+        self._on_command = on_command
+        self._config = load_config("Voice")
+
+        self._stt = self._config["stt"]
+        self._mic = self._config["mic"]
+
+        # Whisper models — lazy loaded
+        self._wake_model = None
+        self._cmd_model = None
+
+        # Wake words
+        self._wake_en = [w.lower() for w in self._stt["wake_words_en"]]
+        self._wake_ar = self._stt["wake_words_ar"]
+
+        # State
+        self._state = State.IDLE
+        self._running = False
+        self._thread = None
+        self._lock = threading.Lock()
+        self._command_audio = None  # buffer handed from WAKE_HEARD to PROCESSING
+
+        log.info("VoiceModule initialized")
+
+    # ─── MODEL LOADING ────────────────────────────────────
+
+    def _load_whisper(self):
+        """Lazy-load Whisper models."""
+        import whisper
+
+        if self._wake_model is None:
+            log.info("Loading Whisper '%s' for wake word...", self._stt["wake_model"])
+            self._wake_model = whisper.load_model(self._stt["wake_model"])
+            log.info("Wake model ready")
+
+        if self._cmd_model is None:
+            log.info("Loading Whisper '%s' for commands...", self._stt["command_model"])
+            self._cmd_model = whisper.load_model(self._stt["command_model"])
+            log.info("Command model ready")
+
+    # ─── MIC RECORDING 
──────────────────────────────────── + + def _record_chunk(self, seconds: float) -> np.ndarray: + """Record audio chunk from mic via parec.""" + source = self._mic["source_index"] + rate = str(self._mic["rate"]) + + proc = subprocess.Popen( + ["parec", "-d", source, + "--format=s16le", f"--rate={rate}", "--channels=1", "--raw"], + stdout=subprocess.PIPE, + ) + time.sleep(seconds) + proc.terminate() + raw = proc.stdout.read() + return np.frombuffer(raw, dtype=np.int16) + + def _record_until_silence(self) -> np.ndarray: + """Record until silence is detected or max duration reached.""" + source = self._mic["source_index"] + rate = self._mic["rate"] + threshold = self._stt["silence_threshold"] + silence_dur = self._stt["silence_duration_sec"] + max_dur = self._stt["max_record_sec"] + + chunk_sec = 0.5 + chunk_samples = int(rate * chunk_sec) + silence_chunks_needed = int(silence_dur / chunk_sec) + max_chunks = int(max_dur / chunk_sec) + + proc = subprocess.Popen( + ["parec", "-d", source, + "--format=s16le", f"--rate={rate}", "--channels=1", "--raw"], + stdout=subprocess.PIPE, + ) + + all_audio = [] + silence_count = 0 + chunk_count = 0 + + try: + while chunk_count < max_chunks: + data = proc.stdout.read(chunk_samples * 2) # 2 bytes per sample + if not data: + break + + chunk = np.frombuffer(data, dtype=np.int16) + all_audio.append(chunk) + chunk_count += 1 + + # Check for silence + rms = np.sqrt(np.mean(chunk.astype(np.float64) ** 2)) + if rms < threshold: + silence_count += 1 + else: + silence_count = 0 + + if silence_count >= silence_chunks_needed and chunk_count > 2: + log.info("Silence detected after %.1fs", chunk_count * chunk_sec) + break + finally: + proc.terminate() + proc.stdout.read() # drain + + if all_audio: + return np.concatenate(all_audio) + return np.array([], dtype=np.int16) + + # ─── TRANSCRIPTION ──────────────────────────────────── + + def _transcribe(self, audio: np.ndarray, model, task: str = "transcribe") -> str: + """Transcribe audio using Whisper. 
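Expects int16 PCM (16 kHz mono, per the mic config); it is scaled below to the float32 [-1, 1] waveform that whisper's transcribe() accepts. 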
Returns text.""" + import whisper + + # Convert int16 to float32 [-1, 1] + audio_f32 = audio.astype(np.float32) / 32768.0 + + # Whisper expects 16kHz + result = model.transcribe( + audio_f32, + language=self._stt["language"], # None = auto-detect + task=task, + fp16=False, + ) + text = result["text"].strip() + detected_lang = result.get("language", "unknown") + log.info("Transcribed [%s]: %s", detected_lang, text[:100]) + return text + + def _check_wake_word(self, text: str) -> bool: + """Check if transcribed text contains a wake word.""" + text_lower = text.lower().strip() + + # English wake words + for w in self._wake_en: + if w in text_lower: + return True + + # Arabic wake words + for w in self._wake_ar: + if w in text: + return True + + return False + + # ─── MAIN LOOP ──────────────────────────────────────── + + def _voice_loop(self): + """Main voice processing loop — runs in background thread.""" + self._load_whisper() + log.info("Voice loop started — listening for wake word...") + + # Unmute mic once + subprocess.run( + ["pactl", "set-source-mute", self._mic["source_index"], "0"], + capture_output=True, + ) + subprocess.run( + ["pactl", "set-source-volume", self._mic["source_index"], "100%"], + capture_output=True, + ) + + while self._running: + try: + if self._state == State.IDLE: + self._do_idle() + elif self._state == State.WAKE_HEARD: + self._do_wake_heard() + elif self._state == State.PROCESSING: + self._do_processing() + elif self._state == State.SPEAKING: + # Wait for any TTS to finish before returning to IDLE + while self._audio.is_speaking: + time.sleep(0.1) + self._state = State.IDLE + except Exception as e: + log.error("Voice loop error: %s", e, exc_info=True) + self._state = State.IDLE + time.sleep(1) + + def _do_idle(self): + """Listen for wake word in 2-second chunks.""" + # Skip if robot is speaking — prevents self-listening + if self._audio.is_speaking: + time.sleep(0.2) + return + + audio = self._record_chunk(2.0) + + # Double-check speaking didn't start during recording + if self._audio.is_speaking: + return + + # Skip if too quiet (no one talking) + if audio.std() < 100: + return + + text = self._transcribe(audio, self._wake_model) + + if self._check_wake_word(text): + log.info("Wake word detected!") + self._state = State.WAKE_HEARD + + # Acknowledge + self._audio.speak( + self._config["messages"]["wake_heard"], "en" + ) + + def _do_wake_heard(self): + """Record the command until silence.""" + # Wait for "Listening..." 
TTS to finish before recording + while self._audio.is_speaking: + time.sleep(0.1) + + log.info("Recording command...") + audio = self._record_until_silence() + + if len(audio) < 4000: # < 0.25s at 16kHz + log.info("Too short, ignoring") + self._audio.speak(self._config["messages"]["no_speech"], "en") + self._state = State.IDLE + return + + self._command_audio = audio + self._state = State.PROCESSING + + def _do_processing(self): + """Transcribe the command and send to brain.""" + text = self._transcribe(self._command_audio, self._cmd_model) + self._command_audio = None + + if not text or len(text.strip()) < 2: + log.info("Empty transcription") + self._audio.speak(self._config["messages"]["no_speech"], "en") + self._state = State.IDLE + return + + # Detect language + lang = "ar" if any('\u0600' <= c <= '\u06FF' for c in text) else "en" + log.info("Command [%s]: %s", lang, text) + + # Send to brain callback + if self._on_command: + try: + self._on_command(text, lang) + except Exception as e: + log.error("Brain callback error: %s", e) + + self._state = State.IDLE + + # ─── START / STOP ───────────────────────────────────── + + def start(self): + """Start voice listening in background thread.""" + if self._running: + log.warning("Voice module already running") + return + + self._running = True + self._state = State.IDLE + self._thread = threading.Thread(target=self._voice_loop, daemon=True, name="voice") + self._thread.start() + log.info("Voice module started") + + def stop(self): + """Stop voice listening.""" + self._running = False + if self._thread: + self._thread.join(timeout=5) + self._thread = None + log.info("Voice module stopped") + + @property + def state(self) -> str: + return self._state + + @property + def is_running(self) -> bool: + return self._running + + +# ─── STANDALONE TEST ───────────────────────────────────── + +if __name__ == "__main__": + import sys + sys.path.insert(0, PROJECT_ROOT) + from API.audio_api import AudioAPI + + def on_command(text, lang): + print(f"\n{'='*50}") + print(f" COMMAND [{lang}]: {text}") + print(f"{'='*50}\n") + + audio = AudioAPI() + voice = VoiceModule(audio, on_command=on_command) + + print("Starting voice module... say 'Marcus' to wake.") + print("Press Ctrl+C to stop.\n") + + voice.start() + + try: + while voice.is_running: + time.sleep(0.5) + except KeyboardInterrupt: + print("\nStopping...") + voice.stop() + print("Done.") diff --git a/run_marcus.py b/run_marcus.py new file mode 100644 index 0000000..0db6bc2 --- /dev/null +++ b/run_marcus.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python3 +""" +run_marcus.py — Marcus AI Brain (terminal mode) +Usage: python3 run_marcus.py +""" +import sys +import os + +PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__)) +if PROJECT_ROOT not in sys.path: + sys.path.insert(0, PROJECT_ROOT) + +from Brain.marcus_brain import run_terminal + +if __name__ == "__main__": + run_terminal()
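
Note on the endpointing used by _record_until_silence above: it is plain chunked RMS gating — keep reading 0.5 s windows, count consecutive windows whose RMS falls below the threshold, and stop once enough quiet windows accumulate. A minimal standalone sketch of the same logic on synthetic input; the 16 kHz rate, 0.5 s window, and threshold of 500 here are illustrative stand-ins, not the real values from config_Voice.json:

import numpy as np

def endpoint_by_rms(samples, rate=16000, chunk_sec=0.5,
                    threshold=500.0, silence_sec=1.0):
    """Keep the leading part of `samples` up to the first sustained silence."""
    chunk = int(rate * chunk_sec)            # samples per analysis window
    needed = int(silence_sec / chunk_sec)    # consecutive quiet windows to stop
    quiet, kept = 0, []
    for i in range(0, len(samples), chunk):
        c = samples[i:i + chunk]
        kept.append(c)
        rms = np.sqrt(np.mean(c.astype(np.float64) ** 2))
        quiet = quiet + 1 if rms < threshold else 0
        if quiet >= needed and len(kept) > 2:   # same min-length guard as the module
            break
    return np.concatenate(kept)

# 1 s of loud noise ("speech") followed by 3 s of silence:
speech = (np.random.randn(16000) * 3000).astype(np.int16)
clip = endpoint_by_rms(np.concatenate([speech, np.zeros(48000, np.int16)]))
print(f"kept {len(clip) / 16000:.1f}s of 4.0s")   # → kept 2.0s, stops after 1 s of quiet

Chunked RMS is cheap enough to run continuously on the G1's onboard CPU, and the len(kept) > 2 guard mirrors the module's own minimum-length check so that leading silence cannot end a recording instantly.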