Compare commits

...

10 Commits

Author SHA1 Message Date
bcb2fbbcdf Update 2026-04-28 15:55:43 2026-04-28 15:55:45 +04:00
c6c847e3ab Update 2026-04-27 13:48:17 2026-04-27 13:48:18 +04:00
2129cadb11 Update 2026-04-27 11:16:57 2026-04-27 11:16:57 +04:00
211d4f52ab Update 2026-04-27 09:39:12 2026-04-27 09:39:13 +04:00
9485601e18 Update 2026-04-24 15:23:19 2026-04-24 15:23:19 +04:00
5d839d4f4e Voice: finalise on faster-whisper + energy wake, remove Vosk
Full-day voice-stack refactor. Experiments run and reverted:
- Gemini Live HTTP microservice (Python 3.8 env incompat, latency)
- Vosk grammar STT (English lexicon can't decode 'Sanad'; big model
  cold-load too slow on Jetson CPU)

Kept architecture:
- Voice/wake_detector.py — pure-numpy energy state machine with
  adaptive baseline, burst-audio capture for post-hoc verify.
- Voice/marcus_voice.py — orchestrator with 3 modes
  (wake_and_command / always_on / always_on_gated), hysteretic VAD,
  pre-silence trim (300 ms pre-roll), DSP pipeline (DC remove,
  80 Hz HPF, 0.97 pre-emphasis, peak-normalize), faster-whisper
  base.en int8 with beam=8 + temperature fallback [0,0.2,0.4],
  fuzzy-match canonicalisation, GARBAGE_PATTERNS + length filter,
  /s-/ phonetic wake-verify, full-turn debug WAV recording.
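
  For reference, the DSP chain above reduces to a few numpy lines. A
  minimal sketch, assuming 16 kHz int16 input and a one-pole high-pass
  (the real filter in marcus_voice.py may be implemented differently):

    import numpy as np

    def preprocess(pcm: np.ndarray, sr: int = 16000) -> np.ndarray:
        """Illustrative restatement of the DSP pipeline above."""
        x = pcm.astype(np.float32)
        x -= x.mean()                                # DC remove
        alpha = sr / (sr + 2 * np.pi * 80)           # one-pole 80 Hz HPF
        y = np.empty_like(x)
        prev_x = prev_y = 0.0
        for i, xi in enumerate(x):
            prev_y = alpha * (prev_y + xi - prev_x)
            prev_x = xi
            y[i] = prev_y
        y = np.append(y[0], y[1:] - 0.97 * y[:-1])   # 0.97 pre-emphasis
        peak = float(np.abs(y).max()) or 1.0
        return y / peak                              # peak-normalize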

Config-driven vocab (zero hardcoded strings in Python):
- stt.wake_words (33 variants of 'Sanad')
- stt.command_vocab (68 canonical phrases)
- stt.garbage_patterns (17 Whisper noise outputs)
- stt.min_transcription_length, stt.command_vocab_cutoff
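
Illustrative shape of the stt block these keys imply (entries invented
for the example; the real lists hold 33/68/17 items as noted above):

    stt = {
        "wake_words": ["sanad", "sanaad", "son said"],           # 33 variants in the real file
        "command_vocab": ["move forward", "turn left", "stop"],  # 68 canonical phrases
        "garbage_patterns": ["thanks for watching", "[BLANK_AUDIO]"],  # 17 noise outputs
        "min_transcription_length": 3,
        "command_vocab_cutoff": 80,   # fuzzy-match score threshold (assumed semantics)
    }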

Command parser widened (Brain/command_parser.py):
- _RE_SIMPLE_DIR — bare direction + verb+direction combos
  ('left', 'go back', 'move forward', 'step right', ...)
- _RE_STOP_SIMPLE — bare stop/halt/wait/pause/freeze/hold
- All motion constants sourced from config_Navigation.json
  (move_map + step_duration_sec) via API/zmq_api.py; no more
  hardcoded 0.3 / 2.0 magic numbers.

API/audio_api.py — _play_pcm now uses AudioClient.PlayStream with
automatic resampling to 16 kHz (matches Sanad's proven pattern).
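
The resampling step can be as simple as linear interpolation; a sketch
(the actual helper in audio_api.py is not shown in this diff and may
differ):

    import numpy as np

    def resample_to_16k(audio: np.ndarray, src_rate: int) -> np.ndarray:
        """Linear-interpolation resample to 16 kHz (illustrative fallback)."""
        if src_rate == 16000:
            return audio
        n_out = int(len(audio) * 16000 / src_rate)
        t_out = np.linspace(0, len(audio) - 1, n_out)
        return np.interp(t_out, np.arange(len(audio)),
                         audio.astype(np.float32)).astype(np.int16)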

Removed:
- Voice/vosk_stt.py (and all Vosk references in marcus_voice.py)
- Models/vosk-model-small-en-us-0.15/ (40 MB model + zip)
- All Vosk keys from Config/config_Voice.json

Documentation synced across README, Doc/architecture.md,
Doc/pipeline.md, Doc/functions.md, Doc/controlling.md,
Doc/MARCUS_API.md, Doc/environment.md changelog.

Known limitation: faster-whisper base.en on Jetson CPU + G1
far-field mic yields ~50% command-transcription accuracy due
to model capacity and mic reverberation. Wake + ack + recording
+ trim + Whisper + fuzzy + brain + motion all verified working
end-to-end. Future improvement path (unused): close-talking USB
mic via pactl_parec, or Gemini Live via HTTP microservice.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 14:32:28 +04:00
f45e12fae5 Update 2026-04-23 09:54:45 2026-04-23 09:54:45 +04:00
ce09b6920a Update 2026-04-22 17:54:49 2026-04-22 17:54:50 +04:00
00e52496a9 Update 2026-04-22 17:01:46 2026-04-22 17:01:48 +04:00
78a5b0b408 Update 2026-04-22 15:37:55 2026-04-22 15:37:56 +04:00
176 changed files with 8511 additions and 3042 deletions

View File

@ -211,56 +211,58 @@ class AudioAPI:
     # ─── G1 SPEAKER PLAYBACK (raw PCM, kept for future backends) ─────────
-    def _play_pcm(self, audio_16k: np.ndarray) -> float:
-        """Play 16kHz mono int16 on G1 speaker. Returns duration."""
+    def _play_pcm(self, audio: np.ndarray, rate: int = None) -> float:
+        """
+        Play mono int16 PCM on the G1 speaker.
+        `rate` is the sample rate of the incoming `audio`; we always
+        resample to self._target_rate (16 kHz) before sending because the
+        G1 speaker hardware only honors that rate — if you hand it 24 kHz
+        PCM, it plays ~1.5x too fast. This matches the Sanad pattern.
+        Uses AudioClient.PlayStream (the high-level API) with a fresh
+        stream_id + STOP_PLAY bracket on either side so a prior stream
+        can't blend into this one.
+        """
         if not self._sdk_available:
             log.warning("SDK not available, cannot play audio")
             return 0.0
-        from unitree_sdk2py.g1.audio.g1_audio_api import (
-            ROBOT_API_ID_AUDIO_START_PLAY,
-            ROBOT_API_ID_AUDIO_STOP_PLAY,
-        )
+        src_rate = int(rate) if rate else self._target_rate
+        audio = self._resample(audio, src_rate)  # → self._target_rate
+        if audio.size == 0:
+            return 0.0
+        from unitree_sdk2py.g1.audio.g1_audio_api import ROBOT_API_ID_AUDIO_STOP_PLAY
         app_name = self._spk["app_name"]
-        # Stop previous stream
+        # Stop any prior stream before opening a new one.
         self._client._Call(
             ROBOT_API_ID_AUDIO_STOP_PLAY,
             json.dumps({"app_name": app_name}),
         )
-        time.sleep(0.3)
-        # Build params — unique stream_id every call
-        pcm = audio_16k.tobytes()
+        time.sleep(0.15)
         sid = f"s_{int(time.time() * 1000)}"
-        param = json.dumps({
-            "app_name": app_name,
-            "stream_id": sid,
-            "sample_rate": self._target_rate,
-            "channels": 1,
-            "bits_per_sample": 16,
-        })
-        # Single call — full buffer
-        self._client._CallRequestWithParamAndBin(
-            ROBOT_API_ID_AUDIO_START_PLAY, param, list(pcm)
-        )
-        duration = len(audio_16k) / self._target_rate
-        time.sleep(duration + 0.5)
+        self._client.PlayStream(app_name, sid, audio.tobytes())
+        duration = len(audio) / self._target_rate
+        # Margin covers DDS buffer drain before STOP cuts playback short.
+        time.sleep(duration + 0.3)
         self._client._Call(
             ROBOT_API_ID_AUDIO_STOP_PLAY,
             json.dumps({"app_name": app_name}),
         )
-        log.info("Played: %.1fs", duration)
+        log.info("Played: %.1fs (src=%d Hz → hw=%d Hz)",
+                 duration, src_rate, self._target_rate)
         return duration

-    def play_pcm(self, audio_16k: np.ndarray) -> float:
+    def play_pcm(self, audio: np.ndarray, rate: int = None) -> float:
         """Public wrapper for playing PCM audio."""
-        return self._play_pcm(audio_16k)
+        return self._play_pcm(audio, rate=rate)

     # ─── MIC RECORDING ───────────────────────────────────
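# (Illustration, not part of the diff.) Hypothetical call site showing the
# new `rate` parameter; synth_greeting_24k is an invented stand-in for any
# 24 kHz int16 mono source:
#
#     api = AudioAPI()
#     tts_pcm = synth_greeting_24k()             # hypothetical 24 kHz source
#     dur = api.play_pcm(tts_pcm, rate=24000)    # resampled to 16 kHz internally
#     print(f"spoke for {dur:.1f}s")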

View File

@ -84,6 +84,15 @@ def add_to_history(user_msg: str, assistant_msg: str):
 def call_llava(prompt: str, img_b64, num_predict: int = 200, use_history: bool = False) -> str:
+    """
+    Single synchronous VLM call, same mechanism as Marcus_v1's _call_llava.
+    With YOLO on CPU (config_Vision.json::yolo_device="cpu"), there is no
+    iGPU contention to guard against, so the v1-style plain call is the
+    right shape. num_batch and num_ctx are still passed per-request because
+    Ollama's compute-graph pre-allocation pays attention to them (default
+    batch=512/ctx=4096 would OOM on the Jetson).
+    """
     if not VLM_ENABLED:
         return ""  # safe-mode — caller must handle empty string
     messages = []
@ -93,38 +102,14 @@ def call_llava(prompt: str, img_b64, num_predict: int = 200, use_history: bool =
     if img_b64:
         msg["images"] = [img_b64]
     messages.append(msg)
-    # When an image is attached, pause YOLO to free iGPU memory for the
-    # vision-encoder activations (~1.5 GiB). Without this, concurrent YOLO
-    # inference + Qwen vision-encode exceeds the 15 GiB Jetson iGPU budget
-    # and the llama runner is reaped by the OOM killer (status code: 500).
-    # Text-only calls skip the pause — they fit easily and YOLO stays hot.
-    _paused = False
-    if img_b64:
-        try:
-            from API.yolo_api import yolo_pause, yolo_resume, YOLO_AVAILABLE
-            if YOLO_AVAILABLE:
-                yolo_pause()
-                _paused = True
-        except Exception:
-            pass
-    try:
-        r = _client.chat(model=OLLAMA_MODEL, messages=messages,
-                         options={
-                             "temperature": 0.0,
-                             "num_predict": num_predict,
-                             "num_batch": NUM_BATCH,
-                             "num_ctx": NUM_CTX,
-                         })
-        return r["message"]["content"].strip()
-    finally:
-        if _paused:
-            try:
-                from API.yolo_api import yolo_resume
-                yolo_resume()
-            except Exception:
-                pass
+    r = _client.chat(model=OLLAMA_MODEL, messages=messages,
+                     options={
+                         "temperature": 0.0,
+                         "num_predict": num_predict,
+                         "num_batch": NUM_BATCH,
+                         "num_ctx": NUM_CTX,
+                     })
+    return r["message"]["content"].strip()

 def parse_json(raw: str):
@ -197,13 +182,21 @@ def ask_goal(goal: str, img_b64) -> dict:
 def ask_talk(command: str, img_b64, facts: str = "") -> dict:
-    """Handle talk-only commands using the YAML talk_prompt."""
+    """
+    Handle talk-only commands using the YAML talk_prompt.
+    NOTE: use_history is off. Accumulated turns push the prompt past
+    qwen2.5vl's 2048-token KV cache; Ollama then truncates and tries to
+    RoPE-shift the cache, which triggers an Ollama/ggml bug
+    (GGML_ASSERT(a->ne[2] * 4 == b->ne[0])); the runner SIGABRTs with
+    status 500. Keeping each call stateless avoids the ceiling entirely.
+    """
     if not VLM_ENABLED:
         return dict(_VLM_OFF_EMPTY)
     try:
         prompt = TALK_PROMPT.format(command=command, facts=facts)
         raw = call_llava(prompt, img_b64, num_predict=_cfg["num_predict_talk"],
-                         use_history=True)
+                         use_history=False)
         print(f" Raw: {raw}")
         d = parse_json(raw)
         if d is None:
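# (Illustration, not part of the diff.) A standalone stateless call with
# per-request options via the public ollama-python client; host and model
# name are assumed:
#
#     import ollama
#     client = ollama.Client(host="http://localhost:11434")
#     r = client.chat(
#         model="qwen2.5vl",
#         messages=[{"role": "user", "content": "Describe the scene."}],
#         options={"temperature": 0.0, "num_predict": 120,
#                  "num_batch": 16, "num_ctx": 1024},  # per-request, as in the diff
#     )
#     print(r["message"]["content"].strip())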

View File

@ -80,6 +80,11 @@ def send_cmd(cmd: str):
     _ensure_sock().send_string(json.dumps({"cmd": cmd}))

-# Load MOVE_MAP from navigation config (pure data, safe at import time)
+# Load navigation constants from config (pure data, safe at import time).
+# MOVE_MAP[direction] = (vx, vy, vyaw). STEP_DURATION_SEC is how long one
+# "step" of a bare directional command lasts (2 s at default velocities
+# ≈ 60 cm forward or 34° turn). Both live in config_Navigation.json so
+# motion can be retuned without editing Python.
 _nav = load_config("Navigation")
 MOVE_MAP = {k: tuple(v) for k, v in _nav["move_map"].items()}
+STEP_DURATION_SEC = float(_nav.get("step_duration_sec", 2.0))
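# (Illustration, not part of the diff.) Worked example of the two knobs;
# values invented — the real ones live in config_Navigation.json:
#
#     import json
#     nav = json.loads('{"move_map": {"forward": [0.3, 0.0, 0.0], '
#                      '"left": [0.0, 0.0, 0.3]}, "step_duration_sec": 2.0}')
#     vx = nav["move_map"]["forward"][0]
#     vyaw = nav["move_map"]["left"][2]
#     print(vx * nav["step_duration_sec"], "m per bare 'forward'")     # 0.6 m
#     print(vyaw * nav["step_duration_sec"] * 57.3, "deg per 'left'")  # ≈ 34°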

View File

@ -61,6 +61,25 @@ import base64
 from datetime import datetime
 from pathlib import Path

+# Persist autonomous-mode events to logs/autonomous.log so an unattended
+# explore/patrol run leaves an audit trail. Terminal output is preserved
+# (the existing " [Auto] ..." indented lines still print) but every
+# event is also written to the dedicated log for post-mortem review.
+try:
+    from Core.logger import log as _core_log
+except Exception:
+    _core_log = None
+
+def _alog(msg: str, level: str = "info") -> None:
+    """Print and persist to logs/autonomous.log."""
+    print(f" [Auto] {msg}")
+    if _core_log is not None:
+        try:
+            _core_log(f"[Auto] {msg}", level, "autonomous")
+        except Exception:
+            pass
 # ══════════════════════════════════════════════════════════════════════════════
 # CONFIGURATION
@ -134,7 +153,7 @@ class AutonomousMode:
"""Start autonomous exploration.""" """Start autonomous exploration."""
with self._lock: with self._lock:
if self._enabled: if self._enabled:
print(" [Auto] Already running — use 'auto off' to stop first") _alog("Already running — use 'auto off' to stop first")
return return
self._enabled = True self._enabled = True
@ -153,19 +172,19 @@ class AutonomousMode:
         )
         self._thread.start()
-        print(f"\n [Auto] Exploration started")
-        print(f" [Auto] Map folder: {self._map_dir}")
-        print(f" [Auto] Type 'auto off' to stop\n")
+        _alog(f"Exploration started")
+        _alog(f"Map folder: {self._map_dir}")
+        _alog("Type 'auto off' to stop")

     def disable(self):
         """Stop autonomous exploration and save results."""
         with self._lock:
             if not self._enabled:
-                print(" [Auto] Not running")
+                _alog("Not running")
                 return
             self._enabled = False
-        print("\n [Auto] Stopping exploration...")
+        _alog("Stopping exploration...")

         # Wait for thread to finish
         if self._thread and self._thread.is_alive():
@ -183,29 +202,29 @@ class AutonomousMode:
         obs = len(self._observations)
         if not running:
-            print(" [Auto] Status: IDLE")
+            _alog("Status: IDLE")
             if self._map_dir:
-                print(f" [Auto] Last map: {self._map_dir}")
+                _alog(f"Last map: {self._map_dir}")
             return
         elapsed = time.time() - (self._start_time or time.time())
         mins = int(elapsed // 60)
         secs = int(elapsed % 60)
-        print(f" [Auto] Status: EXPLORING")
-        print(f" [Auto] Duration: {mins}m {secs}s")
-        print(f" [Auto] Steps: {step} | Observations: {obs}")
+        _alog(f"Status: EXPLORING")
+        _alog(f"Duration: {mins}m {secs}s")
+        _alog(f"Steps: {step} | Observations: {obs}")
         if self._area_counts:
             areas = ", ".join(f"{k}:{v}" for k, v in sorted(self._area_counts.items()))
-            print(f" [Auto] Areas seen: {areas}")
+            _alog(f"Areas seen: {areas}")
         if self._all_objects:
-            print(f" [Auto] Objects found: {', '.join(sorted(self._all_objects))}")
+            _alog(f"Objects found: {', '.join(sorted(self._all_objects))}")
         pos = self._odom_pos()
         if pos:
-            print(f" [Auto] Position: x={pos['x']:.2f} y={pos['y']:.2f} heading={pos['heading']:.1f}°")
+            _alog(f"Position: x={pos['x']:.2f} y={pos['y']:.2f} heading={pos['heading']:.1f}°")

     def is_enabled(self) -> bool:
         with self._lock:
@ -215,7 +234,7 @@ class AutonomousMode:
"""Save current state to disk without stopping.""" """Save current state to disk without stopping."""
self._save_observations() self._save_observations()
self._save_path() self._save_path()
print(f" [Auto] Snapshot saved to {self._map_dir}") _alog(f"Snapshot saved to {self._map_dir}")
# ── EXPLORATION LOOP ──────────────────────────────────────────────────────── # ── EXPLORATION LOOP ────────────────────────────────────────────────────────
@ -240,7 +259,7 @@ class AutonomousMode:
             if self._yolo_sees("person"):
                 closest = self._yolo_closest("person")
                 if closest and closest.distance_estimate == "very close":
-                    print(f" [Auto] Person very close — pausing 2s")
+                    _alog(f"Person very close — pausing 2s")
                     self._gradual_stop()
                     time.sleep(2.0)
                     continue
@ -268,7 +287,7 @@ class AutonomousMode:
             # ── Movement decision ─────────────────────────────────────────────
             if consecutive_blocks >= 3:
                 # Stuck — turn more aggressively
-                print(f" [Auto] Stuck — turning {self._last_turn} 180°")
+                _alog(f"Stuck — turning {self._last_turn} 180°")
                 self._turn(self._last_turn, TURN_DURATION * 2)
                 consecutive_blocks = 0
                 continue
@ -281,14 +300,14 @@ class AutonomousMode:
                 # Alternate left/right turns to explore both directions
                 turn_dir = "left" if self._last_turn == "right" else "right"
                 self._last_turn = turn_dir
-                print(f" [Auto] Obstacle — turning {turn_dir}")
+                _alog(f"Obstacle — turning {turn_dir}")
                 self._turn(turn_dir, TURN_DURATION)
             else:
                 consecutive_blocks = 0

             # ── Max observations check ────────────────────────────────────────
             if len(self._observations) >= MAX_OBSERVATIONS:
-                print(f" [Auto] Max observations ({MAX_OBSERVATIONS}) reached — stopping")
+                _alog(f"Max observations ({MAX_OBSERVATIONS}) reached — stopping")
                 self._enabled = False
                 break
@ -375,9 +394,9 @@ class AutonomousMode:
             }
             self._observations.append(obs)
-            print(f" [Auto] Step {self._step} | {area_type} | {observation[:60]}")
+            _alog(f"Step {self._step} | {area_type} | {observation[:60]}")
             if objects:
-                print(f" [Auto] Objects: {', '.join(objects)}")
+                _alog(f"Objects: {', '.join(objects)}")

             # Save frame if interesting
             if interesting and SAVE_FRAMES:
@ -396,7 +415,7 @@ class AutonomousMode:
                 self._save_path()
         except Exception as e:
-            print(f" [Auto] LLaVA assess error: {e}")
+            _alog(f"LLaVA assess error: {e}")

     # ── FILE I/O ────────────────────────────────────────────────────────────────
@ -429,7 +448,7 @@ class AutonomousMode:
                 json.dump(self._observations, f, indent=2, ensure_ascii=False)
             tmp.replace(path)
         except Exception as e:
-            print(f" [Auto] Save observations error: {e}")
+            _alog(f"Save observations error: {e}")

     def _save_path(self):
         if not self._map_dir or not self._path:
@ -441,7 +460,7 @@ class AutonomousMode:
                 json.dump(self._path, f, indent=2)
             tmp.replace(path)
         except Exception as e:
-            print(f" [Auto] Save path error: {e}")
+            _alog(f"Save path error: {e}")

     def _save_frame(self, img_b64: str, step: int):
         """Save a camera frame as JPEG."""
@ -452,7 +471,7 @@ class AutonomousMode:
             with open(frame_path, "wb") as f:
                 f.write(__import__("base64").b64decode(img_b64))
         except Exception as e:
-            print(f" [Auto] Save frame error: {e}")
+            _alog(f"Save frame error: {e}")

     def _generate_summary(self) -> str:
         """Generate a text summary of the exploration session."""
@ -500,7 +519,7 @@ class AutonomousMode:
             with open(self._map_dir / "summary.txt", "w", encoding="utf-8") as f:
                 f.write(summary)
         except Exception as e:
-            print(f" [Auto] Save summary error: {e}")
+            _alog(f"Save summary error: {e}")

     def _print_summary(self):
         """Print exploration summary to terminal."""
@ -508,15 +527,15 @@ class AutonomousMode:
         mins = int(elapsed // 60)
         secs = int(elapsed % 60)
-        print(f"\n [Auto] Exploration complete")
-        print(f" [Auto] Duration: {mins}m {secs}s | Steps: {self._step}")
-        print(f" [Auto] Observations: {len(self._observations)}")
+        _alog(f"Exploration complete")
+        _alog(f"Duration: {mins}m {secs}s | Steps: {self._step}")
+        _alog(f"Observations: {len(self._observations)}")
         if self._area_counts:
-            print(f" [Auto] Areas: {dict(sorted(self._area_counts.items()))}")
+            _alog(f"Areas: {dict(sorted(self._area_counts.items()))}")
         if self._all_objects:
-            print(f" [Auto] Objects: {', '.join(sorted(self._all_objects))}")
+            _alog(f"Objects: {', '.join(sorted(self._all_objects))}")
         if self._map_dir:
-            print(f" [Auto] Saved to: {self._map_dir}\n")
+            _alog(f"Saved to: {self._map_dir}\n")

View File

@ -3,13 +3,16 @@ command_parser.py — Local command regex patterns + dispatcher
 Handles place memory, odometry, session recall, help, examples
 """
 import re
+import threading
 import time
-from API.zmq_api import send_vel, gradual_stop
+from API.zmq_api import send_vel, gradual_stop, MOVE_MAP, STEP_DURATION_SEC
 from API.memory_api import mem, place_save, place_goto, places_list_str
 from API.odometry_api import odom, ODOM_AVAILABLE
 from API.camera_api import get_frame
-from API.llava_api import ask
+from API.llava_api import ask, call_llava, parse_json
 from Brain.executor import execute
+from Core.motion_state import motion_abort, wait_while_paused
+from Core.motion_log import log_motion, print_motion

 # ── Compiled patterns ────────────────────────────────────────────────────────
@ -33,9 +36,33 @@ _RE_TURN_DEG = re.compile(
 # that for a trivial two-second motion. One step = 2 s of motion at the default
 # velocity, matching the undo-loop duration already used below.
 _RE_WALK_STEP = re.compile(
-    r"^(?:walk|go|move|step)(?:\s+(forward|back(?:ward)?))?\s+(\d+)\s*steps?$", re.I)
+    r"^(?:walk|go|move|step)(?:\s+(forward|back(?:ward)?))?\s+(\d+(?:\.\d+)?)\s*steps?$", re.I)
 _RE_TURN_STEP = re.compile(
-    r"^turn\s+(left|right)(?:\s+(\d+)\s*steps?)?$", re.I)
+    r"^turn\s+(left|right)(?:\s+(\d+(?:\.\d+)?)\s*steps?)?$", re.I)
# Simple one-shot motion — one word or verb+direction, no counts/units.
# All default to a ~2 s motion at the normal velocity. Kept local so the
# user doesn't eat a 5 s Qwen round-trip for a trivial "go back".
#
# Matches:
# "left" / "right" / "forward" / "back" / "backward"
# "go back" / "step back" / "move back" / "walk back" / "run back"
# "go forward" / "step forward" / "move forward" / "walk forward"
# "go left" / "move right" / "step left" / etc.
# "head forward" / "head back"
# Does NOT match multi-word phrases like "walk to the chair" — those
# still fall through to Qwen where they belong.
_RE_SIMPLE_DIR = re.compile(
r"^(?:(?:walk|go|move|step|run|head)\s+)?"
r"(forward|back(?:ward)?|left|right)$",
re.I,
)
# Bare stop / pause words — no need to ask Qwen what "stop" means.
_RE_STOP_SIMPLE = re.compile(
r"^(?:stop|halt|wait|pause|stay|freeze|hold|stand\s+still|don'?t\s+move)$",
re.I,
)
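# (Illustration, not part of the diff.) A quick sanity check of what the
# two fast paths accept; "walk to the chair" intentionally falls through:
#
#     assert _RE_SIMPLE_DIR.match("left")
#     assert _RE_SIMPLE_DIR.match("go back")
#     assert _RE_SIMPLE_DIR.match("step right")
#     assert not _RE_SIMPLE_DIR.match("walk to the chair")  # still goes to Qwen
#     assert _RE_STOP_SIMPLE.match("stand still")
#     assert _RE_STOP_SIMPLE.match("don't move")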
 _RE_PATROL_RT = re.compile(
     r"^patrol[/:]\s*(.+)$", re.I)
 _RE_LAST_CMD = re.compile(
@ -55,6 +82,106 @@ _RE_SESSION_SUMMARY = re.compile(
 _RE_AUTO = re.compile(
     r"^auto(?:nomous)?\s+(on|off|status|save|summary)$", re.I)
# ── DIRECT-MOTION CANONICALS ─────────────────────────────────────────────
# Canonicals that the dispatcher emits but were previously falling through
# to the LLaVA/Qwen fallback (530s blocking call on Jetson). Each maps
# to a fast, deterministic motion or arm gesture. See Bug #3 in the
# 14-bug audit.
_RE_TURN_AROUND = re.compile(r"^turn[\s_-]+around$", re.I)
_RE_SIT_DOWN = re.compile(r"^sit[\s_-]+down$", re.I)
_RE_STAND_UP = re.compile(r"^stand[\s_-]+up$", re.I)
_RE_WAVE_HELLO = re.compile(r"^wave(?:[\s_-]+(?:hello|hi))?$", re.I)
_RE_RAISE_ARM = re.compile(r"^raise[\s_-]+(?:right[\s_-]+)?arm$", re.I)
_RE_LOWER_ARM = re.compile(r"^lower[\s_-]+(?:right[\s_-]+)?arm$", re.I)
_RE_STAY_HERE = re.compile(r"^stay[\s_-]+(?:here|in[\s_-]+place)$", re.I)
_RE_POINT_GESTURE = re.compile(r"^point(?:[\s_-]+(?:at|to)[\s_-]+.*)?$", re.I)
# Note: 'move left' / 'move right' canonicals are handled by the existing
# _RE_SIMPLE_DIR (verb=move, direction=left/right via MOVE_MAP). MOVE_MAP
# in config_Navigation.json defines 'left'/'right' as ROTATIONS, not
# strafes — historical decision; if real strafes are wanted later add
# vy axis to MOVE_MAP and a separate strafe handler.
# ── HIGHER-LEVEL MOTIONS ─────────────────────────────────────────────────────
# 'look around' = scan-left + return + scan-right + return; uses execute_action
# so motion_abort halts it cleanly.
_RE_LOOK_AROUND = re.compile(
r"^(?:look[\s_-]+around|scan[\s_-]+(?:the\s+)?(?:room|area)|survey[\s_-]+(?:the\s+)?surroundings?)$",
re.I,
)
# 'follow me' = walk forward when YOLO sees a person and they aren't too close,
# else hold. Loop bounded by FOLLOW_ME_MAX_SEC and motion_abort.
_RE_FOLLOW_ME = re.compile(
r"^(?:follow[\s_-]+me|come[\s_-]+with[\s_-]+me|stick[\s_-]+with[\s_-]+me)$",
re.I,
)
# Spatial planner: 'walk to/towards the <X>'. Distinct from _RE_GOTO
# above because that one is reserved for place-memory names ('go to
# kitchen' looks up the saved 'kitchen' position). This planner uses
# LLaVA scene grounding to actually steer toward an arbitrary visible
# object — used for unsaved targets like 'the door' or 'the red chair'.
# Verbs: walk / step + to/towards/over to. We do NOT include 'go to'
# here to avoid colliding with place memory.
_RE_WALK_TO = re.compile(
r"^(?:(?:walk|step)\s+(?:over\s+)?(?:to|towards?|up\s+to)|approach)\s+"
r"(?:the\s+|a\s+|an\s+|that\s+|this\s+)?(.+?)$",
re.I,
)
# Tunable durations for higher-level motions. Kept inline (not config) so a
# hot-edit doesn't risk an import-time crash; tweak if the demo asks for
# longer scan / follow runs.
LOOK_AROUND_TURN_SEC = 1.0 # ~45° at default vyaw
LOOK_AROUND_HOLD_SEC = 0.4 # pause at each extreme so VLM can see
FOLLOW_ME_MAX_SEC = 30.0 # hard upper bound on a 'follow me' session
FOLLOW_ME_STEP_SEC = 0.5 # one forward burst per yolo-poll cycle
FOLLOW_ME_POLL_SEC = 0.2 # how often to re-check yolo when no person
# Spatial planner tunables. The planner asks LLaVA where the target is
# (left/center/right + near/medium/far) every WALK_TO_REPLAN_SEC, then
# turns to centre + walks forward in WALK_TO_STEP_SEC bursts. Total
# session bounded by WALK_TO_MAX_SEC.
WALK_TO_MAX_SEC = 45.0
WALK_TO_REPLAN_SEC = 2.5 # cadence of LLaVA scene queries
WALK_TO_STEP_SEC = 1.0 # forward burst length between re-plans
WALK_TO_TURN_SHORT_SEC = 0.4 # small correction (~18° at default vyaw)
WALK_TO_TURN_WIDE_SEC = 1.0 # larger correction when target is far off
WALK_TO_LLAVA_TIMEOUT_SEC = 15.0 # per-call deadline on the VLM scene query.
# Without this, ollama-python's default
# 5-min HTTP timeout would let a stalled
# VLM block the entire motion worker
# — and motion_abort can't break out of
# an in-flight HTTP call.
def _call_llava_with_timeout(prompt, img_b64, num_predict=120,
timeout_sec=WALK_TO_LLAVA_TIMEOUT_SEC):
"""Run call_llava in a worker thread with a hard deadline.
Returns the raw string response, or None on timeout/error.
We cannot truly cancel the HTTP request (ollama-python doesn't
expose a cancel handle), so the thread is left to finish on its
own but the planner moves on. The orphaned thread will exit
when the HTTP eventually returns; we rely on it being a daemon
so it doesn't block process exit.
"""
result = {"raw": None, "err": None}
def _runner():
try:
result["raw"] = call_llava(prompt, img_b64, num_predict=num_predict)
except Exception as e:
result["err"] = str(e)
th = threading.Thread(target=_runner, daemon=True)
th.start()
th.join(timeout=timeout_sec)
if th.is_alive():
return None # timeout
if result["err"]:
return None # error
return result["raw"]
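# (Illustration, not part of the diff.) Usage sketch for the helper above;
# prompt and handling invented for the example:
#
#     raw = _call_llava_with_timeout("Is the door visible? Reply in JSON.",
#                                    img_b64, num_predict=80, timeout_sec=15.0)
#     if raw is None:
#         # timed out or errored — log and retry on the next planning cycle
#         print(" [WalkTo] LLaVA timeout/error — retrying")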
# Autonomous mode instance — set by init_autonomous()
_auto = None
@ -84,9 +211,22 @@ def try_local_command(cmd: str) -> bool:
                 odom.return_to_start()
             else:
                 print(" [Places] Odometry not running — cannot return to start")
-        else:
-            place_goto(name)
-        return True
+            return True
+        # Try saved-place memory first. If place_goto returns False
+        # (no such place), fall through to the spatial planner — the
+        # user probably meant a visible object like 'the door' that
+        # was never saved as a place. Do this by rewriting the cmd
+        # to the planner's input shape and re-dispatching through
+        # try_local_command via _RE_WALK_TO below; we DON'T return
+        # True here so the rest of try_local_command runs.
+        if place_goto(name):
+            return True
+        # Re-target this same call through the spatial planner. We
+        # mutate the local 'cmd' string, then fall through to the
+        # _RE_WALK_TO branch later in this function (see further down).
+        print(f" [Places] '{name}' not saved — trying spatial planner")
+        cmd = "walk to " + name
+        # Note: don't return — let control fall through to _RE_WALK_TO.
     m = _RE_FORGET.match(cmd)
     if m:
@ -115,9 +255,11 @@ def try_local_command(cmd: str) -> bool:
         if odom:
             odom.walk_distance(meters)
         else:
+            vx, _, _ = MOVE_MAP["forward"]
             t0 = time.time()
-            while time.time() - t0 < meters / 0.3:
-                send_vel(vx=0.3)
+            while time.time() - t0 < meters / abs(vx):
+                if motion_abort.is_set(): break
+                send_vel(vx=vx)
                 time.sleep(0.05)
             gradual_stop()
         return True
@ -128,9 +270,11 @@ def try_local_command(cmd: str) -> bool:
         if odom:
             odom.walk_distance(meters, direction="backward")
         else:
+            vx, _, _ = MOVE_MAP["backward"]
             t0 = time.time()
-            while time.time() - t0 < meters / 0.2:
-                send_vel(vx=-0.2)
+            while time.time() - t0 < meters / abs(vx):
+                if motion_abort.is_set(): break
+                send_vel(vx=vx)
                 time.sleep(0.05)
             gradual_stop()
         return True
@ -144,10 +288,15 @@ def try_local_command(cmd: str) -> bool:
         if odom:
             odom.turn_degrees(degrees)
         else:
+            # vyaw magnitude comes from MOVE_MAP["left"]; duration is
+            # abs(degrees)/(vyaw_deg_per_sec). vyaw in config is rad/s.
+            _, _, vyaw_mag = MOVE_MAP["left"]
+            vyaw_deg_per_sec = abs(vyaw_mag) * 180.0 / 3.14159265
+            vyaw = vyaw_mag if degrees > 0 else -vyaw_mag
+            duration = abs(degrees) / vyaw_deg_per_sec
             t0 = time.time()
-            vyaw = 0.3 if degrees > 0 else -0.3
-            duration = abs(degrees) / 17.2
             while time.time() - t0 < duration:
+                if motion_abort.is_set(): break
                 send_vel(vyaw=vyaw)
                 time.sleep(0.05)
             gradual_stop()
@ -156,11 +305,19 @@ def try_local_command(cmd: str) -> bool:
     m = _RE_WALK_STEP.match(cmd)
     if m:
         direction = (m.group(1) or "forward").lower()
-        steps = int(m.group(2))
-        vx = -0.2 if direction.startswith("back") else 0.3
-        duration = 2.0 * steps
+        if direction.startswith("back"):
+            direction = "backward"
+        # Step count may be fractional ('walk 3.5 steps' / 'walk half
+        # a step' after fraction normalization). Convert to float for
+        # the duration calc; STEP_DURATION_SEC × 3.5 = 7s of motion.
+        steps = float(m.group(2))
+        vx, _, _ = MOVE_MAP[direction]
+        duration = STEP_DURATION_SEC * steps
         t0 = time.time()
         while time.time() - t0 < duration:
+            if motion_abort.is_set(): break
+            wait_while_paused()
+            if motion_abort.is_set(): break
             send_vel(vx=vx)
             time.sleep(0.05)
         gradual_stop()
@ -169,16 +326,335 @@ def try_local_command(cmd: str) -> bool:
     m = _RE_TURN_STEP.match(cmd)
     if m:
         direction = m.group(1).lower()
-        steps = int(m.group(2)) if m.group(2) else 1
-        vyaw = 0.3 if direction == "left" else -0.3
-        duration = 2.0 * steps
+        # Fractional step count for sub-step turns (e.g. 'turn left 0.5
+        # steps' from 'turn left half a step').
+        steps = float(m.group(2)) if m.group(2) else 1.0
+        _, _, vyaw = MOVE_MAP[direction]
+        duration = STEP_DURATION_SEC * steps
         t0 = time.time()
         while time.time() - t0 < duration:
+            if motion_abort.is_set(): break
+            wait_while_paused()
+            if motion_abort.is_set(): break
             send_vel(vyaw=vyaw)
             time.sleep(0.05)
         gradual_stop()
         return True
# ── BARE / SIMPLE DIRECTIONAL COMMANDS ───────────────────────────────
# "left", "right", "forward", "back", "go back", "move forward",
# "step right", "walk back", "head forward" — any one-word direction
# or verb+direction with no explicit count. Duration and velocities
# come entirely from config_Navigation.json (MOVE_MAP +
# step_duration_sec) — no magic numbers here.
m = _RE_SIMPLE_DIR.match(cmd)
if m:
direction = m.group(1).lower()
if direction.startswith("back"):
direction = "backward"
vx, vy, vyaw = MOVE_MAP[direction]
t0 = time.time()
while time.time() - t0 < STEP_DURATION_SEC:
if motion_abort.is_set(): break
wait_while_paused()
if motion_abort.is_set(): break
send_vel(vx=vx, vy=vy, vyaw=vyaw)
time.sleep(0.05)
gradual_stop()
return True
# ── BARE STOP / PAUSE ─────────────────────────────────────────────────
m = _RE_STOP_SIMPLE.match(cmd)
if m:
gradual_stop()
return True
# ── DIRECT-MOTION CANONICALS (no LLaVA fallback) ────────────────────
# These canonicals were previously falling through to ask(cmd, img) →
# Ollama → 530s blocking call. We handle them deterministically here.
# Bug #3 in the 14-bug audit; see README of that fix for rationale.
# 'turn around' = 180° turn. Synthesize the brain call as the
# parametric form so we reuse the existing _RE_TURN_DEG path
# (which has motion_abort / motion_pause / odom support).
if _RE_TURN_AROUND.match(cmd):
return try_local_command("turn 180 degrees")
# 'wave hello' / 'wave' → arm gesture via do_arm(). The arm_api
# is currently a stub (arm_available=false) so this prints a clear
# message rather than calling Ollama. Once GR00T is wired the gesture
# will fire automatically — no change here needed.
if _RE_WAVE_HELLO.match(cmd):
from API.arm_api import do_arm
do_arm("wave")
return True
if _RE_RAISE_ARM.match(cmd):
from API.arm_api import do_arm
do_arm("raise_right") # default to right; alias is in config_Arm.json
return True
if _RE_LOWER_ARM.match(cmd):
from API.arm_api import do_arm
do_arm("lower")
return True
if _RE_POINT_GESTURE.match(cmd):
from API.arm_api import do_arm
# 'point at X' has no canonical gesture in the arm vocabulary —
# closest match is 'right_up' which extends the right arm forward.
# Use 'high_wave' (alias=wave) only if 'right_up' isn't registered.
from API.arm_api import ARM_ALIASES, ARM_ACTIONS
for choice in ("right_up", "raise_right", "high_wave", "wave"):
if choice in ARM_ALIASES or choice in ARM_ACTIONS:
do_arm(choice)
break
else:
print(" [Arm] no point/raise gesture configured — skipping")
return True
# 'sit down' / 'stand up' need posture commands the SDK doesn't
# currently expose through marcus's ZMQ velocity bridge. Print a
# clear log line and gradual_stop (safety) instead of taking 20s
# to have Qwen invent something. When G1 SportClient.StandUp() /
# Sit() are wired into a posture_api, replace these with real calls.
if _RE_SIT_DOWN.match(cmd):
gradual_stop()
print(" [Posture] sit_down not yet wired — gradual_stop only")
return True
if _RE_STAND_UP.match(cmd):
gradual_stop()
print(" [Posture] stand_up not yet wired — gradual_stop only")
return True
# 'stay here' / 'stay in place' — explicit hold. Just stops; the
# robot maintains its standing posture by SDK default.
if _RE_STAY_HERE.match(cmd):
gradual_stop()
print(" [Posture] holding position")
return True
# ── LOOK AROUND ───────────────────────────────────────────────────────
# Scan left ~45° → centre → right ~90° → centre. Each leg goes through
# execute_action which polls motion_abort, so 'stop' mid-scan halts
# cleanly. No-op fallback if MOVE_MAP doesn't define left/right (shouldn't
# happen on a real config but keep us defensive).
if _RE_LOOK_AROUND.match(cmd):
from Brain.executor import execute_action # late import — avoids
# circular at module load
try:
execute_action("left", LOOK_AROUND_TURN_SEC)
if motion_abort.is_set(): return True
time.sleep(LOOK_AROUND_HOLD_SEC)
if motion_abort.is_set(): return True
execute_action("right", LOOK_AROUND_TURN_SEC * 2)
if motion_abort.is_set(): return True
time.sleep(LOOK_AROUND_HOLD_SEC)
if motion_abort.is_set(): return True
execute_action("left", LOOK_AROUND_TURN_SEC)
except Exception as e:
print(f" [LookAround] failed: {e}")
return True
# ── FOLLOW ME ─────────────────────────────────────────────────────────
# Watch for a person in the camera frame; when one is detected and not
# already too close, advance forward in a short burst. If the person is
# too close, hold. If no person seen, hold and re-poll. Bounded by
# FOLLOW_ME_MAX_SEC and motion_abort. yolo_api uses fallback stubs when
# YOLO isn't loaded — in that case we give up immediately and tell the
# operator (no person tracking = no point walking blindly forward).
if _RE_FOLLOW_ME.match(cmd):
from Brain.executor import move_step
try:
from API.yolo_api import yolo_sees, yolo_person_too_close
except Exception as e:
print(f" [FollowMe] yolo_api unavailable: {e}")
return True
# Probe for actual YOLO availability — both the stub `yolo_sees`
# and the stub `yolo_person_too_close` always return False, so a
# follow_me run with no real YOLO would just hold forever.
try:
from API.yolo_api import _stub_sees as _stub_sees_ref # type: ignore
stubbed = (yolo_sees is _stub_sees_ref)
except Exception:
stubbed = False
if stubbed:
print(" [FollowMe] YOLO not loaded — cannot track persons. "
"Skipping. (init_yolo() at startup to enable.)")
return True
print(f" [FollowMe] tracking up to {FOLLOW_ME_MAX_SEC:.0f}s — "
"say 'stop' to end")
deadline = time.time() + FOLLOW_ME_MAX_SEC
last_log = 0.0
while time.time() < deadline:
if motion_abort.is_set():
print(" [FollowMe] motion_abort set — ending")
gradual_stop()
break
try:
see_person = bool(yolo_sees("person"))
except Exception:
see_person = False
try:
too_close = bool(yolo_person_too_close())
except Exception:
too_close = False
now = time.time()
if now - last_log > 2.0:
state = ("person too close" if (see_person and too_close)
else "tracking" if see_person
else "no person")
print(f" [FollowMe] {state}")
last_log = now
if see_person and not too_close:
move_step("forward", FOLLOW_ME_STEP_SEC)
else:
gradual_stop()
time.sleep(FOLLOW_ME_POLL_SEC)
gradual_stop()
print(" [FollowMe] done")
return True
# ── WALK TO TARGET (spatial planner) ─────────────────────────────────
# LLaVA-grounded approach to an arbitrary visible object. This is a
# best-effort planner — accuracy depends entirely on the VLM. Loop:
# 1. Capture a frame, ask LLaVA: "Is <target> visible? bearing
# (left/center/right)? distance (near/medium/far)?"
# 2. If not visible: scan left/right small turn, retry up to N times.
# 3. If visible: turn-toward (small or wide based on bearing) then
# walk forward for WALK_TO_STEP_SEC.
# 4. Done when distance == 'near' OR motion_abort OR timeout.
# All motion goes through move_step / execute_action which already
# poll motion_abort and motion_pause, so 'stop'/'pause' work normally.
m = _RE_WALK_TO.match(cmd)
if m:
target = m.group(1).strip().rstrip(".!?,").strip()
if not target:
return True
# If the target is a known place name, defer to place_goto —
# the user probably meant the saved place, not the visible
# object with that name. (Belt-and-braces with _RE_GOTO above.)
if mem and target.lower() in {p.lower() for p in (mem.list_places() or [])}:
place_goto(target)
return True
from Brain.executor import move_step, execute_action
print(f" [WalkTo] target = {target!r}; max {WALK_TO_MAX_SEC:.0f}s")
deadline = time.time() + WALK_TO_MAX_SEC
scan_attempts_left = 4 # how many turns before giving up if unseen
last_replan = 0.0
# Kill-switch on the LLaVA retry loop. Without this, an unresponsive
# Ollama (server crashed / model OOM / network wedged) makes the
# planner grind for the full WALK_TO_MAX_SEC, hammering with retries
# every 0.5 s and producing nothing but noise. Bail after a small
# consecutive-timeout count so the user gets a fast 'unable to plan'
# rather than a 45 s silent wait. Field-tested cap.
consecutive_llava_timeouts = 0
LLAVA_TIMEOUT_GIVE_UP = 3
while time.time() < deadline:
if motion_abort.is_set():
print(" [WalkTo] motion_abort set — ending")
gradual_stop()
break
# Re-plan via LLaVA at each cycle. Caching one query per
# WALK_TO_REPLAN_SEC keeps token cost in check.
now = time.time()
if now - last_replan < WALK_TO_REPLAN_SEC and last_replan > 0:
time.sleep(0.1)
continue
last_replan = now
img_b64 = get_frame()
if not img_b64:
print(" [WalkTo] no camera frame — pausing")
time.sleep(0.5)
continue
prompt = (
f"You are guiding a robot toward the {target!r}. Look at "
f"the image. Answer ONLY in JSON, no other text:\n"
f"{{\"visible\": true|false, \"bearing\": \"left|center|right\", "
f"\"distance\": \"near|medium|far\", \"reason\": \"<brief>\"}}\n"
f"- 'visible' = whether the {target!r} is in the frame\n"
f"- 'bearing' = which side of the frame it's on\n"
f"- 'distance' = rough estimate based on apparent size\n"
f"- 'near' means within ~1 metre (almost stopping distance)"
)
# Wrap LLaVA in a thread-timeout: ollama-python's default
# HTTP timeout is ~5 minutes, which would let a stalled VLM
# block the entire motion worker far longer than the watchdog
# would tolerate. WALK_TO_LLAVA_TIMEOUT_SEC = 15s gives the
# VLM enough room to answer at idle while keeping abort
# responsive (next iteration re-checks motion_abort).
if motion_abort.is_set(): break
raw = _call_llava_with_timeout(prompt, img_b64, num_predict=120)
if raw is None:
consecutive_llava_timeouts += 1
print(f" [WalkTo] LLaVA timeout/error "
f"({consecutive_llava_timeouts}/{LLAVA_TIMEOUT_GIVE_UP}) "
f"— retrying")
if consecutive_llava_timeouts >= LLAVA_TIMEOUT_GIVE_UP:
print(f" [WalkTo] LLaVA unresponsive after "
f"{LLAVA_TIMEOUT_GIVE_UP} timeouts — giving up. "
f"Check Ollama (curl http://localhost:11434/api/version).")
gradual_stop()
return True
time.sleep(0.5)
continue
consecutive_llava_timeouts = 0
obs = parse_json(raw) or {}
visible = bool(obs.get("visible"))
bearing = (obs.get("bearing") or "").lower()
distance = (obs.get("distance") or "").lower()
reason = obs.get("reason", "")
print(f" [WalkTo] visible={visible} bearing={bearing} "
f"distance={distance} ({reason[:60]})")
if motion_abort.is_set(): break
if not visible:
if scan_attempts_left <= 0:
print(f" [WalkTo] {target!r} not found after scans — "
"giving up")
break
scan_attempts_left -= 1
# Small turn left to widen the search arc; next iteration
# the LLaVA call will see the new view.
execute_action("left", WALK_TO_TURN_SHORT_SEC)
continue
scan_attempts_left = 4 # reset once we see it again
# Reached: stop and report
if distance == "near":
gradual_stop()
print(f" [WalkTo] arrived at {target!r}")
return True
# Steer toward target
if bearing == "left":
turn_sec = (WALK_TO_TURN_WIDE_SEC if distance == "far"
else WALK_TO_TURN_SHORT_SEC)
execute_action("left", turn_sec)
elif bearing == "right":
turn_sec = (WALK_TO_TURN_WIDE_SEC if distance == "far"
else WALK_TO_TURN_SHORT_SEC)
execute_action("right", turn_sec)
# bearing == 'center' or unknown → just walk forward
if motion_abort.is_set(): break
# Forward burst
move_step("forward", WALK_TO_STEP_SEC)
gradual_stop()
return True
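# (Illustration, not part of the diff.) The reply shape the planner expects
# from the VLM, and the decision it implies; values invented:
#
#     import json
#     raw = '{"visible": true, "bearing": "left", "distance": "far", "reason": "door at left edge"}'
#     obs = json.loads(raw)
#     if obs["visible"] and obs["distance"] != "near":
#         # far + off-centre → wide left correction, then a WALK_TO_STEP_SEC burst
#         print("turn left,", "wide" if obs["distance"] == "far" else "short")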
     # ── NAMED PATROL ROUTE ───────────────────────────────
     m = _RE_PATROL_RT.match(cmd)
     if m:
@ -234,17 +710,11 @@ def try_local_command(cmd: str) -> bool:
         for phrase, (reverse_dir, _) in move_words.items():
             if phrase in cl:
                 print(f" Undoing: '{c}' → reversing with '{reverse_dir}'")
-                dur, t0 = 2.0, time.time()
-                if reverse_dir in ("left", "right"):
-                    vyaw = 0.3 if reverse_dir == "left" else -0.3
-                    while time.time() - t0 < dur:
-                        send_vel(vyaw=vyaw)
-                        time.sleep(0.05)
-                else:
-                    vx = 0.3 if reverse_dir == "forward" else -0.2
-                    while time.time() - t0 < dur:
-                        send_vel(vx=vx)
-                        time.sleep(0.05)
+                vx, vy, vyaw = MOVE_MAP[reverse_dir]
+                t0 = time.time()
+                while time.time() - t0 < STEP_DURATION_SEC:
+                    send_vel(vx=vx, vy=vy, vyaw=vyaw)
+                    time.sleep(0.05)
                 gradual_stop()
                 return True
         print(" No movement command to undo")

View File

@ -6,6 +6,7 @@ import time
 import threading
 from API.zmq_api import send_vel, gradual_stop, MOVE_MAP, STEP_PAUSE
 from API.arm_api import ALL_ARM_NAMES, do_arm
+from Core.motion_state import motion_abort, wait_while_paused

 def _obstacle_check():
@ -29,6 +30,18 @@ def execute_action(move: str, duration: float):
     vx, vy, vyaw = MOVE_MAP[move]
     t0 = time.time()
     while time.time() - t0 < duration:
+        if motion_abort.is_set():
+            gradual_stop()
+            print(" [executor] motion_abort set — stopping")
+            return
+        # Honour a soft pause: hold zero velocity and wait until
+        # the user resumes (or aborts). wait_while_paused returns
+        # immediately if not paused. Re-check abort right after.
+        if motion_pause_check():
+            send_vel(0.0, 0.0, 0.0)
+            wait_while_paused()
+            if motion_abort.is_set():
+                gradual_stop(); return
         if _obstacle_check():
             gradual_stop()
             print(" [Safety] LiDAR obstacle — stopping")
@ -47,6 +60,15 @@ def move_step(move: str, duration: float):
     vx, vy, vyaw = MOVE_MAP[move]
     t0 = time.time()
     while time.time() - t0 < duration:
+        if motion_abort.is_set():
+            send_vel(0.0, 0.0, 0.0)
+            print(" [executor] motion_abort set — pausing step")
+            return
+        if motion_pause_check():
+            send_vel(0.0, 0.0, 0.0)
+            wait_while_paused()
+            if motion_abort.is_set():
+                send_vel(0.0, 0.0, 0.0); return
         if _obstacle_check():
             send_vel(0.0, 0.0, 0.0)
             print(" [Safety] LiDAR obstacle — pausing step")
@ -57,6 +79,13 @@ def move_step(move: str, duration: float):
         time.sleep(0.1)

+def motion_pause_check() -> bool:
+    """Returns True if a soft-pause is active. Tiny indirection so we
+    don't have to import motion_pause directly into every motion site."""
+    from Core.motion_state import motion_pause
+    return motion_pause.is_set()

 def merge_actions(actions: list) -> list:
     """Merge consecutive same-direction steps into one smooth movement."""
     if not actions:

View File

@ -18,7 +18,7 @@ if PROJECT_DIR not in sys.path:
     sys.path.insert(0, PROJECT_DIR)

 from API.zmq_api import init_zmq, send_vel, gradual_stop, send_cmd
-from API.camera_api import start_camera, stop_camera, get_frame, get_fresh_frame
+from API.camera_api import start_camera, stop_camera, get_frame
 from API.yolo_api import (
     init_yolo, yolo_summary, yolo_fps,
     yolo_all_classes, yolo_closest, yolo_sees,
@ -155,19 +155,28 @@ def init_brain():
_log("Brain initialized", "info", "brain") _log("Brain initialized", "info", "brain")
# Report VLM config only — no warmup thread. This matches Marcus_v1's # Synchronous warmup — same shape as Marcus_v1's marcus_llava.py. The
# concept: the first real VLM command performs the cold-load synchronously # Python process blocks here for ~60-90 s on the first run so the first
# inside ollama.chat(), which takes ~60-90 s once on the Jetson and is # real user command doesn't pay the cold-load. One attempt, no retry, no
# fast for every subsequent call. A background warmup thread races with # thread. By the time the dashboard prints, Qwen is resident in iGPU.
# YOLO/camera/audio/Holosoma startup and with user input, and on a from API.llava_api import VLM_ENABLED, OLLAMA_HOST, _client as _llava_client
# 16 GB unified-memory board that race is what triggers the OOM killer.
from API.llava_api import VLM_ENABLED, OLLAMA_HOST
if not VLM_ENABLED: if not VLM_ENABLED:
print(" [VLM] disabled by config — safe mode (no Ollama load)") print(" [VLM] disabled by config — safe mode (no Ollama load)")
else: else:
host_short = OLLAMA_HOST.replace("http://", "") host_short = OLLAMA_HOST.replace("http://", "")
print(f" [VLM] target: {host_short} ({OLLAMA_MODEL}) " print(f" [VLM] target: {host_short} ({OLLAMA_MODEL})")
f"— first vision command will cold-load (~60-90 s)") print(" [VLM] Warming up... (loading into iGPU — may take 60-90 s on cold start)")
try:
_llava_client.chat(
model=OLLAMA_MODEL,
messages=[{"role": "user", "content": "hi"}],
options={"temperature": 0.0, "num_predict": 5,
"num_batch": _cfg.get("num_batch", 16),
"num_ctx": _cfg.get("num_ctx", 1024)},
)
print(" [VLM] warm — first command will be fast")
except Exception as _e:
print(f" [VLM] warmup failed ({_e}) — first command may cold-load")
# Global voice references # Global voice references
@ -177,10 +186,12 @@ _voice_module = None
 def _init_voice():
     """
-    Initialize the voice subsystem: G1 built-in mic + Whisper STT + G1
-    built-in TtsMaker for replies. Every transcribed command flows through
-    process_command(), and the resulting `speak` string is sent to the G1
-    speaker.
+    Initialize the voice subsystem: G1 built-in mic + Gemini Live S2S
+    (subprocess in gemini_sdk env, runs Voice/gemini_runner.py). Every
+    transcribed user command flows through process_command(); the brain
+    decides motion side-effects, but the verbal reply itself comes from
+    Gemini's voice through the G1 speaker (NOT TtsMaker — TtsMaker stays
+    for non-voice subsystem callers but isn't invoked from this callback).
     """
     global _audio_api, _voice_module
     try:
@ -189,13 +200,42 @@ def _init_voice():
         _audio_api = AudioAPI()
# Heuristic filter for unusable Gemini transcripts. Gemini emits
# `<noise>` literally when audio is non-speech and `.` for empty
# bursts. These shouldn't pollute the terminal or trigger motion.
# Bilingual aware: short Arabic motion commands like "اجلس" /
# "قف" / "تعال" are LEGIT — only reject short non-ASCII-non-Arabic
# snippets (Korean / Thai / etc — echo/distortion garbage).
def _is_supported_lang_char(c: str) -> bool:
# ASCII letters/digits/punct OR Arabic block (U+0600..U+06FF)
# OR whitespace.
return c.isascii() or "؀" <= c <= "ۿ" or c.isspace()
def _is_garbage_transcript(t: str) -> bool:
stripped = t.strip().strip(".!?,").strip()
if not stripped:
return True
low = stripped.lower()
if low in ("<noise>", "noise", "yeah", "ok", "okay", "uh", "um", "hmm", "mm"):
return True
# Bare single character (often "." → ".") or all punctuation.
if all(not c.isalnum() for c in stripped):
return True
# Reject only when text is BOTH short AND uses chars from
# neither English nor Arabic — those are echo/distortion
# mistranscriptions (Korean/Thai/etc fragments).
if len(stripped) <= 6:
if not all(_is_supported_lang_char(c) for c in stripped):
return True
return False
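# (Illustration, not part of the diff.) Spot checks of the filter's intent;
# the function is local to _init_voice, so these are annotations, not tests:
#
#     "<noise>", ".", "ok"             → dropped (noise/filler list)
#     "اجلس"  (Arabic "sit")           → kept (Arabic block is whitelisted)
#     "go to the kitchen"              → kept (normal English command)
#     "아 네"  (short Korean fragment)  → dropped (short + unsupported script)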
         def _on_command(text, lang):
             text = (text or "").strip()
             if not text:
                 return
-            # One clean, distinctive line so the operator can see exactly
-            # what Whisper transcribed before the brain reacts. Everything
-            # else from the voice subsystem is file-only.
+            if _is_garbage_transcript(text):
+                # Skip silently — neither show nor dispatch to brain.
+                return
             print(f' [Sanad] heard: "{text}"')
             try:
                 result = process_command(text)
@ -204,9 +244,16 @@ def _init_voice():
                 return
             if isinstance(result, dict):
                 sp = (result.get("speak") or "").strip()
-                if sp and _audio_api:
-                    _audio_api.speak(sp)
-                # Redraw the Command: prompt that our print clobbered
+                act = (result.get("action") or "").strip()
+                # In the Gemini S2S architecture Gemini owns the voice; we
+                # do NOT call audio_api.speak(sp) here (would collide with
+                # Gemini's own audio reply). Just show the operator what
+                # the brain decided so they can correlate motion with
+                # Gemini's spoken acknowledgement.
+                if act and act not in ("NONE", "TALK"):
+                    print(f" [Sanad] doing: {act}{f' — {sp[:80]}' if sp else ''}")
+                elif sp:
+                    print(f" [Brain] {sp[:120]}")
             print("Command: ", end="", flush=True)

         _voice_module = VoiceModule(_audio_api, on_command=_on_command)
@ -299,14 +346,131 @@ def process_command(cmd: str) -> dict:
         log_cmd(cmd, response)
         return {"type": "greeting", "speak": response, "action": "GREETING", "elapsed": 0}

-    # ── "Come to me" shortcut ────────────────────────────────────────────
+    # ── "Come to me" — smart YOLO-tracked approach ──────────────────────
# Old behaviour: walk forward 2s blindly. New: if YOLO is loaded,
# locate the person in the camera frame, turn toward them if off-
# centre, walk forward in short bursts, stop when 'arrived' (person
# fills enough of the frame to be 'right in front'). Bounded by
# COME_TO_ME_MAX_SEC; falls back to a 2s blind walk if YOLO isn't
# available so the canonical still does *something*.
#
# Arrival threshold: yolo_person_too_close(threshold=...) returns
# True if the person's bbox size_ratio (bbox_area / frame_area)
# exceeds the threshold. Defaults are calibrated for safety
# (0.25 = ~1m away, used by patrol). For come_here we want the
# robot to come MUCH closer — arm's length, ~0.5m — which is
# roughly size_ratio > 0.32 (lookup in marcus_yolo.distance_estimate:
# 'very close' starts at 0.30). 0.32 is a safe 'arrived' marker;
# any higher and the robot might bump into the user.
if re.match(r"^(?:come(?:\s+back)?(?:\s+to\s+me)?|come\s+here|get\s+closer|approach|move\s+closer)\s*[!.]*$", cmd, re.IGNORECASE): if re.match(r"^(?:come(?:\s+back)?(?:\s+to\s+me)?|come\s+here|get\s+closer|approach|move\s+closer)\s*[!.]*$", cmd, re.IGNORECASE):
execute_action("forward", 2.0) from Brain.executor import move_step
try:
from API.yolo_api import yolo_sees, yolo_person_too_close, yolo_closest
from API.yolo_api import _stub_sees as _stub_sees_ref # type: ignore
yolo_active = (yolo_sees is not _stub_sees_ref)
except Exception:
yolo_active = False
COME_TO_ME_MAX_SEC = 25.0
COME_TO_ME_STEP_SEC = 0.6
COME_TO_ME_TURN_SEC = 0.3 # small correction (~14° at default vyaw)
COME_TO_ME_POLL_SEC = 0.3
# Arrival threshold: 0.32 size_ratio ≈ ~0.5m / arm's length.
# Patrol's 0.25 default is too lenient for come-to-me — it
# would treat a 1m-away user as 'already arrived'.
COME_TO_ME_ARRIVED_RATIO = 0.32
if not yolo_active:
# Fallback — same behaviour as before for hardware setups
# without YOLO loaded.
print(" [ComeToMe] YOLO not loaded — walking forward 2s")
execute_action("forward", 2.0)
resp = "Coming to you"
print(f"Sanad: {resp}")
add_to_history(cmd, resp)
log_cmd(cmd, resp)
return {"type": "move", "speak": resp, "action": "FORWARD 2.0s", "elapsed": 2.0}
print(f" [ComeToMe] tracking person up to {COME_TO_ME_MAX_SEC:.0f}s — say 'stop' to end")
deadline = time.time() + COME_TO_ME_MAX_SEC
last_log = 0.0
scan_attempts_left = 6 # how many turns before giving up if person never seen
while time.time() < deadline:
try:
from Core.motion_state import motion_abort
if motion_abort.is_set():
print(" [ComeToMe] motion_abort — stopping")
break
except Exception:
pass
try:
seen = bool(yolo_sees("person"))
except Exception:
seen = False
try:
# IMPORTANT: pass an explicit 'arrived' threshold (0.32)
# — without it, the default 0.25 (safety patrol's value)
# would call the user 'too close' when they're a metre
# away, and the robot would stop without walking. We
# want it to come arm's length close.
arrived = bool(yolo_person_too_close(
threshold=COME_TO_ME_ARRIVED_RATIO,
))
except Exception:
arrived = False
now = time.time()
if now - last_log > 1.5:
state = ("arrived (close to person)" if (seen and arrived)
else "tracking person" if seen
else "scanning for person")
print(f" [ComeToMe] {state}")
last_log = now
if seen and arrived:
# Made it — robot is close enough to stop in front
print(" [ComeToMe] arrived in front of person")
break
if seen and not arrived:
# Person is in view but not yet arm's length close —
# centre on them and walk forward in a burst.
bearing = "center"
try:
det = yolo_closest("person")
if det and getattr(det, "position", None):
# position is something like 'left' / 'center' / 'right'
bearing = str(det.position).lower()
except Exception:
pass
if bearing == "left":
execute_action("left", COME_TO_ME_TURN_SEC)
elif bearing == "right":
execute_action("right", COME_TO_ME_TURN_SEC)
# forward burst
move_step("forward", COME_TO_ME_STEP_SEC)
scan_attempts_left = 6 # reset
continue
# No person — try a small scan turn, then check again
if scan_attempts_left <= 0:
print(" [ComeToMe] no person found after scans — giving up")
break
scan_attempts_left -= 1
execute_action("left", COME_TO_ME_TURN_SEC)
time.sleep(COME_TO_ME_POLL_SEC)
try:
from API.zmq_api import gradual_stop as _gs
_gs()
except Exception:
pass
elapsed = time.time() - t0
resp = "Coming to you" resp = "Coming to you"
print(f"Sanad: {resp}") print(f"Sanad: {resp}")
add_to_history(cmd, resp) add_to_history(cmd, resp)
log_cmd(cmd, resp) log_cmd(cmd, resp)
return {"type": "move", "speak": resp, "action": "FORWARD 2.0s", "elapsed": 2.0} return {"type": "move", "speak": resp, "action": "COME_TO_ME", "elapsed": elapsed}
# ── Multi-step compound ──────────────────────────────────────────────
_multi = re.match(
@@ -361,7 +525,7 @@ def _handle_search(cmd):
def _handle_talk(cmd):
print("Thinking...")
try:
- img = get_fresh_frame()
+ img = get_frame()
facts_str = ""
try:
from API.llava_api import _facts
@@ -382,10 +546,7 @@ def _handle_talk(cmd):
def _handle_llava(cmd):
print("Thinking...")
t0 = time.time()
- # get_fresh_frame() blocks up to 1 s waiting for a frame newer than
- # 300 ms old. Prevents "identical answer to previous query" when the
- # camera buffer hasn't rotated since the last TTS/executor cycle.
- img = get_fresh_frame()
+ img = get_frame()
# Poll up to 500 ms in 50 ms slices instead of blocking a full second.
# Returns the moment a frame is available — most drops recover in <100 ms.
@@ -576,7 +737,23 @@ def run_terminal():
print(' Pick the ID that sounded English and set it in')
print(' Config/config_Voice.json :: tts.builtin_speaker_id')
continue
# Structured motion log for terminal-typed commands. The
# voice path already logs through Voice/marcus_voice's
# worker; here we cover the OTHER entry-point so every
# motion (text or voice) shows up in logs/motion.log with
# parsed direction/magnitude/unit + actual elapsed.
try:
from Core.motion_log import log_motion as _mlog
_mlog("start", cmd, source="text")
_t0 = time.time()
except Exception:
_t0 = time.time()
result = process_command(cmd)
try:
_mlog("complete", cmd, source="text",
actual_sec=time.time() - _t0)
except Exception:
pass
sp = result.get("speak", "") if isinstance(result, dict) else "" sp = result.get("speak", "") if isinstance(result, dict) else ""
if sp and _audio_api: if sp and _audio_api:
_audio_api.speak(sp) _audio_api.speak(sp)
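
The come-to-me loop above stands or falls on a single number — the bbox-area/frame-area ratio that yolo_person_too_close compares against its threshold. A minimal sketch of that geometry (the frame size and bbox values are illustrative assumptions; the real check lives in API/yolo_api.py):

def size_ratio(bbox_w: float, bbox_h: float,
               frame_w: float = 640.0, frame_h: float = 480.0) -> float:
    # bbox_area / frame_area — grows as the person fills more of the view.
    return (bbox_w * bbox_h) / (frame_w * frame_h)

# Patrol's 0.25 default: a person ~1 m away already counts as 'too close'.
print(size_ratio(320, 240))   # 0.25
# Come-to-me keeps walking until roughly arm's length (~0.5 m):
print(size_ratio(370, 265))   # ~0.32 — the COME_TO_ME_ARRIVED_RATIO mark

Passing the threshold per call, rather than adding a second API, keeps yolo_person_too_close single-purpose: each caller states what 'arrived' means for it.
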


@@ -2,7 +2,7 @@
"ollama_model": "qwen2.5vl:3b", "ollama_model": "qwen2.5vl:3b",
"ollama_host": "http://127.0.0.1:11434", "ollama_host": "http://127.0.0.1:11434",
"max_history": 6, "max_history": 6,
"num_batch": 32, "num_batch": 16,
"num_ctx": 1024, "num_ctx": 1024,
"subsystems": { "subsystems": {
"vlm": true, "vlm": true,
@@ -12,9 +12,9 @@
"imgsearch": false, "imgsearch": false,
"autonomous": true "autonomous": true
}, },
"num_predict_main": 50, "num_predict_main": 60,
"num_predict_goal": 40, "num_predict_goal": 80,
"num_predict_patrol": 50, "num_predict_patrol": 80,
"num_predict_talk": 50, "num_predict_talk": 60,
"num_predict_verify": 10 "num_predict_verify": 10
} }
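
The num_* keys here mirror Ollama's generation options by name. Assuming the brain forwards them unchanged (the plumbing isn't shown in this diff, and the config filename below is a guess), a request built from this file would look roughly like:

import json, urllib.request

cfg = json.load(open("Config/config_Brain.json"))   # assumed path

payload = {
    "model": cfg["ollama_model"],                   # qwen2.5vl:3b
    "prompt": "turn right",
    "stream": False,
    "options": {
        "num_ctx": cfg["num_ctx"],                  # 1024-token window
        "num_batch": cfg["num_batch"],              # 16 after this change
        "num_predict": cfg["num_predict_main"],     # 60-token reply cap
    },
}
req = urllib.request.Request(
    cfg["ollama_host"] + "/api/generate",
    data=json.dumps(payload).encode(),
    headers={"Content-Type": "application/json"},
)
print(json.loads(urllib.request.urlopen(req).read())["response"])

The per-mode budgets (main / goal / patrol / talk / verify) would then just swap which num_predict_* value lands in options.
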


@@ -3,6 +3,8 @@
"min_steps_before_check": 3, "min_steps_before_check": 3,
"scan_interval_s": 0.4, "scan_interval_s": 0.4,
"rotation_speed": 0.3, "rotation_speed": 0.3,
"_step_duration_comment": "Duration of one 'step' for bare directional commands ('go back', 'turn right', etc.). 2.0s at move_map velocities ≈ 60 cm forward, 40 cm back, 34° turn. Change here and every regex fast-path in command_parser.py uses the new value.",
"step_duration_sec": 2.0,
"move_map": { "move_map": {
"forward": [0.3, 0.0, 0.0], "forward": [0.3, 0.0, 0.0],
"backward": [-0.2, 0.0, 0.0], "backward": [-0.2, 0.0, 0.0],

File diff suppressed because one or more lines are too long

Config/instruction.json Normal file

@@ -0,0 +1,912 @@
{
"_description": "Bilingual voice command instructions — single source of truth for the voice dispatch tables. Loaded by Voice/marcus_voice.py at module level. Adding a new motion command, a new accent variant, a new Arabic phrasing, or fixing a misheard wake-word transcription is a JSON-only edit; no Python change required. Arabic phrase tables cover MSA (Fusha) plus Gulf/Khaleeji, Egyptian (Masri), Levantine (Shami), Iraqi, and Maghrebi dialects.",
"_format": "wake_words = whole-word substrings the dispatch gate looks for in the user's transcript. Any match (English or Arabic) opens motion for the current turn. actions = per-motion phrase tables. Each action has a `canonical` string (what marcus_brain receives), `user_phrases` (what the user might SAY when asking for the motion — used for fuzzy-match + Arabic-to-English translation after wake-word strip), and `bot_phrases` (what Gemini might SPEAK when acknowledging — used by the bot-side dispatcher to fire motion off Gemini's own confirmation). All matching is substring-based; English entries are matched case-insensitively, Arabic entries match as-is. Keep the canonical string consistent with Brain/command_parser.py vocabulary.",
"wake_words": {
"_comment": "All variants of the robot's name 'Sanad' — the gate that authorises motion. Add new mishearings here when you see them in logs/transcript.log under HEARD lines that should have triggered motion but didn't.",
"english": [
"sanad", "sannad", "sennad", "sunnad", "sinnad", "sonnad",
"sanat", "sunnat", "sonnat", "sinnat", "sennat",
"sanid", "sanud", "saned", "sanod", "sanaad",
"senad", "sinad", "sonad", "sunad",
"sanah", "sanath", "sanadh", "sonadh",
"samad", "somad", "sumad",
"thanad", "zanad",
"sa nad", "san ad", "san odd", "san add"
],
"arabic": [
"سند", "سنّاد", "ساند", "سنود", "سنَد", "سنّد", "سَند", "سنّود", "ساندي", "سنادي",
"يا سند", "يا سنّاد", "يا ساند", "يا سَند", "يا سنود", "ياسند", "ياسنود",
"السند", "السنّاد"
]
},
"actions": {
"turn_right": {
"canonical": "turn right",
"user_phrases": {
"english": ["turn right", "rotate right", "spin right", "go right", "face right", "right"],
"arabic": [
"استدر يميناً", "استدر يمينا", "ادر يميناً", "ادر يمينا", "لف يمين", "لف يميناً", "يمين",
"دور يمين", "دور لليمين", "دور على اليمين", "دور ع اليمين",
"خش يمين", "خش ع اليمين", "حوّد يمين", "حود يمين", "حوّد لليمين",
"دير يمين", "دير لليمين", "دير على ليمن", "دير ع اليمين",
"لف لليمين", "لف على اليمين", "لف عاليمين", "لف ع اليمين",
"روح يمين", "اتجه يمين", "اتجه لليمين", "تجه يمين"
]
},
"bot_phrases": {
"english": ["turning right"],
"arabic": [
"أستدير يميناً", "أستدير يمينا", "استدير يميناً", "استدير يمينا", "ألف يميناً", "ألف يمينا",
"بلف يمين", "بدور يمين", "أدور يمين", "أحوّد يمين", "أخش يمين"
]
}
},
"turn_left": {
"canonical": "turn left",
"user_phrases": {
"english": ["turn left", "rotate left", "spin left", "go left", "face left", "left"],
"arabic": [
"استدر يساراً", "استدر يسارا", "ادر يساراً", "ادر يسارا", "لف يسار", "لف يساراً", "يسار", "شمال",
"دور يسار", "دور شمال", "دور لليسار", "دور على اليسار", "دور ع الشمال",
"خش يسار", "خش شمال", "خش ع الشمال", "حوّد شمال", "حود شمال", "حوّد يسار", "حوّد لليسار",
"دير يسار", "دير شمال", "دير لليسار", "دير على ليسار", "دير ع الشمال",
"لف لليسار", "لف على اليسار", "لف عاشمال", "لف ع الشمال",
"روح يسار", "روح شمال", "اتجه يسار", "اتجه شمال", "اتجه لليسار", "تجه شمال"
]
},
"bot_phrases": {
"english": ["turning left"],
"arabic": [
"أستدير يساراً", "أستدير يسارا", "استدير يساراً", "استدير يسارا", "ألف يساراً", "ألف يسارا",
"بلف شمال", "بدور شمال", "أدور شمال", "أحوّد شمال", "أخش شمال"
]
}
},
"turn_around": {
"canonical": "turn around",
"user_phrases": {
"english": ["turn around", "turn back", "spin around", "about face", "face the other way", "spin in place", "spin yourself", "turn yourself around"],
"arabic": [
"استدر للخلف", "استدر إلى الوراء", "اتجه للخلف", "ادر للخلف", "ارجع وجهك",
"لف ورا", "لف للورا", "لف للخلف", "دور ورا", "دور للورا", "دور للخلف",
"اتلف", "اتلف ورا", "دير وراك", "دير للورا", "استدر ورا",
"اقلب وجهك", "اقلب", "اعكس اتجاهك", "غيّر اتجاهك",
"لف حول نفسك", "لف حوالين نفسك", "لف على نفسك",
"دور حول نفسك", "دور حوالين نفسك", "دور على نفسك",
"استدر حول نفسك", "استدر حوالين نفسك",
"حول نفسك", "حوالين نفسك",
"لف بنفسك", "دور بنفسك"
]
},
"bot_phrases": {
"english": ["turning around"],
"arabic": [
"أستدير للخلف", "أستدير إلى الوراء", "استدير للخلف",
"بلف ورا", "بدور للورا", "أدور للخلف",
"أستدير حول نفسي", "ألف حول نفسي", "أدور حول نفسي",
"بلف حول نفسي", "بدور حول نفسي"
]
}
},
"move_forward": {
"canonical": "move forward",
"user_phrases": {
"english": ["move forward", "go forward", "walk forward", "step forward", "forward", "keep going", "walk ahead", "move ahead"],
"arabic": [
"تحرك للأمام", "تحرك إلى الأمام", "اذهب للأمام", "امش للأمام", "تقدم", "للأمام", "أمام",
"روح قدام", "روح لقدام", "روح كدام",
"امشي قدام", "امشي لقدام", "امش قدام",
"اتقدم", "تقدم لقدام", "يلا قدام", "يالله قدام",
"فوت", "فوت لقدام", "فوت قدام",
"سير لقدام", "سير قدام", "اسير قدام",
"خش قدام", "تعال قدام", "للقدام", "قدام"
]
},
"bot_phrases": {
"_": "Direction-anchored only. We do NOT add destination-phrased needles like 'walking towards X' / 'أمشي نحو X' even though Gemini occasionally says them — those false-fire when Gemini DESCRIBES a third party ('I see a person walking towards us'). Persona RULE 4b forbids destination phrasing in motion confirmations; trust the prompt. If a real motion phrase ever slips through and we miss it, add the EXACT slip to this list — not generic destination patterns.",
"english": [
"moving forward", "walking forward", "stepping forward",
"going forward", "going ahead", "heading forward", "walking ahead"
],
"arabic": [
"أتحرك للأمام", "أتحرك إلى الأمام", "أتقدم", "أمشي للأمام", "أذهب للأمام",
"بمشي قدام", "براح قدام", "أروح قدام", "بتقدم", "بفوت لقدام"
]
}
},
"move_backward": {
"canonical": "move backward",
"user_phrases": {
"english": ["move back", "move backward", "go back", "go backward", "walk back", "walk backward", "step back", "backward", "back", "reverse"],
"arabic": [
"تحرك للخلف", "تحرك إلى الخلف", "اذهب للخلف", "امش للخلف", "ارجع", "ارجع للخلف", "للخلف", "خلف",
"روح ورا", "روح للورا", "ارجع ورا", "ارجع للورا",
"امشي ورا", "امشي للورا", "امش ورا",
"تأخر", "تراجع", "ورا", "للورا",
"رجع للخلف", "ارجع لورا", "رجع لورا",
"سير للورا", "سير ورا"
]
},
"bot_phrases": {
"english": ["moving backward", "moving back", "walking backward", "walking back", "stepping back", "going back"],
"arabic": [
"أتحرك للخلف", "أتحرك إلى الخلف", "أرجع", "أمشي للخلف", "أعود للخلف",
"بمشي ورا", "براح ورا", "أرجع ورا", "بتراجع"
]
}
},
"move_right": {
"canonical": "move right",
"user_phrases": {
"english": ["step right", "move right", "slide right", "strafe right", "sidestep right"],
"arabic": [
"تحرك يميناً", "تحرك يمينا", "خطوة يمين", "اتجه يميناً",
"زحلق يمين", "زحلق لليمين", "اتزحلق يمين",
"خطوة لليمين", "خطوة ع اليمين", "خطوة على اليمين",
"حرك يمين", "حرّك يمين", "اخطو يمين", "اخطو لليمين",
"زح يمين", "زح لليمين"
]
},
"bot_phrases": {
"english": ["moving right", "stepping right", "sliding right"],
"arabic": [
"أتحرك يميناً", "أتحرك يمينا", "أخطو يميناً",
"بزحلق يمين", "أزحلق يمين", "بخطو يمين"
]
}
},
"move_left": {
"canonical": "move left",
"user_phrases": {
"english": ["step left", "move left", "slide left", "strafe left", "sidestep left"],
"arabic": [
"تحرك يساراً", "تحرك يسارا", "خطوة يسار", "اتجه يساراً",
"زحلق يسار", "زحلق شمال", "زحلق لليسار", "اتزحلق شمال",
"خطوة شمال", "خطوة لليسار", "خطوة ع الشمال", "خطوة على اليسار",
"حرك يسار", "حرك شمال", "حرّك شمال", "اخطو يسار", "اخطو شمال", "اخطو لليسار",
"زح يسار", "زح شمال"
]
},
"bot_phrases": {
"english": ["moving left", "stepping left", "sliding left"],
"arabic": [
"أتحرك يساراً", "أتحرك يسارا", "أخطو يساراً",
"بزحلق شمال", "أزحلق شمال", "بخطو شمال"
]
}
},
"stop": {
"canonical": "stop",
"user_phrases": {
"english": ["stop", "halt", "wait", "pause", "freeze", "hold", "stop moving", "stand still", "don't move"],
"arabic": [
"توقف", "قف مكانك", "اوقف", "انتظر", "اثبت", "لا تتحرك",
"وقّف", "وقف", "وگف", "بطّل", "بطل حركة", "بطل",
"خلاص", "كفاية", "يكفي",
"استنى", "استنى شوية", "اصبر", "صبر", "اصبر شوية",
"تو", "ثبات", "اثبت مكانك", "ما تتحرك", "ما تمشي",
"هدّي", "هدّى", "روّق"
]
},
"bot_phrases": {
"_": "Removed 'أنتظر' from this list — it's also a pause_motion bot_phrase ('I wait' is more naturally a pause than a stop). Keeping it here would cause Gemini saying 'أنتظر' to fire BOTH stop and pause_motion in the same chunk.",
"english": ["stopping", "halting", "holding"],
"arabic": [
"أتوقف", "توقفت",
"بوقّف", "وقّفت", "بستنى", "أصبر", "بطّلت"
]
}
},
"sit_down": {
"canonical": "sit down",
"user_phrases": {
"english": ["sit down", "sit", "take a seat", "have a seat"],
"arabic": [
"اجلس", "ارتح", "اقعد",
"اقعد على الأرض", "اقعد ع الأرض", "اجلس على الأرض",
"اگعد", "گلس", "تفضل اقعد", "ارتاح",
"خد راحتك واقعد", "اقعد يا سند"
]
},
"bot_phrases": {
"english": ["sitting down", "sitting"],
"arabic": [
"أجلس", "أقعد", "جلست",
"بقعد", "قعدت", "بجلس", "أرتاح"
]
}
},
"stand_up": {
"canonical": "stand up",
"user_phrases": {
"english": ["stand up", "stand", "get up", "rise"],
"arabic": [
"قف", "انهض", "ارفع نفسك",
"قوم", "گوم", "نوض", "نض",
"قوم على رجلك", "قوم على رجليك", "انهض من مكانك",
"اوقف على رجلك"
]
},
"bot_phrases": {
"english": ["standing up", "getting up", "rising"],
"arabic": [
"أقف", "أنهض", "وقفت",
"بقوم", "قمت", "نضت", "بنهض"
]
}
},
"wave_hello": {
"canonical": "wave hello",
"user_phrases": {
"english": ["wave hello", "wave", "say hi", "greet", "wave to me", "wave at me"],
"arabic": [
"لوّح", "لوح", "لوّح بيدك", "حيّ", "سلّم",
"لوّح لي", "لوّحلي", "لوّح بيدك لي",
"سلّم بيدك", "سلّم علي", "حيّيني", "حيي",
"هز يدك", "هز إيدك", "هزّ ايدك",
"قول هاي", "قول مرحبا", "قول السلام",
"حييني", "سلم علي"
]
},
"bot_phrases": {
"english": ["waving hello", "waving", "saying hi", "greeting"],
"arabic": [
"ألوّح", "ألوح", "ألوّح بيدي", "أحيّ", "أسلّم",
"بلوّح", "بسلّم", "بهز إيدي", "أهز يدي"
]
}
},
"raise_arm": {
"canonical": "raise arm",
"user_phrases": {
"english": ["raise arm", "raise your arm", "lift your arm", "arm up", "hand up"],
"arabic": [
"ارفع يدك", "ارفع ذراعك", "اليد للأعلى",
"ارفع إيدك", "ارفع ايدك", "ارفع يديك",
"طلّع يدك", "طلع يدك", "طلّع إيدك", "طلع إيدك",
"علّي يدك", "علّي إيدك", "علي يدك",
"يدك فوق", "إيدك فوق", "اليد فوق", "حط يدك فوق",
"ارفع اليد", "ارفع ايديك"
]
},
"bot_phrases": {
"english": ["raising arm", "raising my arm", "lifting my arm", "arm up"],
"arabic": [
"أرفع يدي", "أرفع ذراعي",
"برفع إيدي", "بطلّع إيدي", "بعلّي إيدي", "رفعت يدي"
]
}
},
"lower_arm": {
"canonical": "lower arm",
"user_phrases": {
"english": ["lower arm", "lower your arm", "drop your arm", "arm down", "hand down", "rest your arm"],
"arabic": [
"اخفض يدك", "اخفض ذراعك", "اليد للأسفل", "نزل يدك",
"نزّل يدك", "نزّل إيدك", "نزل إيدك", "نزّل ايدك",
"حط يدك", "حط إيدك", "حط ايدك", "خلي يدك تحت",
"طيح يدك", "طيّح يدك", "طيح إيدك",
"يدك تحت", "إيدك تحت", "اليد تحت",
"ارح يدك", "ريّح يدك", "ريّح إيدك"
]
},
"bot_phrases": {
"english": ["lowering arm", "lowering my arm", "dropping my arm", "arm down"],
"arabic": [
"أخفض يدي", "أخفض ذراعي", "أنزل يدي",
"بنزّل إيدي", "نزّلت إيدي", "بريّح إيدي"
]
}
},
"point": {
"canonical": "point",
"user_phrases": {
"english": ["point", "point at it", "point to it", "point there"],
"arabic": [
"اشر", "أشِر", "اشر إلى", "اشر هناك",
"أشّر", "اشّر", "أشر بأصبعك", "أشر بصبعك", "اشر بصبعك",
"شاور", "شاورلي", "أشاور", "شاور هناك",
"ورّيني", "وريني", "ورجيني",
"بصبعك", "حدد", "حدد بصبعك"
]
},
"bot_phrases": {
"english": ["pointing"],
"arabic": [
"أشير",
"بشاور", "بأشر", "بشير", "أشّرت"
]
}
},
"come_here": {
"canonical": "come here",
"user_phrases": {
"english": ["come here", "come to me", "come closer", "approach", "get closer", "come over here", "come"],
"arabic": [
"تعال", "تعال هنا", "تعال إليّ", "اقترب", "تقرب",
"تعالى", "تعالى هنا", "تعالى لي", "تعالى عندي",
"تعا", "تعا لهون", "تعا عندي",
"أجي", "أرواح", "ارواحلي",
"قرّب", "قرب", "قرّب لي", "قرّب مني", "قرب مني",
"گرّب", "گرب لي",
"يلا تعال", "يا الله تعال", "تعال جنبي", "خش جنبي",
"تعال يا سند", "تعال بسرعة", "هلم"
]
},
"bot_phrases": {
"_": "Expanded Arabic forms because Gemini sometimes emits non-canonical conjugations (e.g. 'أتعال' is grammatically wrong but phonetically what it produces for 'I come'). Persona Rule 5d gives Gemini correct example phrases; these extra needles catch slip cases like 'أتعال'/'بجي لك'.",
"english": [
"coming over", "coming to you", "approaching",
"coming to find you", "coming your way",
"i'm coming", "on my way to you", "heading to you"
],
"arabic": [
"آتي إليك", "أتي إليك", "أتي", "آتي", "أتعال",
"أنا قادم", "قادم إليك", "قادم لك", "أقترب منك",
"أقترب", "أتقرّب", "بجي", "بجي لك", "جاي", "جاي لك",
"بقرّب", "أقرّب منك", "رايح لك", "أتجه إليك"
]
}
},
"follow_me": {
"canonical": "follow me",
"user_phrases": {
"english": ["follow me", "come with me", "walk with me"],
"arabic": [
"اتبعني", "تعال معي", "امش معي",
"تعال ورايا", "تعال ورائي", "تعا ورايي",
"تعال ويايا", "تعال وياي",
"خش ورايا", "خليك ورايا",
"يلا معي", "يلا معاي", "يا الله معي",
"روح ويايا", "امشي معاي", "امشي معي",
"خليك معي", "ضل معي", "ضل ورايا",
"تعال خلفي", "خش خلفي", "اتبعني يا سند"
]
},
"bot_phrases": {
"english": ["following you", "following", "coming with you"],
"arabic": [
"أتبعك", "آتي معك", "أمشي معك",
"بتبعك", "جاي ورايك", "بمشي معاك", "ماشي معاك"
]
}
},
"stay_here": {
"canonical": "stay here",
"user_phrases": {
"english": ["stay here", "stay", "wait here", "hold position", "don't follow me"],
"arabic": [
"ابق هنا", "اثبت هنا", "انتظر هنا", "لا تتبعني",
"خليك هنا", "خليك هون", "خلك هنا", "خل هنا",
"فضل هنا", "ضل هنا", "ضل هون", "ابقى هنا",
"اصبر هنا", "استنى هنا", "تو هنا",
"ثبات هنا", "اثبت بمكانك", "ابق بمكانك",
"ما تتبعني", "ما تجي ورايا", "ما تمشي معي",
"خليك بمحلك", "ضل بمحلك"
]
},
"bot_phrases": {
"english": ["staying here", "staying", "waiting here"],
"arabic": [
"أبقى هنا", "أنتظر هنا", "أثبت هنا",
"باقي هنا", "بضل هنا", "بستنى هنا", "بقعد هنا"
]
}
},
"go_home": {
"canonical": "go home",
"user_phrases": {
"english": ["go home", "return home", "head home", "go back home"],
"arabic": [
"اذهب للبيت", "اذهب إلى البيت", "ارجع للبيت", "عُد للبيت",
"روح البيت", "روح للبيت", "روح ع البيت", "روح عالبيت", "روح بيت",
"ارجع البيت", "ارجاع البيت", "ارجع عالبيت", "ارجع ع البيت",
"خش البيت", "اطلع البيت",
"ارجع للقاعدة", "روح للقاعدة", "اتجه للبيت", "اتجه للقاعدة",
"عد للبيت", "عود للبيت"
]
},
"bot_phrases": {
"english": ["going home", "heading home", "returning home"],
"arabic": [
"أعود للبيت", "أذهب للبيت", "أتجه للبيت",
"براح البيت", "راجع البيت", "بروح للبيت", "أرجع للبيت"
]
}
},
"patrol": {
"canonical": "patrol",
"user_phrases": {
"english": ["patrol", "start patrol", "begin patrol", "patrol the area", "walk the route"],
"arabic": [
"طوف", "ابدأ الدورية", "ابدأ التطواف",
"دور بالمكان", "دور حواليك", "دور حوالينا", "لف بالمكان", "لف في المكان",
"تجول", "ابدأ التجوال", "تجول بالمكان",
"حراسة", "ابدأ الحراسة", "احرس المكان",
"طوف بالمكان", "تطوّف", "روح طوف"
]
},
"bot_phrases": {
"english": ["patrolling", "starting patrol", "beginning patrol"],
"arabic": [
"أطوف", "أبدأ الدورية", "أبدأ التطواف",
"بطوف", "بدور بالمكان", "ببدأ الدورية", "أتجول"
]
}
},
"look_around": {
"canonical": "look around",
"user_phrases": {
"english": ["look around", "scan the room", "scan around", "survey the area", "have a look around"],
"arabic": [
"انظر حولك", "تفحص المكان", "افحص المكان", "تطلع حولك",
"شوف حولك", "شوف حواليك", "شوف الغرفة", "شوف اللي حواليك",
"بص حواليك", "بص حولك", "بص في الغرفة",
"طلّ حواليك", "طلّ حولك", "تطلّع حواليك", "تطلّع حولك",
"افحص الغرفة", "افحص الموقع", "افحص اللي حولك",
"اسكان المكان", "امسح المكان", "امسح بنظرك"
]
},
"bot_phrases": {
"english": ["looking around", "scanning around", "surveying the area"],
"arabic": [
"أنظر حولي", "أتفحص المكان", "أتطلع حولي",
"بشوف حواليّ", "بطلّ حواليّ", "بفحص المكان", "بمسح المكان"
]
}
},
"what_do_you_see": {
"canonical": "what do you see",
"user_phrases": {
"english": ["what do you see", "what can you see", "describe this", "describe what you see", "tell me what you see"],
"arabic": [
"ماذا ترى", "ماذا تشاهد", "صف ما تراه", "أخبرني ماذا ترى",
"شو شايف", "شو بتشوف", "شو عم تشوف", "شو تشوف",
"شنو تشوف", "شنو شايف", "شنو هذا",
"إيش تشوف", "ايش تشوف", "ايش شايف", "إيش شايف",
"وش تشوف", "وش شايف", "وش هذا",
"شايف إيه", "بتشوف إيه", "إيه اللي قدامك", "إيه اللي شايفه", "إيه ده",
"اشنو كاتشوف", "اشنو كاتشوف قدامك",
"صف لي اللي قدامك", "قول لي شو شايف", "وصف لي اللي قدامك", "وصفلي اللي قدامك",
"احكي لي اللي شايفه", "احكيلي شو شايف", "قولي شو شايف"
]
},
"bot_phrases": {
"english": [],
"arabic": []
}
},
"repeat_last": {
"_": "Memory action: re-fire the most recent dispatched command. Marcus VoiceModule keeps a deque of recent commands and intercepts this canonical — it is NEVER passed to the brain, the resolved (real) command is dispatched instead. bot_phrases use distinctive multi-word forms ('repeating that', 'doing that again') to avoid colliding with the bare 'again' / 'تاني' that users might say in a different context.",
"canonical": "repeat last",
"user_phrases": {
"english": [
"do that again", "do it again", "again", "repeat", "redo",
"one more time", "once more", "do the same again",
"say that again", "do it once more", "repeat that",
"another time", "another one"
],
"arabic": [
"كرر", "كرّر", "كررها", "أعد", "أعدها", "أعد العملية",
"تاني", "مرة ثانية", "مرة تانية", "مرة كمان", "مرة أخرى",
"كرر اللي سويته", "كرر الحركة", "أعد الحركة", "مرة ثانية لو سمحت",
"أعد الكرة", "كمان مرة"
]
},
"bot_phrases": {
"english": [
"repeating that", "doing that again", "doing it again",
"one more time", "doing the same again", "repeating the last action"
],
"arabic": [
"أعيد الحركة", "أكرر الحركة", "مرة ثانية", "أعيدها",
"أكررها", "أعيد آخر حركة", "أكرر آخر حركة"
]
}
},
"pause_motion": {
"_": "Soft hold: brain motion loops freeze in place (zero velocity) and wait. Resume continues the SAME motion from where it paused — different from stop, which terminates the chain. Marcus VoiceModule intercepts this canonical and sets motion_pause directly; brain never sees it. Persona Rule 10 instructs Gemini to confirm with 'Pausing.' / 'أتوقف مؤقتاً.'.",
"canonical": "pause motion",
"user_phrases": {
"english": [
"pause", "pause motion", "hold on", "hold position", "hold up",
"wait a moment", "wait a sec", "freeze for a moment", "one second",
"give me a sec", "pause for a moment", "stay there"
],
"arabic": [
"توقف لحظة", "انتظر لحظة", "ثبت", "ثبت مكانك",
"استنى شوي", "استنى ثانية", "ثانية واحدة", "لحظة", "هدّي شوي",
"ايقاف مؤقت", "وقفة قصيرة", "انتظرني", "خليك مكانك"
]
},
"bot_phrases": {
"english": [
"pausing", "holding position", "holding for a moment",
"freezing in place", "pausing for a moment"
],
"arabic": [
"أتوقف مؤقتاً", "أتوقف لحظة", "أنتظر", "أثبت مكاني",
"إيقاف مؤقت", "انتظار قصير", "أبقى مكاني"
]
}
},
"resume_motion": {
"_": "Clears motion_pause so the previously-running brain loop continues. If nothing was actually paused, this is a quiet no-op. Persona Rule 10 instructs Gemini to confirm with 'Resuming.' / 'أكمل.'.",
"canonical": "resume motion",
"user_phrases": {
"english": [
"resume", "continue", "go on", "carry on", "keep going",
"proceed", "resume motion", "continue what you were doing",
"you can continue", "go ahead now"
],
"arabic": [
"أكمل", "استكمل", "كمّل", "كمل", "تابع", "استمر",
"كمّل الحركة", "كمّل اللي كنت فيه", "تابع اللي كنت تسوي",
"روح تابع", "استمر بالحركة"
]
},
"bot_phrases": {
"english": [
"resuming", "continuing", "carrying on", "resuming motion",
"picking up where I left off"
],
"arabic": [
"أكمل", "أستمر", "أتابع", "أكمل الحركة",
"أكمل من حيث توقفت", "أتابع اللي كنت أسويه"
]
}
},
"start_recording": {
"_": "Begin capturing subsequent dispatched commands into a named sequence buffer. Marcus VoiceModule intercepts this canonical and starts recording. Optionally pre-name the sequence here ('start recording as my-greet') — otherwise the user must name it on save. The recording is in-memory until save_sequence is called.",
"canonical": "start recording",
"user_phrases": {
"english": [
"start recording", "begin recording", "record this",
"remember this sequence", "record what i do",
"record sequence", "start saving moves", "save what i do"
],
"arabic": [
"ابدأ التسجيل", "ابدأ تسجيل", "ابدا التسجيل",
"سجل اللي بسويه", "سجل الحركات",
"سجل ما أفعله", "ابدأ الحفظ"
]
},
"bot_phrases": {
"english": [
"starting to record", "recording your sequence",
"i'm recording", "recording started"
],
"arabic": [
"بدأت التسجيل", "أسجل الحركات", "بدأت تسجيل التسلسل",
"بدأت أسجل"
]
}
},
"save_sequence": {
"_": "Stop recording and persist the buffered commands. The user typically appends a name in the SAME utterance ('save sequence as my-greet') — Gemini extracts the name and includes it in the confirmation. Marcus parses the bot_phrase, extracts the trailing name, calls Sequences.save_recording(name).",
"canonical": "save sequence",
"user_phrases": {
"english": [
"save sequence", "save this sequence", "save it as",
"save recording", "stop recording and save",
"stop recording save as", "save recording as",
"name this sequence", "call this sequence"
],
"arabic": [
"احفظ التسلسل", "احفظ التسجيل", "احفظ باسم",
"أوقف التسجيل واحفظ", "احفظ هذا التسلسل",
"سمي هذا التسلسل", "احفظ التسجيل باسم", "خلصت سجل"
]
},
"bot_phrases": {
"english": [
"saved sequence", "saved as", "saving sequence as",
"recording saved", "saved your sequence as"
],
"arabic": [
"حفظت التسلسل", "حفظت باسم", "أحفظ التسلسل باسم",
"تم الحفظ"
]
}
},
"play_sequence": {
"_": "Replay a saved sequence by name. The bot_phrase carries the name ('Playing my-greet.'); Marcus extracts it and calls Sequences.play(name) → enqueues each command on the motion worker queue. Sequence commands execute serially with normal interrupt support — say 'stop' to abort the playback.",
"canonical": "play sequence",
"user_phrases": {
"english": [
"play sequence", "play my", "run my", "do my",
"play recording", "execute sequence", "run sequence",
"perform sequence"
],
"arabic": [
"شغل التسلسل", "نفذ التسلسل", "سوي اللي سجلته",
"ابدأ تسلسل", "نفذ تسجيل", "شغل التسجيل",
"اعمل التسلسل"
]
},
"bot_phrases": {
"english": [
"playing", "running", "performing sequence",
"playing sequence", "starting sequence"
],
"arabic": [
"أشغل التسلسل", "أنفذ التسلسل", "أبدأ التسلسل",
"بدأت التشغيل"
]
}
},
"cancel_recording": {
"_": "Abandon the in-progress recording without saving. Marcus calls Sequences.cancel_recording().",
"canonical": "cancel recording",
"user_phrases": {
"english": [
"cancel recording", "discard recording", "throw away recording",
"stop recording without saving", "forget the recording"
],
"arabic": [
"ألغ التسجيل", "احذف التسجيل", "اوقف التسجيل بدون حفظ",
"تجاهل التسجيل"
]
},
"bot_phrases": {
"english": [
"discarded recording", "cancelled recording",
"recording cancelled"
],
"arabic": [
"ألغيت التسجيل", "حذفت التسجيل", "تم إلغاء التسجيل"
]
}
},
"reverse_last": {
"_": "Memory action: fire the INVERSE of the last dispatched command (turn right→turn left, walk forward→walk backward, sit down→stand up, raise arm→lower arm; parametric forms flip direction and keep the number). Marcus intercepts the 'reverse last' canonical and computes the inverse from history. NOTE: the brain has its own 'undo' regex that returns to the start position via odometry — that's a different feature; reverse_last only flips the LAST motion. Distinctive bot_phrases avoid colliding with move_backward needles like 'going back' / 'reversing'.",
"canonical": "reverse last",
"user_phrases": {
"english": [
"undo", "undo that", "reverse", "reverse that",
"go back", "take that back", "cancel that",
"undo the last", "reverse the last", "do the opposite"
],
"arabic": [
"تراجع", "تراجع عن ذلك", "ارجع ما سويت", "ارجع",
"اعكس", "اعكسها", "ألغي", "ألغي ذلك", "ألغي الحركة",
"رد ما سويت", "تراجع عن آخر حركة", "اعكس آخر حركة",
"اعمل عكس", "اعمل العكس"
]
},
"bot_phrases": {
"english": [
"reversing that", "undoing that", "reversing the last action",
"undoing the last action", "going back to before"
],
"arabic": [
"أتراجع عن ذلك", "أعكس آخر حركة", "ألغي آخر حركة",
"أتراجع عن آخر حركة", "أرجع للحالة السابقة", "أعكسها"
]
}
}
},
"_parametric_description": "Motion confirmations from Gemini that carry a NUMBER (turn 90 degrees, walk 5 steps, walk 2 meters). Each entry is regex-matched against Gemini's spoken reply; the captured groups are substituted into command_template via $1, $2 placeholders, and the resulting command_template string is dispatched to the brain — Brain/command_parser.py already parses 'turn left 90 degrees' / 'walk forward 5 steps' / 'walk 2 meters' natively. Order matters: more-specific patterns (with direction) MUST come before less-specific (no direction) so they claim spans first.",
"parametric_actions": [
{
"_": "turn left/right N degrees — 'Turning right 90 degrees.'",
"regex": "(?:turn(?:ing)?|spin(?:ning)?|rotat(?:e|ing))\\s+(left|right)\\s+(\\d+)\\s*deg(?:ree(?:s)?)?",
"command_template": "turn $1 $2 degrees"
},
{
"_": "turn left/right N steps (English) — 'Turning left 1 steps.' Maps to brain's _RE_TURN_STEP single-motion canonical 'turn left N steps' (NOT compound). User says 'turn left one step' — meaning one step's worth of left rotation.",
"regex": "(?:turn(?:ing)?|spin(?:ning)?|rotat(?:e|ing))\\s+(left|right)\\s+(\\d+(?:\\.\\d+)?)\\s*steps?",
"command_template": "turn $1 $2 steps"
},
{
"_": "Arabic أستدير يميناً/يساراً N خطوات — 'أستدير يساراً 1 خطوات' for 'لف يسار خطوة وحدة' single-motion intent. Parses as 'turn left N steps' brain canonical (not compound 'turn + walk').",
"regex": "(?:أستدير|استدر|لف+|ألف+|بلف|دور+|أدور+|بدور)\\s+(?:(?:على|ع|إلى)\\s+)?(?:يمين|يميناً|يمينا|اليمين|لليمين)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:خطوة|خطوات|خطوتين)",
"command_template": "turn right $1 steps"
},
{
"_": "Arabic أستدير يساراً N خطوات — same as above, left side.",
"regex": "(?:أستدير|استدر|لف+|ألف+|بلف|دور+|أدور+|بدور)\\s+(?:(?:على|ع|إلى)\\s+)?(?:يسار|يساراً|يسارا|اليسار|لليسار|شمال|الشمال|للشمال)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:خطوة|خطوات|خطوتين)",
"command_template": "turn left $1 steps"
},
{
"_": "turn N degrees — 'Turning 360 degrees.'",
"regex": "(?:turn(?:ing)?|spin(?:ning)?|rotat(?:e|ing))\\s+(\\d+)\\s*deg(?:ree(?:s)?)?",
"command_template": "turn $1 degrees"
},
{
"_": "walk forward/back N steps — 'Walking forward 5 steps.'",
"regex": "(?:walk(?:ing)?|step(?:ping)?|mov(?:e|ing))\\s+(forward|back(?:ward)?)\\s+(\\d+(?:\\.\\d+)?)\\s+steps?",
"command_template": "walk $1 $2 steps"
},
{
"_": "walk N steps forward/back — 'Walking 5 steps forward.'",
"regex": "(?:walk(?:ing)?|step(?:ping)?|tak(?:e|ing))\\s+(\\d+(?:\\.\\d+)?)\\s+steps?\\s+(forward|back(?:ward)?)",
"command_template": "walk $2 $1 steps"
},
{
"_": "walk N steps (default forward) — 'Taking 5 steps.'",
"regex": "(?:walk(?:ing)?|step(?:ping)?|tak(?:e|ing))\\s+(\\d+(?:\\.\\d+)?)\\s+steps?",
"command_template": "walk $1 steps"
},
{
"_": "walk forward/back N meters — 'Walking forward 2 meters.'",
"regex": "(?:walk(?:ing)?|mov(?:e|ing))\\s+(forward|back(?:ward)?)\\s+(\\d+(?:\\.\\d+)?)\\s*m(?:eter(?:s)?)?\\b",
"command_template": "walk $1 $2 meters"
},
{
"_": "walk N meters forward/back — 'Walking 2 meters forward.'",
"regex": "(?:walk(?:ing)?|mov(?:e|ing))\\s+(\\d+(?:\\.\\d+)?)\\s*m(?:eter(?:s)?)?\\s+(forward|back(?:ward)?)",
"command_template": "walk $2 $1 meters"
},
{
"_": "walk N meters (default forward) — 'Walking 2 meters.'",
"regex": "(?:walk(?:ing)?|mov(?:e|ing))\\s+(\\d+(?:\\.\\d+)?)\\s*m(?:eter(?:s)?)?\\b",
"command_template": "walk $1 meters"
},
{
"_": "Arabic turn right N degrees — covers MSA + Levantine/Syrian (لف يمين), Gulf/Iraqi (دور يمين), Egyptian (خش يمين). Verb roots: أستدير | استدر | لف | دور | خش. Direction tokens: يمين | يميناً | يمينا | اليمين (with optional على/ع prefix).",
"regex": "(?:أستدير|استدر|لف+|ألف+|دور+|أدور+|بدور|خش|أخش|بخش)\\s+(?:(?:على|ع)\\s+)?(?:يمين|يميناً|يمينا|اليمين|لليمين)\\s+(\\d+)\\s*درجة",
"command_template": "turn right $1 degrees"
},
{
"_": "Arabic turn left N degrees — also accepts شمال/الشمال (Lev/Egy 'left').",
"regex": "(?:أستدير|استدر|لف+|ألف+|دور+|أدور+|بدور|خش|أخش|بخش)\\s+(?:(?:على|ع)\\s+)?(?:يسار|يساراً|يسارا|اليسار|لليسار|شمال|الشمال|للشمال)\\s+(\\d+)\\s*درجة",
"command_template": "turn left $1 degrees"
},
{
"_": "Arabic turn N degrees (no direction) — 'أستدير 360 درجة' / 'لف 180 درجة' / 'دور 90 درجة'.",
"regex": "(?:أستدير|استدر|لف+|دور+)\\s+(\\d+)\\s*درجة",
"command_template": "turn $1 degrees"
},
{
"_": "Arabic walk N steps forward — verbs: أمشي | امشي | أتحرك | تحرك | روح | اروح. Forward: للأمام | لقدام | قدام (Egy/Lev).",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(?:للأمام|لقدام|قدام)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:خطوة|خطوات|خطوتين)",
"command_template": "walk forward $1 steps"
},
{
"_": "Arabic walk N steps back — Back: للخلف | لورا | ورا (Egy/Lev/Gulf).",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(?:للخلف|لورا|ورا)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:خطوة|خطوات|خطوتين)",
"command_template": "walk backward $1 steps"
},
{
"_": "Arabic walk N steps forward — number BEFORE direction (also common in Egy/Lev).",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:خطوة|خطوات|خطوتين)\\s+(?:للأمام|لقدام|قدام)",
"command_template": "walk forward $1 steps"
},
{
"_": "Arabic walk N steps back — number BEFORE direction.",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:خطوة|خطوات|خطوتين)\\s+(?:للخلف|لورا|ورا)",
"command_template": "walk backward $1 steps"
},
{
"_": "Arabic walk N steps (default forward) — 'أمشي 5 خطوات' / 'امشي خمس خطوات' (digits only — Gemini is told to use digits).",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:خطوة|خطوات|خطوتين)",
"command_template": "walk $1 steps"
},
{
"_": "Arabic walk N steps — REVERSED word order: verb + unit + number ('أمشي خطوة واحدة' → after normalise_numbers → 'أمشي خطوة 1'). Common natural-Arabic phrasing where the number-adjective follows the noun. Without this entry, this very natural phrasing fails to dispatch.",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(?:خطوة|خطوات|خطوتين)\\s+(\\d+)",
"command_template": "walk $1 steps"
},
{
"_": "Arabic DUAL form 'خطوتين' (two steps) — single word, no separate digit. Without this entry the dispatcher would miss 'أمشي خطوتين' since the parametric regex needs \\d+. Maps directly to walk 2 steps.",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+خطوتين",
"command_template": "walk 2 steps"
},
{
"_": "Arabic DUAL form 'مترين' (two meters) — same dual-suffix pattern as above. 'أمشي مترين' / 'أتحرك مترين' → walk 2 meters.",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+مترين",
"command_template": "walk 2 meters"
},
{
"_": "Arabic DUAL form 'درجتين' (two degrees) — for completeness, though 2-degree turns are rare.",
"regex": "(?:أستدير|استدر|لف+|ألف+|دور+|أدور+|بدور)\\s+(?:يميناً|يمينا|اليمين|لليمين|يساراً|يسارا|اليسار|لليسار|شمال|الشمال|للشمال)?\\s*درجتين",
"command_template": "turn 2 degrees"
},
{
"_": "Arabic walk N meters forward — direction first.",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(?:للأمام|لقدام|قدام)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:متر|مترين|أمتار)",
"command_template": "walk forward $1 meters"
},
{
"_": "Arabic walk N meters back — direction first.",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(?:للخلف|لورا|ورا)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:متر|مترين|أمتار)",
"command_template": "walk backward $1 meters"
},
{
"_": "Arabic walk N meters forward — number first.",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:متر|مترين|أمتار)\\s+(?:للأمام|لقدام|قدام)",
"command_template": "walk forward $1 meters"
},
{
"_": "Arabic walk N meters back — number first.",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:متر|مترين|أمتار)\\s+(?:للخلف|لورا|ورا)",
"command_template": "walk backward $1 meters"
},
{
"_": "Arabic walk N meters (default forward).",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:متر|مترين|أمتار)",
"command_template": "walk $1 meters"
},
{
"_": "Generic dialect Arabic 'turn right' WITHOUT number. Catches forms NOT in turn_right.bot_phrases.arabic, e.g. 'ألف على اليمين', 'دور لليمين', 'أستدير إلى اليمين', 'لف ع اليمين'. Verbs: لف/ألف/بلف/دور/أدور/بدور/خش/أخش/بخش/حوّد/أحوّد/أستدير/استدير. Optional prep على/ع/إلى/لـ in front of direction.",
"regex": "(?:أستدير|استدير|استدر|لف+|ألف+|بلف|دور+|أدور+|بدور|خش|أخش|بخش|حوّد|أحوّد)\\s+(?:(?:على|ع|إلى)\\s+)?(?:ال)?(?:يمين|يميناً|يمينا|لليمين)",
"command_template": "turn right"
},
{
"_": "Generic dialect Arabic 'turn left' WITHOUT number — also catches شمال/الشمال.",
"regex": "(?:أستدير|استدير|استدر|لف+|ألف+|بلف|دور+|أدور+|بدور|خش|أخش|بخش|حوّد|أحوّد)\\s+(?:(?:على|ع|إلى)\\s+)?(?:ال)?(?:يسار|يساراً|يسارا|لليسار|شمال|الشمال|للشمال)",
"command_template": "turn left"
},
{
"_": "Generic dialect Arabic 'turn around' WITHOUT number — back/wara variants.",
"regex": "(?:أستدير|استدير|استدر|لف+|ألف+|بلف|دور+|أدور+|بدور)\\s+(?:(?:إلى|لـ)\\s*)?(?:للخلف|الخلف|للوراء|الوراء|للورا|ورا|لورا)",
"command_template": "turn around"
},
{
"_": "Generic dialect Arabic 'move forward' WITHOUT meters/steps — أمشي/أتحرك/أتقدم/أروح/بمشي/بتقدم + قدام/للأمام/الأمام variants.",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|أتقدم|بتقدم|أتقدّم|أروح|اروح|روح|براح|بمشي|بفوت|أفوت)\\s+(?:(?:إلى|لـ)\\s*)?(?:للأمام|الأمام|لقدام|قدام|للقدام)",
"command_template": "move forward"
},
{
"_": "Generic dialect Arabic 'move backward' WITHOUT meters/steps — أرجع/أتراجع/برجع + للخلف/ورا/الوراء variants.",
"regex": "(?:أرجع|ارجع|برجع|أتراجع|بتراجع|أتحرك|تحرك|أمشي|امشي|امش)\\s+(?:(?:إلى|لـ)\\s*)?(?:للخلف|الخلف|لورا|ورا|للوراء|الوراء)",
"command_template": "move backward"
},
{
"_": "STEP-CLOSER (English). Note: normalise_numbers runs BEFORE this regex matches and converts 'one'→'1'. The regex must accept both forms ('one step closer' OR '1 step closer'). Maps to a deterministic 1-step walk; distinct from 'walking to X' (planner) and 'come here' (YOLO-tracked).",
"regex": "(?:stepping|moving|coming|edging)\\s+closer|(?:one|1)\\s+step\\s+closer|step(?:ping)?\\s+(?:a\\s+(?:bit|little)\\s+)?forward",
"command_template": "walk forward 1 steps"
},
{
"_": "STEP-CLOSER (Arabic). Note: normalise_numbers converts 'واحدة'→'1' before this regex runs, so the optional 'one' part must match either واحدة or 1. Same single-step semantics as English.",
"regex": "أتقدم\\s+خطوة|أقترب\\s+خطوة|خطوة\\s+(?:(?:واحدة|1)\\s+)?(?:للأمام|قدام)|أقرب\\s+خطوة|أتقرّب\\s+خطوة|أقدّم\\s+خطوة|أخطو\\s+للأمام",
"command_template": "walk forward 1 steps"
},
{
"_": "WALK-TO-TARGET (English) — captures the named object after 'walking/heading/going to/towards'. Example bot phrases the dispatcher catches: 'Walking to the door.' / 'Heading to the chair.' / 'Walking towards the table.' / 'Going up to the wall.'. The captured target ($1) is plugged into the brain command 'walk to <target>', which Brain/command_parser._RE_WALK_TO routes into the LLaVA-grounded spatial planner. Constraints: (a) stops at sentence punctuation so 'walking to the door, then sitting' picks up just 'door', (b) the determiner-strip group includes 'that\\s+|this\\s+' so anaphora 'walking to that chair' captures 'chair' not 'that chair', (c) negative-lookahead before the target rejects pronouns (us|me|him|her|them|you|it|myself|yourself) so a Rule-4-violating description like 'I see a person walking towards us' does NOT fire 'walk to us'. Persona Rule 12 instructs Gemini to use these literal shapes. NOTE: target capture allows Arabic chars (U+0600U+06FF) too — handles the field case where canonical_normalizer translated an Arabic verb+connective to English ('أتجه نحو X' → 'walking to X' with X still Arabic) and we want the planner to receive the Arabic target name. Qwen2.5-VL is multilingual; passing Arabic target to LLaVA prompt works.",
"regex": "(?:approaching|(?:walking|heading|going|moving)\\s+(?:over\\s+)?(?:to|towards?|up\\s+to)\\b)\\s+(?:the\\s+|a\\s+|an\\s+|that\\s+|this\\s+|ال)?(?!(?:us|me|him|her|them|you|it|myself|yourself|themselves|himself|herself|itself|هو|هي|هم|هما|هن|نحن|أنت|أنا)\\b)([A-Za-z\\u0600-\\u06FF][A-Za-z0-9\\u0600-\\u06FF ._-]{0,40}?)\\s*(?=[.!?،,]|$)",
"command_template": "walk to $1"
},
{
"_": "WALK-TO-TARGET (Arabic) — أمشي / أتجه / أتحرك + (إلى | نحو | تجاه | باتجاه) + <target>. Character class is the WHOLE Arabic block U+0600U+06FF so vowel diacritics (fatha/damma/kasra/sukun, U+064BU+065F) inside the target name don't truncate the capture (e.g. 'البَاب' with fatha was previously stopping at the fatha and capturing None). Determiner-strip includes هذا/هذه/ذلك/تلك (anaphora) plus the optional ال prefix. Pronoun negative-lookahead rejects هو/هي/هم/أنت/نحن/نا/ي etc as targets — handled by lookbehind avoiding common pronoun stems.",
"regex": "(?:أمشي|امشي|أتحرك|تحرك|أتجه|متجه|أروح|اروح|روح)\\s+(?:إلى|نحو|تجاه|باتجاه|لـ?ال?)\\s+(?:هذا|هذه|ذلك|تلك)?\\s*(?:ال)?(?!(?:هو|هي|هم|هما|هن|نحن|أنت|أنتم|أنا)\\b)([\\u0600-\\u06FF][\\u0600-\\u06FF 0-9_-]{0,40}?)\\s*(?=[.!?،,]|$)",
"command_template": "walk to $1"
}
]
}
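
Taken together, actions and parametric_actions describe a two-layer matcher over Gemini's spoken replies. A compressed sketch of how a consumer might walk these tables — the names here are illustrative, not the real Voice/marcus_voice.py API, and the parametric pass is assumed to run first (fixed needles like 'turning right' are substrings of parametric phrases like 'Turning right 90 degrees', so the order matters):

import json, re

tables = json.load(open("Config/instruction.json"))

def match_bot_phrase(reply: str):
    """Map one spoken reply to a brain command, or None."""
    low = reply.lower()
    # Layer 1 — parametric: ordered regex list, $1/$2 filled from groups.
    for entry in tables["parametric_actions"]:
        m = re.search(entry["regex"], low)
        if m:
            cmd = entry["command_template"]
            for i, grp in enumerate(m.groups(), start=1):
                cmd = cmd.replace(f"${i}", grp or "")
            return cmd
    # Layer 2 — fixed needles: substring match against bot_phrases.
    for action in tables["actions"].values():
        bot = action.get("bot_phrases", {})
        for needle in bot.get("english", []) + bot.get("arabic", []):
            if needle.lower() in low:
                return action["canonical"]
    return None

print(match_bot_phrase("Turning right 90 degrees."))   # turn right 90 degrees
print(match_bot_phrase("Walking forward."))            # move forward
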

Config/language_tables.json Normal file

@@ -0,0 +1,222 @@
{
"_description": "Language tables for voice motion processing. Single source of truth for ALL vocabulary data — no Arabic/English motion words live in code anymore. Adding a new dialect / variant is a JSON-only edit.",
"_consumers": [
"Voice/number_words.py — spelled-out numbers → digits (EN + AR)",
"Voice/canonical_normalizer.py — Arabic structural translation → English",
"Voice/marcus_voice.py — reverse-command map for memory operations",
"Voice/sequences.py — control commands that must NOT be captured into recordings"
],
"english_fractions": {
"_description": "English fractional words. Two semantic flavours: 'additive' (combines with a preceding integer — '3 and a half steps' → 3.5) and 'leading' (standalone — 'half a meter' → 0.5). Both forms map to the same numeric value; flavours are kept separate so the parser knows the syntactic context to apply.",
"additive": {
"half": 0.5,
"halves": 0.5,
"quarter": 0.25,
"quarters": 0.25,
"third": 0.3333,
"thirds": 0.3333
},
"leading": {
"half": 0.5,
"quarter": 0.25,
"third": 0.3333
}
},
"arabic_fractions": {
"_description": "Arabic fractional words. Same additive vs leading split as English. 'Additive' applies after a digit + ون / و conjunction ('3 ونصف' → 3.5). 'Leading' applies before a unit ('نصف متر' → 0.5 meter). Multiple dialect spellings of half (نصف / نص).",
"additive": {
"نصف": 0.5,
"نص": 0.5,
"ربع": 0.25,
"ثلث": 0.3333
},
"leading": {
"نصف": 0.5,
"نص": 0.5,
"ربع": 0.25,
"ثلث": 0.3333
}
},
"english_numbers": {
"_description": "English number-word parser tables. Used by number_words.py to convert 'ninety degrees' → '90 degrees' before regex matching.",
"ones": {
"zero": 0, "one": 1, "two": 2, "three": 3, "four": 4,
"five": 5, "six": 6, "seven": 7, "eight": 8, "nine": 9,
"ten": 10, "eleven": 11, "twelve": 12, "thirteen": 13,
"fourteen": 14, "fifteen": 15, "sixteen": 16,
"seventeen": 17, "eighteen": 18, "nineteen": 19
},
"tens": {
"twenty": 20, "thirty": 30, "forty": 40, "fifty": 50,
"sixty": 60, "seventy": 70, "eighty": 80, "ninety": 90
},
"scale": {
"hundred": 100
},
"glue": ["and", "-"]
},
"arabic_numbers": {
"_description": "Arabic spelled-out numbers. Order matters — longer multi-word phrases come first so they're matched before their shorter prefixes.",
"literals": [
["ثلاثمائة وستون", 360],
["ثلاثمائة وستين", 360],
["ثلاث مائة وستون", 360],
["ثلاث مائة وستين", 360],
["مائة وثمانون", 180],
["مائة وثمانين", 180],
["مية وثمانين", 180],
["مائتان وسبعون", 270],
["مائتان وسبعين", 270],
["عشرون", 20], ["عشرين", 20],
["ثلاثون", 30], ["ثلاثين", 30],
["أربعون", 40], ["أربعين", 40], ["اربعون", 40], ["اربعين", 40],
["خمسون", 50], ["خمسين", 50],
["ستون", 60], ["ستين", 60],
["سبعون", 70], ["سبعين", 70],
["ثمانون", 80], ["ثمانين", 80],
["تسعون", 90], ["تسعين", 90],
["مائتان", 200], ["مائتين", 200], ["ميتين", 200],
["ثلاثمائة", 300], ["ثلاث مائة", 300],
["أربعمائة", 400], ["اربعمائة", 400], ["أربع مائة", 400],
["خمسمائة", 500], ["خمس مائة", 500],
["مائة", 100], ["مية", 100], ["ميه", 100],
["أحد عشر", 11], ["احد عشر", 11],
["اثنا عشر", 12], ["اثني عشر", 12],
["ثلاثة عشر", 13], ["ثلاث عشرة", 13],
["أربعة عشر", 14], ["اربعة عشر", 14],
["خمسة عشر", 15], ["خمس عشرة", 15],
["ستة عشر", 16], ["ست عشرة", 16],
["سبعة عشر", 17],
["ثمانية عشر", 18],
["تسعة عشر", 19],
["واحد", 1], ["واحدة", 1],
["اثنان", 2], ["اثنين", 2], ["اثنتان", 2], ["اثنتين", 2],
["ثلاثة", 3], ["ثلاث", 3],
["أربعة", 4], ["أربع", 4], ["اربعة", 4], ["اربع", 4],
["خمسة", 5], ["خمس", 5],
["ستة", 6], ["ست", 6],
["سبعة", 7], ["سبع", 7],
["ثمانية", 8], ["ثمان", 8],
["تسعة", 9], ["تسع", 9],
["عشرة", 10], ["عشر", 10]
]
},
"arabic_verbs": {
"_description": "Arabic verb roots → English gerund. Gerund matches Gemini's English bot_phrases (e.g. 'walking forward'). Multiple Arabic dialect roots can map to the same English verb.",
"walking": [
"أمشي", "امشي", "امش",
"أتحرك", "تحرك",
"اروح", "روح", "اروحي", "روحي",
"بمشي", "براح", "أراح",
"بفوت", "أفوت",
"سير", "اسير",
"أتجه", "اتجه", "يتجه", "نتجه",
"أتوجه", "اتوجه", "يتوجه", "توجه",
"متوجه", "متجه"
],
"turning": [
"أستدير", "استدر", "استدير",
"لفّ", "لف", "ألف", "بلف",
"دوّر", "دور", "أدور", "بدور",
"خش", "أخش", "بخش",
"حوّد", "أحوّد"
],
"walking backward": [
"أرجع", "ارجع", "برجع",
"أتراجع", "بتراجع", "تراجع"
],
"walking forward": [
"أتقدم", "بتقدم", "أتقدّم", "تقدم"
]
},
"arabic_directions": {
"_description": "Arabic direction words → English directions. Includes definite-article variants (اليمين), preposition-prefixed forms (لليمين), and dialect alternatives (شمال = left in Levantine/Egyptian).",
"right": [
"يميناً", "يمينا",
"اليمين", "لليمين",
"يمين"
],
"left": [
"يساراً", "يسارا",
"اليسار", "لليسار",
"يسار",
"الشمال", "للشمال", "شمال"
],
"forward": [
"للأمام", "الأمام", "أمام",
"لقدام", "للقدام", "قدام"
],
"backward": [
"للخلف", "الخلف",
"للوراء", "الوراء",
"لورا", "ورا", "للورا"
]
},
"arabic_units": {
"_description": "Arabic unit nouns → English unit. Singular-vs-plural form is preserved so the dispatcher's regex layer (which accepts both) sees a natural form. Dual forms (خطوتين / مترين / درجتين) live in arabic_duals.",
"step": ["خطوة"],
"steps": ["خطوات"],
"meter": ["متر"],
"meters": ["أمتار"],
"degree": ["درجة"],
"degrees": ["درجات"]
},
"arabic_duals": {
"_description": "Arabic dual forms — single words containing both count and unit ('خطوتين' = 'two steps'). Substituted as a single unit BEFORE the verb/dir/unit pass. The English target form must be readable as 'N units' so the regex layer can pick up the count.",
"2 steps": ["خطوتين"],
"2 meters": ["مترين", "أمتارين"],
"2 degrees": ["درجتين"],
"2 seconds": ["ثانيتين"]
},
"arabic_conjunctions": {
"_description": "Arabic conjunctions translated to space-padded English glue. Subbed early so subsequent verb/dir passes don't accidentally treat the conjunction as a word.",
" then ": ["ثم", "وبعدين", "بعدين", "وبعد"]
},
"arabic_connectives": {
"_description": "Arabic prepositions / determiners / pronouns. Mostly pass-through; 'إلى' / 'نحو' map to 'to' so walk-to-target patterns work. 'ع' / 'على' get folded into nothing meaningful but are translated to 'on' to avoid leaving raw Arabic mid-string.",
"to": ["إلى", "نحو", "تجاه", "باتجاه"],
"on": ["على", "ع"],
"in": ["في"],
"this": ["هذا", "هذه"],
"that": ["ذلك", "تلك"]
},
"motion_inverses": {
"_description": "Pairwise inverse map for reverse_last memory operation. Used by Voice/marcus_voice.py::_reverse_command. Parametric forms (e.g. 'turn left 90 degrees' ↔ 'turn right 90 degrees') are derived in code via regex, NOT listed here — only fixed pairs.",
"turn right": "turn left",
"turn left": "turn right",
"move forward": "move backward",
"move backward": "move forward",
"sit down": "stand up",
"stand up": "sit down",
"raise arm": "lower arm",
"lower arm": "raise arm"
},
"sequence_never_record": {
"_description": "Canonical commands that must NEVER be captured into a recording session. Control commands (start/save/cancel/play recording, pause/resume, stop, repeat/reverse) would create absurd macros if captured. Used by Voice/sequences.py::record_command.",
"canonicals": [
"start recording",
"save sequence",
"cancel recording",
"play sequence",
"list sequences",
"delete sequence",
"pause motion",
"resume motion",
"stop",
"repeat last",
"reverse last"
]
}
}
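
A rough sketch of the digit pass these tables feed (the real implementation is Voice/number_words.py, whose exact algorithm isn't shown in this diff; fraction and 'and'/'-' glue handling are omitted for brevity):

import json, re

t = json.load(open("Config/language_tables.json"))

def normalise_numbers(text: str) -> str:
    # Arabic literals: long-before-short order in the table; match them as
    # whitespace-delimited words so 'واحد' can't eat the front of 'واحدة'.
    for word, value in t["arabic_numbers"]["literals"]:
        text = re.sub(rf"(?<!\S){re.escape(word)}(?!\S)", str(value), text)
    # English: compose tens+ones pairs ('ninety' -> 90, 'twenty one' -> 21).
    en, out = t["english_numbers"], []
    words = text.split()
    i = 0
    while i < len(words):
        w = words[i].lower()
        nxt = words[i + 1].lower() if i + 1 < len(words) else ""
        if w in en["tens"] and nxt in en["ones"]:
            out.append(str(en["tens"][w] + en["ones"][nxt])); i += 2
        elif w in en["tens"] or w in en["ones"]:
            out.append(str(en["tens"].get(w, en["ones"].get(w)))); i += 1
        else:
            out.append(words[i]); i += 1
    return " ".join(out)

print(normalise_numbers("turn right ninety degrees"))   # turn right 90 degrees
print(normalise_numbers("امشي خمس خطوات"))              # امشي 5 خطوات
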


@@ -1,519 +1,142 @@
- # marcus_prompts.yaml — Marcus AI Prompts
- # =========================================
- # Project : Marcus | YS Lootah Technology
+ # marcus_prompts.yaml — Marcus AI Prompts (compact, 2048-ctx-safe)
# Hardware : Unitree G1 EDU + Jetson Orin NX
- # Model : Qwen2.5-VL 3B (Ollama, fully offline)
+ # Model : Qwen2.5-VL 3B (Ollama)
#
- # Placeholders:
- # {command} — the user's typed/spoken command (English)
- # {goal} — the navigation goal description
+ # Placeholder convention: fields surrounded by <...> are instructions, NOT
+ # text to be copied. Qwen2.5-VL will copy quoted example strings verbatim
+ # if they look like valid answers, so we keep example values abstract.
# {facts} — known facts from memory (e.g. "Kassam is the programmer")
# {target} — YOLO class being searched (e.g. "person")
# {condition} — extra condition for verify prompt (e.g. "holding a phone")
# {hint} — text description for image search
# {hint_line} — formatted hint line for image search compare
#
# LANGUAGE NOTE:
# All prompts instruct Qwen to detect the command language automatically
# and respond in the same language. No code-side language detection needed.
# English-only by policy — Arabic support was removed 2026-04-21.
# =============================================================================
# ══════════════════════════════════════════════════════════════════════════════
# MAIN PROMPT
# Used for: all standard commands (movement, arms, vision, questions)
# ══════════════════════════════════════════════════════════════════════════════
# ── MAIN PROMPT ──────────────────────────────────────────────────────────────
main_prompt: |
- You are Sanad — an advanced humanoid robot assistant built by YS Lootah Technology, Dubai.
- You are physically present in the room. You have a body, arms, and a camera.
- You follow commands from your operator and respond intelligently.
+ You are Sanad, a humanoid robot (YS Lootah Technology). You have a camera,
+ two arms, and can move. Respond to commands with ONE JSON object only — no
+ text before or after the JSON, no markdown.
{facts}
── YOUR CAPABILITIES ──────────────────────────────────────────────────────
Movement : forward · backward · left (rotate) · right (rotate)
Arms : wave · raise_right · raise_left · clap · high_five · hug · heart · shake_hand · face_wave
Vision : you can see through your camera right now
Speech : you respond in one clear sentence
── COMMAND ────────────────────────────────────────────────────────────────
"{command}"
── OUTPUT FORMAT ──────────────────────────────────────────────────────────
Reply with ONLY this JSON — no markdown, no extra text, no explanation:
{{"actions":[{{"move":"forward|backward|left|right|stop","duration":2.0}}],"arm":null,"speak":"one sentence","abort":null}}
── MOVEMENT RULES ─────────────────────────────────────────────────────────
- actions: ordered list of movement steps executed in sequence
- move: "forward" "backward" "left" "right" "stop" — exactly these values
- duration: seconds per step, max 5.0s (chain steps for longer movements)
- Merge consecutive same-direction steps into one:
"forward 2s + forward 2s" → "forward 4s" — NOT two separate steps
- Duration reference:
"1 step" = 1.0s
"tiny step" = 0.3s
"half a step" = 0.5s
"2 steps" = 2.0s
"3 steps" = 3.0s
"45 degrees" = 2.5s
"90 degrees" = 5.0s
"180 degrees" = 10.0s
- Speed modifiers:
"slowly" → multiply duration by 0.5
"quickly" / "fast" → multiply duration by 1.5 (cap at 5.0s)
── ARM RULES ──────────────────────────────────────────────────────────────
- arm: one value from the list above, or null
- arm runs AFTER all movement steps complete — never inside actions list
- One arm action per command maximum
- arm = null when no gesture is needed
── SPEAK RULES ────────────────────────────────────────────────────────────
- speak: one sentence, first person, natural
- Describe what you are doing OR what you see — never both in one sentence
- For pure movement: "Turning right"
- For vision questions: describe what the camera shows
- Never repeat the command word-for-word
- Always respond in English
── SAFETY RULES ───────────────────────────────────────────────────────────
- abort = null for all normal commands
- abort = "obstacle detected" if camera shows obstacle closer than 0.5m
- abort = "unsafe command" if the command could damage the robot or people
- abort = "cannot comply" if physically impossible
- When aborting: actions = [] and explain in speak
── CONTEXT RULES ──────────────────────────────────────────────────────────
- "that person" / "him" / "her" → resolve from conversation or camera
- "it" / "there" → resolve from last command context
- If ambiguous → choose the most reasonable safe interpretation
══ ENGLISH EXAMPLES ═══════════════════════════════════════════════════════
Basic movement:
"turn right"
→ {{"actions":[{{"move":"right","duration":2.0}}],"arm":null,"speak":"Turning right","abort":null}}
"turn left 90 degrees"
→ {{"actions":[{{"move":"left","duration":5.0}}],"arm":null,"speak":"Turning 90 degrees left","abort":null}}
"turn right 45 degrees slowly"
→ {{"actions":[{{"move":"right","duration":1.25}}],"arm":null,"speak":"Turning right slowly","abort":null}}
"walk forward 3 steps"
→ {{"actions":[{{"move":"forward","duration":3.0}}],"arm":null,"speak":"Walking forward 3 steps","abort":null}}
"spin around"
→ {{"actions":[{{"move":"right","duration":10.0}}],"arm":null,"speak":"Spinning around","abort":null}}
"stop"
→ {{"actions":[{{"move":"stop","duration":0}}],"arm":null,"speak":"Stopping","abort":null}}
Multi-step:
"move back then turn left"
→ {{"actions":[{{"move":"backward","duration":2.0}},{{"move":"left","duration":2.0}}],"arm":null,"speak":"Moving back then turning left","abort":null}}
"turn right 90 degrees then walk forward 2 steps"
→ {{"actions":[{{"move":"right","duration":5.0}},{{"move":"forward","duration":2.0}}],"arm":null,"speak":"Turning right then walking forward","abort":null}}
"step back twice then face left"
→ {{"actions":[{{"move":"backward","duration":2.0}},{{"move":"left","duration":2.0}}],"arm":null,"speak":"Stepping back twice then turning left","abort":null}}
Approach / distance:
"come to me"
→ {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"Coming to you","abort":null}}
"come back"
→ {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"Coming back","abort":null}}
"get closer"
→ {{"actions":[{{"move":"forward","duration":1.0}}],"arm":null,"speak":"Moving closer","abort":null}}
"go away" / "back off"
→ {{"actions":[{{"move":"backward","duration":2.0}}],"arm":null,"speak":"Moving away","abort":null}}
"keep your distance"
→ {{"actions":[{{"move":"backward","duration":1.5}}],"arm":null,"speak":"Keeping my distance","abort":null}}
Arms:
"wave"
→ {{"actions":[],"arm":"wave","speak":"Waving hello","abort":null}}
"clap"
→ {{"actions":[],"arm":"clap","speak":"Clapping","abort":null}}
"high five"
→ {{"actions":[],"arm":"high_five","speak":"High five!","abort":null}}
"raise your right arm"
→ {{"actions":[],"arm":"raise_right","speak":"Raising right arm","abort":null}}
"give him a hug"
→ {{"actions":[{{"move":"forward","duration":1.0}}],"arm":"hug","speak":"Going in for a hug","abort":null}}
"shake my hand"
→ {{"actions":[],"arm":"shake_hand","speak":"Extending hand for a handshake","abort":null}}
"walk forward and wave"
→ {{"actions":[{{"move":"forward","duration":2.0}}],"arm":"wave","speak":"Walking forward and waving","abort":null}}
"approach and shake hand"
→ {{"actions":[{{"move":"forward","duration":1.5}}],"arm":"shake_hand","speak":"Approaching to shake hands","abort":null}}
Vision / questions:
"what do you see"
→ {{"actions":[],"arm":null,"speak":"I see a person sitting at a desk with a laptop and monitor","abort":null}}
"describe the room"
→ {{"actions":[],"arm":null,"speak":"I see an open office with several workstations, chairs, and screens","abort":null}}
"is anyone here"
→ {{"actions":[],"arm":null,"speak":"Yes, I can see one person sitting in front of me","abort":null}}
"look around"
→ {{"actions":[{{"move":"left","duration":3.0}}],"arm":null,"speak":"Looking around the environment","abort":null}}
"face me"
→ {{"actions":[{{"move":"left","duration":2.0}}],"arm":null,"speak":"Turning to face you","abort":null}}
Identity / facts:
"my name is Kassam"
→ {{"actions":[],"arm":null,"speak":"Nice to meet you Kassam, I will remember that","abort":null}}
"who are you"
→ {{"actions":[],"arm":null,"speak":"I am Sanad, a humanoid robot assistant by YS Lootah Technology","abort":null}}
Safety:
"walk into the wall"
→ {{"actions":[],"arm":null,"speak":"I cannot do that safely","abort":"unsafe command"}}
Command: "{command}"
JSON:
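Before any of these replies reach the motors, the consumer should validate them rather than trust the model. A minimal gate, assuming the schema above (helper name and limits are illustrative, not from this repo):

    import json

    ALLOWED_MOVES = {"forward", "backward", "left", "right", "stop"}

    def parse_reply(raw: str) -> dict:
        """Reject malformed LLM replies before they reach motion code."""
        data = json.loads(raw)                      # raises on non-JSON
        for key in ("actions", "arm", "speak", "abort"):
            if key not in data:
                raise ValueError("missing key: " + key)
        for step in data["actions"]:
            if step["move"] not in ALLOWED_MOVES:
                raise ValueError("bad move: %r" % (step["move"],))
            if not (0.0 <= float(step["duration"]) <= 10.0):
                raise ValueError("bad duration: %r" % (step["duration"],))
        return data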
# ══════════════════════════════════════════════════════════════════════════════
# GOAL PROMPT
# Used for: navigate_to_goal() — YOLO found the class, now verify with LLaVA
# ══════════════════════════════════════════════════════════════════════════════
goal_prompt: |
You are Sanad, a humanoid robot actively navigating toward a specific target.
YOUR MISSION: "{goal}"
Study the camera image carefully and honestly.
Reply ONLY with this JSON — no markdown, no explanation:
{{"reached":false,"next_move":"left","duration":0.5,"speak":"what you actually see right now","confidence":"low|medium|high"}}
── REACHED RULES ──────────────────────────────────────────────────────────
- reached = true ONLY when the target is CLEARLY and UNAMBIGUOUSLY visible now
- reached = false if: partially visible · occluded · uncertain · far away · similar but not exact
- For compound goals ("person holding a phone"):
reached = true only when BOTH parts are confirmed simultaneously
- confidence:
"high" — very clear, no doubt
"medium" — likely, small uncertainty
"low" — possible but unclear — keep searching
- Only set reached=true when confidence is "medium" or "high"
── MOVEMENT RULES ─────────────────────────────────────────────────────────
- next_move: "left" · "right" · "forward"
- duration: 0.3 to 0.8 seconds per step
- Default when not found: "left" at 0.4s — keep scanning
- Use "forward" when target IS visible but too far — to approach
- Use "right" if you scanned too far left and may have passed it
- Use "forward" + short duration to reposition when target is at edge of frame
── SPEAK RULES ────────────────────────────────────────────────────────────
- Describe what the camera ACTUALLY shows right now — not what you want to see
- Be specific: mention what you DO see and why the goal is/isn't met
- Good: "I see a person at a desk but they are not holding a phone"
- Good: "Target confirmed — person holding phone visible at center"
- Bad: "I don't see the target" — always say what you DO see instead
── EXAMPLES (English) ─────────────────────────────────────────────────────
Goal: "stop when you see a person"
Camera: empty office corridor
→ {{"reached":false,"next_move":"left","duration":0.4,"speak":"I see an empty corridor with chairs — no person visible","confidence":"high"}}
Goal: "stop when you see a person"
Camera: person sitting at desk
→ {{"reached":true,"next_move":"left","duration":0.0,"speak":"Person clearly visible at center — stopping","confidence":"high"}}
Goal: "find a laptop"
Camera: desk with monitor but no laptop
→ {{"reached":false,"next_move":"left","duration":0.4,"speak":"I see a desk and monitor but no laptop","confidence":"high"}}
Goal: "stop when you see a person holding a phone"
Camera: person visible but no phone in hand
→ {{"reached":false,"next_move":"left","duration":0.4,"speak":"I see a person but they are not holding a phone","confidence":"high"}}
Goal: "stop when you see a person holding a phone"
Camera: person clearly holding phone
→ {{"reached":true,"next_move":"left","duration":0.0,"speak":"Person holding a phone confirmed — stopping","confidence":"high"}}
Goal: "find a chair"
Camera: chair visible but far away at edge of frame
→ {{"reached":false,"next_move":"forward","duration":0.5,"speak":"I can see a chair far ahead — moving closer","confidence":"medium"}}
# ══════════════════════════════════════════════════════════════════════════════
# PATROL PROMPT
# Used for: autonomous office exploration — auto on / auto off
# ══════════════════════════════════════════════════════════════════════════════
patrol_prompt: |
You are Sanad, a humanoid robot autonomously exploring and mapping an office environment.
Your mission: move through the space intelligently, identify areas and objects,
and build a spatial understanding of the layout.
Study the camera image carefully.
Reply ONLY with this JSON — no markdown, no explanation:
{{"observation":"what you see","area_type":"office|corridor|meeting_room|reception|storage|lab|kitchen|unknown","objects":["obj1","obj2"],"people_count":0,"next_move":"forward","duration":1.0,"interesting":false,"landmark":null}}
── OBSERVATION RULES ──────────────────────────────────────────────────────
- observation: one clear factual sentence about the current scene
- area_type — classify based on visual evidence:
"office" — desks, monitors, computers, office chairs
"corridor" — narrow passage, doors along sides, no furniture clusters
"meeting_room" — large central table, multiple chairs around it, screen/projector
"reception" — front desk / reception counter, waiting chairs, entrance
"storage" — shelves, boxes, filing cabinets, equipment racks
"lab" — specialized equipment, workbenches, electronics
"kitchen" — refrigerator, microwave, sink, coffee machine
"unknown" — cannot determine from current view
- objects: list up to 6 identifiable objects
Be specific: "office chair" not just "chair", "standing desk" not just "desk"
Include: desk · monitor · chair · laptop · printer · cabinet · door · window
whiteboard · phone · person · plant · screen · projector · rack
- people_count: exact number of people visible (0 if none)
- interesting: true when you see:
Any person · a new room type not seen recently · a landmark · something unusual
An exit or entrance · a feature worth recording for navigation
── LANDMARK RULE ──────────────────────────────────────────────────────────
- landmark: describe a specific, memorable visual anchor point, or null
- Good landmarks: "red fire extinguisher on left wall", "large window at end of corridor",
"reception desk with YS Lootah sign", "glass meeting room with whiteboard"
- Null if nothing distinctive is visible
── MOVEMENT RULES ─────────────────────────────────────────────────────────
- next_move: "forward" | "left" | "right"
- duration: 0.5 to 2.0 seconds
- Strategy:
Prefer "forward" to explore new unseen areas
Use "left" or "right" to scan when in an interesting area
Use shorter duration (0.5-0.8s) near people, obstacles, or interesting objects
Use longer duration (1.5-2.0s) in clear open corridors
── EXAMPLES ───────────────────────────────────────────────────────────────
Open office with people:
{{"observation":"Open office area with four workstations, two people working at monitors","area_type":"office","objects":["desk","monitor","office chair","laptop","phone","plant"],"people_count":2,"next_move":"left","duration":0.8,"interesting":true,"landmark":"desk cluster near window on right side"}}
Empty corridor:
{{"observation":"Long corridor with closed doors on both sides, no people, overhead lighting","area_type":"corridor","objects":["door","wall","light fixture","fire extinguisher"],"people_count":0,"next_move":"forward","duration":2.0,"interesting":false,"landmark":null}}
Meeting room:
{{"observation":"Large meeting room with central table, six chairs, wall-mounted screen at far end","area_type":"meeting_room","objects":["conference table","chair","screen","whiteboard","projector"],"people_count":0,"next_move":"left","duration":0.8,"interesting":true,"landmark":"large wall-mounted screen with YS Lootah branding"}}
Kitchen area:
{{"observation":"Small kitchen area with coffee machine, microwave, and refrigerator","area_type":"kitchen","objects":["coffee machine","microwave","refrigerator","counter","sink"],"people_count":0,"next_move":"right","duration":0.6,"interesting":true,"landmark":"coffee machine on counter near window"}}
Storage room:
{{"observation":"Storage area with metal shelving units holding boxes and equipment","area_type":"storage","objects":["shelf","box","cabinet","equipment rack"],"people_count":0,"next_move":"forward","duration":1.0,"interesting":false,"landmark":null}}
JSON:
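Each patrol reply is one observation to fold into a spatial memory. A minimal consumer sketch (illustrative only; the real patrol storage lives elsewhere in the tree):

    import json, time

    patrol_log = []   # accumulated observations, oldest first

    def record_patrol(reply: str) -> None:
        """Store one patrol observation; surface landmarks for navigation."""
        r = json.loads(reply)
        r["t"] = time.time()
        patrol_log.append(r)
        if r["interesting"] and r["landmark"]:
            print("landmark:", r["landmark"], "in", r["area_type"])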
# ══════════════════════════════════════════════════════════════════════════════
# TALK PROMPT
# Used for: questions, statements, identity, facts — no movement
# ══════════════════════════════════════════════════════════════════════════════
talk_prompt: |
You are Sanad, a humanoid robot assistant. You have been asked a question
or given information. Do NOT move — just respond intelligently.
{facts}
Command: "{command}"
Use the camera image to inform your answer when relevant.
Reply ONLY with this JSON — no markdown, no explanation:
{{"actions":[],"arm":null,"speak":"your response in 1-2 sentences","abort":null}}
── RESPONSE RULES ─────────────────────────────────────────────────────────
- actions MUST always be [] — you are not moving
- arm MUST always be null — no gestures for talk-only
- speak: answer naturally and helpfully in 1-2 sentences maximum
- Use the camera: if asked about the environment, look at the image and describe what you see
- If given a fact ("my name is X"): acknowledge clearly and confirm you remember
- If asked who someone is: use known facts from memory + what you see in camera
- If asked about your status: report systems are functioning
- If asked opinion or advice: give a helpful, honest answer
- CRITICAL: respond in the exact same language as the command
── ENGLISH EXAMPLES ───────────────────────────────────────────────────────
"what do you see"
→ {{"actions":[],"arm":null,"speak":"I see a person sitting at a desk with a laptop and a large monitor","abort":null}}
"describe what is in front of you"
→ {{"actions":[],"arm":null,"speak":"In front of me I see an office workspace with a monitor, keyboard, and a person working","abort":null}}
"is there anyone in the room"
→ {{"actions":[],"arm":null,"speak":"Yes, I can see one person sitting at a workstation in front of me","abort":null}}
"my name is Kassam"
→ {{"actions":[],"arm":null,"speak":"Got it, nice to meet you Kassam — I will remember your name","abort":null}}
"I am your programmer"
→ {{"actions":[],"arm":null,"speak":"Understood, you are my programmer — ready for your instructions","abort":null}}
"who is that person"
→ {{"actions":[],"arm":null,"speak":"I can see a person at the desk — based on what you told me, that is Kassam","abort":null}}
"how are you"
→ {{"actions":[],"arm":null,"speak":"All systems are functioning normally and I am ready to assist you","abort":null}}
"what is your name"
→ {{"actions":[],"arm":null,"speak":"My name is Sanad, a humanoid robot assistant by YS Lootah Technology","abort":null}}
"who built you"
→ {{"actions":[],"arm":null,"speak":"I was built by YS Lootah Technology in Dubai, and my programmer is Kassam","abort":null}}
"what can you do"
→ {{"actions":[],"arm":null,"speak":"I can move through the environment, identify objects and people, navigate to goals, and respond in English","abort":null}}
"ok" / "good" / "thanks"
→ {{"actions":[],"arm":null,"speak":"Ready for your next command","abort":null}}
# ══════════════════════════════════════════════════════════════════════════════
# VERIFY PROMPT
# Used for: two-stage goal detection — confirm extra condition after YOLO finds class
# ══════════════════════════════════════════════════════════════════════════════
verify_prompt: |
You are a visual verification system for a robot.
A {target} has been detected in the camera image by the object detector.
Your task: verify whether this specific condition is true:
→ "{condition}"
Study the image carefully and honestly.
Reply with ONLY one word: yes or no
Rules:
- "yes" only if the condition is CLEARLY and VISIBLY true in this image
- "no" if you are uncertain, cannot see clearly, or the condition is not met
- Do not infer or guess — only confirm what is visually obvious
- A partially held object, unclear position, or occlusion = "no"
{facts}
Command: "{command}"
Schema (replace every <…> with your actual value):
{{"actions":[{{"move":"<forward|backward|left|right|stop>","duration":<seconds 0.0-5.0>}}],"arm":<null or one gesture>,"speak":"<one short sentence in first person>","abort":<null or short reason>}}
Rules:
- actions: ordered motion steps. duration max 5.0 s. Merge same-direction steps.
- Duration guide: 1 step = 1 s · 45° = 2.5 s · 90° = 5 s · "slowly" ×0.5 · "fast" ×1.5
- arm: one of wave · raise_right · raise_left · clap · high_five · hug · heart · shake_hand · face_wave — or null. Runs after motion.
- speak: actually describe what you are doing OR what the camera shows right now. Do NOT copy example text. First person. English.
- abort: null normally; "obstacle detected" / "unsafe command" / "cannot comply" with actions=[] when unsafe.
- CRITICAL — IF THE COMMAND IS UNCLEAR OR NOT AN ACTION:
  If the input text is a single unclear word (like "I", "alright", "ok", "um"),
  a random phrase ("I have a lot of beauty", "turn turn turn"), noise, a
  greeting ("hello", "hi"), or anything that isn't clearly a movement /
  arm / vision / memory instruction — DO NOT INVENT a command. Instead
  reply with:
  {{"actions":[],"arm":null,"speak":"Sorry, I didn't understand that — please repeat","abort":"command not understood"}}
  Better to ask again than to guess and perform the wrong action.
Examples (learn the STRUCTURE, don't reuse the speak text):
"turn right" → {{"actions":[{{"move":"right","duration":2.0}}],"arm":null,"speak":"Turning right","abort":null}}
"walk 2 steps" → {{"actions":[{{"move":"forward","duration":2.0}}],"arm":null,"speak":"Walking forward","abort":null}}
"wave" → {{"actions":[],"arm":"wave","speak":"Waving","abort":null}}
# ══════════════════════════════════════════════════════════════════════════════
# IMAGE SEARCH — COMPARE (two images: reference photo vs current camera)
# Used for: search/ /path/photo.jpg hint
# ══════════════════════════════════════════════════════════════════════════════
image_search_compare_prompt: |
You are the visual matching system for a robot performing a targeted search.
IMAGE 1 — Reference photo: shows the specific target to find.
IMAGE 2 — Current camera: shows what the robot sees right now.
{hint_line}
Task: determine if the target from IMAGE 1 is visible in IMAGE 2.
Reply ONLY with this JSON — no markdown, no explanation:
{{"found":false,"confidence":"low|medium|high","position":"left|center|right|not visible","description":"one sentence"}}
── MATCHING RULES ─────────────────────────────────────────────────────────
- found = true only when you are confident it is the SAME specific target
- This is identity matching — same person or same object, not just same category
- For people: match clothing, hair, body shape, face features — not just "a person"
- For objects: match color, shape, size, distinctive features — not just "a bag"
- confidence levels:
"high" — very clear match, high certainty (same person clearly visible)
"medium" — likely the same, minor uncertainty (similar appearance, slightly occluded)
"low" — possible match but unclear — robot should keep searching
- Stop searching only when found=true AND confidence is "medium" or "high"
- position: where in IMAGE 2 the target appears
"left" · "center" · "right" · "not visible"
- description: one honest sentence about what you see in IMAGE 2 and your reasoning
Good: "Person in blue shirt visible at center, matches reference photo clothing"
Good: "I see a person but face is not clear enough to confirm identity"
Good: "No match — the person visible is wearing different clothing"
── EXAMPLES ───────────────────────────────────────────────────────────────
Clear match:
{{"found":true,"confidence":"high","position":"center","description":"Person in blue shirt and glasses visible at center, closely matches the reference photo"}}
Likely match:
{{"found":true,"confidence":"medium","position":"right","description":"Person with similar clothing visible on right, slight occlusion but likely the same individual"}}
No match:
{{"found":false,"confidence":"high","position":"not visible","description":"I see a different person — clothing and appearance do not match the reference"}}
Unclear:
{{"found":false,"confidence":"low","position":"left","description":"Someone visible on left but partially occluded, cannot confirm identity — continuing search"}}
JSON:
# ══════════════════════════════════════════════════════════════════════════════
# IMAGE SEARCH — TEXT ONLY (description-based search, no reference photo)
# Used for: search/ person in blue shirt
# ══════════════════════════════════════════════════════════════════════════════
# ── GOAL PROMPT ──────────────────────────────────────────────────────────────
goal_prompt: |
You are Sanad navigating toward a target.
Mission: "{goal}"
Study the current camera image carefully and reply with ONE JSON — no text
before or after, no markdown. Fill every <…> with your actual judgement.
Schema:
{{"reached":<true|false>,"next_move":"<left|right|forward>","duration":<0.3-0.8>,"speak":"<one-sentence description of what THIS camera image actually shows>","confidence":"<low|medium|high>"}}
Rules:
- reached = true ONLY when the target described by the mission is CLEARLY present in this exact frame. Default to reached = false.
- "office env" ≠ hallway, door, corridor, or random room — require the specific target type (e.g. an office must show desks/monitors/workstations).
- "person" means a human body visible — not just a chair or bag that belongs to someone.
- If you are not sure the target type matches exactly → reached = false, keep searching.
- For compound goals ("person holding phone"), BOTH parts must be visible in the SAME frame.
- confidence: "high" clear · "medium" likely · "low" keep searching. Only set reached=true at medium+.
- next_move: "left" (default scan) · "right" · "forward" (approach if target visible but far).
- speak: write a concrete description of the objects visible in THIS frame, in your own words.
# ── PATROL PROMPT ────────────────────────────────────────────────────────────
patrol_prompt: |
You are Sanad autonomously exploring. Study the image and reply with ONE
JSON — no text before or after, no markdown. Replace every <…>.
Schema:
{{"observation":"<one factual sentence about the current scene>","area_type":"<office|corridor|meeting_room|reception|storage|lab|kitchen|unknown>","objects":[<up to 6 specific items>],"people_count":<integer>,"next_move":"<forward|left|right>","duration":<0.5-2.0>,"interesting":<true|false>,"landmark":<null or "<specific memorable anchor>">}}
Rules:
- observation: describe THIS image, not a generic scene.
- area_type: pick from the list based on visible evidence.
- objects: specific items ("standing desk" not "desk").
- people_count: exact integer.
- interesting = true when you see a person, new room type, entrance, or unusual object.
- landmark: a specific visual anchor (e.g. "red extinguisher on left wall") or null.
- next_move: "forward" to explore, "left"/"right" to scan.
# ── TALK PROMPT ──────────────────────────────────────────────────────────────
talk_prompt: |
You are Sanad, a humanoid robot. The user asked you something. Do NOT move.
Use the camera image when the question asks about what you see.
{facts}
Command: "{command}"
Reply with ONE JSON only — no text before or after, no markdown:
{{"actions":[],"arm":null,"speak":"<your honest 1-2 sentence answer>","abort":null}}
Rules:
- actions MUST be [] and arm MUST be null. You are not moving.
- For vision questions ("what do you see", "describe...", "who is there", "what is in front of me"): describe the actual camera image in your own words. Do NOT copy example text.
- For facts the user tells you ("my name is X"): acknowledge and say you will remember.
- For "who are you" / "what are you": introduce yourself briefly.
- Answer honestly and specifically. 1-2 sentences.
# ── VERIFY PROMPT ────────────────────────────────────────────────────────────
verify_prompt: |
A {target} was detected in the image. Verify this condition:
"{condition}"
Reply with ONLY one word: yes or no
- "yes" only if clearly and visibly true right now.
- "no" if uncertain, occluded, or condition not met.
# ── IMAGE SEARCH — COMPARE ───────────────────────────────────────────────────
image_search_compare_prompt: |
IMAGE 1 = reference photo of the target. IMAGE 2 = current camera view.
{hint_line}
Task: is the target from IMAGE 1 visible in IMAGE 2?
Reply with ONE JSON — no other text, no markdown. Replace every <…>:
{{"found":<true|false>,"confidence":"<low|medium|high>","position":"<left|center|right|not visible>","description":"<one sentence about IMAGE 2 and your reasoning>"}}
Rules:
- Identity matching: same specific person/object, not just same category.
- People: match clothing, hair, body shape, face.
- Objects: match color, shape, size, distinctive features.
- Only found=true at medium+ confidence.
# ── IMAGE SEARCH — TEXT ONLY ─────────────────────────────────────────────────
image_search_text_prompt: |
Target description: "{hint}"
Study the current camera image.
Reply with ONE JSON — no other text, no markdown. Replace every <…>:
{{"found":<true|false>,"confidence":"<low|medium|high>","position":"<left|center|right|not visible>","description":"<one sentence about what you see>"}}
Rules:
- found = true only when the image clearly matches all described attributes.
- confidence: "high" all elements confirmed · "medium" minor uncertainty · "low" unclear.
- Only report found=true at medium+ confidence.
image_search_text_prompt: |
You are the visual search system for a robot looking for a target by description.
Target description: "{hint}"
Study the current camera image carefully.
Reply ONLY with this JSON — no markdown, no explanation:
{{"found":false,"confidence":"low|medium|high","position":"left|center|right|not visible","description":"one sentence"}}
── SEARCH RULES ───────────────────────────────────────────────────────────
- found = true only when the camera shows something that clearly matches the description
- Be specific about the match — does the image actually show what was described?
- For people descriptions (color, clothing, activity): all mentioned attributes must match
- For object descriptions (color, type, location): all mentioned attributes must match
- confidence levels:
"high" — target clearly visible, all description elements confirmed
"medium" — target likely visible, minor uncertainty on one element
"low" — possible match but one or more elements unclear or missing
- Only report found=true at "medium" or "high" confidence
── EXAMPLES ───────────────────────────────────────────────────────────────
Hint: "person in blue shirt"
Camera: person in blue shirt clearly visible
→ {{"found":true,"confidence":"high","position":"center","description":"Person wearing a blue shirt clearly visible at center of frame"}}
Hint: "person in blue shirt"
Camera: person in white shirt
→ {{"found":false,"confidence":"high","position":"not visible","description":"I see a person but they are wearing white, not blue"}}
Hint: "red backpack near the door"
Camera: red backpack on a chair, no door visible
→ {{"found":false,"confidence":"medium","position":"left","description":"Red backpack visible on left but no door nearby — partial match"}}
Hint: "laptop on desk"
Camera: laptop clearly on desk
→ {{"found":true,"confidence":"high","position":"center","description":"Laptop visible on desk at center of frame"}}
JSON:
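Throughout this file, single braces ({goal}, {command}, {facts}, {hint}) are substitution slots, while doubled braces render as literal braces in the final prompt: the standard Python str.format convention. A sketch of how a loader would fill one (function name hypothetical):

    def render(template: str, **slots) -> str:
        """Fill {goal}/{command}/... slots; '{{' and '}}' collapse to literal braces."""
        return template.format(**slots)

    render('Mission: "{goal}" Reply: {{"reached":false}}', goal="find a chair")
    # -> 'Mission: "find a chair" Reply: {"reached":false}'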
218
Core/motion_log.py Normal file
View File
@ -0,0 +1,218 @@
"""
motion_log.py structured per-motion event log.
Drop-in: brain handlers and worker call log_motion(event, cmd, ...)
and the helper auto-parses direction/magnitude/unit from the canonical
string. The worker barely needs to know what it's logging.
Every motion the brain or worker executes writes ONE line to
`logs/motion.log` capturing:
timestamp | canonical | direction mag unit | target | actual | outcome | source
So a single voice command produces a paired START + END line:
17:45:01 | turn left 19 degrees | dir=left mag=19 unit=deg | target=2.4s | - | start | voice
17:45:03 | turn left 19 degrees | dir=left mag=19 unit=deg | target=2.4s | actual=2.41s | complete | voice
Or for a parameterless motion:
17:46:00 | turn right | dir=right mag=- unit=- | target=2.0s | - | start | voice
17:46:02 | turn right | dir=right mag=- unit=- | target=2.0s | actual=2.02s | complete | voice
This is independent of voice.log and transcript.log:
- voice.log = per-thread debug detail (dispatch, worker, watchdog,
reconnect)
- transcript.log = user-facing dialogue (what user said vs. what
Gemini said vs. what canonical fired)
- motion.log = ONE LINE PER MOTION with parsed params + actual
outcome for post-hoc auditing of physical movements
API:
log_motion(event, canonical, direction=..., magnitude=..., unit=...,
target_sec=..., actual_sec=..., source="voice"|"text",
outcome="start"|"complete"|"interrupted"|"error", **extra)
Events (event field):
'start' brain handler is about to issue motor commands.
target_sec is the planned duration; actual is None.
'complete' brain returned normally; actual_sec set.
'interrupted' motion_abort fired during execution.
'error' brain raised an exception; reason in extra.
"""
from __future__ import annotations
import logging
import os
import re
from logging.handlers import RotatingFileHandler
# ─── canonical parser ─────────────────────────────────────────────
#
# Map a canonical command string to (direction, magnitude, unit) so
# the structured log auto-fills these fields when the caller doesn't
# pass them explicitly.
_RE_TURN_DEG_DIR = re.compile(r"^turn\s+(left|right)\s+(\d+(?:\.\d+)?)\s*deg(?:rees?)?$", re.I)
_RE_TURN_DEG = re.compile(r"^turn\s+(\d+(?:\.\d+)?)\s*deg(?:rees?)?$", re.I)
_RE_TURN_STEP_DIR = re.compile(r"^turn\s+(left|right)\s+(\d+(?:\.\d+)?)\s*steps?$", re.I)
_RE_TURN_DIR = re.compile(r"^turn\s+(left|right|around)$", re.I)
_RE_WALK_STEP = re.compile(r"^walk\s+(?:(forward|back(?:ward)?)\s+)?(\d+(?:\.\d+)?)\s*steps?$", re.I)
_RE_WALK_M = re.compile(r"^walk\s+(?:(forward|back(?:ward)?)\s+)?(\d+(?:\.\d+)?)\s*m(?:eter(?:s)?)?$", re.I)
_RE_MOVE_DIR = re.compile(r"^move\s+(forward|back(?:ward)?|left|right)$", re.I)
_RE_WALK_TO = re.compile(r"^walk\s+to\s+(.+)$", re.I)
def parse_canonical(cmd: str) -> dict:
    """Parse a canonical command into structured motion fields.
    Returns {direction, magnitude, unit, kind}. Best-effort: if the
    canonical doesn't match any known shape, all fields are '-' and
    kind='other'."""
    out = {"direction": "-", "magnitude": "-", "unit": "-", "kind": "other"}
    if not cmd:
        return out
    s = cmd.strip().lower()
    m = _RE_TURN_DEG_DIR.match(s)
    if m:
        return {"direction": m.group(1), "magnitude": m.group(2),
                "unit": "deg", "kind": "turn"}
    m = _RE_TURN_DEG.match(s)
    if m:
        return {"direction": "-", "magnitude": m.group(1),
                "unit": "deg", "kind": "turn"}
    m = _RE_TURN_STEP_DIR.match(s)
    if m:
        return {"direction": m.group(1), "magnitude": m.group(2),
                "unit": "step", "kind": "turn"}
    m = _RE_TURN_DIR.match(s)
    if m:
        return {"direction": m.group(1), "magnitude": "-",
                "unit": "-", "kind": "turn"}
    m = _RE_WALK_STEP.match(s)
    if m:
        d = (m.group(1) or "forward").lower()
        if d.startswith("back"): d = "backward"
        return {"direction": d, "magnitude": m.group(2),
                "unit": "step", "kind": "walk"}
    m = _RE_WALK_M.match(s)
    if m:
        d = (m.group(1) or "forward").lower()
        if d.startswith("back"): d = "backward"
        return {"direction": d, "magnitude": m.group(2),
                "unit": "m", "kind": "walk"}
    m = _RE_MOVE_DIR.match(s)
    if m:
        d = m.group(1).lower()
        if d.startswith("back"): d = "backward"
        return {"direction": d, "magnitude": "-",
                "unit": "-", "kind": "move"}
    m = _RE_WALK_TO.match(s)
    if m:
        return {"direction": "tracked", "magnitude": m.group(1).strip(),
                "unit": "target", "kind": "walk_to"}
    # Bare canonicals
    if s == "stop":          return {"direction": "-", "magnitude": "-", "unit": "-", "kind": "stop"}
    if s == "sit down":      return {"direction": "-", "magnitude": "-", "unit": "-", "kind": "posture"}
    if s == "stand up":      return {"direction": "-", "magnitude": "-", "unit": "-", "kind": "posture"}
    if s == "wave hello":    return {"direction": "-", "magnitude": "-", "unit": "-", "kind": "arm"}
    if s == "raise arm":     return {"direction": "right", "magnitude": "-", "unit": "-", "kind": "arm"}
    if s == "lower arm":     return {"direction": "right", "magnitude": "-", "unit": "-", "kind": "arm"}
    if s == "come here":     return {"direction": "tracked", "magnitude": "-", "unit": "yolo", "kind": "come"}
    if s == "follow me":     return {"direction": "tracked", "magnitude": "-", "unit": "yolo", "kind": "follow"}
    if s == "look around":   return {"direction": "scan", "magnitude": "-", "unit": "-", "kind": "look"}
    if s == "stay here":     return {"direction": "-", "magnitude": "-", "unit": "-", "kind": "stay"}
    if s == "go home":       return {"direction": "home", "magnitude": "-", "unit": "-", "kind": "nav"}
    if s == "patrol":        return {"direction": "scan", "magnitude": "-", "unit": "-", "kind": "patrol"}
    if s == "pause motion":  return {"direction": "-", "magnitude": "-", "unit": "-", "kind": "pause"}
    if s == "resume motion": return {"direction": "-", "magnitude": "-", "unit": "-", "kind": "resume"}
    return out
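# Worked examples (illustrative only, not part of the module) — outputs
# derived from the regexes and bare-canonical table above:
#   parse_canonical("turn left 19 degrees")
#     -> {"direction": "left", "magnitude": "19", "unit": "deg", "kind": "turn"}
#   parse_canonical("walk backward 4 steps")
#     -> {"direction": "backward", "magnitude": "4", "unit": "step", "kind": "walk"}
#   parse_canonical("come here")
#     -> {"direction": "tracked", "magnitude": "-", "unit": "yolo", "kind": "come"}
#   parse_canonical("do a flip")
#     -> {"direction": "-", "magnitude": "-", "unit": "-", "kind": "other"}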
try:
    from Core.env_loader import PROJECT_ROOT  # type: ignore
except Exception:
    PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

_LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
os.makedirs(_LOG_DIR, exist_ok=True)
_LOG_PATH = os.path.join(_LOG_DIR, "motion.log")

_logger = logging.getLogger("marcus.motion")
_logger.setLevel(logging.INFO)
_logger.propagate = False
if not _logger.handlers:
    _h = RotatingFileHandler(
        _LOG_PATH, maxBytes=2_000_000, backupCount=3, encoding="utf-8",
    )
    # Use pipe-separator format so the file is grep/awk friendly.
    _h.setFormatter(logging.Formatter("%(asctime)s | %(message)s"))
    _logger.addHandler(_h)
def log_motion(event: str,
               canonical: str,
               direction=None,
               magnitude=None,
               unit=None,
               target_sec=None,
               actual_sec=None,
               source: str = "voice",
               **extra) -> None:
    """Emit one structured line describing a motion event.
    direction/magnitude/unit auto-fill from parse_canonical(canonical)
    if not passed, so the worker can just call log_motion('start', cmd)
    and let the parser handle the rest. Override any field by passing it
    explicitly (useful when the brain knows odom-measured actuals).
    Fields render with fixed width for grep/awk friendliness. Extra
    kwargs become trailing 'k=v' tokens.
    """
    def _f(v):
        if v is None: return "-"
        if isinstance(v, float): return "{:.2f}".format(v)
        return str(v)
    canonical = (canonical or "").strip() or "-"
    parsed = parse_canonical(canonical) if canonical != "-" else {}
    direction = direction if direction is not None else parsed.get("direction", "-")
    magnitude = magnitude if magnitude is not None else parsed.get("magnitude", "-")
    unit = unit if unit is not None else parsed.get("unit", "-")
    parts = [
        "{:35s}".format(canonical[:35]),
        "dir={:8s}".format(_f(direction)[:8]),
        "mag={:8s}".format(_f(magnitude)[:8]),
        "unit={:6s}".format(_f(unit)[:6]),
        "target={:>7s}".format(_f(target_sec) + "s" if target_sec is not None else "-"),
        "actual={:>7s}".format(_f(actual_sec) + "s" if actual_sec is not None else "-"),
        "{:11s}".format(event[:11]),
        "{:5s}".format(source[:5]),
    ]
    if extra:
        parts.append(" ".join("{}={}".format(k, _f(v)) for k, v in extra.items()))
    _logger.info(" | ".join(parts))
# Convenience tee so console + log show the same one-liner
def print_motion(event: str, canonical: str, **kw) -> None:
    """Like log_motion but ALSO prints to stdout so the operator sees
    the line in the terminal dashboard. Use for the high-traffic
    'start' and 'complete' events; use log_motion alone for the noisier
    intermediate events."""
    log_motion(event, canonical, **kw)
    direction = kw.get("direction", "-")
    magnitude = kw.get("magnitude", "-")
    unit = kw.get("unit", "-")
    target = kw.get("target_sec")
    actual = kw.get("actual_sec")
    src = kw.get("source", "voice")
    line = " [Motion {:9s}] {:30s} dir={} mag={} unit={}".format(
        event, canonical[:30], direction, magnitude, unit,
    )
    if target is not None:
        line += " target={:.2f}s".format(float(target))
    if actual is not None:
        line += " actual={:.2f}s".format(float(actual))
    print(line)
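How a brain handler would typically bracket a motion with this helper (a sketch: the handler and do_motion call are hypothetical, the log_motion signature is the one above):

    import time
    from Core.motion_log import log_motion

    def handle_turn(cmd: str, target_sec: float, do_motion):
        log_motion("start", cmd, target_sec=target_sec, source="voice")
        t0 = time.time()
        try:
            do_motion(target_sec)   # issue motor commands for target_sec
            log_motion("complete", cmd, target_sec=target_sec,
                       actual_sec=time.time() - t0, source="voice")
        except Exception as e:
            log_motion("error", cmd, target_sec=target_sec, reason=str(e))
            raise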
46
Core/motion_state.py Normal file
View File
@ -0,0 +1,46 @@
"""
motion_state.py shared motion-control signals.
Three module-level threading.Events used by Voice/marcus_voice.py to
control in-progress motion. Producers are VoiceModule's motion worker;
consumers are brain motion loops in Brain/command_parser.py,
Brain/executor.py, Navigation/marcus_odometry.py, Navigation/patrol.py.
motion_abort hard stop. Loops break out and run gradual_stop().
Set by 'stop'/'cancel'/etc. Cleared by the worker
at the start of each new non-stop command, so a
stale set never kills the next motion.
motion_pause soft hold. Loops do NOT break; they hold zero
velocity and wait for the event to clear, then
resume the same loop iteration where they paused.
While paused they ALSO check motion_abort each
~50ms a pause-then-stop combo cleanly aborts.
The helper `wait_while_paused` centralizes the polling loop so brain
sites don't repeat the same stanza:
if motion_abort.is_set(): break
wait_while_paused()
if motion_abort.is_set(): break
send_vel(...)
time.sleep(0.05)
Single source of truth placed in Core/ so both Voice/* and Brain/*
can import without circular deps.
"""
from __future__ import annotations

import threading
import time

motion_abort: threading.Event = threading.Event()
motion_pause: threading.Event = threading.Event()


def wait_while_paused(poll_sec: float = 0.05) -> None:
    """Block while motion_pause is set. Returns when pause clears OR
    motion_abort fires (so the caller can immediately bail out via
    its existing motion_abort check)."""
    while motion_pause.is_set() and not motion_abort.is_set():
        time.sleep(poll_sec)
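A concrete motion loop wired to these signals, following the stanza from the docstring (send_vel and gradual_stop stand in for the real velocity API):

    import time
    from Core.motion_state import motion_abort, wait_while_paused

    def run_motion(total_sec: float, send_vel, gradual_stop) -> None:
        """Drive until total_sec elapses, honoring pause (hold) and abort (break)."""
        t_end = time.time() + total_sec
        while time.time() < t_end:
            if motion_abort.is_set():
                break                    # hard stop requested
            wait_while_paused()          # soft hold until resume or abort
            if motion_abort.is_set():
                break
            send_vel(0.3, 0.0)           # keep streaming velocity
            time.sleep(0.05)
        gradual_stop()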
View File
@ -1,20 +0,0 @@
[
{
"time": "16:42:30",
"cmd": "hi",
"response": "Hello!",
"duration_s": 5.98
},
{
"time": "16:43:00",
"cmd": "go back two step",
"response": "Going back two steps",
"duration_s": 10.01
},
{
"time": "16:43:23",
"cmd": "now com",
"response": "Coming to you",
"duration_s": 8.11
}
]
View File
@ -1,66 +0,0 @@
[
{
"time": "16:42:30",
"class": "mouse",
"position": "left",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "16:42:30",
"class": "chair",
"position": "center",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "16:42:30",
"class": "laptop",
"position": "left",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "16:42:30",
"class": "person",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:43:00",
"class": "laptop",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:43:00",
"class": "person",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:43:23",
"class": "chair",
"position": "center",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "16:43:23",
"class": "laptop",
"position": "left",
"distance": "medium",
"x": 0.0,
"y": 0.0
}
]
View File
@ -1,14 +0,0 @@
[
{
"time": "16:44:40",
"cmd": "what do you see",
"response": "I see a person sitting at a desk in an office environment. The individual appears to be engaged with a computer screen and is wearing a red shirt.",
"duration_s": 8.01
},
{
"time": "16:45:06",
"cmd": "that is me the one whos is programming you",
"response": "That is me, the one who's programming you",
"duration_s": 6.71
}
]
View File
@ -1,42 +0,0 @@
[
{
"time": "16:44:40",
"class": "person",
"position": "left",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:45:06",
"class": "mouse",
"position": "left",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "16:45:06",
"class": "chair",
"position": "center",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "16:45:06",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:45:06",
"class": "laptop",
"position": "left",
"distance": "medium",
"x": 0.0,
"y": 0.0
}
]
View File
@ -0,0 +1,434 @@
[
{
"time": "14:39:02",
"cmd": "turn around",
"response": "Turning left",
"duration_s": 4.94
},
{
"time": "14:40:32",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:40:53",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:41:17",
"cmd": "walk backward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:41:20",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:41:23",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:41:26",
"cmd": "walk 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:41:29",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:41:32",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:41:35",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:41:39",
"cmd": "walk 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:41:42",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:42:22",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:42:47",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:43:08",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:43:24",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:43:37",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:43:40",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:44:02",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:44:08",
"cmd": "turn right 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:44:15",
"cmd": "turn right 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:44:21",
"cmd": "turn right 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:44:47",
"cmd": "turn left 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:45:29",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:45:48",
"cmd": "turn left 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:46:25",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:46:28",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:46:32",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:46:43",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:46:46",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:47:36",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:47:49",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:48:16",
"cmd": "turn around",
"response": "Turning left",
"duration_s": 17.53
},
{
"time": "14:48:52",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:49:39",
"cmd": "walk forward 1 meters",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:49:43",
"cmd": "walk forward 1 meters",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:49:47",
"cmd": "walk forward 1 meters",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:49:52",
"cmd": "walk forward 1 meters",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:50:07",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:51:17",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:51:41",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:52:13",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:52:50",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:53:15",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:54:05",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:56:07",
"cmd": "go back o",
"response": "Turning left",
"duration_s": 15.61
},
{
"time": "14:56:59",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:57:00",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:57:04",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:57:33",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:57:37",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:57:40",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:57:45",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:58:00",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:58:05",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:58:23",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:58:26",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:58:30",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:58:33",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:00:23",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:00:25",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:01:58",
"cmd": "follow me",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:02:03",
"cmd": "walk 2 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:02:07",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:02:18",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:02:25",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:02:28",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:02:53",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:04:40",
"cmd": "turn left 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:04:46",
"cmd": "turn left 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:05:04",
"cmd": "turn right 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:05:10",
"cmd": "turn right 90 degrees",
"response": "local command",
"duration_s": 0.0
}
]
View File
@ -1,14 +0,0 @@
[
{
"time": "16:46:46",
"cmd": "hi",
"response": "Error.",
"duration_s": 0.0
},
{
"time": "16:46:49",
"cmd": "hiii",
"response": "Error.",
"duration_s": 0.0
}
]
View File
@ -1,34 +0,0 @@
[
{
"time": "16:46:46",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:46:46",
"class": "chair",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:46:46",
"class": "laptop",
"position": "left",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "16:46:49",
"class": "mouse",
"position": "left",
"distance": "far",
"x": 0.0,
"y": 0.0
}
]
View File
@ -0,0 +1,74 @@
[
{
"time": "16:40:07",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "16:40:22",
"cmd": "turn left 19 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "16:40:37",
"cmd": "walk backward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "16:40:55",
"cmd": "walk forward 4 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "16:41:05",
"cmd": "walk backward 4 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "16:41:47",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "16:42:02",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "16:42:18",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "16:42:44",
"cmd": "walk 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "16:43:05",
"cmd": "walk backward 3 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "16:43:06",
"cmd": "turn right 0 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "16:43:49",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
}
]
View File
@ -1,32 +0,0 @@
[
{
"time": "16:48:15",
"cmd": "hii",
"response": "Greeting",
"duration_s": 7.93
},
{
"time": "16:48:32",
"cmd": "what do you see",
"response": "I see a person sitting at a desk in an office environment. The person appears to be working on a computer.",
"duration_s": 7.64
},
{
"time": "16:48:58",
"cmd": "that person is the guy who is programming you",
"response": "That person is the one who programmed me",
"duration_s": 6.76
},
{
"time": "16:49:18",
"cmd": "who is that person",
"response": "I don't have enough information to identify the person in the image.",
"duration_s": 7.1
},
{
"time": "16:50:03",
"cmd": "example/",
"response": "local command",
"duration_s": 0.0
}
]
View File
@ -1,34 +0,0 @@
[
{
"time": "16:48:15",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:48:32",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:48:58",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:49:18",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
}
]
View File
@ -0,0 +1,8 @@
[
{
"time": "09:02:24",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
}
]
View File
@ -1,44 +0,0 @@
[
{
"time": "16:51:08",
"cmd": "hiii",
"response": "Greeting",
"duration_s": 7.97
},
{
"time": "16:51:23",
"cmd": "hi",
"response": "Greeting",
"duration_s": 6.16
},
{
"time": "16:51:51",
"cmd": "my name is Kassam",
"response": "My name is Kassam",
"duration_s": 6.48
},
{
"time": "16:51:59",
"cmd": "that person is Kassam the programmer",
"response": "That person is Kassam the programmer",
"duration_s": 6.74
},
{
"time": "16:52:07",
"cmd": "who is that person",
"response": "That is a man",
"duration_s": 6.42
},
{
"time": "16:52:47",
"cmd": "hiigo back 1 step",
"response": "Backing up one step",
"duration_s": 8.51
},
{
"time": "16:53:24",
"cmd": "what can you do",
"response": "I am a humanoid robot and I can perform various tasks such as walking, talking, and interacting with objects. However, in this image, I see a person sitting at a desk using a computer. If you have any specific questions or requests for me to perform, please let me know!",
"duration_s": 10.6
}
]
View File
@ -1,146 +0,0 @@
[
{
"time": "16:51:08",
"class": "chair",
"position": "center",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:08",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:23",
"class": "chair",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:23",
"class": "person",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:23",
"class": "laptop",
"position": "left",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:51",
"class": "chair",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:51",
"class": "person",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:51",
"class": "laptop",
"position": "left",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:59",
"class": "chair",
"position": "center",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:59",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:51:59",
"class": "laptop",
"position": "left",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "16:52:07",
"class": "chair",
"position": "center",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "16:52:07",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:52:47",
"class": "chair",
"position": "center",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "16:52:47",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:52:47",
"class": "cup",
"position": "left",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "16:53:24",
"class": "chair",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "16:53:24",
"class": "person",
"position": "left",
"distance": "very close",
"x": 0.0,
"y": 0.0
}
]
View File
@ -0,0 +1,56 @@
[
{
"time": "09:14:00",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:14:04",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:15:07",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:16:07",
"cmd": "turn left 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:17:20",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:17:40",
"cmd": "walk back 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:18:27",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "09:18:40",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:19:55",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
}
]
View File
@ -0,0 +1,26 @@
[
{
"time": "09:40:17",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:40:20",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:40:37",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:40:40",
"cmd": "walk 1 steps",
"response": "local command",
"duration_s": 0.0
}
]
View File
@ -0,0 +1,20 @@
[
{
"time": "09:42:11",
"cmd": "turn around",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:42:20",
"cmd": "look around",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:42:34",
"cmd": "turn around",
"response": "local command",
"duration_s": 0.0
}
]
View File
@ -1,44 +0,0 @@
[
{
"time": "17:43:52",
"cmd": "hi",
"response": "Hi",
"duration_s": 3.23
},
{
"time": "17:44:32",
"cmd": "step one step back",
"response": "Step one step back",
"duration_s": 3.44
},
{
"time": "17:44:43",
"cmd": "what do you see",
"response": "I see a person sitting at a desk with a laptop in front of them.",
"duration_s": 3.93
},
{
"time": "17:44:52",
"cmd": "hii",
"response": "Hi",
"duration_s": 3.31
},
{
"time": "17:45:27",
"cmd": "one step back",
"response": "One step back",
"duration_s": 3.46
},
{
"time": "17:45:38",
"cmd": "one more",
"response": "One more",
"duration_s": 3.43
},
{
"time": "17:46:09",
"cmd": "two step back",
"response": "Moving back two steps",
"duration_s": 3.51
}
]
View File
@ -1,138 +0,0 @@
[
{
"time": "17:43:52",
"class": "chair",
"position": "center",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "17:43:52",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:43:52",
"class": "laptop",
"position": "left",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:43:52",
"class": "cup",
"position": "left",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "17:44:32",
"class": "person",
"position": "right",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:44:32",
"class": "laptop",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:44:43",
"class": "person",
"position": "right",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:44:52",
"class": "chair",
"position": "right",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:44:52",
"class": "person",
"position": "right",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:45:27",
"class": "person",
"position": "right",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:45:27",
"class": "laptop",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:45:38",
"class": "chair",
"position": "right",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:45:38",
"class": "person",
"position": "right",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:45:38",
"class": "laptop",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:46:09",
"class": "chair",
"position": "right",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:46:09",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:46:09",
"class": "laptop",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
}
]
View File
@ -0,0 +1,26 @@
[
{
"time": "09:49:11",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:49:39",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:50:07",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:50:28",
"cmd": "turn right 100 degrees",
"response": "local command",
"duration_s": 0.0
}
]
View File
@ -1,14 +0,0 @@
[
{
"time": "17:46:37",
"cmd": "hii",
"response": "Hii",
"duration_s": 3.27
},
{
"time": "17:46:54",
"cmd": "why did you moved",
"response": "I moved because I was instructed to do so.",
"duration_s": 3.71
}
]
View File
@ -1,58 +0,0 @@
[
{
"time": "17:46:37",
"class": "chair",
"position": "right",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:46:37",
"class": "person",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:46:37",
"class": "cup",
"position": "left",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "17:46:54",
"class": "mouse",
"position": "center",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "17:46:54",
"class": "person",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:46:54",
"class": "chair",
"position": "right",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:46:54",
"class": "laptop",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
}
]
View File
@ -0,0 +1,8 @@
[
{
"time": "11:24:20",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
}
]
View File
@ -1,56 +0,0 @@
[
{
"time": "17:49:16",
"cmd": "hii",
"response": "Hello! I am Marcus. How can I help you?",
"duration_s": 0.0
},
{
"time": "17:49:19",
"cmd": "hi",
"response": "Hello! I am Marcus. How can I help you?",
"duration_s": 0.0
},
{
"time": "17:49:20",
"cmd": "Hi",
"response": "Hello! I am Marcus. How can I help you?",
"duration_s": 0.0
},
{
"time": "17:49:37",
"cmd": "move one step back",
"response": "Moving back",
"duration_s": 3.47
},
{
"time": "17:49:50",
"cmd": "one tiny step back",
"response": "Moving back",
"duration_s": 3.44
},
{
"time": "17:50:05",
"cmd": "now come back to me",
"response": "Coming back to you",
"duration_s": 3.53
},
{
"time": "17:50:36",
"cmd": "turn right",
"response": "Turning right",
"duration_s": 3.4
},
{
"time": "17:51:01",
"cmd": "come back to me",
"response": "Coming back to you",
"duration_s": 3.5
},
{
"time": "17:51:22",
"cmd": "turn right 90deg and then back two steps",
"response": "Turning right",
"duration_s": 3.43
}
]
View File
@ -1,106 +0,0 @@
[
{
"time": "17:49:37",
"class": "chair",
"position": "right",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:49:37",
"class": "laptop",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:49:37",
"class": "person",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:49:50",
"class": "laptop",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:49:50",
"class": "person",
"position": "right",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:50:05",
"class": "chair",
"position": "right",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:50:05",
"class": "laptop",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:50:05",
"class": "person",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:50:36",
"class": "chair",
"position": "center",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:50:36",
"class": "person",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:51:01",
"class": "laptop",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:51:01",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:51:22",
"class": "person",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
}
]
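
The YOLO logs are likewise flat JSON arrays in which several records can share one "time" stamp; those records describe detections from the same frame. A small sketch that regroups detections per frame, assuming the log is already in chronological order (as all the samples in this diff are); the path is hypothetical:

import json
from itertools import groupby

def detections_by_frame(path: str) -> dict:
    """Regroup flat YOLO records into per-frame lists keyed by "time"."""
    with open(path, encoding="utf-8") as f:
        records = json.load(f)
    # groupby only merges adjacent records, which is sufficient here
    # because the log is appended in chronological order.
    return {
        stamp: [(r["class"], r["position"], r["distance"]) for r in group]
        for stamp, group in groupby(records, key=lambda r: r["time"])
    }

# Usage (hypothetical path):
# frames = detections_by_frame("Logs/session_011/yolo.json")
# frames.get("17:49:37")  # -> [("chair", "right", "medium"), ...]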


@ -0,0 +1,248 @@
[
{
"time": "11:31:56",
"cmd": "walk 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:32:29",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:33:19",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:33:31",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:34:22",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "11:34:57",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:35:02",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:35:28",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:35:37",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:36:13",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:36:28",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:36:47",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:36:50",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:37:09",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:37:12",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:37:15",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:37:26",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:37:37",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:37:52",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "11:37:56",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:39:06",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "11:39:33",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:40:00",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "11:40:53",
"cmd": "stay here",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:41:15",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "11:42:27",
"cmd": "walk 2 meters",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:42:50",
"cmd": "turn 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:43:22",
"cmd": "turn 360 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:43:42",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:44:15",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "11:44:57",
"cmd": "walk forward 10 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:44:58",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:45:18",
"cmd": "walk backward 5 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:45:30",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:45:56",
"cmd": "walk back 3 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:45:59",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:46:19",
"cmd": "walk to person",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:47:19",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:48:05",
"cmd": "walk forward 5 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:48:08",
"cmd": "turn right 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:48:13",
"cmd": "turn left 2 steps",
"response": "local command",
"duration_s": 0.0
}
]


@ -1,26 +0,0 @@
[
{
"time": "17:53:36",
"cmd": "turn right 90deg and then move back 3 steps",
"response": "Turned right 90 degrees then moved backward",
"duration_s": 0.0
},
{
"time": "17:54:21",
"cmd": "keep turning left unitl you see a guy holding a phone",
"response": "Turning left",
"duration_s": 3.72
},
{
"time": "17:55:33",
"cmd": "goal/ keep turning left unitl you see a guy holding a phone",
"response": "Goal navigation: keep turning left unitl you see a guy holding a phone",
"duration_s": 5.47
},
{
"time": "17:55:53",
"cmd": "describe what he is doing",
"response": "Turning left",
"duration_s": 3.73
}
]


@ -1,50 +0,0 @@
[
{
"time": "17:54:21",
"class": "suitcase",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:54:21",
"class": "backpack",
"position": "center",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:55:32",
"class": "person",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:55:53",
"class": "person",
"position": "center",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "17:55:53",
"class": "chair",
"position": "center",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:55:53",
"class": "laptop",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
}
]


@ -0,0 +1,236 @@
[
{
"time": "12:54:29",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:54:46",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:55:28",
"cmd": "walk to حقيبة",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:56:36",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:56:39",
"cmd": "turn left 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:56:59",
"cmd": "walk backward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:57:59",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:58:07",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:58:22",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "12:59:14",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:59:17",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:59:30",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:59:58",
"cmd": "turn left 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:00:41",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "13:01:52",
"cmd": "walk forward 5 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:02:09",
"cmd": "walk back 2 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:02:52",
"cmd": "walk 5 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:02:57",
"cmd": "walk forward 2 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:03:04",
"cmd": "turn left 3 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:03:07",
"cmd": "turn right 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:03:51",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:03:56",
"cmd": "walk forward 2 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:03:59",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:04:27",
"cmd": "walk backward 3 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:04:32",
"cmd": "walk forward 2 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:05:19",
"cmd": "walk backward 10 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:05:30",
"cmd": "walk forward 5 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:05:34",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:06:27",
"cmd": "turn right 180 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:06:30",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:06:33",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:07:15",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "13:08:52",
"cmd": "turn right 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:08:55",
"cmd": "walk 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:08:58",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:10:28",
"cmd": "walk backward 20 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:10:29",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:11:41",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:11:57",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
}
]


@ -1,32 +0,0 @@
[
{
"time": "17:57:56",
"cmd": "turn right 90 deg",
"response": "local command",
"duration_s": 0.0
},
{
"time": "17:58:22",
"cmd": "keep turning left until you see a guy holding a phone and describe what he is doing and his office",
"response": "Turning left",
"duration_s": 3.68
},
{
"time": "17:58:55",
"cmd": "goal/ keep turning left until you see a guy holding a phone and describe what he is doing and his office",
"response": "Goal navigation: keep turning left until you see a guy holding a phone and describe what he is doing and his office",
"duration_s": 25.79
},
{
"time": "17:59:13",
"cmd": "keep turning left until you see a guy holding a phone and describe what he is doing and his office",
"response": "Turning left",
"duration_s": 3.73
},
{
"time": "17:59:40",
"cmd": "goal/keep turning left until you see a guy holding a phone and describe what he is doing and his office",
"response": "Goal navigation: keep turning left until you see a guy holding a phone and describe what he is doing and his office",
"duration_s": 23.61
}
]


@ -1,18 +0,0 @@
[
{
"time": "17:58:22",
"class": "suitcase",
"position": "right",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "17:58:22",
"class": "backpack",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
}
]


@ -1,26 +0,0 @@
[
{
"time": "18:09:23",
"cmd": "what do you see",
"response": "I see a person sitting at a desk with a computer and a monitor in front of them.",
"duration_s": 4.48
},
{
"time": "18:09:47",
"cmd": "what do you see",
"response": "I see a person sitting at a desk with a computer and a monitor in front of them.",
"duration_s": 4.52
},
{
"time": "18:10:17",
"cmd": "example/",
"response": "local command",
"duration_s": 0.0
},
{
"time": "18:10:41",
"cmd": "is there anyone in the room",
"response": "I see a person sitting at a desk with a computer and a monitor in front of them.",
"duration_s": 4.57
}
]


@ -1,58 +0,0 @@
[
{
"time": "18:09:23",
"class": "mouse",
"position": "center",
"distance": "far",
"x": 0.0,
"y": 0.0
},
{
"time": "18:09:23",
"class": "chair",
"position": "right",
"distance": "medium",
"x": 0.0,
"y": 0.0
},
{
"time": "18:09:23",
"class": "person",
"position": "right",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "18:09:23",
"class": "laptop",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "18:09:47",
"class": "person",
"position": "right",
"distance": "very close",
"x": 0.0,
"y": 0.0
},
{
"time": "18:10:41",
"class": "person",
"position": "right",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "18:10:41",
"class": "laptop",
"position": "center",
"distance": "close",
"x": 0.0,
"y": 0.0
}
]


@ -1,13 +0,0 @@
Session: session_014_2026-04-05
Date: 2026-04-05 18:10
Duration: 4m 48s
Commands: 4
YOLO detections: 7
Alerts: 0
Known places: none
First commands:
- what do you see
- what do you see
- example/
- is there anyone in the room
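
The deleted session summary above is plain text derived from the two logs: command count, YOLO detection count, and the first few commands. A sketch of how such a summary could be rendered from already-loaded records; the "Alerts" and "Known places" lines are placeholders here, since the tracking behind them is not shown in this diff:

def render_session_summary(name, date, duration, cmds, detections):
    """Render a plain-text session summary like the file above."""
    lines = [
        f"Session: {name}",
        f"Date: {date}",
        f"Duration: {duration}",
        f"Commands: {len(cmds)}",
        f"YOLO detections: {len(detections)}",
        "Alerts: 0",           # alert tracking not shown in this diff
        "Known places: none",  # place memory not shown in this diff
        "First commands:",
    ] + [f"- {e['cmd']}" for e in cmds[:4]]
    return "\n".join(lines)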


@ -1,26 +0,0 @@
[
{
"time": "18:12:31",
"cmd": "example/",
"response": "local command",
"duration_s": 0.0
},
{
"time": "18:12:49",
"cmd": "goal/ keep turning left until you see a guy",
"response": "Goal navigation: keep turning left until you see a guy",
"duration_s": 5.22
},
{
"time": "18:13:04",
"cmd": "goal/ keep turning left until you see a guy",
"response": "Goal navigation: keep turning left until you see a guy",
"duration_s": 3.22
},
{
"time": "18:13:25",
"cmd": "what do you see",
"response": "I see a man sitting in a chair, talking on a phone. There is a cardboard box and a backpack on a table next to him.",
"duration_s": 0.0
}
]


@ -1,18 +0,0 @@
[
{
"time": "18:12:48",
"class": "person",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
},
{
"time": "18:13:03",
"class": "person",
"position": "left",
"distance": "close",
"x": 0.0,
"y": 0.0
}
]


@ -1,8 +0,0 @@
[
{
"time": "13:33:49",
"cmd": "hi",
"response": "Hello! I am Marcus. How can I help you?",
"duration_s": 0.0
}
]


@ -1,8 +0,0 @@
[
{
"time": "13:35:59",
"cmd": "hi",
"response": "Hello! I am Marcus. How can I help you?",
"duration_s": 0.0
}
]

Some files were not shown because too many files have changed in this diff.