Update 2026-04-28 15:55:43

This commit is contained in:
kassam 2026-04-28 15:55:45 +04:00
parent c6c847e3ab
commit bcb2fbbcdf
61 changed files with 4642 additions and 62 deletions

View File

@ -3,13 +3,16 @@ command_parser.py — Local command regex patterns + dispatcher
Handles place memory, odometry, session recall, help, examples
"""
import re
import threading
import time
from API.zmq_api import send_vel, gradual_stop, MOVE_MAP, STEP_DURATION_SEC
from API.memory_api import mem, place_save, place_goto, places_list_str
from API.odometry_api import odom, ODOM_AVAILABLE
from API.camera_api import get_frame
from API.llava_api import ask
from API.llava_api import ask, call_llava, parse_json
from Brain.executor import execute
from Core.motion_state import motion_abort, wait_while_paused
from Core.motion_log import log_motion, print_motion
# ── Compiled patterns ────────────────────────────────────────────────────────
@ -33,9 +36,9 @@ _RE_TURN_DEG = re.compile(
# that for a trivial two-second motion. One step = 2 s of motion at the default
# velocity, matching the undo-loop duration already used below.
_RE_WALK_STEP = re.compile(
r"^(?:walk|go|move|step)(?:\s+(forward|back(?:ward)?))?\s+(\d+)\s*steps?$", re.I)
r"^(?:walk|go|move|step)(?:\s+(forward|back(?:ward)?))?\s+(\d+(?:\.\d+)?)\s*steps?$", re.I)
_RE_TURN_STEP = re.compile(
r"^turn\s+(left|right)(?:\s+(\d+)\s*steps?)?$", re.I)
r"^turn\s+(left|right)(?:\s+(\d+(?:\.\d+)?)\s*steps?)?$", re.I)
# Simple one-shot motion — one word or verb+direction, no counts/units.
# All default to a ~2 s motion at the normal velocity. Kept local so the
@ -79,6 +82,106 @@ _RE_SESSION_SUMMARY = re.compile(
_RE_AUTO = re.compile(
r"^auto(?:nomous)?\s+(on|off|status|save|summary)$", re.I)
# ── DIRECT-MOTION CANONICALS ─────────────────────────────────────────────
# Canonicals that the dispatcher emits but were previously falling through
# to the LLaVA/Qwen fallback (530s blocking call on Jetson). Each maps
# to a fast, deterministic motion or arm gesture. See Bug #3 in the
# 14-bug audit.
_RE_TURN_AROUND = re.compile(r"^turn[\s_-]+around$", re.I)
_RE_SIT_DOWN = re.compile(r"^sit[\s_-]+down$", re.I)
_RE_STAND_UP = re.compile(r"^stand[\s_-]+up$", re.I)
_RE_WAVE_HELLO = re.compile(r"^wave(?:[\s_-]+(?:hello|hi))?$", re.I)
_RE_RAISE_ARM = re.compile(r"^raise[\s_-]+(?:right[\s_-]+)?arm$", re.I)
_RE_LOWER_ARM = re.compile(r"^lower[\s_-]+(?:right[\s_-]+)?arm$", re.I)
_RE_STAY_HERE = re.compile(r"^stay[\s_-]+(?:here|in[\s_-]+place)$", re.I)
_RE_POINT_GESTURE = re.compile(r"^point(?:[\s_-]+(?:at|to)[\s_-]+.*)?$", re.I)
# Note: 'move left' / 'move right' canonicals are handled by the existing
# _RE_SIMPLE_DIR (verb=move, direction=left/right via MOVE_MAP). MOVE_MAP
# in config_Navigation.json defines 'left'/'right' as ROTATIONS, not
# strafes — historical decision; if real strafes are wanted later add
# vy axis to MOVE_MAP and a separate strafe handler.
# ── HIGHER-LEVEL MOTIONS ─────────────────────────────────────────────────────
# 'look around' = scan-left + return + scan-right + return; uses execute_action
# so motion_abort halts it cleanly.
_RE_LOOK_AROUND = re.compile(
r"^(?:look[\s_-]+around|scan[\s_-]+(?:the\s+)?(?:room|area)|survey[\s_-]+(?:the\s+)?surroundings?)$",
re.I,
)
# 'follow me' = walk forward when YOLO sees a person and they aren't too close,
# else hold. Loop bounded by FOLLOW_ME_MAX_SEC and motion_abort.
_RE_FOLLOW_ME = re.compile(
r"^(?:follow[\s_-]+me|come[\s_-]+with[\s_-]+me|stick[\s_-]+with[\s_-]+me)$",
re.I,
)
# Spatial planner: 'walk to/towards the <X>'. Distinct from _RE_GOTO
# above because that one is reserved for place-memory names ('go to
# kitchen' looks up the saved 'kitchen' position). This planner uses
# LLaVA scene grounding to actually steer toward an arbitrary visible
# object — used for unsaved targets like 'the door' or 'the red chair'.
# Verbs: walk / step + to/towards/over to. We do NOT include 'go to'
# here to avoid colliding with place memory.
_RE_WALK_TO = re.compile(
r"^(?:(?:walk|step)\s+(?:over\s+)?(?:to|towards?|up\s+to)|approach)\s+"
r"(?:the\s+|a\s+|an\s+|that\s+|this\s+)?(.+?)$",
re.I,
)
# Tunable durations for higher-level motions. Kept inline (not config) so a
# hot-edit doesn't risk an import-time crash; tweak if the demo asks for
# longer scan / follow runs.
LOOK_AROUND_TURN_SEC = 1.0 # ~45° at default vyaw
LOOK_AROUND_HOLD_SEC = 0.4 # pause at each extreme so VLM can see
FOLLOW_ME_MAX_SEC = 30.0 # hard upper bound on a 'follow me' session
FOLLOW_ME_STEP_SEC = 0.5 # one forward burst per yolo-poll cycle
FOLLOW_ME_POLL_SEC = 0.2 # how often to re-check yolo when no person
# Spatial planner tunables. The planner asks LLaVA where the target is
# (left/center/right + near/medium/far) every WALK_TO_REPLAN_SEC, then
# turns to centre + walks forward in WALK_TO_STEP_SEC bursts. Total
# session bounded by WALK_TO_MAX_SEC.
WALK_TO_MAX_SEC = 45.0
WALK_TO_REPLAN_SEC = 2.5 # cadence of LLaVA scene queries
WALK_TO_STEP_SEC = 1.0 # forward burst length between re-plans
WALK_TO_TURN_SHORT_SEC = 0.4 # small correction (~18° at default vyaw)
WALK_TO_TURN_WIDE_SEC = 1.0 # larger correction when target is far off
WALK_TO_LLAVA_TIMEOUT_SEC = 15.0 # per-call deadline on the VLM scene query.
# Without this, ollama-python's default
# 5-min HTTP timeout would let a stalled
# VLM block the entire motion worker
# — and motion_abort can't break out of
# an in-flight HTTP call.
def _call_llava_with_timeout(prompt, img_b64, num_predict=120,
                             timeout_sec=WALK_TO_LLAVA_TIMEOUT_SEC):
    """Invoke call_llava on a background thread, bounded by a hard deadline.

    Returns the raw string response from the VLM, or None if the call
    timed out or raised. The underlying HTTP request cannot be cancelled
    (ollama-python exposes no cancel handle), so on timeout the daemon
    thread is simply abandoned: it finishes (or dies with the process)
    on its own while the caller moves on immediately.
    """
    outcome = {"raw": None, "err": None}

    def _worker():
        # Any exception is captured as a string so the joining thread
        # can distinguish "failed" from "still running".
        try:
            outcome["raw"] = call_llava(prompt, img_b64,
                                        num_predict=num_predict)
        except Exception as exc:
            outcome["err"] = str(exc)

    worker = threading.Thread(target=_worker, daemon=True)
    worker.start()
    worker.join(timeout=timeout_sec)

    # Timed out (thread still busy) or errored → signal failure with None;
    # otherwise hand back whatever the VLM produced.
    if worker.is_alive() or outcome["err"]:
        return None
    return outcome["raw"]
# Autonomous mode instance — set by init_autonomous()
_auto = None
@ -108,9 +211,22 @@ def try_local_command(cmd: str) -> bool:
odom.return_to_start()
else:
print(" [Places] Odometry not running — cannot return to start")
else:
place_goto(name)
return True
return True
# Try saved-place memory first. If place_goto returns False
# (no such place), fall through to the spatial planner — the
# user probably meant a visible object like 'the door' that
# was never saved as a place. Do this by rewriting the cmd
# to the planner's input shape and re-dispatching through
# try_local_command via _RE_WALK_TO below; we DON'T return
# True here so the rest of try_local_command runs.
if place_goto(name):
return True
# Re-target this same call through the spatial planner. We
# mutate the local 'cmd' string, then fall through to the
# _RE_WALK_TO branch later in this function (see further down).
print(f" [Places] '{name}' not saved — trying spatial planner")
cmd = "walk to " + name
# Note: don't return — let control fall through to _RE_WALK_TO.
m = _RE_FORGET.match(cmd)
if m:
@ -142,6 +258,7 @@ def try_local_command(cmd: str) -> bool:
vx, _, _ = MOVE_MAP["forward"]
t0 = time.time()
while time.time() - t0 < meters / abs(vx):
if motion_abort.is_set(): break
send_vel(vx=vx)
time.sleep(0.05)
gradual_stop()
@ -156,6 +273,7 @@ def try_local_command(cmd: str) -> bool:
vx, _, _ = MOVE_MAP["backward"]
t0 = time.time()
while time.time() - t0 < meters / abs(vx):
if motion_abort.is_set(): break
send_vel(vx=vx)
time.sleep(0.05)
gradual_stop()
@ -178,6 +296,7 @@ def try_local_command(cmd: str) -> bool:
duration = abs(degrees) / vyaw_deg_per_sec
t0 = time.time()
while time.time() - t0 < duration:
if motion_abort.is_set(): break
send_vel(vyaw=vyaw)
time.sleep(0.05)
gradual_stop()
@ -188,11 +307,17 @@ def try_local_command(cmd: str) -> bool:
direction = (m.group(1) or "forward").lower()
if direction.startswith("back"):
direction = "backward"
steps = int(m.group(2))
# Step count may be fractional ('walk 3.5 steps' / 'walk half
# a step' after fraction normalization). Convert to float for
# the duration calc; STEP_DURATION_SEC × 3.5 = 7s of motion.
steps = float(m.group(2))
vx, _, _ = MOVE_MAP[direction]
duration = STEP_DURATION_SEC * steps
t0 = time.time()
while time.time() - t0 < duration:
if motion_abort.is_set(): break
wait_while_paused()
if motion_abort.is_set(): break
send_vel(vx=vx)
time.sleep(0.05)
gradual_stop()
@ -201,11 +326,16 @@ def try_local_command(cmd: str) -> bool:
m = _RE_TURN_STEP.match(cmd)
if m:
direction = m.group(1).lower()
steps = int(m.group(2)) if m.group(2) else 1
# Fractional step count for sub-step turns (e.g. 'turn left 0.5
# steps' from 'turn left half a step').
steps = float(m.group(2)) if m.group(2) else 1.0
_, _, vyaw = MOVE_MAP[direction]
duration = STEP_DURATION_SEC * steps
t0 = time.time()
while time.time() - t0 < duration:
if motion_abort.is_set(): break
wait_while_paused()
if motion_abort.is_set(): break
send_vel(vyaw=vyaw)
time.sleep(0.05)
gradual_stop()
@ -225,6 +355,9 @@ def try_local_command(cmd: str) -> bool:
vx, vy, vyaw = MOVE_MAP[direction]
t0 = time.time()
while time.time() - t0 < STEP_DURATION_SEC:
if motion_abort.is_set(): break
wait_while_paused()
if motion_abort.is_set(): break
send_vel(vx=vx, vy=vy, vyaw=vyaw)
time.sleep(0.05)
gradual_stop()
@ -236,6 +369,292 @@ def try_local_command(cmd: str) -> bool:
gradual_stop()
return True
# ── DIRECT-MOTION CANONICALS (no LLaVA fallback) ────────────────────
# These canonicals were previously falling through to ask(cmd, img) →
# Ollama → 530s blocking call. We handle them deterministically here.
# Bug #3 in the 14-bug audit; see README of that fix for rationale.
# 'turn around' = 180° turn. Synthesize the brain call as the
# parametric form so we reuse the existing _RE_TURN_DEG path
# (which has motion_abort / motion_pause / odom support).
if _RE_TURN_AROUND.match(cmd):
return try_local_command("turn 180 degrees")
# 'wave hello' / 'wave' → arm gesture via do_arm(). The arm_api
# is currently a stub (arm_available=false) so this prints a clear
# message rather than calling Ollama. Once GR00T is wired the gesture
# will fire automatically — no change here needed.
if _RE_WAVE_HELLO.match(cmd):
from API.arm_api import do_arm
do_arm("wave")
return True
if _RE_RAISE_ARM.match(cmd):
from API.arm_api import do_arm
do_arm("raise_right") # default to right; alias is in config_Arm.json
return True
if _RE_LOWER_ARM.match(cmd):
from API.arm_api import do_arm
do_arm("lower")
return True
if _RE_POINT_GESTURE.match(cmd):
from API.arm_api import do_arm
# 'point at X' has no canonical gesture in the arm vocabulary —
# closest match is 'right_up' which extends the right arm forward.
# Use 'high_wave' (alias=wave) only if 'right_up' isn't registered.
from API.arm_api import ARM_ALIASES, ARM_ACTIONS
for choice in ("right_up", "raise_right", "high_wave", "wave"):
if choice in ARM_ALIASES or choice in ARM_ACTIONS:
do_arm(choice)
break
else:
print(" [Arm] no point/raise gesture configured — skipping")
return True
# 'sit down' / 'stand up' need posture commands the SDK doesn't
# currently expose through marcus's ZMQ velocity bridge. Print a
# clear log line and gradual_stop (safety) instead of taking 20s
# to have Qwen invent something. When G1 SportClient.StandUp() /
# Sit() are wired into a posture_api, replace these with real calls.
if _RE_SIT_DOWN.match(cmd):
gradual_stop()
print(" [Posture] sit_down not yet wired — gradual_stop only")
return True
if _RE_STAND_UP.match(cmd):
gradual_stop()
print(" [Posture] stand_up not yet wired — gradual_stop only")
return True
# 'stay here' / 'stay in place' — explicit hold. Just stops; the
# robot maintains its standing posture by SDK default.
if _RE_STAY_HERE.match(cmd):
gradual_stop()
print(" [Posture] holding position")
return True
# ── LOOK AROUND ───────────────────────────────────────────────────────
# Scan left ~45° → centre → right ~90° → centre. Each leg goes through
# execute_action which polls motion_abort, so 'stop' mid-scan halts
# cleanly. No-op fallback if MOVE_MAP doesn't define left/right (shouldn't
# happen on a real config but keep us defensive).
if _RE_LOOK_AROUND.match(cmd):
from Brain.executor import execute_action # late import — avoids
# circular at module load
try:
execute_action("left", LOOK_AROUND_TURN_SEC)
if motion_abort.is_set(): return True
time.sleep(LOOK_AROUND_HOLD_SEC)
if motion_abort.is_set(): return True
execute_action("right", LOOK_AROUND_TURN_SEC * 2)
if motion_abort.is_set(): return True
time.sleep(LOOK_AROUND_HOLD_SEC)
if motion_abort.is_set(): return True
execute_action("left", LOOK_AROUND_TURN_SEC)
except Exception as e:
print(f" [LookAround] failed: {e}")
return True
# ── FOLLOW ME ─────────────────────────────────────────────────────────
# Watch for a person in the camera frame; when one is detected and not
# already too close, advance forward in a short burst. If the person is
# too close, hold. If no person seen, hold and re-poll. Bounded by
# FOLLOW_ME_MAX_SEC and motion_abort. yolo_api uses fallback stubs when
# YOLO isn't loaded — in that case we give up immediately and tell the
# operator (no person tracking = no point walking blindly forward).
if _RE_FOLLOW_ME.match(cmd):
from Brain.executor import move_step
try:
from API.yolo_api import yolo_sees, yolo_person_too_close
except Exception as e:
print(f" [FollowMe] yolo_api unavailable: {e}")
return True
# Probe for actual YOLO availability — both the stub `yolo_sees`
# and the stub `yolo_person_too_close` always return False, so a
# follow_me run with no real YOLO would just hold forever.
try:
from API.yolo_api import _stub_sees as _stub_sees_ref # type: ignore
stubbed = (yolo_sees is _stub_sees_ref)
except Exception:
stubbed = False
if stubbed:
print(" [FollowMe] YOLO not loaded — cannot track persons. "
"Skipping. (init_yolo() at startup to enable.)")
return True
print(f" [FollowMe] tracking up to {FOLLOW_ME_MAX_SEC:.0f}s — "
"say 'stop' to end")
deadline = time.time() + FOLLOW_ME_MAX_SEC
last_log = 0.0
while time.time() < deadline:
if motion_abort.is_set():
print(" [FollowMe] motion_abort set — ending")
gradual_stop()
break
try:
see_person = bool(yolo_sees("person"))
except Exception:
see_person = False
try:
too_close = bool(yolo_person_too_close())
except Exception:
too_close = False
now = time.time()
if now - last_log > 2.0:
state = ("person too close" if (see_person and too_close)
else "tracking" if see_person
else "no person")
print(f" [FollowMe] {state}")
last_log = now
if see_person and not too_close:
move_step("forward", FOLLOW_ME_STEP_SEC)
else:
gradual_stop()
time.sleep(FOLLOW_ME_POLL_SEC)
gradual_stop()
print(" [FollowMe] done")
return True
# ── WALK TO TARGET (spatial planner) ─────────────────────────────────
# LLaVA-grounded approach to an arbitrary visible object. This is a
# best-effort planner — accuracy depends entirely on the VLM. Loop:
# 1. Capture a frame, ask LLaVA: "Is <target> visible? bearing
# (left/center/right)? distance (near/medium/far)?"
# 2. If not visible: scan left/right small turn, retry up to N times.
# 3. If visible: turn-toward (small or wide based on bearing) then
# walk forward for WALK_TO_STEP_SEC.
# 4. Done when distance == 'near' OR motion_abort OR timeout.
# All motion goes through move_step / execute_action which already
# poll motion_abort and motion_pause, so 'stop'/'pause' work normally.
m = _RE_WALK_TO.match(cmd)
if m:
target = m.group(1).strip().rstrip(".!?,").strip()
if not target:
return True
# If the target is a known place name, defer to place_goto —
# the user probably meant the saved place, not the visible
# object with that name. (Belt-and-braces with _RE_GOTO above.)
if mem and target.lower() in {p.lower() for p in (mem.list_places() or [])}:
place_goto(target)
return True
from Brain.executor import move_step, execute_action
print(f" [WalkTo] target = {target!r}; max {WALK_TO_MAX_SEC:.0f}s")
deadline = time.time() + WALK_TO_MAX_SEC
scan_attempts_left = 4 # how many turns before giving up if unseen
last_replan = 0.0
# Kill-switch on the LLaVA retry loop. Without this, an unresponsive
# Ollama (server crashed / model OOM / network wedged) makes the
# planner grind for the full WALK_TO_MAX_SEC, hammering with retries
# every 0.5 s and producing nothing but noise. Bail after a small
# consecutive-timeout count so the user gets a fast 'unable to plan'
# rather than a 45 s silent wait. Field-tested cap.
consecutive_llava_timeouts = 0
LLAVA_TIMEOUT_GIVE_UP = 3
while time.time() < deadline:
if motion_abort.is_set():
print(" [WalkTo] motion_abort set — ending")
gradual_stop()
break
# Re-plan via LLaVA at each cycle. Caching one query per
# WALK_TO_REPLAN_SEC keeps token cost in check.
now = time.time()
if now - last_replan < WALK_TO_REPLAN_SEC and last_replan > 0:
time.sleep(0.1)
continue
last_replan = now
img_b64 = get_frame()
if not img_b64:
print(" [WalkTo] no camera frame — pausing")
time.sleep(0.5)
continue
prompt = (
f"You are guiding a robot toward the {target!r}. Look at "
f"the image. Answer ONLY in JSON, no other text:\n"
f"{{\"visible\": true|false, \"bearing\": \"left|center|right\", "
f"\"distance\": \"near|medium|far\", \"reason\": \"<brief>\"}}\n"
f"- 'visible' = whether the {target!r} is in the frame\n"
f"- 'bearing' = which side of the frame it's on\n"
f"- 'distance' = rough estimate based on apparent size\n"
f"- 'near' means within ~1 metre (almost stopping distance)"
)
# Wrap LLaVA in a thread-timeout: ollama-python's default
# HTTP timeout is ~5 minutes, which would let a stalled VLM
# block the entire motion worker far longer than the watchdog
# would tolerate. WALK_TO_LLAVA_TIMEOUT_SEC = 15s gives the
# VLM enough room to answer at idle while keeping abort
# responsive (next iteration re-checks motion_abort).
if motion_abort.is_set(): break
raw = _call_llava_with_timeout(prompt, img_b64, num_predict=120)
if raw is None:
consecutive_llava_timeouts += 1
print(f" [WalkTo] LLaVA timeout/error "
f"({consecutive_llava_timeouts}/{LLAVA_TIMEOUT_GIVE_UP}) "
f"— retrying")
if consecutive_llava_timeouts >= LLAVA_TIMEOUT_GIVE_UP:
print(f" [WalkTo] LLaVA unresponsive after "
f"{LLAVA_TIMEOUT_GIVE_UP} timeouts — giving up. "
f"Check Ollama (curl http://localhost:11434/api/version).")
gradual_stop()
return True
time.sleep(0.5)
continue
consecutive_llava_timeouts = 0
obs = parse_json(raw) or {}
visible = bool(obs.get("visible"))
bearing = (obs.get("bearing") or "").lower()
distance = (obs.get("distance") or "").lower()
reason = obs.get("reason", "")
print(f" [WalkTo] visible={visible} bearing={bearing} "
f"distance={distance} ({reason[:60]})")
if motion_abort.is_set(): break
if not visible:
if scan_attempts_left <= 0:
print(f" [WalkTo] {target!r} not found after scans — "
"giving up")
break
scan_attempts_left -= 1
# Small turn left to widen the search arc; next iteration
# the LLaVA call will see the new view.
execute_action("left", WALK_TO_TURN_SHORT_SEC)
continue
scan_attempts_left = 4 # reset once we see it again
# Reached: stop and report
if distance == "near":
gradual_stop()
print(f" [WalkTo] arrived at {target!r}")
return True
# Steer toward target
if bearing == "left":
turn_sec = (WALK_TO_TURN_WIDE_SEC if distance == "far"
else WALK_TO_TURN_SHORT_SEC)
execute_action("left", turn_sec)
elif bearing == "right":
turn_sec = (WALK_TO_TURN_WIDE_SEC if distance == "far"
else WALK_TO_TURN_SHORT_SEC)
execute_action("right", turn_sec)
# bearing == 'center' or unknown → just walk forward
if motion_abort.is_set(): break
# Forward burst
move_step("forward", WALK_TO_STEP_SEC)
gradual_stop()
return True
# ── NAMED PATROL ROUTE ───────────────────────────────────────────────
m = _RE_PATROL_RT.match(cmd)
if m:

View File

@ -6,6 +6,7 @@ import time
import threading
from API.zmq_api import send_vel, gradual_stop, MOVE_MAP, STEP_PAUSE
from API.arm_api import ALL_ARM_NAMES, do_arm
from Core.motion_state import motion_abort, wait_while_paused
def _obstacle_check():
@ -29,6 +30,18 @@ def execute_action(move: str, duration: float):
vx, vy, vyaw = MOVE_MAP[move]
t0 = time.time()
while time.time() - t0 < duration:
if motion_abort.is_set():
gradual_stop()
print(" [executor] motion_abort set — stopping")
return
# Honour a soft pause: hold zero velocity and wait until
# the user resumes (or aborts). wait_while_paused returns
# immediately if not paused. Re-check abort right after.
if motion_pause_check():
send_vel(0.0, 0.0, 0.0)
wait_while_paused()
if motion_abort.is_set():
gradual_stop(); return
if _obstacle_check():
gradual_stop()
print(" [Safety] LiDAR obstacle — stopping")
@ -47,6 +60,15 @@ def move_step(move: str, duration: float):
vx, vy, vyaw = MOVE_MAP[move]
t0 = time.time()
while time.time() - t0 < duration:
if motion_abort.is_set():
send_vel(0.0, 0.0, 0.0)
print(" [executor] motion_abort set — pausing step")
return
if motion_pause_check():
send_vel(0.0, 0.0, 0.0)
wait_while_paused()
if motion_abort.is_set():
send_vel(0.0, 0.0, 0.0); return
if _obstacle_check():
send_vel(0.0, 0.0, 0.0)
print(" [Safety] LiDAR obstacle — pausing step")
@ -57,6 +79,13 @@ def move_step(move: str, duration: float):
time.sleep(0.1)
def motion_pause_check() -> bool:
    """Report whether a soft-pause is currently active.

    Small indirection over Core.motion_state.motion_pause so each motion
    loop can test the pause flag without importing it directly.
    """
    from Core.motion_state import motion_pause
    paused: bool = motion_pause.is_set()
    return paused
def merge_actions(actions: list) -> list:
"""Merge consecutive same-direction steps into one smooth movement."""
if not actions:

View File

@ -346,14 +346,131 @@ def process_command(cmd: str) -> dict:
log_cmd(cmd, response)
return {"type": "greeting", "speak": response, "action": "GREETING", "elapsed": 0}
# ── "Come to me" shortcut ────────────────────────────────────────────
# ── "Come to me" — smart YOLO-tracked approach ──────────────────────
# Old behaviour: walk forward 2s blindly. New: if YOLO is loaded,
# locate the person in the camera frame, turn toward them if off-
# centre, walk forward in short bursts, stop when 'arrived' (person
# fills enough of the frame to be 'right in front'). Bounded by
# COME_TO_ME_MAX_SEC; falls back to a 2s blind walk if YOLO isn't
# available so the canonical still does *something*.
#
# Arrival threshold: yolo_person_too_close(threshold=...) returns
# True if the person's bbox size_ratio (bbox_area / frame_area)
# exceeds the threshold. Defaults are calibrated for safety
# (0.25 = ~1m away, used by patrol). For come_here we want the
# robot to come MUCH closer — arm's length, ~0.5m — which is
# roughly size_ratio > 0.32 (lookup in marcus_yolo.distance_estimate:
# 'very close' starts at 0.30). 0.32 is a safe 'arrived' marker;
# any higher and the robot might bump into the user.
if re.match(r"^(?:come(?:\s+back)?(?:\s+to\s+me)?|come\s+here|get\s+closer|approach|move\s+closer)\s*[!.]*$", cmd, re.IGNORECASE):
execute_action("forward", 2.0)
from Brain.executor import move_step
try:
from API.yolo_api import yolo_sees, yolo_person_too_close, yolo_closest
from API.yolo_api import _stub_sees as _stub_sees_ref # type: ignore
yolo_active = (yolo_sees is not _stub_sees_ref)
except Exception:
yolo_active = False
COME_TO_ME_MAX_SEC = 25.0
COME_TO_ME_STEP_SEC = 0.6
COME_TO_ME_TURN_SEC = 0.3 # small correction (~14° at default vyaw)
COME_TO_ME_POLL_SEC = 0.3
# Arrival threshold: 0.32 size_ratio ≈ ~0.5m / arm's length.
# Patrol's 0.25 default is too lenient for come-to-me — it
# would treat a 1m-away user as 'already arrived'.
COME_TO_ME_ARRIVED_RATIO = 0.32
if not yolo_active:
# Fallback — same behaviour as before for hardware setups
# without YOLO loaded.
print(" [ComeToMe] YOLO not loaded — walking forward 2s")
execute_action("forward", 2.0)
resp = "Coming to you"
print(f"Sanad: {resp}")
add_to_history(cmd, resp)
log_cmd(cmd, resp)
return {"type": "move", "speak": resp, "action": "FORWARD 2.0s", "elapsed": 2.0}
print(f" [ComeToMe] tracking person up to {COME_TO_ME_MAX_SEC:.0f}s — say 'stop' to end")
deadline = time.time() + COME_TO_ME_MAX_SEC
last_log = 0.0
scan_attempts_left = 6 # how many turns before giving up if person never seen
while time.time() < deadline:
try:
from Core.motion_state import motion_abort
if motion_abort.is_set():
print(" [ComeToMe] motion_abort — stopping")
break
except Exception:
pass
try:
seen = bool(yolo_sees("person"))
except Exception:
seen = False
try:
# IMPORTANT: pass an explicit 'arrived' threshold (0.32)
# — without it, the default 0.25 (safety patrol's value)
# would call the user 'too close' when they're a metre
# away, and the robot would stop without walking. We
# want it to come arm's length close.
arrived = bool(yolo_person_too_close(
threshold=COME_TO_ME_ARRIVED_RATIO,
))
except Exception:
arrived = False
now = time.time()
if now - last_log > 1.5:
state = ("arrived (close to person)" if (seen and arrived)
else "tracking person" if seen
else "scanning for person")
print(f" [ComeToMe] {state}")
last_log = now
if seen and arrived:
# Made it — robot is close enough to stop in front
print(" [ComeToMe] arrived in front of person")
break
if seen and not arrived:
# Person is in view but not yet arm's length close —
# centre on them and walk forward in a burst.
bearing = "center"
try:
det = yolo_closest("person")
if det and getattr(det, "position", None):
# position is something like 'left' / 'center' / 'right'
bearing = str(det.position).lower()
except Exception:
pass
if bearing == "left":
execute_action("left", COME_TO_ME_TURN_SEC)
elif bearing == "right":
execute_action("right", COME_TO_ME_TURN_SEC)
# forward burst
move_step("forward", COME_TO_ME_STEP_SEC)
scan_attempts_left = 6 # reset
continue
# No person — try a small scan turn, then check again
if scan_attempts_left <= 0:
print(" [ComeToMe] no person found after scans — giving up")
break
scan_attempts_left -= 1
execute_action("left", COME_TO_ME_TURN_SEC)
time.sleep(COME_TO_ME_POLL_SEC)
try:
from API.zmq_api import gradual_stop as _gs
_gs()
except Exception:
pass
elapsed = time.time() - t0
resp = "Coming to you"
print(f"Sanad: {resp}")
add_to_history(cmd, resp)
log_cmd(cmd, resp)
return {"type": "move", "speak": resp, "action": "FORWARD 2.0s", "elapsed": 2.0}
return {"type": "move", "speak": resp, "action": "COME_TO_ME", "elapsed": elapsed}
# ── Multi-step compound ──────────────────────────────────────────────
_multi = re.match(
@ -620,7 +737,23 @@ def run_terminal():
print(' Pick the ID that sounded English and set it in')
print(' Config/config_Voice.json :: tts.builtin_speaker_id')
continue
# Structured motion log for terminal-typed commands. The
# voice path already logs through Voice/marcus_voice's
# worker; here we cover the OTHER entry-point so every
# motion (text or voice) shows up in logs/motion.log with
# parsed direction/magnitude/unit + actual elapsed.
try:
from Core.motion_log import log_motion as _mlog
_mlog("start", cmd, source="text")
_t0 = time.time()
except Exception:
_t0 = time.time()
result = process_command(cmd)
try:
_mlog("complete", cmd, source="text",
actual_sec=time.time() - _t0)
except Exception:
pass
sp = result.get("speak", "") if isinstance(result, dict) else ""
if sp and _audio_api:
_audio_api.speak(sp)

File diff suppressed because one or more lines are too long

View File

@ -70,19 +70,26 @@
"turn_around": {
"canonical": "turn around",
"user_phrases": {
"english": ["turn around", "turn back", "spin around", "about face", "face the other way"],
"english": ["turn around", "turn back", "spin around", "about face", "face the other way", "spin in place", "spin yourself", "turn yourself around"],
"arabic": [
"استدر للخلف", "استدر إلى الوراء", "اتجه للخلف", "ادر للخلف", "ارجع وجهك",
"لف ورا", "لف للورا", "لف للخلف", "دور ورا", "دور للورا", "دور للخلف",
"اتلف", "اتلف ورا", "دير وراك", "دير للورا", "استدر ورا",
"اقلب وجهك", "اقلب", "اعكس اتجاهك", "غيّر اتجاهك"
"اقلب وجهك", "اقلب", "اعكس اتجاهك", "غيّر اتجاهك",
"لف حول نفسك", "لف حوالين نفسك", "لف على نفسك",
"دور حول نفسك", "دور حوالين نفسك", "دور على نفسك",
"استدر حول نفسك", "استدر حوالين نفسك",
"حول نفسك", "حوالين نفسك",
"لف بنفسك", "دور بنفسك"
]
},
"bot_phrases": {
"english": ["turning around"],
"arabic": [
"أستدير للخلف", "أستدير إلى الوراء", "استدير للخلف",
"بلف ورا", "بدور للورا", "أدور للخلف"
"بلف ورا", "بدور للورا", "أدور للخلف",
"أستدير حول نفسي", "ألف حول نفسي", "أدور حول نفسي",
"بلف حول نفسي", "بدور حول نفسي"
]
}
},
@ -102,7 +109,11 @@
]
},
"bot_phrases": {
"english": ["moving forward", "walking forward", "stepping forward", "going forward", "going ahead"],
"_": "Direction-anchored only. We do NOT add destination-phrased needles like 'walking towards X' / 'أمشي نحو X' even though Gemini occasionally says them — those false-fire when Gemini DESCRIBES a third party ('I see a person walking towards us'). Persona RULE 4b forbids destination phrasing in motion confirmations; trust the prompt. If a real motion phrase ever slips through and we miss it, add the EXACT slip to this list — not generic destination patterns.",
"english": [
"moving forward", "walking forward", "stepping forward",
"going forward", "going ahead", "heading forward", "walking ahead"
],
"arabic": [
"أتحرك للأمام", "أتحرك إلى الأمام", "أتقدم", "أمشي للأمام", "أذهب للأمام",
"بمشي قدام", "براح قدام", "أروح قدام", "بتقدم", "بفوت لقدام"
@ -188,9 +199,10 @@
]
},
"bot_phrases": {
"_": "Removed 'أنتظر' from this list — it's also a pause_motion bot_phrase ('I wait' is more naturally a pause than a stop). Keeping it here would cause Gemini saying 'أنتظر' to fire BOTH stop and pause_motion in the same chunk.",
"english": ["stopping", "halting", "holding"],
"arabic": [
"أتوقف", "توقفت", "أنتظر",
"أتوقف", "توقفت",
"بوقّف", "وقّفت", "بستنى", "أصبر", "بطّلت"
]
}
@ -339,10 +351,17 @@
]
},
"bot_phrases": {
"english": ["coming over", "coming to you", "approaching"],
"_": "Expanded Arabic forms because Gemini sometimes emits non-canonical conjugations (e.g. 'أتعال' is grammatically wrong but phonetically what it produces for 'I come'). Persona Rule 5d gives Gemini correct example phrases; these extra needles catch slip cases like 'أتعال'/'بجي لك'.",
"english": [
"coming over", "coming to you", "approaching",
"coming to find you", "coming your way",
"i'm coming", "on my way to you", "heading to you"
],
"arabic": [
"آتي إليك", "أقترب", "أتقرّب",
"بجي", "جاي", "جاي لك", "بقرّب", "أقرّب منك"
"آتي إليك", "أتي إليك", "أتي", "آتي", "أتعال",
"أنا قادم", "قادم إليك", "قادم لك", "أقترب منك",
"أقترب", "أتقرّب", "بجي", "بجي لك", "جاي", "جاي لك",
"بقرّب", "أقرّب منك", "رايح لك", "أتجه إليك"
]
}
},
@ -479,6 +498,223 @@
"english": [],
"arabic": []
}
},
"repeat_last": {
"_": "Memory action: re-fire the most recent dispatched command. Marcus VoiceModule keeps a deque of recent commands and intercepts this canonical — it is NEVER passed to the brain, the resolved (real) command is dispatched instead. bot_phrases use distinctive multi-word forms ('repeating that', 'doing that again') to avoid colliding with the bare 'again' / 'تاني' that users might say in a different context.",
"canonical": "repeat last",
"user_phrases": {
"english": [
"do that again", "do it again", "again", "repeat", "redo",
"one more time", "once more", "do the same again",
"say that again", "do it once more", "repeat that",
"another time", "another one"
],
"arabic": [
"كرر", "كرّر", "كررها", "أعد", "أعدها", "أعد العملية",
"تاني", "مرة ثانية", "مرة تانية", "مرة كمان", "مرة أخرى",
"كرر اللي سويته", "كرر الحركة", "أعد الحركة", "مرة ثانية لو سمحت",
"أعد الكرة", "كمان مرة"
]
},
"bot_phrases": {
"english": [
"repeating that", "doing that again", "doing it again",
"one more time", "doing the same again", "repeating the last action"
],
"arabic": [
"أعيد الحركة", "أكرر الحركة", "مرة ثانية", "أعيدها",
"أكررها", "أعيد آخر حركة", "أكرر آخر حركة"
]
}
},
"pause_motion": {
"_": "Soft hold: brain motion loops freeze in place (zero velocity) and wait. Resume continues the SAME motion from where it paused — different from stop, which terminates the chain. Marcus VoiceModule intercepts this canonical and sets motion_pause directly; brain never sees it. Persona Rule 10 instructs Gemini to confirm with 'Pausing.' / 'أتوقف مؤقتاً.'.",
"canonical": "pause motion",
"user_phrases": {
"english": [
"pause", "pause motion", "hold on", "hold position", "hold up",
"wait a moment", "wait a sec", "freeze for a moment", "one second",
"give me a sec", "pause for a moment", "stay there"
],
"arabic": [
"توقف لحظة", "انتظر لحظة", "ثبت", "ثبت مكانك",
"استنى شوي", "استنى ثانية", "ثانية واحدة", "لحظة", "هدّي شوي",
"ايقاف مؤقت", "وقفة قصيرة", "انتظرني", "خليك مكانك"
]
},
"bot_phrases": {
"english": [
"pausing", "holding position", "holding for a moment",
"freezing in place", "pausing for a moment"
],
"arabic": [
"أتوقف مؤقتاً", "أتوقف لحظة", "أنتظر", "أثبت مكاني",
"إيقاف مؤقت", "انتظار قصير", "أبقى مكاني"
]
}
},
"resume_motion": {
"_": "Clears motion_pause so the previously-running brain loop continues. If nothing was actually paused, this is a quiet no-op. Persona Rule 10 instructs Gemini to confirm with 'Resuming.' / 'أكمل.'.",
"canonical": "resume motion",
"user_phrases": {
"english": [
"resume", "continue", "go on", "carry on", "keep going",
"proceed", "resume motion", "continue what you were doing",
"you can continue", "go ahead now"
],
"arabic": [
"أكمل", "استكمل", "كمّل", "كمل", "تابع", "استمر",
"كمّل الحركة", "كمّل اللي كنت فيه", "تابع اللي كنت تسوي",
"روح تابع", "استمر بالحركة"
]
},
"bot_phrases": {
"english": [
"resuming", "continuing", "carrying on", "resuming motion",
"picking up where I left off"
],
"arabic": [
"أكمل", "أستمر", "أتابع", "أكمل الحركة",
"أكمل من حيث توقفت", "أتابع اللي كنت أسويه"
]
}
},
"start_recording": {
"_": "Begin capturing subsequent dispatched commands into a named sequence buffer. Marcus VoiceModule intercepts this canonical and starts recording. Optionally pre-name the sequence here ('start recording as my-greet') — otherwise the user must name it on save. The recording is in-memory until save_sequence is called.",
"canonical": "start recording",
"user_phrases": {
"english": [
"start recording", "begin recording", "record this",
"remember this sequence", "record what i do",
"record sequence", "start saving moves", "save what i do"
],
"arabic": [
"ابدأ التسجيل", "ابدأ تسجيل", "ابدا التسجيل",
"سجل اللي بسويه", "سجل الحركات",
"سجل ما أفعله", "ابدأ الحفظ"
]
},
"bot_phrases": {
"english": [
"starting to record", "recording your sequence",
"i'm recording", "recording started"
],
"arabic": [
"بدأت التسجيل", "أسجل الحركات", "بدأت تسجيل التسلسل",
"بدأت أسجل"
]
}
},
"save_sequence": {
"_": "Stop recording and persist the buffered commands. The user typically appends a name in the SAME utterance ('save sequence as my-greet') — Gemini extracts the name and includes it in the confirmation. Marcus parses the bot_phrase, extracts the trailing name, calls Sequences.save_recording(name).",
"canonical": "save sequence",
"user_phrases": {
"english": [
"save sequence", "save this sequence", "save it as",
"save recording", "stop recording and save",
"stop recording save as", "save recording as",
"name this sequence", "call this sequence"
],
"arabic": [
"احفظ التسلسل", "احفظ التسجيل", "احفظ باسم",
"أوقف التسجيل واحفظ", "احفظ هذا التسلسل",
"سمي هذا التسلسل", "احفظ التسجيل باسم", "خلصت سجل"
]
},
"bot_phrases": {
"english": [
"saved sequence", "saved as", "saving sequence as",
"recording saved", "saved your sequence as"
],
"arabic": [
"حفظت التسلسل", "حفظت باسم", "أحفظ التسلسل باسم",
"تم الحفظ"
]
}
},
"play_sequence": {
"_": "Replay a saved sequence by name. The bot_phrase carries the name ('Playing my-greet.'); Marcus extracts it and calls Sequences.play(name) → enqueues each command on the motion worker queue. Sequence commands execute serially with normal interrupt support — say 'stop' to abort the playback.",
"canonical": "play sequence",
"user_phrases": {
"english": [
"play sequence", "play my", "run my", "do my",
"play recording", "execute sequence", "run sequence",
"perform sequence"
],
"arabic": [
"شغل التسلسل", "نفذ التسلسل", "سوي اللي سجلته",
"ابدأ تسلسل", "نفذ تسجيل", "شغل التسجيل",
"اعمل التسلسل"
]
},
"bot_phrases": {
"english": [
"playing", "running", "performing sequence",
"playing sequence", "starting sequence"
],
"arabic": [
"أشغل التسلسل", "أنفذ التسلسل", "أبدأ التسلسل",
"بدأت التشغيل"
]
}
},
"cancel_recording": {
"_": "Abandon the in-progress recording without saving. Marcus calls Sequences.cancel_recording().",
"canonical": "cancel recording",
"user_phrases": {
"english": [
"cancel recording", "discard recording", "throw away recording",
"stop recording without saving", "forget the recording"
],
"arabic": [
"ألغ التسجيل", "احذف التسجيل", "اوقف التسجيل بدون حفظ",
"تجاهل التسجيل"
]
},
"bot_phrases": {
"english": [
"discarded recording", "cancelled recording",
"recording cancelled"
],
"arabic": [
"ألغيت التسجيل", "حذفت التسجيل", "تم إلغاء التسجيل"
]
}
},
"reverse_last": {
"_": "Memory action: fire the INVERSE of the last dispatched command (turn right→turn left, walk forward→walk backward, sit down→stand up, raise arm→lower arm; parametric forms flip direction and keep the number). Marcus intercepts the 'reverse last' canonical and computes the inverse from history. NOTE: the brain has its own 'undo' regex that returns to the start position via odometry — that's a different feature; reverse_last only flips the LAST motion. Distinctive bot_phrases avoid colliding with move_backward needles like 'going back' / 'reversing'.",
"canonical": "reverse last",
"user_phrases": {
"english": [
"undo", "undo that", "reverse", "reverse that",
"go back", "take that back", "cancel that",
"undo the last", "reverse the last", "do the opposite"
],
"arabic": [
"تراجع", "تراجع عن ذلك", "ارجع ما سويت", "ارجع",
"اعكس", "اعكسها", "ألغي", "ألغي ذلك", "ألغي الحركة",
"رد ما سويت", "تراجع عن آخر حركة", "اعكس آخر حركة",
"اعمل عكس", "اعمل العكس"
]
},
"bot_phrases": {
"english": [
"reversing that", "undoing that", "reversing the last action",
"undoing the last action", "going back to before"
],
"arabic": [
"أتراجع عن ذلك", "أعكس آخر حركة", "ألغي آخر حركة",
"أتراجع عن آخر حركة", "أرجع للحالة السابقة", "أعكسها"
]
}
}
},
@ -489,6 +725,21 @@
"regex": "(?:turn(?:ing)?|spin(?:ning)?|rotat(?:e|ing))\\s+(left|right)\\s+(\\d+)\\s*deg(?:ree(?:s)?)?",
"command_template": "turn $1 $2 degrees"
},
{
"_": "turn left/right N steps (English) — 'Turning left 1 steps.' Maps to brain's _RE_TURN_STEP single-motion canonical 'turn left N steps' (NOT compound). User says 'turn left one step' — meaning one step's worth of left rotation.",
"regex": "(?:turn(?:ing)?|spin(?:ning)?|rotat(?:e|ing))\\s+(left|right)\\s+(\\d+(?:\\.\\d+)?)\\s*steps?",
"command_template": "turn $1 $2 steps"
},
{
"_": "Arabic أستدير يميناً/يساراً N خطوات — 'أستدير يساراً 1 خطوات' for 'لف يسار خطوة وحدة' single-motion intent. Parses as 'turn left N steps' brain canonical (not compound 'turn + walk').",
"regex": "(?:أستدير|استدر|لف+|ألف+|بلف|دور+|أدور+|بدور)\\s+(?:(?:على|ع|إلى)\\s+)?(?:يمين|يميناً|يمينا|اليمين|لليمين)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:خطوة|خطوات|خطوتين)",
"command_template": "turn right $1 steps"
},
{
"_": "Arabic أستدير يساراً N خطوات — same as above, left side.",
"regex": "(?:أستدير|استدر|لف+|ألف+|بلف|دور+|أدور+|بدور)\\s+(?:(?:على|ع|إلى)\\s+)?(?:يسار|يساراً|يسارا|اليسار|لليسار|شمال|الشمال|للشمال)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:خطوة|خطوات|خطوتين)",
"command_template": "turn left $1 steps"
},
{
"_": "turn N degrees — 'Turning 360 degrees.'",
"regex": "(?:turn(?:ing)?|spin(?:ning)?|rotat(?:e|ing))\\s+(\\d+)\\s*deg(?:ree(?:s)?)?",
@ -496,17 +747,17 @@
},
{
"_": "walk forward/back N steps — 'Walking forward 5 steps.'",
"regex": "(?:walk(?:ing)?|step(?:ping)?|mov(?:e|ing))\\s+(forward|back(?:ward)?)\\s+(\\d+)\\s+steps?",
"regex": "(?:walk(?:ing)?|step(?:ping)?|mov(?:e|ing))\\s+(forward|back(?:ward)?)\\s+(\\d+(?:\\.\\d+)?)\\s+steps?",
"command_template": "walk $1 $2 steps"
},
{
"_": "walk N steps forward/back — 'Walking 5 steps forward.'",
"regex": "(?:walk(?:ing)?|step(?:ping)?|tak(?:e|ing))\\s+(\\d+)\\s+steps?\\s+(forward|back(?:ward)?)",
"regex": "(?:walk(?:ing)?|step(?:ping)?|tak(?:e|ing))\\s+(\\d+(?:\\.\\d+)?)\\s+steps?\\s+(forward|back(?:ward)?)",
"command_template": "walk $2 $1 steps"
},
{
"_": "walk N steps (default forward) — 'Taking 5 steps.'",
"regex": "(?:walk(?:ing)?|step(?:ping)?|tak(?:e|ing))\\s+(\\d+)\\s+steps?",
"regex": "(?:walk(?:ing)?|step(?:ping)?|tak(?:e|ing))\\s+(\\d+(?:\\.\\d+)?)\\s+steps?",
"command_template": "walk $1 steps"
},
{
@ -525,34 +776,137 @@
"command_template": "walk $1 meters"
},
{
"_": "Arabic: 'أستدير يميناً 90 درجة' / 'أستدير يمينا 90 درجة'",
"regex": "أستدير\\s+يمين[اًا]?ً?\\s+(\\d+)\\s*درجة",
"_": "Arabic turn right N degrees — covers MSA + Levantine/Syrian (لف يمين), Gulf/Iraqi (دور يمين), Egyptian (خش يمين). Verb roots: أستدير | استدر | لف | دور | خش. Direction tokens: يمين | يميناً | يمينا | اليمين (with optional على/ع prefix).",
"regex": "(?:أستدير|استدر|لف+|ألف+|دور+|أدور+|بدور|خش|أخش|بخش)\\s+(?:(?:على|ع)\\s+)?(?:يمين|يميناً|يمينا|اليمين|لليمين)\\s+(\\d+)\\s*درجة",
"command_template": "turn right $1 degrees"
},
{
"_": "Arabic: 'أستدير يساراً 90 درجة'",
"regex": "أستدير\\s+يسار[اًا]?ً?\\s+(\\d+)\\s*درجة",
"_": "Arabic turn left N degrees — also accepts شمال/الشمال (Lev/Egy 'left').",
"regex": "(?:أستدير|استدر|لف+|ألف+|دور+|أدور+|بدور|خش|أخش|بخش)\\s+(?:(?:على|ع)\\s+)?(?:يسار|يساراً|يسارا|اليسار|لليسار|شمال|الشمال|للشمال)\\s+(\\d+)\\s*درجة",
"command_template": "turn left $1 degrees"
},
{
"_": "Arabic: 'أستدير 360 درجة' (no direction)",
"regex": "أستدير\\s+(\\d+)\\s*درجة",
"_": "Arabic turn N degrees (no direction) — 'أستدير 360 درجة' / 'لف 180 درجة' / 'دور 90 درجة'.",
"regex": "(?:أستدير|استدر|لف+|دور+)\\s+(\\d+)\\s*درجة",
"command_template": "turn $1 degrees"
},
{
"_": "Arabic: 'أمشي/أتحرك 5 خطوات'",
"regex": "(?:أمشي|أتحرك)\\s+(\\d+)\\s*خطو(?:ات|ة|تين)?",
"_": "Arabic walk N steps forward — verbs: أمشي | امشي | أتحرك | تحرك | روح | اروح. Forward: للأمام | لقدام | قدام (Egy/Lev).",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(?:للأمام|لقدام|قدام)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:خطوة|خطوات|خطوتين)",
"command_template": "walk forward $1 steps"
},
{
"_": "Arabic walk N steps back — Back: للخلف | لورا | ورا (Egy/Lev/Gulf).",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(?:للخلف|لورا|ورا)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:خطوة|خطوات|خطوتين)",
"command_template": "walk backward $1 steps"
},
{
"_": "Arabic walk N steps forward — number BEFORE direction (also common in Egy/Lev).",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:خطوة|خطوات|خطوتين)\\s+(?:للأمام|لقدام|قدام)",
"command_template": "walk forward $1 steps"
},
{
"_": "Arabic walk N steps back — number BEFORE direction.",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:خطوة|خطوات|خطوتين)\\s+(?:للخلف|لورا|ورا)",
"command_template": "walk backward $1 steps"
},
{
"_": "Arabic walk N steps (default forward) — 'أمشي 5 خطوات' / 'امشي خمس خطوات' (digits only — Gemini is told to use digits).",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:خطوة|خطوات|خطوتين)",
"command_template": "walk $1 steps"
},
{
"_": "Arabic: 'أتحرك للأمام مترين' / 'أمشي للأمام 2 متر'",
"regex": "(?:أمشي|أتحرك)\\s+للأمام\\s+(\\d+(?:\\.\\d+)?)\\s*متر",
"_": "Arabic walk N steps — REVERSED word order: verb + unit + number ('أمشي خطوة واحدة' → after normalise_numbers → 'أمشي خطوة 1'). Common natural-Arabic phrasing where the number-adjective follows the noun. Without this entry, this very natural phrasing fails to dispatch.",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(?:خطوة|خطوات|خطوتين)\\s+(\\d+)",
"command_template": "walk $1 steps"
},
{
"_": "Arabic DUAL form 'خطوتين' (two steps) — single word, no separate digit. Without this entry the dispatcher would miss 'أمشي خطوتين' since the parametric regex needs \\d+. Maps directly to walk 2 steps.",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+خطوتين",
"command_template": "walk 2 steps"
},
{
"_": "Arabic DUAL form 'مترين' (two meters) — same dual-suffix pattern as above. 'أمشي مترين' / 'أتحرك مترين' → walk 2 meters.",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+مترين",
"command_template": "walk 2 meters"
},
{
"_": "Arabic DUAL form 'درجتين' (two degrees) — for completeness, though 2-degree turns are rare.",
"regex": "(?:أستدير|استدر|لف+|ألف+|دور+|أدور+|بدور)\\s+(?:يميناً|يمينا|اليمين|لليمين|يساراً|يسارا|اليسار|لليسار|شمال|الشمال|للشمال)?\\s*درجتين",
"command_template": "turn 2 degrees"
},
{
"_": "Arabic walk N meters forward — direction first.",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(?:للأمام|لقدام|قدام)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:متر|مترين|أمتار)",
"command_template": "walk forward $1 meters"
},
{
"_": "Arabic: 'أتحرك للخلف مترين'",
"regex": "(?:أمشي|أتحرك)\\s+للخلف\\s+(\\d+(?:\\.\\d+)?)\\s*متر",
"_": "Arabic walk N meters back — direction first.",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(?:للخلف|لورا|ورا)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:متر|مترين|أمتار)",
"command_template": "walk backward $1 meters"
},
{
"_": "Arabic walk N meters forward — number first.",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:متر|مترين|أمتار)\\s+(?:للأمام|لقدام|قدام)",
"command_template": "walk forward $1 meters"
},
{
"_": "Arabic walk N meters back — number first.",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:متر|مترين|أمتار)\\s+(?:للخلف|لورا|ورا)",
"command_template": "walk backward $1 meters"
},
{
"_": "Arabic walk N meters (default forward).",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|اروح|روح|اروحي|روحي)\\s+(\\d+(?:\\.\\d+)?)\\s*(?:متر|مترين|أمتار)",
"command_template": "walk $1 meters"
},
{
"_": "Generic dialect Arabic 'turn right' WITHOUT number. Catches forms NOT in turn_right.bot_phrases.arabic, e.g. 'ألف على اليمين', 'دور لليمين', 'أستدير إلى اليمين', 'لف ع اليمين'. Verbs: لف/ألف/بلف/دور/أدور/بدور/خش/أخش/بخش/حوّد/أحوّد/أستدير/استدير. Optional prep على/ع/إلى/لـ in front of direction.",
"regex": "(?:أستدير|استدير|استدر|لف+|ألف+|بلف|دور+|أدور+|بدور|خش|أخش|بخش|حوّد|أحوّد)\\s+(?:(?:على|ع|إلى)\\s+)?(?:ال)?(?:يمين|يميناً|يمينا|لليمين)",
"command_template": "turn right"
},
{
"_": "Generic dialect Arabic 'turn left' WITHOUT number — also catches شمال/الشمال.",
"regex": "(?:أستدير|استدير|استدر|لف+|ألف+|بلف|دور+|أدور+|بدور|خش|أخش|بخش|حوّد|أحوّد)\\s+(?:(?:على|ع|إلى)\\s+)?(?:ال)?(?:يسار|يساراً|يسارا|لليسار|شمال|الشمال|للشمال)",
"command_template": "turn left"
},
{
"_": "Generic dialect Arabic 'turn around' WITHOUT number — back/wara variants.",
"regex": "(?:أستدير|استدير|استدر|لف+|ألف+|بلف|دور+|أدور+|بدور)\\s+(?:(?:إلى|لـ)\\s*)?(?:للخلف|الخلف|للوراء|الوراء|للورا|ورا|لورا)",
"command_template": "turn around"
},
{
"_": "Generic dialect Arabic 'move forward' WITHOUT meters/steps — أمشي/أتحرك/أتقدم/أروح/بمشي/بتقدم + قدام/للأمام/الأمام variants.",
"regex": "(?:أمشي|امشي|امش|أتحرك|تحرك|أتقدم|بتقدم|أتقدّم|أروح|اروح|روح|براح|بمشي|بفوت|أفوت)\\s+(?:(?:إلى|لـ)\\s*)?(?:للأمام|الأمام|لقدام|قدام|للقدام)",
"command_template": "move forward"
},
{
"_": "Generic dialect Arabic 'move backward' WITHOUT meters/steps — أرجع/أتراجع/برجع + للخلف/ورا/الوراء variants.",
"regex": "(?:أرجع|ارجع|برجع|أتراجع|بتراجع|أتحرك|تحرك|أمشي|امشي|امش)\\s+(?:(?:إلى|لـ)\\s*)?(?:للخلف|الخلف|لورا|ورا|للوراء|الوراء)",
"command_template": "move backward"
},
{
"_": "STEP-CLOSER (English). Note: normalise_numbers runs BEFORE this regex matches and converts 'one'→'1'. The regex must accept both forms ('one step closer' OR '1 step closer'). Maps to a deterministic 1-step walk; distinct from 'walking to X' (planner) and 'come here' (YOLO-tracked).",
"regex": "(?:stepping|moving|coming|edging)\\s+closer|(?:one|1)\\s+step\\s+closer|step(?:ping)?\\s+(?:a\\s+(?:bit|little)\\s+)?forward",
"command_template": "walk forward 1 steps"
},
{
"_": "STEP-CLOSER (Arabic). Note: normalise_numbers converts 'واحدة'→'1' before this regex runs, so the optional 'one' part must match either واحدة or 1. Same single-step semantics as English.",
"regex": "أتقدم\\s+خطوة|أقترب\\s+خطوة|خطوة\\s+(?:(?:واحدة|1)\\s+)?(?:للأمام|قدام)|أقرب\\s+خطوة|أتقرّب\\s+خطوة|أقدّم\\s+خطوة|أخطو\\s+للأمام",
"command_template": "walk forward 1 steps"
},
{
"_": "WALK-TO-TARGET (English) — captures the named object after 'walking/heading/going to/towards'. Example bot phrases the dispatcher catches: 'Walking to the door.' / 'Heading to the chair.' / 'Walking towards the table.' / 'Going up to the wall.'. The captured target ($1) is plugged into the brain command 'walk to <target>', which Brain/command_parser._RE_WALK_TO routes into the LLaVA-grounded spatial planner. Constraints: (a) stops at sentence punctuation so 'walking to the door, then sitting' picks up just 'door', (b) the determiner-strip group includes 'that\\s+|this\\s+' so anaphora 'walking to that chair' captures 'chair' not 'that chair', (c) negative-lookahead before the target rejects pronouns (us|me|him|her|them|you|it|myself|yourself) so a Rule-4-violating description like 'I see a person walking towards us' does NOT fire 'walk to us'. Persona Rule 12 instructs Gemini to use these literal shapes. NOTE: target capture allows Arabic chars (U+0600U+06FF) too — handles the field case where canonical_normalizer translated an Arabic verb+connective to English ('أتجه نحو X' → 'walking to X' with X still Arabic) and we want the planner to receive the Arabic target name. Qwen2.5-VL is multilingual; passing Arabic target to LLaVA prompt works.",
"regex": "(?:approaching|(?:walking|heading|going|moving)\\s+(?:over\\s+)?(?:to|towards?|up\\s+to)\\b)\\s+(?:the\\s+|a\\s+|an\\s+|that\\s+|this\\s+|ال)?(?!(?:us|me|him|her|them|you|it|myself|yourself|themselves|himself|herself|itself|هو|هي|هم|هما|هن|نحن|أنت|أنا)\\b)([A-Za-z\\u0600-\\u06FF][A-Za-z0-9\\u0600-\\u06FF ._-]{0,40}?)\\s*(?=[.!?،,]|$)",
"command_template": "walk to $1"
},
{
"_": "WALK-TO-TARGET (Arabic) — أمشي / أتجه / أتحرك + (إلى | نحو | تجاه | باتجاه) + <target>. Character class is the WHOLE Arabic block U+0600U+06FF so vowel diacritics (fatha/damma/kasra/sukun, U+064BU+065F) inside the target name don't truncate the capture (e.g. 'البَاب' with fatha was previously stopping at the fatha and capturing None). Determiner-strip includes هذا/هذه/ذلك/تلك (anaphora) plus the optional ال prefix. Pronoun negative-lookahead rejects هو/هي/هم/أنت/نحن/نا/ي etc as targets — handled by lookbehind avoiding common pronoun stems.",
"regex": "(?:أمشي|امشي|أتحرك|تحرك|أتجه|متجه|أروح|اروح|روح)\\s+(?:إلى|نحو|تجاه|باتجاه|لـ?ال?)\\s+(?:هذا|هذه|ذلك|تلك)?\\s*(?:ال)?(?!(?:هو|هي|هم|هما|هن|نحن|أنت|أنتم|أنا)\\b)([\\u0600-\\u06FF][\\u0600-\\u06FF 0-9_-]{0,40}?)\\s*(?=[.!?،,]|$)",
"command_template": "walk to $1"
}
]
}

222
Config/language_tables.json Normal file
View File

@ -0,0 +1,222 @@
{
"_description": "Language tables for voice motion processing. Single source of truth for ALL vocabulary data — no Arabic/English motion words live in code anymore. Adding a new dialect / variant is a JSON-only edit.",
"_consumers": [
"Voice/number_words.py — spelled-out numbers → digits (EN + AR)",
"Voice/canonical_normalizer.py — Arabic structural translation → English",
"Voice/marcus_voice.py — reverse-command map for memory operations",
"Voice/sequences.py — control commands that must NOT be captured into recordings"
],
"english_fractions": {
"_description": "English fractional words. Two semantic flavours: 'additive' (combines with a preceding integer — '3 and a half steps' → 3.5) and 'leading' (standalone — 'half a meter' → 0.5). Both forms map to the same numeric value; flavours are kept separate so the parser knows the syntactic context to apply.",
"additive": {
"half": 0.5,
"halves": 0.5,
"quarter": 0.25,
"quarters": 0.25,
"third": 0.3333,
"thirds": 0.3333
},
"leading": {
"half": 0.5,
"quarter": 0.25,
"third": 0.3333
}
},
"arabic_fractions": {
"_description": "Arabic fractional words. Same additive vs leading split as English. 'Additive' applies after a digit + ون / و conjunction ('3 ونصف' → 3.5). 'Leading' applies before a unit ('نصف متر' → 0.5 meter). Multiple dialect spellings of half (نصف / نص).",
"additive": {
"نصف": 0.5,
"نص": 0.5,
"ربع": 0.25,
"ثلث": 0.3333
},
"leading": {
"نصف": 0.5,
"نص": 0.5,
"ربع": 0.25,
"ثلث": 0.3333
}
},
"english_numbers": {
"_description": "English number-word parser tables. Used by number_words.py to convert 'ninety degrees' → '90 degrees' before regex matching.",
"ones": {
"zero": 0, "one": 1, "two": 2, "three": 3, "four": 4,
"five": 5, "six": 6, "seven": 7, "eight": 8, "nine": 9,
"ten": 10, "eleven": 11, "twelve": 12, "thirteen": 13,
"fourteen": 14, "fifteen": 15, "sixteen": 16,
"seventeen": 17, "eighteen": 18, "nineteen": 19
},
"tens": {
"twenty": 20, "thirty": 30, "forty": 40, "fifty": 50,
"sixty": 60, "seventy": 70, "eighty": 80, "ninety": 90
},
"scale": {
"hundred": 100
},
"glue": ["and", "-"]
},
"arabic_numbers": {
"_description": "Arabic spelled-out numbers. Order matters — longer multi-word phrases come first so they're matched before their shorter prefixes.",
"literals": [
["ثلاثمائة وستون", 360],
["ثلاثمائة وستين", 360],
["ثلاث مائة وستون", 360],
["ثلاث مائة وستين", 360],
["مائة وثمانون", 180],
["مائة وثمانين", 180],
["مية وثمانين", 180],
["مائتان وسبعون", 270],
["مائتان وسبعين", 270],
["عشرون", 20], ["عشرين", 20],
["ثلاثون", 30], ["ثلاثين", 30],
["أربعون", 40], ["أربعين", 40], ["اربعون", 40], ["اربعين", 40],
["خمسون", 50], ["خمسين", 50],
["ستون", 60], ["ستين", 60],
["سبعون", 70], ["سبعين", 70],
["ثمانون", 80], ["ثمانين", 80],
["تسعون", 90], ["تسعين", 90],
["مائتان", 200], ["مائتين", 200], ["ميتين", 200],
["ثلاثمائة", 300], ["ثلاث مائة", 300],
["أربعمائة", 400], ["اربعمائة", 400], ["أربع مائة", 400],
["خمسمائة", 500], ["خمس مائة", 500],
["مائة", 100], ["مية", 100], ["ميه", 100],
["أحد عشر", 11], ["احد عشر", 11],
["اثنا عشر", 12], ["اثني عشر", 12],
["ثلاثة عشر", 13], ["ثلاث عشرة", 13],
["أربعة عشر", 14], ["اربعة عشر", 14],
["خمسة عشر", 15], ["خمس عشرة", 15],
["ستة عشر", 16], ["ست عشرة", 16],
["سبعة عشر", 17],
["ثمانية عشر", 18],
["تسعة عشر", 19],
["واحد", 1], ["واحدة", 1],
["اثنان", 2], ["اثنين", 2], ["اثنتان", 2], ["اثنتين", 2],
["ثلاثة", 3], ["ثلاث", 3],
["أربعة", 4], ["أربع", 4], ["اربعة", 4], ["اربع", 4],
["خمسة", 5], ["خمس", 5],
["ستة", 6], ["ست", 6],
["سبعة", 7], ["سبع", 7],
["ثمانية", 8], ["ثمان", 8],
["تسعة", 9], ["تسع", 9],
["عشرة", 10], ["عشر", 10]
]
},
"arabic_verbs": {
"_description": "Arabic verb roots → English gerund. Gerund matches Gemini's English bot_phrases (e.g. 'walking forward'). Multiple Arabic dialect roots can map to the same English verb.",
"walking": [
"أمشي", "امشي", "امش",
"أتحرك", "تحرك",
"اروح", "روح", "اروحي", "روحي",
"بمشي", "براح", "أراح",
"بفوت", "أفوت",
"سير", "اسير",
"أتجه", "اتجه", "يتجه", "نتجه",
"أتوجه", "اتوجه", "يتوجه", "توجه",
"متوجه", "متجه"
],
"turning": [
"أستدير", "استدر", "استدير",
"لفّ", "لف", "ألف", "بلف",
"دوّر", "دور", "أدور", "بدور",
"خش", "أخش", "بخش",
"حوّد", "أحوّد"
],
"walking backward": [
"أرجع", "ارجع", "برجع",
"أتراجع", "بتراجع", "تراجع"
],
"walking forward": [
"أتقدم", "بتقدم", "أتقدّم", "تقدم"
]
},
"arabic_directions": {
"_description": "Arabic direction words → English directions. Includes definite-article variants (اليمين), preposition-prefixed forms (لليمين), and dialect alternatives (شمال = left in Levantine/Egyptian).",
"right": [
"يميناً", "يمينا",
"اليمين", "لليمين",
"يمين"
],
"left": [
"يساراً", "يسارا",
"اليسار", "لليسار",
"يسار",
"الشمال", "للشمال", "شمال"
],
"forward": [
"للأمام", "الأمام", "أمام",
"لقدام", "للقدام", "قدام"
],
"backward": [
"للخلف", "الخلف",
"للوراء", "الوراء",
"لورا", "ورا", "للورا"
]
},
"arabic_units": {
"_description": "Arabic unit nouns → English unit. Singular-vs-plural form is preserved so the dispatcher's regex layer (which accepts both) sees a natural form. Dual forms (خطوتين / مترين / درجتين) live in arabic_duals.",
"step": ["خطوة"],
"steps": ["خطوات"],
"meter": ["متر"],
"meters": ["أمتار"],
"degree": ["درجة"],
"degrees": ["درجات"]
},
"arabic_duals": {
"_description": "Arabic dual forms — single words containing both count and unit ('خطوتين' = 'two steps'). Substituted as a single unit BEFORE the verb/dir/unit pass. The English target form must be readable as 'N units' so the regex layer can pick up the count.",
"2 steps": ["خطوتين"],
"2 meters": ["مترين", "أمتارين"],
"2 degrees": ["درجتين"],
"2 seconds": ["ثانيتين"]
},
"arabic_conjunctions": {
"_description": "Arabic conjunctions translated to space-padded English glue. Subbed early so subsequent verb/dir passes don't accidentally treat the conjunction as a word.",
" then ": ["ثم", "وبعدين", "بعدين", "وبعد"]
},
"arabic_connectives": {
"_description": "Arabic prepositions / determiners / pronouns. Mostly pass-through; 'إلى' / 'نحو' map to 'to' so walk-to-target patterns work. 'ع' / 'على' get folded into nothing meaningful but are translated to 'on' to avoid leaving raw Arabic mid-string.",
"to": ["إلى", "نحو", "تجاه", "باتجاه"],
"on": ["على", "ع"],
"in": ["في"],
"this": ["هذا", "هذه"],
"that": ["ذلك", "تلك"]
},
"motion_inverses": {
"_description": "Pairwise inverse map for reverse_last memory operation. Used by Voice/marcus_voice.py::_reverse_command. Parametric forms (e.g. 'turn left 90 degrees' ↔ 'turn right 90 degrees') are derived in code via regex, NOT listed here — only fixed pairs.",
"turn right": "turn left",
"turn left": "turn right",
"move forward": "move backward",
"move backward": "move forward",
"sit down": "stand up",
"stand up": "sit down",
"raise arm": "lower arm",
"lower arm": "raise arm"
},
"sequence_never_record": {
"_description": "Canonical commands that must NEVER be captured into a recording session. Control commands (start/save/cancel/play recording, pause/resume, stop, repeat/reverse) would create absurd macros if captured. Used by Voice/sequences.py::record_command.",
"canonicals": [
"start recording",
"save sequence",
"cancel recording",
"play sequence",
"list sequences",
"delete sequence",
"pause motion",
"resume motion",
"stop",
"repeat last",
"reverse last"
]
}
}

218
Core/motion_log.py Normal file
View File

@ -0,0 +1,218 @@
"""
motion_log.py — structured per-motion event log.
Drop-in: brain handlers and worker call log_motion(event, cmd, ...)
and the helper auto-parses direction/magnitude/unit from the canonical
string. The worker barely needs to know what it's logging.
Every motion the brain or worker executes writes ONE line to
`logs/motion.log` capturing:
timestamp | canonical | direction mag unit | target | actual | outcome | source
So a single voice command produces a paired START + END line:
17:45:01 | turn left 19 degrees | dir=left mag=19 unit=deg | target=2.4s | - | start | voice
17:45:03 | turn left 19 degrees | dir=left mag=19 unit=deg | target=2.4s | actual=2.41s | complete | voice
Or for a parameterless motion:
17:46:00 | turn right | dir=right mag=- unit=- | target=2.0s | - | start | voice
17:46:02 | turn right | dir=right mag=- unit=- | target=2.0s | actual=2.02s | complete | voice
This is independent of voice.log and transcript.log:
- voice.log = per-thread debug detail (dispatch, worker, watchdog,
reconnect)
- transcript.log = user-facing dialogue (what user said vs. what
Gemini said vs. what canonical fired)
- motion.log = ONE LINE PER MOTION with parsed params + actual
outcome for post-hoc auditing of physical movements
API:
log_motion(event, canonical, direction=..., magnitude=..., unit=...,
target_sec=..., actual_sec=..., source="voice"|"text",
outcome="start"|"complete"|"interrupted"|"error", **extra)
Events (event field):
'start' brain handler is about to issue motor commands.
target_sec is the planned duration; actual is None.
'complete' brain returned normally; actual_sec set.
'interrupted' motion_abort fired during execution.
'error' brain raised an exception; reason in extra.
"""
from __future__ import annotations
import logging
import os
import re
from logging.handlers import RotatingFileHandler
# ─── canonical parser ─────────────────────────────────────────────
#
# Map a canonical command string to (direction, magnitude, unit) so
# the structured log auto-fills these fields when the caller doesn't
# pass them explicitly.
_RE_TURN_DEG_DIR  = re.compile(r"^turn\s+(left|right)\s+(\d+(?:\.\d+)?)\s*deg(?:rees?)?$", re.I)
_RE_TURN_DEG      = re.compile(r"^turn\s+(\d+(?:\.\d+)?)\s*deg(?:rees?)?$", re.I)
_RE_TURN_STEP_DIR = re.compile(r"^turn\s+(left|right)\s+(\d+(?:\.\d+)?)\s*steps?$", re.I)
_RE_TURN_DIR      = re.compile(r"^turn\s+(left|right|around)$", re.I)
_RE_WALK_STEP     = re.compile(r"^walk\s+(?:(forward|back(?:ward)?)\s+)?(\d+(?:\.\d+)?)\s*steps?$", re.I)
_RE_WALK_M        = re.compile(r"^walk\s+(?:(forward|back(?:ward)?)\s+)?(\d+(?:\.\d+)?)\s*m(?:eter(?:s)?)?$", re.I)
_RE_MOVE_DIR      = re.compile(r"^move\s+(forward|back(?:ward)?|left|right)$", re.I)
_RE_WALK_TO       = re.compile(r"^walk\s+to\s+(.+)$", re.I)

# Parameterless canonicals → fixed structured fields.  A lookup table
# replaces the old if-chain: O(1) dispatch, and adding a canonical is a
# one-line edit instead of a new branch.
_BARE_CANONICALS = {
    "stop":          {"direction": "-",       "magnitude": "-", "unit": "-",    "kind": "stop"},
    "sit down":      {"direction": "-",       "magnitude": "-", "unit": "-",    "kind": "posture"},
    "stand up":      {"direction": "-",       "magnitude": "-", "unit": "-",    "kind": "posture"},
    "wave hello":    {"direction": "-",       "magnitude": "-", "unit": "-",    "kind": "arm"},
    "raise arm":     {"direction": "right",   "magnitude": "-", "unit": "-",    "kind": "arm"},
    "lower arm":     {"direction": "right",   "magnitude": "-", "unit": "-",    "kind": "arm"},
    "come here":     {"direction": "tracked", "magnitude": "-", "unit": "yolo", "kind": "come"},
    "follow me":     {"direction": "tracked", "magnitude": "-", "unit": "yolo", "kind": "follow"},
    "look around":   {"direction": "scan",    "magnitude": "-", "unit": "-",    "kind": "look"},
    "stay here":     {"direction": "-",       "magnitude": "-", "unit": "-",    "kind": "stay"},
    "go home":       {"direction": "home",    "magnitude": "-", "unit": "-",    "kind": "nav"},
    "patrol":        {"direction": "scan",    "magnitude": "-", "unit": "-",    "kind": "patrol"},
    "pause motion":  {"direction": "-",       "magnitude": "-", "unit": "-",    "kind": "pause"},
    "resume motion": {"direction": "-",       "magnitude": "-", "unit": "-",    "kind": "resume"},
}


def _canon_back(direction: str) -> str:
    """Normalise 'back' → 'backward' so the log always shows one spelling."""
    return "backward" if direction.startswith("back") else direction


def parse_canonical(cmd: str) -> dict:
    """Parse a canonical command into structured motion fields.

    Args:
        cmd: canonical command string (e.g. 'turn left 90 degrees').

    Returns:
        dict with keys 'direction', 'magnitude', 'unit', 'kind'.
        Best-effort — if the canonical doesn't match any known shape,
        all fields are '-' and kind='other'.
    """
    if not cmd:
        return {"direction": "-", "magnitude": "-", "unit": "-", "kind": "other"}
    s = cmd.strip().lower()

    # Fixed phrases first: exact-match lookup; copied so callers may
    # mutate the result without corrupting the shared table.
    bare = _BARE_CANONICALS.get(s)
    if bare is not None:
        return dict(bare)

    m = _RE_TURN_DEG_DIR.match(s)
    if m:
        return {"direction": m.group(1), "magnitude": m.group(2),
                "unit": "deg", "kind": "turn"}
    m = _RE_TURN_DEG.match(s)
    if m:
        return {"direction": "-", "magnitude": m.group(1),
                "unit": "deg", "kind": "turn"}
    m = _RE_TURN_STEP_DIR.match(s)
    if m:
        return {"direction": m.group(1), "magnitude": m.group(2),
                "unit": "step", "kind": "turn"}
    m = _RE_TURN_DIR.match(s)
    if m:
        return {"direction": m.group(1), "magnitude": "-",
                "unit": "-", "kind": "turn"}
    m = _RE_WALK_STEP.match(s)
    if m:
        # Direction group is optional — canonical 'walk N steps' means forward.
        return {"direction": _canon_back((m.group(1) or "forward").lower()),
                "magnitude": m.group(2), "unit": "step", "kind": "walk"}
    m = _RE_WALK_M.match(s)
    if m:
        return {"direction": _canon_back((m.group(1) or "forward").lower()),
                "magnitude": m.group(2), "unit": "m", "kind": "walk"}
    m = _RE_MOVE_DIR.match(s)
    if m:
        return {"direction": _canon_back(m.group(1).lower()),
                "magnitude": "-", "unit": "-", "kind": "move"}
    m = _RE_WALK_TO.match(s)
    if m:
        # 'walk to <target>' — the magnitude field carries the target name.
        return {"direction": "tracked", "magnitude": m.group(1).strip(),
                "unit": "target", "kind": "walk_to"}

    return {"direction": "-", "magnitude": "-", "unit": "-", "kind": "other"}
# ─── logger setup (runs once at import) ───────────────────────────
# Resolve the project root: prefer the project's env_loader; fall back to
# two directories above this file (Core/ → repo root) if it can't be
# imported (e.g. when this module is used standalone).
try:
    from Core.env_loader import PROJECT_ROOT  # type: ignore
except Exception:
    PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# logs/motion.log under the project root; the directory is created eagerly
# so the RotatingFileHandler below can open the file at import time.
_LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
os.makedirs(_LOG_DIR, exist_ok=True)
_LOG_PATH = os.path.join(_LOG_DIR, "motion.log")
# Dedicated 'marcus.motion' logger; propagate=False keeps motion lines out
# of any root-logger handlers (console / other log files).
_logger = logging.getLogger("marcus.motion")
_logger.setLevel(logging.INFO)
_logger.propagate = False
# Guard against re-import: only attach a handler if none exists yet, so
# repeated imports don't stack duplicate handlers (and duplicate lines).
if not _logger.handlers:
    # Rotate at ~2 MB, keeping 3 backups.
    _h = RotatingFileHandler(
        _LOG_PATH, maxBytes=2_000_000, backupCount=3, encoding="utf-8",
    )
    # Use pipe-separator format so the file is grep/awk friendly.
    _h.setFormatter(logging.Formatter("%(asctime)s | %(message)s"))
    _logger.addHandler(_h)
def log_motion(event: str,
               canonical: str,
               direction=None,
               magnitude=None,
               unit=None,
               target_sec=None,
               actual_sec=None,
               source: str = "voice",
               **extra) -> None:
    """Write one structured, fixed-width line for a motion event.

    Any of direction/magnitude/unit left as None is auto-filled from
    parse_canonical(canonical), so a worker can simply call
    log_motion('start', cmd) and let the parser supply the fields.
    Pass a field explicitly to override (e.g. when the brain has
    odom-measured actuals). Extra keyword args are appended as
    trailing 'k=v' tokens. Columns are padded so the log stays
    grep/awk friendly.
    """
    def _fmt(value):
        # None renders as '-', floats with two decimals, rest via str().
        if value is None:
            return "-"
        if isinstance(value, float):
            return "{:.2f}".format(value)
        return str(value)

    canonical = (canonical or "").strip() or "-"
    parsed = {} if canonical == "-" else parse_canonical(canonical)
    if direction is None:
        direction = parsed.get("direction", "-")
    if magnitude is None:
        magnitude = parsed.get("magnitude", "-")
    if unit is None:
        unit = parsed.get("unit", "-")

    target_txt = _fmt(target_sec) + "s" if target_sec is not None else "-"
    actual_txt = _fmt(actual_sec) + "s" if actual_sec is not None else "-"
    parts = [
        "{:35s}".format(canonical[:35]),
        "dir={:8s}".format(_fmt(direction)[:8]),
        "mag={:8s}".format(_fmt(magnitude)[:8]),
        "unit={:6s}".format(_fmt(unit)[:6]),
        "target={:>7s}".format(target_txt),
        "actual={:>7s}".format(actual_txt),
        "{:11s}".format(event[:11]),
        "{:5s}".format(source[:5]),
    ]
    if extra:
        parts.append(" ".join("{}={}".format(k, _fmt(v)) for k, v in extra.items()))
    _logger.info(" | ".join(parts))
# Convenience tee so console + log show the same one-liner
# Convenience tee so console + log show the same one-liner
def print_motion(event: str, canonical: str, **kw) -> None:
    """Like log_motion but ALSO prints to stdout so the operator sees
    the line in the terminal dashboard. Use for the high-traffic
    'start' and 'complete' events; use log_motion alone for the noisier
    intermediate events.

    Fix: the original bound kw.get("source") to a local `src` that was
    never used in the printed line — the dead local is removed (source
    still reaches the log via log_motion's **kw pass-through).
    """
    log_motion(event, canonical, **kw)
    line = " [Motion {:9s}] {:30s} dir={} mag={} unit={}".format(
        event, canonical[:30],
        kw.get("direction", "-"),
        kw.get("magnitude", "-"),
        kw.get("unit", "-"),
    )
    target = kw.get("target_sec")
    if target is not None:
        line += " target={:.2f}s".format(float(target))
    actual = kw.get("actual_sec")
    if actual is not None:
        line += " actual={:.2f}s".format(float(actual))
    print(line)

46
Core/motion_state.py Normal file
View File

@ -0,0 +1,46 @@
"""
motion_state.py shared motion-control signals.
Three module-level threading.Events used by Voice/marcus_voice.py to
control in-progress motion. Producers are VoiceModule's motion worker;
consumers are brain motion loops in Brain/command_parser.py,
Brain/executor.py, Navigation/marcus_odometry.py, Navigation/patrol.py.
motion_abort hard stop. Loops break out and run gradual_stop().
Set by 'stop'/'cancel'/etc. Cleared by the worker
at the start of each new non-stop command, so a
stale set never kills the next motion.
motion_pause soft hold. Loops do NOT break; they hold zero
velocity and wait for the event to clear, then
resume the same loop iteration where they paused.
While paused they ALSO check motion_abort each
~50ms a pause-then-stop combo cleanly aborts.
The helper `wait_while_paused` centralizes the polling loop so brain
sites don't repeat the same stanza:
if motion_abort.is_set(): break
wait_while_paused()
if motion_abort.is_set(): break
send_vel(...)
time.sleep(0.05)
Single source of truth placed in Core/ so both Voice/* and Brain/*
can import without circular deps.
"""
from __future__ import annotations
import threading
import time
motion_abort: threading.Event = threading.Event()
motion_pause: threading.Event = threading.Event()


def wait_while_paused(poll_sec: float = 0.05) -> None:
    """Sleep in short polls for as long as motion_pause is held.

    Returns the moment the pause clears OR motion_abort fires, so the
    caller's very next motion_abort check can bail out immediately.
    """
    while True:
        if motion_abort.is_set() or not motion_pause.is_set():
            return
        time.sleep(poll_sec)

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,434 @@
[
{
"time": "14:39:02",
"cmd": "turn around",
"response": "Turning left",
"duration_s": 4.94
},
{
"time": "14:40:32",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:40:53",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:41:17",
"cmd": "walk backward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:41:20",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:41:23",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:41:26",
"cmd": "walk 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:41:29",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:41:32",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:41:35",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:41:39",
"cmd": "walk 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:41:42",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:42:22",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:42:47",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:43:08",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:43:24",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:43:37",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:43:40",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:44:02",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:44:08",
"cmd": "turn right 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:44:15",
"cmd": "turn right 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:44:21",
"cmd": "turn right 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:44:47",
"cmd": "turn left 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:45:29",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:45:48",
"cmd": "turn left 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:46:25",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:46:28",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:46:32",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:46:43",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:46:46",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:47:36",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:47:49",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:48:16",
"cmd": "turn around",
"response": "Turning left",
"duration_s": 17.53
},
{
"time": "14:48:52",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:49:39",
"cmd": "walk forward 1 meters",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:49:43",
"cmd": "walk forward 1 meters",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:49:47",
"cmd": "walk forward 1 meters",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:49:52",
"cmd": "walk forward 1 meters",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:50:07",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:51:17",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:51:41",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:52:13",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:52:50",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:53:15",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:54:05",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:56:07",
"cmd": "go back o",
"response": "Turning left",
"duration_s": 15.61
},
{
"time": "14:56:59",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:57:00",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:57:04",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:57:33",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:57:37",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:57:40",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:57:45",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:58:00",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:58:05",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:58:23",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:58:26",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:58:30",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "14:58:33",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:00:23",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:00:25",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:01:58",
"cmd": "follow me",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:02:03",
"cmd": "walk 2 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:02:07",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:02:18",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:02:25",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:02:28",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:02:53",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:04:40",
"cmd": "turn left 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:04:46",
"cmd": "turn left 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:05:04",
"cmd": "turn right 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "15:05:10",
"cmd": "turn right 90 degrees",
"response": "local command",
"duration_s": 0.0
}
]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,74 @@
[
{
"time": "16:40:07",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "16:40:22",
"cmd": "turn left 19 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "16:40:37",
"cmd": "walk backward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "16:40:55",
"cmd": "walk forward 4 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "16:41:05",
"cmd": "walk backward 4 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "16:41:47",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "16:42:02",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "16:42:18",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "16:42:44",
"cmd": "walk 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "16:43:05",
"cmd": "walk backward 3 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "16:43:06",
"cmd": "turn right 0 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "16:43:49",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
}
]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,8 @@
[
{
"time": "09:02:24",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
}
]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,56 @@
[
{
"time": "09:14:00",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:14:04",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:15:07",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:16:07",
"cmd": "turn left 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:17:20",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:17:40",
"cmd": "walk back 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:18:27",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "09:18:40",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:19:55",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
}
]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,26 @@
[
{
"time": "09:40:17",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:40:20",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:40:37",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:40:40",
"cmd": "walk 1 steps",
"response": "local command",
"duration_s": 0.0
}
]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,20 @@
[
{
"time": "09:42:11",
"cmd": "turn around",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:42:20",
"cmd": "look around",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:42:34",
"cmd": "turn around",
"response": "local command",
"duration_s": 0.0
}
]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,26 @@
[
{
"time": "09:49:11",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:49:39",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:50:07",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:50:28",
"cmd": "turn right 100 degrees",
"response": "local command",
"duration_s": 0.0
}
]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,8 @@
[
{
"time": "11:24:20",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
}
]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,248 @@
[
{
"time": "11:31:56",
"cmd": "walk 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:32:29",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:33:19",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:33:31",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:34:22",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "11:34:57",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:35:02",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:35:28",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:35:37",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:36:13",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:36:28",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:36:47",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:36:50",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:37:09",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:37:12",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:37:15",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:37:26",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:37:37",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:37:52",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "11:37:56",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:39:06",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "11:39:33",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:40:00",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "11:40:53",
"cmd": "stay here",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:41:15",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "11:42:27",
"cmd": "walk 2 meters",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:42:50",
"cmd": "turn 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:43:22",
"cmd": "turn 360 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:43:42",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:44:15",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "11:44:57",
"cmd": "walk forward 10 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:44:58",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:45:18",
"cmd": "walk backward 5 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:45:30",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:45:56",
"cmd": "walk back 3 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:45:59",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:46:19",
"cmd": "walk to person",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:47:19",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:48:05",
"cmd": "walk forward 5 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:48:08",
"cmd": "turn right 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "11:48:13",
"cmd": "turn left 2 steps",
"response": "local command",
"duration_s": 0.0
}
]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,236 @@
[
{
"time": "12:54:29",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:54:46",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:55:28",
"cmd": "walk to حقيبة",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:56:36",
"cmd": "walk forward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:56:39",
"cmd": "turn left 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:56:59",
"cmd": "walk backward 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:57:59",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:58:07",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:58:22",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "12:59:14",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:59:17",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:59:30",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "12:59:58",
"cmd": "turn left 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:00:41",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "13:01:52",
"cmd": "walk forward 5 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:02:09",
"cmd": "walk back 2 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:02:52",
"cmd": "walk 5 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:02:57",
"cmd": "walk forward 2 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:03:04",
"cmd": "turn left 3 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:03:07",
"cmd": "turn right 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:03:51",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:03:56",
"cmd": "walk forward 2 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:03:59",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:04:27",
"cmd": "walk backward 3 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:04:32",
"cmd": "walk forward 2 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:05:19",
"cmd": "walk backward 10 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:05:30",
"cmd": "walk forward 5 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:05:34",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:06:27",
"cmd": "turn right 180 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:06:30",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:06:33",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:07:15",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
},
{
"time": "13:08:52",
"cmd": "turn right 90 degrees",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:08:55",
"cmd": "walk 1 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:08:58",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:10:28",
"cmd": "walk backward 20 steps",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:10:29",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:11:41",
"cmd": "stop",
"response": "local command",
"duration_s": 0.0
},
{
"time": "13:11:57",
"cmd": "come here",
"response": "Coming to you",
"duration_s": 0.0
}
]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1,8 @@
{
"name": "name-with-spaces",
"created_at": 1777296029.0382416,
"saved_at": 1777296029.0382435,
"commands": [
"turn right"
]
}

View File

@ -3,10 +3,27 @@
**Project**: Marcus | YS Lootah Technology
**Hardware**: Unitree G1 EDU Humanoid (29 DOF) + Jetson Orin NX 16 GB
**Robot persona**: **Sanad** (wake word + self-intro; project code still lives under `Marcus/`)
**Updated**: 2026-04-21
**Updated**: 2026-04-28
---
## Recent deltas (since 2026-04-25 — bilingual S2S + decimal/fraction motion + dispatcher hardening)
- **Voice flipped from STT-only → full Gemini Live S2S** — Gemini now hears the mic AND replies through the G1 speaker (Sanad pattern). The Gemini WebSocket lives in a separate Python 3.10+ subprocess (`Voice/gemini_runner.py`, `gemini_sdk` env) that **owns the speaker** via `unitree_sdk2py`; Marcus's parent process (Python 3.8) forwards camera frames to it over stdin and reads JSON-line transcripts off its stdout via `Voice/gemini_script.py::GeminiBrain` (the manager). `TtsMaker` is no longer on the conversational path — it remains wired through `API/audio_api.py` for any non-Gemini brain announcement that ever needs to speak.
- **Wake-word gating moved from Marcus → Gemini persona** — Marcus does NOT check for "Sanad" / "سند" in Python any more. The dispatcher just listens to whatever Gemini speaks; if Gemini emits a motion-confirmation phrase, the matching motion fires. Discipline lives in `config_Voice.json::stt.gemini_system_prompt` (~21 KB persona with Rules 1–12). Field-tested rules include 1c (no hallucinated motion from silence/ambiguity), 1d (no fabricated compound chains), 4b (no destination words in motion confirmations), 5d (correct Arabic come-to-user grammar — `أتي إليك`, not `أتعال`), 5e (no zero/negative quantities), 5g (step-closer vs come-to-user), 5h + 5h-2 (act OR ask, never both — and never put motion verbs inside a clarifying question), 6 (parametric motions in canonical shapes), 6b (decimals + fractions), 7 (repeat/reverse memory ops), 8 (stop priority), 9 (state-marker awareness), 9b (never emit `[STATE-...]` yourself), 10 (pause vs stop), 11 (record/save/play sequences), 12 + 12b (walk-to-target spatial planner + anaphora resolution).
- **Bilingual support — Arabic is back**, in both directions, fluently. `Voice/canonical_normalizer.py` translates Arabic structural patterns ("أمشي يميناً 3 خطوات") to English canonical phrases ("walking right 3 steps") before regex scan; `Voice/number_words.py` converts spelled-out numbers and fractions in both languages to digit-decimal form ("ثلاث خطوات ونصف" → "3.5 steps", "three and a half steps" → "3.5 steps"). All vocabulary (Arabic verb roots / directions / units / duals / conjunctions / connectives / fractions / English number words / motion inverses) is externalised to **`Config/language_tables.json`** — single source of truth, JSON-only edit to add a dialect.
- **Decimal + fraction motion support across the pipeline** — `walk 1.79 meters`, `turn 22.5 degrees`, `walk 3.5 steps`, `walk 0.5 steps`, `أمشي 3 خطوات ونصف`, `نصف متر للأمام`, `مترين وربع` all dispatch correctly. Step regexes in `Brain/command_parser.py` widened from `(\d+)` to `(\d+(?:\.\d+)?)`; durations switched from `int(steps)` to `float(steps)`. Persona Rule 6b documents the conversion contract for Gemini.
- **Dispatcher strip-layer pipeline** in `Voice/marcus_voice.py` — every Gemini bot transcript now passes: `_STATE_ECHO_RE` (drop hallucinated `[STATE-...]` echoes) → `_QUOTED_RE` (drop quoted user mentions) → `_QUESTION_RE` (drop question clauses, even those containing motion verbs) → `normalise_numbers()``to_canonical_english()` → instruction regex scan. Per-turn fired-set + `command_cooldown_sec` dedup prevents streaming-partial double-fires. `_REVERSE_PAIRS` (used by `reverse_last`) is loaded from `language_tables.motion_inverses`, no longer a hardcoded dict. `Voice/sequences.py` loads its never-record list from `language_tables.sequence_never_record.canonicals`.
- **Motion-state primitives** — new `Core/motion_state.py` exposes `motion_abort` and `motion_pause` `threading.Event`s plus `wait_while_paused()`; consumed by the executor, command_parser fast-paths, odometry, autonomous mode, and goal_nav for clean voice-triggered abort/pause/resume. `Core/motion_log.py` adds `log_motion()` / `print_motion()` for actionable motion summaries (commanded vs target durations, step counts, rotation degrees) per the user's "log actual movement" request.
- **Three motion-execution modes**, dispatched by intent shape (see [Brain/command_parser.py](Brain/command_parser.py) and [Brain/marcus_brain.py](Brain/marcus_brain.py)):
1. **Deterministic parametric** (no vision) — `walk N steps`, `walk N meters`, `turn N degrees`, bare directionals. Time × velocity OR closed-loop on dead-reckoned position. ±10 cm typical (no real position feedback today; see "open issue" below).
2. **YOLO-tracked** — `come here` / `come to me` / `تعال` (smart approach, stops ~arm's length when person bbox fills ≥ 0.32 of frame); `follow me` / `اتبعني` (forward bursts while person visible and not too close).
3. **LLaVA-grounded** — `walk to the door` / `walk to the chair` / `أمشي إلى الباب`. Spatial planner re-asks LLaVA every 2.5 s for bearing + distance; turn-toward + walk-forward bursts until `distance == "near"` or scan-attempts exhausted.
- **mic_gain bumped to 4.5** (was 2.5) — far-field G1 mic is quiet at default; field-tuned for ~12 m talking distance.
- **Holosoma plumbing observation (no code change)** — upstream `holosoma_inference` only accepts `velocity_input ∈ {keyboard, interface, joystick, ros2}`; the `--velocity-input zmq` shown in [Doc/note.txt](Doc/note.txt) implies a local fork or aspirational config. Marcus's `send_vel()` PUBs JSON over ZMQ regardless; whatever subscribes (a fork, the existing `Bridge/ros2_zmq_bridge.py`, or nothing) determines whether motion actually reaches the policy. Marcus's own dead-reckoning integrates the **commanded** velocity, not the robot's real motion — so `walk 1 meter` accuracy is policy-tracking-error bounded (±10 cm), not closed-loop. ROS2 `/dog_odom` is intentionally disabled in `Navigation/marcus_odometry.py:230-233` because in-process DDS init causes `bad_alloc` against Holosoma's ONNX arena and YOLO's CUDA allocator. **Open issue** — fix paths discussed but not implemented: sidecar DDS state publisher (option B), Holosoma state-tee fork (option A), or sidecar ROS2 republisher.
- **New files**`Voice/canonical_normalizer.py`, `Voice/_language_tables.py`, `Voice/number_words.py`, `Voice/sequences.py`, `Voice/_probe_*.py` (smoke probes), `Core/motion_state.py`, `Core/motion_log.py`, `Config/language_tables.json`, `Config/instruction.json`.
- **YOLO device policy SOFTENED** — earlier docs said `yolo_device=cuda` was hard-required and `_resolve_device` raised `RuntimeError` on missing CUDA. That policy was relaxed: when Qwen2.5-VL is resident in VRAM (~11 GB), YOLO on `cuda` adds another ~2 GB and pushes the 16 GB Orin NX over budget. Current default is **`yolo_device: cpu`** in `Config/config_Vision.json` (13 fps on Orin CPU — sufficient for the YOLO fast-path use cases: `come here` arrival check at 1 Hz, `follow me` at 2 Hz, goal-detection one-shot). `Vision/marcus_yolo.py::_resolve_device` still raises if `yolo_device=cuda` is set without working CUDA. Use `cuda` only when `subsystems.vlm=false` (no Qwen) so YOLO has GPU headroom. The "Steady-state FPS on Orin" 21.9 fps figure in `environment.md` was measured with cuda+VLM-disabled — a different operating point than production.
## Recent deltas (since 2026-04-06)
- **GPU-only YOLO**`_resolve_device()` raises `RuntimeError` if CUDA is missing. `yolo_device=cuda`, `yolo_half=true` by default.

View File

@ -1,7 +1,7 @@
# Marcus — End-to-End Pipeline
**Robot persona:** Sanad (wake word + self-intro)
**Updated:** 2026-04-21
**Updated:** 2026-04-28
One map of every data path from sensor to motor, voice to speech. Cross-reference with `architecture.md` (what each file is), `functions.md` (exact function signatures — AST-generated), and `MARCUS_API.md` (usage examples + JSON schemas).

View File

@ -50,6 +50,26 @@ try:
except Exception:
_cfg = {}
# Shared motion-abort signal — Voice/marcus_voice.py raises this flag
# when the user says 'stop'; the walk/turn loops below poll it to break
# out early. See Core/motion_state.py. The import is guarded so this
# odometry module still loads standalone (e.g. for tests) when Core
# isn't on sys.path.
try:
    from Core.motion_state import motion_abort, wait_while_paused
except Exception:
    # Standalone-only fallback. Production Marcus always succeeds the
    # import above (Core/motion_state.py sits in the project root);
    # these inert stand-ins exist purely so the module can be imported
    # in isolation for unit tests / tooling. If they are ever exercised
    # at runtime, voice pause/abort will not propagate — that is a
    # deployment-error symptom, not normal operation. Bug #14 in the
    # 14-bug audit.
    import threading as _t
    motion_abort = _t.Event()  # inert fallback; never set

    def wait_while_paused(_ps: float = 0.05) -> None:  # type: ignore
        pass  # no-op fallback
ZMQ_HOST = str(_cfg.get("zmq_host", "127.0.0.1"))
ZMQ_PORT = int(_cfg.get("zmq_port", 5556))
ROS2_ODOM_TOPIC = str(_cfg.get("ros2_odom_topic", "/dog_odom"))
@ -436,6 +456,14 @@ class Odometry:
try:
while time.time() - t0 < timeout:
if motion_abort.is_set():
print(f" [Odom] ⏸ Walk aborted by user (motion_abort)")
self._gradual_stop()
return False
wait_while_paused()
if motion_abort.is_set():
self._gradual_stop()
return False
with self._lock:
dx = self._x - start_x
dy = self._y - start_y
@ -474,6 +502,14 @@ class Odometry:
t0 = time.time()
try:
while time.time() - t0 < duration:
if motion_abort.is_set():
print(f" [Odom] ⏸ Time-based walk aborted by user")
self._gradual_stop()
return False
wait_while_paused()
if motion_abort.is_set():
self._gradual_stop()
return False
self._send_vel(vx=vx)
time.sleep(0.05)
except KeyboardInterrupt:
@ -543,6 +579,14 @@ class Odometry:
try:
while time.time() - t0 < timeout:
if motion_abort.is_set():
print(f" [Odom] ⏸ Turn aborted by user (motion_abort)")
self._gradual_stop()
return False
wait_while_paused()
if motion_abort.is_set():
self._gradual_stop()
return False
with self._lock:
current = self._heading
@ -577,6 +621,14 @@ class Odometry:
t0 = time.time()
try:
while time.time() - t0 < duration:
if motion_abort.is_set():
print(f" [Odom] ⏸ Time-based turn aborted by user")
self._gradual_stop()
return False
wait_while_paused()
if motion_abort.is_set():
self._gradual_stop()
return False
self._send_vel(vyaw=vyaw)
time.sleep(0.05)
except KeyboardInterrupt:

View File

@ -10,6 +10,7 @@ from API.llava_api import ask_patrol
from API.memory_api import mem, log_detection
from Brain.executor import move_step
from Core.config_loader import load_config
from Core.motion_state import motion_abort
# Persist patrol events to logs/navigation.log so a long unattended patrol
# session leaves a usable post-mortem audit (start/finish, every step's
@ -63,6 +64,13 @@ def patrol(duration_minutes: float = 0.0, alert_callback=None):
try:
while time.time() < end_time:
# User said 'stop' (or any other interrupt). Bail out cleanly
# — gradual_stop is in the finally block, so the robot
# decelerates safely. The patrol's outer 'duration' is just
# the upper bound; honouring abort means we exit early.
if motion_abort.is_set():
_plog("motion_abort set — ending patrol early", "warn")
break
step += 1
# ----- 1. YOLO PPE violations -----

198
Voice/_language_tables.py Normal file
View File

@ -0,0 +1,198 @@
"""
_language_tables.py load Config/language_tables.json once and cache.
All Voice/* modules that need vocabulary data import from here instead
of hardcoding tables. The JSON file is the single source of truth for
Arabic verbs / directions / units / numbers / English number words /
motion inverses / sequence-never-record list.
Loud failure on missing/malformed file (matches Marcus's 'no silent
config degradation' policy from earlier rounds).
Public API:
    LANG = load()                               → dict with all top-level sections
    LANG["english_numbers"]["ones"]             → {word: int}
    LANG["arabic_verbs"]["walking"]             → list of Arabic root strings
    LANG["arabic_duals"]["2 steps"]             → list of dual-form strings
    LANG["motion_inverses"]                     → flat dict of pairs
    LANG["sequence_never_record"]["canonicals"] → list of canonical names
Convenience flatteners (build the inverse maps consumers usually want):
    flat_arabic_verbs()        → {ar_root: en_gerund}
    flat_arabic_directions()   → {ar_word: en_direction}
    flat_arabic_units()        → {ar_unit: en_unit}
    flat_arabic_duals()        → {ar_dual_word: 'N units'}
    flat_arabic_conjunctions() → {ar_conj: ' english_glue '}
    flat_arabic_connectives()  → {ar_word: en_word}
"""
from __future__ import annotations
import json
import os
import sys
from typing import Dict, List
_PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _PROJECT_DIR not in sys.path:
sys.path.insert(0, _PROJECT_DIR)
_CONFIG_PATH = os.path.join(_PROJECT_DIR, "Config", "language_tables.json")
_CACHE: dict = {}
def load() -> dict:
    """Load, validate, and cache Config/language_tables.json.

    Returns the cached dict on every call after the first. Raises
    RuntimeError when the file is missing, unparseable, or lacks a
    required top-level section — voice processing is unusable without
    the vocabulary tables, so a loud failure at startup beats silent
    degradation during a user demo.
    """
    if _CACHE:
        return _CACHE
    if not os.path.isfile(_CONFIG_PATH):
        raise RuntimeError(
            "Config/language_tables.json missing at {} — voice motion "
            "vocabulary cannot load without it. Restore from git or "
            "rebuild from the documented schema.".format(_CONFIG_PATH)
        )
    try:
        with open(_CONFIG_PATH, "r", encoding="utf-8") as f:
            tables = json.load(f) or {}
    except Exception as e:
        raise RuntimeError(
            "Config/language_tables.json malformed: {}".format(e),
        ) from e
    # Sanity check: every section the Voice layer consumes must exist.
    required = (
        "english_numbers", "arabic_numbers",
        "english_fractions", "arabic_fractions",
        "arabic_verbs", "arabic_directions", "arabic_units",
        "arabic_duals", "arabic_conjunctions", "arabic_connectives",
        "motion_inverses", "sequence_never_record",
    )
    missing = [section for section in required if section not in tables]
    if missing:
        raise RuntimeError(
            "Config/language_tables.json missing top-level sections: {}"
            .format(missing)
        )
    _CACHE.update(tables)
    return _CACHE
def _invert_grouped(grouped: dict, drop_keys: tuple = ("_description",)) -> Dict[str, str]:
"""Helper: flip a {english_label: [arabic_word, ...]} dict into
{arabic_word: english_label}. Skips keys named in drop_keys (used
to skip the '_description' annotation in the JSON)."""
out: Dict[str, str] = {}
for en, ar_list in grouped.items():
if en in drop_keys:
continue
if not isinstance(ar_list, list):
continue
for ar in ar_list:
if isinstance(ar, str) and ar.strip():
out[ar] = en
return out
def flat_arabic_verbs() -> Dict[str, str]:
    """Inverse map {arabic_root: english_label} from arabic_verbs."""
    tables = load()
    return _invert_grouped(tables["arabic_verbs"])
def flat_arabic_directions() -> Dict[str, str]:
    """Inverse map {arabic_word: english_direction} from arabic_directions."""
    tables = load()
    return _invert_grouped(tables["arabic_directions"])
def flat_arabic_units() -> Dict[str, str]:
    """Inverse map {arabic_unit: english_unit} from arabic_units."""
    tables = load()
    return _invert_grouped(tables["arabic_units"])
def flat_arabic_duals() -> Dict[str, str]:
    """Inverse map {arabic_dual_word: english_label} from arabic_duals."""
    tables = load()
    return _invert_grouped(tables["arabic_duals"])
def flat_arabic_conjunctions() -> Dict[str, str]:
    """Inverse map {arabic_conj: english_glue} from arabic_conjunctions."""
    tables = load()
    return _invert_grouped(tables["arabic_conjunctions"])
def flat_arabic_connectives() -> Dict[str, str]:
    """Inverse map {arabic_word: english_word} from arabic_connectives."""
    tables = load()
    return _invert_grouped(tables["arabic_connectives"])
def english_numbers_ones() -> Dict[str, int]:
    """{word: int} from english_numbers.ones, minus '_description'."""
    section = load()["english_numbers"]["ones"]
    return {word: int(val) for word, val in section.items()
            if word != "_description"}
def english_numbers_tens() -> Dict[str, int]:
    """{word: int} from english_numbers.tens, minus '_description'."""
    section = load()["english_numbers"]["tens"]
    return {word: int(val) for word, val in section.items()
            if word != "_description"}
def english_numbers_scale() -> Dict[str, int]:
    """{word: int} from english_numbers.scale, minus '_description'."""
    section = load()["english_numbers"]["scale"]
    return {word: int(val) for word, val in section.items()
            if word != "_description"}
def english_numbers_glue() -> set:
    """Set of glue tokens from english_numbers.glue."""
    glue = load()["english_numbers"]["glue"]
    return set(glue)
def arabic_number_literals() -> List[tuple]:
    """List of (arabic_word, integer_value) pairs, in declaration order.

    Malformed entries (wrong shape or wrong types) are skipped. Caller
    is expected to keep ordering (longest-first) when applying.
    """
    pairs: List[tuple] = []
    for item in load()["arabic_numbers"].get("literals", []):
        if not (isinstance(item, list) and len(item) == 2):
            continue
        word, value = item
        if isinstance(word, str) and isinstance(value, int):
            pairs.append((word, value))
    return pairs
def motion_inverses() -> Dict[str, str]:
    """Flat dict of motion-inverse pairs, minus the '_description' key."""
    pairs = load()["motion_inverses"]
    return {cmd: inv for cmd, inv in pairs.items() if cmd != "_description"}
def sequence_never_record() -> set:
    """Set of canonical names from sequence_never_record.canonicals."""
    section = load()["sequence_never_record"]
    return set(section.get("canonicals", []))
def english_fractions_additive() -> Dict[str, float]:
    """English fraction words that COMBINE with a preceding integer
    ('3 and a half' → 3 + 0.5). Skips '_description'."""
    section = load()["english_fractions"]["additive"]
    return {word: float(val) for word, val in section.items()
            if word != "_description"}
def english_fractions_leading() -> Dict[str, float]:
    """English fraction words that STAND ALONE before a unit
    ('half a meter' → 0.5 meter). Skips '_description'."""
    section = load()["english_fractions"]["leading"]
    return {word: float(val) for word, val in section.items()
            if word != "_description"}
def arabic_fractions_additive() -> Dict[str, float]:
    """Arabic fraction words that COMBINE with a preceding digit
    via the و conjunction ('3 ونصف' → 3.5). Skips '_description'."""
    section = load()["arabic_fractions"]["additive"]
    return {word: float(val) for word, val in section.items()
            if word != "_description"}
def arabic_fractions_leading() -> Dict[str, float]:
    """Arabic fraction words STANDING ALONE before a unit
    ('نصف متر' → 0.5 meter). Skips '_description'."""
    section = load()["arabic_fractions"]["leading"]
    return {word: float(val) for word, val in section.items()
            if word != "_description"}
def arabic_unit_words() -> set:
    """Set of every Arabic unit word across all arabic_units groups.

    Used by the fraction parser to detect 'N <unit> ونصف' patterns.
    Non-list groups and blank/non-string entries are ignored.
    """
    words: set = set()
    for group in load()["arabic_units"].values():
        if not isinstance(group, list):
            continue
        words.update(w for w in group if isinstance(w, str) and w.strip())
    return words

View File

@ -0,0 +1,276 @@
"""
canonical_normalizer.py translate Arabic / dialect motion phrasings
to canonical English form before regex matching.
Runs in the dispatcher pipeline AFTER normalise_numbers (which handles
spelled-out numbers in both languages) and BEFORE the parametric regex
scan. The output is English-shaped text the existing English parametric
regexes can match replacing the previous approach of maintaining 14+
dialect-specific Arabic regexes per command type.
Pipeline:
raw chunk _STATE_ECHO_RE strip _QUOTED_RE strip
_QUESTION_RE strip normalise_numbers
to_canonical_english HERE
English regex scan dispatch
Input/output examples:
أمشي خطوة واحدة. walk 1 step.
أمشي خطوتين. walk 2 steps.
لف يسار 90 درجة. turn left 90 degree.
أستدير يميناً 100 درجة. turn right 100 degree.
أمشي للأمام 5 خطوات. walk forward 5 steps.
أمشي للخلف 3 خطوات. walk backward 3 steps.
أرجع 3 خطوات. walk backward 3 steps.
أتي إليك. (unchanged bot_phrase fixed canonical)
Compound chains (مع conjunctions) are translated in place:
أمشي للخلف 3 خطوات، ثم أستدير يميناً 90 درجة.
walk backward 3 steps, then turn right 90 degree.
Idempotent for already-English text:
Walking forward 5 steps. Walking forward 5 steps.
Ambiguity note: tokens like 'left' / 'right' have surface ambiguity
('I left the room' vs 'turn left'). The current normalizer uses
context-free token substitution and accepts that risk same
behaviour as today's regex layer. A future memory + LiDAR-aware
disambiguation hook (using prior dialogue context and current
spatial state) belongs ABOVE this layer; see TODO at the bottom.
"""
from __future__ import annotations
import re
# ─── Vocabulary tables loaded from Config/language_tables.json ────
#
# Single source of truth for verb / direction / unit / dual / conj /
# connective dictionaries. Adding a new dialect = JSON edit. See
# Voice/_language_tables.py for the loader and flattening helpers.
from Voice._language_tables import (
flat_arabic_verbs, flat_arabic_directions, flat_arabic_units,
flat_arabic_duals, flat_arabic_conjunctions, flat_arabic_connectives,
)
# Verb roots: Arabic → English gerund. Loaded once at module import.
_AR_VERB = flat_arabic_verbs()
# Direction words: Arabic → English direction.
_AR_DIR = flat_arabic_directions()
# Units: Arabic → English unit (singular/plural preserved).
_AR_UNIT = flat_arabic_units()
# Dual forms: single Arabic word → 'N units' English target.
_AR_DUAL = flat_arabic_duals()
# Conjunctions for compound chains: Arabic → space-padded English.
_AR_CONJ = flat_arabic_conjunctions()
# Connectives (prepositions / determiners): Arabic → English.
_AR_CONNECTIVES = flat_arabic_connectives()
_HAS_ARABIC_RE = re.compile(r"[؀-ۿ]")
def _ar_word_sub(text: str, mapping: dict) -> str:
"""Substitute Arabic tokens from `mapping` keys to their English
values. Uses Arabic-LETTER boundary on both sides (NOT the full
Arabic block Arabic punctuation '،' / '؟' / '؛' is U+060C/061F/061B
and is INSIDE the Arabic block but should be treated as a boundary
so 'خطوات،' substitutes correctly). Letter range is U+0621-U+064A
(ء to ي); also include diacritics U+064B-U+065F as in-word so
'البَاب' (with fatha) isn't split.
Longest-first ordering resolves substring conflicts (e.g. match
'ألف' before its substring 'لف')."""
keys = sorted(mapping.keys(), key=len, reverse=True)
for ar in keys:
en = mapping[ar]
# In-word characters: Arabic letters + diacritics. Outside this
# class is a boundary (whitespace, ASCII, Arabic punctuation).
pattern = (
r"(?<![ء-يً-ٟ])"
+ re.escape(ar)
+ r"(?![ء-يً-ٟ])"
)
text = re.sub(pattern, en, text)
return text
def to_canonical_english(text: str) -> str:
    """Translate Arabic / dialect motion phrasings into canonical
    English so the existing English parametric regex layer can match.

    Returns the input unchanged when it contains no Arabic characters,
    which makes the function idempotent for already-English text.

    The passes run in a fixed order; each is idempotent on its output:
      1-6. Token substitution: duals, conjunctions, verbs, directions,
           units, connectives (conjunctions early so later passes never
           treat them as ordinary words).
      7.   Arabic comma → ASCII comma, preserving compound parsing.
      8.   Word-order fix: after substitution the unit can precede the
           number ('walk step 1'); swap to '<verb> <number> <unit>'.
      9a.  'turn(ing) backward' → 'turning around' (the 180° canonical;
           'turn backward' isn't meaningful English motion vocab).
      9b.  Collapse repeated adjacent direction tokens — the Arabic
           verb and an explicit direction can both translate to the
           same English direction ('walking backward backward ...').
      9c.  Spin-in-place idioms (حول نفسي / بنفسك etc.) → ' around' so
           the turn_around canonical regex can catch the phrase.
      10.  Collapse whitespace introduced by the substitutions.
    """
    if not text or not _HAS_ARABIC_RE.search(text):
        return text
    # 1-6. Ordered token-substitution passes over the vocabulary tables.
    substitution_order = (
        _AR_DUAL,          # single words carrying count+unit (خطوتين → 2 steps)
        _AR_CONJ,          # chain conjunctions (ثم → then)
        _AR_VERB,
        _AR_DIR,
        _AR_UNIT,
        _AR_CONNECTIVES,
    )
    for table in substitution_order:
        text = _ar_word_sub(text, table)
    # 7. Arabic comma → ASCII comma.
    text = text.replace("،", ",")
    # 8. Move a trailing number in front of its unit.
    text = re.sub(
        r"\b(walk(?:ing)?|turn(?:ing)?|mov(?:e|ing))"
        r"(\s+(?:forward|backward|back|left|right|around))?"
        r"\s+(steps?|meters?|degrees?)"
        r"\s+(\d+(?:\.\d+)?)",
        r"\1\2 \4 \3",
        text,
        flags=re.IGNORECASE,
    )
    # 9a. Idiomatic fix: 'turn backward' → 'turning around'.
    text = re.sub(
        r"\bturn(?:ing)?\s+backward\b",
        "turning around",
        text,
        flags=re.IGNORECASE,
    )
    # 9b. Collapse doubled direction tokens.
    text = re.sub(
        r"\b(forward|backward|back|left|right|around)(?:\s+\1)+\b",
        r"\1",
        text,
        flags=re.IGNORECASE,
    )
    # 9c. Spin-in-place idioms → ' around'.
    text = re.sub(
        r"\s*(?:حوال?ين|حول|على)\s+(?:نفسي|نفسك|نفسه|نفسها|نفسنا)\b",
        " around",
        text,
    )
    text = re.sub(r"\s*ب(?:نفسي|نفسك|نفسه|نفسها|نفسنا)\b", " around", text)
    # 10. Whitespace collapse.
    return re.sub(r"\s+", " ", text).strip()
# ────────────────────────────────────────────────────────────────
# TODO (future) — AMBIGUITY DISAMBIGUATION HOOK
#
# 'left' / 'right' / 'forward' have surface ambiguity (direction vs
# past-tense / surname / general adverb). When LiDAR + dialogue memory
# are wired in, a higher-level resolver should sit ABOVE this module
# and decide: is the current chunk a motion intent or descriptive
# language? Inputs to that resolver could include:
# - command_history.json (recent commands — context for repeat/follow)
# - LiDAR snapshot (is the path clear? is something blocking?)
# - prior 1-2 Gemini turns (what was discussed? still on motion topic?)
# - mic energy + duration (long quiet vs decisive utterance)
# The resolver gates whether to call to_canonical_english at all OR
# returns the original text unchanged when context says 'this is chat,
# not motion'.
#
# Current behaviour: context-free token substitution. Same risk profile
# as today's regex layer.
# ────────────────────────────────────────────────────────────────
# Standalone smoke check
if __name__ == "__main__":
    # Mirror the production pipeline: numbers are normalised BEFORE the
    # canonical-English translation pass.
    from Voice.number_words import normalise_numbers as _nn
    def pipeline(text):
        return to_canonical_english(_nn(text))
    cases = [
        ("أمشي خطوة واحدة.", "walk 1 step."),
        ("أمشي خطوتين.", "walk 2 steps."),
        ("لف يسار 90 درجة.", "turn left 90 degree."),
        ("أستدير يميناً 100 درجة.", "turn right 100 degree."),
        ("أمشي للأمام 5 خطوات.", "walk forward 5 steps."),
        ("أمشي للخلف 3 خطوات.", "walk backward 3 steps."),
        ("أرجع 3 خطوات.", "walk backward 3 steps."),
        ("أمشي خطوة 1.", "walk 1 step."),
        ("أستدير يساراً 1 خطوات.", "turn left 1 steps."),
        ("أمشي للخلف 3 خطوات، ثم أستدير يميناً 90 درجة.",
         "walk backward 3 steps, then turn right 90 degree."),
        ("Walking forward 5 steps.", "Walking forward 5 steps."),
        ("Turning right 90 degrees.", "Turning right 90 degrees."),
        ("لف على اليمين.", "turn on right."),
        # Spelled-out + structural together
        ("أستدير يساراً تسعين درجة.", "turn left 90 degree."),
        ("أمشي خمس خطوات.", "walk 5 steps."),
        # Compound chain
        ("أمشي خطوة واحدة، ثم أستدير يميناً.",
         "walk 1 step, then turn right."),
    ]
    # NOTE(review): the pass/fail mark glyphs appear to have been lost
    # in transit — both literals are empty strings here; restore the
    # original ✓/✗ marks if this copy drifted from git.
    passed = failed = 0
    for raw, expected in cases:
        got = pipeline(raw)
        hit = got == expected
        if hit:
            passed += 1
        else:
            failed += 1
        mark = "" if hit else ""
        print(f" {mark} {raw!r:55s}")
        print(f"{got!r}")
        if not hit:
            print(f" expected: {expected!r}")
    print(f"\n{passed}/{passed+failed} passed")

View File

@ -36,6 +36,16 @@ Stdin protocol (line-based):
"flush\n" drop mic buffer (echo prevention)
"frame:<base64-jpeg>\n" forward a camera frame to Gemini Live
(Marcus parent throttles to ~2 fps)
"state:<json>\n" motion state update from Marcus's worker.
JSON: {"event":"start|complete|interrupted|error",
"cmd":"<canonical>",
"elapsed_sec": float (optional),
"reason": "<text>" (error only)}.
Runner injects '[STATE-...] <cmd>' into the
live session as silent text context Gemini
reads it for honest answers to "what are you
doing?" but does NOT speak about it unprompted
(per persona Rule 9).
Env vars:
@ -107,6 +117,13 @@ _MIC_HOLDER: list = [] # [BuiltinMic] when active
_LATEST_FRAME_LOCK = threading.Lock()
_LATEST_FRAME: dict = {"bytes": None, "ts": 0.0}
# Pending motion-state updates to inject into Gemini Live as silent
# text context. The stdin watcher (running in a regular thread) drops
# parsed payloads here; an asyncio task in the main session loop
# drains them and calls session.send_realtime_input(text=...).
_STATE_LOCK = threading.Lock()
_STATE_PENDING: list = [] # list[str] of formatted '[STATE-...] ...' lines
def _stdin_watcher() -> None:
try:
@ -136,6 +153,38 @@ def _stdin_watcher() -> None:
with _LATEST_FRAME_LOCK:
_LATEST_FRAME["bytes"] = data
_LATEST_FRAME["ts"] = time.time()
elif line.startswith("state:"):
# Motion state update from Marcus's worker. Parse + format
# for Gemini Live; the actual session.send_realtime_input
# call happens in an asyncio task that drains _STATE_PENDING.
try:
payload = json.loads(line[len("state:"):])
except Exception:
continue
event = (payload.get("event") or "").strip().lower()
cmd = (payload.get("cmd") or "").strip()
if not event or not cmd:
continue
tag_map = {
"start": "[STATE-START]",
"complete": "[STATE-DONE]",
"interrupted": "[STATE-INTERRUPTED]",
"error": "[STATE-ERROR]",
"paused": "[STATE-PAUSED]",
"resumed": "[STATE-RESUMED]",
}
tag = tag_map.get(event)
if tag is None:
continue
msg = "{} {}".format(tag, cmd)
elapsed = payload.get("elapsed_sec")
if isinstance(elapsed, (int, float)):
msg += " ({:.1f}s)".format(float(elapsed))
reason = payload.get("reason")
if reason and event == "error":
msg += "{}".format(reason)
with _STATE_LOCK:
_STATE_PENDING.append(msg)
except Exception:
return
@ -429,6 +478,38 @@ async def _send_frame_loop(session, types_mod, done: asyncio.Event) -> None:
# Keep going — frames are best-effort.
async def _send_state_loop(session, done: asyncio.Event) -> None:
    """Drain _STATE_PENDING and inject each line into Gemini Live as
    silent text context.

    This is the channel that keeps Gemini aware of the robot's actual
    motion state (start/complete/interrupted/error events formatted by
    the stdin watcher). Persona Rule 9 instructs Gemini to read these
    for context but NOT speak about them unprompted.

    Polls at 10 Hz (sleep 0.1) — state events arrive at most a few per
    second and are tiny. send_realtime_input(text=...) is best-effort:
    errors are logged and swallowed so draining continues; some Gemini
    Live SDK versions may not accept text here, in which case only the
    live-state channel is lost — motion itself is unaffected.
    """
    while not (done.is_set() or _STOP_REQUESTED.is_set()):
        await asyncio.sleep(0.1)
        with _STATE_LOCK:
            batch = _STATE_PENDING[:]
            _STATE_PENDING.clear()
        for msg in batch:
            try:
                await session.send_realtime_input(text=msg)
                log("info", f"STATE injected: {msg}")
            except asyncio.CancelledError:
                return
            except Exception as e:
                log("warn", f"state inject failed: {e}")
async def _receive_loop(session, speaker, recorder, sess: _Session, done: asyncio.Event) -> None:
loop = asyncio.get_event_loop()
last_recv = time.time()
@ -601,6 +682,7 @@ async def main_async() -> int:
asyncio.gather(
_send_mic_loop(session, types, mic, speaker, recorder, sess, done),
_send_frame_loop(session, types, done),
_send_state_loop(session, done),
_receive_loop(session, speaker, recorder, sess, done),
),
timeout=_SESSION_TIMEOUT,

View File

@ -211,6 +211,36 @@ class GeminiBrain:
return
self._send_stdin("frame:" + b64 + "\n")
def send_state(self, event: str, cmd: str,
               elapsed_sec: Optional[float] = None,
               reason: Optional[str] = None) -> None:
    """
    Push a motion state update to the runner so Gemini Live can track
    what the robot is actually doing.

    Events:
        'start'       worker dequeued cmd, brain about to run
        'complete'    brain returned and motion_abort was clear
        'interrupted' brain returned but motion_abort was set
        'error'       brain raised an exception (`reason` set)

    The runner formats this as '[STATE-...] <cmd>' and injects it into
    the live session as silent text context. Best-effort: silently a
    no-op when the runner isn't started, stdin is closed, event/cmd is
    empty, or the payload can't be serialised.
    """
    proc = self._proc
    if proc is None or proc.stdin is None or not event or not cmd:
        return
    update = {"event": event, "cmd": cmd}
    if elapsed_sec is not None:
        # Two decimals is plenty for a spoken-context duration.
        update["elapsed_sec"] = round(float(elapsed_sec), 2)
    if reason:
        # Cap the reason so one long traceback can't bloat the line.
        update["reason"] = str(reason)[:200]
    try:
        line = "state:" + json.dumps(update, ensure_ascii=False) + "\n"
    except Exception:
        return
    self._send_stdin(line)
def _send_stdin(self, line: str) -> None:
"""Serialised stdin write — multiple threads (frame sender + flush) can call safely."""
proc = self._proc

View File

@ -32,10 +32,12 @@ from __future__ import annotations
import logging
import os
import queue
import re
import sys
import threading
import time
from collections import deque
from difflib import SequenceMatcher
from logging.handlers import RotatingFileHandler
from typing import Callable, Optional
@ -47,6 +49,11 @@ if _PROJECT_DIR not in sys.path:
sys.path.insert(0, _PROJECT_DIR)
from Core.env_loader import PROJECT_ROOT
from Core.config_loader import load_config
from Core.motion_state import motion_abort, motion_pause
from Core.motion_log import log_motion as _log_motion
from Voice.number_words import normalise_numbers
from Voice.canonical_normalizer import to_canonical_english
from Voice.sequences import get_sequences
LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
os.makedirs(LOG_DIR, exist_ok=True)
@ -95,18 +102,34 @@ import json as _json
def _load_instructions() -> dict:
"""Load Config/instruction.json. Raises RuntimeError if the file is
missing, empty, or has no actions voice dispatch is unusable
without it (every Gemini phrase gets rejected, robot never moves,
operator gets a confused user complaint hours later). Loud failure
surfaces in the brain init banner; quiet degradation does not."""
path = os.path.join(PROJECT_ROOT, "Config", "instruction.json")
try:
with open(path, "r", encoding="utf-8") as f:
return _json.load(f) or {}
data = _json.load(f) or {}
except FileNotFoundError as e:
raise RuntimeError(
"instruction.json missing at {} — voice dispatch cannot run "
"without it (this file holds every wake-word + bot phrase "
"the dispatcher matches against). Restore from git or "
"rebuild from Config/instruction.example.json.".format(path)
) from e
except Exception as e:
# Fail soft — empty tables mean the dispatch gate just rejects
# everything and is_running stays True. Better than a crash.
try:
log.error("instruction.json not loaded: %s", e)
except Exception:
pass
return {}
raise RuntimeError(
"instruction.json at {} is malformed JSON: {}".format(path, e)
) from e
actions = data.get("actions")
if not isinstance(actions, dict) or not actions:
raise RuntimeError(
"instruction.json at {} has no actions — every dispatch "
"would silently fail. Add at least one action with "
"canonical/user_phrases/bot_phrases.".format(path)
)
return data
_INSTRUCTIONS = _load_instructions()
@ -184,14 +207,179 @@ def _build_parametric_actions(data: dict) -> list:
return out
# ─── quoted-phrase stripper ─────────────────────────────────────
#
# When Gemini quotes the user's phrase back ("Sorry, I don't understand
# the phrase 'turn left'") the dispatcher would otherwise match the
# canonical name INSIDE the quoted string and false-fire. Strip every
# matched pair of quotes — straight, smart, Arabic — before scanning.
# Pattern handles ASCII '...' "..." plus typographic “…” ’…’ «…» 「…」.
_QUOTED_RE = re.compile(
r"\".*?\""
r"|\'.*?\'"
r"|“.*?”"
r"|.*?"
r"|«.*?»"
r"|「.*?」"
r"|『.*?』",
re.DOTALL,
)
# ─── STATE echo stripper ────────────────────────────────────────
#
# When the runner injects '[STATE-DONE] walk 1 steps (3.0s)' into
# Gemini Live as silent context, Gemini occasionally echoes that text
# back through its output transcription. A naive dispatcher scan
# would re-fire 'walk 1 steps' from the embedded canonical, creating
# a feedback loop (one voice command → 3-4× motions). This regex
# strips:
# '[STATE-START] <cmd>'
# '[STATE-DONE] <cmd> (1.5s)'
# '[STATE-INTERRUPTED] <cmd> (2.3s)'
# '[STATE-ERROR] <cmd> — <reason>'
# '[STATE-PAUSED] <cmd>' / '[STATE-RESUMED] <cmd>'
# The match consumes the bracketed tag and the canonical/duration
# tail, but stops at sentence boundaries (./!/?/؟/،) so a coexisting
# real motion phrase in the same chunk survives.
_STATE_ECHO_RE = re.compile(
r"\[STATE-(?:START|DONE|INTERRUPTED|ERROR|PAUSED|RESUMED)\]"
r"\s*[^.!?؟،,\n(]*" # canonical name; stops before sentence-end OR '('
r"(?:\s*\([^)]*\))?" # optional parenthesised duration like (1.5s)
r"(?:\s+—\s+[^.!?؟،\n]*)?", # optional ' — <reason>' suffix on errors
re.IGNORECASE,
)
# ─── question-clause stripper ───────────────────────────────────
#
# CRITICAL safety filter. Sentences ending in '?' or '؟' (Arabic
# question mark) are CLARIFICATION QUESTIONS Gemini asks the user —
# they are NEVER motion confirmations. Without this filter, a question
# like 'Do you mean turn left or walk one step forward?' would match
# BOTH 'turn left' and 'walk one step' canonicals inside, dispatching
# contradictory motions while Gemini thinks it's still waiting for
# clarification. Production failure observed at 09:13:57.
#
# Match: from previous sentence terminator (or string start) up to and
# including the question mark. Greedy on the left so consecutive
# questions are stripped one at a time and statements between them
# survive in the residual text.
_QUESTION_RE = re.compile(
r"(?:^|(?<=[.!؟?\n]))[^.!؟?\n]*[?؟]",
re.DOTALL,
)
# ─── sequence-name extraction ───────────────────────────────────
#
# Sequence canonicals carry a NAME ('save sequence as my-greet',
# 'play my-greet'). Gemini's bot_phrase contains it; we extract from
# the surrounding spoken text. Two extractor patterns:
# - 'as <name>' — for save: 'saved as my-greet'
# - 'play(ing)? <name>' / 'run(ning)? <name>' — for play
# Names follow the same shape Sequences validates: lowercase letters,
# digits, hyphens, underscores; 1–31 chars. Gemini occasionally adds
# a period; we strip trailing punctuation.
_SEQ_NAME_AS = re.compile(
r"\bas\s+(?:my\s+)?([a-z0-9][a-z0-9_-]{0,30})\b", re.I,
)
_SEQ_NAME_PLAY = re.compile(
r"\b(?:play(?:ing)?|run(?:ning)?|perform(?:ing)?|do(?:ing)?)\s+"
r"(?:my\s+|the\s+|sequence\s+)*"
r"([a-z0-9][a-z0-9_-]{0,30})\b",
re.I,
)
def _extract_sequence_name(text: str, mode: str) -> Optional[str]:
"""Pull a sequence name out of Gemini's spoken confirmation.
`mode` is 'as' (save) or 'play' (start_recording / play). Returns
None if no plausible name is found caller decides what to do."""
if not text:
return None
t = text.strip().rstrip(".!?,").strip()
rx = _SEQ_NAME_AS if mode == "as" else _SEQ_NAME_PLAY
m = rx.search(t)
if not m:
return None
name = m.group(1).strip().lower()
# Reject likely false positives — bare 'sequence' / 'recording' /
# 'this' shouldn't be a name.
if name in {"sequence", "recording", "macro", "this", "that",
"it", "the", "my"}:
return None
return name
# ─── memory helpers ─────────────────────────────────────────────
#
# Static map of pairwise inverses for fixed canonicals. Parametric forms
# (turn left N degrees ↔ turn right N degrees, walk forward N → walk
# backward N) are handled by _reverse_command via regex below.
# Non-reversible canonicals (wave, look around, follow me, patrol, stop,
# come here, stay here, go home, turn around) intentionally absent —
# _reverse_command returns None for those and the dispatcher logs &
# skips with no harm done.
# Pairs loaded from Config/language_tables.json::motion_inverses.
# Adding a new reversible-pair = JSON edit. See Voice/_language_tables.py.
from Voice._language_tables import motion_inverses as _load_motion_inverses
_REVERSE_PAIRS = _load_motion_inverses()
def _reverse_command(cmd: str) -> Optional[str]:
"""Return the inverse of `cmd` if it is reversible, else None.
Handles fixed pairs via _REVERSE_PAIRS and parametric forms via
regex (turn left N deg turn right N deg, walk forward N steps/m
walk backward N steps/m, default-direction walks become explicit
backward). Bare 'turn N degrees' (no direction) is treated as
self-inverse turning N degrees is geometrically reversible by
repeating it, but more importantly the brain accepts it unchanged.
"""
if not cmd:
return None
s = cmd.strip().lower()
if s in _REVERSE_PAIRS:
return _REVERSE_PAIRS[s]
m = re.match(r"^turn\s+(left|right)\s+(\d+(?:\.\d+)?)\s*deg(?:rees?)?$", s)
if m:
flip = "right" if m.group(1) == "left" else "left"
return "turn {} {} degrees".format(flip, m.group(2))
m = re.match(r"^turn\s+(\d+(?:\.\d+)?)\s*deg(?:rees?)?$", s)
if m:
return s # un-directional turn — self-inverse for the brain
m = re.match(
r"^walk\s+(forward|back|backward)\s+(\d+(?:\.\d+)?)\s*(steps?|meters?)$", s,
)
if m:
flip = "backward" if m.group(1) in ("forward",) else "forward"
return "walk {} {} {}".format(flip, m.group(2), m.group(3))
m = re.match(r"^walk\s+(\d+(?:\.\d+)?)\s*(steps?|meters?)$", s)
if m:
# Implicit-direction walk → defaults to forward in the brain, so
# the inverse is an explicit backward walk.
return "walk backward {} {}".format(m.group(1), m.group(2))
return None
def _format_param_template(template: str, groups: tuple) -> str:
"""Substitute $1, $2, … in a parametric command template with the
matching regex groups. None groups (optional captures that didn't
fire) are treated as empty strings; surrounding whitespace is
collapsed so 'turn 90 degrees' becomes 'turn 90 degrees'."""
collapsed so 'turn 90 degrees' becomes 'turn 90 degrees'.
Returns an EMPTY string if any captured numeric group is a literal
'0' (or '0.0') a 0-quantity motion is a no-op and the dispatcher
skips empty results. This protects against Gemini hallucinating
'turn 0 degrees' when the user didn't actually request a turn but
Gemini misheard 'صفر' or similar. Bug #3 in the 16:42:58 audit.
"""
out = template
for i, g in enumerate(groups, 1):
token = "$" + str(i)
if g is not None and re.match(r"^0+(?:\.0+)?$", str(g).strip()):
return "" # zero-quantity motion → discard
out = out.replace(token, "" if g is None else str(g))
out = re.sub(r"\s+", " ", out).strip()
return out
@ -399,6 +587,54 @@ class VoiceModule:
self._gemini_say_lock = threading.Lock()
self._gemini_say_last_chunk_at = 0.0
# CUMULATIVE per-turn text used by the motion dispatcher. Gemini
# frequently splits a single phrase across chunks ('Turning' /
# 'right.'), and a per-chunk scan would miss 'turning right'
# because neither chunk alone contains the full needle. We
# instead append every chunk to this string and re-scan the
# WHOLE accumulated text on each new chunk; the per-turn
# _fired_canons_this_turn set prevents the same motion from
# dispatching twice. Reset on turn_end.
self._gemini_turn_text = ""
# Short-term motion memory. Every REAL command dispatched to the
# brain is appended here (the memory canonicals 'repeat last' /
# 'reverse last' are NOT — only their resolved targets). Used
# by the dispatcher to handle 'do that again' / 'undo' triggers
# locally without bothering the brain. Cap = 10; older commands
# roll off automatically. PERSISTS to disk across Marcus
# restarts AND Gemini Live session reconnects so the user can
# ask 'what did you do last hour' without losing context (Bug
# #12 in the 14-bug audit).
self._command_history: deque = deque(maxlen=10)
self._history_path = os.path.join(
PROJECT_ROOT, "logs", "command_history.json",
)
self._load_command_history()
# ─── motion worker (async dispatch) ───────────────────────
#
# The dispatcher used to call self._on_command directly on the
# IPC reader thread. That thread blocks for the duration of
# the brain's motion (a 10-step walk = ~10s of dead silence
# for Gemini chunks). We now route every command through a
# Queue + dedicated worker thread:
# - dispatcher pushes commands and returns immediately
# - worker pulls FIFO and calls self._on_command serially
# - 'stop' has priority: it sets motion_abort (interrupts
# the running brain loop) AND drains the queue (cancels
# pending motions), then a stop is enqueued so the brain
# runs its proper stop handler.
# - worker tracks self._current_motion for status queries
# and clears motion_abort at the start of each new (non-
# stop) command so a stale set doesn't kill the next one.
self._motion_queue: "queue.Queue" = queue.Queue(maxsize=20)
self._motion_worker: Optional[threading.Thread] = None
self._current_motion: Optional[str] = None
self._current_motion_at: float = 0.0
# Sentinel used to politely shut the worker down on stop().
self._MOTION_SENTINEL = object()
self._running = False
self._thread = None
@ -558,11 +794,33 @@ class VoiceModule:
that prints once on turn_end (or sooner if the chunk ends
with sentence punctuation).
"""
# Motion side-channel — chunk-level so dispatch is fast.
try:
self._dispatch_gemini_bot(text)
except Exception:
pass
# Motion side-channel. Dispatch on the CUMULATIVE per-turn text
# so split chunks ('Turning' + ' right.') still match the full
# 'turning right' needle. We DEFER dispatch until the cumulative
# text settles at a sentence boundary (./!/?/؟) — otherwise a
# bare canonical like 'turn right' would fire on chunk 1
# ('Turning right') before chunk 2 (' 90 degrees.') brings in
# the parametric extension, and the robot would do BOTH a small
# turn and a 90° turn. A leftover-flush on turn_end catches the
# rare case where a phrase ends with no terminal punctuation.
#
# Word-boundary fix: Gemini Live often emits chunks WITHOUT
# leading whitespace (e.g. ["Turning right,", "then", "walking"]
# → naive `+=` produces 'Turning right,thenwalking' and breaks
# every \s+ in our regexes). Insert a space whenever the previous
# buffer doesn't already end in whitespace and the new chunk
# doesn't start with one. This mirrors what _flush_gemini_say
# does via " ".join(...) for the [Sanad] said: line.
if (self._gemini_turn_text
and not self._gemini_turn_text[-1:].isspace()
and text[:1] and not text[:1].isspace()):
self._gemini_turn_text += " "
self._gemini_turn_text += text
if self._gemini_turn_text.rstrip().endswith((".", "!", "?", "؟")):
try:
self._dispatch_gemini_bot(self._gemini_turn_text)
except Exception:
pass
with self._gemini_say_lock:
self._gemini_say_buf.append(text)
@ -574,11 +832,22 @@ class VoiceModule:
def _on_gemini_turn_end(self) -> None:
    """Flush any pending Gemini output chunks at turn boundary, and
    reset the per-turn motion dedup state (fired-set + cumulative
    text buffer) so the next user↔Gemini exchange starts fresh.

    IMPORTANT: also runs a final dispatch pass on the cumulative text
    in case the last chunk ended without terminal punctuation (rare —
    Gemini usually finishes with '.', but the model can occasionally
    emit a turn that ends mid-clause).

    (The visible block carried both the pre- and post-edit docstring
    fragments merged by the diff; this is the reconstructed version.)
    """
    with self._gemini_say_lock:
        self._flush_gemini_say_locked()
    # Leftover flush: dispatch whatever accumulated without a sentence
    # terminator.  The per-turn fired-set still dedups anything that
    # already dispatched at a sentence boundary.
    if self._gemini_turn_text.strip():
        try:
            self._dispatch_gemini_bot(self._gemini_turn_text)
        except Exception:
            # Dispatch is best-effort; a parser error must not break
            # the turn-boundary bookkeeping below.
            pass
    self._fired_canons_this_turn.clear()
    self._gemini_turn_text = ""
def _flush_gemini_say_locked(self) -> None:
"""Caller MUST hold self._gemini_say_lock. Prints one [Sanad] said: line."""
@ -658,6 +927,47 @@ class VoiceModule:
if not text:
return
# Strip [STATE-...] markers we previously injected ourselves —
# Gemini Live often echoes them through its output transcription
# ('[STATE-DONE] walk 1 steps') and a naive scan picks up the
# canonical name inside, re-firing the motion. The state markers
# are NOT user intent; they're our own bookkeeping coming back.
# Pattern matches the tag, the optional " (X.Ys)" duration, and
# any optional " — reason" suffix on errors. Leaves the rest of
# the chunk intact so a real user-intent phrase coexisting with
# an echoed state event still gets dispatched.
text = _STATE_ECHO_RE.sub("", text)
# Strip quoted phrases. Gemini sometimes quotes the user's phrase
# back ('Sorry, I don't understand the phrase "turn left"') and
# the dispatcher would otherwise match the embedded canonical
# and false-fire. Quoted phrases are NEVER motion confirmations.
text = _QUOTED_RE.sub(" ", text)
# Strip question clauses. A clarification like 'Do you mean turn
# left or walk a step?' contains motion phrases but is NOT a
# confirmation — Gemini is awaiting an answer, not acting.
# Without this filter, both motions would dispatch contradictorily
# while Gemini thinks the robot is idle. (Production failure at
# 09:13:57 — Gemini said 'هل تقصد أن أستدير لليسار أم أمشي خطوة
# للأمام؟' and the dispatcher fired BOTH turn_left AND walk_forward_1.)
text = _QUESTION_RE.sub(" ", text)
if not text.strip():
return
# Normalise spelled-out numbers BEFORE the lower-cased scan so
# 'turn ninety degrees' / 'أستدير تسعين درجة' get matched by
# the parametric (\d+) regex. normalise_numbers is idempotent
# and word-boundary-aware ('someone' / 'often' are unaffected).
text = normalise_numbers(text)
# Translate Arabic / dialect structural motion phrasings to
# English canonical form so the existing English parametric
# regexes can match them. Replaces 14+ dialect-specific Arabic
# regexes with one principled translator. Idempotent on
# already-English input (passes through unchanged). See
# Voice/canonical_normalizer.py for the verb/dir/unit/dual
# dictionaries and the future memory+LiDAR disambiguation hook.
text = to_canonical_english(text)
# .lower() preserves character-by-character indexing for both
# ASCII and Arabic, so positions in `low` correspond to positions
# in `text` for ordering. Arabic patterns are stored as-is and
@ -691,6 +1001,12 @@ class VoiceModule:
continue
cmd_text = _format_param_template(template, m.groups())
if not cmd_text:
# Parametric matched but produced an empty command
# (e.g. 0-quantity rejection). Claim the span so the
# bare-canonical pass below doesn't double-match
# 'turning right' inside what was 'turning right 0
# degrees' — which would defeat the 0-rejection.
claimed_spans.append((j, end))
continue
matches.append((j, cmd_text, "param"))
claimed_spans.append((j, end))
@ -732,16 +1048,427 @@ class VoiceModule:
self._last_gemini_canon = cmd
self._last_gemini_dispatch_at = now
# Memory intercepts. 'repeat last' and 'reverse last' are
# synthetic canonicals — they NEVER reach the brain. Resolve
# them against _command_history and dispatch the resolved
# command instead. Empty history → log and skip silently
# (no-op rather than an awkward error). Non-reversible
# commands → log and skip.
resolved = cmd
if cmd == "repeat last":
if not self._command_history:
log.info("memory: 'repeat last' but history is empty — skip")
continue
resolved = self._command_history[-1]
log.info("memory: repeat → %r", resolved)
elif cmd == "reverse last":
if not self._command_history:
log.info("memory: 'reverse last' but history is empty — skip")
continue
last = self._command_history[-1]
resolved = _reverse_command(last)
if resolved is None:
log.info("memory: %r is not reversible — skip", last)
continue
log.info("memory: reverse %r%r", last, resolved)
# Push the dispatched (resolved) command to history. This
# means 'again' followed by 'again' replays the original
# motion; 'turn right' followed by 'undo' followed by 'undo'
# toggles right/left. Consistent with what users intuit.
# Persisted to disk so it survives reconnects and restarts.
self._command_history.append(resolved)
self._save_command_history()
log.info(
"dispatch (gemini-bot, %s): %s (heard: %r)",
label, cmd, text[:80],
label, resolved, text[:80],
)
_log_transcript("CMD-BOT", cmd)
if self._on_command:
_log_transcript("CMD-BOT", resolved)
# Pause/resume intercepts. These take effect IMMEDIATELY —
# not after the queue drains — because the user expects
# 'pause' to halt mid-walk right now, not after the brain
# finishes its current command. Setting motion_pause makes
# all interrupt-aware brain loops hold at zero velocity
# (see wait_while_paused in Core/motion_state.py). Resume
# clears the event so the running loop continues. We DO
# NOT enqueue these — the worker never sees them, the
# brain doesn't know about them, no [STATE-...] event is
# emitted (motion is still running, just paused).
if resolved == "pause motion":
motion_pause.set()
# Tag the state event with the motion that's actually
# being held (current_motion), not the literal 'pause
# motion' canonical — Gemini Rule 9 reads this to
# answer 'are you still moving?' honestly. Bug #13.
held = self._current_motion or "no motion in flight"
log.info("memory: motion_pause SET — holding %r", held)
try:
if (self._brain is not None
and self._stt.get("gemini_send_state_events", False)):
self._brain.send_state("paused", held)
except Exception:
pass
continue
if resolved == "resume motion":
was_paused = motion_pause.is_set()
if was_paused:
motion_pause.clear()
held = self._current_motion or "no motion in flight"
log.info("memory: motion_pause CLEARED — resuming %r", held)
else:
held = "(no pause was active)"
log.info("memory: resume requested but nothing paused")
try:
if (self._brain is not None
and self._stt.get("gemini_send_state_events", False)):
self._brain.send_state("resumed", held)
except Exception:
pass
continue
# ─── sequence-control intercepts ────────────────────
# These never reach the brain. They mutate the in-process
# Sequences singleton and (for play) push commands onto
# this same dispatcher path again.
if resolved == "start recording":
# Try to extract a pre-name from Gemini's text:
# 'recording your sequence as my-greet'.
preset_name = _extract_sequence_name(text, mode="as")
seqs = get_sequences()
rs = seqs.start_recording(name=preset_name)
log.info("sequence: start_recording(%r) -> %s",
preset_name, rs)
continue
if resolved == "save sequence":
# Name MUST come from the bot phrase ('saved as <name>').
save_name = _extract_sequence_name(text, mode="as")
seqs = get_sequences()
rs = seqs.save_recording(name=save_name)
log.info("sequence: save_recording(%r) -> %s",
save_name, rs)
if not rs.get("ok"):
try:
if (self._brain is not None
and self._stt.get("gemini_send_state_events", False)):
self._brain.send_state(
"error", "save sequence",
reason=rs.get("reason") or "save failed",
)
except Exception:
pass
continue
if resolved == "cancel recording":
seqs = get_sequences()
rs = seqs.cancel_recording()
log.info("sequence: cancel_recording -> %s", rs)
continue
if resolved == "play sequence":
play_name = _extract_sequence_name(text, mode="play")
seqs = get_sequences()
cmds = seqs.play(play_name) if play_name else None
if cmds is None:
# Two failure modes worth distinguishing for the
# operator AND for Gemini (which uses send_state
# to give honest answers when the user later asks
# 'did you do it?'). Surface both as state errors.
if not play_name:
reason = "no sequence name in your phrase"
else:
reason = "no sequence saved as %r" % play_name
log.warning("sequence: play(%r) failed — %s",
play_name, reason)
try:
if (self._brain is not None
and self._stt.get("gemini_send_state_events", False)):
self._brain.send_state(
"error", "play sequence",
reason=reason,
)
except Exception:
pass
else:
log.info("sequence: play(%r) — enqueueing %d commands",
play_name, len(cmds))
for sub in cmds:
self._enqueue_motion(sub)
continue
# Append to recording buffer if a session is active. We do
# this BEFORE enqueueing so a control flag flip mid-run
# doesn't change the captured macro shape. record_command
# silently skips control canonicals (stop / pause / etc.).
try:
get_sequences().record_command(resolved)
except Exception:
pass
# Enqueue for the motion worker — does NOT block here.
# Worker calls self._on_command(resolved, "en") when it
# picks the command up. 'stop' sets motion_abort + drains
# the queue inside _enqueue_motion before requeueing.
self._enqueue_motion(resolved)
# ─── command history persistence ──────────────────────
def _load_command_history(self) -> None:
"""Restore _command_history from disk on startup. Best-effort —
any read/parse error leaves history empty without crashing."""
try:
if not os.path.isfile(self._history_path):
return
with open(self._history_path, "r", encoding="utf-8") as f:
payload = _json.load(f)
cmds = payload.get("commands", [])
if isinstance(cmds, list):
for c in cmds[-10:]:
if isinstance(c, str) and c.strip():
self._command_history.append(c.strip())
log.info(
"command history restored: %d entries from %s",
len(self._command_history), self._history_path,
)
except Exception as e:
log.warning("could not load command history: %s", e)
def _save_command_history(self) -> None:
"""Write _command_history to disk after every dispatch.
Atomically (write to .tmp + rename) so a crash mid-write doesn't
corrupt the file."""
try:
os.makedirs(os.path.dirname(self._history_path), exist_ok=True)
tmp = self._history_path + ".tmp"
with open(tmp, "w", encoding="utf-8") as f:
_json.dump(
{"commands": list(self._command_history),
"saved_at": time.time()},
f, indent=2, ensure_ascii=False,
)
os.replace(tmp, self._history_path)
except Exception:
# Persistence failure is non-fatal — history still works
# in-memory for the current process lifetime.
pass
# ─── motion worker ────────────────────────────────────
def _motion_worker_loop(self) -> None:
    """
    Pull commands FIFO from self._motion_queue and run them on the
    brain serially.  Clears motion_abort / motion_pause at the start
    of each new (non-stop) command so stale flags from a previous
    interrupt don't kill the next motion.

    Also notifies Gemini via send_state(...) at start / complete /
    interrupted / error so the model can give honest answers when the
    user asks 'what are you doing?'.

    A brain exception is logged and the worker keeps running; the
    queue must drain even if one motion crashes.

    Fix: the visible block had TWO `except Exception as e:` clauses on
    the on_command try (a stale removed-line pair merged back by the
    diff rendering); the first swallowed the error so `had_error` was
    never set.  The stale clause is removed here.
    """
    while self._running:
        try:
            item = self._motion_queue.get()
        except Exception:
            continue
        if item is self._MOTION_SENTINEL:
            log.info("motion worker received sentinel — exiting")
            return
        cmd = item
        if not cmd:
            continue
        is_stop = (cmd.strip().lower() == "stop")
        # For non-stop commands, clear any leftover abort flag from a
        # previous interrupt so the brain doesn't immediately break out
        # of the new motion's loop.  Also clear any stale pause — a
        # fresh command should not start paused just because the
        # previous one was paused at the moment it was aborted/stopped.
        if not is_stop:
            motion_abort.clear()
            motion_pause.clear()
        self._current_motion = cmd
        self._current_motion_at = time.time()
        # Tell Gemini what's starting (best effort — runner may not be
        # ready yet, e.g. first command before connection).
        try:
            if self._brain is not None and self._stt.get(
                    "gemini_send_state_events", False):
                self._brain.send_state("start", cmd)
        except Exception:
            pass
        # Structured per-motion START log — pairs with the END line
        # below for a complete audit.  See logs/motion.log.
        try:
            _log_motion("start", cmd, source="voice")
        except Exception:
            pass
        # Watchdog: if a brain motion call exceeds the configured max
        # RUNNING duration, fire motion_abort so its polling loops
        # break out.  'Running' excludes time spent paused — a user
        # who deliberately holds for 90s shouldn't trip the watchdog.
        # Skipped for canonicals in motion_watchdog_skip_canonicals
        # (e.g. 'patrol' has its own outer duration).  0 disables.
        wd_sec = float(self._stt.get("motion_watchdog_sec", 0.0))
        wd_skip = set(self._stt.get(
            "motion_watchdog_skip_canonicals", []
        ))
        wd_canon = cmd.strip().lower().split()[0] if cmd.strip() else ""
        wd_active = (wd_sec > 0 and wd_canon and
                     wd_canon not in wd_skip and not is_stop)
        wd_stop_evt = threading.Event()

        def _watchdog_loop(_cmd=cmd, _budget=wd_sec, _stop=wd_stop_evt):
            # Tick every 100ms while motion is RUNNING (not paused and
            # not aborted).  When elapsed running-time exceeds budget,
            # fire motion_abort.  Exits when _stop is set by the
            # worker (motion completed normally).
            ticks_at_100ms = 0
            budget_ticks = int(_budget / 0.1)
            while not _stop.is_set():
                if motion_abort.is_set():
                    return  # worker / external abort already fired
                if not motion_pause.is_set():
                    ticks_at_100ms += 1
                    if ticks_at_100ms >= budget_ticks:
                        log.warning(
                            "watchdog: %r exceeded %.1fs running time "
                            "— firing motion_abort", _cmd, _budget,
                        )
                        motion_abort.set()
                        return
                _stop.wait(0.1)

        wd_thread = None
        if wd_active:
            wd_thread = threading.Thread(
                target=_watchdog_loop, daemon=True,
                name="motion-watchdog",
            )
            wd_thread.start()
        t0 = time.time()
        had_error = False
        err_reason = ""
        try:
            if self._on_command:
                self._on_command(cmd, "en")
        except Exception as e:
            # One failed motion must not kill the worker — record the
            # error for the state event / motion log and keep going.
            had_error = True
            err_reason = str(e)
            log.error("worker on_command(%r) failed: %s", cmd, e, exc_info=True)
        finally:
            if wd_thread is not None:
                wd_stop_evt.set()
                # Best-effort join — the watchdog wakes on its 100ms
                # tick, so this returns within ~100ms.
                wd_thread.join(timeout=0.3)
        elapsed = time.time() - t0
        # Determine outcome.  Stop itself never reports interrupted
        # (it IS the interrupt).  For other commands, an abort flag
        # set at any point during execution means we were halted.
        if had_error:
            outcome = "error"
        elif is_stop:
            outcome = "complete"
        elif motion_abort.is_set():
            outcome = "interrupted"
        else:
            outcome = "complete"
        try:
            if (self._brain is not None
                    and self._stt.get("gemini_send_state_events", False)):
                if outcome == "error":
                    self._brain.send_state(
                        "error", cmd, elapsed_sec=elapsed, reason=err_reason,
                    )
                else:
                    self._brain.send_state(
                        outcome, cmd, elapsed_sec=elapsed,
                    )
        except Exception:
            pass
        log.info(
            "motion worker: %s [%s] in %.2fs", cmd, outcome, elapsed,
        )
        # Structured per-motion END log — pairs with the START line
        # above; captures actual elapsed time and outcome.
        try:
            _log_motion(
                outcome, cmd, source="voice",
                actual_sec=elapsed,
                **({"reason": err_reason} if had_error else {}),
            )
        except Exception:
            pass
        self._current_motion = None
        self._current_motion_at = 0.0
def _enqueue_motion(self, cmd: str) -> None:
"""
Queue a command for the motion worker. 'stop' has priority:
it sets motion_abort (interrupting the in-flight brain loop)
AND drains pending commands so they don't fire after the user
explicitly asked to halt. Non-stop commands are simply enqueued.
"""
if not cmd:
return
if cmd.strip().lower() == "stop":
# 1. Cut the in-flight motion immediately. Brain motion
# loops poll motion_abort.is_set() and break early.
# Also clear any pause — otherwise wait_while_paused
# would keep holding the brain inside its loop and the
# abort check wouldn't fire until pause clears.
motion_pause.clear()
motion_abort.set()
# 2. Drain pending — anything queued behind a stop is
# cancelled. We log the count for ops visibility.
drained = 0
try:
while True:
self._motion_queue.get_nowait()
drained += 1
except queue.Empty:
pass
if drained:
log.info("stop drained %d pending motion(s)", drained)
# 3. Enqueue the stop itself so brain.process_command runs
# its proper stop handler (gradual_stop, neutral pose).
try:
self._motion_queue.put_nowait("stop")
except queue.Full:
log.warning("motion queue full — stop dropped")
return
try:
self._motion_queue.put_nowait(cmd)
except queue.Full:
log.warning("motion queue full — dropping %r", cmd)
@property
def current_motion(self) -> Optional[str]:
    """Canonical command the brain is executing right now, or None
    when idle.

    The worker writes _current_motion before invoking on_command and
    nulls it when the brain returns; reading it is a single attribute
    load, so this is safe to call from any thread."""
    motion = self._current_motion
    return motion
# ─── start / stop ─────────────────────────────────────
@ -750,6 +1477,13 @@ class VoiceModule:
log.warning("VoiceModule already running")
return
self._running = True
# Spawn the motion worker FIRST so any early dispatch (e.g.
# from a quick wake-on-startup) has somewhere to go.
self._motion_worker = threading.Thread(
target=self._motion_worker_loop,
daemon=True, name="motion-worker",
)
self._motion_worker.start()
self._thread = threading.Thread(
target=self._voice_loop, daemon=True, name="voice",
)
@ -758,6 +1492,14 @@ class VoiceModule:
def stop(self):
    """Shut down the voice loop and the motion worker.

    Ordering matters: flip _running first, then wake the worker with
    the sentinel so it observes _running=False and exits, then join
    both threads with bounded timeouts so shutdown cannot hang."""
    self._running = False
    # Wake the worker so it can see _running=False and exit.
    try:
        self._motion_queue.put_nowait(self._MOTION_SENTINEL)
    except Exception:
        pass
    worker = self._motion_worker
    if worker:
        worker.join(timeout=3)
    self._motion_worker = None
    if self._thread:
        self._thread.join(timeout=5)
    self._thread = None

307
Voice/number_words.py Normal file
View File

@ -0,0 +1,307 @@
"""
number_words.py — convert spelled-out numbers to digits.
Used by Voice/marcus_voice.py to preprocess Gemini's spoken text
before the dispatcher scans it for parametric motion phrases. Reason:
some Gemini Live voices occasionally speak 'ninety degrees' instead
of '90 degrees' even when the persona prompt asks for digits, and
the parametric regexes only recognise digits.
Scope (intentional):
- English compound numbers up to 999 ('three hundred sixty', 'one
hundred and eighty', 'forty-five').
- Arabic ones/tens/hundreds with و-conjunction ('تسعون', 'تسعين',
'مائة وثمانون', 'ثلاثمائة وستون', 'خمس', 'خمسة').
- Word-boundary-aware so it does NOT eat the substring 'one' inside
'someone' or 'thirty' inside 'thirtysomething'.
Out-of-scope:
- Decimals ('two point five').
- Numbers above 999 (motion vocab maxes around 360 degrees / 30
meters / 100 steps; 999 is plenty of headroom).
- Ordinals ('first', 'الأول') — those don't appear in motion text.
API:
>>> normalise_numbers("Walking forward five steps.")
'Walking forward 5 steps.'
>>> normalise_numbers("أستدير تسعين درجة")
'أستدير 90 درجة'
>>> normalise_numbers("turn three hundred sixty degrees")
'turn 360 degrees'
"""
from __future__ import annotations
import re
# ─── English ─────────────────────────────────────────────────────
#
# Number-word tables loaded from Config/language_tables.json.  Adding
# a new word (e.g. 'twenty-five' as a single hyphenated token, or a
# new dialectal form) is a JSON edit, not a Python change.
from Voice._language_tables import (
    english_numbers_ones, english_numbers_tens,
    english_numbers_scale, english_numbers_glue,
)
# NOTE(review): table contents live in the JSON — ones/tens/scale are
# assumed to follow the usual 1–19 / 20–90 / 'hundred' split and glue
# to hold connective tokens ('and', '-'); confirm against the JSON.
_EN_ONES = english_numbers_ones()
_EN_TENS = english_numbers_tens()
_EN_SCALE = english_numbers_scale()
_EN_GLUE = english_numbers_glue()
# Union of every recognisable token — used to build the run-matching
# regex (_EN_RUN) and consulted when tokenising a matched run.
_EN_NUMBER_TOKEN = (
    set(_EN_ONES.keys())
    | set(_EN_TENS.keys())
    | set(_EN_SCALE.keys())
    | _EN_GLUE
)
def _en_words_to_int(tokens: list) -> int:
    """Evaluate a list of lowercase English number-word tokens.

    Glue tokens ('and', '-') are skipped.  Ones/tens values add into a
    running amount; a scale word multiplies the running amount (min 1,
    so a bare 'hundred' is 100) and banks it into the total.  The
    caller guarantees every token is in _EN_NUMBER_TOKEN."""
    banked = 0
    running = 0
    for tok in tokens:
        if tok in _EN_GLUE:
            continue
        if tok in _EN_ONES:
            running += _EN_ONES[tok]
        elif tok in _EN_TENS:
            running += _EN_TENS[tok]
        elif tok in _EN_SCALE:
            running = max(running, 1) * _EN_SCALE[tok]
            banked += running
            running = 0
    return banked + running
# Match a maximal run of English number-word tokens.  Word-boundary
# lookarounds on each side so we don't eat 'one' inside 'someone' or
# 'ten' inside 'often'.  Hyphens are allowed between tokens
# ('twenty-five') and 'and' between scales ('one hundred and eighty').
# Alternatives are sorted longest-first so longer words win the
# alternation before their prefixes.
_EN_RUN = re.compile(
    r"(?:(?<=^)|(?<=[\s\.,!?;:\"\'\(\)\[\]\{\}\-]))"
    r"((?:" + "|".join(re.escape(w) for w in sorted(_EN_NUMBER_TOKEN, key=len, reverse=True))
    + r")(?:[\s\-]+(?:" + "|".join(re.escape(w) for w in sorted(_EN_NUMBER_TOKEN, key=len, reverse=True))
    + r"))*)"
    r"(?=$|[\s\.,!?;:\"\'\(\)\[\]\{\}\-])",
    re.IGNORECASE,
)
def _normalise_english(text: str) -> str:
    """Replace every maximal run of English number words in `text`
    with its digit form.

    Runs consisting solely of glue ('and') and runs that fail to
    evaluate are left untouched.  A single ones-word ('one' in 'one of
    the things') IS replaced — over-digitising is safe because the
    dispatcher only acts on digits that appear inside a parametric
    pattern context anyway."""
    def _replace_run(match):
        span = match.group(1)
        # Tokenise on whitespace and hyphens, dropping empties.
        tokens = [t for t in re.split(r"[\s\-]+", span.lower()) if t]
        meaningful = [t for t in tokens if t not in _EN_GLUE]
        if not meaningful:
            # A bare glue word ('and') is not a number — keep as-is.
            return span
        try:
            return str(_en_words_to_int(meaningful))
        except Exception:
            # Defensive: an unparsable run is safer left verbatim.
            return span
    return _EN_RUN.sub(_replace_run, text)
# ─── Arabic ──────────────────────────────────────────────────────
#
# Arabic spelled-out numbers.  The literal list lives in
# Config/language_tables.json under arabic_numbers.literals; adding
# new dialectal variants (Egyptian / Maghrebi forms) is a JSON edit,
# not a Python change.  JSON order is preserved here, and
# _normalise_arabic re-sorts longest-first before substituting so
# multi-token phrases claim their span before their prefixes.
from Voice._language_tables import arabic_number_literals
_AR_LITERALS = arabic_number_literals()
def _normalise_arabic(text: str) -> str:
    """Replace Arabic spelled-out number literals in `text` with
    digits.

    Literals are applied longest-first so multi-word phrases claim
    their span before any single-word prefix does."""
    for literal, value in sorted(
            _AR_LITERALS, key=lambda p: len(p[0]), reverse=True):
        # Arabic-letter lookarounds act as word boundaries so e.g.
        # 'خمس' inside 'خمسة' is not consumed; punctuation, whitespace
        # and string edges all count as boundaries.
        bounded = (
            r"(?<![ء-ي])"
            + re.escape(literal)
            + r"(?![ء-ي])"
        )
        text = re.sub(bounded, str(value), text)
    return text
# ─── Public ─────────────────────────────────────────────────────
def normalise_numbers(text: str) -> str:
    """Convert spelled-out numbers in `text` to digits.

    Idempotent — a second pass is a no-op — and the input comes back
    unchanged when it contains no recognisable number words.

    Pass order matters: integer words go FIRST ('three and a half' →
    '3 and a half'), then fractions ('3 and a half' → '3.5'); the
    fraction pass needs digits to attach to."""
    if not text:
        return text
    result = _normalise_english(text)
    result = _normalise_arabic(result)
    return _apply_fractions(result)
# ─── Fraction parser (English + Arabic) ──────────────────────────
#
# Tables loaded from Config/language_tables.json.  Two flavours:
#   - additive : combines with a preceding digit ('3 and a half'
#                / '3 ونصف' → 3.5).  Includes a special-case
#                handler for Arabic 'N <unit> ونصف' where the
#                fraction trails after the unit noun.
#   - leading  : standalone before a unit ('half a meter' / 'نصف
#                متر' → 0.5 meter / 0.5 متر).
from Voice._language_tables import (
    english_fractions_additive, english_fractions_leading,
    arabic_fractions_additive, arabic_fractions_leading,
    arabic_unit_words,
)
# word → fraction-value maps (presumably e.g. 'half' → 0.5 per the
# _apply_fractions docstring) plus the Arabic unit-noun list used by
# the trailing-fraction pattern.
_EN_FRAC_ADD = english_fractions_additive()
_EN_FRAC_LEADING = english_fractions_leading()
_AR_FRAC_ADD = arabic_fractions_additive()
_AR_FRAC_LEADING = arabic_fractions_leading()
_AR_UNITS = arabic_unit_words()
def _fmt_decimal(v: float) -> str:
"""Format a float without trailing zeros: 3.0 → '3', 3.5 → '3.5',
1.79 '1.79'."""
if v == int(v):
return str(int(v))
s = "{:.4f}".format(v).rstrip("0").rstrip(".")
return s
def _apply_fractions(text: str) -> str:
    """Convert fractional expressions in *text* to decimal digits.

    Runs up to five independent regex passes, each gated on its table
    being non-empty. Pass ORDER matters: the English additive pass must
    run before the English leading pass, otherwise '3 and a half' would
    first be mangled by the leading rule ('a half ...' -> '0.5 ...').

    Patterns handled (all idempotent — digit-only text is left alone):
        EN  '3 and a half steps' / '3 and half'  -> '3.5 steps'
        EN  'half a meter' / 'half meter'        -> '0.5 meter'
        EN  'a quarter step'                     -> '0.25 step'
        AR  '3 ونصف خطوات'                       -> '3.5 خطوات'
        AR  '3 خطوات ونصف' (trailing fraction)   -> '3.5 خطوات'
        AR  'نصف متر'                            -> '0.5 متر'
        AR  'متر ونصف' with no preceding digit is left UNCHANGED by
            these passes: the leading rule's lookbehind rejects 'نصف'
            preceded by the Arabic letter 'و', and the additive rules
            require digits before the fraction. The explicit
            '1 ونصف متر' form is the supported phrasing.

    Alternations are built longest-key-first so a longer fraction word
    wins over a shorter prefix of itself.
    """
    if not text:
        return text
    # ── ENGLISH additive: '<N> and (a/an)? <frac>' ─────────────────
    en_add_alt = "|".join(re.escape(k) for k in sorted(_EN_FRAC_ADD.keys(), key=len, reverse=True))
    if en_add_alt:
        def _en_add(m):
            # group(1) = whole/decimal count, group(2) = fraction word.
            n = float(m.group(1))
            frac_word = m.group(2).lower()
            return _fmt_decimal(n + _EN_FRAC_ADD[frac_word])
        text = re.sub(
            r"\b(\d+(?:\.\d+)?)\s+and\s+(?:an?\s+)?(" + en_add_alt + r")\b",
            _en_add, text, flags=re.IGNORECASE,
        )
    # ── ENGLISH leading: '(a/an)? <frac> (a/an)? <noun>' ───────────
    en_lead_alt = "|".join(re.escape(k) for k in sorted(_EN_FRAC_LEADING.keys(), key=len, reverse=True))
    if en_lead_alt:
        def _en_lead(m):
            frac_word = m.group(1).lower()
            # Trailing space re-separates the decimal from the noun that
            # the lookahead left in place.
            return _fmt_decimal(_EN_FRAC_LEADING[frac_word]) + " "
        # Lookahead (not capture) on the noun keeps it out of the
        # replacement; the optional articles are consumed.
        text = re.sub(
            r"\b(?:an?\s+)?(" + en_lead_alt + r")\s+(?:an?\s+)?(?=[A-Za-z])",
            _en_lead, text, flags=re.IGNORECASE,
        )
    # ── ARABIC trailing-fraction with unit: 'N <unit> ونصف' ─────────
    if _AR_FRAC_ADD and _AR_UNITS:
        unit_alt = "|".join(re.escape(u) for u in sorted(_AR_UNITS, key=len, reverse=True))
        ar_add_alt = "|".join(re.escape(k) for k in sorted(_AR_FRAC_ADD.keys(), key=len, reverse=True))
        def _ar_trail(m):
            # 'N <unit> و<frac>' -> '<N+frac> <unit>' (fraction folded
            # into the count, unit noun kept after it).
            n = float(m.group(1))
            unit = m.group(2)
            frac_word = m.group(3)
            return "{} {}".format(_fmt_decimal(n + _AR_FRAC_ADD[frac_word]), unit)
        text = re.sub(
            r"(\d+(?:\.\d+)?)\s+(" + unit_alt + r")\s+و(" + ar_add_alt + r")\b",
            _ar_trail, text,
        )
    # ── ARABIC additive: 'N ونصف' / 'N و نصف' ──────────────────────
    if _AR_FRAC_ADD:
        ar_add_alt = "|".join(re.escape(k) for k in sorted(_AR_FRAC_ADD.keys(), key=len, reverse=True))
        def _ar_add(m):
            n = float(m.group(1))
            frac_word = m.group(2)
            return _fmt_decimal(n + _AR_FRAC_ADD[frac_word])
        # '\s*و\s*' accepts the conjunction both attached and spaced.
        text = re.sub(
            r"(\d+(?:\.\d+)?)\s*و\s*(" + ar_add_alt + r")\b",
            _ar_add, text,
        )
    # ── ARABIC leading: '<frac> <noun>' ────────────────────────────
    if _AR_FRAC_LEADING:
        ar_lead_alt = "|".join(re.escape(k) for k in sorted(_AR_FRAC_LEADING.keys(), key=len, reverse=True))
        def _ar_lead(m):
            frac_word = m.group(1)
            return _fmt_decimal(_AR_FRAC_LEADING[frac_word]) + " "
        # Boundary: previous char is non-Arabic-letter, next char is
        # an Arabic letter (so 'نصف' inside another word is preserved).
        text = re.sub(
            r"(?<![ء-يً-ٟ])(" + ar_lead_alt + r")\s+(?=[ء-ي])",
            _ar_lead, text,
        )
    return text
# Standalone smoke check — run this module directly to eyeball the
# normaliser against known English + Arabic phrasings; exits via the
# final pass/fail tally, no test framework needed.
if __name__ == "__main__":
    # (input, expected-after-normalisation) pairs. The last three guard
    # regressions: already-digit text must be idempotent, and number
    # words embedded in other words ('one' in 'someone', 'ten' in
    # 'often') must NOT be converted.
    cases = [
        ("Walking forward five steps.", "Walking forward 5 steps."),
        ("turn ninety degrees", "turn 90 degrees"),
        ("turn left ninety degrees.", "turn left 90 degrees."),
        ("turn three hundred sixty degrees", "turn 360 degrees"),
        ("turn one hundred and eighty deg", "turn 180 deg"),
        ("walk forty-five meters", "walk 45 meters"),
        ("أستدير تسعين درجة", "أستدير 90 درجة"),
        ("لف يمين تسعون درجة", "لف يمين 90 درجة"),
        ("أمشي خمس خطوات", "أمشي 5 خطوات"),
        ("أستدير مائة وثمانين درجة", "أستدير 180 درجة"),
        ("turn 90 degrees", "turn 90 degrees"), # already digits
        ("I see someone there.", "I see someone there."), # 'one' inside 'someone' must NOT be eaten
        ("often", "often"), # 'ten' inside 'often' must NOT be eaten
    ]
    ok = bad = 0
    for inp, expected in cases:
        got = normalise_numbers(inp)
        # NOTE(review): both branches assign "" — pass/fail marker glyphs
        # (e.g. check/cross marks) look like they were stripped somewhere;
        # the ok/bad counters below still distinguish outcomes. TODO
        # confirm the intended marker characters.
        mark = "" if got == expected else ""
        if got == expected: ok += 1
        else: bad += 1
        print(f" {mark} {inp!r:45s} -> {got!r}")
    print(f"\n{ok}/{ok+bad} passed")

251
Voice/sequences.py Normal file
View File

@ -0,0 +1,251 @@
"""
sequences.py user-defined motion macros (record / save / play).
A sequence is a named list of canonical motion commands. Saved as JSON
under `Data/Sequences/<name>.json`. Created via voice during a recording
session (the user dictates a series of motions, then names the macro);
played back later by the same name. Sequence content is the SAME format
the dispatcher already enqueues — strings like 'turn left 90 degrees',
'walk forward 5 steps', 'sit down' — so playback simply re-enqueues
each one through the existing motion worker.
State model:
IDLE no recording in progress.
RECORDING(name) capturing dispatched commands into a buffer.
The optional `name` is set if the user pre-named
the sequence ('start recording as my-greet'); if
not, the user must name it on save.
PLAYING(name, queue) currently re-issuing commands from `queue`.
(Tracked just for status; the work is done by
the motion worker, not this module.)
Threading:
All state mutations are guarded by `_lock`. The recording buffer is
appended from the dispatcher thread; saved/loaded from any thread.
Public API (used by Voice/marcus_voice.py):
Sequences() singleton
seq.start_recording(name=None) begin capture
seq.cancel_recording() drop without saving
seq.save_recording(name) finalise + write JSON
seq.record_command(canonical) append to buffer if recording
seq.play(name) -> list[str] | None return commands to enqueue
seq.list() -> list[str] saved names
seq.delete(name) -> bool remove file
seq.recording_state() -> dict for status / persona feedback
"""
from __future__ import annotations
import json
import os
import re
import threading
import time
from typing import Optional, List
# Resolve Data/Sequences/ off PROJECT_ROOT so this module works whether
# Marcus is run from the repo root or invoked via systemd. PROJECT_ROOT
# is set by Core/env_loader.
try:
from Core.env_loader import PROJECT_ROOT # type: ignore
except Exception:
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
_SEQUENCE_DIR = os.path.join(PROJECT_ROOT, "Data", "Sequences")
_NAME_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,30}$", re.I)
# Skip these canonicals when capturing — control commands, not motions.
# Loaded from Config/language_tables.json::sequence_never_record.canonicals.
# Adding a new control canonical that should be excluded from recordings
# is a JSON edit, not a Python change.
from Voice._language_tables import sequence_never_record as _load_never_record
_NEVER_RECORD = _load_never_record()
def _safe_name(name: str) -> Optional[str]:
    """Normalise + validate a sequence name; None when unusable.

    Lower-cases the name so 'My-Greet' and 'my-greet' refer to the
    same file — voice transcripts often vary capitalisation — and maps
    spaces to dashes before validating: an alphanumeric first char
    followed by at most 30 chars drawn from [a-z0-9_-].
    """
    if not name:
        return None
    candidate = name.strip().lower().replace(" ", "-")
    # Same grammar as the module's _NAME_RE; after lower() the
    # case-insensitive flag is irrelevant.
    if re.fullmatch(r"[a-z0-9][a-z0-9_-]{0,30}", candidate):
        return candidate
    return None
class Sequences:
    """Thread-safe recorder/player for named motion macros.

    At most one recording is in progress at a time. Captured canonicals
    are buffered in memory and only hit disk on save_recording(), as
    Data/Sequences/<name>.json. Playback, listing and deletion read the
    filesystem directly, so saved macros survive restarts. Every state
    mutation happens under `_lock`; dict results (not exceptions) report
    success/failure to the voice layer.
    """
    def __init__(self):
        self._lock = threading.Lock()
        # IDLE <-> RECORDING state; all four fields mutate together.
        self._recording: bool = False
        self._record_name: Optional[str] = None   # pre-set name, if any
        self._record_buf: List[str] = []          # captured canonicals
        self._record_started_at: float = 0.0      # epoch secs; 0.0 when idle
        os.makedirs(_SEQUENCE_DIR, exist_ok=True)
    # ─── recording ─────────────────────────────────────────
    def start_recording(self, name: Optional[str] = None) -> dict:
        """Begin capturing. If `name` provided + valid, sequence is
        pre-named (save_recording can be called with no arg). Returns
        a small dict suitable for logging / state-injection.

        Refuses to start while another recording is active — the
        caller gets {"ok": False, "reason": "already recording"} with
        the active name so it can report the conflict."""
        with self._lock:
            if self._recording:
                return {"ok": False, "reason": "already recording",
                        "name": self._record_name}
            self._recording = True
            # An invalid pre-name degrades to None: user must name on save.
            self._record_name = _safe_name(name) if name else None
            self._record_buf = []
            self._record_started_at = time.time()
            return {"ok": True, "name": self._record_name}
    def cancel_recording(self) -> dict:
        """Abandon the current recording without writing anything.

        Returns {"ok": True, "name": ..., "discarded": <buffered count>}
        on success, or {"ok": False, "reason": "not recording"}."""
        with self._lock:
            if not self._recording:
                return {"ok": False, "reason": "not recording"}
            n = self._record_name
            count = len(self._record_buf)
            # Reset all four state fields back to IDLE.
            self._recording = False
            self._record_name = None
            self._record_buf = []
            self._record_started_at = 0.0
            return {"ok": True, "name": n, "discarded": count}
    def save_recording(self, name: Optional[str] = None) -> dict:
        """Persist the buffered commands as <name>.json. Resolves the
        name in this order:
            1. explicit `name` arg
            2. name set by start_recording
            3. None — error.
        Empty buffers are NOT saved (would produce a dud macro).

        Note: an invalid explicit `name` silently falls back to the
        pre-set name via the `or` chain. On any outcome other than a
        write failure the recording state returns to IDLE."""
        with self._lock:
            if not self._recording:
                return {"ok": False, "reason": "not recording"}
            target = _safe_name(name) or self._record_name
            if not target:
                return {"ok": False, "reason": "no name supplied"}
            if not self._record_buf:
                # Buffer is already empty so only the flags are reset;
                # _record_started_at stays stale until the next
                # start_recording overwrites it.
                self._recording = False
                self._record_name = None
                return {"ok": False, "reason": "empty sequence (nothing recorded)",
                        "name": target}
            payload = {
                "name": target,
                "created_at": self._record_started_at,
                "saved_at": time.time(),
                # Copy so the payload is decoupled from the buffer we
                # clear below.
                "commands": list(self._record_buf),
            }
            path = os.path.join(_SEQUENCE_DIR, target + ".json")
            try:
                with open(path, "w", encoding="utf-8") as f:
                    json.dump(payload, f, indent=2, ensure_ascii=False)
            except Exception as e:
                # Write failed: recording state is intentionally kept so
                # the user can retry the save with another name/path.
                return {"ok": False, "reason": "write failed: %s" % e}
            count = len(self._record_buf)
            self._recording = False
            self._record_name = None
            self._record_buf = []
            self._record_started_at = 0.0
            return {"ok": True, "name": target, "count": count, "path": path}
    def record_command(self, canonical: str) -> bool:
        """Append a canonical to the recording buffer (if recording).
        Returns True if appended, False otherwise. Skips control commands
        (see _NEVER_RECORD); the membership test is on the lower-cased,
        stripped canonical."""
        if not canonical:
            return False
        with self._lock:
            if not self._recording:
                return False
            c = canonical.strip()
            if not c or c.lower() in _NEVER_RECORD:
                return False
            self._record_buf.append(c)
            return True
    # ─── playback ─────────────────────────────────────────
    def play(self, name: str) -> Optional[List[str]]:
        """Load the sequence's commands. Caller is responsible for
        enqueueing them through the motion worker. Returns None if the
        sequence doesn't exist or is malformed (unreadable JSON, or a
        'commands' value that isn't a list of strings); blank entries
        are filtered out."""
        target = _safe_name(name)
        if not target:
            return None
        path = os.path.join(_SEQUENCE_DIR, target + ".json")
        if not os.path.isfile(path):
            return None
        try:
            with open(path, "r", encoding="utf-8") as f:
                payload = json.load(f)
        except Exception:
            return None
        cmds = payload.get("commands")
        if not isinstance(cmds, list) or not all(isinstance(c, str) for c in cmds):
            return None
        return [c for c in cmds if c.strip()]
    def list(self) -> List[str]:
        """Sorted list of saved sequence names (stems of *.json files
        that also satisfy the name grammar — strays are ignored)."""
        try:
            files = os.listdir(_SEQUENCE_DIR)
        except FileNotFoundError:
            return []
        out = sorted(
            os.path.splitext(f)[0]
            for f in files
            if f.endswith(".json") and _NAME_RE.match(os.path.splitext(f)[0])
        )
        return out
    def delete(self, name: str) -> bool:
        """Remove the sequence file for `name`. True on success, False
        for an invalid name, a missing file, or a failed unlink."""
        target = _safe_name(name)
        if not target:
            return False
        path = os.path.join(_SEQUENCE_DIR, target + ".json")
        if not os.path.isfile(path):
            return False
        try:
            os.remove(path)
            return True
        except Exception:
            return False
    # ─── status ───────────────────────────────────────────
    def recording_state(self) -> dict:
        """Snapshot of the recording state for status lines / persona
        feedback: recording flag, pre-set name, buffered command count
        and elapsed seconds (0.0 while idle)."""
        with self._lock:
            return {
                "recording": self._recording,
                "name": self._record_name,
                "count": len(self._record_buf),
                "elapsed_sec": (time.time() - self._record_started_at
                                if self._recording else 0.0),
            }
# Process-wide singleton — the dispatcher and the worker share one.
_INSTANCE: Optional[Sequences] = None
_INSTANCE_LOCK = threading.Lock()
def get_sequences() -> Sequences:
    """Return the process-wide Sequences singleton, building it lazily.

    Creation is serialised by _INSTANCE_LOCK so two threads racing on
    the first call cannot construct separate instances.
    """
    global _INSTANCE
    with _INSTANCE_LOCK:
        if _INSTANCE is None:
            _INSTANCE = Sequences()
    return _INSTANCE
# Standalone smoke check — exercises the full record/save/play/delete
# round-trip against the real Data/Sequences/ directory.
if __name__ == "__main__":
    seq = Sequences()
    print(seq.start_recording("test-greet"))
    # Capture three motions; each call prints True when buffered.
    for cmd in ("turn right 45 degrees", "wave hello", "turn left 45 degrees"):
        print(seq.record_command(cmd))
    print(seq.save_recording())
    print("listed:", seq.list())
    print("play:", seq.play("test-greet"))
    print(seq.delete("test-greet"))
    print("after delete:", seq.list())

15
logs/command_history.json Normal file
View File

@ -0,0 +1,15 @@
{
"commands": [
"move backward",
"resume motion",
"come here",
"turn right 90 degrees",
"walk 1 steps",
"stop",
"walk backward 20 steps",
"stop",
"stop",
"come here"
],
"saved_at": 1777353109.489595
}