Update 2026-07-04 23:27:55

This commit is contained in:
kassam 2026-07-04 23:27:56 +04:00
parent b71130dc50
commit da4f4c714b
58 changed files with 8478 additions and 558 deletions

View File

@ -12,6 +12,19 @@ SANAD_AUDIO_PROFILE=builtin
# DDS interface to the G1 firmware.
SANAD_DDS_INTERFACE=eth0
# --- Plugged/Bluetooth speaker volume (Anker/JBL) ---------------------------
# The volume slider drives ALL speaker types like SanadV3: the G1 chest over DDS
# (always) AND the active PulseAudio sink (plugged/BT). The container runs as root,
# so it needs the HOST pulse socket + cookie to reach a uid-1000 PulseAudio.
# One-time host setup (stable socket at boot, no stray dir):
# loginctl enable-linger unitree
# Override these only if your pulse runs under a different uid or cookie path:
SANAD_PULSE_DIR=/run/user/1000/pulse
PULSE_SERVER=unix:/run/user/1000/pulse/native
# Cookie location varies by distro; if plugged volume/output is silent, try
# /home/<user>/.config/pulse/cookie instead and mount it accordingly.
PULSE_COOKIE=/run/user/1000/pulse/cookie
# Conversation language (overrides the license `language` feature if set).
# e.g. ar, en, hi, ur, zh, ru, fr ... (any Gemini-supported language)
SANAD_LANGUAGE=

View File

@ -22,9 +22,12 @@ ENV DEBIAN_FRONTEND=noninteractive \
WORKDIR /app
# System deps: shared (audio) + P1 (PortAudio + a C toolchain so pyaudio's
# extension compiles on the slim base).
# extension compiles on the slim base) + iproute2 (`ip`).
# `ip` is REQUIRED for chest ('builtin') audio: voice/audio_io.py _find_g1_local_ip()
# runs `ip -4 -o addr` to find the host's 192.168.123.x address for the G1 chest-mic
# UDP multicast — without it the live voice subprocess crashes on FileNotFoundError.
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates libsndfile1 alsa-utils pulseaudio-utils \
ca-certificates libsndfile1 alsa-utils pulseaudio-utils iproute2 \
portaudio19-dev libportaudio2 build-essential python3-dev \
&& rm -rf /var/lib/apt/lists/*

View File

@ -49,9 +49,16 @@ log = get_logger("pkg1.app")
PACKAGE = "P1"
PACKAGE_TITLE = "Sanad — Basic Communication (P1)"
# SPA tab ids (from dashboard/static/index.html) this package SHOWS.
# SanadV3 SPA tab ids (nav is <div class="tab" onclick="switchTab('X')">, content #tab-X).
# P1 (Basic Communication) shows only the comms tabs; everything else is hidden on /full.
P1_SPA_TABS = ["operations", "voice", "recordings", "settings"]
P1_SPA_HIDE = ["motion", "recognition", "temp", "controller", "terminal"]
P1_SPA_HIDE = ["motion", "controller", "navigation", "livemap", "mapeditor",
"recognition", "mask", "temp", "terminal"]
# Routers P1 does NOT mount — the SPA polls these; short-circuit client-side so
# /full raises no "Not Found" toasts for an unsold feature.
P1_UNMOUNTED = ["/api/nav", "/api/zones", "/api/recognition", "/api/controller",
"/api/motion", "/api/skills", "/api/macros", "/api/replay",
"/api/wake-phrases", "/api/live-voice", "/api/scripts", "/api/mask"]
def _safe(name, factory):
@ -147,25 +154,22 @@ _P1_WS = ["log_stream"]
def _tab_filter_snippet():
hide_ids = ",".join("#tab-%s" % t for t in P1_SPA_HIDE)
hide_words = ["motion", "recognition", "temperature", "controller",
"terminal", "replay", "macros", "zones", "places", "map", "tour"]
"""Hide non-P1 tabs/pills + short-circuit unmounted-router polls on /full.
The SanadV3 nav is <div class="tab" onclick="switchTab('X')">, so hide by the
stable onclick/id via CSS (applies before render, no DOMContentLoaded race)."""
import json as _json
css = ",".join(".tab[onclick*=\"switchTab('%s')\"],#tab-%s" % (t, t) for t in P1_SPA_HIDE) + ",#status-pills"
return (
"<style>%s{display:none!important}</style>"
"<script>window.SANAD_PACKAGE=%s;"
"document.addEventListener('DOMContentLoaded',function(){"
"var hideWords=%s;"
"var nav=document.querySelectorAll('button,a,li,.nav-item,[role=\"tab\"]');"
"nav.forEach(function(el){var t=(el.textContent||'').trim().toLowerCase();"
"if(t&&t.length<20&&hideWords.indexOf(t)>=0){el.style.display='none';}"
"var oc=(el.getAttribute&&el.getAttribute('onclick'))||'';"
"%s.forEach(function(h){if(oc.indexOf('tab-'+h)>=0||oc.indexOf(\"'\"+h+\"'\")>=0){el.style.display='none';}});"
"});});</script>"
% (hide_ids,
_json.dumps({"name": PACKAGE, "title": PACKAGE_TITLE, "tabs": P1_SPA_TABS}),
_json.dumps(hide_words),
_json.dumps(P1_SPA_HIDE))
"(function(){var B=%s,_f=window.fetch;"
"window.fetch=function(i,o){try{var u=(typeof i==='string')?i:(i&&i.url)||'',"
"p=u.replace(/^https?:\\/\\/[^/]+/,'');"
"for(var k=0;k<B.length;k++){if(p.indexOf(B[k])===0)"
"return Promise.resolve(new Response('{}',{status:200,headers:{'Content-Type':'application/json'}}));}"
"}catch(e){}return _f.apply(this,arguments);};})();</script>"
% (css, _json.dumps({"name": PACKAGE, "title": PACKAGE_TITLE, "tabs": P1_SPA_TABS}),
_json.dumps(P1_UNMOUNTED))
)
@ -276,8 +280,18 @@ def build_app():
"loaded": loaded, "failed": failed})
try:
html = index.read_text(encoding="utf-8")
inject = _tab_filter_snippet() + _widget_html()
html = html.replace("</body>", inject + "</body>", 1) if "</body>" in html else html + inject
# Filter (CSS hide + fetch shim) in <head> so the fetch override is set
# BEFORE the SPA's body scripts/pollers run; the widget goes at body end.
filt = _tab_filter_snippet()
if "</head>" in html:
html = html.replace("</head>", filt + "</head>", 1)
elif "</body>" in html:
html = html.replace("</body>", filt + "</body>", 1)
else:
html = filt + html
w = _widget_html()
if w:
html = html.replace("</body>", w + "</body>", 1) if "</body>" in html else html + w
return HTMLResponse(html)
except OSError as exc:
return JSONResponse({"error": "index.html unreadable: %s" % exc}, status_code=500)

View File

@ -32,6 +32,10 @@ services:
SANAD_VOICE_BRAIN: gemini
SANAD_AUDIO_PROFILE: "${SANAD_AUDIO_PROFILE:-builtin}" # builtin (chest) | plugged (USB/Anker)
SANAD_DDS_INTERFACE: "${SANAD_DDS_INTERFACE:-eth0}"
# Plugged/Bluetooth audio (Anker USB, JBL BT) route through the HOST
# PulseAudio server — chest ("builtin") uses DDS and needs none of this.
PULSE_SERVER: "${PULSE_SERVER:-unix:/run/user/1000/pulse/native}"
PULSE_COOKIE: "${PULSE_COOKIE:-/run/user/1000/pulse/cookie}" # root→uid-1000 PA auth
SANAD_LICENSE: /etc/sanad/sanad.lic
SANAD_PUBKEY: /etc/sanad/pubkey.ed25519
SANAD_LICENSE_BIND: "${SANAD_LICENSE_BIND:-0}" # 1 = enforce machine fingerprint
@ -40,6 +44,12 @@ services:
- "/dev/snd:/dev/snd" # USB/plugged audio (Anker) via ALSA/Pulse
volumes:
- "${SANAD_LICENSE_FILE:-./license/sanad.lic.example}:/etc/sanad/sanad.lic:ro"
# Plugged/Bluetooth audio (Anker/JBL): mount the host PulseAudio runtime dir
# (socket + auth cookie) so the container sets ANY sink's volume/output — the
# volume slider then drives chest (DDS) AND plugged (PA), exactly like SanadV3.
# Chest ("builtin") audio uses DDS only and needs none of this. One-time host
# setup for a stable boot-time socket: `loginctl enable-linger unitree`.
- "${SANAD_PULSE_DIR:-/run/user/1000/pulse}:/run/user/1000/pulse"
- "./data:/app/Sanad/data" # persist persona/recordings/config on host
# Bound license (SANAD_LICENSE_BIND=1) also needs the host machine-id so the
# in-container fingerprint matches the host — uncomment:

View File

@ -1,21 +1,21 @@
#!/usr/bin/env bash
# Refresh the vendored Sanad engine + sanad_pkg from a full monorepo checkout.
# P1 ships a SELF-CONTAINED copy of the Sanad source under ./vendor so the repo
# builds standalone. When Sanad/ changes upstream, run this from a checkout that
# has Project/Sanad + Project/Packages, then commit the updated ./vendor.
# Refresh the vendored engine + sanad_pkg from a full monorepo checkout.
# P1 ships a SELF-CONTAINED copy under ./vendor so the repo builds standalone.
# P1 now vendors from SanadV3 (the active engine — same evolved voice/audio/gemini
# as P2; P1 just mounts only its comms slice). Override with SANAD_SRC=/abs/path.
#
# ./sync_vendor.sh [/path/to/Project] # default: ../../ (Packages/.. = Project/)
# ./sync_vendor.sh [/path/to/G1] # default: ../../ (G1/Packages/.. = G1/)
#
# Excludes runtime data (recordings/audio/faces), Logs, caches, the 53M temp3d
# 3D viewer (P1 hides that tab), and tests — keeps code + config + dashboard SPA.
# Excludes runtime data (recordings/audio/faces), Logs, caches, the temp3d 3D
# viewer (P1 hides that tab), and tests — keeps code + config + dashboard SPA.
set -euo pipefail
HERE="$(cd "$(dirname "$0")" && pwd)"
PROJECT="${1:-$(cd "$HERE/../.." && pwd)}" # Packages/Sanad_Package_1 -> ../../ = Project/
SRC_SANAD="$PROJECT/Sanad"
PROJECT="${1:-$(cd "$HERE/../.." && pwd)}" # G1/Packages/Sanad_Package_1 -> ../../ = G1/
SRC_SANAD="${SANAD_SRC:-$PROJECT/Sanadv3}"
SRC_PKG="$PROJECT/Packages/sanad_pkg"
SRC_LIC="$PROJECT/Packages/licensing"
[ -d "$SRC_SANAD" ] || { echo "ERROR: no Sanad/ at $SRC_SANAD (pass the Project/ path)"; exit 1; }
[ -d "$SRC_SANAD" ] || { echo "ERROR: no engine at $SRC_SANAD (set SANAD_SRC=/path)"; exit 1; }
[ -d "$SRC_PKG" ] || { echo "ERROR: no sanad_pkg at $SRC_PKG"; exit 1; }
echo ">> vendoring Sanad engine from $SRC_SANAD"

136
vendor/Sanad/FEATURES.md vendored Normal file
View File

@ -0,0 +1,136 @@
# SanadV3 — Feature Catalog
Sanad is a bilingual (Arabic/English) humanoid receptionist/assistant on a
Unitree G1 (Jetson Orin NX, ROS 2 Foxy, Livox MID-360). This catalogs
**what's built today** (Part A) and **what's on the roadmap** (Part B).
---
# Part A — Current features (built & running)
Verified from the live subsystem registry (19 subsystems), dashboard tabs
(12), and API routers (22).
## 1. Voice & Conversation
- **Gemini live voice** — real-time bilingual AR/EN spoken conversation (native-audio model)
- **Offline brain** — local pipeline via `ollama` (`SANAD_VOICE_BRAIN=local`), no cloud
- **Wake phrases** — configurable wake-word manager
- **Typed replay** — type text, robot speaks it (with speaker-monitor capture)
- **Local TTS** — on-device text-to-speech engine
- **Prompt management** — edit the system prompt from the dashboard
- **Lip-sync** — mask mouth driven by TTS `MOUTH` markers
- **Barge-in** — interrupt speech (volume-scaled threshold)
## 2. Motion & Arm
- **35 arm actions** — 16 SDK built-ins + 19 custom JSONL motions
- **Macro record / playback** — capture and replay motion sequences
- **Teaching mode** — kinesthetic teach-and-repeat
- **Skills** — composed higher-level behaviors (skills.json)
- **Movement dispatch** — voice → motion (53 fixed + 10 parametric phrases, cooldown-gated)
- **Arm motion-block** — auto-inhibits arm moves while locomotion is active (safety interlock)
## 3. Locomotion
- **LocoClient + MotionSwitcher** — walk / pose control via Unitree SDK (eth0)
- **E-STOP** — dashboard kill button
- **Single Ctrl+C teardown** — one signal cleanly stops every subsystem (~2s)
## 4. LED Face Mask
- **Animated expressions** — neutral, smile, blink, look L/R, talk13, surprised, sad
- **Gestural-speaking events** — face reacts while speaking
- **Lip-sync** — mouth animates to speech
## 5. Vision & Recognition
- **Face recognition** — identify people via camera
- **Face gallery** — enroll/manage known faces
- **Zone gallery / zones** — visual zone recognition
- **Camera feed** — attached to the live voice subprocess (vision-in-the-loop)
## 6. Navigation (web_nav3 integration)
- **Live Map tab** — full embedded web_nav3 dashboard (set-pose, goals, bringup)
- **Navigation tab** — native canvas viewer (saved/live map, places, missions)
- **map_relay** — re-publishes the latched `/map` @1Hz so the map renders even when stationary
- **Saved maps** — load & view a pre-built `.db` (localize mode)
- **Places** — save named poses, one-click "Go"
- **Missions** — multi-waypoint routes (defined in web_nav3)
- **Cancel goal** — stop an active goal without tearing down bringup
- **SLAM** — RTABMap LiDAR-ICP, drift-corrected mapping/localization
## 7. Audio
- **Device manager** — sink/source selection, live refresh
- **Audio profiles** — builtin / anker / hollyland_builtin (auto-switch on plug/unplug)
## 8. Operations, System & Diagnostics
- **System control** — start/stop subsystems, status
- **Temperature monitor** — motor temps (live websocket stream)
- **Controller** — gamepad/teleop input
- **Web terminal** — shell in the browser (websocket)
- **Logs** — live log stream
- **Recordings & replay** — record/playback sessions
- **Scripts** — run saved scripts
## Dashboard infrastructure
- 12 tabs, **fault-isolated routers** (one broken module never breaks the dashboard)
- WebSocket streams: log_stream, motor_temps, terminal
- No-store HTML (no stale-cache 404s after deploy)
- Lazy subsystem imports (missing dep → that subsystem unavailable, rest runs)
---
# Part B — Roadmap (to add)
Tiers = priority. 🏗️ = load-bearing · ⚠️ = Foxy constraint.
## Tier 1 — Autonomous behaviors (the product)
1. **Voice-driven navigation** — "Sanad, go to the lobby" → nav goal
2. **Greeter mission** — recognized face → navigate → greet → express
3. **Named-person greeting** — identity → personalized line
4. **Patrol / guided tours** — ordered places, speech at each stop
5. **Return-to-base / dock-on-idle** — auto-home on idle/low battery
## Tier 2 — Navigation & map (harden + edit)
6. 🏗️ **Map republish relay** — ✅ DONE (map_relay)
7. **Click-to-goal on Nav tab canvas**
8. **Live nav telemetry** — distance/ETA/waypoint, "arrived" toast
9. **Battery + nav-state status bar**
10. **Geofence zones on the map**
11. **Cancel-goal button** — ✅ DONE
### Map editing & annotation (all build on #6)
12. **Erase tool** — paint cells free; wipe ghost obstacles + the SLAM "spokes"
13. **Obstacle paint ("black points" / virtual walls)** — ⚠️ Foxy-safe KeepoutFilter substitute
14. **Shape tools + brush size** — line/rectangle/polygon
15. **Non-destructive overlay + undo/redo**
16. **Persist & auto-reload edits per map**
17. **Crop / trim map bounds**
## Tier 3 — Voice & interaction
18. **Barge-in from dashboard**
19. **Quick-phrase soundboard**
20. **Conversation memory / visitor log**
21. **Per-speaker AR/EN auto-detect**
22. **Scheduled announcements**
23. **Bake edited map → PGM/YAML** (static map_server deploy)
## Tier 4 — Face & presence
24. **Gaze / head-track recognized face**
25. **Emotion-from-context** (sentiment → expression)
26. **Idle breathing / look-around**
27. **Lip-sync to TTS amplitude** (enhance existing markers)
## Tier 5 — Operator, fleet & reliability
28. 🏗️ **Global E-STOP button** — ✅ exists; surface consistently
29. **Health watchdog** — auto-restart dead subsystem + alert
30. **Per-subsystem enable/disable toggles**
31. **Behavior recorder → replay** (nav+voice timelines)
32. **Mission editor UI** (visual sequence builder)
33. **Remote access / tunnel**
34. **Reverse-proxy web_nav3 through :8001** — one origin, no iframe cross-port issues
## Tier 6 — Future / blocked
35. **Speed / caution zones** — needs Galactic SpeedFilter or custom layer
36. **Multi-robot fleet** (SanadV3 ↔ BotBrain) — needs LocoClient arbitration + coordinator
---
### Recommended next build order
**#1 voice→nav** → **#2 greeter mission** (the product), then **#12/#13 map editing**
(clean the spokes + virtual walls). #6 republish relay and #11 cancel are already done.

View File

@ -255,12 +255,19 @@ class LocoController:
period = max(0.02, min(0.1, self._wd_timeout / 2.0))
while not self._wd_stop.is_set():
fire = False
park = False
# Read-and-decide under the lock (atomic check-then-act); the actual
# StopMove runs after release so the critical section stays tiny.
with self._lock:
if self._teleop_active and (time.monotonic() - self._last_move_ts) > self._wd_timeout:
self._teleop_active = False
fire = True
# Self-park once there's nothing left to guard. The Gemini
# dispatch path uses step() directly and never calls
# disarm_movement(), so without this the watchdog would spin for
# the rest of the process lifetime after the first voice step.
if not self._armed and not self._teleop_active and not self._discrete_busy:
park = True
if fire:
log.warning("watchdog: teleop setpoint stale (>%.2fs) — StopMove",
self._wd_timeout)
@ -268,6 +275,12 @@ class LocoController:
self._raw_stop()
except Exception:
log.exception("watchdog StopMove failed")
if park:
# Nothing left to guard — stop the thread (a later move/step
# re-arms it via _start_watchdog()). Done AFTER any stale-stop
# above so we never skip a pending StopMove.
self._wd_stop.set()
break
self._wd_stop.wait(period)
def _raw_stop(self) -> bool:
@ -560,8 +573,21 @@ class LocoController:
# ── shutdown helper ──────────────────────────────────────────────────────
def shutdown(self):
"""Best-effort StopMove + disarm for process shutdown."""
"""Best-effort StopMove + disarm for process shutdown.
Uses _raw_stop() (NOT estop()) so teardown never builds a brand-new
LocoClient: estop() calls _ensure_client(), which would lazily construct a client
and run bot.Init() (a DDS RPC) during interpreter teardown when we were
armed-but-never-built (Enable movement clicked, never moved, then
Ctrl+C). _raw_stop() no-ops when no client was ever created. Bump the
stop generation so any in-flight motion bails immediately."""
with self._lock:
self._stop_gen += 1
self._teleop_active = False
self._cur_v = (0.0, 0.0, 0.0)
try:
self.estop()
self._raw_stop() # no-op when _bot is None — never re-inits
except Exception:
log.exception("StopMove on shutdown failed")
finally:
self.disarm_movement()

View File

@ -271,7 +271,9 @@ def _resolve_dashboard_host() -> str:
DASHBOARD_HOST = _resolve_dashboard_host()
DASHBOARD_PORT = 8000
# Canonical SanadV3 port (matches shell_scripts/start_all.sh + docs). The
# legacy Sanad ran on :8000; SanadV3 is :8001 to never collide with it.
DASHBOARD_PORT = 8001
# -- Local TTS --
LOCAL_TTS_MODEL = "MBZUAI/speecht5_tts_clartts_ar"
@ -365,6 +367,11 @@ LIVE_TUNE: dict[str, str] = {
CAMERA_SERVICE_PORT = 8091
DIRECT_CAMERA_URL = f"http://127.0.0.1:{CAMERA_SERVICE_PORT}"
# -- Navigation (web_nav3 / rosbridge) --
WEB_NAV3_URL = os.environ.get("WEB_NAV3_URL", "http://127.0.0.1:8765")
ROSBRIDGE_URL = os.environ.get("ROSBRIDGE_URL", "ws://127.0.0.1:9090")
NAV_ROBOT_NAME = os.environ.get("NAV_ROBOT_NAME", "sanad")
# -- DDS / hardware --
# Jetson G1 default is eth0 (the robot's internal network).
# Override with SANAD_DDS_INTERFACE=lo for desktop/sim development.

View File

@ -1,5 +1,6 @@
{
"_description": "Tunables for core/* modules. Loaded via core.config_loader.load('core').",
"brain": {
"allowed_callback_prefixes": [
"Project.Sanad.motion.",
@ -9,6 +10,7 @@
],
"gestural_speaking_default": false
},
"logger": {
"log_level": "INFO",
"format": "%(asctime)s [%(name)s] %(levelname)-7s %(message)s",
@ -16,9 +18,11 @@
"file_max_bytes": 10485760,
"file_backup_count": 7
},
"event_bus": {
"emit_timeout_sec": 0.5
},
"paths": {
"_comment": "Path roots — resolved against BASE_DIR in core/config.py",
"data": "data",
@ -29,6 +33,7 @@
"motion_recordings": "data/recordings/motion",
"motions": "data/motions"
},
"gemini_defaults": {
"_comment": "Baseline Gemini API config — SINGLE SOURCE OF TRUTH. All voice modules read from here.",
"api_key": "",
@ -38,23 +43,27 @@
"ws_timeout_sec": 30,
"default_system_prompt": "You are Bousandah, a wise and friendly Emirati assistant. Speak strictly in the UAE dialect (Khaleeji). Be helpful, concise, and use local greetings like 'Marhaba' and 'Ya Khoy'."
},
"g1_hardware": {
"_comment": "G1 humanoid hardware constants — shared by every motion/voice module that talks to the arm.",
"num_motor": 29,
"enable_arm_sdk_index": 29,
"replay_hz": 60.0
},
"script_files": {
"_comment": "Filenames (under scripts/) used across voice + dashboard",
"persona": "sanad_script.txt",
"rules": "sanad_rule.txt",
"arm_phrases": "sanad_arm.txt"
},
"dashboard_defaults": {
"host": null,
"port": 8000,
"interface": "wlan0"
},
"audio_defaults": {
"_comment": "Host PulseAudio fallback only — the G1 deployment uses UDP multicast mic + AudioClient.PlayStream speaker (see SANAD_USE_G1_MIC in config.py LIVE_TUNE). Default here is the Jetson/G1 built-in platform-sound chip.",
"send_sample_rate": 16000,
@ -64,9 +73,11 @@
"sink": "alsa_output.platform-sound.analog-stereo",
"source": "alsa_input.platform-sound.analog-stereo"
},
"dds": {
"network_interface_default": "eth0"
},
"camera": {
"_comment": "Recognition tab camera daemon (parent process reads this). width/height/fps/jpeg_quality + the reconnect knobs configure CameraDaemon. Frames are cached in memory and pushed to the Gemini child over its stdin (no file drop). send_hz/stale_ms are read by the Gemini child via SANAD_VISION_SEND_HZ / SANAD_VISION_STALE_MS env vars (LIVE_TUNE).",
"width": 424,
@ -80,6 +91,7 @@
"reconnect_max_s": 10.0,
"capture_timeout_ms": 5000
},
"faces": {
"_comment": "Face gallery for Gemini-side recognition. Folder layout: data/faces/face_{id}/{face_1.jpg, ...} + optional meta.json {\"name\": \"...\"}. Gemini does the matching — no local ML model.",
"dir_rel": "data/faces",

27
vendor/Sanad/config/mask_config.json vendored Normal file
View File

@ -0,0 +1,27 @@
{
"_comment": "Shining LED face mask (BLE). Driven by the FaceController subsystem (face/mask_face.py) which imports the standalone Mask project. Needs an env with bleak + Pillow (g1_env). Free the mask from the phone app before connecting.",
"mask_dir": "",
"_mask_dir": "Path to the Mask project (flat shiningmask lib). Empty -> auto: <Project>/Mask. Env override: SANAD_MASK_DIR.",
"name_prefix": "MASK",
"_name_prefix": "BLE scan prefix; the mask advertises e.g. 'MASK-02A711'. Env: SANAD_MASK_NAME_PREFIX.",
"address": "",
"_address": "Specific BLE MAC to connect to. Empty -> scan by name_prefix. Env: SANAD_MASK_ADDRESS.",
"adapter": "",
"_adapter": "BlueZ adapter (e.g. 'hci0'). Empty -> default. Env: SANAD_MASK_ADAPTER.",
"brightness": 95,
"_brightness": "0-128. Keep <=100 to avoid LED flicker (battery-limited).",
"fps": 8.0,
"_fps": "FaceAnimator (fallback driver) frame rate (PLAY commands/sec).",
"lifelike": true,
"_lifelike": "Use the LifelikeFace driver (face/face_motion.py): eye saccades, varied blinks, listening/thinking/speaking states, reactions, smooth lip-sync. false -> basic FaceAnimator.",
"autostart": true,
"_autostart": "Auto-connect + Start face on boot (best-effort, background — never blocks startup). After the one-time frame upload, later boots just connect + animate. false -> connect/start manually from the dashboard.",
"connect_timeout": 15.0,
"connect_attempts": 5,
"eye_color": [0, 230, 255],
"_eye_color": "Face eye/iris RGB (baked into the uploaded frames). Default cyan. Set via the dashboard 'Apply colors' (persisted here).",
"mouth_color": [255, 50, 50],
"_mouth_color": "Face mouth RGB. Default red.",
"sclera_color": [255, 255, 255],
"_sclera_color": "White-of-the-eye RGB. Default white."
}

71
vendor/Sanad/core/persona.py vendored Normal file
View File

@ -0,0 +1,71 @@
"""Active-persona selection — which script file Gemini loads as its system
prompt.
The operator can keep several persona variants in scripts/ (e.g.
``sanad_script.txt``, ``sanad_script_v1.txt``, ``sanad_script_v2.txt``) and pick
which one is live. The selection is a single basename stored in
``data/active_persona.txt``; the DEFAULT (and reset target) is always the
configured persona (``sanad_script.txt``). The Gemini child resolves this at
session start, so a new selection takes effect on the next voice (re)connect.
A missing/blank/stale pointer transparently falls back to the default, so this
can never break the voice — worst case it loads ``sanad_script.txt``.
"""
from __future__ import annotations
from pathlib import Path
from Project.Sanad.config import DATA_DIR, SCRIPTS_DIR
# Pointer file holding the basename of the selected persona script. When it is
# missing, blank, or names a script that no longer exists, the default persona
# is used instead.
ACTIVE_PERSONA_FILE = DATA_DIR / "active_persona.txt"
def default_persona_name() -> str:
    """Return the configured default persona filename.

    The value comes from the ``persona`` key of the ``script_files`` section
    of the ``core`` config. Any failure along the way (loader import error,
    missing section/key, value that cannot be stripped) and any blank value
    fall back to ``"sanad_script.txt"``.
    """
    fallback = "sanad_script.txt"
    try:
        # Imported lazily so a broken/missing loader degrades to the fallback
        # instead of failing at module import time.
        from Project.Sanad.core.config_loader import section as _section

        script_files = _section("core", "script_files") or {}
        configured = script_files.get("persona") or fallback
        cleaned = configured.strip()
    except Exception:
        return fallback
    return cleaned if cleaned else fallback
def active_persona_name() -> str:
    """Return the basename of the persona script that is currently in effect.

    The pointer file is read and stripped; if it cannot be read, is blank, or
    names a file that is not present in ``SCRIPTS_DIR``, the default persona
    name is returned instead.
    """
    fallback = default_persona_name()
    try:
        pointer = ACTIVE_PERSONA_FILE.read_text(encoding="utf-8").strip()
    except Exception:
        # Unreadable/missing pointer is treated the same as "nothing selected".
        return fallback
    if not pointer:
        return fallback
    # Only the final path component is honoured, so a pointer containing
    # directory parts cannot escape SCRIPTS_DIR.
    candidate = SCRIPTS_DIR / Path(pointer).name
    return candidate.name if candidate.is_file() else fallback
def active_persona_path() -> Path:
    """Return the path, under ``SCRIPTS_DIR``, of the currently active persona script."""
    selected = active_persona_name()
    return SCRIPTS_DIR / selected
def set_active_persona(name: str | None) -> str:
    """Persist the selected persona basename and return the effective name.

    Only the final path component of ``name`` is used. Passing ``None``, an
    empty/blank value, or the default persona name clears the pointer file so
    the default applies again.

    Args:
        name: Persona script filename to activate, or ``None``/``""`` to reset.

    Returns:
        The persona name that is in effect after the call. If the pointer file
        could not be removed during a reset, this is whatever
        ``active_persona_name()`` still resolves to, not the default.

    Raises:
        FileNotFoundError: A non-default name does not exist in ``SCRIPTS_DIR``.
    """
    nm = (Path(str(name)).name if name else "").strip()
    default = default_persona_name()
    if not nm or nm == default:
        try:
            ACTIVE_PERSONA_FILE.unlink()
        except FileNotFoundError:
            # Nothing was selected, so the default is already in effect.
            return default
        except Exception:
            # Best-effort reset: the pointer could not be removed, so it may
            # still be in force. Report the name that is really active rather
            # than claiming the reset succeeded.
            return active_persona_name()
        return default
    if not (SCRIPTS_DIR / nm).is_file():
        raise FileNotFoundError(nm)
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    ACTIVE_PERSONA_FILE.write_text(nm, encoding="utf-8")
    return nm

View File

@ -54,6 +54,9 @@ _REST_ROUTES: list[tuple[str, str, str]] = [
("zones", "/api/zones", "zones"),
("temp_monitor", "/api/temp", "temperature"),
("controller", "/api/controller", "controller"),
("mask", "/api/mask", "mask"),
("mask_social", "/api/mask", "mask-social"),
("navigation", "/api/nav", "navigation"),
]
_WS_ROUTES: list[str] = ["log_stream", "motor_temps", "terminal"]
@ -113,7 +116,13 @@ async def root():
if index.exists():
from fastapi.responses import HTMLResponse
try:
return HTMLResponse(index.read_text(encoding="utf-8"))
# no-store so the browser always re-fetches the dashboard HTML/JS
# after a deploy — otherwise stale cached JS keeps calling old
# endpoints (e.g. /nav/* instead of /api/nav/*) and 404s.
return HTMLResponse(
index.read_text(encoding="utf-8"),
headers={"Cache-Control": "no-store, must-revalidate"},
)
except OSError as exc:
return {"error": f"Could not read index.html: {exc}"}
return {

View File

@ -0,0 +1,66 @@
"""In-process arbitration between Nav2 (web_nav3) and the manual LocoController.
Both stacks can drive the G1's legs via different command paths:
- Nav2 (web_nav3) publishes cmd_vel from a navigation goal/mission.
- LocoController issues LocoClient.Move()/step() from the Controller tab and
Gemini movement dispatch.
The documented hazard is "two stacks must never both drive the legs at once".
This module is a tiny thread-safe gate that lets ONE commander own the legs at a
time. controller.py sets loco_active for arm/move/step and refuses when nav is
active; navigation.py sets nav_active for goto/missions/run and refuses when loco
is active. The E-STOP / cancel paths clear the relevant flag.
Pure in-process state (no DDS, no HTTP) — both routers share this single module
instance, so the flags are coherent across the dashboard process.
"""
from __future__ import annotations
import threading
# Shared arbitration state. Every read and write below happens under _lock.
_lock = threading.Lock()
_loco_active = False  # True while manual loco holds the claim
_nav_active = False  # True while Nav2 holds the claim


def loco_active() -> bool:
    """Report whether manual loco currently holds the claim."""
    with _lock:
        held = _loco_active
    return held


def nav_active() -> bool:
    """Report whether Nav2 currently holds the claim."""
    with _lock:
        held = _nav_active
    return held


def acquire_loco() -> bool:
    """Claim the legs for manual loco.

    Returns ``False`` (and changes nothing) while the nav flag is set;
    otherwise sets the loco flag and returns ``True``. Calling it again while
    loco already holds the claim succeeds again.
    """
    global _loco_active
    with _lock:
        granted = not _nav_active
        if granted:
            _loco_active = True
    return granted


def release_loco() -> None:
    """Clear the manual-loco claim unconditionally."""
    global _loco_active
    with _lock:
        _loco_active = False


def acquire_nav() -> bool:
    """Claim the legs for Nav2.

    Returns ``False`` (and changes nothing) while the loco flag is set;
    otherwise sets the nav flag and returns ``True``.
    """
    global _nav_active
    with _lock:
        granted = not _loco_active
        if granted:
            _nav_active = True
    return granted


def release_nav() -> None:
    """Clear the Nav2 claim unconditionally."""
    global _nav_active
    with _lock:
        _nav_active = False

View File

@ -160,7 +160,13 @@ async def audio_status():
"g1_speaker_muted": g1_muted,
"g1_current_volume": _g1_current_volume,
"g1_user_volume": _g1_user_volume,
"g1_available": _g1_audio_client is not None or (_g1_init_error == ""),
# Only report available once an AudioClient has actually been
# built — reporting True before any init attempt made the UI
# advertise G1 speaker controls that then 503 on first use.
# `g1_init_error` surfaces *why* it's unavailable (or "" if
# init was never attempted yet).
"g1_available": _g1_audio_client is not None,
"g1_init_error": _g1_init_error,
"sink": sink,
"source": source,
"current": cur,
@ -312,7 +318,9 @@ async def get_g1_volume():
"""
def _do():
return {
"available": _g1_audio_client is not None or (_g1_init_error == ""),
# True only after an AudioClient was actually constructed —
# `init_error` (below) explains an unavailable/never-tried state.
"available": _g1_audio_client is not None,
"current_volume": _g1_current_volume,
"user_volume": _g1_user_volume,
"muted": _g1_current_volume == 0,
@ -343,35 +351,48 @@ async def set_g1_volume(payload: G1VolumePayload):
if not 0 <= level <= 100:
raise HTTPException(400, "level must be 0..100")
# 1) G1 chest speaker (DDS) — best-effort so it works even when an
# external sink (JBL) is the active output.
code = None
client = _get_g1_audio_client()
if client is None:
raise HTTPException(
503,
f"G1 AudioClient unavailable: {_g1_init_error or 'unknown'}",
)
if client is not None:
try:
with _g1_audio_lock:
code = client.SetVolume(level)
_g1_current_volume = level
if level > 0:
# Only update the "preferred unmuted" level when the
# user is setting a non-zero volume. Setting 0 is a
# mute, which shouldn't overwrite their preference.
_g1_user_volume = level
except Exception as exc:
raise HTTPException(500, f"SetVolume failed: {exc}")
log.warning("G1 SetVolume failed: %s", exc)
if level > 0:
_g1_user_volume = level
# 2) The ACTIVE profile's PulseAudio sink (JBL / Anker / …). Target the
# RESOLVED sink from the saved selection, NOT @DEFAULT_SINK@ — the PA
# default can be a different sink (e.g. the chest platform-sound) even
# when the JBL is the selected output, so @DEFAULT_SINK@ would move the
# wrong sink and the slider would appear to do nothing on the JBL.
pa_applied = False
try:
sink = (ad.load_state() or {}).get("sink") or "@DEFAULT_SINK@"
_pactl(["set-sink-volume", sink, "%d%%" % level])
if level > 0:
_pactl(["set-sink-mute", sink, "0"])
pa_applied = True
except Exception as exc:
log.warning("PA set-sink-volume failed: %s", exc)
if client is None and not pa_applied:
raise HTTPException(503, "No speaker available (G1 + PulseAudio both failed)")
# Persist the user's preferred level (not the current) so a
# subsequent mute-then-restart restores to the preferred level
_save_persisted_g1_volume(_g1_user_volume)
log.info("G1 volume → %d (user_pref=%d, rc=%s)",
level, _g1_user_volume, code)
log.info("volume → %d (g1_rc=%s, pa=%s, user_pref=%d)",
level, code, pa_applied, _g1_user_volume)
return {
"ok": True,
"current_volume": level,
"user_volume": _g1_user_volume,
"muted": level == 0,
"return_code": code,
"pa_applied": pa_applied,
"persisted": True,
}
return await asyncio.to_thread(_do)
@ -471,6 +492,28 @@ async def apply_audio():
audio_mgr.refresh_devices()
except Exception:
pass
# Hot-swap the live Gemini voice to the selected profile too, so picking
# a device (e.g. the JBL) moves BOTH record playback AND the live voice
# to it — without dropping the session. Best-effort; no-op if not running.
try:
from Project.Sanad.main import live_sub
pid = (ad.load_state() or {}).get("profile_id")
if (pid and live_sub is not None and hasattr(live_sub, "send_profile")
and hasattr(live_sub, "is_running") and live_sub.is_running()):
live_sub.send_profile(pid, reason="dashboard audio Apply")
except Exception:
pass
# Restore the user's SAVED volume to the selected sink (USB/BT speakers
# like the JBL otherwise come back at a low PulseAudio default). Use the
# saved level, NOT a forced 100%, so the slider/sink keep the user's
# choice across selects + restarts. Target the resolved sink.
try:
sink = (ad.load_state() or {}).get("sink") or "@DEFAULT_SINK@"
_pactl(["set-sink-volume", sink, "%d%%" % _g1_user_volume])
if _g1_user_volume > 0:
_pactl(["set-sink-mute", sink, "0"])
except Exception:
pass
return result
return await asyncio.to_thread(_do)

View File

@ -22,6 +22,8 @@ from Project.Sanad.config import BASE_DIR
from Project.Sanad.core.logger import get_logger
from Project.Sanad.vision import recognition_state
from Project.Sanad.dashboard.routes import _arbiter
log = get_logger("controller_routes")
router = APIRouter()
@ -75,6 +77,31 @@ def _require_armed(lc):
raise HTTPException(409, "Movement is disarmed. Enable movement first.")
def _claim_loco():
    """Claim the legs for manual locomotion, or reject the request.

    Raises:
        HTTPException: 409 when ``_arbiter.acquire_loco()`` returns a falsy
            value (a Nav2 goal currently owns the legs).
    """
    if _arbiter.acquire_loco():
        return
    raise HTTPException(
        409, "Navigation (Nav2) is active. Cancel the nav goal before manual movement."
    )
def _cancel_nav():
    """Cancel any in-flight Nav2 goal and clear the nav arbitration flag.

    Called from E-STOP so the global stop halts the legs regardless of which
    stack is driving them. The navigation route's module-level client is used
    in-process (no HTTP self-call). Any failure is logged and swallowed; the
    nav arbitration flag is released in every case.
    """
    try:
        # Imported lazily so a broken navigation route cannot break this module.
        from Project.Sanad.dashboard.routes.navigation import _CLIENT as nav_client

        if nav_client is not None:
            nav_client.cancel()
    except Exception:
        log.exception("estop nav cancel failed")
    finally:
        # Always drop the nav claim, even when the cancel itself failed.
        _arbiter.release_nav()
# ── reads ───────────────────────────────────────────────────
@router.get("/status")
@ -100,7 +127,17 @@ async def get_msc():
@router.post("/arm")
async def set_arm(on: bool = Query(...)):
lc = _require_loco()
res = await asyncio.to_thread(lc.arm_movement if on else lc.disarm_movement)
if on:
# Refuse to arm manual loco while Nav2 owns the legs.
_claim_loco()
try:
res = await asyncio.to_thread(lc.arm_movement)
except Exception:
_arbiter.release_loco()
raise
return res
res = await asyncio.to_thread(lc.disarm_movement)
_arbiter.release_loco()
return res
@ -144,6 +181,10 @@ async def estop():
md.emergency_stop()
except Exception:
log.exception("estop dispatcher latch failed")
# Cancel any in-flight Nav2 goal too: the legs have exactly one commander,
# and an E-STOP must halt the legs whether loco or Nav2 is driving them.
await asyncio.to_thread(_cancel_nav)
_arbiter.release_loco()
return {"ok": True, **res}
@ -168,6 +209,7 @@ class MoveBody(BaseModel):
async def move(body: MoveBody):
lc = _require_loco()
_require_armed(lc)
_claim_loco()
return await asyncio.to_thread(lc.move, body.vx, body.vy, body.vyaw, body.run)
@ -175,6 +217,7 @@ async def move(body: MoveBody):
async def step(dir: str = Query(...)):
lc = _require_loco()
_require_armed(lc)
_claim_loco()
res = await asyncio.to_thread(lc.step, dir)
if not res.get("ok"):
raise HTTPException(400, res.get("reason", "step failed"))

View File

@ -4,10 +4,15 @@ from __future__ import annotations
import asyncio
from fastapi import APIRouter, HTTPException
from fastapi import APIRouter, HTTPException, Query
from Project.Sanad.config import BASE_DIR
from Project.Sanad.vision import recognition_state
router = APIRouter()
_STATE_PATH = BASE_DIR / "data" / ".recognition_state.json"
def _sub_or_503():
from Project.Sanad.main import live_sub
@ -19,9 +24,21 @@ def _sub_or_503():
@router.get("/status")
async def subprocess_status():
from Project.Sanad.main import live_sub
# record_enabled is a live flag (recognition_state) the panel toggle drives;
# surface it so the UI shows the current state even before a session starts.
rec = bool(recognition_state.read(_STATE_PATH).record_enabled)
if live_sub is None:
return {"available": False, "state": "unavailable"}
return live_sub.status()
return {"available": False, "state": "unavailable", "record_enabled": rec}
return {**live_sub.status(), "record_enabled": rec}
@router.post("/record")
async def set_record(on: bool = Query(...)):
    """Toggle auto-recording of conversation turns to data/recordings/.

    The flag is written with ``recognition_state.mutate`` on a worker thread
    and the stored value is echoed back. Takes effect live (the voice child
    syncs its recorder), so no session restart is needed.
    """
    new_state = await asyncio.to_thread(
        recognition_state.mutate, _STATE_PATH, record_enabled=bool(on)
    )
    return {"ok": True, "record_enabled": new_state.record_enabled}
@router.post("/start")

179
vendor/Sanad/dashboard/routes/mask.py vendored Normal file
View File

@ -0,0 +1,179 @@
"""Mask Face tab — Shining LED face mask control (BLE).
Routes live under /api/mask. Backed by the FaceController subsystem
(face/mask_face.py), which owns a dedicated asyncio loop + BLE connection to the
standalone Mask project's `shiningmask` library.
Every handler is failure-safe: if the subsystem or its library is unavailable it
returns 503 (GET /status returns a degraded body) rather than crash the
dashboard. FaceController raises RuntimeError for "not connected" / "face not
started"; those map to 409. Blocking BLE calls run in a thread pool so the event
loop stays responsive.
"""
from __future__ import annotations
import asyncio
from typing import List, Optional
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from Project.Sanad.core.logger import get_logger
log = get_logger("mask_routes")
router = APIRouter()
# ── lazy subsystem accessor ─────────────────────────────────
def _get_face():
    """Return ``mask_face`` from ``Project.Sanad.main``, or None if the import fails."""
    try:
        from Project.Sanad.main import mask_face  # type: ignore
    except Exception:
        return None
    return mask_face
def _require():
    """Return the mask face subsystem, or raise 503 when ``_get_face()`` yields None."""
    face = _get_face()
    if face is not None:
        return face
    raise HTTPException(503, "Mask face subsystem unavailable.")
def _run(fn, *args, **kwargs):
    """Invoke ``fn(*args, **kwargs)`` and translate its failures to HTTP errors.

    * ``HTTPException`` passes through untouched.
    * ``RuntimeError`` becomes a 409 carrying the error text.
    * Any other exception is logged with its traceback and becomes a 500.
    """
    try:
        outcome = fn(*args, **kwargs)
    except HTTPException:
        raise
    except RuntimeError as err:
        raise HTTPException(409, str(err))
    except Exception as err:  # noqa: BLE001
        log.exception("mask operation failed")
        raise HTTPException(500, str(err))
    return outcome
# ── status ──────────────────────────────────────────────────
@router.get("/status")
async def status():
    """Report mask status; returns a degraded body when the subsystem is missing.

    When the subsystem exists, its ``status()`` result (computed on a worker
    thread) is returned with ``"available": True`` added.
    """
    face = _get_face()
    if face is None:
        return {"available": False, "connected": False, "lib_available": False,
                "last_error": "mask face subsystem not constructed"}
    report = await asyncio.to_thread(face.status)
    report["available"] = True
    return report
# ── connection ──────────────────────────────────────────────
@router.post("/connect")
async def connect(timeout: Optional[float] = Query(None),
                  attempts: Optional[int] = Query(None)):
    """Forward to the face controller's ``connect(timeout, attempts)`` on a worker thread."""
    face = _require()
    result = await asyncio.to_thread(_run, face.connect, timeout, attempts)
    return result
@router.post("/disconnect")
async def disconnect():
    """Forward to the face controller's ``disconnect()`` on a worker thread."""
    face = _require()
    result = await asyncio.to_thread(_run, face.disconnect)
    return result
# ── simple commands ─────────────────────────────────────────
@router.post("/brightness")
async def brightness(level: int = Query(..., ge=0, le=255)):
    """Set the mask brightness; ``level`` is validated to 0-255 by the query schema."""
    face = _require()
    result = await asyncio.to_thread(_run, face.set_brightness, level)
    return result
class TextBody(BaseModel):
    """JSON body for POST /text; fields are forwarded to ``set_text`` by the handler."""
    text: str = ""
    color: List[int] = [255, 255, 255]  # converted to a tuple by the handler
    mode: Optional[int] = None
    bg: Optional[List[int]] = None  # background RGB (None -> black)
    speed: Optional[int] = None  # scroll speed 0-255 (None -> firmware default)
@router.post("/text")
async def text(body: TextBody):
    """Send text to the mask via ``set_text(text, color, mode, bg, speed)``.

    ``color`` is passed as a tuple; ``bg`` is passed as a tuple, or None when
    the body's ``bg`` is missing or empty.
    """
    face = _require()
    if body.bg:
        background = tuple(body.bg)
    else:
        background = None
    foreground = tuple(body.color)
    return await asyncio.to_thread(
        _run, face.set_text, body.text, foreground, body.mode, background, body.speed
    )
@router.post("/image")
async def image(id: int = Query(...)):
    """Forward to the face controller's ``show_image(id)`` on a worker thread."""
    # `id` shadows the builtin, but it is the public query-parameter name.
    face = _require()
    result = await asyncio.to_thread(_run, face.show_image, id)
    return result
@router.post("/animation")
async def animation(id: int = Query(...)):
    """Forward to the face controller's ``play_animation(id)`` on a worker thread."""
    # `id` shadows the builtin, but it is the public query-parameter name.
    face = _require()
    result = await asyncio.to_thread(_run, face.play_animation, id)
    return result
@router.post("/clear")
async def clear():
    """Forward to the face controller's ``clear_diy()`` on a worker thread."""
    face = _require()
    result = await asyncio.to_thread(_run, face.clear_diy)
    return result
# ── animated face ───────────────────────────────────────────
@router.post("/face/start")
async def face_start(reload: bool = Query(False)):
    """Start the animated face via ``face_start(reload)`` on a worker thread."""
    face = _require()
    result = await asyncio.to_thread(_run, face.face_start, reload)
    return result
@router.post("/face/stop")
async def face_stop():
    """Stop the animated face via ``face_stop()`` on a worker thread."""
    face = _require()
    result = await asyncio.to_thread(_run, face.face_stop)
    return result
@router.post("/face/return")
async def face_return():
    """Resume the live animated face after a text/image/animation override.

    Delegates to the face controller's ``return_face()`` on a worker thread.
    """
    face = _require()
    result = await asyncio.to_thread(_run, face.return_face)
    return result
class FaceColorBody(BaseModel):
    """JSON body for POST /face/color; each field is optional and defaults to None."""
    eye: Optional[List[int]] = None  # eye/iris RGB
    mouth: Optional[List[int]] = None  # mouth RGB
    sclera: Optional[List[int]] = None  # white-of-the-eye RGB
@router.post("/face/color")
async def face_color(body: FaceColorBody):
    """Recolor the animated face (re-uploads the frame set if the face is live).

    Passes ``eye``, ``mouth`` and ``sclera`` from the body unchanged to
    ``set_face_color`` on a worker thread.
    """
    face = _require()
    return await asyncio.to_thread(
        _run, face.set_face_color, body.eye, body.mouth, body.sclera
    )
@router.post("/speaking")
async def speaking(on: bool = Query(...)):
    """Forward the speaking flag to ``set_speaking(on)`` on a worker thread."""
    face = _require()
    result = await asyncio.to_thread(_run, face.set_speaking, on)
    return result
@router.post("/mouth")
async def mouth(level: int = Query(..., ge=0, le=3)):
    """Set the mouth level; ``level`` is validated to 0-3 by the query schema."""
    face = _require()
    result = await asyncio.to_thread(_run, face.set_mouth, level)
    return result
@router.post("/expression/{name}")
async def expression(name: str):
    """Forward to the face controller's ``show_expression(name)`` on a worker thread."""
    face = _require()
    result = await asyncio.to_thread(_run, face.show_expression, name)
    return result

View File

@ -0,0 +1,395 @@
"""Social-media / QR display on the LED mask.
Renders a QR code (for a preset Instagram account) or an uploaded image onto the
mask's 46x58 display and holds it via the FaceController's reserved scratch slot
until the animated face is resumed. The shared helper :func:`show_social_on_mask`
is also called from the Gemini ``[[SHOW:account]]`` relay wired in ``main.py``.
Routes (under /api/mask):
POST /social/{account} -> show a preset Instagram QR
POST /qr -> upload an image (QR or any picture) + show it
POST /face/resume -> stop showing the scratch image, return to the face
GET /social -> list the preset accounts
"""
from __future__ import annotations
import asyncio
import io
import logging
import os
import sys
from pathlib import Path
import re
from fastapi import APIRouter, File, HTTPException, Query, UploadFile
from fastapi.responses import FileResponse
log = logging.getLogger("sanad.mask_social")
router = APIRouter() # prefix "/api/mask" supplied by dashboard/app.py _REST_ROUTES
# Preset Instagram accounts the mask can show as a QR. The mask is a low-res
# 46x58 panel, so a full-URL QR is dense; the black margin acts as the quiet
# zone and we scale modules crisply (NEAREST) to give it the best chance.
SOCIAL = {
"bu_sunaidah": {"handle": "@bu.sunaidah",
"url": "https://instagram.com/bu.sunaidah",
"short": "da.gd/VMkH8J"}, # -> instagram.com/bu.sunaidah (v1 QR)
"yslootahtech": {"handle": "@yslootahtech",
"url": "https://instagram.com/yslootahtech",
"short": "da.gd/Qr8RO"}, # -> instagram.com/yslootahtech (v1 QR)
}
def _ensure_mask_path() -> None:
    """Put the flat Mask library directory (home of ``colorface``) on ``sys.path``.

    Lookup order: the ``SANAD_MASK_DIR`` environment variable, then the
    ``mask_dir`` attribute of ``Project.Sanad.main.mask_face``, then a ``Mask``
    directory two levels above this file's directory. The directory is
    inserted at the front of ``sys.path`` only if it is not already listed.
    """
    mask_dir = os.environ.get("SANAD_MASK_DIR")
    if not mask_dir:
        try:
            from Project.Sanad.main import mask_face as _mf  # type: ignore
            mask_dir = getattr(_mf, "mask_dir", None)
        except Exception:
            mask_dir = None
    if not mask_dir:
        mask_dir = str(Path(__file__).resolve().parents[2] / "Mask")
    if mask_dir and mask_dir not in sys.path:
        sys.path.insert(0, mask_dir)
def _get_face():
    """Return ``Project.Sanad.main.mask_face``; raise 503 when it is None."""
    from Project.Sanad.main import mask_face  # type: ignore

    if mask_face is not None:
        return mask_face
    raise HTTPException(status_code=503, detail="mask face unavailable")
_EYE_BAND = 16 # top rows reserved for the cyan eyes; the code sits below them
def _compose_under_eyes(inner) -> bytes:
    """Draw two eyes at the top of the panel and place ``inner`` below them.

    ``inner`` (a QR / image) is converted to RGB, scaled to a small badge and
    centred in the area under the ``_EYE_BAND`` rows, so the panel still reads
    as a face with a code under the eyes rather than a full-screen QR. The
    composed canvas is returned encoded by ``colorface.encode``.
    """
    _ensure_mask_path()
    import colorface as cf
    from PIL import Image, ImageDraw

    width, height = cf.DISPLAY_W, cf.DISPLAY_H
    inner = inner.convert("RGB")
    src_w, src_h = inner.size
    # Badge size: 72% of the smaller free dimension below the eyes, at least 20 px.
    limit = max(20, int(min(width, height - _EYE_BAND - 1) * 0.72))
    if src_w <= limit and src_h <= limit:
        # Small source (e.g. a raw QR): crisp integer up-scale.
        factor = max(1, min(limit // src_w, limit // src_h))
        new_w, new_h = src_w * factor, src_h * factor
    else:
        # Large source: fractional down-scale, never below 1 px per side.
        factor = min(limit / src_w, limit / src_h)
        new_w, new_h = max(1, int(src_w * factor)), max(1, int(src_h * factor))
    inner = inner.resize((new_w, new_h), Image.NEAREST)

    canvas = Image.new("RGB", (width, height), (0, 0, 0))
    draw = ImageDraw.Draw(canvas)
    iris = cf.DEFAULT_EYE
    centre = width // 2
    for cx in (centre - 10, centre + 10):  # two eyes at the top
        draw.ellipse([cx - 5, 3, cx + 5, 13], fill=(255, 255, 255))
        draw.ellipse([cx - 3, 5, cx + 3, 11], fill=iris)
        draw.ellipse([cx - 1, 7, cx + 1, 10], fill=(0, 0, 0))

    left = (width - new_w) // 2
    top = _EYE_BAND + (height - _EYE_BAND - new_h) // 2
    # Clamp so the badge never starts off-canvas or inside the eye band.
    canvas.paste(inner, (max(0, left), max(_EYE_BAND, top)))
    return cf.encode(canvas)
def _qr_bytes(url: str) -> bytes:
    """Render a QR for ``url`` FULL-SCREEN and encode it for the mask.

    The QR is drawn at 1 px/module with a 1-module border, then up-scaled by
    the largest crisp (integer) factor the panel allows — the only way it has
    any chance of scanning. Only a ~version-1 QR (<=17 chars) reaches
    ~2 px/module on the 46-wide panel; longer data is denser and won't scan.

    Returns:
        The frame bytes produced by ``colorface.encode``. (Only the bytes are
        returned; the QR version is not part of the result.)
    """
    _ensure_mask_path()
    import qrcode
    from PIL import Image
    import colorface as cf

    W, H = cf.DISPLAY_W, cf.DISPLAY_H
    qr = qrcode.QRCode(error_correction=qrcode.constants.ERROR_CORRECT_L,
                       box_size=1, border=1)
    qr.add_data(url)
    qr.make(fit=True)
    q = qr.make_image(fill_color=(255, 255, 255),
                      back_color=(0, 0, 0)).convert("RGB")
    scale = max(1, min(W, H) // max(1, q.width))  # largest integer that fits
    if scale > 1:
        # Scale each axis by its own size (the original used q.width for both).
        q = q.resize((q.width * scale, q.height * scale), Image.NEAREST)
    canvas = Image.new("RGB", (W, H), (0, 0, 0))
    canvas.paste(q, ((W - q.width) // 2, (H - q.height) // 2))
    return cf.encode(canvas)
def _image_bytes(img) -> bytes:
    """Encode an uploaded QR/image FULL-SCREEN for the mask (best effort).

    The image is converted to RGB and resized with NEAREST to a square whose
    side is the smaller panel dimension (aspect ratio is not preserved), then
    centred on a black canvas and passed to ``colorface.encode``.
    """
    _ensure_mask_path()
    import colorface as cf
    from PIL import Image

    width, height = cf.DISPLAY_W, cf.DISPLAY_H
    side = min(width, height)
    square = img.convert("RGB").resize((side, side), Image.NEAREST)
    canvas = Image.new("RGB", (width, height), (0, 0, 0))
    offset = ((width - side) // 2, (height - side) // 2)
    canvas.paste(square, offset)
    return cf.encode(canvas)
def show_social_on_mask(account: str) -> dict:
    """Show a preset account's **scannable** QR on the mask.

    The QR is built from the account's short link when one is configured
    (falling back to the full URL) and held via the face controller's
    ``show_scratch_image``. Shared by the dashboard button and the Gemini
    relay described in the module docstring.

    Raises:
        HTTPException: 404 for an unknown account. Errors from the face
            controller (e.g. not connected) propagate unchanged.
    """
    key = str(account).strip().lower()
    entry = SOCIAL.get(key)
    if not entry:
        raise HTTPException(status_code=404, detail="unknown account")
    link = entry.get("short") or entry["url"]  # v1 short link -> scannable
    frame = _qr_bytes(link)
    face = _get_face()
    shown = face.show_scratch_image(frame)
    log.info("showing scannable social QR on mask: %s (%s)", entry["handle"], entry.get("short"))
    return {"ok": True, "handle": entry["handle"], "scannable": True, **(shown or {})}
@router.get("/social")
async def list_social():
    """List the preset accounts as ``{"id", "handle"}`` entries, in ``SOCIAL`` order."""
    accounts = []
    for account_id, info in SOCIAL.items():
        accounts.append({"id": account_id, "handle": info["handle"]})
    return {"accounts": accounts}
def _friendly(exc: Exception) -> HTTPException:
    """Map a face-controller error to a clean HTTP response.

    * An ``HTTPException`` is returned as-is.
    * A message containing "not connected", "not started" or "MASK" becomes a
      503 with a user-facing hint (usually the mask is off / far / held by the
      phone app).
    * Anything else is logged with its traceback and becomes a 500.

    The result is returned, not raised; callers ``raise _friendly(exc)``.
    """
    if isinstance(exc, HTTPException):
        return exc
    text = str(exc)
    markers = ("not connected", "not started", "MASK")
    if any(marker in text for marker in markers):
        return HTTPException(status_code=503, detail=(
            "Mask not connected — power it on, bring it close to the robot, and "
            "free it from the phone app."))
    log.exception("mask scratch op failed")
    return HTTPException(status_code=500, detail="%s: %s" % (type(exc).__name__, text))
@router.post("/social/{account}")
async def show_social(account: str):
    """Show a preset account's QR on the mask; errors are mapped by ``_friendly``."""
    try:
        shown = await asyncio.to_thread(show_social_on_mask, account)
    except Exception as err:
        raise _friendly(err)
    return shown
@router.post("/qr")
async def upload_qr(file: UploadFile = File(...)):
    """Upload an image (a QR you generated, or any picture) and show it on the mask.

    Raises:
        HTTPException: 400 for an empty upload or undecodable image; encoding
            and display failures are mapped by ``_friendly``.
    """
    payload = await file.read()
    if not payload:
        raise HTTPException(status_code=400, detail="empty upload")
    from PIL import Image
    try:
        picture = Image.open(io.BytesIO(payload))
        picture.load()  # force a full decode so corrupt data fails here
    except Exception:
        raise HTTPException(status_code=400, detail="not a valid image")
    try:
        frame = await asyncio.to_thread(_image_bytes, picture)
        face = _get_face()
        return await asyncio.to_thread(face.show_scratch_image, frame)
    except Exception as err:
        raise _friendly(err)
@router.post("/face/resume")
async def resume_face():
    """Stop showing the scratch image and resume the animated face.

    Implemented as ``set_expression(None)`` on a worker thread.
    """
    face = _get_face()
    result = await asyncio.to_thread(face.set_expression, None)
    return result
@router.post("/face/mouth")
async def face_mouth(hidden: bool = Query(...)):
    """Show (hidden=false) or hide (hidden=true) the mouth on the animated face."""
    face = _get_face()
    result = await asyncio.to_thread(face.set_mouth_hidden, hidden)
    return result
@router.post("/link")
async def face_link(on: bool = Query(...)):
    """Link (on=true) / unlink (on=false) Gemini <-> the mask.

    ON connects the mask and lets Gemini drive its emotions/social.
    OFF tears the link down (no BLE churn) and Gemini stops touching the mask.
    Default state is OFF. Runs ``set_gemini_linked`` in a worker thread,
    because a link-on may briefly block while it makes its first connect
    attempt.
    """
    face = _get_face()
    result = await asyncio.to_thread(face.set_gemini_linked, on)
    return result
# ── saved QR library ────────────────────────────────────────────────
# Upload QR/images, save them by name, list/show/delete them. Stored as PNGs
# under data/qr_codes so they persist across restarts.
_QR_DIR = None
def _qr_dir() -> Path:
    """Return the saved-QR directory (``<base>/data/qr_codes``), creating it if needed.

    ``<base>`` is ``Project.Sanad.config.BASE_DIR`` when importable, otherwise
    two levels above this file's directory. The resolved path is cached in
    the module-level ``_QR_DIR``.
    """
    global _QR_DIR
    if _QR_DIR is None:
        try:
            from Project.Sanad.config import BASE_DIR
            root = Path(BASE_DIR)
        except Exception:
            root = Path(__file__).resolve().parents[2]
        _QR_DIR = root / "data" / "qr_codes"
    # Idempotent (exist_ok=True), so safe on every call.
    _QR_DIR.mkdir(parents=True, exist_ok=True)
    return _QR_DIR
def _safe_name(name: str) -> str:
    """Turn a user-supplied name into a safe single-component file stem.

    Every character that is not a word character, ``.`` or ``-`` becomes
    ``_``; the result is cut to 40 characters and stripped of leading/trailing
    ``.`` and ``_``. An empty result falls back to ``"qr"``.

    ``\\w`` on a ``str`` pattern is Unicode-aware, so non-ASCII letters (e.g.
    Arabic) are kept. With the previous ASCII-only class every such name
    collapsed to ``"qr"`` and distinct entries overwrote each other. Results
    for ASCII input are unchanged. Path separators are never word characters,
    and leading dots are stripped, so the stem cannot escape its directory.
    """
    cleaned = re.sub(r"[^\w.-]", "_", (name or "").strip())[:40].strip("._")
    return cleaned or "qr"
@router.post("/qr/save")
async def qr_save(name: str = Query(...), file: UploadFile = File(...)):
    """Save an uploaded QR/image into the library under ``name``.

    The upload is decoded, converted to RGB and written as
    ``<sanitised name>.png`` in the QR directory on a worker thread.

    Raises:
        HTTPException: 400 for an empty upload or undecodable image.
    """
    payload = await file.read()
    if not payload:
        raise HTTPException(status_code=400, detail="empty upload")
    from PIL import Image
    try:
        picture = Image.open(io.BytesIO(payload))
        picture.load()
    except Exception:
        raise HTTPException(status_code=400, detail="not a valid image")
    stem = _safe_name(name)
    target = str(_qr_dir() / (stem + ".png"))
    await asyncio.to_thread(picture.convert("RGB").save, target)
    return {"ok": True, "name": stem}
@router.post("/qr/save_link")
async def qr_save_link(name: str = Query(...), url: str = Query(...)):
    """Generate a QR from ``url`` and save it to the library.

    The response carries the QR version and whether it is version 1, i.e.
    short enough to actually scan on the mask.

    Raises:
        HTTPException: 400 when ``url`` is empty after stripping.
    """
    link = (url or "").strip()
    if not link:
        raise HTTPException(status_code=400, detail="empty url")
    _ensure_mask_path()
    import qrcode
    code = qrcode.QRCode(error_correction=qrcode.constants.ERROR_CORRECT_L,
                         box_size=10, border=2)
    code.add_data(link)
    code.make(fit=True)
    picture = code.make_image(fill_color=(255, 255, 255),
                              back_color=(0, 0, 0)).convert("RGB")
    stem = _safe_name(name or link)
    await asyncio.to_thread(picture.save, str(_qr_dir() / (stem + ".png")))
    if code.version <= 1:
        note = "scannable"
    else:
        note = "too dense to scan on the mask — use a shorter link"
    return {"ok": True, "name": stem, "version": code.version,
            "scannable_on_mask": code.version <= 1,
            "note": note}
@router.get("/qr/library")
async def qr_library():
    """List the saved QR names (stems of the ``*.png`` files), sorted."""
    stems = [path.stem for path in _qr_dir().glob("*.png")]
    stems.sort()
    return {"qr": stems}
@router.get("/qr/thumb/{name}")
async def qr_thumb(name: str):
    """Serve a saved QR image as PNG (for the dashboard thumbnail); 404 if absent."""
    target = _qr_dir() / (_safe_name(name) + ".png")
    if target.exists():
        return FileResponse(str(target), media_type="image/png")
    raise HTTPException(status_code=404, detail="not found")
@router.post("/qr/show/{name}")
async def qr_show(name: str):
    """Show a saved QR/image from the library on the mask, full-screen.

    The image is encoded with ``_image_bytes`` (full-screen, NEAREST), not
    composed under the eyes, and held via ``show_scratch_image``.

    Raises:
        HTTPException: 404 when no saved image has this name; encoding and
            display failures are mapped by ``_friendly``.
    """
    p = _qr_dir() / (_safe_name(name) + ".png")
    if not p.exists():
        raise HTTPException(status_code=404, detail="not found")
    from PIL import Image
    try:
        # Image.open is lazy and keeps the file open; the context manager
        # releases the handle even if decoding or encoding fails.
        with Image.open(p) as img:
            data = await asyncio.to_thread(_image_bytes, img)
        mf = _get_face()
        return await asyncio.to_thread(mf.show_scratch_image, data)
    except Exception as exc:
        raise _friendly(exc)
@router.delete("/qr/{name}")
async def qr_delete(name: str):
    """Delete a saved QR from the library.

    Idempotent: deleting a name that does not exist still returns ok.
    """
    stem = _safe_name(name)
    # missing_ok avoids the exists()/unlink() race: a concurrent delete no
    # longer surfaces as FileNotFoundError (HTTP 500).
    (_qr_dir() / (stem + ".png")).unlink(missing_ok=True)
    return {"ok": True, "deleted": stem}
# ── saved TEXT library ──────────────────────────────────────────────
# Save words/phrases and scroll any of them across the mask on demand.
_TEXT_DIR = None
def _text_dir() -> Path:
    """Return the saved-text directory (``<base>/data/mask_texts``), creating it if needed.

    ``<base>`` is ``Project.Sanad.config.BASE_DIR`` when importable, otherwise
    two levels above this file's directory. The resolved path is cached in
    the module-level ``_TEXT_DIR``.
    """
    global _TEXT_DIR
    if _TEXT_DIR is None:
        try:
            from Project.Sanad.config import BASE_DIR
            root = Path(BASE_DIR)
        except Exception:
            root = Path(__file__).resolve().parents[2]
        _TEXT_DIR = root / "data" / "mask_texts"
    # Idempotent (exist_ok=True), so safe on every call.
    _TEXT_DIR.mkdir(parents=True, exist_ok=True)
    return _TEXT_DIR
@router.post("/texts/save")
async def text_save(text: str = Query(...), name: str = Query("")):
    """Save a word/phrase to the text library (name defaults to the text).

    The text is stripped and capped at 200 characters, then written to
    ``<sanitised name>.txt`` on a worker thread.

    Raises:
        HTTPException: 400 when the text is empty after stripping.
    """
    phrase = (text or "").strip()[:200]
    if not phrase:
        raise HTTPException(status_code=400, detail="empty text")
    stem = _safe_name(name or phrase)
    target = _text_dir() / (stem + ".txt")
    await asyncio.to_thread(target.write_text, phrase)
    return {"ok": True, "name": stem, "text": phrase}
@router.get("/texts/library")
async def text_library():
    """List the saved texts as ``{"name", "text"}``, sorted by path.

    ``text`` is the first 80 characters of the file; unreadable files are
    skipped silently.
    """
    entries = []
    for path in sorted(_text_dir().glob("*.txt")):
        try:
            preview = path.read_text()[:80]
        except Exception:
            continue
        entries.append({"name": path.stem, "text": preview})
    return {"texts": entries}
@router.post("/texts/show/{name}")
async def text_show(name: str):
    """Scroll a saved text across the mask.

    Calls ``set_text(text, (255, 255, 255), None, None, 38)`` on a worker
    thread; failures are mapped by ``_friendly``.

    Raises:
        HTTPException: 404 when no saved text has this name.
    """
    source = _text_dir() / (_safe_name(name) + ".txt")
    if not source.exists():
        raise HTTPException(status_code=404, detail="not found")
    phrase = source.read_text()
    face = _get_face()
    try:
        shown = await asyncio.to_thread(
            face.set_text, phrase, (255, 255, 255), None, None, 38
        )
    except Exception as err:
        raise _friendly(err)
    return shown
@router.delete("/texts/{name}")
async def text_delete(name: str):
    """Delete a saved text.

    Idempotent: deleting a name that does not exist still returns ok.
    """
    stem = _safe_name(name)
    # missing_ok avoids the exists()/unlink() race: a concurrent delete no
    # longer surfaces as FileNotFoundError (HTTP 500).
    (_text_dir() / (stem + ".txt")).unlink(missing_ok=True)
    return {"ok": True, "deleted": stem}

View File

@ -0,0 +1,402 @@
"""Navigation tab — proxy to the web_nav3 Nav2 stack.
Routes live under /api/nav (the prefix is applied centrally in dashboard/app.py,
NOT here). This router is a thin HTTP proxy: it forwards dashboard requests to a
single module-level WebNav3Client, which itself talks to the standalone web_nav3
FastAPI service (default http://127.0.0.1:8765 + rosbridge on :9090).
Fault isolation, two layers:
1. The `from ...navigation import WebNav3Client` import is GUARDED. If the
navigation package can't be imported (missing dep, syntax error), this
module still imports cleanly: `_CLIENT` is None and every handler degrades
(GET /status returns {"available": False}; actions raise 503). This mirrors
how app.py loads each router in isolation.
2. WebNav3Client never raises into us. By contract, every method returns a
clean dict / NavStatus even when web_nav3 is unreachable, so handlers just
forward the result. Blocking HTTP calls run off the event loop.
"""
from __future__ import annotations
import asyncio
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from Project.Sanad.core.logger import get_logger
from Project.Sanad.dashboard.routes import _arbiter
log = get_logger("navigation_route")
# Module-level router with NO prefix and NO tags — those are supplied by
# app.include_router(prefix="/api/nav", tags=["navigation"]) at registration time.
router = APIRouter()
# ── guarded optional import ─────────────────────────────────
# A broken navigation package must NOT stop this route module from importing —
# app.py would otherwise log the whole router as failed. Guard it and degrade.
try:
from Project.Sanad.navigation import WebNav3Client # type: ignore
_IMPORT_ERROR: str | None = None
except Exception as exc: # noqa: BLE001
WebNav3Client = None # type: ignore[assignment,misc]
_IMPORT_ERROR = f"{type(exc).__name__}: {exc}"
log.warning("navigation client unavailable — nav routes degraded: %s", _IMPORT_ERROR)
# ── config (env var -> dashboard config section -> default) ──
def _nav_config() -> dict:
    """Resolve nav connection config. Precedence: env var -> config -> default.

    Returns a dict with string values for ``web_nav3_url``, ``rosbridge_url``
    and ``robot``. Config comes from the ``dashboard`` / ``navigation``
    section; an empty env or config value falls through to the next source.
    """
    import os
    from Project.Sanad.core.config_loader import section as _cfg_section

    cfg = _cfg_section("dashboard", "navigation")

    def _pick(env_key, cfg_key, fallback):
        # First truthy source wins, then coerce to str.
        return str(os.environ.get(env_key) or cfg.get(cfg_key) or fallback)

    return {
        "web_nav3_url": _pick("WEB_NAV3_URL", "web_nav3_url", "http://127.0.0.1:8765"),
        "rosbridge_url": _pick("ROSBRIDGE_URL", "rosbridge_url", "ws://127.0.0.1:9090"),
        "robot": _pick("SANAD_ROBOT_NAME", "robot", "sanad"),
    }
_CFG = _nav_config()
# ── single module-level client ──────────────────────────────
# One WebNav3Client for the whole dashboard, built from config. If the import
# was guarded out (above), or construction fails, _CLIENT stays None and every
# handler degrades gracefully.
if WebNav3Client is not None:
try:
_CLIENT = WebNav3Client(base_url=_CFG["web_nav3_url"], robot=_CFG["robot"])
log.info("WebNav3Client ready → %s (robot=%s)", _CFG["web_nav3_url"], _CFG["robot"])
except Exception as exc: # noqa: BLE001
_CLIENT = None
_IMPORT_ERROR = f"construct failed: {type(exc).__name__}: {exc}"
log.warning("WebNav3Client construction failed — nav routes degraded: %s", exc)
else:
_CLIENT = None
def _require():
    """Return the live client or raise 503 (for ACTION endpoints).

    The 503 detail includes the recorded import/construction error, if any.
    """
    if _CLIENT is not None:
        return _CLIENT
    detail = f"Navigation client unavailable. {_IMPORT_ERROR or ''}".strip()
    raise HTTPException(503, detail)
def _claim_nav():
    """Claim the legs for a Nav2 goal, or reject the request.

    Raises:
        HTTPException: 409 when ``_arbiter.acquire_nav()`` returns a falsy
            value (manual loco currently owns the legs).
    """
    if _arbiter.acquire_nav():
        return
    raise HTTPException(
        409, "Manual movement (Controller) is armed. Disarm it before navigating."
    )
# ── request bodies ──────────────────────────────────────────
class _NameBody(BaseModel):
    """Request body carrying a single place name (goto, save_here, places/delete)."""
    name: str
class _IdBody(BaseModel):
    """Request body carrying a single id. No handler in the visible part of this file uses it."""
    id: object  # mission ids may be int or str; forward as-is
class _StartBody(BaseModel):
    """Request body for POST /start; both fields are forwarded to ``client.start``."""
    mode: int = 2  # web_nav3 launch mode (e.g. 3 = localize against a saved map)
    db_path: str | None = None  # saved map to load (None = build fresh)
class _PoseBody(BaseModel):
    """Request body for a named map pose (used by save_at and goto_pose)."""
    name: str
    x: float
    y: float
    yaw: float = 0.0  # optional; defaults to 0.0
class _RenameBody(BaseModel):
    """Request body for POST /places/rename: current and new place name."""
    old: str
    new: str
# ── status (never raises — degraded body when unavailable) ──
@router.get("/status")
async def status():
    """Report navigation status; returns a degraded body when no client exists.

    With a client, ``client.status()`` runs on a worker thread. Its result
    (a NavStatus with ``as_dict()``, or any mapping) is returned as a dict
    with ``"available": True`` added.
    """
    if _CLIENT is None:
        return {"available": False, "error": _IMPORT_ERROR}
    nav = await asyncio.to_thread(_CLIENT.status)
    if hasattr(nav, "as_dict"):
        payload = nav.as_dict()
    else:
        payload = dict(nav)
    payload["available"] = True
    return payload
# ── places / navigation ─────────────────────────────────────
@router.get("/places")
async def places(map_name: str | None = Query(None, alias="map")):
    """List saved places.

    Per-MAP when ``?map=<name>`` is given (each map keeps its own places);
    otherwise the legacy per-robot store.
    """
    nav_client = _require()
    listing = await asyncio.to_thread(nav_client.list_places, map_name)
    return listing
@router.post("/goto")
async def goto(body: _NameBody):
    """Drive to a saved place by name, claiming the legs for Nav2 first.

    Raises:
        HTTPException: 503 when no client is available; 409 when manual loco
            is armed.

    Returns the client's dispatch result unchanged.
    """
    client = _require()
    _claim_nav()
    try:
        res = await asyncio.to_thread(client.goto, body.name)
    except Exception:
        # The client should never raise, but if it does the legs were never
        # driven, so do not leave the nav gate held.
        _arbiter.release_nav()
        raise
    # A failed dispatch never drove the legs — release the gate so manual loco
    # isn't locked out by a goto that never started.
    if isinstance(res, dict) and not res.get("ok", True):
        _arbiter.release_nav()
        return res
    # Arm the arrival monitor (best-effort), matching the other goal-dispatch
    # handlers in this module, so the gate is released when the goal ends.
    # NOTE(review): assumes client.goto does not arm the monitor itself — confirm.
    try:
        from Project.Sanad.navigation.goal_monitor import arm_goal
        arm_goal(body.name)
    except Exception as exc:  # noqa: BLE001
        log.debug("goal monitor arm skipped: %s", exc)
    return res
@router.post("/start")
async def start(body: _StartBody):
    """Forward to ``client.start(mode, db_path)`` on a worker thread."""
    nav_client = _require()
    result = await asyncio.to_thread(nav_client.start, body.mode, body.db_path)
    return result
class _DbBody(BaseModel):
    """Request body for POST /load_map: path of the saved map to load."""
    db_path: str
@router.post("/load_map")
async def load_map(body: _DbBody):
    """View a saved map: stop any running bringup, then localize against it.

    Delegates to ``client.load_map(db_path)`` on a worker thread.
    """
    nav_client = _require()
    result = await asyncio.to_thread(nav_client.load_map, body.db_path)
    return result
@router.post("/cancel")
async def cancel():
    """Cancel the current navigation goal and release the nav arbitration gate.

    Order matters:
      1. ``client.cancel()`` — a no-op server-side (it only returns a note).
      2. Disarm the arrival monitor so a stale terminal can't fire, then send
         a REAL goal-cancel over rosbridge. Releasing the gate without truly
         stopping Nav2 would let the robot keep driving while manual loco
         re-acquires the legs (double-drive).
      3. Release the gate.

    Step 2 is best-effort: any failure is logged at debug level and skipped.
    When the client result is a dict, it gains a ``cancel_sent`` flag.
    """
    nav_client = _require()
    outcome = await asyncio.to_thread(nav_client.cancel)
    try:
        from Project.Sanad.navigation.goal_monitor import request_cancel, disarm
        disarm()
        sent = await asyncio.to_thread(request_cancel)
        if isinstance(outcome, dict):
            outcome = {**outcome, "cancel_sent": bool(sent)}
    except Exception as exc:  # noqa: BLE001
        log.debug("goal cancel skipped: %s", exc)
    _arbiter.release_nav()
    return outcome
@router.post("/save_here")
async def save_here(body: _NameBody):
    """Forward to ``client.save_here(name)`` on a worker thread."""
    nav_client = _require()
    result = await asyncio.to_thread(nav_client.save_here, body.name)
    return result
@router.post("/save_at")
async def save_at(body: _PoseBody, map_name: str | None = Query(None, alias="map")):
    """Save a named place at a map coordinate (from clicking the map).

    Per-MAP when ``?map=<name>`` is given. Re-saving an existing name MOVES
    the place.
    """
    nav_client = _require()
    return await asyncio.to_thread(
        nav_client.save_at, body.name, body.x, body.y, body.yaw, map_name
    )
@router.post("/places/delete")
async def delete_place(body: _NameBody, map_name: str | None = Query(None, alias="map")):
    """Delete a saved place (per-map) via ``client.delete_place(name, map)``."""
    nav_client = _require()
    result = await asyncio.to_thread(nav_client.delete_place, body.name, map_name)
    return result
@router.post("/places/rename")
async def rename_place(body: _RenameBody, map_name: str | None = Query(None, alias="map")):
    """Rename a saved place (per-map) via ``client.rename_place(old, new, map)``."""
    nav_client = _require()
    result = await asyncio.to_thread(
        nav_client.rename_place, body.old, body.new, map_name
    )
    return result
class _MapEditsBody(BaseModel):
    """Request body for POST /map_edits: the edit overlay to persist."""
    edits: list  # [[world_x, world_y, value], ...] value 0=free/erase, 100=wall
@router.get("/map_edits")
async def get_map_edits(map_name: str = Query(..., alias="map")):
    """Return the saved edit overlay for a map (erased points + painted walls)."""
    nav_client = _require()
    overlay = await asyncio.to_thread(nav_client.get_map_edits, map_name)
    return overlay
@router.post("/map_edits")
async def save_map_edits(body: _MapEditsBody, map_name: str = Query(..., alias="map")):
    """Persist a map's edit overlay (Map Editor) via ``client.save_map_edits``."""
    nav_client = _require()
    result = await asyncio.to_thread(nav_client.save_map_edits, map_name, body.edits)
    return result
class _VoiceGotoBody(BaseModel):
    """Request body for POST /voice_goto: the spoken place name to resolve."""
    place: str
def _resolve_place(client, spoken: str) -> dict:
    """Resolve a spoken place name against the ACTIVE map's places.

    Strategy: exact (case-insensitive) match -> single substring candidate ->
    ambiguous / unknown. Returns a dict the caller (and ultimately Gemini)
    can act on. Never raises.

    Args:
        client: Object exposing ``status()`` (a mapping, or something with
            ``as_dict()``) and ``list_places(map_name)``.
        spoken: The place name as heard; surrounding whitespace and case are
            ignored.

    Returns:
        ``{"ok": True, "resolved": <name>, "map": <active map>}`` on success,
        otherwise ``{"ok": False, "reason": ...}`` with reason one of
        ``status_error``, ``no_map``, ``no_place``, ``ambiguous`` (with
        ``candidates``) or ``unknown_place`` (with every known name).
    """
    try:
        st = client.status()
        body = st.as_dict() if hasattr(st, "as_dict") else dict(st)
    except Exception as exc:  # noqa: BLE001
        return {"ok": False, "reason": "status_error", "detail": str(exc)[:160]}
    if not body.get("bringup_alive"):
        return {"ok": False, "reason": "no_map",
                "detail": "No navigation session is running — load a map first."}
    active_map = body.get("active_map")
    try:
        places = client.list_places(active_map) or []
    except Exception:  # noqa: BLE001
        places = []
    # Keep only non-empty *string* names: a malformed entry (e.g. a numeric
    # name) would otherwise make .lower() raise and break "never raises".
    names = [
        p["name"] for p in places
        if isinstance(p, dict) and isinstance(p.get("name"), str) and p["name"]
    ]
    sl = (spoken or "").strip().lower()
    if not sl:
        return {"ok": False, "reason": "no_place", "map": active_map, "places": names}
    exact = [n for n in names if n.lower() == sl]
    if exact:
        return {"ok": True, "resolved": exact[0], "map": active_map}
    # Substring match in either direction ("lab" ~ "Lab A", "the kitchen" ~ "Kitchen").
    subs = [n for n in names if sl in n.lower() or n.lower() in sl]
    subs = list(dict.fromkeys(subs))  # de-dup, preserve order
    if len(subs) == 1:
        return {"ok": True, "resolved": subs[0], "map": active_map}
    if len(subs) > 1:
        return {"ok": False, "reason": "ambiguous", "candidates": subs, "map": active_map}
    return {"ok": False, "reason": "unknown_place", "candidates": names, "map": active_map}
@router.get("/active")
async def active():
    """Navigation context for Gemini in one call.

    Returns the active map, its mode, and that map's place names, so the
    voice tools (list_places / where_am_i) don't have to guess the active map.
    Place names are looked up only while ``bringup_alive`` is truthy; a
    failing lookup yields an empty list.
    """
    nav_client = _require()
    raw = await asyncio.to_thread(nav_client.status)
    info = raw.as_dict() if hasattr(raw, "as_dict") else dict(raw)
    place_names = []
    if info.get("bringup_alive"):
        try:
            listing = await asyncio.to_thread(nav_client.list_places, info.get("active_map"))
            place_names = [
                entry.get("name")
                for entry in (listing or [])
                if isinstance(entry, dict) and entry.get("name")
            ]
        except Exception:  # noqa: BLE001
            place_names = []
    return {
        "map": info.get("active_map"),
        "mode": info.get("mode"),
        "mode_label": info.get("mode_label"),
        "localizing": bool(info.get("localizing")),
        "bringup_alive": bool(info.get("bringup_alive")),
        "places": place_names,
    }
@router.post("/voice_goto")
async def voice_goto(body: _VoiceGotoBody):
    """Resolve a spoken place name and drive there (Gemini's navigate_to_place).

    Arbiter-gated: the legs are claimed for Nav2 before the goal is dispatched,
    and the claim is released again whenever dispatch fails, including when
    the nav client raises. On success the arrival monitor is armed so Gemini
    later hears [NAV ARRIVED]/[NAV FAILED].

    Returns a structured dict the model can speak from:
      * the failure dict from ``_resolve_place`` when the name cannot be resolved,
      * ``reason="manual_armed"`` when manual locomotion holds the legs,
      * ``reason="dispatch_failed"`` when the goal could not be sent,
      * ``{"ok": True, "resolved": ..., "map": ...}`` on success.
    """
    client = _require()
    res = await asyncio.to_thread(_resolve_place, client, body.place or "")
    if not res.get("ok"):
        return res
    # Claim the legs for Nav2; refuse (don't raise) if manual loco is armed.
    if not _arbiter.acquire_nav():
        return {"ok": False, "reason": "manual_armed",
                "detail": "Manual movement (Controller) is armed — disarm it to navigate."}
    resolved = res["resolved"]
    try:
        drive = await asyncio.to_thread(client.goto, resolved)
    except Exception as exc:  # noqa: BLE001
        # A raising client must not leave the arbiter claimed (that would block
        # manual control indefinitely) nor surface as an error to the model.
        _arbiter.release_nav()
        return {"ok": False, "reason": "dispatch_failed",
                "resolved": resolved, "detail": str(exc)[:160]}
    if isinstance(drive, dict) and not drive.get("ok", True):
        _arbiter.release_nav()
        return {"ok": False, "reason": "dispatch_failed",
                "resolved": resolved, "detail": drive}
    # Arm arrival monitoring (best-effort; absence must not fail the drive).
    try:
        from Project.Sanad.navigation.goal_monitor import arm_goal
        arm_goal(resolved)
    except Exception as exc:  # noqa: BLE001
        log.debug("goal monitor arm skipped: %s", exc)
    return {"ok": True, "resolved": resolved, "map": res.get("map")}
@router.post("/goto_pose")
async def goto_pose(body: _PoseBody):
    """Arbiter-gate a coordinate navigation goal (click-to-drive).

    The browser publishes the real /goal_pose over rosbridge itself; this
    endpoint only claims the legs for Nav2 via ``_claim_nav`` so the two motion
    stacks never drive at once. The frontend sends its goal only after this
    returns ok.
    """
    _require()
    _claim_nav()
    # Arm the arrival monitor so the arbiter is released when this goal ends;
    # without it nav_active would stay True after the goal completes, because
    # the browser publishes the goal but never arms anything.
    try:
        from Project.Sanad.navigation.goal_monitor import arm_goal
        goal_label = f"({body.x:.1f}, {body.y:.1f})"
        arm_goal(goal_label)
    except Exception as exc:  # noqa: BLE001
        log.debug("goal monitor arm skipped: %s", exc)
    return dict(ok=True, x=body.x, y=body.y, yaw=body.yaw)
# ── maps / missions ─────────────────────────────────────────
@router.get("/maps")
async def maps():
    """Return the nav client's map listing, fetched off the event loop."""
    lister = _require().list_maps
    return await asyncio.to_thread(lister)
@router.get("/missions")
async def missions():
    """Return the nav client's mission listing, fetched off the event loop."""
    lister = _require().list_missions
    return await asyncio.to_thread(lister)
@router.post("/missions/run")
async def run_mission(body: _IdBody):
    """Claim the legs for Nav2 and start the mission identified by ``body.id``.

    The nav claim is released again when the mission does not start: either
    the client reports ``ok == False`` or the client call itself raises. In the
    latter case the exception is re-raised after the release so the caller still
    sees the failure, but the arbiter is not left stuck in nav mode.
    """
    client = _require()
    _claim_nav()
    try:
        res = await asyncio.to_thread(client.run_mission, body.id)
    except Exception:
        _arbiter.release_nav()
        raise
    if isinstance(res, dict) and not res.get("ok", True):
        _arbiter.release_nav()
    return res
# ── config (what the SPA needs to render links / connect) ───
@router.get("/config")
async def config():
    """Expose the subset of ``_CFG`` the SPA needs to render links and connect."""
    exposed = ("web_nav3_url", "rosbridge_url", "robot")
    return {key: _CFG[key] for key in exposed}

View File

@ -23,6 +23,12 @@ router = APIRouter()
RECORDS_INDEX = AUDIO_RECORDINGS_DIR / "records.json"
_INDEX_LOCK = threading.Lock()
# Strong refs to fire-and-forget playback tasks. The event loop only keeps a
# weak reference to tasks, so an unreferenced create_task() result can be
# garbage-collected (cancelling playback) before it finishes. Mirror replay.py.
import asyncio as _asyncio # noqa: E402
_BG_TASKS: set[_asyncio.Task] = set()
def _load_index() -> dict[str, Any]:
if not RECORDS_INDEX.exists():
@ -110,15 +116,19 @@ async def play_record(payload: RecordPlay):
raise HTTPException(404, f"File not found: {raw_path.name}")
from Project.Sanad.main import audio_mgr
import asyncio
# Fire-and-forget — play_wav blocks for the clip duration on the G1
# DDS path, and the dashboard's pause / resume / stop / status calls
# need to be served while it's running. Without this, /play wouldn't
# return until the clip finished and the UI couldn't interact with
# the in-flight playback.
asyncio.create_task(asyncio.to_thread(
audio_mgr.play_wav, raw_path, payload.record_name,
))
import threading
# Fire-and-forget on a DEDICATED daemon thread — NOT asyncio.to_thread.
# to_thread runs on the shared default executor, which gets starved while
# the dashboard services the live-voice child's reconnect chatter; that
# delayed record playback by several seconds (clip silent, counter parked).
# A dedicated thread starts immediately regardless of executor/event-loop
# load. play_wav blocks for the clip duration and serves pause/stop via
# _play_state; the UI stays responsive because this handler returns now.
# Python keeps running threads alive, so no ref is needed to prevent GC.
threading.Thread(
target=audio_mgr.play_wav, args=(raw_path, payload.record_name),
name="record-playback", daemon=True,
).start()
return {"ok": True, "record_name": payload.record_name,
"file_kind": payload.file_kind, "path": str(raw_path)}
@ -135,6 +145,14 @@ async def resume_playback():
return audio_mgr.resume_playback()
@router.post("/seek")
async def seek_playback(position_sec: float):
    """Seek the currently-playing clip to ``position_sec`` seconds.

    Used by the waveform scrubber. The audio manager's result is returned
    unchanged.
    """
    # Imported lazily, like the other handlers in this module.
    from Project.Sanad.main import audio_mgr

    outcome = audio_mgr.seek_playback(position_sec)
    return outcome
@router.post("/stop")
async def stop_playback():
from Project.Sanad.main import audio_mgr
@ -149,6 +167,15 @@ async def playback_status():
return audio_mgr.playback_status()
@router.post("/live-hold")
async def set_live_hold(on: bool):
    """Set or clear the manual hold on the live-Gemini pause.

    ``on=True`` asks the audio manager to pause the live voice and keep it
    paused until ``on=False`` is sent; ``on=False`` restores the automatic
    mode in which records pause Gemini only for the duration of a clip.
    Returns the hold state reported by the audio manager.
    """
    from Project.Sanad.main import audio_mgr

    held = audio_mgr.set_live_voice_hold(on)
    return {"live_hold": held}
class RecordRename(BaseModel):
record_name: str
new_name: str
@ -217,7 +244,12 @@ async def delete_record(payload: RecordDelete):
deleted_files = []
for fi in deleted_entry.get("files", {}).values():
try:
p = Path(fi.get("path", "")).resolve()
# _resolve_path handles new-style basenames (resolved under
# AUDIO_RECORDINGS_DIR) as well as legacy absolute paths.
# A raw Path(basename) would resolve vs CWD and fall outside
# base, so the relative_to guard would skip the unlink and the
# WAV would be orphaned on disk. Mirror play_record/rename_record.
p = _resolve_path(fi.get("path", "")).resolve()
p.relative_to(base) # never delete files outside recordings dir
except (ValueError, OSError):
continue
@ -228,3 +260,43 @@ async def delete_record(payload: RecordDelete):
index["records"] = kept
_save_index(index)
return {"ok": True, "deleted": payload.record_name, "deleted_files": deleted_files}
class RecordBulkDelete(BaseModel):
    # Request body for POST /delete-bulk.
    # Names of the records to delete; only consulted when `all` is false.
    record_names: list[str] | None = None
    # When true, every record in the index is deleted.
    all: bool = False
@router.post("/delete-bulk")
async def delete_bulk(payload: RecordBulkDelete):
    """Delete many records in a single call.

    With ``all=True`` every record is removed; otherwise only the records whose
    name appears in ``record_names``. Each record's files are unlinked, but
    only when they resolve inside the recordings directory (same guard as
    /delete). The index is rewritten with the surviving records.
    """
    wanted = set(payload.record_names or [])
    with _INDEX_LOCK:
        index = _load_index()
        root = AUDIO_RECORDINGS_DIR.resolve()
        survivors: list = []
        removed: list = []
        unlinked = 0
        for rec in index.get("records", []):
            rec_name = rec.get("record_name")
            if not (payload.all or rec_name in wanted):
                survivors.append(rec)
                continue
            removed.append(rec_name)
            for meta in rec.get("files", {}).values():
                try:
                    target = _resolve_path(meta.get("path", "")).resolve()
                    target.relative_to(root)  # never delete outside recordings dir
                except (ValueError, OSError):
                    continue
                if not target.exists():
                    continue
                try:
                    target.unlink()
                except OSError:
                    continue
                unlinked += 1
        index["records"] = survivors
        _save_index(index)
    return {"ok": True, "deleted": removed, "deleted_count": len(removed),
            "deleted_files": unlinked}

View File

@ -2,6 +2,7 @@
from __future__ import annotations
import asyncio
from datetime import datetime
from pathlib import Path
@ -9,6 +10,7 @@ from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Project.Sanad.config import SCRIPTS_DIR
from Project.Sanad.core import persona as _persona
from Project.Sanad.dashboard.routes._safe_io import (
atomic_write_text, MAX_UPLOAD_BYTES,
)
@ -31,6 +33,8 @@ def _safe_path(name: str) -> Path:
@router.get("/")
async def list_scripts():
SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
active = _persona.active_persona_name()
default = _persona.default_persona_name()
items = []
for p in sorted(SCRIPTS_DIR.iterdir(), key=lambda x: x.name.lower()):
if not p.is_file():
@ -40,8 +44,48 @@ async def list_scripts():
"name": p.name,
"size_bytes": st.st_size,
"modified_at": datetime.fromtimestamp(st.st_mtime).isoformat(timespec="seconds"),
"active": p.name == active, # the persona Gemini loads now
"is_default": p.name == default, # the fallback (sanad_script.txt)
})
return {"path": str(SCRIPTS_DIR), "files": items}
return {"path": str(SCRIPTS_DIR), "files": items,
"active": active, "default": default}
class ScriptActive(BaseModel):
    # Request body for POST /active (persona selection).
    name: str | None = None   # None / "" / the default name → revert to default
    restart: bool = False     # also restart the live voice so it takes effect now
@router.get("/active")
async def get_active():
    """Report the persona Gemini will load and the default it falls back to."""
    current = _persona.active_persona_name()
    fallback = _persona.default_persona_name()
    return {"active": current, "default": fallback}
@router.post("/active")
async def set_active(payload: ScriptActive):
    """Select the persona script Gemini uses.

    With ``restart=true`` the live voice session is bounced (stopped if it is
    running, then started) so the new persona applies immediately; otherwise
    it applies on the next voice (re)connect. A failed restart never undoes the
    selection: it is only reflected as ``restarted=False`` in the response.

    Raises:
        HTTPException(404): the named script does not exist.
    """
    try:
        chosen = _persona.set_active_persona(payload.name)
    except FileNotFoundError:
        raise HTTPException(404, f"Script not found: {payload.name}")

    bounced = False
    if payload.restart:
        try:
            from Project.Sanad.main import live_sub
            startable = live_sub is not None and hasattr(live_sub, "start")
            if startable:
                running = hasattr(live_sub, "is_running") and live_sub.is_running()
                if running:
                    await asyncio.to_thread(live_sub.stop)
                    # Pause between stop and start.
                    await asyncio.sleep(1.5)
                await asyncio.to_thread(live_sub.start)
                bounced = True
        except Exception:
            pass  # selection is saved regardless of restart success
    return {"ok": True, "active": chosen,
            "default": _persona.default_persona_name(), "restarted": bounced}
class ScriptLoad(BaseModel):
@ -116,5 +160,9 @@ async def delete_script(payload: ScriptDelete):
path = _safe_path(payload.name)
if not path.exists():
raise HTTPException(404, f"Not found: {payload.name}")
if path.name == _persona.default_persona_name():
raise HTTPException(409, f"Cannot delete the default persona ({path.name}).")
path.unlink()
# If the active selection was the deleted file, resolution auto-falls-back
# to the default — no extra cleanup needed.
return {"ok": True, "deleted": payload.name}

View File

@ -5,18 +5,24 @@ from __future__ import annotations
import asyncio
import os
import platform
import shutil
import socket
import sys
from pathlib import Path
from typing import Any
from fastapi import APIRouter
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Project.Sanad.config import (
AUDIO_RECORDINGS_DIR,
BASE_DIR,
DASHBOARD_HOST,
DASHBOARD_INTERFACE,
DASHBOARD_PORT,
DATA_DIR,
DDS_NETWORK_INTERFACE,
LOGS_DIR,
list_network_interfaces,
)
from Project.Sanad.core.logger import get_logger
@ -26,6 +32,36 @@ log = get_logger("system_route")
router = APIRouter()
def _runtime_bind() -> tuple[str, int]:
    """Return the (host, port) the server is actually bound to.

    The server is launched with CLI ``--host`` / ``--port`` values that can
    differ from the import-time DASHBOARD_HOST / DASHBOARD_PORT defaults, so
    the live ``sys.argv`` is scanned for both the ``--flag value`` and the
    ``--flag=value`` spellings. The config constants remain the fallback when a
    flag is absent, and an unparsable port value is ignored. When a flag is
    repeated, the last occurrence wins.
    """
    args = sys.argv
    bound_host, bound_port = DASHBOARD_HOST, DASHBOARD_PORT
    last = len(args) - 1
    for idx, arg in enumerate(args):
        flag, sep, inline = arg.partition("=")
        if flag not in ("--host", "--port"):
            continue
        if sep:
            raw = inline              # --flag=value
        elif idx < last:
            raw = args[idx + 1]       # --flag value
        else:
            continue                  # trailing flag without a value
        if flag == "--host":
            bound_host = raw
            continue
        try:
            bound_port = int(raw)
        except (TypeError, ValueError):
            pass
    return bound_host, bound_port
def _safe_status(component, name: str) -> dict[str, Any]:
if component is None:
return {"available": False}
@ -90,8 +126,9 @@ async def system_info():
except Exception:
interfaces = []
# Determine the URL the dashboard is reachable at
bound_host = DASHBOARD_HOST
# Determine the URL the dashboard is reachable at — use the ACTUAL
# runtime bind args (argv), not the import-time config defaults.
bound_host, bound_port = _runtime_bind()
if bound_host == "0.0.0.0":
# Try to find the wlan0 IP for display purposes
up_ifaces = [i for i in interfaces if i["is_up"] and i["ip"] and not i["ip"].startswith("127.")]
@ -112,8 +149,8 @@ async def system_info():
"interface": DASHBOARD_INTERFACE,
"bound_host": bound_host,
"display_host": display_host,
"port": DASHBOARD_PORT,
"url": f"http://{display_host}:{DASHBOARD_PORT}",
"port": bound_port,
"url": f"http://{display_host}:{bound_port}",
},
"dds": {
"interface": DDS_NETWORK_INTERFACE,
@ -131,3 +168,148 @@ async def system_info():
}
return await asyncio.to_thread(_do)
# ───────────────────── storage tracking + cleanup ─────────────────────
# Categories surfaced in the Settings → Storage panel. Each entry is
# (key, label, path, cleanable). `cleanable` ones get a Clean button and are
# included in "Clean all"; the rest (faces/motions/photos/zones) are shown for
# tracking only — they're operational assets (enrollments, motion configs)
# managed in their own tabs, not disposable clutter.
_STORAGE_CATS = [
("recordings", "Conversation recordings", DATA_DIR / "recordings", True),
("records", "Named records (Typed Replay)", AUDIO_RECORDINGS_DIR, True),
("logs", "Logs", LOGS_DIR, True),
("faces", "Enrolled faces", DATA_DIR / "faces", False),
("motions", "Motion replays + config", DATA_DIR / "motions", False),
("photos", "Photos", DATA_DIR / "photos", False),
("zones", "Vision zones", DATA_DIR / "zones", False),
]
# Keys of the categories that POST /storage/clean accepts as a target.
_CLEANABLE = {k for k, _l, _p, c in _STORAGE_CATS if c}
def _dir_stats(path: Path) -> tuple[int, int]:
    """Return ``(total_bytes, file_count)`` for the directory tree at *path*.

    A missing directory yields ``(0, 0)``. Files whose size cannot be read are
    skipped (neither their bytes nor their count is included), and any
    unexpected error while walking just ends the scan with what was gathered.
    """
    size_sum = 0
    count = 0
    try:
        for folder, _subdirs, names in os.walk(path):
            for name in names:
                try:
                    size_sum += os.path.getsize(os.path.join(folder, name))
                except OSError:
                    continue
                count += 1
    except Exception:
        pass
    return size_sum, count
def _human(b: float) -> str:
    """Format a byte count as a short human-readable string.

    Uses 1024-based units from B to TB. Plain bytes are shown without
    decimals ("512 B"); every larger unit gets one decimal ("1.5 KB"). Values
    beyond the TB range stay expressed in TB.
    """
    size = float(b)
    for unit in ("B", "KB", "MB", "GB"):
        if size < 1024:
            return f"{size:.0f} {unit}" if unit == "B" else f"{size:.1f} {unit}"
        size /= 1024
    # Largest unit: single exit point (the loop no longer needs a TB special
    # case followed by an unreachable return).
    return f"{size:.1f} TB"
@router.get("/storage")
async def storage_usage():
    """Report per-category data/log sizes plus disk usage for the Storage panel.

    All filesystem walking happens in a worker thread. If disk usage cannot be
    determined, ``disk`` is returned as an empty dict.
    """
    def _gather():
        def _row(key, label, path, cleanable):
            size, files = _dir_stats(Path(path))
            return {
                "key": key, "label": label, "path": str(path),
                "size_bytes": size, "size_human": _human(size),
                "files": files, "cleanable": cleanable,
            }

        rows = [_row(*cat) for cat in _STORAGE_CATS]
        data_total = _dir_stats(DATA_DIR)[0]
        logs_total = _dir_stats(LOGS_DIR)[0]
        try:
            usage = shutil.disk_usage(str(BASE_DIR))
            disk = {
                "free_human": _human(usage.free),
                "total_human": _human(usage.total),
                "used_pct": round(100.0 * (usage.total - usage.free) / usage.total, 1),
            }
        except Exception:
            disk = {}
        return {
            "categories": rows,
            "data_bytes": data_total, "data_human": _human(data_total),
            "logs_human": _human(logs_total),
            "total_human": _human(data_total + logs_total),
            "disk": disk,
        }

    return await asyncio.to_thread(_gather)
class _CleanReq(BaseModel):
    # Request body for POST /storage/clean.
    target: str  # recordings | records | logs | all
def _purge_wavs_and_index(folder, index_name: str) -> tuple[int, int]:
    """Delete the top-level ``*.wav`` files of *folder* plus its index file.

    Returns ``(items_deleted, bytes_freed)``. A file only counts, in both
    numbers, once it has actually been unlinked, so a failed delete no longer
    inflates the reported freed bytes.
    """
    folder = Path(folder)
    removed, freed = 0, 0
    for f in [*folder.glob("*.wav"), folder / index_name]:
        if not f.is_file():
            continue
        try:
            size = f.stat().st_size
            f.unlink()
        except OSError:
            continue
        freed += size
        removed += 1
    return removed, freed


def _clean_recordings() -> tuple[int, int]:
    """Delete conversation recordings (WAVs + index.json) under DATA_DIR/recordings.

    Returns ``(items_deleted, bytes_freed)``.
    """
    return _purge_wavs_and_index(DATA_DIR / "recordings", "index.json")


def _clean_records() -> tuple[int, int]:
    """Delete named records (WAVs + records.json) under AUDIO_RECORDINGS_DIR.

    Returns ``(items_deleted, bytes_freed)``.
    """
    return _purge_wavs_and_index(AUDIO_RECORDINGS_DIR, "records.json")
def _clean_logs() -> tuple[int, int]:
    """Truncate every ``*.log`` file directly inside LOGS_DIR.

    Files are truncated rather than deleted: active loggers hold append-mode
    handles, so truncating to 0 clears the content without losing the fd.

    Returns ``(files_truncated, bytes_freed)``. A file only counts, in both
    numbers, once the truncate succeeded, so a failed open no longer inflates
    the freed bytes.
    """
    truncated, freed = 0, 0
    for f in Path(LOGS_DIR).glob("*.log"):
        try:
            size = f.stat().st_size
            # Opening in "w" mode truncates; the context manager guarantees
            # the handle is closed even on error.
            with open(f, "w"):
                pass
        except OSError:
            continue
        freed += size
        truncated += 1
    return truncated, freed
@router.post("/storage/clean")
async def storage_clean(req: _CleanReq):
    """Clean one disposable category (recordings | records | logs) or 'all'.

    Recordings and records are deleted; logs are truncated. Asset categories
    (faces, motions, zones) are never touched here. The work runs in a worker
    thread and the response reports per-target and total freed bytes.

    Raises:
        HTTPException(400): the target is neither 'all' nor a cleanable key.
    """
    wanted = (req.target or "").strip().lower()
    if wanted != "all" and wanted not in _CLEANABLE:
        raise HTTPException(400, f"target must be 'all' or one of {sorted(_CLEANABLE)}")

    def _run():
        cleaners = {
            "recordings": _clean_recordings,
            "records": _clean_records,
            "logs": _clean_logs,
        }
        if wanted == "all":
            selected = ["recordings", "records", "logs"]
        else:
            selected = [wanted]
        per_target = {}
        grand_total = 0
        for key in selected:
            items, freed = cleaners[key]()
            per_target[key] = {"items": items, "freed_bytes": freed,
                               "freed_human": _human(freed)}
            grand_total += freed
        log.info("storage clean %s → freed %s", selected, _human(grand_total))
        return {"ok": True, "cleaned": selected,
                "total_freed_bytes": grand_total,
                "total_freed_human": _human(grand_total),
                "result": per_target}

    return await asyncio.to_thread(_run)

View File

@ -65,3 +65,17 @@ async def motors_snapshot():
except Exception:
positions = []
return build_payload(temps, positions, time.time())
@router.get("/battery")
async def battery_status():
    """Return the live G1 battery (BMS) snapshot from the arm component.

    The payload comes straight from ``arm.get_battery()``. When there is no arm,
    the arm has no ``get_battery``, or the call fails, the response is
    ``{"available": False}``.
    """
    arm = _get_arm()
    supported = arm is not None and hasattr(arm, "get_battery")
    if not supported:
        return {"available": False}
    try:
        snapshot = arm.get_battery()
    except Exception:
        return {"available": False}
    return snapshot

View File

@ -13,6 +13,7 @@ they just record the target and feed Gemini the place's reference.
from __future__ import annotations
import asyncio
import io
from typing import Optional
@ -110,11 +111,59 @@ def _place_to_dict(p) -> dict:
def _zone_to_dict(z) -> dict:
    """Serialize a zone to a JSON-ready dict.

    ``linked_map`` is read defensively and reported as None when the zone
    object has no such attribute. Places are serialized with ``_place_to_dict``.
    """
    payload = {"id": z.id, "name": z.name, "description": z.description}
    payload["linked_map"] = getattr(z, "linked_map", None)
    payload["added_at"] = z.added_at
    payload["places"] = [_place_to_dict(place) for place in z.places]
    return payload
async def _maybe_drive_to_place(zone, place) -> Optional[dict]:
    """Drive to *place* when it is linked to a nav2 place in the localized map.

    Returns None when the place is not drivable (no ``nav_place`` on the place
    or no ``linked_map`` on the zone). Otherwise returns a structured outcome:
    ``{"ok": True, "resolved": nav_place}`` on dispatch, or ``{"ok": False,
    "reason": ...}`` with one of ``nav_unavailable``, ``status_error``,
    ``no_map``, ``wrong_map``, ``manual_armed`` or ``dispatch_failed``.

    Best-effort: never raises into the caller. The arbiter claim taken for Nav2
    is released again whenever the goal could not be dispatched, including
    when the nav client raises.
    """
    nav_place = getattr(place, "nav_place", None)
    linked_map = getattr(zone, "linked_map", None)
    if not nav_place or not linked_map:
        return None
    try:
        from Project.Sanad.dashboard.routes import navigation as navmod
        from Project.Sanad.dashboard.routes import _arbiter
    except Exception:
        return {"ok": False, "reason": "nav_unavailable"}
    client = getattr(navmod, "_CLIENT", None)
    if client is None:
        return {"ok": False, "reason": "nav_unavailable"}
    try:
        st = await asyncio.to_thread(client.status)
        body = st.as_dict() if hasattr(st, "as_dict") else dict(st)
    except Exception as exc:  # noqa: BLE001
        return {"ok": False, "reason": "status_error", "detail": str(exc)[:120]}
    if not body.get("bringup_alive"):
        return {"ok": False, "reason": "no_map"}

    def _stem(name) -> str:
        # Case-insensitive map name without a trailing ".db".
        text = str(name or "").strip().lower()
        return text[:-3] if text.endswith(".db") else text

    # The robot can only drive in the currently-localized map. Require the
    # zone's linked map to match; both sides are reduced to the .db stem so
    # "office" and "office.db" compare equal whichever side carries the suffix.
    active = _stem(body.get("active_map"))
    want = _stem(linked_map)
    if active and want and active != want:
        return {"ok": False, "reason": "wrong_map",
                "active": body.get("active_map"), "want": linked_map}
    if not _arbiter.acquire_nav():
        return {"ok": False, "reason": "manual_armed"}
    try:
        drive = await asyncio.to_thread(client.goto, nav_place)
    except Exception as exc:  # noqa: BLE001
        # Don't leave the legs claimed for a goal that was never dispatched.
        _arbiter.release_nav()
        return {"ok": False, "reason": "dispatch_failed", "detail": str(exc)[:120]}
    if isinstance(drive, dict) and not drive.get("ok", True):
        _arbiter.release_nav()
        return {"ok": False, "reason": "dispatch_failed", "detail": drive}
    # Arm arrival monitoring (best-effort; absence must not fail the drive).
    try:
        from Project.Sanad.navigation.goal_monitor import arm_goal
        arm_goal(nav_place)
    except Exception as exc:  # noqa: BLE001
        log.debug("goal monitor arm skipped: %s", exc)
    return {"ok": True, "resolved": nav_place}
def _nav_target_dict(st, gallery) -> Optional[dict]:
zid, pid = st.nav_target_zone_id, st.nav_target_place_id
if not zid or not pid:
@ -184,6 +233,16 @@ class FacesPayload(BaseModel):
face_ids: list[int] = []
class LinkMapPayload(BaseModel):
    # Request body for POST /{zone_id}/link_map.
    # nav2 map .db basename (e.g. "office.db"); None/"" unlinks.
    map: Optional[str] = None
class NavPlacePayload(BaseModel):
    # Request body for POST /{zone_id}/places/{place_id}/nav_link.
    # nav2 place name in the zone's linked map; None/"" unlinks.
    nav_place: Optional[str] = None
@router.get("")
async def list_zones():
g = _require_zones()
@ -247,6 +306,7 @@ async def create_place(
name: Optional[str] = Query(default=None),
description: Optional[str] = Query(default=None),
face_ids: list[int] = Query(default=[]),
nav_place: Optional[str] = Query(default=None),
files: Optional[list[UploadFile]] = File(default=None),
):
g = _require_zones()
@ -262,11 +322,38 @@ async def create_place(
_validate_image(content, f.filename)
image_bytes.append(content)
p = g.create_place(zone_id, name=name, description=description,
face_ids=face_ids, image_bytes_list=image_bytes or None)
face_ids=face_ids, image_bytes_list=image_bytes or None,
nav_place=nav_place)
_bump_zones_version()
return {"ok": True, "place": _place_to_dict(p)}
@router.post("/{zone_id}/link_map")
async def link_zone_map(zone_id: int, payload: LinkMapPayload):
    """Bind (or unbind) a zone to a nav2 map .db.

    The link is required before the zone's places can be linked to that map's
    nav places and before Gemini Nav can drive in the zone.

    Raises:
        HTTPException(404): the gallery raised FileNotFoundError.
    """
    gallery = _require_zones()
    try:
        gallery.set_zone_map(zone_id, payload.map)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc))
    _bump_zones_version()
    updated = gallery.get_zone(zone_id)
    return {"ok": True, "zone": _zone_to_dict(updated)}
@router.post("/{zone_id}/places/{place_id}/nav_link")
async def link_place_nav(zone_id: int, place_id: int, payload: NavPlacePayload):
    """Link (or unlink) a place to a nav2 place name in its zone's map.

    This link is what makes the place drivable from voice / 'Go here'.

    Raises:
        HTTPException(404): the gallery raised FileNotFoundError.
    """
    gallery = _require_zones()
    try:
        gallery.set_place_nav(zone_id, place_id, payload.nav_place)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc))
    _bump_zones_version()
    updated = gallery.get_place(zone_id, place_id)
    return {"ok": True, "place": _place_to_dict(updated)}
@router.post("/{zone_id}/places/{place_id}/rename")
async def rename_place(zone_id: int, place_id: int, payload: NamePayload):
g = _require_zones()
@ -398,9 +485,13 @@ async def download_place_zip(zone_id: int, place_id: int):
@router.post("/{zone_id}/places/{place_id}/go")
async def go_to_place(zone_id: int, place_id: int):
"""Set this place as the active destination. Records the target and lets
the Gemini child pick it up (reference photo + goal). Actual robot motion
is wired by N2 locomotion until then this just establishes the goal."""
"""Set this place as the active destination AND, if the place links a nav2
place in this zone's (currently-localized) map, actually drive there.
Two effects: (1) records nav_target so the Gemini child primes on the
reference photo + announces the destination; (2) if drivable, dispatches a
Nav2 goal (arbiter-gated, with arrival monitoring). A place with no nav link
is announce-only, as before."""
g = _require_zones()
p = g.get_place(zone_id, place_id)
if p is None:
@ -410,8 +501,12 @@ async def go_to_place(zone_id: int, place_id: int):
nav_target_place_id=place_id)
log.info("nav target set → zone_%d/place_%d (%s)", zone_id, place_id,
p.name or "(unnamed)")
return {"ok": True, "nav_target": {"zone_id": zone_id, "place_id": place_id,
"place_name": p.name}}
zone = g.get_zone(zone_id)
drive = await _maybe_drive_to_place(zone, p)
return {"ok": True,
"nav_target": {"zone_id": zone_id, "place_id": place_id,
"place_name": p.name},
"drive": drive}
@router.post("/nav/clear")
@ -419,3 +514,84 @@ async def clear_nav_target():
recognition_state.mutate(STATE_PATH, nav_target_zone_id=0, nav_target_place_id=0)
log.info("nav target cleared")
return {"ok": True, "nav_target": None}
def _resolve_map_path(client, linked_map: str) -> Optional[str]:
    """Find the .db path of a zone's linked map via the nav client.

    Names are compared case-insensitively and with any trailing ".db" removed,
    so "office", "Office.db" and "OFFICE" all refer to the same map.

    Returns the matching entry's ``path``, or None when *linked_map* is empty,
    the client cannot list maps, or no map matches. Entries that are not dicts
    are skipped, so a malformed listing cannot crash the lookup.
    """
    def _stem(name) -> str:
        text = str(name or "").strip().lower()
        return text[:-3] if text.endswith(".db") else text

    want = _stem(linked_map)
    if not want:
        # Nothing to look for; never match a nameless map entry by accident.
        return None
    try:
        maps = client.list_maps() or []
    except Exception:
        return None
    for entry in maps:
        if not isinstance(entry, dict):
            continue
        if _stem(entry.get("name")) == want:
            return entry.get("path")
    return None
@router.post("/{zone_id}/gemini_nav/start")
async def gemini_nav_start(zone_id: int):
    """Enter 'Gemini Nav' for a zone.

    Three steps: (1) ask the nav client to load the zone's linked map,
    (2) switch camera, face recognition, zone recognition and movement on,
    (3) make sure the live Gemini session is running and send it a greeting
    prompt listing the drivable places.

    Map loading and greeting are best-effort: their failures are reported in
    ``loaded`` / logged, and the response is still ``ok``.

    Raises:
        HTTPException(404): the zone does not exist.
        HTTPException(400): the zone has no linked nav2 map.
    """
    gallery = _require_zones()
    zone = gallery.get_zone(zone_id)
    if zone is None:
        raise HTTPException(404, f"zone_{zone_id} not found")
    linked_map = getattr(zone, "linked_map", None)
    if not linked_map:
        raise HTTPException(400, "This zone has no linked nav2 map — link one first.")

    # 1) Localize the zone's map (single bringup, mode 3 — no fresh mapping).
    loaded: dict = {"ok": False, "reason": "nav_unavailable"}
    try:
        from Project.Sanad.dashboard.routes import navigation as navmod
        nav_client = getattr(navmod, "_CLIENT", None)
        if nav_client is not None:
            db_path = await asyncio.to_thread(_resolve_map_path, nav_client, linked_map)
            if not db_path:
                loaded = {"ok": False, "reason": "map_not_found", "map": linked_map}
            else:
                loaded = await asyncio.to_thread(nav_client.load_map, db_path)
    except Exception as exc:  # noqa: BLE001
        loaded = {"ok": False, "reason": "load_error", "detail": str(exc)[:160]}

    # 2) Camera + face + zone recognition + movement ON for the session.
    recognition_state.mutate(
        STATE_PATH,
        vision_enabled=True,
        face_rec_enabled=True,
        zone_rec_enabled=True,
        movement_enabled=True,
    )
    _bump_zones_version()

    # 3) Ensure the Gemini session is live, then greet (zone + drivable places).
    session_started = False
    try:
        from Project.Sanad.main import live_sub
        if live_sub is not None:
            stopped = hasattr(live_sub, "is_running") and not live_sub.is_running()
            if stopped:
                await asyncio.to_thread(live_sub.start)
                session_started = True
            drivable = []
            for place in zone.places:
                if getattr(place, "nav_place", None):
                    drivable.append(place.name or place.nav_place)
            zname = zone.name or f"zone {zone_id}"
            if not drivable:
                greet = (f"You are now in the '{zname}' zone, but no drivable "
                         f"places are linked to its map yet. Greet the user and "
                         f"say places still need to be linked before you can drive.")
            else:
                placelist = ", ".join(str(x) for x in drivable)
                greet = (f"You are now in the '{zname}' zone. You can drive the "
                         f"user to: {placelist}. Greet the user warmly in your "
                         f"normal Khaleeji style and ask where they would like to go.")
            if hasattr(live_sub, "send_state"):
                live_sub.send_state("nav_zone", greet)
    except Exception as exc:  # noqa: BLE001
        log.warning("gemini_nav greet failed: %s", exc)

    return {"ok": True, "zone_id": zone_id, "zone": _zone_to_dict(zone),
            "loaded": loaded, "session_started": session_started}

File diff suppressed because it is too large Load Diff

View File

@ -19,20 +19,38 @@ MAX_WATCHERS = 50
# Ring buffer of recent log lines (shared across connections).
_recent: deque[str] = deque(maxlen=500)
_watchers: set[asyncio.Queue] = set()
# Each watcher is an (event_loop, queue) pair. We keep the loop so cross-thread
# producers can schedule the enqueue on the consumer's loop (asyncio.Queue is
# NOT thread-safe — calling put_nowait off-loop neither wakes the parked
# `await queue.get()` nor safely mutates the queue's internals).
_watchers: set[tuple[asyncio.AbstractEventLoop, asyncio.Queue]] = set()
_watchers_lock = threading.Lock()
def push_log_line(line: str):
"""Called from the logging system to feed new lines.
May be called from any thread (logging is multi-threaded), so we
snapshot the watchers under a lock before iterating.
May be called from ANY thread (logging is multi-threaded), so the append
to _recent and the per-watcher enqueue are done together under the same
lock that log_ws holds while snapshotting history + registering that
closes the history/live overlap window so a connecting client can't see a
line both in its history replay and again live. The enqueue itself is
marshalled onto each watcher's loop via call_soon_threadsafe because
asyncio.Queue.put_nowait is not safe to call from a foreign thread.
"""
_recent.append(line)
with _watchers_lock:
_recent.append(line)
snapshot = list(_watchers)
for q in snapshot:
for loop, q in snapshot:
try:
loop.call_soon_threadsafe(_safe_put, q, line)
except RuntimeError:
# Loop already closed — watcher is going away; skip it.
pass
def _safe_put(q: asyncio.Queue, line: str) -> None:
"""Enqueue on the consumer's own loop thread (so it's safe)."""
try:
q.put_nowait(line)
except asyncio.QueueFull:
@ -52,16 +70,22 @@ except Exception:
async def log_ws(ws: WebSocket):
await ws.accept()
loop = asyncio.get_running_loop()
queue: asyncio.Queue[str] = asyncio.Queue(maxsize=200)
watcher = (loop, queue)
with _watchers_lock:
if len(_watchers) >= MAX_WATCHERS:
await ws.close(code=1013, reason="Too many log watchers")
return
queue: asyncio.Queue[str] = asyncio.Queue(maxsize=200)
_watchers.add(queue)
# Register the live queue and snapshot history under the SAME lock that
# push_log_line holds — so every line is either in this history
# snapshot or arrives on the queue, never both (no replay duplicates).
_watchers.add(watcher)
history = list(_recent)
try:
# Send recent history
for line in list(_recent):
for line in history:
await ws.send_text(line)
while True:
@ -77,4 +101,4 @@ async def log_ws(ws: WebSocket):
pass
finally:
with _watchers_lock:
_watchers.discard(queue)
_watchers.discard(watcher)

0
vendor/Sanad/face/__init__.py vendored Normal file
View File

208
vendor/Sanad/face/emotion_frames.py vendored Normal file
View File

@ -0,0 +1,208 @@
"""Extra emotion frames for the LED mask, in the same 46x58 display space + RGB
style as :mod:`colorface` (black bg, cyan eyes, red mouth). These are the
expression frames Gemini can trigger via ``set_expression`` that the base
``colorface.default_frames`` does not draw (heart, laugh, love-eyes, cool,
sleepy, confused, kiss, star-struck).
``emotion_frames(...)`` returns ``{name: raw_bytes}`` ready for the mask's DIY
image upload, exactly like ``colorface.default_frames``. Positions mirror
``colorface.build_face`` so the eyes/mouth line up with the rest of the set.
"""
from __future__ import annotations
import math
import colorface as _cf
from colorface import DISPLAY_W as W, DISPLAY_H as H, encode
from PIL import Image, ImageDraw
# eye/mouth geometry copied from colorface.build_face so frames are consistent.
# All values are pixel coordinates in the W x H display space.
_EYE_L = W // 2 - 10 # 13
_EYE_R = W // 2 + 10 # 33
_EYE_T, _EYE_B = 15, 29 # normal eye top/bottom
_EYE_W = 6  # used as the eye half-width by _round_eye
_MOUTH_CY = 44
_MOUTH_CX = W // 2 # 23
def _canvas():
    """Return a fresh black W x H RGB image and an ImageDraw bound to it."""
    image = Image.new("RGB", (W, H), (0, 0, 0))
    pen = ImageDraw.Draw(image)
    return image, pen
def _round_eye(g, cx, eye_color, sclera_color, *, t=_EYE_T, b=_EYE_B, w=_EYE_W):
    """Draw a round open eye centred horizontally on ``cx``.

    Three stacked ellipses: the sclera (full box), the coloured iris inset
    inside it, and a small black pupil around the vertical midpoint.
    """
    left, right = cx - w, cx + w
    g.ellipse([left, t, right, b], fill=sclera_color)
    g.ellipse([left + 3, t + 4, right - 3, b - 2], fill=eye_color)
    mid = (t + b) // 2
    g.ellipse([cx - 1, mid - 1, cx + 1, mid + 2], fill=(0, 0, 0))
def _happy_eye(g, cx, color):
    """Draw a smiling, squinted eye: an upward "^"-like arc centred on ``cx``."""
    box = [cx - 7, _EYE_T - 1, cx + 7, _EYE_B + 5]
    g.arc(box, start=200, end=340, fill=color, width=3)
def _heart(g, cx, cy, half, color):
    """Draw a filled heart centred at (cx, cy); ``half`` is its half-width.

    Built from two full-circle pie slices (the lobes) and a downward-pointing
    triangle whose tip sits ``half`` below the centre.
    """
    r = half / 2.0
    top, bottom = cy - r, cy + r
    # Left lobe first, then right lobe.
    for x0, x1 in ((cx - half, cx), (cx, cx + half)):
        g.pieslice([x0, top, x1, bottom], 0, 360, fill=color)
    shoulder = cy + r * 0.2
    g.polygon([(cx - half, shoulder), (cx + half, shoulder),
               (cx, cy + half)], fill=color)
def _star(g, cx, cy, r, color):
    """Draw a filled five-pointed star centred at (cx, cy), outer radius ``r``.

    Ten vertices alternate between the outer radius and 0.45 x that radius,
    starting at the top point (angle -pi/2) and stepping pi/5 each time.
    """
    def vertex(i):
        ang = -math.pi / 2 + i * math.pi / 5
        rad = r * 0.45 if i % 2 else r   # odd index -> inner notch
        return (cx + rad * math.cos(ang), cy + rad * math.sin(ang))

    g.polygon([vertex(i) for i in range(10)], fill=color)
def _smile(g, color, *, big=False):
    """Draw the mouth as a smile.

    ``big=False`` draws a curved line (arc); ``big=True`` draws a filled
    chord, i.e. an open grin.
    """
    if not big:
        g.arc([_MOUTH_CX - 12, _MOUTH_CY - 8, _MOUTH_CX + 12, _MOUTH_CY + 8],
              start=20, end=160, fill=color, width=4)
        return
    # open grin
    g.chord([_MOUTH_CX - 13, _MOUTH_CY - 6, _MOUTH_CX + 13, _MOUTH_CY + 12],
            start=0, end=180, fill=color)
# Fixed emoji colors — these frames are icons, not part of the face's colour
# scheme, so a heart is always red and a thumb always yellow regardless of the
# user's chosen eye/mouth colours.
_RED = (255, 45, 75)  # hearts (heart face, kiss) and the kiss lips
_PINK = (255, 95, 155)  # love-eyes hearts and their smile
_YELLOW = (255, 200, 40)  # thumbs-up hand
# -- individual emotion drawings ---------------------------------------------
def _heart_face(eye, mouth, sclera):
    """Frame: one big red heart filling the face.

    The colour arguments are accepted for signature parity with the other
    builders but are not used.
    """
    canvas, pen = _canvas()
    centre_x = W // 2
    _heart(pen, centre_x, 26, 18, _RED)
    return canvas
def _laugh(eye, mouth, sclera):
    """Frame: laughing face — squinted eyes, wide open grin, a joy tear under each eye."""
    canvas, pen = _canvas()
    eye_centres = (_EYE_L, _EYE_R)
    for x in eye_centres:
        _happy_eye(pen, x, eye)
    _smile(pen, mouth, big=True)  # wide open grin
    tear_colour = (0, 180, 255)
    for x in eye_centres:
        # a joy tear under each eye
        pen.ellipse([x - 2, _EYE_B + 3, x + 2, _EYE_B + 9], fill=tear_colour)
    return canvas
def _love(eye, mouth, sclera):
    """Frame: love-eyes — a pink heart in place of each eye plus a pink smile."""
    canvas, pen = _canvas()
    for x in (_EYE_L, _EYE_R):
        _heart(pen, x, 22, 8, _PINK)  # pink heart-shaped eyes
    _smile(pen, _PINK)
    return canvas
def _cool(eye, mouth, sclera):
    """Frame: "cool" face — sunglasses (two lenses + bridge) over a one-sided smirk."""
    canvas, pen = _canvas()
    rim_colour = (40, 40, 55)
    lens_colour = (10, 10, 20)
    glint_y = _EYE_T + 3
    # two lenses, each with a short eye-coloured glint
    for x in (_EYE_L, _EYE_R):
        pen.rounded_rectangle([x - 8, _EYE_T, x + 8, _EYE_B + 1], radius=4,
                              fill=lens_colour, outline=rim_colour, width=2)
        pen.line([x - 5, glint_y, x + 2, glint_y], fill=eye, width=2)
    # bridge between the lenses
    pen.line([_EYE_L + 8, glint_y, _EYE_R - 8, glint_y], fill=rim_colour, width=3)
    # a cool little smirk (raised on one side)
    smirk_box = [_MOUTH_CX - 11, _MOUTH_CY - 5, _MOUTH_CX + 12, _MOUTH_CY + 8]
    pen.arc(smirk_box, start=15, end=120, fill=mouth, width=4)
    return canvas
def _sleepy(eye, mouth, sclera):
    """Frame: sleepy face — droopy lid arcs, a small mouth and a "zZ" in the corner."""
    canvas, pen = _canvas()
    # droopy half-closed eyes: one lid arc per eye
    for x in (_EYE_L, _EYE_R):
        lid_box = [x - 7, _EYE_T + 2, x + 7, _EYE_B + 4]
        pen.arc(lid_box, start=160, end=20, fill=eye, width=3)
    # small tired mouth
    pen.ellipse([_MOUTH_CX - 4, _MOUTH_CY - 2, _MOUTH_CX + 4, _MOUTH_CY + 4],
                fill=mouth)
    # zZ drawn as cheap line-glyphs (no font dependency): each Z is a top bar,
    # a diagonal and a bottom bar inside an s x s square anchored at (x, y)
    z_glyphs = ((36, 8, 5), (41, 3, 3))
    for x, y, s in z_glyphs:
        strokes = (
            [x, y, x + s, y],
            [x + s, y, x, y + s],
            [x, y + s, x + s, y + s],
        )
        for stroke in strokes:
            pen.line(stroke, fill=eye, width=1)
    return canvas
def _confused(eye, mouth, sclera):
    """Frame: confused face — one normal eye, one raised smaller eye with a brow, squiggle mouth."""
    canvas, pen = _canvas()
    _round_eye(pen, _EYE_L, eye, sclera)  # normal eye
    # raised/small eye
    _round_eye(pen, _EYE_R, eye, sclera, t=_EYE_T - 3, b=_EYE_B - 3, w=5)
    # raised brow over the small eye
    brow = [_EYE_R - 6, _EYE_T - 6, _EYE_R + 6, _EYE_T - 9]
    pen.line(brow, fill=eye, width=2)
    # wavy/squiggle mouth: 7 points 4 px apart, alternating 3 px above/below centre
    squiggle = []
    for i in range(7):
        dy = 3 if i % 2 else -3
        squiggle.append((_MOUTH_CX - 12 + i * 4, _MOUTH_CY + dy))
    pen.line(squiggle, fill=mouth, width=3, joint="curve")
    return canvas
def _kiss(eye, mouth, sclera):
    """Frame: kiss — one open eye, one winking eye, puckered red lips, a small floating heart."""
    canvas, pen = _canvas()
    _round_eye(pen, _EYE_L, eye, sclera)
    # winking eye: a flat line at the eye's vertical midpoint
    wink_y = (_EYE_T + _EYE_B) // 2
    pen.line([_EYE_R - 6, wink_y, _EYE_R + 6, wink_y], fill=eye, width=3)
    # puckered red lips: red outer ellipse with a black centre
    pen.ellipse([_MOUTH_CX - 4, _MOUTH_CY - 4, _MOUTH_CX + 4, _MOUTH_CY + 5],
                fill=_RED)
    pen.ellipse([_MOUTH_CX - 2, _MOUTH_CY - 2, _MOUTH_CX + 2, _MOUTH_CY + 3],
                fill=(0, 0, 0))
    _heart(pen, 37, 13, 6, _RED)  # little floating red heart
    return canvas
def _star_struck(eye, mouth, sclera):
    """Frame: star-struck — a yellow star in place of each eye and a big open grin."""
    canvas, pen = _canvas()
    star_colour = (255, 220, 0)
    for x in (_EYE_L, _EYE_R):
        _star(pen, x, 22, 7, star_colour)
    _smile(pen, mouth, big=True)
    return canvas
def _thumbs_up(eye, mouth, sclera):
    """Frame: a thumbs-up icon (the colour arguments are not used).

    One bold vertical thumb + a bold fist block, kept simple so it reads on
    the low-res LED grid (fine detail just blurs into a blob).
    """
    canvas, pen = _canvas()
    fist_box = [11, 30, 37, 52]
    thumb_box = [13, 6, 29, 34]
    pen.rounded_rectangle(fist_box, radius=8, fill=_YELLOW)   # fist block
    pen.rounded_rectangle(thumb_box, radius=8, fill=_YELLOW)  # big thumb up
    pen.line([30, 34, 36, 34], fill=(0, 0, 0), width=3)       # thumb/finger split
    return canvas
# Emotion name -> builder. Every builder takes (eye, mouth, sclera) colours and
# returns the drawn image; insertion order is the order emotion_frames() emits.
_BUILDERS = dict(
    heart=_heart_face,
    laugh=_laugh,
    love=_love,
    cool=_cool,
    sleepy=_sleepy,
    confused=_confused,
    kiss=_kiss,
    star_struck=_star_struck,
    thumbs_up=_thumbs_up,
)
def emotion_frames(*, eye_color=_cf.DEFAULT_EYE, mouth_color=_cf.DEFAULT_MOUTH,
                   sclera_color=_cf.WHITE, include=None) -> dict:
    """Return ``{name: raw_bytes}`` for the extra emotion frames.

    Each selected builder in ``_BUILDERS`` is called with the three colours
    and its image is passed through ``encode``. ``include`` optionally
    restricts the result to a subset (a set/list of names) so the caller can
    honour the mask's slot budget; names not in ``_BUILDERS`` are ignored and
    the output always follows ``_BUILDERS`` order.
    """
    if include is None:
        wanted = list(_BUILDERS)
    else:
        wanted = [name for name in _BUILDERS if name in include]
    return {
        name: encode(_BUILDERS[name](eye_color, mouth_color, sclera_color))
        for name in wanted
    }

599
vendor/Sanad/face/face_motion.py vendored Normal file
View File

@ -0,0 +1,599 @@
#!/usr/bin/env python3
"""Lifelike face motion for the Shining LED mask — SanadV3.
A richer, more *organic* driver than the Mask lib's built-in idle. Instead of an
occasional blink/glance, it gives the robot's face the small, constant motion a
real face has:
* natural blinking varied intervals, occasional quick double-blinks
* frequent small eye saccades (darts) with short gaze holds and drift
* idle micro-expressions (a brief smile now and then)
* state-aware behaviour:
idle relaxed, wanders, blinks
listening attentive, eyes mostly forward, fewer darts, soft blinks
thinking looks away (longer gaze holds), slower blinks
speaking mouth lip-syncs to audio + the odd mid-sentence blink
* quick reactions: surprised / happy(smile) / sad, held briefly then released
It drives the mask by PLAY of the pre-uploaded DIY frames (no per-frame upload),
so motion is smooth. Lip-sync composes with the eye motion via feed_audio_level().
Run it standalone (keep the mask within ~30 cm for the one-time frame upload):
python3 face/face_motion.py # connect, load frames, stay alive
python3 face/face_motion.py --demo # cycle the states to show the range
python3 face/face_motion.py --reload # force re-upload of the frame set
Integrate into Sanad: construct ``LifelikeFace(mask=<connected ShiningMask>)``
(or let it connect itself), ``await face.start()``, then drive it from the event
bus / Gemini lip-sync markers:
face.set_listening() # when the user starts speaking
face.set_thinking() # while a tool/response is being prepared
face.set_speaking(True/False) # around a spoken reply
face.feed_audio_level(rms_0_to_1) # per audio chunk -> real lip-sync
face.react("surprised" | "smile" | "sad")
face.set_idle() # back to relaxed wandering
"""
from __future__ import annotations
import argparse
import asyncio
import os
import random
import sys
import time
import logging
import threading
from pathlib import Path
log = logging.getLogger("sanad.face_motion")
# Frames present in the mask's DIY slots (colorface.default_frames()).
GAZE = ("neutral", "look_left", "look_right")
# Indexed by mouth-open level 0..3 (see LifelikeFace._mouth_for): 0 = closed.
MOUTH = ("neutral", "talk1", "talk2", "talk3")
# How long after the last lip-sync marker the face stays "speaking" (mouth
# follows the amplitude; pauses close it). When markers stop for this long the
# turn ends and the eyes return to their underlying state.
_SPEECH_WINDOW = 0.6
# Mouth-frame cadence while speaking. Each frame is a FULL-face DIY slot switch;
# this small mask can't cleanly repaint the LED matrix faster than ~5/s, so the
# old 0.09s (~11/s) cadence overran it and showed torn/scrambled composites of
# several frames at once. Cap it well under the tear threshold — speech visemes
# only change ~4-7/s anyway, so lip-sync still reads fine.
# (0.22 s per frame is roughly 4.5 switches per second.)
_SPEAK_FRAME_SEC = 0.22
# BLE-link health. If the mask link drops mid-session every play_diy raises a
# BleakError, which left the loop busy-spinning ~20x/s forever (no recovery but a
# manual disconnect/connect). Treat a run of consecutive play failures (or
# mask.is_connected going False) as a dropped link and attempt a *bounded*
# reconnect; if that is exhausted, stop the loop so the face goes idle/unavailable
# instead of hammering a dead transport.
_PLAY_FAIL_LIMIT = 10 # consecutive failed plays before we call it a drop
_RECONNECT_ATTEMPTS = 3 # reconnect tries per detected drop
_RECONNECT_BACKOFF = 2.0 # seconds between reconnect tries
# BLE link keepalive. _play() skips re-sending an *unchanged* frame, so a long
# neutral/idle stretch writes nothing but the occasional blink (every 2-4.5s).
# If that quiet gap (plus an RF glitch) outlasts the link's supervision timeout
# the mask drops — and every reconnect briefly flashes the mask's own built-in
# face. Re-send the current frame at least this often so the link never goes
# quiet long enough to be dropped. One tiny write/sec when idle; free when busy.
_KEEPALIVE_SEC = 1.0
# Frames that carry the animated face's mouth (gaze + lip-sync). "Hide mouth"
# blacks out the mouth region on just these, leaving eyes-only — the emotion
# icons (heart/thumb/…) are left alone.
_MOUTH_FRAMES = ("neutral", "talk1", "talk2", "talk3", "blink",
"look_left", "look_right")
# Used by _mask_mouth_bytes as the top edge of the blacked-out rectangle.
_MOUTH_MASK_TOP = 32 # display-y below which the mouth lives (eyes end ~29)
def _mask_mouth_bytes(data: bytes) -> bytes:
    """Return a copy of an encoded 46x58 frame with the mouth region blacked out.

    The encoded bytes are read as an RGB image of size (DISPLAY_H, DISPLAY_W)
    and transposed into display space; everything from display row
    ``_MOUTH_MASK_TOP`` downward is painted black; the result is re-encoded
    with ``colorface.encode``.
    """
    import colorface as _cf
    from PIL import Image as _Image, ImageDraw as _ImageDraw

    stored_size = (_cf.DISPLAY_H, _cf.DISPLAY_W)
    stored = _Image.frombytes("RGB", stored_size, bytes(data))
    frame = stored.transpose(_Image.Transpose.TRANSPOSE)  # -> 46x58 display space
    mouth_box = [0, _MOUTH_MASK_TOP, _cf.DISPLAY_W, _cf.DISPLAY_H]
    _ImageDraw.Draw(frame).rectangle(mouth_box, fill=(0, 0, 0))
    return _cf.encode(frame)
def _add_mask_to_path() -> str:
    """Put the flat Mask library (mask.py / faceanim.py / colorface.py) on sys.path.

    The directory is ``$SANAD_MASK_DIR`` when that is set and non-empty,
    otherwise ``<two levels above this file's directory>/Mask``. It is
    inserted at the front of ``sys.path`` unless already present, and returned.
    """
    mask_dir = os.environ.get("SANAD_MASK_DIR")
    if not mask_dir:
        mask_dir = str(Path(__file__).resolve().parents[2] / "Mask")
    already_listed = mask_dir in sys.path
    if mask_dir and not already_listed:
        sys.path.insert(0, mask_dir)
    return mask_dir
class LifelikeFace:
    """Organic, state-aware motion driver for the LED mask.

    One asyncio task (:meth:`_loop`) animates the face by switching between
    DIY frames that were uploaded once to the mask (``play_diy`` of a slot,
    no per-frame upload). The public control methods are plain synchronous
    setters that only mutate flags the loop reads on its next tick.
    """

    def __init__(self, mask=None, *, name_prefix="MASK", address=None, adapter=None,
                 brightness=95, frames=None, eye_color=None, mouth_color=None,
                 sclera_color=None, auto_reconnect=True, hide_mouth=False):
        """Build the frame set and initialise all runtime state (no BLE I/O here).

        Args:
            mask: an existing mask object to drive; when ``None`` one is
                created and connected in :meth:`start` and disconnected again
                in :meth:`stop`.
            name_prefix, address, adapter: passed to ``ShiningMask`` when this
                object creates the mask itself.
            brightness: brightness applied after every (re)connect.
            frames: explicit ``{name: encoded_bytes}`` frame set; when ``None``
                the default set is built and tinted with the colours below.
            eye_color, mouth_color, sclera_color: optional RGB overrides for
                the built frame set (``None`` -> the library defaults).
            auto_reconnect: when False the loop does not self-heal a dropped
                link; it forces a disconnect and exits.
            hide_mouth: when True the mouth region of the gaze/talk frames is
                blacked out (eyes-only face).
        """
        _add_mask_to_path()
        import mask as _mask  # flat Mask lib
        import faceanim as _faceanim
        import colorface as _colorface

        self._ShiningMask = _mask.ShiningMask
        self._FaceAnimator = _faceanim.FaceAnimator
        self._colorface = _colorface
        self.mask = mask
        self._own_mask = mask is None
        self.name_prefix = name_prefix
        self.address = address
        self.adapter = adapter
        self.brightness = int(brightness)
        # When False, a dropped link is NOT self-healed here — the loop bails
        # cleanly (and forces the transport disconnected) so an external owner
        # (FaceController's reconnect supervisor) brings the link + face back.
        self._auto_reconnect = bool(auto_reconnect)

        # Frame colors: explicit frames win; else build the default set tinted
        # with whatever colors were given (None -> the lib defaults cyan/red).
        if frames is None:
            ck = {}
            if eye_color is not None:
                ck["eye_color"] = tuple(eye_color)
            if mouth_color is not None:
                ck["mouth_color"] = tuple(mouth_color)
            if sclera_color is not None:
                ck["sclera_color"] = tuple(sclera_color)
            frames = _colorface.default_frames(**ck)
            # Extra Gemini-triggerable emotions (heart, laugh, love-eyes, cool,
            # confused, kiss, thumbs_up) in the same style. Appended after the
            # base set so slot ids 1..N stay stable for existing frames.
            # Guarded: a missing module never breaks the face.
            try:
                from Project.Sanad.face.emotion_frames import emotion_frames as _emo
                # 7 emotions so slots 1..19 hold the face set and slot 20 stays
                # free as a scratch slot for QR/social images (mask caps at 20).
                frames = {**frames, **_emo(**ck, include={
                    "heart", "laugh", "love", "cool", "confused", "kiss", "thumbs_up"})}
            except Exception:
                log.exception("emotion frames unavailable — base frames only")
        self.frames = frames
        # Reserved DIY slot (just past the animated frames) for on-demand images
        # (QR / social) shown via the FaceController's show_scratch_image().
        self.scratch_slot = len(self.frames) + 1
        # Mouth show/hide: keep the unmasked originals so a live toggle can
        # re-upload just the gaze/talk slots masked or normal.
        self._base_frames = dict(self.frames)
        self.hide_mouth = bool(hide_mouth)
        if self.hide_mouth:
            self.frames = {n: (_mask_mouth_bytes(d) if n in _MOUTH_FRAMES else d)
                           for n, d in self.frames.items()}

        # Runtime state. This must live in __init__ (not after another
        # method's return): start()/_play()/_loop() read these attributes.
        self.slots: dict = {}           # frame name -> DIY slot id (filled by _upload_frames)
        self._state = "idle"            # underlying eye state: idle|listening|thinking
        self._speaking = False          # explicit speaking turn (set_speaking)
        self._level = 0.0               # live lip-sync amplitude 0..1
        self._last_mouth_t = 0.0        # last set_mouth/feed_audio_level time
        self._react = None              # name of the expression currently held, if any
        self._react_until = 0.0         # monotonic deadline of the held expression
        self._cur = None                # name of the frame last played successfully
        self._task = None               # the running _loop() task
        self._stop = False
        self._play_fails = 0            # consecutive play_diy failures (link-drop signal)
        self._last_write = 0.0          # monotonic of the last successful play_diy (keepalive)
        self._paused = False            # loop stops writing (used during a scratch upload)
        self._paused_ack = threading.Event()  # set once the loop has actually parked

    def mouth_frames_for(self, hidden: bool) -> dict:
        """{name: bytes} for the gaze/talk frames, masked (hidden) or normal — the
        FaceController re-uploads just these slots to toggle the mouth live."""
        return {n: (_mask_mouth_bytes(self._base_frames[n]) if hidden
                    else self._base_frames[n])
                for n in _MOUTH_FRAMES if n in self._base_frames}

    # -- lifecycle ------------------------------------------------------------
    async def start(self, *, reload: bool = False):
        """Connect (if this object owns the mask), upload frames, start the loop.

        ``reload=True`` forces a re-upload of the whole frame set. Returns
        ``self`` so it can be used as ``face = await LifelikeFace().start()``.
        """
        if self.mask is None:
            self.mask = self._ShiningMask(
                address=self.address, name_prefix=self.name_prefix, adapter=self.adapter)
            await self.mask.connect(timeout=20.0, attempts=12)
        await self.mask.set_brightness(self.brightness)
        # Upload the frame set via the RELIABLE (acked) image path — see
        # _upload_frames. We no longer borrow FaceAnimator.load(), whose
        # fire-and-forget upload silently corrupts slots on a marginal link (a
        # dropped packet -> garbage frame, no exception -> no retry).
        await self._upload_frames(force=reload)
        self._stop = False
        await self._play("neutral")
        self._task = asyncio.create_task(self._loop())
        return self

    async def stop(self):
        """Cancel the motion loop and, if this object owns the mask, disconnect it."""
        self._stop = True
        if self._task:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
            self._task = None
        if self._own_mask and self.mask is not None:
            try:
                await self.mask.disconnect()
            except Exception:
                pass  # best-effort teardown: the link may already be gone

    async def __aenter__(self):
        return await self.start()

    async def __aexit__(self, *exc):
        await self.stop()

    # -- reliable frame upload ------------------------------------------------
    async def _upload_frames(self, *, force: bool):
        """Upload the frame set to the mask's DIY slots, RELIABLY.

        The mask's default DIY upload is fire-and-forget: ~80 write-without-
        response packets per frame with no ack, so a packet dropped on a
        marginal BLE link silently corrupts that slot (no exception -> no retry)
        and the frame renders as garbage. We instead drive each frame through
        the mask's ACKED image path (upload_image -> _upload(kind=IMAGE),
        per-packet REOK), retrying the whole frame on any failure (a fresh DATS
        resets the half-written slot). Same name->slot (1..N) map as before.

        Unless ``force`` is set, the upload is skipped when the mask already
        reports at least as many stored DIY frames as this set contains.
        """
        names = list(self.frames)
        self.slots = {name: i + 1 for i, name in enumerate(names)}
        if not force:
            try:
                count = await self.mask.get_diy_count(timeout=4.0) or 0
            except Exception:
                count = 0  # unknown -> fall through to a full upload
            if count >= len(names):
                return  # frames already stored (persist in flash)
        await self.mask.clear_diy()
        acked = True
        for slot, data in enumerate(self.frames.values(), start=1):
            acked = await self._upload_one_frame(slot, bytes(data), acked)
            await asyncio.sleep(0.2)

    async def _upload_one_frame(self, slot: int, data: bytes, acked: bool) -> bool:
        """Upload one frame. Prefer the acked image path; on failure reconnect +
        retry the whole frame. If frame 1 proves this mask clone never acks
        IMAGE uploads, latch off the acked path and use paced fire-and-forget
        for the rest. Returns whether to keep using the acked path."""
        if acked:
            for attempt in range(5):
                try:
                    await self.mask.upload_image(data, slot, timeout=8.0)
                    return True
                except Exception as exc:
                    # frame 1 failing its first two acked tries => this clone
                    # doesn't ack IMAGE uploads; stop trying it.
                    if slot == 1 and attempt >= 1:
                        log.warning("mask: IMAGE uploads not acked by this clone "
                                    "(%s) -- using paced fire-and-forget", exc)
                        break
                    if attempt == 4:
                        log.warning("mask: acked upload of slot %d exhausted (%s) "
                                    "-- fire-and-forget fallback", slot, exc)
                        break
                    await self._reupload_reconnect()
        # fallback: paced fire-and-forget (probabilistic -- keep the mask close)
        await self.mask.upload_raw_image(data, index=slot,
                                         chunk_delay=0.10, init_delay=0.30)
        return False

    async def _reupload_reconnect(self):
        """Drop + re-establish the link mid-upload so the next frame attempt
        starts clean (a fresh DATS resets any half-written slot). Failures on
        either step are swallowed; the caller's next upload attempt surfaces them."""
        try:
            await self.mask.disconnect()
        except Exception:
            pass
        await asyncio.sleep(1.0)
        try:
            await self.mask.connect(timeout=15.0, attempts=8)
            await self.mask.set_brightness(self.brightness)
        except Exception:
            pass

    # -- control --------------------------------------------------------------
    def set_state(self, state: str):
        """Set the underlying eye state; any unrecognised value falls back to "idle".

        "speaking" is accepted as a value, but the loop only has dedicated eye
        behaviour for "thinking" and "listening" (everything else wanders like
        idle; the micro-smile fires only when the state is exactly "idle").
        """
        self._state = state if state in ("idle", "listening", "thinking", "speaking") else "idle"

    def set_idle(self):
        """End any speaking turn and return to relaxed wandering."""
        self._speaking = False
        self._last_mouth_t = 0.0  # leave any speaking overlay immediately
        self.set_state("idle")

    def set_listening(self):
        """End any speaking turn and switch to the attentive listening state."""
        self._speaking = False
        self._last_mouth_t = 0.0
        self.set_state("listening")

    def set_thinking(self):
        """End any speaking turn and switch to the look-away thinking state."""
        self._speaking = False
        self._last_mouth_t = 0.0
        self.set_state("thinking")

    def set_speaking(self, on: bool):
        """Mark a speaking turn. Without lip-sync markers the mouth auto-talks;
        with them it follows the amplitude. The underlying eye state is kept, so
        it returns there when the turn ends."""
        self._speaking = bool(on)
        if not on:
            self._last_mouth_t = 0.0
            self._level = 0.0

    def feed_audio_level(self, level: float):
        """Per-audio-chunk amplitude 0..1 -> real lip-sync (mouth opens by loudness).

        Keeps the face 'speaking' for a short window after the last call, so
        pauses close the mouth and the turn ends cleanly when markers stop
        without depending on an explicit speaking on/off signal."""
        self._level = max(self._level * 0.4, min(1.0, float(level)))  # fast attack
        self._last_mouth_t = time.monotonic()

    def react(self, emotion: str, hold: float = 1.4):
        """Briefly hold an expression (surprised / smile / sad / …) then release.

        Ignored when ``emotion`` is not a known frame name in ``self.slots``."""
        if emotion in self.slots:
            self._react = emotion
            self._react_until = time.monotonic() + float(hold)

    # -- FaceController-compatible API (so it can drop in for FaceAnimator) ----
    def set_mouth(self, level: int):
        """Discrete mouth level 0..3 (e.g. from the Gemini [[MOUTH:n]] relay) ->
        drives lip-sync. Maps the level to a representative amplitude; level 0
        decays the mouth shut but keeps the short speaking window alive.
        Out-of-range levels are clamped to 0..3."""
        amp = (0.0, 0.12, 0.24, 0.5)[max(0, min(3, int(level)))]
        self._level = max(self._level * 0.4, amp)
        self._last_mouth_t = time.monotonic()

    def set_expression(self, name):
        """Hold an expression frame until cleared with None (vs the timed react).

        A falsy or unknown ``name`` clears only an open-ended hold; a timed
        :meth:`react` still in progress is left to expire on its own."""
        if name and name in self.slots:
            self._react = name
            self._react_until = float("inf")
        elif self._react_until == float("inf"):
            self._react = None

    def pause(self):
        """Stop the loop from writing to the mask (so a concurrent scratch-slot
        upload's per-packet acks aren't disturbed by play_diy traffic)."""
        self._paused_ack.clear()
        self._paused = True

    def wait_paused(self, timeout: float = 2.0) -> bool:
        """Block until the loop has actually reached the paused branch (so no
        play_diy is in flight when the caller starts the scratch upload).

        This is a blocking ``threading.Event`` wait; returns False on timeout."""
        return self._paused_ack.wait(timeout)

    def resume(self):
        """Let the loop write again after :meth:`pause`."""
        self._paused = False
        self._paused_ack.clear()
        self._cur = None  # force a redraw when the loop takes over again

    async def show(self, name: str):
        """One-off: briefly show a named frame (used by FaceController.show_expression)."""
        if name in self.slots:
            self.react(name, hold=1.5)

    # -- internals ------------------------------------------------------------
    @staticmethod
    def _mouth_for(level: float) -> str:
        """Map an amplitude to a MOUTH frame name (thresholds 0.06 / 0.16 / 0.32)."""
        i = 0 if level < 0.06 else 1 if level < 0.16 else 2 if level < 0.32 else 3
        return MOUTH[i]

    async def _play(self, name: str, *, force: bool = False):
        """Switch the mask to frame ``name``; never raises.

        Unknown names are ignored, and an unchanged frame is not re-sent
        unless ``force`` is set. A failed write is counted in
        ``_play_fails`` and clears ``_cur`` so the next tick retries."""
        slot = self.slots.get(name)
        if slot is None or (name == self._cur and not force):
            return
        try:
            await self.mask.play_diy(slot)
            self._cur = name
            self._last_write = time.monotonic()  # keepalive clock: link saw traffic
            self._play_fails = 0                 # link is alive again
        except Exception:
            self._cur = None       # retry next tick on a transient BLE error
            self._play_fails += 1  # ...but count it: a sustained run == a drop

    def _link_dead(self) -> bool:
        """True once the BLE link looks gone: the transport reports disconnected,
        or play_diy has failed a sustained run in a row (a single glitch is still
        treated as transient and retried)."""
        connected = bool(getattr(self.mask, "is_connected", False)) if self.mask else False
        return (not connected) or self._play_fails >= _PLAY_FAIL_LIMIT

    async def _reconnect(self) -> bool:
        """Bounded reconnect after a detected drop. Frames persist on the mask's
        flash, so on success we only re-pin brightness + redraw the current frame
        (no re-upload). Returns True if the link is back, False if exhausted."""
        for _ in range(_RECONNECT_ATTEMPTS):
            if self._stop:
                return False
            try:
                if getattr(self.mask, "is_connected", False):
                    await self.mask.disconnect()  # clean any half-open client first
            except Exception:
                pass
            try:
                await self.mask.connect(timeout=10.0, attempts=2)
                await self.mask.set_brightness(self.brightness)
                self._play_fails = 0
                self._cur = None  # force a redraw on the fresh link
                await self._play("neutral")
                return True
            except Exception:
                await asyncio.sleep(_RECONNECT_BACKOFF)
        return False

    async def _blink(self, restore: str):
        """Play one blink (occasionally a quick double-blink), then show ``restore``."""
        await self._play("blink")
        await asyncio.sleep(random.uniform(0.08, 0.13))
        if random.random() < 0.18:  # occasional quick double-blink
            await self._play(restore)
            await asyncio.sleep(random.uniform(0.07, 0.11))
            await self._play("blink")
            await asyncio.sleep(random.uniform(0.08, 0.12))
        await self._play(restore)

    async def _loop(self):
        """The motion loop: runs until ``_stop`` is set or the link is given up on.

        Per tick, in priority order: link health / reconnect, pause parking,
        keepalive re-send, held reaction, speaking (lip-sync or auto-talk),
        then the non-speaking blink / saccade / micro-smile schedule."""
        mono = time.monotonic
        t_blink = mono() + random.uniform(1.5, 4.0)
        t_sacc = mono() + random.uniform(0.6, 1.6)
        t_micro = mono() + random.uniform(12.0, 25.0)
        gaze = "neutral"
        while not self._stop:
            t = mono()
            # BLE link health: if it dropped, try a bounded reconnect instead of
            # busy-spinning play_diy on a dead transport. If reconnect is
            # exhausted, leave the loop so the face stops (the controller's
            # status() then reports it not running) rather than spinning forever.
            if self._link_dead():
                if not self._auto_reconnect:
                    # Owner-managed recovery (FaceController supervisor): make the
                    # transport report disconnected so the supervisor's is_connected
                    # check fires, then leave the loop. The supervisor reconnects
                    # and rebuilds the face (frames persist on the mask's flash).
                    try:
                        if getattr(self.mask, "is_connected", False):
                            await self.mask.disconnect()
                    except Exception:
                        pass
                    break
                if not await self._reconnect():
                    break
                t = mono()  # reconnect can take a while
            # Paused (during a scratch-slot upload): write nothing so the upload's
            # per-packet REOK acks aren't disturbed by play_diy traffic. Signal
            # that we've actually parked so the caller can start the upload.
            if self._paused:
                self._paused_ack.set()
                await asyncio.sleep(0.1)
                continue
            # BLE keepalive: re-send the current frame if the link has gone quiet.
            # _play() skips unchanged frames, so a long neutral idle stretch writes
            # nothing but blinks; a quiet gap past the supervision timeout drops the
            # link, and each reconnect flashes the mask's built-in face. A cheap
            # periodic re-send keeps the link alive (usually a no-op while
            # speaking — that path plays a mouth frame every _SPEAK_FRAME_SEC,
            # so _last_write stays fresh whenever the frame changes).
            if self._cur is not None and (t - self._last_write) >= _KEEPALIVE_SEC:
                await self._play(self._cur, force=True)
            # transient reaction overrides everything briefly
            if self._react is not None:
                if t < self._react_until:
                    await self._play(self._react)
                    await asyncio.sleep(0.06)
                    continue
                self._react = None
                self._cur = None  # force a redraw of whatever's underneath
            # "speaking" = an explicit turn OR fresh lip-sync markers (the latter
            # window auto-expires, so the mouth closes and the turn ends when the
            # markers stop, without needing a reliable speaking-off signal).
            lipsync_active = (t - self._last_mouth_t) < _SPEECH_WINDOW
            if self._speaking or lipsync_active:
                if lipsync_active:
                    base = self._mouth_for(self._level)  # 0 = closed on pauses
                    self._level *= 0.55                  # decay toward closed
                else:
                    base = MOUTH[random.choice([0, 1, 1, 2, 2, 3, 3, 2, 1, 0])]  # auto-talk
                await self._play(base)
                # No mid-speech blink: a blink is a 2-3 frame burst that, on top
                # of the mouth cadence, spikes the switch rate and tears the
                # display. Eyes blink between utterances (idle/listening) instead.
                await asyncio.sleep(_SPEAK_FRAME_SEC)
                continue
            # --- non-speaking: idle / listening / thinking ---
            if t >= t_blink:
                await self._blink(gaze)
                lo, hi = (3.5, 6.5) if self._state == "thinking" else (2.0, 4.5)
                t_blink = t + random.uniform(lo, hi)
            if t >= t_sacc:
                if self._state == "thinking":
                    gaze = random.choice(["look_left", "look_right", "look_left", "look_right", "neutral"])
                    hold = random.uniform(0.9, 1.8)
                elif self._state == "listening":
                    gaze = random.choice(["neutral", "neutral", "neutral", "look_left", "look_right"])
                    hold = random.uniform(0.5, 1.2)
                else:  # idle — relaxed wandering
                    gaze = random.choice(["neutral", "neutral", "look_left", "look_right", "neutral"])
                    hold = random.uniform(0.3, 0.9)
                await self._play(gaze)
                t_sacc = t + hold + random.uniform(0.4, 1.4)
            else:
                await self._play(gaze)
            if self._state == "idle" and t >= t_micro:  # rare idle micro-smile
                await self._play("smile")
                await asyncio.sleep(random.uniform(0.6, 1.0))
                gaze = "neutral"
                self._cur = None
                t_micro = t + random.uniform(15.0, 30.0)
            await asyncio.sleep(0.05)
# ---------------------------------------------------------------------------
# Standalone runner
# ---------------------------------------------------------------------------
async def _amain(args):
    """Standalone runner coroutine.

    Builds a ``LifelikeFace`` from the parsed CLI ``args``, starts it
    (``args.reload`` forces a frame re-upload), then either cycles the demo
    script (``args.demo``) or idles until interrupted. The face is always
    stopped on the way out.
    """
    face = LifelikeFace(name_prefix=args.name_prefix, address=args.address,
                        brightness=args.brightness)
    print("connecting + loading frames (keep the mask within ~30 cm) ...", flush=True)
    await face.start(reload=args.reload)
    print("lifelike motion running. Ctrl+C to stop.", flush=True)
    try:
        if not args.demo:
            # No demo: just keep the process (and the motion loop) alive.
            while True:
                await asyncio.sleep(1)
        # (label, action, seconds to let it run) — only reached in demo mode
        demo_script = (
            ("idle (wandering + blinks)", face.set_idle, 7),
            ("listening (attentive)", face.set_listening, 7),
            ("thinking (looks away)", face.set_thinking, 7),
            ("speaking (auto lip-sync)", lambda: face.set_speaking(True), 7),
            ("react: surprised", lambda: face.react("surprised", 2.0), 2.2),
            ("react: smile", lambda: face.react("smile", 2.0), 2.2),
            ("react: sad", lambda: face.react("sad", 2.0), 2.2),
            ("back to idle", face.set_idle, 5),
        )
        for label, action, dur in demo_script:
            print(" ->", label, flush=True)
            action()
            await asyncio.sleep(dur)
        face.set_idle()
        await asyncio.sleep(2)
    except KeyboardInterrupt:
        print("\nstopping ...")
    finally:
        await face.stop()
def main():
    """CLI entry point: parse the options and run :func:`_amain` to completion.

    Ctrl+C is the documented way to stop. ``asyncio.run`` cancels the main
    task (so ``_amain``'s ``finally`` stops the face) and then re-raises
    ``KeyboardInterrupt`` here, so it is caught to exit without a traceback.
    """
    ap = argparse.ArgumentParser(description=__doc__,
                                 formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument("--demo", action="store_true", help="cycle through the states/reactions")
    ap.add_argument("--reload", action="store_true", help="force re-upload of the frame set")
    ap.add_argument("--address", help="mask BLE MAC")
    ap.add_argument("--name-prefix", default="MASK")
    ap.add_argument("--brightness", type=int, default=95)
    args = ap.parse_args()
    try:
        asyncio.run(_amain(args))
    except KeyboardInterrupt:
        # Cleanup already ran inside _amain; nothing left to do but exit quietly.
        pass


if __name__ == "__main__":
    main()

801
vendor/Sanad/face/mask_face.py vendored Normal file
View File

@ -0,0 +1,801 @@
"""Shining LED face mask — Sanad subsystem (BLE, owns its own asyncio loop).
Wraps the standalone **Mask** project (``Project/Mask`` the flat ``shiningmask``
library: ``mask.py`` / ``faceanim.py`` / ``colorface.py`` ) as a Sanad subsystem
so the dashboard "Mask Face" tab can drive the robot's animated LED face.
Why a dedicated loop: the mask talks BLE (bleak/BlueZ) and ``FaceAnimator`` runs a
persistent asyncio task, so this controller owns a background daemon thread with
its own event loop. Route handlers call the plain SYNC methods here (themselves
wrapped in ``asyncio.to_thread`` by FastAPI); each marshals a coroutine onto that
loop via ``run_coroutine_threadsafe``.
The Mask project is a flat set of top-level modules (not an installed package), so
it is imported by inserting its directory on ``sys.path``. Default location is the
sibling ``<Project>/Mask``; override with ``SANAD_MASK_DIR`` or
``config/mask_config.json``. Needs an env with ``bleak`` + ``Pillow`` (g1_env). If
those are missing the subsystem still constructs but reports unavailable, and the
rest of Sanad is unaffected (the dashboard tab shows the reason).
"""
from __future__ import annotations
import asyncio
import os
import sys
import threading
from pathlib import Path
from typing import Optional, Sequence, Tuple
from Project.Sanad.config import BASE_DIR
from Project.Sanad.core import config_loader
from Project.Sanad.core.logger import get_logger
log = get_logger("mask_face")
# An RGB colour triple (red, green, blue).
Color = Tuple[int, int, int]
# Named frames the face can show (FaceAnimator slots). The first two rows are
# the colorface.default_frames() set; the last row matches names drawn by the
# extra emotion builders (face/emotion_frames.py) — NOTE(review): confirm this
# tuple stays in sync with the frame set LifelikeFace actually uploads.
EXPRESSIONS = ("neutral", "smile", "blink", "look_left", "look_right",
"talk1", "talk2", "talk3", "surprised", "sad", "wink", "angry",
"heart", "laugh", "love", "cool", "confused", "kiss", "thumbs_up")
# Default face colors (match colorface.DEFAULT_EYE / DEFAULT_MOUTH).
DEFAULT_EYE_COLOR: Color = (0, 230, 255) # cyan
DEFAULT_MOUTH_COLOR: Color = (255, 50, 50) # red
DEFAULT_SCLERA_COLOR: Color = (255, 255, 255) # white of the eye
def _parse_color(value, default: Color) -> Color:
    """Coerce a config/API color to a clamped RGB tuple; never raises on bad input.

    Accepted forms:
      * a sequence ``[r, g, b]`` / ``(r, g, b)`` of values ``int()`` accepts;
      * a hex string ``"#rgb"``, ``"#rrggbb"`` or ``"#rrggbbaa"`` (the ``#``
        is optional, surrounding whitespace is ignored, alpha is dropped).

    Each channel is clamped to 0..255. ``None``, ``""`` and anything unusable
    return ``tuple(default)``. Hex strings of any other length, or containing
    non-hex characters (e.g. ``"#12345"``, ``"#-1-2-3"``), are rejected
    rather than being sliced into a wrong colour.
    """
    if value in (None, ""):
        return tuple(default)
    try:
        if isinstance(value, str):
            digits = value.strip().lstrip("#")
            if len(digits) == 3:
                # shorthand "#rgb" -> "rrggbb"
                digits = "".join(ch * 2 for ch in digits)
            hex_chars = "0123456789abcdefABCDEF"
            if len(digits) not in (6, 8) or any(ch not in hex_chars for ch in digits):
                raise ValueError(f"not a hex colour: {value!r}")
            value = (int(digits[0:2], 16), int(digits[2:4], 16), int(digits[4:6], 16))
        r, g, b = (int(value[0]), int(value[1]), int(value[2]))
        return (max(0, min(255, r)), max(0, min(255, g)), max(0, min(255, b)))
    except Exception:
        # Deliberate catch-all: this coerces untrusted config/API input and the
        # contract is "fall back to the default", whatever shape the value has.
        return tuple(default)
class FaceController:
"""Owns the BLE mask connection + FaceAnimator on a private event loop."""
def __init__(self):
    """Read the mask settings, then start the BLE loop and helper threads.

    Settings come from ``config_loader.load("mask")``; the ``SANAD_MASK_DIR``,
    ``SANAD_MASK_ADDRESS``, ``SANAD_MASK_NAME_PREFIX`` and
    ``SANAD_MASK_ADAPTER`` environment variables take precedence over the
    matching config keys. Nothing is connected here: the constructor starts
    the (idle) event-loop thread and the reconnect supervisor, plus the
    autostart thread when both ``autostart`` and ``gemini_linked`` are set.
    """
    cfg = config_loader.load("mask")

    def _cfg(key, default):
        # A missing, null or empty-string entry means "use the default".
        v = cfg.get(key, default)
        return v if v not in (None, "") else default

    env = os.environ
    mask_dir = env.get("SANAD_MASK_DIR") or _cfg("mask_dir", "")
    if not mask_dir:
        # Default: the sibling Mask project (…/Project/Mask).
        mask_dir = str(Path(BASE_DIR).parent / "Mask")
    self.mask_dir = mask_dir
    self.address = (env.get("SANAD_MASK_ADDRESS") or _cfg("address", "")) or None
    self.name_prefix = env.get("SANAD_MASK_NAME_PREFIX") or _cfg("name_prefix", "MASK")
    self.adapter = (env.get("SANAD_MASK_ADAPTER") or _cfg("adapter", "")) or None
    # Clamp to the same 0-128 range set_brightness() enforces, so an
    # out-of-range config value is never handed to the face driver.
    self.brightness = max(0, min(128, int(_cfg("brightness", 95))))
    self.fps = float(_cfg("fps", 8.0))
    self.connect_timeout = float(_cfg("connect_timeout", 15.0))
    self.connect_attempts = int(_cfg("connect_attempts", 5))

    # Use the lifelike motion driver (saccades, varied blinks, states,
    # reactions, smooth lip-sync). Falls back to the basic FaceAnimator if
    # the lifelike module is unavailable or this is set false.
    self.lifelike = bool(_cfg("lifelike", True))
    self._face_kind = None
    self._hide_mouth = bool(_cfg("hide_mouth", False))  # eyes-only face toggle

    # Gemini<->mask link. Default OFF: the mask does NOT auto-connect (no BLE
    # churn) and Gemini's emotion/social markers are ignored. Turned on from
    # the dashboard, it connects the mask + lets Gemini drive it.
    self._gemini_linked = bool(_cfg("gemini_linked", False))

    # Auto-connect + start the animated face on boot (best-effort, in the
    # background so it never blocks startup). After the one-time frame
    # upload, later boots just connect + animate (no upload).
    self.autostart = bool(_cfg("autostart", True))

    # Face colors (baked into the uploaded DIY frames). Stored as RGB lists in
    # config; changing them re-uploads the frame set (face_start reload).
    self.eye_color = _parse_color(_cfg("eye_color", None), DEFAULT_EYE_COLOR)
    self.mouth_color = _parse_color(_cfg("mouth_color", None), DEFAULT_MOUTH_COLOR)
    self.sclera_color = _parse_color(_cfg("sclera_color", None), DEFAULT_SCLERA_COLOR)

    # runtime state
    self._mask = None  # shiningmask.ShiningMask
    self._face = None  # faceanim.FaceAnimator
    self._lib: Optional[dict] = None
    self._lib_failed = False
    self._connecting = False
    self._face_running = False
    self._speaking = False
    self._mouth: Optional[int] = None
    self._last_error: Optional[str] = None
    self._op_lock = threading.Lock()

    # Desired-state intents the reconnect supervisor enforces:
    #   _want_connected — we want a live BLE link (set on connect/autostart,
    #                     cleared on a *user* disconnect). While true, the
    #                     supervisor keeps (re)connecting through drops / weak
    #                     signal until it succeeds.
    #   _face_desired   — the animated face should be running (set on
    #                     face_start, cleared on face_stop / static overrides
    #                     like text/image). After a reconnect the supervisor
    #                     restarts the face iff this is true.
    self._want_connected = False
    self._face_desired = False
    self._reconnecting = False

    # dedicated event loop in a background daemon thread (idle until used)
    self._loop = asyncio.new_event_loop()
    self._thread = threading.Thread(target=self._run_loop, daemon=True,
                                    name="mask-face-loop")
    self._thread.start()
    log.info("FaceController ready (mask_dir=%s, name_prefix=%s, address=%s)",
             self.mask_dir, self.name_prefix, self.address or "scan")

    # Persistent reconnect supervisor: self-heals dropped/weak links and even
    # establishes the FIRST connection once the mask comes into range, without
    # the user babysitting the Connect button.
    threading.Thread(target=self._supervisor, daemon=True,
                     name="mask-supervisor").start()

    # Only auto-connect on boot if Gemini is linked (default off -> the mask
    # stays disconnected + silent until the user links it from the dashboard).
    if self.autostart and self._gemini_linked:
        threading.Thread(target=self._autostart, daemon=True,
                         name="mask-autostart").start()
def _clear_stale_mask_links(self) -> None:
    """Ask BlueZ to drop links to any known device matching the mask prefix.

    A hard service restart can leave the previous process's link half-open:
    BlueZ still reports the mask "connected", so the mask stops advertising,
    the scan cannot find it, and the fresh connect churns until BlueZ times
    the stale link out. Disconnecting first lets the mask advertise again.

    Runs ``bluetoothctl devices`` and, for every ``Device <addr> <name>`` row
    whose name starts with ``name_prefix`` (case-insensitive, "MASK" when
    unset), ``bluetoothctl disconnect <addr>``. Every call is time-bounded
    and any failure (including a missing ``bluetoothctl``) is logged at debug
    level and swallowed, so this never blocks or breaks startup.
    """
    try:
        import subprocess as _sp
        import time as _time

        listing = _sp.run(["bluetoothctl", "devices"], capture_output=True,
                          text=True, timeout=5).stdout or ""
        wanted = (self.name_prefix or "MASK").upper()
        dropped_any = False
        for row in listing.splitlines():
            fields = row.split()
            # Expected shape: "Device C3:8A:9B:05:B4:C9 MASK-05B4C9"
            if len(fields) < 3 or fields[0] != "Device":
                continue
            if not fields[2].upper().startswith(wanted):
                continue
            mac, label = fields[1], fields[2]
            _sp.run(["bluetoothctl", "disconnect", mac],
                    capture_output=True, text=True, timeout=8)
            log.info("cleared stale BlueZ link to %s (%s) before first connect",
                     label, mac)
            dropped_any = True
        if dropped_any:
            _time.sleep(1.5)  # let the mask resume advertising before we scan
    except Exception as exc:
        log.debug("stale mask-link cleanup skipped: %s", exc)
def _autostart(self):
    """Thread target: best-effort connect and start the face after boot.

    Waits four seconds, bails out if the Mask library cannot be imported,
    then declares the connect + face intents *before* trying to connect. If
    the mask is off or out of range the connect fails here, but the intents
    stay set so the reconnect supervisor keeps trying on its own.
    """
    from time import sleep as _sleep

    _sleep(4.0)  # let the rest of Sanad finish booting first
    if not self.lib_available:
        log.warning("mask autostart skipped — Mask lib unavailable "
                    "(need bleak + Pillow in this conda env)")
        return

    self._want_connected = True
    self._face_desired = True
    # Drop any half-open link left behind by a previous process.
    self._clear_stale_mask_links()

    try:
        self.connect()
    except Exception as exc:
        log.warning("mask autostart: connect failed (%s) — the supervisor will "
                    "keep retrying; or connect from the dashboard", exc)
        return

    try:
        self.face_start(reload=False)
        log.info("mask autostart: animated face running (driver=%s)", self._face_kind)
    except Exception:
        log.exception("mask autostart: face_start failed")
def _supervisor(self):
"""Background daemon that enforces the connect/face *intents*.

Loops forever: sleep for the current backoff, then act on the intents.
While ``_want_connected`` is set it keeps (re)establishing the BLE link
through drops and weak-signal scan misses; once connected, if the face is
desired but not running (e.g. after a reconnect) it restarts it. A user
Disconnect clears the intent so this stops fighting a deliberate
disconnect. Each attempt reuses the normal serialized connect()/
face_start() paths, so there are no new locking hazards, only retries.
Never returns and never raises: any unexpected error is logged and the
loop continues."""
import time as _time
# Seconds to sleep before the next pass; grows on failed reconnects.
backoff = 3.0
while True:
_time.sleep(backoff)
try:
# Nothing to do if no link is wanted, or a connect is already running.
if not self._want_connected or self._connecting:
backoff = 3.0
continue
if self.is_connected:
backoff = 3.0
# Link is up — restore the face if it's wanted but stopped
# (e.g. the face loop bailed on a drop the supervisor healed).
if self._face_desired and not self._face_running:
try:
self.face_start(reload=False)
log.info("mask supervisor: face restored")
except Exception as exc:
log.debug("mask supervisor: face restore failed (%s)", exc)
backoff = 5.0
continue
# Want a link but don't have one -> reconnect (short, then loop).
# _reconnecting is what status() reports as "reconnecting".
self._reconnecting = True
try:
self.connect(timeout=12.0, attempts=2)
log.info("mask supervisor: link (re)established")
if self._face_desired:
self.face_start(reload=False)
backoff = 3.0
except Exception as exc:
# Keep trying with a gentle backoff (weak signal / mask off).
log.debug("mask supervisor: reconnect attempt failed (%s)", exc)
# 'Software caused connection abort' / 'device disconnected'
# is usually a half-open BlueZ link from the drop: the mask
# still shows "connected" so it stops advertising and the next
# scan can't find it. Clearing it lets the mask re-advertise.
# The match is a case-insensitive substring test on the error text.
m = str(exc).lower()
if any(s in m for s in ("abort", "disconnect", "not connected",
"discover services")):
try:
self._clear_stale_mask_links()
except Exception:
pass
# Grow the delay by 1.5x per failure, capped at 20 seconds.
backoff = min(backoff * 1.5, 20.0)
finally:
self._reconnecting = False
except Exception:
# Last-resort guard so the daemon thread itself never dies.
log.exception("mask supervisor loop error")
backoff = 5.0
# -- loop plumbing --------------------------------------------------------
def _run_loop(self):
    """Thread target: install the private event loop and run it until stopped."""
    loop = self._loop
    asyncio.set_event_loop(loop)
    loop.run_forever()
def _submit(self, coro, timeout: float = 30.0):
    """Run ``coro`` on the mask loop from a caller thread and wait for it.

    Args:
        coro: coroutine object to schedule on ``self._loop``.
        timeout: seconds to wait for the result.

    Returns:
        Whatever the coroutine returns.

    Raises:
        concurrent.futures.TimeoutError: the coroutine did not finish in
            time. The scheduled task is cancelled first, so an abandoned BLE
            operation does not keep running on the loop behind the caller's
            back and interleave with the next command.
        Exception: anything the coroutine itself raises.
    """
    import concurrent.futures

    fut = asyncio.run_coroutine_threadsafe(coro, self._loop)
    try:
        return fut.result(timeout=timeout)
    except concurrent.futures.TimeoutError:
        # No-op if the future already finished (i.e. the coroutine itself
        # raised a timeout); otherwise cancels the still-running task.
        fut.cancel()
        raise
# -- lazy import of the flat Mask library ---------------------------------
def _ensure_lib(self) -> dict:
    """Import the flat Mask modules once and cache the handles.

    Puts ``mask_dir`` at the front of ``sys.path`` (if not already present)
    and imports ``mask``, ``faceanim``, ``colorface`` and ``constants``. The
    optional ``LifelikeFace`` driver is imported separately; if that fails
    the entry is ``None`` and the basic FaceAnimator is used instead.

    Returns:
        The cached dict with keys ``ShiningMask``, ``FaceAnimator``,
        ``LifelikeFace``, ``colorface`` and ``TextMode``.

    Raises:
        RuntimeError: the import failed now, or failed on an earlier call
            (the failure is remembered and not retried).
    """
    cached = self._lib
    if cached is not None:
        return cached
    if self._lib_failed:
        raise RuntimeError(self._last_error or "mask library unavailable")

    if self.mask_dir and self.mask_dir not in sys.path:
        sys.path.insert(0, self.mask_dir)

    try:
        import mask as _mask
        import faceanim as _faceanim
        import colorface as _colorface
        import constants as _constants
    except Exception as exc:
        self._lib_failed = True
        self._last_error = f"mask library import failed: {exc}"
        log.exception("Mask library import failed (dir=%s) — is bleak/Pillow "
                      "installed (g1_env)?", self.mask_dir)
        raise RuntimeError(self._last_error)

    try:
        from Project.Sanad.face.face_motion import LifelikeFace as _LifelikeFace
    except Exception:
        _LifelikeFace = None
        log.warning("LifelikeFace unavailable — falling back to FaceAnimator")

    handles = {
        "ShiningMask": _mask.ShiningMask,
        "FaceAnimator": _faceanim.FaceAnimator,
        "LifelikeFace": _LifelikeFace,
        "colorface": _colorface,
        "TextMode": _constants.TextMode,
    }
    self._lib = handles
    log.info("Mask library imported from %s", self.mask_dir)
    return handles
@property
def lib_available(self) -> bool:
    """True when the Mask library is (or can now be) imported.

    Triggers the lazy import on first use; a remembered failure answers
    ``False`` without retrying.
    """
    if self._lib is None and not self._lib_failed:
        try:
            self._ensure_lib()
        except Exception:
            return False
        return True
    return self._lib is not None
@property
def is_connected(self) -> bool:
    """True when a mask object exists and reports a truthy ``is_connected``."""
    mask = self._mask
    if mask is None:
        return False
    return bool(getattr(mask, "is_connected", False))
def _require_connected(self):
    """Raise ``RuntimeError("mask not connected")`` unless the link is up."""
    if self.is_connected:
        return
    raise RuntimeError("mask not connected")
# -- status ---------------------------------------------------------------
def status(self) -> dict:
    """Return a plain-dict snapshot of the controller's settings and state.

    Reading ``lib_available`` may trigger the lazy Mask-library import.
    ``reconnecting`` and ``face_running`` are only reported as truthy in
    combination with the live link state (down and up respectively).
    """
    lib_ok = self.lib_available
    link_up = self.is_connected
    snapshot = {
        "lib_available": lib_ok,
        "connected": link_up,
        "connecting": self._connecting,
        "reconnecting": self._reconnecting and not link_up,
        "want_connected": self._want_connected,
        "face_running": self._face_running and link_up,
        "face_desired": self._face_desired,
        "driver": self._face_kind,
        "lifelike": self.lifelike,
        "autostart": self.autostart,
        "gemini_linked": self._gemini_linked,
        "hide_mouth": self._hide_mouth,
        "speaking": self._speaking,
        "mouth": self._mouth,
        "brightness": self.brightness,
        "eye_color": list(self.eye_color),
        "mouth_color": list(self.mouth_color),
        "sclera_color": list(self.sclera_color),
        "fps": self.fps,
        "address": self.address,
        "name_prefix": self.name_prefix,
        "adapter": self.adapter,
        "mask_dir": self.mask_dir,
        "expressions": list(EXPRESSIONS),
        "last_error": self._last_error,
    }
    return snapshot
# -- connection -----------------------------------------------------------
def connect(self, timeout: Optional[float] = None, attempts: Optional[int] = None) -> dict:
    """Connect to the mask, blocking until it succeeds or fails.

    The whole operation runs under ``_op_lock`` so it cannot interleave with
    a concurrent disconnect()/face_start() swapping ``self._mask``.

    Args:
        timeout: per-attempt timeout in seconds; falsy means use
            ``connect_timeout``.
        attempts: number of attempts; falsy means use ``connect_attempts``.

    Returns:
        The ``status()`` snapshot after a successful connect.

    Raises:
        RuntimeError: a connect is already in progress, or the library
            import / BLE connect failed (message copied from the cause,
            which is also stored in ``_last_error``).
        ValueError / TypeError: ``timeout`` or ``attempts`` is not numeric.
    """
    with self._op_lock:
        if self._connecting:
            raise RuntimeError("a connect is already in progress")
        # Coerce the arguments BEFORE raising the in-progress flag: a bad
        # value used to raise between setting the flag and entering the
        # try/finally, leaving _connecting stuck True so every later
        # connect (and the supervisor) was refused forever.
        to = float(timeout) if timeout else self.connect_timeout
        at = int(attempts) if attempts else self.connect_attempts
        self._connecting = True
        self._last_error = None
        try:
            self._ensure_lib()
            # Outer wait covers every attempt plus some slack.
            self._submit(self._aconnect(to, at), timeout=to * at + 15.0)
            self._want_connected = True  # intent: supervisor keeps it alive
        except Exception as exc:
            self._last_error = str(exc)
            raise RuntimeError(str(exc)) from exc
        finally:
            self._connecting = False
    return self.status()
async def _aconnect(self, timeout: float, attempts: int):
    """Coroutine body of connect(): build a fresh mask object and connect it.

    Does nothing when the link is already up. Otherwise any previous mask
    object is disconnected and dropped first: after a silent BLE drop the old
    client can still hold a half-open BlueZ connection to the same device,
    and simply overwriting ``self._mask`` would leave the OS thinking the
    device is connected, making the new connect hang or be refused.
    """
    if self.is_connected:
        return
    lib = self._lib

    stale = self._mask
    self._mask = None
    if stale is not None:
        try:
            await stale.disconnect()
        except Exception:
            log.exception("stale mask.disconnect() before reconnect failed")

    fresh = lib["ShiningMask"](
        address=self.address, name_prefix=self.name_prefix, adapter=self.adapter)
    self._mask = fresh
    await fresh.connect(timeout=timeout, attempts=attempts)
def disconnect(self) -> dict:
    """Deliberately tear the link down and return the ``status()`` snapshot.

    Clears the connect/face intents, stops the animated face and disconnects
    the mask. A failing BLE disconnect is logged, not raised.
    """
    # Clear the intents FIRST (before the lock) so the supervisor won't start
    # a new attempt to re-establish a link the user is tearing down.
    self._want_connected = False
    self._face_desired = False
    with self._op_lock:
        # Clear them AGAIN now that we own the lock: a connect() or
        # face_start() that was already holding it while we waited sets the
        # intents on success, which would otherwise leave them armed and make
        # the supervisor reconnect right after this deliberate disconnect.
        self._want_connected = False
        self._face_desired = False
        self._stop_face()
        if self._mask is not None:
            try:
                self._submit(self._mask.disconnect(), timeout=10.0)
            except Exception:
                log.exception("mask.disconnect() failed")
    return self.status()
def set_gemini_linked(self, on: bool) -> dict:
    """Link or unlink Gemini and the mask.

    Linking (truthy ``on``) declares the connect + face intents and makes one
    quick connect attempt so a nearby mask comes up immediately; if that
    fails the supervisor keeps retrying in the background. Unlinking calls
    disconnect(), which also clears the intents so the supervisor stops.

    Returns:
        ``{"ok": True, "linked": <bool>, "connected": <bool>}``.
    """
    linked = bool(on)
    self._gemini_linked = linked

    if not linked:
        # Deliberate teardown; disconnect() also clears _want_connected /
        # _face_desired so the supervisor stops trying to reconnect.
        self.disconnect()
        return {"ok": True, "linked": False, "connected": self.is_connected}

    # Declare intent up front so the supervisor takes over if we fail here.
    self._want_connected = True
    self._face_desired = True

    if not self.is_connected and not self._connecting:
        try:
            self._clear_stale_mask_links()
        except Exception:
            pass
        try:
            # Single short attempt: never block the caller for long.
            self.connect(timeout=10.0, attempts=1)
            self.face_start(reload=False)
        except Exception as exc:
            log.info("link-on: mask not up yet, supervisor will retry (%s)", exc)
    elif self.is_connected and not self._face_running:
        try:
            self.face_start(reload=False)
        except Exception:
            pass

    return {"ok": True, "linked": True, "connected": self.is_connected}
# -- simple commands ------------------------------------------------------
def set_brightness(self, level: int) -> dict:
    """Set the panel brightness, clamped to the 0-128 hardware range.

    The new level is sent to the mask, remembered on the controller and
    pushed to the running face object (if any) so it keeps using it.

    Raises:
        RuntimeError: the mask is not connected.
    """
    # Hardware range is 0-128 (config/mask_config.json: "0-128. Keep <=100 to
    # avoid LED flicker"); clamp rather than forward 129-255 to the mask.
    clamped = min(128, max(0, int(level)))
    with self._op_lock:
        self._require_connected()
        self._submit(self._mask.set_brightness(clamped))
        self.brightness = clamped
        running_face = self._face
        if running_face is not None:
            running_face.brightness = clamped
    return {"ok": True, "brightness": clamped}
def set_text(self, text: str, color: Color = (255, 255, 255),
             mode: Optional[int] = None, bg: Optional[Color] = None,
             speed: Optional[int] = None) -> dict:
    """Show text on the mask, replacing the animated face.

    Args:
        text: the text to display (converted with ``str``).
        color: text color as an RGB triple.
        mode: text mode; ``None`` selects ``TextMode.SCROLL_LEFT``.
        bg: optional background color, applied after the text is set.
        speed: optional speed, clamped to 0-255; omitted when ``None``.

    Raises:
        RuntimeError: the mask is not connected.
    """
    with self._op_lock:
        self._require_connected()
        # Static override: the supervisor must not auto-restart the face, and
        # the text cannot share the panel with the animator.
        self._face_desired = False
        self._stop_face()

        text_modes = self._lib["TextMode"]
        chosen_mode = text_modes.SCROLL_LEFT if mode is None else int(mode)
        extra = {} if speed is None else {"speed": max(0, min(255, int(speed)))}

        self._submit(
            self._mask.set_text(str(text), color=tuple(color), mode=chosen_mode, **extra),
            timeout=20.0)
        if bg is not None:
            # Apply a custom background AFTER set_text (which forces black by default).
            self._submit(self._mask.set_background_color(*tuple(bg)), timeout=10.0)
    return {"ok": True}
def show_image(self, image_id: int) -> dict:
    """Show a stored image by id, replacing the animated face.

    Raises:
        RuntimeError: the mask is not connected.
    """
    with self._op_lock:
        self._require_connected()
        self._face_desired = False  # static override
        self._stop_face()
        ident = int(image_id)
        self._submit(self._mask.show_image(ident))
    return {"ok": True, "image_id": ident}
def play_animation(self, anim_id: int) -> dict:
    """Play a stored animation by id, replacing the animated face.

    Raises:
        RuntimeError: the mask is not connected.
    """
    with self._op_lock:
        self._require_connected()
        self._face_desired = False  # static override
        self._stop_face()
        ident = int(anim_id)
        self._submit(self._mask.play_animation(ident))
    return {"ok": True, "anim_id": ident}
def clear_diy(self) -> dict:
    """Delete the uploaded DIY frames from the mask.

    The animator loop is stopped first so it is not playing the frames while
    they are removed.

    Returns:
        ``{"ok": True, "removed": <count>}``; a falsy result from the mask is
        reported as ``0``.

    Raises:
        RuntimeError: the mask is not connected.
    """
    with self._op_lock:
        self._require_connected()
        self._stop_face()
        outcome = self._submit(self._mask.clear_diy(), timeout=30.0)
    return {"ok": True, "removed": int(outcome or 0)}
# -- animated face --------------------------------------------------------
def _stop_face(self):
    """Stop the animator (if any) and reset the face-related state.

    Idempotent. This helper takes no lock itself: callers MUST already hold
    ``self._op_lock``, because it mutates the shared ``_face`` /
    ``_face_running`` state that the serialized mask operations and the
    event-bus callbacks both touch. A failing ``face.stop()`` is logged and
    the state is reset regardless.
    """
    current = self._face
    if current is not None:
        try:
            self._submit(current.stop(), timeout=10.0)
        except Exception:
            log.exception("face.stop() failed")
    self._face = None
    self._face_running = False
    self._speaking = False
    self._mouth = None
def face_start(self, reload: bool = False) -> dict:
    """(Re)start the animated face and mark it as desired.

    Any existing animator is stopped first, so a second Start (or a Reload)
    never leaves two loops fighting over the display. Uses the LifelikeFace
    driver when ``self.lifelike`` is set and the class was importable,
    otherwise the basic FaceAnimator.

    Args:
        reload: passed to the driver's ``start()``; truthy re-uploads the
            frame set.

    Returns:
        ``{"ok": True, "reloaded": <bool>, "driver": "lifelike"|"faceanim"}``.

    Raises:
        RuntimeError: the mask is not connected.
    """
    force_reload = bool(reload)
    with self._op_lock:
        self._require_connected()
        self._stop_face()

        colorface = self._lib["colorface"]
        lifelike_cls = self._lib.get("LifelikeFace") if self.lifelike else None
        palette = {
            "eye_color": self.eye_color,
            "mouth_color": self.mouth_color,
            "sclera_color": self.sclera_color,
        }
        if lifelike_cls is None:
            self._face = self._lib["FaceAnimator"](
                self._mask, fps=self.fps, brightness=self.brightness,
                frames=colorface.default_frames(**palette))
            self._face_kind = "faceanim"
        else:
            # Rich driver: eye saccades, varied blinks, states, reactions,
            # smooth lip-sync. auto_reconnect=False because recovery is owned
            # by this controller's supervisor.
            self._face = lifelike_cls(mask=self._mask, brightness=self.brightness,
                                      auto_reconnect=False,
                                      hide_mouth=self._hide_mouth, **palette)
            self._face_kind = "lifelike"

        # First upload of the frame set can take ~30-90s (acked writes); later
        # starts skip it (frames persist on the mask's flash).
        self._submit(self._face.start(reload=force_reload), timeout=240.0)
        self._face_running = True
        self._face_desired = True  # intent: supervisor restores it after a drop
        self._want_connected = True
    return {"ok": True, "reloaded": force_reload, "driver": self._face_kind}
def face_stop(self) -> dict:
    """Stop the animated face on user request and return ``{"ok": True}``.

    Also clears the face intent so the supervisor does not restart it.
    """
    with self._op_lock:
        self._face_desired = False
        self._stop_face()
    return {"ok": True}
def return_face(self) -> dict:
    """Resume the live animated face after a text/image/animation override.

    Sets the face intent, then returns the result of
    ``face_start(reload=False)``.
    """
    self._face_desired = True
    result = self.face_start(reload=False)
    return result
def set_face_color(self, eye=None, mouth=None, sclera=None) -> dict:
    """Recolor the animated face.

    Each argument that is not ``None`` is parsed with ``_parse_color`` (an
    unusable value keeps the current color). The colors are persisted, and
    because they are baked into the uploaded frames, the frame set is
    re-uploaded (~30-90s) when the mask is connected and the face is desired.

    Returns:
        ``{"ok": True, "reuploaded": <bool>}`` plus the three colors as lists.
    """
    requested = (("eye_color", eye), ("mouth_color", mouth), ("sclera_color", sclera))
    for attr, new_value in requested:
        if new_value is not None:
            setattr(self, attr, _parse_color(new_value, getattr(self, attr)))
    self._save_colors()

    reuploaded = False
    if self.is_connected and self._face_desired:
        self.face_start(reload=True)  # rebuild frames in the new colors
        reuploaded = True

    return {
        "ok": True,
        "reuploaded": reuploaded,
        "eye_color": list(self.eye_color),
        "mouth_color": list(self.mouth_color),
        "sclera_color": list(self.sclera_color),
    }
def _save_colors(self):
    """Write the current face colors into ``config/mask_config.json``.

    Best-effort: the existing file (if any) is loaded, the three color keys
    are replaced and the file is written back with a two-space indent. Any
    failure is logged and swallowed, leaving the colors in memory only.
    """
    try:
        import json

        cfg_file = Path(BASE_DIR) / "config" / "mask_config.json"
        stored = json.loads(cfg_file.read_text()) if cfg_file.exists() else {}
        stored.update(
            eye_color=list(self.eye_color),
            mouth_color=list(self.mouth_color),
            sclera_color=list(self.sclera_color),
        )
        cfg_file.write_text(json.dumps(stored, indent=2))
    except Exception:
        log.exception("could not persist mask face colors (kept in-memory)")
# -- lifelike states + reactions (no-ops on the basic FaceAnimator) --------
def _face_state(self, state: str) -> dict:
    """Call ``face.set_<state>()`` if the running face provides it.

    A no-op when no face is running or the driver lacks that setter (the
    basic FaceAnimator). Errors from the setter are logged, not raised.
    """
    # Snapshot the reference once: face_start/_stop_face (under _op_lock) can
    # swap self._face to None concurrently, and these setters fire from the
    # event-bus worker threads. A local copy avoids a torn read without
    # blocking on a long face_start upload.
    face = self._face
    setter = None
    if face is not None:
        setter = getattr(face, "set_" + state, None)
    if callable(setter):
        try:
            setter()
        except Exception:
            log.exception("face.set_%s failed", state)
    return {"ok": True, "state": state}
def set_listening(self) -> dict:
    """Put the face into its "listening" state (no-op if unsupported)."""
    state = "listening"
    return self._face_state(state)
def set_thinking(self) -> dict:
    """Put the face into its "thinking" state (no-op if unsupported)."""
    state = "thinking"
    return self._face_state(state)
def set_idle(self) -> dict:
    """Put the face into its "idle" state (no-op if unsupported)."""
    state = "idle"
    return self._face_state(state)
def react(self, emotion: str, hold: float = 1.4) -> dict:
    """Trigger a brief reaction (surprised / smile / sad) on the face.

    A no-op when no face is running or the driver has no ``react``. Errors
    from the driver are logged, not raised.

    Returns:
        ``{"ok": True, "react": emotion}``.
    """
    # Snapshot: face_start/_stop_face may swap self._face concurrently.
    face = self._face
    supported = face is not None and hasattr(face, "react")
    if supported:
        try:
            face.react(str(emotion), float(hold))
        except Exception:
            log.exception("face.react failed")
    return {"ok": True, "react": emotion}
def set_speaking(self, on: bool) -> dict:
    """Turn the speaking (mouth) animation on or off.

    Records the flag, clears any explicit mouth level and forwards the flag
    to the running face. Safe no-op when the face is not running; also called
    from the event bus (brain.gestural_speaking_changed).
    """
    speaking = bool(on)
    self._speaking = speaking
    self._mouth = None
    # Snapshot: avoid a torn read against a concurrent _stop_face.
    face = self._face
    if face is not None:
        try:
            face.set_speaking(speaking)
        except Exception:
            log.exception("face.set_speaking() failed")
    return {"ok": True, "speaking": speaking}
def set_mouth(self, level: int) -> dict:
    """Set an explicit mouth opening, clamped to 0-3.

    Records the level, clears the speaking flag and forwards the level to the
    running face. Safe no-op when the face is not running.
    """
    opening = min(3, max(0, int(level)))
    self._mouth = opening
    self._speaking = False
    # Fired from the Gemini reader thread at lip-sync rate; snapshot the face
    # so a concurrent face_start/_stop_face swap can't NoneType-deref here.
    face = self._face
    if face is not None:
        try:
            face.set_mouth(opening)
        except Exception:
            log.exception("face.set_mouth() failed")
    return {"ok": True, "mouth": opening}
def show_expression(self, name: str) -> dict:
    """Show a named expression once via the running face's ``show()``.

    Raises:
        RuntimeError: the mask is not connected, or no face is running.
    """
    with self._op_lock:
        self._require_connected()
        face = self._face
        if face is None:
            raise RuntimeError("face animation not started")
        pending = face.show(str(name))
        self._submit(pending, timeout=10.0)
    return {"ok": True, "expression": name}
def show_scratch_image(self, data: bytes, timeout: float = 90.0) -> dict:
"""Upload raw 46x58 image bytes to the mask's reserved scratch DIY slot
and hold it on the face (a QR / social / custom image) until the face is
resumed with set_expression(None). Uses the reliable acked image upload.

Args:
data: raw image bytes (passed through ``bytes()``).
timeout: overall seconds allowed for the upload.

Returns:
``{"ok": True, "slot": <slot>}`` where the slot is the face's
``scratch_slot`` attribute, or 20 if the driver has none.

Raises:
RuntimeError: the mask is not connected, or no face is running."""
with self._op_lock:
self._require_connected()
face = self._face
if face is None:
raise RuntimeError("face animation not started")
slot = int(getattr(face, "scratch_slot", 20))
# Pause the animation loop so its play_diy traffic doesn't disturb the
# acked upload's per-packet REOK acks (else NotificationTimeout). Wait
# for the loop to actually park before uploading (not a fixed sleep).
# Drivers without pause() are uploaded to without pausing.
paused = hasattr(face, "pause")
if paused:
face.pause()
if hasattr(face, "wait_paused"):
face.wait_paused(2.0)
else:
# Fallback when wait_paused() is unavailable: a short fixed sleep.
import time as _t
_t.sleep(0.35)
try:
# Inner timeout (15 s) is handed to upload_image; the outer one
# bounds how long this thread waits for the whole upload.
self._submit(self._mask.upload_image(bytes(data), slot, timeout=15.0),
timeout=timeout)
# Register "_scratch" so set_expression holds it on EITHER driver:
# LifelikeFace.set_expression checks .slots, FaceAnimator checks
# .frames — populate both so the fallback driver holds it too.
if hasattr(face, "slots"):
face.slots["_scratch"] = slot
frames = getattr(face, "frames", None)
# Placeholder entry only; an existing "_scratch" frame is left as-is.
if isinstance(frames, dict) and "_scratch" not in frames:
frames["_scratch"] = b""
if hasattr(face, "set_expression"):
face.set_expression("_scratch")
finally:
# Always un-pause, even if the upload failed or timed out.
if paused:
face.resume() # loop resumes + holds the "_scratch" frame
return {"ok": True, "slot": slot}
def set_mouth_hidden(self, hidden: bool) -> dict:
"""Show/hide the mouth on the animated face. Re-uploads just the 7 gaze/
talk slots (masked eyes-only, or normal), pausing the loop so the acked
upload isn't disturbed. Persists for future face starts this session.

The flag is always stored on the controller. The re-upload only happens
when a face is running, the mask is connected and the driver provides
``mouth_frames_for``; otherwise the result carries a ``note`` saying the
setting applies once the face is running.

Returns:
``{"ok": True, "hidden": <bool>}`` (plus ``note`` in the deferred case)."""
hidden = bool(hidden)
with self._op_lock:
# Remember the choice first so the next face_start() picks it up.
self._hide_mouth = hidden
face = self._face
if (face is None or not self.is_connected
or not hasattr(face, "mouth_frames_for")):
return {"ok": True, "hidden": hidden,
"note": "applies when the face is running"}
# Frame data to upload for the requested mode, keyed by frame name.
frames = face.mouth_frames_for(hidden)
paused = hasattr(face, "pause")
if paused:
face.pause()
if hasattr(face, "wait_paused"):
face.wait_paused(2.0)
try:
for name, data in frames.items():
slot = face.slots.get(name) if hasattr(face, "slots") else None
# NOTE(review): a slot of 0 is falsy and would be skipped here —
# confirm slot numbering never uses 0.
if slot:
self._submit(self._mask.upload_image(bytes(data), int(slot),
timeout=15.0), timeout=90.0)
if hasattr(face, "frames"):
face.frames[name] = data
if hasattr(face, "hide_mouth"):
face.hide_mouth = hidden
if hasattr(face, "_cur"):
face._cur = None # force a redraw with the new frame
finally:
# Always un-pause, even if an upload failed part-way through.
if paused:
face.resume()
return {"ok": True, "hidden": hidden}
def set_expression(self, name: Optional[str]) -> dict:
    """Pin an expression over the animation, or release it.

    Unlike show_expression (a one-off), the frame stays until cleared, e.g.
    'surprised' on a reaction or 'sad' on an error. Any falsy ``name`` is
    forwarded as ``None``, which resumes idle/talk. Safe no-op when the face
    is not running; driver errors are logged, not raised.
    """
    # Snapshot: face_start/_stop_face may swap self._face concurrently.
    face = self._face
    if face is not None:
        pinned = name or None
        try:
            face.set_expression(pinned)
        except Exception:
            log.exception("face.set_expression() failed")
    return {"ok": True, "expression": name}
# -- lifecycle ------------------------------------------------------------
def shutdown(self):
    """Disconnect the mask and ask the background loop to stop (idempotent).

    Both steps are best-effort: a failing disconnect is logged, and a failure
    to schedule the loop stop is ignored.
    """
    try:
        self.disconnect()
    except Exception:
        log.exception("mask disconnect on shutdown failed")

    try:
        loop = self._loop
        loop.call_soon_threadsafe(loop.stop)
    except Exception:
        pass

View File

@ -37,6 +37,22 @@ _DEFAULT_SYSTEM_PROMPT = _cfg_section("core", "gemini_defaults").get(
"You are Sanad (Bousandah), a wise and friendly Emirati assistant. "
"Speak in UAE dialect (Khaleeji). Be helpful and concise."
)
# TTS / typed-replay system prompt. The voice_client speaks TYPED text (typed
# replay + /api/voice/generate), so it must read the text VERBATIM in its OWN
# language — NOT answer it and NOT force Khaleeji (the default persona does the
# latter, which made English/Urdu/Indonesian lines come out in Arabic).
# The literal below is only the fallback: a "tts_system_prompt" entry in the
# core / gemini_defaults config section replaces it.
TTS_SYSTEM_PROMPT = _cfg_section("core", "gemini_defaults").get(
"tts_system_prompt",
"You are a pure multilingual text-to-speech voice. The instant the user "
"sends text, speak it aloud word for word in the SAME language it is "
"written in, then stop. Output ONLY that spoken audio — no thinking, no "
"commentary, no acknowledgements, no headers, no explanations, no "
"greetings, no extra words. Never translate and never change the language: "
"English stays English, Arabic stays Arabic, Urdu stays Urdu, Indonesian "
"stays Indonesian. Your speech must be identical to the user's text, "
"nothing more and nothing less."
)
# Receive / reconnect tunables read from _GC; the second argument is the
# value used when the key is absent (30 s, 3 attempts, 1.0 s).
_RECV_TIMEOUT_SEC = _GC.get("recv_timeout_sec", 30)
_RECONNECT_MAX_ATTEMPTS = _GC.get("reconnect_max_attempts", 3)
_RECONNECT_INITIAL_DELAY_SEC = _GC.get("reconnect_initial_delay_sec", 1.0)

View File

@ -60,10 +60,34 @@ _SESSION_TIMEOUT = _SV.get("session_timeout_sec", 660)
# Limits read from _SV; the second argument is the value used when the key is
# absent. Names suggest seconds for the delays/timeouts — confirm at the use
# sites.
_MAX_RECONNECT_DELAY = _SV.get("max_reconnect_delay_sec", 30)
_MAX_CONSECUTIVE_ERRORS = _SV.get("max_consecutive_errors", 10)
_NO_MESSAGES_TIMEOUT = _SV.get("no_messages_timeout_sec", 30)
# Extra mic-gate time after the AI stops, on loud external-speaker profiles
# (JBL) — covers the speaker buffer + room reverb so it doesn't hear its tail.
_ECHO_TAIL_SEC = _SV.get("echo_tail_sec", 0.6)
# On a loud external speaker (JBL) barge-in must clear the measured speaker
# BLEED by this factor — so the user's own voice cuts the AI but the speaker's
# echo into the mic does not. Lower = easier to interrupt (more false cuts).
_JBL_BLEED_MARGIN = _SV.get("jbl_bleed_margin", 3.0)
# Don't allow barge-in for this long after the AI starts on the JBL — gives the
# bleed estimate time to ramp so an early loud bleed frame can't false-trigger.
_JBL_BARGE_GRACE = _SV.get("jbl_barge_grace_sec", 1.0)
# Sustained loud-frame count required to barge-in on the JBL (vs the shorter
# default). Brief speaker-echo peaks won't reach it; continuous speech will.
_JBL_BARGE_CHUNKS = _SV.get("jbl_barge_chunks", 9)
# Time since the AI last pushed audio after which the speaker bleed is assumed
# FADED (a gap between words/numbers). In that window barge-in drops to a low,
# sensitive bar so the user can interrupt; while audio is flowing it stays high.
_JBL_BLEED_FADE_SEC = _SV.get("jbl_bleed_fade_sec", 0.5)
# Bytes in one audio chunk — assumes 2 bytes per sample (16-bit PCM); TODO confirm.
_CHUNK_BYTES = CHUNK_SIZE * 2
# One chunk's worth of zero bytes, i.e. a silent chunk.
_SILENCE_PCM = b"\x00" * _CHUNK_BYTES
# Set by a "pause:1" stdin command (a record is playing on the dashboard).
# While set, the brain feeds Gemini silence (so it neither hears the playback
# nor keeps replying) and drops its own audio output (so the record owns the
# chest speaker). Cleared by "pause:0" when playback ends. threading.Event is
# safe to read from the asyncio loops.
_INPUT_PAUSED = threading.Event()
# ── Recognition (camera + face gallery) tunables ──
_RECOG_STATE_PATH = Path(os.environ.get(
"SANAD_RECOGNITION_STATE_PATH",
@ -87,6 +111,158 @@ _ZONES_DIR = Path(os.environ.get(
))
# ── navigation tools (Gemini Live function-calling → Nav2) ────
# Gemini can DRIVE the robot to saved places via native function-calling.
# The handlers call the DASHBOARD HTTP API (not web_nav3 directly) so the
# in-process _arbiter (nav↔loco mutex) + single nav client stay authoritative
# — the Gemini brain runs as a SEPARATE subprocess and cannot touch the
# dashboard's in-memory arbiter, so it must go through HTTP.
# `requests` is optional at import time: when it cannot be imported the name
# is bound to None and _nav_api() answers {"ok": False, "reason": "no_http_client"}.
try:
import requests as _requests
except Exception: # pragma: no cover - requests is in the gemini_sdk env
_requests = None
# Navigation tools are on unless SANAD_NAV_TOOLS is set to exactly "0".
_NAV_TOOLS_ENABLED = os.environ.get("SANAD_NAV_TOOLS", "1") != "0"
# Base URL of the dashboard API, with any trailing "/" stripped so that paths
# starting with "/" can be appended directly.
_DASHBOARD_URL = os.environ.get(
"SANAD_DASHBOARD_URL", "http://127.0.0.1:8001"
).rstrip("/")
def _nav_api(method: str, path: str, body: Optional[dict] = None,
             timeout: float = 12.0) -> dict:
    """Call the dashboard nav API and always hand back a dict.

    Blocking and exception-free, so run it via ``asyncio.to_thread`` to keep
    the receive loop responsive.

    Args:
        method: ``"GET"`` issues a GET; any other value issues a POST.
        path: path appended to the dashboard base URL.
        body: JSON body for a POST (an empty object when omitted).
        timeout: request timeout in seconds.

    Returns:
        The decoded JSON object on success (a non-dict payload is wrapped as
        ``{"ok": True, "data": ...}``). On failure, ``{"ok": False,
        "reason": ...}`` where reason is ``no_http_client``, ``unreachable``
        or ``http_<status>``, with a ``detail`` for the last two.
    """
    if _requests is None:
        return {"ok": False, "reason": "no_http_client"}

    endpoint = _DASHBOARD_URL + path
    try:
        if method == "GET":
            response = _requests.get(endpoint, timeout=timeout)
        else:
            response = _requests.post(endpoint, json=(body or {}), timeout=timeout)
    except Exception as exc:
        return {"ok": False, "reason": "unreachable", "detail": str(exc)[:200]}

    try:
        payload = response.json()
    except Exception:
        # Not JSON: keep a short excerpt of the raw body instead.
        payload = {"raw": (response.text or "")[:200]}

    payload_is_dict = isinstance(payload, dict)
    if not response.ok:
        detail = payload.get("detail") if payload_is_dict else payload
        return {"ok": False, "reason": f"http_{response.status_code}", "detail": detail}
    if payload_is_dict:
        return payload
    return {"ok": True, "data": payload}
def _nav_function_declarations() -> list:
    """Return the Gemini Live tool declarations for navigation.

    Assembled on each call rather than at import time, so the google.genai
    ``types`` names are looked up only when the declarations are needed.
    The list order is: navigate_to_place, list_places, where_am_i,
    stop_navigation.
    """
    schema, kind = types.Schema, types.Type

    def no_arguments():
        # A fresh empty-object schema for each tool that takes no parameters.
        return schema(type=kind.OBJECT, properties={})

    place_argument = schema(
        type=kind.STRING,
        description="Destination place name, as the user said it.",
    )
    navigate = types.FunctionDeclaration(
        name="navigate_to_place",
        description=(
            "Drive the robot to a named saved place in the currently loaded "
            "map (for example 'kitchen', 'reception', 'office'). Call this "
            "ONLY when the user asks to go, move, walk, or be taken "
            "somewhere. The place must exist in the active map — if you are "
            "unsure of the name, call list_places first."
        ),
        parameters=schema(
            type=kind.OBJECT,
            properties={"place": place_argument},
            required=["place"],
        ),
    )
    list_places = types.FunctionDeclaration(
        name="list_places",
        description=(
            "List the saved places you can drive to in the currently loaded "
            "map. Use it to tell the user where you can take them."
        ),
        parameters=no_arguments(),
    )
    where_am_i = types.FunctionDeclaration(
        name="where_am_i",
        description=(
            "Report navigation status: which map is loaded and whether the "
            "robot is localized and ready to drive."
        ),
        parameters=no_arguments(),
    )
    stop_navigation = types.FunctionDeclaration(
        name="stop_navigation",
        description="Cancel the current navigation goal and stop the robot from driving.",
        parameters=no_arguments(),
    )
    return [navigate, list_places, where_am_i, stop_navigation]
# Emotions Gemini can show on the LED face (a subset of the mask's frames that
# read as feelings — the talk/blink/gaze frames are driven automatically).
# This tuple is the `enum` of the set_expression tool's `emotion` argument and
# the allow-list _dispatch_tool checks before emitting a [[FACE:…]] marker.
_FACE_EMOTIONS = ("smile", "laugh", "heart", "love", "sad", "surprised",
"wink", "angry", "cool", "confused", "kiss", "thumbs_up",
"neutral")
# Instagram accounts the mask can show as a QR (parent maps these to the code).
# Used as the `enum` of the show_social tool's `account` argument and as the
# allow-list _dispatch_tool checks before emitting a [[SHOW:…]] marker.
_SOCIAL_ACCOUNTS = ("bu_sunaidah", "yslootahtech")
# Appended to whatever base system prompt is passed in, so the expressive-face
# behaviour is always present regardless of the user-edited persona.
# NOTE: the emotion names and account ids are spelled out literally in this
# text; they are not generated from _FACE_EMOTIONS / _SOCIAL_ACCOUNTS, so the
# prompt must be edited by hand whenever either tuple changes.
_FACE_PROMPT_ADDENDUM = (
"\n\nYou have an expressive LED face you control with tools. IMPORTANT: when "
"the user asks you to SHOW or MAKE a specific face/emotion (e.g. 'show me a "
"smile', 'give me a thumbs up', 'look surprised', 'make a heart'), ALWAYS "
"call set_expression with that emotion right away. Also use set_expression "
"naturally as you talk — smile when greeting or happy, laugh at something "
"funny, heart or love for affection or a compliment, thumbs_up to agree or "
"approve, surprised when astonished, confused when you didn't understand, "
"wink when joking, sad when empathizing, cool when playful. Available "
"emotions: smile, laugh, heart, love, thumbs_up, surprised, confused, wink, "
"kiss, cool, sad, angry, neutral. Your mouth already lip-syncs on its own, "
"so this is only the emotion. When the user asks how to follow you, for your "
"Instagram, or to see/show your social media, ALWAYS call show_social with "
"'bu_sunaidah' (@bu.sunaidah) or 'yslootahtech' (@yslootahtech) to display "
"the QR on your face. These tools are silent — never say the tool name, the "
"emotion, or any bracket marker out loud."
)
def _face_function_declarations() -> list:
    """Build the Gemini Live tool declarations for the LED face and social QR.

    Built lazily (at call time) so the google.genai ``types`` names resolve
    when the declarations are actually needed.

    Returns:
        Two ``types.FunctionDeclaration`` objects, in order:
        ``set_expression`` (required string ``emotion``, restricted to
        ``_FACE_EMOTIONS``) and ``show_social`` (required string ``account``,
        restricted to ``_SOCIAL_ACCOUNTS``).
    """
    S, T = types.Schema, types.Type
    return [
        types.FunctionDeclaration(
            name="set_expression",
            # Only mention emotions that exist in _FACE_EMOTIONS: anything
            # else is rejected by the enum / the dispatcher's allow-list, so
            # advertising it would just provoke failing tool calls.
            description=(
                "Show an emotion on your LED face to react expressively while you "
                "talk. Use it naturally and sparingly: smile when greeting or happy, "
                "laugh at something funny, heart/love for affection or a compliment, "
                "thumbs_up to agree or approve, surprised when astonished, confused "
                "when you didn't understand, wink when joking, sad when empathizing, "
                "cool when playful, angry only rarely. Your mouth already lip-syncs "
                "on its own — this is ONLY the emotion, not the mouth."
            ),
            parameters=S(type=T.OBJECT, properties={
                "emotion": S(type=T.STRING, enum=list(_FACE_EMOTIONS),
                             description="The emotion to show on the face."),
            }, required=["emotion"]),
        ),
        types.FunctionDeclaration(
            name="show_social",
            description=(
                "Display a social-media QR code on your LED face so a visitor can "
                "scan it and follow. Call this when the user asks how to follow you, "
                "for your Instagram, or to share your social media. Choose the "
                "account: 'bu_sunaidah' (@bu.sunaidah) or 'yslootahtech' "
                "(@yslootahtech)."
            ),
            parameters=S(type=T.OBJECT, properties={
                "account": S(type=T.STRING, enum=list(_SOCIAL_ACCOUNTS),
                             description="Which Instagram account to show."),
            }, required=["account"]),
        ),
    ]
# ── stdin push channel (Marcus pattern) ──────────────────────
# The GeminiSubprocess supervisor writes two line types to this process's
# stdin:
@ -108,6 +284,15 @@ _STATE_TAGS = {
"error": "[STATE-ERROR]",
"paused": "[STATE-PAUSED]",
"resumed": "[STATE-RESUMED]",
# Navigation arrival/failure — pushed by the dashboard goal monitor so
# Gemini can truthfully tell the user it arrived (or couldn't get there)
# instead of guessing from the fire-and-forget goto.
"nav_arrived": "[NAV ARRIVED]",
"nav_failed": "[NAV FAILED]",
"nav_canceled": "[NAV CANCELED]",
# "Gemini Nav" session greeting — the operator entered a zone bound to a
# map; tell Gemini the zone + drivable places and to greet the user.
"nav_zone": "[GEMINI NAV]",
}
# Pending audio-profile swap signalled by the parent over "profile:" stdin
@ -115,7 +300,8 @@ _STATE_TAGS = {
_PROFILE_LOCK = threading.Lock()
_PROFILE_PENDING: dict = {"id": None, "reason": ""}
_VALID_PROFILES = {"builtin", "anker", "hollyland_builtin"}
_VALID_PROFILES = {"builtin", "anker", "anker_powerconf",
"hollyland_builtin", "jbl_builtin_mic"}
def _stdin_watcher() -> None:
@ -124,7 +310,17 @@ def _stdin_watcher() -> None:
Best-effort: any malformed line is skipped. Exits when the parent
closes our stdin (subprocess teardown)."""
try:
for line in sys.stdin:
# IMPORTANT: read with readline(), NOT `for line in sys.stdin`. The file
# iterator does aggressive read-ahead buffering, so on an idle pipe a
# small command like "pause:1\n" can sit unread for SECONDS (until more
# stdin data arrives to flush the read-ahead). That delayed the record-
# playback pause by ~2s — Gemini kept stomping the chest speaker so the
# clip was silent / late. readline() returns each line as soon as its
# newline arrives, so commands are delivered promptly.
while True:
line = sys.stdin.readline()
if line == "":
break # EOF — parent closed our stdin (subprocess teardown)
line = line.rstrip("\n")
if not line:
continue
@ -172,6 +368,16 @@ def _stdin_watcher() -> None:
_PROFILE_PENDING["id"] = pid
_PROFILE_PENDING["reason"] = (
payload.get("reason") or "").strip()
elif line.startswith("pause:"):
# Dashboard record playback — pause/resume the live interaction.
if line[len("pause:"):].strip() in ("1", "true", "True", "on"):
if not _INPUT_PAUSED.is_set():
_INPUT_PAUSED.set()
log.info("input PAUSED — record playback")
else:
if _INPUT_PAUSED.is_set():
_INPUT_PAUSED.clear()
log.info("input RESUMED — record playback ended")
except Exception:
return
@ -211,7 +417,7 @@ class GeminiBrain:
self._swap_lock: Optional[asyncio.Lock] = None # built in run()
self._recorder = recorder
self._voice = voice_name or GEMINI_VOICE
self._system_prompt = system_prompt
self._system_prompt = (system_prompt or "") + _FACE_PROMPT_ADDENDUM
self._api_key = GEMINI_API_KEY
self._stop_flag = asyncio.Event()
# per-session state (reset in the outer reconnect loop)
@ -220,6 +426,9 @@ class GeminiBrain:
self._barge_block_until = 0.0
self._ai_speak_start = 0.0
self._last_ai_audio = 0.0
# Rolling estimate of the speaker bleed picked up by the mic while the AI
# talks (JBL profile) — the barge-in threshold floats above this.
self._bleed_ewma = 0.0
self._done: Optional[asyncio.Event] = None
# ── Recognition flags — kept in sync with the state file by
# _recognition_state_watcher. Boot defaults come from the file (or
@ -250,6 +459,13 @@ class GeminiBrain:
_initial.movement_enabled
or os.environ.get("SANAD_MOVEMENT_ENABLE", "0") == "1"
)
# Auto-record toggle — recognition_state is the live source of truth.
# Sync the recorder to it now; the watcher keeps it in sync at runtime.
self._record_enabled = bool(_initial.record_enabled)
try:
self._recorder.enabled = self._record_enabled
except Exception:
pass
def stop(self) -> None:
"""Signal the run loop to exit at the next opportunity."""
@ -270,6 +486,10 @@ class GeminiBrain:
while not self._stop_flag.is_set():
session_num += 1
self._reset_turn_state()
# On a reconnect (not the first session), suppress the unprompted
# re-greeting until the user speaks — keeps the chest speaker free
# for record playback and stops the "robot greets every 30s" loop.
self._suppress_greeting = session_num > 1
uptime_min = (time.time() - start_time) / 60
try:
@ -366,6 +586,14 @@ class GeminiBrain:
system_instruction=types.Content(
parts=[types.Part(text=self._system_prompt)],
),
# Native function-calling. The navigation tools (navigate_to_place /
# list_places / where_am_i / stop_navigation) let Gemini drive the robot to
# saved places and are included only when enabled — disable them with
# SANAD_NAV_TOOLS=0. The expressive-face / social-QR tools (set_expression /
# show_social) are always on.
tools=[types.Tool(function_declarations=(
(_nav_function_declarations() if _NAV_TOOLS_ENABLED else [])
+ _face_function_declarations()))],
)
# ─── state helpers ────────────────────────────────────
@ -376,6 +604,16 @@ class GeminiBrain:
self._barge_block_until = 0.0
self._ai_speak_start = 0.0
self._last_ai_audio = 0.0
# Rolling estimate of the speaker bleed picked up by the mic while the AI
# talks (JBL profile) — the barge-in threshold floats above this.
self._bleed_ewma = 0.0
# Suppress the unprompted greeting on a RECONNECT (set per-session in
# run() for session_num>1). The idle watchdog reconnects every ~30s when
# no one talks, and a fresh session greets each time ("مرحبابك…") which
# floods the shared chest speaker and stomps record playback. We drop
# that greeting's audio until the user actually speaks. Default False so
# the FIRST session (startup) greets normally.
self._suppress_greeting = False
def _interrupt(self, source: str = "local") -> None:
self._speaking = False
@ -411,9 +649,18 @@ class GeminiBrain:
data = samples.tobytes()
energy = _audio_energy(data)
now = time.time()
# On the JBL (loud external speaker) the head mic hears the robot's
# OWN voice as loud as the user. We FULLY gate the mic to Gemini while
# it speaks (+ a short echo tail) so it NEVER hears itself, and we
# DISABLE voice barge-in there — the bleed is as loud as your voice, so
# energy can't separate them and any attempt leaks the echo back to the
# model. (Reliable JBL interrupt needs AEC; the only PulseAudio mic is
# dead, so that's separate work.) The chest speaker (builtin) keeps
# light quiet-frame suppression + working barge-in (firmware AEC).
full_gate = "jbl" in (self._current_profile_id or "")
# Barge-in: after AI starts speaking, sustained user energy cuts it.
if self._speaking and now >= self._barge_block_until:
# Barge-in: sustained user energy cuts the AI — chest profile only.
if self._speaking and not full_gate and now >= self._barge_block_until:
if (now - self._ai_speak_start) >= grace:
if energy > threshold:
loud_count += 1
@ -425,10 +672,18 @@ class GeminiBrain:
loud_count = 0
self._barge_block_until = now + cooldown
# Echo suppression: while AI is speaking, mask quiet frames so the
# mic doesn't feed the model its own voice bleed.
# Echo suppression: mask the mic so the model doesn't hear its own bleed.
send_data = data
if self._speaking and energy < echo_suppress_below:
if _INPUT_PAUSED.is_set():
# Paused for a record playback — feed silence so Gemini neither
# hears the record nor keeps talking over it.
send_data = _SILENCE_PCM
elif full_gate and (self._speaking
or (now - self._last_ai_audio) < _ECHO_TAIL_SEC):
# Loud external speaker: gate ALL frames while speaking + tail —
# this is what guarantees it never hears itself.
send_data = _SILENCE_PCM
elif self._speaking and energy < echo_suppress_below:
send_data = _SILENCE_PCM
# Record user audio when clearly speaking and AI isn't.
@ -466,10 +721,29 @@ class GeminiBrain:
async def _receive_loop(self, session: Any) -> None:
loop = asyncio.get_event_loop()
try:
last_recv = time.time()
while not self._done.is_set() and not self._stop_flag.is_set():
async for response in session.receive():
last_recv = time.time()
# Iterate session.receive() with a PER-MESSAGE timeout. A plain
# `async for` parks inside the generator on a silent/half-open
# stall (server stops sending but never closes the socket), so
# the no-message watchdog below — which only ran after the
# async-for ended a cycle — could not fire, and recovery waited
# out the 660s outer session cap. Driving __anext__ under
# wait_for(_NO_MESSAGES_TIMEOUT) detects a stall in ~Ns.
_recv_agen = session.receive()
_recv_it = _recv_agen.__aiter__()
_stalled = False
try:
while True:
try:
response = await asyncio.wait_for(
_recv_it.__anext__(),
timeout=_NO_MESSAGES_TIMEOUT,
)
except StopAsyncIteration:
break # generator exhausted — same as async-for end
except asyncio.TimeoutError:
_stalled = True
break
if self._done.is_set():
break
@ -478,6 +752,14 @@ class GeminiBrain:
self._done.set()
return
# Native function-calling: Gemini asks us to run a tool
# (navigation or face/social). Handle it + reply, then continue — a
# tool_call message carries no server_content to process.
tc = getattr(response, "tool_call", None)
if tc is not None and getattr(tc, "function_calls", None):
await self._handle_tool_calls(session, tc.function_calls)
continue
sc = response.server_content
if sc is None:
continue
@ -493,6 +775,13 @@ class GeminiBrain:
if text and not self._speaking:
log.info("USER: %s", text)
self._recorder.add_user_text(text)
# The user actually said something (real
# transcription, not mic noise) → stop suppressing
# the reconnect greeting so Gemini's reply is heard.
if self._suppress_greeting:
self._suppress_greeting = False
log.info("reconnect greeting suppression "
"lifted — user spoke")
if sc.output_transcription:
text = (sc.output_transcription.text or "").strip()
@ -509,6 +798,22 @@ class GeminiBrain:
if sc.model_turn:
for part in sc.model_turn.parts:
if part.inline_data and part.inline_data.data:
if _INPUT_PAUSED.is_set() or self._suppress_greeting:
# Drop Gemini's audio AND halt any in-flight
# stream at the source. Two cases: (1) a record
# is playing (_INPUT_PAUSED) — Gemini's per-chunk
# PlayStream("sanad") must not stomp the record on
# the shared chest speaker; (2) this is a reconnect
# and the user hasn't spoken — drop the unprompted
# re-greeting. Gated on _stream_started so STOP
# fires once (not per chunk); the next turn's
# begin_stream() clears the stop-flag and resumes.
if self._stream_started:
await loop.run_in_executor(
None, self._speaker.stop)
self._stream_started = False
self._speaking = False
continue
now = time.time()
if not self._speaking:
self._ai_speak_start = now
@ -526,6 +831,22 @@ class GeminiBrain:
None, self._speaker.send_chunk,
audio, RECEIVE_SAMPLE_RATE,
)
# Lip-sync marker for the LED face mask: emit the
# mouth-open level (0..3) from this chunk's RMS,
# throttled. Parsed by GeminiSubprocess._reader_loop.
_mnow = time.time()
if _mnow - getattr(self, "_mouth_t", 0.0) >= 0.08:
_rms = (float(np.sqrt(np.mean(
audio.astype(np.float32) ** 2))) if audio.size else 0.0)
# Lower thresholds bias the mouth more open
# so lip-sync reads strongly (vs. barely moving).
_lvl = (0 if _rms < 140 else 1 if _rms < 650
else 2 if _rms < 1700 else 3)
if (_lvl != getattr(self, "_mouth_lvl", -1)
or _mnow - getattr(self, "_mouth_t", 0.0) >= 0.2):
self._mouth_t = _mnow
self._mouth_lvl = _lvl
log.info("[[MOUTH:%d]]", _lvl)
if sc.turn_complete:
if (self._speaking and self._stream_started
@ -538,11 +859,21 @@ class GeminiBrain:
log.info("speaker interrupted")
self._speaking = False
self._stream_started = False
if getattr(self, "_mouth_lvl", 0) != 0:
self._mouth_lvl = 0
log.info("[[MOUTH:0]]") # close the LED-mask mouth
self._mic.flush()
self._recorder.finish_turn()
log.info("listening")
finally:
# Close the per-cycle receive generator so a stall/break
# doesn't leak it (the old `async for` closed it for us).
try:
await _recv_agen.aclose()
except Exception:
pass
if time.time() - last_recv > _NO_MESSAGES_TIMEOUT:
if _stalled:
log.warning("no messages from Gemini for %ds — session dead",
_NO_MESSAGES_TIMEOUT)
break
@ -748,6 +1079,85 @@ class GeminiBrain:
except Exception as exc:
log.warning("nav-target inject failed: %s", exc)
# ─── tool-call handler (Gemini function-calling) ───
# Gemini issues tool_calls. Navigation tools (navigate_to_place / list_places /
# where_am_i / stop_navigation) are executed against the dashboard nav API;
# face tools (set_expression / show_social) are emitted as [[FACE:…]] /
# [[SHOW:…]] log markers. Either way we reply with a FunctionResponse so the
# model can speak from the real result.
async def _handle_tool_calls(self, session: Any, function_calls: Any) -> None:
    """Run every requested tool and send the results back to Gemini.

    Each call is dispatched through ``_dispatch_tool`` and its result wrapped
    in a ``types.FunctionResponse`` carrying the call's id and name. All
    replies go out in one ``send_tool_response``; a send failure is logged
    and swallowed, while task cancellation is propagated.
    """
    replies = []
    for call in function_calls:
        tool_name = getattr(call, "name", "") or ""
        try:
            tool_args = dict(getattr(call, "args", None) or {})
        except Exception:
            # Arguments that cannot be turned into a dict are treated as none.
            tool_args = {}
        log.info("TOOL CALL: %s(%s)", tool_name, tool_args)
        outcome = await self._dispatch_tool(tool_name, tool_args)
        log.info("TOOL RESULT: %s%s", tool_name, outcome)
        reply = types.FunctionResponse(
            id=getattr(call, "id", None), name=tool_name, response=outcome,
        )
        replies.append(reply)
    if replies:
        try:
            await session.send_tool_response(function_responses=replies)
        except asyncio.CancelledError:
            raise
        except Exception as exc:
            log.warning("send_tool_response failed: %s", exc)
async def _dispatch_tool(self, name: str, args: dict) -> dict:
    """Execute one Gemini tool call and return its result dict.

    Navigation tools call the dashboard nav API through ``_nav_api`` on a
    worker thread; the face tools log a ``[[FACE:…]]`` / ``[[SHOW:…]]``
    marker. Any exception is logged and turned into an error dict, so this
    coroutine always returns a dict with an ``"ok"`` key or the nav API's own
    response.
    """
    try:
        if name == "navigate_to_place":
            # Respect the movement gate (kept fresh by the state watcher) so
            # a nav call can't drive while walking is disabled.
            if not self._movement_enabled:
                return {"ok": False, "reason": "movement_off",
                        "say": "Movement is off — ask the user to enable it from the dashboard."}
            destination = str(args.get("place") or "").strip()
            if destination:
                return await asyncio.to_thread(
                    _nav_api, "POST", "/api/nav/voice_goto", {"place": destination})
            return {"ok": False, "reason": "no_place"}

        if name in ("list_places", "where_am_i"):
            # Both tools read the same status endpoint; where_am_i just
            # reports more of its fields.
            status = await asyncio.to_thread(_nav_api, "GET", "/api/nav/active", None)
            if not isinstance(status, dict):
                return {"ok": False, "reason": "bad_response"}
            if status.get("reason"):  # an error envelope from _nav_api
                return status
            summary = {"ok": True, "map": status.get("map")}
            if name == "where_am_i":
                summary["mode"] = status.get("mode_label")
                summary["ready"] = bool(status.get("bringup_alive"))
                # NOTE(review): "localized" is read from the "localizing"
                # field — confirm against the dashboard API that this is the
                # intended meaning.
                summary["localized"] = bool(status.get("localizing"))
            summary["places"] = status.get("places", [])
            return summary

        if name == "stop_navigation":
            return await asyncio.to_thread(_nav_api, "POST", "/api/nav/cancel", None)

        if name == "set_expression":
            emotion = str(args.get("emotion") or "").strip().lower()
            if emotion in _FACE_EMOTIONS:
                # The parent (GeminiSubprocess) relays [[FACE:…]] to the LED mask.
                log.info("[[FACE:%s]]", emotion)
                return {"ok": True, "shown": emotion}
            return {"ok": False, "reason": "unknown_emotion"}

        if name == "show_social":
            account = str(args.get("account") or "").strip().lower()
            if account in _SOCIAL_ACCOUNTS:
                log.info("[[SHOW:%s]]", account)
                return {"ok": True, "showing": account}
            return {"ok": False, "reason": "unknown_account"}

        return {"ok": False, "reason": "unknown_tool"}
    except Exception as exc:
        log.warning("tool %s error: %s", name, exc)
        return {"ok": False, "reason": "error", "detail": str(exc)[:200]}
# ─── movement-state announcer (N2) ────────────────────
# Spoken confirmation when the operator enables / disables Gemini-driven
# locomotion from the dashboard. The actual movement dispatch loop lives
@ -926,7 +1336,7 @@ class GeminiBrain:
# rest of the session can react WITHOUT a Gemini reconnect.
async def _recognition_state_watcher(self, session: Any) -> None:
last_mtime = 0.0
last_mtime_ns = -1
last_state = _recog_state.RecognitionState(
vision_enabled=self._vision_enabled,
face_rec_enabled=self._face_rec_enabled,
@ -934,6 +1344,7 @@ class GeminiBrain:
zone_rec_enabled=self._zone_rec_enabled,
zones_version=self._zones_version_primed,
movement_enabled=self._movement_enabled,
record_enabled=self._record_enabled,
)
# Best-effort initial primer if face_rec is already on at session start.
if self._face_rec_enabled and self._vision_enabled:
@ -987,9 +1398,15 @@ class GeminiBrain:
continue
except Exception:
continue
if st.st_mtime == last_mtime:
# Use nanosecond mtime: write() does os.replace of a fresh
# tempfile, so two CRUD ops within one coarse mtime tick would
# share an identical whole-second st_mtime and the second change
# would be skipped on this tick. st_mtime_ns has far finer
# resolution, so a rapid second write is observed. (The
# version-diff logic below is still the ultimate safety net.)
if st.st_mtime_ns == last_mtime_ns:
continue
last_mtime = st.st_mtime
last_mtime_ns = st.st_mtime_ns
new_state = _recog_state.read(_RECOG_STATE_PATH)
# Vision toggle — instant. Announce it out loud so Gemini reacts
@ -1074,6 +1491,15 @@ class GeminiBrain:
session, self._movement_enabled, is_toggle=True,
)
# Auto-record toggle — flip the recorder live (no session restart).
if new_state.record_enabled != last_state.record_enabled:
self._record_enabled = new_state.record_enabled
try:
self._recorder.enabled = self._record_enabled
except Exception:
pass
log.info("auto-record toggled → %s", self._record_enabled)
last_state = new_state
# ─── camera frame send loop ───────────────────────────

View File

@ -88,6 +88,10 @@ class GeminiSubprocess:
def __init__(self):
self._lock = threading.Lock()
self.process: subprocess.Popen | None = None
# Set under _lock for the duration of start()'s heavy Popen so a
# concurrent start() observes it and bails (prevents a double-spawn
# race where two children both grab the G1 mic/speaker).
self._starting = False
self.log_tail: deque[str] = deque(maxlen=LOG_TAIL_SIZE)
self.user_transcript: deque[str] = deque(maxlen=TRANSCRIPT_TAIL_SIZE)
# Gemini's OWN spoken text (output transcription). The movement
@ -107,6 +111,17 @@ class GeminiSubprocess:
# slow callback (e.g. movement dispatch reading state) never stalls the
# reader thread or blocks log parsing.
self._pending_bot: str | None = None
# Lip-sync: callbacks fired on each [[MOUTH:n]] marker (mouth level 0..3)
# emitted by gemini/script.py while Gemini speaks. The LED-mask face
# subsystem registers here to drive the animated mouth. Fired on the
# reader thread; callbacks must be cheap / non-blocking.
self._mouth_callbacks: list = []
# Expression / social markers ([[FACE:name]] / [[SHOW:account]]) emitted
# by gemini/script.py when Gemini calls set_expression / show_social.
# The LED-mask face subsystem registers here. Fired on the reader thread;
# callbacks must be cheap / non-blocking.
self._face_callbacks: list = []
self._social_callbacks: list = []
self._reader_thread: threading.Thread | None = None
self._log_file = None # opened per-session in _reader_loop
self.state = "stopped"
@ -144,6 +159,24 @@ class GeminiSubprocess:
if callback not in self._bot_callbacks:
self._bot_callbacks.append(callback)
def register_mouth_callback(self, callback) -> None:
    """Subscribe ``callback(level)`` to the [[MOUTH:n]] lip-sync markers.

    ``level`` is the mouth-open level, an int in 0..3. Used by the LED-mask
    face. Registering the same callback twice has no effect. Callbacks must
    be cheap and non-blocking.
    """
    listeners = self._mouth_callbacks
    if callback in listeners:
        return
    listeners.append(callback)
def register_face_callback(self, callback) -> None:
    """Subscribe ``callback(name)`` to the [[FACE:name]] markers.

    The marker is produced by Gemini's set_expression tool; the LED-mask face
    reacts with that emotion. Registering the same callback twice has no
    effect. Callbacks must be cheap.
    """
    listeners = self._face_callbacks
    if callback in listeners:
        return
    listeners.append(callback)
def register_social_callback(self, callback) -> None:
    """Subscribe ``callback(account)`` to the [[SHOW:account]] markers.

    The marker is produced by Gemini's show_social tool and is used to show
    the social QR on the mask. Registering the same callback twice has no
    effect. Callbacks must be cheap.
    """
    listeners = self._social_callbacks
    if callback in listeners:
        return
    listeners.append(callback)
def attach_camera(self, camera) -> None:
"""Give the supervisor a reference to the CameraDaemon so it can
forward frames to the child over stdin while a session runs."""
@ -216,7 +249,12 @@ class GeminiSubprocess:
self._set_state("listening", "Listening for speech.")
elif "session error" in line or "client recreation failed" in line:
self._set_state("error", line)
elif "server going away" in line or "session ended" in line or "session dead" in line:
elif ("server going away" in line or "ended — reconnecting" in line
or "ended - reconnecting" in line or "session dead" in line):
# NOTE: keep in lock-step with the brain's emit
# log.info("session #%d ended — reconnecting in 1s", ...) — the
# "#N" between "session" and "ended" means a plain "session ended"
# substring never matched, so we anchor on "ended — reconnecting".
self._set_state("warning", line)
elif "keyboard interrupt" in line or "cancelled — stopping" in line:
self._set_state("stopped", line)
@ -234,6 +272,49 @@ class GeminiSubprocess:
clean = line.rstrip()
if not clean:
continue
# High-frequency lip-sync marker [[MOUTH:n]] — fire callbacks and
# skip it entirely (not logged/tailed, ~10/s) before anything else.
_mi = clean.find("[[MOUTH:")
if _mi != -1:
try:
level = int(clean[_mi + 8:clean.index("]]", _mi)])
except Exception:
level = 0
for cb in self._mouth_callbacks:
try:
cb(max(0, min(3, level)))
except Exception:
log.exception("mouth callback failed")
continue
# Emotion marker [[FACE:name]] — from Gemini's set_expression tool.
# Low frequency; fire the face callbacks and skip logging the marker.
_fi = clean.find("[[FACE:")
if _fi != -1:
try:
name = clean[_fi + 7:clean.index("]]", _fi)].strip().lower()
except Exception:
name = ""
if name:
for cb in self._face_callbacks:
try:
cb(name)
except Exception:
log.exception("face callback failed")
continue
# Social marker [[SHOW:account]] — from Gemini's show_social tool.
_si = clean.find("[[SHOW:")
if _si != -1:
try:
acct = clean[_si + 7:clean.index("]]", _si)].strip().lower()
except Exception:
acct = ""
if acct:
for cb in self._social_callbacks:
try:
cb(acct)
except Exception:
log.exception("social callback failed")
continue
if fh is not None:
try:
fh.write(clean + "\n")
@ -278,8 +359,44 @@ class GeminiSubprocess:
with self._lock:
if self.process is not None and self.process.poll() is None:
return {"started": False, "message": "Already running.", "pid": self.process.pid}
if self._starting:
# A concurrent start() (e.g. two rapid POSTs dispatched on
# separate threads) is already mid-Popen. Bail so we don't
# spawn a second child holding the G1 mic/speaker — the first
# start owns the spawn and will publish self.process.
return {"started": False, "message": "Start already in progress."}
# Sentinel held across the unlocked heavy Popen below; the early
# guard above + this flag make the running-check and the eventual
# self.process assignment atomic w.r.t. a concurrent start().
self._starting = True
# Close the previous child's pipes if it died on its own (crash,
# not via stop()): stop() closes them, but a crash-then-start path
# would otherwise leak its stdin/stdout until Popen.__del__ at GC.
# stdin close is under _stdin_lock (mirrors stop()) so a stray
# motion-state _send_stdin can't race the close mid-write.
if self.process is not None:
with self._stdin_lock:
old_stdin = getattr(self.process, "stdin", None)
if old_stdin is not None:
try:
old_stdin.close()
except Exception:
pass
old_stdout = getattr(self.process, "stdout", None)
if old_stdout is not None:
try:
old_stdout.close()
except Exception:
pass
self._set_state("starting", "Starting...")
try:
return self._start_locked()
finally:
with self._lock:
self._starting = False
def _start_locked(self) -> dict[str, Any]:
script = LIVE_SCRIPT
if not script.exists():
raise RuntimeError(f"Script not found: {script}")
@ -444,7 +561,8 @@ class GeminiSubprocess:
_audio_swap_loop performs the actual mic/speaker rebind. No-op
if the process isn't running or stdin is closed."""
pid = (profile_id or "").strip().lower()
if pid not in {"builtin", "anker", "hollyland_builtin"}:
if pid not in {"builtin", "anker", "anker_powerconf",
"hollyland_builtin", "jbl_builtin_mic"}:
log.warning("send_profile: ignoring unknown profile %r", profile_id)
return
payload: dict[str, Any] = {"id": pid}
@ -456,6 +574,12 @@ class GeminiSubprocess:
return
self._send_stdin(line)
def send_pause(self, paused: bool) -> None:
    """Tell the child to pause or resume the live interaction.

    Used while a dashboard record plays: the child feeds Gemini silence and
    drops its own audio so the record owns the chest speaker, then resumes.
    Writes ``pause:1`` for a truthy ``paused`` and ``pause:0`` otherwise, via
    ``_send_stdin``. No-op if not running.
    """
    flag = int(bool(paused))
    command = "pause:%d\n" % flag
    self._send_stdin(command)
def _audio_watcher(self) -> None:
"""Background thread — poll pactl for the Anker USB device, signal
the child on every plug/unplug edge transition.
@ -505,20 +629,50 @@ class GeminiSubprocess:
# plug edges and dispatches profile changes to the child.
plugged = _ad.detect_plugged_profiles()
ids = {p.get("profile", {}).get("id") for p in (plugged or [])}
anker_present = _ANKER_PROFILE_ID in ids
target = "anker" if anker_present else boot_profile
reason = "anker plugged" if anker_present else "anker unplugged"
# Surface which detection path succeeded (Path A vs pactl)
if anker_present:
# Honor the user's SAVED profile selection whenever its device is
# actually plugged in — covers the JBL, Anker, Hollyland, or any
# future profile. Without this the watcher only knew the Anker and
# kept reverting every other selection back to the boot profile.
# Fallbacks: legacy Anker auto-detect, then boot profile (builtin
# G1 chest speaker) when no external device is present.
try:
selected = (_ad.load_state() or {}).get("profile_id")
except Exception:
selected = None
def _via(pid: str) -> str:
for p in plugged:
if p.get("profile", {}).get("id") == _ANKER_PROFILE_ID:
via = p.get("source_via", "pactl")
if via != "pactl":
reason += f" via {via}"
break
if p.get("profile", {}).get("id") == pid:
v = p.get("source_via", "pactl")
return f" via {v}" if v != "pactl" else ""
return ""
if selected and selected != "builtin" and selected in ids:
target = selected
reason = f"{selected} present" + _via(selected)
elif _ANKER_PROFILE_ID in ids:
target = "anker"
reason = "anker plugged" + _via(_ANKER_PROFILE_ID)
else:
target = boot_profile
reason = (f"{selected} unplugged → {boot_profile}"
if selected and selected != "builtin"
else "no external device")
if target == self._last_profile_id:
continue # edge-only
# Steady state: re-send the desired profile each poll (a
# cheap idempotent no-op on the child — _audio_swap_loop /
# swap_audio_devices short-circuit when already on target).
# This is the recovery path: if a previous swap exhausted
# its 3 retries (e.g. udev hadn't exposed the Anker yet),
# the child kept the old profile with NO pending retry; an
# edge-only watcher would never re-send the same target,
# leaving parent + child desynced until the next physical
# plug edge. Re-sending lets the child converge on its next
# tick. No log / no refresh_devices on this path (already
# done on the edge) so steady state stays quiet.
self.send_profile(target, reason=reason)
continue
prev = self._last_profile_id
log.info("audio watcher: %s%s (%s)",
@ -572,10 +726,21 @@ class GeminiSubprocess:
# Close stdin/stdout explicitly — without this each start/stop
# cycle leaks FDs (relied on Popen.__del__ which only runs at GC;
# a reconnect loop would march the FD count to the OS limit).
for pipe in (getattr(proc, "stdin", None), getattr(proc, "stdout", None)):
if pipe is not None:
# The stdin close is taken under _stdin_lock: the motion-state bus
# handler still writes via _send_stdin from the arm worker thread
# (it is never joined here), so without the lock its `closed` check
# could race this close() and raise mid-write.
with self._stdin_lock:
stdin_pipe = getattr(proc, "stdin", None)
if stdin_pipe is not None:
try:
pipe.close()
stdin_pipe.close()
except Exception:
pass
stdout_pipe = getattr(proc, "stdout", None)
if stdout_pipe is not None:
try:
stdout_pipe.close()
except Exception:
pass

320
vendor/Sanad/main.py vendored
View File

@ -33,6 +33,14 @@ if _PARENT.name == "Project":
_ROOT = _PARENT.parent
if str(_ROOT) not in sys.path:
sys.path.insert(0, str(_ROOT))
# This codebase imports itself as `Project.Sanad.*`. If this folder is a copy
# under a different name (e.g. Sanadv3), alias Project.Sanad → THIS package so
# it imports its OWN modules, not the sibling Project/Sanad. (The original
# Sanad folder is unaffected — this only triggers for renamed copies.)
if _THIS_DIR.name != "Sanad" and "Project.Sanad" not in sys.modules:
_self_pkg = importlib.import_module(f"Project.{_THIS_DIR.name}")
sys.modules["Project.Sanad"] = _self_pkg
sys.modules["Project"].Sanad = _self_pkg # type: ignore[attr-defined]
else:
# Deployed layout — create a virtual Project package and alias
if str(_PARENT) not in sys.path:
@ -111,6 +119,8 @@ FaceGallery = _safe_import("FaceGallery", lambda: __import__("
ZoneGallery = _safe_import("ZoneGallery", lambda: __import__("Project.Sanad.vision.zone_gallery", fromlist=["ZoneGallery"]).ZoneGallery)
LocoController = _safe_import("LocoController", lambda: __import__("Project.Sanad.G1_Controller.loco_controller", fromlist=["LocoController"]).LocoController)
MovementDispatcher = _safe_import("MovementDispatcher", lambda: __import__("Project.Sanad.voice.movement_dispatch", fromlist=["MovementDispatcher"]).MovementDispatcher)
FaceController = _safe_import("FaceController", lambda: __import__("Project.Sanad.face.mask_face", fromlist=["FaceController"]).FaceController)
WebNav3Client = _safe_import("WebNav3Client", lambda: __import__("Project.Sanad.navigation", fromlist=["WebNav3Client"]).WebNav3Client)
# ── global instances (imported by route modules) ──
@ -118,7 +128,14 @@ MovementDispatcher = _safe_import("MovementDispatcher", lambda: __import__(
brain = _safe_construct("brain", Brain) if Brain else None
arm = _safe_construct("arm", ArmController)
audio_mgr = _safe_construct("audio_mgr", AudioManager)
voice_client = _safe_construct("voice_client", GeminiVoiceClient)
# The voice_client speaks TYPED text (typed-replay + /api/voice/generate), so it
# uses the multilingual verbatim TTS prompt — NOT the Khaleeji persona, which
# forced every language to Arabic. (The live conversation uses live_sub, not
# this client; live_voice only reads its connection flag.)
def _build_voice_client():
    """Build the typed-text voice client with the dedicated TTS system prompt.

    The prompt constant is imported inside the factory so that a failing
    import surfaces when the factory is called rather than at module import.
    """
    from Project.Sanad.gemini.client import TTS_SYSTEM_PROMPT

    tts_prompt = TTS_SYSTEM_PROMPT
    return GeminiVoiceClient(system_prompt=tts_prompt)
voice_client = _safe_construct("voice_client", _build_voice_client if GeminiVoiceClient else None)
local_tts = _safe_construct("local_tts", LocalTTSEngine)
wake_mgr = _safe_construct("wake_mgr", WakePhraseManager)
macro_rec = _safe_construct("macro_rec", (lambda: MacroRecorder(arm)) if (MacroRecorder and arm) else None)
@ -136,6 +153,11 @@ else:
live_sub = _safe_construct("live_sub", GeminiSubprocess)
typed_replay = _safe_construct("typed_replay", (lambda: TypedReplayEngine(voice_client, audio_mgr)) if (TypedReplayEngine and voice_client and audio_mgr) else None)
# ── LED face mask (Mask project) — BLE animated face, own asyncio loop ───────
# Constructs idle (no BLE); the dashboard "Mask Face" tab connects on demand.
# Unavailable (None) if the Mask lib / bleak / Pillow aren't importable.
mask_face = _safe_construct("mask_face", FaceController)
# ── Locomotion controller (N2) — manual dashboard locomotion ────────────────
# Reuses the arm controller's single ChannelFactoryInitialize (one DDS init per
# process) — it does NOT init DDS itself. Disarmed every boot. See
@ -158,6 +180,20 @@ if arm is not None and loco_controller is not None:
except Exception:
log.exception("Could not wire arm motion-block")
# The voice→arm path (live_voice_loop) drives the SEPARATE singleton
# motion.sanad_arm_controller.ARM, not the `arm` instance above. Wire the SAME
# locomotion interlock onto it so a spoken gesture can't move the arms while
# the robot is (or just was) walking — otherwise the motion-block above would
# only cover the dashboard/Gemini-replay path, not voice triggers.
if loco_controller is not None:
    # Install the locomotion predicate on the voice-path arm singleton.
    # Best-effort: any failure (import or wiring) is logged, never raised.
    try:
        from Project.Sanad.motion.sanad_arm_controller import ARM as _sanad_arm

        # Older singletons may not expose the interlock hook; skip quietly.
        if hasattr(_sanad_arm, "set_motion_block"):
            _sanad_arm.set_motion_block(loco_controller.movement_active)
            log.info("Voice arm (sanad_arm) motion-block wired to locomotion movement_active")
    except Exception:
        log.exception("Could not wire sanad_arm motion-block")
# ── Gemini voice → movement dispatcher (N2 Phase 3) ─────────────────────────
# Reads Gemini's spoken (BOT) transcript via the live supervisor's bot-callback
# and drives loco_controller on a confirmation-phrase match (Marcus pattern).
@ -181,6 +217,27 @@ if MovementDispatcher and loco_controller is not None:
except Exception:
log.exception("Could not wire movement dispatcher")
# ── Navigation (web_nav3 Nav2 stack) — thin HTTP client ─────────────────────
# Loosely-coupled client to the standalone web_nav3 service (FastAPI :8765 +
# rosbridge :9090). Owns NO ROS2/Nav2 code; if web_nav3 is down the nav routes
# degrade gracefully. The dashboard "Navigation" tab routes (dashboard/routes/
# navigation.py) build their own module-level client, so this singleton is the
# parent-side handle used by voice/movement wiring and the subsystem report.
# Config precedence (highest first): env var → dashboard config 'navigation'
# section → hardcoded default — same resolution as the navigation route.
def _build_nav_client():
    """Build the parent-side ``WebNav3Client`` handle.

    Each setting is resolved by precedence (highest first): environment
    variable, then the dashboard config ``navigation`` section, then a
    hard-coded default.

    Returns:
        A ``WebNav3Client`` constructed with ``base_url`` and ``robot`` strings.
    """
    from Project.Sanad.core.config_loader import section as _cfg_section

    # NOTE(review): if section() can return None for a missing section, the
    # `or {}` keeps the env-var and default fallbacks working instead of
    # failing on `None.get` — confirm against config_loader.
    nav_cfg = _cfg_section("dashboard", "navigation") or {}
    base_url = (os.environ.get("WEB_NAV3_URL")
                or nav_cfg.get("web_nav3_url")
                or "http://127.0.0.1:8765")
    robot = (os.environ.get("SANAD_ROBOT_NAME")
             or nav_cfg.get("robot")
             or "sanad")
    return WebNav3Client(base_url=str(base_url), robot=str(robot))
nav_client = _safe_construct("nav_client", _build_nav_client if WebNav3Client else None)
# ── Recognition (camera + face gallery) ─────────────────────────────────────
# Camera is idle until the dashboard toggles vision on; face gallery is pure
# file IO and always available if the import succeeded.
@ -306,6 +363,127 @@ if live_sub is not None and hasattr(live_sub, "send_state"):
except Exception:
log.exception("Could not wire motion-state → Gemini channel")
# Animate the LED face mask while the robot is "speaking". Hooked to the
# gestural-speaking toggle (brain.gestural_speaking_changed); finer per-utterance
# lip-sync from TTS amplitude is a follow-up. Safe no-op until the face is started.
if mask_face is not None:
    # Subscribe the LED face to the gestural-speaking toggle. Wiring failures
    # are logged and swallowed so they cannot break startup.
    try:
        from Project.Sanad.core.event_bus import bus as _bus_face

        def _on_gestural_speaking(enabled: bool = False, **_kw):
            """Mirror the speaking flag on the mask; return to listening when it clears."""
            speaking = bool(enabled)
            try:
                mask_face.set_speaking(speaking)
                if not speaking:
                    mask_face.set_listening()  # back to attentive after a reply
            except Exception:
                log.exception("mask_face.set_speaking failed")

        _bus_face.on("brain.gestural_speaking_changed", _on_gestural_speaking)
        log.info("LED face wired to gestural-speaking events")
    except Exception:
        log.exception("Could not wire LED face speaking hook")
# Real lip-sync: route Gemini's per-chunk [[MOUTH:n]] amplitude markers (emitted
# by gemini/script.py, parsed by GeminiSubprocess) to the LED mask's mouth so it
# opens/closes with the actual speech. Fires on the subprocess reader thread;
# FaceController.set_mouth is thread-safe and a safe no-op until the face starts.
if mask_face is not None and live_sub is not None and hasattr(live_sub, "register_mouth_callback"):
    try:
        def _on_mouth_level(level: int):
            """Forward one mouth-amplitude level to the mask while Gemini is linked."""
            # Gemini not linked to the mask -> leave it alone
            if getattr(mask_face, "_gemini_linked", False):
                try:
                    mask_face.set_mouth(int(level))
                except Exception:
                    log.exception("mask_face.set_mouth (lip-sync) failed")

        live_sub.register_mouth_callback(_on_mouth_level)
        log.info("LED face wired to Gemini lip-sync (MOUTH markers)")
    except Exception:
        log.exception("Could not wire LED face lip-sync hook")
# Gemini-driven expressions: [[FACE:name]] markers (from the set_expression tool)
# -> a brief emotion reaction on the mask. Fires on the subprocess reader thread;
# react() is thread-safe and a safe no-op until the face starts.
if mask_face is not None and live_sub is not None and hasattr(live_sub, "register_face_callback"):
    try:
        # Per-emotion hold time in seconds; anything not listed uses 1.6.
        _FACE_HOLD = {
            "heart": 2.6,
            "love": 2.6,
            "kiss": 2.4,
            "laugh": 2.2,
            "surprised": 1.8,
            "confused": 1.8,
        }

        def _on_face_emotion(name: str):
            """Show a brief emotion reaction on the mask while Gemini is linked."""
            # Gemini not linked to the mask -> ignore emotion markers
            if getattr(mask_face, "_gemini_linked", False):
                try:
                    hold_sec = _FACE_HOLD.get(name, 1.6)
                    mask_face.react(str(name), hold_sec)
                except Exception:
                    log.exception("mask_face.react (emotion) failed")

        live_sub.register_face_callback(_on_face_emotion)
        log.info("LED face wired to Gemini emotions (FACE markers)")
    except Exception:
        log.exception("Could not wire LED face emotion hook")
# Gemini-driven social QR: [[SHOW:account]] markers (from the show_social tool)
# -> render the account's QR + show it on the mask via the shared helper.
if mask_face is not None and live_sub is not None and hasattr(live_sub, "register_social_callback"):
    try:
        def _on_social(account: str):
            """Show the account's QR on the mask from a background thread."""
            linked = getattr(mask_face, "_gemini_linked", False)
            if not linked:
                return  # Gemini not linked to the mask -> ignore social markers

            # The callback must return at once so the caller can keep
            # draining the child's stdout; the slow upload runs on its own
            # daemon thread instead.
            def _upload(acc=str(account)):
                try:
                    from Project.Sanad.dashboard.routes.mask_social import show_social_on_mask
                    show_social_on_mask(acc)
                except Exception:
                    log.exception("show_social_on_mask failed")

            import threading as _threading
            worker = _threading.Thread(target=_upload, daemon=True, name="mask-social")
            worker.start()

        live_sub.register_social_callback(_on_social)
        log.info("LED face wired to Gemini social QR (SHOW markers)")
    except Exception:
        log.exception("Could not wire LED face social hook")
# Lifelike face behaviour: drive the LED face's state + reactions from bus events
# so it looks alive and engaged during a conversation (attentive while listening,
# looks-away while a reply is prepared, brief smile/sad reactions). All calls are
# safe no-ops until the face is started, and on the basic FaceAnimator fallback.
if mask_face is not None:
    # Map bus events onto face states and short reactions. Each handler
    # logs and swallows its own failure so a face error cannot propagate
    # into the event bus.
    try:
        from Project.Sanad.core.event_bus import bus as _bus_face2

        def _face_listening(**_kw):
            """Put the face into its listening state."""
            try:
                mask_face.set_listening()
            except Exception:
                log.exception("face set_listening failed")

        def _face_thinking(**_kw):
            """Put the face into its thinking state."""
            try:
                mask_face.set_thinking()
            except Exception:
                log.exception("face set_thinking failed")

        def _face_idle(**_kw):
            """Put the face into its idle state."""
            try:
                mask_face.set_idle()
            except Exception:
                log.exception("face set_idle failed")

        def _face_react(emotion):
            """Return a bus handler that plays ``emotion`` as a reaction."""
            def _handler(**_kw):
                try:
                    mask_face.react(emotion)
                except Exception:
                    log.exception("face react failed")
            return _handler

        _subscriptions = (
            ("voice.connected", _face_listening),          # session up -> attentive
            ("voice.user_said", _face_thinking),           # heard user -> processing
            ("voice.disconnected", _face_idle),
            ("voice.error", _face_react("sad")),
            ("motion.action_error", _face_react("sad")),
            ("skill.finished", _face_react("smile")),      # success -> happy
        )
        for _event_name, _event_handler in _subscriptions:
            _bus_face2.on(_event_name, _event_handler)
        log.info("LED face wired to lifelike state/reaction events")
    except Exception:
        log.exception("Could not wire LED face lifelike behaviour hooks")
# Wire everything into the Brain (only what was constructed)
def _safe_attach(method_name: str, value):
if brain is None or value is None:
@ -346,6 +524,8 @@ SUBSYSTEMS = {
"zone_gallery": zone_gallery,
"loco_controller": loco_controller,
"movement_dispatch": movement_dispatch,
"mask_face": mask_face,
"nav_client": nav_client,
}
# Critical subsystems — if any of these are None, log a warning at startup
@ -362,17 +542,63 @@ if _missing:
log.warning("Subsystems unavailable (%d): %s", len(_missing), ", ".join(_missing))
_already_shut_down = False
_shutting_down = False
def _call_with_timeout(label: str, fn, timeout_s: float = 2.0):
"""Run a possibly-blocking teardown call on a daemon thread and never
wait more than ``timeout_s`` for it. If it hangs we log and move on
the daemon thread dies with the process at os._exit / interpreter exit.
"""
import threading
def _runner():
try:
fn()
except Exception:
log.exception("%s failed", label)
t = threading.Thread(target=_runner, name=f"shutdown-{label}", daemon=True)
t.start()
t.join(timeout_s)
if t.is_alive():
log.warning("%s did not finish within %.1fs — skipping (forced exit)",
label, timeout_s)
def _do_shutdown(from_signal: bool = False):
"""Clean shutdown — release hardware, stop background tasks. Idempotent."""
global _already_shut_down
if _already_shut_down:
"""Clean shutdown — release hardware, stop background tasks. Idempotent.
Never blocks more than a couple seconds on any single step: the loco
StopMove is run on a watchdog thread (it can re-init / hang DDS during
teardown), and tracked children are stopped early so Ctrl+C kills the
whole tree fast.
"""
global _shutting_down
if _shutting_down:
return
_already_shut_down = True
_shutting_down = True
log.info("Shutdown requested")
# ── Stop tracked child subprocesses FIRST ───────────────────────────
# The Gemini/local voice supervisor owns a real child OS process (and
# forwards camera/audio to it). Kill it early so on Ctrl+C the child
# tree dies fast even if a later step hangs.
if live_sub is not None:
try:
running = live_sub.is_running() if callable(getattr(live_sub, "is_running", None)) else False
if running:
live_sub.stop()
except Exception:
log.exception("live_sub.stop() failed")
if camera is not None:
try:
if camera.is_running():
camera.stop()
except Exception:
log.exception("camera.stop() failed")
if arm is not None:
try:
if hasattr(arm, "cancel"):
@ -391,19 +617,33 @@ def _do_shutdown(from_signal: bool = False):
except Exception:
log.exception("movement_dispatch.stop() failed")
# ── Loco stop — NON-BLOCKING ─────────────────────────────────────────
# loco_controller.shutdown() does StopMove + disarm, but StopMove can
# re-init / block on DDS during interpreter teardown. Only stop if a
# client is actually live, and never wait more than ~2s on it.
if loco_controller is not None:
_loco_has_client = True
try:
loco_controller.shutdown() # StopMove (no FSM change) + disarm
# If the controller exposes a "client exists" probe, honour it so
# we never trigger a lazy LocoClient re-init during teardown.
for _attr in ("has_client", "is_armed", "_client"):
if hasattr(loco_controller, _attr):
_probe = getattr(loco_controller, _attr)
_loco_has_client = bool(_probe() if callable(_probe) else _probe)
break
except Exception:
log.exception("loco_controller.shutdown() failed")
_loco_has_client = True # probe failed — fall back to attempting it
if _loco_has_client:
_call_with_timeout("loco_controller.shutdown()",
loco_controller.shutdown, timeout_s=2.0)
else:
log.info("loco_controller has no live client — skipping StopMove")
if live_sub is not None:
if mask_face is not None:
try:
running = live_sub.is_running() if callable(getattr(live_sub, "is_running", None)) else False
if running:
live_sub.stop()
mask_face.shutdown() # disconnect BLE + stop the face loop
except Exception:
log.exception("live_sub.stop() failed")
log.exception("mask_face.shutdown() failed")
if audio_mgr is not None:
try:
@ -412,22 +652,49 @@ def _do_shutdown(from_signal: bool = False):
except Exception:
log.exception("audio_mgr.close() failed")
if camera is not None:
try:
if camera.is_running():
camera.stop()
except Exception:
log.exception("camera.stop() failed")
log.info("Shutdown complete")
import atexit # noqa: E402
atexit.register(_do_shutdown)
# NOTE: Do NOT install custom SIGINT/SIGTERM handlers here.
# Uvicorn installs its own signal handlers for graceful shutdown.
# If we override them, Ctrl+C never reaches uvicorn and the server
# keeps running forever. Our _do_shutdown runs via atexit instead.
# atexit is the fallback path (clean uvicorn return / interpreter exit).
# The PRIMARY Ctrl+C path is the explicit SIGINT/SIGTERM handler installed
# in main() — see _install_signal_handlers(). That handler fully takes over:
# it runs the (idempotent, non-blocking) shutdown and then os._exit(0), so it
# never returns to uvicorn. This avoids the old problem where uvicorn's own
# handler and ours would fight — we just don't hand control back. A single
# SIGINT therefore tears down every child and force-exits within ~2s.
def _install_signal_handlers():
    """Install one SIGINT/SIGTERM handler that shuts down and hard-exits.

    The handler does not chain to any previously installed handler: it logs,
    calls ``_do_shutdown(from_signal=True)``, then ``os._exit(0)``, so control
    never returns to the interrupted code.
    """
    import signal

    def _force_exit(signum, _frame):
        # Logging is wrapped too: nothing may stop us reaching os._exit.
        try:
            log.warning("force shutdown (signal %s) — killing everything", signum)
        except Exception:
            pass

        try:
            _do_shutdown(from_signal=True)
        except Exception:
            try:
                log.exception("_do_shutdown raised during signal teardown")
            except Exception:
                pass

        # Hard-exit so even if uvicorn/atexit would hang we are gone.
        os._exit(0)

    for hooked_signal in (signal.SIGINT, signal.SIGTERM):
        try:
            signal.signal(hooked_signal, _force_exit)
        except Exception:
            log.exception("Could not install handler for signal %s", hooked_signal)
def _print_env_diagnostic():
@ -485,6 +752,11 @@ def main():
_print_env_diagnostic()
return
# Install our SIGINT/SIGTERM handler EARLY — before any hardware init or
# uvicorn.run() — so a single Ctrl+C at any point forces a fast, clean
# teardown of every child and exits the process.
_install_signal_handlers()
log.info("Sanad starting — Python %s @ %s", sys.version.split()[0], sys.executable)
log.info("BASE_DIR: %s", _THIS_DIR)
log.info("Dashboard interface: %s → bound to %s", DASHBOARD_INTERFACE, args.host)

View File

@ -39,7 +39,6 @@ from Project.Sanad.config import ENABLE_ARM_SDK_INDEX
RAMP_IN_STEPS = _AC.get("ramp_in_steps", 60) # ~1.0s smooth move to start pose
RAMP_OUT_STEPS = _AC.get("ramp_out_steps", 180) # ~3.0s smooth return to home
SETTLE_HOLD_SEC = _AC.get("settle_hold_sec", 0.5) # hold start pose before replay begins
WATCHDOG_TIMEOUT = _AC.get("watchdog_timeout_sec", 0.25) # hold last pose if state stale
WATCHDOG_DISABLE_AFTER = _AC.get("watchdog_disable_after_sec", 1.0) # abort if state stale this long
ARM_INDICES = range(
_AC.get("arm_indices_start", 15),
@ -60,6 +59,12 @@ try:
)
from unitree_sdk2py.idl.unitree_hg.msg.dds_ import LowCmd_, LowState_
from unitree_sdk2py.utils.crc import CRC
# Battery (BMS) lives in a SEPARATE topic on the G1 — LowState_ (hg) has no
# battery field. Optional: never let its absence break arm import/motion.
try:
from unitree_sdk2py.idl.unitree_hg.msg.dds_ import BmsState_
except Exception:
BmsState_ = None
# IDL factory — LowCmd_() with no args fails because the dataclass
# has 5 required positional fields. The SDK ships a default factory
@ -221,6 +226,10 @@ class ArmController:
# temperature dashboard). Each entry: {motor_id, surface, winding}.
self._current_temps: list[dict[str, Any]] = []
self._last_temp_time = 0.0
# Battery (BMS) snapshot from rt/lf/bmsstate — separate topic on the G1.
self._bms = None # latest parsed battery dict, or None
self._last_bms_time = 0.0
self._bms_sub = None
# Cached motion file metadata
self._motion_files_cache: dict[str, dict[str, Any]] = {}
@ -239,6 +248,15 @@ class ArmController:
self._arm_pub.Init()
self._state_sub = ChannelSubscriber("rt/lowstate", LowState_)
self._state_sub.Init(self._on_low_state, 10)
# Battery: subscribe to the BMS topic (rt/lf/bmsstate, BmsState_).
# Read-only + best-effort — never let a BMS hiccup break arm init.
try:
if BmsState_ is not None:
self._bms_sub = ChannelSubscriber("rt/lf/bmsstate", BmsState_)
self._bms_sub.Init(self._on_bms_state, 10)
log.info("BMS subscriber up (rt/lf/bmsstate)")
except Exception as exc:
log.warning("BMS subscriber init failed (battery unavailable): %s", exc)
# IDL types need the SDK's default factory — bare LowCmd_() fails
# because the dataclass has 5 required positional fields.
if _make_low_cmd is not None:
@ -345,6 +363,66 @@ class ArmController:
with self._state_lock:
return list(self._current_temps)
def _on_bms_state(self, msg):
"""Battery (BMS) callback — parse the bits the dashboard shows. Wrapped
so a malformed message can never disturb the arm."""
try:
soc = int(getattr(msg, "soc", 0) or 0)
cur = int(getattr(msg, "current", 0) or 0) # G1 reports mA
# Pack voltage: prefer bmsvoltage[0] (mV); else sum of cell voltages.
volt_mv = 0
bv = getattr(msg, "bmsvoltage", None)
try:
if bv is not None and len(bv) and int(bv[0]):
volt_mv = int(bv[0])
except Exception:
volt_mv = 0
if not volt_mv:
cv = getattr(msg, "cell_vol", None)
if cv is not None:
try:
volt_mv = int(sum(int(x) for x in cv if x))
except Exception:
volt_mv = 0
# Max plausible cell/pack temperature (int16 °C).
temp_c = None
tt = getattr(msg, "temperature", None)
if tt is not None:
try:
vals = [int(x) for x in tt if -40 <= int(x) <= 150]
if vals:
temp_c = max(vals)
except Exception:
temp_c = None
batt = {
"soc": max(0, min(100, soc)),
"soh": int(getattr(msg, "soh", 0) or 0),
"current_a": round(cur / 1000.0, 2),
"voltage_v": round(volt_mv / 1000.0, 1) if volt_mv else None,
"temp_c": temp_c,
"cycle": int(getattr(msg, "cycle", 0) or 0),
}
with self._state_lock:
self._bms = batt
self._last_bms_time = time.monotonic()
except Exception:
pass
def get_battery(self) -> dict[str, Any]:
    """Return the latest battery snapshot for the dashboard.

    Returns ``{"available": False}`` while no snapshot is stored. Otherwise
    returns a copy of the snapshot extended with ``status`` (from the sign of
    ``current_a``, with a ±0.05 dead band), ``available=True`` and
    ``age_sec`` (seconds since the snapshot was stored, or None).
    """
    with self._state_lock:
        snapshot = dict(self._bms) if self._bms else None
        stamp = self._last_bms_time
        age = (time.monotonic() - stamp) if stamp else None

    if snapshot is None:
        return {"available": False}

    amps = snapshot.get("current_a") or 0.0
    if amps > 0.05:
        status = "charging"
    elif amps < -0.05:
        status = "discharging"
    else:
        status = "idle"

    snapshot["status"] = status
    snapshot["available"] = True
    snapshot["age_sec"] = None if age is None else round(age, 1)
    return snapshot
def send_frame(self, arm_target_q: list[float], body_lock_q: list[float]):
"""Public single-frame send. Use only inside a controlled playback loop."""
self._send_frame(arm_target_q, body_lock_q)

View File

@ -77,6 +77,21 @@ class _ArmAdapter:
# No watchdog support: pretend state is fresh
return 0.0
def is_blocked(self) -> bool:
    """Return True while the arm's locomotion interlock forbids arm motion.

    Delegates to the wrapped arm's ``_blocked()`` predicate when it has one.
    An arm without such a predicate never blocks, which preserves behaviour
    for controllers that have no interlock.

    If the predicate raises, the interlock state is unknown. This fails
    closed (returns True) and logs the error, so a broken interlock check
    stops arm motion instead of silently permitting it.
    """
    fn = getattr(self._arm, "_blocked", None)
    if not callable(fn):
        return False
    try:
        return bool(fn())
    except Exception:
        log.exception("arm interlock check raised — treating arm as blocked")
        return True
class MacroPlayer:
def __init__(self, audio_manager=None, arm_controller=None):
@ -218,6 +233,9 @@ class MacroPlayer:
for step in range(steps):
if self._stop_event.is_set():
return False
if adapter.is_blocked():
log.error("MacroPlayer %s abort — locomotion armed mid-playback", label)
return False
t = (step + 1) / steps
interp = _lerp_q(from_q, to_q, t)
try:
@ -237,6 +255,10 @@ class MacroPlayer:
if self._stop_event.is_set():
break
if adapter.is_blocked():
log.error("MacroPlayer abort — locomotion armed mid-playback")
break
age = adapter.state_age()
if age > WATCHDOG_DISABLE_AFTER:
log.error("MacroPlayer watchdog abort — state stale %.2fs", age)

View File

@ -10,6 +10,8 @@ These can be replayed in sync via MacroPlayer.
from __future__ import annotations
import json
import os
import tempfile
import threading
import time
import wave
@ -81,27 +83,49 @@ class MacroRecorder:
if self._motion_thread:
self._motion_thread.join(timeout=3.0)
# Save audio
audio_path = AUDIO_RECORDINGS_DIR / f"{self._name}.wav"
motion_path = MOTION_RECORDINGS_DIR / f"{self._name}.jsonl"
# A failed write must NOT leave _recording=True forever (which would
# wedge every future start()). Clear the busy flag in finally no matter
# what; write both files atomically (tempfile + os.replace) so a partial
# write can't surface a corrupt recording.
try:
# Save audio
pcm = b"".join(self._audio_frames)
with wave.open(str(audio_path), "wb") as wf:
tmp_audio = f"{audio_path}.tmp"
with wave.open(tmp_audio, "wb") as wf:
wf.setnchannels(CHANNELS)
wf.setsampwidth(2) # int16
wf.setframerate(RECEIVE_SAMPLE_RATE)
wf.writeframes(pcm)
os.replace(tmp_audio, str(audio_path))
# Save motion
motion_path = MOTION_RECORDINGS_DIR / f"{self._name}.jsonl"
with open(motion_path, "w") as f:
f.write(json.dumps({"meta": {"hz": REPLAY_HZ, "motors": 29}}) + "\n")
content_lines = [json.dumps({"meta": {"hz": REPLAY_HZ, "motors": 29}})]
for frame in self._motion_frames:
f.write(json.dumps(frame) + "\n")
duration = time.monotonic() - self._started_at
content_lines.append(json.dumps(frame))
content = ("\n".join(content_lines) + "\n").encode("utf-8")
fd, tmp_motion = tempfile.mkstemp(
prefix=f".{motion_path.name}.", suffix=".tmp",
dir=str(motion_path.parent),
)
try:
with os.fdopen(fd, "wb") as f:
f.write(content)
os.replace(tmp_motion, str(motion_path))
except Exception:
try:
os.unlink(tmp_motion)
except OSError:
pass
raise
finally:
with self._lock:
self._recording = False
duration = time.monotonic() - self._started_at
log.info("Macro saved: audio=%s motion=%s (%.1fs)", audio_path, motion_path, duration)
return {
"recording": False,
@ -138,6 +162,17 @@ class MacroRecorder:
def _record_motion(self):
"""Capture joint positions at REPLAY_HZ."""
interval = 1.0 / REPLAY_HZ
# Wait for the first LowState before sampling real hardware, otherwise
# _get_current_q() returns the seed [0.0]*29 and we capture an all-zero
# macro that body-locks the arms to zero on replay. Only relevant when a
# live, initialized arm is present (sim path records zeros by design).
if self._arm is not None and getattr(self._arm, "_initialized", False):
wait = getattr(self._arm, "wait_for_state", None)
if callable(wait) and not wait(timeout=2.0):
log.error("Macro motion aborted — no LowState received in 2s")
return
t0 = time.monotonic()
while not self._stop_event.is_set():
t = round(time.monotonic() - t0, 4)

View File

@ -92,7 +92,16 @@ except Exception:
# G1 hardware constants — single source in config/core_config.json
from Project.Sanad.config import G1_NUM_MOTOR, ENABLE_ARM_SDK_INDEX, REPLAY_HZ
DATA_DIR = BASE_DIR / _MCFG["data_subdir"]
# JSONL replay source. The legacy default (BASE_DIR/DataG1) does NOT exist on
# this deployment — the recorded/taught motions actually live in the canonical
# MOTIONS_DIR (data/motions), the same store every other module reads/writes.
# Prefer it so a file-backed action reads the real files; fall back to the
# legacy path only if the canonical one can't be imported.
try:
from Project.Sanad.config import MOTIONS_DIR as _MOTIONS_DIR
DATA_DIR = Path(_MOTIONS_DIR)
except Exception:
DATA_DIR = BASE_DIR / _MCFG["data_subdir"]
ACTION_COOLDOWN_SEC = _MCFG["action_cooldown_sec"]
STABILITY_THRESHOLD = _MCFG["stability_threshold"]
@ -117,6 +126,29 @@ class ArmController:
self._client = None
self._is_busy = False
self._init_lock = threading.Lock()
# Guards the check-and-set of _is_busy so two near-simultaneous
# triggers can't both pass the `if self._is_busy` gate and launch
# two overlapping replays on rt/arm_sdk.
self._busy_lock = threading.Lock()
# Arm ⇄ locomotion interlock. While this predicate returns True the
# robot may be walking, so NO arm action may fire (matches the
# motion-block wired onto motion/arm_controller.ArmController in
# main.py). Installed via set_motion_block(); None = no block.
self._motion_block = None
def set_motion_block(self, predicate):
    """Install the arm ⇄ locomotion interlock predicate.

    Args:
        predicate: A no-argument callable; while it returns a truthy value
            every arm trigger refuses to run. ``None`` removes the block.

    Raises:
        TypeError: If ``predicate`` is neither ``None`` nor callable. A
            non-callable value could never be evaluated, so it is rejected
            here rather than failing later each time the interlock is
            checked.
    """
    if predicate is not None and not callable(predicate):
        raise TypeError(
            "motion-block predicate must be callable or None, got "
            f"{type(predicate).__name__}"
        )
    self._motion_block = predicate
def _blocked(self) -> bool:
pred = self._motion_block
if pred is None:
return False
try:
return bool(pred())
except Exception:
return False
def init(self) -> bool:
with self._init_lock:
@ -125,6 +157,21 @@ class ArmController:
if ChannelFactoryInitialize is None:
return False
try:
# Single-DDS-init rule: the process-wide ChannelFactory is
# owned by motion/arm_controller.ArmController (main.arm),
# initialised on the user-selected NIC in main.arm.init().
# Only init here if that has NOT happened yet — calling
# ChannelFactoryInitialize a second time would re-init on the
# default interface and is exactly the conflict we must avoid.
_factory_ready = False
try:
from Project.Sanad import main as _sanad_main
_factory_ready = bool(
getattr(getattr(_sanad_main, "arm", None),
"_initialized", False))
except Exception:
_factory_ready = False
if not _factory_ready:
ChannelFactoryInitialize(0)
self._pub = ChannelPublisher("rt/arm_sdk", LowCmd_)
self._pub.Init()
@ -243,6 +290,7 @@ class ArmController:
self._pub.Write(cmd)
time.sleep(0.01)
with self._busy_lock:
self._is_busy = False
self._last_action_time = time.time()
print("🔓 Ready.")
@ -254,26 +302,36 @@ class ArmController:
self._client.ExecuteAction(action_map.get(action_name))
time.sleep(3.5)
finally:
with self._busy_lock:
self._is_busy = False
self._last_action_time = time.time()
print("🔓 Ready.")
def trigger_action_by_id(self, action_id: int):
if not self.init():
# Arm ⇄ locomotion interlock — never drive the arms while the robot
# may be walking (manual armed / teleop / recent move/step).
if self._blocked():
return
if self._is_busy:
if not self.init():
return
if not self._cooldown_ok():
return
opt = OPTION_BY_ID.get(int(action_id))
if opt:
if not opt:
return
# Atomic check-and-set so two near-simultaneous triggers can't both
# launch a replay thread on rt/arm_sdk.
with self._busy_lock:
if self._is_busy:
return
self._is_busy = True
if opt.file:
threading.Thread(target=self._managed_replay, args=(opt.file,), daemon=True).start()
elif self._client and opt.name in action_map:
threading.Thread(target=self._managed_sdk_action, args=(opt.name,), daemon=True).start()
else:
with self._busy_lock:
self._is_busy = False
def trigger_action_by_name(self, action_name: str):

View File

@ -20,7 +20,7 @@ import time
from pathlib import Path
from typing import Any
from Project.Sanad.config import G1_NUM_MOTOR, MOTIONS_DIR, REPLAY_HZ
from Project.Sanad.config import ENABLE_ARM_SDK_INDEX, G1_NUM_MOTOR, MOTIONS_DIR, REPLAY_HZ
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.event_bus import bus
from Project.Sanad.core.logger import get_logger
@ -139,7 +139,7 @@ class TeachingSession:
else: # arms
low_cmd.motor_cmd[i].kp = HOLD_ARM_KP
low_cmd.motor_cmd[i].kd = HOLD_ARM_KD
low_cmd.motor_cmd[29].q = 1.0
low_cmd.motor_cmd[ENABLE_ARM_SDK_INDEX].q = 1.0
low_cmd.crc = crc.Crc(low_cmd)
arm._arm_pub.Write(low_cmd)
time.sleep(interval)
@ -171,7 +171,7 @@ class TeachingSession:
low_cmd.motor_cmd[i].q = current_q[i]
low_cmd.motor_cmd[i].kp = TEACH_ARM_KP
low_cmd.motor_cmd[i].kd = TEACH_ARM_KD
low_cmd.motor_cmd[29].q = 1.0
low_cmd.motor_cmd[ENABLE_ARM_SDK_INDEX].q = 1.0
low_cmd.crc = crc.Crc(low_cmd)
arm._arm_pub.Write(low_cmd)

85
vendor/Sanad/navigation/README.md vendored Normal file
View File

@ -0,0 +1,85 @@
# SanadV3 Navigation
Thin HTTP **client** to the external `web_nav3` Nav2 stack. This module owns
**no** ROS2/Nav2 code — it lets SanadV3 (dashboard + voice) drive autonomous
navigation over plain HTTP. If `web_nav3` is down, nav features degrade
gracefully and the rest of SanadV3 keeps running.
## What this module does
- `web_nav3_client.py` — `WebNav3Client`, a loosely coupled `requests` client.
**By contract no method ever raises into the caller**: each returns
`{"ok": bool, "error": str|None, ...}` or a `NavStatus`. If `web_nav3` is
unreachable, callers get a clean failure result instead of an exception.
- `NavStatus` — health snapshot from `GET /api/status` (`bringup_alive`,
`rosbridge_alive`, `reachable`, `log_tail`).
## Architecture
```
SanadV3 dashboard (:8000) ─┐
Navigation tab │ HTTP ┌── Nav2 ──┐
├──────────────▶│ web_nav3 │──▶ cmd_vel_loco_bridge ──▶ LocoClient (G1 legs)
SanadV3 voice (Gemini) ──┘ (:8765) └──────────┘
movement_dispatch.py rosbridge :9090 (live map / TF)
```
- SanadV3 plane = Python/asyncio, non-ROS. Dashboard on **:8000**.
- `web_nav3` = standalone FastAPI on **:8765** wrapping ROS2 Nav2 + rosbridge
on **:9090**. It owns SLAM, Nav2, and the `cmd_vel_loco_bridge` that drives
the G1 legs via `LocoClient`.
## Configure
Connection is resolved with precedence **env var → dashboard config → default**:
- `WEB_NAV3_URL` (default `http://127.0.0.1:8765`) — the `web_nav3` FastAPI base.
- `ROSBRIDGE_URL` (default `ws://127.0.0.1:9090`) — live map / TF stream.
- `SANAD_ROBOT_NAME` (default `sanad`) — sent as the `X-Robot-Name` header.
`config.py` exposes `WEB_NAV3_URL`. `main.py` builds the shared `nav_client`
singleton; `dashboard/routes/navigation.py` builds its own module-level client
(both use the same resolution). A broken nav package never blocks the dashboard.
## Dashboard Navigation tab
Backend proxy lives under `/api/nav/*` (prefix applied in `dashboard/app.py`).
The "Navigation" SPA tab lists saved **places** and **missions**, sends goto /
cancel, saves the current pose, and embeds the live `web_nav3` map iframe from
the robot at `:8765`. When the client is unavailable, status returns
`{"available": false}` and action endpoints return 503.
## API endpoints (`/api/nav/*`)
| Method | Path | Action |
|--------|------------------|------------------------------------------|
| GET | `/status` | health; `{available:false}` if degraded |
| GET | `/config` | web_nav3 / rosbridge URLs + robot name |
| GET | `/places` | list saved places |
| POST | `/goto` | navigate to a saved place by `name` |
| POST | `/cancel` | best-effort cancel (stops bringup) |
| POST | `/save_here` | save current pose as a named place |
| GET | `/maps` | list maps |
| GET | `/missions` | list missions |
| POST | `/missions/run` | run a saved mission by `id` |
## NEXT STEPS
1. **Voice bridge (not yet wired).** `voice/movement_dispatch.py` currently
drives discrete `loco_controller` steps only. Add a path so destination
phrases ("go to the lobby" / "اذهب إلى الردهة") map to `nav_client.goto()`
instead of stepping. Keep it gated on the existing
`recognition_state.movement_enabled` toggle.
2. **CRITICAL — LocoClient arbitration (prerequisite, do before #1).**
`web_nav3`'s `cmd_vel_loco_bridge` and SanadV3's `loco_controller` **must
never drive `LocoClient` simultaneously** — two velocity sources to the G1
legs at once is unsafe. Only ONE may hold the legs. Before enabling
voice-driven autonomous nav, build a hand-off: when a Nav2 goto is active,
`loco_controller` must release / be disarmed, and vice versa (fail-closed).
No `goto()` voice wiring lands until this interlock exists.
3. **Single DDS participant ordering.** SanadV3 and `web_nav3` share one Unitree
DDS domain on the G1. Initialize the DDS channel factory **exactly once**,
before any consumer. Decide startup order (whoever owns `LocoClient` inits
first) and ensure the other side never re-inits the participant.

10
vendor/Sanad/navigation/__init__.py vendored Normal file
View File

@ -0,0 +1,10 @@
"""SanadV3 navigation subsystem — thin client to the web_nav3 Nav2 stack.
web_nav3 runs as its own service (FastAPI on :8765 + rosbridge on :9090).
This module lets SanadV3 (dashboard + voice) drive it without owning any
ROS2/Nav2 code. Loose coupling: if web_nav3 is down, nav features degrade
gracefully and the rest of SanadV3 is unaffected.
"""
from .web_nav3_client import WebNav3Client, NavStatus
__all__ = ["WebNav3Client", "NavStatus"]

352
vendor/Sanad/navigation/goal_monitor.py vendored Normal file
View File

@ -0,0 +1,352 @@
"""Nav2 goal-status monitor — arrival/failure feedback for Gemini voice nav.
web_nav3's ``goto()`` is fire-and-forget: it publishes a goal and returns at
once, with no completion event. To let Gemini tell the user the truth ("we've
arrived" / "I couldn't get there") instead of guessing, we watch the Nav2
action status over rosbridge and, on a terminal status, push a
``[NAV ARRIVED]`` / ``[NAV FAILED]`` note to the Gemini child (via
``live_sub.send_state``) and release the nav arbiter so the legs free up.
Design
------
* One background daemon thread runs an asyncio loop holding a persistent
rosbridge websocket subscription to ``/navigate_to_pose/_action/status``
(``action_msgs/msg/GoalStatusArray``).
* ``arm_goal(place)`` marks a pending destination. A goal that is ACCEPTED/
EXECUTING and not already-terminal is latched as "ours"; its terminal status
fires feedback. A CANCELED of the latched goal while another goal is active
is treated as a preemption (re-latch, don't fire). Re-arming supersedes.
* A watchdog fails the goal after ``SANAD_NAV_GOAL_TIMEOUT_S`` so the arbiter
is ALWAYS released even if rosbridge/websockets is unavailable.
* ``request_cancel()`` sends a real rosbridge action CancelGoal (cancel-all) so
"stop" actually stops Nav2 — not just an arbiter release.
Everything is best-effort: the drive already succeeded by the time we arm, so
any monitor failure simply means no spoken feedback — never a crash.
"""
from __future__ import annotations
import asyncio
import base64
import json
import os
import threading
import time
from typing import Any, List, Optional
from Project.Sanad.core.logger import get_logger
log = get_logger("goal_monitor")
# Capture the arbiter at import so releasing the legs never depends on a lazy
# import succeeding inside the fire path (a missed release locks the legs).
try:
    from Project.Sanad.dashboard.routes import _arbiter as _ARBITER
except Exception:  # noqa: BLE001
    _ARBITER = None

# rosbridge websocket endpoint: Sanad-specific override first, then the generic
# variable, then the local default.
_ROSBRIDGE_URL = (
    os.environ.get("SANAD_ROSBRIDGE_URL")
    or os.environ.get("ROSBRIDGE_URL")
    or "ws://127.0.0.1:9090"
)

# Topic / message type carrying the Nav2 action status array.
_STATUS_TOPIC = os.environ.get(
    "SANAD_NAV_STATUS_TOPIC", "/navigate_to_pose/_action/status"
)
_STATUS_TYPE = "action_msgs/msg/GoalStatusArray"

# Service used by request_cancel() to cancel goals.
_CANCEL_SERVICE = os.environ.get(
    "SANAD_NAV_CANCEL_SERVICE", "/navigate_to_pose/_action/cancel_goal"
)

# Watchdog: seconds after arming before a goal is reported as failed.
# A malformed value must not break the import of this module (that would take
# the whole monitor down), and a zero / negative / NaN / infinite value would
# make every goal time out at once or never — fall back to the default instead.
_DEFAULT_GOAL_TIMEOUT_S = 240.0
_raw_goal_timeout = os.environ.get("SANAD_NAV_GOAL_TIMEOUT_S", "240")
try:
    _GOAL_TIMEOUT_S = float(_raw_goal_timeout)
except (TypeError, ValueError):
    _GOAL_TIMEOUT_S = float("nan")
if not 0.0 < _GOAL_TIMEOUT_S < float("inf"):
    log.warning(
        "invalid SANAD_NAV_GOAL_TIMEOUT_S=%r; using default %.0fs",
        _raw_goal_timeout, _DEFAULT_GOAL_TIMEOUT_S,
    )
    _GOAL_TIMEOUT_S = _DEFAULT_GOAL_TIMEOUT_S

# action_msgs/msg/GoalStatus.status
_ACCEPTED, _EXECUTING = 1, 2
_SUCCEEDED, _CANCELED, _ABORTED = 4, 5, 6
_TERMINAL = {_SUCCEEDED, _CANCELED, _ABORTED}
def _uuid_of(status: dict) -> Any:
    """Return a hashable, encoding-independent key for the goal id of ``status``.

    The goal id is read from ``status["goal_info"]["goal_id"]["uuid"]``.
    rosbridge may serialize the uint8[16] either as a base64 string or as a
    list of ints, so both forms are normalized to a tuple of ints; that way the
    ACCEPTED frame and the terminal frame of one goal compare equal even if the
    representation changes mid-session.

    Returns:
        A tuple of ints when the uuid decodes cleanly; the raw string (or a
        tuple of the raw list items) when it does not; ``None`` when no uuid
        of a supported type is present.
    """
    goal_info = status.get("goal_info") or {}
    goal_id = goal_info.get("goal_id") or {}
    raw = goal_id.get("uuid")

    if isinstance(raw, list):
        try:
            key = tuple(int(item) for item in raw)
        except Exception:  # noqa: BLE001
            # Non-integer entries: keep them verbatim so the key stays stable.
            key = tuple(raw)
        return key

    if isinstance(raw, str):
        try:
            key = tuple(base64.b64decode(raw))
        except Exception:  # noqa: BLE001
            # Not valid base64: fall back to the string itself as the key.
            key = raw
        return key

    return None
class _GoalMonitor:
    """Track one armed navigation goal and report its terminal status.

    A single daemon thread (spawned on demand by ``arm``) runs an asyncio loop
    that listens to the Nav2 status topic over rosbridge. When the latched goal
    reaches a terminal status, or the watchdog timeout elapses, ``_fire``
    releases the nav arbiter and pushes a note to the voice child.

    ``_lock`` guards the goal state (``_active``, ``_place``, ``_armed_at``,
    ``_latched``, ``_ignore``) and ``_thread``.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._active = False                      # a goal is currently being watched
        self._place: Optional[str] = None         # destination name of that goal
        self._armed_at = 0.0                      # time.time() of the last arm()
        self._latched: Any = None                 # uuid key of the goal treated as ours
        self._ignore: set = set()  # uuids seen terminal — never latch
        self._thread: Optional[threading.Thread] = None

    # ── public ───────────────────────────────────────────────
    def arm(self, place: str) -> None:
        """Start (or restart) watching for a goal driving to ``place``.

        Re-arming supersedes any previous goal: the latch and the ignore set
        are reset and the watchdog clock restarts. The monitor thread is
        spawned only when none is registered.

        Raises:
            Whatever ``Thread.start`` raises; in that case the thread slot is
            cleared first so a later ``arm`` can try to spawn again.
        """
        new_thread: Optional[threading.Thread] = None
        with self._lock:
            self._active = True
            self._place = place
            self._armed_at = time.time()
            self._latched = None
            self._ignore = set()
            if self._thread is None:
                new_thread = threading.Thread(
                    target=self._run, daemon=True, name="nav-goal-monitor")
                self._thread = new_thread
        if new_thread is not None:
            try:
                # Started outside the lock: _run takes the lock itself.
                new_thread.start()
            except Exception:
                # A thread that never started must not stay registered,
                # otherwise no later arm() would ever spawn a monitor.
                with self._lock:
                    if self._thread is new_thread:
                        self._thread = None
                raise
        log.info("armed goal monitor for '%s'", place)

    def disarm(self) -> None:
        """Stop watching the current goal without firing any feedback."""
        with self._lock:
            self._active = False
            self._latched = None
            self._place = None

    # ── thread / loop ────────────────────────────────────────
    def _run(self) -> None:
        """Thread body: serve sessions until no goal is active, then exit."""
        # Loop so that a new arm() arriving exactly as a session ends keeps the
        # monitor alive. The decision to exit is made under the lock together
        # with clearing _thread, so arm()'s "spawn only if _thread is None" can
        # never strand an active goal with no live thread.
        while True:
            crashed = False
            try:
                asyncio.run(self._serve())
            except Exception as exc:  # noqa: BLE001
                crashed = True
                log.warning("goal monitor loop ended: %s", exc)
            if crashed:
                # The loop could not run, so nothing inside it honored the
                # watchdog; check it here so the arbiter is still released.
                try:
                    self._check_timeout()
                except Exception as exc:  # noqa: BLE001
                    log.error("goal timeout check failed: %s", exc)
            with self._lock:
                if not self._active:
                    self._thread = None
                    return
            # still active → a fresh goal arrived (or the loop crashed); serve
            # again, backing off after a crash so a persistent failure cannot
            # spin the CPU.
            if crashed:
                time.sleep(1.0)

    async def _serve(self) -> None:
        """Run one monitoring session; return once no goal is active."""
        try:
            import websockets  # local import — dashboard env only
        except Exception as exc:  # noqa: BLE001
            # No ws client → honor the timeout so the arbiter is still released.
            log.warning("websockets unavailable — nav feedback via timeout only: %s", exc)
            while True:
                await asyncio.sleep(min(5.0, _GOAL_TIMEOUT_S))
                if self._check_timeout():
                    return
                with self._lock:
                    if not self._active:
                        return
        while True:
            try:
                await self._listen(websockets)
            except Exception as exc:  # noqa: BLE001
                log.debug("rosbridge listen error: %s", exc)
            if self._check_timeout():
                return
            with self._lock:
                if not self._active:
                    return
            # Pause before reconnecting to rosbridge.
            await asyncio.sleep(3.0)

    async def _listen(self, websockets: Any) -> None:
        """Subscribe to the status topic and process frames until idle."""
        async with websockets.connect(
            _ROSBRIDGE_URL, ping_interval=20, ping_timeout=20
        ) as ws:
            await ws.send(json.dumps({
                "op": "subscribe", "topic": _STATUS_TOPIC,
                "type": _STATUS_TYPE, "throttle_rate": 0, "queue_length": 1,
            }))
            log.info("goal monitor subscribed %s", _STATUS_TOPIC)
            while True:
                try:
                    raw = await asyncio.wait_for(ws.recv(), timeout=5.0)
                except asyncio.TimeoutError:
                    raw = None  # quiet topic: still run the checks below
                if raw is not None:
                    self._handle_frame(raw)
                # Run the watchdog / active checks after EVERY wake-up, so a
                # stream of irrelevant or undecodable frames cannot starve them.
                if self._check_timeout():
                    return
                with self._lock:
                    if not self._active:
                        return

    def _handle_frame(self, raw: Any) -> None:
        """Decode one rosbridge frame and forward its status list, if any.

        Frames that are not JSON, not a JSON object, or not a ``publish``
        operation are ignored.
        """
        try:
            data = json.loads(raw)
        except Exception:  # noqa: BLE001
            return
        if not isinstance(data, dict) or data.get("op") != "publish":
            return
        msg = data.get("msg")
        if not isinstance(msg, dict):
            return
        self._on_status(msg.get("status_list") or [])

    # ── status handling ──────────────────────────────────────
    def _on_status(self, status_list: List[dict]) -> None:
        """Update the latch from one status frame; fire on a terminal status."""
        fire: Optional[tuple] = None
        with self._lock:
            if not self._active:
                return
            states = {}  # uuid -> status (this frame)
            for s in status_list:
                if not isinstance(s, dict):
                    continue  # malformed entry — skip it, keep the frame
                u = _uuid_of(s)
                st = s.get("status")
                if u is None:
                    continue
                states[u] = st
                if st in _TERMINAL:
                    self._ignore.add(u)  # prior/other goals — never ours
            # Latch a genuinely-active, non-ignored goal as ours.
            if self._latched is None:
                cand = [u for u, st in states.items()
                        if st in (_ACCEPTED, _EXECUTING) and u not in self._ignore]
                if cand:
                    self._latched = cand[-1]  # newest Nav2 entry
            # Terminal for the latched goal?
            if self._latched is not None:
                st = states.get(self._latched)
                if st in _TERMINAL:
                    if st == _CANCELED:
                        # A CANCELED latch while another goal is active is a
                        # preemption (a newer goal replaced ours) — re-latch
                        # instead of falsely reporting we stopped.
                        others = [u for u, s2 in states.items()
                                  if s2 in (_ACCEPTED, _EXECUTING)
                                  and u not in self._ignore and u != self._latched]
                        if others:
                            self._ignore.add(self._latched)
                            self._latched = others[-1]
                            st = None
                    if st is not None and st in _TERMINAL:
                        fire = (self._place, st)
                        self._active = False
                        self._latched = None
                        self._place = None
        # Fire outside the lock: _fire calls into the arbiter and voice child.
        if fire:
            self._fire(*fire)

    def _check_timeout(self) -> bool:
        """Fail the active goal if the watchdog elapsed; True if it fired."""
        fire: Optional[tuple] = None
        with self._lock:
            if self._active and (time.time() - self._armed_at) > _GOAL_TIMEOUT_S:
                fire = (self._place, "timeout")
                self._active = False
                self._latched = None
                self._place = None
        if fire:
            self._fire(*fire)
            return True
        return False

    # ── feedback + arbiter release ───────────────────────────
    def _fire(self, place: Optional[str], status: Any) -> None:
        """Release the nav arbiter and push the outcome to the voice child.

        Args:
            place: destination name, or ``None`` for a generic wording.
            status: a GoalStatus code, or the string ``"timeout"``.
        """
        place = place or "the destination"
        if status == _SUCCEEDED:
            event = "nav_arrived"
            cmd = (f"You have arrived at '{place}'. Briefly tell the user "
                   "you've arrived, in your normal Khaleeji style.")
        elif status == _CANCELED:
            event = "nav_canceled"
            cmd = (f"Navigation to '{place}' was canceled. Briefly acknowledge "
                   "it if relevant.")
        elif status == "timeout":
            event = "nav_failed"
            cmd = (f"You could not confirm reaching '{place}' — it is taking "
                   "too long or the path is blocked. Briefly tell the user you "
                   "couldn't get there.")
        else:  # ABORTED / unknown
            event = "nav_failed"
            cmd = (f"You could NOT reach '{place}' — the path was blocked or "
                   "planning failed. Briefly apologise and say you couldn't "
                   "get there.")
        # Status and event are logged as separate fields so they stay readable.
        log.info("goal terminal: place=%s status=%s event=%s", place, status, event)
        # Free the legs first (a missed release locks them — make it loud).
        arb = _ARBITER
        if arb is None:
            # The import-time capture failed; try once more now.
            try:
                from Project.Sanad.dashboard.routes import _arbiter as arb
            except Exception:  # noqa: BLE001
                arb = None
        if arb is not None:
            try:
                arb.release_nav()
            except Exception as exc:  # noqa: BLE001
                log.error("release_nav failed after %s: %s", event, exc)
        else:
            log.error("arbiter unavailable — could not release nav after %s", event)
        # Tell Gemini (via the supervisor's stdin push to the child).
        try:
            from Project.Sanad.main import live_sub
            if live_sub is not None and hasattr(live_sub, "send_state"):
                live_sub.send_state(event, cmd)
        except Exception as exc:  # noqa: BLE001
            log.debug("nav feedback inject failed: %s", exc)
# Process-wide monitor instance shared by the module-level helpers below.
_MON = _GoalMonitor()


def arm_goal(place: str) -> None:
    """Start watching for the arrival/failure of the goal driving to ``place``.

    Never raises: any failure to arm is logged at debug level and swallowed.
    """
    try:
        _MON.arm(place)
    except Exception as err:  # noqa: BLE001
        log.debug("arm_goal failed: %s", err)
def disarm() -> None:
    """Stop tracking the current goal (for example after an explicit cancel).

    Never raises: any failure is logged at debug level and swallowed.
    """
    try:
        _MON.disarm()
    except Exception as err:  # noqa: BLE001
        log.debug("disarm failed: %s", err)
def _cancel_reply_ok(raw: Any) -> bool:
    """Return False only when rosbridge explicitly reports the call failed.

    An undecodable reply or an unrelated frame leaves the outcome unknown, so
    it is treated as "sent" (True) to keep the cancel best-effort.
    """
    try:
        reply = json.loads(raw)
    except (TypeError, ValueError):
        return True
    if isinstance(reply, dict) and reply.get("op") == "service_response":
        if reply.get("result") is False:
            log.warning("cancel service call rejected: %s", reply.get("values"))
            return False
    return True


async def _cancel_once() -> bool:
    """Send one cancel-all CancelGoal service call over rosbridge.

    Returns:
        True when the request was sent and rosbridge did not report a failure
        (including when no reply arrives within 3 s); False when the websocket
        client is unavailable, the connection fails, or rosbridge answers with
        ``result: false``.
    """
    try:
        import websockets
    except Exception:  # noqa: BLE001
        return False
    request = {
        "op": "call_service",
        "service": _CANCEL_SERVICE,
        "type": "action_msgs/srv/CancelGoal",
        # Zero goal_id + zero stamp == cancel ALL goals (CancelGoal convention).
        "args": {"goal_info": {"goal_id": {"uuid": [0] * 16},
                               "stamp": {"sec": 0, "nanosec": 0}}},
    }
    try:
        async with websockets.connect(_ROSBRIDGE_URL, ping_interval=None) as ws:
            await ws.send(json.dumps(request))
            try:
                raw = await asyncio.wait_for(ws.recv(), timeout=3.0)
            except Exception:  # noqa: BLE001
                # No reply in time: the request went out, outcome unconfirmed.
                return True
            # Previously any reply counted as success, so a rejected call
            # looked like a successful cancel to the caller.
            return _cancel_reply_ok(raw)
    except Exception as exc:  # noqa: BLE001
        log.debug("cancel ws failed: %s", exc)
        return False
def request_cancel() -> bool:
    """Ask Nav2 to cancel all goals via a rosbridge CancelGoal service call.

    Blocking (runs its own event loop), so call it through
    ``asyncio.to_thread`` from async code. Best-effort: returns False instead
    of raising when anything goes wrong.
    """
    try:
        sent = asyncio.run(_cancel_once())
    except Exception as err:  # noqa: BLE001
        log.debug("request_cancel failed: %s", err)
        return False
    return sent

View File

@ -0,0 +1,375 @@
"""Thin HTTP client to the web_nav3 Nav2 stack for SanadV3.
web_nav3 exposes a FastAPI surface (default http://127.0.0.1:8765) that wraps
a ROS2/Nav2 + rosbridge bringup. SanadV3 talks to it over plain HTTP so it
never has to import ROS2 itself.
Design contract: NO method here ever raises into the caller. Every public
method returns either a clean dict {"ok": bool, "error": str|None, ...} or a
NavStatus. If web_nav3 is unreachable/down, callers (dashboard, voice) get a
graceful failure result and keep running.
"""
from __future__ import annotations
import logging
import threading
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
import requests
logger = logging.getLogger("sanadv3.navigation")
@dataclass
class NavStatus:
    """Health snapshot of web_nav3, as reported by GET /api/status."""

    bringup_alive: bool = False
    rosbridge_alive: bool = False
    reachable: bool = False
    error: Optional[str] = None
    log_tail: str = ""
    # What the single bringup is doing right now (from /api/status):
    # mode 1 fresh / 2 continue / 3 localize / None idle.
    mode: Optional[int] = None
    active_map: Optional[str] = None
    mode_label: str = "IDLE"
    mapping: bool = False
    localizing: bool = False

    def as_dict(self) -> Dict[str, Any]:
        """Return the snapshot as a plain dict, keys in field order."""
        keys = (
            "bringup_alive",
            "rosbridge_alive",
            "reachable",
            "error",
            "log_tail",
            "mode",
            "active_map",
            "mode_label",
            "mapping",
            "localizing",
        )
        return {key: getattr(self, key) for key in keys}
class WebNav3Client:
    """Loosely-coupled HTTP client to web_nav3.

    By contract no public method raises into the caller: each returns a result
    dict ``{"ok": bool, "error": str|None, ...}``, a list, a bool, or a
    ``NavStatus``.

    Args:
        base_url: web_nav3 FastAPI base, e.g. "http://127.0.0.1:8765".
        robot: robot name, sent as X-Robot-Name header on every request.
        timeout: default per-request timeout (seconds).
    """

    def __init__(
        self,
        base_url: str = "http://127.0.0.1:8765",
        robot: str = "sanad",
        timeout: float = 3.0,
    ) -> None:
        self.base_url = base_url.rstrip("/")
        self.robot = robot
        self.timeout = float(timeout)
        self._session = requests.Session()
        # requests.Session is not guaranteed thread-safe, but route handlers
        # call us from asyncio.to_thread workers (multiple tabs / overlapping
        # status-poll + goto). Serialize Session access with a lock.
        self._session_lock = threading.Lock()

    # ------------------------------------------------------------------ #
    # internals
    # ------------------------------------------------------------------ #
    def _headers(self) -> Dict[str, str]:
        """Headers attached to every request."""
        return {"X-Robot-Name": self.robot}

    def _url(self, path: str) -> str:
        """Join ``path`` onto the base URL, adding a leading slash if missing."""
        if not path.startswith("/"):
            path = "/" + path
        return self.base_url + path

    def _request(
        self,
        method: str,
        path: str,
        *,
        params: Optional[Dict[str, Any]] = None,
        json_body: Optional[Dict[str, Any]] = None,
        timeout: Optional[float] = None,
    ) -> Dict[str, Any]:
        """Perform a request, always returning {"ok":bool, ...}; never raises.

        Returns:
            On success ``{"ok": True, "error": None, "data", "status_code"}``.
            On an HTTP error the same keys with ``ok`` False and an error text.
            On a transport or serialization failure only ``ok`` and ``error``
            (no ``status_code``), which callers use to detect "no answer".
        """
        url = self._url(path)
        effective_timeout = self.timeout if timeout is None else timeout
        try:
            with self._session_lock:
                resp = self._session.request(
                    method,
                    url,
                    params=params,
                    json=json_body,
                    headers=self._headers(),
                    timeout=effective_timeout,
                )
        except requests.RequestException as exc:
            logger.debug("web_nav3 %s %s failed: %s", method, path, exc)
            return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
        except (TypeError, ValueError) as exc:
            # e.g. a body that cannot be JSON-encoded: honor the never-raises
            # contract instead of leaking the error into the caller.
            logger.debug("web_nav3 %s %s rejected locally: %s", method, path, exc)
            return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}

        # Try to decode a JSON body either way (errors often carry detail).
        body: Any = None
        try:
            body = resp.json()
        except ValueError:
            body = resp.text

        if not resp.ok:
            detail = body
            if isinstance(body, dict):
                detail = body.get("detail") or body.get("error") or body
            return {
                "ok": False,
                "error": f"HTTP {resp.status_code}: {detail}",
                "status_code": resp.status_code,
                "data": body,
            }
        return {"ok": True, "error": None, "data": body, "status_code": resp.status_code}

    # ------------------------------------------------------------------ #
    # health
    # ------------------------------------------------------------------ #
    def reachable(self) -> bool:
        """Quick liveness check: short-timeout GET /api/status.

        Reachable means web_nav3 ANSWERED: any HTTP response (including a 5xx
        from a running-but-erroring backend) counts. Only a transport failure
        (connection refused / timeout) means unreachable.
        """
        try:
            with self._session_lock:
                self._session.get(
                    self._url("/api/status"),
                    headers=self._headers(),
                    timeout=min(self.timeout, 1.5),
                )
            return True
        except requests.RequestException:
            return False

    def status(self) -> "NavStatus":
        """Poll GET /api/status; returns a NavStatus (never raises)."""
        res = self._request("GET", "/api/status")
        if not res["ok"]:
            # A 'status_code' key means web_nav3 answered with an HTTP error
            # (e.g. 500) — it IS reachable, just erroring. Only a transport
            # failure (no status_code) is genuinely unreachable.
            answered = "status_code" in res
            log_tail = ""
            body = res.get("data")
            if answered and isinstance(body, dict):
                log_tail = str(body.get("log_tail", "") or "")
            return NavStatus(reachable=answered, error=res["error"], log_tail=log_tail)

        data = res.get("data") or {}
        if not isinstance(data, dict):
            return NavStatus(reachable=True, error="unexpected status payload")
        return NavStatus(
            bringup_alive=bool(data.get("bringup_alive", False)),
            rosbridge_alive=bool(data.get("rosbridge_alive", False)),
            reachable=True,
            error=None,
            log_tail=str(data.get("log_tail", "") or ""),
            mode=data.get("mode"),
            active_map=data.get("active_map"),
            mode_label=str(data.get("mode_label", "IDLE") or "IDLE"),
            mapping=bool(data.get("mapping", False)),
            localizing=bool(data.get("localizing", False)),
        )

    # ------------------------------------------------------------------ #
    # bringup lifecycle
    # ------------------------------------------------------------------ #
    def start(self, mode: int, db_path: Optional[str] = None) -> Dict[str, Any]:
        """POST /api/start. mode 1=fresh SLAM, 2/3=localization (need db_path).

        A ``mode`` that cannot be converted to int yields a failure result
        instead of raising.
        """
        try:
            mode_int = int(mode)
        except (TypeError, ValueError):
            return {"ok": False, "error": f"invalid mode: {mode!r}"}
        body: Dict[str, Any] = {"mode": mode_int, "robot": self.robot}
        if db_path is not None:
            body["db_path"] = db_path
        return self._request("POST", "/api/start", json_body=body, timeout=15.0)

    def stop(self) -> Dict[str, Any]:
        """POST /api/stop — SIGINT the running bringup process group."""
        return self._request("POST", "/api/stop", timeout=15.0)

    def load_map(self, db_path: str) -> Dict[str, Any]:
        """POST /api/load_map — VIEW a saved map: stop any running bringup, then
        start LOCALIZE-only (mode 3) against db_path. Use this for 'Load & View'
        so it actually switches to the chosen map instead of attaching to a
        mapping session already in progress."""
        return self._request("POST", "/api/load_map",
                             json_body={"db_path": db_path}, timeout=25.0)

    def shutdown(self) -> Dict[str, Any]:
        """POST /api/shutdown — kill ALL registered web_nav3 processes."""
        return self._request("POST", "/api/shutdown", timeout=15.0)

    # ------------------------------------------------------------------ #
    # places / navigation
    # ------------------------------------------------------------------ #
    def list_places(self, map_name: Optional[str] = None) -> List[Dict[str, Any]]:
        """GET /api/places. Per-MAP when map_name given (each map keeps its own
        places); else the legacy per-robot store. Returns a list (empty on error)."""
        params: Dict[str, Any] = {"robot": self.robot}
        if map_name:
            params["map"] = map_name
        res = self._request("GET", "/api/places", params=params)
        if not res["ok"]:
            logger.debug("list_places failed: %s", res["error"])
            return []
        data = res.get("data")
        return data if isinstance(data, list) else []

    def goto(self, name: str) -> Dict[str, Any]:
        """POST /api/places/goto?name=... — THE primary nav command.

        Navigate to a saved place by name on the currently-running bringup.
        """
        # Backend runs `timeout 5 ros2 topic pub --once` inside subprocess.run(
        # timeout=12); on a Jetson the rclpy cold-start routinely exceeds the
        # default 3s read-timeout, so override to match the backend's cap.
        res = self._request("POST", "/api/places/goto", params={"name": name}, timeout=12.0)
        if res["ok"]:
            return {"ok": True, "error": None, "name": name, "data": res.get("data")}
        return {"ok": False, "error": res["error"], "name": name}

    def cancel(self) -> Dict[str, Any]:
        """Report that server-side goal-cancel is NOT available.

        web_nav3 has no explicit goal-cancel HTTP endpoint, so a true
        per-goal cancel must be performed client-side (browser) over rosbridge
        by sending a CancelGoal to the /navigate_to_pose action server. That
        keeps Nav2/SLAM/bridges alive; only the in-flight goal aborts.

        Server-side there is nothing safe to do here (we must NOT call stop(),
        which SIGINTs the whole bringup process group). Previously this returned
        ok:True, which made the dashboard's rosbridge-down fallback toast
        "cancel sent" while the robot kept driving. Return ok:False with an
        explicit reason so callers never mistake this for a real cancel. The
        only server-side way to halt motion is stop() (tears down bringup).
        """
        return {
            "ok": False,
            "error": "cancel must be done client-side via rosbridge",
            "note": "no server-side goal-cancel; use stop() to tear down bringup",
        }

    def save_here(self, name: str) -> Dict[str, Any]:
        """Save the robot's CURRENT pose as a named place.

        POST /api/places/save_here?name=... web_nav3 reads the current TF
        pose (requires bringup running; backend takes ``name`` as a query arg).

        CONFIG COUPLING: backend save_here writes the LEGACY places store
        (~/marcus_nav2_test/places.json) regardless of robot, while save_at /
        list_places are robot-scoped and only resolve to that legacy file when
        web_nav3's robot_config.yaml ``robot_name`` equals this client's robot
        (``self.robot``). For all three to share one store, web_nav3's
        robot_name MUST match SANAD_ROBOT_NAME (default 'sanad'); if they drift,
        'Save here' and click-to-add places silently target different files.
        """
        # Same ros2-pub cold-start as goto(): override the 3s default so a slow
        # (but successful) publish isn't reported as a failure.
        return self._request(
            "POST",
            "/api/places/save_here",
            params={"robot": self.robot, "name": name},
            timeout=12.0,
        )

    def save_at(self, name: str, x: float, y: float, yaw: float,
                map_name: Optional[str] = None) -> Dict[str, Any]:
        """Save a named place at an explicit (x, y, yaw) map pose.

        POST /api/places/save_at writes straight to places.json, so it works
        even with no bringup running (used by click-to-add-place on the map).
        Per-MAP when map_name given. Re-saving an existing name MOVES the place.

        A coordinate that cannot be converted to float yields a failure result
        instead of raising.
        """
        try:
            pose = {"x": float(x), "y": float(y), "yaw": float(yaw)}
        except (TypeError, ValueError) as exc:
            return {"ok": False, "error": f"invalid pose: {exc}"}
        params: Dict[str, Any] = {"robot": self.robot}
        if map_name:
            params["map"] = map_name
        return self._request(
            "POST",
            "/api/places/save_at",
            params=params,
            json_body={"name": name, **pose},
        )

    def delete_place(self, name: str, map_name: Optional[str] = None) -> Dict[str, Any]:
        """DELETE /api/places?name=... — remove a saved place (per-map)."""
        params: Dict[str, Any] = {"robot": self.robot, "name": name}
        if map_name:
            params["map"] = map_name
        return self._request("DELETE", "/api/places", params=params)

    def rename_place(self, old: str, new: str,
                     map_name: Optional[str] = None) -> Dict[str, Any]:
        """POST /api/places/rename — rename a saved place (per-map)."""
        params: Dict[str, Any] = {"robot": self.robot}
        if map_name:
            params["map"] = map_name
        return self._request(
            "POST", "/api/places/rename", params=params,
            json_body={"old": old, "new": new},
        )

    # ------------------------------------------------------------------ #
    # map editor — per-map occupancy edit overlay
    # ------------------------------------------------------------------ #
    def get_map_edits(self, map_name: str) -> Dict[str, Any]:
        """GET /api/map_edits?map=... — the saved edit overlay (erased points +
        painted walls) for a map. Returns {ok, error, edits:[[wx,wy,v],...]}."""
        res = self._request("GET", "/api/map_edits",
                            params={"robot": self.robot, "map": map_name})
        if not res["ok"]:
            return {"ok": False, "error": res["error"], "edits": []}
        data = res.get("data") or {}
        edits = data.get("edits", []) if isinstance(data, dict) else []
        # "error" is included on success too, matching every other result dict.
        return {"ok": True, "error": None, "edits": edits}

    def save_map_edits(self, map_name: str, edits: list) -> Dict[str, Any]:
        """POST /api/map_edits?map=... — persist the edit overlay for a map.
        edits = list of [world_x, world_y, value] (0=free/erase, 100=wall)."""
        return self._request("POST", "/api/map_edits",
                             params={"robot": self.robot, "map": map_name},
                             json_body={"edits": edits})

    # ------------------------------------------------------------------ #
    # maps / missions
    # ------------------------------------------------------------------ #
    def list_maps(self) -> List[Dict[str, Any]]:
        """GET /api/maps. Returns a list (empty on any error)."""
        res = self._request("GET", "/api/maps", params={"robot": self.robot})
        if not res["ok"]:
            logger.debug("list_maps failed: %s", res["error"])
            return []
        data = res.get("data")
        return data if isinstance(data, list) else []

    def list_missions(self) -> List[Dict[str, Any]]:
        """GET /api/missions. Returns a list (empty on any error)."""
        res = self._request("GET", "/api/missions", params={"robot": self.robot})
        if not res["ok"]:
            logger.debug("list_missions failed: %s", res["error"])
            return []
        data = res.get("data")
        return data if isinstance(data, list) else []

    def run_mission(self, mission_id: Any) -> Dict[str, Any]:
        """POST /api/missions/run — start a saved mission by id."""
        # The id is sent both as a query arg and in the JSON body.
        return self._request(
            "POST",
            "/api/missions/run",
            params={"id": mission_id},
            json_body={"id": mission_id, "robot": self.robot},
            timeout=15.0,
        )

View File

@ -1,7 +1,7 @@
# systemd user-level unit for Sanad. Install with:
# systemd user-level unit for SanadV3. Install with:
#
# mkdir -p ~/.config/systemd/user
# cp ~/Sanad/shell_scripts/sanad.service ~/.config/systemd/user/sanad.service
# cp ~/Sanadv3/shell_scripts/sanad.service ~/.config/systemd/user/sanad.service
# systemctl --user daemon-reload
# systemctl --user enable --now sanad.service
# sudo loginctl enable-linger unitree # run at boot even when not logged in
@ -13,14 +13,15 @@
# systemctl --user restart sanad.service
[Unit]
Description=Sanad robot assistant (FastAPI dashboard + voice/motion subsystems)
Description=SanadV3 robot assistant (FastAPI dashboard + voice/motion subsystems)
After=network-online.target
Wants=network-online.target
[Service]
Type=exec
WorkingDirectory=%h/Sanad
ExecStart=/usr/bin/env bash %h/Sanad/shell_scripts/start_sanad.sh
WorkingDirectory=%h/Sanadv3
ExecStart=/usr/bin/env bash %h/Sanadv3/shell_scripts/start_sanad.sh
Environment=PORT=8001
Restart=on-failure
RestartSec=5
TimeoutStopSec=15

View File

@ -0,0 +1,33 @@
[Unit]
Description=SanadV3 full stack (dashboard :8001 + web_nav3 nav :8765 + rosbridge :9090)
After=network-online.target sound.target
Wants=network-online.target
StartLimitIntervalSec=0
[Service]
Type=simple
User=unitree
Group=unitree
WorkingDirectory=/home/unitree/Sanadv3
# Runtime env (systemd does NOT source ~/.bashrc, so set what the stack needs).
Environment=USER=unitree
Environment=HOME=/home/unitree
Environment=PYTHONUNBUFFERED=1
Environment=XDG_RUNTIME_DIR=/run/user/1000
Environment=PULSE_SERVER=unix:/run/user/1000/pulse/native
# Pin the voice child to Sanadv3 (replaces the old ~/.bashrc SANAD_LIVE_SCRIPT).
Environment=SANAD_LIVE_SCRIPT=/home/unitree/Sanadv3/voice/sanad_voice.py
# start_all.sh self-activates conda (gemini_sdk for dashboard, g1_lidar for
# web_nav3), supervises both, and has a SIGTERM cleanup trap.
ExecStart=/bin/bash /home/unitree/Sanadv3/shell_scripts/start_all.sh
# Let start_all.sh's trap tear its children down on stop, then SIGKILL stragglers.
KillMode=mixed
TimeoutStopSec=30
Restart=always
RestartSec=5s
[Install]
WantedBy=multi-user.target

164
vendor/Sanad/shell_scripts/start_all.sh vendored Normal file
View File

@ -0,0 +1,164 @@
#!/usr/bin/env bash
#
# start_all.sh — ONE command to launch the entire Sanad humanoid stack:
#
# 1. web_nav3 nav stack → backend :8765 + rosbridge :9090 + map_relay
# + internal rosbridge watchdog
# (runs in its OWN g1_lidar conda env + CycloneDDS)
#
# 2. SanadV3 dashboard → Gemini voice + LED face + Nav tabs, :8001
# (runs in its OWN gemini_sdk conda env + Unitree SDK on eth0)
#
# The two stacks use incompatible Python envs + DDS configs, so they CANNOT
# share a process. This launcher starts each detached, supervises the pair
# (restarts one only if it truly dies), and Ctrl+C here stops EVERYTHING.
#
# Liveness is checked by PROCESS PATTERN (pgrep), NOT by a captured PID:
# `setsid ... &` returns the ephemeral setsid-wrapper pid which exits
# instantly, so a pid check would false-fire and restart-storm (which then
# makes each failed relaunch's cleanup trap nuke the working rosbridge).
#
# Usage: bash ~/Sanadv3/shell_scripts/start_all.sh
# Env knobs (optional): WEBNAV3_HOME, SANAD_HOME, PORT, LOG_DIR, NO_WEBNAV3=1
# Abort on expansion of any unset variable (typos in knob names fail loudly).
set -u
# Every knob can be overridden from the environment; the value after ':-' is
# the default used when the variable is unset or empty.
WEBNAV3_HOME="${WEBNAV3_HOME:-$HOME/web_nav3}"
SANAD_HOME="${SANAD_HOME:-$HOME/Sanadv3}"
# The dashboard port is taken from PORT (not SANAD_PORT) in the environment.
SANAD_PORT="${PORT:-8001}"
LOG_DIR="${LOG_DIR:-$HOME/sanad_logs}"
# NO_WEBNAV3=1 means "do not launch the web_nav3 nav stack".
NO_WEBNAV3="${NO_WEBNAV3:-0}"
# Log files are written here by the launch helpers, so it must exist first.
mkdir -p "$LOG_DIR"
# Startup banner: echo the resolved configuration so it lands in the journal/terminal.
echo "════════════════════════════════════════════════"
echo " Sanad humanoid — full stack launcher"
echo " web_nav3 : $WEBNAV3_HOME $([ "$NO_WEBNAV3" = 1 ] && echo '(skipped)')"
echo " SanadV3 : $SANAD_HOME (port $SANAD_PORT)"
echo " logs : $LOG_DIR"
echo "════════════════════════════════════════════════"
# liveness by process pattern (robust — no pid tracking)
# NOTE: start_sanad.sh `cd $SANAD_HOME && exec python3 main.py --port N`, so the
# process cmdline is "python3 main.py --port <PORT>" (no Sanadv3/ in argv). Match
# on the PORT — also keeps us from ever touching the live Sanad on :8000.
SANAD_PAT="main.py --port $SANAD_PORT"
# Succeeds (exit 0) when any process command line contains "web/backend.py".
webnav3_up() {
    pgrep -f "web/backend.py" >/dev/null 2>&1
}
# Count a SanadV3 as "up" if EITHER main.py is bound OR start_sanad.sh is still
# mid-boot (conda activate + up-to-20s DDS-iface wait + heavy lazy imports come
# BEFORE `exec python3 main.py`, so "main.py --port" is absent from argv during
# that whole window). Matching the booting shell too prevents the supervisor
# from pkill-ing a process that is simply still cold-booting (restart storm).
# Succeeds when either the running dashboard (main.py on our port) or the
# still-booting start_sanad.sh wrapper is present in the process table.
sanad_up() {
    if pgrep -f "$SANAD_PAT" >/dev/null 2>&1; then
        return 0
    fi
    pgrep -f "shell_scripts/start_sanad.sh" >/dev/null 2>&1
}
# Launch the web_nav3 stack detached in its own session; all output goes to
# the log file and stdin is closed so it never blocks on the terminal.
start_webnav3() {
    local log_file="$LOG_DIR/web_nav3.log"
    setsid bash "$WEBNAV3_HOME/scripts/start_web.sh" \
        </dev/null >"$log_file" 2>&1 &
    echo " web_nav3 launching → $log_file"
}
# Launch the SanadV3 dashboard detached in its own session, handing it the
# resolved project root and port through the environment.
start_sanad() {
    local log_file="$LOG_DIR/sanadv3.log"
    SANAD_HOME="$SANAD_HOME" PORT="$SANAD_PORT" \
        setsid bash "$SANAD_HOME/shell_scripts/start_sanad.sh" \
        </dev/null >"$log_file" 2>&1 &
    echo " SanadV3 launching → $log_file"
}
# SIGKILL the web_nav3 launcher first, then every process it spawns.
# Always returns success, even when nothing matched.
kill_webnav3() {
    local pattern
    for pattern in \
        "scripts/start_web.sh" \
        "web/backend.py|rosbridge_websocket|start_rosbridge.sh|start_map_relay.sh|map_relay.py"; do
        pkill -9 -f "$pattern" 2>/dev/null || true
    done
}
# ---- teardown on Ctrl+C / TERM ----
# Installed BEFORE pre-flight/launch so an interrupt during the "clearing
# stale instances" sleep or right after a setsid child is spawned still
# triggers a full teardown (otherwise those detached children would be
# orphaned with no supervisor).
_cleaning=0
# Full teardown, run from the INT/TERM trap. Never returns: ends with `exit 0`.
#
# web_nav3 is only touched when this launcher owns it (NO_WEBNAV3 != 1). The
# pre-flight and the supervisor already honour that knob; without the same
# guard here, stopping a dashboard-only launcher would shut down a nav stack
# that someone else started and is managing.
cleanup() {
    # Re-entrancy guard: a second signal during teardown must not restart it.
    [ "$_cleaning" = 1 ] && return
    _cleaning=1
    echo
    echo "── stopping everything ──"
    # 1. ask the backend to stop the BRINGUP cleanly (it owns the pgid, so a
    #    killpg takes down rtabmap + all Nav2 nodes in one shot).
    if [ "$NO_WEBNAV3" != 1 ]; then
        curl -s -m 8 -X POST "http://127.0.0.1:8765/api/shutdown" >/dev/null 2>&1 || true
    fi
    # 2. graceful TERM: SanadV3 (clean loco stop) + start_web.sh (own trap).
    pkill -TERM -f "$SANAD_PAT" 2>/dev/null || true
    if [ "$NO_WEBNAV3" != 1 ]; then
        pkill -TERM -f "scripts/start_web.sh" 2>/dev/null || true
    fi
    # Give the TERM handlers a moment before escalating to SIGKILL.
    sleep 3
    # 3. force-kill the web_nav3 stack + SanadV3 (incl. a still-booting wrapper).
    if [ "$NO_WEBNAV3" != 1 ]; then
        kill_webnav3
    fi
    pkill -9 -f "$SANAD_PAT" 2>/dev/null || true
    pkill -9 -f "shell_scripts/start_sanad.sh" 2>/dev/null || true
    # 4. belt-and-suspenders — any orphaned bringup tree (if the backend was
    #    already gone in step 1, killpg never ran).
    if [ "$NO_WEBNAV3" != 1 ]; then
        pkill -9 -f "scripts/bringup.sh|run_on_jetson.sh|g1_nav2_slam" 2>/dev/null || true
        pkill -9 -f "rtabmap_slam/rtabmap|livox_pcd_bridge|dog_odom_to_tf|pointcloud_to_laserscan" 2>/dev/null || true
    fi
    echo " done."
    exit 0
}
# Teardown handler is armed before anything is killed or spawned.
trap cleanup INT TERM
# ---- pre-flight: remove leftovers from a previous run ----
echo "── clearing any stale instances … ──"
if [ "$NO_WEBNAV3" != 1 ]; then
    kill_webnav3
fi
for stale_pat in "shell_scripts/start_sanad.sh" "$SANAD_PAT"; do
    pkill -9 -f "$stale_pat" 2>/dev/null || true
done
sleep 2
# ---- launch both stacks (web_nav3 optional) ----
if [ "$NO_WEBNAV3" != 1 ]; then
    echo "[1/2] starting web_nav3 nav stack …"
    start_webnav3
fi
echo "[2/2] starting SanadV3 dashboard …"
start_sanad
# ---- poll the listening sockets: 40 rounds x 2s ----
echo
echo "── waiting for services to bind (up to ~80s) … ──"
for _ in $(seq 1 40); do
    up8001=$(ss -tnln 2>/dev/null | grep -c ":$SANAD_PORT ")
    up8765=$(ss -tnln 2>/dev/null | grep -c ':8765 ')
    up9090=$(ss -tnln 2>/dev/null | grep -c ':9090 ')
    # The dashboard is always required; the nav ports only when web_nav3 runs.
    if [ "$up8001" -ge 1 ]; then
        if [ "$NO_WEBNAV3" = 1 ]; then
            break
        fi
        if [ "$up8765" -ge 1 ] && [ "$up9090" -ge 1 ]; then
            break
        fi
    fi
    sleep 2
done
# First address reported by `hostname -I`, used to print clickable URLs.
IP=$(hostname -I | awk '{print $1}')
# st PATTERN — print UP when a listening TCP socket line matches PATTERN, else DOWN.
st() {
    if ss -tnln 2>/dev/null | grep -q "$1"; then
        echo UP
    else
        echo DOWN
    fi
}
echo
echo "════════════════════════════════════════════════"
echo " STATUS"
if [ "$NO_WEBNAV3" != 1 ]; then
    echo " web_nav3 backend :8765 $(st ':8765 ')"
    echo " rosbridge :9090 $(st ':9090 ')"
fi
echo " SanadV3 dashboard :$SANAD_PORT $(st ":$SANAD_PORT ")"
echo
echo " OPEN: http://$IP:$SANAD_PORT (full dashboard)"
if [ "$NO_WEBNAV3" != 1 ]; then
    echo " http://$IP:8765 (web_nav3 nav only)"
fi
echo
echo " logs: tail -f $LOG_DIR/sanadv3.log"
if [ "$NO_WEBNAV3" != 1 ]; then
    echo " tail -f $LOG_DIR/web_nav3.log"
fi
echo " >>> Ctrl+C here stops EVERYTHING <<<"
echo "════════════════════════════════════════════════"
# ---- supervise: restart a child ONLY if it truly died (pgrep), with grace ----
#
# _nap SECONDS — interruptible sleep.
# Bash does not run a trap while a FOREGROUND command is executing; it waits
# for that command to finish. A plain `sleep 45` would therefore delay the
# TERM trap by up to 45s — longer than a 30s service stop timeout — so the
# graceful teardown would never run when the launcher alone is signalled.
# The `wait` builtin, by contrast, returns immediately when a trapped signal
# arrives, so the trap fires at once. (The backgrounded sleep may linger for
# the rest of its interval; it is harmless.)
_nap() {
    sleep "$1" &
    wait $!
}
while true; do
    _nap 10
    if [ "$NO_WEBNAV3" != 1 ] && ! webnav3_up; then
        echo "[supervisor] $(date +%T) web_nav3 down — restarting"
        kill_webnav3 # clear any half-dead remnants first
        _nap 1
        start_webnav3
        _nap 15 # grace: let it boot before the next check
    fi
    if ! sanad_up; then
        echo "[supervisor] $(date +%T) SanadV3 down — restarting"
        pkill -9 -f "shell_scripts/start_sanad.sh" 2>/dev/null || true
        _nap 1
        start_sanad
        # grace MUST exceed worst-case cold boot: up to 20s DDS-iface wait +
        # conda activate + heavy lazy imports (torch/transformers/gemini) +
        # arm.init before uvicorn binds. 15s was shorter than that and caused a
        # restart storm (kill a still-booting instance, relaunch, repeat).
        _nap 45
    fi
done

View File

@ -5,24 +5,24 @@
# (sanad.service) for boot-time auto-start.
#
# Override knobs (env vars; all optional):
# SANAD_HOME project root (default ~/Sanad)
# SANAD_HOME project root (default ~/Sanadv3)
# SANAD_CONDA_ENV conda env name (default gemini_sdk)
# SANAD_CONDA_BASE conda install dir (default $HOME/miniconda3)
# SANAD_DDS_INTERFACE DDS network iface (default eth0)
# SANAD_VOICE_BRAIN gemini | local | model (default gemini)
# SANAD_AUDIO_PROFILE builtin | anker | hollyland_builtin (default builtin)
# PORT dashboard port (default 8000)
# PORT dashboard port (default 8001)
set -u
SANAD_HOME="${SANAD_HOME:-$HOME/Sanad}"
SANAD_HOME="${SANAD_HOME:-$HOME/Sanadv3}"
SANAD_CONDA_ENV="${SANAD_CONDA_ENV:-gemini_sdk}"
SANAD_CONDA_BASE="${SANAD_CONDA_BASE:-$HOME/miniconda3}"
export SANAD_DDS_INTERFACE="${SANAD_DDS_INTERFACE:-eth0}"
export SANAD_VOICE_BRAIN="${SANAD_VOICE_BRAIN:-gemini}"
export SANAD_AUDIO_PROFILE="${SANAD_AUDIO_PROFILE:-builtin}"
export PORT="${PORT:-8000}"
export PORT="${PORT:-8001}"
# Mandatory environment fixes for Jetson + conda + Unitree SDK
export LD_PRELOAD="${LD_PRELOAD:-/usr/lib/aarch64-linux-gnu/libgomp.so.1}"
@ -66,12 +66,17 @@ conda activate "$SANAD_CONDA_ENV" || {
}
# Wait for the DDS interface to come up — robot may still be booting
_dds_up=0
for i in $(seq 1 20); do
if ip link show "$SANAD_DDS_INTERFACE" 2>/dev/null | grep -q "state UP"; then
_dds_up=1
break
fi
sleep 1
done
if [ "$_dds_up" != 1 ]; then
echo "[start_sanad] WARNING: $SANAD_DDS_INTERFACE not UP after 20s — launching anyway; hardware (Unitree SDK/arm/loco) may be unavailable" >&2
fi
echo "[start_sanad] $(date) — launching main.py"
echo "[start_sanad] SANAD_HOME=$SANAD_HOME"

View File

@ -137,6 +137,7 @@ class CameraDaemon:
"""Stop the capture thread and release the hardware."""
if not self.is_running():
self._backend = None
self._clear_cache()
return
self._stop.set()
t = self._thread
@ -144,8 +145,20 @@ class CameraDaemon:
t.join(timeout=2.0)
self._thread = None
self._backend = None
# Drop the last captured frame so snapshot_jpeg()/get_frame_b64()
# return None once vision is OFF — otherwise the /frame.jpg preview
# and the enroll path keep serving a frozen image of whoever was
# last in front of the camera.
self._clear_cache()
log.info("Camera stopped")
def _clear_cache(self) -> None:
"""Drop the cached frame views so nothing stale is served."""
with self._lock:
self._latest_jpeg = None
self._latest_b64 = None
self._latest_ts = 0.0
def reconfigure(self, width: Optional[int] = None, height: Optional[int] = None,
fps: Optional[int] = None, jpeg_quality: Optional[int] = None) -> dict:
"""Hot-swap the capture profile without a full stop/start.
@ -191,7 +204,10 @@ class CameraDaemon:
Used by face enrollment so the captured frame is guaranteed to be
the *current* scene, not a stale buffer from before the user got
into position. Falls back to whatever's cached on timeout.
into position. On timeout, only falls back to the cached frame if
it is still within the stale threshold; otherwise returns None so
the enroll route raises 409 rather than capturing an old scene
(e.g. while the daemon is stuck reconnecting).
"""
deadline = time.time() + timeout_s
while time.time() < deadline:
@ -201,8 +217,15 @@ class CameraDaemon:
and (time.time() - self._latest_ts) <= max_age_s):
return self._latest_jpeg
time.sleep(0.03)
# Timed out waiting for a fresh frame. Hand back the cached frame
# only if it isn't dangerously stale; never enrol an arbitrarily
# old scene.
with self._lock:
if (self._latest_jpeg is not None
and self._latest_ts > 0
and (time.time() - self._latest_ts) <= self._stale_s):
return self._latest_jpeg
return None
def latest_age_s(self) -> float:
"""Seconds since last successful frame; +inf if none."""
@ -219,18 +242,28 @@ class CameraDaemon:
# leaving it as inf would 500 the /api/recognition/* routes. Map
# "running but no frame yet" and "not running" both to None.
age = self.latest_age_s()
age_s = round(age, 2) if (self.is_running() and age != float("inf")) else None
running = self.is_running()
age_s = round(age, 2) if (running and age != float("inf")) else None
# Snapshot the report counters under _lock for a consistent view —
# the capture/reconnect thread mutates these (see _reconnect_loop).
# Read latest_age_s()/is_running() above (they self-lock) so we
# don't re-enter this non-reentrant lock.
with self._lock:
backend = self._backend
frame_seq = self._frame_seq
error = self._error
reconnect_count = self._reconnect_count
return {
"running": self.is_running(),
"backend": self._backend,
"running": running,
"backend": backend,
"width": w,
"height": h,
"fps": f,
"jpeg_quality": q,
"frame_seq": self._frame_seq,
"frame_seq": frame_seq,
"age_s": age_s,
"error": self._error,
"reconnect_count": self._reconnect_count,
"error": error,
"reconnect_count": reconnect_count,
}
# ── helpers ─────────────────────────────────────────────

View File

@ -97,9 +97,14 @@ class FaceGallery:
yield int(m.group(1)), child
def _samples_in(self, face_dir: Path) -> list[Path]:
# Count only real face_N.<ext> samples — a stray non-conforming
# image (e.g. a manually dropped thumbnail.jpg) must not inflate the
# sample count, or delete_photo's "only photo" guard could let the
# last real sample be deleted, leaving the face with zero usable
# samples for the primer.
out: list[Path] = []
for p in sorted(face_dir.iterdir()):
if p.is_file() and p.suffix.lower() in ALLOWED_EXTS:
if p.is_file() and SAMPLE_NAME_RE.match(p.name):
out.append(p)
return out

View File

@ -40,6 +40,10 @@ class RecognitionState:
nav_target_place_id: int = 0
# N2 — Gemini-driven locomotion enable gate (default OFF for safety)
movement_enabled: bool = False
# Auto-record every conversation turn to data/recordings/ (default ON to
# match historical behavior). Toggled live from the Live Gemini panel; the
# child syncs TurnRecorder.enabled to this without a session restart.
record_enabled: bool = True
def read(path: Path) -> RecognitionState:
@ -63,6 +67,7 @@ def read(path: Path) -> RecognitionState:
nav_target_zone_id=int(raw.get("nav_target_zone_id", 0)),
nav_target_place_id=int(raw.get("nav_target_place_id", 0)),
movement_enabled=bool(raw.get("movement_enabled", False)),
record_enabled=bool(raw.get("record_enabled", True)),
)

View File

@ -9,12 +9,16 @@ A two-level hierarchy that replaces the flat place gallery:
Layout:
zones/
zone_{zid}/
meta.json {name, description, added_at}
meta.json {name, description, added_at, linked_map?}
place_{pid}/
meta.json {name, description, face_ids:[int], added_at}
meta.json {name, description, face_ids:[int], added_at, nav_place?}
place_1.jpg optional reference photos (0..N)
place_2.png
`linked_map` (optional) binds a zone to a nav2 map .db; `nav_place` (optional)
links a place to a nav2 place name in that map so it can be DRIVEN to. Both are
only written when set (absent = None on read), so old metadata stays valid.
`face_ids` reference enrolled faces in the SEPARATE face gallery
(data/faces/face_{id}); this module only stores the ids; name/photo
resolution is done by the caller (route layer + Gemini primer).
@ -56,6 +60,9 @@ class PlaceEntry:
description: str | None = None
face_ids: list[int] = field(default_factory=list)
sample_paths: list[Path] = field(default_factory=list)
# Name of the nav2 saved place (in the zone's linked map) this vision place
# drives to. None = announce/recognise only, no driving.
nav_place: str | None = None
def to_dict(self) -> dict:
return {
@ -64,6 +71,7 @@ class PlaceEntry:
"name": self.name,
"description": self.description,
"face_ids": list(self.face_ids),
"nav_place": self.nav_place,
"added_at": self.added_at,
"photos": [
{"name": p.name, "size_bytes": p.stat().st_size}
@ -80,12 +88,17 @@ class ZoneEntry:
dir: Path
description: str | None = None
places: list[PlaceEntry] = field(default_factory=list)
# nav2 map (.db basename, e.g. "office.db") this zone is bound to. A zone
# with a linked map can be driven in via "Gemini Nav"; its places link to
# that map's nav2 places.
linked_map: str | None = None
def to_dict(self) -> dict:
return {
"id": self.id,
"name": self.name,
"description": self.description,
"linked_map": self.linked_map,
"added_at": self.added_at,
"places": [p.to_dict() for p in self.places],
}
@ -135,24 +148,27 @@ class ZoneGallery:
# ── meta ─────────────────────────────────────────────────
def _zone_meta(self, zone_dir: Path) -> tuple[str | None, str | None, str | None]:
def _zone_meta(self, zone_dir: Path) -> tuple[str | None, str | None, str | None, str | None]:
"""Returns (name, description, added_at, linked_map)."""
meta_path = zone_dir / "meta.json"
if not meta_path.exists():
return None, None, None
return None, None, None, None
try:
data = json.loads(meta_path.read_text(encoding="utf-8"))
except Exception:
return None, None, None
return (data.get("name") or None), (data.get("description") or None), data.get("added_at")
return None, None, None, None
return ((data.get("name") or None), (data.get("description") or None),
data.get("added_at"), (data.get("linked_map") or None))
def _place_meta(self, place_dir: Path) -> tuple[str | None, str | None, list[int], str | None]:
def _place_meta(self, place_dir: Path) -> tuple[str | None, str | None, list[int], str | None, str | None]:
"""Returns (name, description, face_ids, added_at, nav_place)."""
meta_path = place_dir / "meta.json"
if not meta_path.exists():
return None, None, [], None
return None, None, [], None, None
try:
data = json.loads(meta_path.read_text(encoding="utf-8"))
except Exception:
return None, None, [], None
return None, None, [], None, None
raw_ids = data.get("face_ids") or []
face_ids: list[int] = []
for x in raw_ids:
@ -160,26 +176,32 @@ class ZoneGallery:
face_ids.append(int(x))
except (TypeError, ValueError):
continue
return (data.get("name") or None), (data.get("description") or None), face_ids, data.get("added_at")
return ((data.get("name") or None), (data.get("description") or None),
face_ids, data.get("added_at"), (data.get("nav_place") or None))
def _write_zone_meta(self, zone_dir: Path, name, description, added_at=None) -> None:
def _write_zone_meta(self, zone_dir: Path, name, description,
added_at=None, linked_map=None) -> None:
meta: dict = {}
if name:
meta["name"] = name
if description:
meta["description"] = description
if linked_map:
meta["linked_map"] = linked_map
meta["added_at"] = added_at or datetime.now().isoformat(timespec="seconds")
(zone_dir / "meta.json").write_text(
json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
def _write_place_meta(self, place_dir: Path, name, description,
face_ids=None, added_at=None) -> None:
face_ids=None, added_at=None, nav_place=None) -> None:
meta: dict = {}
if name:
meta["name"] = name
if description:
meta["description"] = description
meta["face_ids"] = [int(x) for x in (face_ids or [])]
if nav_place:
meta["nav_place"] = nav_place
meta["added_at"] = added_at or datetime.now().isoformat(timespec="seconds")
(place_dir / "meta.json").write_text(
json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
@ -187,19 +209,20 @@ class ZoneGallery:
# ── read ─────────────────────────────────────────────────
def _build_place(self, zone_id: int, place_id: int, place_dir: Path) -> PlaceEntry:
name, desc, face_ids, added = self._place_meta(place_dir)
name, desc, face_ids, added, nav_place = self._place_meta(place_dir)
return PlaceEntry(
id=place_id, zone_id=zone_id, name=name, description=desc,
face_ids=face_ids, added_at=added, dir=place_dir,
sample_paths=self._samples_in(place_dir),
sample_paths=self._samples_in(place_dir), nav_place=nav_place,
)
def _build_zone(self, zone_id: int, zone_dir: Path) -> ZoneEntry:
name, desc, added = self._zone_meta(zone_dir)
name, desc, added, linked_map = self._zone_meta(zone_dir)
places = [self._build_place(zone_id, pid, pdir)
for pid, pdir in self._iter_place_dirs(zone_dir)]
return ZoneEntry(id=zone_id, name=name, description=desc,
added_at=added, dir=zone_dir, places=places)
added_at=added, dir=zone_dir, places=places,
linked_map=linked_map)
def list_zones(self) -> list[ZoneEntry]:
with self._lock:
@ -273,8 +296,9 @@ class ZoneGallery:
zd = self._zone_dir(zone_id)
if not zd.is_dir():
raise FileNotFoundError(f"zone_{zone_id} not found")
_, desc, added = self._zone_meta(zd)
self._write_zone_meta(zd, (name or "").strip() or None, desc, added_at=added)
_, desc, added, linked_map = self._zone_meta(zd)
self._write_zone_meta(zd, (name or "").strip() or None, desc,
added_at=added, linked_map=linked_map)
log.info("Renamed zone_%d%s", zone_id, name or "(unnamed)")
def describe_zone(self, zone_id: int, description: str | None) -> None:
@ -282,10 +306,22 @@ class ZoneGallery:
zd = self._zone_dir(zone_id)
if not zd.is_dir():
raise FileNotFoundError(f"zone_{zone_id} not found")
name, _, added = self._zone_meta(zd)
self._write_zone_meta(zd, name, (description or "").strip() or None, added_at=added)
name, _, added, linked_map = self._zone_meta(zd)
self._write_zone_meta(zd, name, (description or "").strip() or None,
added_at=added, linked_map=linked_map)
log.info("Described zone_%d", zone_id)
def set_zone_map(self, zone_id: int, linked_map: str | None) -> None:
    """Bind (or unbind, with None/'') the zone to a nav2 map .db basename.

    The value is stripped; an empty or whitespace-only value is stored as
    "no linked map". The zone's name, description and added_at are read
    back and rewritten unchanged.

    Raises:
        FileNotFoundError: if the zone directory does not exist.
    """
    # Normalise once so the value written and the value logged are the same
    # (previously the raw argument was logged, so "  " logged as a link while
    # actually clearing it).
    target = (linked_map or "").strip() or None
    with self._lock:
        zd = self._zone_dir(zone_id)
        if not zd.is_dir():
            raise FileNotFoundError(f"zone_{zone_id} not found")
        name, desc, added, _ = self._zone_meta(zd)
        self._write_zone_meta(zd, name, desc, added_at=added,
                              linked_map=target)
        log.info("Linked zone_%d → map %s", zone_id, target or "(none)")
def delete_zone(self, zone_id: int) -> None:
import shutil
with self._lock:
@ -300,7 +336,8 @@ class ZoneGallery:
def create_place(self, zone_id: int, name: str | None = None,
description: str | None = None,
face_ids: list[int] | None = None,
image_bytes_list: list[bytes] | None = None) -> PlaceEntry:
image_bytes_list: list[bytes] | None = None,
nav_place: str | None = None) -> PlaceEntry:
with self._lock:
zd = self._zone_dir(zone_id)
if not zd.is_dir():
@ -311,10 +348,11 @@ class ZoneGallery:
for idx, data in enumerate(image_bytes_list or [], start=1):
(pd / f"place_{idx}{self._detect_ext(data)}").write_bytes(data)
self._write_place_meta(pd, (name or "").strip() or None,
(description or "").strip() or None, face_ids or [])
log.info("Created zone_%d/place_%d (name=%s, photos=%d, faces=%d)",
(description or "").strip() or None, face_ids or [],
nav_place=(nav_place or "").strip() or None)
log.info("Created zone_%d/place_%d (name=%s, photos=%d, faces=%d, nav=%s)",
zone_id, pid, name or "(unnamed)",
len(image_bytes_list or []), len(face_ids or []))
len(image_bytes_list or []), len(face_ids or []), nav_place or "-")
return self._build_place(zone_id, pid, pd)
def rename_place(self, zone_id: int, place_id: int, name: str | None) -> None:
@ -322,8 +360,9 @@ class ZoneGallery:
pd = self._place_dir(zone_id, place_id)
if not pd.is_dir():
raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found")
_, desc, fids, added = self._place_meta(pd)
self._write_place_meta(pd, (name or "").strip() or None, desc, fids, added_at=added)
_, desc, fids, added, navp = self._place_meta(pd)
self._write_place_meta(pd, (name or "").strip() or None, desc, fids,
added_at=added, nav_place=navp)
log.info("Renamed zone_%d/place_%d%s", zone_id, place_id, name or "(unnamed)")
def describe_place(self, zone_id: int, place_id: int, description: str | None) -> None:
@ -331,8 +370,9 @@ class ZoneGallery:
pd = self._place_dir(zone_id, place_id)
if not pd.is_dir():
raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found")
name, _, fids, added = self._place_meta(pd)
self._write_place_meta(pd, name, (description or "").strip() or None, fids, added_at=added)
name, _, fids, added, navp = self._place_meta(pd)
self._write_place_meta(pd, name, (description or "").strip() or None, fids,
added_at=added, nav_place=navp)
log.info("Described zone_%d/place_%d", zone_id, place_id)
def set_place_faces(self, zone_id: int, place_id: int, face_ids: list[int]) -> None:
@ -341,16 +381,29 @@ class ZoneGallery:
pd = self._place_dir(zone_id, place_id)
if not pd.is_dir():
raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found")
name, desc, _, added = self._place_meta(pd)
name, desc, _, added, navp = self._place_meta(pd)
clean = []
for x in (face_ids or []):
try:
clean.append(int(x))
except (TypeError, ValueError):
continue
self._write_place_meta(pd, name, desc, clean, added_at=added)
self._write_place_meta(pd, name, desc, clean, added_at=added, nav_place=navp)
log.info("Set zone_%d/place_%d faces → %s", zone_id, place_id, clean)
def set_place_nav(self, zone_id: int, place_id: int, nav_place: str | None) -> None:
    """Link (or unlink, with None/'') a place to a nav2 place name in the
    zone's map. This is what makes a vision place drivable.

    The value is stripped; an empty or whitespace-only value is stored as
    "no nav place". The place's name, description, face ids and added_at
    are read back and rewritten unchanged.

    Raises:
        FileNotFoundError: if the place directory does not exist.
    """
    # Normalise once so the value written and the value logged are the same
    # (previously the raw argument was logged, so "  " logged as a link while
    # actually clearing it).
    target = (nav_place or "").strip() or None
    with self._lock:
        pd = self._place_dir(zone_id, place_id)
        if not pd.is_dir():
            raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found")
        name, desc, fids, added, _ = self._place_meta(pd)
        self._write_place_meta(pd, name, desc, fids, added_at=added,
                               nav_place=target)
        log.info("Linked zone_%d/place_%d → nav place %s",
                 zone_id, place_id, target or "(none)")
def add_photo(self, zone_id: int, place_id: int, image_bytes: bytes) -> str:
with self._lock:
pd = self._place_dir(zone_id, place_id)

View File

@ -78,6 +78,13 @@ PROFILES: list[AudioProfile] = [
source_pattern="alsa_input.platform-sound",
description="Jetson / G1 built-in audio chip. (Default)",
),
AudioProfile(
id="hollyland_builtin",
label="Hollyland mic + built-in speaker",
sink_pattern="platform-sound",
source_pattern="hollyland,wireless_microphone",
description="Hollyland wireless lavalier microphone with the Jetson built-in speaker.",
),
AudioProfile(
id="anker_powerconf",
label="Anker PowerConf (mic + speaker)",
@ -85,6 +92,16 @@ PROFILES: list[AudioProfile] = [
source_pattern="powerconf,anker",
description="Anker PowerConf USB conference unit — mic + speaker on the same device.",
),
AudioProfile(
id="jbl_builtin_mic",
label="JBL speaker + built-in mic",
# The JBL connects over Bluetooth → its PulseAudio sink is a bluez sink
# (name is MAC-based, e.g. bluez_output.XX_XX_…). Match "jbl" or "bluez".
sink_pattern="jbl,bluez",
# The JBL has NO microphone → input stays on the G1 built-in mic.
source_pattern="alsa_input.platform-sound",
description="JBL Bluetooth speaker for output + the G1 built-in microphone for input (the JBL has no mic).",
),
]
# The profile that should be used when no saved state and no auto-detect succeeds.
@ -564,6 +581,13 @@ def save_state(state: dict[str, Any]) -> None:
# ───────────────────────── current selection ─────────────────────────
# Dedupe the "manual override stale" fall-through log. current_selection() is
# called every ~1.5s by the audio watcher, so logging every cycle spams the log
# whenever a saved device (e.g. an unplugged Anker) stays absent. We log only
# when the stale state changes (and reset when the override becomes valid).
_LAST_STALE_LOG: Any = None
def current_selection() -> dict[str, Any]:
"""Resolve the currently active sink/source.
@ -609,7 +633,9 @@ def current_selection() -> dict[str, Any]:
source_names = {
s["name"] for s in (list_sources() if pactl_available() else [])
}
global _LAST_STALE_LOG
if saved_sink in sink_names and saved_source in source_names:
_LAST_STALE_LOG = None # override valid again — re-arm the log
return {
"source_kind": "manual",
"profile": None,
@ -618,7 +644,16 @@ def current_selection() -> dict[str, Any]:
"sink_description": "",
"source_description": "",
}
log.info(
# Benign expected state (a saved device is simply unplugged) — and this
# is hit by every status poll, possibly from more than one process, so a
# module cache can't fully suppress it. Log at DEBUG (off the INFO log),
# and at INFO only ONCE when the stale state first changes, so an
# operator still gets a single breadcrumb without the 30s spam.
_stale_key = (saved_sink, saved_sink in sink_names,
saved_source, saved_source in source_names)
_first = _stale_key != _LAST_STALE_LOG
_LAST_STALE_LOG = _stale_key
(log.info if _first else log.debug)(
"current_selection: manual override stale (sink=%s present=%s, "
"source=%s present=%s) — falling through to auto-detect",
saved_sink, saved_sink in sink_names,

View File

@ -659,6 +659,150 @@ class AnkerSpeaker(_PyAudioSpeaker):
super().__init__(device_pattern="powerconf,anker", label="Anker")
class PulseStreamSpeaker(Speaker):
    """Stream PCM to PulseAudio through a `pacat` subprocess.

    Why not _PyAudioSpeaker: PortAudio's 'pulse' device is unavailable in this
    conda env (the ALSA→pulse plugin libasound_module_conf_pulse.so isn't on the
    env's plugin path), so PyAudio can't reach PulseAudio at all — silence. The
    record-playback path proved `pacat`/`paplay` work, so we reuse that: pacat
    inherits PULSE_SERVER/XDG_RUNTIME_DIR from the child process environment.

    Sink choice: when `sink_pattern` matches a sink listed by
    `pactl list short sinks`, pacat is pinned to it with --device; otherwise
    pacat plays to PulseAudio's default sink. Used by the JBL profile (paired
    with the G1 built-in DDS mic).
    """

    HW_RATE = 24_000  # Gemini's native receive rate; PulseAudio resamples to the sink

    def __init__(self, label: str = "Pulse", sink_pattern: str = ""):
        """Create an idle speaker; no subprocess is started until begin_stream()/send_chunk().

        Args:
            label: short name used as a prefix in log messages.
            sink_pattern: comma-separated, case-insensitive substrings; the
                first PulseAudio sink whose name contains any of them is used.
        """
        self._label = label
        self._sink_pattern = sink_pattern
        self._sink_name: Optional[str] = None  # resolved PA sink, cached
        self._proc: Optional["subprocess.Popen"] = None
        self._stop_flag = threading.Event()
        self._lock = threading.RLock()
        self._total_sent = 0.0  # seconds of audio written in the current stream
        self._closed = False

    def _resolve_sink(self) -> Optional[str]:
        """Find the PA sink whose name matches our pattern (e.g. the JBL), so we
        can pin pacat to it with --device instead of relying on the (drift-prone)
        default sink. Returns None → pacat falls back to the default sink."""
        if not self._sink_pattern:
            return None
        pats = [p.strip().lower() for p in self._sink_pattern.split(",") if p.strip()]
        try:
            out = subprocess.run(
                ["pactl", "list", "short", "sinks"],
                capture_output=True, text=True, timeout=2,
            ).stdout
        except Exception:
            # Best effort: pactl missing / timed out → use the default sink.
            return None
        for line in out.splitlines():
            # `pactl list short` rows are tab-separated; column 1 is the sink name.
            cols = line.split("\t")
            name = cols[1] if len(cols) > 1 else ""
            if name and any(p in name.lower() for p in pats):
                return name
        return None

    def _spawn(self) -> None:
        """Start pacat unless one is already running. On failure `_proc` is None."""
        if self._proc is not None and self._proc.poll() is None:
            return
        # Only a successful lookup is cached; a miss is retried on the next spawn.
        if self._sink_name is None:
            self._sink_name = self._resolve_sink()
        cmd = [
            "pacat", "--playback",
            "--rate=%d" % self.HW_RATE, "--format=s16le", "--channels=1",
            "--latency-msec=120",
            "--client-name=sanad_voice", "--stream-name=sanad_voice_jbl",
        ]
        if self._sink_name:
            cmd.append("--device=%s" % self._sink_name)
        try:
            self._proc = subprocess.Popen(
                cmd, stdin=subprocess.PIPE,
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
            )
        except Exception as exc:
            log.warning("%sSpeaker: pacat spawn failed: %s", self._label, exc)
            self._proc = None

    @staticmethod
    def _shutdown(proc: "subprocess.Popen", *, terminate: bool, timeout: float) -> None:
        """Close pacat's stdin, wait for it to exit, and always reap it.

        Args:
            proc: the pacat process to shut down.
            terminate: send SIGTERM first (drop buffered audio) instead of
                letting pacat drain after stdin closes.
            timeout: seconds to wait before escalating to kill().
        """
        try:
            if proc.stdin:
                proc.stdin.close()
        except Exception:
            pass
        try:
            if terminate:
                proc.terminate()
            proc.wait(timeout=timeout)
        except Exception:
            try:
                proc.kill()
                # Reap the killed child so it does not linger as a zombie.
                proc.wait(timeout=1)
            except Exception:
                pass

    def begin_stream(self) -> None:
        """Reset the interrupted/closed state and counters, then start pacat."""
        with self._lock:
            self._stop_flag.clear()
            self._closed = False
            self._total_sent = 0.0
            self._spawn()

    def send_chunk(self, pcm: PCMLike, source_rate: int) -> None:
        """Write one PCM chunk to pacat, resampling to HW_RATE when needed.

        Silently does nothing after stop(), for chunks shorter than 10
        samples, or when pacat could not be started. A broken pipe is
        ignored; the next call respawns pacat.
        """
        with self._lock:
            if self._stop_flag.is_set() or self._closed:
                return
            arr = _as_int16_array(pcm)
            if arr.size < 10:
                return
            if source_rate != self.HW_RATE:
                arr = _resample_int16(arr, source_rate, self.HW_RATE)
            if self._proc is None or self._proc.poll() is not None:
                self._spawn()
            p = self._proc
            if p is None or p.stdin is None:
                return
            try:
                p.stdin.write(arr.tobytes())
                p.stdin.flush()
                self._total_sent += len(arr) / self.HW_RATE
            except (BrokenPipeError, OSError):
                pass

    def wait_finish(self) -> None:
        """Let the current stream play out, then release the process."""
        # Detach under the lock, wait outside it so stop()/send_chunk() aren't blocked.
        with self._lock:
            p = self._proc
            self._proc = None
        if p is None:
            return
        # Closing stdin makes pacat drain its buffer and exit on its own.
        self._shutdown(p, terminate=False, timeout=8)

    def stop(self) -> None:
        """Interrupt playback now: mark the stream stopped and terminate pacat."""
        with self._lock:
            self._stop_flag.set()
            self._closed = True
            p = self._proc
            self._proc = None
        if p is not None:
            self._shutdown(p, terminate=True, timeout=2)

    @property
    def interrupted(self) -> bool:
        """True once stop() was called and until the next begin_stream()."""
        return self._stop_flag.is_set()

    @property
    def total_sent_sec(self) -> float:
        """Seconds of audio written since the last begin_stream()."""
        return self._total_sent
# ─── Factory ──────────────────────────────────────────────
_PROFILE_ALIASES = {
@ -669,9 +813,11 @@ _PROFILE_ALIASES = {
"anker_powerconf": "anker",
"hollyland": "hollyland_builtin",
"hollyland_builtin": "hollyland_builtin",
"jbl": "jbl_builtin_mic",
"jbl_builtin_mic": "jbl_builtin_mic",
}
SUPPORTED_PROFILES = ("builtin", "anker", "hollyland_builtin")
SUPPORTED_PROFILES = ("builtin", "anker", "hollyland_builtin", "jbl_builtin_mic")
@dataclass
@ -733,6 +879,12 @@ class AudioIO:
"requires audio_client"
)
return HollylandMic(), BuiltinSpeaker(audio_client)
if resolved == "jbl_builtin_mic":
# JBL speaker via pacat → PulseAudio default sink (pinned to the JBL
# by the dashboard) + the G1 built-in DDS mic (the JBL has no mic).
# pacat is used because PyAudio's 'pulse' device is unavailable in
# this env. Neither backend needs the AudioClient.
return BuiltinMic(), PulseStreamSpeaker(label="JBL", sink_pattern="jbl,bluez")
raise AssertionError(f"unhandled resolved profile: {resolved!r}")
@classmethod

View File

@ -1,7 +1,10 @@
"""Audio I/O manager — recording and playback via PyAudio.
Handles microphone capture, speaker playback, and speaker-monitor recording.
Thread-safe; one playback at a time via play_lock.
Handles microphone capture and speaker playback. Thread-safe; one
playback at a time via play_lock.
(Speaker-monitor / `.monitor`-source capture lives in voice/typed_replay.py,
not here — see its parec/PyAudio MonitorRecorder.)
Device selection is dynamic — read from voice.audio_devices on each refresh.
"""
@ -49,7 +52,6 @@ from Project.Sanad.config import (
RECEIVE_SAMPLE_RATE,
SINK as DEFAULT_SINK,
SOURCE as DEFAULT_SOURCE,
MONITOR_SOURCE,
)
from Project.Sanad.core.logger import get_logger
from Project.Sanad.voice import audio_devices as ad
@ -58,10 +60,18 @@ log = get_logger("audio_manager")
FORMAT = pyaudio.paInt16 if pyaudio else 8
# Cached current selection — updated by refresh_devices()
_DEVICE_LOCK = threading.Lock()
_current_sink = DEFAULT_SINK
_current_source = DEFAULT_SOURCE
# Default fallback constants only — the live selection is per-instance state
# on AudioManager (self._current_sink / self._current_source), guarded by
# self._device_lock. Keeping the selection module-global meant two
# AudioManager instances stomped each other's sink/source; it now lives on
# the instance.
# How long an applied pactl selection is trusted before the hot playback /
# recording path re-runs the (expensive, multi-shell) pactl scan. The
# audio_devices watcher and the dashboard Apply endpoint already re-resolve
# on device change, so a short TTL here is purely a backstop against an
# unobserved hot-unplug — it does NOT need to be tight.
_DEFAULTS_TTL_S = 5.0
def _run_pactl(args: list[str]) -> subprocess.CompletedProcess[str]:
@ -80,26 +90,6 @@ def _resolve_devices() -> tuple[str, str]:
return DEFAULT_SINK, DEFAULT_SOURCE
def ensure_audio_defaults():
"""Re-scan all USB ports, resolve the active profile, set pactl defaults.
This is called at startup AND before every playback/recording so that
even if the user unplugs/re-plugs a device into a different port, the
correct sink/source is always used.
"""
try:
result = ad.apply_current_selection()
cur = result.get("selection", {})
sink = cur.get("sink", "")
source = cur.get("source", "")
with _DEVICE_LOCK:
global _current_sink, _current_source
_current_sink = sink or DEFAULT_SINK
_current_source = source or DEFAULT_SOURCE
except Exception as exc:
log.warning("Audio defaults not applied: %s", exc)
class _PulseOpenFailed(RuntimeError):
"""Signal from `_play_pcm_via_pulse` that PortAudio refused to open the
output stream (sink gone, bad I/O combination, etc.) — lets `play_wav`
@ -116,6 +106,19 @@ class AudioManager:
)
self.pya = pyaudio.PyAudio()
self.play_lock = threading.Lock()
# Per-instance device selection (was module-global — two
# AudioManagers used to share one sink/source and stomp each
# other). _device_lock guards _current_sink / _current_source.
self._device_lock = threading.Lock()
self._current_sink = DEFAULT_SINK
self._current_source = DEFAULT_SOURCE
# Throttle ensure_audio_defaults() on the hot path — monotonic ts of
# the last successful apply. 0.0 = never applied yet.
self._defaults_applied_at = 0.0
# Cached PortAudio device index for the 'pulse'/'default' device
# (None = not probed; -1 = probed, absent). Lets play_pcm/record_mic
# route through PulseAudio instead of PortAudio's silent hw:0 default.
self._pulse_pa_index: int | None = None
# Lazily-initialised G1 DDS audio client (for play_wav → chest speaker)
self._g1_audio_client: Any = None
# G1 playback state — present during an active play_wav() call,
@ -123,9 +126,17 @@ class AudioManager:
# from other threads while _play_pcm_via_g1 holds play_lock.
self._play_state_lock = threading.Lock()
self._play_state: dict[str, Any] | None = None
# Monotonic play id — a new play_wav bumps it to preempt the in-flight
# one (so playing a record interrupts the previous instead of queueing).
self._play_epoch = 0
# Manual "hold" for the live Gemini pause. Default False = AUTO (record
# playback pauses Gemini only for the clip, then resumes). When True, the
# live voice is paused and STAYS paused (record playback won't resume it)
# until the dashboard releases the hold. Set via set_live_voice_hold().
self._live_voice_hold = False
# Resolve devices and set PulseAudio defaults at startup
self.refresh_devices()
ensure_audio_defaults()
self.ensure_audio_defaults(force=True)
def _get_g1_audio_client(self):
"""Return a cached G1 AudioClient (DDS) — creates on first use.
@ -140,7 +151,14 @@ class AudioManager:
return self._g1_audio_client
try:
c = AudioClient()
c.SetTimeout(5.0)
# SHORT RPC timeout (was 5.0). The G1 "voice" service replies to RPCs
# on a topic SHARED with the live-voice child's AudioClient; when both
# run, the dashboard's reply ack is frequently lost in the collision,
# so _Call would block the FULL timeout per STOP/PlayStream — that was
# the "5s delay / no sound". The request itself is still published
# (audio plays); we don't need the ack, so fail fast. Good-case replies
# arrive in ~0.1s, so 0.6s keeps the happy path while killing the hang.
c.SetTimeout(0.6)
c.Init()
try:
c.SetVolume(100)
@ -156,23 +174,109 @@ class AudioManager:
def refresh_devices(self) -> dict[str, str]:
"""Re-read selected sink/source from audio_devices module."""
sink, source = _resolve_devices()
with _DEVICE_LOCK:
global _current_sink, _current_source
_current_sink, _current_source = sink, source
with self._device_lock:
self._current_sink, self._current_source = sink, source
log.info("AudioManager devices refreshed: sink=%s source=%s", sink, source)
return {"sink": sink, "source": source}
def ensure_audio_defaults(self, force: bool = False) -> None:
    """Re-resolve the active audio profile and set the pactl defaults.

    Called at startup and before playback/recording so that a device
    re-plugged into a different port still resolves to the right
    sink/source.

    The scan (ad.apply_current_selection → current_selection →
    detect_plugged_profiles) shells out to pactl many times, so the hot
    playback/record path skips it when it last succeeded within
    `_DEFAULTS_TTL_S` seconds. Pass `force=True` (startup / device
    change) to bypass the throttle; a forced run also re-applies the
    saved speaker volume to the active sink.

    Failures are logged and swallowed; the previous selection is kept.
    """
    if not force:
        with self._device_lock:
            age = time.monotonic() - self._defaults_applied_at
            still_fresh = age < _DEFAULTS_TTL_S
        if still_fresh:
            return
    try:
        selection = ad.apply_current_selection().get("selection", {})
        new_sink = selection.get("sink", "") or DEFAULT_SINK
        new_source = selection.get("source", "") or DEFAULT_SOURCE
        with self._device_lock:
            self._current_sink = new_sink
            self._current_source = new_source
            self._defaults_applied_at = time.monotonic()
        # PulseAudio does not persist the USB/BT (JBL/Anker) sink volume
        # across restarts, so a forced run restores the user's saved level.
        if force:
            self._restore_sink_volume()
    except Exception as exc:
        log.warning("Audio defaults not applied: %s", exc)
def _restore_sink_volume(self) -> None:
    """Apply the saved `audio.g1_volume` setting to the active sink.

    The configured value (default 100) is clamped to 0..100 and pushed
    with `pactl set-sink-volume`; a non-zero volume also unmutes the
    sink. Falls back to `@DEFAULT_SINK@` when no sink is selected.
    Best-effort: any failure is logged as a warning, never raised.
    """
    try:
        import subprocess as _sp
        from Project.Sanad.config import load_config

        audio_cfg = (load_config() or {}).get("audio") or {}
        vol = max(0, min(100, int(audio_cfg.get("g1_volume", 100))))
        sink = self._current_sink or "@DEFAULT_SINK@"
        # Same quiet, non-raising invocation for both pactl calls.
        quiet = {"timeout": 3, "check": False,
                 "stdout": _sp.DEVNULL, "stderr": _sp.DEVNULL}
        _sp.run(["pactl", "set-sink-volume", sink, "%d%%" % vol], **quiet)
        if vol > 0:
            _sp.run(["pactl", "set-sink-mute", sink, "0"], **quiet)
        log.info("restored saved speaker volume → %d%% (sink=%s)", vol, sink)
    except Exception as exc:
        log.warning("restore sink volume failed: %s", exc)
def _pulse_device_index(self) -> int | None:
"""Resolve the PortAudio device index that routes through PulseAudio.
On this Jetson's conda PyAudio, opening with output/input device
index None lands on PortAudio's default — the silent hw:0
platform-sound card. Opening PortAudio's 'pulse' (or 'default')
device instead routes through the PulseAudio daemon, which
ensure_audio_defaults() has already pointed at the resolved
sink/source. Mirrors voice/audio_io.py's _resolve_device_index.
Returns the device index, or None when PortAudio exposes no
pulse/default device (then the caller falls back to PortAudio's
own default). Cached for the lifetime of the PyAudio handle.
"""
if self._pulse_pa_index is not None:
return self._pulse_pa_index if self._pulse_pa_index >= 0 else None
pulse_idx = default_idx = None
try:
for i in range(self.pya.get_device_count()):
info = self.pya.get_device_info_by_index(i)
name_lower = str(info.get("name", "")).lower()
if pulse_idx is None and name_lower == "pulse":
pulse_idx = i
elif default_idx is None and name_lower == "default":
default_idx = i
except Exception as exc:
log.debug("pulse device probe failed: %s", exc)
idx = pulse_idx if pulse_idx is not None else default_idx
self._pulse_pa_index = idx if idx is not None else -1
return idx
@property
def current_sink(self) -> str:
with _DEVICE_LOCK:
return _current_sink
with self._device_lock:
return self._current_sink
@property
def current_source(self) -> str:
with _DEVICE_LOCK:
return _current_source
with self._device_lock:
return self._current_source
def close(self):
    """Release the PyAudio handle.

    The cached 'pulse' device index belongs to this handle, so it is
    reset first; a later re-init then probes the device list again.
    """
    self._pulse_pa_index = None
    handle = self.pya
    handle.terminate()
def sample_width(self) -> int:
@ -182,12 +286,16 @@ class AudioManager:
def play_pcm(self, pcm_bytes: bytes, channels: int, sample_rate: int, sample_width: int):
with self.play_lock:
ensure_audio_defaults()
self.ensure_audio_defaults()
# Route through PortAudio's 'pulse' device so playback reaches
# the resolved sink — output_device_index=None defaults to the
# silent hw:0 platform-sound card on this Jetson's conda PyAudio.
stream = self.pya.open(
format=self.pya.get_format_from_width(sample_width),
channels=channels,
rate=sample_rate,
output=True,
output_device_index=self._pulse_device_index(),
frames_per_buffer=CHUNK_SIZE,
)
try:
@ -201,7 +309,10 @@ class AudioManager:
# Sink-name substrings that mean "PulseAudio routes this somewhere
# audible without DDS" — extend the tuple to add more USB cards (e.g.
# hollyland sink). Matched case-insensitively.
_PULSE_SINK_MARKERS = ("anker", "powerconf", "hollyland")
# "jbl"/"bluez" → the JBL Bluetooth speaker (and any bluez sink) is a real
# PulseAudio sink, so record playback must go via paplay/PulseAudio, NOT the
# G1 DDS chest speaker.
_PULSE_SINK_MARKERS = ("anker", "powerconf", "hollyland", "jbl", "bluez")
# Sample rate Anker PowerConf (and most USB UAC1 cards) accept natively
# — used as the resample target before opening a PortAudio stream so
# we don't hit paInvalidSampleRate when the WAV's native rate
@ -299,6 +410,13 @@ class AudioManager:
use_pulse = any(m in sink_lc for m in self._PULSE_SINK_MARKERS)
client = self._get_g1_audio_client() if not use_pulse else None
# Lip-sync: drive the LED mask mouth from THIS clip's amplitude while it
# plays (synced to the playback position via _play_state), same as the
# live Gemini voice does. Best-effort; stopped + mouth-closed when the
# playback path below returns. No-op if numpy / the mask are unavailable.
_mask_stop = threading.Event()
self._start_mask_lipsync(data, channels, sw, rate, _mask_stop)
try:
if not use_pulse and client is not None and _HAS_NUMPY and sw == 2:
log.info("play_wav route=g1_dds sink=%s record=%s",
sink or "?", record_name or "?")
@ -344,14 +462,138 @@ class AudioManager:
log.warning("play_wav pulse path failed (%s); no DDS "
"fallback available", exc)
route = ("paplay" if use_paplay else "pulse") + "_failed"
finally:
_mask_stop.set()
duration = len(data) / (rate * channels * sw) if rate else 0
return {"path": str(path), "duration_seconds": round(duration, 3),
"route": route, "sink": sink or "default"}
def _set_live_voice_paused(self, paused: bool) -> None:
"""Pause/resume the live Gemini session around a record playback so it
doesn't talk over (or react to) the clip. Best-effort + lazy import to
avoid a hard dependency on the dashboard process; no-op if the live
subprocess isn't running.
Runs on a DETACHED daemon thread: the pause is sent over the child's
stdin pipe, and when the child is busy (e.g. mid-reconnect) that write
can block. We must NEVER let it stall the playback loop which calls
this right before streaming or the record goes silent. Fire-and-forget
keeps playback starting immediately; a slightly late pause is harmless."""
def _do() -> None:
try:
from Project.Sanad.main import live_sub
if (live_sub is not None and hasattr(live_sub, "send_pause")
and hasattr(live_sub, "is_running")
and live_sub.is_running()):
live_sub.send_pause(paused)
except Exception:
pass
threading.Thread(target=_do, name="live-voice-pause", daemon=True).start()
def set_live_voice_hold(self, hold: bool) -> bool:
    """Set or release the manual hold on the live-Gemini pause.

    hold=True  → pause the live voice now and keep it paused; record
                 playback will not auto-resume it while the hold is set.
    hold=False → release the hold and resume the live voice, unless a
                 clip is currently playing (that clip's own cleanup
                 resumes it when it ends).

    Returns the resulting hold state. Safe to call repeatedly.
    """
    self._live_voice_hold = bool(hold)
    if not self._live_voice_hold:
        with self._play_state_lock:
            clip_active = self._play_state is not None
        if not clip_active:
            self._set_live_voice_paused(False)
    else:
        self._set_live_voice_paused(True)
    log.info("live-voice hold → %s", "PAUSED" if self._live_voice_hold else "AUTO")
    return self._live_voice_hold
# -- LED mask lip-sync for record playback --------------------------------
_MASK_FRAME_SEC = 0.08 # 80 ms mouth-level frame (matches the Gemini lip-sync)
def _set_mask_mouth(self, level: int) -> None:
"""Push a mouth-open level (0..3) to the LED mask. Best-effort, lazy
import, thread-safe + a no-op if the mask isn't running."""
try:
from Project.Sanad.main import mask_face
if mask_face is not None and hasattr(mask_face, "set_mouth"):
mask_face.set_mouth(int(level))
except Exception:
pass
def _mouth_envelope(self, data: bytes, channels: int, sw: int,
rate: int) -> list[int]:
"""Per-80ms mouth-open levels (0..3) from a clip's RMS — same thresholds
the Gemini child uses, so records and the live voice move the mouth the
same way. Empty if numpy/format unsupported."""
if not _HAS_NUMPY or sw != 2 or not rate:
return []
try:
arr = np.frombuffer(data, dtype=np.int16)
if channels == 2 and arr.size % 2 == 0:
arr = arr.reshape(-1, 2).mean(axis=1).astype(np.int16)
frame = max(1, int(rate * self._MASK_FRAME_SEC))
env: list[int] = []
for i in range(0, len(arr), frame):
chunk = arr[i:i + frame].astype(np.float64)
rms = float(np.sqrt(np.mean(chunk ** 2))) if chunk.size else 0.0
env.append(0 if rms < 140 else 1 if rms < 650
else 2 if rms < 1700 else 3)
return env
except Exception:
return []
def _start_mask_lipsync(self, data: bytes, channels: int, sw: int,
rate: int, stop_evt: "threading.Event") -> None:
env = self._mouth_envelope(data, channels, sw, rate)
if not env:
return
threading.Thread(
target=self._mask_mouth_driver, args=(env, stop_evt),
name="rec-lipsync", daemon=True,
).start()
def _mask_mouth_driver(self, env: list[int],
stop_evt: "threading.Event") -> None:
"""Walk the mouth envelope synced to the live playback position
(_play_state) and drive the mask mouth. Honours pause (mouth closed)
and seeks. Closes the mouth when the play ends."""
last = -1
try:
while not stop_evt.is_set():
t = -1.0
with self._play_state_lock:
st = self._play_state
if st is not None and not st["paused"] and st["play_started_at"] > 0:
r = st["rate"] or 1
t = (st["play_started_pos"] / r
+ (time.time() - st["play_started_at"]))
lvl = 0
if t >= 0:
idx = int(t / self._MASK_FRAME_SEC)
lvl = env[idx] if 0 <= idx < len(env) else 0
if lvl != last:
self._set_mask_mouth(lvl)
last = lvl
stop_evt.wait(0.05)
finally:
self._set_mask_mouth(0)
# -- G1 DDS-routed playback --
_G1_STREAM_APP = "sanad_playback"
# The live Gemini voice streams to the SAME G1 chest speaker under a
# DIFFERENT app_name (config/voice_config.json speaker.app_name, default
# "sanad"). The G1 "voice" audio service is per-app-name, so a record must
# STOP that app too — otherwise Gemini's chunked PlayStream("sanad", …) per
# spoken word keeps stomping the record's single PlayStream and the clip is
# silent while its counter ticks. STOP_PLAY is process-agnostic (keyed only
# by app_name on the shared DDS "voice" service), so stopping it from here
# halts the separate voice child's stream. Must match voice_config.json.
_LIVE_VOICE_APP = "sanad"
_G1_HW_RATE = 16_000
def stop_playback(self) -> None:
@ -404,6 +646,25 @@ class AudioManager:
self._play_state.get("record_name") or "?")
return {"ok": True, "resumed": True}
def seek_playback(self, position_sec: float) -> dict[str, Any]:
    """Jump to `position_sec` in the active clip.

    The requested position is clamped to the clip and stored as the new
    sample position, with the `seek` flag set for the play loop to pick
    up. `play_started_at` is zeroed so position estimates stay parked at
    the target until audio restarts. Works whether playing or paused —
    when paused, the new position takes effect on resume.

    Returns:
        `{"ok": False, "reason": "nothing playing"}` when idle, else
        `{"ok": True, "position_sec": ..., "duration_sec": ...}`.
    """
    with self._play_state_lock:
        state = self._play_state
        if state is None:
            return {"ok": False, "reason": "nothing playing"}
        rate = state["rate"] or 1
        total = state["total_samples"]
        wanted = int(float(position_sec) * rate)
        target = max(0, min(total, wanted))
        state["pos"] = target
        state["play_started_pos"] = target
        state["play_started_at"] = 0.0  # park until re-push
        state["seek"] = True
        log.info("Playback seek → %.2fs (record=%s)",
                 target / rate, state.get("record_name") or "?")
        duration = round(total / rate, 2) if rate else 0.0
        return {"ok": True, "position_sec": round(target / rate, 2),
                "duration_sec": duration}
def playback_status(self) -> dict[str, Any]:
"""Snapshot of the current playback for the dashboard. Returns
`playing=False` when idle. `position_sec` is best-effort
@ -411,7 +672,8 @@ class AudioManager:
with self._play_state_lock:
if self._play_state is None:
return {"playing": False, "paused": False, "record_name": None,
"position_sec": 0.0, "duration_sec": 0.0}
"position_sec": 0.0, "duration_sec": 0.0,
"live_hold": self._live_voice_hold}
rate = self._play_state["rate"] or 1
total = self._play_state["total_samples"]
pos = self._play_state["pos"]
@ -426,6 +688,7 @@ class AudioManager:
"record_name": self._play_state.get("record_name"),
"position_sec": round(pos / rate, 2),
"duration_sec": round(total / rate, 2) if rate else 0.0,
"live_hold": self._live_voice_hold,
}
def _play_pcm_via_g1(self, pcm_bytes: bytes, channels: int,
@ -458,7 +721,26 @@ class AudioManager:
rate = self._G1_HW_RATE
total_samples = len(arr)
# Preempt any in-flight playback: signal it to stop + bump the epoch so
# a NEW play starts promptly instead of queueing behind the previous
# clip (or blocking forever on a paused one). This is what makes
# "play another record" interrupt-and-start rather than stall.
with self._play_state_lock:
if self._play_state is not None:
self._play_state["stop"] = True
self._play_epoch += 1
my_epoch = self._play_epoch
# play_lock serialises overlapping play_wav() calls; the preempted
# playback (stop=True) releases it promptly. pause/resume/stop do NOT
# take it (they only touch _play_state under _play_state_lock).
with self.play_lock:
# State is set INSIDE the lock now (was before — which let a second
# play stomp the first's state). Bail if a still-newer play won the
# race while we waited for the lock.
with self._play_state_lock:
if my_epoch != self._play_epoch:
return
self._play_state = {
"record_name": record_name,
"rate": rate,
@ -466,19 +748,20 @@ class AudioManager:
"pos": 0,
"paused": False,
"stop": False,
"seek": False,
"play_started_at": 0.0,
"play_started_pos": 0,
"epoch": my_epoch,
}
# play_lock serialises overlapping play_wav() calls; pause/resume/stop
# do NOT take it (they only touch _play_state under _play_state_lock).
with self.play_lock:
# Pause the live Gemini for the clip (idempotent across preempting
# plays; the last play's finally resumes it).
self._set_live_voice_paused(True)
try:
while True:
# Snapshot the state for this iteration
with self._play_state_lock:
st = self._play_state
if st is None or st["stop"]:
if st is None or st.get("epoch") != my_epoch or st["stop"]:
break
if st["paused"]:
paused_now = True
@ -486,19 +769,29 @@ class AudioManager:
sub_total_sec = 0.0
else:
paused_now = False
st["seek"] = False # consumed — pushing from st["pos"]
pos = st["pos"]
if pos >= total_samples:
break
sub_bytes = arr[pos:].tobytes()
sub_total_sec = (total_samples - pos) / rate
st["play_started_pos"] = pos
st["play_started_at"] = time.time()
# Set for real only AFTER PlayStream fires (below) so
# the dashboard counter doesn't tick on a stream that
# was dropped/never started. 0.0 → playback_status
# parks at play_started_pos until audio truly begins.
st["play_started_at"] = 0.0
if paused_now:
time.sleep(0.1)
continue
# Push remainder to G1
# Push remainder to G1. A SINGLE STOP suffices: the G1 "voice"
# service treats the chest speaker as one stream and STOP_PLAY
# is global (stops whatever's playing regardless of app_name),
# so this also clears any Gemini stream. Two STOP RPCs doubled
# the latency on the shared DDS bus and stalled the start; the
# live-voice pause (child stops its own stream) covers Gemini.
stream_id = f"wav_{int(time.time() * 1000)}"
try:
client._Call(
@ -508,7 +801,35 @@ class AudioManager:
except Exception:
pass
time.sleep(0.15)
# After the STOP+settle window, re-check our state: bail if a
# newer press superseded us (no churn / no queue), or loop back
# if a Pause was clicked during the window (don't leak audio).
with self._play_state_lock:
st = self._play_state
if st is None or st.get("epoch") != my_epoch or st["stop"]:
break
paused_in_settle = st["paused"]
if paused_in_settle:
continue
# PlayStream can raise on a DDS hiccup; if it does, abort this
# play rather than leaving play_started_at=0 while the poll loop
# runs (which would make the pause-math elapsed huge and snap
# the counter to the end). Set the timestamp only on success.
try:
client.PlayStream(self._G1_STREAM_APP, stream_id, sub_bytes)
except Exception as exc:
log.warning("PlayStream failed: %s", exc)
break
with self._play_state_lock:
if (self._play_state is not None
and self._play_state.get("epoch") == my_epoch):
self._play_state["play_started_at"] = time.time()
# NOTE: do NOT issue a STOP_PLAY here. The G1 "voice" service
# treats the chest speaker as a SINGLE stream — STOP_PLAY halts
# whatever is currently playing regardless of app_name (verified
# empirically: a post-PlayStream STOP("sanad") silenced the
# record entirely). The single pre-stream STOP above already
# cleared Gemini; the live-voice pause keeps it from re-pushing.
# Poll for pause / stop while the clip drains
poll_deadline = time.time() + sub_total_sec + 0.3
@ -525,6 +846,19 @@ class AudioManager:
except Exception:
pass
break
if self._play_state.get("seek"):
# Seek requested — halt the current stream and let
# the outer loop re-push from the new pos (already
# set by seek_playback). Cleared in the push branch.
try:
client._Call(
ROBOT_API_ID_AUDIO_STOP_PLAY,
json.dumps({"app_name": self._G1_STREAM_APP}),
)
except Exception:
pass
interrupted = True
break
if self._play_state["paused"]:
# Halt G1 and snapshot the new position
try:
@ -560,7 +894,19 @@ class AudioManager:
break
finally:
with self._play_state_lock:
# Only clear if it's still OURS — a preempting play may have
# already installed its own state after bumping the epoch.
mine = (self._play_state is not None
and self._play_state.get("epoch") == my_epoch)
if mine:
self._play_state = None
# Resume the live Gemini only if WE were the last play — if a
# newer play preempted us, it keeps Gemini paused and will
# resume when it finishes (no pause/resume thrash on rapid clicks).
# Skip the resume entirely while a manual hold is active: the user
# wants Gemini to STAY paused until they release it.
if mine and not self._live_voice_hold:
self._set_live_voice_paused(False)
# paplay binary path. Cached on first probe so we don't keep re-shelling
# `which paplay` on every play_wav call. None = probe pending; "" = absent.
@ -774,8 +1120,9 @@ class AudioManager:
pause / stop latency is bounded.
"""
# Make sure pactl defaults reflect the current selection — this is
# a no-op when the watcher or dashboard Apply already aligned them.
ensure_audio_defaults()
# a no-op when the watcher or dashboard Apply already aligned them
# (throttled so the multi-shell pactl scan doesn't run per clip).
self.ensure_audio_defaults()
# Resample to a USB-native rate before opening the stream.
# PortAudio's ALSA backend (the one PyAudio uses) opens the underlying
@ -849,6 +1196,7 @@ class AudioManager:
channels=channels,
rate=sample_rate,
output=True,
output_device_index=self._pulse_device_index(),
frames_per_buffer=CHUNK_SIZE,
)
except Exception as exc:
@ -903,13 +1251,17 @@ class AudioManager:
# -- recording --
def record_mic(self, duration_sec: float) -> bytes:
"""Record from default mic for *duration_sec* seconds, return raw PCM."""
ensure_audio_defaults()
"""Record from the resolved mic for *duration_sec* seconds, return raw PCM."""
self.ensure_audio_defaults()
# Capture through PortAudio's 'pulse' device so we read the resolved
# default source — input_device_index=None defaults to the silent
# hw:0 platform-sound card on this Jetson's conda PyAudio.
stream = self.pya.open(
format=FORMAT,
channels=CHANNELS,
rate=RECEIVE_SAMPLE_RATE,
input=True,
input_device_index=self._pulse_device_index(),
frames_per_buffer=CHUNK_SIZE,
)
frames: list[bytes] = []

View File

@ -81,8 +81,18 @@ class LiveVoiceLoop:
self.sanad_arm: Any = None
self._load_dispatch()
# Snapshot of already-processed transcript lines so we don't re-fire
self._seen_transcripts: set[str] = set()
# Guards the cross-thread transcript cursor + pending-trigger state.
# set_trigger_enabled() runs on the FastAPI event-loop thread while the
# poll daemon mutates the same fields — without this lock a concurrent
# update can tear the cursor / pending flags.
self._trigger_lock = threading.Lock()
# Transcript consumption is tracked by POSITION, not by content, so a
# repeated identical command (e.g. "wave" said twice) re-fires. We
# remember the last deque snapshot we processed and only dispatch the
# newly-appended tail. (live_sub.user_transcript is an append-only,
# left-evicting deque, so the new lines are always at the right.)
self._last_snapshot: list[str] = []
# ── phrase dispatch loader ────────────────────────────────────
def _load_dispatch(self):
@ -96,6 +106,7 @@ class LiveVoiceLoop:
# reach `wake_dispatch`, so a matched phrase for one of those
# silently no-ops in voice mode.
sdk_only_options = [o for o in OPTION_LIST if not getattr(o, "file", "")]
filtered_out = [o.name for o in OPTION_LIST if getattr(o, "file", "")]
if SANAD_ARM_TXT.exists():
self.wake_dispatch = load_arm_phrase_dispatch(SANAD_ARM_TXT, sdk_only_options)
log.info(
@ -104,14 +115,67 @@ class LiveVoiceLoop:
len(self.wake_dispatch), SANAD_ARM_TXT.name,
len(sdk_only_options), len(OPTION_LIST),
)
# Make the silent no-op observable: file-backed replays
# (laugh/bird/change_battery/move_*) are excluded from voice
# dispatch, so a spoken phrase for one of these does nothing.
if filtered_out:
log.warning(
"voice arm dispatch EXCLUDES %d file-backed action(s) "
"(dashboard-only, no voice trigger): %s",
len(filtered_out), ", ".join(filtered_out),
)
else:
log.warning("sanad_arm.txt missing at %s — arm trigger disabled",
SANAD_ARM_TXT)
# Fold in operator-editable WakePhraseManager entries so dashboard
# CRUD (data/wake_phrases.json) actually affects voice triggering.
self._merge_wake_phrases()
except Exception as exc:
log.warning("arm dispatch unavailable: %s", exc)
self.sanad_arm = None
self.wake_dispatch = {}
def _merge_wake_phrases(self) -> None:
"""Merge WakePhraseManager phrases into wake_dispatch.
Without this, dashboard wake-phrase edits were a silent no-op the
manager was stored on self.wake_mgr but never consulted. We only fold
in entries whose action_id resolves unambiguously to a voice-eligible
(SDK-only, non file-backed) arm option, matching either the integer id
or the option name; anything else is skipped with a warning so a
mistyped/file-backed action can't misfire the arm."""
if self.wake_mgr is None:
return
try:
from Project.Sanad.motion.sanad_arm_controller import (
OPTION_BY_ID, OPTION_BY_NAME,
)
amap = self.wake_mgr.action_phrase_map()
except Exception as exc:
log.warning("wake_phrase merge unavailable: %s", exc)
return
merged = skipped = 0
for action_id_str, phrases in amap.items():
opt = None
key = str(action_id_str).strip()
# Resolve by integer id first, then by option name.
if key.isdigit():
opt = OPTION_BY_ID.get(int(key))
if opt is None:
opt = OPTION_BY_NAME.get(key.lower())
if opt is None or getattr(opt, "file", ""):
# Unknown action, or a file-backed replay (voice-excluded).
skipped += 1
log.warning("wake phrase action %r not voice-eligible — skipped",
action_id_str)
continue
bucket = self.wake_dispatch.setdefault(opt.id, set())
bucket.update(p for p in phrases if p)
merged += 1
if merged or skipped:
log.info("merged WakePhraseManager entries (%d actions merged, %d skipped)",
merged, skipped)
# ── lifecycle ────────────────────────────────────────────────
async def start(self) -> None:
if self._running:
@ -145,6 +209,7 @@ class LiveVoiceLoop:
"""
self.trigger_enabled = bool(enabled)
with self._trigger_lock:
# Drop pending fallback timer — a queued "fire in 0.6s" from
# before the toggle must not leak across.
self.state._pending_arm_wave = False
@ -154,13 +219,15 @@ class LiveVoiceLoop:
snapshotted = 0
if self.trigger_enabled:
# On enable, mark everything currently in the deque as already
# consumed (by position) so phrases said while the gate was off
# don't suddenly fire when it's turned back on. Only NEW speech
# after this moment will dispatch.
try:
from Project.Sanad.main import live_sub
if live_sub is not None:
for txt in list(live_sub.user_transcript):
if txt and txt not in self._seen_transcripts:
self._seen_transcripts.add(txt)
snapshotted += 1
self._last_snapshot = list(live_sub.user_transcript)
snapshotted = len(self._last_snapshot)
except Exception as exc:
log.warning("set_trigger_enabled: snapshot failed: %s", exc)
@ -186,6 +253,10 @@ class LiveVoiceLoop:
# Master gate — same check as _dispatch
if not self.trigger_enabled:
return
# Read-and-claim the pending trigger under the lock so a concurrent
# set_trigger_enabled() (FastAPI thread) can't clear it mid-fire and
# cause a stray or lost deferred arm action.
with self._trigger_lock:
if not getattr(self.state, "_pending_arm_wave", False):
return
if getattr(self.state, "_pending_arm_wave_fired", False):
@ -199,14 +270,34 @@ class LiveVoiceLoop:
# Gate on arm idle — skip fire if a motion is already running
if self.sanad_arm is not None and getattr(self.sanad_arm, "_is_busy", False):
return
# Claim it now (still under the lock) so it fires exactly once.
self.state._pending_arm_wave_fired = True
self.state._pending_arm_wave = False
self.state._pending_arm_trigger_fn = None
try:
fn()
except Exception as exc:
log.warning("deferred arm trigger failed: %s", exc)
finally:
self.state._pending_arm_wave_fired = True
self.state._pending_arm_wave = False
self.state._pending_arm_trigger_fn = None
@staticmethod
def _new_tail(prev: list[str], curr: list[str]) -> list[str]:
"""Return the items appended to ``curr`` since ``prev`` was taken.
``curr`` is a snapshot of an append-only, left-evicting deque. The new
lines are the suffix of ``curr`` that wasn't present at the end of
``prev``. We find the largest overlap k where the tail of ``prev``
equals the head of ``curr`` and return everything after it. This is
position-based (not content-based), so a repeated identical command is
treated as a genuinely new line and re-fires."""
if not prev:
return list(curr)
max_k = min(len(prev), len(curr))
for k in range(max_k, 0, -1):
if prev[-k:] == curr[:k]:
return list(curr[k:])
# No overlap — the buffer rolled over entirely between polls; treat the
# whole current snapshot as new.
return list(curr)
def _check_transcripts(self):
try:
@ -215,24 +306,33 @@ class LiveVoiceLoop:
return
if live_sub is None:
return
# Pull recent transcripts
for text in list(live_sub.user_transcript):
if text in self._seen_transcripts:
continue
self._seen_transcripts.add(text)
curr = list(live_sub.user_transcript)
with self._trigger_lock:
new_lines = self._new_tail(self._last_snapshot, curr)
self._last_snapshot = curr
# Dispatch only the newly-appended tail (outside the lock — _dispatch
# may spawn an arm replay thread).
for text in new_lines:
self.last_heard = text
self._dispatch(text)
# Prune seen set when subprocess stops to free memory
if not live_sub.is_running() and len(self._seen_transcripts) > 500:
self._seen_transcripts.clear()
def _dispatch(self, transcript_text: str) -> None:
if not self.wake_dispatch or self.sanad_arm is None:
return
# Master gate — skip arm triggering entirely when disabled
if not self.trigger_enabled:
return
# Arm ⇄ locomotion interlock — refuse voice gestures while the robot
# may be walking. The authoritative check is sanad_arm._blocked() at
# fire time (covers the deferred path too); refuse early here so the
# block is observable and we don't queue a deferred fire that would be
# silently dropped later.
try:
if self.sanad_arm._blocked():
log.info("arm trigger refused — locomotion active (movement_active)")
return
except Exception:
pass
# Gate trigger on arm idle
if getattr(self.sanad_arm, "_is_busy", False):
return

View File

@ -327,6 +327,13 @@ class MovementDispatcher:
# after the operator just toggled movement off.
if cmd != "stop" and not self._movement_enabled(force=True):
continue # toggled off while queued — drop
# Never step while Nav2 owns the legs (autonomous goal in progress).
# Two stacks driving at once is the exact hazard _arbiter guards.
# STOP always passes through (safety). Read-only check — manual loco
# uses acquire_loco; the discrete-step voice path must only YIELD.
if cmd != "stop" and self._nav_holds_legs():
log.info("voice movement dropped — Nav2 owns the legs (%r)", cmd)
continue
try:
self._execute(cmd)
except Exception:
@ -363,9 +370,21 @@ class MovementDispatcher:
return
log.debug("no loco mapping for canonical %r", c)
@staticmethod
def _nav_holds_legs() -> bool:
"""True if Nav2 currently owns the legs (in-process arbiter). Lazy
import so a missing/absent dashboard package never breaks voice."""
try:
from Project.Sanad.dashboard.routes import _arbiter
return _arbiter.nav_active()
except Exception:
return False
def _repeat_step(self, direction: str, n: int):
for _ in range(max(1, n)):
if self._abort.is_set() or self._estop or not self._movement_enabled(force=True):
if (self._abort.is_set() or self._estop
or not self._movement_enabled(force=True)
or self._nav_holds_legs()):
log.info("voice multi-step aborted")
break
self._loco.step(direction)

View File

@ -34,6 +34,7 @@ import json
import logging
import os
import sys
import tempfile
import threading
import time
import types
@ -149,8 +150,8 @@ _MOVEMENT_PROMPT_RULES = (
"asks you to move, reply with ONE short confirmation phrase per requested "
"motion, in the SAME language, in the order asked. Use these EXACT shapes — "
"they are what triggers the motion:\n"
" forward : 'Moving forward.' / 'أمشي للأمام.'\n"
" backward : 'Moving back.' / 'أمشي للخلف.'\n"
" forward : 'Walking forward.' / 'أمشي للأمام.'\n"
" backward : 'Walking back.' / 'أمشي للخلف.'\n"
" turn right : 'Turning right.' / 'أستدير يميناً.'\n"
" turn left : 'Turning left.' / 'أستدير يساراً.'\n"
" slide left : 'Sliding left.' / 'أنزلق لليسار.'\n"
@ -166,6 +167,33 @@ _MOVEMENT_PROMPT_RULES = (
"mis-hear a 0, drop the number and say the bare motion."
)
# Native function-calling: Gemini can DRIVE the robot to saved map places via
# the navigate_to_place / list_places / where_am_i / stop_navigation tools. The
# tool schemas are declared in the Live config; this block tells Gemini WHEN and
# HOW to use them, and the safety constraints. Appended only when nav tools are
# enabled (SANAD_NAV_TOOLS != 0).
# Read once, when this assignment runs. An unset SANAD_NAV_TOOLS defaults to
# "1"; only the exact string "0" turns the flag off — any other value
# (including "false" or an empty string) leaves nav tools enabled.
_NAV_TOOLS_ENABLED = os.environ.get("SANAD_NAV_TOOLS", "1") != "0"
# Prompt section describing the navigation tools. It is one implicitly
# concatenated string literal; _load_system_prompt appends it after
# _MOVEMENT_PROMPT_RULES when _NAV_TOOLS_ENABLED is true. The text is sent to
# the model as-is, so any wording change here changes runtime behavior.
_NAV_PROMPT_RULES = (
"\n\n--- NAVIGATION (autonomous driving to places) ---\n"
"You can autonomously DRIVE the robot to a saved place on the loaded map "
"using your tools. This is different from step-by-step walking above.\n"
# When to call navigate_to_place instead of speaking a walking phrase.
"- When the user asks to GO/MOVE/TAKE them to a named place (e.g. 'go to "
"the kitchen', 'خذني للاستقبال'), call the navigate_to_place tool with the "
"place name. Do NOT say the walking phrases above for this — the tool does "
"the driving.\n"
"- You can only drive to places that exist in the CURRENTLY loaded map. If "
"you are unsure which places exist, call list_places first and offer them.\n"
# How to react to the failure reasons the tool can return.
"- If the tool returns reason 'no_map', tell the user to load a map first. "
"If 'movement_off', tell them to enable movement. If 'ambiguous' or "
"'unknown_place', read back the candidate names and ask which one.\n"
# Arrival and failure are announced later through bracketed notes.
"- After a successful navigate_to_place, briefly say you're heading there — "
"but do NOT claim you have arrived. You will receive a [NAV ARRIVED] note "
"when you actually arrive (then tell the user), or [NAV FAILED] if you "
"could not reach it (then apologise and say why).\n"
"- To stop an in-progress drive, call stop_navigation.\n"
"Keep all of this in your normal Khaleeji style."
)
def _load_system_prompt() -> str:
"""scripts/<persona file> → config default → hardcoded fallback, with the
@ -175,27 +203,41 @@ def _load_system_prompt() -> str:
mismatch (e.g. `persona: "sanad_v2"` while only `sanad_script.txt`
existed) which made the robot fall back to the English default that
introduces itself as "Sanad" instead of using the Arabic persona on
disk. We now WARN so the same trap doesn't bite again."""
disk. We now WARN so the same trap doesn't bite again.
The persona is resolved HERE (at session start), not at import so the
operator's Scripts Manager selection (a sanad_script_v*.txt variant) is
picked up on the next voice (re)connect. Falls back to sanad_script.txt."""
# Resolve the selected persona variant (or the default sanad_script.txt).
try:
from Project.Sanad.core.persona import active_persona_path
persona_file = active_persona_path()
except Exception:
persona_file = _PERSONA_FILE
base = None
try:
text = _PERSONA_FILE.read_text(encoding="utf-8-sig").strip()
text = persona_file.read_text(encoding="utf-8-sig").strip()
if text:
base = text
log.info("persona loaded: %s", persona_file.name)
except FileNotFoundError:
log.warning(
"Persona file not found at %s — falling back to "
"config.core.gemini_defaults.default_system_prompt. "
"Check `script_files.persona` in config/core_config.json "
"matches an actual file under scripts/.", _PERSONA_FILE,
"matches an actual file under scripts/.", persona_file,
)
except (OSError, UnicodeDecodeError) as exc:
# An existing-but-unreadable persona file (bad encoding, permissions, a
# directory) must NOT crash the voice child — fall back to the default.
log.warning("Persona file at %s could not be read (%s) — "
"falling back to default system prompt.", _PERSONA_FILE, exc)
"falling back to default system prompt.", persona_file, exc)
if base is None:
base = _GEMINI_DEFAULTS.get("default_system_prompt", _FALLBACK_SYSTEM_PROMPT)
return base + _MOVEMENT_PROMPT_RULES
prompt = base + _MOVEMENT_PROMPT_RULES
if _NAV_TOOLS_ENABLED:
prompt += _NAV_PROMPT_RULES
return prompt
def _audio_energy(pcm: bytes) -> int:
@ -323,10 +365,22 @@ class TurnRecorder:
payload = {"records": []}
payload.setdefault("records", []).append(entry)
payload["total_records"] = len(payload["records"])
idx_path.write_text(
json.dumps(payload, indent=2, ensure_ascii=False),
encoding="utf-8",
)
# Atomic write (tempfile + os.replace) — an in-place write_text that is
# interrupted (the start_all.sh supervisor Ctrl+C-teardowns this voice
# child) can truncate index.json, so the next read falls back to an
# empty {"records": []} and silently drops all prior turn metadata.
# Mirrors voice/typed_replay._save_index.
fd, tmp = tempfile.mkstemp(dir=str(idx_path.parent), suffix=".tmp")
try:
with os.fdopen(fd, "w", encoding="utf-8") as f:
json.dump(payload, f, indent=2, ensure_ascii=False)
os.replace(tmp, idx_path)
except Exception:
try:
os.unlink(tmp)
except OSError:
pass
raise
# ─── BRAIN FACTORY ───────────────────────────────────────

View File

@ -415,12 +415,14 @@ class TypedReplayEngine:
if not self.voice_client.connected:
await self.voice_client.connect()
# Ordered by empirical reliability — first variant wins ~95% of turns.
# The quoted-phrase form is the most consistent trigger for an
# audio-only response with the current Sanad persona prompt.
# The voice_client now runs the multilingual VERBATIM TTS prompt
# (gemini/client.TTS_SYSTEM_PROMPT), so the text is read back in its own
# language. Send the raw text first (cleanest — no wrapper to bias the
# language); fall back to explicit, language-neutral verbatim asks. The
# old attempt-1 wrapped in Arabic, which dragged every line to Arabic.
attempts = [
f'قل هذا بالضبط وبدون إضافات: "{stripped}"', # Arabic: "Say this exactly, no additions"
f'Say this exactly, nothing else: "{stripped}"',
stripped,
f'Read this aloud word for word, in its original language, nothing else:\n{stripped}',
f'"{stripped}"',
]
last_parts: list[str] = []
@ -496,6 +498,10 @@ class TypedReplayEngine:
except Exception as exc:
log.warning("couldn't switch default source to monitor: %s", exc)
# Outer try/finally guarantees the default source is restored even
# if pya.open()/recorder.start()/stream.write() raises — otherwise
# the machine's default mic stays pointed at the speaker monitor.
try:
stream = None
try:
stream = self.audio_mgr.pya.open(
@ -518,9 +524,9 @@ class TypedReplayEngine:
stream.close()
captured = b""
try:
if recorder is not None:
captured = recorder.stop()
return captured
finally:
if restore_source:
try:
@ -528,8 +534,6 @@ class TypedReplayEngine:
except Exception as exc:
log.warning("couldn't restore default source: %s", exc)
return captured
def save_audio(self, pcm: bytes, path: Path, channels: int, rate: int) -> None:
with wave.open(str(path), "wb") as wf:
wf.setnchannels(channels)