Update 2026-07-04 23:28:24

This commit is contained in:
kassam 2026-07-04 23:28:25 +04:00
commit 39be38811c
180 changed files with 36985 additions and 0 deletions

40
.env.example Normal file
View File

@ -0,0 +1,40 @@
# Copy to .env — docker compose reads it automatically from this directory.
# Signed license for THIS robot (default = the bundled example, entitles P1+P3;
# P2 needs a license that entitles P2 — re-sign with packages.P2=true).
SANAD_LICENSE_FILE=./license/sanad.lic.example
# Enforce machine-fingerprint binding (1 on a delivered robot; also uncomment the
# /etc/machine-id mount in docker-compose.yml).
SANAD_LICENSE_BIND=0
# Audio: builtin (G1 chest over DDS) | plugged (USB e.g. Anker via PulseAudio)
SANAD_AUDIO_PROFILE=builtin
# DDS interface to the G1 firmware.
SANAD_DDS_INTERFACE=eth0
# --- Plugged/Bluetooth speaker volume (Anker/JBL) ---------------------------
# The volume slider drives ALL speaker types like SanadV3: the G1 chest over DDS
# (always) AND the active PulseAudio sink (plugged/BT). The container runs as root,
# so it needs the HOST pulse socket + cookie to reach a uid-1000 PulseAudio.
# One-time host setup (stable socket at boot, no stray dir):
# loginctl enable-linger unitree
# Override these only if your pulse runs under a different uid or cookie path:
SANAD_PULSE_DIR=/run/user/1000/pulse
PULSE_SERVER=unix:/run/user/1000/pulse/native
# Cookie location varies by distro; if plugged volume/output is silent, try
# /home/<user>/.config/pulse/cookie instead and mount it accordingly.
PULSE_COOKIE=/run/user/1000/pulse/cookie
# Conversation language. Empty = MULTILINGUAL auto-detect (P2's headline feature);
# set e.g. ar/en only to force a single language.
SANAD_LANGUAGE=
# LED "Shining Mask": pin its BLE MAC, else auto-discover by name prefix.
SANAD_MASK_ADDRESS=
# Bundle the chest-audio Unitree SDK into the image (1=yes default, 0=USB-only/leaner).
WITH_UNITREE_SDK=1
# Base image (override only for a GPU build).
BASE_OS_IMAGE=python:3.10-slim-bookworm
# Image name/tag (e.g. a registry path for pull-and-run).
# SANAD_IMAGE=sanad-p3:latest

20
.gitignore vendored Normal file
View File

@ -0,0 +1,20 @@
# Python caches
__pycache__/
*.pyc
# Logs
*.log
Logs/
# Customer license — NEVER commit a real signed license; ship only the example.
license/sanad.lic
# Runtime data (keep the seed structure + config; ignore generated media).
data/recordings/*
data/audio/*
data/faces/*
data/photos/*
!data/**/.gitkeep
# NOTE: ./vendor IS committed on purpose — the vendored SanadV3 engine + Mask lib
# that make this package build standalone. Only caches are ignored (above).

75
Dockerfile Normal file
View File

@ -0,0 +1,75 @@
# syntax=docker/dockerfile:1
# ─────────────────────────────────────────────────────────────────────────────
# Sanad Package 3 — Recognition + Places + Memories. SELF-CONTAINED (vendors the
# SanadV3 engine + Mask lib under ./vendor; FROM python:3.10-slim; no sanad-base).
# docker build -t sanad-p3:latest . (Jetson without buildx: DOCKER_BUILDKIT=0)
# ─────────────────────────────────────────────────────────────────────────────
ARG BASE_OS_IMAGE=python:3.10-slim-bookworm
FROM ${BASE_OS_IMAGE}
# DEBIAN_FRONTEND is a build-time concern only: declaring it as ARG keeps apt
# non-interactive in every RUN of this stage without leaking the variable into
# the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONPATH=/app
WORKDIR /app
# System deps: audio + PortAudio/toolchain (pyaudio) + BlueZ/D-Bus (mask) +
# iproute2 (`ip`, chest-mic) + libGL/glib for opencv-headless V4L camera capture.
# One package per line, sorted, so additions/removals diff cleanly. The apt
# lists are removed in the same layer that created them.
RUN apt-get update && apt-get install -y --no-install-recommends \
        alsa-utils \
        bluez \
        build-essential \
        ca-certificates \
        iproute2 \
        libdbus-1-3 \
        libgl1 \
        libglib2.0-0 \
        libportaudio2 \
        libsndfile1 \
        portaudio19-dev \
        pulseaudio-utils \
        python3-dev \
    && rm -rf /var/lib/apt/lists/*
# Requirements are copied on their own so the pip layer stays cached until the
# dependency list itself changes.
COPY requirements.txt /tmp/requirements.txt
RUN python3 -m pip install --no-cache-dir --upgrade pip \
    && python3 -m pip install --no-cache-dir -r /tmp/requirements.txt
# Optional Unitree SDK — chest (builtin) audio over DDS (full CycloneDDS + idlc,
# pinned binding). A failure must never break the image, but it MUST be reported.
#
# The steps are chained with `&&` inside an `if` condition instead of relying on
# `( set -e; ... ) || echo WARN`: errexit is ignored for any command list that is
# the left-hand side of `||`, so a failed step would not stop the chain, and the
# trailing `rm -rf` would make the subshell exit 0 — the WARN would never print.
# Cleanup runs on both the success and the failure path so a half-finished build
# leaves neither the CycloneDDS checkout nor the apt lists in the layer.
ARG WITH_UNITREE_SDK=1
ENV CYCLONEDDS_HOME=/usr/local LD_LIBRARY_PATH=/usr/local/lib
RUN if [ "$WITH_UNITREE_SDK" = "1" ]; then \
      set -x; \
      if apt-get update \
         && apt-get install -y --no-install-recommends git cmake build-essential \
         && git clone --depth 1 -b releases/0.10.x https://github.com/eclipse-cyclonedds/cyclonedds /tmp/cyclonedds \
         && cmake -S /tmp/cyclonedds -B /tmp/cyclonedds/build -DCMAKE_INSTALL_PREFIX=/usr/local \
         && cmake --build /tmp/cyclonedds/build --target install -j"$(nproc)" \
         && CYCLONEDDS_HOME=/usr/local CMAKE_PREFIX_PATH=/usr/local python3 -m pip install --no-cache-dir "cyclonedds==0.10.2" \
         && git clone --depth 1 https://github.com/unitreerobotics/unitree_sdk2_python /opt/unitree_sdk2_python \
         && python3 -m pip install --no-cache-dir -e /opt/unitree_sdk2_python \
         && python3 -c "import unitree_sdk2py; print('unitree_sdk2py OK')"; then \
        echo "Unitree SDK installed — chest audio available"; \
      else \
        echo "WARN[P3]: Unitree SDK build failed — chest audio unavailable; use SANAD_AUDIO_PROFILE=plugged"; \
      fi; \
      rm -rf /tmp/cyclonedds /var/lib/apt/lists/*; \
    else echo "WITH_UNITREE_SDK=0 — skipping Unitree SDK"; fi
# Vendored licensing package first; its public key is also installed at the
# path SANAD_PUBKEY points to.
COPY vendor/sanad_pkg /app/sanad_pkg
RUN mkdir -p /etc/sanad && cp /app/sanad_pkg/pubkey.ed25519 /etc/sanad/pubkey.ed25519
# Vendored Mask lib + SanadV3 engine.
COPY vendor/mask /app/mask
COPY vendor/Sanad /app/Sanad
# P3 launcher, package-local memory feature, convenience routes and entrypoint
# all land directly in /app; the package config goes to /app/pkg3_config.
COPY app_p3.py routes_p3.py visitor_memory.py routes_memory.py entrypoint.sh /app/
COPY config /app/pkg3_config
RUN chmod +x /app/entrypoint.sh
# One-shot build helper, removed after it has run.
# NOTE(review): presumably strips a bundled API key from the vendored config —
# confirm against strip_key.py, which is not visible here.
COPY strip_key.py /tmp/strip_key.py
RUN python3 /tmp/strip_key.py && rm -f /tmp/strip_key.py
# Build-time self-check: fail the build unless the vendored packages and the
# package-local memory module resolve on PYTHONPATH; cv2 is reported but optional.
# Written as `python3 -c` instead of a RUN heredoc because heredocs are a
# BuildKit-only feature and the documented Jetson build path uses
# DOCKER_BUILDKIT=0, where the heredoc body would be parsed as Dockerfile
# instructions and abort the build.
RUN python3 -c "import importlib.util as u, sys; \
ok = all(u.find_spec(m) for m in ('sanad_pkg.license', 'Sanad', 'visitor_memory')); \
print('P3 self-contained: modules importable:', ok, '| cv2:', u.find_spec('cv2') is not None); \
sys.exit(0 if ok else 1)"
# Runtime defaults, grouped by concern: package identity, dashboard bind
# address, license material, and data/mask locations.
ENV SANAD_PACKAGE=P3 \
    SANAD_DASHBOARD_HOST=0.0.0.0 \
    SANAD_DASHBOARD_PORT=8013 \
    SANAD_LICENSE=/etc/sanad/sanad.lic \
    SANAD_PUBKEY=/etc/sanad/pubkey.ed25519 \
    SANAD_MASK_DIR=/app/mask \
    SANAD_MEMORIES_DIR=/app/Sanad/data/memories
# Documentation only; docker-compose.yml runs this service with host networking.
EXPOSE 8013
ENTRYPOINT ["/app/entrypoint.sh"]

22
README.md Normal file
View File

@ -0,0 +1,22 @@
# Sanad Package 3 — Facial Recognition + Places + Memories (SCAFFOLD)
Perception + memory only; **no motion**. Dashboard on **:8013**. License
features: `face_rec`, `places`, `memory`, `mask`.
Planned (`sanad-recognition` container):
- **Faces / VIP DB** — `vision/face_gallery.py` (enroll/upload/capture/describe,
VIP flag). Recognition is Gemini-side in-context (primer images) — no local ML.
- **Places** — `vision/zone_gallery.py` visual place recognition (the `/go` nav
action stays disabled here; it belongs to P4).
- **Memories** — NEW persistent visitor-profile store (attributes, notes,
last-seen, linked `face_id`); feeds personalized-greeting primers to comms.
- **Face/Mask** — drives the shared `sanad-mask` for expressions on recognition.
- Consumes the `frames` topic from `Sanad_Core/camera`; emits `recognition.event`
/ `place.event`.
Depends on `Sanad_Core` (camera, shared `sanad-mask`). See the plan for details.
**Build model (when implemented):** self-contained like P1 — a `vendor/` copy of
the Sanad engine + a standalone `Dockerfile` (`FROM python:3.10-slim`) + its own
`docker-compose.yml`, refreshed by a `sync_vendor.sh`. No `sanad-base`. (Camera
frames + shared mask are the cross-container dependencies.)

474
app_p3.py Normal file
View File

@ -0,0 +1,474 @@
#!/usr/bin/env python3
"""Sanad Package 3 — Facial Recognition + Places + Memories launcher.
P3 = perception + memory (NO motion): identify faces (VIP DB), recognize places
(zones), and remember visitors across visits, driving mask expressions on
recognition. Recognition is 100% Gemini-side / in-context (primer images) — no
local ML. Dashboard on :8013.
Self-contained wrapper around the vendored SanadV3 engine (like P1/P2):
1. bootstrap the Project.Sanad namespace + flat Mask path,
2. construct the perception subsystems (camera, face gallery, zone gallery,
recognition state) + comms core (brain/audio/voice/live_sub) + mask,
3. construct the NEW package-local VisitorMemory store,
4. wire lip-sync + Gemini emotions/social + recognition-driven expressions,
5. inject a P3-scoped Project.Sanad.main shim + a package-local memory shim,
6. mount the recognition/places/mask/memory routers + comms subset, serve the
SanadV3 SPA with non-P3 tabs hidden, on :8013.
Kept Python-3.8 compatible.
"""
from __future__ import annotations
import atexit
import importlib
import os
import sys
import types
from pathlib import Path
# ── 1. namespace bootstrap (mirrors app_p2.py) ───────────────────────────────
# Application root: where the vendored `Sanad` package lives. It must be on
# sys.path before `import Sanad` below can succeed.
_APP = Path(os.environ.get("SANAD_APP_DIR", "/app"))
if str(_APP) not in sys.path:
    sys.path.insert(0, str(_APP))
# Mask library directory. setdefault also publishes the resolved value back
# into os.environ when SANAD_MASK_DIR was not already set.
_MASK_DIR = os.environ.setdefault("SANAD_MASK_DIR", str(_APP / "mask"))
if _MASK_DIR and _MASK_DIR not in sys.path:
    sys.path.insert(0, _MASK_DIR)
# Synthesize an empty `Project` package so the `Project.Sanad.*` import paths
# used throughout this file resolve.
if "Project" not in sys.modules:
    _proj = types.ModuleType("Project")
    _proj.__path__ = []  # a __path__ attribute marks the module as a package
    sys.modules["Project"] = _proj
# Alias the top-level `Sanad` package as `Project.Sanad`, both in sys.modules
# and as an attribute on the `Project` module.
if "Project.Sanad" not in sys.modules:
    _sanad = importlib.import_module("Sanad")
    sys.modules["Project.Sanad"] = _sanad
    sys.modules["Project"].Sanad = _sanad  # type: ignore[attr-defined]
# package-local modules (memory store + its route) live next to this file.
sys.path.insert(0, str(Path(__file__).resolve().parent))
from Project.Sanad.core import asyncio_compat # noqa: E402,F401
from Project.Sanad.core.logger import get_logger # noqa: E402
log = get_logger("pkg3.app")
PACKAGE = "P3"
PACKAGE_TITLE = "Sanad — Recognition + Places + Memories (P3)"
# SanadV3 SPA tab ids P3 SHOWS / HIDES.
P3_SPA_TABS = ["operations", "voice", "recognition", "mask", "recordings", "settings"]
P3_SPA_HIDE = ["motion", "controller", "navigation", "livemap", "mapeditor",
"temp", "terminal"]
# Routers P3 does NOT mount → short-circuit client-side (no "Not Found" toasts).
P3_UNMOUNTED = ["/api/nav", "/api/controller", "/api/motion", "/api/skills",
"/api/macros", "/api/replay", "/api/wake-phrases", "/api/live-voice",
"/api/scripts"]
def _safe(name, factory):
try:
return factory()
except Exception:
log.exception("P3: could not construct %s — degraded", name)
return None
# ── 2. construct the perception + comms subsystems ───────────────────────────
def _build_singletons():
    """Construct every subsystem P3 owns and return them in one dict.

    Each subsystem is built through ``_safe`` so one failing component degrades
    to ``None`` rather than aborting startup. After construction the voice
    client and audio manager are attached to the brain, the audio manager and
    camera are attached to the live subprocess, and the camera is restarted if
    the persisted recognition state asks for vision.

    Returns:
        dict with keys ``brain``, ``audio_mgr``, ``voice_client``,
        ``local_tts``, ``typed_replay``, ``live_sub``, ``camera``, ``gallery``,
        ``zone_gallery``, ``mask_face`` and ``memory``; any value may be
        ``None`` when that subsystem could not be imported or constructed.
    """
    from Project.Sanad.core.brain import Brain
    from Project.Sanad.voice.audio_manager import AudioManager
    from Project.Sanad.gemini.client import GeminiVoiceClient
    from Project.Sanad.gemini.subprocess import GeminiSubprocess

    brain = _safe("brain", Brain)  # CRITICAL — greetings ride the live session
    audio_mgr = _safe("audio_mgr", AudioManager)
    voice_client = _safe("voice_client", GeminiVoiceClient)

    # Optional engines: an import failure is tolerated, but it is reported so
    # a missing feature can be traced from the logs.
    local_tts = None
    try:
        from Project.Sanad.voice.local_tts import LocalTTSEngine
        local_tts = _safe("local_tts", LocalTTSEngine)
    except Exception as exc:
        log.info("P3: local TTS unavailable (optional): %s", exc)

    typed_replay = None
    if voice_client is not None and audio_mgr is not None:
        try:
            from Project.Sanad.voice.typed_replay import TypedReplayEngine
            typed_replay = _safe("typed_replay", lambda: TypedReplayEngine(voice_client, audio_mgr))
        except Exception as exc:
            log.info("P3: typed replay unavailable (optional): %s", exc)

    live_sub = _safe("live_sub", lambda: GeminiSubprocess())

    # Perception: camera daemon + face gallery + zone gallery.
    camera = None
    try:
        from Project.Sanad.vision.camera import CameraDaemon
        camera = _safe("camera", lambda: CameraDaemon())
    except Exception:
        log.exception("P3: CameraDaemon import failed — vision unavailable")

    gallery = None
    try:
        from Project.Sanad.vision.face_gallery import FaceGallery
        gallery = _safe("gallery", lambda: FaceGallery())
    except Exception:
        log.exception("P3: FaceGallery import failed")

    zone_gallery = None
    try:
        from Project.Sanad.vision.zone_gallery import ZoneGallery
        zone_gallery = _safe("zone_gallery", lambda: ZoneGallery())
    except Exception:
        log.exception("P3: ZoneGallery import failed")

    # Mask/face — expressions on recognition.
    mask_face = None
    try:
        from Project.Sanad.face.mask_face import FaceController
        mask_face = _safe("mask_face", FaceController)
    except Exception:
        log.exception("P3: FaceController import failed — LED mask unavailable")

    # NEW package-local visitor memory store.
    memory = None
    try:
        from visitor_memory import VisitorMemory
        memory = _safe("memory", VisitorMemory)
    except Exception:
        log.exception("P3: VisitorMemory init failed")

    # attachments — only call hooks the brain actually exposes.
    for meth, val in (("attach_voice", voice_client),
                      ("attach_audio_manager", audio_mgr)):
        if brain is not None and val is not None and hasattr(brain, meth):
            try:
                getattr(brain, meth)(val)
            except Exception:
                log.exception("brain.%s failed", meth)

    if live_sub is not None:
        if audio_mgr is not None and hasattr(live_sub, "attach_audio_manager"):
            try:
                live_sub.attach_audio_manager(audio_mgr)
            except Exception:
                log.exception("live_sub.attach_audio_manager failed")
        if camera is not None and hasattr(live_sub, "attach_camera"):
            try:
                live_sub.attach_camera(camera)  # frames flow to the child for recognition
            except Exception:
                log.exception("live_sub.attach_camera failed")

    # Boot vision-restore (guarded — never crash the container if no camera).
    # The vendored recognition_state exposes read(path)/mutate(path, **) — NOT load().
    try:
        from Project.Sanad.vision import recognition_state as _rs
        from Project.Sanad.config import BASE_DIR as _BD
        _state_path = _BD / "data" / ".recognition_state.json"
        st = _rs.read(_state_path)
        # NOTE(review): assumes read() returns an object with a
        # `vision_enabled` attribute; a plain dict would always yield False.
        want_vision = bool(getattr(st, "vision_enabled", False))
        if want_vision and camera is not None and hasattr(camera, "start"):
            try:
                camera.start()
                log.info("P3: vision restored (camera started)")
            except Exception:
                log.exception("P3: camera.start() failed — disabling vision, booting headless")
                try:
                    _rs.mutate(_state_path, vision_enabled=False)
                except Exception:
                    # Best effort: booting continues, but record that the
                    # persisted flag could not be cleared.
                    log.exception("P3: could not persist vision_enabled=False")
    except Exception:
        log.exception("P3: recognition-state restore skipped")

    return dict(brain=brain, audio_mgr=audio_mgr, voice_client=voice_client,
                local_tts=local_tts, typed_replay=typed_replay, live_sub=live_sub,
                camera=camera, gallery=gallery, zone_gallery=zone_gallery,
                mask_face=mask_face, memory=memory)
# ── 2b. mask wiring (lip-sync + emotions + social + lifelike) ────────────────
def _wire_mask(s):
    """Connect the LED mask to lip-sync, emotion, social-QR and bus events.

    Does nothing when no mask controller was constructed. Every hook is
    registered inside its own ``try`` so one failing registration does not
    prevent the others. The live-subprocess callbacks (mouth / face / social)
    only act while the mask's ``_gemini_linked`` flag is truthy.

    Args:
        s: Singleton dict from ``_build_singletons``; reads ``mask_face`` and
            ``live_sub``.
    """
    mask_face = s.get("mask_face")
    live_sub = s.get("live_sub")
    if mask_face is None:
        return

    def _linked():
        # Read on every callback so the link can be toggled at runtime.
        return getattr(mask_face, "_gemini_linked", False)

    # lip-sync
    if live_sub is not None and hasattr(live_sub, "register_mouth_callback"):
        try:
            def _on_mouth(lvl):
                return _linked() and _try(mask_face.set_mouth, int(lvl))
            live_sub.register_mouth_callback(_on_mouth)
            log.info("LED face wired to lip-sync (MOUTH)")
        except Exception:
            log.exception("mouth hook failed")

    # emotions
    if live_sub is not None and hasattr(live_sub, "register_face_callback"):
        try:
            # Per-expression hold times passed to react(); others use 1.6.
            _HOLD = {"heart": 2.6, "love": 2.6, "kiss": 2.4, "laugh": 2.2,
                     "surprised": 1.8, "confused": 1.8}

            def _on_face(n):
                # Normalise once and use the same string for both the lookup
                # and the reaction: looking up the raw argument would miss the
                # table for non-str names and raise for unhashable ones.
                name = str(n)
                return _linked() and _try(mask_face.react, name, _HOLD.get(name, 1.6))
            live_sub.register_face_callback(_on_face)
            log.info("LED face wired to emotions (FACE)")
        except Exception:
            log.exception("face hook failed")

    # social QR (off-thread — ~9s BLE upload)
    if live_sub is not None and hasattr(live_sub, "register_social_callback"):
        try:
            def _on_social(account):
                if not _linked():
                    return
                import threading as _th

                def _run(acc=str(account)):
                    try:
                        from Project.Sanad.dashboard.routes.mask_social import show_social_on_mask
                        show_social_on_mask(acc)
                    except Exception:
                        log.exception("show_social_on_mask failed")
                _th.Thread(target=_run, daemon=True, name="mask-social").start()
            live_sub.register_social_callback(_on_social)
            log.info("LED face wired to social QR (SHOW)")
        except Exception:
            log.exception("social hook failed")

    # lifelike state + reactions (synchronous bus)
    try:
        from Project.Sanad.core.event_bus import bus as _bus

        def _on_speaking_changed(enabled=False, **_k):
            # Mirror the speaking state; when speech stops, return to listening.
            _try(mask_face.set_speaking, bool(enabled))
            if not enabled:
                _try(mask_face.set_listening)

        _bus.on("brain.gestural_speaking_changed", _on_speaking_changed)
        _bus.on("voice.connected", lambda **_k: _try(mask_face.set_listening))
        _bus.on("voice.user_said", lambda **_k: _try(mask_face.set_thinking))
        _bus.on("voice.disconnected", lambda **_k: _try(mask_face.set_idle))
        _bus.on("voice.error", lambda **_k: _try(mask_face.react, "sad"))
        _bus.on("recognition.event", lambda **_k: _try(mask_face.react, "smile"))  # greet on recognition
        log.info("LED face wired to lifelike + recognition-greeting events")
    except Exception:
        log.exception("lifelike hooks failed")
def _try(fn, *a):
try:
return fn(*a)
except Exception:
log.exception("%s failed", getattr(fn, "__name__", "callback"))
def _inject_main_shim(singletons):
shim = types.ModuleType("Project.Sanad.main")
for k, v in singletons.items():
setattr(shim, k, v)
# motion / nav subsystems P3 does NOT own — present as None (routers guard).
for k in ("arm", "wake_mgr", "macro_rec", "macro_play", "teacher", "live_voice",
"loco_controller", "movement_dispatch", "nav_client"):
if not hasattr(shim, k):
setattr(shim, k, None)
shim.SUBSYSTEMS = {k: singletons.get(k) for k in ( # type: ignore[attr-defined]
"brain", "audio_mgr", "voice_client", "local_tts", "typed_replay", "live_sub",
"camera", "gallery", "zone_gallery", "mask_face", "memory")}
sys.modules["Project.Sanad.main"] = shim
return shim
# ── 3. build the P3 FastAPI app ───────────────────────────────────────────────
_P3_REST = [
("health", "/api", "health"),
("system", "/api/system", "system"),
("voice", "/api/voice", "voice"),
("audio_control", "/api/audio", "audio"),
("prompt", "/api/prompt", "prompt"),
("typed_replay", "/api/typed-replay", "typed-replay"),
("records", "/api/records", "records"),
("logs", "/api/logs", "logs"),
("live_subprocess", "/api/live-subprocess", "live-subprocess"),
("recognition", "/api/recognition", "recognition"), # faces / VIP
("zones", "/api/zones", "zones"), # places (nav /go degrades to nav_unavailable)
("mask", "/api/mask", "mask"),
("mask_social", "/api/mask", "mask-social"),
]
_P3_WS = ["log_stream"]
def _tab_filter_snippet():
    """Return the HTML fragment that scopes the shared SPA down to P3.

    The fragment contains:
      * a ``<style>`` rule hiding the tab button and panel of every id in
        ``P3_SPA_HIDE``, plus ``#status-pills``;
      * a ``<script>`` that sets ``window.SANAD_PACKAGE`` (name, title, tabs)
        and wraps ``window.fetch`` so any request whose path starts with an
        entry of ``P3_UNMOUNTED`` resolves locally to an empty ``{}`` JSON
        response with status 200 instead of reaching the server.

    Returns:
        str: the concatenated ``<style>`` + ``<script>`` markup.
    """
    import json as _json
    # One selector pair per hidden tab: the tab button (matched through its
    # inline onclick handler) and the tab panel (#tab-<id>).
    css = ",".join(".tab[onclick*=\"switchTab('%s')\"],#tab-%s" % (t, t) for t in P3_SPA_HIDE) + ",#status-pills"
    # The adjacent literals form ONE format string with three %s slots, filled
    # in order with: the CSS selector list, the package JSON, the blocked
    # path-prefix JSON array.
    return (
        "<style>%s{display:none!important}</style>"
        "<script>window.SANAD_PACKAGE=%s;"
        "(function(){var B=%s,_f=window.fetch;"
        "window.fetch=function(i,o){try{var u=(typeof i==='string')?i:(i&&i.url)||'',"
        "p=u.replace(/^https?:\\/\\/[^/]+/,'');"
        "for(var k=0;k<B.length;k++){if(p.indexOf(B[k])===0)"
        "return Promise.resolve(new Response('{}',{status:200,headers:{'Content-Type':'application/json'}}));}"
        "}catch(e){}return _f.apply(this,arguments);};})();</script>"
        % (css, _json.dumps({"name": PACKAGE, "title": PACKAGE_TITLE, "tabs": P3_SPA_TABS}),
           _json.dumps(P3_UNMOUNTED))
    )
def build_app():
    """Assemble the P3 FastAPI application.

    Mounts the vendored REST routers listed in ``_P3_REST`` and the websocket
    routers in ``_P3_WS``, then the package-local ``routes_memory``
    (``/api/memory``) and ``routes_p3`` (``/api/p3``) routers, the static
    files, a ``/api/package`` info endpoint, and the filtered SPA at ``/`` and
    ``/full``. A router that fails to import is skipped and recorded rather
    than aborting startup.

    Returns:
        The configured ``FastAPI`` instance.
    """
    from fastapi import FastAPI
    from fastapi.staticfiles import StaticFiles
    from fastapi.responses import HTMLResponse, JSONResponse
    from Project.Sanad.config import BASE_DIR
    from Project.Sanad.core.config_loader import section as _cfg_section
    app_cfg = _cfg_section("dashboard", "app")
    app = FastAPI(title=PACKAGE_TITLE, version="0.1.0")
    # Names of routers that mounted, and {name: error text} for those that did
    # not; both are reported by /api/package.
    loaded, failed = [], {}

    def _register(mod_name, prefix, tag, package="Project.Sanad.dashboard.routes"):
        # Import <package>.<mod_name> and mount its `router`; on any failure
        # record the error and continue without that router.
        try:
            mod = importlib.import_module("%s.%s" % (package, mod_name))
            if not hasattr(mod, "router"):
                raise AttributeError("no 'router'")
            kw = {}
            if prefix:
                kw["prefix"] = prefix
            if tag:
                kw["tags"] = [tag]
            app.include_router(mod.router, **kw)
            loaded.append(mod_name)
        except Exception as exc:
            failed[mod_name] = str(exc)
            log.exception("P3: router %s failed — skipped", mod_name)

    for m, p, t in _P3_REST:
        _register(m, p, t)
    for m in _P3_WS:
        _register(m, None, "websocket", package="Project.Sanad.dashboard.websockets")

    # package-local routers: NEW memory store + P3 convenience.
    try:
        import routes_memory
        app.include_router(routes_memory.router, prefix="/api/memory", tags=["memory"])
        loaded.append("routes_memory")
    except Exception as exc:
        failed["routes_memory"] = str(exc)
        log.exception("P3: routes_memory failed — /api/memory unavailable")
    try:
        import routes_p3
        app.include_router(routes_p3.router, prefix="/api/p3", tags=["p3"])
        loaded.append("routes_p3")
    except Exception as exc:
        failed["routes_p3"] = str(exc)
        log.exception("P3: routes_p3 failed")

    # Static assets of the shared SPA; the sub-directory is configurable.
    static_dir = BASE_DIR / app_cfg.get("static_subdir", "dashboard/static")
    try:
        app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")
    except Exception:
        log.exception("P3: static mount failed")

    @app.get("/api/package")
    async def package_info():
        # Package identity, license feature flags, masked API-key status and
        # the router load report.
        from sanad_pkg import license as _lic
        lic = _lic.current()
        api_key = {"has_key": False, "masked": "", "source": "default"}
        try:
            import Project.Sanad.config as _cfg_mod
            from Project.Sanad.dashboard.routes.voice import _mask_api_key
            _k = getattr(_cfg_mod, "GEMINI_API_KEY", "") or ""
            try:
                from Project.Sanad.config import load_config
                _stored = (load_config().get("gemini", {}) or {}).get("api_key")
            except Exception:
                _stored = None
            # "source" reports only whether a key is stored in the config file.
            api_key = {"has_key": bool(_k), "masked": _mask_api_key(_k),
                       "source": "config_file" if _stored else "default"}
        except Exception:
            log.exception("could not read api-key status")
        return {
            "package": PACKAGE, "title": PACKAGE_TITLE, "tabs": P3_SPA_TABS,
            "features": {"face_rec": bool(lic.feature("face_rec", True)),
                         "places": bool(lic.feature("places", True)),
                         "memory": bool(lic.feature("memory", True)),
                         "mask": bool(lic.feature("mask", True))},
            "api_key": api_key,
            "endpoints": {"recognition": "GET /api/recognition/*", "places": "GET /api/zones/*",
                          "memories": "GET /api/memory/", "mask": "GET /api/mask/status"},
            "loaded_routes": loaded, "failed_routes": failed, "license": lic.summary(),
        }

    def _filtered_spa():
        # Serve index.html with the tab-filter snippet injected before
        # </head>, else before </body>, else prepended to the document.
        index = static_dir / "index.html"
        if not index.exists():
            return JSONResponse({"message": "SPA index.html not found", "loaded": loaded, "failed": failed})
        try:
            html = index.read_text(encoding="utf-8")
            filt = _tab_filter_snippet()
            if "</head>" in html:
                html = html.replace("</head>", filt + "</head>", 1)
            elif "</body>" in html:
                html = html.replace("</body>", filt + "</body>", 1)
            else:
                html = filt + html
            return HTMLResponse(html)
        except OSError as exc:
            return JSONResponse({"error": "index.html unreadable: %s" % exc}, status_code=500)

    @app.get("/")
    async def root():
        return _filtered_spa()

    # "/full" serves the same filtered SPA as "/".
    @app.get("/full")
    async def full_dashboard():
        return _filtered_spa()

    log.info("P3 dashboard built — routers loaded=%s failed=%s", loaded, list(failed))
    return app
def _init_dds_for_audio():
    """Initialise the Unitree DDS channel factory for chest audio.

    Skipped entirely when ``SANAD_BUS_ADDR`` is set. Otherwise the factory is
    initialised on the interface named by ``SANAD_DDS_INTERFACE`` (default
    ``eth0``); any failure, including a missing SDK, is logged and swallowed.
    """
    bus_addr = os.environ.get("SANAD_BUS_ADDR")
    if not bus_addr:
        try:
            from unitree_sdk2py.core.channel import ChannelFactoryInitialize
            dds_iface = os.environ.get("SANAD_DDS_INTERFACE", "eth0")
            ChannelFactoryInitialize(0, dds_iface)
            log.info("P3: DDS ChannelFactoryInitialize(0, %s) done — chest audio enabled", dds_iface)
        except Exception:
            log.exception("P3: DDS init failed — chest audio unavailable (plugged still works)")
def _enforce_keyless_default():
    """Blank the Gemini API key unless one was explicitly provided.

    A key counts as provided when ``SANAD_GEMINI_API_KEY`` is non-blank or the
    loaded config has a non-blank ``gemini.api_key``. Otherwise
    ``GEMINI_API_KEY`` is set to ``""`` on the config module and, best effort,
    on the Gemini client module.
    """
    import Project.Sanad.config as _cfg

    from_env = (os.environ.get("SANAD_GEMINI_API_KEY") or "").strip()

    from_config = ""
    try:
        from Project.Sanad.config import load_config
        gemini_section = load_config().get("gemini") or {}
        from_config = (gemini_section.get("api_key") or "").strip()
    except Exception:
        pass  # unreadable config counts as "no stored key"

    if not (from_env or from_config):
        _cfg.GEMINI_API_KEY = ""
        try:
            import Project.Sanad.gemini.client as _gc
            _gc.GEMINI_API_KEY = ""
        except Exception:
            pass  # client module is optional at this point
        log.info("P3: keyless by default — customer adds a Gemini key via the dashboard")
def main():
    """Process entry point: build the subsystems and serve the P3 dashboard.

    Reads the bind address from ``SANAD_DASHBOARD_HOST`` /
    ``SANAD_DASHBOARD_PORT`` (defaults ``0.0.0.0`` / ``8013``), connects the
    package bus (best effort), initialises DDS for chest audio, enforces the
    keyless default, builds and wires the singletons, registers shutdown
    hooks for the mask and camera, and runs uvicorn until the process ends.
    """
    # A variable that is set but empty must behave like an unset one;
    # int("") would otherwise abort startup with ValueError.
    host = os.environ.get("SANAD_DASHBOARD_HOST") or "0.0.0.0"
    raw_port = (os.environ.get("SANAD_DASHBOARD_PORT") or "").strip()
    try:
        port = int(raw_port) if raw_port else 8013
    except ValueError:
        log.exception("P3: invalid SANAD_DASHBOARD_PORT=%r — falling back to 8013", raw_port)
        port = 8013
    log.info("Sanad P3 (Recognition + Places + Memories) starting — %s:%d", host, port)
    try:
        from sanad_pkg.bus import bus
        bus.connect()
    except Exception:
        log.exception("bus connect failed (continuing in-process)")
    _init_dds_for_audio()
    _enforce_keyless_default()
    singletons = _build_singletons()
    _wire_mask(singletons)
    _inject_main_shim(singletons)
    # Both shutdown hooks go through _try so an error in one cannot raise out
    # of the exit handlers or prevent the other from running.
    _mask = singletons.get("mask_face")
    if _mask is not None and hasattr(_mask, "shutdown"):
        atexit.register(lambda: _try(_mask.shutdown))
    _cam = singletons.get("camera")
    if _cam is not None and hasattr(_cam, "stop"):
        atexit.register(lambda: _try(_cam.stop))
    import uvicorn
    app = build_app()
    uvicorn.run(app, host=host, port=port, log_level="info")
if __name__ == "__main__":
main()

27
config/mask_config.json Normal file
View File

@ -0,0 +1,27 @@
{
"_comment": "Shining LED face mask (BLE). Driven by the FaceController subsystem (face/mask_face.py) which imports the standalone Mask project. Needs an env with bleak + Pillow (g1_env). Free the mask from the phone app before connecting.",
"mask_dir": "",
"_mask_dir": "Path to the Mask project (flat shiningmask lib). Empty -> auto: <Project>/Mask. Env override: SANAD_MASK_DIR.",
"name_prefix": "MASK",
"_name_prefix": "BLE scan prefix; the mask advertises e.g. 'MASK-02A711'. Env: SANAD_MASK_NAME_PREFIX.",
"address": "",
"_address": "Specific BLE MAC to connect to. Empty -> scan by name_prefix. Env: SANAD_MASK_ADDRESS.",
"adapter": "",
"_adapter": "BlueZ adapter (e.g. 'hci0'). Empty -> default. Env: SANAD_MASK_ADAPTER.",
"brightness": 95,
"_brightness": "0-128. Keep <=100 to avoid LED flicker (battery-limited).",
"fps": 8.0,
"_fps": "FaceAnimator (fallback driver) frame rate (PLAY commands/sec).",
"lifelike": true,
"_lifelike": "Use the LifelikeFace driver (face/face_motion.py): eye saccades, varied blinks, listening/thinking/speaking states, reactions, smooth lip-sync. false -> basic FaceAnimator.",
"autostart": true,
"_autostart": "Auto-connect + Start face on boot (best-effort, background — never blocks startup). After the one-time frame upload, later boots just connect + animate. false -> connect/start manually from the dashboard.",
"connect_timeout": 15.0,
"connect_attempts": 5,
"eye_color": [0, 230, 255],
"_eye_color": "Face eye/iris RGB (baked into the uploaded frames). Default cyan. Set via the dashboard 'Apply colors' (persisted here).",
"mouth_color": [255, 50, 50],
"_mouth_color": "Face mouth RGB. Default red.",
"sclera_color": [255, 255, 255],
"_sclera_color": "White-of-the-eye RGB. Default white."
}

11
config/p3_config.json Normal file
View File

@ -0,0 +1,11 @@
{
"_comment": "Sanad Package 3 (Recognition + Places + Memories) defaults. Precedence: env > license feature > this file.",
"package": "P3",
"title": "Sanad — Recognition + Places + Memories",
"language_default": "",
"audio_profile_default": "builtin",
"port": 8013,
"voice_brain": "gemini",
"spa_tabs": ["operations", "voice", "recognition", "mask", "recordings", "settings"],
"excluded": ["motion", "controller", "navigation", "livemap", "mapeditor", "temp", "terminal"]
}

0
data/audio/.gitkeep Normal file
View File

1
data/audio_device.json Normal file
View File

@ -0,0 +1 @@
{}

5
data/camera_device.json Normal file
View File

@ -0,0 +1,5 @@
{
"profile_serial_assignments": {
"realsense_primary": ""
}
}

0
data/faces/.gitkeep Normal file
View File

0
data/memories/.gitkeep Normal file
View File

21
data/motions/config.json Normal file
View File

@ -0,0 +1,21 @@
{
"gemini": {
"api_key": "",
"model": "models/gemini-2.5-flash-native-audio-preview-12-2025",
"voice_name": "Charon"
},
"audio": {
"send_sample_rate": 16000,
"receive_sample_rate": 24000,
"chunk_size": 512,
"g1_volume": 100
},
"motion": {
"action_cooldown_sec": 1.0,
"replay_hz": 60.0
},
"dashboard": {
"host": "0.0.0.0",
"port": 8000
}
}

0
data/photos/.gitkeep Normal file
View File

0
data/recordings/.gitkeep Normal file
View File

0
data/zones/.gitkeep Normal file
View File

63
docker-compose.yml Normal file
View File

@ -0,0 +1,63 @@
# Self-contained compose for Sanad Package 3 (Recognition + Places + Memories).
# docker compose up -d --build # -> http://<robot>:8013
# Jetson without buildx: DOCKER_BUILDKIT=0 docker compose up -d --build
services:
p3:
build:
context: .
dockerfile: Dockerfile
args:
BASE_OS_IMAGE: "${BASE_OS_IMAGE:-python:3.10-slim-bookworm}"
WITH_UNITREE_SDK: "${WITH_UNITREE_SDK:-1}"
image: "${SANAD_IMAGE:-sanad-p3:latest}"
container_name: sanad-p3
network_mode: host
restart: unless-stopped
cap_add:
- NET_ADMIN
# camera V4L nodes are group 'video'; grant it so cv2.VideoCapture can open them.
group_add:
- video
environment:
SANAD_PACKAGE: P3
SANAD_DASHBOARD_PORT: "8013"
SANAD_DASHBOARD_HOST: "0.0.0.0"
SANAD_VOICE_BRAIN: gemini
SANAD_AUDIO_PROFILE: "${SANAD_AUDIO_PROFILE:-builtin}" # builtin (chest, on a G1) | plugged (USB)
SANAD_DDS_INTERFACE: "${SANAD_DDS_INTERFACE:-eth0}"
# Plugged/Bluetooth audio (Anker USB, JBL BT) route through the HOST
# PulseAudio server — chest ("builtin") uses DDS and needs none of this.
PULSE_SERVER: "${PULSE_SERVER:-unix:/run/user/1000/pulse/native}"
PULSE_COOKIE: "${PULSE_COOKIE:-/run/user/1000/pulse/cookie}" # root→uid-1000 PA auth
SANAD_MASK_DIR: /app/mask
SANAD_MEMORIES_DIR: /app/Sanad/data/memories
# Pin the colour camera node — on a RealSense the colour node is NOT video0.
SANAD_CAMERA_USB_INDEX: "${SANAD_CAMERA_USB_INDEX:-}"
SANAD_LICENSE: /etc/sanad/sanad.lic
SANAD_PUBKEY: /etc/sanad/pubkey.ed25519
SANAD_LICENSE_BIND: "${SANAD_LICENSE_BIND:-0}"
devices:
- "/dev/snd:/dev/snd"
- "/dev/bus/usb:/dev/bus/usb" # USB camera + BLE dongle
# Camera V4L node(s). video0 is the common USB-webcam default; a RealSense
# exposes ~6 nodes and the colour node is NOT video0 — add the others (or use
# SANAD_CAMERA_USB_INDEX). If a node is absent, comment its line out.
# Camera is OPTIONAL to boot: a bind of an ABSENT /dev/video0 makes `up` HARD-FAIL.
# UNCOMMENT + set to YOUR camera V4L node (RealSense colour node is NOT video0) to enable recognition:
# - "/dev/video0:/dev/video0"
# - "/dev/video1:/dev/video1"
# - "/dev/video2:/dev/video2"
# - "/dev/video4:/dev/video4"
volumes:
- "${SANAD_LICENSE_FILE:-./license/sanad.lic.example}:/etc/sanad/sanad.lic:ro"
# Plugged/Bluetooth audio (Anker/JBL): mount the host PulseAudio runtime dir
# (socket + auth cookie) so the container sets ANY sink's volume/output — the
# volume slider then drives chest (DDS) AND plugged (PA), exactly like SanadV3.
# Chest ("builtin") audio uses DDS only and needs none of this. One-time host
# setup for a stable boot-time socket: `loginctl enable-linger unitree`.
- "${SANAD_PULSE_DIR:-/run/user/1000/pulse}:/run/user/1000/pulse"
- "./data:/app/Sanad/data" # persist faces / zones / memories / recordings
- "./config/mask_config.json:/app/Sanad/config/mask_config.json"
- "/var/run/dbus:/var/run/dbus" # host BlueZ/D-Bus for the LED mask
# Bound license also needs the host machine-id:
# - "/etc/machine-id:/etc/machine-id:ro"

73
entrypoint.sh Executable file
View File

@ -0,0 +1,73 @@
#!/usr/bin/env bash
# Sanad Package 3 (Recognition) entrypoint.
# Steps: 1) license gate  2) resolve P3 env (env > license > config)
#        3) preflight     4) launch.
set -u
PKG="P3"
CFG="/app/pkg3_config/p3_config.json"
# ── 1. license gate ──────────────────────────────────────────────────────────
# license_check exits 0 only when entitled. When NOT entitled the CONTAINER
# exits with code 0, intended to keep a restart policy from crash-looping.
# NOTE(review): docker-compose.yml uses `restart: unless-stopped`, which
# restarts on ANY exit code; exit 0 only stops the loop under `on-failure`.
python3 -m sanad_pkg.license_check "$PKG" || {
  echo "[$PKG] not licensed for this robot — container exiting cleanly."
  exit 0
}
# ── 2. resolve config (env wins, then license feature, then config file) ──────
# read_cfg <key> — print the value of <key> from the JSON file at $CFG.
# Prints an empty line when the file is unreadable, the key is absent, or the
# value is falsy; never fails (stderr is discarded, exit status forced to 0).
read_cfg() {
  local wanted="$1"
  python3 -c '
import json, sys
cfg_path, cfg_key = sys.argv[1], sys.argv[2]
try:
    print(json.load(open(cfg_path)).get(cfg_key, "") or "")
except Exception:
    print("")
' "$CFG" "$wanted" 2>/dev/null || true
}
# Language: empty = MULTILINGUAL auto-detect. A fixed language is used only if
# the operator (env), the license feature, or the config file pins one — checked
# in that order.
if [ -z "${SANAD_LANGUAGE:-}" ]; then
  SANAD_LANGUAGE="$(python3 -c 'from sanad_pkg import license as L; print(L.feature("language","") or "")' 2>/dev/null || true)"
  if [ -z "$SANAD_LANGUAGE" ]; then
    SANAD_LANGUAGE="$(read_cfg language_default)"
  fi
fi
export SANAD_LANGUAGE

export SANAD_VOICE_BRAIN="${SANAD_VOICE_BRAIN:-gemini}"

# Audio profile: env, then config file, then "builtin".
if [ -z "${SANAD_AUDIO_PROFILE:-}" ]; then
  SANAD_AUDIO_PROFILE="$(read_cfg audio_profile_default)"
fi
export SANAD_AUDIO_PROFILE="${SANAD_AUDIO_PROFILE:-builtin}"

# Dashboard bind address: env, then config file (port only), then defaults.
export SANAD_DASHBOARD_HOST="${SANAD_DASHBOARD_HOST:-0.0.0.0}"
if [ -z "${SANAD_DASHBOARD_PORT:-}" ]; then
  SANAD_DASHBOARD_PORT="$(read_cfg port)"
fi
export SANAD_DASHBOARD_PORT="${SANAD_DASHBOARD_PORT:-8013}"

export SANAD_MASK_DIR="${SANAD_MASK_DIR:-/app/mask}"
export PYTHONUNBUFFERED=1

# Jetson + Unitree SDK OpenMP load-order fix (only if the lib exists; override-able).
if [ -z "${LD_PRELOAD:-}" ]; then
  if [ -f /usr/lib/aarch64-linux-gnu/libgomp.so.1 ]; then
    export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1
  fi
fi

echo "[$PKG] entitled — lang=${SANAD_LANGUAGE:-<multilingual>} audio=$SANAD_AUDIO_PROFILE port=$SANAD_DASHBOARD_PORT brain=$SANAD_VOICE_BRAIN mask_dir=$SANAD_MASK_DIR"
# ── 3. preflight (clear diagnostics) ─────────────────────────────────────────
python3 - "$SANAD_AUDIO_PROFILE" "$SANAD_MASK_DIR" <<'PY' || true
import importlib.util as u, sys
profile = sys.argv[1] if len(sys.argv) > 1 else "builtin"
mask_dir = sys.argv[2] if len(sys.argv) > 2 else "/app/mask"
def has(m): return u.find_spec(m) is not None
print("[P3] preflight:")
ok = sys.version_info >= (3, 9)
print(" python : %s %s" % (".".join(map(str, sys.version_info[:3])),
"OK" if ok else "TOO OLD — google-genai needs >=3.9"))
print(" google-genai : %s" % ("OK" if has("google.genai") else "MISSING — live conversation will NOT work"))
print(" pyaudio : %s" % ("OK" if has("pyaudio") else "missing — mic/speaker capture limited"))
print(" bleak (mask) : %s" % ("OK" if has("bleak") else "MISSING — LED mask will NOT connect"))
print(" Pillow (face) : %s" % ("OK" if has("PIL") else "missing — LifelikeFace falls back to FaceAnimator"))
sys.path.insert(0, mask_dir)
print(" mask lib : %s (%s)" % ("OK" if has("mask") else "MISSING", mask_dir))
sdk = has("unitree_sdk2py")
print(" unitree SDK : %s" % ("OK" if sdk else "absent"))
if profile == "builtin" and not sdk:
print(" >> NOTE: audio profile 'builtin' (G1 chest) needs the Unitree SDK, which is")
print(" absent. Plug a USB speaker/mic and set SANAD_AUDIO_PROFILE=plugged.")
PY
exec python3 /app/app_p3.py

1
license/pubkey.ed25519 Normal file
View File

@ -0,0 +1 @@
ZOFerXRMTVQxkxsawjmGXJz8n5HmXfb8qLMhO/7DIC4=

27
license/sanad.lic.example Normal file
View File

@ -0,0 +1,27 @@
{
"payload": {
"robot_id": "G1-SN-DEMO-0001",
"machine_fingerprint": null,
"packages": {
"P1": true,
"P2": true,
"P3": true,
"P4": false
},
"features": {
"language": "",
"multilingual": true,
"voice_command_motion": true,
"lipsync": true,
"mask": true,
"face_rec": true,
"places": true,
"memory": true,
"guide_tour": false,
"navigation": false
},
"issued": "2026-06-22",
"expires": "2030-01-01"
},
"sig": "WSI7gPG0mj1FhkOzRWcmhgo1mHmubOmPFycZF0mKUPqRaFQMD7GIil6sYlVE5njBtYb7EOevw6cetjcKOqkxCg=="
}

62
p3ctl.sh Executable file
View File

@ -0,0 +1,62 @@
#!/usr/bin/env bash
# p3ctl.sh — run / stop Sanad Package 3 (Recognition+Places+Memories) in dev mode
# (no Docker), against the vendored engine in ./vendor. Self-contained: no sibling
# Sanad/ checkout needed.
#
# ./p3ctl.sh start | stop | restart | status | logs [N]
#
# The conda env must have google-genai AND (for the LED mask) bleak==0.22.3 + Pillow.
# Override env: SANAD_P3_PY, SANAD_DASHBOARD_PORT (8013), SANAD_AUDIO_PROFILE (builtin),
# SANAD_DDS_INTERFACE (eth0), SANAD_MASK_DIR, SANAD_LICENSE / SANAD_PUBKEY.
set -u
PKG_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PY="${SANAD_P3_PY:-$HOME/miniconda3/envs/gemini_sdk/bin/python}"
PORT="${SANAD_DASHBOARD_PORT:-8013}"
APP="$PKG_DIR/app_p3.py"
LOG="$PKG_DIR/p3.log"
LIC="${SANAD_LICENSE:-$PKG_DIR/license/sanad.lic}"; [ -f "$LIC" ] || LIC="$PKG_DIR/license/sanad.lic.example"
# _start — launch app_p3.py in the background (dev mode, no Docker).
# Returns 0 when the app is already running or is alive 3 s after launch, and 1
# when the app file is missing, $PKG_DIR cannot be entered, or the process is not
# found after launch (the last 20 log lines are printed in that case).
_start() {
  if pgrep -f app_p3.py >/dev/null 2>&1; then echo "P3 already running on :$PORT"; return 0; fi
  [ -f "$APP" ] || { echo "ERROR: $APP not found (deploy first)"; return 1; }
  # Do not launch from an unexpected working directory if the cd fails.
  cd "$PKG_DIR" || { echo "ERROR: cannot cd to $PKG_DIR"; return 1; }
  export SANAD_APP_DIR="$PKG_DIR/vendor" \
    SANAD_LICENSE="$LIC" \
    SANAD_PUBKEY="${SANAD_PUBKEY:-$PKG_DIR/license/pubkey.ed25519}" \
    SANAD_MASK_DIR="${SANAD_MASK_DIR:-$PKG_DIR/vendor/mask}" \
    PYTHONPATH="$PKG_DIR/vendor" \
    SANAD_DASHBOARD_PORT="$PORT" SANAD_DASHBOARD_HOST="0.0.0.0" \
    SANAD_VOICE_BRAIN="gemini" \
    SANAD_AUDIO_PROFILE="${SANAD_AUDIO_PROFILE:-builtin}" \
    SANAD_DDS_INTERFACE="${SANAD_DDS_INTERFACE:-eth0}" \
    PYTHONUNBUFFERED=1
  # Preload libgomp only where the aarch64 library exists.
  [ -f /usr/lib/aarch64-linux-gnu/libgomp.so.1 ] && export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1
  nohup "$PY" "$APP" > "$LOG" 2>&1 &
  sleep 3
  if pgrep -f app_p3.py >/dev/null 2>&1; then
    echo "P3 started -> http://$(hostname -I | awk '{print $1}'):$PORT (log: $LOG)"
  else
    echo "P3 failed to start. Last log lines:"; tail -20 "$LOG"
    # Surface the failure to the caller instead of returning tail's status (0).
    return 1
  fi
}
# _stop — terminate every process whose command line matches app_p3.py.
# Sends the default pkill signal, waits up to 8 s, then escalates to SIGKILL.
# Returns 0 when nothing was running or the process is gone afterwards, and 1
# when a matching process is still alive after the SIGKILL.
_stop() {
  pgrep -f app_p3.py >/dev/null 2>&1 || { echo "P3 was not running."; return 0; }
  pkill -f app_p3.py 2>/dev/null
  for _ in $(seq 1 8); do pgrep -f app_p3.py >/dev/null 2>&1 || break; sleep 1; done
  pgrep -f app_p3.py >/dev/null 2>&1 && pkill -9 -f app_p3.py 2>/dev/null
  sleep 1
  if pgrep -f app_p3.py >/dev/null 2>&1; then
    echo "P3 still running (could not kill)."
    return 1
  fi
  echo "P3 stopped."
}
# _status — list matching app_p3.py processes and, when one exists, print the
# body of the local /api/health endpoint (4 s timeout) after a "health: " prefix.
_status() {
  if ! pgrep -af app_p3.py; then
    echo "P3 not running."
    return
  fi
  echo -n "health: "
  curl -s --max-time 4 "http://127.0.0.1:$PORT/api/health"
  echo
}
# _logs [N] — print the last N lines (default 40) of $LOG, or a short notice when
# the log cannot be read.
_logs() {
  local count="${1:-40}"
  if ! tail -n "$count" "$LOG" 2>/dev/null; then
    echo "no log at $LOG"
  fi
}
# Sub-command dispatch. An absent or unknown sub-command prints the usage line
# and exits with status 2.
case "${1:-}" in
start) _start ;;
stop) _stop ;;
# restart = stop, pause 2 s, start again
restart) _stop; sleep 2; _start ;;
status) _status ;;
# drop the sub-command so the optional line count becomes $1 (default 40)
logs) shift; _logs "${1:-40}" ;;
*) echo "usage: $0 {start|stop|restart|status|logs [N]}"; exit 2 ;;
esac

34
requirements.txt Normal file
View File

@ -0,0 +1,34 @@
# Sanad Package 3 — Recognition + Places + Memories — self-contained deps.
# Comms + mask (like P2) + camera vision (opencv). Recognition is Gemini-side
# (in-context primers) — NO local ML / torch / face-embeddings.
# Web dashboard
fastapi
uvicorn[standard]
pydantic
python-multipart
websockets
# IPC bus shim + offline license verification
pyzmq
cryptography
# Numerics + image
numpy
Pillow
# Gemini Live voice + audio I/O
google-genai>=1.0.0
pyaudio
soundfile
requests
# Camera vision — JPEG-encode + USB (/dev/video*) capture. Headless (no GUI/libGL).
# pyrealsense2 is deliberately OMITTED (PyPI wheel's newer glibc ImportErrors on
# slim-bookworm); camera.py probes pyrealsense2 then falls back to cv2 V4L capture,
# so a USB colour camera works via opencv alone. Add pyrealsense2 (built from
# source) only if RealSense-specific depth is required.
opencv-python-headless
# Mask (BLE LED "Shining Mask") — expressions on recognition.
bleak==0.22.3

106
routes_memory.py Normal file
View File

@ -0,0 +1,106 @@
"""/api/memory — visitor-memory CRUD (P3, package-local, mounted by app_p3.py).
The VisitorMemory singleton is resolved lazily from the Project.Sanad.main shim
(app_p3 sets it), so a missing store degrades to 503 rather than crashing.
Kept Python-3.8 compatible.
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
router = APIRouter()
def _mem():
    """Return the visitor-memory store exposed as `Project.Sanad.main.memory`.

    The import happens on every call. If it fails, or the attribute is None,
    the request is answered with HTTP 503 instead of crashing.
    """
    store = None
    try:
        from Project.Sanad.main import memory as store
    except Exception:
        store = None
    if store is not None:
        return store
    raise HTTPException(503, "visitor memory store unavailable")
class ProfileCreate(BaseModel):
    """Request body for creating a visitor profile (POST /)."""
    # Required; create_profile rejects a blank/whitespace-only value with HTTP 400.
    name: str
    # Optional free-form key/value data, forwarded to the store unchanged.
    attributes: Optional[Dict[str, Any]] = None
    # Optional notes; create_profile passes "" to the store when this is empty/None.
    notes: Optional[str] = ""
    # Optional list of tags, forwarded to the store unchanged.
    tags: Optional[List[str]] = None
    # Optional face id to link; create_profile passes "" when this is empty/None.
    linked_face_id: Optional[str] = ""
class ProfileUpdate(BaseModel):
    """Request body for a partial profile update (PUT /{pid}).

    Every field defaults to None; update_profile forwards only the fields the
    client actually sent (exclude_unset).
    """
    name: Optional[str] = None
    attributes: Optional[Dict[str, Any]] = None
    notes: Optional[str] = None
    tags: Optional[List[str]] = None
    linked_face_id: Optional[str] = None
@router.get("/")
async def list_profiles():
    """Return every profile held by the visitor-memory store."""
    profiles = _mem().list()
    return {"ok": True, "profiles": profiles}
@router.post("/")
async def create_profile(payload: ProfileCreate):
    """Create a visitor profile from the request body.

    The name is stripped of surrounding whitespace; a blank name yields HTTP 400
    before the store is touched. Empty notes / linked_face_id become "".
    """
    clean_name = (payload.name or "").strip()
    if not clean_name:
        raise HTTPException(400, "name is required")
    created = _mem().add(
        clean_name,
        attributes=payload.attributes,
        notes=payload.notes or "",
        tags=payload.tags,
        linked_face_id=payload.linked_face_id or "",
    )
    return {"ok": True, "profile": created}
@router.get("/status")
async def status():
    """Return the store's own status() payload unchanged."""
    store = _mem()
    return store.status()
@router.get("/primer")
async def primer():
    """The compact known-visitors summary fed into personalized greetings."""
    summary = _mem().load_for_primer()
    return {"ok": True, "primer": summary}
@router.get("/by-face/{face_id}")
async def by_face(face_id: str):
    """Look up the profile linked to `face_id`; HTTP 404 when the store returns None."""
    linked = _mem().find_by_face(face_id)
    if linked is not None:
        return {"ok": True, "profile": linked}
    raise HTTPException(404, "no visitor linked to face %s" % face_id)
@router.get("/{pid}")
async def get_profile(pid: str):
    """Fetch one profile by id; HTTP 404 when the store returns None."""
    found = _mem().get(pid)
    if found is not None:
        return {"ok": True, "profile": found}
    raise HTTPException(404, "no visitor %s" % pid)
@router.put("/{pid}")
async def update_profile(pid: str, payload: ProfileUpdate):
    """Apply a partial update to profile `pid`.

    Only the fields the client actually sent are forwarded to the store. A
    string `name` is stripped and must not be blank (HTTP 400), matching the
    rule create_profile enforces. HTTP 404 when the store returns None.
    """
    # pydantic v2 renamed .dict() to .model_dump(); support both because the
    # requirements file does not pin a pydantic major version.
    dump = getattr(payload, "model_dump", None) or payload.dict
    changes = dump(exclude_unset=True)
    new_name = changes.get("name")
    if isinstance(new_name, str):
        new_name = new_name.strip()
        if not new_name:
            raise HTTPException(400, "name is required")
        changes["name"] = new_name
    prof = _mem().update(pid, **changes)
    if prof is None:
        raise HTTPException(404, "no visitor %s" % pid)
    return {"ok": True, "profile": prof}
@router.post("/{pid}/touch")
async def touch_profile(pid: str):
    """Call the store's touch() for `pid`; HTTP 404 when it returns None."""
    touched = _mem().touch(pid)
    if touched is not None:
        return {"ok": True, "profile": touched}
    raise HTTPException(404, "no visitor %s" % pid)
@router.delete("/{pid}")
async def delete_profile(pid: str):
    """Delete profile `pid`; HTTP 404 when the store reports a falsy result."""
    removed = _mem().delete(pid)
    if removed:
        return {"ok": True, "deleted": pid}
    raise HTTPException(404, "no visitor %s" % pid)

250
routes_p3.py Normal file
View File

@ -0,0 +1,250 @@
"""P3-specific dashboard routes (mounted at /api/p3 by app_p3.py).
Same first-class settings as P1 (REUSE Sanad's canonical logic, no fork) — the
one thing the base routes don't do is apply a change to the LIVE Gemini session
immediately by restarting the voice subprocess (the child reads the API key +
persona at spawn time). P3 is a superset of P1, so these are identical to P1's
convenience routes; the premium features (live-voice, wake-phrases, motion,
skills, mask) are served by the vendored Sanad routers mounted alongside.
/api/p3/api-key GET masked status | POST set/update + live-restart
/api/p3/persona GET current persona+rules | POST update persona + live-restart
/api/p3/say speak a typed line (local, or via bus/hwbroker if SANAD_BUS_ADDR)
/api/p3/logs/delete delete all logs
/api/p3/settings one-shot view (api-key + persona + language + audio + live)
Kept Python-3.8 compatible.
"""
from __future__ import annotations
import asyncio
import base64
import os
from fastapi import APIRouter, HTTPException
from Project.Sanad.core.logger import get_logger
from Project.Sanad.dashboard.routes import voice as _voice # reuse api-key logic
from Project.Sanad.dashboard.routes import prompt as _prompt # reuse persona logic
from Project.Sanad.dashboard.routes import typed_replay as _tr # reuse local TTS say
from sanad_pkg.bus import bus
# Bind request models as module-level names so FastAPI resolves body annotations
# cleanly under `from __future__ import annotations`.
ApiKeyPayload = _voice.ApiKeyPayload
PromptUpdate = _prompt.PromptUpdate
SayPayload = _tr.SayPayload
log = get_logger("pkg3.routes")
router = APIRouter()
async def _restart_live_if_running() -> bool:
    """Restart the live Gemini subprocess (if running) so a new key/persona takes
    effect immediately.

    Returns:
        True only when the subprocess was running and start() completed without
        raising. False when it is absent, not running, or start/import failed.
        Failures are logged and never propagate to the caller.
    """
    try:
        from Project.Sanad.main import live_sub
        is_running = getattr(live_sub, "is_running", None)
        if live_sub is None or not callable(is_running) or not is_running():
            return False
        try:
            live_sub.stop()
        except Exception:
            # A failed stop is logged but does not prevent the start attempt.
            log.exception("live_sub.stop() failed")
        try:
            # run_in_executor instead of asyncio.to_thread (3.9+): keeps this
            # module importable and runnable on Python 3.8 as its header states.
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(None, live_sub.start)
            return True
        except Exception:
            log.exception("live_sub.start() failed")
            return False
    except Exception:
        log.exception("could not restart live subprocess")
        return False
# ─────────────────────────── Gemini API key ───────────────────────────
def _persist_and_hotswap_key(key: str) -> None:
    """Store `key` as gemini.api_key via load_config/save_config and hot-swap it
    in memory.

    An empty key removes the entry instead of storing "". The original note names
    data/motions/config.json as the backing file. Both Project.Sanad.config and
    Project.Sanad.gemini.client get their GEMINI_API_KEY global patched; a
    failure to patch the client module is logged, not raised.
    """
    from Project.Sanad.config import load_config, save_config
    import Project.Sanad.config as _cfg_mod

    config = load_config() or {}
    section = config.get("gemini")
    if not isinstance(section, dict):
        section = {}
    if not key:
        section.pop("api_key", None)
    else:
        section["api_key"] = key
    config["gemini"] = section
    save_config(config)

    _cfg_mod.GEMINI_API_KEY = key
    try:
        import Project.Sanad.gemini.client as _gc
        _gc.GEMINI_API_KEY = key
    except Exception:
        log.exception("could not patch gemini.client.GEMINI_API_KEY")
async def _disconnect_voice():
    """Disconnect the shared voice client when one exists and reports `connected`.

    Any import or disconnect error is logged and swallowed.
    """
    try:
        from Project.Sanad.main import voice_client
        if voice_client is None:
            return
        if not getattr(voice_client, "connected", False):
            return
        await voice_client.disconnect()
    except Exception:
        log.exception("voice_client disconnect failed")
@router.get("/api-key")
async def p3_get_api_key():
    """Masked current key + where it came from (delegates to the voice route)."""
    key_info = await _voice.get_api_key()
    return key_info
@router.post("/api-key")
async def p3_set_api_key(payload: ApiKeyPayload):
    """ADD / update the Gemini API key.

    Validation is relaxed: anything of at least 10 characters after stripping is
    accepted (AIza… keys as well as AQ. / ephemeral tokens); shorter input gets
    HTTP 400. The key is persisted, hot-swapped in memory, the voice client is
    disconnected and a running live session is restarted.
    """
    key = (payload.api_key or "").strip()
    if len(key) < 10:
        raise HTTPException(400, "API key looks too short (paste the full key).")

    _persist_and_hotswap_key(key)
    await _disconnect_voice()
    restarted = await _restart_live_if_running()

    if restarted:
        suffix = " and applied (live session restarted)."
    else:
        suffix = " — start the session to use it."
    return {
        "ok": True,
        "masked": _voice._mask_api_key(key),
        "source": "config_file",
        "live_subprocess_restarted": restarted,
        "message": "API key added" + suffix,
    }
@router.post("/api-key/delete")
async def p3_delete_api_key():
    """DELETE the Gemini API key: persist an empty key (which removes the stored
    entry), clear the in-memory copies, disconnect the voice client and restart a
    running live session. Conversation stops until a new key is added."""
    _persist_and_hotswap_key("")
    await _disconnect_voice()
    was_restarted = await _restart_live_if_running()
    response = {
        "ok": True,
        "deleted": True,
        "live_subprocess_restarted": was_restarted,
        "message": "API key deleted. Add a new key to re-enable conversation.",
    }
    return response
# ─────────────────────────── Robot persona ───────────────────────────
@router.get("/persona")
async def p3_get_persona():
    """Return whatever the canonical prompt route reports for the current persona
    (per the original note: system prompt, parsed rules and file paths)."""
    persona_info = await _prompt.get_prompt()
    return persona_info
@router.post("/persona")
async def p3_set_persona(payload: PromptUpdate):
    """Change the robot persona and apply it to the live session.

    The update itself is delegated to the canonical prompt route (per the
    original note it writes scripts/sanad_script.txt); afterwards a running live
    session is restarted so the new persona is spoken immediately. The persona is
    also where language/dialect is steered. The delegated result is returned with
    `live_subprocess_restarted` and `message` added.
    """
    saved = await _prompt.update_prompt(payload)
    restarted = await _restart_live_if_running()
    if restarted:
        note = "Persona saved and applied — live session restarted."
    else:
        note = "Persona saved. Start (or restart) the live session to use the new persona."
    saved["live_subprocess_restarted"] = restarted
    saved["message"] = note
    return saved
# ─────────────────────────── say a line ───────────────────────────
@router.post("/say")
async def p3_say(payload: SayPayload):
    """Speak a typed line.

    Standalone (no bus) → play locally via Sanad's typed-replay.
    Multi-package (SANAD_BUS_ADDR set) → synth via Gemini and hand the PCM to the
    hwbroker `speak.request` (audio-lock).

    Raises:
        HTTPException(400): the text is empty after stripping.
        HTTPException(503): the voice client is missing / cannot be imported, or
            connecting to Gemini failed.
        HTTPException(502): Gemini raised while synthesising the line.
    """
    text = (payload.text or "").strip()
    if not text:
        raise HTTPException(400, "text cannot be empty")
    if not os.environ.get("SANAD_BUS_ADDR"):
        return await _tr.say(payload)
    # Guard the lazy import like the sibling helpers do, so a broken main shim
    # yields a 503 instead of an unhandled 500.
    try:
        from Project.Sanad.main import voice_client
    except Exception:
        log.exception("could not import voice_client")
        voice_client = None
    if voice_client is None:
        raise HTTPException(503, "voice client unavailable")
    if not getattr(voice_client, "connected", False):
        try:
            await voice_client.connect()
        except Exception as exc:
            raise HTTPException(503, "Gemini connect failed: %s" % exc) from exc
    try:
        audio, _parts = await voice_client.send_text(text, owner="p3_say")
    except Exception as exc:
        raise HTTPException(502, "Gemini error: %s" % exc) from exc
    if not audio:
        return {"ok": False, "routed": "hwbroker", "reason": "no audio produced"}
    # The PCM is base64-encoded for the bus and tagged 24 kHz / mono / 2-byte samples.
    bus.emit_sync("speak.request", owner="p3",
                  pcm_b64=base64.b64encode(audio).decode("ascii"),
                  rate=24000, channels=1, sampwidth=2)
    return {"ok": True, "routed": "hwbroker (audio-lock)"}
# ─────────────────────────── logs ───────────────────────────
@router.post("/logs/delete")
async def p3_delete_logs():
    """Delete all log files matching `*.log*` in LOGS_DIR.

    Files whose name ends in `.log` and does not contain `_snapshot_` are
    truncated in place; every other match is removed. Non-file matches are
    skipped.

    Returns:
        `ok` is True only when the directory scan worked and no file failed.
        `cleared` / `count` describe the files handled; `failed` lists the names
        that could not be cleared (details are in the log).
    """
    from Project.Sanad.config import LOGS_DIR
    cleared = []
    failed = []
    scan_ok = True
    try:
        for p in sorted(LOGS_DIR.glob("*.log*")):
            if not p.is_file():
                continue
            try:
                if p.name.endswith(".log") and "_snapshot_" not in p.name:
                    # Truncate rather than unlink: the file may be held open.
                    with open(p, "w"):
                        pass
                else:
                    p.unlink()
                cleared.append(p.name)
            except Exception:
                failed.append(p.name)
                log.exception("could not clear log %s", p.name)
    except Exception:
        scan_ok = False
        log.exception("delete logs failed")
    return {
        "ok": scan_ok and not failed,
        "count": len(cleared),
        "cleared": cleared,
        "failed": failed,
    }
# ─────────────────────────── combined view ───────────────────────────
@router.get("/settings")
async def p3_settings():
    """One-shot P3 settings: api-key status + persona + language + audio + live + mask.

    `persona_preview` holds the first 400 characters of the persona, followed by
    "…" only when the persona is longer than that. The live-session and mask
    probes are best-effort: any failure leaves their default (False / None).
    """
    key_status = await _voice.get_api_key()
    persona = ""
    try:
        persona = _prompt._load_system_prompt()
    except Exception:
        log.exception("could not load persona")
    live_running = False
    try:
        from Project.Sanad.main import live_sub
        is_running = getattr(live_sub, "is_running", None)
        live_running = bool(live_sub is not None and callable(is_running) and is_running())
    except Exception:
        pass  # best-effort: report "not running" when the shim is unavailable
    mask_status = None
    try:
        from Project.Sanad.main import mask_face
        if mask_face is not None and hasattr(mask_face, "status"):
            mask_status = mask_face.status()
    except Exception:
        pass  # best-effort: mask status stays None
    # Mark truncation explicitly; previously both ternary branches were "".
    preview = persona[:400]
    if len(persona) > 400:
        preview += "…"
    return {
        "package": "P3",
        "api_key": key_status,
        "persona_preview": preview,
        "persona_chars": len(persona),
        "language": os.environ.get("SANAD_LANGUAGE", "") or "(multilingual auto-detect)",
        "audio_profile": os.environ.get("SANAD_AUDIO_PROFILE", "builtin"),
        "live_running": live_running,
        "mask": mask_status,
    }

24
strip_key.py Normal file
View File

@ -0,0 +1,24 @@
#!/usr/bin/env python3
"""Build-time: blank any Gemini API key baked into the Sanad config so the P3
image ships KEYLESS — the customer adds their own via the dashboard. Idempotent
and best-effort (never fails the build)."""
import json

BASE = "/app/Sanad"

# (config file relative to BASE, name of the section that may hold an api_key)
_TARGETS = (
    ("config/core_config.json", "gemini_defaults"),
    ("data/motions/config.json", "gemini"),
)


def _blank_key(rel, section):
    """Blank `<section>.api_key` in BASE/rel when it holds a non-empty value.

    An unreadable or unparsable file is skipped silently; a write failure is
    reported on stdout.
    """
    path = "%s/%s" % (BASE, rel)
    try:
        with open(path) as src:
            doc = json.load(src)
    except Exception:
        return
    sec = doc.get(section)
    if not (isinstance(sec, dict) and sec.get("api_key")):
        return
    sec["api_key"] = ""
    try:
        with open(path, "w") as dst:
            json.dump(doc, dst, ensure_ascii=False, indent=2)
        print("strip_key: blanked %s.api_key in %s" % (section, rel))
    except Exception as exc:
        print("strip_key: could not write %s: %s" % (rel, exc))


for _rel, _section in _TARGETS:
    _blank_key(_rel, _section)

83
sync_vendor.sh Executable file
View File

@ -0,0 +1,83 @@
#!/usr/bin/env bash
# Refresh the vendored SanadV3 engine + sanad_pkg + flat BLE Mask lib from a full
# monorepo checkout. P2 ships SELF-CONTAINED copies under ./vendor so the repo
# builds standalone. When SanadV3 or Project/Mask change upstream, run this from a
# checkout that has Project/Sanadv3 + Project/Mask + Project/Packages, then commit
# the updated ./vendor + ./data + ./license.
#
# ./sync_vendor.sh [/path/to/Project] # default: ../../ (Packages/.. = Project/)
#
# Excludes runtime data, Logs, caches, the temp3d 3D viewer, and tests.
set -euo pipefail
HERE="$(cd "$(dirname "$0")" && pwd)"
PROJECT="${1:-$(cd "$HERE/../.." && pwd)}" # = Project/G1 (packages live under G1/)
SRC_V3="$PROJECT/Sanadv3"
SRC_PKG="$PROJECT/Packages/sanad_pkg"
SRC_LIC="$PROJECT/Packages/licensing"
# The flat Mask lib moved OUT of the robot tree in the G1/ reorg (now Project/Other/Mask).
# Try a few locations; override with SANAD_MASK_SRC=/abs/path if it moves again.
SRC_MASK="${SANAD_MASK_SRC:-}"
if [ -z "$SRC_MASK" ]; then
for c in "$PROJECT/Mask" "$PROJECT/../Other/Mask" "$PROJECT/Other/Mask" "$(dirname "$PROJECT")/Other/Mask"; do
[ -d "$c" ] && { SRC_MASK="$(cd "$c" && pwd)"; break; }
done
fi
# Validate every required input BEFORE the destructive `rm -rf vendor` below, so a
# missing source aborts (set -e) while the existing ./vendor is still intact.
[ -d "$SRC_V3" ] || { echo "ERROR: no Sanadv3/ at $SRC_V3"; exit 1; }
# Copied unconditionally into the data seed further down.
[ -f "$SRC_V3/data/motions/config.json" ] || { echo "ERROR: no data/motions/config.json under $SRC_V3"; exit 1; }
[ -d "$SRC_MASK" ] || { echo "ERROR: Mask lib not found (set SANAD_MASK_SRC=/path/to/Mask)"; exit 1; }
echo ">> using Mask lib: $SRC_MASK"
[ -d "$SRC_PKG" ] || { echo "ERROR: no sanad_pkg at $SRC_PKG"; exit 1; }
# Copied unconditionally into ./license further down.
[ -f "$SRC_LIC/pubkey.ed25519" ] || { echo "ERROR: no pubkey.ed25519 at $SRC_LIC"; exit 1; }
echo ">> vendoring SanadV3 engine from $SRC_V3"
rm -rf "$HERE/vendor"; mkdir -p "$HERE/vendor"
rsync -a \
--exclude 'data/' --exclude 'Logs/' --exclude '__pycache__/' --exclude '*.pyc' \
--exclude '.git/' --exclude 'dashboard/static/temp3d/' --exclude 'tests/' \
"$SRC_V3/" "$HERE/vendor/Sanad/"
echo ">> seeding minimal data/"
mkdir -p "$HERE/vendor/Sanad/data/motions"
cp "$SRC_V3/data/motions/config.json" "$HERE/vendor/Sanad/data/motions/config.json"
for j in audio_device.json camera_device.json wake_phrases.json; do
[ -f "$SRC_V3/data/$j" ] && cp "$SRC_V3/data/$j" "$HERE/vendor/Sanad/data/$j" || true
done
for d in recordings audio faces photos zones memories; do mkdir -p "$HERE/vendor/Sanad/data/$d"; touch "$HERE/vendor/Sanad/data/$d/.gitkeep"; done
echo ">> vendoring sanad_pkg + public key"
rm -rf "$HERE/vendor/sanad_pkg"; cp -r "$SRC_PKG" "$HERE/vendor/sanad_pkg"
find "$HERE/vendor/sanad_pkg" -name __pycache__ -type d -prune -exec rm -rf {} + 2>/dev/null || true
mkdir -p "$HERE/license"; cp "$SRC_LIC/pubkey.ed25519" "$HERE/license/pubkey.ed25519"
echo ">> vendoring flat BLE Mask lib (own path)"
rm -rf "$HERE/vendor/mask"
rsync -a --exclude '__pycache__/' --exclude '*.pyc' --exclude '.git/' \
--exclude 'test_*.py' --exclude 'selftest.py' "$SRC_MASK/" "$HERE/vendor/mask/"
echo ">> ship keyless (blank any baked Gemini key)"
python3 - "$HERE" <<'PY'
import json, sys
h = sys.argv[1]
for p, sec in ((h+"/vendor/Sanad/config/core_config.json", "gemini_defaults"),
(h+"/vendor/Sanad/data/motions/config.json", "gemini")):
try:
d = json.load(open(p))
except Exception:
continue
s = d.get(sec)
if isinstance(s, dict) and s.get("api_key"):
s["api_key"] = ""
json.dump(d, open(p, "w"), ensure_ascii=False, indent=2)
print(" blanked", sec, "in", p)
PY
echo ">> refresh ./data seed mirror (keep structure, drop runtime media)"
rsync -a --delete \
--exclude 'recordings/*' --exclude 'audio/*' --exclude 'faces/*' --exclude 'photos/*' \
"$HERE/vendor/Sanad/data/" "$HERE/data/"
for d in recordings audio faces photos zones memories; do mkdir -p "$HERE/data/$d"; touch "$HERE/data/$d/.gitkeep"; done
echo ">> refresh host mask_config.json seed (the mask-color persistence mount)"
# Only this one file — do NOT touch the hand-written config/p2_config.json.
# Create ./config first (as is done for ./license above) so a checkout without it
# does not abort under `set -e` after ./vendor has already been rebuilt.
mkdir -p "$HERE/config"
cp "$HERE/vendor/Sanad/config/mask_config.json" "$HERE/config/mask_config.json"
echo ">> done. vendor: $(du -sh "$HERE/vendor" | cut -f1) — review & commit ./vendor ./data ./config ./license"

36
test_p3.sh Executable file
View File

@ -0,0 +1,36 @@
#!/usr/bin/env bash
# Smoke-test Sanad Package 3. Usage: ./test_p3.sh [host:port] (default 127.0.0.1:8013)
H="${1:-127.0.0.1:8013}"; B="http://$H"; pass=0; fail=0
# chk <METHOD> <path> <expected-status> <label>
# Requests $B<path> (8 s timeout), compares the HTTP status with the expected
# one, prints a PASS/FAIL line and bumps the global $pass / $fail counter.
chk() {
  local method="$1" path="$2" want="$3" label="$4" code
  if [ "$method" = GET ]; then
    code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 8 "$B$path")
  else
    code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 8 -X "$method" "$B$path")
  fi
  if [ "$code" != "$want" ]; then
    printf " FAIL [%s≠%s] %-30s %s\n" "$code" "$want" "$method $path" "$label"
    fail=$((fail+1))
    return
  fi
  printf " PASS [%s] %-34s %s\n" "$code" "$method $path" "$label"
  pass=$((pass+1))
}
echo "== Sanad P3 smoke test @ $B =="
chk GET /api/health 200 "health"
chk GET /api/package 200 "manifest + license + features"
chk GET /api/p3/api-key 200 "key status (masked)"
chk GET /api/p3/settings 200 "combined settings"
chk GET /api/recognition/state 200 "recognition state"
chk GET /api/zones/ 200 "places / zones list"
chk GET /api/mask/status 200 "LED mask status"
chk GET /api/memory/ 200 "visitor memory list"
chk GET /api/memory/status 200 "memory store status"
chk GET /api/memory/primer 200 "greeting primer"
chk GET /api/live-subprocess/status 200 "conversation status"
chk GET /api/system/info 200 "system info"
chk GET /api/logs/ 200 "logs list"
echo "== $pass passed, $fail failed =="
echo "-- manifest --"
curl -s --max-time 6 "$B/api/package" | python3 -c '
import sys, json
d = json.load(sys.stdin); lic = d.get("license") or {}
print(" package :", d.get("package"))
print(" license :", lic.get("valid"), " packages:", lic.get("packages"))
print(" features:", d.get("features"))
' 2>/dev/null || true
echo "-- memory roundtrip (create -> list -> delete) --"
id=$(curl -s --max-time 6 -X POST "$B/api/memory/" -H 'Content-Type: application/json' -d '{"name":"Test VIP","attributes":{"vip":true},"notes":"smoke"}' | python3 -c "import sys,json;print(json.load(sys.stdin).get('profile',{}).get('id',''))" 2>/dev/null)
echo " created id: ${id:-<none>}"
if [ -z "$id" ]; then
  echo " roundtrip FAILED (no profile id returned)"; fail=$((fail+1))
# -f makes curl fail on an HTTP error status, so a 404/5xx is not reported as ok.
elif curl -s -f --max-time 6 -X DELETE "$B/api/memory/$id" >/dev/null; then
  echo " deleted ok"
else
  echo " delete FAILED"; fail=$((fail+1))
fi
# Last command: the script's exit status is non-zero when any check failed, so
# callers / CI can detect a failed smoke test.
[ "${fail:-0}" -eq 0 ]

7
vendor/Sanad/.claude/settings.json vendored Normal file
View File

@ -0,0 +1,7 @@
{
"permissions": {
"allow": [
"Bash(node -e ' *)"
]
}
}

4
vendor/Sanad/.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
__pycache__/
*.pyc
Logs/
*.log

136
vendor/Sanad/FEATURES.md vendored Normal file
View File

@ -0,0 +1,136 @@
# SanadV3 — Feature Catalog
Sanad is a bilingual (Arabic/English) humanoid receptionist/assistant on a
Unitree G1 (Jetson Orin NX, ROS 2 Foxy, Livox MID-360). This catalogs
**what's built today** (Part A) and **what's on the roadmap** (Part B).
---
# Part A — Current features (built & running)
Verified from the live subsystem registry (19 subsystems), dashboard tabs
(12), and API routers (22).
## 1. Voice & Conversation
- **Gemini live voice** — real-time bilingual AR/EN spoken conversation (native-audio model)
- **Offline brain** — local pipeline via `ollama` (`SANAD_VOICE_BRAIN=local`), no cloud
- **Wake phrases** — configurable wake-word manager
- **Typed replay** — type text, robot speaks it (with speaker-monitor capture)
- **Local TTS** — on-device text-to-speech engine
- **Prompt management** — edit the system prompt from the dashboard
- **Lip-sync** — mask mouth driven by TTS `MOUTH` markers
- **Barge-in** — interrupt speech (volume-scaled threshold)
## 2. Motion & Arm
- **35 arm actions** — 16 SDK built-ins + 19 custom JSONL motions
- **Macro record / playback** — capture and replay motion sequences
- **Teaching mode** — kinesthetic teach-and-repeat
- **Skills** — composed higher-level behaviors (skills.json)
- **Movement dispatch** — voice → motion (53 fixed + 10 parametric phrases, cooldown-gated)
- **Arm motion-block** — auto-inhibits arm moves while locomotion is active (safety interlock)
## 3. Locomotion
- **LocoClient + MotionSwitcher** — walk / pose control via Unitree SDK (eth0)
- **E-STOP** — dashboard kill button
- **Single Ctrl+C teardown** — one signal cleanly stops every subsystem (~2s)
## 4. LED Face Mask
- **Animated expressions** — neutral, smile, blink, look L/R, talk13, surprised, sad
- **Gestural-speaking events** — face reacts while speaking
- **Lip-sync** — mouth animates to speech
## 5. Vision & Recognition
- **Face recognition** — identify people via camera
- **Face gallery** — enroll/manage known faces
- **Zone gallery / zones** — visual zone recognition
- **Camera feed** — attached to the live voice subprocess (vision-in-the-loop)
## 6. Navigation (web_nav3 integration)
- **Live Map tab** — full embedded web_nav3 dashboard (set-pose, goals, bringup)
- **Navigation tab** — native canvas viewer (saved/live map, places, missions)
- **map_relay** — re-publishes the latched `/map` @1Hz so the map renders even when stationary
- **Saved maps** — load & view a pre-built `.db` (localize mode)
- **Places** — save named poses, one-click "Go"
- **Missions** — multi-waypoint routes (defined in web_nav3)
- **Cancel goal** — stop an active goal without tearing down bringup
- **SLAM** — RTABMap LiDAR-ICP, drift-corrected mapping/localization
## 7. Audio
- **Device manager** — sink/source selection, live refresh
- **Audio profiles** — builtin / anker / hollyland_builtin (auto-switch on plug/unplug)
## 8. Operations, System & Diagnostics
- **System control** — start/stop subsystems, status
- **Temperature monitor** — motor temps (live websocket stream)
- **Controller** — gamepad/teleop input
- **Web terminal** — shell in the browser (websocket)
- **Logs** — live log stream
- **Recordings & replay** — record/playback sessions
- **Scripts** — run saved scripts
## Dashboard infrastructure
- 12 tabs, **fault-isolated routers** (one broken module never breaks the dashboard)
- WebSocket streams: log_stream, motor_temps, terminal
- No-store HTML (no stale-cache 404s after deploy)
- Lazy subsystem imports (missing dep → that subsystem unavailable, rest runs)
---
# Part B — Roadmap (to add)
Tiers = priority. 🏗️ = load-bearing · ⚠️ = Foxy constraint.
## Tier 1 — Autonomous behaviors (the product)
1. **Voice-driven navigation** — "Sanad, go to the lobby" → nav goal
2. **Greeter mission** — recognized face → navigate → greet → express
3. **Named-person greeting** — identity → personalized line
4. **Patrol / guided tours** — ordered places, speech at each stop
5. **Return-to-base / dock-on-idle** — auto-home on idle/low battery
## Tier 2 — Navigation & map (harden + edit)
6. 🏗️ **Map republish relay** — ✅ DONE (map_relay)
7. **Click-to-goal on Nav tab canvas**
8. **Live nav telemetry** — distance/ETA/waypoint, "arrived" toast
9. **Battery + nav-state status bar**
10. **Geofence zones on the map**
11. **Cancel-goal button** — ✅ DONE
### Map editing & annotation (all build on #6)
12. **Erase tool** — paint cells free; wipe ghost obstacles + the SLAM "spokes"
13. **Obstacle paint ("black points" / virtual walls)** — ⚠️ Foxy-safe KeepoutFilter substitute
14. **Shape tools + brush size** — line/rectangle/polygon
15. **Non-destructive overlay + undo/redo**
16. **Persist & auto-reload edits per map**
17. **Crop / trim map bounds**
## Tier 3 — Voice & interaction
18. **Barge-in from dashboard**
19. **Quick-phrase soundboard**
20. **Conversation memory / visitor log**
21. **Per-speaker AR/EN auto-detect**
22. **Scheduled announcements**
23. **Bake edited map → PGM/YAML** (static map_server deploy)
## Tier 4 — Face & presence
24. **Gaze / head-track recognized face**
25. **Emotion-from-context** (sentiment → expression)
26. **Idle breathing / look-around**
27. **Lip-sync to TTS amplitude** (enhance existing markers)
## Tier 5 — Operator, fleet & reliability
28. 🏗️ **Global E-STOP button** — ✅ exists; surface consistently
29. **Health watchdog** — auto-restart dead subsystem + alert
30. **Per-subsystem enable/disable toggles**
31. **Behavior recorder → replay** (nav+voice timelines)
32. **Mission editor UI** (visual sequence builder)
33. **Remote access / tunnel**
34. **Reverse-proxy web_nav3 through :8001** — one origin, no iframe cross-port issues
## Tier 6 — Future / blocked
35. **Speed / caution zones** — needs Galactic SpeedFilter or custom layer
36. **Multi-robot fleet** (SanadV3 ↔ BotBrain) — needs LocoClient arbitration + coordinator
---
### Recommended next build order
**#1 voice→nav** → **#2 greeter mission** (the product), then **#12/#13 map editing**
(clean the spokes + virtual walls). #6 republish relay and #11 cancel are already done.

12
vendor/Sanad/G1_Controller/__init__.py vendored Normal file
View File

@ -0,0 +1,12 @@
"""G1_Controller — manual dashboard locomotion control (N2 Phase 1).
`LocoController` wraps the Unitree `LocoClient` + `MotionSwitcherClient` for
operator-driven walking, postures and a discrete step pad. It reuses the arm
controller's single process-wide DDS init (one `ChannelFactoryInitialize`) and
is gated behind an in-memory "Enable movement" arm flag that defaults OFF every
boot. See dashboard/routes/controller.py for the REST surface.
"""
from Project.Sanad.G1_Controller.loco_controller import LocoController
__all__ = ["LocoController"]

View File

@ -0,0 +1,593 @@
"""LocoController — manual G1 locomotion via the Unitree LocoClient (N2 Phase 1).
Ported from the proven scripts in G1_Lootah/Controller (g1_mode_controller.py,
keyboard_controller.py, hanger_boot_sequence.py). Design notes:
* **One DDS init per process.** The arm controller owns the single
`ChannelFactoryInitialize(0, nic)` (motion/arm_controller.py). This class
NEVER initialises DDS — it lazily builds its `LocoClient` /
`MotionSwitcherClient` only after `arm._initialized` is True.
* **Default DISARMED.** `_armed` starts False every boot and gates every WRITE
method. Reads (status / fsm / joints), E-STOP and disarm are ALWAYS allowed.
* **StopMove watchdog.** Continuous `Move(..., True)` never self-terminates, so a
daemon thread StopMoves if no `move()` refresh arrives within
`watchdog_timeout_sec`. The frontend re-sends setpoints at ~10 Hz, so a tab
close / network drop trips the watchdog within the timeout.
* **Velocity caps.** Symmetric clamp on vx/vy/vyaw — Walk 0.6, Run 1.2.
* **Allow-anytime-warn.** move/step never hard-block on FSM; if not walk-ready
they still execute but return a `warning`.
* **Sim fallback.** When `unitree_sdk2py` is absent (workstation), every write
returns `{"simulated": True}` (never raises) so the whole UI is testable.
SDK facts confirmed from source — do not "fix" them:
* `LocoClient.Move(vx, vy, vyaw, True)` — the continuous-mode kwarg is misspelled
`continous_move` (one n); we pass it POSITIONALLY to avoid a TypeError.
* `LocoClient` has NO StandUp()/Squat() — use SetFsmId(4)/SetFsmId(2).
* FSM id / mode are read via the private RPC `bot._Call(7001/7002, "{}")`.
"""
from __future__ import annotations
import json
import threading
import time
from typing import Any, Optional
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
log = get_logger("loco_controller")
# -- SDK import (optional) -----------------------------------------------------
try:
from unitree_sdk2py.g1.loco.g1_loco_client import LocoClient
from unitree_sdk2py.comm.motion_switcher.motion_switcher_client import (
MotionSwitcherClient,
)
_HAS_SDK = True
except ImportError:
LocoClient = None
MotionSwitcherClient = None
_HAS_SDK = False
log.warning("Unitree SDK not available — LocoController in simulation mode")
# LocoClient general RPC api-ids for FSM read-back (stable across SDK builds).
# Passed to the private `LocoClient._Call` by LocoController._rpc_get_int via
# fsm_id() / fsm_mode().
ROBOT_API_ID_LOCO_GET_FSM_ID = 7001
ROBOT_API_ID_LOCO_GET_FSM_MODE = 7002
# G1 29-DoF joint names for indices 12-28 (0-11 legs, 12-14 waist, 15-21 left
# arm, 22-28 right arm). Used by the Diagnostics joint read-out.
# LocoController.joints() looks indices 12..28 up here and falls back to
# "motor_<idx>" for any index that is missing.
JOINT_NAMES = {
12: "WAIST_YAW", 13: "WAIST_ROLL", 14: "WAIST_PITCH",
15: "L_SHOULDER_PITCH", 16: "L_SHOULDER_ROLL", 17: "L_SHOULDER_YAW",
18: "L_ELBOW", 19: "L_WRIST_ROLL", 20: "L_WRIST_PITCH", 21: "L_WRIST_YAW",
22: "R_SHOULDER_PITCH", 23: "R_SHOULDER_ROLL", 24: "R_SHOULDER_YAW",
25: "R_ELBOW", 26: "R_WRIST_ROLL", 27: "R_WRIST_PITCH", 28: "R_WRIST_YAW",
}
# Discrete step pad — (vx, vy, vyaw) sign per direction; magnitude is
# step_speed_frac * cap_walk (a gentle single step).
# The keys are the only direction names LocoController.step() accepts.
_STEP_DIRS = {
"forward": (1.0, 0.0, 0.0),
"backward": (-1.0, 0.0, 0.0),
"slide_left": (0.0, 1.0, 0.0),
"slide_right": (0.0, -1.0, 0.0),
"rotate_left": (0.0, 0.0, 1.0),
"rotate_right": (0.0, 0.0, -1.0),
}
# Posture names accepted by LocoController.posture(); any other name is
# rejected there with ok=False before the SDK is touched.
_POSTURES = (
"zero_torque", "damp", "stand_up", "squat", "sit",
"low_stand", "high_stand", "lie_to_stand",
)
class LocoController:
"""Thread-safe manual locomotion control with a simulation fallback."""
def __init__(self, arm=None):
    """Create a disarmed controller; no SDK client is built here.

    Args:
        arm: the shared ArmController (owner of the single DDS init), or
            None. The clients are only built later, by _ensure_client().
    """
    # Shared ArmController (owns the ONE DDS init).
    self._arm = arm
    # LocoClient / MotionSwitcherClient — both created lazily.
    self._bot = None
    self._msc = None
    self._lc_ready = False
    # Re-entrant lock serialising all loco client WRITE calls.
    self._lock = threading.RLock()
    # In-memory MANUAL gate — OFF every boot.
    self._armed = False
    # Last commanded (vx, vy, vyaw).
    self._cur_v = (0.0, 0.0, 0.0)
    self._teleop_active = False
    self._last_msc_mode: Optional[str] = None

    # Watchdog bookkeeping. The stop event starts SET, i.e. the watchdog is
    # not running until something calls _start_watchdog().
    self._last_move_ts = 0.0
    self._wd_thread: Optional[threading.Thread] = None
    self._wd_stop = threading.Event()
    self._wd_stop.set()

    # Monotonic stop-generation counter, bumped under _lock by
    # estop/stop/disarm. move()/step()/prep_mode() snapshot it when they
    # start and bail as soon as it differs, so a stop request preempts an
    # in-flight motion and cannot be silently undone by a concurrent command.
    self._stop_gen = 0

    # Set while a discrete blocking operation (step/prep_mode) is running so
    # two of them cannot interleave Move commands. Continuous teleop move()
    # is intentionally not guarded by this flag.
    self._discrete_busy = False

    # Tunables from the "motion" / "loco_controller" config section, each
    # coerced to float with the default shown.
    settings = _cfg_section("motion", "loco_controller")
    tunables = (
        ("_cap_walk", "cap_walk", 0.6),
        ("_cap_run", "cap_run", 1.2),
        ("_lin_step", "lin_step", 0.05),
        ("_ang_step", "ang_step", 0.2),
        ("_wd_timeout", "watchdog_timeout_sec", 0.5),
        ("_block_window", "arm_block_window_sec", 1.5),
        ("_step_dur", "step_duration_sec", 0.6),
        ("_step_frac", "step_speed_frac", 0.5),
        ("_loco_timeout", "loco_timeout_sec", 10.0),
        ("_msc_timeout", "msc_timeout_sec", 5.0),
    )
    for attr_name, cfg_key, fallback in tunables:
        setattr(self, attr_name, float(settings.get(cfg_key, fallback)))
# ── client lifecycle ─────────────────────────────────────────────────────
def _ensure_client(self) -> bool:
    """Build LocoClient + MotionSwitcherClient on first use; report readiness.

    Never initialises DDS — it requires the shared arm controller to have
    already run the single ChannelFactoryInitialize (its `_initialized`
    attribute must be truthy).

    Returns:
        True when both clients exist. False when the SDK is missing, the arm
        is absent or not initialised, or client construction failed.
    """
    if not _HAS_SDK:
        return False
    if self._lc_ready:
        return True
    arm = self._arm
    arm_up = arm is not None and getattr(arm, "_initialized", False)
    if not arm_up:
        return False
    with self._lock:
        # Re-check under the lock: another thread may have finished the
        # build while this one was waiting.
        if self._lc_ready:
            return True
        try:
            loco = LocoClient()
            loco.SetTimeout(self._loco_timeout)
            loco.Init()
            switcher = MotionSwitcherClient()
            switcher.SetTimeout(self._msc_timeout)
            switcher.Init()
            # Publish only after both clients initialised successfully.
            self._bot = loco
            self._msc = switcher
            self._lc_ready = True
            log.info("LocoClient + MotionSwitcherClient ready")
        except Exception as exc:
            log.error("LocoClient init failed: %s", exc)
            self._lc_ready = False
    return self._lc_ready
def _safe_call(self, name: str, fn, *a, **kw):
try:
return True, fn(*a, **kw)
except Exception as exc:
log.error("%s failed: %s", name, exc)
return False, None
def _rpc_get_int(self, api_id: int):
bot = self._bot
if bot is None:
return None
try:
code, data = bot._Call(api_id, "{}")
if code == 0 and data:
return json.loads(data).get("data")
except Exception:
pass
return None
@staticmethod
def _clamp(v: float, cap: float) -> float:
return max(-cap, min(cap, float(v)))
# ── FSM / readiness ──────────────────────────────────────────────────────
def fsm_id(self):
    """Return the FSM id read back over RPC, or None if unavailable."""
    api_id = ROBOT_API_ID_LOCO_GET_FSM_ID
    return self._rpc_get_int(api_id)

def fsm_mode(self):
    """Return the FSM mode read back over RPC, or None if unavailable."""
    api_id = ROBOT_API_ID_LOCO_GET_FSM_MODE
    return self._rpc_get_int(api_id)
def _walk_ready_warning(self) -> Optional[str]:
"""allow-anytime-warn: None when ready, else a human message."""
if not self._lc_ready:
return None
fid = self.fsm_id()
fmode = self.fsm_mode()
if fid == 200 and fmode not in (None, 2):
return None
return (f"Robot not in walk-ready FSM (id={fid}, mode={fmode}). "
f"Command sent anyway.")
# ── arm flag + watchdog ──────────────────────────────────────────────────
def is_armed(self) -> bool:
    """Return the current state of the in-memory manual-movement gate."""
    armed = self._armed
    return armed
def movement_active(self) -> bool:
    """Tell whether the robot may currently be walking.

    True when the manual gate is armed, teleop is active, or the last
    move/step was stamped less than the block window ago. It serves as the
    arm's motion-block predicate, so the arm never replays while the robot
    is (or just was) moving — whichever gate (manual or Gemini voice
    dispatch calling move/step directly) caused the motion.
    """
    gate_open = self._armed or self._teleop_active
    if not gate_open:
        since_last_move = time.monotonic() - self._last_move_ts
        return since_last_move < self._block_window
    return True
def arm_movement(self) -> dict:
    """Unlock manual control and start the watchdog.

    Any in-flight arm motion is cancelled first so arm playback and
    locomotion are never active together (movement wins). A failure while
    cancelling is logged and does not prevent arming.

    Returns:
        {"ok": True, "armed": True}
    """
    arm = self._arm
    try:
        arm_is_busy = arm is not None and getattr(arm, "is_busy", False)
        if arm_is_busy:
            log.info("arming movement — cancelling in-flight arm motion")
            arm.cancel()
    except Exception:
        log.exception("arm.cancel() on arm_movement failed")

    with self._lock:
        self._armed = True
        self._start_watchdog()

    log.info("movement ARMED")
    return dict(ok=True, armed=True)
def disarm_movement(self) -> dict:
    """Lock manual control again and halt any motion.

    Bumps the stop generation (so an in-flight step/prep/move bails), clears
    the armed and teleop flags, signals the watchdog to stop, then issues a
    best-effort StopMove.

    Returns:
        {"ok": True, "armed": False}
    """
    with self._lock:
        # Break any in-flight step/prep/move before dropping the flags.
        self._stop_gen = self._stop_gen + 1
        self._armed = False
        self._teleop_active = False
        self._wd_stop.set()

    try:
        self._raw_stop()
    except Exception:
        log.exception("StopMove on disarm failed")

    log.info("movement DISARMED")
    return dict(ok=True, armed=False)
def _start_watchdog(self):
self._wd_stop.clear()
if self._wd_thread is None or not self._wd_thread.is_alive():
self._wd_thread = threading.Thread(
target=self._watchdog_loop, daemon=True, name="loco-watchdog")
self._wd_thread.start()
def _watchdog_loop(self):
period = max(0.02, min(0.1, self._wd_timeout / 2.0))
while not self._wd_stop.is_set():
fire = False
park = False
# Read-and-decide under the lock (atomic check-then-act); the actual
# StopMove runs after release so the critical section stays tiny.
with self._lock:
if self._teleop_active and (time.monotonic() - self._last_move_ts) > self._wd_timeout:
self._teleop_active = False
fire = True
# Self-park once there's nothing left to guard. The Gemini
# dispatch path uses step() directly and never calls
# disarm_movement(), so without this the watchdog would spin for
# the rest of the process lifetime after the first voice step.
if not self._armed and not self._teleop_active and not self._discrete_busy:
park = True
if fire:
log.warning("watchdog: teleop setpoint stale (>%.2fs) — StopMove",
self._wd_timeout)
try:
self._raw_stop()
except Exception:
log.exception("watchdog StopMove failed")
if park:
# Nothing left to guard — stop the thread (a later move/step
# re-arms it via _start_watchdog()). Done AFTER any stale-stop
# above so we never skip a pending StopMove.
self._wd_stop.set()
break
self._wd_stop.wait(period)
def _raw_stop(self) -> bool:
"""Issue StopMove if the client is up; no-op in sim. Lock-light."""
if not self._lc_ready or self._bot is None:
return False
with self._lock:
ok, _ = self._safe_call("StopMove", self._bot.StopMove)
return ok
# ── movement ─────────────────────────────────────────────────────────────
def move(self, vx: float, vy: float, vyaw: float, run: bool = False) -> dict:
    """Command a continuous velocity setpoint (teleop tick).

    Args:
        vx, vy, vyaw: requested velocities; each is clamped symmetrically to
            the walk cap, or to the run cap when `run` is true.
        run: select the run cap instead of the walk cap.

    Returns:
        A dict with "ok", "sent" (the clamped setpoint), "capped", "warning"
        and "simulated". If a stop/E-STOP/disarm landed while this call was
        in flight, "ok" is False and "cancelled" is True, and nothing is sent.
    """
    limit = self._cap_run if run else self._cap_walk
    clamped = tuple(self._clamp(axis, limit) for axis in (vx, vy, vyaw))
    capped = clamped != (float(vx), float(vy), float(vyaw))
    warning = self._walk_ready_warning()
    sent = dict(zip(("vx", "vy", "vyaw"), clamped))

    # Snapshot the stop generation under the lock before doing anything slow.
    with self._lock:
        my_gen = self._stop_gen

    if not self._ensure_client():
        # Simulation: record the intent so the UI and the watchdog behave.
        with self._lock:
            self._cur_v = clamped
            self._last_move_ts = time.monotonic()
            self._teleop_active = True
            self._start_watchdog()
        return {"ok": True, "sent": sent, "capped": capped,
                "warning": warning, "simulated": True}

    with self._lock:
        # A stop that landed after the snapshot wins: do not (re)command the
        # velocity and do not stamp the motion flags, so a cancelled tick
        # does not extend the arm-block window.
        if my_gen != self._stop_gen:
            return {"ok": False, "cancelled": True, "sent": sent,
                    "capped": capped, "warning": warning, "simulated": False}
        self._cur_v = clamped
        self._last_move_ts = time.monotonic()
        self._teleop_active = True
        self._safe_call("SetBalanceMode", self._bot.SetBalanceMode, 1)
        # The continuous-move flag is passed positionally on purpose.
        ok, _ = self._safe_call("Move", self._bot.Move, *clamped, True)
        self._start_watchdog()
    return {"ok": bool(ok), "sent": sent, "capped": capped,
            "warning": warning, "simulated": False}
def stop_move(self) -> dict:
    """Halt translation/rotation. Allowed even when disarmed.

    Bumps the stop generation and clears the teleop flag before issuing
    StopMove.

    Returns:
        {"ok": True, "simulated": True} without a client, otherwise
        {"ok": <StopMove success>, "simulated": False}.
    """
    with self._lock:
        self._stop_gen = self._stop_gen + 1
        self._teleop_active = False
    if self._ensure_client():
        stopped = self._raw_stop()
        return {"ok": bool(stopped), "simulated": False}
    return {"ok": True, "simulated": True}
def estop(self) -> dict:
    """Emergency stop = StopMove only (no Damp / FSM change, posture is kept).

    Always allowed, even disarmed and in simulation. Bumps the stop
    generation so an in-flight move()/step()/prep_mode() bails, clears the
    teleop flag and zeroes the recorded velocity.

    Returns:
        {"ok": True, "simulated": True} without a client, otherwise
        {"ok": <StopMove success>, "simulated": False}.
    """
    with self._lock:
        self._stop_gen = self._stop_gen + 1
        self._teleop_active = False
        self._cur_v = (0.0, 0.0, 0.0)
    if self._ensure_client():
        stopped = self._raw_stop()
        log.warning("E-STOP — StopMove issued")
        return {"ok": bool(stopped), "simulated": False}
    log.warning("E-STOP (sim)")
    return {"ok": True, "simulated": True}
def step(self, direction: str) -> dict:
    """Discrete one-step pad: Move for `step_duration`, then StopMove.

    Blocking (~step_duration); call via asyncio.to_thread from the route.
    The dwell loop does not hold the lock while sleeping, so E-STOP /
    StopMove (which take the lock briefly) preempt it; the loop also bails
    as soon as the stop generation changes.

    Args:
        direction: one of the `_STEP_DIRS` keys.

    Returns:
        A dict with "ok", "dir", "warning" and "simulated". "ok" is False with
        a "reason" for an unknown direction, when another discrete operation
        is running ("busy") or when the client vanished; it is False with
        "cancelled": True when a stop landed before or during the step; and
        it reflects whether the Move call itself succeeded otherwise.
    """
    if direction not in _STEP_DIRS:
        return {"ok": False, "reason": f"unknown direction: {direction}"}
    sx, sy, syaw = _STEP_DIRS[direction]
    k = self._cap_walk * self._step_frac
    vx, vy, vyaw = sx * k, sy * k, syaw * k
    warning = self._walk_ready_warning()
    with self._lock:
        if self._discrete_busy:
            return {"ok": False, "dir": direction, "reason": "busy",
                    "warning": warning, "simulated": not self._lc_ready}
        self._discrete_busy = True
        my_gen = self._stop_gen
        self._last_move_ts = time.monotonic()
        self._teleop_active = True
        self._start_watchdog()
    if not self._ensure_client():
        with self._lock:
            self._teleop_active = False
            self._discrete_busy = False
        return {"ok": True, "dir": direction, "warning": warning, "simulated": True}
    move_ok = False
    cancelled = False
    try:
        with self._lock:
            if self._stop_gen != my_gen:  # stopped before we began
                return {"ok": False, "dir": direction, "cancelled": True,
                        "warning": warning, "simulated": False}
            bot = self._bot
            if bot is None:  # reconnect() dropped the client meanwhile
                return {"ok": False, "dir": direction,
                        "reason": "client unavailable",
                        "warning": warning, "simulated": False}
            self._safe_call("SetBalanceMode", bot.SetBalanceMode, 1)
            # Keep the outcome so the result does not claim success when the
            # Move call itself failed.
            move_ok, _ = self._safe_call("Move", bot.Move, vx, vy, vyaw, True)
        t_end = time.monotonic() + self._step_dur
        while time.monotonic() < t_end:
            if self._stop_gen != my_gen:
                cancelled = True
                break
            with self._lock:
                self._last_move_ts = time.monotonic()  # keep watchdog fed
            time.sleep(0.05)
    finally:
        with self._lock:
            try:
                # Re-read the client: it may have been dropped or replaced
                # during the dwell. Attribute access on None must not be
                # able to skip the flag reset below.
                bot = self._bot
                if bot is not None:
                    self._safe_call("StopMove", bot.StopMove)
            finally:
                self._teleop_active = False
                self._discrete_busy = False
    if cancelled:
        # Same shape as the "stopped before we began" result.
        return {"ok": False, "dir": direction, "cancelled": True,
                "warning": warning, "simulated": False}
    return {"ok": bool(move_ok), "dir": direction, "warning": warning,
            "simulated": False}
# ── postures / modes ─────────────────────────────────────────────────────
def prep_mode(self) -> dict:
    """PREP — StopMove → Damp → StandUp(FSM4) → height ramp → BalanceStand(0).

    Exact order from g1_mode_controller.prep_mode, minus the blocking
    input(). Blocking (~1s); call via asyncio.to_thread.

    Every SDK call now happens inside the try/finally, so the busy flag is
    always released, and the stop generation is re-checked under the lock
    before each group of commands (including the final BalanceStand), so a
    stop request that has already landed is never followed by more posture
    commands.

    Returns:
        A dict with "ok", "mode": "prep" and "simulated". "ok" is False with
        "reason": "busy" when another discrete operation is running, with
        "reason": "client unavailable" when the client vanished, or with
        "cancelled": True when a stop landed during the sequence.
    """
    if not self._ensure_client():
        return {"ok": True, "mode": "prep", "simulated": True}
    with self._lock:
        if self._discrete_busy:
            return {"ok": False, "mode": "prep", "reason": "busy", "simulated": False}
        self._discrete_busy = True
        my_gen = self._stop_gen
    try:
        with self._lock:
            bot = self._bot
            if bot is None:  # reconnect() dropped the client meanwhile
                return {"ok": False, "mode": "prep",
                        "reason": "client unavailable", "simulated": False}
            if self._stop_gen != my_gen:
                log.warning("PREP cancelled (E-STOP)")
                return {"ok": False, "mode": "prep", "cancelled": True, "simulated": False}
            self._safe_call("StopMove", bot.StopMove)
            self._safe_call("Damp", bot.Damp)
            self._safe_call("SetFsmId(4)", bot.SetFsmId, 4)
        # Height ramp: the sleep runs outside the lock so E-STOP can preempt
        # at any time.
        h = 0.02
        while h <= 0.5 + 1e-9:
            with self._lock:
                if self._stop_gen != my_gen:
                    log.warning("PREP cancelled (E-STOP)")
                    return {"ok": False, "mode": "prep", "cancelled": True, "simulated": False}
                self._safe_call("SetStandHeight", bot.SetStandHeight, round(h, 3))
            time.sleep(0.03)
            h += 0.02
        with self._lock:
            # A stop may have landed during the last sleep of the ramp.
            if self._stop_gen != my_gen:
                log.warning("PREP cancelled (E-STOP)")
                return {"ok": False, "mode": "prep", "cancelled": True, "simulated": False}
            self._safe_call("BalanceStand", bot.BalanceStand, 0)
            self._safe_call("SetStandHeight", bot.SetStandHeight, 0.22)
    finally:
        with self._lock:
            self._discrete_busy = False
    log.info("PREP complete")
    return {"ok": True, "mode": "prep", "simulated": False}
def ready_start_mode(self) -> dict:
    """READY = PREP then Start (FSM 200 / balance engaged).

    Start is only issued when PREP reports success. A PREP that was
    cancelled by a stop request, or rejected because another discrete
    operation is running, previously still fell through to Start; it is now
    returned to the caller unchanged except for "mode": "ready".

    Returns:
        A dict with "ok", "mode": "ready" and "simulated", plus whatever
        failure details ("cancelled" / "reason") PREP reported.
    """
    prep = self.prep_mode()
    if not prep.get("ok"):
        failed = dict(prep)
        failed["mode"] = "ready"
        return failed
    if not self._ensure_client():
        return {"ok": True, "mode": "ready", "simulated": True}
    with self._lock:
        bot = self._bot
        # Use the SDK's Start() when it exists; otherwise set FSM 200.
        if hasattr(bot, "Start"):
            ok, _ = self._safe_call("Start", bot.Start)
        else:
            ok, _ = self._safe_call("SetFsmId(200)", bot.SetFsmId, 200)
    log.info("READY/START complete")
    return {"ok": bool(ok), "mode": "ready", "simulated": False}
def posture(self, name: str) -> dict:
    """Switch to a named posture.

    Args:
        name: one of the `_POSTURES` entries.

    Returns:
        {"ok": False, "reason": ...} for an unknown name; a simulated success
        without a client; otherwise {"ok": <call success>, "posture": name,
        "simulated": False}.
    """
    if name not in _POSTURES:
        return {"ok": False, "reason": f"unknown posture: {name}"}
    if not self._ensure_client():
        return {"ok": True, "posture": name, "simulated": True}
    bot = self._bot
    # posture name -> (log label, client method name, positional args).
    # Methods are looked up lazily so only the selected one is touched.
    # stand_up and squat go through SetFsmId (4 and 2).
    dispatch = {
        "zero_torque": ("ZeroTorque", "ZeroTorque", ()),
        "damp": ("Damp", "Damp", ()),
        "stand_up": ("SetFsmId(4)", "SetFsmId", (4,)),
        "squat": ("SetFsmId(2)", "SetFsmId", (2,)),
        "sit": ("Sit", "Sit", ()),
        "low_stand": ("LowStand", "LowStand", ()),
        "high_stand": ("HighStand", "HighStand", ()),
    }
    with self._lock:
        if name == "lie_to_stand":
            # Use the dedicated method when the client has it, else FSM 702.
            if hasattr(bot, "Lie2StandUp"):
                entry = ("Lie2StandUp", "Lie2StandUp", ())
            else:
                entry = ("SetFsmId(702)", "SetFsmId", (702,))
        else:
            entry = dispatch.get(name)
        if entry is None:  # unreachable (guarded above)
            ok = False
        else:
            label, method_name, args = entry
            ok, _ = self._safe_call(label, getattr(bot, method_name), *args)
    return {"ok": bool(ok), "posture": name, "simulated": False}
def set_balance_mode(self, mode: int) -> dict:
    """Forward a balance mode to the client.

    Args:
        mode: value passed to SetBalanceMode after int() coercion.

    Returns:
        {"ok", "balance_mode", "simulated"}; a simulated success without a
        client.
    """
    live = self._ensure_client()
    value = int(mode)
    if live:
        with self._lock:
            ok, _ = self._safe_call("SetBalanceMode", self._bot.SetBalanceMode, value)
        return {"ok": bool(ok), "balance_mode": value, "simulated": False}
    return {"ok": True, "balance_mode": value, "simulated": True}
def set_stand_height(self, h: float) -> dict:
    """Forward a stand height to the client.

    Args:
        h: value passed to SetStandHeight after float() coercion.

    Returns:
        {"ok", "height", "simulated"}; a simulated success without a client.
    """
    live = self._ensure_client()
    height = float(h)
    if live:
        with self._lock:
            ok, _ = self._safe_call("SetStandHeight", self._bot.SetStandHeight, height)
        return {"ok": bool(ok), "height": height, "simulated": False}
    return {"ok": True, "height": height, "simulated": True}
# ── MotionSwitcher ───────────────────────────────────────────────────────
def msc_check(self) -> dict:
    """Ask the MotionSwitcherClient which mode is active.

    Accepts either a tuple reply whose second element is a dict, or a bare
    dict, and reads its "name" key. The name is cached in `_last_msc_mode`.

    Returns:
        {"mode_name": <name or None>}; without a client the dict also carries
        "simulated". A failing CheckMode is logged and reported as None.
    """
    if not self._ensure_client() or self._msc is None:
        return {"mode_name": None, "simulated": not self._lc_ready}
    try:
        reply = self._msc.CheckMode()
        payload = None
        if isinstance(reply, tuple):
            if len(reply) >= 2 and isinstance(reply[1], dict):
                payload = reply[1]
        elif isinstance(reply, dict):
            payload = reply
        name = payload.get("name") if payload is not None else None
        self._last_msc_mode = name
        return {"mode_name": name}
    except Exception as exc:
        log.error("msc_check failed: %s", exc)
        return {"mode_name": None}
def msc_select_ai(self) -> dict:
    """Ask the MotionSwitcherClient to select the "ai" mode.

    Returns:
        A simulated success without a client, otherwise
        {"ok": <call success>, "simulated": False}.
    """
    have_client = self._ensure_client() and self._msc is not None
    if not have_client:
        return {"ok": True, "simulated": True}
    with self._lock:
        selected, _ = self._safe_call("SelectMode(ai)", self._msc.SelectMode, "ai")
    return {"ok": bool(selected), "simulated": False}
def msc_release(self) -> dict:
    """Ask the MotionSwitcherClient to release the current mode.

    Returns:
        A simulated success without a client, otherwise
        {"ok": <call success>, "simulated": False}.
    """
    have_client = self._ensure_client() and self._msc is not None
    if not have_client:
        return {"ok": True, "simulated": True}
    with self._lock:
        released, _ = self._safe_call("ReleaseMode", self._msc.ReleaseMode)
    return {"ok": bool(released), "simulated": False}
def reconnect(self) -> dict:
    """Drop and rebuild the Loco + MSC clients (does NOT re-init the DDS factory).

    Returns:
        {"ok": <rebuild succeeded>, "lc_ready": <current readiness flag>}
    """
    with self._lock:
        self._bot = self._msc = None
        self._lc_ready = False
    rebuilt = self._ensure_client()
    return {"ok": bool(rebuilt), "lc_ready": self._lc_ready}
# ── reads ────────────────────────────────────────────────────────────────
def joints(self) -> dict:
    """Report joint positions for indices 12..28.

    Positions come from the arm controller's get_current_q(); when there is
    no arm, or the read raises, every joint is reported as 0.0. An index
    beyond the returned sequence is also reported as 0.0.

    Returns:
        {"joints": [{"idx", "name", "q"}, ...]} in index order.
    """
    try:
        q = self._arm.get_current_q() if self._arm is not None else []
    except Exception:
        q = []
    readout = [
        {
            "idx": i,
            "name": JOINT_NAMES.get(i, f"motor_{i}"),
            "q": float(q[i] if i < len(q) else 0.0),
        }
        for i in range(12, 29)
    ]
    return {"joints": readout}
def status(self) -> dict:
    """Snapshot of controller state for the dashboard.

    Polling this lazily brings up the client once the arm's DDS is ready.
    The FSM id and mode are only queried when the client is up; `walk_ready`
    is true when the client is up, the FSM id is 200 and the FSM mode is
    neither None nor 2.
    """
    self._ensure_client()
    client_up = self._lc_ready
    fid = self.fsm_id() if client_up else None
    fmode = self.fsm_mode() if self._lc_ready else None
    walk_ready = bool(self._lc_ready and fid == 200 and fmode not in (None, 2))
    arm = self._arm
    arm_initialized = bool(arm is not None and getattr(arm, "_initialized", False))
    vx, vy, vyaw = self._cur_v[0], self._cur_v[1], self._cur_v[2]
    snapshot = {
        "sdk_available": _HAS_SDK,
        "lc_ready": self._lc_ready,
        "armed": self._armed,
        "fsm_id": fid,
        "fsm_mode": fmode,
        "walk_ready": walk_ready,
        "msc_mode": self._last_msc_mode,
        "teleop_active": self._teleop_active,
        "last_velocity": {"vx": vx, "vy": vy, "vyaw": vyaw},
        "caps": {"walk": self._cap_walk, "run": self._cap_run},
        "arm_initialized": arm_initialized,
    }
    return snapshot
# ── shutdown helper ──────────────────────────────────────────────────────
def shutdown(self):
    """Best-effort StopMove + disarm for process shutdown.

    Uses _raw_stop() (NOT estop()) so teardown never builds a brand-new
    LocoClient: estop() → _ensure_client() would lazily construct a client
    and run bot.Init() (a DDS RPC) during interpreter teardown when we were
    armed-but-never-built (Enable movement clicked, never moved, then
    Ctrl+C). _raw_stop() no-ops when no client was ever created. The stop
    generation is bumped first so any in-flight motion bails immediately.
    """
    with self._lock:
        self._stop_gen = self._stop_gen + 1
        self._teleop_active = False
        self._cur_v = (0.0, 0.0, 0.0)
    try:
        try:
            # No-op when no client exists — never re-inits.
            self._raw_stop()
        except Exception:
            log.exception("StopMove on shutdown failed")
    finally:
        # Always leave the controller disarmed, whatever happened above.
        self.disarm_movement()

412
vendor/Sanad/README.md vendored Normal file
View File

@ -0,0 +1,412 @@
# Sanad
Voice + motion assistant for the Unitree G1 humanoid. **Gemini Live** (or a
fully-offline pipeline) handles bilingual Arabic/English conversation; an arm
controller plays built-in SDK poses and recorded JSONL macros; a locomotion
controller walks/turns the robot; an optional camera feeds **Gemini-side face &
place recognition**; everything is orchestrated through a fault-isolated
**FastAPI dashboard** on `http://<robot>:8000`.
```
┌──────────────────────────────────────────────────────────────────────┐
│ Dashboard (FastAPI) ── http://<robot>:8000 │
│ ├─ Operations Quick-fire arm actions + gestural-speaking │
│ ├─ Voice & Audio Live Gemini, Typed Replay, Wake Phrases, Audio │
│ ├─ Motion & Replay SDK actions, JSONL replays, macros, teaching │
│ ├─ Controller Locomotion teleop, postures, FSM modes, E-STOP │
│ ├─ Recognition Camera vision + face gallery + zones/places │
│ ├─ Recordings Skill registry, saved Gemini turns │
│ ├─ Temperature Live 3D motor-temperature heatmap (three.js) │
│ ├─ Terminal In-browser shell (PTY) to the robot │
│ └─ Settings & Logs System info, tail/stream live logs │
└──────────────────────────────────────────────────────────────────────┘
├─ voice/sanad_voice.py (subprocess — model-agnostic voice loop)
│ ├─ gemini/script.py (Gemini Live brain — audio+video+state)
│ └─ local/script.py (offline brain — VAD→STT→LLM→TTS)
├─ gemini/client.py (short-session client for Typed Replay)
├─ gemini/subprocess.py (spawns+supervises sanad_voice.py;
│ pushes camera frames + motion state
│ to the child over its stdin)
├─ voice/movement_dispatch.py(Gemini spoken phrase → locomotion)
├─ vision/camera.py (RealSense/USB capture daemon)
├─ vision/face_gallery.py (data/faces/ CRUD for the primer turn)
├─ vision/zone_gallery.py (data/zones/ places + "go here" targets)
├─ motion/arm_controller.py (G1 arm DDS publisher — owns DDS init)
├─ G1_Controller/loco_controller.py (G1 locomotion via LocoClient)
├─ voice/audio_io.py (mic + speaker abstraction — 3 profiles)
└─ core/brain.py (skill dispatcher, event bus)
```
### Camera + face/place recognition data flow
```
CameraDaemon (parent, in-memory JPEG+b64 cache)
├─→ dashboard /api/recognition/frame.jpg ── snapshot_jpeg()
└─→ GeminiSubprocess._frame_forwarder ── get_frame_b64()
│ "frame:<b64>\n" over stdin
ArmController ─emit→ event bus ─→ main.py ─→ live_sub.send_state()
│ "state:<json>\n" over stdin
gemini/script.py _stdin_watcher thread
├─ frame: → _LATEST_FRAME → _send_frame_loop →
│ session.send_realtime_input(video=Blob)
└─ state: → _STATE_PENDING → _send_state_loop →
session.send_realtime_input(text=…)
Recognition toggles (vision / face-rec / zone-rec / movement) are written by the
dashboard to data/.recognition_state.json and POLLED by the Gemini child at 1 Hz
— so flipping a toggle takes effect mid-session with NO restart.
```
## Quick start (on the robot)
```bash
conda activate gemini_sdk
cd ~/Sanad
python3 main.py
```
Then open `http://<robot-ip>:8000` in a browser. (The dashboard binds to the
`wlan0` IP by default — see *Runtime selection* to override.)
Fully-offline brain (no cloud): `SANAD_VOICE_BRAIN=local python3 main.py`
(requires `ollama serve` + the local model env — see *Voice brains*).
> **Gemini API key — required, none ships with the repo.** The `api_key`
> fields in `config/core_config.json` (`gemini_defaults`) and
> `data/motions/config.json` (`gemini`) are intentionally empty (`""`).
> The voice loop cannot connect until you supply one, by any of:
> - **Dashboard** — *Voice & Audio → Gemini API Key* — paste + save, hot-swaps live (no restart). Persists to `data/motions/config.json`.
> - **Env var** — `export SANAD_GEMINI_API_KEY=AIza...` before `python3 main.py`.
> - **Config file** — set `gemini_defaults.api_key` in `config/core_config.json`.
>
> Precedence (highest first): `data/motions/config.json` → `SANAD_GEMINI_API_KEY` → `config/core_config.json`. Get a key at <https://aistudio.google.com/apikey>.
## Dashboard features
### Operations
Quick-fire SDK + JSONL arm actions (chip buttons), gestural-speaking toggle.
### Voice & Audio
- **Live Voice Commands** — fire arm gestures from the *user's* transcript
(wake-phrase → arm action). Master gate + Deferred-trigger toggle.
- **Live Gemini Process** — start/stop the voice conversation subprocess, tail
its log. Choose the Gemini cloud brain or the offline brain via
`SANAD_VOICE_BRAIN`.
- **Typed Replay** — Gemini reads typed text aloud (wrapped with a
"repeat verbatim" prompt); optionally records the clip.
- **Gemini API Key** — hot-swap the key without restart.
- **Wake Phrase Manager** — add/remove phrase → action bindings.
- **Audio Controls** — mic/speaker mute, G1 chest-speaker volume (DDS), device
profile selection, PulseAudio soft-reset and Anker USB hard-reset.
### Motion & Replay
- **Motion Control** — list SDK (built-in) + JSONL (recorded) actions, select +
play. Cancel smoothly returns to `arm_home.jsonl`.
- **Replay Manager** — upload `.jsonl` files, test-play with speed, Teaching
Mode (kinesthetic record — limp the arm and hand-guide it).
- **Macro Recorder** — record a new audio+motion pair, OR pick any WAV + any
motion (SDK or JSONL) and play them in parallel.
### Controller *(locomotion)*
Manual teleoperation of the G1's **legs** via the Unitree `LocoClient`.
**Disarmed every boot**; all motion writes require Arm first.
- **Move / Step** — continuous teleop (vx/vy/vyaw) or discrete one-shot steps.
- **Postures & FSM modes** — zero-torque, damp, squat, sit, stand, balance,
stand-height; prep/ready sequences; MotionSwitcher select-AI/release.
- **Gemini Movement** — toggle voice-driven walking: the `MovementDispatcher`
parses Gemini's *own spoken confirmation phrases* ("Turning right." /
"أستدير يميناً.") and drives the legs (gated on this toggle + an E-STOP latch).
- **E-STOP** — always available; `StopMove` + disarm + latch the dispatcher.
> **Safety:** the arm and locomotion are **mutually exclusive** —
> `arm.set_motion_block(loco.movement_active)` makes every arm
> replay/gesture refuse while the robot is (or just was, within ~1.5 s) walking.
### Recognition
Camera vision + Gemini-side **face** and **zone/place** recognition. All are
**off by default**; each is a **hot toggle** (≈1 s to take effect, no restart).
- **Camera Vision** — `CameraDaemon` captures from a RealSense (preferred) or
USB camera; the supervisor streams JPEG frames to Gemini Live so it can answer
"what do you see?". Live preview panel. Auto-reconnects on USB unplug/stall
and warns if a RealSense negotiated USB 2.0 (Marcus-ported resilience).
- **Face Recognition** — manage `data/faces/face_{id}/` galleries: enroll from
the live camera or upload photos, rename, describe, download (per-photo or
ZIP), delete. On session start (and on any gallery change) the child sends a
**primer turn** carrying every enrolled face + a Khaleeji greeting
instruction — **Gemini matches in-context, so there is no local
face-recognition model**. Recognition needs vision on.
- **Zones & Places** — `data/zones/zone_{zid}/place_{pid}/` two-level gallery:
reference photos per place, optional linked face_ids, and a **"go here"** nav
target (`nav_target_zone/place_id` in the recognition-state file) for
place-aware navigation.
- **Sync Gallery** — force-resend the face/zone primer to the live session.
### Recordings
Skill Registry (predefined audio+motion+callback skills from `skills.json`) +
Saved Records (captured Gemini turn recordings; play/pause/stop/rename/delete).
### Temperature
Live **3D motor-temperature heatmap** — a standalone three.js viewer
(`dashboard/static/temp3d/`) loads the G1 29-DoF URDF + STL meshes and colors
each joint blue→red from the arm controller's throttled `rt/lowstate` snapshot,
streamed over `/ws/motor-temps` at ~8 fps. No second DDS subscriber.
### Terminal
In-browser **PTY shell** to the robot (`/ws/terminal`, xterm.js) — a `bash -i`
as the dashboard's user, with resize + backpressure, bounded to 4 sessions.
(See *Security* — this is full shell access to whoever reaches the URL.)
### Settings & Logs
System info (host, network interfaces, DDS interface, bound dashboard host/port,
per-subsystem status, audio devices), live log stream (`/ws/logs`), per-file
tail, snapshot, and a one-blob "Copy All Logs" bundle.
## Directory layout
| Path | Contents |
|---|---|
| `main.py` | Entry point — fault-isolated boot of all subsystems + the dashboard. Doubles as the service container (route handlers `import` its module globals). |
| `config.py` | Runtime constants + layout-agnostic path resolution; layers `data/motions/config.json` over the JSON config at import. |
| `config/` | Per-subsystem JSON: `core`, `voice`, `gemini`, `local`, `motion`, `dashboard`. |
| `core/` | `brain.py` (skill dispatcher), `event_bus.py`, `skill_registry.py`, `config_loader.py`, `logger.py` (rotating + WS push), `asyncio_compat.py` (3.8 `to_thread` shim). |
| `gemini/` | Gemini Live — `client.py` (one-shot), `script.py` (live brain: audio + video + motion-state), `subprocess.py` (supervisor + stdin frame/state push). |
| `local/` | Fully-offline brain — `vad.py` (Silero), `stt.py` (faster-whisper), `llm.py` (Qwen via Ollama/llama.cpp), `tts.py` (CosyVoice2), `script.py` (the brain), `subprocess.py` (supervisor). Opt-in via `SANAD_VOICE_BRAIN=local`. |
| `voice/` | `sanad_voice.py` (subprocess entry, model-agnostic), `audio_io.py` / `audio_manager.py` / `audio_devices.py` (mic/speaker), `local_tts.py` (SpeechT5 Arabic TTS), `live_voice_loop.py` (user-transcript → arm gesture), `movement_dispatch.py` (Gemini-phrase → locomotion), `typed_replay.py`, `wake_phrase_manager.py`, `text_utils.py` (Arabic normalization + phrase matching), `model_script.py` / `model_subprocess.py` (brain templates). |
| `motion/` | `arm_controller.py` (production 5-phase JSONL replay engine, owns the single DDS init), `macro_player.py`, `macro_recorder.py`, `teaching.py`. (`sanad_arm_controller.py` is a legacy alternate — not wired by `main.py`.) |
| `G1_Controller/` | `loco_controller.py` — locomotion via Unitree `LocoClient` (move/step/postures/FSM/E-STOP); reuses the arm's DDS participant. |
| `vision/` | `camera.py` (RealSense/USB daemon, auto-reconnect), `face_gallery.py`, `zone_gallery.py`, `recognition_state.py` (atomic-JSON toggle IPC). |
| `dashboard/` | `app.py` (FastAPI factory + fault-isolated router registration), `routes/*.py` (20 REST routers), `websockets/*.py` (logs, motor-temps, terminal), `static/index.html` (single-page UI), `static/temp3d/` (3D viewer). |
| `scripts/` | Persona files — `sanad_script.txt` (voice persona "Bousandah"), `sanad_rule.txt`, `sanad_arm.txt` (voice→arm phrases). |
| `data/` | Runtime state — `motions/*.jsonl` (arm trajectories) + `instruction.json` (locomotion phrase map) + `skills.json` + `config.json` (dashboard-editable), `recordings/` (captured turns + macros), `faces/face_{id}/` + `zones/zone_{zid}/place_{pid}/` (galleries), `audio/` (typed-replay WAVs + records index), `.recognition_state.json` (toggle IPC). |
| `model/` | Local SpeechT5 / Whisper / CosyVoice2 weights when using the offline pipeline. |
| `logs/` | Per-module rotating logs. |
## Voice brains
The child `voice/sanad_voice.py` is model-agnostic and selects a brain via
`SANAD_VOICE_BRAIN`. Every brain implements the same contract
(`__init__(audio_io, recorder, voice, system_prompt)`, `async run()`, `stop()`)
and ships a sibling supervisor that spawns the child and parses its
`USER:` / `BOT:` / state log markers.
| Value | Brain | Pipeline |
|---|---|---|
| `gemini` *(default)* | `gemini/script.py` | Gemini Live native-audio (full-duplex speech-to-speech, server-side VAD, vision frames, face/zone primers, voice→movement). Cloud. |
| `local` | `local/script.py` | Silero VAD → faster-whisper (large-v3-turbo, CUDA int8) → Qwen2.5 (Ollama/llama.cpp) → CosyVoice2 streaming TTS. Fully on-device. |
| `model` | `voice/model_script.py` | Template/stub for adding a new provider (OpenAI Realtime, Claude Voice, …). |
To add a brain: drop a file in `voice/` or a new `<brand>/` folder and add a
branch to `voice/sanad_voice.py:_build_brain()`; ship a supervisor modeled on
`voice/model_subprocess.py`.
## Runtime selection (env vars)
| Var | Values | Default | Effect |
|---|---|---|---|
| `SANAD_VOICE_BRAIN` | `gemini`, `local`, `model` | `gemini` | Which brain the subprocess loads (see `voice/sanad_voice.py:_build_brain`). |
| `SANAD_AUDIO_PROFILE` | `builtin`, `anker`, `hollyland_builtin` | `builtin` | Mic + speaker pair. `builtin` = G1 UDP mic + G1 chest speaker via DDS. |
| `SANAD_DDS_INTERFACE` | network iface | `eth0` | DDS network for G1 low-level comms (arm + locomotion + speaker). |
| `SANAD_DASHBOARD_HOST` / `_INTERFACE` | IP / iface | `wlan0` IP | Dashboard bind address. |
| `SANAD_GEMINI_API_KEY` | string | `""` (empty) | Gemini API key. No key ships in the repo — set this, paste one in the dashboard (**Voice & Audio → Gemini API Key**), or fill `gemini_defaults.api_key` in `config/core_config.json`. See [Quick start](#quick-start-on-the-robot). |
| `SANAD_GEMINI_MODEL` / `_VOICE` | string | reads config | Override the Gemini model id / prebuilt voice. |
| `SANAD_G1_VOLUME` | `0`–`100` | `100` | G1 chest-speaker volume; also scales the barge-in threshold. |
| `SANAD_LIVE_SCRIPT` | path | auto | Override the subprocess entry script path. |
| `SANAD_RECORD` | `0` or `1` | `1` | Record every Gemini turn to `data/recordings/`. |
| `SANAD_AEC_ENABLE` | `0` or `1` | `1` | Enable WebRTC AEC3 (if the Python binding is installed). |
| `SANAD_VISION_ENABLE` | `0` or `1` | `0` | Boot default for camera vision. **Runtime truth is the Recognition-tab toggle** → `data/.recognition_state.json`, hot-applied without a restart. |
| `SANAD_FACE_RECOGNITION_ENABLE` | `0` or `1` | `0` | Boot default for Gemini-side face recognition. Also a hot toggle. |
| `SANAD_VISION_SEND_HZ` | float | `2` | Frames/sec the Gemini child relays to Live. |
| `SANAD_CAMERA_WIDTH` / `_HEIGHT` / `_FPS` | int | `424` / `240` / `15` | Capture profile. Also settable per-deploy in `config/core_config.json > camera`. |
| `SANAD_CAMERA_USB_INDEX` | int | auto | Pin a `/dev/videoN` node (avoids picking a RealSense IR stream). |
| `SANAD_FACES_MAX_SAMPLES` | int | `3` | Max photos per person fed into the gallery primer turn (token budget). |
| `SANAD_PROJECT_ROOT` | path | auto | Override the project root (see *Dynamic paths*). |
> All `SANAD_VISION_*` / `SANAD_CAMERA_*` / `SANAD_FACE_*` vars are **boot
> defaults** forwarded to the Gemini child via `LIVE_TUNE`. Once running, the
> Recognition tab's toggles (vision / face-rec / zone-rec / movement) are the
> live source of truth in `data/.recognition_state.json`, polled at 1 Hz.
CLI flags: `python3 main.py --host <ip> --port 8000 --network <dds_iface>`;
`--check-env` prints a subsystem/environment diagnostic and exits.
## API surface
All routes are registered defensively — a router whose import fails is recorded
(`GET /api/_dashboard_status`) and the server still boots without it.
**REST** (prefix → controls): `/api` health · `/api/system` info ·
`/api/voice` Gemini/local generate+connect+key · `/api/motion` arm actions ·
`/api/skills` skill registry · `/api/macros` record/play · `/api/replay` JSONL
CRUD + teaching · `/api/audio` mute/volume/devices/reset · `/api/scripts`
persona files · `/api/records` saved WAVs · `/api/prompt` system prompt ·
`/api/wake-phrases` bindings · `/api/live-voice` arm-phrase dispatcher ·
`/api/live-subprocess` Gemini child · `/api/typed-replay` TTS · `/api/recognition`
vision + face gallery · `/api/zones` zones/places + nav target · `/api/temp`
motor map + snapshot · `/api/controller` locomotion (move/step/postures/modes/
E-STOP).
**WebSockets**: `/ws/logs` (live log stream + 500-line replay) ·
`/ws/motor-temps` (3D heatmap data, ~8 fps) · `/ws/terminal` (PTY shell).
## Architecture notes
- **Subprocess isolation**: `voice/sanad_voice.py` runs as a child of `main.py`
via the supervisor. If the voice loop crashes, the dashboard + arm + legs stay
up.
- **Single DDS init**: `motion/arm_controller.py` owns the one
`ChannelFactoryInitialize`; `LocoController` and the audio routes reuse that
participant rather than re-initializing.
- **Brain contract**: see `voice/model_script.py` — any new model implements
`__init__(audio_io, recorder, voice, system_prompt)`, `async run()`, `stop()`.
- **Supervisor contract**: each brain ships a sibling supervisor (e.g.
`gemini/subprocess.py`) that spawns `sanad_voice.py` with its
`SANAD_VOICE_BRAIN` and parses the brain's log markers. Template:
`voice/model_subprocess.py`.
- **Locomotion safety**: `LocoController` is disarmed every boot, has velocity
caps + a `StopMove` watchdog, and is mutually exclusive with the arm.
Voice-driven movement is **off by default** and gated by the Controller
toggle. Distances/degrees in `data/motions/instruction.json` are
**approximate and must be calibrated on the real robot** — there is no
obstacle/abort stack.
- **Audio routing**: the G1's platform-sound PulseAudio sink is NOT wired to a
physical speaker. All dashboard-triggered playback (`play_wav`, typed-replay
audio, record playback) routes through DDS `AudioClient.PlayStream` via
`audio_manager._play_pcm_via_g1`. The PyAudio path is a desktop/dev fallback.
- **Arm replay**: `motion/arm_controller.py:_replay_file_inner()` is a port of
`G1_Lootah/Manual_Recorder/g1_replay_v4_stable.py:Run()` — ramp-in → settle
hold → playback → smooth return → disable SDK. Body motors (014) lock to a
live snapshot while arm motors (1528) follow the file at 60 Hz. `_return_home()`
runs unconditionally after a cancel for a jerk-free return.
- **Camera frame transport (stdin push)**: the `CameraDaemon` lives in the
parent and caches frames in memory. `GeminiSubprocess` base64-encodes the
latest frame to the child's stdin (~2 fps); the child's `_stdin_watcher`
relays it to Gemini Live with a staleness guard. Chosen over a file drop so
the parent owns the camera once and the dashboard preview reads the same cache.
- **Motion-state channel**: `arm_controller._execute()` emits
`motion.action_started` / `_done` / `_error` on the event bus. `main.py`
forwards each to the child as `state:<json>\n`, injected to Gemini Live as
silent `[STATE-START] wave_hand` / `[STATE-DONE] wave_hand (2.3s)` text so it
can honestly answer "what are you doing?".
- **Recognition is Gemini-side**: no dlib/insightface/onnxruntime. Galleries are
pure file IO; `gemini/script.py:_send_gallery_primer()` builds one multimodal
`send_client_content` turn — every enrolled face/place's photos + a greeting
instruction — and Gemini matches incoming frames against it in-context.
## Camera vision on Jetson
The Recognition tab needs `pyrealsense2` to talk to the Intel RealSense.
**Do not `pip install pyrealsense2` on JetPack 5** — the PyPI wheel is built
against glibc 2.32+ (Ubuntu 22.04) and fails to load on JetPack 5's glibc
2.31 with `ImportError: ... version 'GLIBC_2.32' not found`.
The native runtime is already there (`apt`-installed `librealsense2`). Build
just the Python binding from source against it, into the `gemini_sdk` env:
```bash
rs-enumerate-devices # confirm the D435I shows up at OS level first
source ~/miniconda3/etc/profile.d/conda.sh && conda activate gemini_sdk
pip uninstall -y pyrealsense2 # remove the broken wheel if present
sudo apt install -y cmake build-essential git python3-dev libusb-1.0-0-dev pkg-config libssl-dev
cd /tmp && rm -rf librealsense
git clone --depth=1 --branch v2.56.5 https://github.com/IntelRealSense/librealsense.git
cd librealsense && mkdir -p build && cd build
cmake .. -DBUILD_PYTHON_BINDINGS=ON -DPYTHON_EXECUTABLE=$(which python3) \
-DBUILD_EXAMPLES=OFF -DBUILD_GRAPHICAL_EXAMPLES=OFF \
-DBUILD_UNIT_TESTS=OFF -DCHECK_FOR_UPDATES=OFF -DCMAKE_BUILD_TYPE=Release
make -j$(nproc) pyrealsense2
SITE=$(python3 -c "import sysconfig; print(sysconfig.get_paths()['purelib'])")
mkdir -p "$SITE/pyrealsense2"
cp wrappers/python/pyrealsense2*.so "$SITE/pyrealsense2/"
cp ../wrappers/python/pyrealsense2/__init__.py "$SITE/pyrealsense2/" 2>/dev/null || true
python3 -c 'import pyrealsense2 as rs; print([d.get_info(rs.camera_info.name) for d in rs.context().query_devices()])'
```
Match the `--branch` tag to the installed runtime (`dpkg -l | grep librealsense2`).
If the build isn't worth it, `CameraDaemon` falls back to `cv2.VideoCapture(0)`
automatically — fine for a plain USB webcam, but note a RealSense exposes its
*depth* stream at `/dev/video0`, not RGB, so a real USB cam is the cleaner
fallback (or pin `SANAD_CAMERA_USB_INDEX`). On x86_64 / Ubuntu 22.04+ desktops,
`pip install pyrealsense2` just works.
## Dynamic paths
Every path is derived at runtime — no hard-coded `/home/...` anywhere.
Resolution order for `BASE_DIR` in `config.py`:
1. `SANAD_PROJECT_ROOT` env var (if set).
2. `PROJECT_BASE + PROJECT_NAME` from a `.env` file in `Sanad/` or its parent.
3. `Path(__file__).resolve().parent` — auto-detected.
The project runs unchanged from either layout:
- dev: `<anywhere>/Project/Sanad/`
- deployed: `/home/unitree/Sanad/`
## Deployment (workstation → robot)
```bash
rsync -av --delete \
--exclude=__pycache__ --exclude=logs --exclude=model --exclude=.git \
/path/to/Sanad/ \
unitree@192.168.123.164:/home/unitree/Sanad/
```
Then on the robot: `Ctrl+C` the running `main.py` and re-run.
## Security
The dashboard has **no authentication**. Anyone who can reach
`http://<robot>:8000` gets full robot control — locomotion, arm, audio, file
upload/delete — and, via the **Terminal tab**, an interactive shell as the
dashboard's user. Bind it to a **trusted LAN only**; add auth before any wider
exposure.
## Troubleshooting
| Symptom | Fix |
|---|---|
| `No LowState received in 2s — refusing to replay` | `main.py` was re-executed as both `__main__` and `Project.Sanad.main`, creating two arm instances. Fix lives in the `sys.modules` alias near the top of `main.py`. Restart. |
| `G1ArmActionClient not available — skipping` for SDK actions | Same duplicate-init issue as above. |
| `No module named 'Project'` in subprocess | Bootstrap preamble in `voice/sanad_voice.py:~30` synthesises the `Project.Sanad` namespace when run as `__main__`. |
| Controller moves rejected (409) | The Controller is **disarmed by default** — hit Arm first. Reads + E-STOP are always allowed. |
| Arm action refused while "movement armed" | Arm ↔ locomotion are mutually exclusive. Disarm/stop locomotion, then trigger the arm. |
| Voice-driven walking does nothing | "Gemini Movement" toggle off, or E-STOP latched. Toggle on; clear E-STOP. Distances are uncalibrated. |
| Arm jumps at start of JSONL replay | `SETTLE_HOLD_SEC` (in `config/motion_config.json > arm_controller`) too low — try `0.7` or `1.0`. |
| Record playback silent | `audio_mgr.play_wav` only routes to G1 DDS if the Unitree SDK is importable; on desktop it falls back to the PulseAudio sink. |
| Live Voice Commands transcript stuck | Deferred trigger was queued but `trigger_enabled` toggle was off. Toggle on — or the pending-trigger poll fires it automatically once enabled. |
| Gemini "no audio" on Typed Replay | Non-deterministic; the retry chain in `voice/typed_replay.py:generate_audio` tries three prompt variants. For reliable TTS, use the offline `local_tts` SpeechT5 path. |
| Local brain exits immediately | `ollama serve` not running / model not pulled, or weights missing under `model/`. Check `logs/local_subprocess.log`. The Gemini brain is the safe default. |
| Recognition tab: "Camera could not start (no backend)" | No camera backend acquired. Check `rs-enumerate-devices` (RealSense at OS level) and `python3 -c 'import pyrealsense2'` in the `gemini_sdk` env. The glibc `ImportError` means the pip wheel is incompatible — see "Camera vision on Jetson" above. |
| Camera badge stuck on "reconnecting…" | `CameraDaemon` lost the device and is retrying with exponential backoff. Re-seat the USB 3 cable; check `logs/camera.log` for the USB-2.0 warning. |
| Gemini doesn't greet an enrolled face | Face Recognition toggle on? Vision on? (Face rec needs frames.) Check `logs/gemini_brain.log` for `face gallery primed: N person(s)`. Hit "Sync Gallery" to force a re-prime. |
| Gemini unaware of motion state | The `motion.action_*` → `send_state` chain only runs when Live Gemini is up. Check `logs/gemini_subprocess.log` and `logs/gemini_brain.log` for `STATE injected:` lines. |
## License / attribution
Internal project for YS Lootah Technology. Reuses/ports patterns from:
- `G1_Lootah/Manual_Recorder/g1_replay_v4_stable.py` (arm replay math)
- `SanadVoice/gemini_interact` (arm-phrase dispatch, skill registry)
- `SanadVoice/gemini_voice_v2` (local SpeechT5 TTS)
- `Project/Marcus` — camera→Gemini stdin-push transport, motion-state
injection, camera daemon resilience (auto-reconnect, USB-2.0 warning), the
`API/camera_api.py` cache shape (`get_frame_b64` / `get_fresh_frame`), and the
confirmation-phrase → locomotion pattern (`movement_dispatch`).
- Unitree `unitree_sdk2py` (G1 low-level SDK, `LocoClient`, `G1ArmActionClient`,
`AudioClient.PlayStream`).

0
vendor/Sanad/__init__.py vendored Normal file
View File

472
vendor/Sanad/config.py vendored Normal file
View File

@ -0,0 +1,472 @@
"""Centralized configuration for the Sanad robot assistant.
Resolution order for BASE_DIR (highest priority first):
1. SANAD_PROJECT_ROOT environment variable
2. PROJECT_BASE + PROJECT_NAME from .env file (or env vars)
3. Path(__file__).resolve().parent (auto-detected: the directory containing this file)
Every other directory is derived from BASE_DIR — never hardcode an absolute path.
"""
from __future__ import annotations
import json
import os
from pathlib import Path
from typing import Any
def _read_env_file(env_path: Path) -> dict[str, str]:
"""Minimal .env reader (no python-dotenv dependency)."""
out: dict[str, str] = {}
if not env_path.exists():
return out
try:
for raw in env_path.read_text(encoding="utf-8").splitlines():
line = raw.strip()
if not line or line.startswith("#") or "=" not in line:
continue
k, v = line.split("=", 1)
out[k.strip()] = v.strip().strip('"').strip("'")
except OSError:
pass
return out
def _resolve_base_dir() -> Path:
"""Resolve the Sanad project root with override support."""
# 1. Direct env override
override = os.environ.get("SANAD_PROJECT_ROOT", "").strip()
if override:
p = Path(override).expanduser().resolve()
if p.exists():
return p
# 2. PROJECT_BASE + PROJECT_NAME pattern
_here = Path(__file__).resolve().parent # Sanad/
env_files = [
_here / ".env", # Sanad/.env
_here.parent / ".env", # Project/.env
]
for env_path in env_files:
env = _read_env_file(env_path)
base = env.get("PROJECT_BASE") or os.environ.get("PROJECT_BASE", "")
name = env.get("PROJECT_NAME") or os.environ.get("PROJECT_NAME", "")
if base and name:
candidate = Path(base).expanduser().resolve() / name
if candidate.exists():
return candidate
# 3. Auto-detect — this file lives at Sanad/config.py, so parent = Sanad/
return _here
# Project root, resolved once at import; every path below hangs off it.
BASE_DIR = _resolve_base_dir()
DATA_DIR = BASE_DIR / "data"
LOGS_DIR = BASE_DIR / "logs"
SCRIPTS_DIR = BASE_DIR / "scripts"
MODEL_DIR = BASE_DIR / "model"
# Audio recordings (typed-replay, etc.) live under data/audio
AUDIO_RECORDINGS_DIR = DATA_DIR / "audio"
# Motion macro recordings (paired with audio) live under data/recordings/motion
MOTION_RECORDINGS_DIR = DATA_DIR / "recordings" / "motion"
# Motion JSONL macros (auto-discovered as actions)
MOTIONS_DIR = DATA_DIR / "motions"
# skills.json and config.json sit alongside the JSONL macros in data/motions.
SKILLS_FILE = MOTIONS_DIR / "skills.json"
CONFIG_FILE = MOTIONS_DIR / "config.json"
# ─── Load baseline defaults from config/core_config.json ───
# Single source of truth. Runtime overrides via:
# 1. env vars (SANAD_GEMINI_API_KEY, SANAD_GEMINI_MODEL, ...)
# 2. data/motions/config.json (dashboard-editable — see load_config())
# 3. config/core_config.json (the baseline file read by _load_core_config())
def _load_core_config() -> dict[str, Any]:
    """Load ``config/core_config.json`` under BASE_DIR as the baseline settings.

    Returns:
        The file's top-level JSON object with every key that starts with
        ``_`` removed (``_comment`` / ``_description`` noise). An empty dict
        when the file is missing, unreadable, not valid UTF-8 or JSON, or when
        its top level is not a JSON object — callers use ``.get()`` on the
        result at import time, so this must always be a dict.
    """
    cfg_path = BASE_DIR / "config" / "core_config.json"
    if not cfg_path.exists():
        return {}
    try:
        raw = json.loads(cfg_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {}
    if not isinstance(raw, dict):
        # A top-level list/string/number has no .items(); fall back to defaults
        # rather than crash the import.
        return {}
    # Strip _comment / _description noise
    return {k: v for k, v in raw.items() if not k.startswith("_")}
# Baseline config, loaded once at import; the sections below read from it.
_CORE_CFG = _load_core_config()
_GEMINI = _CORE_CFG.get("gemini_defaults", {})
_AUDIO = _CORE_CFG.get("audio_defaults", {})
# -- Gemini defaults (override via data/motions/config.json or env) --
# For each setting here the environment variable, when set, wins over the
# core_config.json value, which in turn wins over the literal default.
GEMINI_API_KEY = os.environ.get(
"SANAD_GEMINI_API_KEY",
_GEMINI.get("api_key", ""))
# The configured model id gets the "models/" prefix added here; a
# SANAD_GEMINI_MODEL override is used verbatim, with no prefix added.
GEMINI_MODEL = os.environ.get(
"SANAD_GEMINI_MODEL",
"models/" + _GEMINI.get("model_live", "gemini-2.5-flash-native-audio-preview-12-2025"))
GEMINI_VOICE = os.environ.get(
"SANAD_GEMINI_VOICE",
_GEMINI.get("voice_name", "Charon"))
# The WebSocket URI and timeout have no environment override in this block.
GEMINI_WS_URI = _GEMINI.get(
"model_ws_uri",
"wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent")
GEMINI_WS_TIMEOUT = _GEMINI.get("ws_timeout_sec", 30)
# -- Audio defaults --
# Sample rates are presumably in Hz and CHUNK_SIZE in frames — TODO confirm
# against the audio I/O code.
SEND_SAMPLE_RATE = _AUDIO.get("send_sample_rate", 16000)
RECEIVE_SAMPLE_RATE = _AUDIO.get("receive_sample_rate", 24000)
CHUNK_SIZE = _AUDIO.get("chunk_size", 512)
CHANNELS = _AUDIO.get("channels", 1)
# -- PulseAudio hardware IDs --
SINK = _AUDIO.get("sink", "alsa_output.usb-Anker_PowerConf_A3321-DEV-SN1-01.analog-stereo")
SOURCE = _AUDIO.get("source", "alsa_input.usb-Anker_PowerConf_A3321-DEV-SN1-01.mono-fallback")
# Derived from SINK by appending ".monitor".
MONITOR_SOURCE = f"{SINK}.monitor"
# -- Dashboard --
# Default: bind to wlan0's IP (auto-detected at startup) so the dashboard is
# reachable on the wireless network. If that interface has no IP, the first
# non-loopback address from `hostname -I` is used, then 0.0.0.0 (all interfaces).
#
# Resolution order (highest priority first):
# 1. SANAD_DASHBOARD_HOST env var (explicit IP or hostname)
# 2. SANAD_DASHBOARD_INTERFACE env var → that interface's IP
# 3. wlan0 interface IP (default)
# 4. first non-loopback IP from `hostname -I`
# 5. 0.0.0.0 (bind to all)
#
# Override via --host CLI flag too.
DASHBOARD_INTERFACE = os.environ.get("SANAD_DASHBOARD_INTERFACE", "wlan0")
def _get_interface_ip(iface: str) -> str | None:
    """Return the IPv4 address bound to `iface`, or None if none was found.

    Several lookups are tried in order, because different Linux setups expose
    interface info through different mechanisms:
      1. fcntl SIOCGIFADDR (fastest, no subprocess)
      2. parsing `ip -4 -o addr show <iface>` (works on Ubuntu/Jetson)
      3. the last-resort fallback in `_get_iface_ip_via_proc`

    The first lookup that yields a non-empty address wins.
    """
    lookups = (
        _get_iface_ip_fcntl,
        _get_iface_ip_via_ip_cmd,
        _get_iface_ip_via_proc,
    )
    for lookup in lookups:
        address = lookup(iface)
        if address:
            return address
    return None
def _get_iface_ip_fcntl(iface: str) -> str | None:
try:
import fcntl
import socket
import struct
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
try:
ifname = iface[:15].encode("utf-8")
packed = fcntl.ioctl(
s.fileno(),
0x8915, # SIOCGIFADDR
struct.pack("256s", ifname),
)
return socket.inet_ntoa(packed[20:24])
finally:
s.close()
except Exception:
return None
def _get_iface_ip_via_ip_cmd(iface: str) -> str | None:
try:
import subprocess
r = subprocess.run(
["ip", "-4", "-o", "addr", "show", iface],
capture_output=True, text=True, timeout=2.0,
)
if r.returncode != 0:
return None
# Output: "5: wlan0 inet 10.255.254.86/24 brd ..."
for line in r.stdout.splitlines():
parts = line.split()
for i, p in enumerate(parts):
if p == "inet" and i + 1 < len(parts):
return parts[i + 1].split("/")[0]
except Exception:
return None
return None
def _get_iface_ip_via_proc(iface: str) -> str | None:
"""Fallback: scrape /proc/net/fib_trie for an IP advertised on this iface.
Less reliable than fcntl/ip cmd but doesn't need any external tooling.
"""
try:
import subprocess
# Try `hostname -I` as a final fallback (returns space-separated IPs)
r = subprocess.run(["hostname", "-I"], capture_output=True, text=True, timeout=1.0)
if r.returncode == 0:
ips = (r.stdout or "").strip().split()
# Return first non-loopback IPv4
for ip in ips:
if "." in ip and not ip.startswith("127."):
return ip
except Exception:
return None
return None
def list_network_interfaces() -> list[dict]:
    """Describe every network interface on the box for the dashboard's system-info panel.

    Each entry is a dict with keys `name`, `index`, `ip` (empty string when
    no address was found) and `is_up` (True when an address was found).
    Enumeration is best-effort: if it fails part-way, the entries collected
    so far are still returned.
    """
    interfaces: list[dict] = []
    try:
        import socket

        for if_index, if_name in socket.if_nameindex():
            address = _get_interface_ip(if_name)
            entry = {
                "name": if_name,
                "index": if_index,
                "ip": address or "",
                "is_up": address is not None,
            }
            interfaces.append(entry)
    except Exception:
        pass
    return interfaces
def _resolve_dashboard_host() -> str:
"""Resolve the host the dashboard should bind to.
Order:
1. SANAD_DASHBOARD_HOST env var (explicit IP/hostname)
2. SANAD_DASHBOARD_INTERFACE that interface's IP
3. wlan0's IP (default)
4. First non-loopback IP from `hostname -I`
5. 0.0.0.0 (bind everywhere)
"""
explicit = os.environ.get("SANAD_DASHBOARD_HOST", "").strip()
if explicit:
return explicit
iface_ip = _get_interface_ip(DASHBOARD_INTERFACE)
if iface_ip:
return iface_ip
# Try `hostname -I` as a final non-loopback fallback
try:
import subprocess
r = subprocess.run(["hostname", "-I"], capture_output=True, text=True, timeout=1.0)
if r.returncode == 0:
for ip in (r.stdout or "").strip().split():
if "." in ip and not ip.startswith("127."):
return ip
except Exception:
pass
return "0.0.0.0"
# Bind address, resolved once at import time. A "dashboard.host" entry in
# CONFIG_FILE replaces it later when _apply_overrides() runs.
DASHBOARD_HOST = _resolve_dashboard_host()
# Canonical SanadV3 port (matches shell_scripts/start_all.sh + docs). The
# legacy Sanad ran on :8000; SanadV3 is :8001 to never collide with it.
# NOTE(review): config/core_config.json ships dashboard_defaults.port = 8000
# and this constant does not read it — confirm which value is authoritative.
DASHBOARD_PORT = 8001
# -- Local TTS --
LOCAL_TTS_MODEL = "MBZUAI/speecht5_tts_clartts_ar"
LOCAL_TTS_MODEL_PATH = str(MODEL_DIR / "speecht5_tts_clartts_ar")
LOCAL_TTS_HIFIGAN_PATH = str(MODEL_DIR / "speecht5_hifigan")
LOCAL_TTS_XVECTOR_PATH = str(MODEL_DIR / "arabic_xvector_embedding.pt")
# -- Motion --
_G1 = _CORE_CFG.get("g1_hardware", {})
REPLAY_HZ = _G1.get("replay_hz", 60.0)
G1_NUM_MOTOR = _G1.get("num_motor", 29)
ENABLE_ARM_SDK_INDEX = _G1.get("enable_arm_sdk_index", 29)
KP_HIGH = 300.0
KD_HIGH = 3.0
KP_LOW = 80.0
KD_LOW = 3.0
KP_WRIST = 40.0
KD_WRIST = 1.5
WEAK_MOTORS = {4, 10, 15, 16, 17, 18, 22, 23, 24, 25}
WRIST_MOTORS = {19, 20, 21, 26, 27, 28}
# -- Live Gemini subprocess tuning --
LIVE_TUNE: dict[str, str] = {
"SANAD_REQUIRED_LOUD_CHUNKS": "5",
"SANAD_PREBUFFER_CHUNKS": "3",
"SANAD_PLAYBACK_TIMEOUT": "0.25",
"SANAD_BARGE_IN_COOLDOWN": "1.0",
"SANAD_AI_SPEAK_GRACE": "0.5",
# ECHO_GUARD_SEC suppresses USER SAID log lines for this many seconds
# after the robot finishes a chunk. Previously 1.2 — caused a visible
# lag where "robot finished talking" was followed by silence in the
# log even though Gemini was transcribing the user's new speech
# immediately. Lowered to 0.3 to match typical room reverb tail; the
# real echo protection is the silence-during-speaking gate, not this.
"SANAD_ECHO_GUARD_SEC": "0.3",
"SANAD_SPEAKING_ENERGY_GATE": "0.90",
"SANAD_CALIBRATION_CHUNKS": "30",
"SANAD_THRESHOLD_MULTIPLIER": "4.0",
# Base barge-in threshold calibrated at the REFERENCE volume (50%).
# At runtime, scaled QUADRATICALLY with actual G1 volume:
# scale = (actual_vol / ref_vol) ** 2
#
# Physical reason: doubling digital speaker volume doubles sample
# amplitude, which means RECEIVED energy at the mic quadruples
# (energy ~ amplitude²). Linear scaling under-threshold echo at
# high volumes → caused "robot listening to himself" feedback.
#
# Measured on Hollyland + G1 speaker at 100% volume:
# echo peak (no user) up to ~15700
# voice peak (user) 25000-32000+ (often saturates 32767)
# Safe threshold at 100% vol: ~18000, above echo / below voice.
#
# Working back with quadratic scale: base × (100/50)² = 18000
# base × 4 = 18000 → base = 4500 at 50% ref volume.
"SANAD_MIN_THRESHOLD": "800",
"SANAD_PLAYBACK_BARGE_MIN": "2500",
"SANAD_PLAYBACK_BARGE_MULT": "1.5",
# Sustained-chunk requirement for barge-in. Balance:
# higher = fewer false triggers from echo bursts
# lower = quicker response to short commands ("stop", "توقف")
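# The figures here were written for 5 chunks (~160 ms of sustained voice,
# i.e. ~32 ms per chunk). The value set below is 2 (~64 ms at that chunk
# length), which favours quick response over echo-burst rejection.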
"SANAD_PLAYBACK_REQUIRED_CHUNKS": "2",
"SANAD_SILENCE_AFTER_SPEECH": "1.2",
"SANAD_SPEECH_THRESHOLD": "300",
"SANAD_DDS_INTERFACE": os.environ.get("SANAD_DDS_INTERFACE", "eth0"),
# G1 built-in mic — UDP multicast 239.168.123.161:5555.
# Requires wake-up conversation mode ON in Unitree app.
"SANAD_USE_G1_MIC": "1",
# ── Recognition (camera vision + face recognition) ──
# All of these are BOOT defaults. The runtime source of truth is the
# state file data/.recognition_state.json — toggled live from the
# Recognition tab and polled by the Gemini child at 1 Hz.
"SANAD_VISION_ENABLE": "0",
"SANAD_VISION_SEND_HZ": "2",
"SANAD_VISION_STALE_MS": "1500",
"SANAD_CAMERA_WIDTH": "424",
"SANAD_CAMERA_HEIGHT": "240",
"SANAD_CAMERA_FPS": "15",
"SANAD_CAMERA_JPEG_QUALITY": "70",
"SANAD_FACE_RECOGNITION_ENABLE": "0",
"SANAD_FACES_DIR": str(DATA_DIR / "faces"),
"SANAD_FACES_MAX_SAMPLES": "3",
"SANAD_FACES_PRIMER_RESIZE": "256",
"SANAD_RECOGNITION_STATE_PATH": str(DATA_DIR / ".recognition_state.json"),
"SANAD_RECOGNITION_POLL_S": "1.0",
}
# -- Camera --
CAMERA_SERVICE_PORT = 8091
DIRECT_CAMERA_URL = f"http://127.0.0.1:{CAMERA_SERVICE_PORT}"
# -- Navigation (web_nav3 / rosbridge) --
WEB_NAV3_URL = os.environ.get("WEB_NAV3_URL", "http://127.0.0.1:8765")
ROSBRIDGE_URL = os.environ.get("ROSBRIDGE_URL", "ws://127.0.0.1:9090")
NAV_ROBOT_NAME = os.environ.get("NAV_ROBOT_NAME", "sanad")
# -- DDS / hardware --
# Jetson G1 default is eth0 (the robot's internal network).
# Override with SANAD_DDS_INTERFACE=lo for desktop/sim development.
DDS_NETWORK_INTERFACE = os.environ.get("SANAD_DDS_INTERFACE", "eth0")
def _ensure_dirs() -> list[str]:
"""Create runtime directories. Failures are collected, not raised.
Returns the list of directories that failed to create caller can decide
whether to log/abort. The module import never crashes due to a single
permission error on a single directory.
"""
failed: list[str] = []
for d in (DATA_DIR, LOGS_DIR, SCRIPTS_DIR, AUDIO_RECORDINGS_DIR,
MOTION_RECORDINGS_DIR, MOTIONS_DIR):
try:
d.mkdir(parents=True, exist_ok=True)
except OSError:
failed.append(str(d))
return failed
# Best-effort: create dirs at import. Ignore failures here — individual
# subsystems will handle missing dirs at usage time and isolation prevents
# cascading import failures.
_DIRS_FAILED = _ensure_dirs()
def load_config() -> dict[str, Any]:
    """Load runtime config overrides from CONFIG_FILE (if present).

    Returns:
        The parsed JSON object, or {} when the file is missing, unreadable,
        not valid UTF-8, not valid JSON, or its top-level value is not an
        object. Never raises for a bad config file.
    """
    try:
        with open(CONFIG_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError (non-UTF-8 bytes).
        return {}
    # Callers index the result by section name, so a list/str/number root
    # counts as "no overrides".
    return data if isinstance(data, dict) else {}
def save_config(cfg: dict[str, Any]):
    """Write `cfg` to CONFIG_FILE as pretty-printed UTF-8 JSON, atomically.

    The data goes to a temporary file in the same directory first and is then
    moved over CONFIG_FILE with os.replace, so readers never see a
    half-written file. On any failure the temporary file is removed and the
    original exception is re-raised.
    """
    import os, tempfile

    target_dir = CONFIG_FILE.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    handle, scratch = tempfile.mkstemp(
        prefix=f".{CONFIG_FILE.name}.",
        suffix=".tmp",
        dir=str(target_dir),
    )
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as out:
            json.dump(cfg, out, ensure_ascii=False, indent=2)
        os.replace(scratch, CONFIG_FILE)
    except Exception:
        # Best-effort cleanup; the scratch file may already be gone.
        try:
            os.unlink(scratch)
        except OSError:
            pass
        raise
# Apply config.json overrides on top of module constants (was previously dead code).
def _apply_overrides():
cfg = load_config()
if not cfg:
return
g = globals()
gemini = cfg.get("gemini", {})
if isinstance(gemini, dict):
if "api_key" in gemini and gemini["api_key"]:
g["GEMINI_API_KEY"] = gemini["api_key"]
if "model" in gemini:
g["GEMINI_MODEL"] = gemini["model"]
if "voice" in gemini:
g["GEMINI_VOICE"] = gemini["voice"]
audio = cfg.get("audio", {})
if isinstance(audio, dict):
if "send_sample_rate" in audio:
g["SEND_SAMPLE_RATE"] = int(audio["send_sample_rate"])
if "receive_sample_rate" in audio:
g["RECEIVE_SAMPLE_RATE"] = int(audio["receive_sample_rate"])
if "chunk_size" in audio:
g["CHUNK_SIZE"] = int(audio["chunk_size"])
if "sink" in audio:
g["SINK"] = audio["sink"]
if "source" in audio:
g["SOURCE"] = audio["source"]
dashboard = cfg.get("dashboard", {})
if isinstance(dashboard, dict):
if "host" in dashboard:
g["DASHBOARD_HOST"] = dashboard["host"]
if "port" in dashboard:
g["DASHBOARD_PORT"] = int(dashboard["port"])
try:
_apply_overrides()
except Exception:
# Never let a malformed config.json kill module import.
pass

101
vendor/Sanad/config/core_config.json vendored Normal file
View File

@ -0,0 +1,101 @@
{
"_description": "Tunables for core/* modules. Loaded via core.config_loader.load('core').",
"brain": {
"allowed_callback_prefixes": [
"Project.Sanad.motion.",
"Project.Sanad.voice.",
"motion.",
"voice."
],
"gestural_speaking_default": false
},
"logger": {
"log_level": "INFO",
"format": "%(asctime)s [%(name)s] %(levelname)-7s %(message)s",
"datefmt": "%Y-%m-%d %H:%M:%S",
"file_max_bytes": 10485760,
"file_backup_count": 7
},
"event_bus": {
"emit_timeout_sec": 0.5
},
"paths": {
"_comment": "Path roots — resolved against BASE_DIR in core/config.py",
"data": "data",
"logs": "logs",
"scripts": "scripts",
"model": "model",
"audio_recordings": "data/audio",
"motion_recordings": "data/recordings/motion",
"motions": "data/motions"
},
"gemini_defaults": {
"_comment": "Baseline Gemini API config — SINGLE SOURCE OF TRUTH. All voice modules read from here.",
"api_key": "",
"model_live": "gemini-2.5-flash-native-audio-preview-12-2025",
"model_ws_uri": "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent",
"voice_name": "Charon",
"ws_timeout_sec": 30,
"default_system_prompt": "You are Bousandah, a wise and friendly Emirati assistant. Speak strictly in the UAE dialect (Khaleeji). Be helpful, concise, and use local greetings like 'Marhaba' and 'Ya Khoy'."
},
"g1_hardware": {
"_comment": "G1 humanoid hardware constants — shared by every motion/voice module that talks to the arm.",
"num_motor": 29,
"enable_arm_sdk_index": 29,
"replay_hz": 60.0
},
"script_files": {
"_comment": "Filenames (under scripts/) used across voice + dashboard",
"persona": "sanad_script.txt",
"rules": "sanad_rule.txt",
"arm_phrases": "sanad_arm.txt"
},
"dashboard_defaults": {
"host": null,
"port": 8000,
"interface": "wlan0"
},
"audio_defaults": {
"_comment": "Host PulseAudio fallback only — the G1 deployment uses UDP multicast mic + AudioClient.PlayStream speaker (see SANAD_USE_G1_MIC in config.py LIVE_TUNE). Default here is the Jetson/G1 built-in platform-sound chip.",
"send_sample_rate": 16000,
"receive_sample_rate": 24000,
"chunk_size": 512,
"channels": 1,
"sink": "alsa_output.platform-sound.analog-stereo",
"source": "alsa_input.platform-sound.analog-stereo"
},
"dds": {
"network_interface_default": "eth0"
},
"camera": {
"_comment": "Recognition tab camera daemon (parent process reads this). width/height/fps/jpeg_quality + the reconnect knobs configure CameraDaemon. Frames are cached in memory and pushed to the Gemini child over its stdin (no file drop). send_hz/stale_ms are read by the Gemini child via SANAD_VISION_SEND_HZ / SANAD_VISION_STALE_MS env vars (LIVE_TUNE).",
"width": 424,
"height": 240,
"fps": 15,
"jpeg_quality": 70,
"send_hz": 2,
"stale_ms": 1500,
"stale_threshold_s": 10.0,
"reconnect_min_s": 2.0,
"reconnect_max_s": 10.0,
"capture_timeout_ms": 5000
},
"faces": {
"_comment": "Face gallery for Gemini-side recognition. Folder layout: data/faces/face_{id}/{face_1.jpg, ...} + optional meta.json {\"name\": \"...\"}. Gemini does the matching — no local ML model.",
"dir_rel": "data/faces",
"max_samples_per_face": 3,
"primer_resize_long_side": 256
}
}

View File

@ -0,0 +1,49 @@
{
"_description": "Tunables for dashboard/* modules. Loaded via core.config_loader.load('dashboard').",
"app": {
"_comment": "dashboard/app.py — FastAPI app",
"title": "Sanad Dashboard",
"version": "1.0.0",
"static_subdir": "dashboard/static"
},
"api_input": {
"_comment": "Shared by every route that accepts user text input / uploads. Single source of truth.",
"max_text_len": 2000,
"max_upload_bytes": 8388608
},
"voice_route": {
"_comment": "dashboard/routes/voice.py — reads max_text_len from api_input above",
"api_key_mask_visible": 4
},
"typed_replay_route": {
"_comment": "dashboard/routes/typed_replay.py — reads max_text_len from api_input above"
},
"records_route": {
"_comment": "dashboard/routes/records.py",
"index_filename": "records.json"
},
"prompt_route": {
"_comment": "dashboard/routes/prompt.py — script/rule filenames come from core.script_files; default prompt from core.gemini_defaults.default_system_prompt"
},
"logs_route": {
"_comment": "dashboard/routes/logs.py",
"default_tail_lines": 200,
"max_tail_lines": 5000
},
"scripts_route": {
"_comment": "dashboard/routes/scripts.py — max_script_bytes reads from api_input.max_upload_bytes"
},
"live_subprocess_route": {
"_comment": "dashboard/routes/live_subprocess.py",
"tail_default_lines": 100
}
}

35
vendor/Sanad/config/gemini_config.json vendored Normal file
View File

@ -0,0 +1,35 @@
{
"_description": "Tunables for gemini/* modules. Loaded via core.config_loader.load('gemini'). API credentials (api_key, model, voice_name) still live in core_config.json > gemini_defaults — single source of truth shared with config.py.",
"client": {
"_comment": "gemini/client.py — short-session WebSocket client used by dashboard /generate + typed replay. default_system_prompt comes from core.gemini_defaults.",
"recv_timeout_sec": 30,
"reconnect_max_attempts": 3,
"reconnect_initial_delay_sec": 1.0,
"reconnect_max_delay_sec": 10.0
},
"subprocess": {
"_comment": "gemini/subprocess.py — GeminiSubprocess supervisor. Spawns voice/sanad_voice.py as a child, tails stdout for Gemini-specific log markers, pushes camera frames + motion state to the child over its stdin, exposes transcript + state to the dashboard.",
"log_tail_size": 2000,
"transcript_tail_size": 30,
"log_name": "gemini_subprocess",
"stop_timeout_sec": 3.0,
"terminate_timeout_sec": 2.0,
"frame_forward_interval_sec": 0.5,
"noisy_prefixes": [
"ALSA lib ",
"Expression 'alsa_",
"Cannot connect to server socket",
"jack server is not running"
],
"noisy_fragments": [
"Unknown PCM",
"Evaluate error",
"snd_pcm_open_noupdate",
"PaAlsaStream",
"snd_config_evaluate",
"snd_func_refer"
]
}
}

92
vendor/Sanad/config/local_config.json vendored Normal file
View File

@ -0,0 +1,92 @@
{
"_description": "Tunables for local/* — fully on-device voice pipeline (Silero VAD → Whisper → Qwen via llama.cpp → CosyVoice2). Loaded via core.config_loader.load('local').",
"subprocess": {
"_comment": "local/subprocess.py — LocalSubprocess supervisor. Mirrors gemini/subprocess.py. IMPORTANT: python_bin points at the `local` conda env (Python 3.8 + Jetson CUDA torch) so CosyVoice+Whisper run with GPU, while the dashboard/Gemini stack stays in gemini_sdk (Python 3.10).",
"python_bin": "/home/unitree/miniconda3/envs/local/bin/python",
"log_tail_size": 2000,
"transcript_tail_size": 30,
"log_name": "local_subprocess",
"stop_timeout_sec": 5.0,
"terminate_timeout_sec": 3.0,
"noisy_prefixes": [
"ALSA lib ",
"Expression 'alsa_",
"Cannot connect to server socket",
"jack server is not running"
],
"noisy_fragments": [
"Unknown PCM",
"Evaluate error",
"snd_pcm_open_noupdate",
"PaAlsaStream"
]
},
"vad": {
"_comment": "Silero VAD — CPU. Emits speech_start / speech_end events.",
"sample_rate": 16000,
"frame_ms": 32,
"threshold": 0.55,
"min_silence_ms": 400,
"min_speech_ms": 250,
"pad_start_ms": 200,
"pad_end_ms": 200,
"device": "cpu"
},
"stt": {
"_comment": "faster-whisper Large V3 Turbo, INT8 on GPU.",
"model_name": "large-v3-turbo",
"model_subdir": "faster-whisper-large-v3-turbo",
"device": "cuda",
"compute_type": "int8_float16",
"beam_size": 1,
"language": null,
"vad_filter": false,
"no_speech_threshold": 0.6,
"min_utterance_chars": 2,
"temperature": 0.0
},
"llm": {
"_comment": "Qwen 2.5 Instruct via Ollama (default) OR self-managed llama.cpp. Set backend to pick.",
"backend": "ollama",
"_ollama_comment": "Ollama daemon — assumes `ollama serve` is running; `ollama pull qwen2.5:1.5b` to fetch.",
"ollama_host": "127.0.0.1",
"ollama_port": 11434,
"ollama_model": "qwen2.5:1.5b",
"ollama_keep_alive": "5m",
"_llamacpp_comment": "Self-managed llama-server subprocess. Only used when backend='llama_cpp'.",
"model_subdir": "qwen2.5-1.5b-instruct-q4_k_m.gguf",
"server_binary": "llama-server",
"host": "127.0.0.1",
"port": 8080,
"n_gpu_layers": 99,
"ctx_size": 2048,
"threads": 4,
"startup_timeout_sec": 30,
"_shared_comment": "Generation params — both backends.",
"request_timeout_sec": 30,
"max_tokens": 200,
"temperature": 0.7,
"top_p": 0.9,
"stop": ["<|im_end|>", "\n\n\n"],
"chunk_delimiters": ".,?!؟،",
"chunk_min_chars": 8
},
"tts": {
"_comment": "CosyVoice2 0.5B streaming — GPU. Uses a 3s reference WAV for voice cloning.",
"model_subdir": "CosyVoice2-0.5B",
"reference_wav_subdir": "khaleeji_reference_3s.wav",
"reference_prompt": "",
"stream_chunk_sec": 0.25,
"sample_rate": 16000,
"queue_max": 3,
"device": "cuda"
}
}

27
vendor/Sanad/config/mask_config.json vendored Normal file
View File

@ -0,0 +1,27 @@
{
"_comment": "Shining LED face mask (BLE). Driven by the FaceController subsystem (face/mask_face.py) which imports the standalone Mask project. Needs an env with bleak + Pillow (g1_env). Free the mask from the phone app before connecting.",
"mask_dir": "",
"_mask_dir": "Path to the Mask project (flat shiningmask lib). Empty -> auto: <Project>/Mask. Env override: SANAD_MASK_DIR.",
"name_prefix": "MASK",
"_name_prefix": "BLE scan prefix; the mask advertises e.g. 'MASK-02A711'. Env: SANAD_MASK_NAME_PREFIX.",
"address": "",
"_address": "Specific BLE MAC to connect to. Empty -> scan by name_prefix. Env: SANAD_MASK_ADDRESS.",
"adapter": "",
"_adapter": "BlueZ adapter (e.g. 'hci0'). Empty -> default. Env: SANAD_MASK_ADAPTER.",
"brightness": 95,
"_brightness": "0-128. Keep <=100 to avoid LED flicker (battery-limited).",
"fps": 8.0,
"_fps": "FaceAnimator (fallback driver) frame rate (PLAY commands/sec).",
"lifelike": true,
"_lifelike": "Use the LifelikeFace driver (face/face_motion.py): eye saccades, varied blinks, listening/thinking/speaking states, reactions, smooth lip-sync. false -> basic FaceAnimator.",
"autostart": true,
"_autostart": "Auto-connect + Start face on boot (best-effort, background — never blocks startup). After the one-time frame upload, later boots just connect + animate. false -> connect/start manually from the dashboard.",
"connect_timeout": 15.0,
"connect_attempts": 5,
"eye_color": [0, 230, 255],
"_eye_color": "Face eye/iris RGB (baked into the uploaded frames). Default cyan. Set via the dashboard 'Apply colors' (persisted here).",
"mouth_color": [255, 50, 50],
"_mouth_color": "Face mouth RGB. Default red.",
"sclera_color": [255, 255, 255],
"_sclera_color": "White-of-the-eye RGB. Default white."
}

70
vendor/Sanad/config/motion_config.json vendored Normal file
View File

@ -0,0 +1,70 @@
{
"_description": "Tunables for motion/* modules. Loaded via core.config_loader.load('motion').",
"arm_controller": {
"_comment": "motion/arm_controller.py — enable_arm_sdk_index + replay_hz come from core.g1_hardware",
"ramp_in_steps": 60,
"ramp_out_steps": 180,
"settle_hold_sec": 0.5,
"watchdog_timeout_sec": 0.25,
"watchdog_disable_after_sec": 1.0,
"arm_indices_start": 15,
"arm_indices_stop": 29,
"jsonl_id_start": 100
},
"loco_controller": {
"_comment": "G1_Controller/loco_controller.py — manual locomotion. NIC is shared from the arm's DDS init (config core.dds / SANAD_DDS_INTERFACE), not set here.",
"cap_walk": 0.6,
"cap_run": 1.2,
"lin_step": 0.05,
"ang_step": 0.2,
"watchdog_timeout_sec": 0.5,
"arm_block_window_sec": 1.5,
"step_duration_sec": 0.6,
"step_speed_frac": 0.5,
"loco_timeout_sec": 10.0,
"msc_timeout_sec": 5.0
},
"macro_player": {
"_comment": "motion/macro_player.py — JSONL playback",
"ramp_in_steps": 60,
"ramp_out_steps": 60,
"watchdog_disable_after_sec": 1.0
},
"macro_recorder": {
"_comment": "motion/macro_recorder.py — record arm trajectories",
"sample_rate_hz": 60.0,
"smoothing_window": 5
},
"teaching": {
"_comment": "motion/teaching.py — teach-by-demo",
"safe_hold_sec": 3.0,
"waist_kp": 60.0,
"waist_kd": 4.0,
"hold_arm_kp": 60.0,
"hold_arm_kd": 4.0,
"teach_arm_kp": 0.0,
"teach_arm_kd": 2.0
},
"sanad_arm_controller": {
"_comment": "motion/sanad_arm_controller.py — g1_num_motor + enable_arm_sdk_index + replay_hz come from core.g1_hardware",
"action_cooldown_sec": 1.0,
"stability_threshold": 0.06,
"gains": {
"kp_high": 300.0,
"kd_high": 3.0,
"kp_low": 80.0,
"kd_low": 3.0,
"kp_wrist": 40.0,
"kd_wrist": 1.5
},
"weak_motors": [4, 10, 15, 16, 17, 18, 22, 23, 24, 25],
"wrist_motors": [19, 20, 21, 26, 27, 28],
"data_subdir": "DataG1"
}
}

75
vendor/Sanad/config/voice_config.json vendored Normal file
View File

@ -0,0 +1,75 @@
{
"_description": "Tunables for voice/* modules. Loaded via core.config_loader.load('voice').",
"sanad_voice": {
"_comment": "voice/sanad_voice.py — main live voice subprocess. Gemini API credentials (api_key, model, voice_name) come from core_config.json's gemini_defaults — single source of truth.",
"mic_gain": 1.0,
"play_chunk_bytes": 96000,
"log_dir": "~/logs",
"log_name": "gemini_live_v2",
"session_timeout_sec": 660,
"max_reconnect_delay_sec": 30,
"max_consecutive_errors": 10,
"no_messages_timeout_sec": 30
},
"mic_udp": {
"_comment": "G1 built-in mic — UDP multicast subscriber",
"group": "239.168.123.161",
"port": 5555,
"buffer_max_bytes": 64000,
"read_timeout_sec": 0.04,
"socket_timeout_sec": 1.0
},
"speaker": {
"_comment": "G1 built-in speaker — AudioClient.PlayStream wrapper",
"app_name": "sanad",
"begin_stream_pause_sec": 0.15,
"wait_finish_margin_sec": 0.3
},
"vad": {
"_comment": "Gemini Live server-side voice-activity-detection config",
"start_sensitivity": "START_SENSITIVITY_HIGH",
"end_sensitivity": "END_SENSITIVITY_LOW",
"prefix_padding_ms": 20,
"silence_duration_ms": 200
},
"barge_in": {
"threshold": 500,
"loud_chunks_needed": 3,
"cooldown_sec": 0.3,
"echo_suppress_below": 500,
"ai_speak_grace_sec": 0.15
},
"recording": {
"enabled": true,
"dir_relative": "data/recordings"
},
"typed_replay": {
"_comment": "voice/typed_replay.py — max_text_len comes from dashboard.api_input",
"monitor_chunk_size": 512,
"monitor_tail_sec": 0.2
},
"live_voice_loop": {
"_comment": "voice/live_voice_loop.py — arm phrase dispatcher. arm_txt filename comes from core.script_files.arm_phrases",
"trigger_log_size": 100,
"poll_interval_sec": 0.1,
"deferred_default": false,
"trigger_enabled_default": false
},
"local_tts": {
"_comment": "voice/local_tts.py — offline Coqui TTS",
"model_subdir": "speecht5_tts_clartts_ar",
"vocoder_subdir": "speecht5_hifigan",
"xvector_filename": "arabic_xvector_embedding.pt",
"sample_rate": 16000,
"channels": 1
}
}

0
vendor/Sanad/core/__init__.py vendored Normal file
View File

33
vendor/Sanad/core/asyncio_compat.py vendored Normal file
View File

@ -0,0 +1,33 @@
"""asyncio compatibility shim for Python 3.8.
`asyncio.to_thread` only exists from Python 3.9. The Jetson runs 3.8, so we
backfill it via run_in_executor on the default thread pool.
Usage:
from Project.Sanad.core.asyncio_compat import to_thread
result = await to_thread(blocking_fn, arg1, arg2, kw=val)
"""
from __future__ import annotations
import asyncio
import functools
import sys
from typing import Any, Callable, TypeVar
_T = TypeVar("_T")
if sys.version_info >= (3, 9):
# Native implementation
to_thread = asyncio.to_thread # type: ignore[attr-defined]
else:
async def to_thread(func: Callable[..., _T], /, *args: Any, **kwargs: Any) -> _T:
"""Backport of asyncio.to_thread for Python 3.8."""
loop = asyncio.get_event_loop()
ctx = functools.partial(func, *args, **kwargs)
return await loop.run_in_executor(None, ctx)
# Also patch the asyncio module so existing `asyncio.to_thread` calls work
# without rewriting every consumer file. Done lazily — only if missing.
if not hasattr(asyncio, "to_thread"):
asyncio.to_thread = to_thread # type: ignore[attr-defined]

272
vendor/Sanad/core/brain.py vendored Normal file
View File

@ -0,0 +1,272 @@
"""The Brain — central orchestrator for the Sanad robot assistant.
Responsibilities:
1. Owns the SkillRegistry, resolves callbacks at runtime.
2. Coordinates voice motion vision pipelines.
3. Executes skills (audio + motion + callback) with configurable sync modes.
4. Exposes a thread-safe API consumed by the FastAPI dashboard.
"""
from __future__ import annotations
import asyncio
import importlib
import time
from pathlib import Path
from typing import Any, Callable
from Project.Sanad.config import (
AUDIO_RECORDINGS_DIR,
MOTIONS_DIR,
MOTION_RECORDINGS_DIR,
)
from Project.Sanad.core.event_bus import bus
from Project.Sanad.core.logger import get_logger
from Project.Sanad.core.skill_registry import Skill, SkillRegistry
log = get_logger("brain")
# Whitelist of module path prefixes allowed for skill callbacks.
# Prevents arbitrary code execution via dashboard-editable skills.json.
from Project.Sanad.core.config_loader import section as _cfg_section
_BRAIN_CFG = _cfg_section("core", "brain")
ALLOWED_CALLBACK_PREFIXES = tuple(_BRAIN_CFG.get("allowed_callback_prefixes", [
"Project.Sanad.motion.",
"Project.Sanad.voice.",
"motion.",
"voice.",
]))
class Brain:
"""Singleton-style manager that bridges all subsystems."""
def __init__(self):
self.registry = SkillRegistry()
self._lock = asyncio.Lock()
# Sub-modules are injected after construction so imports stay lazy.
self._voice = None # gemini.client.GeminiVoiceClient
self._audio_mgr = None # voice.audio_manager.AudioManager
self._arm = None # motion.arm_controller.ArmController
self._macro_rec = None # motion.macro_recorder.MacroRecorder
self._macro_play = None # motion.macro_player.MacroPlayer
self._live_voice = None # voice.live_voice_loop.LiveVoiceLoop
self.gestural_speaking = False # toggle: move while Gemini speaks
self._running_skill: str | None = None
# -- dependency injection --
def attach_voice(self, client):
self._voice = client
log.info("Voice client attached")
def attach_audio_manager(self, mgr):
self._audio_mgr = mgr
log.info("Audio manager attached")
def attach_arm(self, arm):
self._arm = arm
log.info("Arm controller attached")
def attach_macro_recorder(self, rec):
self._macro_rec = rec
def attach_macro_player(self, player):
self._macro_play = player
def attach_live_voice(self, lv):
self._live_voice = lv
log.info("LiveVoiceLoop attached")
# -- callback resolution --
def _resolve_callback(self, callback_str: str) -> Callable | None:
"""Resolve 'module.submodule:function_name' → callable.
SECURITY: only modules under ALLOWED_CALLBACK_PREFIXES may be imported.
Skill JSON is dashboard-editable and otherwise an arbitrary-import RCE.
Examples:
"Project.Sanad.motion.arm_controller:wave_hand"
"motion.arm_controller:wave_hand"
"""
if not callback_str:
return None
if ":" not in callback_str:
log.error("Invalid callback (missing ':'): %s", callback_str)
return None
module_path, func_name = callback_str.rsplit(":", 1)
if not any(module_path.startswith(prefix) or module_path == prefix.rstrip(".")
for prefix in ALLOWED_CALLBACK_PREFIXES):
log.error(
"Callback %s rejected — module '%s' not in whitelist",
callback_str, module_path,
)
return None
try:
mod = importlib.import_module(module_path)
return getattr(mod, func_name)
except Exception:
log.exception("Cannot resolve callback '%s'", callback_str)
return None
# -- skill execution --
async def execute_skill(self, skill_id: str) -> dict[str, Any]:
"""Run a skill: play audio + execute motion + fire callback."""
skill = self.registry.get(skill_id)
if skill is None:
raise KeyError(f"Skill not found: {skill_id}")
if not skill.enabled:
raise RuntimeError(f"Skill '{skill_id}' is disabled.")
async with self._lock:
if self._running_skill:
raise RuntimeError(f"Skill '{self._running_skill}' is already running.")
self._running_skill = skill_id
t0 = time.monotonic()
result: dict[str, Any] = {"skill_id": skill_id, "ok": True}
try:
await bus.emit("skill.started", skill_id=skill_id)
# Validate required attachments before partial execution
if skill.audio_file and self._audio_mgr is None:
raise RuntimeError("AudioManager not attached but skill requires audio")
if skill.motion_file and self._arm is None:
raise RuntimeError("ArmController not attached but skill requires motion")
if skill.sync_mode == "parallel":
await self._exec_parallel(skill, result)
elif skill.sync_mode == "audio_first":
await self._exec_audio_first(skill, result)
elif skill.sync_mode == "motion_first":
await self._exec_motion_first(skill, result)
else:
await self._exec_parallel(skill, result)
# Fire callback — run blocking callbacks in a thread to avoid stalling the loop
cb = self._resolve_callback(skill.callback)
if cb is not None:
if asyncio.iscoroutinefunction(cb):
cb_result = await cb()
else:
cb_result = await asyncio.to_thread(cb)
result["callback_result"] = str(cb_result) if cb_result else "ok"
except Exception as exc:
result["ok"] = False
result["error"] = str(exc)
log.exception("Skill %s failed", skill_id)
finally:
elapsed = time.monotonic() - t0
result["elapsed_sec"] = round(elapsed, 3)
async with self._lock:
self._running_skill = None
await bus.emit("skill.finished", skill_id=skill_id, result=result)
return result
async def cancel_skill(self) -> dict[str, Any]:
"""Cancel any running skill — sends cancel to arm controller."""
cancelled = self._running_skill
if self._arm is not None and hasattr(self._arm, "cancel"):
try:
self._arm.cancel()
except Exception:
log.exception("arm.cancel() failed")
if self._audio_mgr is not None and hasattr(self._audio_mgr, "stop_playback"):
try:
self._audio_mgr.stop_playback()
except Exception:
pass
return {"cancelled": cancelled}
async def _exec_parallel(self, skill: Skill, result: dict):
tasks = []
if skill.audio_file:
tasks.append(asyncio.create_task(self._play_audio(skill.audio_file, result)))
if skill.motion_file:
tasks.append(asyncio.create_task(self._play_motion(skill.motion_file, result)))
if tasks:
await asyncio.gather(*tasks)
async def _exec_audio_first(self, skill: Skill, result: dict):
if skill.audio_file:
await self._play_audio(skill.audio_file, result)
if skill.motion_file:
await self._play_motion(skill.motion_file, result)
async def _exec_motion_first(self, skill: Skill, result: dict):
if skill.motion_file:
await self._play_motion(skill.motion_file, result)
if skill.audio_file:
await self._play_audio(skill.audio_file, result)
async def _play_audio(self, audio_file: str, result: dict):
path = Path(audio_file)
if not path.is_absolute():
path = AUDIO_RECORDINGS_DIR / path
if not path.exists():
result["audio_error"] = f"File not found: {path}"
log.warning("Audio file missing: %s", path)
return
if self._audio_mgr is not None:
await asyncio.to_thread(self._audio_mgr.play_wav, path)
result["audio_played"] = str(path)
else:
result["audio_error"] = "AudioManager not attached"
async def _play_motion(self, motion_file: str, result: dict):
path = Path(motion_file)
if not path.is_absolute():
path = MOTIONS_DIR / path
if not path.exists():
result["motion_error"] = f"File not found: {path}"
log.warning("Motion file missing: %s", path)
return
if self._arm is not None:
await asyncio.to_thread(self._arm.replay_file, str(path))
result["motion_played"] = str(path)
else:
result["motion_error"] = "ArmController not attached"
# -- macro recording --
async def start_macro_recording(self, name: str) -> dict[str, Any]:
if self._macro_rec is None:
raise RuntimeError("MacroRecorder not attached.")
return await asyncio.to_thread(self._macro_rec.start, name)
async def stop_macro_recording(self) -> dict[str, Any]:
if self._macro_rec is None:
raise RuntimeError("MacroRecorder not attached.")
return await asyncio.to_thread(self._macro_rec.stop)
async def play_macro(self, name: str) -> dict[str, Any]:
if self._macro_play is None:
raise RuntimeError("MacroPlayer not attached.")
return await asyncio.to_thread(self._macro_play.play, name)
# -- gestural speaking toggle --
def set_gestural_speaking(self, enabled: bool):
self.gestural_speaking = enabled
bus.emit_sync("brain.gestural_speaking_changed", enabled=enabled)
log.info("Gestural speaking: %s", "ON" if enabled else "OFF")
# -- status --
def status(self) -> dict[str, Any]:
    """Report which collaborators are attached plus current skill state."""
    collaborators = (
        ("voice_attached", self._voice),
        ("arm_attached", self._arm),
        ("audio_manager_attached", self._audio_mgr),
        ("live_voice_attached", self._live_voice),
    )
    report: dict[str, Any] = {
        label: component is not None for label, component in collaborators
    }
    report["gestural_speaking"] = self.gestural_speaking
    report["running_skill"] = self._running_skill
    report["total_skills"] = len(self.registry.list_skills())
    return report

124
vendor/Sanad/core/config_loader.py vendored Normal file
View File

@ -0,0 +1,124 @@
"""Single-source config loader for all Sanad subsystems.
Each subsystem (core, voice, motion, dashboard) has its own JSON file at
`config/<subsystem>_config.json`. This module loads them on demand, caches
the result, and exposes helpers for pulling nested sections.
Usage:
from Project.Sanad.core.config_loader import load, get
cfg = load("voice") # full voice config dict
threshold = get("voice", "barge_in.threshold", 500)
rates = get("voice", "sanad_voice", {}) # whole section
Why JSON (not TOML/YAML): standard library only, editable in any text
editor, commented via "_comment" keys. No third-party dep.
"""
from __future__ import annotations
import json
import threading
from pathlib import Path
from typing import Any
from Project.Sanad.core.logger import get_logger
log = get_logger("config_loader")
# Resolved at first-load time (avoids circular import with config.py)
_BASE_DIR: Path | None = None
_CONFIG_DIR: Path | None = None
_CACHE: dict[str, dict[str, Any]] = {}
_LOCK = threading.Lock()
def _resolve_dirs() -> tuple[Path, Path]:
"""Find Sanad's root and config/ directory (lazy + cached)."""
global _BASE_DIR, _CONFIG_DIR
if _BASE_DIR is not None and _CONFIG_DIR is not None:
return _BASE_DIR, _CONFIG_DIR
here = Path(__file__).resolve().parent # Sanad/core
base = here.parent # Sanad/
_BASE_DIR = base
_CONFIG_DIR = base / "config"
return _BASE_DIR, _CONFIG_DIR
def _strip_comments(d: Any) -> Any:
"""Remove top-level "_comment"/"_description" keys — noise for callers."""
if isinstance(d, dict):
return {
k: _strip_comments(v) for k, v in d.items()
if not (isinstance(k, str) and k.startswith("_"))
}
if isinstance(d, list):
return [_strip_comments(x) for x in d]
return d
def load(subsystem: str) -> dict[str, Any]:
    """Load + cache config/<subsystem>_config.json.

    Returns a dict with all leading-underscore keys stripped. The result is
    cached per subsystem until `reload()` drops it.

    An empty dict is returned (and cached) when the file is missing, cannot
    be read, is not valid UTF-8 JSON, or does not have a JSON object at the
    top level; callers supply their own defaults via `get(..., default)`.
    """
    with _LOCK:
        if subsystem in _CACHE:
            return _CACHE[subsystem]
        _, cfg_dir = _resolve_dirs()
        path = cfg_dir / f"{subsystem}_config.json"
        cleaned: dict[str, Any] = {}
        if not path.exists():
            log.warning("config file missing: %s — using empty dict", path)
        else:
            try:
                raw = json.loads(path.read_text(encoding="utf-8"))
            except (OSError, ValueError) as exc:
                # OSError: permissions / vanished file. ValueError covers both
                # json.JSONDecodeError and UnicodeDecodeError.
                log.error("config file %s unreadable: %s", path, exc)
            else:
                stripped = _strip_comments(raw)
                if isinstance(stripped, dict):
                    cleaned = stripped
                else:
                    # A list/scalar root would break every caller that
                    # treats the result as a mapping.
                    log.error(
                        "config file %s unreadable: top-level JSON value is %s, expected an object",
                        path, type(raw).__name__,
                    )
        _CACHE[subsystem] = cleaned
        return cleaned
def get(subsystem: str, dotted_key: str, default: Any = None) -> Any:
    """Look up a nested value by dotted path, e.g. 'barge_in.threshold'.

    Returns ``default`` as soon as a path segment is missing or the value
    reached so far is not a dict.
    """
    node: Any = load(subsystem)
    for segment in dotted_key.split("."):
        if isinstance(node, dict) and segment in node:
            node = node[segment]
        else:
            return default
    return node
def section(subsystem: str, name: str) -> dict[str, Any]:
    """Return one config section, guaranteed to be a dict.

    Example: `section("voice", "sanad_voice")` returns that section's dict,
    or `{}` when the section is absent or is not a dict.
    """
    found = get(subsystem, name, {})
    if isinstance(found, dict):
        return found
    return {}
def reload(subsystem: str | None = None) -> None:
    """Forget cached config so the next load() reads from disk again.

    With no argument every subsystem is dropped; otherwise only the named
    one (unknown names are ignored).
    """
    with _LOCK:
        if subsystem is not None:
            _CACHE.pop(subsystem, None)
            return
        _CACHE.clear()
def config_dir() -> Path:
    """Return the config directory (second element of `_resolve_dirs()`)."""
    return _resolve_dirs()[1]

91
vendor/Sanad/core/event_bus.py vendored Normal file
View File

@ -0,0 +1,91 @@
"""Lightweight in-process event bus for inter-module communication.
Usage:
from Project.Sanad.core.event_bus import bus
# Subscribe
bus.on("voice.user_said", my_handler) # sync or async callable
bus.on("motion.action_done", other_handler)
# Publish
await bus.emit("voice.user_said", text="hello")
"""
from __future__ import annotations
import asyncio
import threading
from collections import defaultdict
from typing import Any, Callable
from Project.Sanad.core.logger import get_logger
log = get_logger("event_bus", to_console=False)
class EventBus:
    """In-process publish/subscribe hub.

    Listener lists are guarded by a lock; handlers are always invoked
    outside the lock, on a snapshot of the list taken at emit time.
    Handlers may be plain callables or coroutine functions and receive the
    event payload as keyword arguments.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self._listeners: dict[str, list[Callable]] = defaultdict(list)
        # Strong references to tasks spawned by emit_sync(). The event loop
        # only keeps weak references, so an unreferenced task can be
        # garbage-collected before it finishes.
        self._tasks: set = set()

    @staticmethod
    def _name(callback: Callable) -> str:
        """Printable handler name; works for partials and callable objects,
        which have no ``__qualname__``."""
        return getattr(callback, "__qualname__", None) or repr(callback)

    def on(self, event: str, callback: Callable):
        """Subscribe ``callback`` (sync or async) to ``event``."""
        with self._lock:
            self._listeners[event].append(callback)
        log.debug("Subscribed %s%s", event, self._name(callback))

    def off(self, event: str, callback: Callable):
        """Unsubscribe ``callback`` from ``event``; unknown pairs are ignored."""
        with self._lock:
            # .get() avoids creating an empty entry for never-seen events.
            listeners = self._listeners.get(event)
            if not listeners:
                return
            try:
                listeners.remove(callback)
            except ValueError:
                pass

    async def emit(self, event: str, **kwargs: Any):
        """Call every handler for ``event`` in order, awaiting async ones.

        A failing handler is logged and does not stop later handlers.
        """
        with self._lock:
            handlers = list(self._listeners.get(event, []))
        for handler in handlers:
            try:
                result = handler(**kwargs)
                if asyncio.iscoroutine(result):
                    await result
            except Exception:
                log.exception("Handler %s for event '%s' failed", self._name(handler), event)

    def _spawn(self, loop, coro, handler_name: str, event: str) -> None:
        """Schedule ``coro`` on ``loop``, hold a reference until it is done,
        and log any exception it ends with."""
        task = loop.create_task(coro)
        self._tasks.add(task)

        def _finished(done_task):
            self._tasks.discard(done_task)
            if done_task.cancelled():
                return
            exc = done_task.exception()
            if exc is not None:
                log.error(
                    "Handler %s for event '%s' failed", handler_name, event,
                    exc_info=exc,
                )

        task.add_done_callback(_finished)

    def emit_sync(self, event: str, **kwargs: Any):
        """Fire-and-forget from a sync context.

        Sync handlers run immediately. Async handlers (and coroutines
        returned by sync handlers) are scheduled on the running event loop
        if one exists; otherwise they are dropped with a logged warning.
        """
        with self._lock:
            handlers = list(self._listeners.get(event, []))
        for handler in handlers:
            name = self._name(handler)
            try:
                if asyncio.iscoroutinefunction(handler):
                    try:
                        loop = asyncio.get_running_loop()
                    except RuntimeError:
                        log.warning(
                            "Async handler %s for '%s' dropped — no running loop",
                            name, event,
                        )
                    else:
                        self._spawn(loop, handler(**kwargs), name, event)
                    continue
                result = handler(**kwargs)
                if asyncio.iscoroutine(result):
                    # Sync handler returned a coroutine — schedule it
                    try:
                        loop = asyncio.get_running_loop()
                    except RuntimeError:
                        # close() avoids a "never awaited" RuntimeWarning.
                        result.close()
                        log.warning(
                            "Coroutine result from %s for '%s' dropped — no running loop",
                            name, event,
                        )
                    else:
                        self._spawn(loop, result, name, event)
            except Exception:
                log.exception("Handler %s for event '%s' failed", name, event)


bus = EventBus()

67
vendor/Sanad/core/logger.py vendored Normal file
View File

@ -0,0 +1,67 @@
"""Unified logging with RotatingFileHandler for all Sanad modules."""
from __future__ import annotations
import logging
import sys
from logging.handlers import RotatingFileHandler
from pathlib import Path
from Project.Sanad.config import LOGS_DIR
_MAX_BYTES = 10 * 1024 * 1024 # 10 MB
_BACKUP_COUNT = 3
_FMT = "%(asctime)s [%(name)s] %(levelname)s %(message)s"
_formatter = logging.Formatter(_FMT)
# Callback for the WebSocket log stream — set by log_stream.py at import time.
_ws_push_fn = None
def set_ws_push(fn):
    """Install ``fn`` as the module-wide WebSocket push callback.

    ``_WSHandler`` calls it with each formatted log line.
    """
    global _ws_push_fn
    _ws_push_fn = fn
class _WSHandler(logging.Handler):
"""Forwards every log record to the WebSocket log stream."""
def emit(self, record: logging.LogRecord):
if _ws_push_fn is not None:
try:
_ws_push_fn(self.format(record))
except Exception:
pass
def get_logger(name: str, *, to_console: bool = True) -> logging.Logger:
    """Return the ``sanad.<name>`` logger, configuring it on first use.

    First use attaches a rotating file handler writing LOGS_DIR/<name>.log
    (DEBUG), an optional stdout handler (INFO) and the WebSocket stream
    handler (INFO). A logger that already has handlers is returned as-is,
    so ``to_console`` only matters on the first call for a given name.
    """
    logger = logging.getLogger(f"sanad.{name}")
    if logger.handlers:
        return logger

    logger.setLevel(logging.DEBUG)
    logger.propagate = False

    def _attach(handler: logging.Handler, level: int) -> None:
        handler.setFormatter(_formatter)
        handler.setLevel(level)
        logger.addHandler(handler)

    LOGS_DIR.mkdir(parents=True, exist_ok=True)
    _attach(
        RotatingFileHandler(
            LOGS_DIR / f"{name}.log", maxBytes=_MAX_BYTES, backupCount=_BACKUP_COUNT
        ),
        logging.DEBUG,
    )
    if to_console:
        _attach(logging.StreamHandler(sys.stdout), logging.INFO)
    # WebSocket stream handler
    _attach(_WSHandler(), logging.INFO)
    return logger

71
vendor/Sanad/core/persona.py vendored Normal file
View File

@ -0,0 +1,71 @@
"""Active-persona selection — which script file Gemini loads as its system
prompt.
The operator can keep several persona variants in scripts/ (e.g.
``sanad_script.txt``, ``sanad_script_v1.txt``, ``sanad_script_v2.txt``) and pick
which one is live. The selection is a single basename stored in
``data/active_persona.txt``; the DEFAULT (and reset target) is always the
configured persona (``sanad_script.txt``). The Gemini child resolves this at
session start, so a new selection takes effect on the next voice (re)connect.
A missing/blank/stale pointer transparently falls back to the default, so this
can never break the voice — worst case it loads ``sanad_script.txt``.
"""
from __future__ import annotations
from pathlib import Path
from Project.Sanad.config import DATA_DIR, SCRIPTS_DIR
ACTIVE_PERSONA_FILE = DATA_DIR / "active_persona.txt"
def default_persona_name() -> str:
    """Return the configured default persona filename.

    Reads ``persona`` from the ``script_files`` section of the core config.
    Falls back to ``sanad_script.txt`` when the value is missing or blank,
    or when anything goes wrong reading it. Never raises.
    """
    fallback = "sanad_script.txt"
    try:
        from Project.Sanad.core.config_loader import section as _section
        configured = (_section("core", "script_files") or {}).get("persona")
        return (configured or fallback).strip() or fallback
    except Exception:
        return fallback
def active_persona_name() -> str:
    """Return the basename of the persona that is currently in effect.

    This is the name stored in ``ACTIVE_PERSONA_FILE`` when it is non-blank
    and still names a file in ``SCRIPTS_DIR``; otherwise the default.
    Never raises.
    """
    fallback = default_persona_name()
    try:
        chosen = ACTIVE_PERSONA_FILE.read_text(encoding="utf-8").strip()
    except Exception:
        return fallback
    if not chosen:
        return fallback
    # Keep only the basename so the pointer cannot escape SCRIPTS_DIR.
    candidate = SCRIPTS_DIR / Path(chosen).name
    return candidate.name if candidate.is_file() else fallback
def active_persona_path() -> Path:
    """Return ``SCRIPTS_DIR`` joined with the currently active persona name."""
    persona_file = active_persona_name()
    return SCRIPTS_DIR / persona_file
def set_active_persona(name: str | None) -> str:
    """Store which persona is active and return the effective name.

    Only the basename of ``name`` is used. ``None``, an empty name, or the
    default name removes the pointer file (best effort) and returns the
    default.

    Raises:
        FileNotFoundError: a non-default name is not a file in SCRIPTS_DIR.
    """
    requested = Path(str(name)).name.strip() if name else ""
    fallback = default_persona_name()
    if requested and requested != fallback:
        if not (SCRIPTS_DIR / requested).is_file():
            raise FileNotFoundError(requested)
        DATA_DIR.mkdir(parents=True, exist_ok=True)
        ACTIVE_PERSONA_FILE.write_text(requested, encoding="utf-8")
        return requested
    # Revert to default: removing the pointer is best effort — a missing
    # file or any other failure is ignored.
    try:
        ACTIVE_PERSONA_FILE.unlink()
    except Exception:
        pass
    return fallback

175
vendor/Sanad/core/skill_registry.py vendored Normal file
View File

@ -0,0 +1,175 @@
"""Skill Registry — maps audio files to motion commands and callback functions.
A "skill" is a named unit that ties together:
- An audio clip (e.g. recordings/audio/intro.wav)
- A motion file (e.g. data/motions/wave.jsonl) — optional
- A callback (e.g. "motion.wave_hand") resolved at runtime
The registry is persisted in data/skills.json and can be edited via the
dashboard or programmatically through the Brain.
Skill entry schema:
{
"id": "intro_greeting",
"audio_file": "recordings/audio/intro.wav",
"motion_file": "data/motions/right_hand_up.jsonl",
"callback": "motion.trigger:wave_hand",
"sync_mode": "parallel", # parallel | audio_first | motion_first
"enabled": true,
"description": "Wave hand while playing intro audio"
}
"""
from __future__ import annotations
import json
import os
import tempfile
import threading
import uuid
from dataclasses import dataclass, field, asdict
from pathlib import Path
from typing import Any
from Project.Sanad.config import SKILLS_FILE
from Project.Sanad.core.logger import get_logger
log = get_logger("skill_registry")
@dataclass
class Skill:
    """One registry entry tying an audio clip, a motion file and a callback together."""

    id: str
    audio_file: str = ""
    motion_file: str = ""
    callback: str = ""
    sync_mode: str = "parallel"
    enabled: bool = True
    description: str = ""
    meta: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the skill as a plain dict (via ``dataclasses.asdict``)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Skill:
        """Build a Skill from ``data``, ignoring keys that are not fields."""
        accepted = cls.__dataclass_fields__  # mapping keyed by field name
        return cls(**{key: value for key, value in data.items() if key in accepted})
class SkillRegistry:
    """Thread-safe, JSON-backed registry of skills.

    Skills live in an in-memory dict keyed by id; every mutating call
    rewrites the whole JSON file atomically while holding the lock.
    """

    _VALID_SYNC_MODES = {"parallel", "audio_first", "motion_first"}

    def __init__(self, path: Path = SKILLS_FILE):
        self._path = path
        self._lock = threading.Lock()
        self._skills: dict[str, Skill] = {}
        self._load()

    # -- persistence --

    def _load(self):
        """Populate the registry from disk.

        A missing file yields an empty registry. A file that cannot be
        parsed is renamed to ``<path>.corrupt`` (best effort) and the
        registry starts empty.
        """
        if not self._path.exists():
            self._skills = {}
            return
        try:
            with open(self._path, "r", encoding="utf-8") as f:
                payload = json.load(f)
            for entry in payload.get("skills", []):
                skill = Skill.from_dict(entry)
                self._skills[skill.id] = skill
            log.info("Loaded %d skills from %s", len(self._skills), self._path)
        except Exception as exc:
            log.warning("Could not load skills: %s", exc)
            # Backup corrupt file rather than silently nuking. Appending to
            # the full name keeps the backup path equal to the logged
            # "<path>.corrupt" whatever extension the registry file has.
            try:
                self._path.rename(self._path.with_name(self._path.name + ".corrupt"))
                log.warning("Backed up corrupt skills to %s.corrupt", self._path)
            except OSError:
                pass
            self._skills = {}

    def _save(self):
        """Write all skills to disk atomically. Callers hold ``self._lock``."""
        self._path.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "version": 1,
            "total": len(self._skills),
            "skills": [s.to_dict() for s in self._skills.values()],
        }
        # Atomic write: tempfile + os.replace
        fd, tmp = tempfile.mkstemp(
            prefix=f".{self._path.name}.", suffix=".tmp",
            dir=str(self._path.parent),
        )
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(payload, f, ensure_ascii=False, indent=2)
            os.replace(tmp, self._path)
        except Exception:
            try:
                os.unlink(tmp)
            except OSError:
                pass
            raise

    # -- CRUD --

    def list_skills(self) -> list[dict[str, Any]]:
        """Return every skill as a plain dict."""
        with self._lock:
            return [s.to_dict() for s in self._skills.values()]

    def get(self, skill_id: str) -> Skill | None:
        """Return the stored Skill object for ``skill_id``, or None."""
        with self._lock:
            return self._skills.get(skill_id)

    def add(self, skill: Skill) -> Skill:
        """Register ``skill`` and persist; an empty id is replaced by a generated one.

        Raises:
            ValueError: invalid ``sync_mode`` or the id is already registered.
        """
        if skill.sync_mode not in self._VALID_SYNC_MODES:
            raise ValueError(
                f"Invalid sync_mode '{skill.sync_mode}' (allowed: {sorted(self._VALID_SYNC_MODES)})"
            )
        with self._lock:
            if not skill.id:
                skill.id = uuid.uuid4().hex[:12]
            elif skill.id in self._skills:
                raise ValueError(f"Skill id already exists: {skill.id}")
            self._skills[skill.id] = skill
            self._save()
        log.info("Added skill %s (%s)", skill.id, skill.description)
        return skill

    def update(self, skill_id: str, updates: dict[str, Any]) -> Skill | None:
        """Apply ``updates`` to an existing skill and persist.

        Only declared Skill fields other than ``id`` are written; every
        other key is ignored, so a payload can never overwrite a method
        such as ``to_dict`` on the instance.

        Returns the updated Skill, or None when ``skill_id`` is unknown.

        Raises:
            ValueError: ``updates`` carries an invalid ``sync_mode``.
        """
        with self._lock:
            existing = self._skills.get(skill_id)
            if existing is None:
                return None
            # Validate before mutating so a bad request changes nothing.
            if "sync_mode" in updates and updates["sync_mode"] not in self._VALID_SYNC_MODES:
                raise ValueError(
                    f"Invalid sync_mode '{updates['sync_mode']}'"
                )
            writable = Skill.__dataclass_fields__
            for key, value in updates.items():
                if key in writable and key != "id":
                    setattr(existing, key, value)
            self._save()
        log.info("Updated skill %s", skill_id)
        return existing

    def delete(self, skill_id: str) -> dict[str, Any] | None:
        """Remove a skill and persist; returns its dict form, or None if unknown."""
        with self._lock:
            skill = self._skills.pop(skill_id, None)
            if skill is None:
                return None
            self._save()
        log.info("Deleted skill %s", skill_id)
        return skill.to_dict()

    def find_by_audio(self, audio_file: str) -> list[Skill]:
        """Find all enabled skills linked to a given audio file (exact match)."""
        with self._lock:
            return [s for s in self._skills.values() if s.audio_file == audio_file and s.enabled]

    def find_by_callback(self, callback: str) -> list[Skill]:
        """Find all enabled skills whose callback string equals ``callback``."""
        with self._lock:
            return [s for s in self._skills.values() if s.callback == callback and s.enabled]

0
vendor/Sanad/dashboard/__init__.py vendored Normal file
View File

143
vendor/Sanad/dashboard/app.py vendored Normal file
View File

@ -0,0 +1,143 @@
"""FastAPI application — Sanad Dashboard.
Each route module is imported INDIVIDUALLY inside try/except so that one
broken router (missing dep, syntax error in a sibling) cannot break the
entire dashboard. Failed routers are logged and the server starts without
them.
"""
from __future__ import annotations
import importlib
import logging
# Backfill asyncio.to_thread on Python 3.8 — must run before any router import.
from Project.Sanad.core import asyncio_compat # noqa: F401
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from Project.Sanad.config import BASE_DIR
from Project.Sanad.core.logger import get_logger
log = get_logger("dashboard.app")
from Project.Sanad.core.config_loader import section as _cfg_section
_APP_CFG = _cfg_section("dashboard", "app")
app = FastAPI(
title=_APP_CFG.get("title", "Sanad Dashboard"),
version=_APP_CFG.get("version", "1.0.0"),
)
# -- isolated route registration --
_REST_ROUTES: list[tuple[str, str, str]] = [
# (module_name, prefix, tag)
("health", "/api", "health"),
("system", "/api/system", "system"),
("voice", "/api/voice", "voice"),
("motion", "/api/motion", "motion"),
("skills", "/api/skills", "skills"),
("macros", "/api/macros", "macros"),
("logs", "/api/logs", "logs"),
("replay", "/api/replay", "replay"),
("audio_control", "/api/audio", "audio"),
("scripts", "/api/scripts", "scripts"),
("records", "/api/records", "records"),
("prompt", "/api/prompt", "prompt"),
("wake_phrases", "/api/wake-phrases", "wake-phrases"),
("live_voice", "/api/live-voice", "live-voice"),
("live_subprocess", "/api/live-subprocess", "live-subprocess"),
("typed_replay", "/api/typed-replay", "typed-replay"),
("recognition", "/api/recognition", "recognition"),
("zones", "/api/zones", "zones"),
("temp_monitor", "/api/temp", "temperature"),
("controller", "/api/controller", "controller"),
("mask", "/api/mask", "mask"),
("mask_social", "/api/mask", "mask-social"),
("navigation", "/api/nav", "navigation"),
]
_WS_ROUTES: list[str] = ["log_stream", "motor_temps", "terminal"]
_loaded_routes: list[str] = []
_failed_routes: dict[str, str] = {}
def _register_router(module_name: str, prefix: str | None = None, tag: str | None = None,
package: str = "Project.Sanad.dashboard.routes"):
"""Import + register one router. Failures are logged, never raised."""
full_name = f"{package}.{module_name}"
try:
mod = importlib.import_module(full_name)
if not hasattr(mod, "router"):
raise AttributeError(f"{full_name} has no 'router' attribute")
kwargs: dict = {}
if prefix is not None:
kwargs["prefix"] = prefix
if tag is not None:
kwargs["tags"] = [tag]
app.include_router(mod.router, **kwargs)
_loaded_routes.append(module_name)
log.info("Registered router: %s", module_name)
except Exception as exc:
_failed_routes[module_name] = str(exc)
log.exception("Failed to register router %s — skipping", module_name)
# REST routes — one isolated registration per (module_name, prefix, tag)
# entry; a router that fails to import is recorded and skipped.
for mod_name, prefix, tag in _REST_ROUTES:
    _register_router(mod_name, prefix=prefix, tag=tag)
# WebSocket routes — imported from the websockets package and mounted
# without a prefix, all under the "websocket" tag.
for mod_name in _WS_ROUTES:
    _register_router(
        mod_name,
        package="Project.Sanad.dashboard.websockets",
        tag="websocket",
    )
# -- Static files (dashboard UI) — best effort --
# The sub-directory is configurable via the "static_subdir" key of the
# dashboard "app" config section. The directory is created if missing so
# the mount succeeds on a fresh checkout; any failure is logged and the
# app continues without /static.
STATIC_DIR = BASE_DIR / _APP_CFG.get("static_subdir", "dashboard/static")
try:
    STATIC_DIR.mkdir(parents=True, exist_ok=True)
    app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
    log.info("Static dir mounted: %s", STATIC_DIR)
except Exception:
    log.exception("Could not mount static dir %s — serving without it", STATIC_DIR)
@app.get("/")
async def root():
    """Serve the dashboard page, or a JSON diagnostic when it is absent."""
    index = STATIC_DIR / "index.html"
    if not index.exists():
        return {
            "message": "Sanad Dashboard — index.html not found",
            "loaded_routes": _loaded_routes,
            "failed_routes": _failed_routes,
        }
    from fastapi.responses import HTMLResponse
    # no-store makes the browser re-fetch the dashboard HTML/JS after a
    # deploy; otherwise stale cached JS keeps calling old endpoints
    # (e.g. /nav/* instead of /api/nav/*) and gets 404s.
    cache_headers = {"Cache-Control": "no-store, must-revalidate"}
    try:
        return HTMLResponse(index.read_text(encoding="utf-8"), headers=cache_headers)
    except OSError as exc:
        return {"error": f"Could not read index.html: {exc}"}
@app.get("/api/_dashboard_status")
async def dashboard_load_status():
    """Diagnostic endpoint listing the routers that loaded and those that failed."""
    loaded, failed = _loaded_routes, _failed_routes
    return {
        "loaded": loaded,
        "failed": failed,
        "total_loaded": len(loaded),
        "total_failed": len(failed),
    }

View File

View File

@ -0,0 +1,66 @@
"""In-process arbitration between Nav2 (web_nav3) and the manual LocoController.
Both stacks can drive the G1's legs via different command paths:
- Nav2 (web_nav3) publishes cmd_vel from a navigation goal/mission.
- LocoController issues LocoClient.Move()/step() from the Controller tab and
Gemini movement dispatch.
The documented hazard is "two stacks must never both drive the legs at once".
This module is a tiny thread-safe gate that lets ONE commander own the legs at a
time. controller.py sets loco_active for arm/move/step and refuses when nav is
active; navigation.py sets nav_active for goto/missions/run and refuses when loco
is active. The E-STOP / cancel paths clear the relevant flag.
Pure in-process state (no DDS, no HTTP) — both routers share this single module
instance, so the flags are coherent across the dashboard process.
"""
from __future__ import annotations
import threading
_lock = threading.Lock()
_loco_active = False
_nav_active = False
def loco_active() -> bool:
    """Return the manual-loco ownership flag, read under the module lock."""
    with _lock:
        flag = _loco_active
    return flag
def nav_active() -> bool:
    """Return the Nav2 ownership flag, read under the module lock."""
    with _lock:
        flag = _nav_active
    return flag
def acquire_loco() -> bool:
    """Try to claim the legs for manual loco.

    Returns False (changing nothing) while the nav flag is set; otherwise
    sets the loco flag and returns True.
    """
    global _loco_active
    with _lock:
        if not _nav_active:
            _loco_active = True
            return True
        return False
def release_loco() -> None:
    """Clear the manual-loco ownership flag (no-op when already clear)."""
    global _loco_active
    _lock.acquire()
    try:
        _loco_active = False
    finally:
        _lock.release()
def acquire_nav() -> bool:
    """Try to claim the legs for Nav2.

    Returns False (changing nothing) while the loco flag is set; otherwise
    sets the nav flag and returns True.
    """
    global _nav_active
    with _lock:
        if not _loco_active:
            _nav_active = True
            return True
        return False
def release_nav() -> None:
    """Clear the Nav2 ownership flag (no-op when already clear)."""
    global _nav_active
    _lock.acquire()
    try:
        _nav_active = False
    finally:
        _lock.release()

View File

@ -0,0 +1,81 @@
"""Shared filesystem safety helpers for dashboard routes.
Provides:
- safe_filename: validate + reject traversal/special chars
- safe_path_under: ensure resolved path stays inside a base dir
- atomic_write_bytes: write-to-temp + os.replace
- atomic_write_text
- atomic_write_json
"""
from __future__ import annotations
import json
import os
import tempfile
from pathlib import Path
from typing import Any
from fastapi import HTTPException
from Project.Sanad.core.config_loader import section as _cfg_section
# Maximum upload size in bytes — SINGLE SOURCE in dashboard.api_input
MAX_UPLOAD_BYTES = _cfg_section("dashboard", "api_input").get(
"max_upload_bytes", 8 * 1024 * 1024)
def safe_filename(name: str | None) -> str:
    """Reduce ``name`` to a bare, stripped basename and validate it.

    Raises:
        HTTPException(400): the name is empty, reduces to "", "." or "..",
            or contains a NUL, newline or carriage return.
    """
    if not name:
        raise HTTPException(400, "Filename required.")
    base = os.path.basename(name).strip()
    if base in {"", ".", ".."}:
        raise HTTPException(400, "Invalid filename.")
    for forbidden in ("\x00", "\n", "\r"):
        if forbidden in base:
            raise HTTPException(400, "Invalid characters in filename.")
    return base
def safe_path_under(base: Path, name: str) -> Path:
    """Return the resolved path of ``name`` inside ``base``.

    ``name`` is first reduced by ``safe_filename``. The resolved result must
    still lie under the resolved ``base`` (this also catches symlinks that
    point elsewhere).

    Raises:
        HTTPException(400): invalid name, or the path escapes ``base``.
    """
    leaf = safe_filename(name)
    root = base.resolve()
    target = (base / leaf).resolve()
    try:
        target.relative_to(root)
    except ValueError:
        raise HTTPException(400, "Path traversal denied.")
    return target
def check_upload_size(content: bytes, max_bytes: int = MAX_UPLOAD_BYTES) -> None:
    """Reject ``content`` longer than ``max_bytes``.

    Raises:
        HTTPException(413): the payload exceeds the limit.
    """
    size = len(content)
    if size <= max_bytes:
        return
    raise HTTPException(
        413,
        f"Upload too large: {size} bytes (max {max_bytes}).",
    )
def atomic_write_bytes(path: Path, data: bytes) -> None:
    """Write ``data`` to ``path`` atomically.

    The bytes go to a temporary file in the same directory, which is then
    moved over ``path`` with ``os.replace``. If anything fails the temporary
    file is removed and the exception is re-raised.
    """
    folder = path.parent
    folder.mkdir(parents=True, exist_ok=True)
    handle, scratch = tempfile.mkstemp(
        prefix=f".{path.name}.", suffix=".tmp", dir=str(folder)
    )
    try:
        with os.fdopen(handle, "wb") as out:
            out.write(data)
        os.replace(scratch, path)
    except Exception:
        try:
            os.unlink(scratch)
        except OSError:
            pass  # temp file already gone
        raise
def atomic_write_text(path: Path, text: str, encoding: str = "utf-8") -> None:
    """Encode ``text`` with ``encoding`` and write it via ``atomic_write_bytes``."""
    encoded = text.encode(encoding)
    atomic_write_bytes(path, encoded)
def atomic_write_json(path: Path, payload: Any, indent: int = 2) -> None:
    """Serialize ``payload`` as JSON (non-ASCII kept as-is) and write it via ``atomic_write_text``."""
    rendered = json.dumps(payload, ensure_ascii=False, indent=indent)
    atomic_write_text(path, rendered)

View File

@ -0,0 +1,965 @@
"""Audio control endpoints — mic mute, speaker mute, device profile selection."""
from __future__ import annotations
import asyncio
import os
import subprocess
import threading
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Project.Sanad.core.logger import get_logger
from Project.Sanad.voice import audio_devices as ad
log = get_logger("audio_route")
router = APIRouter()
# ─────────────────────── G1 built-in speaker (DDS) ───────────────────────
#
# pactl set-sink-mute has NO effect on the G1 built-in speaker because
# sanad_voice.py streams PCM to it via the Unitree DDS AudioClient API,
# bypassing PulseAudio entirely. To actually silence the built-in speaker
# mid-playback we must call AudioClient.SetVolume(0) over DDS.
#
# This module keeps a lazily-initialized AudioClient + a cached volume so
# the dashboard can mute/unmute without waiting on DDS init for every click.
_g1_audio_client = None
_g1_audio_lock = threading.Lock()
_g1_current_volume: int = 100 # what's actually on the hardware right now
_g1_user_volume: int = 100 # the user's preferred "unmuted" level
_g1_init_error: str = ""
def _load_persisted_g1_volume() -> int:
"""Read the saved G1 volume from data/motions/config.json.
Keys are `audio.g1_volume` (persistent target level 0-100). Returns
100 if no value is stored matches the default the Unitree SDK sets
on the voice service.
"""
try:
from Project.Sanad.config import load_config
cfg = load_config() or {}
audio = cfg.get("audio") or {}
vol = int(audio.get("g1_volume", 100))
return max(0, min(100, vol))
except Exception:
return 100
def _save_persisted_g1_volume(level: int) -> None:
"""Persist the user's volume choice to config.json so it survives restart."""
try:
from Project.Sanad.config import load_config, save_config
cfg = load_config() or {}
audio = cfg.get("audio") if isinstance(cfg.get("audio"), dict) else {}
audio["g1_volume"] = max(0, min(100, int(level)))
cfg["audio"] = audio
save_config(cfg)
except Exception as exc:
log.warning("could not persist g1_volume: %s", exc)
# Initialize user volume from the persisted value so the dashboard shows
# the correct level on first load even if no one has touched it yet.
_g1_user_volume = _load_persisted_g1_volume()
_g1_current_volume = _g1_user_volume
def _get_g1_audio_client():
"""Lazy-init AudioClient. Safe to call from multiple routes."""
global _g1_audio_client, _g1_init_error
if _g1_audio_client is not None:
return _g1_audio_client
try:
from unitree_sdk2py.core.channel import ChannelFactoryInitialize
from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient
except ImportError as exc:
_g1_init_error = f"unitree_sdk2py not installed: {exc}"
return None
iface = os.environ.get("SANAD_DDS_INTERFACE", "eth0")
# ChannelFactoryInitialize can only be called once per process. The
# arm controller normally calls it first at startup — the second call
# either no-ops or raises, so wrap it defensively.
try:
ChannelFactoryInitialize(0, iface)
except Exception as exc:
log.debug("ChannelFactoryInitialize already called or failed: %s", exc)
try:
client = AudioClient()
client.SetTimeout(5.0)
client.Init()
_g1_audio_client = client
log.info("G1 AudioClient initialized for dashboard mute control (iface=%s)", iface)
return client
except Exception as exc:
_g1_init_error = f"AudioClient init failed: {exc}"
log.warning("G1 AudioClient init failed: %s", exc)
return None
def _pactl(args: list[str]) -> subprocess.CompletedProcess[str]:
    """Run ``pactl <args>`` and return the completed process with captured text output.

    Raises:
        FileNotFoundError: ``pactl`` is not installed.
        subprocess.CalledProcessError: ``pactl`` exited non-zero.
    """
    command = ["pactl"] + list(args)
    return subprocess.run(command, check=True, text=True, capture_output=True)
def _get_muted(kind: str, name: str) -> bool:
if not name:
return False
try:
cmd = "get-source-mute" if kind == "source" else "get-sink-mute"
r = _pactl([cmd, name])
return (r.stdout or "").strip().lower().endswith("yes")
except (FileNotFoundError, subprocess.CalledProcessError):
return False
def _set_muted(kind: str, name: str, muted: bool) -> bool:
if not name:
return False
cmd = "set-source-mute" if kind == "source" else "set-sink-mute"
_pactl([cmd, name, "1" if muted else "0"])
return _get_muted(kind, name)
def _current_sink_source() -> tuple[str, str]:
cur = ad.current_selection()
return cur.get("sink", ""), cur.get("source", "")
# ─────────────────────── status / mute ───────────────────────
@router.get("/status")
async def audio_status():
    """Return the current devices, mute state and G1 speaker volume.

    `speaker_muted` is the EFFECTIVE mute state: True if either the
    PulseAudio sink is muted OR the cached G1 speaker volume is 0.
    `pulse_sink_muted` and `g1_speaker_muted` are the per-path states.

    `g1_current_volume` is the last volume this module set on the hardware
    (or the persisted value at import); `g1_user_volume` is the user's
    preferred unmuted level, restored on un-mute.
    """
    def _collect():
        sink, source = _current_sink_source()
        selection = ad.current_selection()
        sink_is_muted = _get_muted("sink", sink)
        # Cached value only — avoids a DDS GetVolume round-trip per poll.
        g1_is_muted = _g1_current_volume == 0
        report = {"mic_muted": _get_muted("source", source)}
        # Effective state (OR of both paths) — the badge the user sees.
        report["speaker_muted"] = sink_is_muted or g1_is_muted
        # Per-path breakdown so the UI can tell the two apart.
        report["pulse_sink_muted"] = sink_is_muted
        report["g1_speaker_muted"] = g1_is_muted
        report["g1_current_volume"] = _g1_current_volume
        report["g1_user_volume"] = _g1_user_volume
        # Available only once an AudioClient has actually been built;
        # `g1_init_error` says why it is not ("" if init was never tried).
        report["g1_available"] = _g1_audio_client is not None
        report["g1_init_error"] = _g1_init_error
        report["sink"] = sink
        report["source"] = source
        report["current"] = selection
        report["pactl_available"] = ad.pactl_available()
        return report
    return await asyncio.to_thread(_collect)
@router.post("/mic/mute")
async def toggle_mic(muted: bool | None = None):
    """Mute, unmute or toggle the selected PulseAudio source.

    With ``muted`` omitted the current state is inverted. Responds 503 when
    no source is selected and 500 when ``pactl`` is missing or fails.
    """
    def _apply():
        source = _current_sink_source()[1]
        if not source:
            raise HTTPException(503, "No source device selected")
        if muted is None:
            wanted = not _get_muted("source", source)
        else:
            wanted = muted
        try:
            now_muted = _set_muted("source", source, wanted)
        except (FileNotFoundError, subprocess.CalledProcessError) as exc:
            raise HTTPException(500, f"pactl failed: {exc}")
        return {"mic_muted": now_muted, "source": source}
    return await asyncio.to_thread(_apply)
@router.post("/speaker/mute")
async def toggle_speaker(muted: bool | None = None):
    """Mute/unmute the SPEAKER — both the PulseAudio sink AND the G1
    built-in speaker, so the effect is audible regardless of which
    playback path is currently active (Anker PowerConf via PyAudio vs
    G1 built-in via Unitree DDS AudioClient).

    Each of the two paths is attempted independently; the response's
    ``pulse`` and ``g1`` entries report, per path, whether it succeeded
    (``ok``) and the resulting state or the error.

    ``muted`` omitted means toggle: if either path currently reads as
    muted, both are unmuted; otherwise both are muted.

    NOTE(review): on an unmute request the top-level ``speaker_muted`` is
    always False, even if a path failed to unmute or the restored user
    volume is 0 — confirm this is what the UI expects.
    """
    def _do():
        global _g1_current_volume, _g1_user_volume
        sink, _ = _current_sink_source()
        # Decide target state. With no explicit request, invert the
        # EFFECTIVE mute state (muted if either path is muted), which is
        # the same definition /status reports as `speaker_muted`.
        if muted is None:
            pulse_cur = _get_muted("sink", sink) if sink else False
            g1_cur = _g1_current_volume == 0
            # Toggle: if either path is muted, unmute both; else mute both
            target = not (pulse_cur or g1_cur)
        else:
            target = bool(muted)
        result = {"speaker_muted": target, "pulse": None, "g1": None}
        # ── Path 1: PulseAudio sink (Anker PowerConf, USB, etc.) ──
        if sink:
            try:
                actual_pulse = _set_muted("sink", sink, target)
                result["pulse"] = {"ok": True, "muted": actual_pulse, "sink": sink}
            except (FileNotFoundError, subprocess.CalledProcessError) as exc:
                result["pulse"] = {"ok": False, "error": f"pactl failed: {exc}"}
        else:
            result["pulse"] = {"ok": False, "error": "no sink selected"}
        # ── Path 2: G1 built-in speaker via DDS AudioClient ──
        # Mute = SetVolume(0). Unmute = SetVolume(_g1_user_volume) so the
        # user's chosen level is restored (instead of always jumping back
        # to 100).
        client = _get_g1_audio_client()
        if client is None:
            result["g1"] = {"ok": False, "error": _g1_init_error or "AudioClient unavailable"}
        else:
            volume = 0 if target else _g1_user_volume
            try:
                with _g1_audio_lock:
                    code = client.SetVolume(volume)
                    # NOTE(review): the cache is updated whatever `code`
                    # is; confirm whether a non-zero return code means the
                    # hardware volume did not change.
                    _g1_current_volume = volume
                result["g1"] = {
                    "ok": True, "muted": volume == 0,
                    "volume": volume, "code": code,
                }
                log.info("G1 speaker volume set to %d (rc=%s)", volume, code)
            except Exception as exc:
                result["g1"] = {"ok": False, "error": f"SetVolume failed: {exc}"}
        # Final effective state — either path counts as muted. A failed
        # path has no "muted" key and therefore counts as not muted here.
        pulse_muted = result["pulse"].get("muted", False) if result["pulse"] else False
        g1_muted = result["g1"].get("muted", False) if result["g1"] else False
        result["speaker_muted"] = bool(pulse_muted or g1_muted) if target else False
        result["sink"] = sink
        result["g1_current_volume"] = _g1_current_volume
        result["g1_user_volume"] = _g1_user_volume
        return result
    return await asyncio.to_thread(_do)
@router.post("/g1-speaker/mute")
async def toggle_g1_speaker_only(muted: bool | None = None):
    """Mute or unmute only the G1 built-in speaker through the DDS AudioClient.

    Handy for exercising the DDS path in isolation; the regular
    /speaker/mute endpoint drives both PulseAudio and the G1 speaker.
    Muting is SetVolume(0); unmuting restores ``_g1_user_volume`` rather
    than jumping to a fixed level.

    Args:
        muted: True mutes, False unmutes, None toggles (mutes when the
            tracked volume is currently above zero).

    Returns:
        Dict with ``g1_muted``, ``volume``, ``user_volume`` and ``return_code``.

    Raises:
        HTTPException: 503 when no AudioClient is available, 500 when
            SetVolume raises.
    """
    def _apply_mute():
        global _g1_current_volume
        audio_client = _get_g1_audio_client()
        if audio_client is None:
            raise HTTPException(
                503,
                f"G1 AudioClient unavailable: {_g1_init_error or 'unknown'}",
            )

        # No explicit request means "flip": audible now -> mute.
        want_muted = (_g1_current_volume > 0) if muted is None else bool(muted)
        new_volume = 0 if want_muted else _g1_user_volume

        try:
            with _g1_audio_lock:
                rc = audio_client.SetVolume(new_volume)
                _g1_current_volume = new_volume
        except Exception as exc:
            raise HTTPException(500, f"SetVolume failed: {exc}")

        log.info("G1 speaker volume set to %d (rc=%s)", new_volume, rc)
        return {
            "g1_muted": new_volume == 0,
            "volume": new_volume,
            "user_volume": _g1_user_volume,
            "return_code": rc,
        }

    return await asyncio.to_thread(_apply_mute)
# ─────────────────────── G1 speaker volume (0-100) ───────────────────────
class G1VolumePayload(BaseModel):
    """Request body for POST /g1-speaker/volume."""
    # Requested volume; the handler rejects anything outside 0..100 with a 400.
    level: int  # 0..100
@router.get("/g1-speaker/volume")
async def get_g1_volume():
    """Report the tracked G1 speaker volume state.

    Response keys:
        available       -- True once an AudioClient object exists
        current_volume  -- last volume this module applied
        user_volume     -- preferred (unmuted) level
        muted           -- current_volume == 0
        persisted       -- value returned by _load_persisted_g1_volume()
        init_error      -- why the AudioClient is unavailable, if recorded
    """
    def _snapshot():
        hw_volume = _g1_current_volume
        # "available" is True only after an AudioClient was actually
        # constructed; init_error explains an unavailable/never-tried state.
        return dict(
            available=_g1_audio_client is not None,
            current_volume=hw_volume,
            user_volume=_g1_user_volume,
            muted=hw_volume == 0,
            persisted=_load_persisted_g1_volume(),
            init_error=_g1_init_error,
        )

    return await asyncio.to_thread(_snapshot)
@router.post("/g1-speaker/volume")
async def set_g1_volume(payload: G1VolumePayload):
    """Set the speaker volume on the G1 chest speaker and the active sink.

    Body: `{"level": 0..100}`

    Effects:
      - Applies the level to the G1 speaker via AudioClient.SetVolume(level)
        when a client is available (best-effort).
      - Applies the level to the selected PulseAudio sink (falling back to
        @DEFAULT_SINK@ when no sink is saved) and un-mutes it when level > 0.
      - If level > 0, updates _g1_user_volume (the "unmuted" restore
        target). level == 0 is a soft mute that preserves user_volume.
      - Persists the preference through _save_persisted_g1_volume().

    Raises:
        HTTPException: 400 for an out-of-range level; 503 when neither the
            G1 path nor the PulseAudio path could apply the level.
    """
    def _do():
        global _g1_current_volume, _g1_user_volume
        level = int(payload.level)
        if not 0 <= level <= 100:
            raise HTTPException(400, "level must be 0..100")

        # 1) G1 chest speaker (DDS) — best-effort so it works even when an
        #    external sink (JBL) is the active output.
        code = None
        g1_applied = False
        client = _get_g1_audio_client()
        if client is not None:
            try:
                with _g1_audio_lock:
                    code = client.SetVolume(level)
                    _g1_current_volume = level
                g1_applied = True
            except Exception as exc:
                log.warning("G1 SetVolume failed: %s", exc)

        # 2) The ACTIVE profile's PulseAudio sink (JBL / Anker / …). Target the
        #    RESOLVED sink from the saved selection; @DEFAULT_SINK@ is only a
        #    fallback because the PA default can be a different sink than the
        #    one the user selected.
        pa_applied = False
        try:
            sink = (ad.load_state() or {}).get("sink") or "@DEFAULT_SINK@"
            _pactl(["set-sink-volume", sink, "%d%%" % level])
            if level > 0:
                _pactl(["set-sink-mute", sink, "0"])
            pa_applied = True
        except Exception as exc:
            log.warning("PA set-sink-volume failed: %s", exc)

        # A client that exists but whose SetVolume raised counts as a failed
        # path too; previously that case slipped through and reported ok.
        if not g1_applied and not pa_applied:
            raise HTTPException(503, "No speaker available (G1 + PulseAudio both failed)")

        # Only remember the preference once something actually applied it, so
        # a failed request cannot leave memory and disk out of step.
        if level > 0:
            _g1_user_volume = level
        _save_persisted_g1_volume(_g1_user_volume)
        log.info("volume → %d (g1_rc=%s, pa=%s, user_pref=%d)",
                 level, code, pa_applied, _g1_user_volume)
        return {
            "ok": True,
            "current_volume": level,
            "user_volume": _g1_user_volume,
            "muted": level == 0,
            "return_code": code,
            "pa_applied": pa_applied,
            "persisted": True,
        }

    return await asyncio.to_thread(_do)
# ─────────────────────── device profiles ───────────────────────
@router.get("/devices")
async def list_devices():
    """Return `ad.status()` — the device + profile listing for the dashboard picker."""
    listing = await asyncio.to_thread(ad.status)
    return listing
@router.get("/profiles")
async def list_profiles():
    """List every named profile and flag the ones currently plugged in.

    Returns a dict with ``profiles`` (each profile's dataclass fields plus an
    ``available`` flag) and ``detected_ids`` (ids of the detected profiles).
    Detection is skipped when pactl is not available.
    """
    def _collect():
        from dataclasses import asdict

        if ad.pactl_available():
            plugged = ad.detect_plugged_profiles()
        else:
            plugged = []

        plugged_ids = set()
        for entry in plugged:
            plugged_ids.add(entry["profile"]["id"])

        rows = []
        for prof in ad.PROFILES:
            row = asdict(prof)
            row["available"] = prof.id in plugged_ids
            rows.append(row)

        return {"profiles": rows, "detected_ids": list(plugged_ids)}

    return await asyncio.to_thread(_collect)
class ProfileSelect(BaseModel):
    """Request body for POST /select-profile."""
    # Passed unchanged to ad.select_profile().
    profile_id: str
@router.post("/select-profile")
async def select_profile(payload: ProfileSelect):
    """Select a named audio profile and nudge the AudioManager to refresh.

    Raises:
        HTTPException: 409 when `ad.select_profile` does not report ``ok``.
    """
    def _select():
        outcome = ad.select_profile(payload.profile_id)
        if not outcome.get("ok"):
            raise HTTPException(409, outcome.get("error") or "Could not select profile")

        # Best-effort: let the audio manager drop its cached device state.
        try:
            from Project.Sanad.main import audio_mgr
            can_refresh = audio_mgr is not None and hasattr(audio_mgr, "refresh_devices")
            if can_refresh:
                audio_mgr.refresh_devices()
        except Exception:
            pass
        return outcome

    return await asyncio.to_thread(_select)
class ManualSelect(BaseModel):
    """Request body for POST /select-manual.

    The handler only requires at least one of the two fields, so both
    default to an empty string; a client may now omit the one it does not
    want to change instead of sending an explicit "".
    """
    sink: str = ""
    source: str = ""
@router.post("/select-manual")
async def select_manual(payload: ManualSelect):
    """Select an explicit sink and/or source, then refresh the AudioManager.

    Raises:
        HTTPException: 400 when both fields are empty; 500 when
            `ad.select_manual` does not report ``ok``.
    """
    def _select():
        nothing_chosen = not payload.sink and not payload.source
        if nothing_chosen:
            raise HTTPException(400, "At least one of sink/source required")

        outcome = ad.select_manual(payload.sink, payload.source)
        if not outcome.get("ok"):
            raise HTTPException(500, str(outcome.get("errors") or "Selection failed"))

        # Best-effort refresh of the audio manager's cached devices.
        try:
            from Project.Sanad.main import audio_mgr
            can_refresh = audio_mgr is not None and hasattr(audio_mgr, "refresh_devices")
            if can_refresh:
                audio_mgr.refresh_devices()
        except Exception:
            pass
        return outcome

    return await asyncio.to_thread(_select)
@router.post("/refresh")
async def refresh_devices():
    """Re-scan plugged devices and re-resolve the current selection via `ad.status()`."""
    refreshed = await asyncio.to_thread(ad.status)
    return refreshed
@router.post("/apply")
async def apply_audio():
    """Re-apply the current audio selection and propagate it to live consumers.

    Use after plugging/unplugging devices or switching USB ports. Every
    follow-up step after `ad.apply_current_selection()` is best-effort; the
    endpoint returns that call's result regardless.
    """
    def _reapply():
        outcome = ad.apply_current_selection()

        # AudioManager must pick up the new sink/source.
        try:
            from Project.Sanad.main import audio_mgr
            if audio_mgr is not None:
                audio_mgr.refresh_devices()
        except Exception:
            pass

        # Hot-swap the live Gemini voice to the selected profile as well, so
        # choosing a device moves both record playback and the live voice
        # without dropping the session. No-op when the subprocess is not running.
        try:
            from Project.Sanad.main import live_sub
            pid = (ad.load_state() or {}).get("profile_id")
            if pid and live_sub is not None:
                if hasattr(live_sub, "send_profile") and hasattr(live_sub, "is_running"):
                    if live_sub.is_running():
                        live_sub.send_profile(pid, reason="dashboard audio Apply")
        except Exception:
            pass

        # Restore the user's SAVED volume on the resolved sink instead of
        # forcing 100%, so the slider keeps its value across selects/restarts.
        try:
            sink = (ad.load_state() or {}).get("sink") or "@DEFAULT_SINK@"
            _pactl(["set-sink-volume", sink, "%d%%" % _g1_user_volume])
            if _g1_user_volume > 0:
                _pactl(["set-sink-mute", sink, "0"])
        except Exception:
            pass

        return outcome

    return await asyncio.to_thread(_reapply)
# ─────────────────────── Reset endpoints (Pulse + USB) ───────────────────────
#
# Two distinct recovery paths for the dashboard's audio panel:
#
# POST /api/audio/reset — SOFT: restart pulseaudio / pipewire-pulse.
# Fixes Pulse-side state (stuck profile, lost default sink, crashed
# module). Cannot recover a kernel-side missing USB capture descriptor
# — snd-usb-audio parses those at probe time and Pulse can't influence
# that. Use for "devices look weird" failures.
#
# POST /api/audio/usb-reset — HARD: unbind+rebind snd-usb-audio scoped
# to the Anker VID:PID. Forces snd-usb-audio to re-parse UAC1
# descriptors → input profile reappears even after the firmware/USB
# handshake dropped it. Use for "Anker mic missing from pactl" — the
# symptom soft-reset cannot fix.
#
# Both gate with module-level locks (no concurrent reset), refuse while Live
# Gemini is running or a record is mid-playback, and return structured
# before/after diagnostics so the dashboard can show meaningful toasts.
# Guards POST /reset: the endpoint does a non-blocking acquire and answers
# 429 when another soft reset already holds the lock.
_RESET_LOCK = threading.Lock()
# Same guard for POST /usb-reset.
_USB_RESET_LOCK = threading.Lock()
# Anker PowerConf A3321 — used both for VID:PID matching in sysfs and for
# logging. Change here if you add support for a different USB conference
# device (Hollyland etc).
# Each entry carries lowercase hex "vid"/"pid" strings plus a human-readable "label".
_USB_RESET_TARGETS = (
    {"vid": "291a", "pid": "3301", "label": "Anker PowerConf"},
)
def _refuse_if_busy() -> None:
"""Raise HTTPException(409) if Live Gemini is active or a record is playing.
Used by both reset endpoints a userspace audio restart mid-stream
leaves the active session in a broken state (PortAudio handle pointing
at a dead Pulse, in-flight write() raises, etc.). Cheaper to refuse
than to recover.
"""
try:
from Project.Sanad.main import live_sub
except Exception:
live_sub = None
if live_sub is not None:
try:
st = live_sub.status() or {}
except Exception:
st = {}
state = (st.get("state") or "").lower()
if st.get("running") or state not in ("", "stopped", "error"):
raise HTTPException(
409, f"Stop Live Gemini before resetting audio (state={state or '?'}).",
)
try:
from Project.Sanad.main import audio_mgr
except Exception:
audio_mgr = None
if audio_mgr is not None and hasattr(audio_mgr, "playback_status"):
try:
ps = audio_mgr.playback_status() or {}
if ps.get("playing"):
raise HTTPException(
409, "Stop the active playback before resetting audio.",
)
except HTTPException:
raise
except Exception:
pass
def _detect_pa_flavour() -> str:
"""Return 'pipewire' if pipewire-pulse is the active daemon, else 'pulse'."""
try:
r = subprocess.run(
["pgrep", "-x", "pipewire-pulse"],
check=False, capture_output=True, text=True, timeout=1.0,
)
if r.returncode == 0 and (r.stdout or "").strip():
return "pipewire"
except (FileNotFoundError, subprocess.SubprocessError):
pass
return "pulse"
def _kill_audio_daemon(flavour: str) -> dict:
    """Run the restart command for the detected audio daemon.

    Args:
        flavour: 'pipewire' restarts pipewire-pulse through systemctl --user;
            any other value runs `pulseaudio -k`.

    Returns:
        Dict with ``cmd``, ``returncode`` and ``stderr`` (truncated to 300
        chars). A non-zero exit is only logged as a warning; a missing
        binary or a timeout is reported with returncode -1.
    """
    if flavour == "pipewire":
        argv = ["systemctl", "--user", "restart", "pipewire-pulse.service"]
    else:
        argv = ["pulseaudio", "-k"]
    rendered = " ".join(argv)

    try:
        proc = subprocess.run(argv, check=False, capture_output=True,
                              text=True, timeout=5.0)
    except FileNotFoundError as exc:
        return {"cmd": rendered, "returncode": -1,
                "stderr": f"binary missing: {exc}"}
    except subprocess.TimeoutExpired:
        return {"cmd": rendered, "returncode": -1,
                "stderr": "timeout (>5s)"}

    report = {"cmd": rendered, "returncode": proc.returncode,
              "stderr": (proc.stderr or "").strip()[:300]}
    if proc.returncode != 0:
        log.warning("audio reset: %s exited %d (%s)",
                    argv[0], proc.returncode, report["stderr"])
    return report
def _wait_for_pactl(deadline_s: float = 5.0, interval_s: float = 0.2) -> bool:
"""Poll `pactl info` until it returns 0 or the deadline expires."""
import time as _time
end = _time.monotonic() + deadline_s
while _time.monotonic() < end:
if ad.pactl_available():
return True
_time.sleep(interval_s)
return False
@router.post("/reset")
async def reset_audio_subsystem():
    """SOFT reset — restart pulseaudio/pipewire-pulse and re-resolve devices.

    Use when devices look stuck, pactl is unavailable, or the wrong sink
    is being selected. **Does NOT recover a kernel-side missing USB capture
    descriptor** — for that symptom use /api/audio/usb-reset.

    Returns:
        Structured before/after diagnostics. ``pya_reinitialized`` is True
        only when a fresh PyAudio instance was actually installed on the
        AudioManager (it used to mirror whether close() had succeeded).

    Raises:
        HTTPException: 403 when running as root, 429 when a reset is already
            in progress, 409 when audio is busy (see _refuse_if_busy), 500
            when the daemon does not come back or PortAudio re-init fails.
    """
    if os.geteuid() == 0:
        raise HTTPException(
            403, "Refusing to reset audio as root — Sanad must run as the "
                 "unitree user so the per-user PulseAudio session is reachable.",
        )
    if not _RESET_LOCK.acquire(blocking=False):
        raise HTTPException(429, "Reset already in progress.")
    try:
        _refuse_if_busy()
        log.info(
            "audio reset requested (uid=%s PULSE_RUNTIME_PATH=%s XDG_RUNTIME_DIR=%s)",
            os.geteuid(),
            os.environ.get("PULSE_RUNTIME_PATH") or "-",
            os.environ.get("XDG_RUNTIME_DIR") or "-",
        )
        try:
            from Project.Sanad.main import audio_mgr
        except Exception:
            audio_mgr = None

        def _do() -> dict:
            before = {"pactl_available": ad.pactl_available(),
                      "selection": ad.current_selection()}

            # Quiesce AudioManager so the next play_wav rebinds cleanly.
            # close() is attempted even when the play lock could not be
            # taken within 2 s.
            if audio_mgr is not None:
                play_lock = getattr(audio_mgr, "play_lock", None)
                acquired = False
                if play_lock is not None:
                    acquired = play_lock.acquire(timeout=2.0)
                try:
                    audio_mgr.close()
                except Exception as exc:
                    log.warning("audio reset: audio_mgr.close failed: %s", exc)
                finally:
                    if acquired and play_lock is not None:
                        play_lock.release()

            flavour = _detect_pa_flavour()
            kill_info = _kill_audio_daemon(flavour)
            came_back = _wait_for_pactl(deadline_s=5.0)
            if not came_back and flavour == "pulse":
                # autospawn may be disabled — try an explicit start.
                try:
                    subprocess.run(["pulseaudio", "--start"], check=False,
                                   capture_output=True, text=True, timeout=3.0)
                except (FileNotFoundError, subprocess.SubprocessError) as exc:
                    log.warning("audio reset: pulseaudio --start failed: %s", exc)
                came_back = _wait_for_pactl(deadline_s=2.0)
            if not came_back:
                raise HTTPException(500, {
                    "error": "audio daemon did not return within ~7s",
                    "flavour": flavour, "kill": kill_info,
                })

            apply_result: dict = {}
            try:
                apply_result = ad.apply_current_selection() or {}
            except Exception as exc:
                log.warning("audio reset: apply_current_selection failed: %s", exc)
                apply_result = {"error": str(exc)}

            # Track the re-init itself: it can succeed even when the earlier
            # close() failed, and it is what the response field promises.
            pya_reinitialized = False
            if audio_mgr is not None:
                try:
                    import pyaudio
                    audio_mgr.pya = pyaudio.PyAudio()
                    audio_mgr.refresh_devices()
                    pya_reinitialized = True
                except Exception as exc:
                    log.error("audio reset: PyAudio re-init failed: %s", exc)
                    raise HTTPException(
                        500, f"PortAudio re-init failed after daemon restart: {exc}")

            after_sel = ad.current_selection() or {}
            detected = ad.detect_plugged_profiles() or []
            after = {
                "pactl_available": ad.pactl_available(),
                "selection": after_sel,
                "detected_profiles": [p.get("profile", {}).get("id") for p in detected],
            }
            return {
                "ok": True, "best_effort": True, "flavour": flavour,
                "kill": kill_info, "pya_reinitialized": pya_reinitialized,
                "apply_result": apply_result,
                "input_recovered": bool(after_sel.get("source")),
                "output_recovered": bool(after_sel.get("sink")),
                "before": before, "after": after,
                "hint": ("Soft reset only fixes Pulse-side state. If "
                         "input_recovered is False, try POST /api/audio/usb-reset "
                         "or physically replug the dongle."),
            }

        return await asyncio.to_thread(_do)
    finally:
        _RESET_LOCK.release()
def _find_usb_devices_by_vid_pid(vid: str, pid: str) -> list[str]:
"""Return sysfs bus-id strings (e.g. '1-3') for every USB device whose
idVendor/idProduct match. Empty list when nothing matches.
We read /sys/bus/usb/devices/* every USB *device* (not interface) has
idVendor/idProduct files. Interfaces (paths with a colon, e.g. '1-3:1.1')
do not, so they're naturally skipped.
"""
import glob
hits: list[str] = []
for path in glob.glob("/sys/bus/usb/devices/*"):
name = os.path.basename(path)
if ":" in name:
continue
try:
with open(os.path.join(path, "idVendor")) as f:
v = f.read().strip().lower()
with open(os.path.join(path, "idProduct")) as f:
p = f.read().strip().lower()
except OSError:
continue
if v == vid.lower() and p == pid.lower():
hits.append(name)
return hits
def _snd_usb_interfaces_for_device(bus_id: str) -> list[str]:
"""For USB device `bus_id` (e.g. '1-3'), return all interface names that
are currently bound to the snd-usb-audio driver (e.g. ['1-3:1.0']).
Used so we unbind ONLY the audio interfaces and don't touch HID / HUB
interfaces on the same composite device.
"""
import glob
bound: list[str] = []
base = f"/sys/bus/usb/devices/{bus_id}"
for iface in glob.glob(f"{base}/{bus_id}:*"):
driver_link = os.path.join(iface, "driver")
if not os.path.islink(driver_link):
continue
try:
driver = os.path.basename(os.readlink(driver_link))
except OSError:
continue
if driver == "snd-usb-audio":
bound.append(os.path.basename(iface))
return bound
def _write_sysfs(path: str, value: str) -> tuple[bool, str]:
"""Write `value` to a sysfs file. Returns (success, error_message).
Writes to /sys/bus/usb/drivers/snd-usb-audio/{bind,unbind} usually
require root. If permission denied, the caller should fall back to
invoking shell_scripts/reset_anker_usb.sh via sudo (one-time sudoers
setup documented in that script's header).
"""
try:
with open(path, "w") as f:
f.write(value)
return True, ""
except PermissionError as exc:
return False, f"permission denied: {path} ({exc})"
except OSError as exc:
return False, f"write failed: {path} ({exc})"
@router.post("/usb-reset")
async def usb_reset_anker():
    """HARD reset — unbind+rebind snd-usb-audio for the Anker (VID:PID
    291a:3301). Forces the kernel to re-parse the USB Audio Class
    descriptors, which is the only way to recover a missing capture profile
    on this Jetson without a physical replug.

    Tries two paths per matching device:
      1. Direct sysfs write (no sudo) — works if a udev rule has set
         `audio` group ownership / world-write on the snd-usb-audio bind
         files, or if Sanad runs as root (it shouldn't).
      2. Fallback to `sudo shell_scripts/reset_anker_usb.sh` — works after
         a one-time sudoers entry; see that script's header for setup.

    Refuses while Live Gemini or a record playback is in flight (same
    guard as the soft reset).

    Returns:
        Dict with ``ok`` (True when at least one device was reset),
        per-device ``candidates`` attempts, detected profile ids before and
        after, ``input_recovered`` and an optional ``hint``.

    Raises:
        HTTPException: 429 when a USB reset is already running, 409 when
            audio is busy, 404 when no target device is enumerated.
    """
    if not _USB_RESET_LOCK.acquire(blocking=False):
        raise HTTPException(429, "USB reset already in progress.")
    try:
        _refuse_if_busy()
        # Find candidate Anker USB devices currently enumerated.
        candidates: list[dict] = []
        for tgt in _USB_RESET_TARGETS:
            for bus_id in _find_usb_devices_by_vid_pid(tgt["vid"], tgt["pid"]):
                candidates.append({"bus_id": bus_id, **tgt})
        if not candidates:
            wanted = ", ".join(
                "{}:{}".format(t["vid"], t["pid"]) for t in _USB_RESET_TARGETS
            )
            raise HTTPException(
                404,
                f"No matching USB device found (looked for {wanted}). "
                "Plug the Anker dongle and try again.",
            )
        log.info("usb reset: candidates=%s", candidates)

        def _do() -> dict:
            # Snapshot of detected profile ids before touching the driver.
            before_detected = [
                p.get("profile", {}).get("id")
                for p in (ad.detect_plugged_profiles() or [])
            ]
            results: list[dict] = []
            for cand in candidates:
                bus = cand["bus_id"]
                ifaces = _snd_usb_interfaces_for_device(bus)
                # One report entry per device; filled in as the attempt proceeds.
                attempt = {"bus_id": bus, "label": cand["label"],
                           "snd_interfaces": ifaces, "method": None,
                           "ok": False, "error": ""}
                if not ifaces:
                    attempt["error"] = ("no snd-usb-audio interfaces bound "
                                        "to this device — already unbound or "
                                        "kernel didn't claim it")
                    results.append(attempt)
                    continue
                # ─── Path 1: direct sysfs write ───
                unbind_path = "/sys/bus/usb/drivers/snd-usb-audio/unbind"
                bind_path = "/sys/bus/usb/drivers/snd-usb-audio/bind"
                direct_ok = True
                direct_err = ""
                # Unbind every audio interface; stop at the first failure.
                for iface in ifaces:
                    ok, err = _write_sysfs(unbind_path, iface)
                    if not ok:
                        direct_ok = False
                        direct_err = err
                        break
                if direct_ok:
                    import time as _time
                    # Short pause between unbind and rebind.
                    _time.sleep(0.5)
                    for iface in ifaces:
                        ok, err = _write_sysfs(bind_path, iface)
                        if not ok:
                            direct_ok = False
                            direct_err = err
                            break
                if direct_ok:
                    attempt.update({"method": "direct-sysfs", "ok": True})
                    results.append(attempt)
                    continue
                # ─── Path 2: sudo helper script ───
                # Reached whenever any direct unbind/bind write failed.
                from pathlib import Path as _Path
                helper = (_Path(__file__).resolve().parent.parent.parent
                          / "shell_scripts" / "reset_anker_usb.sh")
                if not helper.exists():
                    attempt.update({"method": "direct-sysfs",
                                    "error": f"{direct_err}; helper not present "
                                             f"at {helper}"})
                    results.append(attempt)
                    continue
                try:
                    # `sudo -n` never prompts: it fails instead of waiting for a password.
                    r = subprocess.run(
                        ["sudo", "-n", str(helper), bus],
                        check=False, capture_output=True, text=True, timeout=10.0,
                    )
                    attempt["method"] = "sudo-helper"
                    if r.returncode == 0:
                        attempt["ok"] = True
                    else:
                        attempt["error"] = (
                            f"sudo helper exited {r.returncode}: "
                            f"{(r.stderr or r.stdout or '').strip()[:300]}"
                        )
                except subprocess.TimeoutExpired:
                    attempt["error"] = "sudo helper timed out (>10s)"
                except FileNotFoundError as exc:
                    attempt["error"] = f"sudo not available: {exc}"
                results.append(attempt)
            # Settle, then re-detect
            import time as _time
            _time.sleep(1.0)
            # Best-effort: re-apply the saved selection and refresh AudioManager.
            try:
                ad.apply_current_selection()
            except Exception:
                pass
            try:
                from Project.Sanad.main import audio_mgr
                if audio_mgr is not None and hasattr(audio_mgr, "refresh_devices"):
                    audio_mgr.refresh_devices()
            except Exception:
                pass
            after_detected = [
                p.get("profile", {}).get("id")
                for p in (ad.detect_plugged_profiles() or [])
            ]
            any_ok = any(r.get("ok") for r in results)
            # "Recovered" here means some detected profile id contains "anker".
            mic_now = any(
                "anker" in (p.get("profile", {}).get("id") or "").lower()
                for p in (ad.detect_plugged_profiles() or [])
            )
            return {
                "ok": any_ok,
                "candidates": results,
                "before_detected_profiles": before_detected,
                "after_detected_profiles": after_detected,
                "input_recovered": mic_now,
                "hint": (
                    "If ok is False, the unbind/rebind path needs sudo. "
                    "Run `bash shell_scripts/reset_anker_usb.sh --setup-sudoers` "
                    "once on the robot to install the sudoers entry, then retry."
                ) if not any_ok else None,
            }

        return await asyncio.to_thread(_do)
    finally:
        _USB_RESET_LOCK.release()

View File

@ -0,0 +1,338 @@
"""Controller tab — manual dashboard locomotion control (N2 Phase 1/2).
Routes live under /api/controller. All WRITE actions (move / step / postures /
modes / MotionSwitcher) require the in-memory "Enable movement" arm flag and
return 409 when disarmed. Reads (/status, /joints, /msc, /status/summary),
E-STOP and the arm toggle are ALWAYS available.
`/status/summary` is the aggregate the dashboard polls for the global subsystem
status strip (Camera / Face / Place / Movement). It is kept under /api/controller
(final path /api/controller/status/summary) so no second router is needed; note
/api/status (no /summary) is already used by the SPA, so the suffix matters.
"""
from __future__ import annotations
import asyncio
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from Project.Sanad.config import BASE_DIR
from Project.Sanad.core.logger import get_logger
from Project.Sanad.vision import recognition_state
from Project.Sanad.dashboard.routes import _arbiter
# Module logger for the controller routes.
log = get_logger("controller_routes")
# Router holding every endpoint defined in this module.
router = APIRouter()
# recognition_state file read by /status/summary and written by /gemini-movement.
STATE_PATH = BASE_DIR / "data" / ".recognition_state.json"
# ── lazy subsystem accessors ────────────────────────────────
def _get_loco():
try:
from Project.Sanad.main import loco_controller # type: ignore
return loco_controller
except Exception:
return None
def _get_camera():
try:
from Project.Sanad.main import camera # type: ignore
return camera
except Exception:
return None
def _get_live_sub():
try:
from Project.Sanad.main import live_sub # type: ignore
return live_sub
except Exception:
return None
def _get_dispatch():
try:
from Project.Sanad.main import movement_dispatch # type: ignore
return movement_dispatch
except Exception:
return None
def _require_loco():
    """Return the locomotion controller or raise HTTPException(503) when absent."""
    controller = _get_loco()
    if controller is not None:
        return controller
    raise HTTPException(503, "Locomotion controller subsystem unavailable.")
def _require_armed(lc):
if not lc.is_armed():
raise HTTPException(409, "Movement is disarmed. Enable movement first.")
def _claim_loco():
    """Arbitration gate: raise HTTPException(409) when `_arbiter.acquire_loco()`
    refuses, i.e. while a Nav2 goal owns the legs."""
    granted = _arbiter.acquire_loco()
    if granted:
        return
    raise HTTPException(
        409, "Navigation (Nav2) is active. Cancel the nav goal before manual movement."
    )
def _cancel_nav():
    """Cancel any in-flight Nav2 goal and clear the nav arbitration flag.

    Used by E-STOP so the global stop halts the legs whichever stack drives
    them. The nav client is called in-process (no HTTP self-call); failures
    are logged and never propagate, and the nav flag is released regardless.
    """
    try:
        from Project.Sanad.dashboard.routes.navigation import _CLIENT as nav
        if nav is None:
            return
        nav.cancel()
    except Exception:
        log.exception("estop nav cancel failed")
    finally:
        _arbiter.release_nav()
# ── reads ───────────────────────────────────────────────────
@router.get("/status")
async def get_status():
    """Return `lc.status()` from the locomotion controller (503 if unavailable)."""
    controller = _require_loco()
    snapshot = await asyncio.to_thread(controller.status)
    return snapshot
@router.get("/joints")
async def get_joints():
    """Return `lc.joints()` from the locomotion controller (503 if unavailable)."""
    controller = _require_loco()
    joint_state = await asyncio.to_thread(controller.joints)
    return joint_state
@router.get("/msc")
async def get_msc():
    """Return `lc.msc_check()` from the locomotion controller (503 if unavailable)."""
    controller = _require_loco()
    msc_state = await asyncio.to_thread(controller.msc_check)
    return msc_state
# ── arm flag / E-STOP (always available) ────────────────────
@router.post("/arm")
async def set_arm(on: bool = Query(...)):
    """Arm or disarm manual movement.

    Arming first claims the legs through the arbiter (409 while Nav2 owns
    them) and gives the claim back if arming raises. Disarming always
    releases the loco claim afterwards.
    """
    controller = _require_loco()

    if not on:
        outcome = await asyncio.to_thread(controller.disarm_movement)
        _arbiter.release_loco()
        return outcome

    # Refuse to arm manual loco while Nav2 owns the legs.
    _claim_loco()
    try:
        outcome = await asyncio.to_thread(controller.arm_movement)
    except Exception:
        _arbiter.release_loco()
        raise
    return outcome
@router.post("/gemini-movement")
async def set_gemini_movement(on: bool = Query(...)):
    """Enable / disable Gemini voice-driven locomotion (N2 Phase 3 gate).

    Writes recognition_state.movement_enabled, which is separate from the
    manual arm flag. Enabling also clears any E-STOP latch on the movement
    dispatcher (best-effort; failures are logged).
    """
    new_state = await asyncio.to_thread(
        recognition_state.mutate, STATE_PATH, movement_enabled=bool(on))

    # Only an enable request touches the dispatcher's E-STOP latch.
    dispatcher = _get_dispatch() if on else None
    if dispatcher is not None:
        try:
            dispatcher.clear_estop()
        except Exception:
            log.exception("clear_estop failed")

    log.info("gemini-movement %s", "ON" if on else "OFF")
    return {"ok": True, "movement_enabled": new_state.movement_enabled}
@router.post("/estop")
async def estop():
    """Emergency stop: halt every source that can drive the legs.

    Steps, each attempted regardless of whether an earlier one failed:
      1. `lc.estop()` on the locomotion controller (when available).
      2. Drop the manual arm flag via `lc.disarm_movement()`.
      3. Latch the voice dispatcher off with `emergency_stop()` — a latch is
         used instead of flipping movement_enabled so the Gemini child does
         not deliver a spoken "movement disabled" line during an E-STOP.
      4. Cancel any in-flight Nav2 goal and release the loco claim.

    Previously a missing locomotion controller, or `lc.estop()` raising,
    aborted the handler before steps 3 and 4, so Nav2 or voice dispatch
    could keep driving the robot. The error is still reported to the caller,
    but only after the remaining stop actions have run.

    Raises:
        HTTPException: 503 when the locomotion controller is unavailable.
        Exception: whatever `lc.estop()` raised, re-raised after cleanup.
    """
    lc = _get_loco()
    res = None
    estop_error = None

    if lc is not None:
        try:
            res = await asyncio.to_thread(lc.estop)
        except Exception as exc:
            log.exception("estop loco stop failed")
            estop_error = exc
        try:
            await asyncio.to_thread(lc.disarm_movement)
        except Exception:
            log.exception("estop disarm failed")

    md = _get_dispatch()
    if md is not None:
        try:
            md.emergency_stop()
        except Exception:
            log.exception("estop dispatcher latch failed")

    # Cancel any in-flight Nav2 goal too: the legs have exactly one commander,
    # and an E-STOP must halt the legs whether loco or Nav2 is driving them.
    await asyncio.to_thread(_cancel_nav)
    _arbiter.release_loco()

    if lc is None:
        raise HTTPException(503, "Locomotion controller subsystem unavailable.")
    if estop_error is not None:
        raise estop_error
    return {"ok": True, **res}
@router.post("/stop")
async def stop():
    """Call `lc.stop_move()`; allowed even when disarmed (StopMove is always safe)."""
    controller = _require_loco()
    return await asyncio.to_thread(controller.stop_move)
# ── movement (armed) ────────────────────────────────────────
class MoveBody(BaseModel):
    """Request body for POST /move; fields are forwarded to `lc.move` in this order."""
    # Velocity components — units are not visible here; TODO confirm against lc.move.
    vx: float = 0.0
    vy: float = 0.0
    vyaw: float = 0.0
    # Forwarded as the fourth argument of lc.move.
    run: bool = False
@router.post("/move")
async def move(body: MoveBody):
    """Send a velocity command; requires the arm flag (409) and the loco claim (409)."""
    controller = _require_loco()
    _require_armed(controller)
    _claim_loco()
    command = (body.vx, body.vy, body.vyaw, body.run)
    return await asyncio.to_thread(controller.move, *command)
@router.post("/step")
async def step(dir: str = Query(...)):
    """Perform one discrete step in direction `dir` (armed + loco claim required).

    Raises:
        HTTPException: 400 with the controller's reason when the step result
            does not report ``ok``.
    """
    controller = _require_loco()
    _require_armed(controller)
    _claim_loco()
    outcome = await asyncio.to_thread(controller.step, dir)
    if outcome.get("ok"):
        return outcome
    raise HTTPException(400, outcome.get("reason", "step failed"))
# ── modes / postures (armed) ────────────────────────────────
@router.post("/mode/prep")
async def mode_prep():
    """Call `lc.prep_mode()`; requires the arm flag."""
    controller = _require_loco()
    _require_armed(controller)
    outcome = await asyncio.to_thread(controller.prep_mode)
    return outcome
@router.post("/mode/ready")
async def mode_ready():
    """Call `lc.ready_start_mode()`; requires the arm flag."""
    controller = _require_loco()
    _require_armed(controller)
    outcome = await asyncio.to_thread(controller.ready_start_mode)
    return outcome
@router.post("/posture/{name}")
async def posture(name: str):
    """Run the named posture via `lc.posture(name)`; requires the arm flag.

    Raises:
        HTTPException: 400 when the result is not ``ok`` and carries a reason.
    """
    controller = _require_loco()
    _require_armed(controller)
    outcome = await asyncio.to_thread(controller.posture, name)
    failed_with_reason = not outcome.get("ok") and outcome.get("reason")
    if failed_with_reason:
        raise HTTPException(400, outcome["reason"])
    return outcome
@router.post("/balance")
async def balance(mode: int = Query(...)):
    """Call `lc.set_balance_mode(mode)`; requires the arm flag."""
    controller = _require_loco()
    _require_armed(controller)
    outcome = await asyncio.to_thread(controller.set_balance_mode, mode)
    return outcome
@router.post("/height")
async def height(h: float = Query(...)):
    """Call `lc.set_stand_height(h)`; requires the arm flag."""
    controller = _require_loco()
    _require_armed(controller)
    outcome = await asyncio.to_thread(controller.set_stand_height, h)
    return outcome
# ── MotionSwitcher / reconnect (armed) ──────────────────────
@router.post("/msc/select-ai")
async def msc_select_ai():
    """Call `lc.msc_select_ai()` (MotionSwitcher); requires the arm flag."""
    controller = _require_loco()
    _require_armed(controller)
    outcome = await asyncio.to_thread(controller.msc_select_ai)
    return outcome
@router.post("/msc/release")
async def msc_release():
    """Call `lc.msc_release()` (MotionSwitcher); requires the arm flag."""
    controller = _require_loco()
    _require_armed(controller)
    outcome = await asyncio.to_thread(controller.msc_release)
    return outcome
@router.post("/reconnect")
async def reconnect():
    """Call `lc.reconnect()`; requires the arm flag."""
    controller = _require_loco()
    _require_armed(controller)
    outcome = await asyncio.to_thread(controller.reconnect)
    return outcome
# ── aggregate subsystem summary (always available) ──────────
@router.get("/status/summary")
async def status_summary():
    """Live on/off state for the header status strip. Never raises.

    Each subsystem probe is isolated: a failing probe reads as False, and an
    unreadable state file falls back to a default RecognitionState.
    """
    def _flag(probe) -> bool:
        # Run one subsystem probe, mapping any failure to False.
        try:
            return bool(probe())
        except Exception:
            return False

    try:
        st = recognition_state.read(STATE_PATH)
    except Exception:
        st = recognition_state.RecognitionState()

    cam = _get_camera()
    camera_running = _flag(lambda: cam is not None and cam.is_running())

    lc = _get_loco()
    movement_armed = _flag(lambda: lc is not None and lc.is_armed())

    sub = _get_live_sub()

    def _gemini_probe():
        runner = getattr(sub, "is_running", None)
        return callable(runner) and runner()

    gemini_running = _flag(_gemini_probe)

    # Effective Gemini-movement = the file flag AND not latched off by an E-STOP.
    md = _get_dispatch()
    estopped = _flag(lambda: md is not None and md.is_estopped())

    summary = {
        "vision_enabled": st.vision_enabled,
        "camera_running": camera_running,
        "face_rec_enabled": st.face_rec_enabled,
        "zone_rec_enabled": st.zone_rec_enabled,
        "movement_armed": movement_armed,
        "gemini_movement_enabled": st.movement_enabled and not estopped,
        "gemini_running": gemini_running,
    }
    return summary

51
vendor/Sanad/dashboard/routes/health.py vendored Normal file
View File

@ -0,0 +1,51 @@
"""Health and status endpoints."""
from __future__ import annotations
from fastapi import APIRouter
from Project.Sanad.core.logger import get_logger
# Module logger; _safe_status uses it to report failing status() calls.
log = get_logger("health_route")
# Router holding the /health and /status endpoints below.
router = APIRouter()
def _safe_status(component, name: str) -> dict:
"""Get status without crashing the whole endpoint if one component fails."""
if component is None:
return {"available": False}
try:
if hasattr(component, "status") and callable(component.status):
return component.status()
return {"available": True}
except Exception as exc:
log.warning("status() failed for %s: %s", name, exc)
return {"available": True, "error": str(exc)}
@router.get("/health")
async def health():
    """Liveness probe: always reports "ok" plus the brain component's status."""
    from Project.Sanad.main import brain

    report = {"status": "ok"}
    report["brain"] = _safe_status(brain, "brain")
    return report
@router.get("/status")
async def full_status():
    """Return the status of every main subsystem, each probed through _safe_status."""
    from Project.Sanad.main import (
        brain, arm, voice_client, macro_rec, macro_play,
        live_voice, live_sub, wake_mgr,
    )

    # (response key, component, label used in the warning log)
    probes = (
        ("brain", brain, "brain"),
        ("voice", voice_client, "voice"),
        ("arm", arm, "arm"),
        ("macro_recorder", macro_rec, "macro_rec"),
        ("macro_player", macro_play, "macro_play"),
        ("live_voice", live_voice, "live_voice"),
        ("live_subprocess", live_sub, "live_sub"),
        ("wake_manager", wake_mgr, "wake_mgr"),
    )
    report = {}
    for key, component, label in probes:
        report[key] = _safe_status(component, label)
    return report

View File

@ -0,0 +1,55 @@
"""Live Gemini Subprocess control endpoints."""
from __future__ import annotations
import asyncio
from fastapi import APIRouter, HTTPException, Query
from Project.Sanad.config import BASE_DIR
from Project.Sanad.vision import recognition_state
# Router holding the live-subprocess control endpoints below.
router = APIRouter()
# recognition_state file carrying the record_enabled flag read/written below.
_STATE_PATH = BASE_DIR / "data" / ".recognition_state.json"
def _sub_or_503():
    """Return `Project.Sanad.main.live_sub` or raise HTTPException(503) when it is None."""
    from Project.Sanad.main import live_sub as sub

    if sub is not None:
        return sub
    raise HTTPException(503, "Live subprocess not available")
@router.get("/status")
async def subprocess_status():
    """Return the live subprocess status plus the current ``record_enabled`` flag.

    ``record_enabled`` is read from recognition_state so the UI can show it
    even before a session starts; when no subprocess object exists an
    "unavailable" placeholder is returned instead of its status.
    """
    from Project.Sanad.main import live_sub

    # record_enabled is a live flag (recognition_state) driven by the panel toggle.
    rec = bool(recognition_state.read(_STATE_PATH).record_enabled)

    if live_sub is None:
        report = {"available": False, "state": "unavailable"}
    else:
        report = dict(live_sub.status())
    report["record_enabled"] = rec
    return report
@router.post("/record")
async def set_record(on: bool = Query(...)):
    """Toggle auto-recording of conversation turns to data/recordings/. Takes
    effect live (the voice child syncs its recorder) - no session restart."""
    enabled = bool(on)
    new_state = await asyncio.to_thread(
        recognition_state.mutate, _STATE_PATH, record_enabled=enabled)
    return {"ok": True, "record_enabled": new_state.record_enabled}
@router.post("/start")
async def start_subprocess():
    """Start the live subprocess in a worker thread.

    Raises 503 when the subprocess object is missing and 404 when
    ``start`` raises ``RuntimeError``.
    """
    sub = _sub_or_503()
    try:
        outcome = await asyncio.to_thread(sub.start)
    except RuntimeError as exc:
        raise HTTPException(404, str(exc))
    return outcome
@router.post("/stop")
async def stop_subprocess():
    """Stop the live subprocess in a worker thread (503 when unavailable)."""
    sub = _sub_or_503()
    return await asyncio.to_thread(sub.stop)

View File

@ -0,0 +1,73 @@
"""Live Voice Commands — voice-to-arm phrase trigger dispatcher.
Listens to GeminiSubprocess user transcripts, matches against
sanad_arm.txt phrases, and fires ARM.trigger_action_by_id.
Endpoints:
POST /start begin polling transcripts
POST /stop stop polling
POST /deferred-mode?enabled toggle instant vs deferred trigger
POST /trigger-enabled?enabled master gate allow arm actions or not
GET /status running, last heard, last action, etc.
GET /triggers arm trigger history (log)
"""
from __future__ import annotations
from fastapi import APIRouter, HTTPException
router = APIRouter()
def _loop():
    """Return main's ``live_voice``; raise HTTP 503 when it is ``None``."""
    from Project.Sanad.main import live_voice as voice_loop

    if voice_loop is not None:
        return voice_loop
    raise HTTPException(503, "LiveVoiceLoop not initialized.")
@router.get("/status")
async def status():
    """Return the voice loop status, or ``{"available": False}`` when absent."""
    from Project.Sanad.main import live_voice

    if live_voice is None:
        return {"available": False}
    body = {"available": True}
    # Keys reported by the loop are merged last, exactly like ``**`` unpacking.
    body.update(live_voice.status())
    return body
@router.post("/start")
async def start():
    """Start the voice loop and return its status (503 when unavailable)."""
    voice_loop = _loop()
    await voice_loop.start()
    body = {"ok": True}
    body.update(voice_loop.status())
    return body
@router.post("/stop")
async def stop():
    """Stop the voice loop and return its status (503 when unavailable)."""
    voice_loop = _loop()
    await voice_loop.stop()
    body = {"ok": True}
    body.update(voice_loop.status())
    return body
@router.post("/deferred-mode")
async def set_deferred(enabled: bool):
    """Set the loop's deferred mode and echo back the resulting flag."""
    voice_loop = _loop()
    voice_loop.set_deferred(enabled)
    current = voice_loop.deferred_mode
    return {"ok": True, "deferred_mode": current}
@router.post("/trigger-enabled")
async def set_trigger_enabled(enabled: bool):
    """Master gate for voice → arm triggering. Default OFF."""
    voice_loop = _loop()
    voice_loop.set_trigger_enabled(enabled)
    current = voice_loop.trigger_enabled
    return {"ok": True, "trigger_enabled": current}
@router.get("/triggers")
async def triggers():
    """Return the trigger history, its length, and the dispatch-table size."""
    voice_loop = _loop()
    history = list(voice_loop.triggers)
    total = len(voice_loop.triggers)
    dispatch_count = len(voice_loop.wake_dispatch)
    return {
        "triggers": history,
        "total": total,
        "dispatch_actions": dispatch_count,
    }

203
vendor/Sanad/dashboard/routes/logs.py vendored Normal file
View File

@ -0,0 +1,203 @@
"""Log viewing and snapshot endpoints."""
from __future__ import annotations
import asyncio
import json
import platform
import shutil
import socket
import sys
from collections import deque
from datetime import datetime
from fastapi import APIRouter, HTTPException
from fastapi.responses import PlainTextResponse
from Project.Sanad.config import BASE_DIR, LOGS_DIR
from Project.Sanad.dashboard.routes._safe_io import safe_path_under
router = APIRouter()
def _list_logs_sync():
    """List every ``*.log*`` entry in LOGS_DIR with its size, sorted by path.

    Creates LOGS_DIR if it does not exist. Entries whose ``stat()`` fails
    (for example a file rotated away between ``glob`` and ``stat``) are
    skipped instead of failing the whole listing.

    Returns:
        A list of ``{"name": str, "size_bytes": int}`` dicts.
    """
    LOGS_DIR.mkdir(parents=True, exist_ok=True)
    files = []
    for p in sorted(LOGS_DIR.glob("*.log*")):
        try:
            size = p.stat().st_size
        except OSError:
            # The entry vanished or is unreadable; leave it out of the listing.
            continue
        files.append({"name": p.name, "size_bytes": size})
    return files
@router.get("/")
async def list_logs():
    """Return the log directory path and its file listing."""
    listing = await asyncio.to_thread(_list_logs_sync)
    return {"logs_dir": str(LOGS_DIR), "files": listing}
def _tail_sync(path, lines: int) -> list[str]:
with open(path, "r", encoding="utf-8", errors="replace") as f:
tail = deque(f, maxlen=lines)
return [l.rstrip("\n") for l in tail]
@router.get("/tail/{filename}")
async def tail_log(filename: str, lines: int = 200):
    """Return the last ``lines`` lines of a log file inside LOGS_DIR.

    Args:
        filename: Name resolved through ``safe_path_under(LOGS_DIR, ...)``.
        lines: Requested line count, clamped to ``0..50000``. A negative
            value would make ``deque(maxlen=...)`` raise, and the upper
            bound matches the one used by the bundle endpoint.

    Raises:
        HTTPException: 404 when the resolved path is not a regular file.
    """
    path = safe_path_under(LOGS_DIR, filename)
    # is_file() also rejects directories, which exists() would let through
    # only for open() to fail afterwards.
    if not path.is_file():
        raise HTTPException(404, "File not found")
    lines = max(0, min(int(lines), 50000))
    lines_out = await asyncio.to_thread(_tail_sync, path, lines)
    return {"filename": path.name, "lines": lines_out}
def _snapshot_sync(ts: str):
    """Copy each ``*.log`` in LOGS_DIR to ``<stem>_snapshot_<ts>.log``.

    Returns a list describing every copy made (source name, snapshot name,
    snapshot size in bytes).
    """
    results = []
    for source in LOGS_DIR.glob("*.log"):
        # Skip prior snapshots to avoid recursive growth
        if "_snapshot_" not in source.stem:
            target = LOGS_DIR / f"{source.stem}_snapshot_{ts}.log"
            shutil.copy2(source, target)
            results.append({
                "source": source.name,
                "snapshot": target.name,
                "size_bytes": target.stat().st_size,
            })
    return results
@router.post("/snapshot")
async def save_log_snapshot():
    """Save timestamped copy of all log files."""
    LOGS_DIR.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    copies = await asyncio.to_thread(_snapshot_sync, stamp)
    return {"ok": True, "saved_at": stamp, "snapshots": copies}
# ─────────────────────── full bundle (everything in one text blob) ───────────────────────
def _build_bundle_sync(lines_per_file: int, include_system: bool) -> str:
    """Build the full text bundle — header, subsystem status, all logs.

    Returns a single string safe to copy directly into a bug report.

    Args:
        lines_per_file: Maximum number of trailing lines kept from each log.
        include_system: When true, add the SUBSYSTEMS and DASHBOARD ROUTERS
            sections; otherwise only the header and the log tails are emitted.
    """
    out: list[str] = []
    ts = datetime.now().isoformat(timespec="seconds")
    # Header: host and interpreter identification.
    out.append("=" * 72)
    out.append(f"SANAD LOG BUNDLE — {ts}")
    out.append("=" * 72)
    out.append(f"Hostname : {socket.gethostname()}")
    out.append(f"Platform : {platform.platform()}")
    out.append(f"Python : {sys.version.split()[0]}")
    out.append(f"Executable: {sys.executable}")
    out.append(f"BASE_DIR : {BASE_DIR}")
    out.append(f"LOGS_DIR : {LOGS_DIR}")
    # Subsystems — pull live status from main.SUBSYSTEMS
    if include_system:
        out.append("")
        out.append("-" * 72)
        out.append("SUBSYSTEMS")
        out.append("-" * 72)
        try:
            from Project.Sanad.main import SUBSYSTEMS
        except Exception as exc:
            # Import failure is reported in the bundle; the section is left empty.
            out.append(f" could not import SUBSYSTEMS: {exc}")
            SUBSYSTEMS = {}
        for name in sorted(SUBSYSTEMS):
            comp = SUBSYSTEMS[name]
            if comp is None:
                out.append(f"{name:15s} unavailable")
                continue
            status: dict = {}
            if hasattr(comp, "status") and callable(comp.status):
                try:
                    s = comp.status()
                    if isinstance(s, dict):
                        status = s
                    else:
                        # Non-dict results are wrapped so they still serialise.
                        status = {"raw": str(s)}
                except Exception as exc:
                    status = {"status_error": str(exc)}
            try:
                # default=str keeps non-JSON-native values from aborting the dump.
                status_str = json.dumps(status, ensure_ascii=False, default=str)
            except Exception:
                status_str = str(status)
            out.append(f"{name:15s} {status_str}")
        # Dashboard router load state
        out.append("")
        out.append("-" * 72)
        out.append("DASHBOARD ROUTERS")
        out.append("-" * 72)
        try:
            from Project.Sanad.dashboard.app import _loaded_routes, _failed_routes
            out.append(f" loaded ({len(_loaded_routes)}): {', '.join(_loaded_routes)}")
            if _failed_routes:
                out.append(f" failed ({len(_failed_routes)}):")
                for name, err in _failed_routes.items():
                    out.append(f" - {name}: {err}")
            else:
                out.append(" failed (0): —")
        except Exception as exc:
            out.append(f" could not read dashboard state: {exc}")
    # All log files — tail N lines each, skip snapshots
    out.append("")
    out.append("-" * 72)
    out.append(f"LOG FILES (last {lines_per_file} lines each)")
    out.append("-" * 72)
    LOGS_DIR.mkdir(parents=True, exist_ok=True)
    log_paths = sorted(LOGS_DIR.glob("*.log*"))
    files_included = 0
    for p in log_paths:
        if "_snapshot_" in p.stem:
            continue  # skip stale snapshots
        try:
            size = p.stat().st_size
        except OSError:
            # Size is informational only; fall back to 0 when stat() fails.
            size = 0
        out.append("")
        out.append(f"=== {p.name} ({size} bytes) ===")
        try:
            with open(p, "r", encoding="utf-8", errors="replace") as f:
                # A bounded deque retains only the last lines_per_file lines.
                tail = deque(f, maxlen=lines_per_file)
            for raw in tail:
                out.append(raw.rstrip("\n"))
            files_included += 1
        except OSError as exc:
            out.append(f" <could not read: {exc}>")
    out.append("")
    out.append("=" * 72)
    out.append(f"END OF BUNDLE — {files_included} log file(s) included")
    out.append("=" * 72)
    return "\n".join(out)
@router.get("/bundle")
async def logs_bundle(lines: int = 1000, include_system: bool = True):
    """Return a single plain-text dump of everything useful for debugging.

    Includes:
    - Timestamp, hostname, platform, Python, BASE_DIR, LOGS_DIR
    - Live status of every subsystem in main.SUBSYSTEMS
    - Dashboard router load/fail state
    - Tail of every .log file in LOGS_DIR (configurable per-file limit)

    Response is `text/plain` so it's safe to copy straight to clipboard
    or pipe into a file. Intended use: dashboard "Copy All Logs" button
    and manual `curl ... > sanad_bundle.txt` debugging.
    """
    # Clamp lines to keep the payload sane
    requested = int(lines)
    if requested > 50000:
        requested = 50000
    if requested < 10:
        requested = 10
    lines = requested
    text = await asyncio.to_thread(_build_bundle_sync, lines, include_system)
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    disposition = f'inline; filename="sanad_bundle_{stamp}.txt"'
    return PlainTextResponse(text, headers={"Content-Disposition": disposition})

238
vendor/Sanad/dashboard/routes/macros.py vendored Normal file
View File

@ -0,0 +1,238 @@
"""Macro recording and playback endpoints."""
from __future__ import annotations
import asyncio
from pathlib import Path
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Project.Sanad.config import AUDIO_RECORDINGS_DIR, MOTIONS_DIR
from Project.Sanad.core.logger import get_logger
log = get_logger("macros_route")
router = APIRouter()
def _block_if_movement_armed():
    """Raise HTTP 409 when the locomotion controller reports it is armed.

    Any failure while importing or querying the controller (other than an
    HTTPException) is treated as "not armed" and the call returns silently.
    """
    try:
        from Project.Sanad.main import loco_controller  # type: ignore
        if loco_controller is None:
            armed = False
        else:
            armed = loco_controller.is_armed()
    except HTTPException:
        raise
    except Exception:
        return
    if not armed:
        return
    raise HTTPException(
        409, "Arm actions are disabled while movement is enabled. "
             "Disable movement in the Controller tab first.")
class MacroName(BaseModel):
    # Request body carrying the macro name used by record/start and play.
    name: str
class ComboPlayPayload(BaseModel):
    # Request body for /play-combined: an optional audio clip plus an optional
    # arm motion, selected either by action_id or by a legacy motion filename.
    audio_file: str = ""  # filename under data/audio/ (or empty for none)
    motion_file: str = ""  # DEPRECATED — use action_id. Still accepted for bare JSONL by filename.
    action_id: int | None = None  # arm_controller action id (SDK built-in OR JSONL) — preferred
    speed: float = 1.0  # forwarded unchanged to the arm controller call
@router.get("/")
async def list_macros():
    """List the macros known to the player (empty list when it is absent)."""
    from Project.Sanad.main import macro_play

    names = [] if macro_play is None else macro_play.list_macros()
    return {"macros": names}
@router.get("/status")
async def macro_status():
    """Return recorder and player status; a falsy component yields ``{}``."""
    from Project.Sanad.main import macro_rec, macro_play

    recorder_state = macro_rec.status() if macro_rec else {}
    player_state = macro_play.status() if macro_play else {}
    return {"recorder": recorder_state, "player": player_state}
@router.post("/record/start")
async def start_recording(payload: MacroName):
    """Begin recording a macro under ``payload.name`` (503 without a recorder)."""
    from Project.Sanad.main import macro_rec as recorder

    if recorder is not None:
        return recorder.start(payload.name)
    raise HTTPException(503, "Macro recorder not available.")
@router.post("/record/stop")
async def stop_recording():
    """Stop the active macro recording in a worker thread.

    Raises:
        HTTPException: 503 when the macro recorder is not available.
    """
    # asyncio is already imported at module level; the former function-scope
    # re-import was redundant and has been dropped.
    from Project.Sanad.main import macro_rec

    if macro_rec is None:
        raise HTTPException(503, "Macro recorder not available.")
    return await asyncio.to_thread(macro_rec.stop)
@router.post("/play")
async def play_macro(payload: MacroName):
    """Play the named macro through the brain.

    Raises:
        HTTPException: 503 when the brain is not available (previously this
            surfaced as an AttributeError / HTTP 500), 409 when locomotion
            movement is armed.
    """
    from Project.Sanad.main import brain

    # Same "subsystem missing -> 503" convention as the other handlers here.
    if brain is None:
        raise HTTPException(503, "Brain not available.")
    _block_if_movement_armed()
    return await brain.play_macro(payload.name)
@router.post("/stop")
async def stop_macro():
    """Stop macro playback if a player exists; always answers ``ok``."""
    from Project.Sanad.main import macro_play as player

    if not player:
        return {"ok": True}
    player.stop()
    return {"ok": True}
# ─── Ad-hoc audio + motion combined playback ─────────────────────────
# List the two catalogues so the dashboard can populate dropdowns, then
# play the chosen pair in parallel (asyncio.gather) — same scheme the
# Brain uses for `parallel`-mode skills, but ad-hoc instead of predefined.
@router.get("/audio-files")
async def list_audio_files():
    """Enumerate playable audio files under data/audio/."""
    AUDIO_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)
    entries = []
    for wav in sorted(AUDIO_RECORDINGS_DIR.glob("*.wav")):
        try:
            kilobytes = round(wav.stat().st_size / 1024, 1)
            entries.append({"name": wav.name, "size_kb": kilobytes})
        except OSError:
            # Unreadable entry: leave it out of the listing.
            continue
    return {"files": entries, "dir": str(AUDIO_RECORDINGS_DIR)}
@router.get("/motion-files")
async def list_motion_files():
    """Enumerate playable .jsonl motions under data/motions/ (thin wrapper
    so the Macro Recorder dropdown doesn't have to call the replay route)."""
    MOTIONS_DIR.mkdir(parents=True, exist_ok=True)
    entries = []
    for motion in sorted(MOTIONS_DIR.glob("*.jsonl")):
        try:
            kilobytes = round(motion.stat().st_size / 1024, 1)
            entries.append({"name": motion.name, "size_kb": kilobytes})
        except OSError:
            # Unreadable entry: leave it out of the listing.
            continue
    return {"files": entries, "dir": str(MOTIONS_DIR)}
@router.post("/stop-combined")
async def stop_combined():
    """Immediately stop any in-flight combined playback.

    Calls ``arm.cancel()`` and ``audio_mgr.stop_playback()`` independently:
    each is attempted whenever its subsystem exists, and a failure on one
    side is logged and reported without preventing the other. The response
    always carries ``ok: True`` plus per-side stopped flags / error strings.
    """
    from Project.Sanad.main import audio_mgr, arm

    outcome = {"motion_stopped": False, "audio_stopped": False}

    if arm is not None:
        try:
            arm.cancel()
        except Exception as exc:
            log.warning("stop-combined: arm.cancel failed: %s", exc)
            outcome["motion_error"] = str(exc)
        else:
            outcome["motion_stopped"] = True

    if audio_mgr is not None:
        try:
            audio_mgr.stop_playback()
        except Exception as exc:
            log.warning("stop-combined: audio stop failed: %s", exc)
            outcome["audio_error"] = str(exc)
        else:
            outcome["audio_stopped"] = True

    return {"ok": True, **outcome}
@router.post("/play-combined")
async def play_combined(payload: ComboPlayPayload):
    """Fire a user-picked audio clip and arm action in parallel.

    Motion dispatch is via `arm.trigger_by_id(action_id)` which handles
    BOTH SDK built-in actions (shake_hand, wave, ...) and recorded JSONL
    replays. Audio goes through `audio_mgr.play_wav`. Either side may be
    omitted.

    All request validation happens before any coroutine object is created,
    so an early 4xx/503 never leaves an un-awaited coroutine behind.

    Raises:
        HTTPException: 400 when nothing was selected or ``audio_file`` escapes
            the audio directory; 404 when the audio file is missing; 409 when
            a motion is requested while movement is armed; 503 when the
            required AudioManager / ArmController is unavailable.

    Returns:
        ``{"ok": True, ...}`` echoing the request plus ``*_played`` /
        ``*_error`` keys. Playback failures are reported in the body rather
        than raised.
    """
    from Project.Sanad.main import audio_mgr, arm

    has_audio = bool(payload.audio_file)
    has_motion = payload.action_id is not None or bool(payload.motion_file)
    if not has_audio and not has_motion:
        raise HTTPException(400, "pick at least one of audio_file / action_id / motion_file")
    if has_motion:
        _block_if_movement_armed()  # audio-only combos still allowed while armed

    # ── validation phase (same check order as before: audio, then arm) ──
    audio_path = None
    if has_audio:
        if audio_mgr is None:
            raise HTTPException(503, "AudioManager not available")
        audio_path = (AUDIO_RECORDINGS_DIR / payload.audio_file).resolve()
        try:
            # relative_to raises ValueError when the resolved path is outside
            # the recordings directory (e.g. "../" in the filename).
            audio_path.relative_to(AUDIO_RECORDINGS_DIR.resolve())
        except ValueError:
            raise HTTPException(400, "audio_file path traversal denied")
        if not audio_path.exists():
            raise HTTPException(404, f"audio not found: {payload.audio_file}")
    if has_motion and arm is None:
        raise HTTPException(503, "ArmController not available")

    result: dict = {
        "audio_file": payload.audio_file,
        "action_id": payload.action_id,
        "motion_file": payload.motion_file,
    }

    async def _play_audio():
        try:
            await asyncio.to_thread(audio_mgr.play_wav, audio_path)
            result["audio_played"] = audio_path.name
        except Exception as exc:
            log.exception("combined play: audio failed")
            result["audio_error"] = str(exc)

    async def _play_motion():
        try:
            if payload.action_id is not None:
                # SDK built-in OR JSONL — arm.trigger_by_id handles both
                await asyncio.to_thread(arm.trigger_by_id,
                                        int(payload.action_id),
                                        payload.speed)
                result["motion_played"] = f"action_id={payload.action_id}"
            else:
                # Legacy path: bare JSONL filename
                motion_path = (MOTIONS_DIR / payload.motion_file).resolve()
                try:
                    motion_path.relative_to(MOTIONS_DIR.resolve())
                except ValueError:
                    result["motion_error"] = "motion_file path traversal denied"
                    return
                if not motion_path.exists():
                    result["motion_error"] = f"motion not found: {payload.motion_file}"
                    return
                await asyncio.to_thread(arm.replay_file, str(motion_path), payload.speed)
                result["motion_played"] = motion_path.name
        except Exception as exc:
            log.exception("combined play: motion failed")
            result["motion_error"] = str(exc)

    # ── execution phase: coroutines are only created once nothing can raise ──
    tasks = []
    if has_audio:
        tasks.append(_play_audio())
    if has_motion:
        tasks.append(_play_motion())
    await asyncio.gather(*tasks)
    return {"ok": True, **result}

179
vendor/Sanad/dashboard/routes/mask.py vendored Normal file
View File

@ -0,0 +1,179 @@
"""Mask Face tab — Shining LED face mask control (BLE).
Routes live under /api/mask. Backed by the FaceController subsystem
(face/mask_face.py), which owns a dedicated asyncio loop + BLE connection to the
standalone Mask project's `shiningmask` library.
Every handler is failure-safe: if the subsystem or its library is unavailable it
returns 503 (GET /status returns a degraded body) rather than crash the
dashboard. FaceController raises RuntimeError for "not connected" / "face not
started"; those map to 409. Blocking BLE calls run in a thread pool so the event
loop stays responsive.
"""
from __future__ import annotations
import asyncio
from typing import List, Optional
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from Project.Sanad.core.logger import get_logger
log = get_logger("mask_routes")
router = APIRouter()
# ── lazy subsystem accessor ─────────────────────────────────
def _get_face():
    """Return main's ``mask_face`` object, or ``None`` if importing it fails."""
    try:
        from Project.Sanad.main import mask_face as face  # type: ignore
    except Exception:
        return None
    return face
def _require():
    """Return the mask face subsystem or raise HTTP 503 when it is missing."""
    face = _get_face()
    if face is not None:
        return face
    raise HTTPException(503, "Mask face subsystem unavailable.")
def _run(fn, *args, **kwargs):
    """Call a FaceController method, mapping its errors to HTTP status codes.

    HTTPException passes through untouched, RuntimeError becomes 409, and
    any other exception is logged with its traceback and becomes 500.
    """
    try:
        outcome = fn(*args, **kwargs)
    except HTTPException:
        raise
    except RuntimeError as exc:
        raise HTTPException(409, str(exc))
    except Exception as exc:  # noqa: BLE001
        log.exception("mask operation failed")
        raise HTTPException(500, str(exc))
    return outcome
# ── status ──────────────────────────────────────────────────
@router.get("/status")
async def status():
    """Return the mask status; a fixed degraded body replaces it when the
    subsystem object is missing."""
    face = _get_face()
    if face is None:
        return {
            "available": False,
            "connected": False,
            "lib_available": False,
            "last_error": "mask face subsystem not constructed",
        }
    report = await asyncio.to_thread(face.status)
    report["available"] = True
    return report
# ── connection ──────────────────────────────────────────────
@router.post("/connect")
async def connect(timeout: Optional[float] = Query(None),
                  attempts: Optional[int] = Query(None)):
    """Connect to the mask in a worker thread, forwarding the optional
    ``timeout`` and ``attempts`` query values."""
    face = _require()
    connector = face.connect
    return await asyncio.to_thread(_run, connector, timeout, attempts)
@router.post("/disconnect")
async def disconnect():
    """Disconnect from the mask in a worker thread."""
    return await asyncio.to_thread(_run, _require().disconnect)
# ── simple commands ─────────────────────────────────────────
@router.post("/brightness")
async def brightness(level: int = Query(..., ge=0, le=255)):
    """Set the mask brightness; ``level`` is validated to 0..255."""
    setter = _require().set_brightness
    return await asyncio.to_thread(_run, setter, level)
class TextBody(BaseModel):
    # Request body for POST /text; fields are forwarded to mf.set_text.
    text: str = ""
    color: List[int] = [255, 255, 255]  # converted to a tuple before the call
    mode: Optional[int] = None
    bg: Optional[List[int]] = None  # background RGB (None -> black)
    speed: Optional[int] = None  # scroll speed 0-255 (None -> firmware default)
@router.post("/text")
async def text(body: TextBody):
    """Show text on the mask using the colours, mode and speed in ``body``."""
    face = _require()
    # An empty or missing background list is forwarded as None.
    background = None
    if body.bg:
        background = tuple(body.bg)
    foreground = tuple(body.color)
    return await asyncio.to_thread(
        _run, face.set_text, body.text, foreground, body.mode, background, body.speed)
@router.post("/image")
async def image(id: int = Query(...)):
    """Show the image with the given id via ``show_image``."""
    shower = _require().show_image
    return await asyncio.to_thread(_run, shower, id)
@router.post("/animation")
async def animation(id: int = Query(...)):
    """Play the animation with the given id via ``play_animation``."""
    player = _require().play_animation
    return await asyncio.to_thread(_run, player, id)
@router.post("/clear")
async def clear():
    """Invoke the controller's ``clear_diy`` in a worker thread."""
    return await asyncio.to_thread(_run, _require().clear_diy)
# ── animated face ───────────────────────────────────────────
@router.post("/face/start")
async def face_start(reload: bool = Query(False)):
    """Start the animated face, forwarding the ``reload`` flag."""
    starter = _require().face_start
    return await asyncio.to_thread(_run, starter, reload)
@router.post("/face/stop")
async def face_stop():
    """Stop the animated face."""
    return await asyncio.to_thread(_run, _require().face_stop)
@router.post("/face/return")
async def face_return():
    """Resume the live animated face after a text/image/animation override."""
    resume = _require().return_face
    return await asyncio.to_thread(_run, resume)
class FaceColorBody(BaseModel):
    # Request body for POST /face/color; each field is optional and is passed
    # positionally (eye, mouth, sclera) to mf.set_face_color.
    eye: Optional[List[int]] = None  # eye/iris RGB
    mouth: Optional[List[int]] = None  # mouth RGB
    sclera: Optional[List[int]] = None  # white-of-the-eye RGB
@router.post("/face/color")
async def face_color(body: FaceColorBody):
    """Recolor the animated face (re-uploads the frame set if the face is live)."""
    recolor = _require().set_face_color
    return await asyncio.to_thread(_run, recolor, body.eye, body.mouth, body.sclera)
@router.post("/speaking")
async def speaking(on: bool = Query(...)):
    """Forward the speaking flag to ``set_speaking``."""
    setter = _require().set_speaking
    return await asyncio.to_thread(_run, setter, on)
@router.post("/mouth")
async def mouth(level: int = Query(..., ge=0, le=3)):
    """Set the mouth level; ``level`` is validated to 0..3."""
    setter = _require().set_mouth
    return await asyncio.to_thread(_run, setter, level)
@router.post("/expression/{name}")
async def expression(name: str):
    """Show the named expression via ``show_expression``."""
    shower = _require().show_expression
    return await asyncio.to_thread(_run, shower, name)

View File

@ -0,0 +1,395 @@
"""Social-media / QR display on the LED mask.
Renders a QR code (for a preset Instagram account) or an uploaded image onto the
mask's 46x58 display and holds it via the FaceController's reserved scratch slot
until the animated face is resumed. The shared helper :func:`show_social_on_mask`
is also called from the Gemini ``[[SHOW:account]]`` relay wired in ``main.py``.
Routes (under /api/mask):
POST /social/{account} -> show a preset Instagram QR
POST /qr -> upload an image (QR or any picture) + show it
POST /face/resume -> stop showing the scratch image, return to the face
GET /social -> list the preset accounts
"""
from __future__ import annotations
import asyncio
import io
import logging
import os
import sys
from pathlib import Path
import re
from fastapi import APIRouter, File, HTTPException, Query, UploadFile
from fastapi.responses import FileResponse
log = logging.getLogger("sanad.mask_social")
router = APIRouter() # prefix "/api/mask" supplied by dashboard/app.py _REST_ROUTES
# Preset Instagram accounts the mask can show as a QR. The mask is a low-res
# 46x58 panel, so a full-URL QR is dense; the black margin acts as the quiet
# zone and we scale modules crisply (NEAREST) to give it the best chance.
# Keyed by account id (looked up lower-cased in show_social_on_mask). Each entry
# holds the display handle, the full profile URL, and a short link; the short
# link is preferred when the QR is rendered.
SOCIAL = {
    "bu_sunaidah": {"handle": "@bu.sunaidah",
                    "url": "https://instagram.com/bu.sunaidah",
                    "short": "da.gd/VMkH8J"},  # -> instagram.com/bu.sunaidah (v1 QR)
    "yslootahtech": {"handle": "@yslootahtech",
                     "url": "https://instagram.com/yslootahtech",
                     "short": "da.gd/Qr8RO"},  # -> instagram.com/yslootahtech (v1 QR)
}
def _ensure_mask_path() -> None:
"""Make the flat Mask lib (colorface) importable from this route — using the
SAME dir the FaceController resolved (the Mask lib lives outside the repo)."""
d = os.environ.get("SANAD_MASK_DIR")
if not d:
try:
from Project.Sanad.main import mask_face as _mf # type: ignore
d = getattr(_mf, "mask_dir", None)
except Exception:
d = None
if not d:
d = str(Path(__file__).resolve().parents[2] / "Mask")
if d and d not in sys.path:
sys.path.insert(0, d)
def _get_face():
    """Return main's ``mask_face``; raise HTTP 503 when it is ``None``."""
    from Project.Sanad.main import mask_face as face  # type: ignore

    if face is not None:
        return face
    raise HTTPException(status_code=503, detail="mask face unavailable")
_EYE_BAND = 16  # top rows reserved for the cyan eyes; the code sits below them


def _compose_under_eyes(inner) -> bytes:
    """Draw two eyes along the top of the panel, place ``inner`` (a QR or
    image) centred in the area below them, and return the mask encoding.

    Small inputs are enlarged by an integer factor and large ones shrunk to
    fit a target of roughly 72% of the space under the eye band; both use
    nearest-neighbour resampling. Eye colour comes from ``cf.DEFAULT_EYE``.
    """
    _ensure_mask_path()
    import colorface as cf
    from PIL import Image, ImageDraw

    panel_w, panel_h = cf.DISPLAY_W, cf.DISPLAY_H
    inner = inner.convert("RGB")
    src_w, src_h = inner.size

    # keep the code a small badge under the eyes (~70% of the space below them)
    target = max(20, int(min(panel_w, panel_h - _EYE_BAND - 1) * 0.72))
    if src_w <= target and src_h <= target:
        # crisp integer up-scale (QR)
        factor = max(1, min(target // src_w, target // src_h))
        new_w = src_w * factor
        new_h = src_h * factor
    else:
        # scale big images down
        ratio = min(target / src_w, target / src_h)
        new_w = max(1, int(src_w * ratio))
        new_h = max(1, int(src_h * ratio))
    inner = inner.resize((new_w, new_h), Image.NEAREST)

    canvas = Image.new("RGB", (panel_w, panel_h), (0, 0, 0))
    pen = ImageDraw.Draw(canvas)
    iris = cf.DEFAULT_EYE
    middle = panel_w // 2
    for cx in (middle - 10, middle + 10):  # two eyes at the top
        pen.ellipse([cx - 5, 3, cx + 5, 13], fill=(255, 255, 255))
        pen.ellipse([cx - 3, 5, cx + 3, 11], fill=iris)
        pen.ellipse([cx - 1, 7, cx + 1, 10], fill=(0, 0, 0))

    left = max(0, (panel_w - new_w) // 2)
    top = max(_EYE_BAND, _EYE_BAND + (panel_h - _EYE_BAND - new_h) // 2)
    canvas.paste(inner, (left, top))
    return cf.encode(canvas)
def _qr_bytes(url: str) -> bytes:
    """Render a QR for ``url`` FULL-SCREEN with the largest crisp (integer)
    module size the panel allows.

    The QR is generated at one pixel per module with a one-module border,
    enlarged by the biggest integer factor that still fits the shorter panel
    side, and centred on a black canvas. Longer data yields a denser code
    with smaller modules, which is less likely to scan.

    Args:
        url: Text to encode.

    Returns:
        The encoded frame bytes from ``colorface.encode``. Only the bytes
        are returned; the QR version is not part of the result.
    """
    _ensure_mask_path()
    import qrcode
    from PIL import Image
    import colorface as cf

    W, H = cf.DISPLAY_W, cf.DISPLAY_H
    qr = qrcode.QRCode(error_correction=qrcode.constants.ERROR_CORRECT_L,
                       box_size=1, border=1)
    qr.add_data(url)
    qr.make(fit=True)
    q = qr.make_image(fill_color=(255, 255, 255),
                      back_color=(0, 0, 0)).convert("RGB")
    scale = max(1, min(W, H) // max(1, q.width))  # largest integer that fits
    if scale > 1:
        # Scale each axis by its own dimension (the image is square, so the
        # result is unchanged, but the intent is explicit).
        q = q.resize((q.width * scale, q.height * scale), Image.NEAREST)
    canvas = Image.new("RGB", (W, H), (0, 0, 0))
    canvas.paste(q, ((W - q.width) // 2, (H - q.height) // 2))
    return cf.encode(canvas)
def _image_bytes(img) -> bytes:
    """Resize ``img`` to a square whose side equals the shorter panel
    dimension (nearest-neighbour), centre it on a black canvas, and return
    the mask encoding."""
    _ensure_mask_path()
    import colorface as cf
    from PIL import Image

    panel_w, panel_h = cf.DISPLAY_W, cf.DISPLAY_H
    side = min(panel_w, panel_h)
    squared = img.convert("RGB").resize((side, side), Image.NEAREST)
    canvas = Image.new("RGB", (panel_w, panel_h), (0, 0, 0))
    offset = ((panel_w - side) // 2, (panel_h - side) // 2)
    canvas.paste(squared, offset)
    return cf.encode(canvas)
def show_social_on_mask(account: str) -> dict:
    """Show a preset account's QR on the mask and return a summary dict.

    The account id is stripped and lower-cased before lookup in ``SOCIAL``.
    The entry's short link is encoded when present, otherwise its full URL.

    Raises:
        HTTPException: 404 for an unknown account; errors raised by
            ``_get_face`` or the controller propagate unchanged.
    """
    key = str(account).strip().lower()
    entry = SOCIAL.get(key)
    if not entry:
        raise HTTPException(status_code=404, detail="unknown account")
    payload = _qr_bytes(entry.get("short") or entry["url"])  # v1 short link -> scannable
    face = _get_face()
    shown = face.show_scratch_image(payload)
    log.info("showing scannable social QR on mask: %s (%s)", entry["handle"], entry.get("short"))
    summary = {"ok": True, "handle": entry["handle"], "scannable": True}
    summary.update(shown or {})
    return summary
@router.get("/social")
async def list_social():
    """List the preset accounts as ``{"id", "handle"}`` pairs."""
    accounts = []
    for account_id, entry in SOCIAL.items():
        accounts.append({"id": account_id, "handle": entry["handle"]})
    return {"accounts": accounts}
def _friendly(exc: Exception) -> HTTPException:
    """Convert an exception into the HTTPException that should be raised.

    An HTTPException is returned as-is. Messages containing "not connected",
    "not started" or "MASK" become a 503 with user guidance. Anything else is
    logged and becomes a 500 carrying the exception type and message.
    """
    if isinstance(exc, HTTPException):
        return exc
    message = str(exc)
    markers = ("not connected", "not started", "MASK")
    if any(marker in message for marker in markers):
        return HTTPException(status_code=503, detail=(
            "Mask not connected — power it on, bring it close to the robot, and "
            "free it from the phone app."))
    log.exception("mask scratch op failed")
    detail = "%s: %s" % (type(exc).__name__, message)
    return HTTPException(status_code=500, detail=detail)
@router.post("/social/{account}")
async def show_social(account: str):
    """Show a preset account's QR on the mask, with errors mapped by ``_friendly``."""
    try:
        outcome = await asyncio.to_thread(show_social_on_mask, account)
    except Exception as exc:
        raise _friendly(exc)
    return outcome
@router.post("/qr")
async def upload_qr(file: UploadFile = File(...)):
    """Upload an image (a QR you generated, or any picture) and show it on the mask."""
    payload = await file.read()
    if not payload:
        raise HTTPException(status_code=400, detail="empty upload")

    from PIL import Image
    try:
        picture = Image.open(io.BytesIO(payload))
        picture.load()  # force a full decode so corrupt uploads fail here
    except Exception:
        raise HTTPException(status_code=400, detail="not a valid image")

    try:
        encoded = await asyncio.to_thread(_image_bytes, picture)
        face = _get_face()
        outcome = await asyncio.to_thread(face.show_scratch_image, encoded)
    except Exception as exc:
        raise _friendly(exc)
    return outcome
@router.post("/face/resume")
async def resume_face():
    """Stop showing the scratch image and resume the animated face."""
    face = _get_face()
    reset = face.set_expression
    return await asyncio.to_thread(reset, None)
@router.post("/face/mouth")
async def face_mouth(hidden: bool = Query(...)):
    """Show (hidden=false) or hide (hidden=true) the mouth on the animated face."""
    face = _get_face()
    toggle = face.set_mouth_hidden
    return await asyncio.to_thread(toggle, hidden)
@router.post("/link")
async def face_link(on: bool = Query(...)):
    """Link (on=true) / unlink (on=false) Gemini <-> the mask.

    The flag is forwarded to ``set_gemini_linked`` in a worker thread, so a
    link-on that blocks during its first connect attempt does not stall the
    event loop.
    """
    face = _get_face()
    linker = face.set_gemini_linked
    return await asyncio.to_thread(linker, on)
# ── saved QR library ────────────────────────────────────────────────
# Upload QR/images, save them by name, list/show/delete them. Stored as PNGs
# under data/qr_codes so they persist across restarts.
_QR_DIR = None


def _qr_dir() -> Path:
    """Return the QR library directory, resolving and creating it on first use.

    The base is ``BASE_DIR`` from the project config, or two levels above this
    file's directory when that import fails. The result is cached in
    ``_QR_DIR``.
    """
    global _QR_DIR
    if _QR_DIR is not None:
        return _QR_DIR
    try:
        from Project.Sanad.config import BASE_DIR
        root = Path(BASE_DIR)
    except Exception:
        root = Path(__file__).resolve().parents[2]
    _QR_DIR = root / "data" / "qr_codes"
    _QR_DIR.mkdir(parents=True, exist_ok=True)
    return _QR_DIR
def _safe_name(name: str) -> str:
n = re.sub(r"[^A-Za-z0-9_.-]", "_", (name or "").strip())[:40].strip("._")
return n or "qr"
@router.post("/qr/save")
async def qr_save(name: str = Query(...), file: UploadFile = File(...)):
    """Save an uploaded QR/image into the library under ``name``."""
    payload = await file.read()
    if not payload:
        raise HTTPException(status_code=400, detail="empty upload")

    from PIL import Image
    try:
        picture = Image.open(io.BytesIO(payload))
        picture.load()  # force a full decode so corrupt uploads fail here
    except Exception:
        raise HTTPException(status_code=400, detail="not a valid image")

    stem = _safe_name(name)
    rgb = picture.convert("RGB")
    destination = str(_qr_dir() / (stem + ".png"))
    await asyncio.to_thread(rgb.save, destination)
    return {"ok": True, "name": stem}
@router.post("/qr/save_link")
async def qr_save_link(name: str = Query(...), url: str = Query(...)):
    """Generate a QR from ``url`` and save it to the library. Returns the QR
    version + whether it's short enough to actually scan on the mask (version 1)."""
    link = (url or "").strip()
    if not link:
        raise HTTPException(status_code=400, detail="empty url")
    _ensure_mask_path()
    import qrcode

    code = qrcode.QRCode(error_correction=qrcode.constants.ERROR_CORRECT_L,
                         box_size=10, border=2)
    code.add_data(link)
    code.make(fit=True)
    rendered = code.make_image(fill_color=(255, 255, 255),
                               back_color=(0, 0, 0)).convert("RGB")
    stem = _safe_name(name or link)
    await asyncio.to_thread(rendered.save, str(_qr_dir() / (stem + ".png")))

    fits_mask = code.version <= 1
    if fits_mask:
        note = "scannable"
    else:
        note = "too dense to scan on the mask — use a shorter link"
    return {"ok": True, "name": stem, "version": code.version,
            "scannable_on_mask": fits_mask,
            "note": note}
@router.get("/qr/library")
async def qr_library():
    """List the saved QR names."""
    stems = [png.stem for png in _qr_dir().glob("*.png")]
    stems.sort()
    return {"qr": stems}
@router.get("/qr/thumb/{name}")
async def qr_thumb(name: str):
    """Serve a saved QR image (for the dashboard thumbnail)."""
    target = _qr_dir() / (_safe_name(name) + ".png")
    if target.exists():
        return FileResponse(str(target), media_type="image/png")
    raise HTTPException(status_code=404, detail="not found")
@router.post("/qr/show/{name}")
async def qr_show(name: str):
    """Show a saved QR on the mask.

    The image is rendered through ``_image_bytes`` (the full-screen renderer).
    NOTE(review): the previous docstring said "under the eyes", which
    describes ``_compose_under_eyes``; confirm which layout is intended.

    Raises:
        HTTPException: 404 when no saved image has that name; controller and
            decoding errors are mapped by ``_friendly``.
    """
    p = _qr_dir() / (_safe_name(name) + ".png")
    if not p.exists():
        raise HTTPException(status_code=404, detail="not found")
    from PIL import Image

    def _render() -> bytes:
        # Open, decode and encode inside the worker thread; the context
        # manager releases the file handle deterministically.
        with Image.open(p) as img:
            return _image_bytes(img)

    try:
        data = await asyncio.to_thread(_render)
        mf = _get_face()
        return await asyncio.to_thread(mf.show_scratch_image, data)
    except Exception as exc:
        raise _friendly(exc)
@router.delete("/qr/{name}")
async def qr_delete(name: str):
    """Delete a saved QR from the library."""
    stem = _safe_name(name)
    target = _qr_dir() / (stem + ".png")
    if target.exists():
        target.unlink()
    # Answers ok whether or not a file was actually removed.
    return {"ok": True, "deleted": stem}
# ── saved TEXT library ──────────────────────────────────────────────
# Save words/phrases and scroll any of them across the mask on demand.
_TEXT_DIR = None


def _text_dir() -> Path:
    """Return the text library directory, resolving and creating it on first use.

    The base is ``BASE_DIR`` from the project config, or two levels above this
    file's directory when that import fails. The result is cached in
    ``_TEXT_DIR``.
    """
    global _TEXT_DIR
    if _TEXT_DIR is not None:
        return _TEXT_DIR
    try:
        from Project.Sanad.config import BASE_DIR
        root = Path(BASE_DIR)
    except Exception:
        root = Path(__file__).resolve().parents[2]
    _TEXT_DIR = root / "data" / "mask_texts"
    _TEXT_DIR.mkdir(parents=True, exist_ok=True)
    return _TEXT_DIR
@router.post("/texts/save")
async def text_save(text: str = Query(...), name: str = Query("")):
    """Save a word/phrase to the text library (name defaults to the text)."""
    content = (text or "").strip()[:200]
    if not content:
        raise HTTPException(status_code=400, detail="empty text")
    stem = _safe_name(name or content)
    destination = _text_dir() / (stem + ".txt")
    await asyncio.to_thread(destination.write_text, content)
    return {"ok": True, "name": stem, "text": content}
@router.get("/texts/library")
async def text_library():
    """Return every saved text as ``{"name", "text"}``, sorted by path.

    ``text`` is a preview limited to the first 80 characters. Files that
    cannot be read are left out of the listing.
    """
    entries = []
    for path in sorted(_text_dir().glob("*.txt")):
        try:
            preview = path.read_text()[:80]
        except Exception:
            # Best-effort listing: skip unreadable entries.
            continue
        entries.append({"name": path.stem, "text": preview})
    return {"texts": entries}
@router.post("/texts/show/{name}")
async def text_show(name: str):
    """Scroll a saved text across the mask.

    Responds 404 when no text is stored under the sanitised name. Failures of
    the mask call are passed through ``_friendly`` and re-raised.
    """
    source = _text_dir() / (_safe_name(name) + ".txt")
    if not source.exists():
        raise HTTPException(status_code=404, detail="not found")
    message = source.read_text()
    face = _get_face()
    try:
        # Positional arguments are forwarded to set_text unchanged.
        outcome = await asyncio.to_thread(
            face.set_text, message, (255, 255, 255), None, None, 38
        )
    except Exception as exc:
        raise _friendly(exc)
    return outcome
@router.delete("/texts/{name}")
async def text_delete(name: str):
    """Delete a saved text.

    Idempotent: deleting a name that is not stored still returns ``ok``.

    Returns:
        ``{"ok": True, "deleted": <sanitised name>}``.
    """
    safe = _safe_name(name)
    # missing_ok removes the exists()/unlink() race: a concurrent delete of the
    # same file between the two calls used to surface as FileNotFoundError.
    (_text_dir() / (safe + ".txt")).unlink(missing_ok=True)
    return {"ok": True, "deleted": safe}

89
vendor/Sanad/dashboard/routes/motion.py vendored Normal file
View File

@ -0,0 +1,89 @@
"""Motion endpoints — arm actions, replay management."""
from __future__ import annotations
import asyncio
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
router = APIRouter()
def _block_if_movement_armed():
    """Raise 409 when locomotion movement is armed.

    Arm actions and walking are mutually exclusive. The arm controller has its
    own motion-block as the safety net; this check only gives the dashboard a
    clear message instead of a silent no-op. If the locomotion controller
    cannot be queried at all, the check is skipped.
    """
    try:
        from Project.Sanad.main import loco_controller  # type: ignore
        if loco_controller is None:
            movement_armed = False
        else:
            movement_armed = loco_controller.is_armed()
    except HTTPException:
        raise
    except Exception:
        # Cannot tell whether movement is armed: do not block.
        return
    if not movement_armed:
        return
    raise HTTPException(
        409, "Arm actions are disabled while movement is enabled. "
        "Disable movement in the Controller tab first.")
@router.get("/status")
async def motion_status():
    """Return the arm controller status, or an error body when no arm is attached."""
    from Project.Sanad.main import arm
    if arm:
        return arm.status()
    return {"error": "Arm not attached"}
@router.get("/actions")
async def list_actions():
    """Return the arm's available actions (an empty list when no arm is attached)."""
    from Project.Sanad.main import arm
    if arm:
        available = arm.list_actions()
    else:
        available = []
    return {"actions": available}
# Request body for POST /trigger. Either action_id or action_name selects the
# arm action; trigger_action checks action_id first and falls back to the name.
class TriggerPayload(BaseModel):
action_id: int | None = None
action_name: str | None = None
# Requested playback speed; trigger_action clamps it to the range 0.1..5.0.
speed: float = 1.0
@router.post("/trigger")
async def trigger_action(payload: TriggerPayload):
    """Trigger an arm action by id or by name.

    Responds 503 when no arm is attached, 409 when movement is armed, 404 when
    the arm raises ``KeyError`` for the requested action, and 400 when neither
    ``action_id`` nor ``action_name`` is given. The speed is clamped to
    0.1..5.0 before being passed on.
    """
    from Project.Sanad.main import arm
    if arm is None:
        raise HTTPException(503, "Arm controller not attached.")
    _block_if_movement_armed()
    speed = max(0.1, min(payload.speed, 5.0))
    # NOTE: TOCTOU on arm.is_busy is unavoidable from the route layer.
    # The internal arm controller has its own _lock + _is_busy guard inside
    # _execute() that returns silently if busy. We rely on that.
    if payload.action_id is not None:
        runner, key, target = arm.trigger_by_id, "action_id", payload.action_id
    elif payload.action_name:
        runner, key, target = arm.trigger_by_name, "action_name", payload.action_name
    else:
        raise HTTPException(400, "Provide action_id or action_name.")
    try:
        # The trigger call runs in a worker thread.
        await asyncio.to_thread(runner, target, speed)
    except KeyError as exc:
        raise HTTPException(404, str(exc))
    return {"ok": True, key: target, "speed": speed}
@router.post("/cancel")
async def cancel_motion():
    """Cancel the arm's current motion; responds 503 when no arm is attached."""
    from Project.Sanad.main import arm
    if arm is not None:
        arm.cancel()
        return {"ok": True, "cancelled": True}
    raise HTTPException(503, "Arm controller not attached.")
@router.post("/gestural-speaking")
async def toggle_gestural(enabled: bool = True):
    """Enable or disable gestural speaking on the brain.

    Args:
        enabled: desired state (query parameter, defaults to True).

    Returns:
        ``{"gestural_speaking": <state reported by the brain>}``.

    Raises:
        HTTPException: 503 when the brain is not attached. This matches how the
        arm handlers in this module report a missing subsystem, instead of
        failing with an AttributeError (HTTP 500).
    """
    from Project.Sanad.main import brain
    if brain is None:
        raise HTTPException(503, "Brain not attached.")
    brain.set_gestural_speaking(enabled)
    return {"gestural_speaking": brain.gestural_speaking}

View File

@ -0,0 +1,402 @@
"""Navigation tab — proxy to the web_nav3 Nav2 stack.
Routes live under /api/nav (the prefix is applied centrally in dashboard/app.py,
NOT here). This router is a thin HTTP proxy: it forwards dashboard requests to a
single module-level WebNav3Client, which itself talks to the standalone web_nav3
FastAPI service (default http://127.0.0.1:8765 + rosbridge on :9090).
Fault isolation, two layers:
1. The `from ...navigation import WebNav3Client` import is GUARDED. If the
navigation package can't be imported (missing dep, syntax error), this
module still imports cleanly — `_CLIENT` is None and every handler degrades
(GET /status returns {"available": False}; actions raise 503). This mirrors
how app.py loads each router in isolation.
2. WebNav3Client never raises into us by contract — every method returns a
clean dict / NavStatus even when web_nav3 is unreachable — so handlers just
forward the result. Blocking HTTP calls run off the event loop.
"""
from __future__ import annotations
import asyncio
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from Project.Sanad.core.logger import get_logger
from Project.Sanad.dashboard.routes import _arbiter
log = get_logger("navigation_route")
# Module-level router with NO prefix and NO tags — those are supplied by
# app.include_router(prefix="/api/nav", tags=["navigation"]) at registration time.
router = APIRouter()
# ── guarded optional import ─────────────────────────────────
# A broken navigation package must NOT stop this route module from importing —
# app.py would otherwise log the whole router as failed. Guard it and degrade.
try:
from Project.Sanad.navigation import WebNav3Client # type: ignore
_IMPORT_ERROR: str | None = None
except Exception as exc: # noqa: BLE001
WebNav3Client = None # type: ignore[assignment,misc]
_IMPORT_ERROR = f"{type(exc).__name__}: {exc}"
log.warning("navigation client unavailable — nav routes degraded: %s", _IMPORT_ERROR)
# ── config (env var -> dashboard config section -> default) ──
def _nav_config() -> dict:
    """Build the navigation connection settings.

    Each value is taken from the first non-empty source, in this order:
    environment variable, the ("dashboard", "navigation") config section,
    built-in default. All values are returned as strings.
    """
    import os
    from Project.Sanad.core.config_loader import section as _cfg_section
    cfg = _cfg_section("dashboard", "navigation")

    def _pick(env_key, cfg_key, fallback):
        # env var -> config -> default; empty values fall through.
        return str(os.environ.get(env_key) or cfg.get(cfg_key) or fallback)

    return {
        "web_nav3_url": _pick("WEB_NAV3_URL", "web_nav3_url", "http://127.0.0.1:8765"),
        "rosbridge_url": _pick("ROSBRIDGE_URL", "rosbridge_url", "ws://127.0.0.1:9090"),
        "robot": _pick("SANAD_ROBOT_NAME", "robot", "sanad"),
    }
_CFG = _nav_config()
# ── single module-level client ──────────────────────────────
# One WebNav3Client for the whole dashboard, built from config. If the import
# was guarded out (above), or construction fails, _CLIENT stays None and every
# handler degrades gracefully.
# Build the shared client only when the guarded import above produced a class.
if WebNav3Client is not None:
try:
_CLIENT = WebNav3Client(base_url=_CFG["web_nav3_url"], robot=_CFG["robot"])
log.info("WebNav3Client ready → %s (robot=%s)", _CFG["web_nav3_url"], _CFG["robot"])
except Exception as exc: # noqa: BLE001
# Construction failed: keep the module importable and record why, so
# _require() can include the reason in its 503 message.
_CLIENT = None
_IMPORT_ERROR = f"construct failed: {type(exc).__name__}: {exc}"
log.warning("WebNav3Client construction failed — nav routes degraded: %s", exc)
else:
# Import was guarded out: no client; _IMPORT_ERROR already holds the reason.
_CLIENT = None
def _require():
    """Return the shared navigation client, or raise 503 when there is none.

    Used by ACTION endpoints; the 503 detail includes the recorded import or
    construction error when one exists.
    """
    if _CLIENT is not None:
        return _CLIENT
    raise HTTPException(503, f"Navigation client unavailable. {_IMPORT_ERROR or ''}".strip())
def _claim_nav():
    """Claim the legs for Nav2 through the arbiter.

    Raises 409 when the arbiter refuses the claim, i.e. manual locomotion
    currently owns the legs.
    """
    if _arbiter.acquire_nav():
        return
    raise HTTPException(
        409, "Manual movement (Controller) is armed. Disarm it before navigating."
    )
# ── request bodies ──────────────────────────────────────────
# Body carrying a single place name (used by /goto, /save_here, /places/delete).
class _NameBody(BaseModel):
name: str
# Body carrying a mission id (used by /missions/run).
class _IdBody(BaseModel):
id: object # mission ids may be int or str; forward as-is
# Body for /start: both fields are forwarded to client.start().
class _StartBody(BaseModel):
mode: int = 2 # web_nav3 launch mode (e.g. 3 = localize against a saved map)
db_path: str | None = None # saved map to load (None = build fresh)
# Named pose in map coordinates. /save_at forwards all four fields;
# /goto_pose reads only x, y and yaw.
class _PoseBody(BaseModel):
name: str
x: float
y: float
yaw: float = 0.0
# Body for /places/rename: current name and replacement name.
class _RenameBody(BaseModel):
old: str
new: str
# ── status (never raises — degraded body when unavailable) ──
@router.get("/status")
async def status():
    """Report navigation status; never raises.

    Without a client the body is ``{"available": False, "error": ...}``.
    Otherwise the client's status is converted to a dict and returned with
    ``available`` set to True.
    """
    client = _CLIENT
    if client is None:
        return {"available": False, "error": _IMPORT_ERROR}
    snapshot = await asyncio.to_thread(client.status)
    # WebNav3Client.status() returns a NavStatus dataclass.
    if hasattr(snapshot, "as_dict"):
        payload = snapshot.as_dict()
    else:
        payload = dict(snapshot)
    payload["available"] = True
    return payload
# ── places / navigation ─────────────────────────────────────
@router.get("/places")
async def places(map_name: str | None = Query(None, alias="map")):
    """List saved places.

    With ``?map=<name>`` the listing is per-MAP (each map keeps its own
    places); without it the legacy per-robot store is used. Responds 503 when
    the navigation client is unavailable.
    """
    lister = _require().list_places
    return await asyncio.to_thread(lister, map_name)
@router.post("/goto")
async def goto(body: _NameBody):
    """Drive to a saved place by name.

    Claims the legs for Nav2 first (409 when manual movement is armed), then
    dispatches the goal through the client in a worker thread.

    Returns:
        The client's result, unchanged.

    Raises:
        HTTPException: 503 when the navigation client is unavailable, 409 when
        the arbiter refuses the claim.
    """
    client = _require()
    _claim_nav()
    try:
        res = await asyncio.to_thread(client.goto, body.name)
    except Exception:
        # The dispatch itself blew up, so no goal is running: give the legs
        # back instead of leaving manual loco locked out.
        _arbiter.release_nav()
        raise
    # A failed dispatch never drove the legs — release the gate so manual loco
    # isn't locked out by a goto that never started.
    if isinstance(res, dict) and not res.get("ok", True):
        _arbiter.release_nav()
        return res
    # Arm the arrival monitor, as /voice_goto and /goto_pose do, so the claim
    # is released when the goal ends rather than staying held until an
    # explicit /cancel. Best-effort: a missing monitor must not fail the drive.
    try:
        from Project.Sanad.navigation.goal_monitor import arm_goal
        arm_goal(body.name)
    except Exception as exc:  # noqa: BLE001
        log.debug("goal monitor arm skipped: %s", exc)
    return res
@router.post("/start")
async def start(body: _StartBody):
    """Start a navigation session: forwards ``mode`` and ``db_path`` to the client.

    Responds 503 when the navigation client is unavailable.
    """
    launcher = _require().start
    return await asyncio.to_thread(launcher, body.mode, body.db_path)
# Body for /load_map: path of the saved map database to load.
class _DbBody(BaseModel):
db_path: str
@router.post("/load_map")
async def load_map(body: _DbBody):
    """View a saved map: stop any running bringup, then localize against it.

    The work is delegated to ``client.load_map`` in a worker thread. Responds
    503 when the navigation client is unavailable.
    """
    loader = _require().load_map
    return await asyncio.to_thread(loader, body.db_path)
@router.post("/cancel")
async def cancel():
    """Cancel navigation and hand the legs back to manual control.

    Responds 503 when the navigation client is unavailable. When the client's
    reply is a dict, ``cancel_sent`` is added to report whether the rosbridge
    goal-cancel went out.
    """
    client = _require()
    reply = await asyncio.to_thread(client.cancel)
    # WebNav3Client.cancel() is a no-op server-side (it only returns a note),
    # so releasing the arbiter without truly stopping Nav2 would let the robot
    # keep driving while manual loco re-acquires the legs (double-drive). Send a
    # REAL goal-cancel over rosbridge first, and disarm the arrival monitor so a
    # stale terminal can't fire, THEN release.
    try:
        from Project.Sanad.navigation.goal_monitor import request_cancel, disarm
        disarm()
        was_sent = await asyncio.to_thread(request_cancel)
        if isinstance(reply, dict):
            reply = dict(reply, cancel_sent=bool(was_sent))
    except Exception as exc:  # noqa: BLE001
        log.debug("goal cancel skipped: %s", exc)
    _arbiter.release_nav()
    return reply
@router.post("/save_here")
async def save_here(body: _NameBody):
    """Save a place under ``body.name`` via ``client.save_here``.

    Responds 503 when the navigation client is unavailable.
    """
    saver = _require().save_here
    return await asyncio.to_thread(saver, body.name)
@router.post("/save_at")
async def save_at(body: _PoseBody, map_name: str | None = Query(None, alias="map")):
    """Save a named place at a map coordinate (from clicking the map).

    Per-MAP when ``?map=<name>`` is given. Re-saving an existing name MOVES
    the place. Responds 503 when the navigation client is unavailable.
    """
    saver = _require().save_at
    pose = (body.name, body.x, body.y, body.yaw)
    return await asyncio.to_thread(saver, *pose, map_name)
@router.post("/places/delete")
async def delete_place(body: _NameBody, map_name: str | None = Query(None, alias="map")):
    """Delete a saved place (per-map); 503 when the client is unavailable."""
    remover = _require().delete_place
    return await asyncio.to_thread(remover, body.name, map_name)
@router.post("/places/rename")
async def rename_place(body: _RenameBody, map_name: str | None = Query(None, alias="map")):
    """Rename a saved place (per-map); 503 when the client is unavailable."""
    renamer = _require().rename_place
    return await asyncio.to_thread(renamer, body.old, body.new, map_name)
# Body for POST /map_edits: the edit overlay, forwarded as-is to the client.
class _MapEditsBody(BaseModel):
edits: list # [[world_x, world_y, value], ...] value 0=free/erase, 100=wall
@router.get("/map_edits")
async def get_map_edits(map_name: str = Query(..., alias="map")):
    """Return the saved edit overlay for a map (erased points + painted walls).

    ``?map=<name>`` is required. Responds 503 when the client is unavailable.
    """
    fetcher = _require().get_map_edits
    return await asyncio.to_thread(fetcher, map_name)
@router.post("/map_edits")
async def save_map_edits(body: _MapEditsBody, map_name: str = Query(..., alias="map")):
    """Persist a map's edit overlay (Map Editor).

    ``?map=<name>`` is required. Responds 503 when the client is unavailable.
    """
    writer = _require().save_map_edits
    return await asyncio.to_thread(writer, map_name, body.edits)
# Body for /voice_goto: the place name as spoken; resolved by _resolve_place.
class _VoiceGotoBody(BaseModel):
place: str
def _resolve_place(client, spoken: str) -> dict:
"""Resolve a spoken place name against the ACTIVE map's places.
Strategy: exact (case-insensitive) single substring candidate
ambiguous / unknown. Returns a dict the caller (and ultimately Gemini)
can act on. Never raises.
"""
try:
st = client.status()
body = st.as_dict() if hasattr(st, "as_dict") else dict(st)
except Exception as exc: # noqa: BLE001
return {"ok": False, "reason": "status_error", "detail": str(exc)[:160]}
if not body.get("bringup_alive"):
return {"ok": False, "reason": "no_map",
"detail": "No navigation session is running — load a map first."}
active_map = body.get("active_map")
try:
places = client.list_places(active_map) or []
except Exception: # noqa: BLE001
places = []
names = [p.get("name") for p in places if isinstance(p, dict) and p.get("name")]
sl = (spoken or "").strip().lower()
if not sl:
return {"ok": False, "reason": "no_place", "map": active_map, "places": names}
exact = [n for n in names if n.lower() == sl]
if exact:
return {"ok": True, "resolved": exact[0], "map": active_map}
subs = []
for n in names:
nl = n.lower()
if sl in nl or nl in sl:
subs.append(n)
subs = list(dict.fromkeys(subs)) # de-dup, preserve order
if len(subs) == 1:
return {"ok": True, "resolved": subs[0], "map": active_map}
if len(subs) > 1:
return {"ok": False, "reason": "ambiguous", "candidates": subs, "map": active_map}
return {"ok": False, "reason": "unknown_place", "candidates": names, "map": active_map}
@router.get("/active")
async def active():
    """Navigation context for Gemini in a single call.

    Returns the active map, its mode and that map's place names, so the voice
    tools (list_places / where_am_i) don't have to guess the active map.
    Places are listed only while a bringup is alive; a failing place lookup
    yields an empty list. Responds 503 when the client is unavailable.
    """
    client = _require()
    snapshot = await asyncio.to_thread(client.status)
    info = snapshot.as_dict() if hasattr(snapshot, "as_dict") else dict(snapshot)
    place_names = []
    if info.get("bringup_alive"):
        try:
            listing = await asyncio.to_thread(client.list_places, info.get("active_map"))
            place_names = [
                entry.get("name")
                for entry in (listing or [])
                if isinstance(entry, dict) and entry.get("name")
            ]
        except Exception:  # noqa: BLE001
            place_names = []
    context = {
        "map": info.get("active_map"),
        "mode": info.get("mode"),
        "mode_label": info.get("mode_label"),
        "localizing": bool(info.get("localizing")),
        "bringup_alive": bool(info.get("bringup_alive")),
    }
    context["places"] = place_names
    return context
@router.post("/voice_goto")
async def voice_goto(body: _VoiceGotoBody):
    """Resolve a spoken place name and drive there — Gemini's navigate_to_place.

    Arbiter-gated (claims the legs for Nav2) and arms the arrival monitor so
    Gemini later hears [NAV ARRIVED]/[NAV FAILED]. Apart from the 503 raised
    when the navigation client is unavailable, failures are reported as a
    structured ``{"ok": False, "reason": ...}`` result the model can speak from.
    """
    client = _require()
    resolution = await asyncio.to_thread(_resolve_place, client, body.place or "")
    if not resolution.get("ok"):
        return resolution
    target = resolution["resolved"]
    # Claim the legs for Nav2 — refuse (don't raise) if manual loco is armed.
    claimed = _arbiter.acquire_nav()
    if not claimed:
        return {"ok": False, "reason": "manual_armed",
                "detail": "Manual movement (Controller) is armed — disarm it to navigate."}
    drive = await asyncio.to_thread(client.goto, target)
    dispatch_failed = isinstance(drive, dict) and not drive.get("ok", True)
    if dispatch_failed:
        # Nothing is driving: hand the legs back.
        _arbiter.release_nav()
        return {"ok": False, "reason": "dispatch_failed",
                "resolved": target, "detail": drive}
    # Arm arrival monitoring (best-effort; absence must not fail the drive).
    try:
        from Project.Sanad.navigation.goal_monitor import arm_goal
        arm_goal(target)
    except Exception as exc:  # noqa: BLE001
        log.debug("goal monitor arm skipped: %s", exc)
    return {"ok": True, "resolved": target, "map": resolution.get("map")}
@router.post("/goto_pose")
async def goto_pose(body: _PoseBody):
    """Arbiter-gate a coordinate nav goal (click-to-drive).

    The browser publishes the actual /goal_pose over rosbridge; this handler
    only CLAIMS the legs for Nav2 (409 if manual loco is armed) so the two
    stacks never both drive. The frontend sends the goal only after this
    returns ok. Responds 503 when the navigation client is unavailable.
    """
    _require()
    _claim_nav()
    # Arm the arrival monitor so this click-to-drive goal releases the arbiter
    # when it ends — without this, nav_active stays True forever after the goal
    # completes (the browser publishes the goal but never arms anything).
    goal_label = f"({body.x:.1f}, {body.y:.1f})"
    try:
        from Project.Sanad.navigation.goal_monitor import arm_goal
        arm_goal(goal_label)
    except Exception as exc:  # noqa: BLE001
        log.debug("goal monitor arm skipped: %s", exc)
    return {"ok": True, "x": body.x, "y": body.y, "yaw": body.yaw}
# ── maps / missions ─────────────────────────────────────────
@router.get("/maps")
async def maps():
    """List maps via ``client.list_maps``; 503 when the client is unavailable."""
    lister = _require().list_maps
    return await asyncio.to_thread(lister)
@router.get("/missions")
async def missions():
    """List missions via ``client.list_missions``; 503 when the client is unavailable."""
    lister = _require().list_missions
    return await asyncio.to_thread(lister)
@router.post("/missions/run")
async def run_mission(body: _IdBody):
    """Run a mission by id after claiming the legs for Nav2.

    Responds 503 when the client is unavailable and 409 when manual movement
    is armed. If the client reports a failed dispatch (``ok`` false) the claim
    is released again. The client's result is returned unchanged.
    """
    client = _require()
    _claim_nav()
    outcome = await asyncio.to_thread(client.run_mission, body.id)
    dispatch_failed = isinstance(outcome, dict) and not outcome.get("ok", True)
    if dispatch_failed:
        _arbiter.release_nav()
    return outcome
# ── config (what the SPA needs to render links / connect) ───
@router.get("/config")
async def config():
    """Expose the resolved connection settings the SPA needs to render links / connect."""
    exposed = ("web_nav3_url", "rosbridge_url", "robot")
    return {key: _CFG[key] for key in exposed}

98
vendor/Sanad/dashboard/routes/prompt.py vendored Normal file
View File

@ -0,0 +1,98 @@
"""Prompt management — view, edit, reload system prompts."""
from __future__ import annotations
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Project.Sanad.config import SCRIPTS_DIR
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.dashboard.routes._safe_io import (
atomic_write_text, MAX_UPLOAD_BYTES,
)
router = APIRouter()
# Filenames — SINGLE SOURCE in core.script_files
# (the literals below are only fallbacks for keys missing from that section).
_SCRIPTS = _cfg_section("core", "script_files")
# Persona script: read by _load_system_prompt, written by POST /update.
SCRIPT_PROMPT_PATH = SCRIPTS_DIR / _SCRIPTS.get("persona", "sanad_script.txt")
# Rules file: parsed into [SECTION] blocks by _load_rule_prompts.
RULE_PROMPT_PATH = SCRIPTS_DIR / _SCRIPTS.get("rules", "sanad_rule.txt")
# Size cap, in UTF-8 bytes, enforced by POST /update (same limit as uploads).
MAX_PROMPT_BYTES = MAX_UPLOAD_BYTES
# Default system prompt — SINGLE SOURCE in core.gemini_defaults
# (the literal below is the fallback when the config key is absent).
DEFAULT_SYSTEM_PROMPT = _cfg_section("core", "gemini_defaults").get(
"default_system_prompt",
"You are Sanad (Bousandah), a wise and friendly Emirati assistant. "
"Speak strictly in the UAE dialect (Khaleeji). "
"Be helpful, concise, and use local greetings like 'Marhaba' and 'Ya Khoy'."
)
def _load_system_prompt() -> str:
    """Return the persona script text, or the default prompt.

    The default is used when the script file does not exist or is empty after
    stripping. The file is read as UTF-8 with an optional BOM.
    """
    try:
        text = SCRIPT_PROMPT_PATH.read_text(encoding="utf-8-sig").strip()
    except FileNotFoundError:
        return DEFAULT_SYSTEM_PROMPT
    return text or DEFAULT_SYSTEM_PROMPT
def _load_rule_prompts() -> dict[str, str]:
    """Parse the rules file into its system and replay prompts.

    The file is split into sections by lines of the form ``[NAME]``; the
    ``SYSTEM_PROMPT`` and ``REPLAY_SYSTEM_PROMPT`` sections are returned. A
    missing file yields empty sections. When no system prompt results, the
    persona/default prompt from ``_load_system_prompt`` is used instead.
    """
    prompts = {"system_prompt": "", "replay_prompt": ""}
    try:
        raw = RULE_PROMPT_PATH.read_text(encoding="utf-8-sig").strip()
    except FileNotFoundError:
        raw = None
    if raw is not None:
        buckets: dict[str, list[str]] = {}
        active = None
        for raw_line in raw.splitlines():
            marker = raw_line.strip()
            if marker.startswith("[") and marker.endswith("]"):
                # A header line opens (or resets) a section.
                active = marker[1:-1].strip()
                buckets[active] = []
                continue
            if active is not None:
                buckets[active].append(raw_line.rstrip())
        prompts["system_prompt"] = "\n".join(buckets.get("SYSTEM_PROMPT", [])).strip()
        prompts["replay_prompt"] = "\n".join(buckets.get("REPLAY_SYSTEM_PROMPT", [])).strip()
    if not prompts["system_prompt"]:
        prompts["system_prompt"] = _load_system_prompt()
    return prompts
@router.get("/")
async def get_prompt():
    """Return both prompt file paths plus the current system prompt and rule prompts."""
    system_prompt = _load_system_prompt()
    rules = _load_rule_prompts()
    return {
        "script_path": str(SCRIPT_PROMPT_PATH),
        "rule_path": str(RULE_PROMPT_PATH),
        "system_prompt": system_prompt,
        "rules": rules,
    }
# Request body for POST /update: the full new persona script text.
class PromptUpdate(BaseModel):
content: str
@router.post("/update")
async def update_prompt(payload: PromptUpdate):
    """Replace the persona script with ``payload.content``.

    Responds 413 when the UTF-8 encoded content exceeds ``MAX_PROMPT_BYTES``
    and 500 when the file cannot be written. Trailing whitespace is trimmed
    and a single newline appended before writing. The reported ``length`` is
    the character count of the content as submitted.
    """
    encoded_size = len(payload.content.encode("utf-8"))
    if encoded_size > MAX_PROMPT_BYTES:
        raise HTTPException(413, f"Prompt too large (max {MAX_PROMPT_BYTES} bytes).")
    normalized = payload.content.rstrip() + "\n"
    try:
        SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
        atomic_write_text(SCRIPT_PROMPT_PATH, normalized)
    except OSError as exc:
        raise HTTPException(500, f"Could not write prompt: {exc}")
    return {"ok": True, "path": str(SCRIPT_PROMPT_PATH), "length": len(payload.content)}
@router.post("/reload")
async def reload_prompts():
    """Re-read the prompt files from disk and return the resulting prompts and paths."""
    rules = _load_rule_prompts()
    response = {"ok": True}
    response["system_prompt"] = rules["system_prompt"]
    response["replay_prompt"] = rules["replay_prompt"]
    response["script_path"] = str(SCRIPT_PROMPT_PATH)
    response["rule_path"] = str(RULE_PROMPT_PATH)
    return response

View File

@ -0,0 +1,457 @@
"""Recognition tab — camera vision + face gallery + hot toggles.
Single router covering:
- Vision / Face Recognition toggles (hot — no Gemini restart needed)
- Live camera preview (latest JPEG drop)
- Face gallery CRUD: enroll, upload, capture, rename, delete, ZIP
- Per-photo download + delete
Toggle changes write data/.recognition_state.json atomically. The Gemini
child polls that file at 1 Hz and applies changes mid-session.
"""
from __future__ import annotations
import io
from typing import Optional
from fastapi import APIRouter, File, HTTPException, Query, UploadFile
from fastapi.responses import FileResponse, Response, StreamingResponse
from pydantic import BaseModel
from Project.Sanad.config import BASE_DIR
from Project.Sanad.core.logger import get_logger
from Project.Sanad.dashboard.routes._safe_io import check_upload_size
from Project.Sanad.vision import recognition_state
log = get_logger("recognition_routes")
router = APIRouter()
# ── paths (resolved from BASE_DIR) ──────────────────────────
# JSON state file (toggles + gallery version) read and mutated by the routes
# below through recognition_state.
STATE_PATH = BASE_DIR / "data" / ".recognition_state.json"
# NOTE(review): FACES_DIR and ALLOWED_IMAGE_EXTS are not referenced in the
# part of this module visible here; presumably used further down — confirm.
FACES_DIR = BASE_DIR / "data" / "faces"
ALLOWED_IMAGE_EXTS = {".jpg", ".jpeg", ".png"}
# ── helpers ─────────────────────────────────────────────────
def _get_camera():
"""Lazy import to avoid circular import on dashboard load."""
try:
from Project.Sanad.main import camera # type: ignore
return camera
except Exception:
return None
def _get_gallery():
"""Lazy import — same reason."""
try:
from Project.Sanad.main import gallery # type: ignore
return gallery
except Exception:
return None
def _bump_and_write_state(**changes) -> recognition_state.RecognitionState:
    """Persist the given state changes and return the resulting state.

    ``changes`` are keyword fields such as ``vision_enabled`` or
    ``face_rec_enabled``; they are handed to ``recognition_state.mutate``.
    """
    updated = recognition_state.mutate(STATE_PATH, **changes)
    return updated
def _bump_gallery_version() -> int:
    """Increment the stored ``gallery_version`` by one and return the new value."""
    next_version = recognition_state.read(STATE_PATH).gallery_version + 1
    recognition_state.mutate(STATE_PATH, gallery_version=next_version)
    return next_version
# ── state + toggles ─────────────────────────────────────────
@router.get("/state")
async def get_state():
    """Return the current toggle, camera and gallery state.

    Face/photo counts fall back to whatever was counted so far (0 by default)
    when the gallery is missing or listing it fails. Without a camera
    subsystem a placeholder camera status is returned.
    """
    st = recognition_state.read(STATE_PATH)
    cam = _get_camera()
    gallery = _get_gallery()
    totals = {"faces": 0, "photos": 0}
    if gallery is not None:
        try:
            entries = gallery.list()
            totals["faces"] = len(entries)
            totals["photos"] = sum(len(entry.sample_paths) for entry in entries)
        except Exception:
            # Counts are informational only; keep the state call working.
            pass
    if cam is None:
        camera_info = {
            "running": False, "backend": None, "error": "camera subsystem unavailable"
        }
    else:
        camera_info = cam.status()
    return {
        "vision_enabled": st.vision_enabled,
        "face_rec_enabled": st.face_rec_enabled,
        "gallery_version": st.gallery_version,
        "camera": camera_info,
        "faces_count": totals["faces"],
        "photos_count": totals["photos"],
    }
@router.post("/vision")
async def set_vision(on: bool = Query(...)):
    """Enable / disable camera vision (hot — no Gemini restart).

    Starts the camera when turning on and stops it when turning off, then
    persists ``vision_enabled``. Responds 503 when the camera subsystem is
    missing or the camera cannot start; in the latter case the stored state is
    written as disabled first.
    """
    cam = _get_camera()
    if cam is None:
        log.warning("vision toggle requested but camera subsystem unavailable")
        raise HTTPException(503, "Camera subsystem not available.")
    running = cam.is_running()
    if on:
        if not running and not cam.start():
            log.warning("vision ON requested but camera.start() failed: %s",
                        cam.error or "no backend")
            _bump_and_write_state(vision_enabled=False)
            raise HTTPException(503,
                                f"Camera could not start (no backend). {cam.error or ''}")
    elif running:
        cam.stop()
    state = _bump_and_write_state(vision_enabled=bool(on))
    backend_label = cam.backend if cam.is_running() else "none"
    log.info("vision %s (backend=%s)", "ON" if on else "OFF", backend_label)
    return {"ok": True, "vision_enabled": state.vision_enabled,
            "camera": cam.status()}
@router.post("/face-rec")
async def set_face_rec(on: bool = Query(...)):
    """Enable / disable face recognition (hot — no Gemini restart).

    The Gemini child picks the change up within ~1 s: ON re-sends the
    gallery primer and tells Gemini it can recognise people; OFF tells
    Gemini to disregard the gallery and stop identifying anyone. Both
    take effect on the live session — no reconnect needed.
    """
    enabled = bool(on)
    state = _bump_and_write_state(face_rec_enabled=enabled)
    log.info("face recognition %s", "ON" if on else "OFF")
    return {"ok": True, "face_rec_enabled": state.face_rec_enabled}
@router.post("/sync")
async def sync_gallery():
    """Bump gallery_version so the child re-sends the primer if face-rec is ON."""
    new_version = _bump_gallery_version()
    log.info("gallery sync requested → v.%d", new_version)
    return {"ok": True, "gallery_version": new_version}
# ── live preview ────────────────────────────────────────────
@router.get("/frame.jpg")
async def latest_frame():
    """Serve the most recent camera frame as JPEG.

    The frame comes straight from the daemon's in-memory cache (no file drop —
    frames are also pushed to the Gemini child over its stdin). Responds 503
    without a camera subsystem and 404 while no frame has been captured.
    """
    cam = _get_camera()
    if cam is None:
        raise HTTPException(503, "Camera subsystem unavailable.")
    frame = cam.snapshot_jpeg()
    if not frame:
        raise HTTPException(404, "No frame captured yet.")
    no_cache = {"Cache-Control": "no-store, must-revalidate"}
    return Response(content=frame, media_type="image/jpeg", headers=no_cache)
# ── camera resolution / quality ─────────────────────────────
# Request body for POST /camera-config. Every field is optional and is
# range-checked by set_camera_config before being passed to cam.reconfigure().
class CameraConfigPayload(BaseModel):
width: Optional[int] = None
height: Optional[int] = None
fps: Optional[int] = None
jpeg_quality: Optional[int] = None
@router.post("/camera-config")
async def set_camera_config(payload: CameraConfigPayload):
    """Hot-swap the camera capture profile (resolution / fps / JPEG quality).

    If the camera is running, CameraDaemon.reconfigure() rebuilds the
    pipeline at the new profile (~0.5 s gap). If idle, the values just
    take effect on the next start. Bounds are sanity-checked here so a
    fat-fingered value can't wedge the daemon.

    Returns:
        ``{"ok": True, "profile": <new profile>, "camera": <camera status>}``.

    Raises:
        HTTPException: 503 without a camera subsystem; 400 when a supplied
        field is outside its allowed range.
    """
    cam = _get_camera()
    if cam is None:
        raise HTTPException(503, "Camera subsystem unavailable.")
    # (field, inclusive minimum, inclusive maximum), checked in this order.
    limits = (
        ("width", 160, 1920),
        ("height", 120, 1080),
        ("fps", 1, 60),
        ("jpeg_quality", 10, 95),
    )
    for field, low, high in limits:
        value = getattr(payload, field)
        if value is not None and not (low <= value <= high):
            # The message names both bounds with a separator; they used to be
            # fused into one number, e.g. "(1601920)".
            raise HTTPException(400, f"{field} out of range ({low}-{high})")
    profile = cam.reconfigure(
        width=payload.width, height=payload.height,
        fps=payload.fps, jpeg_quality=payload.jpeg_quality,
    )
    log.info("camera reconfigured via dashboard → %s", profile)
    return {"ok": True, "profile": profile, "camera": cam.status()}
# ── face gallery routes ─────────────────────────────────────
def _validate_image(content: bytes, filename: str | None = None) -> None:
    """Reject oversize uploads and anything that is not JPEG or PNG.

    The type is detected from the leading signature bytes, not from the file
    name. Raises 400 for content shorter than 16 bytes or with an unknown
    signature; ``filename`` is only used in the error message.
    """
    check_upload_size(content)
    if len(content) < 16:
        raise HTTPException(400, "Image too small / empty.")
    jpeg_magic = b"\xff\xd8\xff"
    png_magic = b"\x89PNG\r\n\x1a\n"
    if content.startswith((jpeg_magic, png_magic)):
        return
    raise HTTPException(
        400,
        f"Only JPEG/PNG accepted (got {filename or 'unknown'}).",
    )
def _entry_to_dict(entry) -> dict:
photos = []
for p in entry.sample_paths:
try:
photos.append({"name": p.name, "size_bytes": p.stat().st_size})
except OSError:
continue
return {
"id": entry.id,
"name": entry.name,
"description": entry.description,
"added_at": entry.added_at,
"photos": photos,
}
@router.get("/faces")
async def list_faces():
    """List every enrolled face with its photos; 503 without a gallery subsystem."""
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    entries = gallery.list()
    faces = []
    for entry in entries:
        faces.append(_entry_to_dict(entry))
    return {"faces": faces, "total": len(entries)}
# Body for POST /faces/{face_id}/rename; rename_face logs a missing name as "(unnamed)".
class RenamePayload(BaseModel):
name: Optional[str] = None
# Body for POST /faces/{face_id}/describe; an empty value is logged as "cleared".
class DescribePayload(BaseModel):
description: Optional[str] = None
@router.post("/faces/enroll")
async def enroll_from_camera(name: Optional[str] = Query(default=None),
                             description: Optional[str] = Query(default=None)):
    """Create a new face from a fresh camera frame.

    Responds 503 without a gallery subsystem and 409 when the camera is not
    running or has no frame yet. Name and description are stripped when
    given. The gallery version is bumped after the face is created.
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    cam = _get_camera()
    if cam is None or not cam.is_running():
        raise HTTPException(409, "Camera is not running. Toggle Vision ON first.")
    # get_fresh_frame waits briefly for a current frame so the enrolled
    # photo is the scene the user is posing for, not a stale buffer.
    frame = cam.get_fresh_frame(max_age_s=0.5, timeout_s=1.5)
    if not frame:
        raise HTTPException(409, "Camera has no frame yet. Wait a moment and retry.")
    clean_name = name.strip() if name else None
    clean_description = description.strip() if description else None
    entry = gallery.create_face([frame], name=clean_name, description=clean_description)
    version = _bump_gallery_version()
    log.info("enrolled face_%d via camera (name=%s, desc=%s, v.%d)",
             entry.id, name or "(unnamed)",
             "yes" if description else "no", version)
    return {"ok": True, "face": _entry_to_dict(entry)}
@router.post("/faces/upload")
async def enroll_from_upload(
    files: list[UploadFile] = File(...),
    name: Optional[str] = Query(default=None),
    description: Optional[str] = Query(default=None),
):
    """Create a new face from one or more uploaded images.

    Every file is read and validated before the face is created. Responds 503
    without a gallery subsystem and 400 when no file is supplied or a file
    fails validation. The gallery version is bumped after the face is created.
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    if not files:
        raise HTTPException(400, "At least one image file required.")
    samples: list[bytes] = []
    for upload in files:
        raw = await upload.read()
        _validate_image(raw, upload.filename)
        samples.append(raw)
    clean_name = name.strip() if name else None
    clean_description = description.strip() if description else None
    entry = gallery.create_face(samples, name=clean_name, description=clean_description)
    version = _bump_gallery_version()
    log.info("enrolled face_%d via upload (%d photos, name=%s, desc=%s, v.%d)",
             entry.id, len(samples), name or "(unnamed)",
             "yes" if description else "no", version)
    return {"ok": True, "face": _entry_to_dict(entry)}
@router.post("/faces/{face_id}/capture")
async def capture_to_face(face_id: int):
    """Add a new sample (from the camera) to an existing face.

    Returns:
        ``{"ok": True, "added": <stored file name>, "face": <updated entry>}``.

    Raises:
        HTTPException: 503 without a gallery subsystem; 409 when the camera is
        not running or has no frame yet; 404 when the gallery reports the face
        as missing.
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    cam = _get_camera()
    if cam is None or not cam.is_running():
        raise HTTPException(409, "Camera is not running. Toggle Vision ON first.")
    jpeg = cam.get_fresh_frame(max_age_s=0.5, timeout_s=1.5)
    if not jpeg:
        raise HTTPException(409, "Camera has no frame yet.")
    try:
        fname = gallery.add_photo(face_id, jpeg)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc))
    v = _bump_gallery_version()
    # Separator between face id and file name: the format used to fuse them
    # into one token ("face_3photo.jpg").
    log.info("captured new photo for face_%d → %s (v.%d)", face_id, fname, v)
    return {"ok": True, "added": fname, "face": _entry_to_dict(gallery.get(face_id))}
@router.post("/faces/{face_id}/upload")
async def upload_to_face(face_id: int, files: list[UploadFile] = File(...)):
    """Add one or more uploaded samples to an existing face.

    All files are read and validated BEFORE anything is stored. Previously a
    bad file in the middle of the batch aborted the request after earlier
    photos had already been added, leaving the gallery changed without a
    gallery_version bump.

    Returns:
        ``{"ok": True, "added": [<stored file names>], "face": <updated entry>}``.

    Raises:
        HTTPException: 503 without a gallery subsystem; 404 when the face does
        not exist; 400 (from validation) when any file is not an acceptable
        image.
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    if gallery.get(face_id) is None:
        raise HTTPException(404, f"face_{face_id} not found")
    # Pass 1: read + validate the whole batch; nothing is written yet.
    contents: list[bytes] = []
    for f in files:
        content = await f.read()
        _validate_image(content, f.filename)
        contents.append(content)
    # Pass 2: store the validated samples.
    added: list[str] = []
    for content in contents:
        try:
            added.append(gallery.add_photo(face_id, content))
        except FileNotFoundError as exc:
            raise HTTPException(404, str(exc))
    v = _bump_gallery_version()
    log.info("uploaded %d photo(s) to face_%d (v.%d)", len(added), face_id, v)
    return {"ok": True, "added": added,
            "face": _entry_to_dict(gallery.get(face_id))}
@router.post("/faces/{face_id}/rename")
async def rename_face(face_id: int, payload: RenamePayload):
    """Rename an enrolled face and bump the gallery version.

    Returns:
        ``{"ok": True, "face": <updated entry>}``.

    Raises:
        HTTPException: 503 without a gallery subsystem; 404 when the gallery
        reports the face as missing.
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    try:
        gallery.rename(face_id, payload.name)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc))
    v = _bump_gallery_version()
    # Separator between face id and new name: the format used to fuse them
    # into one token ("face_3Ali").
    log.info("renamed face_%d → %s (v.%d)", face_id,
             payload.name or "(unnamed)", v)
    return {"ok": True, "face": _entry_to_dict(gallery.get(face_id))}
@router.post("/faces/{face_id}/describe")
async def describe_face(face_id: int, payload: DescribePayload):
    """Set or clear the free-text description stored for a face.

    Per the original author's note, the description is folded into the Gemini
    primer turn so Gemini can reference it.

    Raises:
        HTTPException: 503 when the gallery is unavailable; 404 when the
            gallery raises FileNotFoundError for this face.
    """
    store = _get_gallery()
    if store is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")

    try:
        store.set_description(face_id, payload.description)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc))

    version = _bump_gallery_version()
    # A falsy description is logged as "cleared".
    action = "set" if payload.description else "cleared"
    log.info("described face_%d (%s, v.%d)", face_id, action, version)

    refreshed = store.get(face_id)
    return {"ok": True, "face": _entry_to_dict(refreshed)}
@router.delete("/faces/{face_id}")
async def delete_face(face_id: int):
    """Delete an enrolled face and bump the gallery version.

    Raises:
        HTTPException: 503 when the gallery is unavailable; 404 when the
            gallery raises FileNotFoundError for this face.
    """
    store = _get_gallery()
    if store is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")

    try:
        store.delete_face(face_id)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc))

    version = _bump_gallery_version()
    log.info("deleted face_%d (v.%d)", face_id, version)
    return {"ok": True, "deleted": face_id}
@router.delete("/faces/{face_id}/photo/{photo_name}")
async def delete_photo(face_id: int, photo_name: str):
    """Delete one photo of a face and bump the gallery version.

    Raises:
        HTTPException: 503 when the gallery is unavailable; 400 for a photo
            name containing "/", ".." or NUL, or when the gallery raises
            ValueError; 404 when the gallery raises FileNotFoundError.
    """
    store = _get_gallery()
    if store is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")

    # Only plain file names are accepted: no separators, no parent refs, no NUL.
    forbidden = ("/", "..", "\x00")
    if any(token in photo_name for token in forbidden):
        raise HTTPException(400, "Invalid photo name.")

    try:
        store.delete_photo(face_id, photo_name)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc))
    except ValueError as exc:
        raise HTTPException(400, str(exc))

    version = _bump_gallery_version()
    log.info("deleted photo %s from face_%d (v.%d)", photo_name, face_id, version)
    return {"ok": True, "deleted": photo_name}
@router.get("/faces/{face_id}/photo/{photo_name}")
async def get_photo(face_id: int, photo_name: str,
                    download: int = Query(default=0)):
    """Serve a single photo; pass ?download=1 for attachment disposition.

    Raises:
        HTTPException: 503 when the gallery is unavailable; 400 for a photo
            name containing "/", ".." or NUL; 404 when the gallery has no
            such photo.
    """
    store = _get_gallery()
    if store is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")

    forbidden = ("/", "..", "\x00")
    if any(token in photo_name for token in forbidden):
        raise HTTPException(400, "Invalid photo name.")

    photo_path = store.get_photo(face_id, photo_name)
    if photo_path is None:
        raise HTTPException(404, "Photo not found.")

    # Anything that is not a .png is served as JPEG.
    if photo_path.suffix.lower() == ".png":
        media = "image/png"
    else:
        media = "image/jpeg"

    extra_headers = (
        {"Content-Disposition": f'attachment; filename="face_{face_id}_{photo_name}"'}
        if download else {}
    )
    return FileResponse(photo_path, media_type=media, headers=extra_headers)
@router.get("/faces/{face_id}/download.zip")
async def download_face_zip(face_id: int):
    """Return the bytes produced by the gallery's zip_face() as a ZIP attachment.

    Raises:
        HTTPException: 503 when the gallery is unavailable; 404 when the
            gallery raises FileNotFoundError for this face.
    """
    store = _get_gallery()
    if store is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")

    try:
        archive = store.zip_face(face_id)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc))

    # The archive is fully in memory, so its exact length can be advertised.
    response_headers = {
        "Content-Disposition": f'attachment; filename="face_{face_id}.zip"',
        "Content-Length": str(len(archive)),
    }
    return StreamingResponse(
        io.BytesIO(archive),
        media_type="application/zip",
        headers=response_headers,
    )

302
vendor/Sanad/dashboard/routes/records.py vendored Normal file
View File

@ -0,0 +1,302 @@
"""Saved records management — list, play, pause, resume, stop, rename, delete.
Manages WAV recordings saved via the typed replay engine.
"""
from __future__ import annotations
import json
import threading
from pathlib import Path
from typing import Any
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Project.Sanad.config import AUDIO_RECORDINGS_DIR
from Project.Sanad.dashboard.routes._safe_io import (
safe_filename, safe_path_under, atomic_write_json,
)
router = APIRouter()
RECORDS_INDEX = AUDIO_RECORDINGS_DIR / "records.json"
_INDEX_LOCK = threading.Lock()
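# Strong refs to fire-and-forget asyncio tasks. The event loop only keeps a
# weak reference to tasks, so an unreferenced create_task() result can be
# garbage-collected (cancelling the work) before it finishes. Mirrors replay.py.
# NOTE: play_record starts a dedicated thread rather than a task, so nothing
# in this module currently adds to this set.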
import asyncio as _asyncio # noqa: E402
_BG_TASKS: set[_asyncio.Task] = set()
def _load_index() -> dict[str, Any]:
    """Load the records index from RECORDS_INDEX.

    Returns:
        The parsed index dict, or a fresh empty index when the file is
        missing or unusable.

    An unusable file (unreadable, invalid JSON, undecodable text, or valid
    JSON whose root is not an object) is renamed with a ".corrupt" suffix
    rather than deleted. The root-type check matters because every caller
    uses `.get(...)` on the result.
    """
    empty: dict[str, Any] = {"records": [], "total_records": 0, "last_updated": ""}
    if not RECORDS_INDEX.exists():
        return empty
    try:
        with open(RECORDS_INDEX, "r", encoding="utf-8") as f:
            loaded = json.load(f)
        if isinstance(loaded, dict):
            return loaded
    except (ValueError, OSError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        pass
    # Back up the corrupt file rather than nuking it.
    try:
        RECORDS_INDEX.rename(RECORDS_INDEX.with_suffix(".json.corrupt"))
    except OSError:
        pass
    return empty
def _save_index(payload: dict[str, Any]):
    """Write the index to RECORDS_INDEX via atomic_write_json.

    The recordings directory is created if needed and `total_records` is
    refreshed in place from the length of `records` before writing.
    """
    AUDIO_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)
    record_count = len(payload.get("records", []))
    payload["total_records"] = record_count
    atomic_write_json(RECORDS_INDEX, payload)
def _resolve_path(path_str: str) -> Path:
    """Turn a stored record path into a Path.

    Relative values (new-style basenames) are joined onto
    AUDIO_RECORDINGS_DIR. Absolute values (legacy records) are returned
    unchanged, so this function does NOT guarantee the result lies inside
    the recordings directory. An empty value yields AUDIO_RECORDINGS_DIR
    itself.
    """
    if path_str:
        candidate = Path(path_str)
        return candidate if candidate.is_absolute() else AUDIO_RECORDINGS_DIR / candidate
    return AUDIO_RECORDINGS_DIR
def _reconcile(payload: dict[str, Any]) -> tuple[dict[str, Any], int]:
    """Drop index entries whose audio files are no longer both on disk.

    An entry is kept only when its `speaker_recording` and
    `gemini_raw_output` paths both exist; entries with a missing or
    malformed `files` structure are dropped as well.

    Returns:
        (payload, removed): the same dict, updated in place, and the number
        of entries dropped.
    """
    def _files_present(entry) -> bool:
        try:
            files = entry["files"]
            speaker = _resolve_path(files["speaker_recording"]["path"])
            raw = _resolve_path(files["gemini_raw_output"]["path"])
            return speaker.exists() and raw.exists()
        except (KeyError, TypeError):
            return False

    entries = list(payload.get("records", []))
    survivors = [entry for entry in entries if _files_present(entry)]
    dropped = len(entries) - len(survivors)

    payload["records"] = survivors
    payload["total_records"] = len(survivors)
    return payload, dropped
@router.get("/")
async def list_records():
    """Return the records index, pruning entries whose files are gone.

    The pruned index is written back only when something was removed.
    """
    with _INDEX_LOCK:
        index, dropped = _reconcile(_load_index())
        if dropped:
            _save_index(index)
    return index
class RecordPlay(BaseModel):
    """Request body for POST /play."""
    # Name of the record to look up in the index.
    record_name: str
    # "speaker" plays the speaker recording; play_record treats any other
    # value as the raw Gemini output.
    file_kind: str = "speaker"  # speaker | raw
@router.post("/play")
async def play_record(payload: RecordPlay):
    """Start playing one file of a saved record and return immediately.

    Raises:
        HTTPException: 404 when the record is unknown, when its index entry
            has no path for the requested file kind (previously an uncaught
            KeyError, i.e. a 500), or when the file is missing on disk; 400
            when the stored path resolves outside the recordings directory.
    """
    with _INDEX_LOCK:
        index = _load_index()
        entry = next(
            (r for r in index.get("records", [])
             if r.get("record_name") == payload.record_name),
            None,
        )
    if entry is None:
        raise HTTPException(404, f"Record not found: {payload.record_name}")

    # Anything other than "speaker" selects the raw Gemini output.
    file_key = "speaker_recording" if payload.file_kind == "speaker" else "gemini_raw_output"
    try:
        stored_path = entry["files"][file_key]["path"]
    except (KeyError, TypeError):
        raise HTTPException(
            404, f"Record {payload.record_name} has no {file_key} file.")

    raw_path = _resolve_path(stored_path).resolve()
    base = AUDIO_RECORDINGS_DIR.resolve()
    try:
        raw_path.relative_to(base)
    except ValueError:
        raise HTTPException(400, "Record path outside recordings directory.")
    if not raw_path.exists():
        raise HTTPException(404, f"File not found: {raw_path.name}")

    from Project.Sanad.main import audio_mgr

    # Fire-and-forget on a DEDICATED daemon thread — NOT asyncio.to_thread.
    # to_thread runs on the shared default executor, which gets starved while
    # the dashboard services the live-voice child's reconnect chatter; that
    # delayed record playback by several seconds (clip silent, counter parked).
    # A dedicated thread starts immediately regardless of executor/event-loop
    # load. play_wav blocks for the clip duration and serves pause/stop via
    # _play_state; the UI stays responsive because this handler returns now.
    # Python keeps running threads alive, so no ref is needed to prevent GC.
    # (`threading` is already imported at module level.)
    threading.Thread(
        target=audio_mgr.play_wav, args=(raw_path, payload.record_name),
        name="record-playback", daemon=True,
    ).start()
    return {"ok": True, "record_name": payload.record_name,
            "file_kind": payload.file_kind, "path": str(raw_path)}
@router.post("/pause")
async def pause_playback():
    """Pause playback; the audio manager's response is returned unchanged."""
    from Project.Sanad.main import audio_mgr

    outcome = audio_mgr.pause_playback()
    return outcome
@router.post("/resume")
async def resume_playback():
    """Resume playback; the audio manager's response is returned unchanged."""
    from Project.Sanad.main import audio_mgr

    outcome = audio_mgr.resume_playback()
    return outcome
@router.post("/seek")
async def seek_playback(position_sec: float):
    """Jump to `position_sec` (seconds) in the clip that is currently playing.

    Used by the waveform scrubber. Per the original note, the call is a no-op
    reporting ok=False when nothing is playing; the audio manager's response
    is returned unchanged.
    """
    from Project.Sanad.main import audio_mgr

    outcome = audio_mgr.seek_playback(position_sec)
    return outcome
@router.post("/stop")
async def stop_playback():
    """Stop playback, running the audio manager's stop call in a worker thread."""
    from Project.Sanad.main import audio_mgr
    import asyncio

    stopper = audio_mgr.stop_playback
    await asyncio.to_thread(stopper)
    return {"ok": True, "stopped": True}
@router.get("/playback-status")
async def playback_status():
    """Return the audio manager's playback status unchanged."""
    from Project.Sanad.main import audio_mgr

    snapshot = audio_mgr.playback_status()
    return snapshot
@router.post("/live-hold")
async def set_live_hold(on: bool):
    """Set the manual hold for the live-Gemini pause.

    Per the original note: on=True pauses the live voice and keeps it paused
    (records won't resume it) until on=False is sent; on=False is the AUTO
    default, where records pause Gemini only for the duration of the clip.

    Returns:
        {"live_hold": <value returned by audio_mgr.set_live_voice_hold(on)>}
    """
    from Project.Sanad.main import audio_mgr

    held = audio_mgr.set_live_voice_hold(on)
    return {"live_hold": held}
class RecordRename(BaseModel):
    """Request body for POST /rename."""
    # Current name of the record in the index.
    record_name: str
    # Desired name; rename_record sanitises it and strips a trailing ".wav".
    new_name: str
@router.post("/rename")
async def rename_record(payload: RecordRename):
    """Rename a record: its WAV files on disk and its entry in the index.

    The renames are planned and collision-checked for ALL files before any
    file is touched. Previously a name collision on the second file raised
    409 after the first file had already been renamed and before the index
    was saved, leaving the index pointing at a file that no longer existed.

    Raises:
        HTTPException: 400 for an unusable new name; 404 when the record is
            unknown; 409 when a target file name is already taken.
    """
    new_name = safe_filename(payload.new_name)
    # Strip any extension the user provided — we add our own.
    if new_name.lower().endswith(".wav"):
        new_name = new_name[:-4]
    if not new_name or new_name.startswith("."):
        raise HTTPException(400, "Invalid new name.")

    with _INDEX_LOCK:
        index = _load_index()
        entry = next(
            (r for r in index.get("records", [])
             if r.get("record_name") == payload.record_name),
            None,
        )
        if entry is None:
            raise HTTPException(404, f"Record not found: {payload.record_name}")

        base = AUDIO_RECORDINGS_DIR.resolve()

        # Phase 1: work out every rename and reject collisions up front.
        planned: list[tuple[str, Path, Path]] = []
        for key in ("speaker_recording", "gemini_raw_output"):
            try:
                old_path = _resolve_path(entry["files"][key]["path"]).resolve()
                old_path.relative_to(base)  # ensure inside recordings dir
            except (KeyError, ValueError):
                continue
            if not old_path.exists():
                continue
            suffix = "_raw.wav" if key == "gemini_raw_output" else ".wav"
            new_path = safe_path_under(AUDIO_RECORDINGS_DIR, f"{new_name}{suffix}")
            # A target currently occupied by a file that an EARLIER planned
            # rename moves away will be free by the time we get to it.
            freed_by_plan = {src for _k, src, _dst in planned}
            if new_path.exists() and new_path.resolve() not in freed_by_plan:
                raise HTTPException(409, f"File already exists: {new_path.name}")
            planned.append((key, old_path, new_path))

        # Phase 2: perform the renames and update the entry.
        for key, old_path, new_path in planned:
            old_path.rename(new_path)
            entry["files"][key]["path"] = new_path.name  # basename — portable
            entry["files"][key]["name"] = new_path.name
        entry["record_name"] = new_name
        _save_index(index)
    return {"ok": True, "record": entry}
class RecordDelete(BaseModel):
    """Request body for POST /delete."""
    # Name of the record to remove from the index (first match only).
    record_name: str
@router.post("/delete")
async def delete_record(payload: RecordDelete):
    """Delete one record: unlink its files and drop it from the index.

    Only the first index entry with a matching name is removed. Files are
    unlinked only when they are regular files inside the recordings
    directory. A file that cannot be removed is skipped instead of aborting
    the request, so the index is always saved (matching /delete-bulk).

    Raises:
        HTTPException: 404 when no record has the given name.
    """
    with _INDEX_LOCK:
        index = _load_index()
        kept = []
        deleted_entry = None
        for r in index.get("records", []):
            if r.get("record_name") == payload.record_name and deleted_entry is None:
                deleted_entry = r
            else:
                kept.append(r)
        if deleted_entry is None:
            raise HTTPException(404, f"Record not found: {payload.record_name}")

        base = AUDIO_RECORDINGS_DIR.resolve()
        deleted_files = []
        for fi in deleted_entry.get("files", {}).values():
            try:
                # _resolve_path handles new-style basenames (resolved under
                # AUDIO_RECORDINGS_DIR) as well as legacy absolute paths.
                # A raw Path(basename) would resolve vs CWD and fall outside
                # base, so the relative_to guard would skip the unlink and the
                # WAV would be orphaned on disk. Mirror play_record/rename_record.
                p = _resolve_path(fi.get("path", "")).resolve()
                p.relative_to(base)  # never delete files outside recordings dir
            except (AttributeError, TypeError, ValueError, OSError):
                continue
            # is_file(), not exists(): an empty/missing path resolves to the
            # recordings directory itself, and unlink() on a directory raises.
            if p.is_file():
                try:
                    p.unlink()
                except OSError:
                    continue
                deleted_files.append(str(p))
        index["records"] = kept
        _save_index(index)
    return {"ok": True, "deleted": payload.record_name, "deleted_files": deleted_files}
class RecordBulkDelete(BaseModel):
    """Request body for POST /delete-bulk."""
    # Names of the records to delete; ignored when `all` is true.
    record_names: list[str] | None = None
    # When true, every record in the index is deleted.
    all: bool = False
@router.post("/delete-bulk")
async def delete_bulk(payload: RecordBulkDelete):
    """Delete many records in a single call.

    With all=True every record is removed; otherwise only those named in
    record_names. Files are unlinked only when they resolve inside the
    recordings directory (same guard as /delete); unlink failures are ignored.

    Returns:
        ok flag, the removed record names, their count, and the number of
        files actually unlinked.
    """
    def _purge_files(record, base) -> int:
        # Unlink the record's files under `base`; return how many were removed.
        gone = 0
        for info in record.get("files", {}).values():
            try:
                target = _resolve_path(info.get("path", "")).resolve()
                target.relative_to(base)  # never delete outside recordings dir
            except (ValueError, OSError):
                continue
            if not target.exists():
                continue
            try:
                target.unlink()
            except OSError:
                continue
            gone += 1
        return gone

    wanted = set(payload.record_names or [])
    with _INDEX_LOCK:
        index = _load_index()
        base = AUDIO_RECORDINGS_DIR.resolve()
        survivors: list = []
        removed_names: list = []
        unlinked = 0
        for record in index.get("records", []):
            selected = payload.all or record.get("record_name") in wanted
            if not selected:
                survivors.append(record)
                continue
            removed_names.append(record.get("record_name"))
            unlinked += _purge_files(record, base)
        index["records"] = survivors
        _save_index(index)
    return {"ok": True, "deleted": removed_names, "deleted_count": len(removed_names),
            "deleted_files": unlinked}

184
vendor/Sanad/dashboard/routes/replay.py vendored Normal file
View File

@ -0,0 +1,184 @@
"""Replay management endpoints — JSONL files, teaching, test replay, speed control.
Mirrors the replay management features from AI_Photographer/Server/photo_server.py.
"""
from __future__ import annotations
import asyncio
from fastapi import APIRouter, HTTPException, UploadFile, File
from fastapi.responses import FileResponse
from pydantic import BaseModel
from Project.Sanad.config import MOTIONS_DIR
from Project.Sanad.core.logger import get_logger
from Project.Sanad.dashboard.routes._safe_io import (
safe_path_under, check_upload_size, atomic_write_bytes,
)
log = get_logger("replay_route")
router = APIRouter()
def _block_if_movement_armed():
    """Raise 409 while locomotion movement is armed.

    Arm motion (replay / teaching) is mutually exclusive with walking. If the
    locomotion controller cannot be imported or queried, the check is skipped
    and the function returns without raising.
    """
    try:
        from Project.Sanad.main import loco_controller  # type: ignore
        controller = loco_controller
        movement_armed = controller is not None and controller.is_armed()
    except HTTPException:
        # Kept ahead of the broad handler so an HTTP error is never swallowed.
        raise
    except Exception:
        return
    if not movement_armed:
        return
    raise HTTPException(
        409,
        "Arm actions are disabled while movement is enabled. "
        "Disable movement in the Controller tab first.",
    )
# -- models --
class ReplayRequest(BaseModel):
    """Request body for POST /test."""
    # Motion file name, resolved under MOTIONS_DIR by test_replay.
    name: str
    # Passed through to arm.replay_file as the speed argument.
    speed: float = 1.0
class RenameRequest(BaseModel):
    """Request body for POST /files/rename."""
    # Existing motion file name under MOTIONS_DIR.
    old_name: str
    # Target file name under MOTIONS_DIR; must not exist yet.
    new_name: str
class TeachRequest(BaseModel):
    """Request body for POST /teach/start."""
    # Motion name; start_teaching checks for "<name>.jsonl" in MOTIONS_DIR.
    name: str
    # Passed to teacher.start as the duration (seconds, per the field name).
    duration_sec: float = 15.0
# -- motion file CRUD --
@router.get("/files")
async def list_motion_files():
    """Return the motion files reported by the arm subsystem."""
    from Project.Sanad.main import arm

    motion_files = arm.list_motion_files()
    return {"files": motion_files}
@router.get("/files/{filename}")
async def download_motion_file(filename: str):
    """Serve a motion file from MOTIONS_DIR.

    Raises:
        HTTPException: 404 when the file does not exist.
    """
    target = safe_path_under(MOTIONS_DIR, filename)
    if target.exists():
        return FileResponse(target, filename=target.name, media_type="application/json")
    raise HTTPException(404, "File not found.")
@router.post("/files/upload")
async def upload_motion_file(file: UploadFile = File(...)):
    """Store an uploaded .jsonl motion file in MOTIONS_DIR.

    Raises:
        HTTPException: 400 when the upload has no name or is not a .jsonl file.
    """
    upload_name = file.filename
    if not upload_name or not upload_name.lower().endswith(".jsonl"):
        raise HTTPException(400, "Only .jsonl files accepted.")

    MOTIONS_DIR.mkdir(parents=True, exist_ok=True)
    destination = safe_path_under(MOTIONS_DIR, upload_name)

    data = await file.read()
    check_upload_size(data)
    atomic_write_bytes(destination, data)
    return {"ok": True, "name": destination.name, "size_bytes": len(data)}
@router.post("/files/rename")
async def rename_motion_file(payload: RenameRequest):
    """Rename a motion file inside MOTIONS_DIR.

    Raises:
        HTTPException: 404 when the source is missing; 409 when the target
            name is already taken.
    """
    source = safe_path_under(MOTIONS_DIR, payload.old_name)
    target = safe_path_under(MOTIONS_DIR, payload.new_name)

    if not source.exists():
        raise HTTPException(404, f"File not found: {payload.old_name}")
    if target.exists():
        raise HTTPException(409, f"File already exists: {payload.new_name}")

    source.rename(target)
    return {"ok": True, "old_name": source.name, "new_name": target.name}
@router.delete("/files/{filename}")
async def delete_motion_file(filename: str):
    """Delete a motion file from MOTIONS_DIR.

    Raises:
        HTTPException: 404 when the file does not exist.
    """
    target = safe_path_under(MOTIONS_DIR, filename)
    if not target.exists():
        raise HTTPException(404, "File not found.")
    target.unlink()
    return {"ok": True, "deleted": target.name}
# -- test replay --
_BG_TASKS: set[asyncio.Task] = set()
@router.post("/test")
async def test_replay(payload: ReplayRequest):
    """Test-play a motion file at the requested speed.

    The replay runs in a background task and this handler returns as soon as
    it has been scheduled; a failure inside the replay is only logged.

    Raises:
        HTTPException: 409 when movement is armed or the arm is busy; 404
            when the motion file does not exist.
    """
    from Project.Sanad.main import arm

    _block_if_movement_armed()
    if arm.is_busy:
        raise HTTPException(409, "Arm is busy.")

    motion = safe_path_under(MOTIONS_DIR, payload.name)
    if not motion.exists():
        raise HTTPException(404, f"Motion file not found: {motion.name}")

    async def _replay_in_background():
        try:
            await asyncio.to_thread(arm.replay_file, str(motion), payload.speed)
        except Exception:
            log.exception("Test replay failed")

    # Hold a strong reference until the task finishes so it cannot be
    # garbage-collected mid-run.
    background = asyncio.create_task(_replay_in_background())
    _BG_TASKS.add(background)
    background.add_done_callback(_BG_TASKS.discard)
    return {"ok": True, "name": motion.name, "speed": payload.speed}
@router.post("/cancel")
async def cancel_replay():
    """Request cancellation of the current replay.

    Per the original note, the smooth return-to-home runs as the final phase
    of the replay itself (matching g1_replay_v4_stable.py: the play loop
    breaks on the cancel flag, then the same Run() executes its return-home
    ramp + DisableSDK), so nothing else is scheduled here.
    """
    from Project.Sanad.main import arm

    arm.cancel()
    reply = {"ok": True, "message": "Cancelled — returning to home pose smoothly."}
    return reply
@router.get("/status")
async def replay_status():
    """Return the arm status plus the teaching status ({} without a teacher)."""
    from Project.Sanad.main import arm, teacher

    arm_state = arm.status()
    teaching_state = teacher.status() if teacher else {}
    return {"arm": arm_state, "teaching": teaching_state}
# -- teaching mode --
@router.post("/teach/start")
async def start_teaching(payload: TeachRequest):
    """Start a teaching (motion recording) session.

    The target file name is now built with safe_path_under, like every other
    handler in this module, instead of joining the raw client-supplied name
    onto MOTIONS_DIR.
    NOTE(review): assumes safe_path_under rejects names that escape
    MOTIONS_DIR, as its use elsewhere in this file implies — confirm.

    Raises:
        HTTPException: 503 when the teaching module is unavailable; 409 when
            movement is armed, a session is already active, or the motion
            file already exists.
    """
    from Project.Sanad.main import teacher
    if teacher is None:
        raise HTTPException(503, "Teaching module not available.")
    _block_if_movement_armed()
    if teacher.is_recording:
        raise HTTPException(409, "Teaching session already active.")
    existing = safe_path_under(MOTIONS_DIR, f"{payload.name}.jsonl")
    if existing.exists():
        raise HTTPException(409, f"Motion file already exists: {payload.name}.jsonl")
    return teacher.start(payload.name, payload.duration_sec)
@router.post("/teach/stop")
async def stop_teaching():
    """Stop the teaching session and return the teacher's response.

    Raises:
        HTTPException: 503 when the teaching module is unavailable.
    """
    from Project.Sanad.main import teacher

    if teacher is not None:
        return teacher.stop()
    raise HTTPException(503, "Teaching module not available.")
@router.get("/teach/status")
async def teaching_status():
    """Return the teacher's status, or an idle placeholder without a teacher."""
    from Project.Sanad.main import teacher

    if teacher is not None:
        return teacher.status()
    return {"recording": False, "phase": "idle"}

168
vendor/Sanad/dashboard/routes/scripts.py vendored Normal file
View File

@ -0,0 +1,168 @@
"""Script/prompt file management — CRUD for sanad_script.txt, sanad_rule.txt, etc."""
from __future__ import annotations
import asyncio
from datetime import datetime
from pathlib import Path
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Project.Sanad.config import SCRIPTS_DIR
from Project.Sanad.core import persona as _persona
from Project.Sanad.dashboard.routes._safe_io import (
atomic_write_text, MAX_UPLOAD_BYTES,
)
router = APIRouter()
MAX_SCRIPT_BYTES = MAX_UPLOAD_BYTES
def _safe_path(name: str) -> Path:
    """Map a client-supplied script name to a path inside SCRIPTS_DIR.

    Args:
        name: bare file name; surrounding whitespace is stripped.

    Returns:
        The resolved path of the script.

    Raises:
        HTTPException: 400 when the name is empty, contains a path separator
            or NUL byte, is "." / "..", or resolves outside SCRIPTS_DIR.
    """
    cleaned = name.strip()
    if (not cleaned or "/" in cleaned or "\\" in cleaned
            or "\x00" in cleaned or cleaned in {".", ".."}):
        raise HTTPException(400, "Invalid script name.")
    base = SCRIPTS_DIR.resolve()
    path = (SCRIPTS_DIR / cleaned).resolve()
    # Component-wise containment check. A plain string-prefix test would also
    # accept a sibling such as "<SCRIPTS_DIR>_other/...".
    try:
        path.relative_to(base)
    except ValueError:
        raise HTTPException(400, "Path traversal denied.")
    return path
@router.get("/")
async def list_scripts():
    """List the regular files in SCRIPTS_DIR, sorted case-insensitively by name.

    Each item carries its size, modification time, and flags marking the
    active persona and the default persona.
    """
    SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
    active = _persona.active_persona_name()
    default = _persona.default_persona_name()

    def _describe(script: Path) -> dict:
        info = script.stat()
        modified = datetime.fromtimestamp(info.st_mtime).isoformat(timespec="seconds")
        return {
            "name": script.name,
            "size_bytes": info.st_size,
            "modified_at": modified,
            "active": script.name == active,       # the persona Gemini loads now
            "is_default": script.name == default,  # the fallback (sanad_script.txt)
        }

    ordered = sorted(SCRIPTS_DIR.iterdir(), key=lambda x: x.name.lower())
    items = [_describe(script) for script in ordered if script.is_file()]
    return {"path": str(SCRIPTS_DIR), "files": items,
            "active": active, "default": default}
class ScriptActive(BaseModel):
    """Request body for POST /active."""
    name: str | None = None  # None / "" / the default name → revert to default
    restart: bool = False  # also restart the live voice so it takes effect now
@router.get("/active")
async def get_active():
    """Report which persona Gemini will load and the default it falls back to."""
    current = _persona.active_persona_name()
    fallback = _persona.default_persona_name()
    return {"active": current, "default": fallback}
@router.post("/active")
async def set_active(payload: ScriptActive):
"""Select the persona script Gemini uses. With restart=true, the live voice
session is bounced so the new persona takes effect immediately; otherwise it
applies on the next voice (re)connect."""
# Persist the selection first; an unknown script surfaces as 404.
try:
active = _persona.set_active_persona(payload.name)
except FileNotFoundError:
raise HTTPException(404, f"Script not found: {payload.name}")
# Reports whether the live voice was actually (re)started in this call.
restarted = False
if payload.restart:
try:
# Imported lazily, like the other route modules do for main's singletons.
from Project.Sanad.main import live_sub
if live_sub is not None and hasattr(live_sub, "start"):
if hasattr(live_sub, "is_running") and live_sub.is_running():
# stop/start are pushed to worker threads via asyncio.to_thread.
await asyncio.to_thread(live_sub.stop)
# Fixed 1.5 s pause between stop and start.
# NOTE(review): presumably lets the old session shut down — confirm.
await asyncio.sleep(1.5)
await asyncio.to_thread(live_sub.start)
restarted = True
except Exception:
pass # selection is saved regardless of restart success
return {"ok": True, "active": active,
"default": _persona.default_persona_name(), "restarted": restarted}
class ScriptLoad(BaseModel):
    """Request body for POST /load."""
    # Script file name inside SCRIPTS_DIR.
    name: str
@router.post("/load")
async def load_script(payload: ScriptLoad):
    """Return a script's text (decoded as utf-8-sig) with its size and mtime.

    Raises:
        HTTPException: 404 when the script does not exist.
    """
    script = _safe_path(payload.name)
    if not script.exists():
        raise HTTPException(404, f"Script not found: {payload.name}")

    text = script.read_text(encoding="utf-8-sig")
    info = script.stat()
    modified = datetime.fromtimestamp(info.st_mtime).isoformat(timespec="seconds")
    return {
        "name": script.name,
        "content": text,
        "size_bytes": info.st_size,
        "modified_at": modified,
    }
class ScriptSave(BaseModel):
    """Request body for POST /save."""
    # Script file name inside SCRIPTS_DIR.
    name: str
    # Full new text; save_script rejects it above MAX_SCRIPT_BYTES (UTF-8).
    content: str
@router.post("/save")
async def save_script(payload: ScriptSave):
    """Write a script's content to SCRIPTS_DIR via atomic_write_text.

    Raises:
        HTTPException: 413 when the UTF-8 encoded content exceeds
            MAX_SCRIPT_BYTES.
    """
    encoded_size = len(payload.content.encode("utf-8"))
    if encoded_size > MAX_SCRIPT_BYTES:
        raise HTTPException(413, f"Content too large (max {MAX_SCRIPT_BYTES} bytes).")

    target = _safe_path(payload.name)
    SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
    atomic_write_text(target, payload.content)

    written = target.stat().st_size
    return {"ok": True, "name": target.name, "size_bytes": written}
class ScriptCreate(BaseModel):
    """Request body for POST /create."""
    # New script file name inside SCRIPTS_DIR; must not exist yet.
    name: str
    # Initial text; defaults to an empty file.
    content: str = ""
@router.post("/create")
async def create_script(payload: ScriptCreate):
    """Create a new script file; an existing file is never overwritten.

    Raises:
        HTTPException: 413 when the UTF-8 encoded content exceeds
            MAX_SCRIPT_BYTES; 409 when the file already exists.
    """
    encoded_size = len(payload.content.encode("utf-8"))
    if encoded_size > MAX_SCRIPT_BYTES:
        raise HTTPException(413, f"Content too large (max {MAX_SCRIPT_BYTES} bytes).")

    target = _safe_path(payload.name)
    if target.exists():
        raise HTTPException(409, f"File already exists: {payload.name}")

    SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
    atomic_write_text(target, payload.content)
    return {"ok": True, "name": target.name}
class ScriptRename(BaseModel):
    """Request body for POST /rename."""
    # Existing script file name inside SCRIPTS_DIR.
    old_name: str
    # Target file name inside SCRIPTS_DIR; must not exist yet.
    new_name: str
@router.post("/rename")
async def rename_script(payload: ScriptRename):
    """Rename a script file inside SCRIPTS_DIR.

    Raises:
        HTTPException: 404 when the source is missing; 409 when the target
            name is already taken.
    """
    source = _safe_path(payload.old_name)
    target = _safe_path(payload.new_name)

    if not source.exists():
        raise HTTPException(404, f"Not found: {payload.old_name}")
    if target.exists():
        raise HTTPException(409, f"Already exists: {payload.new_name}")

    source.rename(target)
    return {"ok": True, "old_name": payload.old_name, "new_name": target.name}
class ScriptDelete(BaseModel):
    """Request body for POST /delete."""
    # Script file name inside SCRIPTS_DIR; the default persona is refused.
    name: str
@router.post("/delete")
async def delete_script(payload: ScriptDelete):
    """Delete a script file; the default persona can never be deleted.

    Raises:
        HTTPException: 404 when the script does not exist; 409 when it is
            the default persona.
    """
    target = _safe_path(payload.name)
    if not target.exists():
        raise HTTPException(404, f"Not found: {payload.name}")

    is_default = target.name == _persona.default_persona_name()
    if is_default:
        raise HTTPException(409, f"Cannot delete the default persona ({target.name}).")

    target.unlink()
    # Per the original note: if the deleted file was the active selection,
    # resolution auto-falls-back to the default — no extra cleanup needed.
    return {"ok": True, "deleted": payload.name}

101
vendor/Sanad/dashboard/routes/skills.py vendored Normal file
View File

@ -0,0 +1,101 @@
"""Skill registry CRUD endpoints + skill execution."""
from __future__ import annotations
from fastapi import APIRouter, HTTPException, UploadFile, File
from pydantic import BaseModel
from Project.Sanad.config import AUDIO_RECORDINGS_DIR
from Project.Sanad.dashboard.routes._safe_io import (
safe_path_under, check_upload_size, atomic_write_bytes,
)
router = APIRouter()
class SkillCreate(BaseModel):
    """Request body for POST / — create_skill passes every field to Skill(...)."""
    id: str = ""
    audio_file: str = ""
    motion_file: str = ""
    callback: str = ""
    # NOTE(review): the accepted sync_mode values are defined by the skill
    # registry, not in this module — confirm there.
    sync_mode: str = "parallel"
    enabled: bool = True
    description: str = ""
class SkillUpdate(BaseModel):
    """Request body for PUT /{skill_id} — a partial update.

    update_skill forwards only the fields that are not None, so a field left
    as None keeps its stored value.
    """
    audio_file: str | None = None
    motion_file: str | None = None
    callback: str | None = None
    sync_mode: str | None = None
    enabled: bool | None = None
    description: str | None = None
@router.get("/")
async def list_skills():
    """Return every skill known to the brain's registry."""
    from Project.Sanad.main import brain

    skills = brain.registry.list_skills()
    return {"skills": skills}
@router.get("/{skill_id}")
async def get_skill(skill_id: str):
    """Return one skill as a dict.

    Raises:
        HTTPException: 404 when the registry has no such skill.
    """
    from Project.Sanad.main import brain

    found = brain.registry.get(skill_id)
    if found is not None:
        return found.to_dict()
    raise HTTPException(404, f"Skill not found: {skill_id}")
@router.post("/")
async def create_skill(payload: SkillCreate):
    """Create a skill from the request body and add it to the registry.

    Raises:
        HTTPException: 400 when building or adding the skill raises ValueError.
    """
    from Project.Sanad.main import brain
    from Project.Sanad.core.skill_registry import Skill

    try:
        fields = payload.model_dump()
        created = brain.registry.add(Skill(**fields))
    except ValueError as exc:
        raise HTTPException(400, str(exc))
    return {"ok": True, "skill": created.to_dict()}
@router.put("/{skill_id}")
async def update_skill(skill_id: str, payload: SkillUpdate):
    """Apply a partial update to a skill; None fields are left untouched.

    Raises:
        HTTPException: 400 when the registry raises ValueError; 404 when the
            registry has no such skill.
    """
    from Project.Sanad.main import brain

    changes = {}
    for field, value in payload.model_dump().items():
        if value is not None:
            changes[field] = value

    try:
        result = brain.registry.update(skill_id, changes)
    except ValueError as exc:
        raise HTTPException(400, str(exc))

    if result is None:
        raise HTTPException(404, f"Skill not found: {skill_id}")
    return {"ok": True, "skill": result.to_dict()}
@router.delete("/{skill_id}")
async def delete_skill(skill_id: str):
    """Delete a skill from the registry.

    Raises:
        HTTPException: 404 when the registry reports nothing was deleted.
    """
    from Project.Sanad.main import brain

    outcome = brain.registry.delete(skill_id)
    if outcome:
        return {"ok": True, "deleted": outcome}
    raise HTTPException(404, f"Skill not found: {skill_id}")
@router.post("/{skill_id}/execute")
async def execute_skill(skill_id: str):
    """Run a skill through the brain and return its result unchanged."""
    from Project.Sanad.main import brain

    return await brain.execute_skill(skill_id)
@router.post("/upload-audio")
async def upload_audio(file: UploadFile = File(...)):
    """Store an uploaded .wav file in AUDIO_RECORDINGS_DIR for skill binding.

    Raises:
        HTTPException: 400 when the upload has no name or is not a .wav file.
    """
    upload_name = file.filename
    if not upload_name or not upload_name.lower().endswith(".wav"):
        raise HTTPException(400, "Only .wav files are accepted.")

    AUDIO_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)
    destination = safe_path_under(AUDIO_RECORDINGS_DIR, upload_name)

    data = await file.read()
    check_upload_size(data)
    atomic_write_bytes(destination, data)
    return {"ok": True, "path": str(destination), "size_bytes": len(data)}

315
vendor/Sanad/dashboard/routes/system.py vendored Normal file
View File

@ -0,0 +1,315 @@
"""System information endpoints — network, subsystems, dashboard binding."""
from __future__ import annotations
import asyncio
import os
import platform
import shutil
import socket
import sys
from pathlib import Path
from typing import Any
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Project.Sanad.config import (
AUDIO_RECORDINGS_DIR,
BASE_DIR,
DASHBOARD_HOST,
DASHBOARD_INTERFACE,
DASHBOARD_PORT,
DATA_DIR,
DDS_NETWORK_INTERFACE,
LOGS_DIR,
list_network_interfaces,
)
from Project.Sanad.core.logger import get_logger
log = get_logger("system_route")
router = APIRouter()
def _runtime_bind() -> tuple[str, int]:
    """Return the (host, port) the server was asked to bind to.

    Per the original note, main.py starts uvicorn with the CLI --host/--port
    values, which may differ from the import-time DASHBOARD_HOST /
    DASHBOARD_PORT defaults. The live argv is therefore scanned for
    `--host X`, `--host=X`, `--port N` and `--port=N`; the config constants
    remain the fallback, and a non-integer port value is ignored.
    """
    def _as_port(text, fallback: int) -> int:
        # Keep the previous port when the value is not an integer.
        try:
            return int(text)
        except (TypeError, ValueError):
            return fallback

    host, port = DASHBOARD_HOST, DASHBOARD_PORT
    args = sys.argv
    # Pair each token with its successor; the last token is paired with None.
    for token, following in zip(args, args[1:] + [None]):
        has_value = following is not None
        if token == "--host" and has_value:
            host = following
        elif token.startswith("--host="):
            host = token.split("=", 1)[1]
        elif token == "--port" and has_value:
            port = _as_port(following, port)
        elif token.startswith("--port="):
            port = _as_port(token.split("=", 1)[1], port)
    return host, port
def _safe_status(component, name: str) -> dict[str, Any]:
if component is None:
return {"available": False}
try:
if hasattr(component, "status") and callable(component.status):
s = component.status()
if not isinstance(s, dict):
s = {"raw": str(s)}
s.setdefault("available", True)
return s
return {"available": True}
except Exception as exc:
log.warning("status() failed for %s: %s", name, exc)
return {"available": True, "error": str(exc)}
@router.get("/info")
async def system_info():
    """One-shot system snapshot for the dashboard system panel.

    The snapshot (host facts, dashboard binding, DDS interface, network
    interfaces, subsystem states, audio selection) is assembled in a worker
    thread via asyncio.to_thread.
    """
    def _do():
        # Subsystem registry; fall back to an empty one when main can't be imported.
        try:
            from Project.Sanad.main import SUBSYSTEMS
        except Exception:
            SUBSYSTEMS = {}

        rows = []
        for name in sorted(SUBSYSTEMS):
            comp = SUBSYSTEMS[name]
            row = {"name": name, "connected": comp is not None}
            if comp is not None and hasattr(comp, "status") and callable(comp.status):
                try:
                    snapshot = comp.status()
                    if isinstance(snapshot, dict):
                        row["status"] = snapshot
                except Exception as exc:
                    row["status_error"] = str(exc)
            rows.append(row)
        connected_total = len([row for row in rows if row["connected"]])

        # Current audio device selection (best-effort).
        audio_info = {}
        try:
            from Project.Sanad.voice import audio_devices as ad
            audio_info = {
                "pactl_available": ad.pactl_available(),
                "current": ad.current_selection(),
                "detected_profile_ids": [
                    d["profile"]["id"] for d in ad.detect_plugged_profiles()
                ] if ad.pactl_available() else [],
            }
        except Exception as exc:
            audio_info = {"error": str(exc)}

        # Network interfaces (empty list when enumeration fails).
        try:
            interfaces = list_network_interfaces()
        except Exception:
            interfaces = []

        # Report the ACTUAL runtime bind args (argv), not the import-time
        # config defaults.
        bound_host, bound_port = _runtime_bind()
        display_host = bound_host
        if bound_host == "0.0.0.0":
            # A wildcard bind has no single address: show the IP of the first
            # interface that is up and not loopback, if there is one.
            reachable = [
                i for i in interfaces
                if i["is_up"] and i["ip"] and not i["ip"].startswith("127.")
            ]
            if reachable:
                display_host = reachable[0]["ip"]

        host_section = {
            "hostname": socket.gethostname(),
            "platform": platform.platform(),
            "python": sys.version.split()[0],
            "executable": sys.executable,
            "base_dir": str(BASE_DIR),
            "pid": os.getpid(),
        }
        dashboard_section = {
            "interface": DASHBOARD_INTERFACE,
            "bound_host": bound_host,
            "display_host": display_host,
            "port": bound_port,
            "url": f"http://{display_host}:{bound_port}",
        }
        subsystems_section = {
            "total": len(rows),
            "connected": connected_total,
            "disconnected": len(rows) - connected_total,
            "list": rows,
        }
        return {
            "host": host_section,
            "dashboard": dashboard_section,
            "dds": {"interface": DDS_NETWORK_INTERFACE},
            "network": {"interfaces": interfaces},
            "subsystems": subsystems_section,
            "audio": audio_info,
        }

    return await asyncio.to_thread(_do)
# ───────────────────── storage tracking + cleanup ─────────────────────
# Categories surfaced in the Settings → Storage panel. `cleanable` ones get a
# Clean button + are included in "Clean all"; the rest (faces/motions/zones)
# are shown for tracking only — they're operational assets (enrollments,
# motion configs) managed in their own tabs, not disposable clutter.
# Each entry is a 4-tuple: (key, label, path, cleanable).
_STORAGE_CATS = [
("recordings", "Conversation recordings", DATA_DIR / "recordings", True),
("records", "Named records (Typed Replay)", AUDIO_RECORDINGS_DIR, True),
("logs", "Logs", LOGS_DIR, True),
("faces", "Enrolled faces", DATA_DIR / "faces", False),
("motions", "Motion replays + config", DATA_DIR / "motions", False),
("photos", "Photos", DATA_DIR / "photos", False),
("zones", "Vision zones", DATA_DIR / "zones", False),
]
# Keys of the categories whose `cleanable` flag is true.
_CLEANABLE = {k for k, _l, _p, c in _STORAGE_CATS if c}
def _dir_stats(path: Path) -> tuple[int, int]:
"""(total_bytes, file_count) of a dir tree. Missing dir → (0, 0)."""
total, n = 0, 0
try:
for root, _dirs, files in os.walk(path):
for f in files:
try:
total += os.path.getsize(os.path.join(root, f))
n += 1
except OSError:
pass
except Exception:
pass
return total, n
def _human(b: float) -> str:
f = float(b)
for u in ("B", "KB", "MB", "GB", "TB"):
if f < 1024 or u == "TB":
return f"{f:.0f} {u}" if u == "B" else f"{f:.1f} {u}"
f /= 1024
return f"{f:.1f} TB"
@router.get("/storage")
async def storage_usage():
    """Per-category data/log sizes plus disk usage, for the Storage panel.

    The directory walks run in a worker thread via asyncio.to_thread.
    """
    def _category_row(key, label, path, cleanable) -> dict:
        size, files = _dir_stats(Path(path))
        return {
            "key": key, "label": label, "path": str(path),
            "size_bytes": size, "size_human": _human(size),
            "files": files, "cleanable": cleanable,
        }

    def _do():
        rows = [_category_row(*cat) for cat in _STORAGE_CATS]
        data_bytes, _ = _dir_stats(DATA_DIR)
        logs_bytes, _ = _dir_stats(LOGS_DIR)
        # Disk figures are best-effort: any failure yields an empty dict.
        try:
            usage = shutil.disk_usage(str(BASE_DIR))
            used_fraction = (usage.total - usage.free) / usage.total
            disk = {
                "free_human": _human(usage.free), "total_human": _human(usage.total),
                "used_pct": round(100.0 * used_fraction, 1),
            }
        except Exception:
            disk = {}
        return {
            "categories": rows,
            "data_bytes": data_bytes, "data_human": _human(data_bytes),
            "logs_human": _human(logs_bytes),
            "total_human": _human(data_bytes + logs_bytes),
            "disk": disk,
        }

    return await asyncio.to_thread(_do)
class _CleanReq(BaseModel):
    # Request body for POST /storage/clean.
    # Accepted values: recordings | records | logs | all
    target: str
def _clean_recordings() -> tuple[int, int]:
    """Delete every ``*.wav`` conversation recording plus ``index.json``.

    Returns:
        ``(items_removed, bytes_freed)``. A file only counts once it has
        actually been unlinked, so a failed delete never inflates the total.
    """
    folder = DATA_DIR / "recordings"
    removed, freed = 0, 0
    for item in [*folder.glob("*.wav"), folder / "index.json"]:
        if not item.is_file():
            continue
        try:
            size = item.stat().st_size
            item.unlink()
        except OSError:
            # Could not stat/remove this file — leave it out of both tallies.
            continue
        freed += size
        removed += 1
    return removed, freed
def _clean_records() -> tuple[int, int]:
    """Delete every ``*.wav`` named record plus ``records.json``.

    Returns:
        ``(items_removed, bytes_freed)``. A file only counts once it has
        actually been unlinked, so a failed delete never inflates the total.
    """
    folder = AUDIO_RECORDINGS_DIR
    removed, freed = 0, 0
    for item in [*folder.glob("*.wav"), folder / "records.json"]:
        if not item.is_file():
            continue
        try:
            size = item.stat().st_size
            item.unlink()
        except OSError:
            # Could not stat/remove this file — leave it out of both tallies.
            continue
        freed += size
        removed += 1
    return removed, freed
def _clean_logs() -> tuple[int, int]:
    """Empty every ``*.log`` file directly under ``LOGS_DIR``.

    Files are truncated rather than deleted: active loggers hold append-mode
    handles, so truncating to 0 clears the content without losing the fd.

    Returns:
        ``(files_truncated, bytes_freed)``. A file only counts once the
        truncation succeeded, so a failure never inflates the total.
    """
    truncated, freed = 0, 0
    for log_file in Path(LOGS_DIR).glob("*.log"):
        try:
            size = log_file.stat().st_size
            # Opening in "w" mode truncates; nothing is written.
            open(log_file, "w").close()
        except OSError:
            continue
        freed += size
        truncated += 1
    return truncated, freed
@router.post("/storage/clean")
async def storage_clean(req: _CleanReq):
    """Clean one disposable category, or all of them with ``target='all'``.

    Recordings and records are deleted; logs are truncated. Asset categories
    (faces, motions, zones) are rejected with a 400. The cleanup itself runs
    in a worker thread.
    """
    wanted = (req.target or "").strip().lower()
    if not (wanted == "all" or wanted in _CLEANABLE):
        raise HTTPException(400, f"target must be 'all' or one of {sorted(_CLEANABLE)}")

    def _run():
        cleaners = {
            "recordings": _clean_recordings,
            "records": _clean_records,
            "logs": _clean_logs,
        }
        if wanted == "all":
            selected = ["recordings", "records", "logs"]
        else:
            selected = [wanted]
        per_target = {}
        grand_total = 0
        for name in selected:
            count, freed = cleaners[name]()
            per_target[name] = {
                "items": count,
                "freed_bytes": freed,
                "freed_human": _human(freed),
            }
            grand_total += freed
        log.info("storage clean %s → freed %s", selected, _human(grand_total))
        return {
            "ok": True, "cleaned": selected,
            "total_freed_bytes": grand_total,
            "total_freed_human": _human(grand_total),
            "result": per_target,
        }

    return await asyncio.to_thread(_run)

View File

@ -0,0 +1,81 @@
"""REST endpoints backing the 3D motor-temperature dashboard (N1).
Serves the motor name/mesh mapping + thresholds, and a one-shot temperature
snapshot (the front-end's initial fetch fallback). The live stream is over
/ws/motor-temps (dashboard/websockets/motor_temps.py). The 3D view itself is
the static page at /static/temp3d/index.html.
"""
from __future__ import annotations
import time
from fastapi import APIRouter
from Project.Sanad.dashboard.temp_motor_map import (
MOTOR_NAMES,
MOTOR_TO_MESH,
TEMP_HOT_THRESHOLD,
TEMP_MAX,
TEMP_MIN,
TEMP_WARM_THRESHOLD,
build_payload,
)
router = APIRouter()
def _get_arm():
"""Lazy import — avoids a circular import on dashboard load."""
try:
from Project.Sanad.main import arm # type: ignore
return arm
except Exception:
return None
@router.get("/mapping")
async def motor_mapping():
    """Return the motor name and mesh maps together with the temperature thresholds."""
    thresholds = {
        "min": TEMP_MIN,
        "max": TEMP_MAX,
        "warm": TEMP_WARM_THRESHOLD,
        "hot": TEMP_HOT_THRESHOLD,
    }
    return {
        "motor_names": MOTOR_NAMES,
        "motor_to_mesh": MOTOR_TO_MESH,
        "thresholds": thresholds,
    }
@router.get("/motors")
async def motors_snapshot():
    """One-shot motor temperature + position snapshot (Marcus payload shape).

    When the arm is unavailable, or one of its getters fails, the matching
    list is sent empty.
    """
    controller = _get_arm()

    def _read(getter_name: str) -> list:
        # A failing (or missing) getter degrades to an empty list.
        try:
            return getattr(controller, getter_name)()
        except Exception:
            return []

    if controller is None:
        temps, positions = [], []
    else:
        temps = _read("get_motor_temps")
        positions = _read("get_current_q")
    return build_payload(temps, positions, time.time())
@router.get("/battery")
async def battery_status():
    """Return the arm's battery snapshot, or ``{"available": False}``.

    The fallback is used when the arm is missing, has no ``get_battery``
    method, or that method raises.
    """
    unavailable = {"available": False}
    controller = _get_arm()
    if controller is None:
        return unavailable
    if not hasattr(controller, "get_battery"):
        return unavailable
    try:
        snapshot = controller.get_battery()
    except Exception:
        return unavailable
    return snapshot

View File

@ -0,0 +1,146 @@
"""Typed Replay dashboard endpoints.
Full CRUD over the records index:
POST /say generate + play + optionally record
POST /replay-last re-play cached audio
POST /save-last persist cached generation
GET /records list
GET /records/{name} get one
POST /records/{name}/play play saved WAV (speaker or raw)
POST /records/{name}/rename rename
DELETE /records/{name} delete
GET /status engine + session state
"""
from __future__ import annotations
import asyncio
from typing import Literal
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Project.Sanad.core.config_loader import section as _cfg_section
router = APIRouter()
# MAX_TEXT_LEN — SINGLE SOURCE in dashboard.api_input
MAX_TEXT_LEN = _cfg_section("dashboard", "api_input").get("max_text_len", 2000)
class SayPayload(BaseModel):
    # Body of POST /say.
    text: str
    record: bool = False      # forwarded to the engine as ``record``
    record_name: str = ""     # forwarded to the engine as ``record_name``


class SaveLastPayload(BaseModel):
    # Body of POST /save-last.
    record_name: str = ""


class RenamePayload(BaseModel):
    # Body of POST /records/{name}/rename.
    new_name: str


class PlayRecordPayload(BaseModel):
    # Body of POST /records/{name}/play: which stored file to play.
    file_kind: Literal["speaker", "raw"] = "speaker"
def _engine():
    """Return the Typed Replay engine from ``Project.Sanad.main``.

    Raises:
        HTTPException: 503 when the engine has not been initialized.
    """
    from Project.Sanad.main import typed_replay as engine
    if engine is not None:
        return engine
    raise HTTPException(503, "TypedReplayEngine not initialized.")
# ───────────────────── generate / replay ─────────────────────
@router.post("/say")
async def say(payload: SayPayload):
    """Generate speech for ``payload.text`` through the Typed Replay engine.

    Responds 400 for blank text or an engine ``ValueError``, 413 when the text
    is longer than ``MAX_TEXT_LEN`` and 503 when the engine is missing or
    raises ``RuntimeError``.
    """
    if not (payload.text and payload.text.strip()):
        raise HTTPException(400, "text cannot be empty")
    if len(payload.text) > MAX_TEXT_LEN:
        raise HTTPException(413, f"text too long (max {MAX_TEXT_LEN})")
    engine = _engine()
    try:
        outcome = await engine.say(
            payload.text,
            record=payload.record,
            record_name=payload.record_name,
        )
    except ValueError as err:
        raise HTTPException(400, str(err))
    except RuntimeError as err:
        raise HTTPException(503, str(err))
    return outcome
@router.post("/replay-last")
async def replay_last():
    """Run the engine's ``replay_last`` in a worker thread (400 on ``RuntimeError``)."""
    engine = _engine()
    try:
        outcome = await asyncio.to_thread(engine.replay_last)
    except RuntimeError as err:
        raise HTTPException(400, str(err))
    return outcome
@router.post("/save-last")
async def save_last(payload: SaveLastPayload):
    """Persist the engine's cached generation under ``record_name`` (400 on ``RuntimeError``)."""
    engine = _engine()
    try:
        saved = engine.save_last(payload.record_name)
    except RuntimeError as err:
        raise HTTPException(400, str(err))
    return {"ok": True, "record": saved}
# ───────────────────── record CRUD ───────────────────────────
@router.get("/records")
async def list_records():
    """Return whatever the engine's ``list_records`` reports."""
    engine = _engine()
    return engine.list_records()
@router.get("/records/{name}")
async def get_record(name: str):
    """Look up one record by name; an engine ``KeyError`` becomes a 404."""
    try:
        record = _engine().find_record(name)
    except KeyError:
        raise HTTPException(404, f"record not found: {name}")
    return record
@router.post("/records/{name}/play")
async def play_record(name: str, payload: PlayRecordPayload):
    """Play a saved record in a worker thread.

    Error mapping: ``KeyError`` -> 404, ``FileNotFoundError`` -> 410,
    ``RuntimeError`` -> 503.
    """
    engine = _engine()
    try:
        outcome = await asyncio.to_thread(engine.play_record, name, payload.file_kind)
    except KeyError:
        raise HTTPException(404, f"record not found: {name}")
    except FileNotFoundError as err:
        raise HTTPException(410, f"file missing on disk: {err}")
    except RuntimeError as err:
        raise HTTPException(503, str(err))
    return outcome
@router.post("/records/{name}/rename")
async def rename_record(name: str, payload: RenamePayload):
    """Rename a record; ``KeyError`` -> 404, ``ValueError`` -> 400."""
    engine = _engine()
    try:
        renamed = engine.rename_record(name, payload.new_name)
    except KeyError:
        raise HTTPException(404, f"record not found: {name}")
    except ValueError as err:
        raise HTTPException(400, str(err))
    return {"ok": True, "record": renamed}
@router.delete("/records/{name}")
async def delete_record(name: str):
    """Delete a record and merge the engine's result into the response (404 if unknown)."""
    engine = _engine()
    try:
        details = engine.delete_record(name)
    except KeyError:
        raise HTTPException(404, f"record not found: {name}")
    return {"ok": True, **details}
# ───────────────────── status ────────────────────────────────
@router.get("/status")
async def status():
    """Report engine availability plus the engine's own status fields.

    Unlike the other endpoints, a missing engine yields
    ``{"available": False}`` instead of a 503.
    """
    from Project.Sanad.main import typed_replay as engine
    if engine is not None:
        return {"available": True, **engine.status()}
    return {"available": False}

237
vendor/Sanad/dashboard/routes/voice.py vendored Normal file
View File

@ -0,0 +1,237 @@
"""Voice endpoints — Gemini interaction, local TTS, prompt management."""
from __future__ import annotations
import asyncio
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
log = get_logger("voice_route")
router = APIRouter()
_VR = _cfg_section("dashboard", "voice_route")
_API = _cfg_section("dashboard", "api_input")
# MAX_TEXT_LEN — SINGLE SOURCE in dashboard.api_input
MAX_TEXT_LEN = _API.get("max_text_len", 2000)
_API_KEY_MASK_VISIBLE = _VR.get("api_key_mask_visible", 4)
def _mask_api_key(key: str) -> str:
"""Mask an API key for display — keeps 4 chars on each end.
Examples:
"" ""
"AIza123" "*******" (8 chars = full mask)
"AIzaSy...kqf8" "AIza***...kqf8" (>8 chars = partial mask)
"""
if not key:
return ""
if len(key) <= 8:
return "*" * len(key)
return f"{key[:4]}{'*' * (len(key) - 8)}{key[-4:]}"
class TextPayload(BaseModel):
    # Body of POST /generate.
    text: str
    # "gemini" | "local"
    engine: str = "gemini"
@router.get("/status")
async def voice_status():
    """Return the status dicts of the Gemini client and the local TTS.

    A subsystem that is not available is reported as an empty dict.
    """
    from Project.Sanad.main import voice_client, local_tts

    gemini_state = voice_client.status() if voice_client else {}
    local_state = local_tts.status() if local_tts else {}
    return {"gemini": gemini_state, "local_tts": local_state}
@router.post("/generate")
async def generate_speech(payload: TextPayload):
    """Turn text into speech with the local TTS or with Gemini, then play it.

    ``engine == "local"`` uses the local TTS; any other value goes to Gemini.
    Responds 400 for blank text, 413 for text over ``MAX_TEXT_LEN``, 503 when
    the chosen engine is unavailable, 409 when the Gemini session is owned by
    someone else, and 502 on an unexpected Gemini error.
    """
    if not payload.text.strip():
        raise HTTPException(400, "Text cannot be empty.")
    if len(payload.text) > MAX_TEXT_LEN:
        raise HTTPException(413, f"Text too long (max {MAX_TEXT_LEN} chars).")

    from Project.Sanad.main import voice_client, local_tts, audio_mgr

    # ── local TTS path ──
    if payload.engine == "local":
        if local_tts is None:
            raise HTTPException(503, "Local TTS not available.")
        pcm = await asyncio.to_thread(local_tts.synthesize, payload.text)
        if audio_mgr:
            await asyncio.to_thread(audio_mgr.play_pcm, pcm, 1, 16000, 2)
        seconds = round(len(pcm) / (16000 * 2), 3)
        return {"ok": True, "engine": "local", "duration_sec": seconds}

    # ── Gemini path ──
    if voice_client is None:
        raise HTTPException(503, "Voice client not initialized.")
    if not voice_client.connected:
        try:
            await voice_client.connect()
        except Exception:
            log.exception("Gemini reconnect failed in /generate")
            raise HTTPException(503, "Gemini not connected and reconnect failed.")

    # TypedReplay or the live loop may currently hold the session.
    if voice_client.session_owner is not None:
        raise HTTPException(
            409,
            f"Voice session busy (owned by {voice_client.session_owner})",
        )

    try:
        reply = await voice_client.send_text(payload.text, owner="voice_route")
        audio_bytes, text_parts = reply
    except RuntimeError as err:
        raise HTTPException(503, str(err))
    except Exception as err:
        raise HTTPException(502, f"Gemini communication error: {err}")

    if audio_bytes and audio_mgr:
        await asyncio.to_thread(audio_mgr.play_pcm, audio_bytes, 1, 24000, 2)
    return {
        "ok": True,
        "engine": "gemini",
        "has_audio": bool(audio_bytes),
        "text_response": text_parts,
    }
@router.post("/connect")
async def connect_gemini():
    """Open the Gemini session (503 if there is no client, 502 if connecting fails)."""
    from Project.Sanad.main import voice_client as client

    if client is None:
        raise HTTPException(503, "Voice client not initialized.")
    try:
        await client.connect()
    except Exception as err:
        raise HTTPException(502, f"Gemini connection failed: {err}")
    return {"connected": client.connected}
@router.post("/disconnect")
async def disconnect_gemini():
    """Disconnect the Gemini client if there is one; always reports ``connected: False``."""
    from Project.Sanad.main import voice_client as client

    if client:
        await client.disconnect()
    return {"connected": False}
# ─────────────────────── Gemini API key management ───────────────────────
class ApiKeyPayload(BaseModel):
    # Body of POST /api-key: the new key (validated by the handler).
    api_key: str
@router.get("/api-key")
async def get_api_key():
    """Describe the current Gemini API key without ever returning it in full.

    Response shape:
        {
          "has_key": true,
          "masked":  "AIza***...kqf8",
          "length":  39,
          "source":  "config_file" | "default"
        }
    """
    import Project.Sanad.config as cfg_mod

    current = getattr(cfg_mod, "GEMINI_API_KEY", "") or ""

    # "config_file" when a persisted override exists, otherwise "default".
    source = "default"
    try:
        from Project.Sanad.config import load_config
        persisted = load_config().get("gemini", {}) or {}
        if persisted.get("api_key"):
            source = "config_file"
    except Exception:
        source = "default"

    return {
        "has_key": bool(current),
        "masked": _mask_api_key(current),
        "length": len(current),
        "source": source,
    }
@router.post("/api-key")
async def update_api_key(payload: ApiKeyPayload):
    """Store a new Gemini API key and make the running process use it.

    Steps: validate the key, persist it to data/motions/config.json, patch the
    in-memory module globals, then disconnect a live Gemini session so the
    next connect picks up the new key. The response carries the masked key
    and whether a session was connected before the swap.
    """
    new_key = payload.api_key.strip()
    if not new_key:
        raise HTTPException(400, "API key cannot be empty.")
    if len(new_key) < 20:
        raise HTTPException(400, "API key looks too short.")
    if not new_key.startswith("AIza"):
        raise HTTPException(
            400,
            "Gemini API keys normally start with 'AIza'. "
            "Double-check you're pasting a Google AI Studio key.",
        )

    # 1) Persist to data/motions/config.json (atomic temp-then-replace).
    try:
        from Project.Sanad.config import load_config, save_config
        stored = load_config() or {}
        section = stored.get("gemini") if isinstance(stored.get("gemini"), dict) else {}
        section["api_key"] = new_key
        stored["gemini"] = section
        save_config(stored)
    except Exception as err:
        log.exception("Failed to persist API key to config.json")
        raise HTTPException(500, f"Could not save config: {err}")

    # 2) Hot-swap the in-memory module globals.
    # Project.Sanad.config and Project.Sanad.gemini.client each hold their OWN
    # binding of GEMINI_API_KEY (the client got its copy through
    # `from Project.Sanad.config import GEMINI_API_KEY` at import time), and
    # rebinding config.GEMINI_API_KEY does not reach the importer — so both
    # modules are patched.
    try:
        import Project.Sanad.config as _cfg_mod
        _cfg_mod.GEMINI_API_KEY = new_key
    except Exception:
        log.exception("could not patch config.GEMINI_API_KEY")
    try:
        import Project.Sanad.gemini.client as _gc
        _gc.GEMINI_API_KEY = new_key
    except Exception:
        log.exception("could not patch gemini.client.GEMINI_API_KEY")

    # 3) Drop any live session so the reconnect uses the new key.
    from Project.Sanad.main import voice_client

    was_connected = False
    if voice_client is not None:
        was_connected = bool(getattr(voice_client, "connected", False))
        if was_connected:
            try:
                await voice_client.disconnect()
            except Exception:
                log.exception("disconnect during api-key swap failed")

    log.info("Gemini API key updated (length=%d) source=config_file", len(new_key))
    message = (
        "API key saved. Click 'Connect' to reopen the Gemini session with "
        "the new key. Any running Live Gemini subprocess must be restarted "
        "separately (Stop → Start) to pick up the new key."
    )
    return {
        "ok": True,
        "masked": _mask_api_key(new_key),
        "length": len(new_key),
        "source": "config_file",
        "was_connected": was_connected,
        "message": message,
    }

View File

@ -0,0 +1,72 @@
"""Wake-phrase CRUD endpoints.
Lets the dashboard edit the wake-phrase action mapping stored in
data/wake_phrases.json.
"""
from __future__ import annotations
from typing import Optional
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
router = APIRouter()
class WakePhrasePayload(BaseModel):
    # Body of POST / — a phrase and the action it maps to.
    phrase: str
    action_id: str


class EnablePayload(BaseModel):
    # Body of POST /enable — identifies a phrase/action pair and its new state.
    phrase: str
    action_id: str
    enabled: bool
def _mgr():
    """Return the wake-phrase manager from ``Project.Sanad.main``.

    Raises:
        HTTPException: 503 when the manager has not been initialized.
    """
    from Project.Sanad.main import wake_mgr as manager
    if manager is not None:
        return manager
    raise HTTPException(503, "WakePhraseManager not initialized.")
@router.get("/")
async def list_phrases():
    """Return the manager's status together with its phrase list."""
    manager = _mgr()
    current_status = manager.status()
    phrases = manager.list()
    return {"status": current_status, "phrases": phrases}
@router.post("/")
async def add_phrase(payload: WakePhrasePayload):
    """Add a phrase -> action mapping; a manager ``ValueError`` becomes a 400."""
    manager = _mgr()
    try:
        created = manager.add(payload.phrase, payload.action_id)
    except ValueError as err:
        raise HTTPException(400, str(err))
    return {"ok": True, "entry": created}
@router.delete("/")
async def remove_phrase(phrase: str, action_id: Optional[str] = None):
    """Remove a phrase (optionally narrowed by ``action_id``) and report the manager's result."""
    outcome = _mgr().remove(phrase, action_id)
    return {"ok": True, "removed": outcome}
@router.post("/enable")
async def set_enabled(payload: EnablePayload):
    """Enable or disable one phrase/action pair (404 when the manager reports no match)."""
    manager = _mgr()
    changed = manager.set_enabled(payload.phrase, payload.action_id, payload.enabled)
    if changed:
        return {"ok": True}
    raise HTTPException(404, "phrase+action_id not found")
@router.get("/status")
async def status():
    """Return the wake-phrase manager's status."""
    manager = _mgr()
    return manager.status()

597
vendor/Sanad/dashboard/routes/zones.py vendored Normal file
View File

@ -0,0 +1,597 @@
"""Zones tab — zone → place → linked-faces management + "go here" destination.
Hierarchy (replaces the old flat places):
Zone (name + description)
Place (name + description + optional reference photos + linked face ids)
Routes live under /api/zones. Toggle + CRUD changes write
data/.recognition_state.json (the SAME file faces use); the Gemini child polls
it at 1 Hz and re-primes / announces mid-session. The "go here" endpoints set a
navigation target the robot will head to once N2 locomotion is wired; for now
they just record the target and feed Gemini the place's reference.
"""
from __future__ import annotations
import asyncio
import io
from typing import Optional
from fastapi import APIRouter, File, HTTPException, Query, UploadFile
from fastapi.responses import FileResponse, StreamingResponse
from pydantic import BaseModel
from Project.Sanad.config import BASE_DIR
from Project.Sanad.core.logger import get_logger
from Project.Sanad.dashboard.routes._safe_io import check_upload_size
from Project.Sanad.vision import recognition_state
log = get_logger("zones_routes")
router = APIRouter()
STATE_PATH = BASE_DIR / "data" / ".recognition_state.json"
# ── lazy subsystem accessors ────────────────────────────────
def _get_camera():
try:
from Project.Sanad.main import camera # type: ignore
return camera
except Exception:
return None
def _get_zone_gallery():
try:
from Project.Sanad.main import zone_gallery # type: ignore
return zone_gallery
except Exception:
return None
def _get_face_gallery():
try:
from Project.Sanad.main import gallery # type: ignore
return gallery
except Exception:
return None
def _require_zones():
    """Return the zone gallery, or raise a 503 when it is unavailable."""
    gallery = _get_zone_gallery()
    if gallery is not None:
        return gallery
    raise HTTPException(503, "Zone gallery subsystem unavailable.")
def _bump_zones_version() -> int:
    """Increment ``zones_version`` in the recognition state file and return the new value."""
    next_version = recognition_state.read(STATE_PATH).zones_version + 1
    recognition_state.mutate(STATE_PATH, zones_version=next_version)
    return next_version
def _validate_image(content: bytes, filename: str | None = None) -> None:
    """Reject uploads that are oversized, near-empty, or not JPEG/PNG.

    The format is decided from the leading magic bytes only; ``filename`` is
    used just for the error message.

    Raises:
        HTTPException: 400 for a payload under 16 bytes or an unknown format
            (plus whatever ``check_upload_size`` raises).
    """
    check_upload_size(content)
    if len(content) < 16:
        raise HTTPException(400, "Image too small / empty.")
    is_jpeg = content[:3] == b"\xff\xd8\xff"
    is_png = content[:8] == b"\x89PNG\r\n\x1a\n"
    if is_jpeg or is_png:
        return
    raise HTTPException(400, f"Only JPEG/PNG accepted (got {filename or 'unknown'}).")
def _safe_photo_name(name: str) -> None:
if "/" in name or ".." in name or "\x00" in name:
raise HTTPException(400, "Invalid photo name.")
def _resolve_faces(face_ids: list[int]) -> list[dict]:
    """Map linked face ids to ``[{id, name}]`` using the face gallery.

    ``name`` is None when the gallery is unavailable, has no entry for the id,
    or the lookup raises.
    """
    gallery = _get_face_gallery()

    def _name_of(face_id):
        if gallery is None:
            return None
        try:
            entry = gallery.get(face_id)
            return entry.name if entry else None
        except Exception:
            return None

    return [{"id": face_id, "name": _name_of(face_id)} for face_id in face_ids]
def _place_to_dict(p) -> dict:
    """Serialise a place via ``to_dict()`` and add its resolved ``faces`` list."""
    payload = p.to_dict()
    payload["faces"] = _resolve_faces(p.face_ids)
    return payload
def _zone_to_dict(z) -> dict:
return {
"id": z.id, "name": z.name, "description": z.description,
"linked_map": getattr(z, "linked_map", None),
"added_at": z.added_at,
"places": [_place_to_dict(p) for p in z.places],
}
async def _maybe_drive_to_place(zone, place) -> Optional[dict]:
"""If the place links a nav2 place AND its zone's map is the one currently
localized, actually DRIVE there (arbiter-gated + arm arrival monitor).
Returns the drive outcome, or None when the place isn't drivable (no link).
Best-effort: never raises into the caller."""
nav_place = getattr(place, "nav_place", None)
linked_map = getattr(zone, "linked_map", None)
if not nav_place or not linked_map:
return None
try:
from Project.Sanad.dashboard.routes import navigation as navmod
from Project.Sanad.dashboard.routes import _arbiter
except Exception:
return {"ok": False, "reason": "nav_unavailable"}
client = getattr(navmod, "_CLIENT", None)
if client is None:
return {"ok": False, "reason": "nav_unavailable"}
try:
st = await asyncio.to_thread(client.status)
body = st.as_dict() if hasattr(st, "as_dict") else dict(st)
except Exception as exc: # noqa: BLE001
return {"ok": False, "reason": "status_error", "detail": str(exc)[:120]}
if not body.get("bringup_alive"):
return {"ok": False, "reason": "no_map"}
# The robot can only drive in the currently-localized map. Require the
# zone's linked map to match (compare on the sanitized .db stem).
active = (body.get("active_map") or "").strip().lower()
want = (linked_map or "").strip().lower()
if want.endswith(".db"):
want = want[:-3]
if active and want and active != want:
return {"ok": False, "reason": "wrong_map",
"active": body.get("active_map"), "want": linked_map}
if not _arbiter.acquire_nav():
return {"ok": False, "reason": "manual_armed"}
drive = await asyncio.to_thread(client.goto, nav_place)
if isinstance(drive, dict) and not drive.get("ok", True):
_arbiter.release_nav()
return {"ok": False, "reason": "dispatch_failed", "detail": drive}
try:
from Project.Sanad.navigation.goal_monitor import arm_goal
arm_goal(nav_place)
except Exception:
pass
return {"ok": True, "resolved": nav_place}
def _nav_target_dict(st, gallery) -> Optional[dict]:
zid, pid = st.nav_target_zone_id, st.nav_target_place_id
if not zid or not pid:
return None
zone_name = place_name = None
if gallery is not None:
try:
z = gallery.get_zone(zid)
zone_name = z.name if z else None
p = gallery.get_place(zid, pid)
place_name = p.name if p else None
except Exception:
pass
return {"zone_id": zid, "place_id": pid,
"zone_name": zone_name, "place_name": place_name}
# ── state + toggle ──────────────────────────────────────────
@router.get("/state")
async def get_state():
    """Return the zone-recognition flag, version, zone/place counts and nav target.

    The counts stay at 0 when the zone gallery is unavailable or listing fails.
    """
    state = recognition_state.read(STATE_PATH)
    gallery = _get_zone_gallery()
    zones_count = 0
    places_count = 0
    if gallery is not None:
        try:
            all_zones = gallery.list_zones()
            zones_count = len(all_zones)
            places_count = sum(len(zone.places) for zone in all_zones)
        except Exception:
            pass
    return {
        "zone_rec_enabled": state.zone_rec_enabled,
        "zones_version": state.zones_version,
        "zones_count": zones_count,
        "places_count": places_count,
        "nav_target": _nav_target_dict(state, gallery),
    }
@router.post("/zone-rec")
async def set_zone_rec(on: bool = Query(...)):
    """Turn the robot's knowledge of zones & places on or off (applied hot)."""
    enabled = bool(on)
    state = recognition_state.mutate(STATE_PATH, zone_rec_enabled=enabled)
    log.info("zone recognition %s", "ON" if on else "OFF")
    return {"ok": True, "zone_rec_enabled": state.zone_rec_enabled}
@router.post("/sync")
async def sync_zones():
    """Bump ``zones_version`` on request and return the new value."""
    new_version = _bump_zones_version()
    log.info("zones sync requested → v.%d", new_version)
    return {"ok": True, "zones_version": new_version}
# ── zones CRUD ──────────────────────────────────────────────
class NamePayload(BaseModel):
    # Body of the zone/place rename endpoints.
    name: Optional[str] = None


class DescribePayload(BaseModel):
    # Body of the zone/place describe endpoints.
    description: Optional[str] = None


class FacesPayload(BaseModel):
    # Face ids to link to a place.
    face_ids: list[int] = []


class LinkMapPayload(BaseModel):
    # nav2 map .db basename (e.g. "office.db"); None/"" unlinks.
    map: Optional[str] = None


class NavPlacePayload(BaseModel):
    # nav2 place name in the zone's linked map; None/"" unlinks.
    nav_place: Optional[str] = None
@router.get("")
async def list_zones():
    """Return every zone (with its places) and the total zone count."""
    gallery = _require_zones()
    all_zones = gallery.list_zones()
    serialised = [_zone_to_dict(zone) for zone in all_zones]
    return {"zones": serialised, "total": len(all_zones)}
@router.post("/create")
async def create_zone(name: Optional[str] = Query(default=None),
                      description: Optional[str] = Query(default=None)):
    """Create a zone; at least a non-blank name or description is required (else 400)."""
    gallery = _require_zones()
    has_name = bool((name or "").strip())
    has_description = bool((description or "").strip())
    if not (has_name or has_description):
        raise HTTPException(400, "A zone needs at least a name or a description.")
    zone = gallery.create_zone(name=name, description=description)
    _bump_zones_version()
    return {"ok": True, "zone": _zone_to_dict(zone)}
@router.post("/{zone_id}/rename")
async def rename_zone(zone_id: int, payload: NamePayload):
    """Rename a zone, bump ``zones_version`` and return the updated zone (404 if unknown)."""
    gallery = _require_zones()
    try:
        gallery.rename_zone(zone_id, payload.name)
    except FileNotFoundError as err:
        raise HTTPException(404, str(err))
    _bump_zones_version()
    updated = gallery.get_zone(zone_id)
    return {"ok": True, "zone": _zone_to_dict(updated)}
@router.post("/{zone_id}/describe")
async def describe_zone(zone_id: int, payload: DescribePayload):
    """Set a zone's description, bump ``zones_version`` and return the zone (404 if unknown)."""
    gallery = _require_zones()
    try:
        gallery.describe_zone(zone_id, payload.description)
    except FileNotFoundError as err:
        raise HTTPException(404, str(err))
    _bump_zones_version()
    updated = gallery.get_zone(zone_id)
    return {"ok": True, "zone": _zone_to_dict(updated)}
@router.delete("/{zone_id}")
async def delete_zone(zone_id: int):
    """Delete a zone (404 if unknown) and clear the nav target if it pointed into it."""
    gallery = _require_zones()
    try:
        gallery.delete_zone(zone_id)
    except FileNotFoundError as err:
        raise HTTPException(404, str(err))
    # A destination inside the removed zone no longer exists — reset it.
    state = recognition_state.read(STATE_PATH)
    target_was_here = state.nav_target_zone_id == zone_id
    if target_was_here:
        recognition_state.mutate(STATE_PATH, nav_target_zone_id=0, nav_target_place_id=0)
    _bump_zones_version()
    return {"ok": True, "deleted": zone_id}
# ── places CRUD (within a zone) ─────────────────────────────
@router.post("/{zone_id}/places/create")
async def create_place(
    zone_id: int,
    name: Optional[str] = Query(default=None),
    description: Optional[str] = Query(default=None),
    face_ids: list[int] = Query(default=[]),
    nav_place: Optional[str] = Query(default=None),
    files: Optional[list[UploadFile]] = File(default=None),
):
    """Create a place inside a zone, optionally with reference photos.

    Responds 404 for an unknown zone and 400 when both name and description
    are blank or an uploaded image is invalid. Empty uploads are skipped.
    """
    gallery = _require_zones()
    if gallery.get_zone(zone_id) is None:
        raise HTTPException(404, f"zone_{zone_id} not found")
    has_name = bool((name or "").strip())
    has_description = bool((description or "").strip())
    if not (has_name or has_description):
        raise HTTPException(400, "A place needs at least a name or a description.")

    photos: list[bytes] = []
    for upload in files or []:
        data = await upload.read()
        if data:
            _validate_image(data, upload.filename)
            photos.append(data)

    place = gallery.create_place(
        zone_id,
        name=name,
        description=description,
        face_ids=face_ids,
        image_bytes_list=photos or None,
        nav_place=nav_place,
    )
    _bump_zones_version()
    return {"ok": True, "place": _place_to_dict(place)}
@router.post("/{zone_id}/link_map")
async def link_zone_map(zone_id: int, payload: LinkMapPayload):
    """Bind the zone to a nav2 map .db, or unbind it.

    A zone needs a linked map before its places can link to that map's nav
    places and before Gemini Nav can drive in it. 404 for an unknown zone.
    """
    gallery = _require_zones()
    try:
        gallery.set_zone_map(zone_id, payload.map)
    except FileNotFoundError as err:
        raise HTTPException(404, str(err))
    _bump_zones_version()
    updated = gallery.get_zone(zone_id)
    return {"ok": True, "zone": _zone_to_dict(updated)}
@router.post("/{zone_id}/places/{place_id}/nav_link")
async def link_place_nav(zone_id: int, place_id: int, payload: NavPlacePayload):
    """Link a place to a nav2 place name in the zone's map, or unlink it.

    The link is what makes the place drivable from voice / 'Go here'.
    404 when the gallery reports the target as missing.
    """
    gallery = _require_zones()
    try:
        gallery.set_place_nav(zone_id, place_id, payload.nav_place)
    except FileNotFoundError as err:
        raise HTTPException(404, str(err))
    _bump_zones_version()
    updated = gallery.get_place(zone_id, place_id)
    return {"ok": True, "place": _place_to_dict(updated)}
@router.post("/{zone_id}/places/{place_id}/rename")
async def rename_place(zone_id: int, place_id: int, payload: NamePayload):
    """Rename a place, bump ``zones_version`` and return the place (404 if missing)."""
    gallery = _require_zones()
    try:
        gallery.rename_place(zone_id, place_id, payload.name)
    except FileNotFoundError as err:
        raise HTTPException(404, str(err))
    _bump_zones_version()
    updated = gallery.get_place(zone_id, place_id)
    return {"ok": True, "place": _place_to_dict(updated)}
@router.post("/{zone_id}/places/{place_id}/describe")
async def describe_place(zone_id: int, place_id: int, payload: DescribePayload):
    """Set a place's description, bump ``zones_version`` and return the place (404 if missing)."""
    gallery = _require_zones()
    try:
        gallery.describe_place(zone_id, place_id, payload.description)
    except FileNotFoundError as err:
        raise HTTPException(404, str(err))
    _bump_zones_version()
    updated = gallery.get_place(zone_id, place_id)
    return {"ok": True, "place": _place_to_dict(updated)}
@router.post("/{zone_id}/places/{place_id}/faces")
async def set_place_faces(zone_id: int, place_id: int, payload: FacesPayload):
    """Replace the set of saved faces linked to this place (404 if missing)."""
    gallery = _require_zones()
    try:
        gallery.set_place_faces(zone_id, place_id, payload.face_ids)
    except FileNotFoundError as err:
        raise HTTPException(404, str(err))
    _bump_zones_version()
    updated = gallery.get_place(zone_id, place_id)
    return {"ok": True, "place": _place_to_dict(updated)}
@router.post("/{zone_id}/places/{place_id}/capture")
async def capture_to_place(zone_id: int, place_id: int):
    """Grab a fresh camera frame and store it as a reference photo of the place.

    Responds 409 when the camera is not running or has no frame yet, and 404
    when the gallery reports the place as missing. On success
    ``zones_version`` is bumped and the updated place is returned.
    """
    g = _require_zones()
    cam = _get_camera()
    if cam is None or not cam.is_running():
        raise HTTPException(409, "Camera is not running. Toggle Vision ON first.")
    # get_fresh_frame is given a 1.5 s timeout, so run it in a worker thread
    # rather than on the event loop, where it would stall every other request.
    jpeg = await asyncio.to_thread(cam.get_fresh_frame, max_age_s=0.5, timeout_s=1.5)
    if not jpeg:
        raise HTTPException(409, "Camera has no frame yet.")
    try:
        fname = g.add_photo(zone_id, place_id, jpeg)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc))
    _bump_zones_version()
    return {"ok": True, "added": fname, "place": _place_to_dict(g.get_place(zone_id, place_id))}
@router.post("/{zone_id}/places/{place_id}/upload")
async def upload_to_place(zone_id: int, place_id: int,
                          files: list[UploadFile] = File(...)):
    """Add one or more uploaded reference photos to a place.

    Every file is read and validated BEFORE any of them is stored, so a bad
    file rejects the whole request instead of leaving the earlier files saved
    without a ``zones_version`` bump.

    Responds 404 when the place is missing and 400 (from ``_validate_image``)
    when a file is not an acceptable JPEG/PNG.
    """
    g = _require_zones()
    if g.get_place(zone_id, place_id) is None:
        raise HTTPException(404, f"zone_{zone_id}/place_{place_id} not found")

    # Pass 1: read + validate everything (nothing is written yet).
    contents: list[bytes] = []
    for f in files:
        content = await f.read()
        _validate_image(content, f.filename)
        contents.append(content)

    # Pass 2: store the validated images.
    added: list[str] = []
    for content in contents:
        try:
            added.append(g.add_photo(zone_id, place_id, content))
        except FileNotFoundError as exc:
            raise HTTPException(404, str(exc))
    _bump_zones_version()
    return {"ok": True, "added": added, "place": _place_to_dict(g.get_place(zone_id, place_id))}
@router.delete("/{zone_id}/places/{place_id}")
async def delete_place(zone_id: int, place_id: int):
    """Delete a place (404 if missing) and clear the nav target if it was this place."""
    gallery = _require_zones()
    try:
        gallery.delete_place(zone_id, place_id)
    except FileNotFoundError as err:
        raise HTTPException(404, str(err))
    state = recognition_state.read(STATE_PATH)
    is_current_target = (
        state.nav_target_zone_id == zone_id
        and state.nav_target_place_id == place_id
    )
    if is_current_target:
        recognition_state.mutate(STATE_PATH, nav_target_zone_id=0, nav_target_place_id=0)
    _bump_zones_version()
    return {"ok": True, "deleted": place_id}
@router.delete("/{zone_id}/places/{place_id}/photo/{photo_name}")
async def delete_place_photo(zone_id: int, place_id: int, photo_name: str):
    """Remove one reference photo from a place (400 for an unsafe name, 404 if missing)."""
    gallery = _require_zones()
    _safe_photo_name(photo_name)
    try:
        gallery.delete_photo(zone_id, place_id, photo_name)
    except FileNotFoundError as err:
        raise HTTPException(404, str(err))
    _bump_zones_version()
    return {"ok": True, "deleted": photo_name}
@router.get("/{zone_id}/places/{place_id}/photo/{photo_name}")
async def get_place_photo(zone_id: int, place_id: int, photo_name: str,
                          download: int = Query(default=0)):
    """Serve one reference photo; a truthy ``download`` adds an attachment header.

    Responds 400 for an unsafe name and 404 when the gallery has no such photo.
    The media type is PNG for a ``.png`` suffix and JPEG otherwise.
    """
    gallery = _require_zones()
    _safe_photo_name(photo_name)
    photo_path = gallery.get_photo(zone_id, place_id, photo_name)
    if photo_path is None:
        raise HTTPException(404, "Photo not found.")
    if photo_path.suffix.lower() == ".png":
        media = "image/png"
    else:
        media = "image/jpeg"
    headers = {}
    if download:
        attachment = f'attachment; filename="zone_{zone_id}_place_{place_id}_{photo_name}"'
        headers["Content-Disposition"] = attachment
    return FileResponse(photo_path, media_type=media, headers=headers)
@router.get("/{zone_id}/places/{place_id}/download.zip")
async def download_place_zip(zone_id: int, place_id: int):
    """Send the gallery's zip archive of a place as a download (404 if missing)."""
    gallery = _require_zones()
    try:
        archive = gallery.zip_place(zone_id, place_id)
    except FileNotFoundError as err:
        raise HTTPException(404, str(err))
    response_headers = {
        "Content-Disposition": f'attachment; filename="zone_{zone_id}_place_{place_id}.zip"',
        "Content-Length": str(len(archive)),
    }
    return StreamingResponse(
        io.BytesIO(archive),
        media_type="application/zip",
        headers=response_headers,
    )
# ── "go here" navigation target ─────────────────────────────
@router.post("/{zone_id}/places/{place_id}/go")
async def go_to_place(zone_id: int, place_id: int):
    """Make this place the active destination and drive there when possible.

    Two effects:
      1. ``nav_target`` is recorded, so the Gemini child primes on the
         reference photo and announces the destination.
      2. If the place links a nav2 place in this zone's (currently-localized)
         map, a Nav2 goal is dispatched (arbiter-gated, with arrival
         monitoring).
    A place with no nav link is announce-only. 404 when the place is missing.
    """
    gallery = _require_zones()
    place = gallery.get_place(zone_id, place_id)
    if place is None:
        raise HTTPException(404, f"zone_{zone_id}/place_{place_id} not found")
    recognition_state.mutate(
        STATE_PATH,
        nav_target_zone_id=zone_id,
        nav_target_place_id=place_id,
    )
    log.info("nav target set → zone_%d/place_%d (%s)", zone_id, place_id,
             place.name or "(unnamed)")
    drive_result = await _maybe_drive_to_place(gallery.get_zone(zone_id), place)
    target = {"zone_id": zone_id, "place_id": place_id, "place_name": place.name}
    return {"ok": True, "nav_target": target, "drive": drive_result}
@router.post("/nav/clear")
async def clear_nav_target():
    """Reset the stored navigation target (both ids back to 0) and report it as cleared."""
    cleared = {"nav_target_zone_id": 0, "nav_target_place_id": 0}
    recognition_state.mutate(STATE_PATH, **cleared)
    log.info("nav target cleared")
    return {"ok": True, "nav_target": None}
def _resolve_map_path(client, linked_map: str) -> Optional[str]:
"""Find the .db path for a zone's linked map name via the nav client."""
want = (linked_map or "").strip().lower()
want_stem = want[:-3] if want.endswith(".db") else want
try:
maps = client.list_maps() or []
except Exception:
return None
for m in maps:
nm = (m.get("name") or "").strip().lower()
if nm == want or (nm[:-3] if nm.endswith(".db") else nm) == want_stem:
return m.get("path")
return None
@router.post("/{zone_id}/gemini_nav/start")
async def gemini_nav_start(zone_id: int):
    """Enter 'Gemini Nav' for a zone: localize the zone's map, turn on camera +
    face + zone recognition + movement, ensure the Gemini session is live, and
    greet the user so they can converse to navigate.

    The robot only ever runs ONE map; this loads the zone's map in localize-only
    mode (so it cannot fresh-map while driving), exactly as the user requires.

    Raises HTTPException(404) for an unknown zone and HTTPException(400) when
    the zone has no linked map. Map-load and greeting failures are NOT raised:
    they are reported in the ``loaded`` field / logged, and the call still
    returns ``ok: True``.
    """
    g = _require_zones()
    zone = g.get_zone(zone_id)
    if zone is None:
        raise HTTPException(404, f"zone_{zone_id} not found")
    linked_map = getattr(zone, "linked_map", None)
    if not linked_map:
        raise HTTPException(400, "This zone has no linked nav2 map — link one first.")
    # 1) Localize the zone's map (single bringup, mode 3 — no fresh mapping).
    # `loaded` keeps this default when the navigation module has no client.
    loaded: dict = {"ok": False, "reason": "nav_unavailable"}
    try:
        # Imported lazily inside the handler rather than at module import time.
        from Project.Sanad.dashboard.routes import navigation as navmod
        client = getattr(navmod, "_CLIENT", None)
        if client is not None:
            # Both client calls run in a worker thread to keep the event loop free.
            db_path = await asyncio.to_thread(_resolve_map_path, client, linked_map)
            if db_path:
                loaded = await asyncio.to_thread(client.load_map, db_path)
            else:
                loaded = {"ok": False, "reason": "map_not_found", "map": linked_map}
    except Exception as exc:  # noqa: BLE001
        # Best-effort: report the failure in the response instead of raising.
        loaded = {"ok": False, "reason": "load_error", "detail": str(exc)[:160]}
    # 2) Camera + face + zone recognition + movement ON for the session.
    recognition_state.mutate(STATE_PATH,
                             vision_enabled=True, face_rec_enabled=True,
                             zone_rec_enabled=True, movement_enabled=True)
    _bump_zones_version()
    # 3) Ensure the Gemini session is live, then greet (zone + drivable places).
    session_started = False
    try:
        from Project.Sanad.main import live_sub
        if live_sub is not None:
            # NOTE(review): indentation was reconstructed — session_started is
            # assumed to be set only when this call actually started the session.
            if hasattr(live_sub, "is_running") and not live_sub.is_running():
                await asyncio.to_thread(live_sub.start)
                session_started = True
            # A place counts as drivable when it has a truthy nav_place attribute.
            drivable = [p.name or p.nav_place for p in zone.places
                        if getattr(p, "nav_place", None)]
            zname = zone.name or f"zone {zone_id}"
            if drivable:
                placelist = ", ".join(str(x) for x in drivable)
                greet = (f"You are now in the '{zname}' zone. You can drive the "
                         f"user to: {placelist}. Greet the user warmly in your "
                         f"normal Khaleeji style and ask where they would like to go.")
            else:
                greet = (f"You are now in the '{zname}' zone, but no drivable "
                         f"places are linked to its map yet. Greet the user and "
                         f"say places still need to be linked before you can drive.")
            if hasattr(live_sub, "send_state"):
                live_sub.send_state("nav_zone", greet)
    except Exception as exc:  # noqa: BLE001
        # A failed greeting must not fail the request; it is only logged.
        log.warning("gemini_nav greet failed: %s", exc)
    return {"ok": True, "zone_id": zone_id, "zone": _zone_to_dict(zone),
            "loaded": loaded, "session_started": session_started}

3761
vendor/Sanad/dashboard/static/index.html vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,90 @@
"""G1 29-DoF motor → name / mesh mapping for the 3D temperature dashboard.
Ported verbatim from Marcus/Features/TempMonitor/config_g1.py so the copied
three.js front-end (static/temp3d/index.html) binds temperature colours to the
correct STL meshes. `build_payload()` turns the arm controller's raw lowstate
snapshot into the exact 'motor_update' payload shape that front-end expects.
"""
from __future__ import annotations
from typing import Any, Optional
# Motor ID → human name (29 motors = 29 DOF)
MOTOR_NAMES: dict[int, str] = {
    0: "Left Hip Pitch", 1: "Left Hip Roll", 2: "Left Hip Yaw", 3: "Left Knee",
    4: "Left Ankle Pitch", 5: "Left Ankle Roll",
    6: "Right Hip Pitch", 7: "Right Hip Roll", 8: "Right Hip Yaw", 9: "Right Knee",
    10: "Right Ankle Pitch", 11: "Right Ankle Roll",
    12: "Waist Yaw", 13: "Waist Roll", 14: "Waist Pitch",
    15: "Left Shoulder Pitch", 16: "Left Shoulder Roll", 17: "Left Shoulder Yaw",
    18: "Left Elbow", 19: "Left Wrist Roll", 20: "Left Wrist Pitch", 21: "Left Wrist Yaw",
    22: "Right Shoulder Pitch", 23: "Right Shoulder Roll", 24: "Right Shoulder Yaw",
    25: "Right Elbow", 26: "Right Wrist Roll", 27: "Right Wrist Pitch", 28: "Right Wrist Yaw",
}

# Motor ID → URDF link / STL mesh name
MOTOR_TO_MESH: dict[int, str] = {
    0: "left_hip_pitch_link", 1: "left_hip_roll_link", 2: "left_hip_yaw_link",
    3: "left_knee_link", 4: "left_ankle_pitch_link", 5: "left_ankle_roll_link",
    6: "right_hip_pitch_link", 7: "right_hip_roll_link", 8: "right_hip_yaw_link",
    9: "right_knee_link", 10: "right_ankle_pitch_link", 11: "right_ankle_roll_link",
    12: "waist_yaw_link", 13: "waist_roll_link", 14: "torso_link",
    15: "left_shoulder_pitch_link", 16: "left_shoulder_roll_link", 17: "left_shoulder_yaw_link",
    18: "left_elbow_link", 19: "left_wrist_roll_link", 20: "left_wrist_pitch_link",
    21: "left_wrist_yaw_link",
    22: "right_shoulder_pitch_link", 23: "right_shoulder_roll_link", 24: "right_shoulder_yaw_link",
    25: "right_elbow_link", 26: "right_wrist_roll_link", 27: "right_wrist_pitch_link",
    28: "right_wrist_yaw_link",
}

# Temperature thresholds (°C) — the three.js gradient maps MIN→MAX (blue→red).
TEMP_MIN = 30
TEMP_MAX = 120
TEMP_WARM_THRESHOLD = 45
TEMP_HOT_THRESHOLD = 60


def _coerce(v: Optional[float]) -> float:
    """Temperatures default to 0 when the firmware didn't report one, so the
    front-end's Math.max / .toFixed never sees null/NaN."""
    return float(v) if v is not None else 0.0


def build_payload(temps: list[dict[str, Any]],
                  positions: list[float],
                  timestamp: float) -> dict[str, Any]:
    """Build the Marcus-compatible 'motor_update' payload.

    `temps`      arm.get_motor_temps(): [{motor_id, surface, winding}]
    `positions`  arm.get_current_q(): joint angles indexed by motor id
    `timestamp`  passed through unchanged under the "timestamp" key

    Returns ``{"temperatures": [...], "positions": [...], "timestamp": ...}``.
    Either list may be None/empty; the matching output list is then empty.
    """
    temperatures: list[dict[str, Any]] = []
    for t in temps or []:
        i = t.get("motor_id")
        surface = t.get("surface")
        winding = t.get("winding")
        # Average only when both sensors reported; otherwise use whichever one
        # exists (0.0 when neither does).
        if surface is not None and winding is not None:
            avg = (_coerce(surface) + _coerce(winding)) / 2.0
        else:
            avg = _coerce(surface if surface is not None else winding)
        entry: dict[str, Any] = {
            "motor_id": i,
            "motor_name": MOTOR_NAMES.get(i, f"Motor {i}"),
            "mesh_name": MOTOR_TO_MESH.get(i, ""),
            "surface": _coerce(surface),
            "winding": _coerce(winding),
            "temp1": _coerce(surface),
            "temp2": _coerce(winding),
            "avg": avg,
        }
        # The lower bound matters: a negative id would otherwise index
        # `positions` from the END and attach some other joint's angle.
        if positions and isinstance(i, int) and 0 <= i < len(positions):
            entry["position"] = float(positions[i])
        temperatures.append(entry)
    pos_list: list[dict[str, Any]] = [
        {"motor_id": i, "position": float(q), "link_name": MOTOR_TO_MESH.get(i)}
        for i, q in enumerate(positions or [])
    ]
    return {"temperatures": temperatures, "positions": pos_list,
            "timestamp": timestamp}

View File

View File

@ -0,0 +1,104 @@
"""WebSocket endpoint for real-time log streaming.
Clients connect to /ws/logs and receive live log lines from all modules.
"""
from __future__ import annotations
import asyncio
import threading
from collections import deque
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
from Project.Sanad.core.logger import set_ws_push
router = APIRouter()
MAX_WATCHERS = 50
# Ring buffer of recent log lines (shared across connections).
_recent: deque[str] = deque(maxlen=500)
# Each watcher is an (event_loop, queue) pair. We keep the loop so cross-thread
# producers can schedule the enqueue on the consumer's loop (asyncio.Queue is
# NOT thread-safe — calling put_nowait off-loop neither wakes the parked
# `await queue.get()` nor safely mutates the queue's internals).
_watchers: set[tuple[asyncio.AbstractEventLoop, asyncio.Queue]] = set()
_watchers_lock = threading.Lock()
def push_log_line(line: str):
    """Record one log line and fan it out to every connected watcher.

    May be called from ANY thread (logging is multi-threaded). The append to
    ``_recent`` and the copy of the watcher set happen together under
    ``_watchers_lock`` — the same lock ``log_ws`` holds while it registers and
    snapshots history — so a connecting client sees each line either in its
    history replay or live, never both. The enqueue itself is handed to each
    watcher's own loop with ``call_soon_threadsafe`` because
    ``asyncio.Queue.put_nowait`` is not safe to call from a foreign thread.
    """
    with _watchers_lock:
        _recent.append(line)
        targets = tuple(_watchers)
    for watcher_loop, watcher_queue in targets:
        try:
            watcher_loop.call_soon_threadsafe(_safe_put, watcher_queue, line)
        except RuntimeError:
            # Loop already closed — that watcher is going away; move on.
            continue
def _safe_put(q: asyncio.Queue, line: str) -> None:
"""Enqueue on the consumer's own loop thread (so it's safe)."""
try:
q.put_nowait(line)
except asyncio.QueueFull:
# Drop on overflow rather than block — logs are not critical data
pass
# Register with the logger so all log records are pushed to WS clients.
# Wrap so a logger registration failure doesn't break Dashboard import.
# Runs once, as a side effect of importing this module.
try:
    set_ws_push(push_log_line)
except Exception:
    # Deliberate best-effort: without the hook the endpoint still works, it
    # just never receives live lines.
    pass
@router.websocket("/ws/logs")
async def log_ws(ws: WebSocket):
    """Stream log lines to one WebSocket client: recent history, then live.

    At most MAX_WATCHERS clients are served; extra connections are closed
    with code 1013. The watcher is always unregistered on exit.
    """
    await ws.accept()
    loop = asyncio.get_running_loop()
    queue: asyncio.Queue[str] = asyncio.Queue(maxsize=200)
    watcher = (loop, queue)
    history: list = []
    with _watchers_lock:
        # Only non-blocking work happens under this *threading* lock. Awaiting
        # here would keep the lock held across a suspension, stalling every
        # thread that logs (push_log_line), and could deadlock the loop thread
        # if anything logged while the close was in flight.
        at_capacity = len(_watchers) >= MAX_WATCHERS
        if not at_capacity:
            # Register the live queue and snapshot history under the SAME lock
            # that push_log_line holds — so every line is either in this
            # history snapshot or arrives on the queue, never both.
            _watchers.add(watcher)
            history = list(_recent)
    if at_capacity:
        await ws.close(code=1013, reason="Too many log watchers")
        return
    try:
        # Send recent history first; live lines wait in the queue meanwhile.
        for line in history:
            await ws.send_text(line)
        while True:
            line = await queue.get()
            await ws.send_text(line)
    except WebSocketDisconnect:
        pass
    except Exception:
        # Any other error closes the connection cleanly
        try:
            await ws.close()
        except Exception:
            pass
    finally:
        with _watchers_lock:
            _watchers.discard(watcher)

View File

@ -0,0 +1,81 @@
"""WebSocket endpoint streaming G1 motor temperatures to the 3D dashboard (N1).
Polls the arm controller's throttled rt/lowstate snapshot (arm.get_motor_temps
/ arm.get_current_q — NO second DDS subscriber, no second ChannelFactoryInitialize)
and pushes a Marcus-compatible 'motor_update' payload to each connected client.
Front-end: dashboard/static/temp3d/index.html (ported three.js view), which
opens this socket via a tiny shim in place of socket.io.
"""
from __future__ import annotations
import asyncio
import threading
import time
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
from Project.Sanad.core.logger import get_logger
from Project.Sanad.dashboard.temp_motor_map import build_payload
log = get_logger("motor_temps_ws")
router = APIRouter()
MAX_WATCHERS = 20
PUSH_HZ = 8.0 # ~8 fps is plenty for a temperature heatmap
_count = 0
_count_lock = threading.Lock()
def _get_arm():
"""Lazy import — avoids a circular import on dashboard load."""
try:
from Project.Sanad.main import arm # type: ignore
return arm
except Exception:
return None
@router.websocket("/ws/motor-temps")
async def motor_temps_ws(ws: WebSocket):
    """Push a 'motor_update' payload to one client about PUSH_HZ times a second.

    At most MAX_WATCHERS clients are served; extra connections are closed
    with code 1013. When the arm is unavailable, or one of its getters
    raises, the payload is built from empty lists so the stream keeps going.
    """
    await ws.accept()
    global _count
    with _count_lock:
        # Decide under the threading lock, act after releasing it: awaiting
        # while the lock is held would keep it locked across a suspension and
        # block any other handler that needs the counter.
        admitted = _count < MAX_WATCHERS
        if admitted:
            _count += 1
    if not admitted:
        await ws.close(code=1013, reason="Too many temperature watchers")
        return
    period = 1.0 / PUSH_HZ
    try:
        while True:
            arm = _get_arm()
            temps: list = []
            positions: list = []
            if arm is not None:
                # Each getter is guarded separately so one failing source does
                # not blank out the other.
                try:
                    temps = arm.get_motor_temps()
                except Exception:
                    temps = []
                try:
                    positions = arm.get_current_q()
                except Exception:
                    positions = []
            payload = build_payload(temps, positions, time.time())
            await ws.send_json(payload)
            await asyncio.sleep(period)
    except WebSocketDisconnect:
        pass
    except Exception:
        # Any other error (client gone mid-send, serialise issue) closes cleanly.
        try:
            await ws.close()
        except Exception:
            pass
    finally:
        # Reached only by admitted clients, so the counter stays balanced.
        with _count_lock:
            _count -= 1

View File

@ -0,0 +1,323 @@
"""WebSocket → PTY bridge for the dashboard's Terminal tab.
Spawns a shell (bash by default) inside a pseudo-terminal on the robot and
relays stdin/stdout to a browser xterm.js instance over WebSocket. From the
operator's seat this is functionally identical to an in-browser
`ssh unitree@<robot>` except no SSH handshake is needed because the
dashboard process already runs as unitree on the robot. The Terminal tab
connects to ws://<dashboard>/ws/terminal and you land in unitree's shell
directly.
PROTOCOL — text frames only. Control vs. keystrokes are disambiguated by
the leading byte:
client server:
"\\x1f" + json-encoded control object (init / resize)
e.g. "\\x1f{\\"type\\":\\"init\\",\\"cols\\":80,\\"rows\\":24}"
<any other text> keystrokes written to PTY
server client:
<text> PTY stdout/stderr chunks
The \\x1f prefix (ASCII Unit Separator) is the disambiguator. If we just
JSON-sniffed every message, a user pasting `{"type":"resize",...}` into
their shell would silently resize the PTY instead of pasting the text.
SECURITY NOTE: anyone who can reach the dashboard URL gets shell access
as the unitree user. The dashboard already exposes equally-powerful
endpoints (E-STOP, motion replay, audio mute, etc.) so this isn't a new
threat class but it IS a single-bullet kill switch for the robot. Bind
the dashboard to a trusted network only.
"""
from __future__ import annotations
import asyncio
import fcntl
import json
import os
import pty
import select
import shutil
import signal
import struct
import termios
import threading
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
from Project.Sanad.core.logger import get_logger
log = get_logger("terminal_ws")
router = APIRouter()
# Magic prefix that distinguishes control messages from raw keystrokes.
# ASCII 0x1F (Unit Separator) — not produced by normal keyboard input,
# so user-pasted JSON can never spoof a control frame.
_CTRL_PREFIX = "\x1f"
# Concurrent-session cap so a runaway tab can't spawn 50 bashes on the robot.
_MAX_SESSIONS = 4
_active: set[int] = set()
_active_lock = threading.Lock()
# Bounded queue depth between the PTY reader thread and the WS sender.
# A chatty shell command (e.g. `yes`, `cat /dev/urandom`) at gigabytes/sec
# would otherwise pile up unbounded asyncio tasks + string refs. Past the
# cap we drop chunks and surface a single drop notice — ANSI may corrupt
# briefly but the session stays alive.
_SEND_QUEUE_MAX = 64
def _resolve_shell() -> list[str]:
"""Pick a sensible shell. SHELL env first, then /bin/bash, then sh."""
sh = os.environ.get("SHELL", "")
if sh and shutil.which(sh):
return [sh, "-i"]
if shutil.which("/bin/bash"):
return ["/bin/bash", "-i"]
return ["/bin/sh", "-i"]
def _set_pty_size(fd: int, cols: int, rows: int) -> None:
"""Inform the PTY of its new window size so curses-style apps (htop,
less, vim) lay out correctly."""
try:
# TIOCSWINSZ payload: rows, cols, xpixel, ypixel (xpixel/ypixel
# unused, kept 0).
fcntl.ioctl(fd, termios.TIOCSWINSZ,
struct.pack("HHHH", rows, cols, 0, 0))
except Exception as exc:
log.debug("TIOCSWINSZ failed (cols=%s rows=%s): %s", cols, rows, exc)
async def _reap_child(pid: int) -> None:
    """SIGHUP → wait briefly → SIGKILL → wait briefly → give up.

    Earlier version SIGKILLed unconditionally because the WNOHANG check
    happened immediately after SIGHUP (which never returns true that fast).
    Now we poll for up to ~1.5s after SIGHUP before escalating.

    Returns nothing; a child that survives SIGKILL is only reported with a
    warning log.
    """
    async def _wait_exit(timeout_s: float, interval_s: float = 0.1) -> bool:
        # Poll waitpid(WNOHANG) until the child is reaped or the deadline
        # passes. True means the child is gone; False means still running (or
        # waitpid failed with some other OSError).
        end = asyncio.get_running_loop().time() + timeout_s
        while asyncio.get_running_loop().time() < end:
            try:
                done_pid, _ = os.waitpid(pid, os.WNOHANG)
            except ChildProcessError:
                return True  # already reaped
            except OSError:
                return False
            if done_pid:
                return True
            # Yield to the event loop between polls instead of busy-waiting.
            await asyncio.sleep(interval_s)
        return False

    # 1. Polite request
    try:
        os.kill(pid, signal.SIGHUP)
    except ProcessLookupError:
        return
    except OSError as exc:
        log.debug("SIGHUP pid=%d: %s", pid, exc)
        return
    if await _wait_exit(1.5):
        return
    # 2. Force
    try:
        os.kill(pid, signal.SIGKILL)
    except ProcessLookupError:
        return
    except OSError as exc:
        log.debug("SIGKILL pid=%d: %s", pid, exc)
        return
    if not await _wait_exit(1.0):
        log.warning("terminal child pid=%d failed to exit after SIGKILL", pid)
@router.websocket("/ws/terminal")
async def terminal_ws(ws: WebSocket) -> None:
    """Bridge a browser xterm.js to a shell PTY on the robot.

    Lifecycle: accept → refuse if _MAX_SESSIONS shells are already open →
    fork a shell on a new PTY → relay PTY output to the socket (reader thread
    + sender task) and socket input to the PTY (this coroutine) → on exit,
    reap the child, stop the reader, close the PTY fd and free the slot.

    Frames starting with _CTRL_PREFIX are JSON control messages (init/resize)
    and are never written to the PTY; every other frame is raw keystrokes.
    """
    # Function-scope import so this handler does not depend on the module's
    # import list for the incremental decoder used below.
    import codecs

    await ws.accept()

    # Concurrent-session guard. Only the count check runs under the threading
    # lock; the awaits happen after it is released, otherwise the lock would
    # stay held across a suspension and block every other session's setup and
    # teardown.
    with _active_lock:
        at_capacity = len(_active) >= _MAX_SESSIONS
    if at_capacity:
        await ws.send_text(
            f"\r\n[terminal] Refused — already have {_MAX_SESSIONS} "
            f"open sessions. Close another tab and reconnect.\r\n"
        )
        await ws.close(code=1008)
        return

    # Fork + exec the shell. Parent gets the master fd; child becomes the
    # shell with stdin/stdout/stderr wired to the slave end.
    cmd = _resolve_shell()
    try:
        pid, fd = pty.fork()
    except OSError as exc:
        log.error("pty.fork failed: %s", exc)
        await ws.send_text(f"\r\n[terminal] pty.fork failed: {exc}\r\n")
        await ws.close(code=1011)
        return
    if pid == 0:
        # CHILD — set env so the shell is interactive and looks right.
        os.environ.setdefault("TERM", "xterm-256color")
        os.environ.setdefault("LANG", os.environ.get("LANG", "en_US.UTF-8"))
        try:
            os.execvp(cmd[0], cmd)
        except OSError as exc:
            # exec failed — printing to fd 2 reaches the parent via the
            # PTY so the browser sees the error before we _exit.
            os.write(2, f"[terminal] exec failed: {exc}\n".encode())
            os._exit(127)
        return  # unreachable in child

    # PARENT
    with _active_lock:
        _active.add(pid)
    log.info("terminal session started pid=%d cmd=%s", pid, cmd[0])
    loop = asyncio.get_running_loop()
    closed = asyncio.Event()
    # Bounded queue + dedicated sender task = backpressure. If the queue
    # fills up we drop the chunk and bump `dropped` so we can surface a
    # short notice in the stream.
    send_q: asyncio.Queue[str] = asyncio.Queue(maxsize=_SEND_QUEUE_MAX)
    dropped = 0
    # Incremental decoder: a multi-byte UTF-8 character split across two
    # 4096-byte reads is held back until complete, instead of turning into
    # replacement characters as a per-chunk bytes.decode would make it.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")

    def _reader_thread() -> None:
        """Drain PTY master fd → queue. Runs in a daemon thread and polls
        select with a short timeout so it notices `closed` promptly."""
        try:
            while not closed.is_set():
                try:
                    r, _, _ = select.select([fd], [], [], 0.1)
                except (OSError, ValueError):
                    break
                if not r:
                    continue
                try:
                    data = os.read(fd, 4096)
                except OSError:
                    break
                if not data:  # EOF — child exited / PTY closed
                    break
                try:
                    text = decoder.decode(data)
                except Exception:
                    continue
                if not text:
                    # Only a partial character so far; wait for the rest.
                    continue
                # The enqueue runs on the loop thread; it drops and counts
                # when the queue is full.
                try:
                    loop.call_soon_threadsafe(_enqueue, text)
                except RuntimeError:
                    # loop closed — bail
                    break
        finally:
            try:
                loop.call_soon_threadsafe(closed.set)
            except RuntimeError:
                # Loop already closed: nobody is left to notify.
                pass

    def _enqueue(text: str) -> None:
        nonlocal dropped
        try:
            send_q.put_nowait(text)
        except asyncio.QueueFull:
            dropped += 1

    async def _sender_task() -> None:
        """Drains send_q → WebSocket. Single consumer, all on the loop thread,
        so no extra locking is needed. A failed send ends the session."""
        nonlocal dropped
        while not closed.is_set():
            try:
                text = await asyncio.wait_for(send_q.get(), timeout=0.5)
            except asyncio.TimeoutError:
                continue
            try:
                await ws.send_text(text)
            except Exception as exc:
                log.info("terminal ws.send failed (likely client gone): %s", exc)
                closed.set()
                return
            # If we dropped chunks since the last successful send, tell
            # the user once so the ANSI corruption isn't mysterious.
            if dropped:
                d = dropped
                dropped = 0
                try:
                    await ws.send_text(
                        f"\r\n\x1b[2m[term: dropped {d} chunk(s) — slow client]"
                        f"\x1b[0m\r\n",
                    )
                except Exception:
                    closed.set()
                    return

    reader = threading.Thread(target=_reader_thread, daemon=True,
                              name=f"terminal-rx-{pid}")
    reader.start()
    sender = asyncio.create_task(_sender_task())
    # Initial sizing — xterm.js will send a {type:"init",...} control
    # frame right after onopen with the actual window size.
    _set_pty_size(fd, 80, 24)
    try:
        while not closed.is_set():
            try:
                msg = await asyncio.wait_for(ws.receive_text(), timeout=0.5)
            except asyncio.TimeoutError:
                continue
            except WebSocketDisconnect:
                break
            if not msg:
                continue
            # Control frame? Must start with the magic prefix. User-typed
            # / pasted text can never spoof this — \x1f isn't producible
            # by normal keyboard input.
            if msg[:1] == _CTRL_PREFIX:
                try:
                    ctrl = json.loads(msg[1:])
                except (json.JSONDecodeError, ValueError):
                    ctrl = None
                if isinstance(ctrl, dict) and ctrl.get("type") in ("init", "resize"):
                    # A malformed size (e.g. "cols": "abc") must not tear the
                    # session down: ignore the frame and keep the old size.
                    try:
                        cols = int(ctrl.get("cols") or 80)
                        rows = int(ctrl.get("rows") or 24)
                    except (TypeError, ValueError, OverflowError) as exc:
                        log.debug("terminal: ignoring bad resize frame: %s", exc)
                    else:
                        _set_pty_size(fd, cols, rows)
                # Either way, control frames are NEVER forwarded to PTY.
                continue
            # Plain keystrokes — write to PTY master.
            try:
                os.write(fd, msg.encode("utf-8", errors="replace"))
            except OSError as exc:
                log.info("terminal pty write failed (child likely exited): %s", exc)
                break
    finally:
        closed.set()
        try:
            sender.cancel()
        except Exception:
            pass
        try:
            try:
                await _reap_child(pid)
            except Exception as exc:
                log.debug("reap_child pid=%d: %s", pid, exc)
            # Let the reader notice `closed` and leave select() before the fd
            # is closed, so it can never poll a descriptor number the process
            # has since reused for something else.
            try:
                await loop.run_in_executor(None, reader.join, 1.0)
            except Exception as exc:
                log.debug("terminal reader join pid=%d: %s", pid, exc)
        finally:
            # Runs even if this handler is cancelled during the awaits above,
            # so the fd and the session slot are always released.
            try:
                os.close(fd)
            except OSError:
                pass
            with _active_lock:
                _active.discard(pid)
            log.info("terminal session ended pid=%d", pid)
        try:
            await ws.close()
        except Exception:
            pass

0
vendor/Sanad/data/audio/.gitkeep vendored Normal file
View File

1
vendor/Sanad/data/audio_device.json vendored Normal file
View File

@ -0,0 +1 @@
{}

5
vendor/Sanad/data/camera_device.json vendored Normal file
View File

@ -0,0 +1,5 @@
{
"profile_serial_assignments": {
"realsense_primary": ""
}
}

0
vendor/Sanad/data/faces/.gitkeep vendored Normal file
View File

0
vendor/Sanad/data/memories/.gitkeep vendored Normal file
View File

21
vendor/Sanad/data/motions/config.json vendored Normal file
View File

@ -0,0 +1,21 @@
{
"gemini": {
"api_key": "",
"model": "models/gemini-2.5-flash-native-audio-preview-12-2025",
"voice_name": "Charon"
},
"audio": {
"send_sample_rate": 16000,
"receive_sample_rate": 24000,
"chunk_size": 512,
"g1_volume": 100
},
"motion": {
"action_cooldown_sec": 1.0,
"replay_hz": 60.0
},
"dashboard": {
"host": "0.0.0.0",
"port": 8000
}
}

0
vendor/Sanad/data/photos/.gitkeep vendored Normal file
View File

0
vendor/Sanad/data/recordings/.gitkeep vendored Normal file
View File

0
vendor/Sanad/data/zones/.gitkeep vendored Normal file
View File

136
vendor/Sanad/examples/voice_example.py vendored Normal file
View File

@ -0,0 +1,136 @@
#!/usr/bin/env python3
"""voice_example.py — demos for each voice subsystem in isolation.
Each subcommand exercises one component so you can debug pieces without
running the full Sanad stack.
Usage:
python3 voice_example.py gemini "hello" # one-shot Gemini text→audio
python3 voice_example.py local_tts "hello" # local Coqui TTS
python3 voice_example.py typed_replay "hello" # typed replay engine
python3 voice_example.py live # spawn GeminiSubprocess
python3 voice_example.py status # show status of all subsystems
Assumes Project.Sanad is importable (run from repo root or with PYTHONPATH set).
"""
from __future__ import annotations
import argparse
import asyncio
import sys
def _demo_gemini(text: str) -> None:
    """One-shot demo: connect to Gemini, send ``text``, play the audio reply.

    The client is always disconnected, even when the request fails.
    """
    from Project.Sanad.gemini.client import GeminiVoiceClient
    from Project.Sanad.voice.audio_manager import AudioManager

    async def _session():
        gemini = GeminiVoiceClient()
        player = AudioManager()
        await gemini.connect()
        try:
            audio_bytes, text_parts = await gemini.send_text(text, owner="example")
            print(f"[gemini] got {len(audio_bytes)} bytes audio, text={text_parts}")
            if not audio_bytes:
                return
            # Playback runs in a worker thread so the event loop is not blocked.
            await asyncio.to_thread(player.play_pcm, audio_bytes, 1, 24000, 2)
        finally:
            await gemini.disconnect()

    asyncio.run(_session())
def _demo_local_tts(text: str) -> None:
    """Synthesize ``text`` with the local TTS engine and play the result."""
    from Project.Sanad.voice.local_tts import LocalTTSEngine
    from Project.Sanad.voice.audio_manager import AudioManager

    engine = LocalTTSEngine()
    player = AudioManager()
    samples = engine.synthesize(text)
    print(f"[local_tts] generated {len(samples)} bytes")
    player.play_pcm(samples, 1, 16000, 2)
def _demo_typed_replay(text: str) -> None:
    """Exercise the TypedReplayEngine end-to-end.

    Connects a Gemini client, runs ``engine.say(text)`` and prints the result.
    The client is disconnected in a ``finally`` so a failing ``say`` does not
    leave the session open (same pattern as ``_demo_gemini``).
    """
    from Project.Sanad.gemini.client import GeminiVoiceClient
    from Project.Sanad.voice.audio_manager import AudioManager
    from Project.Sanad.voice.typed_replay import TypedReplayEngine

    async def run():
        client = GeminiVoiceClient()
        await client.connect()
        try:
            audio = AudioManager()
            engine = TypedReplayEngine(client, audio)
            result = await engine.say(text)
            print(f"[typed_replay] {result}")
        finally:
            await client.disconnect()

    asyncio.run(run())
def _demo_live() -> None:
    """Spawn the live voice subprocess — same as dashboard /api/live-subprocess.

    Idles until Ctrl+C, then stops the subprocess and prints what stop() returned.
    """
    import time
    from Project.Sanad.gemini.subprocess import GeminiSubprocess

    manager = GeminiSubprocess()
    started = manager.start()
    print(f"[live] {started}")
    print("Running. Ctrl+C to stop.")
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        print("\n[live] stopping...")
        print(manager.stop())
def _demo_status() -> None:
    """Print the status of the Gemini client and, when importable, local TTS."""
    from Project.Sanad.gemini.client import GeminiVoiceClient
    try:
        from Project.Sanad.voice.local_tts import LocalTTSEngine
    except Exception:
        LocalTTSEngine = None

    gemini = GeminiVoiceClient()
    print("[gemini]", gemini.status())

    if not LocalTTSEngine:
        print("[local_tts] not installed")
        return
    try:
        engine = LocalTTSEngine()
        print("[local_tts]", engine.status())
    except Exception as exc:
        print(f"[local_tts] unavailable: {exc}")
def main():
    """Parse the command line and run the selected demo.

    Subcommands ``gemini``, ``local_tts`` and ``typed_replay`` take one
    positional ``text`` argument; ``live`` and ``status`` take none.
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        # Keep the module docstring's line breaks: the default formatter
        # re-wraps the multi-line "Usage:" block into one unreadable paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    sub = ap.add_subparsers(dest="cmd", required=True)
    for name in ("gemini", "local_tts", "typed_replay"):
        sp = sub.add_parser(name, help=f"demo {name}")
        sp.add_argument("text", help="text to speak")
    sub.add_parser("live", help="spawn live voice subprocess")
    sub.add_parser("status", help="print subsystem status")
    args = ap.parse_args()

    # Dispatch tables are built after parsing so a usage error exits first.
    text_demos = {
        "gemini": _demo_gemini,
        "local_tts": _demo_local_tts,
        "typed_replay": _demo_typed_replay,
    }
    plain_demos = {"live": _demo_live, "status": _demo_status}
    if args.cmd in text_demos:
        text_demos[args.cmd](args.text)
    elif args.cmd in plain_demos:
        plain_demos[args.cmd]()


if __name__ == "__main__":
    main()

0
vendor/Sanad/face/__init__.py vendored Normal file
View File

208
vendor/Sanad/face/emotion_frames.py vendored Normal file
View File

@ -0,0 +1,208 @@
"""Extra emotion frames for the LED mask, in the same 46x58 display space + RGB
style as :mod:`colorface` (black bg, cyan eyes, red mouth). These are the
expression frames Gemini can trigger via ``set_expression`` that the base
``colorface.default_frames`` does not draw (heart, laugh, love-eyes, cool,
sleepy, confused, kiss, star-struck, thumbs-up).
``emotion_frames(...)`` returns ``{name: raw_bytes}`` ready for the mask's DIY
image upload, exactly like ``colorface.default_frames``. Positions mirror
``colorface.build_face`` so the eyes/mouth line up with the rest of the set.
"""
from __future__ import annotations
import math
import colorface as _cf
from colorface import DISPLAY_W as W, DISPLAY_H as H, encode
from PIL import Image, ImageDraw
# eye/mouth geometry copied from colorface.build_face so frames are consistent
_EYE_L = W // 2 - 10 # 13
_EYE_R = W // 2 + 10 # 33
_EYE_T, _EYE_B = 15, 29 # normal eye top/bottom
_EYE_W = 6
_MOUTH_CY = 44
_MOUTH_CX = W // 2 # 23
def _canvas():
    """Create a black W x H RGB image and return it with its ImageDraw handle."""
    canvas = Image.new("RGB", (W, H), (0, 0, 0))
    return canvas, ImageDraw.Draw(canvas)
def _round_eye(g, cx, eye_color, sclera_color, *, t=_EYE_T, b=_EYE_B, w=_EYE_W):
    """Draw an open eye centred horizontally on ``cx``.

    Three stacked ellipses: the sclera, a smaller iris inside it, and a small
    black pupil at the vertical midpoint. ``t``/``b`` are the top/bottom rows
    and ``w`` the half-width.
    """
    left, right = cx - w, cx + w
    g.ellipse([left, t, right, b], fill=sclera_color)
    g.ellipse([left + 3, t + 4, right - 3, b - 2], fill=eye_color)
    mid = (t + b) // 2
    g.ellipse([cx - 1, mid - 1, cx + 1, mid + 2], fill=(0, 0, 0))
def _happy_eye(g, cx, color):
    """Draw a smiling eye: a 3-px arc from 200° to 340°, centred on ``cx``."""
    box = [cx - 7, _EYE_T - 1, cx + 7, _EYE_B + 5]
    g.arc(box, start=200, end=340, fill=color, width=3)
def _heart(g, cx, cy, half, color):
"""A filled heart centred at (cx, cy), ``half`` = half-width."""
r = half / 2.0
g.pieslice([cx - half, cy - r, cx, cy + r], 0, 360, fill=color) # left lobe
g.pieslice([cx, cy - r, cx + half, cy + r], 0, 360, fill=color) # right lobe
g.polygon([(cx - half, cy + r * 0.2), (cx + half, cy + r * 0.2),
(cx, cy + half)], fill=color)
def _star(g, cx, cy, r, color):
pts = []
for i in range(10):
ang = -math.pi / 2 + i * math.pi / 5
rad = r if i % 2 == 0 else r * 0.45
pts.append((cx + rad * math.cos(ang), cy + rad * math.sin(ang)))
g.polygon(pts, fill=color)
def _smile(g, color, *, big=False):
    """Draw the mouth: a filled open grin when ``big``, else a 4-px smile arc."""
    if not big:
        g.arc([_MOUTH_CX - 12, _MOUTH_CY - 8, _MOUTH_CX + 12, _MOUTH_CY + 8],
              start=20, end=160, fill=color, width=4)
        return
    # open grin
    g.chord([_MOUTH_CX - 13, _MOUTH_CY - 6, _MOUTH_CX + 13, _MOUTH_CY + 12],
            start=0, end=180, fill=color)
# Fixed emoji colors — these frames are icons, not part of the face's colour
# scheme, so a heart is always red and a thumb always yellow regardless of the
# user's chosen eye/mouth colours.
_RED = (255, 45, 75)
_PINK = (255, 95, 155)
_YELLOW = (255, 200, 40)
# -- individual emotion drawings ---------------------------------------------
def _heart_face(eye, mouth, sclera):
    """Frame 'heart': one big red heart on black. The colour arguments are unused."""
    frame, draw = _canvas()
    _heart(draw, W // 2, 26, 18, _RED)  # one big RED heart fills the face
    return frame
def _laugh(eye, mouth, sclera):
    """Frame 'laugh': squinting eyes, a wide open grin and a tear under each eye."""
    frame, draw = _canvas()
    for eye_x in (_EYE_L, _EYE_R):
        _happy_eye(draw, eye_x, eye)
    _smile(draw, mouth, big=True)  # wide open grin
    # a joy tear under each eye
    tear_top, tear_bottom = _EYE_B + 3, _EYE_B + 9
    for eye_x in (_EYE_L, _EYE_R):
        draw.ellipse([eye_x - 2, tear_top, eye_x + 2, tear_bottom], fill=(0, 180, 255))
    return frame
def _love(eye, mouth, sclera):
    """Frame 'love': pink heart-shaped eyes above a pink smile."""
    frame, draw = _canvas()
    for eye_x in (_EYE_L, _EYE_R):
        _heart(draw, eye_x, 22, 8, _PINK)  # pink heart-shaped eyes
    _smile(draw, _PINK)
    return frame
def _cool(eye, mouth, sclera):
    """Frame 'cool': sunglasses (two lenses plus a bridge) over a one-sided smirk."""
    frame, draw = _canvas()
    rim = (40, 40, 55)
    glass = (10, 10, 20)
    glint_y = _EYE_T + 3
    # two lenses, each with a short glint in the eye colour
    for eye_x in (_EYE_L, _EYE_R):
        draw.rounded_rectangle([eye_x - 8, _EYE_T, eye_x + 8, _EYE_B + 1], radius=4,
                               fill=glass, outline=rim, width=2)
        draw.line([eye_x - 5, glint_y, eye_x + 2, glint_y], fill=eye, width=2)  # glint
    # bridge between the lenses
    draw.line([_EYE_L + 8, glint_y, _EYE_R - 8, glint_y], fill=rim, width=3)
    # a cool little smirk (raised on one side)
    draw.arc([_MOUTH_CX - 11, _MOUTH_CY - 5, _MOUTH_CX + 12, _MOUTH_CY + 8],
             start=15, end=120, fill=mouth, width=4)
    return frame
def _sleepy(eye, mouth, sclera):
    """Frame 'sleepy': drooping eyelid arcs, a small mouth and two 'Z' glyphs."""
    frame, draw = _canvas()
    # droopy half-closed eyes: one lid arc per eye
    for eye_x in (_EYE_L, _EYE_R):
        draw.arc([eye_x - 7, _EYE_T + 2, eye_x + 7, _EYE_B + 4], start=160, end=20,
                 fill=eye, width=3)
    # small tired mouth
    draw.ellipse([_MOUTH_CX - 4, _MOUTH_CY - 2, _MOUTH_CX + 4, _MOUTH_CY + 4], fill=mouth)
    # zZ drawn as cheap line-glyphs (no font dependency): top bar, diagonal,
    # bottom bar for each letter
    for x, y, size in ((36, 8, 5), (41, 3, 3)):
        strokes = (
            [x, y, x + size, y],
            [x + size, y, x, y + size],
            [x, y + size, x + size, y + size],
        )
        for stroke in strokes:
            draw.line(stroke, fill=eye, width=1)
    return frame
def _confused(eye, mouth, sclera):
    """Frame 'confused': one normal eye, one smaller raised eye with a brow,
    and a zig-zag mouth."""
    frame, draw = _canvas()
    _round_eye(draw, _EYE_L, eye, sclera)  # normal eye
    _round_eye(draw, _EYE_R, eye, sclera,
               t=_EYE_T - 3, b=_EYE_B - 3, w=5)  # raised/small
    # raised brow over the small eye
    draw.line([_EYE_R - 6, _EYE_T - 6, _EYE_R + 6, _EYE_T - 9], fill=eye, width=2)
    # wavy/squiggle mouth: 7 points, 4 px apart, alternating 3 px above/below centre
    squiggle = []
    for step in range(7):
        offset = 3 if step % 2 else -3
        squiggle.append((_MOUTH_CX - 12 + step * 4, _MOUTH_CY + offset))
    draw.line(squiggle, fill=mouth, width=3, joint="curve")
    return frame
def _kiss(eye, mouth, sclera):
    """Frame 'kiss': one open eye, one winking eye, puckered red lips and a
    small floating heart."""
    frame, draw = _canvas()
    _round_eye(draw, _EYE_L, eye, sclera)
    wink_y = (_EYE_T + _EYE_B) // 2
    draw.line([_EYE_R - 6, wink_y, _EYE_R + 6, wink_y], fill=eye, width=3)  # winking eye
    # puckered red lips: red ellipse with a black centre
    draw.ellipse([_MOUTH_CX - 4, _MOUTH_CY - 4, _MOUTH_CX + 4, _MOUTH_CY + 5], fill=_RED)
    draw.ellipse([_MOUTH_CX - 2, _MOUTH_CY - 2, _MOUTH_CX + 2, _MOUTH_CY + 3], fill=(0, 0, 0))
    _heart(draw, 37, 13, 6, _RED)  # little floating red heart
    return frame
def _star_struck(eye, mouth, sclera):
    """Frame 'star_struck': a star in place of each eye above an open grin."""
    frame, draw = _canvas()
    star_color = (255, 220, 0)
    for eye_x in (_EYE_L, _EYE_R):
        _star(draw, eye_x, 22, 7, star_color)
    _smile(draw, mouth, big=True)
    return frame
def _thumbs_up(eye, mouth, sclera):
    """Draw a thumbs-up icon and return the image.

    Deliberately coarse (one fist block, one thumb block, one dividing line)
    so it stays legible on the low-resolution LED grid. None of the colour
    arguments are used; they exist so every builder shares one signature.
    """
    img, g = _canvas()
    fist_box = [11, 30, 37, 52]
    thumb_box = [13, 6, 29, 34]
    # Fist first, thumb on top of it (the two boxes overlap).
    g.rounded_rectangle(fist_box, radius=8, fill=_YELLOW)
    g.rounded_rectangle(thumb_box, radius=8, fill=_YELLOW)
    # Black notch separating the thumb from the fingers.
    g.line([30, 34, 36, 34], fill=(0, 0, 0), width=3)
    return img
# Emotion name -> frame builder. emotion_frames() calls each builder as
# builder(eye_color, mouth_color, sclera_color) and walks this mapping in
# insertion order, so the order here is the order of the frames it returns.
_BUILDERS = {
    "heart": _heart_face,
    "laugh": _laugh,
    "love": _love,
    "cool": _cool,
    "sleepy": _sleepy,
    "confused": _confused,
    "kiss": _kiss,
    "star_struck": _star_struck,
    "thumbs_up": _thumbs_up,
}
def emotion_frames(*, eye_color=_cf.DEFAULT_EYE, mouth_color=_cf.DEFAULT_MOUTH,
                   sclera_color=_cf.WHITE, include=None) -> dict:
    """Build and encode the extra emotion frames.

    Args:
        eye_color, mouth_color, sclera_color: colours handed to every builder.
        include: optional container of emotion names; when given, only the
            builders whose name is in it are rendered (names it contains that
            have no builder are ignored). ``None`` renders all of them.

    Returns:
        ``{name: encoded_frame}`` in ``_BUILDERS`` order, each value being
        ``encode()`` applied to the builder's image.
    """
    return {
        name: encode(build(eye_color, mouth_color, sclera_color))
        for name, build in _BUILDERS.items()
        if include is None or name in include
    }

599
vendor/Sanad/face/face_motion.py vendored Normal file
View File

@ -0,0 +1,599 @@
#!/usr/bin/env python3
"""Lifelike face motion for the Shining LED mask — SanadV3.
A richer, more *organic* driver than the Mask lib's built-in idle. Instead of an
occasional blink/glance, it gives the robot's face the small, constant motion a
real face has:
* natural blinking — varied intervals, occasional quick double-blinks
* frequent small eye saccades (darts) with short gaze holds and drift
* idle micro-expressions (a brief smile now and then)
* state-aware behaviour:
    idle      — relaxed, wanders, blinks
    listening — attentive, eyes mostly forward, fewer darts, soft blinks
    thinking  — looks away (longer gaze holds), slower blinks
    speaking  — mouth lip-syncs to audio; blinks wait for the pauses between
                utterances (a mid-speech blink tears the display)
* quick reactions: surprised / happy(smile) / sad, held briefly then released
It drives the mask by PLAY of the pre-uploaded DIY frames (no per-frame upload),
so motion is smooth. Lip-sync composes with the eye motion via feed_audio_level().
Run it standalone (keep the mask within ~30 cm for the one-time frame upload):
python3 face/face_motion.py # connect, load frames, stay alive
python3 face/face_motion.py --demo # cycle the states to show the range
python3 face/face_motion.py --reload # force re-upload of the frame set
Integrate into Sanad: construct ``LifelikeFace(mask=<connected ShiningMask>)``
(or let it connect itself), ``await face.start()``, then drive it from the event
bus / Gemini lip-sync markers:
face.set_listening() # when the user starts speaking
face.set_thinking() # while a tool/response is being prepared
face.set_speaking(True/False) # around a spoken reply
face.feed_audio_level(rms_0_to_1) # per audio chunk -> real lip-sync
face.react("surprised" | "smile" | "sad")
face.set_idle() # back to relaxed wandering
"""
from __future__ import annotations
import argparse
import asyncio
import os
import random
import sys
import time
import logging
import threading
from pathlib import Path
# Module-level logger.
log = logging.getLogger("sanad.face_motion")
# Frames present in the mask's DIY slots (colorface.default_frames()).
# MOUTH is ordered closed -> widest; _mouth_for() indexes into it.
GAZE = ("neutral", "look_left", "look_right")
MOUTH = ("neutral", "talk1", "talk2", "talk3")
# How long (seconds) after the last lip-sync marker the face stays "speaking"
# (mouth follows the amplitude; pauses close it). When markers stop for this
# long the turn ends and the eyes return to their underlying state.
_SPEECH_WINDOW = 0.6
# Mouth-frame cadence while speaking, in seconds per frame. Each frame is a
# FULL-face DIY slot switch; this small mask can't cleanly repaint the LED
# matrix faster than ~5/s, so the old 0.09s (~11/s) cadence overran it and
# showed torn/scrambled composites of several frames at once. Cap it well under
# the tear threshold — speech visemes only change ~4-7/s anyway, so lip-sync
# still reads fine.
_SPEAK_FRAME_SEC = 0.22
# BLE-link health. If the mask link drops mid-session every play_diy raises a
# BleakError, which left the loop busy-spinning ~20x/s forever (no recovery but a
# manual disconnect/connect). Treat a run of consecutive play failures (or
# mask.is_connected going False) as a dropped link and attempt a *bounded*
# reconnect; if that is exhausted, stop the loop so the face goes idle/unavailable
# instead of hammering a dead transport.
_PLAY_FAIL_LIMIT = 10 # consecutive failed plays before we call it a drop
_RECONNECT_ATTEMPTS = 3 # reconnect tries per detected drop
_RECONNECT_BACKOFF = 2.0 # seconds between reconnect tries
# BLE link keepalive. _play() skips re-sending an *unchanged* frame, so a long
# neutral/idle stretch writes nothing but the occasional blink (every 2-4.5s).
# If that quiet gap (plus an RF glitch) outlasts the link's supervision timeout
# the mask drops — and every reconnect briefly flashes the mask's own built-in
# face. Re-send the current frame at least this often (seconds) so the link
# never goes quiet long enough to be dropped. One tiny write/sec when idle;
# free when busy.
_KEEPALIVE_SEC = 1.0
# Frames that carry the animated face's mouth (gaze + lip-sync). "Hide mouth"
# blacks out the mouth region on just these, leaving eyes-only — the emotion
# icons (heart/thumb/…) are left alone.
_MOUTH_FRAMES = ("neutral", "talk1", "talk2", "talk3", "blink",
                 "look_left", "look_right")
_MOUTH_MASK_TOP = 32 # display-y below which the mouth lives (eyes end ~29)
def _mask_mouth_bytes(data: bytes) -> bytes:
    """Return a copy of an encoded frame with its mouth region painted black.

    The frame bytes are read as an RGB image of size
    ``(DISPLAY_H, DISPLAY_W)`` (i.e. stored transposed), flipped back into
    display orientation, blanked from row ``_MOUTH_MASK_TOP`` downwards, and
    re-encoded with ``colorface.encode``. The input is not modified.
    """
    # Imported lazily: both need the Mask directory on sys.path / Pillow present.
    import colorface as _cf
    from PIL import Image as _Image, ImageDraw as _ImageDraw

    stored_size = (_cf.DISPLAY_H, _cf.DISPLAY_W)
    stored = _Image.frombytes("RGB", stored_size, bytes(data))
    upright = stored.transpose(_Image.Transpose.TRANSPOSE)  # -> display space

    mouth_box = [0, _MOUTH_MASK_TOP, _cf.DISPLAY_W, _cf.DISPLAY_H]
    painter = _ImageDraw.Draw(upright)
    painter.rectangle(mouth_box, fill=(0, 0, 0))
    return _cf.encode(upright)
def _add_mask_to_path() -> str:
"""Put the flat Mask library (mask.py / faceanim.py / colorface.py) on sys.path."""
d = os.environ.get("SANAD_MASK_DIR") or str(Path(__file__).resolve().parents[2] / "Mask")
if d and d not in sys.path:
sys.path.insert(0, d)
return d
class LifelikeFace:
"""Organic, state-aware motion driver for the LED mask."""
def __init__(self, mask=None, *, name_prefix="MASK", address=None, adapter=None,
             brightness=95, frames=None, eye_color=None, mouth_color=None,
             sclera_color=None, auto_reconnect=True, hide_mouth=False):
    """Prepare the driver; no BLE traffic happens here.

    Args:
        mask: an existing mask object to drive. When ``None`` the driver owns
            the mask: ``start()`` creates it from ``address`` /
            ``name_prefix`` / ``adapter`` and ``stop()`` disconnects it.
        name_prefix, address, adapter: passed to the mask constructor when
            the driver creates its own mask.
        brightness: brightness value sent to the mask after each connect.
        frames: explicit ``{name: encoded_bytes}`` frame set. When ``None``
            the default colorface set is built (tinted with any colours given)
            and the extra emotion frames are appended to it.
        eye_color, mouth_color, sclera_color: optional colours for the
            generated frames; ``None`` leaves the library default.
        auto_reconnect: when False the loop does not self-heal a dropped
            link; it forces a disconnect and exits so an external owner can
            recover.
        hide_mouth: when True the mouth region of the gaze/talk/blink frames
            is blacked out (eyes-only face).
    """
    _add_mask_to_path()
    import mask as _mask  # flat Mask lib
    import faceanim as _faceanim
    import colorface as _colorface
    self._ShiningMask = _mask.ShiningMask
    self._FaceAnimator = _faceanim.FaceAnimator
    self._colorface = _colorface
    self.mask = mask
    self._own_mask = mask is None
    self.name_prefix = name_prefix
    self.address = address
    self.adapter = adapter
    self.brightness = int(brightness)
    # When False, a dropped link is NOT self-healed here — the loop bails
    # cleanly (and forces the transport disconnected) so an external owner
    # (FaceController's reconnect supervisor) brings the link + face back.
    self._auto_reconnect = bool(auto_reconnect)

    # Frame colors: explicit frames win; else build the default set tinted
    # with whatever colors were given (None -> the lib defaults).
    if frames is None:
        ck = {}
        if eye_color is not None:
            ck["eye_color"] = tuple(eye_color)
        if mouth_color is not None:
            ck["mouth_color"] = tuple(mouth_color)
        if sclera_color is not None:
            ck["sclera_color"] = tuple(sclera_color)
        frames = _colorface.default_frames(**ck)
        # Extra Gemini-triggerable emotions in the same style. Appended after
        # the base set so the slot ids of the existing frames stay stable.
        # Guarded: a missing module never breaks the face.
        try:
            from Project.Sanad.face.emotion_frames import emotion_frames as _emo
            # 7 emotions so slots 1..19 hold the face set and slot 20 stays
            # free as a scratch slot for QR/social images (mask caps at 20).
            frames = {**frames, **_emo(**ck, include={
                "heart", "laugh", "love", "cool", "confused", "kiss", "thumbs_up"})}
        except Exception:
            log.exception("emotion frames unavailable — base frames only")
    self.frames = frames
    # Reserved DIY slot (just past the animated frames) for on-demand images
    # (QR / social) shown via the FaceController's show_scratch_image().
    self.scratch_slot = len(self.frames) + 1
    # Mouth show/hide: keep the unmasked originals so a live toggle can
    # re-upload just the gaze/talk slots masked or normal.
    self._base_frames = dict(self.frames)
    self.hide_mouth = bool(hide_mouth)
    if self.hide_mouth:
        self.frames = {n: (_mask_mouth_bytes(d) if n in _MOUTH_FRAMES else d)
                       for n, d in self.frames.items()}

    # Runtime state. This MUST be set up inside the initialiser: the loop,
    # _play(), pause()/resume() and the setters all read these attributes.
    self.slots: dict = {}
    self._state = "idle"        # underlying eye state: idle|listening|thinking
    self._speaking = False      # explicit speaking turn (set_speaking)
    self._level = 0.0           # live lip-sync amplitude 0..1
    self._last_mouth_t = 0.0    # last set_mouth/feed_audio_level time
    self._react = None          # name of the expression currently held, if any
    self._react_until = 0.0     # monotonic deadline of that expression
    self._cur = None            # name of the frame last played successfully
    self._task = None           # asyncio task running _loop()
    self._stop = False
    self._play_fails = 0        # consecutive play_diy failures (link-drop signal)
    self._last_write = 0.0      # monotonic of the last successful play_diy (keepalive)
    self._paused = False        # loop stops writing (used during a scratch upload)
    self._paused_ack = threading.Event()  # set once the loop has actually parked


def mouth_frames_for(self, hidden: bool) -> dict:
    """Return ``{name: bytes}`` for the frames that carry the mouth.

    Built from the unmasked originals kept at construction time: with
    ``hidden`` true every frame has its mouth region blacked out, otherwise
    the originals are returned as-is. Only names from ``_MOUTH_FRAMES`` that
    exist in the frame set are included. The FaceController re-uploads just
    these slots to toggle the mouth live.
    """
    return {n: (_mask_mouth_bytes(self._base_frames[n]) if hidden
                else self._base_frames[n])
            for n in _MOUTH_FRAMES if n in self._base_frames}
# -- lifecycle ------------------------------------------------------------
async def start(self, *, reload: bool = False):
    """Connect (if needed), make sure the frames are on the mask, start the loop.

    Args:
        reload: force a re-upload of the whole frame set even when the mask
            already reports enough stored frames.

    Returns:
        ``self``, so the call can be chained / used by ``__aenter__``.

    Raises:
        Whatever the mask raises from connect / set_brightness / upload.
    """
    if self.mask is None:
        self.mask = self._ShiningMask(
            address=self.address, name_prefix=self.name_prefix, adapter=self.adapter)
    # Connect only when the link is not already up. This covers all cases:
    # a freshly created mask, an injected mask that is already connected
    # (must not be connected twice), and a restart after stop() disconnected
    # an owned mask (must be connected again).
    if not getattr(self.mask, "is_connected", False):
        await self.mask.connect(timeout=20.0, attempts=12)
    await self.mask.set_brightness(self.brightness)
    # Upload the frame set via the RELIABLE (acked) image path — see
    # _upload_frames. FaceAnimator.load() is deliberately not used: its
    # fire-and-forget upload silently corrupts slots on a marginal link.
    await self._upload_frames(force=reload)
    self._stop = False
    await self._play("neutral")
    self._task = asyncio.create_task(self._loop())
    return self
async def stop(self):
    """Stop the motion loop and, if the driver owns the mask, disconnect it.

    The loop task is cancelled and awaited. If that task had already died
    with an exception, the exception still propagates to the caller, but only
    after the cleanup below has run, so a crashed loop can no longer leave
    the owned mask connected or a stale task reference behind.
    """
    self._stop = True
    task, self._task = self._task, None
    try:
        if task:
            task.cancel()
            try:
                await task
            except asyncio.CancelledError:
                pass  # the expected outcome of cancelling a running loop
    finally:
        if self._own_mask and self.mask is not None:
            try:
                await self.mask.disconnect()
            except Exception:
                pass  # best-effort: the link may already be gone
async def __aenter__(self):
return await self.start()
async def __aexit__(self, *exc):
await self.stop()
# -- reliable frame upload ------------------------------------------------
async def _upload_frames(self, *, force: bool):
"""Upload the frame set to the mask's DIY slots, RELIABLY.
The mask's default DIY upload is fire-and-forget: ~80 write-without-
response packets per frame with no ack, so a packet dropped on a
marginal BLE link silently corrupts that slot (no exception -> no retry)
and the frame renders as garbage. We instead drive each frame through
the mask's ACKED image path (upload_image -> _upload(kind=IMAGE),
per-packet REOK), retrying the whole frame on any failure (a fresh DATS
resets the half-written slot). Same name->slot (1..N) map as before.
"""
names = list(self.frames)
self.slots = {name: i + 1 for i, name in enumerate(names)}
if not force:
try:
count = await self.mask.get_diy_count(timeout=4.0) or 0
except Exception:
count = 0
if count >= len(names):
return # frames already stored (persist in flash)
await self.mask.clear_diy()
acked = True
for i, (name, data) in enumerate(self.frames.items(), start=1):
acked = await self._upload_one_frame(i, bytes(data), acked)
await asyncio.sleep(0.2)
async def _upload_one_frame(self, slot: int, data: bytes, acked: bool) -> bool:
"""Upload one frame. Prefer the acked image path; on failure reconnect +
retry the whole frame. If frame 1 proves this mask clone never acks
IMAGE uploads, latch off the acked path and use paced fire-and-forget
for the rest. Returns whether to keep using the acked path."""
if acked:
for attempt in range(5):
try:
await self.mask.upload_image(data, slot, timeout=8.0)
return True
except Exception as exc:
# frame 1 failing its first two acked tries => this clone
# doesn't ack IMAGE uploads; stop trying it.
if slot == 1 and attempt >= 1:
log.warning("mask: IMAGE uploads not acked by this clone "
"(%s) -- using paced fire-and-forget", exc)
break
if attempt == 4:
log.warning("mask: acked upload of slot %d exhausted (%s) "
"-- fire-and-forget fallback", slot, exc)
break
await self._reupload_reconnect()
# fallback: paced fire-and-forget (probabilistic -- keep the mask close)
await self.mask.upload_raw_image(data, index=slot,
chunk_delay=0.10, init_delay=0.30)
return False
async def _reupload_reconnect(self):
    """Bounce the BLE link between two upload attempts.

    Disconnect, wait one second, connect again and re-apply the brightness.
    Both halves are best-effort: any error is swallowed, so the next upload
    attempt simply fails again if the link did not come back.
    """
    mask = self.mask
    try:
        await mask.disconnect()
    except Exception:
        pass
    await asyncio.sleep(1.0)
    try:
        await mask.connect(timeout=15.0, attempts=8)
        await mask.set_brightness(self.brightness)
    except Exception:
        pass
# -- control --------------------------------------------------------------
def set_state(self, state: str):
    """Set the underlying eye state; anything outside the four accepted names
    ("idle", "listening", "thinking", "speaking") is stored as "idle"."""
    accepted = ("idle", "listening", "thinking", "speaking")
    if state in accepted:
        self._state = state
    else:
        self._state = "idle"
def set_idle(self):
    """Switch to the idle eye state and end any speaking turn.

    Zeroing the last lip-sync timestamp makes the loop's speaking window
    expire immediately instead of after its normal timeout.
    """
    self._last_mouth_t = 0.0
    self._speaking = False
    self.set_state("idle")
def set_listening(self):
    """Switch to the listening eye state and end any speaking turn (the
    lip-sync timestamp is zeroed so the speaking window expires at once)."""
    self._last_mouth_t = 0.0
    self._speaking = False
    self.set_state("listening")
def set_thinking(self):
    """Switch to the thinking eye state and end any speaking turn (the
    lip-sync timestamp is zeroed so the speaking window expires at once)."""
    self._last_mouth_t = 0.0
    self._speaking = False
    self.set_state("thinking")
def set_speaking(self, on: bool):
    """Open or close an explicit speaking turn.

    While a turn is open the loop animates the mouth: it follows the lip-sync
    amplitude when fresh markers exist and auto-talks otherwise. The eye
    state is left untouched, so the face returns to it when the turn ends.
    Closing the turn also clears the lip-sync timestamp and amplitude.
    """
    turn_open = bool(on)
    self._speaking = turn_open
    if turn_open:
        return
    self._last_mouth_t = 0.0
    self._level = 0.0
def feed_audio_level(self, level: float):
    """Feed one audio-chunk amplitude (nominally 0..1) for lip-sync.

    The stored level jumps straight up to a louder input (capped at 1.0) but
    never drops below 40% of its previous value in a single call. The call
    also stamps the lip-sync clock, which keeps the face "speaking" for a
    short window after the last chunk without any explicit on/off signal.
    """
    incoming = min(1.0, float(level))
    floor = self._level * 0.4  # fast attack, softened release
    self._level = max(floor, incoming)
    self._last_mouth_t = time.monotonic()
def react(self, emotion: str, hold: float = 1.4):
    """Show a named expression for ``hold`` seconds, then let the loop release
    it. Names that have no slot on the mask are ignored."""
    if emotion not in self.slots:
        return
    release_at = time.monotonic() + float(hold)
    self._react = emotion
    self._react_until = release_at
# -- FaceController-compatible API (so it can drop in for FaceAnimator) ----
def set_mouth(self, level: int):
    """Drive lip-sync from a discrete mouth level 0..3.

    The level is clamped to 0..3 and mapped to a representative amplitude
    (0.0 / 0.12 / 0.24 / 0.5). As with ``feed_audio_level`` the stored level
    never falls below 40% of its previous value in one call, and the lip-sync
    clock is stamped, so level 0 lets the mouth decay shut while the short
    speaking window stays alive.
    """
    amplitudes = (0.0, 0.12, 0.24, 0.5)
    step = int(level)
    if step < 0:
        step = 0
    elif step > 3:
        step = 3
    self._level = max(self._level * 0.4, amplitudes[step])
    self._last_mouth_t = time.monotonic()
def set_expression(self, name):
    """Hold an expression frame indefinitely, or release such a hold.

    A name that has a slot is held with no deadline. Any other value
    (``None``, empty, unknown name) releases the current expression, but only
    if it was an indefinite hold; a timed ``react()`` is left to expire.
    """
    forever = float("inf")
    if name and name in self.slots:
        self._react = name
        self._react_until = forever
        return
    if self._react_until == forever:
        self._react = None
def pause(self):
    """Ask the loop to stop writing to the mask.

    The acknowledgement event is cleared before the flag is raised, so a
    following ``wait_paused()`` only returns once the loop has seen this
    request and parked.
    """
    ack = self._paused_ack
    ack.clear()
    self._paused = True
def wait_paused(self, timeout: float = 2.0) -> bool:
    """Block the calling thread until the loop acknowledges the pause.

    Returns:
        True if the loop parked within ``timeout`` seconds, False otherwise.
    """
    parked = self._paused_ack.wait(timeout)
    return parked
def resume(self):
    """Let the loop write to the mask again after ``pause()``.

    Forgetting the current frame name forces the loop to redraw on its next
    pass, since the mask may be showing something else by now.
    """
    self._paused = False
    self._paused_ack.clear()
    self._cur = None
async def show(self, name: str):
    """Briefly show a named frame: a 1.5 second ``react()``. Unknown names
    are ignored. (Coroutine only for call-compatibility; it never awaits.)"""
    if name not in self.slots:
        return
    self.react(name, hold=1.5)
# -- internals ------------------------------------------------------------
@staticmethod
def _mouth_for(level: float) -> str:
    """Map a lip-sync amplitude to a mouth frame name from ``MOUTH``.

    Thresholds: below 0.06 -> index 0, below 0.16 -> 1, below 0.32 -> 2,
    anything else -> 3.
    """
    if level < 0.06:
        index = 0
    elif level < 0.16:
        index = 1
    elif level < 0.32:
        index = 2
    else:
        index = 3
    return MOUTH[index]
async def _play(self, name: str, *, force: bool = False):
slot = self.slots.get(name)
if slot is None or (name == self._cur and not force):
return
try:
await self.mask.play_diy(slot)
self._cur = name
self._last_write = time.monotonic() # keepalive clock: link saw traffic
self._play_fails = 0 # link is alive again
except Exception:
self._cur = None # retry next tick on a transient BLE error
self._play_fails += 1 # ...but count it: a sustained run == a drop
def _link_dead(self) -> bool:
"""True once the BLE link looks gone: the transport reports disconnected,
or play_diy has failed a sustained run in a row (a single glitch is still
treated as transient and retried)."""
connected = bool(getattr(self.mask, "is_connected", False)) if self.mask else False
return (not connected) or self._play_fails >= _PLAY_FAIL_LIMIT
async def _reconnect(self) -> bool:
    """Try to bring a dropped link back, a bounded number of times.

    Each of the ``_RECONNECT_ATTEMPTS`` rounds first disconnects a client
    that still claims to be connected (best-effort), then connects, re-applies
    the brightness, resets the failure counter and redraws "neutral". No
    frames are re-uploaded. A failed round waits ``_RECONNECT_BACKOFF``
    seconds before the next one.

    Returns:
        True as soon as a round completes, False when ``stop()`` was
        requested or every round failed.
    """
    for _round in range(_RECONNECT_ATTEMPTS):
        if self._stop:
            return False

        # Clean up any half-open client before dialling again.
        try:
            if getattr(self.mask, "is_connected", False):
                await self.mask.disconnect()
        except Exception:
            pass

        try:
            await self.mask.connect(timeout=10.0, attempts=2)
            await self.mask.set_brightness(self.brightness)
            self._play_fails = 0
            self._cur = None  # force a redraw on the fresh link
            await self._play("neutral")
        except Exception:
            await asyncio.sleep(_RECONNECT_BACKOFF)
        else:
            return True
    return False
async def _blink(self, restore: str):
    """Play one blink and return to the ``restore`` frame.

    The eyes stay closed for 80-130 ms. About 18% of the time a second,
    quick blink follows (open 70-110 ms, closed 80-120 ms) before the final
    restore.
    """
    closed_for = random.uniform
    await self._play("blink")
    await asyncio.sleep(closed_for(0.08, 0.13))
    double_blink = random.random() < 0.18
    if double_blink:
        await self._play(restore)
        await asyncio.sleep(random.uniform(0.07, 0.11))
        await self._play("blink")
        await asyncio.sleep(closed_for(0.08, 0.12))
    await self._play(restore)
async def _loop(self):
    """Main animation loop; runs as a task until ``stop()`` or a dead link.

    Every pass works through these stages in order, and the first one that
    applies ends the pass:

    1. link health: reconnect (or hand over to the external owner) when the
       link looks dead; leave the loop if that fails;
    2. pause: write nothing while a scratch upload is in progress;
    3. keepalive: re-send the current frame if nothing was written lately;
    4. reaction: a held expression overrides everything until it expires;
    5. speaking: the mouth follows lip-sync (or auto-talks), no blinking;
    6. otherwise: blinks, saccades and the rare idle micro-smile.
    """
    clock = time.monotonic

    # Per-state saccade profile: (gaze candidates, shortest hold, longest hold).
    # Any state without an entry uses the relaxed profile.
    saccade_profiles = {
        "thinking": (["look_left", "look_right", "look_left", "look_right", "neutral"],
                     0.9, 1.8),
        "listening": (["neutral", "neutral", "neutral", "look_left", "look_right"],
                      0.5, 1.2),
    }
    relaxed_profile = (["neutral", "neutral", "look_left", "look_right", "neutral"],
                       0.3, 0.9)
    auto_talk_pattern = [0, 1, 1, 2, 2, 3, 3, 2, 1, 0]

    next_blink = clock() + random.uniform(1.5, 4.0)
    next_saccade = clock() + random.uniform(0.6, 1.6)
    next_micro = clock() + random.uniform(12.0, 25.0)
    gaze = "neutral"

    while not self._stop:
        now = clock()

        # 1. BLE link health. Never spin play_diy on a dead transport.
        if self._link_dead():
            if not self._auto_reconnect:
                # Owner-managed recovery: make the transport report
                # disconnected so the owner notices, then leave the loop.
                try:
                    if getattr(self.mask, "is_connected", False):
                        await self.mask.disconnect()
                except Exception:
                    pass
                break
            if not await self._reconnect():
                break
            now = clock()  # reconnect can take a while

        # 2. Paused: stay silent and tell the waiter we have parked.
        if self._paused:
            self._paused_ack.set()
            await asyncio.sleep(0.1)
            continue

        # 3. Keepalive: _play() skips unchanged frames, so a quiet stretch
        # would otherwise put no traffic on the link at all.
        link_quiet = (now - self._last_write) >= _KEEPALIVE_SEC
        if self._cur is not None and link_quiet:
            await self._play(self._cur, force=True)

        # 4. A held reaction overrides everything until its deadline.
        if self._react is not None:
            if now < self._react_until:
                await self._play(self._react)
                await asyncio.sleep(0.06)
                continue
            self._react = None
            self._cur = None  # force a redraw of whatever's underneath

        # 5. Speaking: an explicit turn OR fresh lip-sync markers. The marker
        # window expires by itself, so the turn ends when markers stop.
        lipsync_active = (now - self._last_mouth_t) < _SPEECH_WINDOW
        if self._speaking or lipsync_active:
            if lipsync_active:
                mouth_frame = self._mouth_for(self._level)
                self._level *= 0.55  # decay toward closed between markers
            else:
                mouth_frame = MOUTH[random.choice(auto_talk_pattern)]
            await self._play(mouth_frame)
            # No blink here: a blink burst on top of the mouth cadence spikes
            # the frame-switch rate and tears the display.
            await asyncio.sleep(_SPEAK_FRAME_SEC)
            continue

        # 6. Not speaking: idle / listening / thinking.
        if now >= next_blink:
            await self._blink(gaze)
            if self._state == "thinking":
                gap_lo, gap_hi = 3.5, 6.5
            else:
                gap_lo, gap_hi = 2.0, 4.5
            next_blink = now + random.uniform(gap_lo, gap_hi)

        if now >= next_saccade:
            candidates, hold_lo, hold_hi = saccade_profiles.get(self._state,
                                                                relaxed_profile)
            gaze = random.choice(candidates)
            hold = random.uniform(hold_lo, hold_hi)
            await self._play(gaze)
            next_saccade = now + hold + random.uniform(0.4, 1.4)
        else:
            await self._play(gaze)

        if self._state == "idle" and now >= next_micro:  # rare idle micro-smile
            await self._play("smile")
            await asyncio.sleep(random.uniform(0.6, 1.0))
            gaze = "neutral"
            self._cur = None
            next_micro = now + random.uniform(15.0, 30.0)

        await asyncio.sleep(0.05)
# ---------------------------------------------------------------------------
# Standalone runner
# ---------------------------------------------------------------------------
async def _amain(args):
    """Standalone runner: start a LifelikeFace, then either idle forever or,
    with ``--demo``, walk through the states and reactions once. The face is
    always stopped on the way out."""
    face = LifelikeFace(name_prefix=args.name_prefix, address=args.address,
                        brightness=args.brightness)
    print("connecting + loading frames (keep the mask within ~30 cm) ...", flush=True)
    await face.start(reload=args.reload)
    print("lifelike motion running. Ctrl+C to stop.", flush=True)
    try:
        if not args.demo:
            while True:
                await asyncio.sleep(1)

        # (label, action to trigger, seconds to watch it)
        demo_steps = [
            ("idle (wandering + blinks)", face.set_idle, 7),
            ("listening (attentive)", face.set_listening, 7),
            ("thinking (looks away)", face.set_thinking, 7),
            ("speaking (auto lip-sync)", lambda: face.set_speaking(True), 7),
            ("react: surprised", lambda: face.react("surprised", 2.0), 2.2),
            ("react: smile", lambda: face.react("smile", 2.0), 2.2),
            ("react: sad", lambda: face.react("sad", 2.0), 2.2),
            ("back to idle", face.set_idle, 5),
        ]
        for label, trigger, seconds in demo_steps:
            print(" ->", label, flush=True)
            trigger()
            await asyncio.sleep(seconds)
        face.set_idle()
        await asyncio.sleep(2)
    except KeyboardInterrupt:
        print("\nstopping ...")
    finally:
        await face.stop()
def main():
    """Command-line entry point: parse the flags and run ``_amain``."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--demo", action="store_true",
                        help="cycle through the states/reactions")
    parser.add_argument("--reload", action="store_true",
                        help="force re-upload of the frame set")
    parser.add_argument("--address", help="mask BLE MAC")
    parser.add_argument("--name-prefix", default="MASK")
    parser.add_argument("--brightness", type=int, default=95)
    options = parser.parse_args()
    asyncio.run(_amain(options))


if __name__ == "__main__":
    main()

801
vendor/Sanad/face/mask_face.py vendored Normal file
View File

@ -0,0 +1,801 @@
"""Shining LED face mask — Sanad subsystem (BLE, owns its own asyncio loop).
Wraps the standalone **Mask** project (``Project/Mask`` — the flat ``shiningmask``
library: ``mask.py`` / ``faceanim.py`` / ``colorface.py``) as a Sanad subsystem
so the dashboard "Mask Face" tab can drive the robot's animated LED face.
Why a dedicated loop: the mask talks BLE (bleak/BlueZ) and ``FaceAnimator`` runs a
persistent asyncio task, so this controller owns a background daemon thread with
its own event loop. Route handlers call the plain SYNC methods here (themselves
wrapped in ``asyncio.to_thread`` by FastAPI); each marshals a coroutine onto that
loop via ``run_coroutine_threadsafe``.
The Mask project is a flat set of top-level modules (not an installed package), so
it is imported by inserting its directory on ``sys.path``. Default location is the
sibling ``<Project>/Mask``; override with ``SANAD_MASK_DIR`` or
``config/mask_config.json``. Needs an env with ``bleak`` + ``Pillow`` (g1_env). If
those are missing the subsystem still constructs but reports unavailable, and the
rest of Sanad is unaffected (the dashboard tab shows the reason).
"""
from __future__ import annotations
import asyncio
import os
import sys
import threading
from pathlib import Path
from typing import Optional, Sequence, Tuple
from Project.Sanad.config import BASE_DIR
from Project.Sanad.core import config_loader
from Project.Sanad.core.logger import get_logger
# Subsystem logger.
log = get_logger("mask_face")
# An RGB colour as three ints; _parse_color() clamps each channel to 0..255.
Color = Tuple[int, int, int]
# Named frames provided by colorface.default_frames() (FaceAnimator slots).
# NOTE(review): the last row lists the extra emotion frames that LifelikeFace
# appends to the default set — confirm the two lists are kept in step.
EXPRESSIONS = ("neutral", "smile", "blink", "look_left", "look_right",
               "talk1", "talk2", "talk3", "surprised", "sad", "wink", "angry",
               "heart", "laugh", "love", "cool", "confused", "kiss", "thumbs_up")
# Default face colors (match colorface.DEFAULT_EYE / DEFAULT_MOUTH).
DEFAULT_EYE_COLOR: Color = (0, 230, 255) # cyan
DEFAULT_MOUTH_COLOR: Color = (255, 50, 50) # red
DEFAULT_SCLERA_COLOR: Color = (255, 255, 255) # white of the eye
def _parse_color(value, default: Color) -> Color:
"""Coerce a config/API color (``[r,g,b]``, ``(r,g,b)``, or ``"#rrggbb"``) to a
clamped RGB tuple; fall back to ``default`` on anything unusable."""
if value in (None, ""):
return tuple(default)
try:
if isinstance(value, str):
h = value.strip().lstrip("#")
if len(h) == 3:
h = "".join(c * 2 for c in h)
value = (int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16))
r, g, b = (int(value[0]), int(value[1]), int(value[2]))
return (max(0, min(255, r)), max(0, min(255, g)), max(0, min(255, b)))
except Exception:
return tuple(default)
class FaceController:
"""Owns the BLE mask connection + FaceAnimator on a private event loop."""
def __init__(self):
    """Read the "mask" config, set up runtime state and start the helper threads.

    Settings come from ``config_loader.load("mask")``; for the mask directory,
    address, name prefix and adapter a non-empty environment variable takes
    precedence. Three daemon threads may be started here: the private asyncio
    loop (always), the reconnect supervisor (always) and the boot-time
    autostart (only when both ``autostart`` and ``gemini_linked`` are on).
    """
    settings = config_loader.load("mask")

    def _cfg(key, default):
        # None / "" in the config mean "not set".
        raw = settings.get(key, default)
        if raw in (None, ""):
            return default
        return raw

    def _env_or_cfg(env_name, key, default):
        # A non-empty environment variable wins over the config file.
        return os.environ.get(env_name) or _cfg(key, default)

    # --- where the Mask library lives / which device to talk to ---
    # Default directory: the sibling Mask project (…/Project/Mask).
    self.mask_dir = (_env_or_cfg("SANAD_MASK_DIR", "mask_dir", "")
                     or str(Path(BASE_DIR).parent / "Mask"))
    self.address = _env_or_cfg("SANAD_MASK_ADDRESS", "address", "") or None
    self.name_prefix = _env_or_cfg("SANAD_MASK_NAME_PREFIX", "name_prefix", "MASK")
    self.adapter = _env_or_cfg("SANAD_MASK_ADAPTER", "adapter", "") or None

    # --- link / display tuning ---
    self.brightness = int(_cfg("brightness", 95))
    self.fps = float(_cfg("fps", 8.0))
    self.connect_timeout = float(_cfg("connect_timeout", 15.0))
    self.connect_attempts = int(_cfg("connect_attempts", 5))

    # --- behaviour switches ---
    # Use the lifelike motion driver (saccades, varied blinks, states,
    # reactions, smooth lip-sync). Falls back to the basic FaceAnimator if
    # the lifelike module is unavailable or this is set false.
    self.lifelike = bool(_cfg("lifelike", True))
    self._face_kind = None
    self._hide_mouth = bool(_cfg("hide_mouth", False))  # eyes-only face toggle
    # Gemini<->mask link. Default OFF: the mask does NOT auto-connect (no BLE
    # churn) and Gemini's emotion/social markers are ignored. Turned on from
    # the dashboard, it connects the mask + lets Gemini drive it.
    self._gemini_linked = bool(_cfg("gemini_linked", False))
    # Auto-connect + start the animated face on boot (best-effort, in the
    # background so it never blocks startup).
    self.autostart = bool(_cfg("autostart", True))

    # --- face colours (baked into the uploaded DIY frames) ---
    self.eye_color = _parse_color(_cfg("eye_color", None), DEFAULT_EYE_COLOR)
    self.mouth_color = _parse_color(_cfg("mouth_color", None), DEFAULT_MOUTH_COLOR)
    self.sclera_color = _parse_color(_cfg("sclera_color", None), DEFAULT_SCLERA_COLOR)

    # --- runtime state ---
    self._mask = None  # shiningmask.ShiningMask
    self._face = None  # faceanim.FaceAnimator
    self._lib: Optional[dict] = None
    self._lib_failed = False
    self._connecting = False
    self._face_running = False
    self._speaking = False
    self._mouth: Optional[int] = None
    self._last_error: Optional[str] = None
    self._op_lock = threading.Lock()

    # Desired-state intents the reconnect supervisor enforces:
    #   _want_connected — we want a live BLE link (set on connect/autostart,
    #                     cleared on a *user* disconnect).
    #   _face_desired   — the animated face should be running (set on
    #                     face_start, cleared on face_stop / static overrides).
    self._want_connected = False
    self._face_desired = False
    self._reconnecting = False

    # --- threads ---
    # Dedicated event loop in a background daemon thread (idle until used).
    self._loop = asyncio.new_event_loop()
    self._thread = threading.Thread(target=self._run_loop, daemon=True,
                                    name="mask-face-loop")
    self._thread.start()
    log.info("FaceController ready (mask_dir=%s, name_prefix=%s, address=%s)",
             self.mask_dir, self.name_prefix, self.address or "scan")

    # Persistent reconnect supervisor: enforces the intents above.
    supervisor = threading.Thread(target=self._supervisor, daemon=True,
                                  name="mask-supervisor")
    supervisor.start()

    # Only auto-connect on boot if Gemini is linked (default off -> the mask
    # stays disconnected + silent until the user links it from the dashboard).
    if self.autostart and self._gemini_linked:
        boot = threading.Thread(target=self._autostart, daemon=True,
                                name="mask-autostart")
        boot.start()
def _clear_stale_mask_links(self) -> None:
"""Drop any BlueZ-level connection to a MASK device left over from a
previous process, BEFORE the first connect.
A hard service restart leaves the old link half-open: BlueZ still
reports the mask "connected" so it stops advertising, our scan can't
find it, and the fresh connect churns with 'Software caused connection
abort' / 'failed to discover services' for minutes before BlueZ times
the stale link out flashing the mask's built-in face the whole time
(this, not WiFi/coexistence, is what makes the weird face appear on a
restart/boot). Disconnecting it first lets the mask advertise again, so
the new connect is clean and immediate. Fully guarded + bounded a
missing bluetoothctl or any error is a no-op, never blocking startup."""
try:
import subprocess as _sp
import time as _time
out = _sp.run(["bluetoothctl", "devices"], capture_output=True,
text=True, timeout=5).stdout or ""
prefix = (self.name_prefix or "MASK").upper()
cleared = False
for line in out.splitlines():
parts = line.split()
# "Device C3:8A:9B:05:B4:C9 MASK-05B4C9"
if (len(parts) >= 3 and parts[0] == "Device"
and parts[2].upper().startswith(prefix)):
addr = parts[1]
_sp.run(["bluetoothctl", "disconnect", addr],
capture_output=True, text=True, timeout=8)
log.info("cleared stale BlueZ link to %s (%s) before first connect",
parts[2], addr)
cleared = True
if cleared:
_time.sleep(1.5) # let the mask resume advertising before we scan
except Exception as exc:
log.debug("stale mask-link cleanup skipped: %s", exc)
def _autostart(self):
"""Best-effort connect + start the face on boot (runs on its own thread,
so a missing/asleep mask never blocks or breaks Sanad startup). Declares
the connect+face *intent* up front, so even if the mask is off / out of
range at boot, the reconnect supervisor keeps trying and brings the face
up on its own once the mask appears no dashboard babysitting."""
import time as _time
_time.sleep(4.0) # let the rest of Sanad finish booting first
if not self.lib_available:
log.warning("mask autostart skipped — Mask lib unavailable "
"(need bleak + Pillow in this conda env)")
return
self._want_connected = True
self._face_desired = True
self._clear_stale_mask_links() # drop any half-open link from a prior process
try:
self.connect()
except Exception as exc:
log.warning("mask autostart: connect failed (%s) — the supervisor will "
"keep retrying; or connect from the dashboard", exc)
return
try:
self.face_start(reload=False)
log.info("mask autostart: animated face running (driver=%s)", self._face_kind)
except Exception:
log.exception("mask autostart: face_start failed")
def _supervisor(self):
"""Background daemon that enforces the connect/face *intents*.
While ``_want_connected`` is set it keeps (re)establishing the BLE link
through drops and weak-signal scan misses; once connected, if the face is
desired but not running (e.g. after a reconnect) it restarts it. A user
Disconnect clears the intent so this stops fighting a deliberate
disconnect. Each attempt reuses the normal serialized connect()/
face_start() paths, so there are no new locking hazards only retries."""
import time as _time
backoff = 3.0
while True:
_time.sleep(backoff)
try:
if not self._want_connected or self._connecting:
backoff = 3.0
continue
if self.is_connected:
backoff = 3.0
# Link is up — restore the face if it's wanted but stopped
# (e.g. the face loop bailed on a drop the supervisor healed).
if self._face_desired and not self._face_running:
try:
self.face_start(reload=False)
log.info("mask supervisor: face restored")
except Exception as exc:
log.debug("mask supervisor: face restore failed (%s)", exc)
backoff = 5.0
continue
# Want a link but don't have one -> reconnect (short, then loop).
self._reconnecting = True
try:
self.connect(timeout=12.0, attempts=2)
log.info("mask supervisor: link (re)established")
if self._face_desired:
self.face_start(reload=False)
backoff = 3.0
except Exception as exc:
# Keep trying with a gentle backoff (weak signal / mask off).
log.debug("mask supervisor: reconnect attempt failed (%s)", exc)
# 'Software caused connection abort' / 'device disconnected'
# is usually a half-open BlueZ link from the drop: the mask
# still shows "connected" so it stops advertising and the next
# scan can't find it. Clearing it lets the mask re-advertise.
m = str(exc).lower()
if any(s in m for s in ("abort", "disconnect", "not connected",
"discover services")):
try:
self._clear_stale_mask_links()
except Exception:
pass
backoff = min(backoff * 1.5, 20.0)
finally:
self._reconnecting = False
except Exception:
log.exception("mask supervisor loop error")
backoff = 5.0
# -- loop plumbing --------------------------------------------------------
def _run_loop(self):
asyncio.set_event_loop(self._loop)
self._loop.run_forever()
def _submit(self, coro, timeout: float = 30.0):
"""Run a coroutine on the mask loop from a caller thread, blocking."""
fut = asyncio.run_coroutine_threadsafe(coro, self._loop)
return fut.result(timeout=timeout)
# -- lazy import of the flat Mask library ---------------------------------
def _ensure_lib(self) -> dict:
if self._lib is not None:
return self._lib
if self._lib_failed:
raise RuntimeError(self._last_error or "mask library unavailable")
if self.mask_dir and self.mask_dir not in sys.path:
sys.path.insert(0, self.mask_dir)
try:
import mask as _mask
import faceanim as _faceanim
import colorface as _colorface
import constants as _constants
except Exception as exc:
self._lib_failed = True
self._last_error = f"mask library import failed: {exc}"
log.exception("Mask library import failed (dir=%s) — is bleak/Pillow "
"installed (g1_env)?", self.mask_dir)
raise RuntimeError(self._last_error)
try:
from Project.Sanad.face.face_motion import LifelikeFace as _LifelikeFace
except Exception:
_LifelikeFace = None
log.warning("LifelikeFace unavailable — falling back to FaceAnimator")
self._lib = {
"ShiningMask": _mask.ShiningMask,
"FaceAnimator": _faceanim.FaceAnimator,
"LifelikeFace": _LifelikeFace,
"colorface": _colorface,
"TextMode": _constants.TextMode,
}
log.info("Mask library imported from %s", self.mask_dir)
return self._lib
@property
def lib_available(self) -> bool:
    """True when the Mask library is (or can be) imported.

    Uses the cached result when there is one; otherwise attempts the lazy
    import via ``_ensure_lib()`` and reports whether it succeeded. Never
    raises.
    """
    if self._lib is not None:
        return True
    if self._lib_failed:
        return False
    try:
        self._ensure_lib()
    except Exception:
        return False
    return True
@property
def is_connected(self) -> bool:
    """True when a mask object exists and reports a truthy ``is_connected``."""
    device = self._mask
    if device is None:
        return False
    return bool(getattr(device, "is_connected", False))
def _require_connected(self):
if not self.is_connected:
raise RuntimeError("mask not connected")
# -- status ---------------------------------------------------------------
def status(self) -> dict:
    """Return a snapshot of the controller's state.

    Covers link state and intents, face/driver state, appearance settings,
    BLE targeting parameters, the list of known expressions and the last
    recorded error. ``reconnecting`` and ``face_running`` are only reported
    true when consistent with the current link state.
    """
    snapshot = dict(
        lib_available=self.lib_available,
        connected=self.is_connected,
        connecting=self._connecting,
        # A reconnect flag is only meaningful while the link is down.
        reconnecting=self._reconnecting and not self.is_connected,
        want_connected=self._want_connected,
        # The face cannot be running on a dropped link.
        face_running=self._face_running and self.is_connected,
        face_desired=self._face_desired,
        driver=self._face_kind,
        lifelike=self.lifelike,
        autostart=self.autostart,
        gemini_linked=self._gemini_linked,
        hide_mouth=self._hide_mouth,
        speaking=self._speaking,
        mouth=self._mouth,
        brightness=self.brightness,
        eye_color=list(self.eye_color),
        mouth_color=list(self.mouth_color),
        sclera_color=list(self.sclera_color),
        fps=self.fps,
        address=self.address,
        name_prefix=self.name_prefix,
        adapter=self.adapter,
        mask_dir=self.mask_dir,
        expressions=list(EXPRESSIONS),
        last_error=self._last_error,
    )
    return snapshot
# -- connection -----------------------------------------------------------
def connect(self, timeout: Optional[float] = None, attempts: Optional[int] = None) -> dict:
    """Connect to the mask (blocking) and record the intent to stay connected.

    The whole connect is serialized under ``_op_lock`` so it cannot interleave
    with a concurrent disconnect()/face_start() swapping ``self._mask``, and
    so ``_connecting`` is set and cleared under the same lock.

    Args:
        timeout: per-attempt timeout in seconds; falsy -> ``self.connect_timeout``.
        attempts: number of attempts; falsy -> ``self.connect_attempts``.

    Returns:
        ``self.status()`` after a successful connect.

    Raises:
        ValueError/TypeError: ``timeout`` or ``attempts`` is not numeric.
        RuntimeError: a connect is already in progress, or the connect failed.
            The message is also stored in ``_last_error`` and the original
            exception is chained as ``__cause__``.
    """
    with self._op_lock:
        if self._connecting:
            raise RuntimeError("a connect is already in progress")
        # Validate arguments BEFORE raising the _connecting flag: a bad value
        # must not leave the flag stuck, which would block every later connect
        # and make the supervisor skip its reconnects.
        to = float(timeout) if timeout else self.connect_timeout
        at = int(attempts) if attempts else self.connect_attempts
        self._connecting = True
        self._last_error = None
        try:
            self._ensure_lib()
            self._submit(self._aconnect(to, at), timeout=to * at + 15.0)
            self._want_connected = True  # intent: supervisor keeps it alive
        except Exception as exc:
            # Some exceptions (e.g. a future timeout) have an empty str();
            # fall back to the type name so the error is never blank.
            detail = str(exc) or type(exc).__name__
            self._last_error = detail
            raise RuntimeError(detail) from exc
        finally:
            self._connecting = False
    return self.status()
async def _aconnect(self, timeout: float, attempts: int):
if self.is_connected:
return
lib = self._lib
# Tear down any stale mask from a previous (now-dropped) session BEFORE
# building a fresh one. A reconnect after a silent BLE drop leaves the old
# BleakClient holding a half-open BlueZ connection to the SAME device; if
# we just overwrite self._mask the old client is never disconnected at the
# BlueZ level, the OS keeps the device "connected", and the new
# BleakClient.connect() to that address hangs/refuses. Disconnect (and
# drop) the old client first so the fresh connect starts from a clean
# BlueZ state.
old = self._mask
self._mask = None
if old is not None:
try:
await old.disconnect()
except Exception:
log.exception("stale mask.disconnect() before reconnect failed")
self._mask = lib["ShiningMask"](
address=self.address, name_prefix=self.name_prefix, adapter=self.adapter)
await self._mask.connect(timeout=timeout, attempts=attempts)
def disconnect(self) -> dict:
    """Deliberately tear the link down and stop the face.

    The connect/face intents are cleared twice:

    * before taking ``_op_lock``, so the supervisor stops scheduling new
      reconnects as early as possible, and
    * again once the lock is held, because a ``connect()`` or ``face_start()``
      that was already running sets ``_want_connected`` / ``_face_desired``
      back to True when it finishes. Without the second clear the supervisor
      would reconnect right after this deliberate disconnect.

    A failure of the mask's own ``disconnect()`` is logged, not raised.

    Returns:
        ``self.status()`` after the teardown.
    """
    self._want_connected = False
    self._face_desired = False
    with self._op_lock:
        # Re-assert: an operation that held the lock may have restored them.
        self._want_connected = False
        self._face_desired = False
        self._stop_face()
        if self._mask is not None:
            try:
                self._submit(self._mask.disconnect(), timeout=10.0)
            except Exception:
                log.exception("mask.disconnect() failed")
    return self.status()
def set_gemini_linked(self, on: bool) -> dict:
    """Link or unlink Gemini and the mask.

    ON  -> declare the intent to hold the BLE link and run the face, then make
           one quick connect attempt so the common "mask nearby" case comes up
           immediately; the supervisor keeps retrying in the background.
    OFF -> tear the link down via ``disconnect()``, which also clears the
           intents so the supervisor stops reconnecting.

    Returns:
        ``{"ok": True, "linked": <bool>, "connected": <bool>}``.
    """
    linked = bool(on)
    self._gemini_linked = linked

    if not linked:
        # Deliberate teardown; disconnect() also clears _want_connected /
        # _face_desired so the supervisor stops trying to reconnect.
        self.disconnect()
        return {"ok": True, "linked": False, "connected": self.is_connected}

    self._want_connected = True
    self._face_desired = True

    if not (self.is_connected or self._connecting):
        # No link and nobody is connecting: try once, briefly.
        try:
            self._clear_stale_mask_links()
        except Exception:
            pass
        try:
            self.connect(timeout=10.0, attempts=1)
            self.face_start(reload=False)
        except Exception as exc:
            log.info("link-on: mask not up yet, supervisor will retry (%s)", exc)
    elif self.is_connected and not self._face_running:
        # Already linked but the face is stopped: bring it back, best-effort.
        try:
            self.face_start(reload=False)
        except Exception:
            pass

    return {"ok": True, "linked": True, "connected": self.is_connected}
# -- simple commands ------------------------------------------------------
def set_brightness(self, level: int) -> dict:
    """Set the panel brightness, clamped to the hardware range 0-128.

    Per config/mask_config.json the range is "0-128. Keep <=100 to avoid LED
    flicker", so values above 128 are clamped rather than forwarded. The new
    level is also stored on the controller and on the running face, if any.

    Raises:
        RuntimeError: the mask is not connected.
    """
    clamped = min(max(int(level), 0), 128)
    with self._op_lock:
        self._require_connected()
        self._submit(self._mask.set_brightness(clamped))
        self.brightness = clamped
        animator = self._face
        if animator is not None:
            animator.brightness = clamped
    return {"ok": True, "brightness": clamped}
def set_text(self, text: str, color: Color = (255, 255, 255),
             mode: Optional[int] = None, bg: Optional[Color] = None,
             speed: Optional[int] = None) -> dict:
    """Show text on the mask as a static override of the animated face.

    Args:
        text: text to display (converted with ``str``).
        color: text color as an iterable passed through ``tuple``.
        mode: text mode; ``None`` -> the library's ``TextMode.SCROLL_LEFT``.
        bg: optional background color, applied after the text is set.
        speed: optional speed, clamped to 0-255.

    Raises:
        RuntimeError: the mask is not connected.
    """
    with self._op_lock:
        self._require_connected()
        self._face_desired = False  # static override — don't auto-restart the face
        self._stop_face()  # static text can't share the panel with the animator
        text_modes = self._lib["TextMode"]
        chosen_mode = text_modes.SCROLL_LEFT if mode is None else int(mode)
        extra = {} if speed is None else {"speed": max(0, min(255, int(speed)))}
        request = self._mask.set_text(str(text), color=tuple(color),
                                      mode=chosen_mode, **extra)
        self._submit(request, timeout=20.0)
        if bg is not None:
            # Apply a custom background AFTER set_text (which forces black by default).
            self._submit(self._mask.set_background_color(*tuple(bg)), timeout=10.0)
    return {"ok": True}
def show_image(self, image_id: int) -> dict:
    """Show a stored image by id as a static override of the animated face.

    Raises:
        RuntimeError: the mask is not connected.
    """
    with self._op_lock:
        self._require_connected()
        self._face_desired = False  # static override
        self._stop_face()
        picture = int(image_id)
        self._submit(self._mask.show_image(picture))
    return {"ok": True, "image_id": int(image_id)}
def play_animation(self, anim_id: int) -> dict:
    """Play a stored animation by id as a static override of the animated face.

    Raises:
        RuntimeError: the mask is not connected.
    """
    with self._op_lock:
        self._require_connected()
        self._face_desired = False  # static override
        self._stop_face()
        clip = int(anim_id)
        self._submit(self._mask.play_animation(clip))
    return {"ok": True, "anim_id": int(anim_id)}
def clear_diy(self) -> dict:
    """Delete the DIY frames stored on the mask.

    The face loop is stopped first so it is not playing frames while they are
    being deleted. ``_face_desired`` is left untouched.

    Returns:
        ``{"ok": True, "removed": <count>}``; a falsy result from the mask is
        reported as 0.

    Raises:
        RuntimeError: the mask is not connected.
    """
    with self._op_lock:
        self._require_connected()
        self._stop_face()  # stop the loop before deleting the frames it plays
        outcome = self._submit(self._mask.clear_diy(), timeout=30.0)
    removed_count = int(outcome or 0)
    return {"ok": True, "removed": removed_count}
# -- animated face --------------------------------------------------------
def _stop_face(self):
"""Cancel the animator loop (if any) and reset face state. Idempotent.
Lock-free internal: callers MUST hold ``self._op_lock`` (it mutates the
shared self._face / self._face_running state that the serialized mask
operations and the event-bus callbacks both touch)."""
if self._face is not None:
try:
self._submit(self._face.stop(), timeout=10.0)
except Exception:
log.exception("face.stop() failed")
self._face = None
self._face_running = False
self._speaking = False
self._mouth = None
def face_start(self, reload: bool = False) -> dict:
    """Start (or restart) the animated face on the connected mask.

    Any existing animator is torn down first so a second Start (or Reload)
    never leaves two animator tasks fighting over the display; the whole
    operation runs under ``_op_lock`` so concurrent Start presses cannot both
    build an animator and race ``self._face``.

    Driver selection: the ``LifelikeFace`` driver when ``self.lifelike`` is
    set and the driver was importable, otherwise the basic ``FaceAnimator``.

    If the animator fails to start, the half-built animator is discarded via
    ``_stop_face()`` so the event-bus setters and ``show_expression`` do not
    operate on a face that never ran; the original error is re-raised.

    Args:
        reload: forwarded to the animator's ``start`` (re-upload the frames).

    Returns:
        ``{"ok": True, "reloaded": <bool>, "driver": <str>}``.

    Raises:
        RuntimeError: the mask is not connected.
        Exception: whatever the animator's start raised (or a timeout).
    """
    with self._op_lock:
        self._require_connected()
        self._stop_face()
        cf = self._lib["colorface"]
        Lifelike = self._lib.get("LifelikeFace") if self.lifelike else None
        if Lifelike is not None:
            # Rich driver: eye saccades, varied blinks, states, reactions,
            # smooth lip-sync. Runs its own loop on this controller's BLE loop.
            # auto_reconnect=False -> the controller's supervisor owns recovery.
            self._face = Lifelike(mask=self._mask, brightness=self.brightness,
                                  eye_color=self.eye_color, mouth_color=self.mouth_color,
                                  sclera_color=self.sclera_color, auto_reconnect=False,
                                  hide_mouth=self._hide_mouth)
            self._face_kind = "lifelike"
        else:
            self._face = self._lib["FaceAnimator"](
                self._mask, fps=self.fps, brightness=self.brightness,
                frames=cf.default_frames(eye_color=self.eye_color,
                                         mouth_color=self.mouth_color,
                                         sclera_color=self.sclera_color))
            self._face_kind = "faceanim"
        # First upload of the frame set can take ~30-90s (acked writes); later
        # starts skip it (frames persist on the mask's flash).
        try:
            self._submit(self._face.start(reload=bool(reload)), timeout=240.0)
        except Exception:
            # Do not keep a reference to an animator that never started.
            self._stop_face()
            raise
        self._face_running = True
        self._face_desired = True  # intent: supervisor restores it after a drop
        self._want_connected = True
    return {"ok": True, "reloaded": bool(reload), "driver": self._face_kind}
def face_stop(self) -> dict:
    """Stop the animated face at the user's request.

    Clears ``_face_desired`` so the supervisor does not restart the face.
    """
    result = {"ok": True}
    with self._op_lock:
        self._face_desired = False  # user stopped it — don't auto-restart
        self._stop_face()
    return result
def return_face(self) -> dict:
    """Resume the live animated face, e.g. after a text/image/animation override.

    Marks the face as desired before starting it, so the intent is recorded
    even if ``face_start`` raises.
    """
    self._face_desired = True
    outcome = self.face_start(reload=False)
    return outcome
def set_face_color(self, eye=None, mouth=None, sclera=None) -> dict:
    """Recolor the animated face.

    Colors are baked into the uploaded DIY frames, so the new values are
    stored and persisted to config, and if the mask is connected and the face
    is desired the frame set is re-uploaded in the new colors (~30-90s).

    Args:
        eye, mouth, sclera: new colors; ``None`` keeps the current value.
            Each is parsed with ``_parse_color`` using the current color as
            the second argument.

    Returns:
        ``{"ok": True, "reuploaded": <bool>, ...}`` plus the three colors as lists.
    """
    requested = (("eye_color", eye), ("mouth_color", mouth), ("sclera_color", sclera))
    for attr, raw in requested:
        if raw is not None:
            setattr(self, attr, _parse_color(raw, getattr(self, attr)))
    self._save_colors()

    reuploaded = bool(self.is_connected and self._face_desired)
    if reuploaded:
        self.face_start(reload=True)  # rebuild frames in the new colors

    return {"ok": True, "reuploaded": reuploaded,
            "eye_color": list(self.eye_color),
            "mouth_color": list(self.mouth_color),
            "sclera_color": list(self.sclera_color)}
def _save_colors(self):
"""Persist the chosen face colors to config/mask_config.json (best-effort,
so they survive restarts and drive autostart). Never raises."""
try:
import json
path = Path(BASE_DIR) / "config" / "mask_config.json"
data = json.loads(path.read_text()) if path.exists() else {}
data["eye_color"] = list(self.eye_color)
data["mouth_color"] = list(self.mouth_color)
data["sclera_color"] = list(self.sclera_color)
path.write_text(json.dumps(data, indent=2))
except Exception:
log.exception("could not persist mask face colors (kept in-memory)")
# -- lifelike states + reactions (no-ops on the basic FaceAnimator) --------
def _face_state(self, state: str) -> dict:
# Snapshot the face reference once: face_start/_stop_face (under _op_lock)
# can swap self._face to None concurrently, and these state setters fire
# from the event-bus worker threads. A local snapshot avoids a torn read
# (AttributeError) without blocking on a long face_start upload.
face = self._face
fn = getattr(face, "set_" + state, None) if face is not None else None
if callable(fn):
try:
fn()
except Exception:
log.exception("face.set_%s failed", state)
return {"ok": True, "state": state}
def set_listening(self) -> dict:
    """Put the face into its "listening" state (no-op if unsupported)."""
    state_name = "listening"
    return self._face_state(state_name)
def set_thinking(self) -> dict:
    """Put the face into its "thinking" state (no-op if unsupported)."""
    state_name = "thinking"
    return self._face_state(state_name)
def set_idle(self) -> dict:
    """Put the face into its "idle" state (no-op if unsupported)."""
    state_name = "idle"
    return self._face_state(state_name)
def react(self, emotion: str, hold: float = 1.4) -> dict:
    """Trigger a brief reaction (surprised / smile / sad) on the face.

    No-op when there is no face or the driver has no ``react``. Errors from
    the driver are logged, never raised.

    Args:
        emotion: reaction name, passed to the driver as ``str``.
        hold: passed to the driver as ``float``.
    """
    reply = {"ok": True, "react": emotion}
    face = self._face  # snapshot: face_start/_stop_face may swap it concurrently
    if face is None or not hasattr(face, "react"):
        return reply
    try:
        face.react(str(emotion), float(hold))
    except Exception:
        log.exception("face.react failed")
    return reply
def set_speaking(self, on: bool) -> dict:
    """Turn the talking-mouth animation on or off.

    Safe no-op if the face isn't running. Also called from the event bus
    (brain.gestural_speaking_changed). Clears any explicit mouth level.
    """
    speaking = bool(on)
    self._speaking, self._mouth = speaking, None
    face = self._face  # snapshot: avoid a torn read vs a concurrent _stop_face
    if face is None:
        return {"ok": True, "speaking": speaking}
    try:
        face.set_speaking(speaking)
    except Exception:
        log.exception("face.set_speaking() failed")
    return {"ok": True, "speaking": speaking}
def set_mouth(self, level: int) -> dict:
    """Set an explicit mouth-open level, clamped to 0-3.

    Fired from the Gemini reader thread at lip-sync rate. Clears the
    ``_speaking`` flag. No-op on the panel when no face is running; driver
    errors are logged, never raised.
    """
    opening = min(3, max(0, int(level)))
    self._mouth = opening
    self._speaking = False
    # Snapshot the face so a concurrent face_start/_stop_face swap can't
    # NoneType-deref here.
    face = self._face
    if face is None:
        return {"ok": True, "mouth": opening}
    try:
        face.set_mouth(opening)
    except Exception:
        log.exception("face.set_mouth() failed")
    return {"ok": True, "mouth": opening}
def show_expression(self, name: str) -> dict:
    """Show a named expression once on the running face.

    Raises:
        RuntimeError: the mask is not connected, or no face is running.
    """
    with self._op_lock:
        self._require_connected()
        animator = self._face
        if animator is None:
            raise RuntimeError("face animation not started")
        request = animator.show(str(name))
        self._submit(request, timeout=10.0)
    return {"ok": True, "expression": name}
def show_scratch_image(self, data: bytes, timeout: float = 90.0) -> dict:
    """Upload raw 46x58 image bytes to the scratch DIY slot and hold it.

    The image (a QR / social / custom picture) stays on the face until the
    face is resumed with ``set_expression(None)``. Uses the acked image
    upload. The slot is the face's ``scratch_slot`` attribute, default 20.

    Args:
        data: raw image bytes (converted with ``bytes``).
        timeout: overall timeout in seconds for the upload.

    Returns:
        ``{"ok": True, "slot": <int>}``.

    Raises:
        RuntimeError: the mask is not connected, or no face is running.
    """
    with self._op_lock:
        self._require_connected()
        face = self._face
        if face is None:
            raise RuntimeError("face animation not started")
        slot = int(getattr(face, "scratch_slot", 20))

        # Pause the animation loop so its play_diy traffic doesn't disturb the
        # acked upload's per-packet REOK acks (else NotificationTimeout), and
        # wait for the loop to actually park where the driver supports it.
        can_pause = hasattr(face, "pause")
        if can_pause:
            face.pause()
            if not hasattr(face, "wait_paused"):
                import time as _t
                _t.sleep(0.35)
            else:
                face.wait_paused(2.0)
        try:
            upload = self._mask.upload_image(bytes(data), slot, timeout=15.0)
            self._submit(upload, timeout=timeout)
            # Register "_scratch" so set_expression holds it on EITHER driver:
            # LifelikeFace.set_expression checks .slots, FaceAnimator checks
            # .frames — populate both so the fallback driver holds it too.
            if hasattr(face, "slots"):
                face.slots["_scratch"] = slot
            frame_table = getattr(face, "frames", None)
            if isinstance(frame_table, dict):
                frame_table.setdefault("_scratch", b"")
            if hasattr(face, "set_expression"):
                face.set_expression("_scratch")
        finally:
            if can_pause:
                face.resume()  # loop resumes + holds the "_scratch" frame
    return {"ok": True, "slot": slot}
def set_mouth_hidden(self, hidden: bool) -> dict:
    """Show or hide the mouth on the animated face.

    Re-uploads the frames returned by the face's ``mouth_frames_for(hidden)``
    (the gaze/talk slots, eyes-only or normal) with the animation loop
    paused so the acked upload isn't disturbed. The setting is stored in
    ``_hide_mouth`` and therefore also applies to later face starts in this
    session.

    Before uploading, the loop is given time to park: via ``wait_paused``
    when the driver has it, otherwise a short fixed sleep, the same fallback
    ``show_scratch_image`` uses.

    Returns:
        ``{"ok": True, "hidden": <bool>}``, with an extra ``note`` when the
        change could not be applied now (no face, not connected, or a driver
        without ``mouth_frames_for``).
    """
    hidden = bool(hidden)
    with self._op_lock:
        self._hide_mouth = hidden
        face = self._face
        if (face is None or not self.is_connected
                or not hasattr(face, "mouth_frames_for")):
            return {"ok": True, "hidden": hidden,
                    "note": "applies when the face is running"}
        frames = face.mouth_frames_for(hidden)
        paused = hasattr(face, "pause")
        if paused:
            face.pause()
            if hasattr(face, "wait_paused"):
                face.wait_paused(2.0)
            else:
                # No way to observe the loop parking: give it a moment so its
                # traffic does not overlap the acked upload.
                import time as _t
                _t.sleep(0.35)
        try:
            for name, data in frames.items():
                slot = face.slots.get(name) if hasattr(face, "slots") else None
                # NOTE(review): a slot index of 0 is skipped by this
                # truthiness test — confirm that 0 is never a valid slot.
                if slot:
                    self._submit(self._mask.upload_image(bytes(data), int(slot),
                                                         timeout=15.0), timeout=90.0)
                    if hasattr(face, "frames"):
                        face.frames[name] = data
            if hasattr(face, "hide_mouth"):
                face.hide_mouth = hidden
            if hasattr(face, "_cur"):
                face._cur = None  # force a redraw with the new frame
        finally:
            if paused:
                face.resume()
    return {"ok": True, "hidden": hidden}
def set_expression(self, name: Optional[str]) -> dict:
    """Pin an expression over the animation; a falsy name resumes idle/talk.

    Unlike ``show_expression`` (a one-off), this holds the frame until it is
    cleared, e.g. 'surprised' on a reaction or 'sad' on an error. Safe no-op
    if the face isn't running; driver errors are logged, never raised.
    """
    reply = {"ok": True, "expression": name}
    face = self._face  # snapshot: face_start/_stop_face may swap it concurrently
    if face is None:
        return reply
    try:
        face.set_expression(name or None)
    except Exception:
        log.exception("face.set_expression() failed")
    return reply
# -- lifecycle ------------------------------------------------------------
def shutdown(self):
    """Disconnect the mask and ask the background event loop to stop.

    Both steps are best-effort: a disconnect failure is logged, and a failure
    to schedule the loop stop is ignored, so this never raises.
    """
    try:
        self.disconnect()
    except Exception:
        log.exception("mask disconnect on shutdown failed")

    try:
        mask_loop = self._loop
        mask_loop.call_soon_threadsafe(mask_loop.stop)
    except Exception:
        pass

0
vendor/Sanad/gemini/__init__.py vendored Normal file
View File

357
vendor/Sanad/gemini/client.py vendored Normal file
View File

@ -0,0 +1,357 @@
"""Gemini WebSocket client for real-time voice interaction.
Provides:
- Bidirectional audio streaming (mic → Gemini → speaker)
- Text-to-speech via typed input
- Voice-command detection through transcription parsing
- System instruction injection for persona control
"""
from __future__ import annotations
import asyncio
import base64
import inspect
import json
from typing import Any
import websockets
from Project.Sanad.config import (
GEMINI_API_KEY,
GEMINI_MODEL,
GEMINI_VOICE,
GEMINI_WS_TIMEOUT,
GEMINI_WS_URI,
)
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.event_bus import bus
from Project.Sanad.core.logger import get_logger
# Module logger.
log = get_logger("gemini_client")
# The ("gemini", "client") config section; the tunables at the bottom of this
# block are read from it with hard-coded fallbacks.
_GC = _cfg_section("gemini", "client")
# Default system prompt — SINGLE SOURCE in core.gemini_defaults.
# The literal below is only the fallback used when the config has no
# "default_system_prompt" entry.
_DEFAULT_SYSTEM_PROMPT = _cfg_section("core", "gemini_defaults").get(
    "default_system_prompt",
    "You are Sanad (Bousandah), a wise and friendly Emirati assistant. "
    "Speak in UAE dialect (Khaleeji). Be helpful and concise."
)
# TTS / typed-replay system prompt. The voice_client speaks TYPED text (typed
# replay + /api/voice/generate), so it must read the text VERBATIM in its OWN
# language — NOT answer it and NOT force Khaleeji (the default persona does the
# latter, which made English/Urdu/Indonesian lines come out in Arabic).
# As above, the literal is the fallback for a missing "tts_system_prompt" entry.
TTS_SYSTEM_PROMPT = _cfg_section("core", "gemini_defaults").get(
    "tts_system_prompt",
    "You are a pure multilingual text-to-speech voice. The instant the user "
    "sends text, speak it aloud word for word in the SAME language it is "
    "written in, then stop. Output ONLY that spoken audio — no thinking, no "
    "commentary, no acknowledgements, no headers, no explanations, no "
    "greetings, no extra words. Never translate and never change the language: "
    "English stays English, Arabic stays Arabic, Urdu stays Urdu, Indonesian "
    "stays Indonesian. Your speech must be identical to the user's text, "
    "nothing more and nothing less."
)
# Receive timeout in seconds (default 30). NOTE(review): not referenced by the
# code visible in this file — confirm where it is consumed.
_RECV_TIMEOUT_SEC = _GC.get("recv_timeout_sec", 30)
# Reconnect policy used by GeminiVoiceClient._ensure_connected: number of
# attempts, first delay, and the cap for the doubling delay (seconds).
_RECONNECT_MAX_ATTEMPTS = _GC.get("reconnect_max_attempts", 3)
_RECONNECT_INITIAL_DELAY_SEC = _GC.get("reconnect_initial_delay_sec", 1.0)
_RECONNECT_MAX_DELAY_SEC = _GC.get("reconnect_max_delay_sec", 10.0)
class GeminiVoiceClient:
    """Manages one WebSocket session to the Gemini Bidi audio API.

    Concurrency model:
      - `_send_lock` serializes ALL websocket writes.
      - `_session_lock` ensures only one consumer (live loop OR typed replay)
        owns the receive stream at a time. Acquired by send_text and by the
        guard returned from acquire_session().
      - `_connect_lock` serializes reconnect attempts.
      - `_owner` records who currently holds the session lock, for diagnostics.
    """

    def __init__(self, system_prompt: str = ""):
        """Create a disconnected client.

        Args:
            system_prompt: persona/system instruction sent at connect time;
                empty -> the configured default system prompt.
        """
        self.system_prompt = system_prompt or _DEFAULT_SYSTEM_PROMPT
        self._ws: Any = None
        self._connected = False
        self._send_lock = asyncio.Lock()
        self._session_lock = asyncio.Lock()
        self._connect_lock = asyncio.Lock()  # serializes reconnect attempts
        self._owner: str | None = None
        self._reconnect_attempts = 0

    @property
    def connected(self) -> bool:
        """True while the client believes the websocket session is up."""
        return self._connected

    @property
    def session_owner(self) -> str | None:
        """Name of the consumer currently holding the session, or None."""
        return self._owner

    def _ws_kwargs(self) -> dict[str, Any]:
        """Build keyword arguments for ``websockets.connect``.

        The header keyword is named ``extra_headers`` or
        ``additional_headers`` depending on the installed websockets version,
        so the signature is inspected to pick the one that exists.
        """
        kwargs: dict[str, Any] = {"max_size": None, "open_timeout": 30}
        try:
            sig = inspect.signature(websockets.connect)
            key = "extra_headers" if "extra_headers" in sig.parameters else "additional_headers"
        except Exception:
            key = "extra_headers"
        kwargs[key] = {"Content-Type": "application/json"}
        return kwargs

    async def connect(self):
        """Open the websocket, send the setup message and wait for its ACK.

        On any failure the client is left disconnected, a socket that was
        already opened is closed (not just dropped), and the original
        exception is re-raised. The wait for the setup ACK is bounded by
        ``_RECV_TIMEOUT_SEC`` so a silent server cannot hang the caller, which
        may be holding ``_connect_lock``.
        """
        uri = f"{GEMINI_WS_URI}?key={GEMINI_API_KEY}"
        ws = None
        try:
            ws = await websockets.connect(uri, **self._ws_kwargs())
            self._ws = ws
            setup = {
                "setup": {
                    "model": GEMINI_MODEL,
                    "generationConfig": {
                        "responseModalities": ["AUDIO"],
                        "speechConfig": {
                            "voiceConfig": {
                                "prebuiltVoiceConfig": {"voiceName": GEMINI_VOICE}
                            }
                        },
                    },
                    "systemInstruction": {"parts": [{"text": self.system_prompt}]},
                }
            }
            await ws.send(json.dumps(setup))
            await asyncio.wait_for(ws.recv(), timeout=_RECV_TIMEOUT_SEC)  # ACK
            self._connected = True
            self._reconnect_attempts = 0
            log.info("Connected to Gemini (%s)", GEMINI_MODEL)
            await bus.emit("voice.connected")
        except Exception:
            self._connected = False
            self._ws = None
            log.exception("Failed to connect to Gemini")
            if ws is not None:
                # Do not leak a half-set-up socket.
                try:
                    await ws.close()
                except Exception:
                    log.debug("closing half-open Gemini socket failed", exc_info=True)
            raise

    async def disconnect(self):
        """Close the websocket (best-effort) and reset the session state."""
        try:
            if self._ws is not None:
                await self._ws.close()
        except Exception:
            pass
        finally:
            self._ws = None
            self._connected = False
            self._owner = None
        log.info("Disconnected from Gemini")
        await bus.emit("voice.disconnected")

    async def _ensure_connected(self):
        """Reconnect if dropped, with bounded retries and doubling delay.

        Serialized via ``_connect_lock`` so concurrent callers don't trigger
        duplicate handshakes. There is no delay after the final failed
        attempt; the failure is reported immediately.

        Returns:
            True when connected (already or after a reconnect), False when
            every attempt failed (a ``voice.error`` event is emitted).
        """
        # Fast path — no lock needed
        if self._connected and self._ws is not None:
            return True

        async with self._connect_lock:
            # Re-check inside the lock (another coroutine may have just connected)
            if self._connected and self._ws is not None:
                return True

            max_attempts = _RECONNECT_MAX_ATTEMPTS
            delay = _RECONNECT_INITIAL_DELAY_SEC
            for attempt in range(1, max_attempts + 1):
                try:
                    log.warning("Reconnecting to Gemini (attempt %d/%d)", attempt, max_attempts)
                    await self.connect()
                    return True
                except Exception:
                    self._reconnect_attempts += 1
                    if attempt < max_attempts:
                        await asyncio.sleep(delay)
                        delay = min(delay * 2, _RECONNECT_MAX_DELAY_SEC)
            log.error("Reconnect failed after %d attempts", max_attempts)
            await bus.emit("voice.error", reason="reconnect_failed")
            return False

    async def send_audio_chunk(self, pcm_b64: str) -> bool:
        """Send a base64-encoded PCM audio chunk (mic input).

        Returns False on failure so the caller can react instead of silently
        no-op'ing forever.
        """
        if not self._connected or self._ws is None:
            return False
        msg = {
            "realtimeInput": {
                "mediaChunks": [
                    {"mimeType": "audio/pcm;rate=16000", "data": pcm_b64}
                ]
            }
        }
        try:
            async with self._send_lock:
                await self._ws.send(json.dumps(msg))
            return True
        except websockets.exceptions.ConnectionClosed:
            log.warning("send_audio_chunk: connection closed")
            self._connected = False
            await bus.emit("voice.error", reason="connection_closed")
            return False
        except Exception:
            log.exception("send_audio_chunk failed")
            return False

    async def send_text(self, text: str, owner: str = "send_text") -> tuple[bytes, list[str]]:
        """Send text, receive the audio response.

        Holds the session lock for the whole request/response cycle so no
        other consumer can steal frames from the receive side. If the
        connection turns out to be dead on send, reconnects once and retries.

        Returns:
            ``(audio_bytes, text_parts)``.

        Raises:
            RuntimeError: not connected and the reconnect failed.
        """
        if not await self._ensure_connected():
            raise RuntimeError("Not connected to Gemini and reconnect failed.")

        async with self._session_lock:
            self._owner = owner
            try:
                return await self._send_text_inner(text)
            except websockets.exceptions.ConnectionClosed:
                log.warning("send_text: connection died on send — reconnecting once")
                self._connected = False
                if not await self._ensure_connected():
                    raise RuntimeError("Reconnect after send failure also failed.")
                return await self._send_text_inner(text)
            finally:
                self._owner = None

    async def _send_text_inner(self, text: str) -> tuple[bytes, list[str]]:
        """Inner send/receive loop — caller must hold ``_session_lock``.

        Collects inline audio and text parts until the server signals
        ``turnComplete``/``generationComplete``, reports an error, the socket
        closes, or a receive times out. Null or non-object payloads from the
        server are tolerated rather than raising AttributeError.
        """
        request = {
            "client_content": {
                "turns": [{"role": "user", "parts": [{"text": text}]}],
                "turn_complete": True,
            }
        }
        async with self._send_lock:
            await self._ws.send(json.dumps(request))

        audio_chunks: list[bytes] = []
        text_parts: list[str] = []
        while True:
            try:
                raw = await asyncio.wait_for(self._ws.recv(), timeout=GEMINI_WS_TIMEOUT)
            except asyncio.TimeoutError:
                log.warning("send_text: recv timed out")
                break
            except websockets.exceptions.ConnectionClosed:
                log.warning("send_text: connection closed mid-stream")
                self._connected = False
                break

            try:
                resp = json.loads(raw)
            except json.JSONDecodeError:
                log.warning("send_text: bad JSON from server")
                continue
            if not isinstance(resp, dict):
                log.warning("send_text: unexpected non-object message from server")
                continue

            if "error" in resp:
                log.error("Gemini error: %s", resp["error"])
                await bus.emit("voice.error", reason=str(resp["error"]))
                break

            # `or {}` also covers explicit JSON nulls, not just missing keys.
            sc = resp.get("serverContent") or {}
            mt = sc.get("modelTurn") or {}
            for part in mt.get("parts") or []:
                inline = part.get("inlineData")
                if inline and inline.get("data"):
                    audio_chunks.append(base64.b64decode(inline["data"]))
                tp = part.get("text")
                if isinstance(tp, str) and tp.strip():
                    text_parts.append(tp.strip())

            input_tr = sc.get("inputTranscription") or {}
            if input_tr.get("text"):
                await bus.emit("voice.user_said", text=input_tr["text"])

            if sc.get("turnComplete") or sc.get("generationComplete"):
                break

        audio_bytes = b"".join(audio_chunks)
        if audio_bytes:
            await bus.emit("voice.gemini_spoke", audio_len=len(audio_bytes))
        return audio_bytes, text_parts

    def acquire_session(self, owner: str) -> "_SessionGuard":
        """Return an async context manager for exclusive session ownership.

        Use as `async with client.acquire_session("live_voice"):`.
        While held, no other consumer may call send_text or receive_stream.
        """
        return _SessionGuard(self, owner)

    async def receive_stream(self):
        """Yield each message's ``serverContent`` dict (``{}`` when absent).

        Caller MUST hold the session lock (see acquire_session). Messages that
        are not valid JSON objects are skipped. Ends silently when not
        connected; on a closed connection the client is marked disconnected
        and a ``voice.error`` event is emitted.

        Raises:
            RuntimeError: no session owner is registered.
        """
        if self._owner is None:
            raise RuntimeError(
                "receive_stream requires session lock — use acquire_session() first"
            )
        if not self._connected or self._ws is None:
            return
        try:
            async for raw in self._ws:
                try:
                    resp = json.loads(raw)
                except json.JSONDecodeError:
                    continue
                if not isinstance(resp, dict):
                    continue
                yield resp.get("serverContent") or {}
        except websockets.exceptions.ConnectionClosed:
            log.warning("receive_stream: connection closed")
            self._connected = False
            await bus.emit("voice.error", reason="connection_closed")

    async def raw_send(self, payload: dict):
        """Low-level JSON send for the live loop, always under the send lock.

        Returns:
            True when sent, False when not connected or the send failed. A
            closed connection also marks the client disconnected, matching
            ``send_audio_chunk``, so the next caller can reconnect.
        """
        if not self._connected or self._ws is None:
            return False
        try:
            async with self._send_lock:
                await self._ws.send(json.dumps(payload))
            return True
        except websockets.exceptions.ConnectionClosed:
            log.warning("raw_send: connection closed")
            self._connected = False
            return False
        except Exception:
            log.exception("raw_send failed")
            return False

    def status(self) -> dict[str, Any]:
        """Return connection state, model/voice names and diagnostics."""
        return {
            "connected": self._connected,
            "model": GEMINI_MODEL,
            "voice": GEMINI_VOICE,
            "session_owner": self._owner,
            "reconnect_attempts": self._reconnect_attempts,
        }
class _SessionGuard:
"""Async context manager for exclusive session ownership.
Always releases owner + lock on exit, even on exceptions.
"""
def __init__(self, client: GeminiVoiceClient, owner: str):
self._client = client
self._owner = owner
self._held = False
async def __aenter__(self):
await self._client._session_lock.acquire()
self._held = True
self._client._owner = self._owner
return self._client
async def __aexit__(self, exc_type, exc, tb):
try:
self._client._owner = None
finally:
if self._held:
self._client._session_lock.release()
self._held = False
return False # don't suppress exceptions

Some files were not shown because too many files have changed in this diff Show More