commit 39be38811c65e554fcd7c345169f50c6e2eeb3be Author: kassam Date: Sat Jul 4 23:28:25 2026 +0400 Update 2026-07-04 23:28:24 diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..b55ad94 --- /dev/null +++ b/.env.example @@ -0,0 +1,40 @@ +# Copy to .env — docker compose reads it automatically from this directory. + +# Signed license for THIS robot (default = the bundled example, entitles P1+P3; +# P2 needs a license that entitles P2 — re-sign with packages.P2=true). +SANAD_LICENSE_FILE=./license/sanad.lic.example +# Enforce machine-fingerprint binding (1 on a delivered robot; also uncomment the +# /etc/machine-id mount in docker-compose.yml). +SANAD_LICENSE_BIND=0 + +# Audio: builtin (G1 chest over DDS) | plugged (USB e.g. Anker via PulseAudio) +SANAD_AUDIO_PROFILE=builtin +# DDS interface to the G1 firmware. +SANAD_DDS_INTERFACE=eth0 + +# --- Plugged/Bluetooth speaker volume (Anker/JBL) --------------------------- +# The volume slider drives ALL speaker types like SanadV3: the G1 chest over DDS +# (always) AND the active PulseAudio sink (plugged/BT). The container runs as root, +# so it needs the HOST pulse socket + cookie to reach a uid-1000 PulseAudio. +# One-time host setup (stable socket at boot, no stray dir): +# loginctl enable-linger unitree +# Override these only if your pulse runs under a different uid or cookie path: +SANAD_PULSE_DIR=/run/user/1000/pulse +PULSE_SERVER=unix:/run/user/1000/pulse/native +# Cookie location varies by distro; if plugged volume/output is silent, try +# /home/<user>/.config/pulse/cookie instead and mount it accordingly. +PULSE_COOKIE=/run/user/1000/pulse/cookie + +# Conversation language. Empty = MULTILINGUAL auto-detect (P2's headline feature); +# set e.g. ar/en only to force a single language. +SANAD_LANGUAGE= + +# LED "Shining Mask": pin its BLE MAC, else auto-discover by name prefix. +SANAD_MASK_ADDRESS= + +# Bundle the chest-audio Unitree SDK into the image (1=yes default, 0=USB-only/leaner). 
+WITH_UNITREE_SDK=1 +# Base image (override only for a GPU build). +BASE_OS_IMAGE=python:3.10-slim-bookworm +# Image name/tag (e.g. a registry path for pull-and-run). +# SANAD_IMAGE=sanad-p3:latest diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9343007 --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +# Python caches +__pycache__/ +*.pyc + +# Logs +*.log +Logs/ + +# Customer license — NEVER commit a real signed license; ship only the example. +license/sanad.lic + +# Runtime data (keep the seed structure + config; ignore generated media). +data/recordings/* +data/audio/* +data/faces/* +data/photos/* +!data/**/.gitkeep + +# NOTE: ./vendor IS committed on purpose — the vendored SanadV3 engine + Mask lib +# that make this package build standalone. Only caches are ignored (above). diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..6f9a220 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,75 @@ +# syntax=docker/dockerfile:1 +# ───────────────────────────────────────────────────────────────────────────── +# Sanad Package 3 — Recognition + Places + Memories. SELF-CONTAINED (vendors the +# SanadV3 engine + Mask lib under ./vendor; FROM python:3.10-slim; no sanad-base). +# docker build -t sanad-p3:latest . (Jetson without buildx: DOCKER_BUILDKIT=0) +# ───────────────────────────────────────────────────────────────────────────── +ARG BASE_OS_IMAGE=python:3.10-slim-bookworm +FROM ${BASE_OS_IMAGE} + +ENV DEBIAN_FRONTEND=noninteractive PYTHONUNBUFFERED=1 PYTHONDONTWRITEBYTECODE=1 PYTHONPATH=/app +WORKDIR /app + +# System deps: audio + PortAudio/toolchain (pyaudio) + BlueZ/D-Bus (mask) + +# iproute2 (`ip`, chest-mic) + libGL/glib for opencv-headless V4L camera capture. 
+RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates libsndfile1 alsa-utils pulseaudio-utils iproute2 \ + portaudio19-dev libportaudio2 build-essential python3-dev \ + bluez libdbus-1-3 libglib2.0-0 libgl1 \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt /tmp/requirements.txt +RUN python3 -m pip install --no-cache-dir --upgrade pip \ + && python3 -m pip install --no-cache-dir -r /tmp/requirements.txt + +# Optional Unitree SDK — chest (builtin) audio over DDS (full CycloneDDS + idlc, +# pinned binding). Wrapped so a failure never breaks the image. +ARG WITH_UNITREE_SDK=1 +ENV CYCLONEDDS_HOME=/usr/local LD_LIBRARY_PATH=/usr/local/lib +RUN if [ "$WITH_UNITREE_SDK" = "1" ]; then \ + ( set -eux; apt-get update; apt-get install -y --no-install-recommends git cmake build-essential; \ + git clone --depth 1 -b releases/0.10.x https://github.com/eclipse-cyclonedds/cyclonedds /tmp/cyclonedds; \ + cmake -S /tmp/cyclonedds -B /tmp/cyclonedds/build -DCMAKE_INSTALL_PREFIX=/usr/local; \ + cmake --build /tmp/cyclonedds/build --target install -j"$(nproc)"; \ + CYCLONEDDS_HOME=/usr/local CMAKE_PREFIX_PATH=/usr/local python3 -m pip install --no-cache-dir "cyclonedds==0.10.2"; \ + git clone --depth 1 https://github.com/unitreerobotics/unitree_sdk2_python /opt/unitree_sdk2_python; \ + python3 -m pip install --no-cache-dir -e /opt/unitree_sdk2_python; \ + python3 -c "import unitree_sdk2py; print('unitree_sdk2py OK')"; \ + rm -rf /tmp/cyclonedds /var/lib/apt/lists/*; ) \ + || echo "WARN[P3]: Unitree SDK build failed — chest audio unavailable; use SANAD_AUDIO_PROFILE=plugged"; \ + else echo "WITH_UNITREE_SDK=0 — skipping Unitree SDK"; fi + +COPY vendor/sanad_pkg /app/sanad_pkg +RUN mkdir -p /etc/sanad && cp /app/sanad_pkg/pubkey.ed25519 /etc/sanad/pubkey.ed25519 +COPY vendor/mask /app/mask +COPY vendor/Sanad /app/Sanad + +# P3 launcher + package-local memory feature + convenience routes + entrypoint + config. 
+COPY app_p3.py /app/app_p3.py +COPY routes_p3.py /app/routes_p3.py +COPY visitor_memory.py /app/visitor_memory.py +COPY routes_memory.py /app/routes_memory.py +COPY entrypoint.sh /app/entrypoint.sh +COPY config /app/pkg3_config +RUN chmod +x /app/entrypoint.sh + +COPY strip_key.py /tmp/strip_key.py +RUN python3 /tmp/strip_key.py && rm -f /tmp/strip_key.py + +RUN python3 - <<'PY' +import importlib.util as u, sys +ok = all(u.find_spec(m) for m in ("sanad_pkg.license", "Sanad", "visitor_memory")) +sys.path.insert(0, "/app/mask") +print("P3 self-contained: modules importable:", ok, "| cv2:", u.find_spec("cv2") is not None) +sys.exit(0 if ok else 1) +PY + +ENV SANAD_PACKAGE=P3 \ + SANAD_DASHBOARD_PORT=8013 \ + SANAD_DASHBOARD_HOST=0.0.0.0 \ + SANAD_MASK_DIR=/app/mask \ + SANAD_MEMORIES_DIR=/app/Sanad/data/memories \ + SANAD_LICENSE=/etc/sanad/sanad.lic \ + SANAD_PUBKEY=/etc/sanad/pubkey.ed25519 +EXPOSE 8013 +ENTRYPOINT ["/app/entrypoint.sh"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..1e0ca01 --- /dev/null +++ b/README.md @@ -0,0 +1,22 @@ +# Sanad Package 3 — Facial Recognition + Places + Memories (SCAFFOLD) + +Perception + memory only; **no motion**. Dashboard on **:8013**. License +features: `face_rec`, `places`, `memory`, `mask`. + +Planned (`sanad-recognition` container): +- **Faces / VIP DB** — `vision/face_gallery.py` (enroll/upload/capture/describe, + VIP flag). Recognition is Gemini-side in-context (primer images) — no local ML. +- **Places** — `vision/zone_gallery.py` visual place recognition (the `/go` nav + action stays disabled here; it belongs to P4). +- **Memories** — NEW persistent visitor-profile store (attributes, notes, + last-seen, linked `face_id`); feeds personalized-greeting primers to comms. +- **Face/Mask** — drives the shared `sanad-mask` for expressions on recognition. +- Consumes the `frames` topic from `Sanad_Core/camera`; emits `recognition.event` + / `place.event`. 
+ +Depends on `Sanad_Core` (camera, shared `sanad-mask`). See the plan for details. + +**Build model (when implemented):** self-contained like P1 — a `vendor/` copy of +the Sanad engine + a standalone `Dockerfile` (`FROM python:3.10-slim`) + its own +`docker-compose.yml`, refreshed by a `sync_vendor.sh`. No `sanad-base`. (Camera +frames + shared mask are the cross-container dependencies.) diff --git a/app_p3.py b/app_p3.py new file mode 100644 index 0000000..4b4cb48 --- /dev/null +++ b/app_p3.py @@ -0,0 +1,474 @@ +#!/usr/bin/env python3 +"""Sanad Package 3 — Facial Recognition + Places + Memories launcher. + +P3 = perception + memory (NO motion): identify faces (VIP DB), recognize places +(zones), and remember visitors across visits, driving mask expressions on +recognition. Recognition is 100% Gemini-side / in-context (primer images) — no +local ML. Dashboard on :8013. + +Self-contained wrapper around the vendored SanadV3 engine (like P1/P2): + 1. bootstrap the Project.Sanad namespace + flat Mask path, + 2. construct the perception subsystems (camera, face gallery, zone gallery, + recognition state) + comms core (brain/audio/voice/live_sub) + mask, + 3. construct the NEW package-local VisitorMemory store, + 4. wire lip-sync + Gemini emotions/social + recognition-driven expressions, + 5. inject a P3-scoped Project.Sanad.main shim + a package-local memory shim, + 6. mount the recognition/places/mask/memory routers + comms subset, serve the + SanadV3 SPA with non-P3 tabs hidden, on :8013. + +Kept Python-3.8 compatible. +""" +from __future__ import annotations + +import atexit +import importlib +import os +import sys +import types +from pathlib import Path + +# ── 1. 
namespace bootstrap (mirrors app_p2.py) ─────────────────────────────── +_APP = Path(os.environ.get("SANAD_APP_DIR", "/app")) +if str(_APP) not in sys.path: + sys.path.insert(0, str(_APP)) +_MASK_DIR = os.environ.setdefault("SANAD_MASK_DIR", str(_APP / "mask")) +if _MASK_DIR and _MASK_DIR not in sys.path: + sys.path.insert(0, _MASK_DIR) + +if "Project" not in sys.modules: + _proj = types.ModuleType("Project") + _proj.__path__ = [] + sys.modules["Project"] = _proj +if "Project.Sanad" not in sys.modules: + _sanad = importlib.import_module("Sanad") + sys.modules["Project.Sanad"] = _sanad + sys.modules["Project"].Sanad = _sanad # type: ignore[attr-defined] + +# package-local modules (memory store + its route) live next to this file. +sys.path.insert(0, str(Path(__file__).resolve().parent)) + +from Project.Sanad.core import asyncio_compat # noqa: E402,F401 +from Project.Sanad.core.logger import get_logger # noqa: E402 + +log = get_logger("pkg3.app") + +PACKAGE = "P3" +PACKAGE_TITLE = "Sanad — Recognition + Places + Memories (P3)" +# SanadV3 SPA tab ids P3 SHOWS / HIDES. +P3_SPA_TABS = ["operations", "voice", "recognition", "mask", "recordings", "settings"] +P3_SPA_HIDE = ["motion", "controller", "navigation", "livemap", "mapeditor", + "temp", "terminal"] +# Routers P3 does NOT mount → short-circuit client-side (no "Not Found" toasts). +P3_UNMOUNTED = ["/api/nav", "/api/controller", "/api/motion", "/api/skills", + "/api/macros", "/api/replay", "/api/wake-phrases", "/api/live-voice", + "/api/scripts"] + + +def _safe(name, factory): + try: + return factory() + except Exception: + log.exception("P3: could not construct %s — degraded", name) + return None + + +# ── 2. 
construct the perception + comms subsystems ─────────────────────────── +def _build_singletons(): + from Project.Sanad.core.brain import Brain + from Project.Sanad.voice.audio_manager import AudioManager + from Project.Sanad.gemini.client import GeminiVoiceClient + from Project.Sanad.gemini.subprocess import GeminiSubprocess + + brain = _safe("brain", Brain) # CRITICAL — greetings ride the live session + audio_mgr = _safe("audio_mgr", AudioManager) + voice_client = _safe("voice_client", GeminiVoiceClient) + + local_tts = None + try: + from Project.Sanad.voice.local_tts import LocalTTSEngine + local_tts = _safe("local_tts", LocalTTSEngine) + except Exception: + pass + typed_replay = None + if voice_client is not None and audio_mgr is not None: + try: + from Project.Sanad.voice.typed_replay import TypedReplayEngine + typed_replay = _safe("typed_replay", lambda: TypedReplayEngine(voice_client, audio_mgr)) + except Exception: + pass + + live_sub = _safe("live_sub", lambda: GeminiSubprocess()) + + # Perception: camera daemon + face gallery + zone gallery. + camera = None + try: + from Project.Sanad.vision.camera import CameraDaemon + camera = _safe("camera", lambda: CameraDaemon()) + except Exception: + log.exception("P3: CameraDaemon import failed — vision unavailable") + gallery = None + try: + from Project.Sanad.vision.face_gallery import FaceGallery + gallery = _safe("gallery", lambda: FaceGallery()) + except Exception: + log.exception("P3: FaceGallery import failed") + zone_gallery = None + try: + from Project.Sanad.vision.zone_gallery import ZoneGallery + zone_gallery = _safe("zone_gallery", lambda: ZoneGallery()) + except Exception: + log.exception("P3: ZoneGallery import failed") + + # Mask/face — expressions on recognition. 
+ mask_face = None + try: + from Project.Sanad.face.mask_face import FaceController + mask_face = _safe("mask_face", FaceController) + except Exception: + log.exception("P3: FaceController import failed — LED mask unavailable") + + # NEW package-local visitor memory store. + memory = None + try: + from visitor_memory import VisitorMemory + memory = _safe("memory", VisitorMemory) + except Exception: + log.exception("P3: VisitorMemory init failed") + + # attachments + for meth, val in (("attach_voice", voice_client), + ("attach_audio_manager", audio_mgr)): + if brain is not None and val is not None and hasattr(brain, meth): + try: + getattr(brain, meth)(val) + except Exception: + log.exception("brain.%s failed", meth) + if live_sub is not None: + if audio_mgr is not None and hasattr(live_sub, "attach_audio_manager"): + try: + live_sub.attach_audio_manager(audio_mgr) + except Exception: + log.exception("live_sub.attach_audio_manager failed") + if camera is not None and hasattr(live_sub, "attach_camera"): + try: + live_sub.attach_camera(camera) # frames flow to the child for recognition + except Exception: + log.exception("live_sub.attach_camera failed") + + # Boot vision-restore (guarded — never crash the container if no camera). + # The vendored recognition_state exposes read(path)/mutate(path, **) — NOT load(). 
+ try: + from Project.Sanad.vision import recognition_state as _rs + from Project.Sanad.config import BASE_DIR as _BD + _state_path = _BD / "data" / ".recognition_state.json" + st = _rs.read(_state_path) + want_vision = bool(getattr(st, "vision_enabled", False)) + if want_vision and camera is not None and hasattr(camera, "start"): + try: + camera.start() + log.info("P3: vision restored (camera started)") + except Exception: + log.exception("P3: camera.start() failed — disabling vision, booting headless") + try: + _rs.mutate(_state_path, vision_enabled=False) + except Exception: + pass + except Exception: + log.exception("P3: recognition-state restore skipped") + + return dict(brain=brain, audio_mgr=audio_mgr, voice_client=voice_client, + local_tts=local_tts, typed_replay=typed_replay, live_sub=live_sub, + camera=camera, gallery=gallery, zone_gallery=zone_gallery, + mask_face=mask_face, memory=memory) + + +# ── 2b. mask wiring (lip-sync + emotions + social + lifelike) ──────────────── +def _wire_mask(s): + mask_face = s.get("mask_face") + live_sub = s.get("live_sub") + if mask_face is None: + return + # lip-sync + if live_sub is not None and hasattr(live_sub, "register_mouth_callback"): + try: + live_sub.register_mouth_callback(lambda lvl: getattr(mask_face, "_gemini_linked", False) and _try(mask_face.set_mouth, int(lvl))) + log.info("LED face wired to lip-sync (MOUTH)") + except Exception: + log.exception("mouth hook failed") + # emotions + if live_sub is not None and hasattr(live_sub, "register_face_callback"): + try: + _HOLD = {"heart": 2.6, "love": 2.6, "kiss": 2.4, "laugh": 2.2, + "surprised": 1.8, "confused": 1.8} + live_sub.register_face_callback(lambda n: getattr(mask_face, "_gemini_linked", False) and _try(mask_face.react, str(n), _HOLD.get(n, 1.6))) + log.info("LED face wired to emotions (FACE)") + except Exception: + log.exception("face hook failed") + # social QR (off-thread — ~9s BLE upload) + if live_sub is not None and hasattr(live_sub, 
"register_social_callback"): + try: + def _on_social(account): + if not getattr(mask_face, "_gemini_linked", False): + return + import threading as _th + def _run(acc=str(account)): + try: + from Project.Sanad.dashboard.routes.mask_social import show_social_on_mask + show_social_on_mask(acc) + except Exception: + log.exception("show_social_on_mask failed") + _th.Thread(target=_run, daemon=True, name="mask-social").start() + live_sub.register_social_callback(_on_social) + log.info("LED face wired to social QR (SHOW)") + except Exception: + log.exception("social hook failed") + # lifelike state + reactions (synchronous bus) + try: + from Project.Sanad.core.event_bus import bus as _bus + _bus.on("brain.gestural_speaking_changed", + lambda enabled=False, **_k: (_try(mask_face.set_speaking, bool(enabled)), + (not enabled) and _try(mask_face.set_listening))) + _bus.on("voice.connected", lambda **_k: _try(mask_face.set_listening)) + _bus.on("voice.user_said", lambda **_k: _try(mask_face.set_thinking)) + _bus.on("voice.disconnected", lambda **_k: _try(mask_face.set_idle)) + _bus.on("voice.error", lambda **_k: _try(mask_face.react, "sad")) + _bus.on("recognition.event", lambda **_k: _try(mask_face.react, "smile")) # greet on recognition + log.info("LED face wired to lifelike + recognition-greeting events") + except Exception: + log.exception("lifelike hooks failed") + + +def _try(fn, *a): + try: + return fn(*a) + except Exception: + log.exception("%s failed", getattr(fn, "__name__", "callback")) + + +def _inject_main_shim(singletons): + shim = types.ModuleType("Project.Sanad.main") + for k, v in singletons.items(): + setattr(shim, k, v) + # motion / nav subsystems P3 does NOT own — present as None (routers guard). 
+ for k in ("arm", "wake_mgr", "macro_rec", "macro_play", "teacher", "live_voice", + "loco_controller", "movement_dispatch", "nav_client"): + if not hasattr(shim, k): + setattr(shim, k, None) + shim.SUBSYSTEMS = {k: singletons.get(k) for k in ( # type: ignore[attr-defined] + "brain", "audio_mgr", "voice_client", "local_tts", "typed_replay", "live_sub", + "camera", "gallery", "zone_gallery", "mask_face", "memory")} + sys.modules["Project.Sanad.main"] = shim + return shim + + +# ── 3. build the P3 FastAPI app ─────────────────────────────────────────────── +_P3_REST = [ + ("health", "/api", "health"), + ("system", "/api/system", "system"), + ("voice", "/api/voice", "voice"), + ("audio_control", "/api/audio", "audio"), + ("prompt", "/api/prompt", "prompt"), + ("typed_replay", "/api/typed-replay", "typed-replay"), + ("records", "/api/records", "records"), + ("logs", "/api/logs", "logs"), + ("live_subprocess", "/api/live-subprocess", "live-subprocess"), + ("recognition", "/api/recognition", "recognition"), # faces / VIP + ("zones", "/api/zones", "zones"), # places (nav /go degrades to nav_unavailable) + ("mask", "/api/mask", "mask"), + ("mask_social", "/api/mask", "mask-social"), +] +_P3_WS = ["log_stream"] + + +def _tab_filter_snippet(): + import json as _json + css = ",".join(".tab[onclick*=\"switchTab('%s')\"],#tab-%s" % (t, t) for t in P3_SPA_HIDE) + ",#status-pills" + return ( + "" + "" + % (css, _json.dumps({"name": PACKAGE, "title": PACKAGE_TITLE, "tabs": P3_SPA_TABS}), + _json.dumps(P3_UNMOUNTED)) + ) + + +def build_app(): + from fastapi import FastAPI + from fastapi.staticfiles import StaticFiles + from fastapi.responses import HTMLResponse, JSONResponse + from Project.Sanad.config import BASE_DIR + from Project.Sanad.core.config_loader import section as _cfg_section + + app_cfg = _cfg_section("dashboard", "app") + app = FastAPI(title=PACKAGE_TITLE, version="0.1.0") + loaded, failed = [], {} + + def _register(mod_name, prefix, tag, 
package="Project.Sanad.dashboard.routes"): + try: + mod = importlib.import_module("%s.%s" % (package, mod_name)) + if not hasattr(mod, "router"): + raise AttributeError("no 'router'") + kw = {} + if prefix: + kw["prefix"] = prefix + if tag: + kw["tags"] = [tag] + app.include_router(mod.router, **kw) + loaded.append(mod_name) + except Exception as exc: + failed[mod_name] = str(exc) + log.exception("P3: router %s failed — skipped", mod_name) + + for m, p, t in _P3_REST: + _register(m, p, t) + for m in _P3_WS: + _register(m, None, "websocket", package="Project.Sanad.dashboard.websockets") + + # package-local routers: NEW memory store + P3 convenience. + try: + import routes_memory + app.include_router(routes_memory.router, prefix="/api/memory", tags=["memory"]) + loaded.append("routes_memory") + except Exception as exc: + failed["routes_memory"] = str(exc) + log.exception("P3: routes_memory failed — /api/memory unavailable") + try: + import routes_p3 + app.include_router(routes_p3.router, prefix="/api/p3", tags=["p3"]) + loaded.append("routes_p3") + except Exception as exc: + failed["routes_p3"] = str(exc) + log.exception("P3: routes_p3 failed") + + static_dir = BASE_DIR / app_cfg.get("static_subdir", "dashboard/static") + try: + app.mount("/static", StaticFiles(directory=str(static_dir)), name="static") + except Exception: + log.exception("P3: static mount failed") + + @app.get("/api/package") + async def package_info(): + from sanad_pkg import license as _lic + lic = _lic.current() + api_key = {"has_key": False, "masked": "", "source": "default"} + try: + import Project.Sanad.config as _cfg_mod + from Project.Sanad.dashboard.routes.voice import _mask_api_key + _k = getattr(_cfg_mod, "GEMINI_API_KEY", "") or "" + try: + from Project.Sanad.config import load_config + _stored = (load_config().get("gemini", {}) or {}).get("api_key") + except Exception: + _stored = None + api_key = {"has_key": bool(_k), "masked": _mask_api_key(_k), + "source": "config_file" if _stored 
else "default"} + except Exception: + log.exception("could not read api-key status") + return { + "package": PACKAGE, "title": PACKAGE_TITLE, "tabs": P3_SPA_TABS, + "features": {"face_rec": bool(lic.feature("face_rec", True)), + "places": bool(lic.feature("places", True)), + "memory": bool(lic.feature("memory", True)), + "mask": bool(lic.feature("mask", True))}, + "api_key": api_key, + "endpoints": {"recognition": "GET /api/recognition/*", "places": "GET /api/zones/*", + "memories": "GET /api/memory/", "mask": "GET /api/mask/status"}, + "loaded_routes": loaded, "failed_routes": failed, "license": lic.summary(), + } + + def _filtered_spa(): + index = static_dir / "index.html" + if not index.exists(): + return JSONResponse({"message": "SPA index.html not found", "loaded": loaded, "failed": failed}) + try: + html = index.read_text(encoding="utf-8") + filt = _tab_filter_snippet() + if "" in html: + html = html.replace("", filt + "", 1) + elif "" in html: + html = html.replace("", filt + "", 1) + else: + html = filt + html + return HTMLResponse(html) + except OSError as exc: + return JSONResponse({"error": "index.html unreadable: %s" % exc}, status_code=500) + + @app.get("/") + async def root(): + return _filtered_spa() + + @app.get("/full") + async def full_dashboard(): + return _filtered_spa() + + log.info("P3 dashboard built — routers loaded=%s failed=%s", loaded, list(failed)) + return app + + +def _init_dds_for_audio(): + if os.environ.get("SANAD_BUS_ADDR"): + return + try: + from unitree_sdk2py.core.channel import ChannelFactoryInitialize + iface = os.environ.get("SANAD_DDS_INTERFACE", "eth0") + ChannelFactoryInitialize(0, iface) + log.info("P3: DDS ChannelFactoryInitialize(0, %s) done — chest audio enabled", iface) + except Exception: + log.exception("P3: DDS init failed — chest audio unavailable (plugged still works)") + + +def _enforce_keyless_default(): + import Project.Sanad.config as _cfg + env_key = (os.environ.get("SANAD_GEMINI_API_KEY") or "").strip() + 
saved = "" + try: + from Project.Sanad.config import load_config + saved = ((load_config().get("gemini") or {}).get("api_key") or "").strip() + except Exception: + pass + if env_key or saved: + return + _cfg.GEMINI_API_KEY = "" + try: + import Project.Sanad.gemini.client as _gc + _gc.GEMINI_API_KEY = "" + except Exception: + pass + log.info("P3: keyless by default — customer adds a Gemini key via the dashboard") + + +def main(): + host = os.environ.get("SANAD_DASHBOARD_HOST", "0.0.0.0") + port = int(os.environ.get("SANAD_DASHBOARD_PORT", "8013")) + log.info("Sanad P3 (Recognition + Places + Memories) starting — %s:%d", host, port) + try: + from sanad_pkg.bus import bus + bus.connect() + except Exception: + log.exception("bus connect failed (continuing in-process)") + + _init_dds_for_audio() + _enforce_keyless_default() + singletons = _build_singletons() + _wire_mask(singletons) + _inject_main_shim(singletons) + + _mask = singletons.get("mask_face") + if _mask is not None and hasattr(_mask, "shutdown"): + atexit.register(lambda: _mask.shutdown()) + _cam = singletons.get("camera") + if _cam is not None and hasattr(_cam, "stop"): + atexit.register(lambda: _try(_cam.stop)) + + import uvicorn + app = build_app() + uvicorn.run(app, host=host, port=port, log_level="info") + + +if __name__ == "__main__": + main() diff --git a/config/mask_config.json b/config/mask_config.json new file mode 100644 index 0000000..fb3c488 --- /dev/null +++ b/config/mask_config.json @@ -0,0 +1,27 @@ +{ + "_comment": "Shining LED face mask (BLE). Driven by the FaceController subsystem (face/mask_face.py) which imports the standalone Mask project. Needs an env with bleak + Pillow (g1_env). Free the mask from the phone app before connecting.", + "mask_dir": "", + "_mask_dir": "Path to the Mask project (flat shiningmask lib). Empty -> auto: /Mask. Env override: SANAD_MASK_DIR.", + "name_prefix": "MASK", + "_name_prefix": "BLE scan prefix; the mask advertises e.g. 'MASK-02A711'. 
Env: SANAD_MASK_NAME_PREFIX.", + "address": "", + "_address": "Specific BLE MAC to connect to. Empty -> scan by name_prefix. Env: SANAD_MASK_ADDRESS.", + "adapter": "", + "_adapter": "BlueZ adapter (e.g. 'hci0'). Empty -> default. Env: SANAD_MASK_ADAPTER.", + "brightness": 95, + "_brightness": "0-128. Keep <=100 to avoid LED flicker (battery-limited).", + "fps": 8.0, + "_fps": "FaceAnimator (fallback driver) frame rate (PLAY commands/sec).", + "lifelike": true, + "_lifelike": "Use the LifelikeFace driver (face/face_motion.py): eye saccades, varied blinks, listening/thinking/speaking states, reactions, smooth lip-sync. false -> basic FaceAnimator.", + "autostart": true, + "_autostart": "Auto-connect + Start face on boot (best-effort, background — never blocks startup). After the one-time frame upload, later boots just connect + animate. false -> connect/start manually from the dashboard.", + "connect_timeout": 15.0, + "connect_attempts": 5, + "eye_color": [0, 230, 255], + "_eye_color": "Face eye/iris RGB (baked into the uploaded frames). Default cyan. Set via the dashboard 'Apply colors' (persisted here).", + "mouth_color": [255, 50, 50], + "_mouth_color": "Face mouth RGB. Default red.", + "sclera_color": [255, 255, 255], + "_sclera_color": "White-of-the-eye RGB. Default white." +} diff --git a/config/p3_config.json b/config/p3_config.json new file mode 100644 index 0000000..dcf1a22 --- /dev/null +++ b/config/p3_config.json @@ -0,0 +1,11 @@ +{ + "_comment": "Sanad Package 3 (Recognition + Places + Memories) defaults. 
Precedence: env > license feature > this file.", + "package": "P3", + "title": "Sanad — Recognition + Places + Memories", + "language_default": "", + "audio_profile_default": "builtin", + "port": 8013, + "voice_brain": "gemini", + "spa_tabs": ["operations", "voice", "recognition", "mask", "recordings", "settings"], + "excluded": ["motion", "controller", "navigation", "livemap", "mapeditor", "temp", "terminal"] +} diff --git a/data/audio/.gitkeep b/data/audio/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/audio_device.json b/data/audio_device.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/data/audio_device.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/data/camera_device.json b/data/camera_device.json new file mode 100644 index 0000000..7930d09 --- /dev/null +++ b/data/camera_device.json @@ -0,0 +1,5 @@ +{ + "profile_serial_assignments": { + "realsense_primary": "" + } +} \ No newline at end of file diff --git a/data/faces/.gitkeep b/data/faces/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/memories/.gitkeep b/data/memories/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/motions/config.json b/data/motions/config.json new file mode 100644 index 0000000..40a0170 --- /dev/null +++ b/data/motions/config.json @@ -0,0 +1,21 @@ +{ + "gemini": { + "api_key": "", + "model": "models/gemini-2.5-flash-native-audio-preview-12-2025", + "voice_name": "Charon" + }, + "audio": { + "send_sample_rate": 16000, + "receive_sample_rate": 24000, + "chunk_size": 512, + "g1_volume": 100 + }, + "motion": { + "action_cooldown_sec": 1.0, + "replay_hz": 60.0 + }, + "dashboard": { + "host": "0.0.0.0", + "port": 8000 + } +} \ No newline at end of file diff --git a/data/photos/.gitkeep b/data/photos/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/recordings/.gitkeep b/data/recordings/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/zones/.gitkeep 
b/data/zones/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..b62f2a4 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,63 @@ +# Self-contained compose for Sanad Package 3 (Recognition + Places + Memories). +# docker compose up -d --build # -> http://<robot-ip>:8013 +# Jetson without buildx: DOCKER_BUILDKIT=0 docker compose up -d --build +services: + p3: + build: + context: . + dockerfile: Dockerfile + args: + BASE_OS_IMAGE: "${BASE_OS_IMAGE:-python:3.10-slim-bookworm}" + WITH_UNITREE_SDK: "${WITH_UNITREE_SDK:-1}" + image: "${SANAD_IMAGE:-sanad-p3:latest}" + container_name: sanad-p3 + network_mode: host + restart: unless-stopped + cap_add: + - NET_ADMIN + # camera V4L nodes are group 'video'; grant it so cv2.VideoCapture can open them. + group_add: + - video + environment: + SANAD_PACKAGE: P3 + SANAD_DASHBOARD_PORT: "8013" + SANAD_DASHBOARD_HOST: "0.0.0.0" + SANAD_VOICE_BRAIN: gemini + SANAD_AUDIO_PROFILE: "${SANAD_AUDIO_PROFILE:-builtin}" # builtin (chest, on a G1) | plugged (USB) + SANAD_DDS_INTERFACE: "${SANAD_DDS_INTERFACE:-eth0}" + # Plugged/Bluetooth audio (Anker USB, JBL BT) route through the HOST + # PulseAudio server — chest ("builtin") uses DDS and needs none of this. + PULSE_SERVER: "${PULSE_SERVER:-unix:/run/user/1000/pulse/native}" + PULSE_COOKIE: "${PULSE_COOKIE:-/run/user/1000/pulse/cookie}" # root→uid-1000 PA auth + SANAD_MASK_DIR: /app/mask + SANAD_MEMORIES_DIR: /app/Sanad/data/memories + # Pin the colour camera node — on a RealSense the colour node is NOT video0. + SANAD_CAMERA_USB_INDEX: "${SANAD_CAMERA_USB_INDEX:-}" + SANAD_LICENSE: /etc/sanad/sanad.lic + SANAD_PUBKEY: /etc/sanad/pubkey.ed25519 + SANAD_LICENSE_BIND: "${SANAD_LICENSE_BIND:-0}" + devices: + - "/dev/snd:/dev/snd" + - "/dev/bus/usb:/dev/bus/usb" # USB camera + BLE dongle + # Camera V4L node(s). 
video0 is the common USB-webcam default; a RealSense + # exposes ~6 nodes and the colour node is NOT video0 — add the others (or use + # SANAD_CAMERA_USB_INDEX). If a node is absent, comment its line out. + # Camera is OPTIONAL to boot: a bind of an ABSENT /dev/video0 makes `up` HARD-FAIL. + # UNCOMMENT + set to YOUR camera V4L node (RealSense colour node is NOT video0) to enable recognition: + # - "/dev/video0:/dev/video0" + # - "/dev/video1:/dev/video1" + # - "/dev/video2:/dev/video2" + # - "/dev/video4:/dev/video4" + volumes: + - "${SANAD_LICENSE_FILE:-./license/sanad.lic.example}:/etc/sanad/sanad.lic:ro" + # Plugged/Bluetooth audio (Anker/JBL): mount the host PulseAudio runtime dir + # (socket + auth cookie) so the container sets ANY sink's volume/output — the + # volume slider then drives chest (DDS) AND plugged (PA), exactly like SanadV3. + # Chest ("builtin") audio uses DDS only and needs none of this. One-time host + # setup for a stable boot-time socket: `loginctl enable-linger unitree`. + - "${SANAD_PULSE_DIR:-/run/user/1000/pulse}:/run/user/1000/pulse" + - "./data:/app/Sanad/data" # persist faces / zones / memories / recordings + - "./config/mask_config.json:/app/Sanad/config/mask_config.json" + - "/var/run/dbus:/var/run/dbus" # host BlueZ/D-Bus for the LED mask + # Bound license also needs the host machine-id: + # - "/etc/machine-id:/etc/machine-id:ro" diff --git a/entrypoint.sh b/entrypoint.sh new file mode 100755 index 0000000..e8d3d71 --- /dev/null +++ b/entrypoint.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +# Sanad Package 3 (Recognition) entrypoint. +# 1) license gate 2) resolve P3 env (env > license > config) 3) preflight 4) launch. +set -u +PKG="P3" +CFG="/app/pkg3_config/p3_config.json" + +# ── 1. license gate ────────────────────────────────────────────────────────── +# license_check exits 0 only when entitled. If NOT entitled we exit the CONTAINER +# cleanly (code 0) so a restart policy won't crash-loop. +if ! 
python3 -m sanad_pkg.license_check "$PKG"; then + echo "[$PKG] not licensed for this robot — container exiting cleanly." + exit 0 +fi + +# ── 2. resolve config (env wins, then license feature, then config file) ────── +read_cfg() { # read_cfg + python3 - "$CFG" "$1" <<'PY' 2>/dev/null || true +import json, sys +try: + print(json.load(open(sys.argv[1])).get(sys.argv[2], "") or "") +except Exception: + print("") +PY +} + +# Language: empty = MULTILINGUAL auto-detect (P2's headline feature). Only set a +# fixed language if the operator/license/config explicitly pins one. +if [ -z "${SANAD_LANGUAGE:-}" ]; then + SANAD_LANGUAGE="$(python3 -c 'from sanad_pkg import license as L; print(L.feature("language","") or "")' 2>/dev/null || true)" + [ -z "$SANAD_LANGUAGE" ] && SANAD_LANGUAGE="$(read_cfg language_default)" +fi +export SANAD_LANGUAGE +export SANAD_VOICE_BRAIN="${SANAD_VOICE_BRAIN:-gemini}" +[ -z "${SANAD_AUDIO_PROFILE:-}" ] && SANAD_AUDIO_PROFILE="$(read_cfg audio_profile_default)" +export SANAD_AUDIO_PROFILE="${SANAD_AUDIO_PROFILE:-builtin}" +export SANAD_DASHBOARD_HOST="${SANAD_DASHBOARD_HOST:-0.0.0.0}" +[ -z "${SANAD_DASHBOARD_PORT:-}" ] && SANAD_DASHBOARD_PORT="$(read_cfg port)" +export SANAD_DASHBOARD_PORT="${SANAD_DASHBOARD_PORT:-8013}" +export SANAD_MASK_DIR="${SANAD_MASK_DIR:-/app/mask}" +export PYTHONUNBUFFERED=1 + +# Jetson + Unitree SDK OpenMP load-order fix (only if the lib exists; override-able). +if [ -z "${LD_PRELOAD:-}" ] && [ -f /usr/lib/aarch64-linux-gnu/libgomp.so.1 ]; then + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1 +fi + +echo "[$PKG] entitled — lang=${SANAD_LANGUAGE:-} audio=$SANAD_AUDIO_PROFILE port=$SANAD_DASHBOARD_PORT brain=$SANAD_VOICE_BRAIN mask_dir=$SANAD_MASK_DIR" + +# ── 3. 
preflight (clear diagnostics) ───────────────────────────────────────── +python3 - "$SANAD_AUDIO_PROFILE" "$SANAD_MASK_DIR" <<'PY' || true +import importlib.util as u, sys +profile = sys.argv[1] if len(sys.argv) > 1 else "builtin" +mask_dir = sys.argv[2] if len(sys.argv) > 2 else "/app/mask" +def has(m): return u.find_spec(m) is not None +print("[P3] preflight:") +ok = sys.version_info >= (3, 9) +print(" python : %s %s" % (".".join(map(str, sys.version_info[:3])), + "OK" if ok else "TOO OLD — google-genai needs >=3.9")) +print(" google-genai : %s" % ("OK" if has("google.genai") else "MISSING — live conversation will NOT work")) +print(" pyaudio : %s" % ("OK" if has("pyaudio") else "missing — mic/speaker capture limited")) +print(" bleak (mask) : %s" % ("OK" if has("bleak") else "MISSING — LED mask will NOT connect")) +print(" Pillow (face) : %s" % ("OK" if has("PIL") else "missing — LifelikeFace falls back to FaceAnimator")) +sys.path.insert(0, mask_dir) +print(" mask lib : %s (%s)" % ("OK" if has("mask") else "MISSING", mask_dir)) +sdk = has("unitree_sdk2py") +print(" unitree SDK : %s" % ("OK" if sdk else "absent")) +if profile == "builtin" and not sdk: + print(" >> NOTE: audio profile 'builtin' (G1 chest) needs the Unitree SDK, which is") + print(" absent. 
Plug a USB speaker/mic and set SANAD_AUDIO_PROFILE=plugged.") +PY + +exec python3 /app/app_p3.py diff --git a/license/pubkey.ed25519 b/license/pubkey.ed25519 new file mode 100644 index 0000000..f622571 --- /dev/null +++ b/license/pubkey.ed25519 @@ -0,0 +1 @@ +ZOFerXRMTVQxkxsawjmGXJz8n5HmXfb8qLMhO/7DIC4= diff --git a/license/sanad.lic.example b/license/sanad.lic.example new file mode 100644 index 0000000..4b07a19 --- /dev/null +++ b/license/sanad.lic.example @@ -0,0 +1,27 @@ +{ + "payload": { + "robot_id": "G1-SN-DEMO-0001", + "machine_fingerprint": null, + "packages": { + "P1": true, + "P2": true, + "P3": true, + "P4": false + }, + "features": { + "language": "", + "multilingual": true, + "voice_command_motion": true, + "lipsync": true, + "mask": true, + "face_rec": true, + "places": true, + "memory": true, + "guide_tour": false, + "navigation": false + }, + "issued": "2026-06-22", + "expires": "2030-01-01" + }, + "sig": "WSI7gPG0mj1FhkOzRWcmhgo1mHmubOmPFycZF0mKUPqRaFQMD7GIil6sYlVE5njBtYb7EOevw6cetjcKOqkxCg==" +} diff --git a/p3ctl.sh b/p3ctl.sh new file mode 100755 index 0000000..b288be3 --- /dev/null +++ b/p3ctl.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +# p3ctl.sh — run / stop Sanad Package 3 (Recognition+Places+Memories) in dev mode +# (no Docker), against the vendored engine in ./vendor. Self-contained: no sibling +# Sanad/ checkout needed. +# +# ./p3ctl.sh start | stop | restart | status | logs [N] +# +# The conda env must have google-genai AND (for the LED mask) bleak==0.22.3 + Pillow. +# Override env: SANAD_P3_PY, SANAD_DASHBOARD_PORT (8013), SANAD_AUDIO_PROFILE (builtin), +# SANAD_DDS_INTERFACE (eth0), SANAD_MASK_DIR, SANAD_LICENSE / SANAD_PUBKEY.
+set -u + +PKG_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PY="${SANAD_P3_PY:-$HOME/miniconda3/envs/gemini_sdk/bin/python}" +PORT="${SANAD_DASHBOARD_PORT:-8013}" +APP="$PKG_DIR/app_p3.py" +LOG="$PKG_DIR/p3.log" +LIC="${SANAD_LICENSE:-$PKG_DIR/license/sanad.lic}"; [ -f "$LIC" ] || LIC="$PKG_DIR/license/sanad.lic.example" + +_start() { + if pgrep -f app_p3.py >/dev/null 2>&1; then echo "P3 already running on :$PORT"; return 0; fi + [ -f "$APP" ] || { echo "ERROR: $APP not found (deploy first)"; return 1; } + cd "$PKG_DIR" + export SANAD_APP_DIR="$PKG_DIR/vendor" \ + SANAD_LICENSE="$LIC" \ + SANAD_PUBKEY="${SANAD_PUBKEY:-$PKG_DIR/license/pubkey.ed25519}" \ + SANAD_MASK_DIR="${SANAD_MASK_DIR:-$PKG_DIR/vendor/mask}" \ + PYTHONPATH="$PKG_DIR/vendor" \ + SANAD_DASHBOARD_PORT="$PORT" SANAD_DASHBOARD_HOST="0.0.0.0" \ + SANAD_VOICE_BRAIN="gemini" \ + SANAD_AUDIO_PROFILE="${SANAD_AUDIO_PROFILE:-builtin}" \ + SANAD_DDS_INTERFACE="${SANAD_DDS_INTERFACE:-eth0}" \ + PYTHONUNBUFFERED=1 + [ -f /usr/lib/aarch64-linux-gnu/libgomp.so.1 ] && export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1 + nohup "$PY" "$APP" > "$LOG" 2>&1 & + sleep 3 + if pgrep -f app_p3.py >/dev/null 2>&1; then + echo "P3 started -> http://$(hostname -I | awk '{print $1}'):$PORT (log: $LOG)" + else + echo "P3 failed to start. Last log lines:"; tail -20 "$LOG" + fi +} + +_stop() { + pgrep -f app_p3.py >/dev/null 2>&1 || { echo "P3 was not running."; return 0; } + pkill -f app_p3.py 2>/dev/null + for _ in $(seq 1 8); do pgrep -f app_p3.py >/dev/null 2>&1 || break; sleep 1; done + pgrep -f app_p3.py >/dev/null 2>&1 && pkill -9 -f app_p3.py 2>/dev/null + sleep 1 + pgrep -f app_p3.py >/dev/null 2>&1 && echo "P3 still running (could not kill)." || echo "P3 stopped." 
+} +_status() { if pgrep -af app_p3.py; then echo -n "health: "; curl -s --max-time 4 "http://127.0.0.1:$PORT/api/health"; echo; else echo "P3 not running."; fi; } +_logs() { tail -n "${1:-40}" "$LOG" 2>/dev/null || echo "no log at $LOG"; } + +case "${1:-}" in + start) _start ;; + stop) _stop ;; + restart) _stop; sleep 2; _start ;; + status) _status ;; + logs) shift; _logs "${1:-40}" ;; + *) echo "usage: $0 {start|stop|restart|status|logs [N]}"; exit 2 ;; +esac diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..663d394 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,34 @@ +# Sanad Package 3 — Recognition + Places + Memories — self-contained deps. +# Comms + mask (like P2) + camera vision (opencv). Recognition is Gemini-side +# (in-context primers) — NO local ML / torch / face-embeddings. + +# Web dashboard +fastapi +uvicorn[standard] +pydantic +python-multipart +websockets + +# IPC bus shim + offline license verification +pyzmq +cryptography + +# Numerics + image +numpy +Pillow + +# Gemini Live voice + audio I/O +google-genai>=1.0.0 +pyaudio +soundfile +requests + +# Camera vision — JPEG-encode + USB (/dev/video*) capture. Headless (no GUI/libGL). +# pyrealsense2 is deliberately OMITTED (PyPI wheel's newer glibc ImportErrors on +# slim-bookworm); camera.py probes pyrealsense2 then falls back to cv2 V4L capture, +# so a USB colour camera works via opencv alone. Add pyrealsense2 (built from +# source) only if RealSense-specific depth is required. +opencv-python-headless + +# Mask (BLE LED "Shining Mask") — expressions on recognition. +bleak==0.22.3 diff --git a/routes_memory.py b/routes_memory.py new file mode 100644 index 0000000..eb382f8 --- /dev/null +++ b/routes_memory.py @@ -0,0 +1,106 @@ +"""/api/memory — visitor-memory CRUD (P3, package-local, mounted by app_p3.py). + +The VisitorMemory singleton is resolved lazily from the Project.Sanad.main shim +(app_p3 sets it), so a missing store degrades to 503 rather than crashing. 
+Kept Python-3.8 compatible. +""" +from __future__ import annotations + +from typing import Any, Dict, List, Optional + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +router = APIRouter() + + +def _mem(): + try: + from Project.Sanad.main import memory + except Exception: + memory = None + if memory is None: + raise HTTPException(503, "visitor memory store unavailable") + return memory + + +class ProfileCreate(BaseModel): + name: str + attributes: Optional[Dict[str, Any]] = None + notes: Optional[str] = "" + tags: Optional[List[str]] = None + linked_face_id: Optional[str] = "" + + +class ProfileUpdate(BaseModel): + name: Optional[str] = None + attributes: Optional[Dict[str, Any]] = None + notes: Optional[str] = None + tags: Optional[List[str]] = None + linked_face_id: Optional[str] = None + + +@router.get("/") +async def list_profiles(): + return {"ok": True, "profiles": _mem().list()} + + +@router.post("/") +async def create_profile(payload: ProfileCreate): + if not (payload.name or "").strip(): + raise HTTPException(400, "name is required") + prof = _mem().add(payload.name.strip(), attributes=payload.attributes, + notes=payload.notes or "", tags=payload.tags, + linked_face_id=payload.linked_face_id or "") + return {"ok": True, "profile": prof} + + +@router.get("/status") +async def status(): + return _mem().status() + + +@router.get("/primer") +async def primer(): + """The compact known-visitors summary fed into personalized greetings.""" + return {"ok": True, "primer": _mem().load_for_primer()} + + +@router.get("/by-face/{face_id}") +async def by_face(face_id: str): + prof = _mem().find_by_face(face_id) + if prof is None: + raise HTTPException(404, "no visitor linked to face %s" % face_id) + return {"ok": True, "profile": prof} + + +@router.get("/{pid}") +async def get_profile(pid: str): + prof = _mem().get(pid) + if prof is None: + raise HTTPException(404, "no visitor %s" % pid) + return {"ok": True, "profile": prof} + + 
+@router.put("/{pid}") +async def update_profile(pid: str, payload: ProfileUpdate): + prof = _mem().update(pid, **payload.dict(exclude_unset=True)) + if prof is None: + raise HTTPException(404, "no visitor %s" % pid) + return {"ok": True, "profile": prof} + + +@router.post("/{pid}/touch") +async def touch_profile(pid: str): + prof = _mem().touch(pid) + if prof is None: + raise HTTPException(404, "no visitor %s" % pid) + return {"ok": True, "profile": prof} + + +@router.delete("/{pid}") +async def delete_profile(pid: str): + ok = _mem().delete(pid) + if not ok: + raise HTTPException(404, "no visitor %s" % pid) + return {"ok": True, "deleted": pid} diff --git a/routes_p3.py b/routes_p3.py new file mode 100644 index 0000000..4d3053e --- /dev/null +++ b/routes_p3.py @@ -0,0 +1,250 @@ +"""P3-specific dashboard routes (mounted at /api/p3 by app_p3.py). + +Same first-class settings as P1 (REUSE Sanad's canonical logic, no fork) — the +one thing the base routes don't do is apply a change to the LIVE Gemini session +immediately by restarting the voice subprocess (the child reads the API key + +persona at spawn time). P3 is a superset of P1, so these are identical to P1's +convenience routes; the premium features (live-voice, wake-phrases, motion, +skills, mask) are served by the vendored Sanad routers mounted alongside. + + /api/p3/api-key GET masked status | POST set/update + live-restart + /api/p3/persona GET current persona+rules | POST update persona + live-restart + /api/p3/say speak a typed line (local, or via bus/hwbroker if SANAD_BUS_ADDR) + /api/p3/logs/delete delete all logs + /api/p3/settings one-shot view (api-key + persona + language + audio + live) + +Kept Python-3.8 compatible.
+""" +from __future__ import annotations + +import asyncio +import base64 +import os + +from fastapi import APIRouter, HTTPException + +from Project.Sanad.core.logger import get_logger +from Project.Sanad.dashboard.routes import voice as _voice # reuse api-key logic +from Project.Sanad.dashboard.routes import prompt as _prompt # reuse persona logic +from Project.Sanad.dashboard.routes import typed_replay as _tr # reuse local TTS say +from sanad_pkg.bus import bus + +# Bind request models as module-level names so FastAPI resolves body annotations +# cleanly under `from __future__ import annotations`. +ApiKeyPayload = _voice.ApiKeyPayload +PromptUpdate = _prompt.PromptUpdate +SayPayload = _tr.SayPayload + +log = get_logger("pkg3.routes") +router = APIRouter() + + +async def _restart_live_if_running() -> bool: + """Restart the live Gemini subprocess (if running) so a new key/persona takes + effect immediately. Returns True if it was restarted.""" + try: + from Project.Sanad.main import live_sub + is_running = getattr(live_sub, "is_running", None) + if live_sub is None or not callable(is_running) or not is_running(): + return False + try: + live_sub.stop() + except Exception: + log.exception("live_sub.stop() failed") + try: + await asyncio.to_thread(live_sub.start) + return True + except Exception: + log.exception("live_sub.start() failed") + return False + except Exception: + log.exception("could not restart live subprocess") + return False + + +# ─────────────────────────── Gemini API key ─────────────────────────── +def _persist_and_hotswap_key(key: str) -> None: + """Persist gemini.api_key to data/motions/config.json (empty => remove) and + hot-swap the in-memory globals. 
Patches BOTH Project.Sanad.config and + gemini.client (each binds its own ref).""" + from Project.Sanad.config import load_config, save_config + import Project.Sanad.config as _cfg_mod + cfg = load_config() or {} + g = cfg.get("gemini") if isinstance(cfg.get("gemini"), dict) else {} + if key: + g["api_key"] = key + else: + g.pop("api_key", None) + cfg["gemini"] = g + save_config(cfg) + _cfg_mod.GEMINI_API_KEY = key + try: + import Project.Sanad.gemini.client as _gc + _gc.GEMINI_API_KEY = key + except Exception: + log.exception("could not patch gemini.client.GEMINI_API_KEY") + + +async def _disconnect_voice(): + try: + from Project.Sanad.main import voice_client + if voice_client is not None and getattr(voice_client, "connected", False): + await voice_client.disconnect() + except Exception: + log.exception("voice_client disconnect failed") + + +@router.get("/api-key") +async def p3_get_api_key(): + """Masked current key + where it came from (delegates to the voice route).""" + return await _voice.get_api_key() + + +@router.post("/api-key") +async def p3_set_api_key(payload: ApiKeyPayload): + """ADD / update the Gemini API key. Relaxed validation — accepts AIza… and + AQ.… / ephemeral tokens. Persists + hot-swaps + restarts the live session.""" + key = (payload.api_key or "").strip() + if len(key) < 10: + raise HTTPException(400, "API key looks too short (paste the full key).") + _persist_and_hotswap_key(key) + await _disconnect_voice() + restarted = await _restart_live_if_running() + return { + "ok": True, + "masked": _voice._mask_api_key(key), + "source": "config_file", + "live_subprocess_restarted": restarted, + "message": "API key added" + (" and applied (live session restarted)." + if restarted else " — start the session to use it."), + } + + +@router.post("/api-key/delete") +async def p3_delete_api_key(): + """DELETE the Gemini API key — clears it from data/motions/config.json and + in-memory. 
Conversation stops until a new key is added.""" + _persist_and_hotswap_key("") + await _disconnect_voice() + restarted = await _restart_live_if_running() + return { + "ok": True, + "deleted": True, + "live_subprocess_restarted": restarted, + "message": "API key deleted. Add a new key to re-enable conversation.", + } + + +# ─────────────────────────── Robot persona ─────────────────────────── +@router.get("/persona") +async def p3_get_persona(): + """Current persona system prompt + parsed rules + file paths.""" + return await _prompt.get_prompt() + + +@router.post("/persona") +async def p3_set_persona(payload: PromptUpdate): + """Change the robot persona — write scripts/sanad_script.txt (canonical prompt + logic) and restart the live session so it speaks with the new persona + immediately. The persona is also where you steer language/dialect (P3 = the + bilingual multilingual prompt by default).""" + result = await _prompt.update_prompt(payload) + restarted = await _restart_live_if_running() + result["live_subprocess_restarted"] = restarted + result["message"] = ( + "Persona saved and applied — live session restarted." + if restarted else + "Persona saved. Start (or restart) the live session to use the new persona." + ) + return result + + +# ─────────────────────────── say a line ─────────────────────────── +@router.post("/say") +async def p3_say(payload: SayPayload): + """Speak a typed line. Standalone (no bus) → play locally via Sanad's + typed-replay. 
Multi-package (SANAD_BUS_ADDR set) → synth via Gemini and hand + the PCM to the hwbroker `speak.request` (audio-lock).""" + text = (payload.text or "").strip() + if not text: + raise HTTPException(400, "text cannot be empty") + + if not os.environ.get("SANAD_BUS_ADDR"): + return await _tr.say(payload) + + from Project.Sanad.main import voice_client + if voice_client is None: + raise HTTPException(503, "voice client unavailable") + if not getattr(voice_client, "connected", False): + try: + await voice_client.connect() + except Exception as exc: + raise HTTPException(503, "Gemini connect failed: %s" % exc) + try: + audio, _parts = await voice_client.send_text(text, owner="p3_say") + except Exception as exc: + raise HTTPException(502, "Gemini error: %s" % exc) + if not audio: + return {"ok": False, "routed": "hwbroker", "reason": "no audio produced"} + bus.emit_sync("speak.request", owner="p3", + pcm_b64=base64.b64encode(audio).decode("ascii"), + rate=24000, channels=1, sampwidth=2) + return {"ok": True, "routed": "hwbroker (audio-lock)"} + + +# ─────────────────────────── logs ─────────────────────────── +@router.post("/logs/delete") +async def p3_delete_logs(): + """Delete all log files on the robot. 
Active .log files are truncated; rotated/ + snapshot/bundle files are removed.""" + from Project.Sanad.config import LOGS_DIR + cleared = [] + try: + for p in sorted(LOGS_DIR.glob("*.log*")): + try: + if p.name.endswith(".log") and "_snapshot_" not in p.name: + open(p, "w").close() + else: + p.unlink() + cleared.append(p.name) + except Exception: + log.exception("could not clear log %s", p.name) + except Exception: + log.exception("delete logs failed") + return {"ok": True, "count": len(cleared), "cleared": cleared} + + +# ─────────────────────────── combined view ─────────────────────────── +@router.get("/settings") +async def p3_settings(): + """One-shot P3 settings: api-key status + persona + language + audio + live + mask.""" + key_status = await _voice.get_api_key() + persona = "" + try: + persona = _prompt._load_system_prompt() + except Exception: + log.exception("could not load persona") + live_running = False + try: + from Project.Sanad.main import live_sub + is_running = getattr(live_sub, "is_running", None) + live_running = bool(live_sub is not None and callable(is_running) and is_running()) + except Exception: + pass + mask_status = None + try: + from Project.Sanad.main import mask_face + if mask_face is not None and hasattr(mask_face, "status"): + mask_status = mask_face.status() + except Exception: + pass + return { + "package": "P3", + "api_key": key_status, + "persona_preview": (persona[:400] + ("…" if len(persona) > 400 else "")), + "persona_chars": len(persona), + "language": os.environ.get("SANAD_LANGUAGE", "") or "(multilingual auto-detect)", + "audio_profile": os.environ.get("SANAD_AUDIO_PROFILE", "builtin"), + "live_running": live_running, + "mask": mask_status, + } diff --git a/strip_key.py b/strip_key.py new file mode 100644 index 0000000..09742dc --- /dev/null +++ b/strip_key.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 +"""Build-time: blank any Gemini API key baked into the Sanad config so the P2 +image ships KEYLESS — the customer adds 
their own via the dashboard. Idempotent +and best-effort (never fails the build).""" +import json + +BASE = "/app/Sanad" +for rel, section in (("config/core_config.json", "gemini_defaults"), + ("data/motions/config.json", "gemini")): + path = "%s/%s" % (BASE, rel) + try: + with open(path) as f: + d = json.load(f) + except Exception: + continue + sec = d.get(section) + if isinstance(sec, dict) and sec.get("api_key"): + sec["api_key"] = "" + try: + with open(path, "w") as f: + json.dump(d, f, ensure_ascii=False, indent=2) + print("strip_key: blanked %s.api_key in %s" % (section, rel)) + except Exception as exc: + print("strip_key: could not write %s: %s" % (rel, exc)) diff --git a/sync_vendor.sh b/sync_vendor.sh new file mode 100755 index 0000000..14853c8 --- /dev/null +++ b/sync_vendor.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash +# Refresh the vendored SanadV3 engine + sanad_pkg + flat BLE Mask lib from a full +# monorepo checkout. P3 ships SELF-CONTAINED copies under ./vendor so the repo +# builds standalone. When SanadV3 or Project/Mask change upstream, run this from a +# checkout that has Project/Sanadv3 + Project/Mask + Project/Packages, then commit +# the updated ./vendor + ./data + ./config + ./license. +# +# ./sync_vendor.sh [/path/to/Project] # default: ../../ (Packages/.. = Project/) +# +# Excludes runtime data, Logs, caches, the temp3d 3D viewer, and tests. +set -euo pipefail +HERE="$(cd "$(dirname "$0")" && pwd)" +PROJECT="${1:-$(cd "$HERE/../.." && pwd)}" # = Project/G1 (packages live under G1/) +SRC_V3="$PROJECT/Sanadv3" +SRC_PKG="$PROJECT/Packages/sanad_pkg" +SRC_LIC="$PROJECT/Packages/licensing" +# The flat Mask lib moved OUT of the robot tree in the G1/ reorg (now Project/Other/Mask). +# Try a few locations; override with SANAD_MASK_SRC=/abs/path if it moves again.
+SRC_MASK="${SANAD_MASK_SRC:-}" +if [ -z "$SRC_MASK" ]; then + for c in "$PROJECT/Mask" "$PROJECT/../Other/Mask" "$PROJECT/Other/Mask" "$(dirname "$PROJECT")/Other/Mask"; do + [ -d "$c" ] && { SRC_MASK="$(cd "$c" && pwd)"; break; } + done +fi + +[ -d "$SRC_V3" ] || { echo "ERROR: no Sanadv3/ at $SRC_V3"; exit 1; } +[ -d "$SRC_MASK" ] || { echo "ERROR: Mask lib not found (set SANAD_MASK_SRC=/path/to/Mask)"; exit 1; } +echo ">> using Mask lib: $SRC_MASK" +[ -d "$SRC_PKG" ] || { echo "ERROR: no sanad_pkg at $SRC_PKG"; exit 1; } + +echo ">> vendoring SanadV3 engine from $SRC_V3" +rm -rf "$HERE/vendor"; mkdir -p "$HERE/vendor" +rsync -a \ + --exclude 'data/' --exclude 'Logs/' --exclude '__pycache__/' --exclude '*.pyc' \ + --exclude '.git/' --exclude 'dashboard/static/temp3d/' --exclude 'tests/' \ + "$SRC_V3/" "$HERE/vendor/Sanad/" + +echo ">> seeding minimal data/" +mkdir -p "$HERE/vendor/Sanad/data/motions" +cp "$SRC_V3/data/motions/config.json" "$HERE/vendor/Sanad/data/motions/config.json" +for j in audio_device.json camera_device.json wake_phrases.json; do + [ -f "$SRC_V3/data/$j" ] && cp "$SRC_V3/data/$j" "$HERE/vendor/Sanad/data/$j" || true +done +for d in recordings audio faces photos zones memories; do mkdir -p "$HERE/vendor/Sanad/data/$d"; touch "$HERE/vendor/Sanad/data/$d/.gitkeep"; done + +echo ">> vendoring sanad_pkg + public key" +rm -rf "$HERE/vendor/sanad_pkg"; cp -r "$SRC_PKG" "$HERE/vendor/sanad_pkg" +find "$HERE/vendor/sanad_pkg" -name __pycache__ -type d -prune -exec rm -rf {} + 2>/dev/null || true +mkdir -p "$HERE/license"; cp "$SRC_LIC/pubkey.ed25519" "$HERE/license/pubkey.ed25519" + +echo ">> vendoring flat BLE Mask lib (own path)" +rm -rf "$HERE/vendor/mask" +rsync -a --exclude '__pycache__/' --exclude '*.pyc' --exclude '.git/' \ + --exclude 'test_*.py' --exclude 'selftest.py' "$SRC_MASK/" "$HERE/vendor/mask/" + +echo ">> ship keyless (blank any baked Gemini key)" +python3 - "$HERE" <<'PY' +import json, sys +h = sys.argv[1] +for p, sec in 
((h+"/vendor/Sanad/config/core_config.json", "gemini_defaults"), + (h+"/vendor/Sanad/data/motions/config.json", "gemini")): + try: + d = json.load(open(p)) + except Exception: + continue + s = d.get(sec) + if isinstance(s, dict) and s.get("api_key"): + s["api_key"] = "" + json.dump(d, open(p, "w"), ensure_ascii=False, indent=2) + print(" blanked", sec, "in", p) +PY + +echo ">> refresh ./data seed mirror (keep structure, drop runtime media)" +rsync -a --delete \ + --exclude 'recordings/*' --exclude 'audio/*' --exclude 'faces/*' --exclude 'photos/*' \ + "$HERE/vendor/Sanad/data/" "$HERE/data/" +for d in recordings audio faces photos zones memories; do mkdir -p "$HERE/data/$d"; touch "$HERE/data/$d/.gitkeep"; done + +echo ">> refresh host mask_config.json seed (the mask-color persistence mount)" +# Only this one file — do NOT touch the hand-written config/p2_config.json. +cp "$HERE/vendor/Sanad/config/mask_config.json" "$HERE/config/mask_config.json" + +echo ">> done. vendor: $(du -sh "$HERE/vendor" | cut -f1) — review & commit ./vendor ./data ./config ./license" diff --git a/test_p3.sh b/test_p3.sh new file mode 100755 index 0000000..fc8b40a --- /dev/null +++ b/test_p3.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# Smoke-test Sanad Package 3. 
Usage: ./test_p3.sh [host:port] (default 127.0.0.1:8013) +H="${1:-127.0.0.1:8013}"; B="http://$H"; pass=0; fail=0 +chk(){ local code + if [ "$1" = GET ]; then code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 8 "$B$2") + else code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 8 -X "$1" "$B$2"); fi + if [ "$code" = "$3" ]; then printf " PASS [%s] %-34s %s\n" "$code" "$1 $2" "$4"; pass=$((pass+1)) + else printf " FAIL [%s≠%s] %-30s %s\n" "$code" "$3" "$1 $2" "$4"; fail=$((fail+1)); fi +} +echo "== Sanad P3 smoke test @ $B ==" +chk GET /api/health 200 "health" +chk GET /api/package 200 "manifest + license + features" +chk GET /api/p3/api-key 200 "key status (masked)" +chk GET /api/p3/settings 200 "combined settings" +chk GET /api/recognition/state 200 "recognition state" +chk GET /api/zones/ 200 "places / zones list" +chk GET /api/mask/status 200 "LED mask status" +chk GET /api/memory/ 200 "visitor memory list" +chk GET /api/memory/status 200 "memory store status" +chk GET /api/memory/primer 200 "greeting primer" +chk GET /api/live-subprocess/status 200 "conversation status" +chk GET /api/system/info 200 "system info" +chk GET /api/logs/ 200 "logs list" +echo "== $pass passed, $fail failed ==" +echo "-- manifest --" +curl -s --max-time 6 "$B/api/package" | python3 -c ' +import sys, json +d = json.load(sys.stdin); lic = d.get("license") or {} +print(" package :", d.get("package")) +print(" license :", lic.get("valid"), " packages:", lic.get("packages")) +print(" features:", d.get("features")) +' 2>/dev/null || true +echo "-- memory roundtrip (create -> list -> delete) --" +id=$(curl -s --max-time 6 -X POST "$B/api/memory/" -H 'Content-Type: application/json' -d '{"name":"Test VIP","attributes":{"vip":true},"notes":"smoke"}' | python3 -c "import sys,json;print(json.load(sys.stdin).get('profile',{}).get('id',''))" 2>/dev/null) +echo " created id: ${id:-}" +[ -n "$id" ] && curl -s --max-time 6 -X DELETE "$B/api/memory/$id" >/dev/null && echo " deleted ok" 
diff --git a/vendor/Sanad/.claude/settings.json b/vendor/Sanad/.claude/settings.json new file mode 100644 index 0000000..49630dd --- /dev/null +++ b/vendor/Sanad/.claude/settings.json @@ -0,0 +1,7 @@ +{ + "permissions": { + "allow": [ + "Bash(node -e ' *)" + ] + } +} diff --git a/vendor/Sanad/.gitignore b/vendor/Sanad/.gitignore new file mode 100644 index 0000000..c02264b --- /dev/null +++ b/vendor/Sanad/.gitignore @@ -0,0 +1,4 @@ +__pycache__/ +*.pyc +Logs/ +*.log diff --git a/vendor/Sanad/FEATURES.md b/vendor/Sanad/FEATURES.md new file mode 100644 index 0000000..f13300a --- /dev/null +++ b/vendor/Sanad/FEATURES.md @@ -0,0 +1,136 @@ +# SanadV3 — Feature Catalog + +Sanad is a bilingual (Arabic/English) humanoid receptionist/assistant on a +Unitree G1 (Jetson Orin NX, ROS 2 Foxy, Livox MID-360). This catalogs +**what's built today** (Part A) and **what's on the roadmap** (Part B). + +--- + +# Part A — Current features (built & running) + +Verified from the live subsystem registry (19 subsystems), dashboard tabs +(12), and API routers (22). + +## 1. Voice & Conversation +- **Gemini live voice** — real-time bilingual AR/EN spoken conversation (native-audio model) +- **Offline brain** — local pipeline via `ollama` (`SANAD_VOICE_BRAIN=local`), no cloud +- **Wake phrases** — configurable wake-word manager +- **Typed replay** — type text, robot speaks it (with speaker-monitor capture) +- **Local TTS** — on-device text-to-speech engine +- **Prompt management** — edit the system prompt from the dashboard +- **Lip-sync** — mask mouth driven by TTS `MOUTH` markers +- **Barge-in** — interrupt speech (volume-scaled threshold) + +## 2. 
Motion & Arm +- **35 arm actions** — 16 SDK built-ins + 19 custom JSONL motions +- **Macro record / playback** — capture and replay motion sequences +- **Teaching mode** — kinesthetic teach-and-repeat +- **Skills** — composed higher-level behaviors (skills.json) +- **Movement dispatch** — voice → motion (53 fixed + 10 parametric phrases, cooldown-gated) +- **Arm motion-block** — auto-inhibits arm moves while locomotion is active (safety interlock) + +## 3. Locomotion +- **LocoClient + MotionSwitcher** — walk / pose control via Unitree SDK (eth0) +- **E-STOP** — dashboard kill button +- **Single Ctrl+C teardown** — one signal cleanly stops every subsystem (~2s) + +## 4. LED Face Mask +- **Animated expressions** — neutral, smile, blink, look L/R, talk1–3, surprised, sad +- **Gestural-speaking events** — face reacts while speaking +- **Lip-sync** — mouth animates to speech + +## 5. Vision & Recognition +- **Face recognition** — identify people via camera +- **Face gallery** — enroll/manage known faces +- **Zone gallery / zones** — visual zone recognition +- **Camera feed** — attached to the live voice subprocess (vision-in-the-loop) + +## 6. Navigation (web_nav3 integration) +- **Live Map tab** — full embedded web_nav3 dashboard (set-pose, goals, bringup) +- **Navigation tab** — native canvas viewer (saved/live map, places, missions) +- **map_relay** — re-publishes the latched `/map` @1Hz so the map renders even when stationary +- **Saved maps** — load & view a pre-built `.db` (localize mode) +- **Places** — save named poses, one-click "Go" +- **Missions** — multi-waypoint routes (defined in web_nav3) +- **Cancel goal** — stop an active goal without tearing down bringup +- **SLAM** — RTABMap LiDAR-ICP, drift-corrected mapping/localization + +## 7. Audio +- **Device manager** — sink/source selection, live refresh +- **Audio profiles** — builtin / anker / hollyland_builtin (auto-switch on plug/unplug) + +## 8. 
Operations, System & Diagnostics +- **System control** — start/stop subsystems, status +- **Temperature monitor** — motor temps (live websocket stream) +- **Controller** — gamepad/teleop input +- **Web terminal** — shell in the browser (websocket) +- **Logs** — live log stream +- **Recordings & replay** — record/playback sessions +- **Scripts** — run saved scripts + +## Dashboard infrastructure +- 12 tabs, **fault-isolated routers** (one broken module never breaks the dashboard) +- WebSocket streams: log_stream, motor_temps, terminal +- No-store HTML (no stale-cache 404s after deploy) +- Lazy subsystem imports (missing dep → that subsystem unavailable, rest runs) + +--- + +# Part B — Roadmap (to add) + +Tiers = priority. 🏗️ = load-bearing · ⚠️ = Foxy constraint. + +## Tier 1 — Autonomous behaviors (the product) +1. **Voice-driven navigation** — "Sanad, go to the lobby" → nav goal +2. **Greeter mission** — recognized face → navigate → greet → express +3. **Named-person greeting** — identity → personalized line +4. **Patrol / guided tours** — ordered places, speech at each stop +5. **Return-to-base / dock-on-idle** — auto-home on idle/low battery + +## Tier 2 — Navigation & map (harden + edit) +6. 🏗️ **Map republish relay** — ✅ DONE (map_relay) +7. **Click-to-goal on Nav tab canvas** +8. **Live nav telemetry** — distance/ETA/waypoint, "arrived" toast +9. **Battery + nav-state status bar** +10. **Geofence zones on the map** +11. **Cancel-goal button** — ✅ DONE +### Map editing & annotation (all build on #6) +12. **Erase tool** — paint cells free; wipe ghost obstacles + the SLAM "spokes" +13. **Obstacle paint ("black points" / virtual walls)** — ⚠️ Foxy-safe KeepoutFilter substitute +14. **Shape tools + brush size** — line/rectangle/polygon +15. **Non-destructive overlay + undo/redo** +16. **Persist & auto-reload edits per map** +17. **Crop / trim map bounds** + +## Tier 3 — Voice & interaction +18. **Barge-in from dashboard** +19. **Quick-phrase soundboard** +20. 
**Conversation memory / visitor log** +21. **Per-speaker AR/EN auto-detect** +22. **Scheduled announcements** +23. **Bake edited map → PGM/YAML** (static map_server deploy) + +## Tier 4 — Face & presence +24. **Gaze / head-track recognized face** +25. **Emotion-from-context** (sentiment → expression) +26. **Idle breathing / look-around** +27. **Lip-sync to TTS amplitude** (enhance existing markers) + +## Tier 5 — Operator, fleet & reliability +28. 🏗️ **Global E-STOP button** — ✅ exists; surface consistently +29. **Health watchdog** — auto-restart dead subsystem + alert +30. **Per-subsystem enable/disable toggles** +31. **Behavior recorder → replay** (nav+voice timelines) +32. **Mission editor UI** (visual sequence builder) +33. **Remote access / tunnel** +34. **Reverse-proxy web_nav3 through :8001** — one origin, no iframe cross-port issues + +## Tier 6 — Future / blocked +35. **Speed / caution zones** — needs Galactic SpeedFilter or custom layer +36. **Multi-robot fleet** (SanadV3 ↔ BotBrain) — needs LocoClient arbitration + coordinator + +--- + +### Recommended next build order +**#1 voice→nav** → **#2 greeter mission** (the product), then **#12/#13 map editing** +(clean the spokes + virtual walls). #6 republish relay and #11 cancel are already done. diff --git a/vendor/Sanad/G1_Controller/__init__.py b/vendor/Sanad/G1_Controller/__init__.py new file mode 100644 index 0000000..bee07a0 --- /dev/null +++ b/vendor/Sanad/G1_Controller/__init__.py @@ -0,0 +1,12 @@ +"""G1_Controller — manual dashboard locomotion control (N2 Phase 1). + +`LocoController` wraps the Unitree `LocoClient` + `MotionSwitcherClient` for +operator-driven walking, postures and a discrete step pad. It reuses the arm +controller's single process-wide DDS init (one `ChannelFactoryInitialize`) and +is gated behind an in-memory "Enable movement" arm flag that defaults OFF every +boot. See dashboard/routes/controller.py for the REST surface. 
+""" + +from Project.Sanad.G1_Controller.loco_controller import LocoController + +__all__ = ["LocoController"] diff --git a/vendor/Sanad/G1_Controller/loco_controller.py b/vendor/Sanad/G1_Controller/loco_controller.py new file mode 100644 index 0000000..22ba59d --- /dev/null +++ b/vendor/Sanad/G1_Controller/loco_controller.py @@ -0,0 +1,593 @@ +"""LocoController — manual G1 locomotion via the Unitree LocoClient (N2 Phase 1). + +Ported from the proven scripts in G1_Lootah/Controller (g1_mode_controller.py, +keyboard_controller.py, hanger_boot_sequence.py). Design notes: + +* **One DDS init per process.** The arm controller owns the single + `ChannelFactoryInitialize(0, nic)` (motion/arm_controller.py). This class + NEVER initialises DDS — it lazily builds its `LocoClient` / + `MotionSwitcherClient` only after `arm._initialized` is True. +* **Default DISARMED.** `_armed` starts False every boot and gates every WRITE + method. Reads (status / fsm / joints), E-STOP and disarm are ALWAYS allowed. +* **StopMove watchdog.** Continuous `Move(..., True)` never self-terminates, so a + daemon thread StopMoves if no `move()` refresh arrives within + `watchdog_timeout_sec`. The frontend re-sends setpoints at ~10 Hz, so a tab + close / network drop trips the watchdog within the timeout. +* **Velocity caps.** Symmetric clamp on vx/vy/vyaw — Walk 0.6, Run 1.2. +* **Allow-anytime-warn.** move/step never hard-block on FSM; if not walk-ready + they still execute but return a `warning`. +* **Sim fallback.** When `unitree_sdk2py` is absent (workstation), every write + returns `{"simulated": True}` (never raises) so the whole UI is testable. + +SDK facts confirmed from source — do not "fix" them: +* `LocoClient.Move(vx, vy, vyaw, True)` — the continuous-mode kwarg is misspelled + `continous_move` (one n); we pass it POSITIONALLY to avoid a TypeError. +* `LocoClient` has NO StandUp()/Squat() — use SetFsmId(4)/SetFsmId(2). 
+* FSM id / mode are read via the private RPC `bot._Call(7001/7002, "{}")`. +""" + +from __future__ import annotations + +import json +import threading +import time +from typing import Any, Optional + +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.logger import get_logger + +log = get_logger("loco_controller") + +# -- SDK import (optional) ----------------------------------------------------- +try: + from unitree_sdk2py.g1.loco.g1_loco_client import LocoClient + from unitree_sdk2py.comm.motion_switcher.motion_switcher_client import ( + MotionSwitcherClient, + ) + _HAS_SDK = True +except ImportError: + LocoClient = None + MotionSwitcherClient = None + _HAS_SDK = False + log.warning("Unitree SDK not available — LocoController in simulation mode") + +# LocoClient general RPC api-ids for FSM read-back (stable across SDK builds). +ROBOT_API_ID_LOCO_GET_FSM_ID = 7001 +ROBOT_API_ID_LOCO_GET_FSM_MODE = 7002 + +# G1 29-DoF joint names for indices 12-28 (0-11 legs, 12-14 waist, 15-21 left +# arm, 22-28 right arm). Used by the Diagnostics joint read-out. +JOINT_NAMES = { + 12: "WAIST_YAW", 13: "WAIST_ROLL", 14: "WAIST_PITCH", + 15: "L_SHOULDER_PITCH", 16: "L_SHOULDER_ROLL", 17: "L_SHOULDER_YAW", + 18: "L_ELBOW", 19: "L_WRIST_ROLL", 20: "L_WRIST_PITCH", 21: "L_WRIST_YAW", + 22: "R_SHOULDER_PITCH", 23: "R_SHOULDER_ROLL", 24: "R_SHOULDER_YAW", + 25: "R_ELBOW", 26: "R_WRIST_ROLL", 27: "R_WRIST_PITCH", 28: "R_WRIST_YAW", +} + +# Discrete step pad — (vx, vy, vyaw) sign per direction; magnitude is +# step_speed_frac * cap_walk (a gentle single step). 
_STEP_DIRS = {
    "forward": (1.0, 0.0, 0.0),
    "backward": (-1.0, 0.0, 0.0),
    "slide_left": (0.0, 1.0, 0.0),
    "slide_right": (0.0, -1.0, 0.0),
    "rotate_left": (0.0, 0.0, 1.0),
    "rotate_right": (0.0, 0.0, -1.0),
}

_POSTURES = (
    "zero_torque", "damp", "stand_up", "squat", "sit",
    "low_stand", "high_stand", "lie_to_stand",
)


class LocoController:
    """Thread-safe manual locomotion control with a simulation fallback.

    Every write goes through ``self._lock`` (an RLock). Cancellation is driven
    by the monotonic ``_stop_gen`` counter: estop/stop/disarm/shutdown bump it
    under the lock, and move()/step()/prep_mode() compare against the value
    they captured when they started.
    """

    def __init__(self, arm=None):
        self._arm = arm                     # shared ArmController (owns the ONE DDS init)
        self._bot = None                    # LocoClient (lazy)
        self._msc = None                    # MotionSwitcherClient (lazy)
        self._lc_ready = False
        self._lock = threading.RLock()      # serialise all loco client WRITE calls
        self._armed = False                 # in-memory MANUAL gate — OFF every boot

        self._cur_v = (0.0, 0.0, 0.0)       # last commanded (vx, vy, vyaw)
        self._teleop_active = False
        self._last_msc_mode: Optional[str] = None

        # watchdog
        self._last_move_ts = 0.0
        self._wd_thread: Optional[threading.Thread] = None
        self._wd_stop = threading.Event()
        self._wd_stop.set()                 # not running until armed
        # Monotonic stop-generation counter, bumped under _lock by
        # estop/stop/disarm. move()/step()/prep_mode() capture it at start and
        # bail the instant it changes — so E-STOP preempts an in-flight motion
        # immediately AND can never be silently "un-cancelled" by a concurrent
        # command (a lock-free Event clear() could; an int compare under the
        # lock cannot).
        self._stop_gen = 0
        # Serializes the discrete blocking operations (step/prep_mode) so two
        # can't overlap and interleave Move commands. Continuous teleop move()
        # is intentionally NOT guarded by this.
        self._discrete_busy = False

        cfg = _cfg_section("motion", "loco_controller")
        self._cap_walk = float(cfg.get("cap_walk", 0.6))
        self._cap_run = float(cfg.get("cap_run", 1.2))
        self._lin_step = float(cfg.get("lin_step", 0.05))
        self._ang_step = float(cfg.get("ang_step", 0.2))
        self._wd_timeout = float(cfg.get("watchdog_timeout_sec", 0.5))
        self._block_window = float(cfg.get("arm_block_window_sec", 1.5))
        self._step_dur = float(cfg.get("step_duration_sec", 0.6))
        self._step_frac = float(cfg.get("step_speed_frac", 0.5))
        self._loco_timeout = float(cfg.get("loco_timeout_sec", 10.0))
        self._msc_timeout = float(cfg.get("msc_timeout_sec", 5.0))

    # ── client lifecycle ─────────────────────────────────────────────────────

    def _ensure_client(self) -> bool:
        """Lazily build LocoClient + MotionSwitcherClient. Returns readiness.

        Never initialises DDS — requires the shared arm to have already run the
        single ChannelFactoryInitialize.
        """
        if not _HAS_SDK:
            return False
        if self._lc_ready:
            return True
        if self._arm is None or not getattr(self._arm, "_initialized", False):
            return False
        with self._lock:
            # Re-check under the lock: another thread may have built the
            # clients while we were waiting.
            if self._lc_ready:
                return True
            try:
                bot = LocoClient()
                bot.SetTimeout(self._loco_timeout)
                bot.Init()
                msc = MotionSwitcherClient()
                msc.SetTimeout(self._msc_timeout)
                msc.Init()
                self._bot = bot
                self._msc = msc
                self._lc_ready = True
                log.info("LocoClient + MotionSwitcherClient ready")
            except Exception as exc:
                log.error("LocoClient init failed: %s", exc)
                self._lc_ready = False
        return self._lc_ready

    def _safe_call(self, name: str, fn, *a, **kw):
        """Call ``fn(*a, **kw)``; return ``(True, result)`` or ``(False, None)``.

        Failures are logged under ``name`` and never propagate.
        """
        try:
            return True, fn(*a, **kw)
        except Exception as exc:
            log.error("%s failed: %s", name, exc)
            return False, None

    def _rpc_get_int(self, api_id: int):
        """Read the ``data`` field of a LocoClient RPC reply, or None.

        Returns None when no client exists, the RPC code is non-zero, the
        payload is empty, or the call/parse raises.
        """
        bot = self._bot
        if bot is None:
            return None
        try:
            code, data = bot._Call(api_id, "{}")
            if code == 0 and data:
                return json.loads(data).get("data")
        except Exception:
            # Best-effort read-back (polled by status()); keep it quiet but
            # leave a trace instead of swallowing the error silently.
            log.debug("loco RPC %s read-back failed", api_id, exc_info=True)
        return None

    @staticmethod
    def _clamp(v: float, cap: float) -> float:
        """Clamp ``v`` symmetrically into ``[-cap, cap]``."""
        return max(-cap, min(cap, float(v)))

    # ── FSM / readiness ──────────────────────────────────────────────────────

    def fsm_id(self):
        """Current FSM id via RPC, or None when unavailable."""
        return self._rpc_get_int(ROBOT_API_ID_LOCO_GET_FSM_ID)

    def fsm_mode(self):
        """Current FSM mode via RPC, or None when unavailable."""
        return self._rpc_get_int(ROBOT_API_ID_LOCO_GET_FSM_MODE)

    def _walk_ready_warning(self) -> Optional[str]:
        """allow-anytime-warn: None when ready, else a human message."""
        if not self._lc_ready:
            return None
        fid = self.fsm_id()
        fmode = self.fsm_mode()
        if fid == 200 and fmode not in (None, 2):
            return None
        return (f"Robot not in walk-ready FSM (id={fid}, mode={fmode}). "
                f"Command sent anyway.")

    # ── arm flag + watchdog ──────────────────────────────────────────────────

    def is_armed(self) -> bool:
        """Return the in-memory MANUAL gate flag."""
        return self._armed

    def movement_active(self) -> bool:
        """True when the robot may be walking: manual armed, teleop active, OR a
        move/step issued within the block window. Used as the arm's motion-block
        predicate so the arm never replays while the robot is (or just was)
        moving — regardless of whether the MANUAL gate or the GEMINI gate
        (Phase 3 voice dispatch, which calls move/step directly) triggered it."""
        if self._armed or self._teleop_active:
            return True
        return (time.monotonic() - self._last_move_ts) < self._block_window

    def arm_movement(self) -> dict:
        """Unlock manual control. Cancels any in-flight arm motion first so the
        arm and locomotion are never active simultaneously (movement wins)."""
        try:
            if self._arm is not None and getattr(self._arm, "is_busy", False):
                log.info("arming movement — cancelling in-flight arm motion")
                self._arm.cancel()
        except Exception:
            log.exception("arm.cancel() on arm_movement failed")
        with self._lock:
            self._armed = True
        self._start_watchdog()
        log.info("movement ARMED")
        return {"ok": True, "armed": True}

    def disarm_movement(self) -> dict:
        """Lock manual control, cancel in-flight motion and issue StopMove."""
        with self._lock:
            self._stop_gen += 1          # break any in-flight step/prep/move
            self._armed = False
            self._teleop_active = False
            self._wd_stop.set()
            try:
                self._raw_stop()
            except Exception:
                log.exception("StopMove on disarm failed")
        log.info("movement DISARMED")
        return {"ok": True, "armed": False}

    def _start_watchdog(self):
        """(Re)start the StopMove watchdog thread if it is not running.

        Runs under ``_lock`` so it is atomic with the loop's park/exit decision
        (also taken under ``_lock``): a thread that has decided to exit has
        already cleared ``_wd_thread``, so it can never be mistaken for a live
        watchdog and leave teleop unguarded.
        """
        with self._lock:
            self._wd_stop.clear()
            if self._wd_thread is None or not self._wd_thread.is_alive():
                self._wd_thread = threading.Thread(
                    target=self._watchdog_loop, daemon=True, name="loco-watchdog")
                self._wd_thread.start()

    def _watchdog_loop(self):
        """StopMove when teleop setpoints go stale; self-park when idle."""
        period = max(0.02, min(0.1, self._wd_timeout / 2.0))
        me = threading.current_thread()
        while True:
            fire = False
            done = False
            # Read-and-decide under the lock (atomic check-then-act); the actual
            # StopMove runs after release so the critical section stays tiny.
            with self._lock:
                if self._wd_stop.is_set():
                    done = True
                else:
                    if (self._teleop_active
                            and (time.monotonic() - self._last_move_ts) > self._wd_timeout):
                        self._teleop_active = False
                        fire = True
                    # Self-park once there's nothing left to guard. The Gemini
                    # dispatch path uses step() directly and never calls
                    # disarm_movement(), so without this the watchdog would spin
                    # for the rest of the process lifetime after the first
                    # voice step.
                    if (not self._armed and not self._teleop_active
                            and not self._discrete_busy):
                        self._wd_stop.set()
                        done = True
                if done and self._wd_thread is me:
                    # Hand the slot back while still holding the lock so a
                    # later move/step spawns a fresh watchdog.
                    self._wd_thread = None
            if fire:
                log.warning("watchdog: teleop setpoint stale (>%.2fs) — StopMove",
                            self._wd_timeout)
                try:
                    self._raw_stop()
                except Exception:
                    log.exception("watchdog StopMove failed")
            if done:
                # Exit AFTER any stale-stop above so a pending StopMove is
                # never skipped.
                break
            self._wd_stop.wait(period)

    def _raw_stop(self) -> bool:
        """Issue StopMove if the client is up; no-op in sim. Lock-light."""
        if not self._lc_ready or self._bot is None:
            return False
        with self._lock:
            ok, _ = self._safe_call("StopMove", self._bot.StopMove)
        return ok

    # ── movement ─────────────────────────────────────────────────────────────

    def move(self, vx: float, vy: float, vyaw: float, run: bool = False) -> dict:
        """Send a continuous velocity setpoint, clamped to the walk/run cap.

        Returns a dict with ``ok``, the clamped ``sent`` velocities, ``capped``,
        an optional walk-readiness ``warning`` and ``simulated``; ``cancelled``
        is added when a stop landed while the command was being prepared.
        """
        cap = self._cap_run if run else self._cap_walk
        cvx, cvy, cvyaw = self._clamp(vx, cap), self._clamp(vy, cap), self._clamp(vyaw, cap)
        capped = (cvx, cvy, cvyaw) != (float(vx), float(vy), float(vyaw))
        warning = self._walk_ready_warning()
        sent = {"vx": cvx, "vy": cvy, "vyaw": cvyaw}

        with self._lock:
            my_gen = self._stop_gen            # capture under lock

        if not self._ensure_client():
            with self._lock:                   # sim: record intent for UI/watchdog
                self._cur_v = (cvx, cvy, cvyaw)
                self._last_move_ts = time.monotonic()
                self._teleop_active = True
            self._start_watchdog()
            return {"ok": True, "sent": sent, "capped": capped,
                    "warning": warning, "simulated": True}
        with self._lock:
            # If an E-STOP / stop / disarm landed since we captured my_gen, do NOT
            # (re)command velocity — and do NOT stamp the motion flags (so a
            # cancelled tick doesn't extend the arm-block window).
            if self._stop_gen != my_gen:
                return {"ok": False, "cancelled": True, "sent": sent,
                        "capped": capped, "warning": warning, "simulated": False}
            self._cur_v = (cvx, cvy, cvyaw)
            self._last_move_ts = time.monotonic()
            self._teleop_active = True
            self._safe_call("SetBalanceMode", self._bot.SetBalanceMode, 1)
            ok, _ = self._safe_call("Move", self._bot.Move, cvx, cvy, cvyaw, True)
        self._start_watchdog()
        return {"ok": bool(ok), "sent": sent, "capped": capped,
                "warning": warning, "simulated": False}

    def stop_move(self) -> dict:
        """Halt translation/rotation. Allowed even when disarmed."""
        with self._lock:
            self._stop_gen += 1
            self._teleop_active = False
        if not self._ensure_client():
            return {"ok": True, "simulated": True}
        ok = self._raw_stop()
        return {"ok": bool(ok), "simulated": False}

    def estop(self) -> dict:
        """Emergency stop = StopMove only (no Damp / FSM change → keeps posture).
        ALWAYS allowed, even disarmed and in sim. Bumps the stop generation so any
        in-flight move()/step()/prep_mode() bails immediately (no lock wait)."""
        with self._lock:
            self._stop_gen += 1
            self._teleop_active = False
            self._cur_v = (0.0, 0.0, 0.0)
        if not self._ensure_client():
            log.warning("E-STOP (sim)")
            return {"ok": True, "simulated": True}
        ok = self._raw_stop()
        log.warning("E-STOP — StopMove issued")
        return {"ok": bool(ok), "simulated": False}

    def step(self, direction: str) -> dict:
        """Discrete one-step pad: Move for step_duration then StopMove.
        Blocking (~step_duration); call via asyncio.to_thread from the route.

        The sleep loop does NOT hold self._lock, so E-STOP / StopMove (which take
        the lock briefly) preempt it immediately; the loop also bails the moment
        the stop generation changes. A step interrupted that way is reported as
        ``ok: False, cancelled: True`` (same shape as a step cancelled before it
        began)."""
        if direction not in _STEP_DIRS:
            return {"ok": False, "reason": f"unknown direction: {direction}"}
        sx, sy, syaw = _STEP_DIRS[direction]
        k = self._cap_walk * self._step_frac
        vx, vy, vyaw = sx * k, sy * k, syaw * k
        warning = self._walk_ready_warning()
        with self._lock:
            if self._discrete_busy:
                return {"ok": False, "dir": direction, "reason": "busy",
                        "warning": warning, "simulated": not self._lc_ready}
            self._discrete_busy = True
            my_gen = self._stop_gen
            self._last_move_ts = time.monotonic()
            self._teleop_active = True
            self._start_watchdog()
        if not self._ensure_client():
            with self._lock:
                self._teleop_active = False
                self._discrete_busy = False
            return {"ok": True, "dir": direction, "warning": warning, "simulated": True}
        cancelled = False
        try:
            with self._lock:
                if self._stop_gen != my_gen:           # stopped before we began
                    return {"ok": False, "dir": direction, "cancelled": True,
                            "warning": warning, "simulated": False}
                self._safe_call("SetBalanceMode", self._bot.SetBalanceMode, 1)
                self._safe_call("Move", self._bot.Move, vx, vy, vyaw, True)
            t_end = time.monotonic() + self._step_dur
            while time.monotonic() < t_end:
                if self._stop_gen != my_gen:
                    cancelled = True
                    break
                with self._lock:
                    self._last_move_ts = time.monotonic()   # keep watchdog fed
                time.sleep(0.05)
        finally:
            with self._lock:
                try:
                    # _raw_stop() no-ops if the client was dropped meanwhile
                    # (reconnect()), so the flags below are always released.
                    self._raw_stop()
                finally:
                    self._teleop_active = False
                    self._discrete_busy = False
        if cancelled:
            return {"ok": False, "dir": direction, "cancelled": True,
                    "warning": warning, "simulated": False}
        return {"ok": True, "dir": direction, "warning": warning, "simulated": False}

    # ── postures / modes ─────────────────────────────────────────────────────

    def prep_mode(self) -> dict:
        """PREP — StopMove → Damp → StandUp(FSM4) → height ramp → BalanceStand(0).
        Exact order from g1_mode_controller.prep_mode, minus the blocking input().
        Blocking (~1s); call via asyncio.to_thread."""
        if not self._ensure_client():
            return {"ok": True, "mode": "prep", "simulated": True}
        claimed = False
        try:
            with self._lock:
                if self._discrete_busy:
                    return {"ok": False, "mode": "prep", "reason": "busy", "simulated": False}
                self._discrete_busy = claimed = True
                my_gen = self._stop_gen
                self._safe_call("StopMove", self._bot.StopMove)
                self._safe_call("Damp", self._bot.Damp)
                self._safe_call("SetFsmId(4)", self._bot.SetFsmId, 4)
            # Height ramp OUTSIDE the lock so E-STOP can preempt at any time.
            # 0.02 → 0.50 in 0.02 increments (integer steps avoid float drift).
            for i in range(1, 26):
                if self._stop_gen != my_gen:
                    log.warning("PREP cancelled (E-STOP)")
                    return {"ok": False, "mode": "prep", "cancelled": True, "simulated": False}
                with self._lock:
                    self._safe_call("SetStandHeight", self._bot.SetStandHeight,
                                    round(i * 0.02, 3))
                time.sleep(0.03)
            with self._lock:
                self._safe_call("BalanceStand", self._bot.BalanceStand, 0)
                self._safe_call("SetStandHeight", self._bot.SetStandHeight, 0.22)
        finally:
            # Release the busy flag on every exit path we claimed it on —
            # including an unexpected exception in the prelude above.
            if claimed:
                with self._lock:
                    self._discrete_busy = False
        log.info("PREP complete")
        return {"ok": True, "mode": "prep", "simulated": False}

    def ready_start_mode(self) -> dict:
        """READY = PREP then Start (FSM 200 / balance engaged).

        If PREP did not complete (cancelled by E-STOP, or refused because
        another discrete operation is running) Start is NOT issued and the PREP
        result is returned with ``mode`` set to ``"ready"``.
        """
        prep = self.prep_mode()
        if not prep.get("ok"):
            return {**prep, "mode": "ready"}
        if not self._ensure_client():
            return {"ok": True, "mode": "ready", "simulated": True}
        with self._lock:
            if hasattr(self._bot, "Start"):
                ok, _ = self._safe_call("Start", self._bot.Start)
            else:
                ok, _ = self._safe_call("SetFsmId(200)", self._bot.SetFsmId, 200)
        log.info("READY/START complete")
        return {"ok": bool(ok), "mode": "ready", "simulated": False}

    def posture(self, name: str) -> dict:
        """Command one of the named postures in ``_POSTURES``."""
        if name not in _POSTURES:
            return {"ok": False, "reason": f"unknown posture: {name}"}
        if not self._ensure_client():
            return {"ok": True, "posture": name, "simulated": True}
        bot = self._bot
        with self._lock:
            if name == "zero_torque":
                ok, _ = self._safe_call("ZeroTorque", bot.ZeroTorque)
            elif name == "damp":
                ok, _ = self._safe_call("Damp", bot.Damp)
            elif name == "stand_up":
                ok, _ = self._safe_call("SetFsmId(4)", bot.SetFsmId, 4)
            elif name == "squat":
                ok, _ = self._safe_call("SetFsmId(2)", bot.SetFsmId, 2)
            elif name == "sit":
                ok, _ = self._safe_call("Sit", bot.Sit)
            elif name == "low_stand":
                ok, _ = self._safe_call("LowStand", bot.LowStand)
            elif name == "high_stand":
                ok, _ = self._safe_call("HighStand", bot.HighStand)
            elif name == "lie_to_stand":
                if hasattr(bot, "Lie2StandUp"):
                    ok, _ = self._safe_call("Lie2StandUp", bot.Lie2StandUp)
                else:
                    ok, _ = self._safe_call("SetFsmId(702)", bot.SetFsmId, 702)
            else:  # unreachable (guarded above)
                ok = False
        return {"ok": bool(ok), "posture": name, "simulated": False}

    def set_balance_mode(self, mode: int) -> dict:
        """Forward ``mode`` to LocoClient.SetBalanceMode (simulated without SDK)."""
        if not self._ensure_client():
            return {"ok": True, "balance_mode": int(mode), "simulated": True}
        with self._lock:
            ok, _ = self._safe_call("SetBalanceMode", self._bot.SetBalanceMode, int(mode))
        return {"ok": bool(ok), "balance_mode": int(mode), "simulated": False}

    def set_stand_height(self, h: float) -> dict:
        """Forward ``h`` to LocoClient.SetStandHeight (simulated without SDK)."""
        if not self._ensure_client():
            return {"ok": True, "height": float(h), "simulated": True}
        with self._lock:
            ok, _ = self._safe_call("SetStandHeight", self._bot.SetStandHeight, float(h))
        return {"ok": bool(ok), "height": float(h), "simulated": False}

    # ── MotionSwitcher ───────────────────────────────────────────────────────

    def msc_check(self) -> dict:
        """Query MotionSwitcher CheckMode and cache the reported mode name."""
        if not self._ensure_client() or self._msc is None:
            return {"mode_name": None, "simulated": not self._lc_ready}
        try:
            ret = self._msc.CheckMode()
            name = None
            if isinstance(ret, tuple) and len(ret) >= 2 and isinstance(ret[1], dict):
                name = ret[1].get("name")
            elif isinstance(ret, dict):
                name = ret.get("name")
            self._last_msc_mode = name
            return {"mode_name": name}
        except Exception as exc:
            log.error("msc_check failed: %s", exc)
            return {"mode_name": None}

    def msc_select_ai(self) -> dict:
        """MotionSwitcher SelectMode("ai") (simulated without a client)."""
        if not self._ensure_client() or self._msc is None:
            return {"ok": True, "simulated": True}
        with self._lock:
            ok, _ = self._safe_call("SelectMode(ai)", self._msc.SelectMode, "ai")
        return {"ok": bool(ok), "simulated": False}

    def msc_release(self) -> dict:
        """MotionSwitcher ReleaseMode (simulated without a client)."""
        if not self._ensure_client() or self._msc is None:
            return {"ok": True, "simulated": True}
        with self._lock:
            ok, _ = self._safe_call("ReleaseMode", self._msc.ReleaseMode)
        return {"ok": bool(ok), "simulated": False}

    def reconnect(self) -> dict:
        """Drop and rebuild Loco + MSC clients (does NOT re-init the DDS factory)."""
        with self._lock:
            self._bot = None
            self._msc = None
            self._lc_ready = False
        ok = self._ensure_client()
        return {"ok": bool(ok), "lc_ready": self._lc_ready}

    # ── reads ────────────────────────────────────────────────────────────────

    def joints(self) -> dict:
        """Return name + position for joint indices 12-28.

        Positions come from the shared arm's ``get_current_q()``; a missing arm,
        a failing read, a ``None`` result or a short vector all yield 0.0 for
        the affected joints.
        """
        q: list = []
        try:
            if self._arm is not None:
                q = self._arm.get_current_q()
        except Exception:
            q = []
        if q is None:       # reader had no sample yet — treat as "no data"
            q = []
        out = []
        for idx in range(12, 29):
            val = q[idx] if idx < len(q) else 0.0
            out.append({"idx": idx, "name": JOINT_NAMES.get(idx, f"motor_{idx}"),
                        "q": float(val)})
        return {"joints": out}

    def status(self) -> dict:
        """Snapshot of controller state for the dashboard."""
        # Polling /status lazily brings up the client once arm DDS is ready.
        self._ensure_client()
        fid = self.fsm_id() if self._lc_ready else None
        fmode = self.fsm_mode() if self._lc_ready else None
        walk_ready = bool(self._lc_ready and fid == 200 and fmode not in (None, 2))
        return {
            "sdk_available": _HAS_SDK,
            "lc_ready": self._lc_ready,
            "armed": self._armed,
            "fsm_id": fid,
            "fsm_mode": fmode,
            "walk_ready": walk_ready,
            "msc_mode": self._last_msc_mode,
            "teleop_active": self._teleop_active,
            "last_velocity": {"vx": self._cur_v[0], "vy": self._cur_v[1], "vyaw": self._cur_v[2]},
            "caps": {"walk": self._cap_walk, "run": self._cap_run},
            "arm_initialized": bool(self._arm is not None and getattr(self._arm, "_initialized", False)),
        }

    # ── shutdown helper ──────────────────────────────────────────────────────

    def shutdown(self):
        """Best-effort StopMove + disarm for process shutdown.

        Uses _raw_stop() (NOT estop()) so teardown never builds a brand-new
        LocoClient: estop() → _ensure_client() would lazily construct a client
        and run bot.Init() (a DDS RPC) during interpreter teardown when we were
        armed-but-never-built (Enable movement clicked, never moved, then
        Ctrl+C). _raw_stop() no-ops when no client was ever created. Bump the
        stop generation so any in-flight motion bails immediately."""
        with self._lock:
            self._stop_gen += 1
            self._teleop_active = False
            self._cur_v = (0.0, 0.0, 0.0)
        try:
            self._raw_stop()                 # no-op when _bot is None — never re-inits
        except Exception:
            log.exception("StopMove on shutdown failed")
        finally:
            self.disarm_movement()


# ── patch residue that shares the last source line with the class above ─────
# (start of the next file in this diff, kept verbatim)
# diff --git a/vendor/Sanad/README.md b/vendor/Sanad/README.md
# new file mode 100644
# index 0000000..5289e99
# --- /dev/null
# +++ b/vendor/Sanad/README.md
# @@ -0,0 +1,412 @@
# +# Sanad
# +
# +Voice + motion assistant for the Unitree G1 humanoid.
**Gemini Live** (or a +fully-offline pipeline) handles bilingual Arabic/English conversation; an arm +controller plays built-in SDK poses and recorded JSONL macros; a locomotion +controller walks/turns the robot; an optional camera feeds **Gemini-side face & +place recognition**; everything is orchestrated through a fault-isolated +**FastAPI dashboard** on `http://<robot-ip>:8000`. + +``` +┌──────────────────────────────────────────────────────────────────────┐ +│ Dashboard (FastAPI) ── http://<robot-ip>:8000 │ +│ ├─ Operations Quick-fire arm actions + gestural-speaking │ +│ ├─ Voice & Audio Live Gemini, Typed Replay, Wake Phrases, Audio │ +│ ├─ Motion & Replay SDK actions, JSONL replays, macros, teaching │ +│ ├─ Controller Locomotion teleop, postures, FSM modes, E-STOP │ +│ ├─ Recognition Camera vision + face gallery + zones/places │ +│ ├─ Recordings Skill registry, saved Gemini turns │ +│ ├─ Temperature Live 3D motor-temperature heatmap (three.js) │ +│ ├─ Terminal In-browser shell (PTY) to the robot │ +│ └─ Settings & Logs System info, tail/stream live logs │ +└──────────────────────────────────────────────────────────────────────┘ + │ + ├─ voice/sanad_voice.py (subprocess — model-agnostic voice loop) + │ ├─ gemini/script.py (Gemini Live brain — audio+video+state) + │ └─ local/script.py (offline brain — VAD→STT→LLM→TTS) + ├─ gemini/client.py (short-session client for Typed Replay) + ├─ gemini/subprocess.py (spawns+supervises sanad_voice.py; + │ pushes camera frames + motion state + │ to the child over its stdin) + ├─ voice/movement_dispatch.py(Gemini spoken phrase → locomotion) + ├─ vision/camera.py (RealSense/USB capture daemon) + ├─ vision/face_gallery.py (data/faces/ CRUD for the primer turn) + ├─ vision/zone_gallery.py (data/zones/ places + "go here" targets) + ├─ motion/arm_controller.py (G1 arm DDS publisher — owns DDS init) + ├─ G1_Controller/loco_controller.py (G1 locomotion via LocoClient) + ├─ voice/audio_io.py (mic + speaker abstraction — 3 profiles) + └─ core/brain.py 
(skill dispatcher, event bus) +``` + +### Camera + face/place recognition data flow + +``` +CameraDaemon (parent, in-memory JPEG+b64 cache) + ├─→ dashboard /api/recognition/frame.jpg ── snapshot_jpeg() + └─→ GeminiSubprocess._frame_forwarder ── get_frame_b64() + │ "frame:\n" over stdin +ArmController ─emit→ event bus ─→ main.py ─→ live_sub.send_state() + │ "state:\n" over stdin + ▼ + gemini/script.py _stdin_watcher thread + ├─ frame: → _LATEST_FRAME → _send_frame_loop → + │ session.send_realtime_input(video=Blob) + └─ state: → _STATE_PENDING → _send_state_loop → + session.send_realtime_input(text=…) + +Recognition toggles (vision / face-rec / zone-rec / movement) are written by the +dashboard to data/.recognition_state.json and POLLED by the Gemini child at 1 Hz +— so flipping a toggle takes effect mid-session with NO restart. +``` + + +## Quick start (on the robot) + +```bash +conda activate gemini_sdk +cd ~/Sanad +python3 main.py +``` + +Then open `http://<robot-ip>:8000` in a browser. (The dashboard binds to the +`wlan0` IP by default — see *Runtime selection* to override.) + +Fully-offline brain (no cloud): `SANAD_VOICE_BRAIN=local python3 main.py` +(requires `ollama serve` + the local model env — see *Voice brains*). + +> **Gemini API key — required, none ships with the repo.** The `api_key` +> fields in `config/core_config.json` (`gemini_defaults`) and +> `data/motions/config.json` (`gemini`) are intentionally empty (`""`). +> The voice loop cannot connect until you supply one, by any of: +> - **Dashboard** → *Voice & Audio → Gemini API Key* — paste + save, hot-swaps live (no restart). Persists to `data/motions/config.json`. +> - **Env var** — `export SANAD_GEMINI_API_KEY=AIza...` before `python3 main.py`. +> - **Config file** — set `gemini_defaults.api_key` in `config/core_config.json`. +> +> Precedence (highest first): `data/motions/config.json` → `SANAD_GEMINI_API_KEY` → `config/core_config.json`. Get a key from Google AI Studio (https://aistudio.google.com/apikey). 
+ + +## Dashboard features + +### Operations +Quick-fire SDK + JSONL arm actions (chip buttons), gestural-speaking toggle. + +### Voice & Audio +- **Live Voice Commands** — fire arm gestures from the *user's* transcript + (wake-phrase → arm action). Master gate + Deferred-trigger toggle. +- **Live Gemini Process** — start/stop the voice conversation subprocess, tail + its log. Choose the Gemini cloud brain or the offline brain via + `SANAD_VOICE_BRAIN`. +- **Typed Replay** — Gemini reads typed text aloud (wrapped with a + "repeat verbatim" prompt); optionally records the clip. +- **Gemini API Key** — hot-swap the key without restart. +- **Wake Phrase Manager** — add/remove phrase → action bindings. +- **Audio Controls** — mic/speaker mute, G1 chest-speaker volume (DDS), device + profile selection, PulseAudio soft-reset and Anker USB hard-reset. + +### Motion & Replay +- **Motion Control** — list SDK (built-in) + JSONL (recorded) actions, select + + play. Cancel smoothly returns to `arm_home.jsonl`. +- **Replay Manager** — upload `.jsonl` files, test-play with speed, Teaching + Mode (kinesthetic record — limp the arm and hand-guide it). +- **Macro Recorder** — record a new audio+motion pair, OR pick any WAV + any + motion (SDK or JSONL) and play them in parallel. + +### Controller *(locomotion)* +Manual teleoperation of the G1's **legs** via the Unitree `LocoClient`. +**Disarmed every boot**; all motion writes require Arm first. +- **Move / Step** — continuous teleop (vx/vy/vyaw) or discrete one-shot steps. +- **Postures & FSM modes** — zero-torque, damp, squat, sit, stand, balance, + stand-height; prep/ready sequences; MotionSwitcher select-AI/release. +- **Gemini Movement** — toggle voice-driven walking: the `MovementDispatcher` + parses Gemini's *own spoken confirmation phrases* ("Turning right." / + "أستدير يميناً.") and drives the legs (gated on this toggle + an E-STOP latch). +- **E-STOP** — always available; `StopMove` + disarm + latch the dispatcher. 
+ +> **Safety:** the arm and locomotion are **mutually exclusive** — +> `arm.set_motion_block(loco.movement_active)` makes every arm +> replay/gesture refuse while the robot is (or just was, within ~1.5 s) walking. + +### Recognition +Camera vision + Gemini-side **face** and **zone/place** recognition. All are +**off by default**; each is a **hot toggle** (≈1 s to take effect, no restart). +- **Camera Vision** — `CameraDaemon` captures from a RealSense (preferred) or + USB camera; the supervisor streams JPEG frames to Gemini Live so it can answer + "what do you see?". Live preview panel. Auto-reconnects on USB unplug/stall + and warns if a RealSense negotiated USB 2.0 (Marcus-ported resilience). +- **Face Recognition** — manage `data/faces/face_{id}/` galleries: enroll from + the live camera or upload photos, rename, describe, download (per-photo or + ZIP), delete. On session start (and on any gallery change) the child sends a + **primer turn** carrying every enrolled face + a Khaleeji greeting + instruction — **Gemini matches in-context, so there is no local + face-recognition model**. Recognition needs vision on. +- **Zones & Places** — `data/zones/zone_{zid}/place_{pid}/` two-level gallery: + reference photos per place, optional linked face_ids, and a **"go here"** nav + target (`nav_target_zone/place_id` in the recognition-state file) for + place-aware navigation. +- **Sync Gallery** — force-resend the face/zone primer to the live session. + +### Recordings +Skill Registry (predefined audio+motion+callback skills from `skills.json`) + +Saved Records (captured Gemini turn recordings; play/pause/stop/rename/delete). + +### Temperature +Live **3D motor-temperature heatmap** — a standalone three.js viewer +(`dashboard/static/temp3d/`) loads the G1 29-DoF URDF + STL meshes and colors +each joint blue→red from the arm controller's throttled `rt/lowstate` snapshot, +streamed over `/ws/motor-temps` at ~8 fps. No second DDS subscriber. 
+ +### Terminal +In-browser **PTY shell** to the robot (`/ws/terminal`, xterm.js) — a `bash -i` +as the dashboard's user, with resize + backpressure, bounded to 4 sessions. +(See *Security* — this is full shell access to whoever reaches the URL.) + +### Settings & Logs +System info (host, network interfaces, DDS interface, bound dashboard host/port, +per-subsystem status, audio devices), live log stream (`/ws/logs`), per-file +tail, snapshot, and a one-blob "Copy All Logs" bundle. + + +## Directory layout + +| Path | Contents | +|---|---| +| `main.py` | Entry point — fault-isolated boot of all subsystems + the dashboard. Doubles as the service container (route handlers `import` its module globals). | +| `config.py` | Runtime constants + layout-agnostic path resolution; layers `data/motions/config.json` over the JSON config at import. | +| `config/` | Per-subsystem JSON: `core`, `voice`, `gemini`, `local`, `motion`, `dashboard`. | +| `core/` | `brain.py` (skill dispatcher), `event_bus.py`, `skill_registry.py`, `config_loader.py`, `logger.py` (rotating + WS push), `asyncio_compat.py` (3.8 `to_thread` shim). | +| `gemini/` | Gemini Live — `client.py` (one-shot), `script.py` (live brain: audio + video + motion-state), `subprocess.py` (supervisor + stdin frame/state push). | +| `local/` | Fully-offline brain — `vad.py` (Silero), `stt.py` (faster-whisper), `llm.py` (Qwen via Ollama/llama.cpp), `tts.py` (CosyVoice2), `script.py` (the brain), `subprocess.py` (supervisor). Opt-in via `SANAD_VOICE_BRAIN=local`. | +| `voice/` | `sanad_voice.py` (subprocess entry, model-agnostic), `audio_io.py` / `audio_manager.py` / `audio_devices.py` (mic/speaker), `local_tts.py` (SpeechT5 Arabic TTS), `live_voice_loop.py` (user-transcript → arm gesture), `movement_dispatch.py` (Gemini-phrase → locomotion), `typed_replay.py`, `wake_phrase_manager.py`, `text_utils.py` (Arabic normalization + phrase matching), `model_script.py` / `model_subprocess.py` (brain templates). 
| +| `motion/` | `arm_controller.py` (production 5-phase JSONL replay engine, owns the single DDS init), `macro_player.py`, `macro_recorder.py`, `teaching.py`. (`sanad_arm_controller.py` is a legacy alternate — not wired by `main.py`.) | +| `G1_Controller/` | `loco_controller.py` — locomotion via Unitree `LocoClient` (move/step/postures/FSM/E-STOP); reuses the arm's DDS participant. | +| `vision/` | `camera.py` (RealSense/USB daemon, auto-reconnect), `face_gallery.py`, `zone_gallery.py`, `recognition_state.py` (atomic-JSON toggle IPC). | +| `dashboard/` | `app.py` (FastAPI factory + fault-isolated router registration), `routes/*.py` (20 REST routers), `websockets/*.py` (logs, motor-temps, terminal), `static/index.html` (single-page UI), `static/temp3d/` (3D viewer). | +| `scripts/` | Persona files — `sanad_script.txt` (voice persona "Bousandah"), `sanad_rule.txt`, `sanad_arm.txt` (voice→arm phrases). | +| `data/` | Runtime state — `motions/*.jsonl` (arm trajectories) + `instruction.json` (locomotion phrase map) + `skills.json` + `config.json` (dashboard-editable), `recordings/` (captured turns + macros), `faces/face_{id}/` + `zones/zone_{zid}/place_{pid}/` (galleries), `audio/` (typed-replay WAVs + records index), `.recognition_state.json` (toggle IPC). | +| `model/` | Local SpeechT5 / Whisper / CosyVoice2 weights when using the offline pipeline. | +| `logs/` | Per-module rotating logs. | + + +## Voice brains + +The child `voice/sanad_voice.py` is model-agnostic and selects a brain via +`SANAD_VOICE_BRAIN`. Every brain implements the same contract +(`__init__(audio_io, recorder, voice, system_prompt)`, `async run()`, `stop()`) +and ships a sibling supervisor that spawns the child and parses its +`USER:` / `BOT:` / state log markers. + +| Value | Brain | Pipeline | +|---|---|---| +| `gemini` *(default)* | `gemini/script.py` | Gemini Live native-audio (full-duplex speech-to-speech, server-side VAD, vision frames, face/zone primers, voice→movement). Cloud. 
| +| `local` | `local/script.py` | Silero VAD → faster-whisper (large-v3-turbo, CUDA int8) → Qwen2.5 (Ollama/llama.cpp) → CosyVoice2 streaming TTS. Fully on-device. | +| `model` | `voice/model_script.py` | Template/stub for adding a new provider (OpenAI Realtime, Claude Voice, …). | + +To add a brain: drop a file in `voice/` or a new `/` folder and add a +branch to `voice/sanad_voice.py:_build_brain()`; ship a supervisor modeled on +`voice/model_subprocess.py`. + + +## Runtime selection (env vars) + +| Var | Values | Default | Effect | +|---|---|---|---| +| `SANAD_VOICE_BRAIN` | `gemini`, `local`, `model` | `gemini` | Which brain the subprocess loads (see `voice/sanad_voice.py:_build_brain`). | +| `SANAD_AUDIO_PROFILE` | `builtin`, `anker`, `hollyland_builtin` | `builtin` | Mic + speaker pair. `builtin` = G1 UDP mic + G1 chest speaker via DDS. | +| `SANAD_DDS_INTERFACE` | network iface | `eth0` | DDS network for G1 low-level comms (arm + locomotion + speaker). | +| `SANAD_DASHBOARD_HOST` / `_INTERFACE` | IP / iface | `wlan0` IP | Dashboard bind address. | +| `SANAD_GEMINI_API_KEY` | string | `""` (empty) | Gemini API key. No key ships in the repo — set this, paste one in the dashboard (**Voice & Audio → Gemini API Key**), or fill `gemini_defaults.api_key` in `config/core_config.json`. See [Quick start](#quick-start-on-the-robot). | +| `SANAD_GEMINI_MODEL` / `_VOICE` | string | reads config | Override the Gemini model id / prebuilt voice. | +| `SANAD_G1_VOLUME` | `0`–`100` | `100` | G1 chest-speaker volume; also scales the barge-in threshold. | +| `SANAD_LIVE_SCRIPT` | path | auto | Override the subprocess entry script path. | +| `SANAD_RECORD` | `0` or `1` | `1` | Record every Gemini turn to `data/recordings/`. | +| `SANAD_AEC_ENABLE` | `0` or `1` | `1` | Enable WebRTC AEC3 (if the Python binding is installed). | +| `SANAD_VISION_ENABLE` | `0` or `1` | `0` | Boot default for camera vision. 
**Runtime truth is the Recognition-tab toggle** → `data/.recognition_state.json`, hot-applied without a restart. | +| `SANAD_FACE_RECOGNITION_ENABLE` | `0` or `1` | `0` | Boot default for Gemini-side face recognition. Also a hot toggle. | +| `SANAD_VISION_SEND_HZ` | float | `2` | Frames/sec the Gemini child relays to Live. | +| `SANAD_CAMERA_WIDTH` / `_HEIGHT` / `_FPS` | int | `424` / `240` / `15` | Capture profile. Also settable per-deploy in `config/core_config.json > camera`. | +| `SANAD_CAMERA_USB_INDEX` | int | auto | Pin a `/dev/videoN` node (avoids picking a RealSense IR stream). | +| `SANAD_FACES_MAX_SAMPLES` | int | `3` | Max photos per person fed into the gallery primer turn (token budget). | +| `SANAD_PROJECT_ROOT` | path | auto | Override the project root (see *Dynamic paths*). | + +> All `SANAD_VISION_*` / `SANAD_CAMERA_*` / `SANAD_FACE_*` vars are **boot +> defaults** forwarded to the Gemini child via `LIVE_TUNE`. Once running, the +> Recognition tab's toggles (vision / face-rec / zone-rec / movement) are the +> live source of truth in `data/.recognition_state.json`, polled at 1 Hz. + +CLI flags: `python3 main.py --host <ip> --port 8000 --network <iface>`; +`--check-env` prints a subsystem/environment diagnostic and exits. + + +## API surface + +All routes are registered defensively — a router whose import fails is recorded +(`GET /api/_dashboard_status`) and the server still boots without it.
+ +**REST** (prefix → controls): `/api` health · `/api/system` info · +`/api/voice` Gemini/local generate+connect+key · `/api/motion` arm actions · +`/api/skills` skill registry · `/api/macros` record/play · `/api/replay` JSONL +CRUD + teaching · `/api/audio` mute/volume/devices/reset · `/api/scripts` +persona files · `/api/records` saved WAVs · `/api/prompt` system prompt · +`/api/wake-phrases` bindings · `/api/live-voice` arm-phrase dispatcher · +`/api/live-subprocess` Gemini child · `/api/typed-replay` TTS · `/api/recognition` +vision + face gallery · `/api/zones` zones/places + nav target · `/api/temp` +motor map + snapshot · `/api/controller` locomotion (move/step/postures/modes/ +E-STOP). + +**WebSockets**: `/ws/logs` (live log stream + 500-line replay) · +`/ws/motor-temps` (3D heatmap data, ~8 fps) · `/ws/terminal` (PTY shell). + + +## Architecture notes + +- **Subprocess isolation**: `voice/sanad_voice.py` runs as a child of `main.py` + via the supervisor. If the voice loop crashes, the dashboard + arm + legs stay + up. +- **Single DDS init**: `motion/arm_controller.py` owns the one + `ChannelFactoryInitialize`; `LocoController` and the audio routes reuse that + participant rather than re-initializing. +- **Brain contract**: see `voice/model_script.py` — any new model implements + `__init__(audio_io, recorder, voice, system_prompt)`, `async run()`, `stop()`. +- **Supervisor contract**: each brain ships a sibling supervisor (e.g. + `gemini/subprocess.py`) that spawns `sanad_voice.py` with its + `SANAD_VOICE_BRAIN` and parses the brain's log markers. Template: + `voice/model_subprocess.py`. +- **Locomotion safety**: `LocoController` is disarmed every boot, has velocity + caps + a `StopMove` watchdog, and is mutually exclusive with the arm. + Voice-driven movement is **off by default** and gated by the Controller + toggle. 
Distances/degrees in `data/motions/instruction.json` are + **approximate and must be calibrated on the real robot** — there is no + obstacle/abort stack. +- **Audio routing**: the G1's platform-sound PulseAudio sink is NOT wired to a + physical speaker. All dashboard-triggered playback (`play_wav`, typed-replay + audio, record playback) routes through DDS `AudioClient.PlayStream` via + `audio_manager._play_pcm_via_g1`. The PyAudio path is a desktop/dev fallback. +- **Arm replay**: `motion/arm_controller.py:_replay_file_inner()` is a port of + `G1_Lootah/Manual_Recorder/g1_replay_v4_stable.py:Run()` — ramp-in → settle + hold → playback → smooth return → disable SDK. Body motors (0–14) lock to a + live snapshot while arm motors (15–28) follow the file at 60 Hz. `_return_home()` + runs unconditionally after a cancel for a jerk-free return. +- **Camera frame transport (stdin push)**: the `CameraDaemon` lives in the + parent and caches frames in memory. `GeminiSubprocess` base64-encodes the + latest frame to the child's stdin (~2 fps); the child's `_stdin_watcher` + relays it to Gemini Live with a staleness guard. Chosen over a file drop so + the parent owns the camera once and the dashboard preview reads the same cache. +- **Motion-state channel**: `arm_controller._execute()` emits + `motion.action_started` / `_done` / `_error` on the event bus. `main.py` + forwards each to the child as `state:\n`, injected to Gemini Live as + silent `[STATE-START] wave_hand` / `[STATE-DONE] wave_hand (2.3s)` text so it + can honestly answer "what are you doing?". +- **Recognition is Gemini-side**: no dlib/insightface/onnxruntime. Galleries are + pure file IO; `gemini/script.py:_send_gallery_primer()` builds one multimodal + `send_client_content` turn — every enrolled face/place's photos + a greeting + instruction — and Gemini matches incoming frames against it in-context. + + +## Camera vision on Jetson + +The Recognition tab needs `pyrealsense2` to talk to the Intel RealSense. 
+**Do not `pip install pyrealsense2` on JetPack 5** — the PyPI wheel is built +against glibc 2.32+ (Ubuntu 22.04) and fails to load on JetPack 5's glibc +2.31 with `ImportError: ... version 'GLIBC_2.32' not found`. + +The native runtime is already there (`apt`-installed `librealsense2`). Build +just the Python binding from source against it, into the `gemini_sdk` env: + +```bash +rs-enumerate-devices # confirm the D435I shows up at OS level first + +source ~/miniconda3/etc/profile.d/conda.sh && conda activate gemini_sdk +pip uninstall -y pyrealsense2 # remove the broken wheel if present +sudo apt install -y cmake build-essential git python3-dev libusb-1.0-0-dev pkg-config libssl-dev + +cd /tmp && rm -rf librealsense +git clone --depth=1 --branch v2.56.5 https://github.com/IntelRealSense/librealsense.git +cd librealsense && mkdir -p build && cd build +cmake .. -DBUILD_PYTHON_BINDINGS=ON -DPYTHON_EXECUTABLE=$(which python3) \ + -DBUILD_EXAMPLES=OFF -DBUILD_GRAPHICAL_EXAMPLES=OFF \ + -DBUILD_UNIT_TESTS=OFF -DCHECK_FOR_UPDATES=OFF -DCMAKE_BUILD_TYPE=Release +make -j$(nproc) pyrealsense2 +SITE=$(python3 -c "import sysconfig; print(sysconfig.get_paths()['purelib'])") +mkdir -p "$SITE/pyrealsense2" +cp wrappers/python/pyrealsense2*.so "$SITE/pyrealsense2/" +cp ../wrappers/python/pyrealsense2/__init__.py "$SITE/pyrealsense2/" 2>/dev/null || true + +python3 -c 'import pyrealsense2 as rs; print([d.get_info(rs.camera_info.name) for d in rs.context().query_devices()])' +``` + +Match the `--branch` tag to the installed runtime (`dpkg -l | grep librealsense2`). +If the build isn't worth it, `CameraDaemon` falls back to `cv2.VideoCapture(0)` +automatically — fine for a plain USB webcam, but note a RealSense exposes its +*depth* stream at `/dev/video0`, not RGB, so a real USB cam is the cleaner +fallback (or pin `SANAD_CAMERA_USB_INDEX`). On x86_64 / Ubuntu 22.04+ desktops, +`pip install pyrealsense2` just works. 
+ + +## Dynamic paths + +Every path is derived at runtime — no hard-coded `/home/...` anywhere. +Resolution order for `BASE_DIR` in `config.py`: + +1. `SANAD_PROJECT_ROOT` env var (if set). +2. `PROJECT_BASE + PROJECT_NAME` from a `.env` file in `Sanad/` or its parent. +3. `Path(__file__).resolve().parent` — auto-detected. + +The project runs unchanged from either layout: +- dev: `/Project/Sanad/` +- deployed: `/home/unitree/Sanad/` + + +## Deployment (workstation → robot) + +```bash +rsync -av --delete \ + --exclude=__pycache__ --exclude=logs --exclude=model --exclude=.git \ + /path/to/Sanad/ \ + unitree@192.168.123.164:/home/unitree/Sanad/ +``` + +Then on the robot: `Ctrl+C` the running `main.py` and re-run. + + +## Security + +The dashboard has **no authentication**. Anyone who can reach +`http://<host>:8000` gets full robot control — locomotion, arm, audio, file +upload/delete — and, via the **Terminal tab**, an interactive shell as the +dashboard's user. Bind it to a **trusted LAN only**; add auth before any wider +exposure. + + +## Troubleshooting + +| Symptom | Fix | +|---|---| +| `No LowState received in 2s — refusing to replay` | `main.py` was re-executed as both `__main__` and `Project.Sanad.main`, creating two arm instances. Fix lives in the `sys.modules` alias near the top of `main.py`. Restart. | +| `G1ArmActionClient not available — skipping` for SDK actions | Same duplicate-init issue as above. | +| `No module named 'Project'` in subprocess | Bootstrap preamble in `voice/sanad_voice.py:~30` synthesises the `Project.Sanad` namespace when run as `__main__`. | +| Controller moves rejected (409) | The Controller is **disarmed by default** — hit Arm first. Reads + E-STOP are always allowed. | +| Arm action refused while "movement armed" | Arm ↔ locomotion are mutually exclusive. Disarm/stop locomotion, then trigger the arm. | +| Voice-driven walking does nothing | "Gemini Movement" toggle off, or E-STOP latched. Toggle on; clear E-STOP.
Distances are uncalibrated. | +| Arm jumps at start of JSONL replay | `SETTLE_HOLD_SEC` (in `config/motion_config.json > arm_controller`) too low — try `0.7` or `1.0`. | +| Record playback silent | `audio_mgr.play_wav` only routes to G1 DDS if the Unitree SDK is importable; on desktop it falls back to the PulseAudio sink. | +| Live Voice Commands transcript stuck | Deferred trigger was queued but `trigger_enabled` toggle was off. Toggle on — or the pending-trigger poll fires it automatically once enabled. | +| Gemini "no audio" on Typed Replay | Non-deterministic; the retry chain in `voice/typed_replay.py:generate_audio` tries three prompt variants. For reliable TTS, use the offline `local_tts` SpeechT5 path. | +| Local brain exits immediately | `ollama serve` not running / model not pulled, or weights missing under `model/`. Check `logs/local_subprocess.log`. The Gemini brain is the safe default. | +| Recognition tab: "Camera could not start (no backend)" | No camera backend acquired. Check `rs-enumerate-devices` (RealSense at OS level) and `python3 -c 'import pyrealsense2'` in the `gemini_sdk` env. The glibc `ImportError` means the pip wheel is incompatible — see "Camera vision on Jetson" above. | +| Camera badge stuck on "reconnecting…" | `CameraDaemon` lost the device and is retrying with exponential backoff. Re-seat the USB 3 cable; check `logs/camera.log` for the USB-2.0 warning. | +| Gemini doesn't greet an enrolled face | Face Recognition toggle on? Vision on? (Face rec needs frames.) Check `logs/gemini_brain.log` for `face gallery primed: N person(s)`. Hit "Sync Gallery" to force a re-prime. | +| Gemini unaware of motion state | The `motion.action_*` → `send_state` chain only runs when Live Gemini is up. Check `logs/gemini_subprocess.log` and `logs/gemini_brain.log` for `STATE injected:` lines. | + + +## License / attribution + +Internal project for YS Lootah Technology. 
Reuses/ports patterns from: +- `G1_Lootah/Manual_Recorder/g1_replay_v4_stable.py` (arm replay math) +- `SanadVoice/gemini_interact` (arm-phrase dispatch, skill registry) +- `SanadVoice/gemini_voice_v2` (local SpeechT5 TTS) +- `Project/Marcus` — camera→Gemini stdin-push transport, motion-state + injection, camera daemon resilience (auto-reconnect, USB-2.0 warning), the + `API/camera_api.py` cache shape (`get_frame_b64` / `get_fresh_frame`), and the + confirmation-phrase → locomotion pattern (`movement_dispatch`). +- Unitree `unitree_sdk2py` (G1 low-level SDK, `LocoClient`, `G1ArmActionClient`, + `AudioClient.PlayStream`). diff --git a/vendor/Sanad/__init__.py b/vendor/Sanad/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/config.py b/vendor/Sanad/config.py new file mode 100644 index 0000000..47375df --- /dev/null +++ b/vendor/Sanad/config.py @@ -0,0 +1,472 @@ +"""Centralized configuration for the Sanad robot assistant. + +Resolution order for BASE_DIR (highest priority first): + 1. SANAD_PROJECT_ROOT environment variable + 2. PROJECT_BASE + PROJECT_NAME from .env file (or env vars) + 3. Path(__file__).resolve().parent.parent (auto-detected from this file's location) + +Every other directory is derived from BASE_DIR — never hardcode an absolute path. +""" + +from __future__ import annotations + +import json +import os +from pathlib import Path +from typing import Any + + +def _read_env_file(env_path: Path) -> dict[str, str]: + """Minimal .env reader (no python-dotenv dependency).""" + out: dict[str, str] = {} + if not env_path.exists(): + return out + try: + for raw in env_path.read_text(encoding="utf-8").splitlines(): + line = raw.strip() + if not line or line.startswith("#") or "=" not in line: + continue + k, v = line.split("=", 1) + out[k.strip()] = v.strip().strip('"').strip("'") + except OSError: + pass + return out + + +def _resolve_base_dir() -> Path: + """Resolve the Sanad project root with override support.""" + # 1. 
Direct env override + override = os.environ.get("SANAD_PROJECT_ROOT", "").strip() + if override: + p = Path(override).expanduser().resolve() + if p.exists(): + return p + + # 2. PROJECT_BASE + PROJECT_NAME pattern + _here = Path(__file__).resolve().parent # Sanad/ + env_files = [ + _here / ".env", # Sanad/.env + _here.parent / ".env", # Project/.env + ] + for env_path in env_files: + env = _read_env_file(env_path) + base = env.get("PROJECT_BASE") or os.environ.get("PROJECT_BASE", "") + name = env.get("PROJECT_NAME") or os.environ.get("PROJECT_NAME", "") + if base and name: + candidate = Path(base).expanduser().resolve() / name + if candidate.exists(): + return candidate + + # 3. Auto-detect — this file lives at Sanad/config.py, so parent = Sanad/ + return _here + + +BASE_DIR = _resolve_base_dir() +DATA_DIR = BASE_DIR / "data" +LOGS_DIR = BASE_DIR / "logs" +SCRIPTS_DIR = BASE_DIR / "scripts" +MODEL_DIR = BASE_DIR / "model" + +# Audio recordings (typed-replay, etc.) live under data/audio +AUDIO_RECORDINGS_DIR = DATA_DIR / "audio" +# Motion macro recordings (paired with audio) live under data/recordings/motion +MOTION_RECORDINGS_DIR = DATA_DIR / "recordings" / "motion" +# Motion JSONL macros (auto-discovered as actions) +MOTIONS_DIR = DATA_DIR / "motions" + +SKILLS_FILE = MOTIONS_DIR / "skills.json" +CONFIG_FILE = MOTIONS_DIR / "config.json" + +# ─── Load baseline defaults from config/core_config.json ─── +# Single source of truth. Runtime overrides via: +# 1. env vars (SANAD_GEMINI_API_KEY, SANAD_GEMINI_MODEL, ...) +# 2. data/motions/config.json (dashboard-editable — see load_config()) +# 3. 
#      config/core_config.json (this file)
def _load_core_config() -> dict[str, Any]:
    """Read ``config/core_config.json`` under BASE_DIR as a plain dict.

    Returns:
        The top-level JSON object with every key starting with ``_``
        (``_comment`` / ``_description`` notes) removed. An empty dict is
        returned when the file is missing, unreadable, not valid UTF-8, not
        valid JSON, or when its root is not a JSON object — this runs at
        import time, so a bad config file must never raise.
    """
    cfg_path = BASE_DIR / "config" / "core_config.json"
    if not cfg_path.exists():
        return {}
    try:
        raw = json.loads(cfg_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {}
    if not isinstance(raw, dict):
        # A list/scalar root has no .items(); treat it like a missing file.
        return {}
    # Strip _comment / _description noise
    return {k: v for k, v in raw.items() if not k.startswith("_")}


def _core_section(name: str) -> dict[str, Any]:
    """Return section *name* of the core config, or {} if absent or not an object."""
    section = _CORE_CFG.get(name)
    return section if isinstance(section, dict) else {}


_CORE_CFG = _load_core_config()
_GEMINI = _core_section("gemini_defaults")
_AUDIO = _core_section("audio_defaults")

# -- Gemini defaults (override via data/motions/config.json or env) --
GEMINI_API_KEY = os.environ.get(
    "SANAD_GEMINI_API_KEY",
    _GEMINI.get("api_key", ""))
GEMINI_MODEL = os.environ.get(
    "SANAD_GEMINI_MODEL",
    "models/" + _GEMINI.get("model_live", "gemini-2.5-flash-native-audio-preview-12-2025"))
GEMINI_VOICE = os.environ.get(
    "SANAD_GEMINI_VOICE",
    _GEMINI.get("voice_name", "Charon"))
GEMINI_WS_URI = _GEMINI.get(
    "model_ws_uri",
    "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent")
GEMINI_WS_TIMEOUT = _GEMINI.get("ws_timeout_sec", 30)

# -- Audio defaults --
SEND_SAMPLE_RATE = _AUDIO.get("send_sample_rate", 16000)
RECEIVE_SAMPLE_RATE = _AUDIO.get("receive_sample_rate", 24000)
CHUNK_SIZE = _AUDIO.get("chunk_size", 512)
CHANNELS = _AUDIO.get("channels", 1)

# -- PulseAudio hardware IDs --
SINK = _AUDIO.get("sink", "alsa_output.usb-Anker_PowerConf_A3321-DEV-SN1-01.analog-stereo")
SOURCE = _AUDIO.get("source", "alsa_input.usb-Anker_PowerConf_A3321-DEV-SN1-01.mono-fallback")
MONITOR_SOURCE = f"{SINK}.monitor"

# -- Dashboard --
# Default: bind to wlan0's IP (auto-detected at startup) so the dashboard is
# reachable on the wireless network. Falls back to 0.0.0.0 (all interfaces)
# if wlan0 isn't present.
#
# Resolution order (highest priority first):
#   1. SANAD_DASHBOARD_HOST env var (explicit IP or hostname)
#   2.
#      SANAD_DASHBOARD_INTERFACE env var → that interface's IP
#   3. wlan0 interface IP (default)
#   4. 0.0.0.0 (bind to all)
#
# Override via --host CLI flag too.
DASHBOARD_INTERFACE = os.environ.get("SANAD_DASHBOARD_INTERFACE", "wlan0")


def _get_interface_ip(iface: str) -> str | None:
    """Return the IPv4 address bound to `iface`, or None if it has none.

    Only interface-specific strategies are tried, in order:
      1. fcntl SIOCGIFADDR (fastest, no subprocess)
      2. parsing `ip -4 -o addr show <iface>`

    The host-wide `hostname -I` scrape is deliberately NOT used here: it
    cannot tell which interface an address belongs to, so it would report
    another interface's IP for an interface that is down or missing.
    """
    return _get_iface_ip_fcntl(iface) or _get_iface_ip_via_ip_cmd(iface) or None


def _get_iface_ip_fcntl(iface: str) -> str | None:
    """Query `iface`'s IPv4 address with the SIOCGIFADDR ioctl.

    Returns None on any failure (no such interface, no address, or the
    `fcntl` module being unavailable on this platform).
    """
    try:
        import fcntl
        import socket
        import struct
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            # The name is truncated to 15 bytes before being packed.
            ifname = iface[:15].encode("utf-8")
            packed = fcntl.ioctl(
                s.fileno(),
                0x8915,  # SIOCGIFADDR
                struct.pack("256s", ifname),
            )
            # Bytes 20..24 of the returned struct hold the IPv4 address.
            return socket.inet_ntoa(packed[20:24])
        finally:
            s.close()
    except Exception:
        return None


def _get_iface_ip_via_ip_cmd(iface: str) -> str | None:
    """Parse `ip -4 -o addr show <iface>` for the first `inet` address.

    Returns None if the command is missing, fails, times out, or prints no
    `inet` entry.
    """
    try:
        import subprocess
        r = subprocess.run(
            ["ip", "-4", "-o", "addr", "show", iface],
            capture_output=True, text=True, timeout=2.0,
        )
        if r.returncode != 0:
            return None
        # Output: "5: wlan0 inet 10.255.254.86/24 brd ..."
        for line in r.stdout.splitlines():
            parts = line.split()
            for i, p in enumerate(parts):
                if p == "inet" and i + 1 < len(parts):
                    return parts[i + 1].split("/")[0]
    except Exception:
        return None
    return None


def _first_non_loopback_ipv4() -> str | None:
    """Return the first non-loopback IPv4 printed by `hostname -I`, or None."""
    try:
        import subprocess
        r = subprocess.run(["hostname", "-I"], capture_output=True, text=True, timeout=1.0)
        if r.returncode == 0:
            for ip in (r.stdout or "").split():
                # "." filters out IPv6 entries; 127.* is loopback.
                if "." in ip and not ip.startswith("127."):
                    return ip
    except Exception:
        return None
    return None


def _get_iface_ip_via_proc(iface: str) -> str | None:
    """Host-wide fallback kept for backward compatibility.

    Despite its name this does not read /proc and does not look at `iface`:
    it returns the first non-loopback IPv4 reported by `hostname -I`, which
    may belong to a different interface. `_get_interface_ip` therefore no
    longer calls it.
    """
    return _first_non_loopback_ipv4()


def list_network_interfaces() -> list[dict]:
    """Return [{name, index, ip, is_up}] for every interface on the box.

    `ip` is "" and `is_up` is False for an interface with no IPv4 address.
    Used by the dashboard's system-info panel. Returns whatever was collected
    so far (possibly []) if interface enumeration fails.
    """
    out: list[dict] = []
    try:
        import socket
        for idx, name in socket.if_nameindex():
            ip = _get_interface_ip(name)
            out.append({
                "name": name,
                "index": idx,
                "ip": ip or "",
                "is_up": ip is not None,
            })
    except Exception:
        pass
    return out


def _resolve_dashboard_host() -> str:
    """Resolve the host the dashboard should bind to.

    Order:
      1. SANAD_DASHBOARD_HOST env var (explicit IP/hostname)
      2. SANAD_DASHBOARD_INTERFACE → that interface's IP
         (the interface defaults to wlan0)
      3. First non-loopback IP from `hostname -I`
      4. 0.0.0.0 (bind everywhere)
    """
    explicit = os.environ.get("SANAD_DASHBOARD_HOST", "").strip()
    if explicit:
        return explicit
    return (
        _get_interface_ip(DASHBOARD_INTERFACE)
        or _first_non_loopback_ipv4()
        or "0.0.0.0"
    )


DASHBOARD_HOST = _resolve_dashboard_host()
# Canonical SanadV3 port (matches shell_scripts/start_all.sh + docs). The
# legacy Sanad ran on :8000; SanadV3 is :8001 to never collide with it.
DASHBOARD_PORT = 8001

# -- Local TTS --
# Model id plus the three on-disk asset paths, all resolved under MODEL_DIR.
LOCAL_TTS_MODEL = "MBZUAI/speecht5_tts_clartts_ar"
LOCAL_TTS_MODEL_PATH = str(MODEL_DIR / "speecht5_tts_clartts_ar")
LOCAL_TTS_HIFIGAN_PATH = str(MODEL_DIR / "speecht5_hifigan")
LOCAL_TTS_XVECTOR_PATH = str(MODEL_DIR / "arabic_xvector_embedding.pt")

# -- Motion --
# The first three values come from the "g1_hardware" section of the core
# config, with the literals below as fallbacks; the gains and motor-index
# sets are fixed here.
_G1 = _CORE_CFG.get("g1_hardware", {})
REPLAY_HZ = _G1.get("replay_hz", 60.0)
G1_NUM_MOTOR = _G1.get("num_motor", 29)
ENABLE_ARM_SDK_INDEX = _G1.get("enable_arm_sdk_index", 29)
KP_HIGH = 300.0
KD_HIGH = 3.0
KP_LOW = 80.0
KD_LOW = 3.0
KP_WRIST = 40.0
KD_WRIST = 1.5
WEAK_MOTORS = {4, 10, 15, 16, 17, 18, 22, 23, 24, 25}
WRIST_MOTORS = {19, 20, 21, 26, 27, 28}

# -- Live Gemini subprocess tuning --
# Every value is a string: the mapping holds environment-variable style
# settings for the voice child process.
LIVE_TUNE: dict[str, str] = {
    "SANAD_REQUIRED_LOUD_CHUNKS": "5",
    "SANAD_PREBUFFER_CHUNKS": "3",
    "SANAD_PLAYBACK_TIMEOUT": "0.25",
    "SANAD_BARGE_IN_COOLDOWN": "1.0",
    "SANAD_AI_SPEAK_GRACE": "0.5",
    # ECHO_GUARD_SEC suppresses USER SAID log lines for this many seconds
    # after the robot finishes a chunk. Previously 1.2 — caused a visible
    # lag where "robot finished talking" was followed by silence in the
    # log even though Gemini was transcribing the user's new speech
    # immediately. Lowered to 0.3 to match typical room reverb tail; the
    # real echo protection is the silence-during-speaking gate, not this.
    "SANAD_ECHO_GUARD_SEC": "0.3",
    "SANAD_SPEAKING_ENERGY_GATE": "0.90",
    "SANAD_CALIBRATION_CHUNKS": "30",
    "SANAD_THRESHOLD_MULTIPLIER": "4.0",
    # Base barge-in threshold calibrated at the REFERENCE volume (50%).
    # At runtime, scaled QUADRATICALLY with actual G1 volume:
    #     scale = (actual_vol / ref_vol) ** 2
    #
    # Physical reason: doubling digital speaker volume doubles sample
    # amplitude, which means RECEIVED energy at the mic quadruples
    # (energy ~ amplitude²). Linear scaling left the threshold below the
    # echo level at high volumes → caused "robot listening to himself"
    # feedback.
    #
    # Measured on Hollyland + G1 speaker at 100% volume:
    #     echo peak (no user)   up to ~15700
    #     voice peak (user)     25000-32000+ (often saturates 32767)
    # Safe threshold at 100% vol: ~18000, above echo / below voice.
    #
    # Working back with quadratic scale: base × (100/50)² = 18000
    #     base × 4 = 18000 → base = 4500 at 50% ref volume.
    #
    # NOTE(review): none of the three values below equals the 4500 derived
    # above — confirm which key carries the base threshold and whether this
    # derivation is still current.
    "SANAD_MIN_THRESHOLD": "800",
    "SANAD_PLAYBACK_BARGE_MIN": "2500",
    "SANAD_PLAYBACK_BARGE_MULT": "1.5",
    # Sustained-chunk requirement for barge-in. Balance:
    #     higher = fewer false triggers from echo bursts
    #     lower  = quicker response to short commands ("stop", "توقف")
    # An earlier note here described a default of 5 (~160ms of sustained
    # voice); the value actually set below is 2, i.e. ~64ms on that same
    # per-chunk basis.
    # NOTE(review): confirm 2 is intended — the original rationale was that
    # real speech sustains ~160ms while single-chunk echo spikes don't.
    "SANAD_PLAYBACK_REQUIRED_CHUNKS": "2",
    "SANAD_SILENCE_AFTER_SPEECH": "1.2",
    "SANAD_SPEECH_THRESHOLD": "300",
    "SANAD_DDS_INTERFACE": os.environ.get("SANAD_DDS_INTERFACE", "eth0"),
    # G1 built-in mic — UDP multicast 239.168.123.161:5555.
    # Requires wake-up conversation mode ON in Unitree app.
    "SANAD_USE_G1_MIC": "1",

    # ── Recognition (camera vision + face recognition) ──
    # All of these are BOOT defaults. The runtime source of truth is the
    # state file data/.recognition_state.json — toggled live from the
    # Recognition tab and polled by the Gemini child at 1 Hz.
    "SANAD_VISION_ENABLE": "0",
    "SANAD_VISION_SEND_HZ": "2",
    "SANAD_VISION_STALE_MS": "1500",
    "SANAD_CAMERA_WIDTH": "424",
    "SANAD_CAMERA_HEIGHT": "240",
    "SANAD_CAMERA_FPS": "15",
    "SANAD_CAMERA_JPEG_QUALITY": "70",
    "SANAD_FACE_RECOGNITION_ENABLE": "0",
    "SANAD_FACES_DIR": str(DATA_DIR / "faces"),
    "SANAD_FACES_MAX_SAMPLES": "3",
    "SANAD_FACES_PRIMER_RESIZE": "256",
    "SANAD_RECOGNITION_STATE_PATH": str(DATA_DIR / ".recognition_state.json"),
    "SANAD_RECOGNITION_POLL_S": "1.0",
}

# -- Camera --
CAMERA_SERVICE_PORT = 8091
DIRECT_CAMERA_URL = f"http://127.0.0.1:{CAMERA_SERVICE_PORT}"

# -- Navigation (web_nav3 / rosbridge) --
WEB_NAV3_URL = os.environ.get("WEB_NAV3_URL", "http://127.0.0.1:8765")
ROSBRIDGE_URL = os.environ.get("ROSBRIDGE_URL", "ws://127.0.0.1:9090")
NAV_ROBOT_NAME = os.environ.get("NAV_ROBOT_NAME", "sanad")

# -- DDS / hardware --
# Jetson G1 default is eth0 (the robot's internal network).
# Override with SANAD_DDS_INTERFACE=lo for desktop/sim development.
DDS_NETWORK_INTERFACE = os.environ.get("SANAD_DDS_INTERFACE", "eth0")


def _ensure_dirs() -> list[str]:
    """Create runtime directories. Failures are collected, not raised.

    Returns the list of directories that failed to create — caller can decide
    whether to log/abort. The module import never crashes due to a single
    permission error on a single directory.
    """
    failed: list[str] = []
    # NOTE(review): MODEL_DIR is not in this tuple, so it is never created
    # here — confirm that is intentional.
    for d in (DATA_DIR, LOGS_DIR, SCRIPTS_DIR, AUDIO_RECORDINGS_DIR,
              MOTION_RECORDINGS_DIR, MOTIONS_DIR):
        try:
            d.mkdir(parents=True, exist_ok=True)
        except OSError:
            # Record the failure and keep going with the remaining dirs.
            failed.append(str(d))
    return failed


# Best-effort: create dirs at import. Ignore failures here — individual
# subsystems will handle missing dirs at usage time and isolation prevents
# cascading import failures.
+_DIRS_FAILED = _ensure_dirs() + + +def load_config() -> dict[str, Any]: + """Load runtime config overrides from CONFIG_FILE (if present).""" + if CONFIG_FILE.exists(): + try: + with open(CONFIG_FILE, "r", encoding="utf-8") as f: + return json.load(f) + except (json.JSONDecodeError, OSError): + return {} + return {} + + +def save_config(cfg: dict[str, Any]): + CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True) + import os, tempfile + fd, tmp = tempfile.mkstemp( + prefix=f".{CONFIG_FILE.name}.", suffix=".tmp", + dir=str(CONFIG_FILE.parent), + ) + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + json.dump(cfg, f, ensure_ascii=False, indent=2) + os.replace(tmp, CONFIG_FILE) + except Exception: + try: + os.unlink(tmp) + except OSError: + pass + raise + + +# Apply config.json overrides on top of module constants (was previously dead code). +def _apply_overrides(): + cfg = load_config() + if not cfg: + return + g = globals() + gemini = cfg.get("gemini", {}) + if isinstance(gemini, dict): + if "api_key" in gemini and gemini["api_key"]: + g["GEMINI_API_KEY"] = gemini["api_key"] + if "model" in gemini: + g["GEMINI_MODEL"] = gemini["model"] + if "voice" in gemini: + g["GEMINI_VOICE"] = gemini["voice"] + audio = cfg.get("audio", {}) + if isinstance(audio, dict): + if "send_sample_rate" in audio: + g["SEND_SAMPLE_RATE"] = int(audio["send_sample_rate"]) + if "receive_sample_rate" in audio: + g["RECEIVE_SAMPLE_RATE"] = int(audio["receive_sample_rate"]) + if "chunk_size" in audio: + g["CHUNK_SIZE"] = int(audio["chunk_size"]) + if "sink" in audio: + g["SINK"] = audio["sink"] + if "source" in audio: + g["SOURCE"] = audio["source"] + dashboard = cfg.get("dashboard", {}) + if isinstance(dashboard, dict): + if "host" in dashboard: + g["DASHBOARD_HOST"] = dashboard["host"] + if "port" in dashboard: + g["DASHBOARD_PORT"] = int(dashboard["port"]) + + +try: + _apply_overrides() +except Exception: + # Never let a malformed config.json kill module import. 
+ pass diff --git a/vendor/Sanad/config/core_config.json b/vendor/Sanad/config/core_config.json new file mode 100644 index 0000000..9c56a21 --- /dev/null +++ b/vendor/Sanad/config/core_config.json @@ -0,0 +1,101 @@ +{ + "_description": "Tunables for core/* modules. Loaded via core.config_loader.load('core').", + + "brain": { + "allowed_callback_prefixes": [ + "Project.Sanad.motion.", + "Project.Sanad.voice.", + "motion.", + "voice." + ], + "gestural_speaking_default": false + }, + + "logger": { + "log_level": "INFO", + "format": "%(asctime)s [%(name)s] %(levelname)-7s %(message)s", + "datefmt": "%Y-%m-%d %H:%M:%S", + "file_max_bytes": 10485760, + "file_backup_count": 7 + }, + + "event_bus": { + "emit_timeout_sec": 0.5 + }, + + "paths": { + "_comment": "Path roots — resolved against BASE_DIR in core/config.py", + "data": "data", + "logs": "logs", + "scripts": "scripts", + "model": "model", + "audio_recordings": "data/audio", + "motion_recordings": "data/recordings/motion", + "motions": "data/motions" + }, + + "gemini_defaults": { + "_comment": "Baseline Gemini API config — SINGLE SOURCE OF TRUTH. All voice modules read from here.", + "api_key": "", + "model_live": "gemini-2.5-flash-native-audio-preview-12-2025", + "model_ws_uri": "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent", + "voice_name": "Charon", + "ws_timeout_sec": 30, + "default_system_prompt": "You are Bousandah, a wise and friendly Emirati assistant. Speak strictly in the UAE dialect (Khaleeji). Be helpful, concise, and use local greetings like 'Marhaba' and 'Ya Khoy'." 
+ }, + + "g1_hardware": { + "_comment": "G1 humanoid hardware constants — shared by every motion/voice module that talks to the arm.", + "num_motor": 29, + "enable_arm_sdk_index": 29, + "replay_hz": 60.0 + }, + + "script_files": { + "_comment": "Filenames (under scripts/) used across voice + dashboard", + "persona": "sanad_script.txt", + "rules": "sanad_rule.txt", + "arm_phrases": "sanad_arm.txt" + }, + + "dashboard_defaults": { + "host": null, + "port": 8000, + "interface": "wlan0" + }, + + "audio_defaults": { + "_comment": "Host PulseAudio fallback only — the G1 deployment uses UDP multicast mic + AudioClient.PlayStream speaker (see SANAD_USE_G1_MIC in config.py LIVE_TUNE). Default here is the Jetson/G1 built-in platform-sound chip.", + "send_sample_rate": 16000, + "receive_sample_rate": 24000, + "chunk_size": 512, + "channels": 1, + "sink": "alsa_output.platform-sound.analog-stereo", + "source": "alsa_input.platform-sound.analog-stereo" + }, + + "dds": { + "network_interface_default": "eth0" + }, + + "camera": { + "_comment": "Recognition tab camera daemon (parent process reads this). width/height/fps/jpeg_quality + the reconnect knobs configure CameraDaemon. Frames are cached in memory and pushed to the Gemini child over its stdin (no file drop). send_hz/stale_ms are read by the Gemini child via SANAD_VISION_SEND_HZ / SANAD_VISION_STALE_MS env vars (LIVE_TUNE).", + "width": 424, + "height": 240, + "fps": 15, + "jpeg_quality": 70, + "send_hz": 2, + "stale_ms": 1500, + "stale_threshold_s": 10.0, + "reconnect_min_s": 2.0, + "reconnect_max_s": 10.0, + "capture_timeout_ms": 5000 + }, + + "faces": { + "_comment": "Face gallery for Gemini-side recognition. Folder layout: data/faces/face_{id}/{face_1.jpg, ...} + optional meta.json {\"name\": \"...\"}. 
Gemini does the matching — no local ML model.", + "dir_rel": "data/faces", + "max_samples_per_face": 3, + "primer_resize_long_side": 256 + } +} diff --git a/vendor/Sanad/config/dashboard_config.json b/vendor/Sanad/config/dashboard_config.json new file mode 100644 index 0000000..e11ecdc --- /dev/null +++ b/vendor/Sanad/config/dashboard_config.json @@ -0,0 +1,49 @@ +{ + "_description": "Tunables for dashboard/* modules. Loaded via core.config_loader.load('dashboard').", + + "app": { + "_comment": "dashboard/app.py — FastAPI app", + "title": "Sanad Dashboard", + "version": "1.0.0", + "static_subdir": "dashboard/static" + }, + + "api_input": { + "_comment": "Shared by every route that accepts user text input / uploads. Single source of truth.", + "max_text_len": 2000, + "max_upload_bytes": 8388608 + }, + + "voice_route": { + "_comment": "dashboard/routes/voice.py — reads max_text_len from api_input above", + "api_key_mask_visible": 4 + }, + + "typed_replay_route": { + "_comment": "dashboard/routes/typed_replay.py — reads max_text_len from api_input above" + }, + + "records_route": { + "_comment": "dashboard/routes/records.py", + "index_filename": "records.json" + }, + + "prompt_route": { + "_comment": "dashboard/routes/prompt.py — script/rule filenames come from core.script_files; default prompt from core.gemini_defaults.default_system_prompt" + }, + + "logs_route": { + "_comment": "dashboard/routes/logs.py", + "default_tail_lines": 200, + "max_tail_lines": 5000 + }, + + "scripts_route": { + "_comment": "dashboard/routes/scripts.py — max_script_bytes reads from api_input.max_upload_bytes" + }, + + "live_subprocess_route": { + "_comment": "dashboard/routes/live_subprocess.py", + "tail_default_lines": 100 + } +} diff --git a/vendor/Sanad/config/gemini_config.json b/vendor/Sanad/config/gemini_config.json new file mode 100644 index 0000000..39c84ca --- /dev/null +++ b/vendor/Sanad/config/gemini_config.json @@ -0,0 +1,35 @@ +{ + "_description": "Tunables for gemini/* 
modules. Loaded via core.config_loader.load('gemini'). API credentials (api_key, model, voice_name) still live in core_config.json > gemini_defaults — single source of truth shared with config.py.", + + "client": { + "_comment": "gemini/client.py — short-session WebSocket client used by dashboard /generate + typed replay. default_system_prompt comes from core.gemini_defaults.", + "recv_timeout_sec": 30, + "reconnect_max_attempts": 3, + "reconnect_initial_delay_sec": 1.0, + "reconnect_max_delay_sec": 10.0 + }, + + "subprocess": { + "_comment": "gemini/subprocess.py — GeminiSubprocess supervisor. Spawns voice/sanad_voice.py as a child, tails stdout for Gemini-specific log markers, pushes camera frames + motion state to the child over its stdin, exposes transcript + state to the dashboard.", + "log_tail_size": 2000, + "transcript_tail_size": 30, + "log_name": "gemini_subprocess", + "stop_timeout_sec": 3.0, + "terminate_timeout_sec": 2.0, + "frame_forward_interval_sec": 0.5, + "noisy_prefixes": [ + "ALSA lib ", + "Expression 'alsa_", + "Cannot connect to server socket", + "jack server is not running" + ], + "noisy_fragments": [ + "Unknown PCM", + "Evaluate error", + "snd_pcm_open_noupdate", + "PaAlsaStream", + "snd_config_evaluate", + "snd_func_refer" + ] + } +} diff --git a/vendor/Sanad/config/local_config.json b/vendor/Sanad/config/local_config.json new file mode 100644 index 0000000..c542d7d --- /dev/null +++ b/vendor/Sanad/config/local_config.json @@ -0,0 +1,92 @@ +{ + "_description": "Tunables for local/* — fully on-device voice pipeline (Silero VAD → Whisper → Qwen via llama.cpp → CosyVoice2). Loaded via core.config_loader.load('local').", + + "subprocess": { + "_comment": "local/subprocess.py — LocalSubprocess supervisor. Mirrors gemini/subprocess.py. 
IMPORTANT: python_bin points at the `local` conda env (Python 3.8 + Jetson CUDA torch) so CosyVoice+Whisper run with GPU, while the dashboard/Gemini stack stays in gemini_sdk (Python 3.10).", + "python_bin": "/home/unitree/miniconda3/envs/local/bin/python", + "log_tail_size": 2000, + "transcript_tail_size": 30, + "log_name": "local_subprocess", + "stop_timeout_sec": 5.0, + "terminate_timeout_sec": 3.0, + "noisy_prefixes": [ + "ALSA lib ", + "Expression 'alsa_", + "Cannot connect to server socket", + "jack server is not running" + ], + "noisy_fragments": [ + "Unknown PCM", + "Evaluate error", + "snd_pcm_open_noupdate", + "PaAlsaStream" + ] + }, + + "vad": { + "_comment": "Silero VAD — CPU. Emits speech_start / speech_end events.", + "sample_rate": 16000, + "frame_ms": 32, + "threshold": 0.55, + "min_silence_ms": 400, + "min_speech_ms": 250, + "pad_start_ms": 200, + "pad_end_ms": 200, + "device": "cpu" + }, + + "stt": { + "_comment": "faster-whisper Large V3 Turbo, INT8 on GPU.", + "model_name": "large-v3-turbo", + "model_subdir": "faster-whisper-large-v3-turbo", + "device": "cuda", + "compute_type": "int8_float16", + "beam_size": 1, + "language": null, + "vad_filter": false, + "no_speech_threshold": 0.6, + "min_utterance_chars": 2, + "temperature": 0.0 + }, + + "llm": { + "_comment": "Qwen 2.5 Instruct via Ollama (default) OR self-managed llama.cpp. Set backend to pick.", + "backend": "ollama", + + "_ollama_comment": "Ollama daemon — assumes `ollama serve` is running; `ollama pull qwen2.5:1.5b` to fetch.", + "ollama_host": "127.0.0.1", + "ollama_port": 11434, + "ollama_model": "qwen2.5:1.5b", + "ollama_keep_alive": "5m", + + "_llamacpp_comment": "Self-managed llama-server subprocess. 
Only used when backend='llama_cpp'.", + "model_subdir": "qwen2.5-1.5b-instruct-q4_k_m.gguf", + "server_binary": "llama-server", + "host": "127.0.0.1", + "port": 8080, + "n_gpu_layers": 99, + "ctx_size": 2048, + "threads": 4, + "startup_timeout_sec": 30, + + "_shared_comment": "Generation params — both backends.", + "request_timeout_sec": 30, + "max_tokens": 200, + "temperature": 0.7, + "top_p": 0.9, + "stop": ["<|im_end|>", "\n\n\n"], + "chunk_delimiters": ".,?!؟،", + "chunk_min_chars": 8 + }, + + "tts": { + "_comment": "CosyVoice2 0.5B streaming — GPU. Uses a 3s reference WAV for voice cloning.", + "model_subdir": "CosyVoice2-0.5B", + "reference_wav_subdir": "khaleeji_reference_3s.wav", + "reference_prompt": "", + "stream_chunk_sec": 0.25, + "sample_rate": 16000, + "queue_max": 3, + "device": "cuda" + } +} diff --git a/vendor/Sanad/config/mask_config.json b/vendor/Sanad/config/mask_config.json new file mode 100644 index 0000000..fb3c488 --- /dev/null +++ b/vendor/Sanad/config/mask_config.json @@ -0,0 +1,27 @@ +{ + "_comment": "Shining LED face mask (BLE). Driven by the FaceController subsystem (face/mask_face.py) which imports the standalone Mask project. Needs an env with bleak + Pillow (g1_env). Free the mask from the phone app before connecting.", + "mask_dir": "", + "_mask_dir": "Path to the Mask project (flat shiningmask lib). Empty -> auto: /Mask. Env override: SANAD_MASK_DIR.", + "name_prefix": "MASK", + "_name_prefix": "BLE scan prefix; the mask advertises e.g. 'MASK-02A711'. Env: SANAD_MASK_NAME_PREFIX.", + "address": "", + "_address": "Specific BLE MAC to connect to. Empty -> scan by name_prefix. Env: SANAD_MASK_ADDRESS.", + "adapter": "", + "_adapter": "BlueZ adapter (e.g. 'hci0'). Empty -> default. Env: SANAD_MASK_ADAPTER.", + "brightness": 95, + "_brightness": "0-128. 
Keep <=100 to avoid LED flicker (battery-limited).", + "fps": 8.0, + "_fps": "FaceAnimator (fallback driver) frame rate (PLAY commands/sec).", + "lifelike": true, + "_lifelike": "Use the LifelikeFace driver (face/face_motion.py): eye saccades, varied blinks, listening/thinking/speaking states, reactions, smooth lip-sync. false -> basic FaceAnimator.", + "autostart": true, + "_autostart": "Auto-connect + Start face on boot (best-effort, background — never blocks startup). After the one-time frame upload, later boots just connect + animate. false -> connect/start manually from the dashboard.", + "connect_timeout": 15.0, + "connect_attempts": 5, + "eye_color": [0, 230, 255], + "_eye_color": "Face eye/iris RGB (baked into the uploaded frames). Default cyan. Set via the dashboard 'Apply colors' (persisted here).", + "mouth_color": [255, 50, 50], + "_mouth_color": "Face mouth RGB. Default red.", + "sclera_color": [255, 255, 255], + "_sclera_color": "White-of-the-eye RGB. Default white." +} diff --git a/vendor/Sanad/config/motion_config.json b/vendor/Sanad/config/motion_config.json new file mode 100644 index 0000000..9070df1 --- /dev/null +++ b/vendor/Sanad/config/motion_config.json @@ -0,0 +1,70 @@ +{ + "_description": "Tunables for motion/* modules. Loaded via core.config_loader.load('motion').", + + "arm_controller": { + "_comment": "motion/arm_controller.py — enable_arm_sdk_index + replay_hz come from core.g1_hardware", + "ramp_in_steps": 60, + "ramp_out_steps": 180, + "settle_hold_sec": 0.5, + "watchdog_timeout_sec": 0.25, + "watchdog_disable_after_sec": 1.0, + "arm_indices_start": 15, + "arm_indices_stop": 29, + "jsonl_id_start": 100 + }, + + "loco_controller": { + "_comment": "G1_Controller/loco_controller.py — manual locomotion. 
NIC is shared from the arm's DDS init (config core.dds / SANAD_DDS_INTERFACE), not set here.", + "cap_walk": 0.6, + "cap_run": 1.2, + "lin_step": 0.05, + "ang_step": 0.2, + "watchdog_timeout_sec": 0.5, + "arm_block_window_sec": 1.5, + "step_duration_sec": 0.6, + "step_speed_frac": 0.5, + "loco_timeout_sec": 10.0, + "msc_timeout_sec": 5.0 + }, + + "macro_player": { + "_comment": "motion/macro_player.py — JSONL playback", + "ramp_in_steps": 60, + "ramp_out_steps": 60, + "watchdog_disable_after_sec": 1.0 + }, + + "macro_recorder": { + "_comment": "motion/macro_recorder.py — record arm trajectories", + "sample_rate_hz": 60.0, + "smoothing_window": 5 + }, + + "teaching": { + "_comment": "motion/teaching.py — teach-by-demo", + "safe_hold_sec": 3.0, + "waist_kp": 60.0, + "waist_kd": 4.0, + "hold_arm_kp": 60.0, + "hold_arm_kd": 4.0, + "teach_arm_kp": 0.0, + "teach_arm_kd": 2.0 + }, + + "sanad_arm_controller": { + "_comment": "motion/sanad_arm_controller.py — g1_num_motor + enable_arm_sdk_index + replay_hz come from core.g1_hardware", + "action_cooldown_sec": 1.0, + "stability_threshold": 0.06, + "gains": { + "kp_high": 300.0, + "kd_high": 3.0, + "kp_low": 80.0, + "kd_low": 3.0, + "kp_wrist": 40.0, + "kd_wrist": 1.5 + }, + "weak_motors": [4, 10, 15, 16, 17, 18, 22, 23, 24, 25], + "wrist_motors": [19, 20, 21, 26, 27, 28], + "data_subdir": "DataG1" + } +} diff --git a/vendor/Sanad/config/voice_config.json b/vendor/Sanad/config/voice_config.json new file mode 100644 index 0000000..00e151d --- /dev/null +++ b/vendor/Sanad/config/voice_config.json @@ -0,0 +1,75 @@ +{ + "_description": "Tunables for voice/* modules. Loaded via core.config_loader.load('voice').", + + "sanad_voice": { + "_comment": "voice/sanad_voice.py — main live voice subprocess. 
Gemini API credentials (api_key, model, voice_name) come from core_config.json's gemini_defaults — single source of truth.", + "mic_gain": 1.0, + "play_chunk_bytes": 96000, + "log_dir": "~/logs", + "log_name": "gemini_live_v2", + "session_timeout_sec": 660, + "max_reconnect_delay_sec": 30, + "max_consecutive_errors": 10, + "no_messages_timeout_sec": 30 + }, + + "mic_udp": { + "_comment": "G1 built-in mic — UDP multicast subscriber", + "group": "239.168.123.161", + "port": 5555, + "buffer_max_bytes": 64000, + "read_timeout_sec": 0.04, + "socket_timeout_sec": 1.0 + }, + + "speaker": { + "_comment": "G1 built-in speaker — AudioClient.PlayStream wrapper", + "app_name": "sanad", + "begin_stream_pause_sec": 0.15, + "wait_finish_margin_sec": 0.3 + }, + + "vad": { + "_comment": "Gemini Live server-side voice-activity-detection config", + "start_sensitivity": "START_SENSITIVITY_HIGH", + "end_sensitivity": "END_SENSITIVITY_LOW", + "prefix_padding_ms": 20, + "silence_duration_ms": 200 + }, + + "barge_in": { + "threshold": 500, + "loud_chunks_needed": 3, + "cooldown_sec": 0.3, + "echo_suppress_below": 500, + "ai_speak_grace_sec": 0.15 + }, + + "recording": { + "enabled": true, + "dir_relative": "data/recordings" + }, + + "typed_replay": { + "_comment": "voice/typed_replay.py — max_text_len comes from dashboard.api_input", + "monitor_chunk_size": 512, + "monitor_tail_sec": 0.2 + }, + + "live_voice_loop": { + "_comment": "voice/live_voice_loop.py — arm phrase dispatcher. 
_T = TypeVar("_T")


async def _to_thread_backport(func: Callable[..., _T], /, *args: Any, **kwargs: Any) -> _T:
    """Run ``func(*args, **kwargs)`` in the default thread pool and await it.

    Backport of ``asyncio.to_thread`` for Python 3.8. The caller's
    ``contextvars`` context is copied and ``func`` runs inside that copy, so
    context variables set in the calling task are visible in the worker
    thread.

    Returns:
        Whatever ``func`` returns. Exceptions raised by ``func`` propagate to
        the awaiting coroutine.
    """
    import contextvars

    # A coroutine body always has a running loop; get_running_loop() fails
    # loudly instead of silently creating a new loop.
    loop = asyncio.get_running_loop()
    ctx = contextvars.copy_context()
    call = functools.partial(ctx.run, func, *args, **kwargs)
    return await loop.run_in_executor(None, call)


if sys.version_info >= (3, 9):
    # Native implementation
    to_thread = asyncio.to_thread  # type: ignore[attr-defined]
else:
    to_thread = _to_thread_backport

# Also patch the asyncio module so existing `asyncio.to_thread` calls work
# without rewriting every consumer file. Done lazily — only if missing.
if not hasattr(asyncio, "to_thread"):
    asyncio.to_thread = to_thread  # type: ignore[attr-defined]
class Brain:
    """Singleton-style manager that bridges all subsystems.

    Owns the SkillRegistry, holds the injected subsystem handles (voice,
    audio manager, arm, macro recorder/player, live voice loop) and runs one
    skill at a time.
    """

    def __init__(self):
        self.registry = SkillRegistry()
        self._lock = asyncio.Lock()

        # Sub-modules are injected after construction so imports stay lazy.
        self._voice = None        # gemini.client.GeminiVoiceClient
        self._audio_mgr = None    # voice.audio_manager.AudioManager
        self._arm = None          # motion.arm_controller.ArmController
        self._macro_rec = None    # motion.macro_recorder.MacroRecorder
        self._macro_play = None   # motion.macro_player.MacroPlayer
        self._live_voice = None   # voice.live_voice_loop.LiveVoiceLoop

        # toggle: move while Gemini speaks. The start value comes from the
        # "brain" section of core_config.json (gestural_speaking_default).
        self.gestural_speaking = bool(_BRAIN_CFG.get("gestural_speaking_default", False))
        self._running_skill: str | None = None

    # -- dependency injection --

    def attach_voice(self, client):
        """Register the voice client."""
        self._voice = client
        log.info("Voice client attached")

    def attach_audio_manager(self, mgr):
        """Register the audio manager used to play skill audio files."""
        self._audio_mgr = mgr
        log.info("Audio manager attached")

    def attach_arm(self, arm):
        """Register the arm controller used to replay skill motion files."""
        self._arm = arm
        log.info("Arm controller attached")

    def attach_macro_recorder(self, rec):
        """Register the macro recorder."""
        self._macro_rec = rec

    def attach_macro_player(self, player):
        """Register the macro player."""
        self._macro_play = player

    def attach_live_voice(self, lv):
        """Register the live voice loop."""
        self._live_voice = lv
        log.info("LiveVoiceLoop attached")

    # -- callback resolution --

    def _resolve_callback(self, callback_str: str) -> Callable | None:
        """Resolve 'module.submodule:function_name' → callable.

        SECURITY: only modules under ALLOWED_CALLBACK_PREFIXES may be imported.
        Skill JSON is dashboard-editable and otherwise an arbitrary-import RCE.

        Examples:
            "Project.Sanad.motion.arm_controller:wave_hand"
            "motion.arm_controller:wave_hand"

        Returns:
            The resolved callable, or None when the string is empty,
            malformed, outside the whitelist, cannot be imported, or names an
            attribute that is not callable.
        """
        if not callback_str:
            return None
        if ":" not in callback_str:
            log.error("Invalid callback (missing ':'): %s", callback_str)
            return None
        module_path, func_name = callback_str.rsplit(":", 1)
        if not any(module_path.startswith(prefix) or module_path == prefix.rstrip(".")
                   for prefix in ALLOWED_CALLBACK_PREFIXES):
            log.error(
                "Callback %s rejected — module '%s' not in whitelist",
                callback_str, module_path,
            )
            return None
        try:
            mod = importlib.import_module(module_path)
            target = getattr(mod, func_name)
        except Exception:
            log.exception("Cannot resolve callback '%s'", callback_str)
            return None
        if not callable(target):
            # A constant or sub-module would only fail later, at call time,
            # with a confusing TypeError inside execute_skill.
            log.error("Callback %s rejected — '%s' is not callable", callback_str, func_name)
            return None
        return target

    # -- skill execution --

    async def execute_skill(self, skill_id: str) -> dict[str, Any]:
        """Run a skill: play audio + execute motion + fire callback.

        Returns:
            A result dict with ``skill_id``, ``ok`` and ``elapsed_sec``, plus
            ``error`` / ``callback_result`` / per-media keys when applicable.
            Failures during execution are reported through ``ok``/``error``
            rather than raised.

        Raises:
            KeyError: the skill id is not registered.
            RuntimeError: the skill is disabled, or another skill is running.
        """
        skill = self.registry.get(skill_id)
        if skill is None:
            raise KeyError(f"Skill not found: {skill_id}")
        if not skill.enabled:
            raise RuntimeError(f"Skill '{skill_id}' is disabled.")

        async with self._lock:
            if self._running_skill:
                raise RuntimeError(f"Skill '{self._running_skill}' is already running.")
            self._running_skill = skill_id

        t0 = time.monotonic()
        result: dict[str, Any] = {"skill_id": skill_id, "ok": True}

        try:
            await bus.emit("skill.started", skill_id=skill_id)

            # Validate required attachments before partial execution
            if skill.audio_file and self._audio_mgr is None:
                raise RuntimeError("AudioManager not attached but skill requires audio")
            if skill.motion_file and self._arm is None:
                raise RuntimeError("ArmController not attached but skill requires motion")

            if skill.sync_mode == "audio_first":
                await self._exec_audio_first(skill, result)
            elif skill.sync_mode == "motion_first":
                await self._exec_motion_first(skill, result)
            else:
                if skill.sync_mode != "parallel":
                    log.warning(
                        "Skill %s has unknown sync_mode %r — running in parallel",
                        skill_id, skill.sync_mode,
                    )
                await self._exec_parallel(skill, result)

            # Fire callback — run blocking callbacks in a thread to avoid stalling the loop
            cb = self._resolve_callback(skill.callback)
            if cb is not None:
                if asyncio.iscoroutinefunction(cb):
                    cb_result = await cb()
                else:
                    cb_result = await asyncio.to_thread(cb)
                # Only "no return value" maps to "ok"; a falsy result such as
                # False or 0 is reported as-is instead of being masked.
                result["callback_result"] = "ok" if cb_result is None else str(cb_result)

        except Exception as exc:
            result["ok"] = False
            result["error"] = str(exc)
            log.exception("Skill %s failed", skill_id)
        finally:
            elapsed = time.monotonic() - t0
            result["elapsed_sec"] = round(elapsed, 3)
            async with self._lock:
                self._running_skill = None
            await bus.emit("skill.finished", skill_id=skill_id, result=result)

        return result

    async def cancel_skill(self) -> dict[str, Any]:
        """Cancel any running skill — sends cancel to arm controller.

        Both the arm cancel and the audio stop are best effort: a failure in
        one is logged and does not prevent the other.

        Returns:
            ``{"cancelled": <id of the skill that was running, or None>}``.
        """
        cancelled = self._running_skill
        if self._arm is not None and hasattr(self._arm, "cancel"):
            try:
                self._arm.cancel()
            except Exception:
                log.exception("arm.cancel() failed")
        if self._audio_mgr is not None and hasattr(self._audio_mgr, "stop_playback"):
            try:
                self._audio_mgr.stop_playback()
            except Exception:
                log.exception("audio_mgr.stop_playback() failed")
        return {"cancelled": cancelled}

    async def _exec_parallel(self, skill: Skill, result: dict):
        """Play the skill's audio and motion concurrently."""
        tasks = []
        if skill.audio_file:
            tasks.append(asyncio.create_task(self._play_audio(skill.audio_file, result)))
        if skill.motion_file:
            tasks.append(asyncio.create_task(self._play_motion(skill.motion_file, result)))
        if not tasks:
            return
        # Wait for BOTH tasks even if one fails, so the skill is not reported
        # finished while the other half is still running.
        outcomes = await asyncio.gather(*tasks, return_exceptions=True)
        for outcome in outcomes:
            if isinstance(outcome, BaseException):
                raise outcome

    async def _exec_audio_first(self, skill: Skill, result: dict):
        """Play the skill's audio to completion, then its motion."""
        if skill.audio_file:
            await self._play_audio(skill.audio_file, result)
        if skill.motion_file:
            await self._play_motion(skill.motion_file, result)

    async def _exec_motion_first(self, skill: Skill, result: dict):
        """Play the skill's motion to completion, then its audio."""
        if skill.motion_file:
            await self._play_motion(skill.motion_file, result)
        if skill.audio_file:
            await self._play_audio(skill.audio_file, result)

    async def _play_audio(self, audio_file: str, result: dict):
        """Play one WAV file; relative paths resolve under AUDIO_RECORDINGS_DIR.

        Outcome is recorded in ``result`` as ``audio_played`` or ``audio_error``.
        """
        path = Path(audio_file)
        if not path.is_absolute():
            path = AUDIO_RECORDINGS_DIR / path
        if not path.exists():
            result["audio_error"] = f"File not found: {path}"
            log.warning("Audio file missing: %s", path)
            return
        if self._audio_mgr is None:
            result["audio_error"] = "AudioManager not attached"
            return
        await asyncio.to_thread(self._audio_mgr.play_wav, path)
        result["audio_played"] = str(path)

    async def _play_motion(self, motion_file: str, result: dict):
        """Replay one motion file; relative paths resolve under MOTIONS_DIR.

        Outcome is recorded in ``result`` as ``motion_played`` or ``motion_error``.
        """
        path = Path(motion_file)
        if not path.is_absolute():
            path = MOTIONS_DIR / path
        if not path.exists():
            result["motion_error"] = f"File not found: {path}"
            log.warning("Motion file missing: %s", path)
            return
        if self._arm is None:
            result["motion_error"] = "ArmController not attached"
            return
        await asyncio.to_thread(self._arm.replay_file, str(path))
        result["motion_played"] = str(path)

    # -- macro recording --

    async def start_macro_recording(self, name: str) -> dict[str, Any]:
        """Start recording a macro in a worker thread.

        Raises:
            RuntimeError: no MacroRecorder has been attached.
        """
        if self._macro_rec is None:
            raise RuntimeError("MacroRecorder not attached.")
        return await asyncio.to_thread(self._macro_rec.start, name)

    async def stop_macro_recording(self) -> dict[str, Any]:
        """Stop the current macro recording in a worker thread.

        Raises:
            RuntimeError: no MacroRecorder has been attached.
        """
        if self._macro_rec is None:
            raise RuntimeError("MacroRecorder not attached.")
        return await asyncio.to_thread(self._macro_rec.stop)

    async def play_macro(self, name: str) -> dict[str, Any]:
        """Play a recorded macro in a worker thread.

        Raises:
            RuntimeError: no MacroPlayer has been attached.
        """
        if self._macro_play is None:
            raise RuntimeError("MacroPlayer not attached.")
        return await asyncio.to_thread(self._macro_play.play, name)

    # -- gestural speaking toggle --

    def set_gestural_speaking(self, enabled: bool):
        """Set the gestural-speaking flag and announce the change on the bus."""
        self.gestural_speaking = enabled
        bus.emit_sync("brain.gestural_speaking_changed", enabled=enabled)
        log.info("Gestural speaking: %s", "ON" if enabled else "OFF")

    # -- status --

    def status(self) -> dict[str, Any]:
        """Return a snapshot of attachments, toggles and the running skill."""
        return {
            "voice_attached": self._voice is not None,
            "arm_attached": self._arm is not None,
            "audio_manager_attached": self._audio_mgr is not None,
            "live_voice_attached": self._live_voice is not None,
            "gestural_speaking": self.gestural_speaking,
            "running_skill": self._running_skill,
            "total_skills": len(self.registry.list_skills()),
        }
# Resolved at first-load time (avoids circular import with config.py)
_BASE_DIR: Path | None = None
_CONFIG_DIR: Path | None = None

_CACHE: dict[str, dict[str, Any]] = {}
_LOCK = threading.Lock()


def _resolve_dirs() -> tuple[Path, Path]:
    """Find Sanad's root and config/ directory (lazy + cached)."""
    global _BASE_DIR, _CONFIG_DIR
    if _BASE_DIR is not None and _CONFIG_DIR is not None:
        return _BASE_DIR, _CONFIG_DIR
    here = Path(__file__).resolve().parent       # Sanad/core
    base = here.parent                           # Sanad/
    _BASE_DIR = base
    _CONFIG_DIR = base / "config"
    return _BASE_DIR, _CONFIG_DIR


def _strip_comments(d: Any) -> Any:
    """Return a copy of ``d`` without underscore-prefixed keys, at every level.

    Dicts and lists are walked recursively; any string key starting with
    ``_`` (``_comment``, ``_description``, ...) is dropped. Other values are
    returned unchanged.
    """
    if isinstance(d, dict):
        return {
            k: _strip_comments(v) for k, v in d.items()
            if not (isinstance(k, str) and k.startswith("_"))
        }
    if isinstance(d, list):
        return [_strip_comments(x) for x in d]
    return d


def load(subsystem: str) -> dict[str, Any]:
    """Load + cache config/<subsystem>_config.json.

    Returns a dict with all leading-underscore keys stripped. A missing,
    unreadable or malformed file, or one whose top-level value is not a JSON
    object, yields an empty dict (callers supply their own defaults via
    `get(..., default)`). The outcome is cached either way until `reload()`.
    """
    with _LOCK:
        if subsystem in _CACHE:
            return _CACHE[subsystem]

        _, cfg_dir = _resolve_dirs()
        path = cfg_dir / f"{subsystem}_config.json"
        cleaned: dict[str, Any] = {}
        try:
            raw = json.loads(path.read_text(encoding="utf-8"))
        except FileNotFoundError:
            log.warning("config file missing: %s — using empty dict", path)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
            # OSError covers permission problems and "path is a directory".
            log.error("config file %s unreadable: %s", path, exc)
        else:
            if isinstance(raw, dict):
                cleaned = _strip_comments(raw)
            else:
                log.error(
                    "config file %s unreadable: top-level value is %s, expected an object",
                    path, type(raw).__name__,
                )

        _CACHE[subsystem] = cleaned
        return cleaned


def get(subsystem: str, dotted_key: str, default: Any = None) -> Any:
    """Fetch a nested key. Supports dotted-paths: 'barge_in.threshold'.

    Returns ``default`` as soon as a path component is missing or the value
    reached so far is not a dict.
    """
    cur: Any = load(subsystem)
    for part in dotted_key.split("."):
        if not isinstance(cur, dict) or part not in cur:
            return default
        cur = cur[part]
    return cur


def section(subsystem: str, name: str) -> dict[str, Any]:
    """Convenience — load one top-level section, always returning a dict.

    Example: `section("voice", "sanad_voice")` → dict of that section.
    """
    s = get(subsystem, name, {})
    return s if isinstance(s, dict) else {}


def reload(subsystem: str | None = None) -> None:
    """Drop cached config so next load() re-reads from disk.

    With no argument every cached subsystem is dropped.
    """
    with _LOCK:
        if subsystem is None:
            _CACHE.clear()
        else:
            _CACHE.pop(subsystem, None)


def config_dir() -> Path:
    """Absolute path to Sanad/config/."""
    _, d = _resolve_dirs()
    return d
class EventBus:
    """In-process publish/subscribe hub.

    Handlers are plain callables or coroutine functions that accept the
    event's keyword arguments. The listener table is guarded by a lock;
    handlers are invoked outside the lock on a snapshot of the list.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self._listeners: dict[str, list[Callable]] = defaultdict(list)
        # Strong references to tasks scheduled by emit_sync: the event loop
        # itself only keeps weak references to tasks.
        self._tasks: set[asyncio.Task] = set()

    @staticmethod
    def _label(callback: Callable) -> str:
        """Printable name for a handler; works for partials and callable objects."""
        return getattr(callback, "__qualname__", None) or repr(callback)

    def _snapshot(self, event: str) -> list[Callable]:
        """Copy of the current handler list for ``event``."""
        with self._lock:
            return list(self._listeners.get(event, ()))

    def _spawn(self, loop, coro, handler: Callable, event: str) -> None:
        """Schedule ``coro`` on ``loop``, keep it referenced, log its failure."""
        task = loop.create_task(coro)
        self._tasks.add(task)

        def _finished(done) -> None:
            self._tasks.discard(done)
            if done.cancelled():
                return
            exc = done.exception()
            if exc is not None:
                log.error(
                    "Handler %s for event '%s' failed",
                    self._label(handler), event, exc_info=exc,
                )

        task.add_done_callback(_finished)

    def on(self, event: str, callback: Callable):
        """Subscribe ``callback`` (sync or async callable) to ``event``."""
        with self._lock:
            self._listeners[event].append(callback)
        log.debug("Subscribed %s → %s", event, self._label(callback))

    def off(self, event: str, callback: Callable):
        """Unsubscribe ``callback`` from ``event``; unknown pairs are ignored."""
        with self._lock:
            handlers = self._listeners.get(event)
            if handlers is None:
                return
            try:
                handlers.remove(callback)
            except ValueError:
                pass  # was not subscribed

    async def emit(self, event: str, **kwargs: Any):
        """Call every handler of ``event`` in subscription order.

        Coroutines returned by handlers are awaited one after another. A
        failing handler is logged and does not stop the remaining handlers.
        """
        for handler in self._snapshot(event):
            try:
                result = handler(**kwargs)
                if asyncio.iscoroutine(result):
                    await result
            except Exception:
                log.exception(
                    "Handler %s for event '%s' failed", self._label(handler), event,
                )

    def emit_sync(self, event: str, **kwargs: Any):
        """Fire-and-forget from a sync context.

        Async handlers are scheduled on the running event loop if one exists.
        Otherwise they are dropped with a warning (the original silent-no-op
        bug — at least now it's logged).
        """
        for handler in self._snapshot(event):
            try:
                if asyncio.iscoroutinefunction(handler):
                    # Look for the loop first so no coroutine object is
                    # created when there is nowhere to run it.
                    try:
                        loop = asyncio.get_running_loop()
                    except RuntimeError:
                        log.warning(
                            "Async handler %s for '%s' dropped — no running loop",
                            self._label(handler), event,
                        )
                        continue
                    self._spawn(loop, handler(**kwargs), handler, event)
                    continue
                result = handler(**kwargs)
                if asyncio.iscoroutine(result):
                    # Sync handler returned a coroutine — schedule it
                    try:
                        loop = asyncio.get_running_loop()
                    except RuntimeError:
                        result.close()
                        log.warning(
                            "Coroutine result from %s for '%s' dropped — no running loop",
                            self._label(handler), event,
                        )
                    else:
                        self._spawn(loop, result, handler, event)
            except Exception:
                log.exception(
                    "Handler %s for event '%s' failed", self._label(handler), event,
                )


bus = EventBus()
# Push function for the WebSocket log stream; stays None until registered
# through set_ws_push().
_ws_push_fn = None


def set_ws_push(fn):
    """Register the push function from dashboard.websockets.log_stream."""
    global _ws_push_fn
    _ws_push_fn = fn


class _WSHandler(logging.Handler):
    """Forwards every log record to the WebSocket log stream."""

    def emit(self, record: logging.LogRecord):
        if _ws_push_fn is not None:
            try:
                _ws_push_fn(self.format(record))
            except Exception:
                # Deliberate best-effort: a broken stream must never make a
                # logging call raise.
                pass


def get_logger(name: str, *, to_console: bool = True) -> logging.Logger:
    """Return the ``sanad.<name>`` logger, configuring it on first use.

    On the first call for a name the logger gets a rotating file handler
    (``LOGS_DIR/<name>.log``, DEBUG), an optional stdout handler (INFO) and a
    WebSocket stream handler (INFO). Later calls return the already
    configured logger unchanged, so ``to_console`` only matters on the first
    call.
    """
    logger = logging.getLogger(f"sanad.{name}")
    if logger.handlers:
        return logger

    logger.setLevel(logging.DEBUG)
    logger.propagate = False

    LOGS_DIR.mkdir(parents=True, exist_ok=True)
    # Pin UTF-8: messages contain non-ASCII text ("→", "—"), and the default
    # file encoding otherwise depends on the process locale.
    fh = RotatingFileHandler(
        LOGS_DIR / f"{name}.log",
        maxBytes=_MAX_BYTES,
        backupCount=_BACKUP_COUNT,
        encoding="utf-8",
    )
    fh.setFormatter(_formatter)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    if to_console:
        sh = logging.StreamHandler(sys.stdout)
        sh.setFormatter(_formatter)
        sh.setLevel(logging.INFO)
        logger.addHandler(sh)

    # WebSocket stream handler
    wsh = _WSHandler()
    wsh.setFormatter(_formatter)
    wsh.setLevel(logging.INFO)
    logger.addHandler(wsh)

    return logger
# Pointer file holding the basename of the selected persona script.
ACTIVE_PERSONA_FILE = DATA_DIR / "active_persona.txt"


def default_persona_name() -> str:
    """The configured default persona filename (core.script_files.persona).

    Falls back to ``sanad_script.txt`` when the setting is missing, blank, or
    the config cannot be loaded. Never raises.
    """
    try:
        from Project.Sanad.core.config_loader import section as _section
        name = (_section("core", "script_files") or {}).get("persona")
        return (name or "sanad_script.txt").strip() or "sanad_script.txt"
    except Exception:
        return "sanad_script.txt"


def active_persona_name() -> str:
    """Selected persona basename — the chosen variant if set AND still exists,
    otherwise the default. Never raises."""
    default = default_persona_name()
    try:
        sel = ACTIVE_PERSONA_FILE.read_text(encoding="utf-8").strip()
    except Exception:
        sel = ""
    if sel:
        cand = SCRIPTS_DIR / Path(sel).name  # basename only — no traversal
        if cand.is_file():
            return cand.name
    return default


def active_persona_path() -> Path:
    """Full path to the persona script Gemini should load right now."""
    return SCRIPTS_DIR / active_persona_name()


def set_active_persona(name: str | None) -> str:
    """Persist the selected persona basename.

    Passing None/"" or the default name clears the pointer (revert to
    default).

    Args:
        name: Persona filename; only its basename is used.

    Returns:
        The effective active name. If the pointer file could not be removed
        while clearing, this is whatever the still-present pointer resolves
        to rather than the default.

    Raises:
        FileNotFoundError: A non-default name that does not exist in
            SCRIPTS_DIR.
    """
    nm = (Path(str(name)).name if name else "").strip()
    default = default_persona_name()
    if not nm or nm == default:
        try:
            ACTIVE_PERSONA_FILE.unlink()
        except FileNotFoundError:
            pass
        except Exception:
            # The pointer survived, so the old selection is still live.
            # Report what is really in effect instead of claiming a reset.
            return active_persona_name()
        return default
    if not (SCRIPTS_DIR / nm).is_file():
        raise FileNotFoundError(nm)
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    ACTIVE_PERSONA_FILE.write_text(nm, encoding="utf-8")
    return nm
@dataclass
class Skill:
    """One registry entry: an audio clip, an optional motion file and a callback."""

    id: str
    audio_file: str = ""
    motion_file: str = ""
    callback: str = ""
    sync_mode: str = "parallel"  # parallel | audio_first | motion_first
    enabled: bool = True
    description: str = ""
    meta: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the skill as a plain dict (the form stored in JSON)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Skill:
        """Build a Skill from a dict, ignoring keys that are not fields."""
        known = {f.name for f in cls.__dataclass_fields__.values()}
        filtered = {k: v for k, v in data.items() if k in known}
        return cls(**filtered)


class SkillRegistry:
    """Thread-safe, JSON-backed registry of skills."""

    def __init__(self, path: Path = SKILLS_FILE):
        self._path = path
        self._lock = threading.Lock()
        self._skills: dict[str, Skill] = {}
        self._load()

    # -- persistence --

    def _load(self):
        """Populate the registry from disk; a missing file means no skills.

        If the file cannot be parsed it is renamed to ``*.json.corrupt`` and
        the registry starts empty.
        """
        if not self._path.exists():
            self._skills = {}
            return
        try:
            with open(self._path, "r", encoding="utf-8") as f:
                payload = json.load(f)
            for entry in payload.get("skills", []):
                skill = Skill.from_dict(entry)
                self._skills[skill.id] = skill
            log.info("Loaded %d skills from %s", len(self._skills), self._path)
        except Exception as exc:
            log.warning("Could not load skills: %s", exc)
            # Backup corrupt file rather than silently nuking
            try:
                self._path.rename(self._path.with_suffix(".json.corrupt"))
                log.warning("Backed up corrupt skills to %s.corrupt", self._path)
            except OSError:
                pass
            self._skills = {}

    _VALID_SYNC_MODES = {"parallel", "audio_first", "motion_first"}

    def _save(self):
        """Write all skills to disk. Callers hold ``self._lock``."""
        self._path.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "version": 1,
            "total": len(self._skills),
            "skills": [s.to_dict() for s in self._skills.values()],
        }
        # Atomic write: tempfile + os.replace
        fd, tmp = tempfile.mkstemp(
            prefix=f".{self._path.name}.", suffix=".tmp",
            dir=str(self._path.parent),
        )
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(payload, f, ensure_ascii=False, indent=2)
            os.replace(tmp, self._path)
        except Exception:
            try:
                os.unlink(tmp)
            except OSError:
                pass
            raise

    # -- CRUD --

    def list_skills(self) -> list[dict[str, Any]]:
        """Return every skill as a dict."""
        with self._lock:
            return [s.to_dict() for s in self._skills.values()]

    def get(self, skill_id: str) -> Skill | None:
        """Return the skill with *skill_id*, or None."""
        with self._lock:
            return self._skills.get(skill_id)

    def add(self, skill: Skill) -> Skill:
        """Insert *skill* and persist the registry.

        An empty ``id`` is replaced by a generated 12-character hex id.

        Raises:
            ValueError: Invalid ``sync_mode`` or an id that already exists.
        """
        if skill.sync_mode not in self._VALID_SYNC_MODES:
            raise ValueError(
                f"Invalid sync_mode '{skill.sync_mode}' (allowed: {sorted(self._VALID_SYNC_MODES)})"
            )
        with self._lock:
            if not skill.id:
                skill.id = uuid.uuid4().hex[:12]
            elif skill.id in self._skills:
                raise ValueError(f"Skill id already exists: {skill.id}")
            self._skills[skill.id] = skill
            self._save()
        log.info("Added skill %s (%s)", skill.id, skill.description)
        return skill

    def update(self, skill_id: str, updates: dict[str, Any]) -> Skill | None:
        """Apply *updates* to an existing skill and persist the registry.

        Only dataclass fields other than ``id`` are written; any other key is
        ignored.

        Returns:
            The updated skill, or None when *skill_id* is unknown.

        Raises:
            ValueError: ``updates`` carries an invalid ``sync_mode``.
        """
        with self._lock:
            existing = self._skills.get(skill_id)
            if existing is None:
                return None
            if "sync_mode" in updates and updates["sync_mode"] not in self._VALID_SYNC_MODES:
                raise ValueError(
                    f"Invalid sync_mode '{updates['sync_mode']}'"
                )
            writable = Skill.__dataclass_fields__
            for key, value in updates.items():
                # hasattr() also matches methods, so a key such as "to_dict"
                # used to replace the method on this instance and break every
                # later save. Accept real fields only.
                if key != "id" and key in writable:
                    setattr(existing, key, value)
            self._save()
        log.info("Updated skill %s", skill_id)
        return existing

    def delete(self, skill_id: str) -> dict[str, Any] | None:
        """Remove a skill and persist; return its dict, or None if unknown."""
        with self._lock:
            skill = self._skills.pop(skill_id, None)
            if skill is None:
                return None
            self._save()
        log.info("Deleted skill %s", skill_id)
        return skill.to_dict()

    def find_by_audio(self, audio_file: str) -> list[Skill]:
        """Find all enabled skills linked to a given audio file."""
        with self._lock:
            return [s for s in self._skills.values() if s.audio_file == audio_file and s.enabled]

    def find_by_callback(self, callback: str) -> list[Skill]:
        """Find all enabled skills whose callback equals *callback*."""
        with self._lock:
            return [s for s in self._skills.values() if s.callback == callback and s.enabled]
from Project.Sanad.core import asyncio_compat  # noqa: F401

from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles

from Project.Sanad.config import BASE_DIR
from Project.Sanad.core.logger import get_logger

log = get_logger("dashboard.app")

from Project.Sanad.core.config_loader import section as _cfg_section
# "app" section of the dashboard config; every key read below has a default.
_APP_CFG = _cfg_section("dashboard", "app")
app = FastAPI(
    title=_APP_CFG.get("title", "Sanad Dashboard"),
    version=_APP_CFG.get("version", "1.0.0"),
)


# -- isolated route registration --

# REST routers, registered in this order by the loop further down.
_REST_ROUTES: list[tuple[str, str, str]] = [
    # (module_name, prefix, tag)
    ("health", "/api", "health"),
    ("system", "/api/system", "system"),
    ("voice", "/api/voice", "voice"),
    ("motion", "/api/motion", "motion"),
    ("skills", "/api/skills", "skills"),
    ("macros", "/api/macros", "macros"),
    ("logs", "/api/logs", "logs"),
    ("replay", "/api/replay", "replay"),
    ("audio_control", "/api/audio", "audio"),
    ("scripts", "/api/scripts", "scripts"),
    ("records", "/api/records", "records"),
    ("prompt", "/api/prompt", "prompt"),
    ("wake_phrases", "/api/wake-phrases", "wake-phrases"),
    ("live_voice", "/api/live-voice", "live-voice"),
    ("live_subprocess", "/api/live-subprocess", "live-subprocess"),
    ("typed_replay", "/api/typed-replay", "typed-replay"),
    ("recognition", "/api/recognition", "recognition"),
    ("zones", "/api/zones", "zones"),
    ("temp_monitor", "/api/temp", "temperature"),
    ("controller", "/api/controller", "controller"),
    ("mask", "/api/mask", "mask"),
    ("mask_social", "/api/mask", "mask-social"),
    ("navigation", "/api/nav", "navigation"),
]

# WebSocket router modules; registered without a prefix.
_WS_ROUTES: list[str] = ["log_stream", "motor_temps", "terminal"]

# Registration outcome, exposed by root() and /api/_dashboard_status.
_loaded_routes: list[str] = []
_failed_routes: dict[str, str] = {}  # module name -> str(exception)


def _register_router(module_name: str, prefix: str | None = None, tag: str | None = None,
                     package: str = "Project.Sanad.dashboard.routes"):
    """Import + register one router. Failures are logged, never raised.

    Args:
        module_name: Module inside *package*; it must expose ``router``.
        prefix: URL prefix passed to ``include_router``; omitted when None.
        tag: Single tag passed as ``tags=[tag]``; omitted when None.
        package: Dotted package the module is imported from.

    On success the module name is appended to ``_loaded_routes``; on any
    exception the message is stored in ``_failed_routes`` instead.
    """
    full_name = f"{package}.{module_name}"
    try:
        mod = importlib.import_module(full_name)
        if not hasattr(mod, "router"):
            raise AttributeError(f"{full_name} has no 'router' attribute")
        kwargs: dict = {}
        if prefix is not None:
            kwargs["prefix"] = prefix
        if tag is not None:
            kwargs["tags"] = [tag]
        app.include_router(mod.router, **kwargs)
        _loaded_routes.append(module_name)
        log.info("Registered router: %s", module_name)
    except Exception as exc:
        _failed_routes[module_name] = str(exc)
        log.exception("Failed to register router %s — skipping", module_name)


# REST routes
for mod_name, prefix, tag in _REST_ROUTES:
    _register_router(mod_name, prefix=prefix, tag=tag)

# WebSocket routes
for mod_name in _WS_ROUTES:
    _register_router(
        mod_name,
        package="Project.Sanad.dashboard.websockets",
        tag="websocket",
    )


# -- Static files (dashboard UI) — best effort --
STATIC_DIR = BASE_DIR / _APP_CFG.get("static_subdir", "dashboard/static")
try:
    # Create the directory first so the mount does not fail on a fresh checkout.
    STATIC_DIR.mkdir(parents=True, exist_ok=True)
    app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
    log.info("Static dir mounted: %s", STATIC_DIR)
except Exception:
    log.exception("Could not mount static dir %s — serving without it", STATIC_DIR)


@app.get("/")
async def root():
    """Serve the dashboard SPA.

    Returns ``index.html`` from STATIC_DIR with caching disabled. If the file
    is missing, returns a JSON summary of loaded and failed routers; if it
    exists but cannot be read, returns a JSON error.
    """
    index = STATIC_DIR / "index.html"
    if index.exists():
        from fastapi.responses import HTMLResponse
        try:
            # no-store so the browser always re-fetches the dashboard HTML/JS
            # after a deploy — otherwise stale cached JS keeps calling old
            # endpoints (e.g. /nav/* instead of /api/nav/*) and 404s.
            return HTMLResponse(
                index.read_text(encoding="utf-8"),
                headers={"Cache-Control": "no-store, must-revalidate"},
            )
        except OSError as exc:
            return {"error": f"Could not read index.html: {exc}"}
    return {
        "message": "Sanad Dashboard — index.html not found",
        "loaded_routes": _loaded_routes,
        "failed_routes": _failed_routes,
    }


@app.get("/api/_dashboard_status")
async def dashboard_load_status():
    """Diagnostic — which routers loaded, which failed."""
    return {
        "loaded": _loaded_routes,
        "failed": _failed_routes,
        "total_loaded": len(_loaded_routes),
        "total_failed": len(_failed_routes),
    }
# One lock guards both ownership flags so a check-then-set is a single step.
_lock = threading.Lock()
_loco_active = False
_nav_active = False


def loco_active() -> bool:
    """True while the manual loco controller owns the legs."""
    with _lock:
        owned = _loco_active
    return owned


def nav_active() -> bool:
    """True while Nav2 owns the legs."""
    with _lock:
        owned = _nav_active
    return owned


def acquire_loco() -> bool:
    """Claim the legs for manual loco. Returns False if Nav2 holds them."""
    global _loco_active
    with _lock:
        granted = not _nav_active
        if granted:
            _loco_active = True
    return granted


def release_loco() -> None:
    """Give up manual-loco ownership (no-op when not held)."""
    global _loco_active
    with _lock:
        _loco_active = False


def acquire_nav() -> bool:
    """Claim the legs for Nav2. Returns False if manual loco holds them."""
    global _nav_active
    with _lock:
        granted = not _loco_active
        if granted:
            _nav_active = True
    return granted


def release_nav() -> None:
    """Give up Nav2 ownership (no-op when not held)."""
    global _nav_active
    with _lock:
        _nav_active = False
# Maximum upload size in bytes — SINGLE SOURCE in dashboard.api_input
MAX_UPLOAD_BYTES = _cfg_section("dashboard", "api_input").get(
    "max_upload_bytes", 8 * 1024 * 1024)


def safe_filename(name: str | None) -> str:
    """Reduce *name* to a bare file name or reject it with HTTP 400.

    Directory components and surrounding whitespace are removed; empty
    names, ``.``/``..`` and names containing NUL or line breaks are refused.
    """
    if not name:
        raise HTTPException(400, "Filename required.")
    leaf = os.path.basename(name).strip()
    if leaf in {"", ".", ".."}:
        raise HTTPException(400, "Invalid filename.")
    if not {"\x00", "\n", "\r"}.isdisjoint(leaf):
        raise HTTPException(400, "Invalid characters in filename.")
    return leaf


def safe_path_under(base: Path, name: str) -> Path:
    """Return the resolved ``base/name``, refusing anything outside *base*."""
    leaf = safe_filename(name)
    root = base.resolve()
    target = (base / leaf).resolve()
    try:
        target.relative_to(root)
    except ValueError:
        raise HTTPException(400, "Path traversal denied.")
    return target


def check_upload_size(content: bytes, max_bytes: int = MAX_UPLOAD_BYTES) -> None:
    """Raise HTTP 413 when *content* is longer than *max_bytes*."""
    size = len(content)
    if size > max_bytes:
        raise HTTPException(
            413,
            f"Upload too large: {size} bytes (max {max_bytes}).",
        )


def atomic_write_bytes(path: Path, data: bytes) -> None:
    """Write *data* to *path* through a sibling temp file and os.replace.

    The parent directory is created if needed. If anything fails the temp
    file is removed and the exception is re-raised.
    """
    folder = path.parent
    folder.mkdir(parents=True, exist_ok=True)
    handle, scratch = tempfile.mkstemp(
        prefix=f".{path.name}.", suffix=".tmp", dir=str(folder))
    try:
        with os.fdopen(handle, "wb") as out:
            out.write(data)
        os.replace(scratch, path)
    except Exception:
        try:
            os.unlink(scratch)
        except OSError:
            pass
        raise


def atomic_write_text(path: Path, text: str, encoding: str = "utf-8") -> None:
    """Encode *text* and write it atomically."""
    encoded = text.encode(encoding)
    atomic_write_bytes(path, encoded)


def atomic_write_json(path: Path, payload: Any, indent: int = 2) -> None:
    """Serialize *payload* as JSON (non-ASCII kept as-is) and write it atomically."""
    document = json.dumps(payload, ensure_ascii=False, indent=indent)
    atomic_write_text(path, document)
def _load_persisted_g1_volume() -> int:
    """Read the saved G1 volume from data/motions/config.json.

    Keys are `audio.g1_volume` (persistent target level 0-100); the value is
    clamped to that range.

    Returns:
        The stored level, or 100 when no value is stored or the config
        cannot be read.
    """
    try:
        from Project.Sanad.config import load_config
        cfg = load_config() or {}
        audio = cfg.get("audio") or {}
        vol = int(audio.get("g1_volume", 100))
        return max(0, min(100, vol))
    except Exception:
        return 100


def _save_persisted_g1_volume(level: int) -> None:
    """Persist the user's volume choice to config.json so it survives restart.

    Best-effort: a failure is logged as a warning and not raised.
    """
    try:
        from Project.Sanad.config import load_config, save_config
        cfg = load_config() or {}
        audio = cfg.get("audio") if isinstance(cfg.get("audio"), dict) else {}
        audio["g1_volume"] = max(0, min(100, int(level)))
        cfg["audio"] = audio
        save_config(cfg)
    except Exception as exc:
        log.warning("could not persist g1_volume: %s", exc)


# Initialize user volume from the persisted value so the dashboard shows
# the correct level on first load even if no one has touched it yet.
_g1_user_volume = _load_persisted_g1_volume()
_g1_current_volume = _g1_user_volume

# Serializes the lazy AudioClient construction below. Kept separate from
# _g1_audio_lock, which guards SetVolume calls on an existing client.
_g1_init_lock = threading.Lock()


def _get_g1_audio_client():
    """Lazy-init AudioClient. Safe to call from multiple routes.

    The endpoints call this from ``asyncio.to_thread`` workers, so
    construction is guarded by a lock and the cached client is re-checked
    once the lock is held; only one thread ever builds the client.

    Returns:
        The shared AudioClient, or None when the SDK is missing or init
        failed (the reason is left in ``_g1_init_error``).
    """
    global _g1_audio_client, _g1_init_error
    if _g1_audio_client is not None:
        return _g1_audio_client
    with _g1_init_lock:
        # Another thread may have finished init while we waited.
        if _g1_audio_client is not None:
            return _g1_audio_client
        try:
            from unitree_sdk2py.core.channel import ChannelFactoryInitialize
            from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient
        except ImportError as exc:
            _g1_init_error = f"unitree_sdk2py not installed: {exc}"
            return None
        iface = os.environ.get("SANAD_DDS_INTERFACE", "eth0")
        # ChannelFactoryInitialize can only be called once per process. The
        # arm controller normally calls it first at startup — the second call
        # either no-ops or raises, so wrap it defensively.
        try:
            ChannelFactoryInitialize(0, iface)
        except Exception as exc:
            log.debug("ChannelFactoryInitialize already called or failed: %s", exc)
        try:
            client = AudioClient()
            client.SetTimeout(5.0)
            client.Init()
        except Exception as exc:
            _g1_init_error = f"AudioClient init failed: {exc}"
            log.warning("G1 AudioClient init failed: %s", exc)
            return None
        _g1_audio_client = client
        # Drop the message of an earlier failed attempt so status endpoints
        # do not report an error next to a working client.
        _g1_init_error = ""
        log.info("G1 AudioClient initialized for dashboard mute control (iface=%s)", iface)
        return client


def _pactl(args: list[str]) -> subprocess.CompletedProcess[str]:
    """Run ``pactl`` with *args*; raises CalledProcessError on non-zero exit."""
    return subprocess.run(["pactl", *args], check=True, text=True, capture_output=True)


def _get_muted(kind: str, name: str) -> bool:
    """Mute state of a PulseAudio source (``kind == "source"``) or sink.

    Returns False for an empty name and when pactl is missing or fails.
    """
    if not name:
        return False
    try:
        cmd = "get-source-mute" if kind == "source" else "get-sink-mute"
        r = _pactl([cmd, name])
        return (r.stdout or "").strip().lower().endswith("yes")
    except (FileNotFoundError, subprocess.CalledProcessError):
        return False


def _set_muted(kind: str, name: str, muted: bool) -> bool:
    """Set the mute flag of a source/sink and return the state read back.

    Returns False without calling pactl when *name* is empty. pactl errors
    from the set call propagate to the caller.
    """
    if not name:
        return False
    cmd = "set-source-mute" if kind == "source" else "set-sink-mute"
    _pactl([cmd, name, "1" if muted else "0"])
    return _get_muted(kind, name)


def _current_sink_source() -> tuple[str, str]:
    """Return ``(sink, source)`` of the current selection ("" when unset)."""
    cur = ad.current_selection()
    return cur.get("sink", ""), cur.get("source", "")


# ─────────────────────── status / mute ───────────────────────


@router.get("/status")
async def audio_status():
    """Return current device + mute state + G1 speaker volume.

    `speaker_muted` is the EFFECTIVE mute state — True if either the
    PulseAudio sink is muted OR the G1 built-in speaker volume is 0.
    `pulse_sink_muted` and `g1_speaker_muted` are the per-path states.
    `g1_current_volume` = what's live on the hardware.
    `g1_user_volume` = the user's preferred unmuted level (what we
    restore to when they un-mute).
    """
    def _do():
        sink, source = _current_sink_source()
        cur = ad.current_selection()
        pulse_muted = _get_muted("sink", sink)
        # Read cached state — avoid DDS GetVolume round-trips on every poll
        g1_muted = _g1_current_volume == 0
        return {
            "mic_muted": _get_muted("source", source),
            # Effective (OR of both paths) — the badge the user sees
            "speaker_muted": pulse_muted or g1_muted,
            # Per-path breakdown so the UI can distinguish
            "pulse_sink_muted": pulse_muted,
            "g1_speaker_muted": g1_muted,
            "g1_current_volume": _g1_current_volume,
            "g1_user_volume": _g1_user_volume,
            # Only report available once an AudioClient has actually been
            # built — reporting True before any init attempt made the UI
            # advertise G1 speaker controls that then 503 on first use.
            # `g1_init_error` surfaces *why* it's unavailable (or "" if
            # init was never attempted yet).
            "g1_available": _g1_audio_client is not None,
            "g1_init_error": _g1_init_error,
            "sink": sink,
            "source": source,
            "current": cur,
            "pactl_available": ad.pactl_available(),
        }
    return await asyncio.to_thread(_do)


@router.post("/mic/mute")
async def toggle_mic(muted: bool | None = None):
    """Mute/unmute the selected PulseAudio source; toggles when *muted* is None.

    Responds 503 when no source is selected and 500 when pactl fails.
    """
    def _do():
        _, source = _current_sink_source()
        if not source:
            raise HTTPException(503, "No source device selected")
        target = muted if muted is not None else not _get_muted("source", source)
        try:
            actual = _set_muted("source", source, target)
        except (FileNotFoundError, subprocess.CalledProcessError) as exc:
            raise HTTPException(500, f"pactl failed: {exc}")
        return {"mic_muted": actual, "source": source}
    return await asyncio.to_thread(_do)
@router.post("/speaker/mute")
async def toggle_speaker(muted: bool | None = None):
    """Mute/unmute the SPEAKER — both the PulseAudio sink AND the G1
    built-in speaker, so the effect is audible regardless of which
    playback path is currently active (Anker PowerConf via PyAudio vs
    G1 built-in via Unitree DDS AudioClient).

    Each of the two paths is attempted independently; the response
    reports which one(s) succeeded under ``pulse`` and ``g1``.
    ``speaker_muted`` is the effective state after the attempt, computed
    the same way as in ``/status``: True if the sink is muted OR the G1
    volume is 0.

    Args:
        muted: Desired state; None toggles (mutes if anything is live).
    """
    def _do():
        global _g1_current_volume
        sink, _ = _current_sink_source()

        # Decide target state — if muted is None, toggle based on
        # whichever path is currently "not muted"
        if muted is None:
            pulse_cur = _get_muted("sink", sink) if sink else False
            g1_cur = _g1_current_volume == 0
            # Toggle: if anything is live, mute everything; else unmute all
            target = not (pulse_cur or g1_cur)
        else:
            target = bool(muted)

        result = {"speaker_muted": target, "pulse": None, "g1": None}

        # ── Path 1: PulseAudio sink (Anker PowerConf, USB, etc.) ──
        pulse_muted = False
        if sink:
            try:
                pulse_muted = _set_muted("sink", sink, target)
                result["pulse"] = {"ok": True, "muted": pulse_muted, "sink": sink}
            except (FileNotFoundError, subprocess.CalledProcessError) as exc:
                result["pulse"] = {"ok": False, "error": f"pactl failed: {exc}"}
                # The request did not take effect; report the sink's real state.
                pulse_muted = _get_muted("sink", sink)
        else:
            result["pulse"] = {"ok": False, "error": "no sink selected"}

        # ── Path 2: G1 built-in speaker via DDS AudioClient ──
        # Mute = SetVolume(0). Unmute = SetVolume(_g1_user_volume) so the
        # user's chosen level is restored (instead of always jumping back
        # to 100).
        client = _get_g1_audio_client()
        if client is None:
            result["g1"] = {"ok": False, "error": _g1_init_error or "AudioClient unavailable"}
        else:
            volume = 0 if target else _g1_user_volume
            try:
                with _g1_audio_lock:
                    code = client.SetVolume(volume)
                _g1_current_volume = volume
                result["g1"] = {
                    "ok": True, "muted": volume == 0,
                    "volume": volume, "code": code,
                }
                log.info("G1 speaker volume set to %d (rc=%s)", volume, code)
            except Exception as exc:
                result["g1"] = {"ok": False, "error": f"SetVolume failed: {exc}"}

        # Final effective state — either path counts as muted. Derived from
        # the states after the attempt (not from the requested target), so a
        # failed unmute is not reported as unmuted.
        g1_muted = _g1_current_volume == 0
        result["speaker_muted"] = bool(pulse_muted or g1_muted)
        result["sink"] = sink
        result["g1_current_volume"] = _g1_current_volume
        result["g1_user_volume"] = _g1_user_volume
        return result
    return await asyncio.to_thread(_do)


@router.post("/g1-speaker/mute")
async def toggle_g1_speaker_only(muted: bool | None = None):
    """Mute/unmute ONLY the G1 built-in speaker via DDS AudioClient.

    Useful for testing the DDS path in isolation — the normal
    /speaker/mute endpoint hits both PulseAudio and G1 at once.
    On unmute, restores the user's last chosen volume (not always 100).

    Responds 503 when no AudioClient is available and 500 when SetVolume
    raises.
    """
    def _do():
        global _g1_current_volume
        client = _get_g1_audio_client()
        if client is None:
            raise HTTPException(
                503,
                f"G1 AudioClient unavailable: {_g1_init_error or 'unknown'}",
            )
        if muted is None:
            target = _g1_current_volume > 0  # toggle
        else:
            target = bool(muted)
        volume = 0 if target else _g1_user_volume
        try:
            with _g1_audio_lock:
                code = client.SetVolume(volume)
            _g1_current_volume = volume
        except Exception as exc:
            raise HTTPException(500, f"SetVolume failed: {exc}")
        log.info("G1 speaker volume set to %d (rc=%s)", volume, code)
        return {
            "g1_muted": volume == 0,
            "volume": volume,
            "user_volume": _g1_user_volume,
            "return_code": code,
        }
    return await asyncio.to_thread(_do)


# ─────────────────────── G1 speaker volume (0-100) ───────────────────────


class G1VolumePayload(BaseModel):
    """Request body of POST /g1-speaker/volume."""

    level: int  # 0..100


@router.get("/g1-speaker/volume")
async def get_g1_volume():
    """Return the current G1 speaker volume state.

    Response:
        {
            "available": true,        # AudioClient available?
            "current_volume": 75,     # what's on hardware right now
            "user_volume": 75,        # user's preferred unmuted level
            "muted": false,           # current_volume == 0
            "persisted": 75,          # value from config.json
            "init_error": "",         # why the client is unavailable, if known
        }
    """
    def _do():
        return {
            # True only after an AudioClient was actually constructed —
            # `init_error` (below) explains an unavailable/never-tried state.
            "available": _g1_audio_client is not None,
            "current_volume": _g1_current_volume,
            "user_volume": _g1_user_volume,
            "muted": _g1_current_volume == 0,
            "persisted": _load_persisted_g1_volume(),
            "init_error": _g1_init_error,
        }
    return await asyncio.to_thread(_do)
+ - Persists to data/motions/config.json under `audio.g1_volume` so + it survives restarts. + - If level > 0, updates _g1_user_volume (the "unmuted" restore + target). level == 0 is a soft mute that preserves user_volume. + - Takes effect on the live playback immediately — you can slide + the volume down mid-speech and hear it get quieter. + """ + def _do(): + global _g1_current_volume, _g1_user_volume + level = int(payload.level) + if not 0 <= level <= 100: + raise HTTPException(400, "level must be 0..100") + + # 1) G1 chest speaker (DDS) — best-effort so it works even when an + # external sink (JBL) is the active output. + code = None + client = _get_g1_audio_client() + if client is not None: + try: + with _g1_audio_lock: + code = client.SetVolume(level) + _g1_current_volume = level + except Exception as exc: + log.warning("G1 SetVolume failed: %s", exc) + if level > 0: + _g1_user_volume = level + + # 2) The ACTIVE profile's PulseAudio sink (JBL / Anker / …). Target the + # RESOLVED sink from the saved selection, NOT @DEFAULT_SINK@ — the PA + # default can be a different sink (e.g. the chest platform-sound) even + # when the JBL is the selected output, so @DEFAULT_SINK@ would move the + # wrong sink and the slider would appear to do nothing on the JBL. 
+ pa_applied = False + try: + sink = (ad.load_state() or {}).get("sink") or "@DEFAULT_SINK@" + _pactl(["set-sink-volume", sink, "%d%%" % level]) + if level > 0: + _pactl(["set-sink-mute", sink, "0"]) + pa_applied = True + except Exception as exc: + log.warning("PA set-sink-volume failed: %s", exc) + + if client is None and not pa_applied: + raise HTTPException(503, "No speaker available (G1 + PulseAudio both failed)") + + _save_persisted_g1_volume(_g1_user_volume) + log.info("volume → %d (g1_rc=%s, pa=%s, user_pref=%d)", + level, code, pa_applied, _g1_user_volume) + return { + "ok": True, + "current_volume": level, + "user_volume": _g1_user_volume, + "muted": level == 0, + "return_code": code, + "pa_applied": pa_applied, + "persisted": True, + } + return await asyncio.to_thread(_do) + + +# ─────────────────────── device profiles ─────────────────────── + + +@router.get("/devices") +async def list_devices(): + """Full device + profile listing for the dashboard picker.""" + return await asyncio.to_thread(ad.status) + + +@router.get("/profiles") +async def list_profiles(): + """Just the named profiles + which are currently plugged in.""" + def _do(): + from dataclasses import asdict + detected = ad.detect_plugged_profiles() if ad.pactl_available() else [] + detected_ids = {d["profile"]["id"] for d in detected} + return { + "profiles": [ + { + **asdict(p), + "available": p.id in detected_ids, + } + for p in ad.PROFILES + ], + "detected_ids": list(detected_ids), + } + return await asyncio.to_thread(_do) + + +class ProfileSelect(BaseModel): + profile_id: str + + +@router.post("/select-profile") +async def select_profile(payload: ProfileSelect): + def _do(): + result = ad.select_profile(payload.profile_id) + if not result.get("ok"): + raise HTTPException(409, result.get("error") or "Could not select profile") + # Best-effort: tell the audio_manager to refresh its cached state + try: + from Project.Sanad.main import audio_mgr + if audio_mgr is not None and 
@router.post("/apply")
async def apply_audio():
    """Re-scan all USB ports, resolve the best profile, and set pactl defaults.

    Use this after plugging/unplugging devices or switching USB ports.

    After applying the selection, three best-effort follow-ups run. Each one
    logs a warning on failure and never fails the request:
      1. refresh the AudioManager's cached devices,
      2. hot-swap the running live voice session to the selected profile,
      3. restore the user's saved volume on the selected sink.

    Returns:
        Whatever `ad.apply_current_selection()` returned.
    """
    def _do():
        result = ad.apply_current_selection()

        # Also refresh AudioManager so it picks up the new sink/source.
        try:
            from Project.Sanad.main import audio_mgr
            if audio_mgr is not None and hasattr(audio_mgr, "refresh_devices"):
                audio_mgr.refresh_devices()
        except Exception as exc:
            log.warning("audio apply: AudioManager refresh failed: %s", exc)

        # Hot-swap the live Gemini voice to the selected profile too, so picking
        # a device (e.g. the JBL) moves BOTH record playback AND the live voice
        # to it — without dropping the session. Best-effort; no-op if not running.
        try:
            from Project.Sanad.main import live_sub
            pid = (ad.load_state() or {}).get("profile_id")
            if (pid and live_sub is not None and hasattr(live_sub, "send_profile")
                    and hasattr(live_sub, "is_running") and live_sub.is_running()):
                live_sub.send_profile(pid, reason="dashboard audio Apply")
        except Exception as exc:
            log.warning("audio apply: live voice profile switch failed: %s", exc)

        # Restore the user's SAVED volume to the selected sink (USB/BT speakers
        # like the JBL otherwise come back at a low PulseAudio default). Use the
        # saved level, NOT a forced 100%, so the slider/sink keep the user's
        # choice across selects + restarts. Target the resolved sink.
        try:
            sink = (ad.load_state() or {}).get("sink") or "@DEFAULT_SINK@"
            _pactl(["set-sink-volume", sink, "%d%%" % _g1_user_volume])
            if _g1_user_volume > 0:
                _pactl(["set-sink-mute", sink, "0"])
        except Exception as exc:
            log.warning("audio apply: sink volume restore failed: %s", exc)
        return result
    return await asyncio.to_thread(_do)
+ +_RESET_LOCK = threading.Lock() +_USB_RESET_LOCK = threading.Lock() + +# Anker PowerConf A3321 — used both for VID:PID matching in sysfs and for +# logging. Change here if you add support for a different USB conference +# device (Hollyland etc). +_USB_RESET_TARGETS = ( + {"vid": "291a", "pid": "3301", "label": "Anker PowerConf"}, +) + + +def _refuse_if_busy() -> None: + """Raise HTTPException(409) if Live Gemini is active or a record is playing. + + Used by both reset endpoints — a userspace audio restart mid-stream + leaves the active session in a broken state (PortAudio handle pointing + at a dead Pulse, in-flight write() raises, etc.). Cheaper to refuse + than to recover. + """ + try: + from Project.Sanad.main import live_sub + except Exception: + live_sub = None + if live_sub is not None: + try: + st = live_sub.status() or {} + except Exception: + st = {} + state = (st.get("state") or "").lower() + if st.get("running") or state not in ("", "stopped", "error"): + raise HTTPException( + 409, f"Stop Live Gemini before resetting audio (state={state or '?'}).", + ) + + try: + from Project.Sanad.main import audio_mgr + except Exception: + audio_mgr = None + if audio_mgr is not None and hasattr(audio_mgr, "playback_status"): + try: + ps = audio_mgr.playback_status() or {} + if ps.get("playing"): + raise HTTPException( + 409, "Stop the active playback before resetting audio.", + ) + except HTTPException: + raise + except Exception: + pass + + +def _detect_pa_flavour() -> str: + """Return 'pipewire' if pipewire-pulse is the active daemon, else 'pulse'.""" + try: + r = subprocess.run( + ["pgrep", "-x", "pipewire-pulse"], + check=False, capture_output=True, text=True, timeout=1.0, + ) + if r.returncode == 0 and (r.stdout or "").strip(): + return "pipewire" + except (FileNotFoundError, subprocess.SubprocessError): + pass + return "pulse" + + +def _kill_audio_daemon(flavour: str) -> dict: + """Issue the restart command for the detected daemon. 
@router.post("/reset")
async def reset_audio_subsystem():
    """SOFT reset — restart pulseaudio/pipewire-pulse and re-resolve devices.

    Use when devices look stuck, pactl is unavailable, or the wrong sink
    is being selected. **Does NOT recover a kernel-side missing USB capture
    descriptor** — for that symptom use /api/audio/usb-reset.

    Returns:
        A diagnostics dict with the daemon flavour, the restart command
        result, before/after selection snapshots, and the flags
        `pya_closed` / `pya_reinitialized` / `input_recovered` /
        `output_recovered`.

    Raises:
        HTTPException(403): the process runs as root.
        HTTPException(429): another soft reset is already in progress.
        HTTPException(409): Live Gemini or a playback is active.
        HTTPException(500): the daemon did not come back, or PortAudio
            could not be re-initialised afterwards.
    """
    if os.geteuid() == 0:
        raise HTTPException(
            403, "Refusing to reset audio as root — Sanad must run as the "
                 "unitree user so the per-user PulseAudio session is reachable.",
        )
    if not _RESET_LOCK.acquire(blocking=False):
        raise HTTPException(429, "Reset already in progress.")
    try:
        _refuse_if_busy()
        log.info(
            "audio reset requested (uid=%s PULSE_RUNTIME_PATH=%s XDG_RUNTIME_DIR=%s)",
            os.geteuid(),
            os.environ.get("PULSE_RUNTIME_PATH") or "-",
            os.environ.get("XDG_RUNTIME_DIR") or "-",
        )
        try:
            from Project.Sanad.main import audio_mgr
        except Exception:
            audio_mgr = None

        def _do() -> dict:
            before = {"pactl_available": ad.pactl_available(),
                      "selection": ad.current_selection()}

            # Quiesce AudioManager so the next play_wav rebinds cleanly.
            pya_closed = False
            if audio_mgr is not None:
                play_lock = getattr(audio_mgr, "play_lock", None)
                acquired = False
                if play_lock is not None:
                    acquired = play_lock.acquire(timeout=2.0)
                try:
                    try:
                        audio_mgr.close()
                        pya_closed = True
                    except Exception as exc:
                        log.warning("audio reset: audio_mgr.close failed: %s", exc)
                finally:
                    if acquired and play_lock is not None:
                        play_lock.release()

            flavour = _detect_pa_flavour()
            kill_info = _kill_audio_daemon(flavour)
            came_back = _wait_for_pactl(deadline_s=5.0)
            if not came_back and flavour == "pulse":
                # autospawn may be disabled — try an explicit start.
                try:
                    subprocess.run(["pulseaudio", "--start"], check=False,
                                   capture_output=True, text=True, timeout=3.0)
                except (FileNotFoundError, subprocess.SubprocessError) as exc:
                    log.warning("audio reset: pulseaudio --start failed: %s", exc)
                came_back = _wait_for_pactl(deadline_s=2.0)
            if not came_back:
                raise HTTPException(500, {
                    "error": "audio daemon did not return within ~7s",
                    "flavour": flavour, "kill": kill_info,
                })

            apply_result: dict = {}
            try:
                apply_result = ad.apply_current_selection() or {}
            except Exception as exc:
                log.warning("audio reset: apply_current_selection failed: %s", exc)
                apply_result = {"error": str(exc)}

            # Tracked separately from pya_closed: a failed close() followed by
            # a successful re-init must still report the re-init as done.
            pya_reinitialized = False
            if audio_mgr is not None:
                try:
                    import pyaudio
                    audio_mgr.pya = pyaudio.PyAudio()
                    audio_mgr.refresh_devices()
                    pya_reinitialized = True
                except Exception as exc:
                    log.error("audio reset: PyAudio re-init failed: %s", exc)
                    raise HTTPException(
                        500, f"PortAudio re-init failed after daemon restart: {exc}")

            after_sel = ad.current_selection() or {}
            detected = ad.detect_plugged_profiles() or []
            after = {
                "pactl_available": ad.pactl_available(),
                "selection": after_sel,
                "detected_profiles": [p.get("profile", {}).get("id") for p in detected],
            }
            return {
                "ok": True, "best_effort": True, "flavour": flavour,
                "kill": kill_info,
                "pya_closed": pya_closed,
                "pya_reinitialized": pya_reinitialized,
                "apply_result": apply_result,
                "input_recovered": bool(after_sel.get("source")),
                "output_recovered": bool(after_sel.get("sink")),
                "before": before, "after": after,
                "hint": ("Soft reset only fixes Pulse-side state. If "
                         "input_recovered is False, try POST /api/audio/usb-reset "
                         "or physically replug the dongle."),
            }
        return await asyncio.to_thread(_do)
    finally:
        _RESET_LOCK.release()
+ + We read /sys/bus/usb/devices/* — every USB *device* (not interface) has + idVendor/idProduct files. Interfaces (paths with a colon, e.g. '1-3:1.1') + do not, so they're naturally skipped. + """ + import glob + hits: list[str] = [] + for path in glob.glob("/sys/bus/usb/devices/*"): + name = os.path.basename(path) + if ":" in name: + continue + try: + with open(os.path.join(path, "idVendor")) as f: + v = f.read().strip().lower() + with open(os.path.join(path, "idProduct")) as f: + p = f.read().strip().lower() + except OSError: + continue + if v == vid.lower() and p == pid.lower(): + hits.append(name) + return hits + + +def _snd_usb_interfaces_for_device(bus_id: str) -> list[str]: + """For USB device `bus_id` (e.g. '1-3'), return all interface names that + are currently bound to the snd-usb-audio driver (e.g. ['1-3:1.0']). + + Used so we unbind ONLY the audio interfaces and don't touch HID / HUB + interfaces on the same composite device. + """ + import glob + bound: list[str] = [] + base = f"/sys/bus/usb/devices/{bus_id}" + for iface in glob.glob(f"{base}/{bus_id}:*"): + driver_link = os.path.join(iface, "driver") + if not os.path.islink(driver_link): + continue + try: + driver = os.path.basename(os.readlink(driver_link)) + except OSError: + continue + if driver == "snd-usb-audio": + bound.append(os.path.basename(iface)) + return bound + + +def _write_sysfs(path: str, value: str) -> tuple[bool, str]: + """Write `value` to a sysfs file. Returns (success, error_message). + + Writes to /sys/bus/usb/drivers/snd-usb-audio/{bind,unbind} usually + require root. If permission denied, the caller should fall back to + invoking shell_scripts/reset_anker_usb.sh via sudo (one-time sudoers + setup documented in that script's header). 
@router.post("/usb-reset")
async def usb_reset_anker():
    """HARD reset — unbind+rebind snd-usb-audio for the Anker (VID:PID
    291a:3301). Forces the kernel to re-parse the USB Audio Class
    descriptors, which is the only way to recover a missing capture profile
    on this Jetson without a physical replug.

    Tries two paths:
      1. Direct sysfs write (no sudo) — works if a udev rule has set
         `audio` group ownership / world-write on the snd-usb-audio bind
         files, or if Sanad runs as root (it shouldn't).
      2. Fallback to `sudo shell_scripts/reset_anker_usb.sh` — works after
         a one-time sudoers entry; see that script's header for setup.

    Refuses while Live Gemini or a record playback is in flight (same
    guard as the soft reset).

    Returns:
        A dict with one attempt record per matching device, the detected
        profile ids before and after, and `input_recovered`, which is
        derived from the same post-reset snapshot as
        `after_detected_profiles`.

    Raises:
        HTTPException(429): another USB reset is already in progress.
        HTTPException(409): Live Gemini or a playback is active.
        HTTPException(404): no device matching _USB_RESET_TARGETS is plugged.
    """
    if not _USB_RESET_LOCK.acquire(blocking=False):
        raise HTTPException(429, "USB reset already in progress.")
    try:
        _refuse_if_busy()

        # Find candidate Anker USB devices currently enumerated.
        candidates: list[dict] = []
        for tgt in _USB_RESET_TARGETS:
            for bus_id in _find_usb_devices_by_vid_pid(tgt["vid"], tgt["pid"]):
                candidates.append({"bus_id": bus_id, **tgt})
        if not candidates:
            wanted = ", ".join(
                "{}:{}".format(t["vid"], t["pid"]) for t in _USB_RESET_TARGETS
            )
            raise HTTPException(
                404,
                f"No matching USB device found (looked for {wanted}). "
                "Plug the Anker dongle and try again.",
            )

        log.info("usb reset: candidates=%s", candidates)

        def _do() -> dict:
            import time as _time
            from pathlib import Path as _Path

            before_detected = [
                p.get("profile", {}).get("id")
                for p in (ad.detect_plugged_profiles() or [])
            ]
            results: list[dict] = []
            for cand in candidates:
                bus = cand["bus_id"]
                ifaces = _snd_usb_interfaces_for_device(bus)
                attempt = {"bus_id": bus, "label": cand["label"],
                           "snd_interfaces": ifaces, "method": None,
                           "ok": False, "error": ""}
                if not ifaces:
                    attempt["error"] = ("no snd-usb-audio interfaces bound "
                                        "to this device — already unbound or "
                                        "kernel didn't claim it")
                    results.append(attempt)
                    continue

                # ─── Path 1: direct sysfs write ───
                unbind_path = "/sys/bus/usb/drivers/snd-usb-audio/unbind"
                bind_path = "/sys/bus/usb/drivers/snd-usb-audio/bind"
                direct_ok = True
                direct_err = ""
                for iface in ifaces:
                    ok, err = _write_sysfs(unbind_path, iface)
                    if not ok:
                        direct_ok = False
                        direct_err = err
                        break
                if direct_ok:
                    # Give the kernel a moment to tear the driver down.
                    _time.sleep(0.5)
                    for iface in ifaces:
                        ok, err = _write_sysfs(bind_path, iface)
                        if not ok:
                            direct_ok = False
                            direct_err = err
                            break
                if direct_ok:
                    attempt.update({"method": "direct-sysfs", "ok": True})
                    results.append(attempt)
                    continue

                # ─── Path 2: sudo helper script ───
                helper = (_Path(__file__).resolve().parent.parent.parent
                          / "shell_scripts" / "reset_anker_usb.sh")
                if not helper.exists():
                    attempt.update({"method": "direct-sysfs",
                                    "error": f"{direct_err}; helper not present "
                                             f"at {helper}"})
                    results.append(attempt)
                    continue
                try:
                    r = subprocess.run(
                        ["sudo", "-n", str(helper), bus],
                        check=False, capture_output=True, text=True, timeout=10.0,
                    )
                    attempt["method"] = "sudo-helper"
                    if r.returncode == 0:
                        attempt["ok"] = True
                    else:
                        attempt["error"] = (
                            f"sudo helper exited {r.returncode}: "
                            f"{(r.stderr or r.stdout or '').strip()[:300]}"
                        )
                except subprocess.TimeoutExpired:
                    attempt["error"] = "sudo helper timed out (>10s)"
                except FileNotFoundError as exc:
                    attempt["error"] = f"sudo not available: {exc}"
                results.append(attempt)

            # Settle, then re-detect. Both follow-ups stay best-effort, but a
            # failure is logged so it is not invisible.
            _time.sleep(1.0)
            try:
                ad.apply_current_selection()
            except Exception as exc:
                log.warning("usb reset: apply_current_selection failed: %s", exc)
            try:
                from Project.Sanad.main import audio_mgr
                if audio_mgr is not None and hasattr(audio_mgr, "refresh_devices"):
                    audio_mgr.refresh_devices()
            except Exception as exc:
                log.warning("usb reset: AudioManager refresh failed: %s", exc)

            # One post-reset snapshot feeds BOTH after_detected_profiles and
            # input_recovered, so the two fields can never disagree.
            after_detected = [
                p.get("profile", {}).get("id")
                for p in (ad.detect_plugged_profiles() or [])
            ]
            any_ok = any(r.get("ok") for r in results)
            mic_now = any(
                "anker" in (profile_id or "").lower()
                for profile_id in after_detected
            )

            return {
                "ok": any_ok,
                "candidates": results,
                "before_detected_profiles": before_detected,
                "after_detected_profiles": after_detected,
                "input_recovered": mic_now,
                "hint": (
                    "If ok is False, the unbind/rebind path needs sudo. "
                    "Run `bash shell_scripts/reset_anker_usb.sh --setup-sudoers` "
                    "once on the robot to install the sudoers entry, then retry."
                ) if not any_ok else None,
            }

        return await asyncio.to_thread(_do)
    finally:
        _USB_RESET_LOCK.release()
It is kept under /api/controller +(final path /api/controller/status/summary) so no second router is needed; note +/api/status (no /summary) is already used by the SPA, so the suffix matters. +""" + +from __future__ import annotations + +import asyncio + +from fastapi import APIRouter, HTTPException, Query +from pydantic import BaseModel + +from Project.Sanad.config import BASE_DIR +from Project.Sanad.core.logger import get_logger +from Project.Sanad.vision import recognition_state + +from Project.Sanad.dashboard.routes import _arbiter + +log = get_logger("controller_routes") + +router = APIRouter() + +STATE_PATH = BASE_DIR / "data" / ".recognition_state.json" + + +# ── lazy subsystem accessors ──────────────────────────────── + +def _get_loco(): + try: + from Project.Sanad.main import loco_controller # type: ignore + return loco_controller + except Exception: + return None + + +def _get_camera(): + try: + from Project.Sanad.main import camera # type: ignore + return camera + except Exception: + return None + + +def _get_live_sub(): + try: + from Project.Sanad.main import live_sub # type: ignore + return live_sub + except Exception: + return None + + +def _get_dispatch(): + try: + from Project.Sanad.main import movement_dispatch # type: ignore + return movement_dispatch + except Exception: + return None + + +def _require_loco(): + lc = _get_loco() + if lc is None: + raise HTTPException(503, "Locomotion controller subsystem unavailable.") + return lc + + +def _require_armed(lc): + if not lc.is_armed(): + raise HTTPException(409, "Movement is disarmed. Enable movement first.") + + +def _claim_loco(): + """Arbitration gate: refuse a leg command while a Nav2 goal owns the legs.""" + if not _arbiter.acquire_loco(): + raise HTTPException( + 409, "Navigation (Nav2) is active. Cancel the nav goal before manual movement." + ) + + +def _cancel_nav(): + """Cancel any in-flight Nav2 goal and clear the nav arbitration flag. 
@router.post("/arm")
async def set_arm(on: bool = Query(...)):
    """Arm (`on=true`) or disarm (`on=false`) manual movement.

    Arming first claims the loco arbitration slot and gives it back if
    `arm_movement` raises. Disarming releases the slot after
    `disarm_movement` returns.
    """
    controller = _require_loco()

    if not on:
        outcome = await asyncio.to_thread(controller.disarm_movement)
        _arbiter.release_loco()
        return outcome

    # Refuse to arm manual loco while Nav2 owns the legs.
    _claim_loco()
    try:
        outcome = await asyncio.to_thread(controller.arm_movement)
    except Exception:
        # Arming failed: do not keep the legs claimed.
        _arbiter.release_loco()
        raise
    return outcome
@router.post("/estop")
async def estop():
    """Global emergency stop: halt the legs whichever stack is driving them.

    Every stop path is attempted independently, so one failing path never
    prevents the others from running:
      1. `estop()` on the locomotion controller,
      2. drop the manual arm flag,
      3. latch the voice movement dispatcher off,
      4. cancel any in-flight Nav2 goal and release the arbitration slots.

    Returns:
        `{"ok": True, **<loco estop result>}` when the loco stop succeeded.

    Raises:
        HTTPException(503): the locomotion controller is unavailable. The
            dispatcher latch and the Nav2 cancel have still been performed.
        Exception: whatever `lc.estop` raised, re-raised only after the
            remaining stop paths have run.
    """
    lc = _get_loco()
    failure = None
    res = {}
    if lc is None:
        # Same error as _require_loco(), but deferred so the dispatcher and
        # Nav2 are still stopped when the loco subsystem is missing.
        failure = HTTPException(503, "Locomotion controller subsystem unavailable.")
    else:
        try:
            res = await asyncio.to_thread(lc.estop)
        except Exception as exc:
            log.exception("estop loco stop failed")
            failure = exc
        # Full stop: drop the manual arm flag AND latch the voice dispatcher
        # off, so no source (teleop, step, or voice dispatch) can keep driving
        # the robot. The dispatcher latch is used instead of flipping
        # movement_enabled so the Gemini child does not deliver a spoken
        # "movement disabled" line during an E-STOP.
        try:
            await asyncio.to_thread(lc.disarm_movement)
        except Exception:
            log.exception("estop disarm failed")

    md = _get_dispatch()
    if md is not None:
        try:
            md.emergency_stop()
        except Exception:
            log.exception("estop dispatcher latch failed")

    # Cancel any in-flight Nav2 goal too: the legs have exactly one commander,
    # and an E-STOP must halt the legs whether loco or Nav2 is driving them.
    await asyncio.to_thread(_cancel_nav)
    _arbiter.release_loco()

    if failure is not None:
        raise failure
    return {"ok": True, **(res or {})}
@router.post("/step")
async def step(dir: str = Query(...)):
    """Run one discrete step in direction `dir` (requires armed movement).

    Raises:
        HTTPException(400): the controller's result has a falsy `ok`. The
            detail is its `reason`, or "step failed" when absent.
    """
    controller = _require_loco()
    _require_armed(controller)
    _claim_loco()
    outcome = await asyncio.to_thread(controller.step, dir)
    if outcome.get("ok"):
        return outcome
    raise HTTPException(400, outcome.get("reason", "step failed"))
@router.post("/reconnect")
async def reconnect():
    """Call the locomotion controller's `reconnect` (requires armed movement)."""
    controller = _require_loco()
    _require_armed(controller)
    outcome = await asyncio.to_thread(controller.reconnect)
    return outcome
+ md = _get_dispatch() + estopped = False + try: + estopped = bool(md is not None and md.is_estopped()) + except Exception: + estopped = False + + return { + "vision_enabled": st.vision_enabled, + "camera_running": camera_running, + "face_rec_enabled": st.face_rec_enabled, + "zone_rec_enabled": st.zone_rec_enabled, + "movement_armed": movement_armed, + "gemini_movement_enabled": st.movement_enabled and not estopped, + "gemini_running": gemini_running, + } diff --git a/vendor/Sanad/dashboard/routes/health.py b/vendor/Sanad/dashboard/routes/health.py new file mode 100644 index 0000000..f685209 --- /dev/null +++ b/vendor/Sanad/dashboard/routes/health.py @@ -0,0 +1,51 @@ +"""Health and status endpoints.""" + +from __future__ import annotations + +from fastapi import APIRouter + +from Project.Sanad.core.logger import get_logger + +log = get_logger("health_route") + +router = APIRouter() + + +def _safe_status(component, name: str) -> dict: + """Get status without crashing the whole endpoint if one component fails.""" + if component is None: + return {"available": False} + try: + if hasattr(component, "status") and callable(component.status): + return component.status() + return {"available": True} + except Exception as exc: + log.warning("status() failed for %s: %s", name, exc) + return {"available": True, "error": str(exc)} + + +@router.get("/health") +async def health(): + from Project.Sanad.main import brain + return { + "status": "ok", + "brain": _safe_status(brain, "brain"), + } + + +@router.get("/status") +async def full_status(): + from Project.Sanad.main import ( + brain, arm, voice_client, macro_rec, macro_play, + live_voice, live_sub, wake_mgr, + ) + return { + "brain": _safe_status(brain, "brain"), + "voice": _safe_status(voice_client, "voice"), + "arm": _safe_status(arm, "arm"), + "macro_recorder": _safe_status(macro_rec, "macro_rec"), + "macro_player": _safe_status(macro_play, "macro_play"), + "live_voice": _safe_status(live_voice, "live_voice"), + 
@router.get("/status")
async def subprocess_status():
    """Return the live subprocess status plus the current `record_enabled` flag.

    `record_enabled` is read from the recognition state file on every call, so
    the UI shows the toggle's state even when no subprocess object exists.
    """
    from Project.Sanad.main import live_sub as sub

    # record_enabled is a live flag (recognition_state) the panel toggle drives;
    # surface it so the UI shows the current state even before a session starts.
    recording = bool(recognition_state.read(_STATE_PATH).record_enabled)
    if sub is not None:
        report = {**sub.status()}
        report["record_enabled"] = recording
        return report
    return {"available": False, "state": "unavailable", "record_enabled": recording}
@router.post("/start")
async def start_subprocess():
    """Start the live subprocess in a worker thread and return its result.

    Responds 503 (via ``_sub_or_503``) when the subprocess manager is missing,
    and maps a ``RuntimeError`` raised by ``start`` to a 404 carrying its text.
    """
    manager = _sub_or_503()
    try:
        outcome = await asyncio.to_thread(manager.start)
    except RuntimeError as exc:
        raise HTTPException(404, str(exc))
    return outcome
def _loop():
    """Return the ``live_voice`` object from main, or raise HTTP 503 if it is None."""
    # Imported lazily so this route module can load before main is initialised.
    from Project.Sanad.main import live_voice as voice_loop
    if voice_loop is not None:
        return voice_loop
    raise HTTPException(503, "LiveVoiceLoop not initialized.")
def _tail_sync(path, lines: int) -> list[str]:
    """Return the last ``lines`` lines of the text file at ``path``.

    The file is decoded as UTF-8 with undecodable bytes replaced, and trailing
    newlines are stripped from each returned line.

    Args:
        path: File to read.
        lines: Maximum number of trailing lines to keep. Zero or negative
            values yield an empty list instead of raising.

    Returns:
        The kept lines in file order.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # deque(maxlen=<negative>) raises ValueError; the count comes straight from
    # a query parameter, so clamp it rather than surfacing a 500.
    keep = max(0, lines)
    with open(path, "r", encoding="utf-8", errors="replace") as f:
        tail = deque(f, maxlen=keep)
    return [line.rstrip("\n") for line in tail]
@router.post("/snapshot")
async def save_log_snapshot():
    """Copy the current log files to timestamped snapshot files.

    Ensures ``LOGS_DIR`` exists, then runs ``_snapshot_sync`` in a worker
    thread with a ``YYYYmmdd_HHMMSS`` stamp taken from the local clock.
    """
    LOGS_DIR.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    snapshots = await asyncio.to_thread(_snapshot_sync, stamp)
    response = {"ok": True, "saved_at": stamp, "snapshots": snapshots}
    return response
{"status_error": str(exc)} + try: + status_str = json.dumps(status, ensure_ascii=False, default=str) + except Exception: + status_str = str(status) + out.append(f" ✓ {name:15s} {status_str}") + + # Dashboard router load state + out.append("") + out.append("-" * 72) + out.append("DASHBOARD ROUTERS") + out.append("-" * 72) + try: + from Project.Sanad.dashboard.app import _loaded_routes, _failed_routes + out.append(f" loaded ({len(_loaded_routes)}): {', '.join(_loaded_routes)}") + if _failed_routes: + out.append(f" failed ({len(_failed_routes)}):") + for name, err in _failed_routes.items(): + out.append(f" - {name}: {err}") + else: + out.append(" failed (0): —") + except Exception as exc: + out.append(f" could not read dashboard state: {exc}") + + # All log files — tail N lines each, skip snapshots + out.append("") + out.append("-" * 72) + out.append(f"LOG FILES (last {lines_per_file} lines each)") + out.append("-" * 72) + + LOGS_DIR.mkdir(parents=True, exist_ok=True) + log_paths = sorted(LOGS_DIR.glob("*.log*")) + files_included = 0 + for p in log_paths: + if "_snapshot_" in p.stem: + continue # skip stale snapshots + try: + size = p.stat().st_size + except OSError: + size = 0 + out.append("") + out.append(f"=== {p.name} ({size} bytes) ===") + try: + with open(p, "r", encoding="utf-8", errors="replace") as f: + tail = deque(f, maxlen=lines_per_file) + for raw in tail: + out.append(raw.rstrip("\n")) + files_included += 1 + except OSError as exc: + out.append(f" ") + + out.append("") + out.append("=" * 72) + out.append(f"END OF BUNDLE — {files_included} log file(s) included") + out.append("=" * 72) + return "\n".join(out) + + +@router.get("/bundle") +async def logs_bundle(lines: int = 1000, include_system: bool = True): + """Return a single plain-text dump of everything useful for debugging. 
def _block_if_movement_armed():
    """Raise HTTP 409 while the locomotion controller reports it is armed.

    Arm motion is mutually exclusive with walking. The check is best-effort:
    if the controller cannot be imported or queried, the function returns
    silently and leaves enforcement to the arm controller's own motion-block.
    """
    try:
        from Project.Sanad.main import loco_controller  # type: ignore
        if loco_controller is None:
            movement_on = False
        else:
            movement_on = loco_controller.is_armed()
    except HTTPException:
        raise
    except Exception:
        return
    if not movement_on:
        return
    raise HTTPException(
        409, "Arm actions are disabled while movement is enabled. "
             "Disable movement in the Controller tab first.")
@router.get("/audio-files")
async def list_audio_files():
    """List the ``*.wav`` files in ``AUDIO_RECORDINGS_DIR`` with their sizes.

    The directory is created if missing. Files whose ``stat`` fails with
    ``OSError`` are left out of the listing.
    """
    AUDIO_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)
    entries = []
    for wav in sorted(AUDIO_RECORDINGS_DIR.glob("*.wav")):
        try:
            size_bytes = wav.stat().st_size
        except OSError:
            continue
        entries.append({
            "name": wav.name,
            "size_kb": round(size_bytes / 1024, 1),
        })
    return {"files": entries, "dir": str(AUDIO_RECORDINGS_DIR)}
@router.post("/play-combined")
async def play_combined(payload: ComboPlayPayload):
    """Fire a user-picked audio clip and arm action in parallel.

    Motion dispatch is via `arm.trigger_by_id(action_id)` which handles
    BOTH SDK built-in actions (shake_hand, wave, …) and recorded JSONL
    replays. Audio goes through `audio_mgr.play_wav`. Either side may be
    omitted, but not both.

    Returns:
        ``{"ok": True, ...}`` echoing the request fields plus, per side,
        either ``*_played`` on success or ``*_error`` with the failure text.
        Playback failures are reported in the body, not as HTTP errors.

    Raises:
        HTTPException: 400 when nothing was selected or ``audio_file`` escapes
            the audio directory; 404 when the audio file does not exist; 409
            when motion is requested while movement is armed; 503 when the
            required manager/controller is unavailable.
    """
    from Project.Sanad.main import audio_mgr, arm

    has_audio = bool(payload.audio_file)
    has_motion = payload.action_id is not None or bool(payload.motion_file)
    if not has_audio and not has_motion:
        raise HTTPException(400, "pick at least one of audio_file / action_id / motion_file")
    if has_motion:
        _block_if_movement_armed()  # audio-only combos still allowed while armed

    # Collect coroutine *functions* and only call them at gather time. Creating
    # the coroutine objects up front would leave the audio coroutine un-awaited
    # (RuntimeWarning) whenever the later "arm is None" check raises 503.
    jobs = []
    result: dict = {
        "audio_file": payload.audio_file,
        "action_id": payload.action_id,
        "motion_file": payload.motion_file,
    }

    if has_audio:
        if audio_mgr is None:
            raise HTTPException(503, "AudioManager not available")
        audio_path = (AUDIO_RECORDINGS_DIR / payload.audio_file).resolve()
        try:
            # Reject anything that resolves outside the audio directory.
            audio_path.relative_to(AUDIO_RECORDINGS_DIR.resolve())
        except ValueError:
            raise HTTPException(400, "audio_file path traversal denied")
        if not audio_path.exists():
            raise HTTPException(404, f"audio not found: {payload.audio_file}")

        async def _play_audio():
            try:
                await asyncio.to_thread(audio_mgr.play_wav, audio_path)
                result["audio_played"] = audio_path.name
            except Exception as exc:
                log.exception("combined play: audio failed")
                result["audio_error"] = str(exc)
        jobs.append(_play_audio)

    if has_motion:
        if arm is None:
            raise HTTPException(503, "ArmController not available")

        async def _play_motion():
            try:
                if payload.action_id is not None:
                    # SDK built-in OR JSONL — arm.trigger_by_id handles both
                    await asyncio.to_thread(arm.trigger_by_id,
                                            int(payload.action_id),
                                            payload.speed)
                    result["motion_played"] = f"action_id={payload.action_id}"
                else:
                    # Legacy path: bare JSONL filename
                    motion_path = (MOTIONS_DIR / payload.motion_file).resolve()
                    try:
                        motion_path.relative_to(MOTIONS_DIR.resolve())
                    except ValueError:
                        result["motion_error"] = "motion_file path traversal denied"
                        return
                    if not motion_path.exists():
                        result["motion_error"] = f"motion not found: {payload.motion_file}"
                        return
                    await asyncio.to_thread(arm.replay_file, str(motion_path), payload.speed)
                    result["motion_played"] = motion_path.name
            except Exception as exc:
                log.exception("combined play: motion failed")
                result["motion_error"] = str(exc)
        jobs.append(_play_motion)

    # Both sides run concurrently; each records its own outcome in `result`.
    await asyncio.gather(*(job() for job in jobs))
    return {"ok": True, **result}
def _run(fn, *args, **kwargs):
    """Invoke ``fn(*args, **kwargs)`` and translate failures into HTTP errors.

    An ``HTTPException`` passes through untouched, a ``RuntimeError`` becomes
    409, and any other exception is logged with its traceback and becomes 500.
    The callable's return value is handed back unchanged.
    """
    try:
        outcome = fn(*args, **kwargs)
    except Exception as exc:  # noqa: BLE001
        if isinstance(exc, HTTPException):
            raise
        if isinstance(exc, RuntimeError):
            raise HTTPException(409, str(exc))
        log.exception("mask operation failed")
        raise HTTPException(500, str(exc))
    return outcome
@router.post("/text")
async def text(body: TextBody):
    """Send text to the mask through ``set_text`` in a worker thread.

    ``color`` is always passed as a tuple. ``bg`` is passed as a tuple when
    the request supplies a non-empty value, otherwise as ``None``.
    """
    face = _require()
    if body.bg:
        background = tuple(body.bg)
    else:
        background = None
    return await asyncio.to_thread(
        _run, face.set_text, body.text, tuple(body.color),
        body.mode, background, body.speed)
face_color(body: FaceColorBody): + """Recolor the animated face (re-uploads the frame set if the face is live).""" + mf = _require() + return await asyncio.to_thread(_run, mf.set_face_color, body.eye, body.mouth, body.sclera) + + +@router.post("/speaking") +async def speaking(on: bool = Query(...)): + mf = _require() + return await asyncio.to_thread(_run, mf.set_speaking, on) + + +@router.post("/mouth") +async def mouth(level: int = Query(..., ge=0, le=3)): + mf = _require() + return await asyncio.to_thread(_run, mf.set_mouth, level) + + +@router.post("/expression/{name}") +async def expression(name: str): + mf = _require() + return await asyncio.to_thread(_run, mf.show_expression, name) diff --git a/vendor/Sanad/dashboard/routes/mask_social.py b/vendor/Sanad/dashboard/routes/mask_social.py new file mode 100644 index 0000000..4ecf514 --- /dev/null +++ b/vendor/Sanad/dashboard/routes/mask_social.py @@ -0,0 +1,395 @@ +"""Social-media / QR display on the LED mask. + +Renders a QR code (for a preset Instagram account) or an uploaded image onto the +mask's 46x58 display and holds it via the FaceController's reserved scratch slot +until the animated face is resumed. The shared helper :func:`show_social_on_mask` +is also called from the Gemini ``[[SHOW:account]]`` relay wired in ``main.py``. 
def _ensure_mask_path() -> None:
    """Put the Mask library directory at the front of ``sys.path`` if absent.

    The directory is taken from the first source that yields a value:
    the ``SANAD_MASK_DIR`` environment variable, the ``mask_dir`` attribute of
    ``mask_face`` from main (any failure there is ignored), and finally a
    ``Mask`` directory two levels above this file's directory.
    """
    mask_dir = os.environ.get("SANAD_MASK_DIR")
    if not mask_dir:
        try:
            from Project.Sanad.main import mask_face as _mf  # type: ignore
            mask_dir = getattr(_mf, "mask_dir", None)
        except Exception:
            mask_dir = None
    mask_dir = mask_dir or str(Path(__file__).resolve().parents[2] / "Mask")
    if not mask_dir or mask_dir in sys.path:
        return
    sys.path.insert(0, mask_dir)
def _qr_bytes(url: str) -> bytes:
    """Render a QR for ``url`` FULL-SCREEN with the largest crisp (integer) module
    size the 46-wide panel allows — the only way it has any chance of scanning.
    Only a ~version-1 QR (<=17 chars) reaches ~2 px/module; longer data is denser
    and won't scan.

    Returns only the ``colorface``-encoded frame bytes. The QR version is not
    part of the return value; read ``qr.version`` on a ``QRCode`` object where
    it is needed.
    """
    _ensure_mask_path()
    import qrcode
    from PIL import Image
    import colorface as cf
    W, H = cf.DISPLAY_W, cf.DISPLAY_H
    # One pixel per module with a one-module border; scaled up below.
    qr = qrcode.QRCode(error_correction=qrcode.constants.ERROR_CORRECT_L,
                       box_size=1, border=1)
    qr.add_data(url)
    qr.make(fit=True)
    # White modules on a black background.
    q = qr.make_image(fill_color=(255, 255, 255),
                      back_color=(0, 0, 0)).convert("RGB")
    scale = max(1, min(W, H) // max(1, q.width))  # largest integer that fits
    if scale > 1:
        # Width is used for both dimensions, so the resized image is square.
        q = q.resize((q.width * scale, q.width * scale), Image.NEAREST)
    canvas = Image.new("RGB", (W, H), (0, 0, 0))
    # Centre the code on the black canvas.
    canvas.paste(q, ((W - q.width) // 2, (H - q.height) // 2))
    return cf.encode(canvas)
def _friendly(exc: Exception) -> HTTPException:
    """Convert an exception from a mask operation into an ``HTTPException``.

    An ``HTTPException`` is returned as-is. A message containing
    ``"not connected"``, ``"not started"`` or ``"MASK"`` yields a 503 with a
    user-facing hint. Anything else is logged and yields a 500 whose detail is
    ``"<ExceptionType>: <message>"``. The result is returned, not raised.
    """
    if isinstance(exc, HTTPException):
        return exc
    text = str(exc)
    disconnected_markers = ("not connected", "not started", "MASK")
    if any(marker in text for marker in disconnected_markers):
        hint = ("Mask not connected — power it on, bring it close to the robot, and "
                "free it from the phone app.")
        return HTTPException(status_code=503, detail=hint)
    log.exception("mask scratch op failed")
    detail = "%s: %s" % (type(exc).__name__, text)
    return HTTPException(status_code=500, detail=detail)
Exception as exc: + raise _friendly(exc) + + +@router.post("/face/resume") +async def resume_face(): + """Stop showing the scratch image and resume the animated face.""" + mf = _get_face() + return await asyncio.to_thread(mf.set_expression, None) + + +@router.post("/face/mouth") +async def face_mouth(hidden: bool = Query(...)): + """Show (hidden=false) or hide (hidden=true) the mouth on the animated face.""" + mf = _get_face() + return await asyncio.to_thread(mf.set_mouth_hidden, hidden) + + +@router.post("/link") +async def face_link(on: bool = Query(...)): + """Link (on=true) / unlink (on=false) Gemini <-> the mask. + + ON connects the mask + lets Gemini drive its emotions/social. + OFF tears the link down (no BLE churn) and Gemini stops touching the mask. + Default state is OFF. Runs in a thread — a link-on may briefly block while it + makes its first connect attempt.""" + mf = _get_face() + return await asyncio.to_thread(mf.set_gemini_linked, on) + + +# ── saved QR library ──────────────────────────────────────────────── +# Upload QR/images, save them by name, list/show/delete them. Stored as PNGs +# under data/qr_codes so they persist across restarts. 
def _safe_name(name: str) -> str:
    """Reduce ``name`` to a filesystem-safe stem.

    Surrounding whitespace is stripped, every character outside
    ``A-Z a-z 0-9 _ . -`` becomes ``_``, the result is cut to 40 characters,
    and leading/trailing ``.`` and ``_`` are removed. A falsy input, or one
    that ends up empty, yields ``"qr"``.
    """
    raw = (name or "").strip()
    sanitized = re.sub(r"[^A-Za-z0-9_.-]", "_", raw)
    trimmed = sanitized[:40].strip("._")
    return trimmed if trimmed else "qr"
@router.get("/qr/library")
async def qr_library():
    """List the saved QR names (stems of the ``*.png`` files), sorted."""
    return {"qr": sorted(p.stem for p in _qr_dir().glob("*.png"))}


@router.get("/qr/thumb/{name}")
async def qr_thumb(name: str):
    """Serve a saved QR image (for the dashboard thumbnail).

    Raises 404 when no PNG exists for the sanitised ``name``.
    """
    p = _qr_dir() / (_safe_name(name) + ".png")
    if not p.exists():
        raise HTTPException(status_code=404, detail="not found")
    return FileResponse(str(p), media_type="image/png")


@router.post("/qr/show/{name}")
async def qr_show(name: str):
    """Show a saved QR (under the eyes) on the mask.

    Raises 404 when the QR is not in the library; any failure while encoding
    or pushing the image is converted through ``_friendly``.
    """
    p = _qr_dir() / (_safe_name(name) + ".png")
    if not p.exists():
        raise HTTPException(status_code=404, detail="not found")
    from PIL import Image
    try:
        # Image.open is lazy and keeps the file handle open; the context
        # manager guarantees it is closed even when encoding fails.
        with Image.open(p) as img:
            data = await asyncio.to_thread(_image_bytes, img)
        mf = _get_face()
        return await asyncio.to_thread(mf.show_scratch_image, data)
    except Exception as exc:
        raise _friendly(exc) from exc


@router.delete("/qr/{name}")
async def qr_delete(name: str):
    """Delete a saved QR from the library (a no-op if it is already gone)."""
    sn = _safe_name(name)
    # missing_ok avoids the exists()/unlink() race when two deletes overlap.
    (_qr_dir() / (sn + ".png")).unlink(missing_ok=True)
    return {"ok": True, "deleted": sn}
_TEXT_DIR = None


def _text_dir() -> Path:
    """Return the directory holding saved mask texts, creating it if needed.

    The location is resolved once (``BASE_DIR/data/mask_texts``, falling back
    to a path relative to this file when the project config cannot be
    imported) and cached in ``_TEXT_DIR``. ``mkdir`` runs on every call; it is
    idempotent and recreates the directory if it was removed at runtime.
    """
    global _TEXT_DIR
    if _TEXT_DIR is None:
        try:
            from Project.Sanad.config import BASE_DIR
            base = Path(BASE_DIR)
        except Exception:
            base = Path(__file__).resolve().parents[2]
        _TEXT_DIR = base / "data" / "mask_texts"
    _TEXT_DIR.mkdir(parents=True, exist_ok=True)
    return _TEXT_DIR


@router.post("/texts/save")
async def text_save(text: str = Query(...), name: str = Query("")):
    """Save a word/phrase to the text library (name defaults to the text).

    The text is stripped and capped at 200 characters; an empty result is a
    400. The file is written as UTF-8 explicitly so non-ASCII phrases survive
    regardless of the process locale.
    """
    t = (text or "").strip()[:200]
    if not t:
        raise HTTPException(status_code=400, detail="empty text")
    nm = _safe_name(name or t)
    target = _text_dir() / (nm + ".txt")
    await asyncio.to_thread(target.write_text, t, encoding="utf-8")
    return {"ok": True, "name": nm, "text": t}


@router.get("/texts/library")
async def text_library():
    """List the saved texts as ``{"name", "text"}`` (text preview: 80 chars).

    Files that cannot be read are skipped rather than failing the listing.
    """
    out = []
    for p in sorted(_text_dir().glob("*.txt")):
        try:
            # errors="replace" keeps files written under another encoding listable.
            preview = p.read_text(encoding="utf-8", errors="replace")[:80]
        except OSError:
            continue
        out.append({"name": p.stem, "text": preview})
    return {"texts": out}


@router.post("/texts/show/{name}")
async def text_show(name: str):
    """Scroll a saved text across the mask.

    Raises 404 when the text is not in the library; failures while driving the
    mask are converted through ``_friendly``.
    """
    p = _text_dir() / (_safe_name(name) + ".txt")
    try:
        # Reading directly (instead of exists() then read) closes the race
        # with a concurrent delete.
        txt = p.read_text(encoding="utf-8", errors="replace")
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="not found") from None
    mf = _get_face()
    try:
        return await asyncio.to_thread(mf.set_text, txt, (255, 255, 255), None, None, 38)
    except Exception as exc:
        raise _friendly(exc) from exc


@router.delete("/texts/{name}")
async def text_delete(name: str):
    """Delete a saved text (a no-op if it is already gone)."""
    nm = _safe_name(name)
    (_text_dir() / (nm + ".txt")).unlink(missing_ok=True)
    return {"ok": True, "deleted": nm}
def _block_if_movement_armed():
    """409 if locomotion movement is armed — arm actions are mutually exclusive
    with walking. The arm controller's own motion-block is the safety net; this
    just gives the dashboard a clear message instead of a silent no-op.

    Best-effort: if the locomotion state cannot be read, nothing is blocked.
    """
    try:
        from Project.Sanad.main import loco_controller  # type: ignore
        armed = loco_controller is not None and loco_controller.is_armed()
    except HTTPException:
        raise
    except Exception:
        return
    if armed:
        raise HTTPException(
            409, "Arm actions are disabled while movement is enabled. "
                 "Disable movement in the Controller tab first.")


@router.get("/status")
async def motion_status():
    """Return the arm controller status, or an error body when no arm is attached."""
    from Project.Sanad.main import arm
    return arm.status() if arm else {"error": "Arm not attached"}


@router.get("/actions")
async def list_actions():
    """List the available arm actions (empty when no arm is attached)."""
    from Project.Sanad.main import arm
    return {"actions": arm.list_actions() if arm else []}


class TriggerPayload(BaseModel):
    # Either action_id or action_name selects the action; action_id wins when both are set.
    action_id: int | None = None
    action_name: str | None = None
    # Playback speed; clamped to [0.1, 5.0] by the route.
    speed: float = 1.0


@router.post("/trigger")
async def trigger_action(payload: TriggerPayload):
    """Trigger an arm action by id (preferred) or by name.

    Raises 503 when no arm is attached, 409 while movement is armed, 400 when
    neither selector is given and 404 when the controller reports an unknown
    action (``KeyError``).
    """
    from Project.Sanad.main import arm
    if arm is None:
        raise HTTPException(503, "Arm controller not attached.")
    _block_if_movement_armed()

    speed = max(0.1, min(payload.speed, 5.0))

    # NOTE: TOCTOU on arm.is_busy is unavoidable from the route layer.
    # The internal arm controller has its own _lock + _is_busy guard inside
    # _execute() that returns silently if busy. We rely on that.
    if payload.action_id is not None:
        key, target, trigger = "action_id", payload.action_id, arm.trigger_by_id
    elif payload.action_name:
        key, target, trigger = "action_name", payload.action_name, arm.trigger_by_name
    else:
        raise HTTPException(400, "Provide action_id or action_name.")

    try:
        await asyncio.to_thread(trigger, target, speed)
    except KeyError as exc:
        raise HTTPException(404, str(exc)) from exc
    return {"ok": True, key: target, "speed": speed}


@router.post("/cancel")
async def cancel_motion():
    """Cancel the running arm motion (503 when no arm is attached)."""
    from Project.Sanad.main import arm
    if arm is None:
        raise HTTPException(503, "Arm controller not attached.")
    arm.cancel()
    return {"ok": True, "cancelled": True}


@router.post("/gestural-speaking")
async def toggle_gestural(enabled: bool = True):
    """Enable/disable gestural speaking and report the resulting state."""
    from Project.Sanad.main import brain
    # Mirrors the arm guard above so a missing brain yields a clean 503
    # instead of an AttributeError.
    # NOTE(review): assumes main.brain can be None like main.arm — confirm.
    if brain is None:
        raise HTTPException(503, "Brain not attached.")
    brain.set_gestural_speaking(enabled)
    return {"gestural_speaking": brain.gestural_speaking}
def _nav_config() -> dict:
    """Resolve nav connection config. Precedence: env var -> config -> default.

    Returns a dict with the string values ``web_nav3_url``, ``rosbridge_url``
    and ``robot``. An empty env var or config value counts as unset.
    """
    import os

    from Project.Sanad.core.config_loader import section as _cfg_section

    nav_section = _cfg_section("dashboard", "navigation")
    # (result key / config key, environment variable, built-in default)
    lookups = (
        ("web_nav3_url", "WEB_NAV3_URL", "http://127.0.0.1:8765"),
        ("rosbridge_url", "ROSBRIDGE_URL", "ws://127.0.0.1:9090"),
        ("robot", "SANAD_ROBOT_NAME", "sanad"),
    )
    return {
        key: str(os.environ.get(env_var) or nav_section.get(key) or fallback)
        for key, env_var, fallback in lookups
    }
@router.get("/status")
async def status():
    """Report navigation status; degrades instead of raising.

    Without a client the body is ``{"available": False, "error": ...}``.
    Otherwise the client's status (converted via ``as_dict()`` when present,
    else ``dict()``) is returned with ``available`` set to True.
    """
    if _CLIENT is None:
        return {"available": False, "error": _IMPORT_ERROR}
    snapshot = await asyncio.to_thread(_CLIENT.status)
    # WebNav3Client.status() returns a NavStatus dataclass.
    if hasattr(snapshot, "as_dict"):
        payload = snapshot.as_dict()
    else:
        payload = dict(snapshot)
    payload["available"] = True
    return payload
@router.post("/cancel")
async def cancel():
    """Cancel navigation and hand the legs back to manual control.

    Steps: forward ``cancel`` to the web_nav3 client, disarm the arrival
    monitor, send a real goal-cancel over rosbridge, then release the arbiter.
    When the client result is a dict it always carries ``cancel_sent`` so the
    caller can tell whether the real cancel went out; a failed cancel is
    logged at WARNING because the arbiter is released regardless.
    """
    client = _require()
    res = await asyncio.to_thread(client.cancel)
    # WebNav3Client.cancel() is a no-op server-side (it only returns a note),
    # so releasing the arbiter without truly stopping Nav2 would let the robot
    # keep driving while manual loco re-acquires the legs (double-drive). Send a
    # REAL goal-cancel over rosbridge first, and disarm the arrival monitor so a
    # stale terminal can't fire, THEN release.
    cancel_sent = False
    try:
        from Project.Sanad.navigation.goal_monitor import request_cancel, disarm
    except Exception as exc:  # noqa: BLE001
        # The monitor is optional; its absence must not fail the route.
        log.debug("goal cancel skipped: %s", exc)
    else:
        try:
            disarm()
            cancel_sent = bool(await asyncio.to_thread(request_cancel))
        except Exception as exc:  # noqa: BLE001
            log.warning("goal cancel failed — Nav2 may still be driving: %s", exc)
    if isinstance(res, dict):
        res = {**res, "cancel_sent": cancel_sent}
    _arbiter.release_nav()
    return res
def _resolve_place(client, spoken: str) -> dict:
    """Resolve a spoken place name against the ACTIVE map's places.

    Strategy: exact (caseless) match -> single substring candidate ->
    ambiguous / unknown. Returns a dict the caller (and ultimately Gemini)
    can act on:

      * ``{"ok": True, "resolved": name, "map": active_map}`` on success
      * ``{"ok": False, "reason": ...}`` with reason one of ``status_error``,
        ``no_map``, ``no_place``, ``ambiguous``, ``unknown_place``

    Never raises: place entries that are not dicts, or whose ``name`` is not a
    non-blank string, are ignored instead of crashing the matcher.
    """
    try:
        st = client.status()
        body = st.as_dict() if hasattr(st, "as_dict") else dict(st)
    except Exception as exc:  # noqa: BLE001
        return {"ok": False, "reason": "status_error", "detail": str(exc)[:160]}
    if not body.get("bringup_alive"):
        return {"ok": False, "reason": "no_map",
                "detail": "No navigation session is running — load a map first."}
    active_map = body.get("active_map")
    try:
        places = client.list_places(active_map) or []
    except Exception:  # noqa: BLE001
        places = []
    if not isinstance(places, (list, tuple)):
        # e.g. an error body returned as a dict — nothing to match against.
        places = []
    names = [
        p["name"] for p in places
        if isinstance(p, dict) and isinstance(p.get("name"), str) and p["name"].strip()
    ]
    wanted = (spoken or "").strip().casefold()
    if not wanted:
        return {"ok": False, "reason": "no_place", "map": active_map, "places": names}
    keyed = [(n, n.strip().casefold()) for n in names]
    exact = [n for n, key in keyed if key == wanted]
    if exact:
        return {"ok": True, "resolved": exact[0], "map": active_map}
    # Substring match in either direction; dict.fromkeys de-dups, keeping order.
    subs = list(dict.fromkeys(
        n for n, key in keyed if wanted in key or key in wanted
    ))
    if len(subs) == 1:
        return {"ok": True, "resolved": subs[0], "map": active_map}
    if len(subs) > 1:
        return {"ok": False, "reason": "ambiguous", "candidates": subs, "map": active_map}
    return {"ok": False, "reason": "unknown_place", "candidates": names, "map": active_map}
@router.post("/voice_goto")
async def voice_goto(body: _VoiceGotoBody):
    """Resolve a spoken place name and drive there — Gemini's navigate_to_place.

    Flow: resolve the name, claim the legs for Nav2 through the arbiter,
    dispatch the goto, then arm the arrival monitor (best-effort). Failures
    are returned as structured ``{"ok": False, "reason": ...}`` bodies the
    model can speak from; the arbiter claim is released when the dispatch
    itself is rejected.
    """
    nav_client = _require()
    lookup = await asyncio.to_thread(_resolve_place, nav_client, body.place or "")
    if not lookup.get("ok"):
        return lookup
    target = lookup["resolved"]

    # Claim the legs for Nav2 — refuse (don't raise) if manual loco is armed.
    if not _arbiter.acquire_nav():
        return {"ok": False, "reason": "manual_armed",
                "detail": "Manual movement (Controller) is armed — disarm it to navigate."}

    dispatched = await asyncio.to_thread(nav_client.goto, target)
    rejected = isinstance(dispatched, dict) and not dispatched.get("ok", True)
    if rejected:
        _arbiter.release_nav()
        return {"ok": False, "reason": "dispatch_failed",
                "resolved": target, "detail": dispatched}

    # Arm arrival monitoring (best-effort; absence must not fail the drive).
    try:
        from Project.Sanad.navigation.goal_monitor import arm_goal
        arm_goal(target)
    except Exception as exc:  # noqa: BLE001
        log.debug("goal monitor arm skipped: %s", exc)
    return {"ok": True, "resolved": target, "map": lookup.get("map")}
@router.post("/missions/run")
async def run_mission(body: _IdBody):
    """Run a saved mission, arbiter-gated like a goto.

    Claims the legs for Nav2 first (409 via ``_claim_nav`` if manual movement
    is armed). If the client reports ``ok: False`` the claim is released again
    so manual control is not locked out by a mission that never started.
    """
    nav_client = _require()
    _claim_nav()
    outcome = await asyncio.to_thread(nav_client.run_mission, body.id)
    rejected = isinstance(outcome, dict) and not outcome.get("ok", True)
    if rejected:
        _arbiter.release_nav()
    return outcome
def _load_rule_prompts() -> dict[str, str]:
    """Parse the rule file into its system and replay prompts.

    The file is split into ``[SECTION]`` blocks; ``SYSTEM_PROMPT`` and
    ``REPLAY_SYSTEM_PROMPT`` become ``system_prompt`` and ``replay_prompt``.
    A missing file yields empty prompts, and an empty system prompt falls
    back to ``_load_system_prompt()``.
    """
    parsed: dict[str, list[str]] = {}
    try:
        raw = RULE_PROMPT_PATH.read_text(encoding="utf-8-sig").strip()
    except FileNotFoundError:
        raw = None

    if raw is not None:
        heading = None
        for line in raw.splitlines():
            token = line.strip()
            if token.startswith("[") and token.endswith("]"):
                # A repeated heading restarts its section.
                heading = token[1:-1].strip()
                parsed[heading] = []
                continue
            if heading is not None:
                parsed[heading].append(line.rstrip())

    prompts = {
        "system_prompt": "\n".join(parsed.get("SYSTEM_PROMPT", [])).strip(),
        "replay_prompt": "\n".join(parsed.get("REPLAY_SYSTEM_PROMPT", [])).strip(),
    }
    if not prompts["system_prompt"]:
        prompts["system_prompt"] = _load_system_prompt()
    return prompts
@router.post("/reload")
async def reload_prompts():
    """Re-read the rule file and return the resulting prompts plus both paths."""
    loaded = _load_rule_prompts()
    response = {"ok": True}
    response["system_prompt"] = loaded["system_prompt"]
    response["replay_prompt"] = loaded["replay_prompt"]
    response["script_path"] = str(SCRIPT_PROMPT_PATH)
    response["rule_path"] = str(RULE_PROMPT_PATH)
    return response
def _bump_gallery_version() -> int:
    """Increment ``gallery_version`` in the persisted state and return the new value."""
    # NOTE(review): read + mutate are two separate calls, so concurrent bumps
    # could presumably produce the same version — confirm against recognition_state.
    bumped = recognition_state.read(STATE_PATH).gallery_version + 1
    recognition_state.mutate(STATE_PATH, gallery_version=bumped)
    return bumped
@router.post("/vision")
async def set_vision(on: bool = Query(...)):
    """Enable / disable camera vision (hot — no Gemini restart).

    ON starts the camera if it is not running; a failed start persists
    ``vision_enabled=False`` and raises 503. OFF stops a running camera. The
    requested state is then persisted and returned with the camera status.
    Raises 503 when the camera subsystem is unavailable.
    """
    cam = _get_camera()
    if cam is None:
        log.warning("vision toggle requested but camera subsystem unavailable")
        raise HTTPException(503, "Camera subsystem not available.")

    running = cam.is_running()
    if on:
        if not running and not cam.start():
            log.warning("vision ON requested but camera.start() failed: %s",
                        cam.error or "no backend")
            _bump_and_write_state(vision_enabled=False)
            raise HTTPException(503,
                                f"Camera could not start (no backend). {cam.error or ''}")
    elif running:
        cam.stop()

    state = _bump_and_write_state(vision_enabled=bool(on))
    label = "ON" if on else "OFF"
    backend = cam.backend if cam.is_running() else "none"
    log.info("vision %s (backend=%s)", label, backend)
    return {"ok": True, "vision_enabled": state.vision_enabled,
            "camera": cam.status()}
@router.get("/frame.jpg")
async def latest_frame():
    """Serve the most recent camera frame as an uncached JPEG.

    Raises 503 when the camera subsystem is unavailable and 404 when
    ``snapshot_jpeg()`` has nothing to return yet.
    """
    camera = _get_camera()
    if camera is None:
        raise HTTPException(503, "Camera subsystem unavailable.")
    frame = camera.snapshot_jpeg()
    if frame:
        return Response(
            content=frame,
            media_type="image/jpeg",
            headers={"Cache-Control": "no-store, must-revalidate"},
        )
    raise HTTPException(404, "No frame captured yet.")
def _entry_to_dict(entry) -> dict:
    """Serialise a gallery entry for the dashboard.

    Copies ``id``, ``name``, ``description`` and ``added_at`` and adds a
    ``photos`` list of ``{"name", "size_bytes"}``, one per sample path.
    Samples whose ``stat()`` fails with ``OSError`` are left out.
    """
    def _describe(sample):
        try:
            return {"name": sample.name, "size_bytes": sample.stat().st_size}
        except OSError:
            return None

    described = [_describe(sample) for sample in entry.sample_paths]
    photo_list = [item for item in described if item is not None]
    return {
        "id": entry.id,
        "name": entry.name,
        "description": entry.description,
        "added_at": entry.added_at,
        "photos": photo_list,
    }
entries = gallery.list() + return {"faces": [_entry_to_dict(e) for e in entries], + "total": len(entries)} + + +class RenamePayload(BaseModel): + name: Optional[str] = None + + +class DescribePayload(BaseModel): + description: Optional[str] = None + + +@router.post("/faces/enroll") +async def enroll_from_camera(name: Optional[str] = Query(default=None), + description: Optional[str] = Query(default=None)): + """Create a new face from the camera's latest snapshot.""" + gallery = _get_gallery() + if gallery is None: + raise HTTPException(503, "Face gallery subsystem unavailable.") + cam = _get_camera() + if cam is None or not cam.is_running(): + raise HTTPException(409, "Camera is not running. Toggle Vision ON first.") + # get_fresh_frame waits briefly for a current frame so the enrolled + # photo is the scene the user is posing for, not a stale buffer. + jpeg = cam.get_fresh_frame(max_age_s=0.5, timeout_s=1.5) + if not jpeg: + raise HTTPException(409, "Camera has no frame yet. Wait a moment and retry.") + entry = gallery.create_face( + [jpeg], + name=name.strip() if name else None, + description=description.strip() if description else None, + ) + v = _bump_gallery_version() + log.info("enrolled face_%d via camera (name=%s, desc=%s, v.%d)", + entry.id, name or "(unnamed)", + "yes" if description else "no", v) + return {"ok": True, "face": _entry_to_dict(entry)} + + +@router.post("/faces/upload") +async def enroll_from_upload( + files: list[UploadFile] = File(...), + name: Optional[str] = Query(default=None), + description: Optional[str] = Query(default=None), +): + """Create a new face from uploaded image file(s).""" + gallery = _get_gallery() + if gallery is None: + raise HTTPException(503, "Face gallery subsystem unavailable.") + if not files: + raise HTTPException(400, "At least one image file required.") + image_bytes: list[bytes] = [] + for f in files: + content = await f.read() + _validate_image(content, f.filename) + image_bytes.append(content) + entry = 
gallery.create_face( + image_bytes, + name=name.strip() if name else None, + description=description.strip() if description else None, + ) + v = _bump_gallery_version() + log.info("enrolled face_%d via upload (%d photos, name=%s, desc=%s, v.%d)", + entry.id, len(image_bytes), name or "(unnamed)", + "yes" if description else "no", v) + return {"ok": True, "face": _entry_to_dict(entry)} + + +@router.post("/faces/{face_id}/capture") +async def capture_to_face(face_id: int): + """Add a new sample (from the camera) to an existing face.""" + gallery = _get_gallery() + if gallery is None: + raise HTTPException(503, "Face gallery subsystem unavailable.") + cam = _get_camera() + if cam is None or not cam.is_running(): + raise HTTPException(409, "Camera is not running. Toggle Vision ON first.") + jpeg = cam.get_fresh_frame(max_age_s=0.5, timeout_s=1.5) + if not jpeg: + raise HTTPException(409, "Camera has no frame yet.") + try: + fname = gallery.add_photo(face_id, jpeg) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + v = _bump_gallery_version() + log.info("captured new photo for face_%d → %s (v.%d)", face_id, fname, v) + return {"ok": True, "added": fname, "face": _entry_to_dict(gallery.get(face_id))} + + +@router.post("/faces/{face_id}/upload") +async def upload_to_face(face_id: int, files: list[UploadFile] = File(...)): + """Add one or more uploaded samples to an existing face.""" + gallery = _get_gallery() + if gallery is None: + raise HTTPException(503, "Face gallery subsystem unavailable.") + if gallery.get(face_id) is None: + raise HTTPException(404, f"face_{face_id} not found") + added: list[str] = [] + for f in files: + content = await f.read() + _validate_image(content, f.filename) + try: + fname = gallery.add_photo(face_id, content) + added.append(fname) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + v = _bump_gallery_version() + log.info("uploaded %d photo(s) to face_%d (v.%d)", len(added), face_id, v) + return 
{"ok": True, "added": added, + "face": _entry_to_dict(gallery.get(face_id))} + + +@router.post("/faces/{face_id}/rename") +async def rename_face(face_id: int, payload: RenamePayload): + gallery = _get_gallery() + if gallery is None: + raise HTTPException(503, "Face gallery subsystem unavailable.") + try: + gallery.rename(face_id, payload.name) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + v = _bump_gallery_version() + log.info("renamed face_%d → %s (v.%d)", face_id, + payload.name or "(unnamed)", v) + return {"ok": True, "face": _entry_to_dict(gallery.get(face_id))} + + +@router.post("/faces/{face_id}/describe") +async def describe_face(face_id: int, payload: DescribePayload): + """Set / clear a face's free-text description. The description is + folded into the Gemini primer turn so Gemini can reference it.""" + gallery = _get_gallery() + if gallery is None: + raise HTTPException(503, "Face gallery subsystem unavailable.") + try: + gallery.set_description(face_id, payload.description) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + v = _bump_gallery_version() + log.info("described face_%d (%s, v.%d)", face_id, + "set" if payload.description else "cleared", v) + return {"ok": True, "face": _entry_to_dict(gallery.get(face_id))} + + +@router.delete("/faces/{face_id}") +async def delete_face(face_id: int): + gallery = _get_gallery() + if gallery is None: + raise HTTPException(503, "Face gallery subsystem unavailable.") + try: + gallery.delete_face(face_id) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + v = _bump_gallery_version() + log.info("deleted face_%d (v.%d)", face_id, v) + return {"ok": True, "deleted": face_id} + + +@router.delete("/faces/{face_id}/photo/{photo_name}") +async def delete_photo(face_id: int, photo_name: str): + gallery = _get_gallery() + if gallery is None: + raise HTTPException(503, "Face gallery subsystem unavailable.") + # safe filename — only allow simple file 
names, no traversal + if "/" in photo_name or ".." in photo_name or "\x00" in photo_name: + raise HTTPException(400, "Invalid photo name.") + try: + gallery.delete_photo(face_id, photo_name) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + except ValueError as exc: + raise HTTPException(400, str(exc)) + v = _bump_gallery_version() + log.info("deleted photo %s from face_%d (v.%d)", photo_name, face_id, v) + return {"ok": True, "deleted": photo_name} + + +@router.get("/faces/{face_id}/photo/{photo_name}") +async def get_photo(face_id: int, photo_name: str, + download: int = Query(default=0)): + """Serve a single photo. Add ?download=1 for attachment disposition.""" + gallery = _get_gallery() + if gallery is None: + raise HTTPException(503, "Face gallery subsystem unavailable.") + if "/" in photo_name or ".." in photo_name or "\x00" in photo_name: + raise HTTPException(400, "Invalid photo name.") + path = gallery.get_photo(face_id, photo_name) + if path is None: + raise HTTPException(404, "Photo not found.") + media = "image/png" if path.suffix.lower() == ".png" else "image/jpeg" + headers = {} + if download: + headers["Content-Disposition"] = ( + f'attachment; filename="face_{face_id}_{photo_name}"' + ) + return FileResponse(path, media_type=media, headers=headers) + + +@router.get("/faces/{face_id}/download.zip") +async def download_face_zip(face_id: int): + gallery = _get_gallery() + if gallery is None: + raise HTTPException(503, "Face gallery subsystem unavailable.") + try: + data = gallery.zip_face(face_id) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + return StreamingResponse( + io.BytesIO(data), + media_type="application/zip", + headers={ + "Content-Disposition": f'attachment; filename="face_{face_id}.zip"', + "Content-Length": str(len(data)), + }, + ) diff --git a/vendor/Sanad/dashboard/routes/records.py b/vendor/Sanad/dashboard/routes/records.py new file mode 100644 index 0000000..f9212c1 --- /dev/null +++ 
b/vendor/Sanad/dashboard/routes/records.py @@ -0,0 +1,302 @@ +"""Saved records management — list, play, pause, resume, stop, rename, delete. + +Manages WAV recordings saved via the typed replay engine. +""" + +from __future__ import annotations + +import json +import threading +from pathlib import Path +from typing import Any + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from Project.Sanad.config import AUDIO_RECORDINGS_DIR +from Project.Sanad.dashboard.routes._safe_io import ( + safe_filename, safe_path_under, atomic_write_json, +) + +router = APIRouter() + +RECORDS_INDEX = AUDIO_RECORDINGS_DIR / "records.json" +_INDEX_LOCK = threading.Lock() + +# Strong refs to fire-and-forget playback tasks. The event loop only keeps a +# weak reference to tasks, so an unreferenced create_task() result can be +# garbage-collected (cancelling playback) before it finishes. Mirror replay.py. +import asyncio as _asyncio # noqa: E402 +_BG_TASKS: set[_asyncio.Task] = set() + + +def _load_index() -> dict[str, Any]: + if not RECORDS_INDEX.exists(): + return {"records": [], "total_records": 0, "last_updated": ""} + try: + with open(RECORDS_INDEX, "r", encoding="utf-8") as f: + return json.load(f) + except (json.JSONDecodeError, OSError): + # Backup corrupt file rather than nuking it + try: + RECORDS_INDEX.rename(RECORDS_INDEX.with_suffix(".json.corrupt")) + except OSError: + pass + return {"records": [], "total_records": 0, "last_updated": ""} + + +def _save_index(payload: dict[str, Any]): + AUDIO_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True) + payload["total_records"] = len(payload.get("records", [])) + atomic_write_json(RECORDS_INDEX, payload) + + +def _resolve_path(path_str: str) -> Path: + """Resolve record path — basename / relative / absolute. + + Legacy records stored absolute paths. New records store basenames. + Both flavors resolve to a real file under AUDIO_RECORDINGS_DIR. 
+ """ + if not path_str: + return AUDIO_RECORDINGS_DIR + p = Path(path_str) + if p.is_absolute(): + return p + return AUDIO_RECORDINGS_DIR / p + + +def _reconcile(payload: dict[str, Any]) -> tuple[dict[str, Any], int]: + kept, removed = [], 0 + for entry in payload.get("records", []): + try: + sp = _resolve_path(entry["files"]["speaker_recording"]["path"]) + rp = _resolve_path(entry["files"]["gemini_raw_output"]["path"]) + if sp.exists() and rp.exists(): + kept.append(entry) + else: + removed += 1 + except (KeyError, TypeError): + removed += 1 + payload["records"] = kept + payload["total_records"] = len(kept) + return payload, removed + + +@router.get("/") +async def list_records(): + with _INDEX_LOCK: + payload = _load_index() + payload, removed = _reconcile(payload) + if removed: + _save_index(payload) + return payload + + +class RecordPlay(BaseModel): + record_name: str + file_kind: str = "speaker" # speaker | raw + + +@router.post("/play") +async def play_record(payload: RecordPlay): + with _INDEX_LOCK: + index = _load_index() + entry = next((r for r in index.get("records", []) if r.get("record_name") == payload.record_name), None) + if entry is None: + raise HTTPException(404, f"Record not found: {payload.record_name}") + + file_key = "speaker_recording" if payload.file_kind == "speaker" else "gemini_raw_output" + raw_path = _resolve_path(entry["files"][file_key]["path"]).resolve() + base = AUDIO_RECORDINGS_DIR.resolve() + try: + raw_path.relative_to(base) + except ValueError: + raise HTTPException(400, "Record path outside recordings directory.") + if not raw_path.exists(): + raise HTTPException(404, f"File not found: {raw_path.name}") + + from Project.Sanad.main import audio_mgr + import threading + # Fire-and-forget on a DEDICATED daemon thread — NOT asyncio.to_thread. 
+ # to_thread runs on the shared default executor, which gets starved while + # the dashboard services the live-voice child's reconnect chatter; that + # delayed record playback by several seconds (clip silent, counter parked). + # A dedicated thread starts immediately regardless of executor/event-loop + # load. play_wav blocks for the clip duration and serves pause/stop via + # _play_state; the UI stays responsive because this handler returns now. + # Python keeps running threads alive, so no ref is needed to prevent GC. + threading.Thread( + target=audio_mgr.play_wav, args=(raw_path, payload.record_name), + name="record-playback", daemon=True, + ).start() + return {"ok": True, "record_name": payload.record_name, + "file_kind": payload.file_kind, "path": str(raw_path)} + + +@router.post("/pause") +async def pause_playback(): + from Project.Sanad.main import audio_mgr + return audio_mgr.pause_playback() + + +@router.post("/resume") +async def resume_playback(): + from Project.Sanad.main import audio_mgr + return audio_mgr.resume_playback() + + +@router.post("/seek") +async def seek_playback(position_sec: float): + """Jump to a position (seconds) in the currently-playing clip — used by the + waveform scrubber. No-op (ok=False) if nothing is playing.""" + from Project.Sanad.main import audio_mgr + return audio_mgr.seek_playback(position_sec) + + +@router.post("/stop") +async def stop_playback(): + from Project.Sanad.main import audio_mgr + import asyncio + await asyncio.to_thread(audio_mgr.stop_playback) + return {"ok": True, "stopped": True} + + +@router.get("/playback-status") +async def playback_status(): + from Project.Sanad.main import audio_mgr + return audio_mgr.playback_status() + + +@router.post("/live-hold") +async def set_live_hold(on: bool): + """Manual hold for the live-Gemini pause. on=True pauses the live voice and + keeps it paused (records won't resume it) until on=False is sent. 
Default + behaviour (on=False) is AUTO: records pause Gemini only for the clip.""" + from Project.Sanad.main import audio_mgr + return {"live_hold": audio_mgr.set_live_voice_hold(on)} + + +class RecordRename(BaseModel): + record_name: str + new_name: str + + +@router.post("/rename") +async def rename_record(payload: RecordRename): + new_name = safe_filename(payload.new_name) + # Strip any extension the user provided — we add our own + if new_name.lower().endswith(".wav"): + new_name = new_name[:-4] + if not new_name or new_name.startswith("."): + raise HTTPException(400, "Invalid new name.") + + with _INDEX_LOCK: + index = _load_index() + entry = next( + (r for r in index.get("records", []) if r.get("record_name") == payload.record_name), + None, + ) + if entry is None: + raise HTTPException(404, f"Record not found: {payload.record_name}") + + base = AUDIO_RECORDINGS_DIR.resolve() + for key in ("speaker_recording", "gemini_raw_output"): + try: + old_path = _resolve_path(entry["files"][key]["path"]).resolve() + old_path.relative_to(base) # ensure inside recordings dir + except (KeyError, ValueError): + continue + if not old_path.exists(): + continue + suffix = "_raw.wav" if key == "gemini_raw_output" else ".wav" + new_path = safe_path_under(AUDIO_RECORDINGS_DIR, f"{new_name}{suffix}") + if new_path.exists(): + raise HTTPException(409, f"File already exists: {new_path.name}") + old_path.rename(new_path) + entry["files"][key]["path"] = new_path.name # basename — portable + entry["files"][key]["name"] = new_path.name + + entry["record_name"] = new_name + _save_index(index) + return {"ok": True, "record": entry} + + +class RecordDelete(BaseModel): + record_name: str + + +@router.post("/delete") +async def delete_record(payload: RecordDelete): + with _INDEX_LOCK: + index = _load_index() + kept = [] + deleted_entry = None + for r in index.get("records", []): + if r.get("record_name") == payload.record_name and deleted_entry is None: + deleted_entry = r + else: + 
kept.append(r) + + if deleted_entry is None: + raise HTTPException(404, f"Record not found: {payload.record_name}") + + base = AUDIO_RECORDINGS_DIR.resolve() + deleted_files = [] + for fi in deleted_entry.get("files", {}).values(): + try: + # _resolve_path handles new-style basenames (resolved under + # AUDIO_RECORDINGS_DIR) as well as legacy absolute paths. + # A raw Path(basename) would resolve vs CWD and fall outside + # base, so the relative_to guard would skip the unlink and the + # WAV would be orphaned on disk. Mirror play_record/rename_record. + p = _resolve_path(fi.get("path", "")).resolve() + p.relative_to(base) # never delete files outside recordings dir + except (ValueError, OSError): + continue + if p.exists(): + p.unlink() + deleted_files.append(str(p)) + + index["records"] = kept + _save_index(index) + return {"ok": True, "deleted": payload.record_name, "deleted_files": deleted_files} + + +class RecordBulkDelete(BaseModel): + record_names: list[str] | None = None + all: bool = False + + +@router.post("/delete-bulk") +async def delete_bulk(payload: RecordBulkDelete): + """Delete many records in one call. all=True wipes every record; otherwise + only those in record_names. 
Files are unlinked, guarded to the recordings + dir (same safety as /delete).""" + names = set(payload.record_names or []) + with _INDEX_LOCK: + index = _load_index() + base = AUDIO_RECORDINGS_DIR.resolve() + kept: list = [] + removed: list = [] + deleted_files = 0 + for r in index.get("records", []): + if payload.all or r.get("record_name") in names: + removed.append(r.get("record_name")) + for fi in r.get("files", {}).values(): + try: + p = _resolve_path(fi.get("path", "")).resolve() + p.relative_to(base) # never delete outside recordings dir + except (ValueError, OSError): + continue + if p.exists(): + try: + p.unlink() + deleted_files += 1 + except OSError: + pass + else: + kept.append(r) + index["records"] = kept + _save_index(index) + return {"ok": True, "deleted": removed, "deleted_count": len(removed), + "deleted_files": deleted_files} diff --git a/vendor/Sanad/dashboard/routes/replay.py b/vendor/Sanad/dashboard/routes/replay.py new file mode 100644 index 0000000..698bcca --- /dev/null +++ b/vendor/Sanad/dashboard/routes/replay.py @@ -0,0 +1,184 @@ +"""Replay management endpoints — JSONL files, teaching, test replay, speed control. + +Mirrors the replay management features from AI_Photographer/Server/photo_server.py. 
+""" + +from __future__ import annotations + +import asyncio + +from fastapi import APIRouter, HTTPException, UploadFile, File +from fastapi.responses import FileResponse +from pydantic import BaseModel + +from Project.Sanad.config import MOTIONS_DIR +from Project.Sanad.core.logger import get_logger +from Project.Sanad.dashboard.routes._safe_io import ( + safe_path_under, check_upload_size, atomic_write_bytes, +) + +log = get_logger("replay_route") +router = APIRouter() + + +def _block_if_movement_armed(): + """409 when locomotion movement is armed — arm motion (replay / teaching) is + mutually exclusive with walking.""" + try: + from Project.Sanad.main import loco_controller # type: ignore + armed = loco_controller is not None and loco_controller.is_armed() + except HTTPException: + raise + except Exception: + return + if armed: + raise HTTPException( + 409, "Arm actions are disabled while movement is enabled. " + "Disable movement in the Controller tab first.") + + +# -- models -- + +class ReplayRequest(BaseModel): + name: str + speed: float = 1.0 + +class RenameRequest(BaseModel): + old_name: str + new_name: str + +class TeachRequest(BaseModel): + name: str + duration_sec: float = 15.0 + + +# -- motion file CRUD -- + +@router.get("/files") +async def list_motion_files(): + from Project.Sanad.main import arm + return {"files": arm.list_motion_files()} + + +@router.get("/files/{filename}") +async def download_motion_file(filename: str): + path = safe_path_under(MOTIONS_DIR, filename) + if not path.exists(): + raise HTTPException(404, "File not found.") + return FileResponse(path, filename=path.name, media_type="application/json") + + +@router.post("/files/upload") +async def upload_motion_file(file: UploadFile = File(...)): + if not file.filename or not file.filename.lower().endswith(".jsonl"): + raise HTTPException(400, "Only .jsonl files accepted.") + MOTIONS_DIR.mkdir(parents=True, exist_ok=True) + dest = safe_path_under(MOTIONS_DIR, file.filename) + content = 
await file.read() + check_upload_size(content) + atomic_write_bytes(dest, content) + return {"ok": True, "name": dest.name, "size_bytes": len(content)} + + +@router.post("/files/rename") +async def rename_motion_file(payload: RenameRequest): + old = safe_path_under(MOTIONS_DIR, payload.old_name) + new = safe_path_under(MOTIONS_DIR, payload.new_name) + if not old.exists(): + raise HTTPException(404, f"File not found: {payload.old_name}") + if new.exists(): + raise HTTPException(409, f"File already exists: {payload.new_name}") + old.rename(new) + return {"ok": True, "old_name": old.name, "new_name": new.name} + + +@router.delete("/files/{filename}") +async def delete_motion_file(filename: str): + path = safe_path_under(MOTIONS_DIR, filename) + if not path.exists(): + raise HTTPException(404, "File not found.") + path.unlink() + return {"ok": True, "deleted": path.name} + + +# -- test replay -- + +_BG_TASKS: set[asyncio.Task] = set() + + +@router.post("/test") +async def test_replay(payload: ReplayRequest): + """Test-play a motion file at the given speed.""" + from Project.Sanad.main import arm + _block_if_movement_armed() + if arm.is_busy: + raise HTTPException(409, "Arm is busy.") + path = safe_path_under(MOTIONS_DIR, payload.name) + if not path.exists(): + raise HTTPException(404, f"Motion file not found: {path.name}") + + async def _run(): + try: + await asyncio.to_thread(arm.replay_file, str(path), payload.speed) + except Exception: + log.exception("Test replay failed") + + task = asyncio.create_task(_run()) + _BG_TASKS.add(task) + task.add_done_callback(_BG_TASKS.discard) + return {"ok": True, "name": path.name, "speed": payload.speed} + + +@router.post("/cancel") +async def cancel_replay(): + """Stop the current replay — the smooth return-to-home runs as the + final phase of the replay itself. + + Matches g1_replay_v4_stable.py's behaviour: the play loop breaks on + the cancel flag, then the same Run() function executes its + return-home ramp + DisableSDK. 
No separate scheduling needed. + """ + from Project.Sanad.main import arm + arm.cancel() + return {"ok": True, "message": "Cancelled — returning to home pose smoothly."} + + +@router.get("/status") +async def replay_status(): + from Project.Sanad.main import arm, teacher + return { + "arm": arm.status(), + "teaching": teacher.status() if teacher else {}, + } + + +# -- teaching mode -- + +@router.post("/teach/start") +async def start_teaching(payload: TeachRequest): + from Project.Sanad.main import teacher + if teacher is None: + raise HTTPException(503, "Teaching module not available.") + _block_if_movement_armed() + if teacher.is_recording: + raise HTTPException(409, "Teaching session already active.") + existing = MOTIONS_DIR / f"{payload.name}.jsonl" + if existing.exists(): + raise HTTPException(409, f"Motion file already exists: {payload.name}.jsonl") + return teacher.start(payload.name, payload.duration_sec) + + +@router.post("/teach/stop") +async def stop_teaching(): + from Project.Sanad.main import teacher + if teacher is None: + raise HTTPException(503, "Teaching module not available.") + return teacher.stop() + + +@router.get("/teach/status") +async def teaching_status(): + from Project.Sanad.main import teacher + if teacher is None: + return {"recording": False, "phase": "idle"} + return teacher.status() diff --git a/vendor/Sanad/dashboard/routes/scripts.py b/vendor/Sanad/dashboard/routes/scripts.py new file mode 100644 index 0000000..f4f18f2 --- /dev/null +++ b/vendor/Sanad/dashboard/routes/scripts.py @@ -0,0 +1,168 @@ +"""Script/prompt file management — CRUD for sanad_script.txt, sanad_rule.txt, etc.""" + +from __future__ import annotations + +import asyncio +from datetime import datetime +from pathlib import Path + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from Project.Sanad.config import SCRIPTS_DIR +from Project.Sanad.core import persona as _persona +from Project.Sanad.dashboard.routes._safe_io import ( + 
atomic_write_text, MAX_UPLOAD_BYTES, +) + +router = APIRouter() + +MAX_SCRIPT_BYTES = MAX_UPLOAD_BYTES + + +def _safe_path(name: str) -> Path: + cleaned = name.strip() + if not cleaned or "/" in cleaned or "\\" in cleaned or cleaned in {".", ".."}: + raise HTTPException(400, "Invalid script name.") + path = (SCRIPTS_DIR / cleaned).resolve() + if not str(path).startswith(str(SCRIPTS_DIR.resolve())): + raise HTTPException(400, "Path traversal denied.") + return path + + +@router.get("/") +async def list_scripts(): + SCRIPTS_DIR.mkdir(parents=True, exist_ok=True) + active = _persona.active_persona_name() + default = _persona.default_persona_name() + items = [] + for p in sorted(SCRIPTS_DIR.iterdir(), key=lambda x: x.name.lower()): + if not p.is_file(): + continue + st = p.stat() + items.append({ + "name": p.name, + "size_bytes": st.st_size, + "modified_at": datetime.fromtimestamp(st.st_mtime).isoformat(timespec="seconds"), + "active": p.name == active, # the persona Gemini loads now + "is_default": p.name == default, # the fallback (sanad_script.txt) + }) + return {"path": str(SCRIPTS_DIR), "files": items, + "active": active, "default": default} + + +class ScriptActive(BaseModel): + name: str | None = None # None / "" / the default name → revert to default + restart: bool = False # also restart the live voice so it takes effect now + + +@router.get("/active") +async def get_active(): + """Which persona Gemini will load, and the default it falls back to.""" + return {"active": _persona.active_persona_name(), + "default": _persona.default_persona_name()} + + +@router.post("/active") +async def set_active(payload: ScriptActive): + """Select the persona script Gemini uses. 
With restart=true, the live voice + session is bounced so the new persona takes effect immediately; otherwise it + applies on the next voice (re)connect.""" + try: + active = _persona.set_active_persona(payload.name) + except FileNotFoundError: + raise HTTPException(404, f"Script not found: {payload.name}") + restarted = False + if payload.restart: + try: + from Project.Sanad.main import live_sub + if live_sub is not None and hasattr(live_sub, "start"): + if hasattr(live_sub, "is_running") and live_sub.is_running(): + await asyncio.to_thread(live_sub.stop) + await asyncio.sleep(1.5) + await asyncio.to_thread(live_sub.start) + restarted = True + except Exception: + pass # selection is saved regardless of restart success + return {"ok": True, "active": active, + "default": _persona.default_persona_name(), "restarted": restarted} + + +class ScriptLoad(BaseModel): + name: str + +@router.post("/load") +async def load_script(payload: ScriptLoad): + path = _safe_path(payload.name) + if not path.exists(): + raise HTTPException(404, f"Script not found: {payload.name}") + content = path.read_text(encoding="utf-8-sig") + st = path.stat() + return { + "name": path.name, + "content": content, + "size_bytes": st.st_size, + "modified_at": datetime.fromtimestamp(st.st_mtime).isoformat(timespec="seconds"), + } + + +class ScriptSave(BaseModel): + name: str + content: str + +@router.post("/save") +async def save_script(payload: ScriptSave): + if len(payload.content.encode("utf-8")) > MAX_SCRIPT_BYTES: + raise HTTPException(413, f"Content too large (max {MAX_SCRIPT_BYTES} bytes).") + path = _safe_path(payload.name) + SCRIPTS_DIR.mkdir(parents=True, exist_ok=True) + atomic_write_text(path, payload.content) + return {"ok": True, "name": path.name, "size_bytes": path.stat().st_size} + + +class ScriptCreate(BaseModel): + name: str + content: str = "" + +@router.post("/create") +async def create_script(payload: ScriptCreate): + if len(payload.content.encode("utf-8")) > MAX_SCRIPT_BYTES: + 
raise HTTPException(413, f"Content too large (max {MAX_SCRIPT_BYTES} bytes).") + path = _safe_path(payload.name) + if path.exists(): + raise HTTPException(409, f"File already exists: {payload.name}") + SCRIPTS_DIR.mkdir(parents=True, exist_ok=True) + atomic_write_text(path, payload.content) + return {"ok": True, "name": path.name} + + +class ScriptRename(BaseModel): + old_name: str + new_name: str + +@router.post("/rename") +async def rename_script(payload: ScriptRename): + old = _safe_path(payload.old_name) + new = _safe_path(payload.new_name) + if not old.exists(): + raise HTTPException(404, f"Not found: {payload.old_name}") + if new.exists(): + raise HTTPException(409, f"Already exists: {payload.new_name}") + old.rename(new) + return {"ok": True, "old_name": payload.old_name, "new_name": new.name} + + +class ScriptDelete(BaseModel): + name: str + +@router.post("/delete") +async def delete_script(payload: ScriptDelete): + path = _safe_path(payload.name) + if not path.exists(): + raise HTTPException(404, f"Not found: {payload.name}") + if path.name == _persona.default_persona_name(): + raise HTTPException(409, f"Cannot delete the default persona ({path.name}).") + path.unlink() + # If the active selection was the deleted file, resolution auto-falls-back + # to the default — no extra cleanup needed. 
+ return {"ok": True, "deleted": payload.name} diff --git a/vendor/Sanad/dashboard/routes/skills.py b/vendor/Sanad/dashboard/routes/skills.py new file mode 100644 index 0000000..693215c --- /dev/null +++ b/vendor/Sanad/dashboard/routes/skills.py @@ -0,0 +1,101 @@ +"""Skill registry CRUD endpoints + skill execution.""" + +from __future__ import annotations + +from fastapi import APIRouter, HTTPException, UploadFile, File +from pydantic import BaseModel + +from Project.Sanad.config import AUDIO_RECORDINGS_DIR +from Project.Sanad.dashboard.routes._safe_io import ( + safe_path_under, check_upload_size, atomic_write_bytes, +) + +router = APIRouter() + + +class SkillCreate(BaseModel): + id: str = "" + audio_file: str = "" + motion_file: str = "" + callback: str = "" + sync_mode: str = "parallel" + enabled: bool = True + description: str = "" + + +class SkillUpdate(BaseModel): + audio_file: str | None = None + motion_file: str | None = None + callback: str | None = None + sync_mode: str | None = None + enabled: bool | None = None + description: str | None = None + + +@router.get("/") +async def list_skills(): + from Project.Sanad.main import brain + return {"skills": brain.registry.list_skills()} + + +@router.get("/{skill_id}") +async def get_skill(skill_id: str): + from Project.Sanad.main import brain + skill = brain.registry.get(skill_id) + if skill is None: + raise HTTPException(404, f"Skill not found: {skill_id}") + return skill.to_dict() + + +@router.post("/") +async def create_skill(payload: SkillCreate): + from Project.Sanad.main import brain + from Project.Sanad.core.skill_registry import Skill + try: + skill = Skill(**payload.model_dump()) + created = brain.registry.add(skill) + except ValueError as exc: + raise HTTPException(400, str(exc)) + return {"ok": True, "skill": created.to_dict()} + + +@router.put("/{skill_id}") +async def update_skill(skill_id: str, payload: SkillUpdate): + from Project.Sanad.main import brain + updates = {k: v for k, v in 
payload.model_dump().items() if v is not None} + try: + updated = brain.registry.update(skill_id, updates) + except ValueError as exc: + raise HTTPException(400, str(exc)) + if updated is None: + raise HTTPException(404, f"Skill not found: {skill_id}") + return {"ok": True, "skill": updated.to_dict()} + + +@router.delete("/{skill_id}") +async def delete_skill(skill_id: str): + from Project.Sanad.main import brain + deleted = brain.registry.delete(skill_id) + if not deleted: + raise HTTPException(404, f"Skill not found: {skill_id}") + return {"ok": True, "deleted": deleted} + + +@router.post("/{skill_id}/execute") +async def execute_skill(skill_id: str): + from Project.Sanad.main import brain + result = await brain.execute_skill(skill_id) + return result + + +@router.post("/upload-audio") +async def upload_audio(file: UploadFile = File(...)): + """Upload a .wav file for skill binding.""" + if not file.filename or not file.filename.lower().endswith(".wav"): + raise HTTPException(400, "Only .wav files are accepted.") + AUDIO_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True) + dest = safe_path_under(AUDIO_RECORDINGS_DIR, file.filename) + content = await file.read() + check_upload_size(content) + atomic_write_bytes(dest, content) + return {"ok": True, "path": str(dest), "size_bytes": len(content)} diff --git a/vendor/Sanad/dashboard/routes/system.py b/vendor/Sanad/dashboard/routes/system.py new file mode 100644 index 0000000..aba03f4 --- /dev/null +++ b/vendor/Sanad/dashboard/routes/system.py @@ -0,0 +1,315 @@ +"""System information endpoints — network, subsystems, dashboard binding.""" + +from __future__ import annotations + +import asyncio +import os +import platform +import shutil +import socket +import sys +from pathlib import Path +from typing import Any + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from Project.Sanad.config import ( + AUDIO_RECORDINGS_DIR, + BASE_DIR, + DASHBOARD_HOST, + DASHBOARD_INTERFACE, + DASHBOARD_PORT, + 
DATA_DIR, + DDS_NETWORK_INTERFACE, + LOGS_DIR, + list_network_interfaces, +) +from Project.Sanad.core.logger import get_logger + +log = get_logger("system_route") + +router = APIRouter() + + +def _runtime_bind() -> tuple[str, int]: + """The host/port the server is ACTUALLY bound to. + + main.py launches `uvicorn.run(_app, host=args.host, port=args.port)` with + the CLI --host/--port (start_sanad.sh passes `--port $PORT`, default 8001), + which can differ from the import-time DASHBOARD_HOST/DASHBOARD_PORT config + defaults (port 8000). Reading the live argv reports the real URL instead of + a stale config value. Falls back to the config constants when an arg is + absent (e.g. argparse default in effect).""" + host = DASHBOARD_HOST + port = DASHBOARD_PORT + argv = sys.argv + for i, tok in enumerate(argv): + if tok == "--host" and i + 1 < len(argv): + host = argv[i + 1] + elif tok.startswith("--host="): + host = tok.split("=", 1)[1] + elif tok == "--port" and i + 1 < len(argv): + try: + port = int(argv[i + 1]) + except (TypeError, ValueError): + pass + elif tok.startswith("--port="): + try: + port = int(tok.split("=", 1)[1]) + except (TypeError, ValueError): + pass + return host, port + + +def _safe_status(component, name: str) -> dict[str, Any]: + if component is None: + return {"available": False} + try: + if hasattr(component, "status") and callable(component.status): + s = component.status() + if not isinstance(s, dict): + s = {"raw": str(s)} + s.setdefault("available", True) + return s + return {"available": True} + except Exception as exc: + log.warning("status() failed for %s: %s", name, exc) + return {"available": True, "error": str(exc)} + + +@router.get("/info") +async def system_info(): + """One-shot system snapshot for the dashboard system panel.""" + def _do(): + # Subsystems + try: + from Project.Sanad.main import SUBSYSTEMS + except Exception: + SUBSYSTEMS = {} + + subsystem_list = [] + for name in sorted(SUBSYSTEMS): + comp = SUBSYSTEMS[name] + entry = 
{ + "name": name, + "connected": comp is not None, + } + if comp is not None and hasattr(comp, "status") and callable(comp.status): + try: + s = comp.status() + if isinstance(s, dict): + entry["status"] = s + except Exception as exc: + entry["status_error"] = str(exc) + subsystem_list.append(entry) + + connected_count = sum(1 for s in subsystem_list if s["connected"]) + + # Audio device current selection (best-effort) + audio_info = {} + try: + from Project.Sanad.voice import audio_devices as ad + audio_info = { + "pactl_available": ad.pactl_available(), + "current": ad.current_selection(), + "detected_profile_ids": [ + d["profile"]["id"] for d in ad.detect_plugged_profiles() + ] if ad.pactl_available() else [], + } + except Exception as exc: + audio_info = {"error": str(exc)} + + # Network interfaces + try: + interfaces = list_network_interfaces() + except Exception: + interfaces = [] + + # Determine the URL the dashboard is reachable at — use the ACTUAL + # runtime bind args (argv), not the import-time config defaults. 
+ bound_host, bound_port = _runtime_bind() + if bound_host == "0.0.0.0": + # Try to find the wlan0 IP for display purposes + up_ifaces = [i for i in interfaces if i["is_up"] and i["ip"] and not i["ip"].startswith("127.")] + display_host = up_ifaces[0]["ip"] if up_ifaces else bound_host + else: + display_host = bound_host + + return { + "host": { + "hostname": socket.gethostname(), + "platform": platform.platform(), + "python": sys.version.split()[0], + "executable": sys.executable, + "base_dir": str(BASE_DIR), + "pid": os.getpid(), + }, + "dashboard": { + "interface": DASHBOARD_INTERFACE, + "bound_host": bound_host, + "display_host": display_host, + "port": bound_port, + "url": f"http://{display_host}:{bound_port}", + }, + "dds": { + "interface": DDS_NETWORK_INTERFACE, + }, + "network": { + "interfaces": interfaces, + }, + "subsystems": { + "total": len(subsystem_list), + "connected": connected_count, + "disconnected": len(subsystem_list) - connected_count, + "list": subsystem_list, + }, + "audio": audio_info, + } + + return await asyncio.to_thread(_do) + + +# ───────────────────── storage tracking + cleanup ───────────────────── +# Categories surfaced in the Settings → Storage panel. `cleanable` ones get a +# Clean button + are included in "Clean all"; the rest (faces/motions/zones) +# are shown for tracking only — they're operational assets (enrollments, +# motion configs) managed in their own tabs, not disposable clutter. 
+_STORAGE_CATS = [ + ("recordings", "Conversation recordings", DATA_DIR / "recordings", True), + ("records", "Named records (Typed Replay)", AUDIO_RECORDINGS_DIR, True), + ("logs", "Logs", LOGS_DIR, True), + ("faces", "Enrolled faces", DATA_DIR / "faces", False), + ("motions", "Motion replays + config", DATA_DIR / "motions", False), + ("photos", "Photos", DATA_DIR / "photos", False), + ("zones", "Vision zones", DATA_DIR / "zones", False), +] +_CLEANABLE = {k for k, _l, _p, c in _STORAGE_CATS if c} + + +def _dir_stats(path: Path) -> tuple[int, int]: + """(total_bytes, file_count) of a dir tree. Missing dir → (0, 0).""" + total, n = 0, 0 + try: + for root, _dirs, files in os.walk(path): + for f in files: + try: + total += os.path.getsize(os.path.join(root, f)) + n += 1 + except OSError: + pass + except Exception: + pass + return total, n + + +def _human(b: float) -> str: + f = float(b) + for u in ("B", "KB", "MB", "GB", "TB"): + if f < 1024 or u == "TB": + return f"{f:.0f} {u}" if u == "B" else f"{f:.1f} {u}" + f /= 1024 + return f"{f:.1f} TB" + + +@router.get("/storage") +async def storage_usage(): + """Per-category data/log sizes + disk free, for the Storage panel.""" + def _do(): + cats = [] + for key, label, path, cleanable in _STORAGE_CATS: + size, files = _dir_stats(Path(path)) + cats.append({ + "key": key, "label": label, "path": str(path), + "size_bytes": size, "size_human": _human(size), + "files": files, "cleanable": cleanable, + }) + data_b, _ = _dir_stats(DATA_DIR) + logs_b, _ = _dir_stats(LOGS_DIR) + try: + du = shutil.disk_usage(str(BASE_DIR)) + disk = { + "free_human": _human(du.free), "total_human": _human(du.total), + "used_pct": round(100.0 * (du.total - du.free) / du.total, 1), + } + except Exception: + disk = {} + return { + "categories": cats, + "data_bytes": data_b, "data_human": _human(data_b), + "logs_human": _human(logs_b), + "total_human": _human(data_b + logs_b), + "disk": disk, + } + return await asyncio.to_thread(_do) + + +class 
_CleanReq(BaseModel): + target: str # recordings | records | logs | all + + +def _clean_recordings() -> tuple[int, int]: + d = DATA_DIR / "recordings" + freed, n = 0, 0 + for f in list(d.glob("*.wav")) + [d / "index.json"]: + if f.is_file(): + try: + freed += f.stat().st_size + f.unlink() + n += 1 + except OSError: + pass + return n, freed + + +def _clean_records() -> tuple[int, int]: + d = AUDIO_RECORDINGS_DIR + freed, n = 0, 0 + for f in list(d.glob("*.wav")) + [d / "records.json"]: + if f.is_file(): + try: + freed += f.stat().st_size + f.unlink() + n += 1 + except OSError: + pass + return n, freed + + +def _clean_logs() -> tuple[int, int]: + # Truncate (not delete) — active loggers hold append-mode handles, so + # truncating to 0 clears content cleanly without losing the fd. + freed, n = 0, 0 + for f in Path(LOGS_DIR).glob("*.log"): + try: + freed += f.stat().st_size + open(f, "w").close() + n += 1 + except OSError: + pass + return n, freed + + +@router.post("/storage/clean") +async def storage_clean(req: _CleanReq): + """Clean a disposable category (recordings | records | logs) or 'all'. + Recordings/records are deleted; logs are truncated. 
Assets (faces, motions, + zones) are never touched here.""" + t = (req.target or "").strip().lower() + if t != "all" and t not in _CLEANABLE: + raise HTTPException(400, f"target must be 'all' or one of {sorted(_CLEANABLE)}") + + def _do(): + targets = ["recordings", "records", "logs"] if t == "all" else [t] + fns = {"recordings": _clean_recordings, "records": _clean_records, + "logs": _clean_logs} + result, total = {}, 0 + for tg in targets: + n, freed = fns[tg]() + result[tg] = {"items": n, "freed_bytes": freed, "freed_human": _human(freed)} + total += freed + log.info("storage clean %s → freed %s", targets, _human(total)) + return {"ok": True, "cleaned": targets, + "total_freed_bytes": total, "total_freed_human": _human(total), + "result": result} + return await asyncio.to_thread(_do) diff --git a/vendor/Sanad/dashboard/routes/temp_monitor.py b/vendor/Sanad/dashboard/routes/temp_monitor.py new file mode 100644 index 0000000..81505bb --- /dev/null +++ b/vendor/Sanad/dashboard/routes/temp_monitor.py @@ -0,0 +1,81 @@ +"""REST endpoints backing the 3D motor-temperature dashboard (N1). + +Serves the motor name/mesh mapping + thresholds, and a one-shot temperature +snapshot (the front-end's initial fetch fallback). The live stream is over +/ws/motor-temps (dashboard/websockets/motor_temps.py). The 3D view itself is +the static page at /static/temp3d/index.html. 
+""" + +from __future__ import annotations + +import time + +from fastapi import APIRouter + +from Project.Sanad.dashboard.temp_motor_map import ( + MOTOR_NAMES, + MOTOR_TO_MESH, + TEMP_HOT_THRESHOLD, + TEMP_MAX, + TEMP_MIN, + TEMP_WARM_THRESHOLD, + build_payload, +) + +router = APIRouter() + + +def _get_arm(): + """Lazy import — avoids a circular import on dashboard load.""" + try: + from Project.Sanad.main import arm # type: ignore + return arm + except Exception: + return None + + +@router.get("/mapping") +async def motor_mapping(): + """Motor id → name / mesh map + the temperature gradient thresholds.""" + return { + "motor_names": MOTOR_NAMES, + "motor_to_mesh": MOTOR_TO_MESH, + "thresholds": { + "min": TEMP_MIN, + "max": TEMP_MAX, + "warm": TEMP_WARM_THRESHOLD, + "hot": TEMP_HOT_THRESHOLD, + }, + } + + +@router.get("/motors") +async def motors_snapshot(): + """One-shot motor temperature + position snapshot (Marcus payload shape).""" + arm = _get_arm() + temps: list = [] + positions: list = [] + if arm is not None: + try: + temps = arm.get_motor_temps() + except Exception: + temps = [] + try: + positions = arm.get_current_q() + except Exception: + positions = [] + return build_payload(temps, positions, time.time()) + + +@router.get("/battery") +async def battery_status(): + """Live G1 battery (BMS) snapshot: state-of-charge %, voltage, current, + charge/discharge status, pack temperature, cycles. `available=False` until + the BMS topic (rt/lf/bmsstate) delivers its first message.""" + arm = _get_arm() + if arm is None or not hasattr(arm, "get_battery"): + return {"available": False} + try: + return arm.get_battery() + except Exception: + return {"available": False} diff --git a/vendor/Sanad/dashboard/routes/typed_replay.py b/vendor/Sanad/dashboard/routes/typed_replay.py new file mode 100644 index 0000000..efc0c44 --- /dev/null +++ b/vendor/Sanad/dashboard/routes/typed_replay.py @@ -0,0 +1,146 @@ +"""Typed Replay dashboard endpoints. 
+ +Full CRUD over the records index: + POST /say generate + play + optionally record + POST /replay-last re-play cached audio + POST /save-last persist cached generation + GET /records list + GET /records/{name} get one + POST /records/{name}/play play saved WAV (speaker or raw) + POST /records/{name}/rename rename + DELETE /records/{name} delete + GET /status engine + session state +""" + +from __future__ import annotations + +import asyncio +from typing import Literal + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from Project.Sanad.core.config_loader import section as _cfg_section +router = APIRouter() + +# MAX_TEXT_LEN — SINGLE SOURCE in dashboard.api_input +MAX_TEXT_LEN = _cfg_section("dashboard", "api_input").get("max_text_len", 2000) + + +class SayPayload(BaseModel): + text: str + record: bool = False + record_name: str = "" + + +class SaveLastPayload(BaseModel): + record_name: str = "" + + +class RenamePayload(BaseModel): + new_name: str + + +class PlayRecordPayload(BaseModel): + file_kind: Literal["speaker", "raw"] = "speaker" + + +def _engine(): + from Project.Sanad.main import typed_replay + if typed_replay is None: + raise HTTPException(503, "TypedReplayEngine not initialized.") + return typed_replay + + +# ───────────────────── generate / replay ───────────────────── + +@router.post("/say") +async def say(payload: SayPayload): + if not payload.text or not payload.text.strip(): + raise HTTPException(400, "text cannot be empty") + if len(payload.text) > MAX_TEXT_LEN: + raise HTTPException(413, f"text too long (max {MAX_TEXT_LEN})") + eng = _engine() + try: + return await eng.say(payload.text, record=payload.record, + record_name=payload.record_name) + except ValueError as exc: + raise HTTPException(400, str(exc)) + except RuntimeError as exc: + raise HTTPException(503, str(exc)) + + +@router.post("/replay-last") +async def replay_last(): + eng = _engine() + try: + return await asyncio.to_thread(eng.replay_last) + 
except RuntimeError as exc: + raise HTTPException(400, str(exc)) + + +@router.post("/save-last") +async def save_last(payload: SaveLastPayload): + eng = _engine() + try: + return {"ok": True, "record": eng.save_last(payload.record_name)} + except RuntimeError as exc: + raise HTTPException(400, str(exc)) + + +# ───────────────────── record CRUD ─────────────────────────── + +@router.get("/records") +async def list_records(): + return _engine().list_records() + + +@router.get("/records/{name}") +async def get_record(name: str): + try: + return _engine().find_record(name) + except KeyError: + raise HTTPException(404, f"record not found: {name}") + + +@router.post("/records/{name}/play") +async def play_record(name: str, payload: PlayRecordPayload): + eng = _engine() + try: + return await asyncio.to_thread(eng.play_record, name, payload.file_kind) + except KeyError: + raise HTTPException(404, f"record not found: {name}") + except FileNotFoundError as exc: + raise HTTPException(410, f"file missing on disk: {exc}") + except RuntimeError as exc: + raise HTTPException(503, str(exc)) + + +@router.post("/records/{name}/rename") +async def rename_record(name: str, payload: RenamePayload): + eng = _engine() + try: + return {"ok": True, "record": eng.rename_record(name, payload.new_name)} + except KeyError: + raise HTTPException(404, f"record not found: {name}") + except ValueError as exc: + raise HTTPException(400, str(exc)) + + +@router.delete("/records/{name}") +async def delete_record(name: str): + eng = _engine() + try: + return {"ok": True, **eng.delete_record(name)} + except KeyError: + raise HTTPException(404, f"record not found: {name}") + + +# ───────────────────── status ──────────────────────────────── + +@router.get("/status") +async def status(): + from Project.Sanad.main import typed_replay + if typed_replay is None: + return {"available": False} + return {"available": True, **typed_replay.status()} diff --git a/vendor/Sanad/dashboard/routes/voice.py 
b/vendor/Sanad/dashboard/routes/voice.py new file mode 100644 index 0000000..a3d1b88 --- /dev/null +++ b/vendor/Sanad/dashboard/routes/voice.py @@ -0,0 +1,237 @@ +"""Voice endpoints — Gemini interaction, local TTS, prompt management.""" + +from __future__ import annotations + +import asyncio + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.logger import get_logger + +log = get_logger("voice_route") + +router = APIRouter() + +_VR = _cfg_section("dashboard", "voice_route") +_API = _cfg_section("dashboard", "api_input") +# MAX_TEXT_LEN — SINGLE SOURCE in dashboard.api_input +MAX_TEXT_LEN = _API.get("max_text_len", 2000) +_API_KEY_MASK_VISIBLE = _VR.get("api_key_mask_visible", 4) + + +def _mask_api_key(key: str) -> str: + """Mask an API key for display — keeps 4 chars on each end. + + Examples: + "" → "" + "AIza123" → "*******" (≤8 chars = full mask) + "AIzaSy...kqf8" → "AIza***...kqf8" (>8 chars = partial mask) + """ + if not key: + return "" + if len(key) <= 8: + return "*" * len(key) + return f"{key[:4]}{'*' * (len(key) - 8)}{key[-4:]}" + + +class TextPayload(BaseModel): + text: str + engine: str = "gemini" # "gemini" | "local" + + +@router.get("/status") +async def voice_status(): + from Project.Sanad.main import voice_client, local_tts + return { + "gemini": voice_client.status() if voice_client else {}, + "local_tts": local_tts.status() if local_tts else {}, + } + + +@router.post("/generate") +async def generate_speech(payload: TextPayload): + """Generate speech from text using Gemini or local TTS.""" + if not payload.text.strip(): + raise HTTPException(400, "Text cannot be empty.") + if len(payload.text) > MAX_TEXT_LEN: + raise HTTPException(413, f"Text too long (max {MAX_TEXT_LEN} chars).") + + from Project.Sanad.main import voice_client, local_tts, audio_mgr + + if payload.engine == "local": + if local_tts is None: + raise 
HTTPException(503, "Local TTS not available.") + pcm = await asyncio.to_thread(local_tts.synthesize, payload.text) + if audio_mgr: + await asyncio.to_thread(audio_mgr.play_pcm, pcm, 1, 16000, 2) + return { + "ok": True, + "engine": "local", + "duration_sec": round(len(pcm) / (16000 * 2), 3), + } + else: + if voice_client is None: + raise HTTPException(503, "Voice client not initialized.") + if not voice_client.connected: + try: + await voice_client.connect() + except Exception: + log.exception("Gemini reconnect failed in /generate") + raise HTTPException(503, "Gemini not connected and reconnect failed.") + # Check session ownership — TypedReplay or live loop may hold it + if voice_client.session_owner is not None: + raise HTTPException( + 409, + f"Voice session busy (owned by {voice_client.session_owner})", + ) + try: + audio_bytes, text_parts = await voice_client.send_text( + payload.text, owner="voice_route" + ) + except RuntimeError as exc: + raise HTTPException(503, str(exc)) + except Exception as exc: + raise HTTPException(502, f"Gemini communication error: {exc}") + if audio_bytes and audio_mgr: + await asyncio.to_thread(audio_mgr.play_pcm, audio_bytes, 1, 24000, 2) + return { + "ok": True, + "engine": "gemini", + "has_audio": bool(audio_bytes), + "text_response": text_parts, + } + + +@router.post("/connect") +async def connect_gemini(): + from Project.Sanad.main import voice_client + if voice_client is None: + raise HTTPException(503, "Voice client not initialized.") + try: + await voice_client.connect() + except Exception as exc: + raise HTTPException(502, f"Gemini connection failed: {exc}") + return {"connected": voice_client.connected} + + +@router.post("/disconnect") +async def disconnect_gemini(): + from Project.Sanad.main import voice_client + if voice_client: + await voice_client.disconnect() + return {"connected": False} + + +# ─────────────────────── Gemini API key management ─────────────────────── + +class ApiKeyPayload(BaseModel): + api_key: str 
+ + +@router.get("/api-key") +async def get_api_key(): + """Return the current Gemini API key in masked form. + + Never returns the full key. Response: + { + "has_key": true, + "masked": "AIza***...kqf8", + "length": 39, + "source": "config_file" | "default" + } + """ + import Project.Sanad.config as cfg_mod + key = getattr(cfg_mod, "GEMINI_API_KEY", "") or "" + # Detect where the value came from (persisted override vs module default) + try: + from Project.Sanad.config import load_config + stored = load_config().get("gemini", {}) or {} + source = "config_file" if stored.get("api_key") else "default" + except Exception: + source = "default" + return { + "has_key": bool(key), + "masked": _mask_api_key(key), + "length": len(key), + "source": source, + } + + +@router.post("/api-key") +async def update_api_key(payload: ApiKeyPayload): + """Update the Gemini API key — persists to data/motions/config.json and + hot-swaps the in-memory value so the next Gemini connect uses it. + + Also disconnects any currently-connected Gemini session so that the + next reconnect picks up the new key cleanly. Returns the NEW masked + key + a flag telling the dashboard to trigger a reconnect. + """ + key = payload.api_key.strip() + if not key: + raise HTTPException(400, "API key cannot be empty.") + if len(key) < 20: + raise HTTPException(400, "API key looks too short.") + if not key.startswith("AIza"): + raise HTTPException( + 400, + "Gemini API keys normally start with 'AIza'. 
" + "Double-check you're pasting a Google AI Studio key.", + ) + + # Persist to data/motions/config.json (atomic temp-then-replace) + try: + from Project.Sanad.config import load_config, save_config + cfg = load_config() or {} + gemini_cfg = cfg.get("gemini") if isinstance(cfg.get("gemini"), dict) else {} + gemini_cfg["api_key"] = key + cfg["gemini"] = gemini_cfg + save_config(cfg) + except Exception as exc: + log.exception("Failed to persist API key to config.json") + raise HTTPException(500, f"Could not save config: {exc}") + + # Hot-swap the in-memory module globals. + # Both Project.Sanad.config AND Project.Sanad.gemini.client + # have their OWN reference to GEMINI_API_KEY (the latter was created + # at `from Project.Sanad.config import GEMINI_API_KEY` at import time). + # Python's `from X import Y` binds a local name — updating config.Y + # alone does NOT propagate to the importer, so we must patch both. + try: + import Project.Sanad.config as _cfg_mod + _cfg_mod.GEMINI_API_KEY = key + except Exception: + log.exception("could not patch config.GEMINI_API_KEY") + + try: + import Project.Sanad.gemini.client as _gc + _gc.GEMINI_API_KEY = key + except Exception: + log.exception("could not patch gemini.client.GEMINI_API_KEY") + + # Disconnect any live session so reconnect uses the new key. + from Project.Sanad.main import voice_client + was_connected = False + if voice_client is not None: + was_connected = bool(getattr(voice_client, "connected", False)) + if was_connected: + try: + await voice_client.disconnect() + except Exception: + log.exception("disconnect during api-key swap failed") + + log.info("Gemini API key updated (length=%d) source=config_file", len(key)) + + return { + "ok": True, + "masked": _mask_api_key(key), + "length": len(key), + "source": "config_file", + "was_connected": was_connected, + "message": ( + "API key saved. Click 'Connect' to reopen the Gemini session with " + "the new key. 
Any running Live Gemini subprocess must be restarted " + "separately (Stop → Start) to pick up the new key." + ), + } diff --git a/vendor/Sanad/dashboard/routes/wake_phrases.py b/vendor/Sanad/dashboard/routes/wake_phrases.py new file mode 100644 index 0000000..66b65a2 --- /dev/null +++ b/vendor/Sanad/dashboard/routes/wake_phrases.py @@ -0,0 +1,72 @@ +"""Wake-phrase CRUD endpoints. + +Lets the dashboard edit the wake-phrase → action mapping stored in +data/wake_phrases.json. +""" + +from __future__ import annotations + +from typing import Optional + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +router = APIRouter() + + +class WakePhrasePayload(BaseModel): + phrase: str + action_id: str + + +class EnablePayload(BaseModel): + phrase: str + action_id: str + enabled: bool + + +def _mgr(): + from Project.Sanad.main import wake_mgr + if wake_mgr is None: + raise HTTPException(503, "WakePhraseManager not initialized.") + return wake_mgr + + +@router.get("/") +async def list_phrases(): + m = _mgr() + return { + "status": m.status(), + "phrases": m.list(), + } + + +@router.post("/") +async def add_phrase(payload: WakePhrasePayload): + m = _mgr() + try: + entry = m.add(payload.phrase, payload.action_id) + except ValueError as exc: + raise HTTPException(400, str(exc)) + return {"ok": True, "entry": entry} + + +@router.delete("/") +async def remove_phrase(phrase: str, action_id: Optional[str] = None): + m = _mgr() + removed = m.remove(phrase, action_id) + return {"ok": True, "removed": removed} + + +@router.post("/enable") +async def set_enabled(payload: EnablePayload): + m = _mgr() + ok = m.set_enabled(payload.phrase, payload.action_id, payload.enabled) + if not ok: + raise HTTPException(404, "phrase+action_id not found") + return {"ok": True} + + +@router.get("/status") +async def status(): + return _mgr().status() diff --git a/vendor/Sanad/dashboard/routes/zones.py b/vendor/Sanad/dashboard/routes/zones.py new file mode 100644 index 
0000000..d6def31 --- /dev/null +++ b/vendor/Sanad/dashboard/routes/zones.py @@ -0,0 +1,597 @@ +"""Zones tab — zone → place → linked-faces management + "go here" destination. + +Hierarchy (replaces the old flat places): + Zone (name + description) + └─ Place (name + description + optional reference photos + linked face ids) + +Routes live under /api/zones. Toggle + CRUD changes write +data/.recognition_state.json (the SAME file faces use); the Gemini child polls +it at 1 Hz and re-primes / announces mid-session. The "go here" endpoints set a +navigation target the robot will head to once N2 locomotion is wired — for now +they just record the target and feed Gemini the place's reference. +""" + +from __future__ import annotations + +import asyncio +import io +from typing import Optional + +from fastapi import APIRouter, File, HTTPException, Query, UploadFile +from fastapi.responses import FileResponse, StreamingResponse +from pydantic import BaseModel + +from Project.Sanad.config import BASE_DIR +from Project.Sanad.core.logger import get_logger +from Project.Sanad.dashboard.routes._safe_io import check_upload_size +from Project.Sanad.vision import recognition_state + +log = get_logger("zones_routes") + +router = APIRouter() + +STATE_PATH = BASE_DIR / "data" / ".recognition_state.json" + + +# ── lazy subsystem accessors ──────────────────────────────── + +def _get_camera(): + try: + from Project.Sanad.main import camera # type: ignore + return camera + except Exception: + return None + + +def _get_zone_gallery(): + try: + from Project.Sanad.main import zone_gallery # type: ignore + return zone_gallery + except Exception: + return None + + +def _get_face_gallery(): + try: + from Project.Sanad.main import gallery # type: ignore + return gallery + except Exception: + return None + + +def _require_zones(): + g = _get_zone_gallery() + if g is None: + raise HTTPException(503, "Zone gallery subsystem unavailable.") + return g + + +def _bump_zones_version() -> int: + cur = 
recognition_state.read(STATE_PATH) + v = cur.zones_version + 1 + recognition_state.mutate(STATE_PATH, zones_version=v) + return v + + +def _validate_image(content: bytes, filename: str | None = None) -> None: + check_upload_size(content) + if len(content) < 16: + raise HTTPException(400, "Image too small / empty.") + if not (content[:3] == b"\xff\xd8\xff" or content[:8] == b"\x89PNG\r\n\x1a\n"): + raise HTTPException(400, f"Only JPEG/PNG accepted (got {filename or 'unknown'}).") + + +def _safe_photo_name(name: str) -> None: + if "/" in name or ".." in name or "\x00" in name: + raise HTTPException(400, "Invalid photo name.") + + +def _resolve_faces(face_ids: list[int]) -> list[dict]: + """Turn linked face ids into [{id, name}] using the face gallery.""" + g = _get_face_gallery() + out = [] + for fid in face_ids: + name = None + if g is not None: + try: + e = g.get(fid) + name = e.name if e else None + except Exception: + name = None + out.append({"id": fid, "name": name}) + return out + + +def _place_to_dict(p) -> dict: + d = p.to_dict() + d["faces"] = _resolve_faces(p.face_ids) + return d + + +def _zone_to_dict(z) -> dict: + return { + "id": z.id, "name": z.name, "description": z.description, + "linked_map": getattr(z, "linked_map", None), + "added_at": z.added_at, + "places": [_place_to_dict(p) for p in z.places], + } + + +async def _maybe_drive_to_place(zone, place) -> Optional[dict]: + """If the place links a nav2 place AND its zone's map is the one currently + localized, actually DRIVE there (arbiter-gated + arm arrival monitor). + Returns the drive outcome, or None when the place isn't drivable (no link). 
+ Best-effort: never raises into the caller.""" + nav_place = getattr(place, "nav_place", None) + linked_map = getattr(zone, "linked_map", None) + if not nav_place or not linked_map: + return None + try: + from Project.Sanad.dashboard.routes import navigation as navmod + from Project.Sanad.dashboard.routes import _arbiter + except Exception: + return {"ok": False, "reason": "nav_unavailable"} + client = getattr(navmod, "_CLIENT", None) + if client is None: + return {"ok": False, "reason": "nav_unavailable"} + try: + st = await asyncio.to_thread(client.status) + body = st.as_dict() if hasattr(st, "as_dict") else dict(st) + except Exception as exc: # noqa: BLE001 + return {"ok": False, "reason": "status_error", "detail": str(exc)[:120]} + if not body.get("bringup_alive"): + return {"ok": False, "reason": "no_map"} + # The robot can only drive in the currently-localized map. Require the + # zone's linked map to match (compare on the sanitized .db stem). + active = (body.get("active_map") or "").strip().lower() + want = (linked_map or "").strip().lower() + if want.endswith(".db"): + want = want[:-3] + if active and want and active != want: + return {"ok": False, "reason": "wrong_map", + "active": body.get("active_map"), "want": linked_map} + if not _arbiter.acquire_nav(): + return {"ok": False, "reason": "manual_armed"} + drive = await asyncio.to_thread(client.goto, nav_place) + if isinstance(drive, dict) and not drive.get("ok", True): + _arbiter.release_nav() + return {"ok": False, "reason": "dispatch_failed", "detail": drive} + try: + from Project.Sanad.navigation.goal_monitor import arm_goal + arm_goal(nav_place) + except Exception: + pass + return {"ok": True, "resolved": nav_place} + + +def _nav_target_dict(st, gallery) -> Optional[dict]: + zid, pid = st.nav_target_zone_id, st.nav_target_place_id + if not zid or not pid: + return None + zone_name = place_name = None + if gallery is not None: + try: + z = gallery.get_zone(zid) + zone_name = z.name if z else None + 
p = gallery.get_place(zid, pid) + place_name = p.name if p else None + except Exception: + pass + return {"zone_id": zid, "place_id": pid, + "zone_name": zone_name, "place_name": place_name} + + +# ── state + toggle ────────────────────────────────────────── + +@router.get("/state") +async def get_state(): + st = recognition_state.read(STATE_PATH) + g = _get_zone_gallery() + zones_count = places_count = 0 + if g is not None: + try: + zones = g.list_zones() + zones_count = len(zones) + places_count = sum(len(z.places) for z in zones) + except Exception: + pass + return { + "zone_rec_enabled": st.zone_rec_enabled, + "zones_version": st.zones_version, + "zones_count": zones_count, + "places_count": places_count, + "nav_target": _nav_target_dict(st, g), + } + + +@router.post("/zone-rec") +async def set_zone_rec(on: bool = Query(...)): + """Enable / disable the robot's knowledge of zones & places (hot).""" + st = recognition_state.mutate(STATE_PATH, zone_rec_enabled=bool(on)) + log.info("zone recognition %s", "ON" if on else "OFF") + return {"ok": True, "zone_rec_enabled": st.zone_rec_enabled} + + +@router.post("/sync") +async def sync_zones(): + v = _bump_zones_version() + log.info("zones sync requested → v.%d", v) + return {"ok": True, "zones_version": v} + + +# ── zones CRUD ────────────────────────────────────────────── + +class NamePayload(BaseModel): + name: Optional[str] = None + + +class DescribePayload(BaseModel): + description: Optional[str] = None + + +class FacesPayload(BaseModel): + face_ids: list[int] = [] + + +class LinkMapPayload(BaseModel): + # nav2 map .db basename (e.g. "office.db"); None/"" unlinks. + map: Optional[str] = None + + +class NavPlacePayload(BaseModel): + # nav2 place name in the zone's linked map; None/"" unlinks. 
+ nav_place: Optional[str] = None + + +@router.get("") +async def list_zones(): + g = _require_zones() + zones = g.list_zones() + return {"zones": [_zone_to_dict(z) for z in zones], "total": len(zones)} + + +@router.post("/create") +async def create_zone(name: Optional[str] = Query(default=None), + description: Optional[str] = Query(default=None)): + g = _require_zones() + if not (name or "").strip() and not (description or "").strip(): + raise HTTPException(400, "A zone needs at least a name or a description.") + z = g.create_zone(name=name, description=description) + _bump_zones_version() + return {"ok": True, "zone": _zone_to_dict(z)} + + +@router.post("/{zone_id}/rename") +async def rename_zone(zone_id: int, payload: NamePayload): + g = _require_zones() + try: + g.rename_zone(zone_id, payload.name) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + _bump_zones_version() + return {"ok": True, "zone": _zone_to_dict(g.get_zone(zone_id))} + + +@router.post("/{zone_id}/describe") +async def describe_zone(zone_id: int, payload: DescribePayload): + g = _require_zones() + try: + g.describe_zone(zone_id, payload.description) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + _bump_zones_version() + return {"ok": True, "zone": _zone_to_dict(g.get_zone(zone_id))} + + +@router.delete("/{zone_id}") +async def delete_zone(zone_id: int): + g = _require_zones() + try: + g.delete_zone(zone_id) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + # If the active destination was inside this zone, clear it. 
+ st = recognition_state.read(STATE_PATH) + if st.nav_target_zone_id == zone_id: + recognition_state.mutate(STATE_PATH, nav_target_zone_id=0, nav_target_place_id=0) + _bump_zones_version() + return {"ok": True, "deleted": zone_id} + + +# ── places CRUD (within a zone) ───────────────────────────── + +@router.post("/{zone_id}/places/create") +async def create_place( + zone_id: int, + name: Optional[str] = Query(default=None), + description: Optional[str] = Query(default=None), + face_ids: list[int] = Query(default=[]), + nav_place: Optional[str] = Query(default=None), + files: Optional[list[UploadFile]] = File(default=None), +): + g = _require_zones() + if g.get_zone(zone_id) is None: + raise HTTPException(404, f"zone_{zone_id} not found") + if not (name or "").strip() and not (description or "").strip(): + raise HTTPException(400, "A place needs at least a name or a description.") + image_bytes: list[bytes] = [] + for f in (files or []): + content = await f.read() + if not content: + continue + _validate_image(content, f.filename) + image_bytes.append(content) + p = g.create_place(zone_id, name=name, description=description, + face_ids=face_ids, image_bytes_list=image_bytes or None, + nav_place=nav_place) + _bump_zones_version() + return {"ok": True, "place": _place_to_dict(p)} + + +@router.post("/{zone_id}/link_map") +async def link_zone_map(zone_id: int, payload: LinkMapPayload): + """Bind (or unbind) the zone to a nav2 map .db. 
Required before its places + can link to that map's nav places / before Gemini Nav can drive in it.""" + g = _require_zones() + try: + g.set_zone_map(zone_id, payload.map) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + _bump_zones_version() + return {"ok": True, "zone": _zone_to_dict(g.get_zone(zone_id))} + + +@router.post("/{zone_id}/places/{place_id}/nav_link") +async def link_place_nav(zone_id: int, place_id: int, payload: NavPlacePayload): + """Link (or unlink) a place to a nav2 place name in the zone's map — this is + what makes the place drivable from voice / 'Go here'.""" + g = _require_zones() + try: + g.set_place_nav(zone_id, place_id, payload.nav_place) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + _bump_zones_version() + return {"ok": True, "place": _place_to_dict(g.get_place(zone_id, place_id))} + + +@router.post("/{zone_id}/places/{place_id}/rename") +async def rename_place(zone_id: int, place_id: int, payload: NamePayload): + g = _require_zones() + try: + g.rename_place(zone_id, place_id, payload.name) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + _bump_zones_version() + return {"ok": True, "place": _place_to_dict(g.get_place(zone_id, place_id))} + + +@router.post("/{zone_id}/places/{place_id}/describe") +async def describe_place(zone_id: int, place_id: int, payload: DescribePayload): + g = _require_zones() + try: + g.describe_place(zone_id, place_id, payload.description) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + _bump_zones_version() + return {"ok": True, "place": _place_to_dict(g.get_place(zone_id, place_id))} + + +@router.post("/{zone_id}/places/{place_id}/faces") +async def set_place_faces(zone_id: int, place_id: int, payload: FacesPayload): + """Replace the set of saved faces linked to this place.""" + g = _require_zones() + try: + g.set_place_faces(zone_id, place_id, payload.face_ids) + except FileNotFoundError as exc: + raise 
HTTPException(404, str(exc)) + _bump_zones_version() + return {"ok": True, "place": _place_to_dict(g.get_place(zone_id, place_id))} + + +@router.post("/{zone_id}/places/{place_id}/capture") +async def capture_to_place(zone_id: int, place_id: int): + g = _require_zones() + cam = _get_camera() + if cam is None or not cam.is_running(): + raise HTTPException(409, "Camera is not running. Toggle Vision ON first.") + jpeg = cam.get_fresh_frame(max_age_s=0.5, timeout_s=1.5) + if not jpeg: + raise HTTPException(409, "Camera has no frame yet.") + try: + fname = g.add_photo(zone_id, place_id, jpeg) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + _bump_zones_version() + return {"ok": True, "added": fname, "place": _place_to_dict(g.get_place(zone_id, place_id))} + + +@router.post("/{zone_id}/places/{place_id}/upload") +async def upload_to_place(zone_id: int, place_id: int, + files: list[UploadFile] = File(...)): + g = _require_zones() + if g.get_place(zone_id, place_id) is None: + raise HTTPException(404, f"zone_{zone_id}/place_{place_id} not found") + added: list[str] = [] + for f in files: + content = await f.read() + _validate_image(content, f.filename) + try: + added.append(g.add_photo(zone_id, place_id, content)) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + _bump_zones_version() + return {"ok": True, "added": added, "place": _place_to_dict(g.get_place(zone_id, place_id))} + + +@router.delete("/{zone_id}/places/{place_id}") +async def delete_place(zone_id: int, place_id: int): + g = _require_zones() + try: + g.delete_place(zone_id, place_id) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + st = recognition_state.read(STATE_PATH) + if st.nav_target_zone_id == zone_id and st.nav_target_place_id == place_id: + recognition_state.mutate(STATE_PATH, nav_target_zone_id=0, nav_target_place_id=0) + _bump_zones_version() + return {"ok": True, "deleted": place_id} + + 
+@router.delete("/{zone_id}/places/{place_id}/photo/{photo_name}") +async def delete_place_photo(zone_id: int, place_id: int, photo_name: str): + g = _require_zones() + _safe_photo_name(photo_name) + try: + g.delete_photo(zone_id, place_id, photo_name) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + _bump_zones_version() + return {"ok": True, "deleted": photo_name} + + +@router.get("/{zone_id}/places/{place_id}/photo/{photo_name}") +async def get_place_photo(zone_id: int, place_id: int, photo_name: str, + download: int = Query(default=0)): + g = _require_zones() + _safe_photo_name(photo_name) + path = g.get_photo(zone_id, place_id, photo_name) + if path is None: + raise HTTPException(404, "Photo not found.") + media = "image/png" if path.suffix.lower() == ".png" else "image/jpeg" + headers = {} + if download: + headers["Content-Disposition"] = ( + f'attachment; filename="zone_{zone_id}_place_{place_id}_{photo_name}"') + return FileResponse(path, media_type=media, headers=headers) + + +@router.get("/{zone_id}/places/{place_id}/download.zip") +async def download_place_zip(zone_id: int, place_id: int): + g = _require_zones() + try: + data = g.zip_place(zone_id, place_id) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + return StreamingResponse( + io.BytesIO(data), media_type="application/zip", + headers={ + "Content-Disposition": f'attachment; filename="zone_{zone_id}_place_{place_id}.zip"', + "Content-Length": str(len(data)), + }, + ) + + +# ── "go here" navigation target ───────────────────────────── + +@router.post("/{zone_id}/places/{place_id}/go") +async def go_to_place(zone_id: int, place_id: int): + """Set this place as the active destination AND, if the place links a nav2 + place in this zone's (currently-localized) map, actually drive there. 
+ + Two effects: (1) records nav_target so the Gemini child primes on the + reference photo + announces the destination; (2) if drivable, dispatches a + Nav2 goal (arbiter-gated, with arrival monitoring). A place with no nav link + is announce-only, as before.""" + g = _require_zones() + p = g.get_place(zone_id, place_id) + if p is None: + raise HTTPException(404, f"zone_{zone_id}/place_{place_id} not found") + recognition_state.mutate(STATE_PATH, + nav_target_zone_id=zone_id, + nav_target_place_id=place_id) + log.info("nav target set → zone_%d/place_%d (%s)", zone_id, place_id, + p.name or "(unnamed)") + zone = g.get_zone(zone_id) + drive = await _maybe_drive_to_place(zone, p) + return {"ok": True, + "nav_target": {"zone_id": zone_id, "place_id": place_id, + "place_name": p.name}, + "drive": drive} + + +@router.post("/nav/clear") +async def clear_nav_target(): + recognition_state.mutate(STATE_PATH, nav_target_zone_id=0, nav_target_place_id=0) + log.info("nav target cleared") + return {"ok": True, "nav_target": None} + + +def _resolve_map_path(client, linked_map: str) -> Optional[str]: + """Find the .db path for a zone's linked map name via the nav client.""" + want = (linked_map or "").strip().lower() + want_stem = want[:-3] if want.endswith(".db") else want + try: + maps = client.list_maps() or [] + except Exception: + return None + for m in maps: + nm = (m.get("name") or "").strip().lower() + if nm == want or (nm[:-3] if nm.endswith(".db") else nm) == want_stem: + return m.get("path") + return None + + +@router.post("/{zone_id}/gemini_nav/start") +async def gemini_nav_start(zone_id: int): + """Enter 'Gemini Nav' for a zone: localize the zone's map, turn on camera + + face + zone recognition + movement, ensure the Gemini session is live, and + greet the user so they can converse to navigate. + + The robot only ever runs ONE map; this loads the zone's map in localize-only + mode (so it cannot fresh-map while driving), exactly as the user requires. 
+ """ + g = _require_zones() + zone = g.get_zone(zone_id) + if zone is None: + raise HTTPException(404, f"zone_{zone_id} not found") + linked_map = getattr(zone, "linked_map", None) + if not linked_map: + raise HTTPException(400, "This zone has no linked nav2 map — link one first.") + + # 1) Localize the zone's map (single bringup, mode 3 — no fresh mapping). + loaded: dict = {"ok": False, "reason": "nav_unavailable"} + try: + from Project.Sanad.dashboard.routes import navigation as navmod + client = getattr(navmod, "_CLIENT", None) + if client is not None: + db_path = await asyncio.to_thread(_resolve_map_path, client, linked_map) + if db_path: + loaded = await asyncio.to_thread(client.load_map, db_path) + else: + loaded = {"ok": False, "reason": "map_not_found", "map": linked_map} + except Exception as exc: # noqa: BLE001 + loaded = {"ok": False, "reason": "load_error", "detail": str(exc)[:160]} + + # 2) Camera + face + zone recognition + movement ON for the session. + recognition_state.mutate(STATE_PATH, + vision_enabled=True, face_rec_enabled=True, + zone_rec_enabled=True, movement_enabled=True) + _bump_zones_version() + + # 3) Ensure the Gemini session is live, then greet (zone + drivable places). + session_started = False + try: + from Project.Sanad.main import live_sub + if live_sub is not None: + if hasattr(live_sub, "is_running") and not live_sub.is_running(): + await asyncio.to_thread(live_sub.start) + session_started = True + drivable = [p.name or p.nav_place for p in zone.places + if getattr(p, "nav_place", None)] + zname = zone.name or f"zone {zone_id}" + if drivable: + placelist = ", ".join(str(x) for x in drivable) + greet = (f"You are now in the '{zname}' zone. You can drive the " + f"user to: {placelist}. Greet the user warmly in your " + f"normal Khaleeji style and ask where they would like to go.") + else: + greet = (f"You are now in the '{zname}' zone, but no drivable " + f"places are linked to its map yet. 
Greet the user and " + f"say places still need to be linked before you can drive.") + if hasattr(live_sub, "send_state"): + live_sub.send_state("nav_zone", greet) + except Exception as exc: # noqa: BLE001 + log.warning("gemini_nav greet failed: %s", exc) + + return {"ok": True, "zone_id": zone_id, "zone": _zone_to_dict(zone), + "loaded": loaded, "session_started": session_started} diff --git a/vendor/Sanad/dashboard/static/index.html b/vendor/Sanad/dashboard/static/index.html new file mode 100644 index 0000000..50dac2c --- /dev/null +++ b/vendor/Sanad/dashboard/static/index.html @@ -0,0 +1,3761 @@ + + + + + + Sanad Dashboard + + + +

+ + +

Sanad Dashboard

+ + + + + + Connecting... + +

+ + +

+ CAM + FACE + PLACE + MOVE +

+ + +

Operations

Voice & Audio

Motion & Replay

Controller

Navigation

Live Map

Map Editor

Recognition

Mask Face

Recordings

Temperature

Terminal

Settings & Logs

+ + +

System Info

+ +

Network interfaces

Subsystems (connected / disconnected)

+ + +

Audio Control

Microphone

Speaker

+ Speaker Volume (G1 / JBL / Anker) + – +

+ + + +

+ Controls the ACTIVE speaker — G1 chest (DDS) + the selected PulseAudio sink (JBL/Anker). Applies live. +

+ Audio device profile +

+ + + + + +

Manual sink / source override

+ +

+ + +

Quick Actions

+ Speed + + + +

+ +

+ + +

Live Voice Commands

+ + + + + +

+ Arm Trigger + +

+ Deferred Trigger + +

Last heard: --

Pending action: --

Audio attached: -- | Arm attached: -- | Gemini: --

+ + +

Live Gemini Process

+ + + + + + Pause: Auto + + +

State: --

User: --

+ + +

Gemini API Key

+ The key used by GeminiVoiceClient and the Live Gemini subprocess. + Saved to data/motions/config.json. Get a free key at + aistudio.google.com/app/apikey. +

+ Current + + + +

+ New key + + + +

+ + +

Typed Replay Engine

+ +

+ + Record speaker + +

+ + + +

+ Session +

+ + +

Wake Phrase Manager

+ + +

+ +

+ + +

+ +

+ + +

Motion Control

+ Gestural Speaking + + Speed + + + +

+ SDK Actions (built-in) + +

+ JSONL Replays (recorded) + +

+ + +

Replay Manager

+ Motion Files +

+ + Upload .jsonl +

+ Test Replay +

+ + + + +

+ Teaching Mode +

+ + + + +

+ + +

Macro Recorder (Audio + Motion)

+ + + Record +

+ + + +

+ + + Play +

Voice (WAV)

+ +

Motion (JSONL)

+ +

Speed

+ +

+ + + +

+ +

+ + +

Camera Vision & Face Recognition

+ Camera Vision + + + + + -- +

+ Face Recognition + + + + + -- +

+ +

+ Toggles take effect within ~1 second on the running Gemini session — no restart required. +

+ + +

Live Preview

+ +

Camera off — toggle Vision ON to see the live feed.

Resolution / FPS

+ + + + + + +

JPEG Quality

+ + + +

+ Each button rebuilds the capture pipeline (~0.5 s). Modes match the + RealSense D435I colour sensor — on USB 2.x, stick to 424×240 or 640×480. + If the feed is grayscale/IR, pin the colour node with SANAD_CAMERA_USB_INDEX. +

+ + +

Add New Face

+ Name + +

+ Description — who is this person? (Gemini reads it) + +

+ + + 📁 Upload images + + +

+ Tip: add 2–3 photos / different angles per person for best recognition. + The description is sent to Gemini with the photos — it can then greet + and talk about the person using what you wrote. +

+ + +

Enrolled Faces

+ + +

Loading…

+ + +

Zones & Places

+ Zone Recognition + + + + + -- +

+ + +

+ Destination: + none + +

+ Group locations into zones, add places inside each (name + description + + optional reference photos), and link saved faces to a place. “Go here” sets a + destination and shows Gemini the place — the robot drives there once movement + (locomotion) is enabled. +

+ + +

Add New Zone

+ + + +

+ + +

Zones

+ +

Loading…

+ +

+ + +

+ +

🔋 Battery

--%

Voltage
--

Current
--

Pack temp
--

Cycles
--

Reading battery…

+ +

+ Live motor surface/winding temperatures from rt/lowstate on the full + G1 (29 DOF). Blue ≈ 30°C → red ≈ 120°C. Drag to orbit, scroll to zoom. + Streamed over /ws/motor-temps — no second DDS subscriber. +

+ + +

+ FSM — + unknown + MSC — + SDK — +

+ + Enable movement + + + Enable Gemini movement + + +

+ +

+ CAM + FACE + PLACE + MOVE + GEMINI-MOVE + EXPLORE · soon +

+ Manual operator control. Robot is assumed standing in walking mode — use Ready/Start only if needed. + All controls below are locked until Enable movement is on; E-STOP always works. + While movement is on, arm replays/actions are disabled (and vice-versa). +

+ +

Locomotion / Teleop

Discrete step pad

+ + + + + + + + + +

+ + Run (1.2) +

vx 0.00 · vy 0.00 · ω 0.00

W/S forward·back · Q/E strafe · A/D rotate · Space halt

+ + +

Postures & Modes

+ + + + + + + + + + +

+ + +

MotionSwitcher / Low-Level

+ + + + + + +

+ + +

Diagnostics — joints 12–28

+ + +

+ NAV — + unknown + BRINGUP — + BRIDGE — + MODE — +

+ + +

+ Autonomous navigation via web_nav3 (Nav2 + rosbridge on the robot). Saved places + let you send a goal with one click; the robot drives there once locomotion is enabled. + The full nav dashboard (live map, set-pose, manual goals) is also available at + :8765. +

+ +

+ + +

Places

Loading…

+ + +

Save current pose as place

+ + +

+ Captures the robot's current map pose under this name. Drive (or teleop) the robot to the + spot first, then save. Saved places appear in the list to the left. +

+ + +

Missions

Loading…

+ Multi-waypoint routes / patrols defined in web_nav3. Run executes the full sequence. +

+ + +

MAP NAVIGATION

+ + + map: — + + Click: + + + + + + + + connecting… +

VIEW — pick GOAL to drive, or ADD to bookmark places. Cyan = robot, green dots = this map's places. Each map has its own places. Use the Places list to Go / Move / rename / delete.

+ +

+ + +

+ +

Live Map — full web_nav3 dashboard

+ +

+ Full web_nav3 dashboard (live map, set-pose, manual goals, missions) embedded from the robot. + Also available standalone at + :8765. + If it stays blank, check that bringup + rosbridge are alive (see the Navigation tab) and that the + robot is reachable on the network. +

+ +

+ + +

MAP EDITOR

+ + + map: — + MODE — + + Tool: + + + + Brush + + + + + + + + + load a map… +

+ Edit a SAVED map. Pick a map → Load & Edit. 🧽 Erase removes black phantom obstacles (paints them free); ⬛ Wall paints virtual walls / keep-outs. Click-drag to paint (brush size above). Save stores the edits per-map and applies them to the robot's navigation — it stops avoiding erased points and treats painted walls as keep-outs. Yellow = your edits. The original map .db is never modified. +

+ + +

+ + unknown + FACE — + SPEAK — +

+ + + +

+ LED face mask over Bluetooth. Check Link Gemini (below) to connect it + let Gemini show emotions; leave it off and the mask stays idle (no reconnecting). + Once linked it self-heals dropped links — keep the mask near the Jetson and free it from the phone app first. Faces upload once (~25 s) then animate via PLAY. +

+ +

Animated Face

+ + + + +

+ + Link Gemini + + + Speaking + + + Hide mouth + +

+ Mouth + + 0 +

+ + + + + + + + + + + + + + + + +

+ Face colors + Eyes + Mouth + Sclera + +

Run face → it blinks/glances on its own and lip-syncs while Gemini speaks. Colors re-upload the frames (~30–60s). Auto-reconnect keeps the face alive through BLE drops.

+ + +

Brightness & Text

+ Brightness + + 95 +

Keep ≤100 to avoid LED flicker (battery-limited).

+ + + + +

+ BG + Speed + + +

Text overrides the animated face until you Run face / ↩ Face again.

+ + +

Social / QR on Face

+ + + ⬆ Show once + +

Social buttons show a scannable QR (short da.gd link → Instagram). Full-URL / dense QRs show full-screen but only scan if short (use QR from link below).

+ +

+ Saved QR codes + + ⬆ Add + Save + +

+ QR from link + + + +

+ + +

Saved Text / Words

+ + + +

Save words/phrases, then scroll any of them across the mask on demand.

+ + +

Built-in Images / Animations

+ Image + + + + +

+ Anim + + + + +

+ +

Built-in IMAG ids ~0–105, ANIM ids ~0–69 (values above range show garbled frames).

+ + +

Skill Registry

No skills configured

+ +

+ + +

Saved Records

+ +

+ + + Keep Gemini paused (hold) + + Auto — resumes after each clip +

+ +

+ + + Select all + + + + +

No records saved

+ +

+ + + +

Terminal — unitree@robot

+ disconnected + + + +

+ Runs as the dashboard's user on the robot (typically unitree). No SSH handshake — the dashboard is already on the robot. Works on whichever Wi-Fi the robot is connected to. +

+ + +

Scripts Manager

+ Gemini persona: + … + + + +

Create variants (e.g. sanad_script_v2.txt) then select one and “Use for Gemini”. Default is always sanad_script.txt.

+ +

+ + + + +

+ + +

Prompt Management

+ +

+ + +

Storage

+ … + + + +

loading…

“Clean all” = recordings + named records + logs. Faces, motions & zones are shown for tracking only — manage those in their own tabs.

+ + +

Live Logs

+ + + + + +

+ +

+ + + + + + + + diff --git a/vendor/Sanad/dashboard/temp_motor_map.py b/vendor/Sanad/dashboard/temp_motor_map.py new file mode 100644 index 0000000..281951e --- /dev/null +++ b/vendor/Sanad/dashboard/temp_motor_map.py @@ -0,0 +1,90 @@ +"""G1 29-DoF motor → name / mesh mapping for the 3D temperature dashboard. + +Ported verbatim from Marcus/Features/TempMonitor/config_g1.py so the copied +three.js front-end (static/temp3d/index.html) binds temperature colours to the +correct STL meshes. `build_payload()` turns the arm controller's raw lowstate +snapshot into the exact 'motor_update' payload shape that front-end expects. +""" + +from __future__ import annotations + +from typing import Any, Optional + +# Motor ID → human name (29 motors = 29 DOF) +MOTOR_NAMES: dict[int, str] = { + 0: "Left Hip Pitch", 1: "Left Hip Roll", 2: "Left Hip Yaw", 3: "Left Knee", + 4: "Left Ankle Pitch", 5: "Left Ankle Roll", + 6: "Right Hip Pitch", 7: "Right Hip Roll", 8: "Right Hip Yaw", 9: "Right Knee", + 10: "Right Ankle Pitch", 11: "Right Ankle Roll", + 12: "Waist Yaw", 13: "Waist Roll", 14: "Waist Pitch", + 15: "Left Shoulder Pitch", 16: "Left Shoulder Roll", 17: "Left Shoulder Yaw", + 18: "Left Elbow", 19: "Left Wrist Roll", 20: "Left Wrist Pitch", 21: "Left Wrist Yaw", + 22: "Right Shoulder Pitch", 23: "Right Shoulder Roll", 24: "Right Shoulder Yaw", + 25: "Right Elbow", 26: "Right Wrist Roll", 27: "Right Wrist Pitch", 28: "Right Wrist Yaw", +} + +# Motor ID → URDF link / STL mesh name +MOTOR_TO_MESH: dict[int, str] = { + 0: "left_hip_pitch_link", 1: "left_hip_roll_link", 2: "left_hip_yaw_link", + 3: "left_knee_link", 4: "left_ankle_pitch_link", 5: "left_ankle_roll_link", + 6: "right_hip_pitch_link", 7: "right_hip_roll_link", 8: "right_hip_yaw_link", + 9: "right_knee_link", 10: "right_ankle_pitch_link", 11: "right_ankle_roll_link", + 12: "waist_yaw_link", 13: "waist_roll_link", 14: "torso_link", + 15: "left_shoulder_pitch_link", 16: "left_shoulder_roll_link", 17: 
"left_shoulder_yaw_link", + 18: "left_elbow_link", 19: "left_wrist_roll_link", 20: "left_wrist_pitch_link", + 21: "left_wrist_yaw_link", + 22: "right_shoulder_pitch_link", 23: "right_shoulder_roll_link", 24: "right_shoulder_yaw_link", + 25: "right_elbow_link", 26: "right_wrist_roll_link", 27: "right_wrist_pitch_link", + 28: "right_wrist_yaw_link", +} + +# Temperature thresholds (°C) — the three.js gradient maps MIN→MAX (blue→red). +TEMP_MIN = 30 +TEMP_MAX = 120 +TEMP_WARM_THRESHOLD = 45 +TEMP_HOT_THRESHOLD = 60 + + +def _coerce(v: Optional[int]) -> float: + """Temperatures default to 0 when the firmware didn't report one, so the + front-end's Math.max / .toFixed never sees null/NaN.""" + return float(v) if v is not None else 0.0 + + +def build_payload(temps: list[dict[str, Any]], + positions: list[float], + timestamp: float) -> dict[str, Any]: + """Build the Marcus-compatible 'motor_update' payload. + + `temps` — arm.get_motor_temps(): [{motor_id, surface, winding}] + `positions` — arm.get_current_q(): joint angles indexed by motor id + """ + temperatures: list[dict[str, Any]] = [] + for t in temps or []: + i = t.get("motor_id") + surface = t.get("surface") + winding = t.get("winding") + if surface is not None and winding is not None: + avg = (_coerce(surface) + _coerce(winding)) / 2.0 + else: + avg = _coerce(surface if surface is not None else winding) + entry: dict[str, Any] = { + "motor_id": i, + "motor_name": MOTOR_NAMES.get(i, f"Motor {i}"), + "mesh_name": MOTOR_TO_MESH.get(i, ""), + "surface": _coerce(surface), + "winding": _coerce(winding), + "temp1": _coerce(surface), + "temp2": _coerce(winding), + "avg": avg, + } + if positions and isinstance(i, int) and i < len(positions): + entry["position"] = float(positions[i]) + temperatures.append(entry) + + pos_list: list[dict[str, Any]] = [ + {"motor_id": i, "position": float(q), "link_name": MOTOR_TO_MESH.get(i)} + for i, q in enumerate(positions or []) + ] + return {"temperatures": temperatures, "positions": 
pos_list, + "timestamp": timestamp} diff --git a/vendor/Sanad/dashboard/websockets/__init__.py b/vendor/Sanad/dashboard/websockets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/dashboard/websockets/log_stream.py b/vendor/Sanad/dashboard/websockets/log_stream.py new file mode 100644 index 0000000..6e3bb94 --- /dev/null +++ b/vendor/Sanad/dashboard/websockets/log_stream.py @@ -0,0 +1,104 @@ +"""WebSocket endpoint for real-time log streaming. + +Clients connect to /ws/logs and receive live log lines from all modules. +""" + +from __future__ import annotations + +import asyncio +import threading +from collections import deque + +from fastapi import APIRouter, WebSocket, WebSocketDisconnect + +from Project.Sanad.core.logger import set_ws_push + +router = APIRouter() + +MAX_WATCHERS = 50 + +# Ring buffer of recent log lines (shared across connections). +_recent: deque[str] = deque(maxlen=500) +# Each watcher is an (event_loop, queue) pair. We keep the loop so cross-thread +# producers can schedule the enqueue on the consumer's loop (asyncio.Queue is +# NOT thread-safe — calling put_nowait off-loop neither wakes the parked +# `await queue.get()` nor safely mutates the queue's internals). +_watchers: set[tuple[asyncio.AbstractEventLoop, asyncio.Queue]] = set() +_watchers_lock = threading.Lock() + + +def push_log_line(line: str): + """Called from the logging system to feed new lines. + + May be called from ANY thread (logging is multi-threaded), so the append + to _recent and the per-watcher enqueue are done together under the same + lock that log_ws holds while snapshotting history + registering — that + closes the history/live overlap window so a connecting client can't see a + line both in its history replay and again live. The enqueue itself is + marshalled onto each watcher's loop via call_soon_threadsafe because + asyncio.Queue.put_nowait is not safe to call from a foreign thread. 
+ """ + with _watchers_lock: + _recent.append(line) + snapshot = list(_watchers) + for loop, q in snapshot: + try: + loop.call_soon_threadsafe(_safe_put, q, line) + except RuntimeError: + # Loop already closed — watcher is going away; skip it. + pass + + +def _safe_put(q: asyncio.Queue, line: str) -> None: + """Enqueue on the consumer's own loop thread (so it's safe).""" + try: + q.put_nowait(line) + except asyncio.QueueFull: + # Drop on overflow rather than block — logs are not critical data + pass + + +# Register with the logger so all log records are pushed to WS clients. +# Wrap so a logger registration failure doesn't break Dashboard import. +try: + set_ws_push(push_log_line) +except Exception: + pass + + +@router.websocket("/ws/logs") +async def log_ws(ws: WebSocket): + await ws.accept() + + loop = asyncio.get_running_loop() + queue: asyncio.Queue[str] = asyncio.Queue(maxsize=200) + watcher = (loop, queue) + with _watchers_lock: + if len(_watchers) >= MAX_WATCHERS: + await ws.close(code=1013, reason="Too many log watchers") + return + # Register the live queue and snapshot history under the SAME lock that + # push_log_line holds — so every line is either in this history + # snapshot or arrives on the queue, never both (no replay duplicates). 
+ _watchers.add(watcher) + history = list(_recent) + + try: + # Send recent history + for line in history: + await ws.send_text(line) + + while True: + line = await queue.get() + await ws.send_text(line) + except WebSocketDisconnect: + pass + except Exception: + # Any other error closes the connection cleanly + try: + await ws.close() + except Exception: + pass + finally: + with _watchers_lock: + _watchers.discard(watcher) diff --git a/vendor/Sanad/dashboard/websockets/motor_temps.py b/vendor/Sanad/dashboard/websockets/motor_temps.py new file mode 100644 index 0000000..c70e32f --- /dev/null +++ b/vendor/Sanad/dashboard/websockets/motor_temps.py @@ -0,0 +1,81 @@ +"""WebSocket endpoint streaming G1 motor temperatures to the 3D dashboard (N1). + +Polls the arm controller's throttled rt/lowstate snapshot (arm.get_motor_temps +/ arm.get_current_q — NO second DDS subscriber, no second ChannelFactoryInitialize) +and pushes a Marcus-compatible 'motor_update' payload to each connected client. + +Front-end: dashboard/static/temp3d/index.html (ported three.js view), which +opens this socket via a tiny shim in place of socket.io. 
+""" + +from __future__ import annotations + +import asyncio +import threading +import time + +from fastapi import APIRouter, WebSocket, WebSocketDisconnect + +from Project.Sanad.core.logger import get_logger +from Project.Sanad.dashboard.temp_motor_map import build_payload + +log = get_logger("motor_temps_ws") + +router = APIRouter() + +MAX_WATCHERS = 20 +PUSH_HZ = 8.0 # ~8 fps is plenty for a temperature heatmap + +_count = 0 +_count_lock = threading.Lock() + + +def _get_arm(): + """Lazy import — avoids a circular import on dashboard load.""" + try: + from Project.Sanad.main import arm # type: ignore + return arm + except Exception: + return None + + +@router.websocket("/ws/motor-temps") +async def motor_temps_ws(ws: WebSocket): + await ws.accept() + + global _count + with _count_lock: + if _count >= MAX_WATCHERS: + await ws.close(code=1013, reason="Too many temperature watchers") + return + _count += 1 + + period = 1.0 / PUSH_HZ + try: + while True: + arm = _get_arm() + temps: list = [] + positions: list = [] + if arm is not None: + try: + temps = arm.get_motor_temps() + except Exception: + temps = [] + try: + positions = arm.get_current_q() + except Exception: + positions = [] + payload = build_payload(temps, positions, time.time()) + await ws.send_json(payload) + await asyncio.sleep(period) + except WebSocketDisconnect: + pass + except Exception: + # Any other error (client gone mid-send, serialise issue) closes cleanly. + try: + await ws.close() + except Exception: + pass + finally: + with _count_lock: + _count -= 1 diff --git a/vendor/Sanad/dashboard/websockets/terminal.py b/vendor/Sanad/dashboard/websockets/terminal.py new file mode 100644 index 0000000..af27838 --- /dev/null +++ b/vendor/Sanad/dashboard/websockets/terminal.py @@ -0,0 +1,323 @@ +"""WebSocket → PTY bridge for the dashboard's Terminal tab. + +Spawns a shell (bash by default) inside a pseudo-terminal on the robot and +relays stdin/stdout to a browser xterm.js instance over WebSocket. 
From the +operator's seat this is functionally identical to an in-browser +`ssh unitree@` — except no SSH handshake is needed because the +dashboard process already runs as unitree on the robot. The Terminal tab +connects to ws:///ws/terminal and you land in unitree's shell +directly. + +PROTOCOL — text frames only. Control vs. keystrokes are disambiguated by +the leading byte: + client → server: + "\\x1f" + json-encoded control object (init / resize) + e.g. "\\x1f{\\"type\\":\\"init\\",\\"cols\\":80,\\"rows\\":24}" + keystrokes — written to PTY + server → client: + PTY stdout/stderr chunks + +The \\x1f prefix (ASCII Unit Separator) is the disambiguator. If we just +JSON-sniffed every message, a user pasting `{"type":"resize",...}` into +their shell would silently resize the PTY instead of pasting the text. + +SECURITY NOTE: anyone who can reach the dashboard URL gets shell access +as the unitree user. The dashboard already exposes equally-powerful +endpoints (E-STOP, motion replay, audio mute, etc.) so this isn't a new +threat class — but it IS a single-bullet kill switch for the robot. Bind +the dashboard to a trusted network only. +""" + +from __future__ import annotations + +import asyncio +import fcntl +import json +import os +import pty +import select +import shutil +import signal +import struct +import termios +import threading + +from fastapi import APIRouter, WebSocket, WebSocketDisconnect + +from Project.Sanad.core.logger import get_logger + +log = get_logger("terminal_ws") + +router = APIRouter() + +# Magic prefix that distinguishes control messages from raw keystrokes. +# ASCII 0x1F (Unit Separator) — not produced by normal keyboard input, +# so user-pasted JSON can never spoof a control frame. +_CTRL_PREFIX = "\x1f" + +# Concurrent-session cap so a runaway tab can't spawn 50 bashes on the robot. +_MAX_SESSIONS = 4 +_active: set[int] = set() +_active_lock = threading.Lock() + +# Bounded queue depth between the PTY reader thread and the WS sender. 
+# A chatty shell command (e.g. `yes`, `cat /dev/urandom`) at gigabytes/sec +# would otherwise pile up unbounded asyncio tasks + string refs. Past the +# cap we drop chunks and surface a single drop notice — ANSI may corrupt +# briefly but the session stays alive. +_SEND_QUEUE_MAX = 64 + + +def _resolve_shell() -> list[str]: + """Pick a sensible shell. SHELL env first, then /bin/bash, then sh.""" + sh = os.environ.get("SHELL", "") + if sh and shutil.which(sh): + return [sh, "-i"] + if shutil.which("/bin/bash"): + return ["/bin/bash", "-i"] + return ["/bin/sh", "-i"] + + +def _set_pty_size(fd: int, cols: int, rows: int) -> None: + """Inform the PTY of its new window size so curses-style apps (htop, + less, vim) lay out correctly.""" + try: + # TIOCSWINSZ payload: rows, cols, xpixel, ypixel (xpixel/ypixel + # unused, kept 0). + fcntl.ioctl(fd, termios.TIOCSWINSZ, + struct.pack("HHHH", rows, cols, 0, 0)) + except Exception as exc: + log.debug("TIOCSWINSZ failed (cols=%s rows=%s): %s", cols, rows, exc) + + +async def _reap_child(pid: int) -> None: + """SIGHUP → wait briefly → SIGKILL → wait briefly → giveup. + + Earlier version SIGKILLed unconditionally because the WNOHANG check + happened immediately after SIGHUP (which never returns true that fast). + Now we poll for up to ~1.5s after SIGHUP before escalating. + """ + async def _wait_exit(timeout_s: float, interval_s: float = 0.1) -> bool: + end = asyncio.get_running_loop().time() + timeout_s + while asyncio.get_running_loop().time() < end: + try: + done_pid, _ = os.waitpid(pid, os.WNOHANG) + except ChildProcessError: + return True # already reaped + except OSError: + return False + if done_pid: + return True + await asyncio.sleep(interval_s) + return False + + # 1. Polite request + try: + os.kill(pid, signal.SIGHUP) + except ProcessLookupError: + return + except OSError as exc: + log.debug("SIGHUP pid=%d: %s", pid, exc) + return + + if await _wait_exit(1.5): + return + + # 2. 
Force + try: + os.kill(pid, signal.SIGKILL) + except ProcessLookupError: + return + except OSError as exc: + log.debug("SIGKILL pid=%d: %s", pid, exc) + return + + if not await _wait_exit(1.0): + log.warning("terminal child pid=%d failed to exit after SIGKILL", pid) + + +@router.websocket("/ws/terminal") +async def terminal_ws(ws: WebSocket) -> None: + """Bridge a browser xterm.js to a shell PTY on the robot.""" + await ws.accept() + + # Concurrent-session guard. + with _active_lock: + if len(_active) >= _MAX_SESSIONS: + await ws.send_text( + f"\r\n[terminal] Refused — already have {_MAX_SESSIONS} " + f"open sessions. Close another tab and reconnect.\r\n" + ) + await ws.close(code=1008) + return + + # Fork + exec the shell. Parent gets the master fd; child becomes the + # shell with stdin/stdout/stderr wired to the slave end. + cmd = _resolve_shell() + try: + pid, fd = pty.fork() + except OSError as exc: + log.error("pty.fork failed: %s", exc) + await ws.send_text(f"\r\n[terminal] pty.fork failed: {exc}\r\n") + await ws.close(code=1011) + return + + if pid == 0: + # CHILD — set env so the shell is interactive and looks right. + os.environ.setdefault("TERM", "xterm-256color") + os.environ.setdefault("LANG", os.environ.get("LANG", "en_US.UTF-8")) + try: + os.execvp(cmd[0], cmd) + except OSError as exc: + # exec failed — printing to fd 2 reaches the parent via the + # PTY so the browser sees the error before we _exit. + os.write(2, f"[terminal] exec failed: {exc}\n".encode()) + os._exit(127) + return # unreachable in child + + # PARENT + with _active_lock: + _active.add(pid) + log.info("terminal session started pid=%d cmd=%s", pid, cmd[0]) + + loop = asyncio.get_running_loop() + closed = asyncio.Event() + # Bounded queue + dedicated sender task = backpressure. If the queue + # fills up we drop the chunk and bump _dropped so we can surface a + # short notice in the stream. 
+ send_q: asyncio.Queue[str] = asyncio.Queue(maxsize=_SEND_QUEUE_MAX) + dropped = 0 + + def _reader_thread() -> None: + """Drain PTY master fd → queue. Runs in a daemon thread because + select.select on a pipe blocks; asyncio has no portable + equivalent for arbitrary fds on Windows (and we want one code + path).""" + nonlocal dropped + try: + while not closed.is_set(): + try: + r, _, _ = select.select([fd], [], [], 0.1) + except (OSError, ValueError): + break + if not r: + continue + try: + data = os.read(fd, 4096) + except OSError: + break + if not data: # EOF — child exited / PTY closed + break + try: + text = data.decode("utf-8", errors="replace") + except Exception: + continue + # put_nowait raises on full — we drop and count. + try: + loop.call_soon_threadsafe(_enqueue, text) + except RuntimeError: + # loop closed — bail + break + finally: + loop.call_soon_threadsafe(closed.set) + + def _enqueue(text: str) -> None: + nonlocal dropped + try: + send_q.put_nowait(text) + except asyncio.QueueFull: + dropped += 1 + + async def _sender_task() -> None: + """Drains send_q → WebSocket. Single producer, single consumer + means no extra locking needed. Backoff on send failure and let + the closed flag end the session.""" + nonlocal dropped + while not closed.is_set(): + try: + text = await asyncio.wait_for(send_q.get(), timeout=0.5) + except asyncio.TimeoutError: + continue + try: + await ws.send_text(text) + except Exception as exc: + log.info("terminal ws.send failed (likely client gone): %s", exc) + closed.set() + return + # If we dropped chunks since the last successful send, tell + # the user once so the ANSI corruption isn't mysterious. 
+ if dropped: + d = dropped + dropped = 0 + try: + await ws.send_text( + f"\r\n\x1b[2m[term: dropped {d} chunk(s) — slow client]" + f"\x1b[0m\r\n", + ) + except Exception: + closed.set() + return + + reader = threading.Thread(target=_reader_thread, daemon=True, + name=f"terminal-rx-{pid}") + reader.start() + sender = asyncio.create_task(_sender_task()) + + # Initial sizing — xterm.js will send a {type:"init",...} control + # frame right after onopen with the actual window size. + _set_pty_size(fd, 80, 24) + + try: + while not closed.is_set(): + try: + msg = await asyncio.wait_for(ws.receive_text(), timeout=0.5) + except asyncio.TimeoutError: + continue + except WebSocketDisconnect: + break + + if not msg: + continue + + # Control frame? Must start with the magic prefix. User-typed + # / pasted text can never spoof this — \x1f isn't producible + # by normal keyboard input. + if msg[:1] == _CTRL_PREFIX: + try: + ctrl = json.loads(msg[1:]) + except (json.JSONDecodeError, ValueError): + ctrl = None + if isinstance(ctrl, dict) and ctrl.get("type") in ("init", "resize"): + cols = int(ctrl.get("cols") or 80) + rows = int(ctrl.get("rows") or 24) + _set_pty_size(fd, cols, rows) + # Either way, control frames are NEVER forwarded to PTY. + continue + + # Plain keystrokes — write to PTY master. 
+ try: + os.write(fd, msg.encode("utf-8", errors="replace")) + except OSError as exc: + log.info("terminal pty write failed (child likely exited): %s", exc) + break + finally: + closed.set() + try: + sender.cancel() + except Exception: + pass + try: + await _reap_child(pid) + except Exception as exc: + log.debug("reap_child pid=%d: %s", pid, exc) + try: + os.close(fd) + except OSError: + pass + with _active_lock: + _active.discard(pid) + log.info("terminal session ended pid=%d", pid) + try: + await ws.close() + except Exception: + pass diff --git a/vendor/Sanad/data/audio/.gitkeep b/vendor/Sanad/data/audio/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/data/audio_device.json b/vendor/Sanad/data/audio_device.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/vendor/Sanad/data/audio_device.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/vendor/Sanad/data/camera_device.json b/vendor/Sanad/data/camera_device.json new file mode 100644 index 0000000..7930d09 --- /dev/null +++ b/vendor/Sanad/data/camera_device.json @@ -0,0 +1,5 @@ +{ + "profile_serial_assignments": { + "realsense_primary": "" + } +} \ No newline at end of file diff --git a/vendor/Sanad/data/faces/.gitkeep b/vendor/Sanad/data/faces/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/data/memories/.gitkeep b/vendor/Sanad/data/memories/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/data/motions/config.json b/vendor/Sanad/data/motions/config.json new file mode 100644 index 0000000..40a0170 --- /dev/null +++ b/vendor/Sanad/data/motions/config.json @@ -0,0 +1,21 @@ +{ + "gemini": { + "api_key": "", + "model": "models/gemini-2.5-flash-native-audio-preview-12-2025", + "voice_name": "Charon" + }, + "audio": { + "send_sample_rate": 16000, + "receive_sample_rate": 24000, + "chunk_size": 512, + "g1_volume": 100 + }, + "motion": { + "action_cooldown_sec": 1.0, + "replay_hz": 60.0 + }, + 
"dashboard": { + "host": "0.0.0.0", + "port": 8000 + } +} \ No newline at end of file diff --git a/vendor/Sanad/data/photos/.gitkeep b/vendor/Sanad/data/photos/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/data/recordings/.gitkeep b/vendor/Sanad/data/recordings/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/data/zones/.gitkeep b/vendor/Sanad/data/zones/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/examples/voice_example.py b/vendor/Sanad/examples/voice_example.py new file mode 100644 index 0000000..92bf2cd --- /dev/null +++ b/vendor/Sanad/examples/voice_example.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 +"""voice_example.py — demos for each voice subsystem in isolation. + +Each subcommand exercises one component so you can debug pieces without +running the full Sanad stack. + +Usage: + python3 voice_example.py gemini "hello" # one-shot Gemini text→audio + python3 voice_example.py local_tts "hello" # local Coqui TTS + python3 voice_example.py typed_replay "hello" # typed replay engine + python3 voice_example.py live # spawn GeminiSubprocess + python3 voice_example.py status # show status of all subsystems + +Assumes Project.Sanad is importable (run from repo root or with PYTHONPATH set). 
+""" + +from __future__ import annotations + +import argparse +import asyncio +import sys + + +def _demo_gemini(text: str) -> None: + """One-shot: connect Gemini, send text, play reply.""" + from Project.Sanad.gemini.client import GeminiVoiceClient + from Project.Sanad.voice.audio_manager import AudioManager + + async def run(): + client = GeminiVoiceClient() + audio = AudioManager() + await client.connect() + try: + audio_bytes, text_parts = await client.send_text(text, owner="example") + print(f"[gemini] got {len(audio_bytes)} bytes audio, text={text_parts}") + if audio_bytes: + await asyncio.to_thread(audio.play_pcm, audio_bytes, 1, 24000, 2) + finally: + await client.disconnect() + + asyncio.run(run()) + + +def _demo_local_tts(text: str) -> None: + """Synthesize with local Coqui TTS and play.""" + from Project.Sanad.voice.local_tts import LocalTTSEngine + from Project.Sanad.voice.audio_manager import AudioManager + + tts = LocalTTSEngine() + audio = AudioManager() + pcm = tts.synthesize(text) + print(f"[local_tts] generated {len(pcm)} bytes") + audio.play_pcm(pcm, 1, 16000, 2) + + +def _demo_typed_replay(text: str) -> None: + """Exercise the TypedReplayEngine end-to-end.""" + from Project.Sanad.gemini.client import GeminiVoiceClient + from Project.Sanad.voice.audio_manager import AudioManager + from Project.Sanad.voice.typed_replay import TypedReplayEngine + + async def run(): + client = GeminiVoiceClient() + await client.connect() + audio = AudioManager() + engine = TypedReplayEngine(client, audio) + result = await engine.say(text) + print(f"[typed_replay] {result}") + await client.disconnect() + + asyncio.run(run()) + + +def _demo_live() -> None: + """Spawn the live voice subprocess — same as dashboard /api/live-subprocess.""" + from Project.Sanad.gemini.subprocess import GeminiSubprocess + + mgr = GeminiSubprocess() + info = mgr.start() + print(f"[live] {info}") + print("Running. 
Ctrl+C to stop.") + try: + while True: + import time + time.sleep(1) + except KeyboardInterrupt: + print("\n[live] stopping...") + print(mgr.stop()) + + +def _demo_status() -> None: + """Print status of all voice subsystems.""" + from Project.Sanad.gemini.client import GeminiVoiceClient + try: + from Project.Sanad.voice.local_tts import LocalTTSEngine + except Exception: + LocalTTSEngine = None + + client = GeminiVoiceClient() + print("[gemini]", client.status()) + if LocalTTSEngine: + try: + tts = LocalTTSEngine() + print("[local_tts]", tts.status()) + except Exception as exc: + print(f"[local_tts] unavailable: {exc}") + else: + print("[local_tts] not installed") + + +def main(): + ap = argparse.ArgumentParser(description=__doc__) + sub = ap.add_subparsers(dest="cmd", required=True) + + for name in ("gemini", "local_tts", "typed_replay"): + sp = sub.add_parser(name, help=f"demo {name}") + sp.add_argument("text", help="text to speak") + + sub.add_parser("live", help="spawn live voice subprocess") + sub.add_parser("status", help="print subsystem status") + + args = ap.parse_args() + if args.cmd == "gemini": + _demo_gemini(args.text) + elif args.cmd == "local_tts": + _demo_local_tts(args.text) + elif args.cmd == "typed_replay": + _demo_typed_replay(args.text) + elif args.cmd == "live": + _demo_live() + elif args.cmd == "status": + _demo_status() + + +if __name__ == "__main__": + main() diff --git a/vendor/Sanad/face/__init__.py b/vendor/Sanad/face/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/face/emotion_frames.py b/vendor/Sanad/face/emotion_frames.py new file mode 100644 index 0000000..20cb632 --- /dev/null +++ b/vendor/Sanad/face/emotion_frames.py @@ -0,0 +1,208 @@ +"""Extra emotion frames for the LED mask, in the same 46x58 display space + RGB +style as :mod:`colorface` (black bg, cyan eyes, red mouth). 
These are the +expression frames Gemini can trigger via ``set_expression`` that the base +``colorface.default_frames`` does not draw (heart, laugh, love-eyes, cool, +sleepy, confused, kiss, star-struck). + +``emotion_frames(...)`` returns ``{name: raw_bytes}`` ready for the mask's DIY +image upload, exactly like ``colorface.default_frames``. Positions mirror +``colorface.build_face`` so the eyes/mouth line up with the rest of the set. +""" + +from __future__ import annotations + +import math + +import colorface as _cf +from colorface import DISPLAY_W as W, DISPLAY_H as H, encode +from PIL import Image, ImageDraw + +# eye/mouth geometry copied from colorface.build_face so frames are consistent +_EYE_L = W // 2 - 10 # 13 +_EYE_R = W // 2 + 10 # 33 +_EYE_T, _EYE_B = 15, 29 # normal eye top/bottom +_EYE_W = 6 +_MOUTH_CY = 44 +_MOUTH_CX = W // 2 # 23 + + +def _canvas(): + img = Image.new("RGB", (W, H), (0, 0, 0)) + return img, ImageDraw.Draw(img) + + +def _round_eye(g, cx, eye_color, sclera_color, *, t=_EYE_T, b=_EYE_B, w=_EYE_W): + g.ellipse([cx - w, t, cx + w, b], fill=sclera_color) + g.ellipse([cx - w + 3, t + 4, cx + w - 3, b - 2], fill=eye_color) + m = (t + b) // 2 + g.ellipse([cx - 1, m - 1, cx + 1, m + 2], fill=(0, 0, 0)) + + +def _happy_eye(g, cx, color): + # upward "^"-ish squint (a smiling eye) + g.arc([cx - 7, _EYE_T - 1, cx + 7, _EYE_B + 5], start=200, end=340, + fill=color, width=3) + + +def _heart(g, cx, cy, half, color): + """A filled heart centred at (cx, cy), ``half`` = half-width.""" + r = half / 2.0 + g.pieslice([cx - half, cy - r, cx, cy + r], 0, 360, fill=color) # left lobe + g.pieslice([cx, cy - r, cx + half, cy + r], 0, 360, fill=color) # right lobe + g.polygon([(cx - half, cy + r * 0.2), (cx + half, cy + r * 0.2), + (cx, cy + half)], fill=color) + + +def _star(g, cx, cy, r, color): + pts = [] + for i in range(10): + ang = -math.pi / 2 + i * math.pi / 5 + rad = r if i % 2 == 0 else r * 0.45 + pts.append((cx + rad * math.cos(ang), cy + rad * 
math.sin(ang))) + g.polygon(pts, fill=color) + + +def _smile(g, color, *, big=False): + if big: # open grin + g.chord([_MOUTH_CX - 13, _MOUTH_CY - 6, _MOUTH_CX + 13, _MOUTH_CY + 12], + start=0, end=180, fill=color) + else: + g.arc([_MOUTH_CX - 12, _MOUTH_CY - 8, _MOUTH_CX + 12, _MOUTH_CY + 8], + start=20, end=160, fill=color, width=4) + + +# Fixed emoji colors — these frames are icons, not part of the face's colour +# scheme, so a heart is always red and a thumb always yellow regardless of the +# user's chosen eye/mouth colours. +_RED = (255, 45, 75) +_PINK = (255, 95, 155) +_YELLOW = (255, 200, 40) + + +# -- individual emotion drawings --------------------------------------------- + +def _heart_face(eye, mouth, sclera): + img, g = _canvas() + _heart(g, W // 2, 26, 18, _RED) # one big RED heart fills the face + return img + + +def _laugh(eye, mouth, sclera): + img, g = _canvas() + _happy_eye(g, _EYE_L, eye) + _happy_eye(g, _EYE_R, eye) + _smile(g, mouth, big=True) # wide open grin + # a joy tear under each eye + for cx in (_EYE_L, _EYE_R): + g.ellipse([cx - 2, _EYE_B + 3, cx + 2, _EYE_B + 9], fill=(0, 180, 255)) + return img + + +def _love(eye, mouth, sclera): + img, g = _canvas() + _heart(g, _EYE_L, 22, 8, _PINK) # pink heart-shaped eyes + _heart(g, _EYE_R, 22, 8, _PINK) + _smile(g, _PINK) + return img + + +def _cool(eye, mouth, sclera): + img, g = _canvas() + frame = (40, 40, 55) + lens = (10, 10, 20) + # two lenses + bridge (sunglasses) + for cx in (_EYE_L, _EYE_R): + g.rounded_rectangle([cx - 8, _EYE_T, cx + 8, _EYE_B + 1], radius=4, + fill=lens, outline=frame, width=2) + g.line([cx - 5, _EYE_T + 3, cx + 2, _EYE_T + 3], fill=eye, width=2) # glint + g.line([_EYE_L + 8, _EYE_T + 3, _EYE_R - 8, _EYE_T + 3], fill=frame, width=3) + # a cool little smirk (raised on one side) + g.arc([_MOUTH_CX - 11, _MOUTH_CY - 5, _MOUTH_CX + 12, _MOUTH_CY + 8], + start=15, end=120, fill=mouth, width=4) + return img + + +def _sleepy(eye, mouth, sclera): + img, g = _canvas() + # 
droopy half-closed eyes: lid arc over a thin slit + for cx in (_EYE_L, _EYE_R): + g.arc([cx - 7, _EYE_T + 2, cx + 7, _EYE_B + 4], start=160, end=20, + fill=eye, width=3) + # small tired mouth + g.ellipse([_MOUTH_CX - 4, _MOUTH_CY - 2, _MOUTH_CX + 4, _MOUTH_CY + 4], fill=mouth) + # zZ drawn as cheap line-glyphs (no font dependency) + for (x, y, s) in ((36, 8, 5), (41, 3, 3)): + g.line([x, y, x + s, y], fill=eye, width=1) + g.line([x + s, y, x, y + s], fill=eye, width=1) + g.line([x, y + s, x + s, y + s], fill=eye, width=1) + return img + + +def _confused(eye, mouth, sclera): + img, g = _canvas() + _round_eye(g, _EYE_L, eye, sclera) # normal eye + _round_eye(g, _EYE_R, eye, sclera, t=_EYE_T - 3, b=_EYE_B - 3, w=5) # raised/small + # raised brow over the small eye + g.line([_EYE_R - 6, _EYE_T - 6, _EYE_R + 6, _EYE_T - 9], fill=eye, width=2) + # wavy/squiggle mouth + pts = [(_MOUTH_CX - 12 + i * 4, _MOUTH_CY + (3 if i % 2 else -3)) for i in range(7)] + g.line(pts, fill=mouth, width=3, joint="curve") + return img + + +def _kiss(eye, mouth, sclera): + img, g = _canvas() + _round_eye(g, _EYE_L, eye, sclera) + g.line([_EYE_R - 6, (_EYE_T + _EYE_B) // 2, _EYE_R + 6, (_EYE_T + _EYE_B) // 2], + fill=eye, width=3) # winking eye + # puckered red lips + g.ellipse([_MOUTH_CX - 4, _MOUTH_CY - 4, _MOUTH_CX + 4, _MOUTH_CY + 5], fill=_RED) + g.ellipse([_MOUTH_CX - 2, _MOUTH_CY - 2, _MOUTH_CX + 2, _MOUTH_CY + 3], fill=(0, 0, 0)) + _heart(g, 37, 13, 6, _RED) # little floating red heart + return img + + +def _star_struck(eye, mouth, sclera): + img, g = _canvas() + _star(g, _EYE_L, 22, 7, (255, 220, 0)) + _star(g, _EYE_R, 22, 7, (255, 220, 0)) + _smile(g, mouth, big=True) + return img + + +def _thumbs_up(eye, mouth, sclera): + # a 👍: one bold vertical thumb + a bold fist block, kept simple so it reads + # on the low-res LED grid (fine detail just blurs into a blob). 
+ img, g = _canvas() + g.rounded_rectangle([11, 30, 37, 52], radius=8, fill=_YELLOW) # fist block + g.rounded_rectangle([13, 6, 29, 34], radius=8, fill=_YELLOW) # big thumb up + g.line([30, 34, 36, 34], fill=(0, 0, 0), width=3) # thumb/finger split + return img + + +_BUILDERS = { + "heart": _heart_face, + "laugh": _laugh, + "love": _love, + "cool": _cool, + "sleepy": _sleepy, + "confused": _confused, + "kiss": _kiss, + "star_struck": _star_struck, + "thumbs_up": _thumbs_up, +} + + +def emotion_frames(*, eye_color=_cf.DEFAULT_EYE, mouth_color=_cf.DEFAULT_MOUTH, + sclera_color=_cf.WHITE, include=None) -> dict: + """Return ``{name: raw_bytes}`` for the extra emotion frames. + + ``include`` optionally restricts to a subset (a set/list of names) so the + caller can honour the mask's slot budget. + """ + names = list(_BUILDERS) if include is None else [n for n in _BUILDERS if n in include] + out = {} + for name in names: + img = _BUILDERS[name](eye_color, mouth_color, sclera_color) + out[name] = encode(img) + return out diff --git a/vendor/Sanad/face/face_motion.py b/vendor/Sanad/face/face_motion.py new file mode 100644 index 0000000..99838e7 --- /dev/null +++ b/vendor/Sanad/face/face_motion.py @@ -0,0 +1,599 @@ +#!/usr/bin/env python3 +"""Lifelike face motion for the Shining LED mask — SanadV3. + +A richer, more *organic* driver than the Mask lib's built-in idle. 
Instead of an +occasional blink/glance, it gives the robot's face the small, constant motion a +real face has: + + * natural blinking — varied intervals, occasional quick double-blinks + * frequent small eye saccades (darts) with short gaze holds and drift + * idle micro-expressions (a brief smile now and then) + * state-aware behaviour: + idle — relaxed, wanders, blinks + listening — attentive, eyes mostly forward, fewer darts, soft blinks + thinking — looks away (longer gaze holds), slower blinks + speaking — mouth lip-syncs to audio + the odd mid-sentence blink + * quick reactions: surprised / happy(smile) / sad, held briefly then released + +It drives the mask by PLAY of the pre-uploaded DIY frames (no per-frame upload), +so motion is smooth. Lip-sync composes with the eye motion via feed_audio_level(). + +Run it standalone (keep the mask within ~30 cm for the one-time frame upload): + + python3 face/face_motion.py # connect, load frames, stay alive + python3 face/face_motion.py --demo # cycle the states to show the range + python3 face/face_motion.py --reload # force re-upload of the frame set + +Integrate into Sanad: construct ``LifelikeFace(mask=)`` +(or let it connect itself), ``await face.start()``, then drive it from the event +bus / Gemini lip-sync markers: + + face.set_listening() # when the user starts speaking + face.set_thinking() # while a tool/response is being prepared + face.set_speaking(True/False) # around a spoken reply + face.feed_audio_level(rms_0_to_1) # per audio chunk -> real lip-sync + face.react("surprised" | "smile" | "sad") + face.set_idle() # back to relaxed wandering +""" + +from __future__ import annotations + +import argparse +import asyncio +import os +import random +import sys +import time +import logging +import threading +from pathlib import Path + +log = logging.getLogger("sanad.face_motion") + +# Frames present in the mask's DIY slots (colorface.default_frames()). 
+GAZE = ("neutral", "look_left", "look_right") +MOUTH = ("neutral", "talk1", "talk2", "talk3") + +# How long after the last lip-sync marker the face stays "speaking" (mouth +# follows the amplitude; pauses close it). When markers stop for this long the +# turn ends and the eyes return to their underlying state. +_SPEECH_WINDOW = 0.6 + +# Mouth-frame cadence while speaking. Each frame is a FULL-face DIY slot switch; +# this small mask can't cleanly repaint the LED matrix faster than ~5/s, so the +# old 0.09s (~11/s) cadence overran it and showed torn/scrambled composites of +# several frames at once. Cap it well under the tear threshold — speech visemes +# only change ~4-7/s anyway, so lip-sync still reads fine. +_SPEAK_FRAME_SEC = 0.22 + +# BLE-link health. If the mask link drops mid-session every play_diy raises a +# BleakError, which left the loop busy-spinning ~20x/s forever (no recovery but a +# manual disconnect/connect). Treat a run of consecutive play failures (or +# mask.is_connected going False) as a dropped link and attempt a *bounded* +# reconnect; if that is exhausted, stop the loop so the face goes idle/unavailable +# instead of hammering a dead transport. +_PLAY_FAIL_LIMIT = 10 # consecutive failed plays before we call it a drop +_RECONNECT_ATTEMPTS = 3 # reconnect tries per detected drop +_RECONNECT_BACKOFF = 2.0 # seconds between reconnect tries + +# BLE link keepalive. _play() skips re-sending an *unchanged* frame, so a long +# neutral/idle stretch writes nothing but the occasional blink (every 2-4.5s). +# If that quiet gap (plus an RF glitch) outlasts the link's supervision timeout +# the mask drops — and every reconnect briefly flashes the mask's own built-in +# face. Re-send the current frame at least this often so the link never goes +# quiet long enough to be dropped. One tiny write/sec when idle; free when busy. +_KEEPALIVE_SEC = 1.0 + +# Frames that carry the animated face's mouth (gaze + lip-sync). 
"Hide mouth" +# blacks out the mouth region on just these, leaving eyes-only — the emotion +# icons (heart/thumb/…) are left alone. +_MOUTH_FRAMES = ("neutral", "talk1", "talk2", "talk3", "blink", + "look_left", "look_right") +_MOUTH_MASK_TOP = 32 # display-y below which the mouth lives (eyes end ~29) + + +def _mask_mouth_bytes(data: bytes) -> bytes: + """Return a copy of an encoded 46x58 frame with the mouth region blacked out + (decode the transposed bytes -> mask display rows >= _MOUTH_MASK_TOP -> re-encode).""" + import colorface as _cf + from PIL import Image as _Image, ImageDraw as _ImageDraw + img = _Image.frombytes("RGB", (_cf.DISPLAY_H, _cf.DISPLAY_W), bytes(data)) + img = img.transpose(_Image.Transpose.TRANSPOSE) # -> 46x58 display space + _ImageDraw.Draw(img).rectangle([0, _MOUTH_MASK_TOP, _cf.DISPLAY_W, _cf.DISPLAY_H], + fill=(0, 0, 0)) + return _cf.encode(img) + + +def _add_mask_to_path() -> str: + """Put the flat Mask library (mask.py / faceanim.py / colorface.py) on sys.path.""" + d = os.environ.get("SANAD_MASK_DIR") or str(Path(__file__).resolve().parents[2] / "Mask") + if d and d not in sys.path: + sys.path.insert(0, d) + return d + + +class LifelikeFace: + """Organic, state-aware motion driver for the LED mask.""" + + def __init__(self, mask=None, *, name_prefix="MASK", address=None, adapter=None, + brightness=95, frames=None, eye_color=None, mouth_color=None, + sclera_color=None, auto_reconnect=True, hide_mouth=False): + _add_mask_to_path() + import mask as _mask # flat Mask lib + import faceanim as _faceanim + import colorface as _colorface + self._ShiningMask = _mask.ShiningMask + self._FaceAnimator = _faceanim.FaceAnimator + self._colorface = _colorface + + self.mask = mask + self._own_mask = mask is None + self.name_prefix = name_prefix + self.address = address + self.adapter = adapter + self.brightness = int(brightness) + # When False, a dropped link is NOT self-healed here — the loop bails + # cleanly (and forces the transport disconnected) 
so an external owner + # (FaceController's reconnect supervisor) brings the link + face back. + self._auto_reconnect = bool(auto_reconnect) + # Frame colors: explicit frames win; else build the default set tinted + # with whatever colors were given (None -> the lib defaults cyan/red). + if frames is None: + ck = {} + if eye_color is not None: + ck["eye_color"] = tuple(eye_color) + if mouth_color is not None: + ck["mouth_color"] = tuple(mouth_color) + if sclera_color is not None: + ck["sclera_color"] = tuple(sclera_color) + frames = _colorface.default_frames(**ck) + # Extra Gemini-triggerable emotions (heart, laugh, love-eyes, cool, + # sleepy, confused, kiss, star_struck) in the same style. Appended + # after the base set so slot ids 1..N stay stable for existing + # frames. Guarded: a missing module never breaks the face. + try: + from Project.Sanad.face.emotion_frames import emotion_frames as _emo + # 7 emotions so slots 1..19 hold the face set and slot 20 stays + # free as a scratch slot for QR/social images (mask caps at 20). + frames = {**frames, **_emo(**ck, include={ + "heart", "laugh", "love", "cool", "confused", "kiss", "thumbs_up"})} + except Exception: + log.exception("emotion frames unavailable — base frames only") + self.frames = frames + # Reserved DIY slot (just past the animated frames) for on-demand images + # (QR / social) shown via the FaceController's show_scratch_image(). + self.scratch_slot = len(self.frames) + 1 + # Mouth show/hide: keep the unmasked originals so a live toggle can + # re-upload just the gaze/talk slots masked or normal. 
+ self._base_frames = dict(self.frames) + self.hide_mouth = bool(hide_mouth) + if self.hide_mouth: + self.frames = {n: (_mask_mouth_bytes(d) if n in _MOUTH_FRAMES else d) + for n, d in self.frames.items()} + + def mouth_frames_for(self, hidden: bool) -> dict: + """{name: bytes} for the gaze/talk frames, masked (hidden) or normal — the + FaceController re-uploads just these slots to toggle the mouth live.""" + return {n: (_mask_mouth_bytes(self._base_frames[n]) if hidden + else self._base_frames[n]) + for n in _MOUTH_FRAMES if n in self._base_frames} + + self.slots: dict = {} + self._state = "idle" # underlying eye state: idle|listening|thinking + self._speaking = False # explicit speaking turn (set_speaking) + self._level = 0.0 # live lip-sync amplitude 0..1 + self._last_mouth_t = 0.0 # last set_mouth/feed_audio_level time + self._react = None + self._react_until = 0.0 + self._cur = None + self._task = None + self._stop = False + self._play_fails = 0 # consecutive play_diy failures (link-drop signal) + self._last_write = 0.0 # monotonic of the last successful play_diy (keepalive) + self._paused = False # loop stops writing (used during a scratch upload) + self._paused_ack = threading.Event() # set once the loop has actually parked + + # -- lifecycle ------------------------------------------------------------ + + async def start(self, *, reload: bool = False): + if self.mask is None: + self.mask = self._ShiningMask( + address=self.address, name_prefix=self.name_prefix, adapter=self.adapter) + await self.mask.connect(timeout=20.0, attempts=12) + await self.mask.set_brightness(self.brightness) + # Upload the frame set via the RELIABLE (acked) image path — see + # _upload_frames. We no longer borrow FaceAnimator.load(), whose + # fire-and-forget upload silently corrupts slots on a marginal link (a + # dropped packet -> garbage frame, no exception -> no retry). 
+ await self._upload_frames(force=reload) + self._stop = False + await self._play("neutral") + self._task = asyncio.create_task(self._loop()) + return self + + async def stop(self): + self._stop = True + if self._task: + self._task.cancel() + try: + await self._task + except asyncio.CancelledError: + pass + self._task = None + if self._own_mask and self.mask is not None: + try: + await self.mask.disconnect() + except Exception: + pass + + async def __aenter__(self): + return await self.start() + + async def __aexit__(self, *exc): + await self.stop() + + # -- reliable frame upload ------------------------------------------------ + + async def _upload_frames(self, *, force: bool): + """Upload the frame set to the mask's DIY slots, RELIABLY. + + The mask's default DIY upload is fire-and-forget: ~80 write-without- + response packets per frame with no ack, so a packet dropped on a + marginal BLE link silently corrupts that slot (no exception -> no retry) + and the frame renders as garbage. We instead drive each frame through + the mask's ACKED image path (upload_image -> _upload(kind=IMAGE), + per-packet REOK), retrying the whole frame on any failure (a fresh DATS + resets the half-written slot). Same name->slot (1..N) map as before. + """ + names = list(self.frames) + self.slots = {name: i + 1 for i, name in enumerate(names)} + if not force: + try: + count = await self.mask.get_diy_count(timeout=4.0) or 0 + except Exception: + count = 0 + if count >= len(names): + return # frames already stored (persist in flash) + await self.mask.clear_diy() + acked = True + for i, (name, data) in enumerate(self.frames.items(), start=1): + acked = await self._upload_one_frame(i, bytes(data), acked) + await asyncio.sleep(0.2) + + async def _upload_one_frame(self, slot: int, data: bytes, acked: bool) -> bool: + """Upload one frame. Prefer the acked image path; on failure reconnect + + retry the whole frame. 
If frame 1 proves this mask clone never acks + IMAGE uploads, latch off the acked path and use paced fire-and-forget + for the rest. Returns whether to keep using the acked path.""" + if acked: + for attempt in range(5): + try: + await self.mask.upload_image(data, slot, timeout=8.0) + return True + except Exception as exc: + # frame 1 failing its first two acked tries => this clone + # doesn't ack IMAGE uploads; stop trying it. + if slot == 1 and attempt >= 1: + log.warning("mask: IMAGE uploads not acked by this clone " + "(%s) -- using paced fire-and-forget", exc) + break + if attempt == 4: + log.warning("mask: acked upload of slot %d exhausted (%s) " + "-- fire-and-forget fallback", slot, exc) + break + await self._reupload_reconnect() + # fallback: paced fire-and-forget (probabilistic -- keep the mask close) + await self.mask.upload_raw_image(data, index=slot, + chunk_delay=0.10, init_delay=0.30) + return False + + async def _reupload_reconnect(self): + """Drop + re-establish the link mid-upload so the next frame attempt + starts clean (a fresh DATS resets any half-written slot).""" + try: + await self.mask.disconnect() + except Exception: + pass + await asyncio.sleep(1.0) + try: + await self.mask.connect(timeout=15.0, attempts=8) + await self.mask.set_brightness(self.brightness) + except Exception: + pass + + # -- control -------------------------------------------------------------- + + def set_state(self, state: str): + self._state = state if state in ("idle", "listening", "thinking", "speaking") else "idle" + + def set_idle(self): + self._speaking = False + self._last_mouth_t = 0.0 # leave any speaking overlay immediately + self.set_state("idle") + + def set_listening(self): + self._speaking = False + self._last_mouth_t = 0.0 + self.set_state("listening") + + def set_thinking(self): + self._speaking = False + self._last_mouth_t = 0.0 + self.set_state("thinking") + + def set_speaking(self, on: bool): + """Mark a speaking turn. 
Without lip-sync markers the mouth auto-talks; + with them it follows the amplitude. The underlying eye state is kept, so + it returns there when the turn ends.""" + self._speaking = bool(on) + if not on: + self._last_mouth_t = 0.0 + self._level = 0.0 + + def feed_audio_level(self, level: float): + """Per-audio-chunk amplitude 0..1 -> real lip-sync (mouth opens by loudness). + + Keeps the face 'speaking' for a short window after the last call, so + pauses close the mouth and the turn ends cleanly when markers stop — + without depending on an explicit speaking on/off signal.""" + self._level = max(self._level * 0.4, min(1.0, float(level))) # fast attack + self._last_mouth_t = time.monotonic() + + def react(self, emotion: str, hold: float = 1.4): + """Briefly hold an expression (surprised / smile / sad / …) then release.""" + if emotion in self.slots: + self._react = emotion + self._react_until = time.monotonic() + float(hold) + + # -- FaceController-compatible API (so it can drop in for FaceAnimator) ---- + + def set_mouth(self, level: int): + """Discrete mouth level 0..3 (e.g. from the Gemini [[MOUTH:n]] relay) -> + drives lip-sync. 
Maps the level to a representative amplitude; level 0 + decays the mouth shut but keeps the short speaking window alive.""" + amp = (0.0, 0.12, 0.24, 0.5)[max(0, min(3, int(level)))] + self._level = max(self._level * 0.4, amp) + self._last_mouth_t = time.monotonic() + + def set_expression(self, name): + """Hold an expression frame until cleared with None (vs the timed react).""" + if name and name in self.slots: + self._react = name + self._react_until = float("inf") + elif self._react_until == float("inf"): + self._react = None + + def pause(self): + """Stop the loop from writing to the mask (so a concurrent scratch-slot + upload's per-packet acks aren't disturbed by play_diy traffic).""" + self._paused_ack.clear() + self._paused = True + + def wait_paused(self, timeout: float = 2.0) -> bool: + """Block until the loop has actually reached the paused branch (so no + play_diy is in flight when the caller starts the scratch upload).""" + return self._paused_ack.wait(timeout) + + def resume(self): + self._paused = False + self._paused_ack.clear() + self._cur = None # force a redraw when the loop takes over again + + async def show(self, name: str): + """One-off: briefly show a named frame (used by FaceController.show_expression).""" + if name in self.slots: + self.react(name, hold=1.5) + + # -- internals ------------------------------------------------------------ + + @staticmethod + def _mouth_for(level: float) -> str: + i = 0 if level < 0.06 else 1 if level < 0.16 else 2 if level < 0.32 else 3 + return MOUTH[i] + + async def _play(self, name: str, *, force: bool = False): + slot = self.slots.get(name) + if slot is None or (name == self._cur and not force): + return + try: + await self.mask.play_diy(slot) + self._cur = name + self._last_write = time.monotonic() # keepalive clock: link saw traffic + self._play_fails = 0 # link is alive again + except Exception: + self._cur = None # retry next tick on a transient BLE error + self._play_fails += 1 # ...but count it: a 
sustained run == a drop + + def _link_dead(self) -> bool: + """True once the BLE link looks gone: the transport reports disconnected, + or play_diy has failed a sustained run in a row (a single glitch is still + treated as transient and retried).""" + connected = bool(getattr(self.mask, "is_connected", False)) if self.mask else False + return (not connected) or self._play_fails >= _PLAY_FAIL_LIMIT + + async def _reconnect(self) -> bool: + """Bounded reconnect after a detected drop. Frames persist on the mask's + flash, so on success we only re-pin brightness + redraw the current frame + (no re-upload). Returns True if the link is back, False if exhausted.""" + for i in range(_RECONNECT_ATTEMPTS): + if self._stop: + return False + try: + if getattr(self.mask, "is_connected", False): + await self.mask.disconnect() # clean any half-open client first + except Exception: + pass + try: + await self.mask.connect(timeout=10.0, attempts=2) + await self.mask.set_brightness(self.brightness) + self._play_fails = 0 + self._cur = None # force a redraw on the fresh link + await self._play("neutral") + return True + except Exception: + await asyncio.sleep(_RECONNECT_BACKOFF) + return False + + async def _blink(self, restore: str): + await self._play("blink") + await asyncio.sleep(random.uniform(0.08, 0.13)) + if random.random() < 0.18: # occasional quick double-blink + await self._play(restore) + await asyncio.sleep(random.uniform(0.07, 0.11)) + await self._play("blink") + await asyncio.sleep(random.uniform(0.08, 0.12)) + await self._play(restore) + + async def _loop(self): + mono = time.monotonic + t_blink = mono() + random.uniform(1.5, 4.0) + t_sacc = mono() + random.uniform(0.6, 1.6) + t_micro = mono() + random.uniform(12.0, 25.0) + gaze = "neutral" + while not self._stop: + t = mono() + + # BLE link health: if it dropped, try a bounded reconnect instead of + # busy-spinning play_diy on a dead transport. 
If reconnect is + # exhausted, leave the loop so the face stops (the controller's + # status() then reports it not running) rather than spinning forever. + if self._link_dead(): + if not self._auto_reconnect: + # Owner-managed recovery (FaceController supervisor): make the + # transport report disconnected so the supervisor's is_connected + # check fires, then leave the loop. The supervisor reconnects + # and rebuilds the face (frames persist on the mask's flash). + try: + if getattr(self.mask, "is_connected", False): + await self.mask.disconnect() + except Exception: + pass + break + if not await self._reconnect(): + break + t = mono() # reconnect can take a while + + # Paused (during a scratch-slot upload): write nothing so the upload's + # per-packet REOK acks aren't disturbed by play_diy traffic. Signal + # that we've actually parked so the caller can start the upload. + if self._paused: + self._paused_ack.set() + await asyncio.sleep(0.1) + continue + + # BLE keepalive: re-send the current frame if the link has gone quiet. + # _play() skips unchanged frames, so a long neutral idle stretch writes + # nothing but blinks; a quiet gap past the supervision timeout drops the + # link, and each reconnect flashes the mask's built-in face. A cheap + # periodic re-send keeps the link alive (no-op while speaking — that + # path already writes ~11x/s, so _last_write stays fresh). + if self._cur is not None and (t - self._last_write) >= _KEEPALIVE_SEC: + await self._play(self._cur, force=True) + + # transient reaction overrides everything briefly + if self._react is not None: + if t < self._react_until: + await self._play(self._react) + await asyncio.sleep(0.06) + continue + self._react = None + self._cur = None # force a redraw of whatever's underneath + + # "speaking" = an explicit turn OR fresh lip-sync markers (the latter + # window auto-expires, so the mouth closes and the turn ends when the + # markers stop, without needing a reliable speaking-off signal). 
+ lipsync_active = (t - self._last_mouth_t) < _SPEECH_WINDOW + if self._speaking or lipsync_active: + if lipsync_active: + base = self._mouth_for(self._level) # 0 = closed on pauses + self._level *= 0.55 # decay toward closed + else: + base = MOUTH[random.choice([0, 1, 1, 2, 2, 3, 3, 2, 1, 0])] # auto-talk + await self._play(base) + # No mid-speech blink: a blink is a 2-3 frame burst that, on top + # of the mouth cadence, spikes the switch rate and tears the + # display. Eyes blink between utterances (idle/listening) instead. + await asyncio.sleep(_SPEAK_FRAME_SEC) + continue + + # --- non-speaking: idle / listening / thinking --- + if t >= t_blink: + await self._blink(gaze) + lo, hi = (3.5, 6.5) if self._state == "thinking" else (2.0, 4.5) + t_blink = t + random.uniform(lo, hi) + + if t >= t_sacc: + if self._state == "thinking": + gaze = random.choice(["look_left", "look_right", "look_left", "look_right", "neutral"]) + hold = random.uniform(0.9, 1.8) + elif self._state == "listening": + gaze = random.choice(["neutral", "neutral", "neutral", "look_left", "look_right"]) + hold = random.uniform(0.5, 1.2) + else: # idle — relaxed wandering + gaze = random.choice(["neutral", "neutral", "look_left", "look_right", "neutral"]) + hold = random.uniform(0.3, 0.9) + await self._play(gaze) + t_sacc = t + hold + random.uniform(0.4, 1.4) + else: + await self._play(gaze) + + if self._state == "idle" and t >= t_micro: # rare idle micro-smile + await self._play("smile") + await asyncio.sleep(random.uniform(0.6, 1.0)) + gaze = "neutral" + self._cur = None + t_micro = t + random.uniform(15.0, 30.0) + + await asyncio.sleep(0.05) + + +# --------------------------------------------------------------------------- +# Standalone runner +# --------------------------------------------------------------------------- + +async def _amain(args): + face = LifelikeFace(name_prefix=args.name_prefix, address=args.address, + brightness=args.brightness) + print("connecting + loading frames (keep the 
mask within ~30 cm) ...", flush=True) + await face.start(reload=args.reload) + print("lifelike motion running. Ctrl+C to stop.", flush=True) + try: + if args.demo: + steps = [ + ("idle (wandering + blinks)", lambda: face.set_idle(), 7), + ("listening (attentive)", lambda: face.set_listening(), 7), + ("thinking (looks away)", lambda: face.set_thinking(), 7), + ("speaking (auto lip-sync)", lambda: face.set_speaking(True), 7), + ("react: surprised", lambda: face.react("surprised", 2.0), 2.2), + ("react: smile", lambda: face.react("smile", 2.0), 2.2), + ("react: sad", lambda: face.react("sad", 2.0), 2.2), + ("back to idle", lambda: face.set_idle(), 5), + ] + for label, action, dur in steps: + print(" ->", label, flush=True) + action() + await asyncio.sleep(dur) + face.set_idle() + await asyncio.sleep(2) + else: + while True: + await asyncio.sleep(1) + except KeyboardInterrupt: + print("\nstopping ...") + finally: + await face.stop() + + +def main(): + ap = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + ap.add_argument("--demo", action="store_true", help="cycle through the states/reactions") + ap.add_argument("--reload", action="store_true", help="force re-upload of the frame set") + ap.add_argument("--address", help="mask BLE MAC") + ap.add_argument("--name-prefix", default="MASK") + ap.add_argument("--brightness", type=int, default=95) + asyncio.run(_amain(ap.parse_args())) + + +if __name__ == "__main__": + main() diff --git a/vendor/Sanad/face/mask_face.py b/vendor/Sanad/face/mask_face.py new file mode 100644 index 0000000..cd75d05 --- /dev/null +++ b/vendor/Sanad/face/mask_face.py @@ -0,0 +1,801 @@ +"""Shining LED face mask — Sanad subsystem (BLE, owns its own asyncio loop). 
+ +Wraps the standalone **Mask** project (``Project/Mask`` — the flat ``shiningmask`` +library: ``mask.py`` / ``faceanim.py`` / ``colorface.py`` …) as a Sanad subsystem +so the dashboard "Mask Face" tab can drive the robot's animated LED face. + +Why a dedicated loop: the mask talks BLE (bleak/BlueZ) and ``FaceAnimator`` runs a +persistent asyncio task, so this controller owns a background daemon thread with +its own event loop. Route handlers call the plain SYNC methods here (themselves +wrapped in ``asyncio.to_thread`` by FastAPI); each marshals a coroutine onto that +loop via ``run_coroutine_threadsafe``. + +The Mask project is a flat set of top-level modules (not an installed package), so +it is imported by inserting its directory on ``sys.path``. Default location is the +sibling ``/Mask``; override with ``SANAD_MASK_DIR`` or +``config/mask_config.json``. Needs an env with ``bleak`` + ``Pillow`` (g1_env). If +those are missing the subsystem still constructs but reports unavailable, and the +rest of Sanad is unaffected (the dashboard tab shows the reason). +""" + +from __future__ import annotations + +import asyncio +import os +import sys +import threading +from pathlib import Path +from typing import Optional, Sequence, Tuple + +from Project.Sanad.config import BASE_DIR +from Project.Sanad.core import config_loader +from Project.Sanad.core.logger import get_logger + +log = get_logger("mask_face") + +Color = Tuple[int, int, int] + +# Named frames provided by colorface.default_frames() (FaceAnimator slots). +EXPRESSIONS = ("neutral", "smile", "blink", "look_left", "look_right", + "talk1", "talk2", "talk3", "surprised", "sad", "wink", "angry", + "heart", "laugh", "love", "cool", "confused", "kiss", "thumbs_up") + +# Default face colors (match colorface.DEFAULT_EYE / DEFAULT_MOUTH). 
+DEFAULT_EYE_COLOR: Color = (0, 230, 255) # cyan +DEFAULT_MOUTH_COLOR: Color = (255, 50, 50) # red +DEFAULT_SCLERA_COLOR: Color = (255, 255, 255) # white of the eye + + +def _parse_color(value, default: Color) -> Color: + """Coerce a config/API color (``[r,g,b]``, ``(r,g,b)``, or ``"#rrggbb"``) to a + clamped RGB tuple; fall back to ``default`` on anything unusable.""" + if value in (None, ""): + return tuple(default) + try: + if isinstance(value, str): + h = value.strip().lstrip("#") + if len(h) == 3: + h = "".join(c * 2 for c in h) + value = (int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)) + r, g, b = (int(value[0]), int(value[1]), int(value[2])) + return (max(0, min(255, r)), max(0, min(255, g)), max(0, min(255, b))) + except Exception: + return tuple(default) + + +class FaceController: + """Owns the BLE mask connection + FaceAnimator on a private event loop.""" + + def __init__(self): + cfg = config_loader.load("mask") + + def _cfg(key, default): + v = cfg.get(key, default) + return v if v not in (None, "") else default + + mask_dir = os.environ.get("SANAD_MASK_DIR") or _cfg("mask_dir", "") + if not mask_dir: + # Default: the sibling Mask project (…/Project/Mask). + mask_dir = str(Path(BASE_DIR).parent / "Mask") + self.mask_dir = mask_dir + + self.address = (os.environ.get("SANAD_MASK_ADDRESS") or _cfg("address", "")) or None + self.name_prefix = os.environ.get("SANAD_MASK_NAME_PREFIX") or _cfg("name_prefix", "MASK") + self.adapter = (os.environ.get("SANAD_MASK_ADAPTER") or _cfg("adapter", "")) or None + self.brightness = int(_cfg("brightness", 95)) + self.fps = float(_cfg("fps", 8.0)) + self.connect_timeout = float(_cfg("connect_timeout", 15.0)) + self.connect_attempts = int(_cfg("connect_attempts", 5)) + # Use the lifelike motion driver (saccades, varied blinks, states, + # reactions, smooth lip-sync). Falls back to the basic FaceAnimator if + # the lifelike module is unavailable or this is set false. 
+ self.lifelike = bool(_cfg("lifelike", True)) + self._face_kind = None + self._hide_mouth = bool(_cfg("hide_mouth", False)) # eyes-only face toggle + # Gemini<->mask link. Default OFF: the mask does NOT auto-connect (no BLE + # churn) and Gemini's emotion/social markers are ignored. Turned on from + # the dashboard, it connects the mask + lets Gemini drive it. + self._gemini_linked = bool(_cfg("gemini_linked", False)) + # Auto-connect + start the animated face on boot (best-effort, in the + # background so it never blocks startup). After the one-time frame + # upload, later boots just connect + animate (no upload). + self.autostart = bool(_cfg("autostart", True)) + + # Face colors (baked into the uploaded DIY frames). Stored as RGB lists in + # config; changing them re-uploads the frame set (face_start reload). + self.eye_color = _parse_color(_cfg("eye_color", None), DEFAULT_EYE_COLOR) + self.mouth_color = _parse_color(_cfg("mouth_color", None), DEFAULT_MOUTH_COLOR) + self.sclera_color = _parse_color(_cfg("sclera_color", None), DEFAULT_SCLERA_COLOR) + + # runtime state + self._mask = None # shiningmask.ShiningMask + self._face = None # faceanim.FaceAnimator + self._lib: Optional[dict] = None + self._lib_failed = False + self._connecting = False + self._face_running = False + self._speaking = False + self._mouth: Optional[int] = None + self._last_error: Optional[str] = None + self._op_lock = threading.Lock() + + # Desired-state intents the reconnect supervisor enforces: + # _want_connected — we want a live BLE link (set on connect/autostart, + # cleared on a *user* disconnect). While true, the + # supervisor keeps (re)connecting through drops / weak + # signal until it succeeds. + # _face_desired — the animated face should be running (set on + # face_start, cleared on face_stop / static overrides + # like text/image). After a reconnect the supervisor + # restarts the face iff this is true. 
+ self._want_connected = False + self._face_desired = False + self._reconnecting = False + + # dedicated event loop in a background daemon thread (idle until used) + self._loop = asyncio.new_event_loop() + self._thread = threading.Thread(target=self._run_loop, daemon=True, + name="mask-face-loop") + self._thread.start() + log.info("FaceController ready (mask_dir=%s, name_prefix=%s, address=%s)", + self.mask_dir, self.name_prefix, self.address or "scan") + + # Persistent reconnect supervisor: self-heals dropped/weak links and even + # establishes the FIRST connection once the mask comes into range, without + # the user babysitting the Connect button. + threading.Thread(target=self._supervisor, daemon=True, + name="mask-supervisor").start() + + # Only auto-connect on boot if Gemini is linked (default off -> the mask + # stays disconnected + silent until the user links it from the dashboard). + if self.autostart and self._gemini_linked: + threading.Thread(target=self._autostart, daemon=True, + name="mask-autostart").start() + + def _clear_stale_mask_links(self) -> None: + """Drop any BlueZ-level connection to a MASK device left over from a + previous process, BEFORE the first connect. + + A hard service restart leaves the old link half-open: BlueZ still + reports the mask "connected" so it stops advertising, our scan can't + find it, and the fresh connect churns with 'Software caused connection + abort' / 'failed to discover services' for minutes before BlueZ times + the stale link out — flashing the mask's built-in face the whole time + (this, not WiFi/coexistence, is what makes the weird face appear on a + restart/boot). Disconnecting it first lets the mask advertise again, so + the new connect is clean and immediate. 
Fully guarded + bounded — a + missing bluetoothctl or any error is a no-op, never blocking startup.""" + try: + import subprocess as _sp + import time as _time + out = _sp.run(["bluetoothctl", "devices"], capture_output=True, + text=True, timeout=5).stdout or "" + prefix = (self.name_prefix or "MASK").upper() + cleared = False + for line in out.splitlines(): + parts = line.split() + # "Device C3:8A:9B:05:B4:C9 MASK-05B4C9" + if (len(parts) >= 3 and parts[0] == "Device" + and parts[2].upper().startswith(prefix)): + addr = parts[1] + _sp.run(["bluetoothctl", "disconnect", addr], + capture_output=True, text=True, timeout=8) + log.info("cleared stale BlueZ link to %s (%s) before first connect", + parts[2], addr) + cleared = True + if cleared: + _time.sleep(1.5) # let the mask resume advertising before we scan + except Exception as exc: + log.debug("stale mask-link cleanup skipped: %s", exc) + + def _autostart(self): + """Best-effort connect + start the face on boot (runs on its own thread, + so a missing/asleep mask never blocks or breaks Sanad startup). 
Declares + the connect+face *intent* up front, so even if the mask is off / out of + range at boot, the reconnect supervisor keeps trying and brings the face + up on its own once the mask appears — no dashboard babysitting.""" + import time as _time + _time.sleep(4.0) # let the rest of Sanad finish booting first + if not self.lib_available: + log.warning("mask autostart skipped — Mask lib unavailable " + "(need bleak + Pillow in this conda env)") + return + self._want_connected = True + self._face_desired = True + self._clear_stale_mask_links() # drop any half-open link from a prior process + try: + self.connect() + except Exception as exc: + log.warning("mask autostart: connect failed (%s) — the supervisor will " + "keep retrying; or connect from the dashboard", exc) + return + try: + self.face_start(reload=False) + log.info("mask autostart: animated face running (driver=%s)", self._face_kind) + except Exception: + log.exception("mask autostart: face_start failed") + + def _supervisor(self): + """Background daemon that enforces the connect/face *intents*. + + While ``_want_connected`` is set it keeps (re)establishing the BLE link + through drops and weak-signal scan misses; once connected, if the face is + desired but not running (e.g. after a reconnect) it restarts it. A user + Disconnect clears the intent so this stops fighting a deliberate + disconnect. Each attempt reuses the normal serialized connect()/ + face_start() paths, so there are no new locking hazards — only retries.""" + import time as _time + backoff = 3.0 + while True: + _time.sleep(backoff) + try: + if not self._want_connected or self._connecting: + backoff = 3.0 + continue + if self.is_connected: + backoff = 3.0 + # Link is up — restore the face if it's wanted but stopped + # (e.g. the face loop bailed on a drop the supervisor healed). 
+ if self._face_desired and not self._face_running: + try: + self.face_start(reload=False) + log.info("mask supervisor: face restored") + except Exception as exc: + log.debug("mask supervisor: face restore failed (%s)", exc) + backoff = 5.0 + continue + # Want a link but don't have one -> reconnect (short, then loop). + self._reconnecting = True + try: + self.connect(timeout=12.0, attempts=2) + log.info("mask supervisor: link (re)established") + if self._face_desired: + self.face_start(reload=False) + backoff = 3.0 + except Exception as exc: + # Keep trying with a gentle backoff (weak signal / mask off). + log.debug("mask supervisor: reconnect attempt failed (%s)", exc) + # 'Software caused connection abort' / 'device disconnected' + # is usually a half-open BlueZ link from the drop: the mask + # still shows "connected" so it stops advertising and the next + # scan can't find it. Clearing it lets the mask re-advertise. + m = str(exc).lower() + if any(s in m for s in ("abort", "disconnect", "not connected", + "discover services")): + try: + self._clear_stale_mask_links() + except Exception: + pass + backoff = min(backoff * 1.5, 20.0) + finally: + self._reconnecting = False + except Exception: + log.exception("mask supervisor loop error") + backoff = 5.0 + + # -- loop plumbing -------------------------------------------------------- + + def _run_loop(self): + asyncio.set_event_loop(self._loop) + self._loop.run_forever() + + def _submit(self, coro, timeout: float = 30.0): + """Run a coroutine on the mask loop from a caller thread, blocking.""" + fut = asyncio.run_coroutine_threadsafe(coro, self._loop) + return fut.result(timeout=timeout) + + # -- lazy import of the flat Mask library --------------------------------- + + def _ensure_lib(self) -> dict: + if self._lib is not None: + return self._lib + if self._lib_failed: + raise RuntimeError(self._last_error or "mask library unavailable") + if self.mask_dir and self.mask_dir not in sys.path: + sys.path.insert(0, 
self.mask_dir) + try: + import mask as _mask + import faceanim as _faceanim + import colorface as _colorface + import constants as _constants + except Exception as exc: + self._lib_failed = True + self._last_error = f"mask library import failed: {exc}" + log.exception("Mask library import failed (dir=%s) — is bleak/Pillow " + "installed (g1_env)?", self.mask_dir) + raise RuntimeError(self._last_error) + try: + from Project.Sanad.face.face_motion import LifelikeFace as _LifelikeFace + except Exception: + _LifelikeFace = None + log.warning("LifelikeFace unavailable — falling back to FaceAnimator") + self._lib = { + "ShiningMask": _mask.ShiningMask, + "FaceAnimator": _faceanim.FaceAnimator, + "LifelikeFace": _LifelikeFace, + "colorface": _colorface, + "TextMode": _constants.TextMode, + } + log.info("Mask library imported from %s", self.mask_dir) + return self._lib + + @property + def lib_available(self) -> bool: + if self._lib is not None: + return True + if self._lib_failed: + return False + try: + self._ensure_lib() + return True + except Exception: + return False + + @property + def is_connected(self) -> bool: + return bool(self._mask is not None and getattr(self._mask, "is_connected", False)) + + def _require_connected(self): + if not self.is_connected: + raise RuntimeError("mask not connected") + + # -- status --------------------------------------------------------------- + + def status(self) -> dict: + return { + "lib_available": self.lib_available, + "connected": self.is_connected, + "connecting": self._connecting, + "reconnecting": self._reconnecting and not self.is_connected, + "want_connected": self._want_connected, + "face_running": self._face_running and self.is_connected, + "face_desired": self._face_desired, + "driver": self._face_kind, + "lifelike": self.lifelike, + "autostart": self.autostart, + "gemini_linked": self._gemini_linked, + "hide_mouth": self._hide_mouth, + "speaking": self._speaking, + "mouth": self._mouth, + "brightness": self.brightness, 
+ "eye_color": list(self.eye_color), + "mouth_color": list(self.mouth_color), + "sclera_color": list(self.sclera_color), + "fps": self.fps, + "address": self.address, + "name_prefix": self.name_prefix, + "adapter": self.adapter, + "mask_dir": self.mask_dir, + "expressions": list(EXPRESSIONS), + "last_error": self._last_error, + } + + # -- connection ----------------------------------------------------------- + + def connect(self, timeout: Optional[float] = None, attempts: Optional[int] = None) -> dict: + # Serialize the whole connect under _op_lock so it can't interleave with a + # concurrent disconnect()/face_start() swapping self._mask underneath, and + # so _connecting is set and cleared under the same lock (status() reads it). + with self._op_lock: + if self._connecting: + raise RuntimeError("a connect is already in progress") + self._connecting = True + to = float(timeout) if timeout else self.connect_timeout + at = int(attempts) if attempts else self.connect_attempts + self._last_error = None + try: + self._ensure_lib() + self._submit(self._aconnect(to, at), timeout=to * at + 15.0) + self._want_connected = True # intent: supervisor keeps it alive + except Exception as exc: + self._last_error = str(exc) + raise RuntimeError(str(exc)) + finally: + self._connecting = False + return self.status() + + async def _aconnect(self, timeout: float, attempts: int): + if self.is_connected: + return + lib = self._lib + # Tear down any stale mask from a previous (now-dropped) session BEFORE + # building a fresh one. A reconnect after a silent BLE drop leaves the old + # BleakClient holding a half-open BlueZ connection to the SAME device; if + # we just overwrite self._mask the old client is never disconnected at the + # BlueZ level, the OS keeps the device "connected", and the new + # BleakClient.connect() to that address hangs/refuses. Disconnect (and + # drop) the old client first so the fresh connect starts from a clean + # BlueZ state. 
+ old = self._mask + self._mask = None + if old is not None: + try: + await old.disconnect() + except Exception: + log.exception("stale mask.disconnect() before reconnect failed") + self._mask = lib["ShiningMask"]( + address=self.address, name_prefix=self.name_prefix, adapter=self.adapter) + await self._mask.connect(timeout=timeout, attempts=attempts) + + def disconnect(self) -> dict: + # Clear the intents FIRST (before the lock) so the supervisor won't race + # to re-establish a link the user is deliberately tearing down. + self._want_connected = False + self._face_desired = False + with self._op_lock: + self._stop_face() + if self._mask is not None: + try: + self._submit(self._mask.disconnect(), timeout=10.0) + except Exception: + log.exception("mask.disconnect() failed") + return self.status() + + def set_gemini_linked(self, on: bool) -> dict: + """Link / unlink Gemini <-> the mask. + + ON -> declare intent to hold the BLE link + run the face; the supervisor + connects (and self-heals) in the background, and Gemini's emotion / + social markers are relayed to the mask. + OFF -> tear the link down + clear the intent, so the mask stops any BLE + churn and Gemini's markers are ignored (the voice is unaffected). + + Default is OFF: the mask stays silent + disconnected until the user + links it from the dashboard.""" + on = bool(on) + self._gemini_linked = on + if not on: + # Deliberate teardown; disconnect() also clears _want_connected / + # _face_desired so the supervisor stops trying to reconnect. + self.disconnect() + return {"ok": True, "linked": False, "connected": self.is_connected} + # Linking: declare intent, then one quick connect attempt so the common + # "mask nearby" case comes up immediately; the supervisor keeps retrying + # (weak signal / mask still off) so we never block the caller for long. 
+ self._want_connected = True + self._face_desired = True + if not self.is_connected and not self._connecting: + try: + self._clear_stale_mask_links() + except Exception: + pass + try: + self.connect(timeout=10.0, attempts=1) + self.face_start(reload=False) + except Exception as exc: + log.info("link-on: mask not up yet, supervisor will retry (%s)", exc) + elif self.is_connected and not self._face_running: + try: + self.face_start(reload=False) + except Exception: + pass + return {"ok": True, "linked": True, "connected": self.is_connected} + + # -- simple commands ------------------------------------------------------ + + def set_brightness(self, level: int) -> dict: + # Hardware range is 0-128 (config/mask_config.json: "0-128. Keep <=100 to + # avoid LED flicker"); reject/clamp values above the panel's real maximum + # rather than forwarding 129-255 to the mask. + level = max(0, min(128, int(level))) + with self._op_lock: + self._require_connected() + self._submit(self._mask.set_brightness(level)) + self.brightness = level + face = self._face + if face is not None: + face.brightness = level + return {"ok": True, "brightness": level} + + def set_text(self, text: str, color: Color = (255, 255, 255), + mode: Optional[int] = None, bg: Optional[Color] = None, + speed: Optional[int] = None) -> dict: + with self._op_lock: + self._require_connected() + self._face_desired = False # static override — don't auto-restart the face + self._stop_face() # static text can't share the panel with the animator + tm = self._lib["TextMode"] + m = int(mode) if mode is not None else tm.SCROLL_LEFT + kw = {} + if speed is not None: + kw["speed"] = max(0, min(255, int(speed))) + self._submit(self._mask.set_text(str(text), color=tuple(color), mode=m, **kw), + timeout=20.0) + if bg is not None: + # Apply a custom background AFTER set_text (which forces black by default). 
+ self._submit(self._mask.set_background_color(*tuple(bg)), timeout=10.0) + return {"ok": True} + + def show_image(self, image_id: int) -> dict: + with self._op_lock: + self._require_connected() + self._face_desired = False # static override + self._stop_face() + self._submit(self._mask.show_image(int(image_id))) + return {"ok": True, "image_id": int(image_id)} + + def play_animation(self, anim_id: int) -> dict: + with self._op_lock: + self._require_connected() + self._face_desired = False # static override + self._stop_face() + self._submit(self._mask.play_animation(int(anim_id))) + return {"ok": True, "anim_id": int(anim_id)} + + def clear_diy(self) -> dict: + with self._op_lock: + self._require_connected() + self._stop_face() # stop the loop before deleting the frames it plays + removed = self._submit(self._mask.clear_diy(), timeout=30.0) + return {"ok": True, "removed": int(removed or 0)} + + # -- animated face -------------------------------------------------------- + + def _stop_face(self): + """Cancel the animator loop (if any) and reset face state. Idempotent. + + Lock-free internal: callers MUST hold ``self._op_lock`` (it mutates the + shared self._face / self._face_running state that the serialized mask + operations and the event-bus callbacks both touch).""" + if self._face is not None: + try: + self._submit(self._face.stop(), timeout=10.0) + except Exception: + log.exception("face.stop() failed") + self._face = None + self._face_running = False + self._speaking = False + self._mouth = None + + def face_start(self, reload: bool = False) -> dict: + with self._op_lock: + self._require_connected() + # Always tear down any existing loop first so a second Start (or + # Reload) never leaves two animator tasks fighting over the display. + # Serialized under _op_lock so two concurrent Start presses can't both + # build an animator and race self._face. 
def set_face_color(self, eye=None, mouth=None, sclera=None) -> dict:
    """Change the colors of the animated face.

    Each color that is given (not ``None``) is parsed with ``_parse_color``,
    falling back to the current value, and stored on the controller. The
    colors are then persisted through ``_save_colors()``. Because colors are
    baked into the uploaded DIY frames, a connected controller whose face is
    desired restarts the face with ``reload=True`` (~30-90s re-upload).

    Returns a dict with ``ok``, ``reuploaded`` and the three colors as lists.
    """
    requested = (("eye_color", eye), ("mouth_color", mouth), ("sclera_color", sclera))
    for attr, value in requested:
        if value is not None:
            setattr(self, attr, _parse_color(value, getattr(self, attr)))
    self._save_colors()

    reuploaded = False
    if self.is_connected and self._face_desired:
        # Rebuild the frame set so the new colors actually reach the panel.
        self.face_start(reload=True)
        reuploaded = True

    result = {"ok": True, "reuploaded": reuploaded}
    result["eye_color"] = list(self.eye_color)
    result["mouth_color"] = list(self.mouth_color)
    result["sclera_color"] = list(self.sclera_color)
    return result
def set_speaking(self, on: bool) -> dict:
    """Turn the talking-mouth animation on or off.

    Records the flag on the controller (and clears any explicit mouth level),
    then forwards it to the animator if one exists; without an animator the
    call only updates the stored state. Animator errors are logged, never
    raised. Also invoked from the event bus
    (brain.gestural_speaking_changed).
    """
    speaking = bool(on)
    self._speaking, self._mouth = speaking, None
    reply = {"ok": True, "speaking": speaking}
    # Read self._face once: a concurrent _stop_face may set it to None.
    animator = self._face
    if animator is None:
        return reply
    try:
        animator.set_speaking(speaking)
    except Exception:
        log.exception("face.set_speaking() failed")
    return reply
def show_expression(self, name: str) -> dict:
    """Display a single named expression on the running face.

    Runs under ``self._op_lock``. Raises whatever ``_require_connected()``
    raises when the mask is not connected, and ``RuntimeError`` when no
    animator has been started.
    """
    with self._op_lock:
        self._require_connected()
        animator = self._face
        if animator is None:
            raise RuntimeError("face animation not started")
        request = animator.show(str(name))
        self._submit(request, timeout=10.0)
    return {"ok": True, "expression": name}
def set_mouth_hidden(self, hidden: bool) -> dict:
    """Show/hide the mouth on the animated face.

    Stores the preference, then — when a face that supports
    ``mouth_frames_for`` is running on a connected mask — re-uploads the
    frames it returns (masked eyes-only, or normal), pausing the animation
    loop so the acked upload isn't disturbed. The preference persists for
    future face starts this session.

    Args:
        hidden: Truthy to hide the mouth (coerced with ``bool``).

    Returns:
        ``{"ok": True, "hidden": <bool>}``; a ``"note"`` key is added when
        nothing was uploaded because no suitable face is running.

    Raises:
        Any exception from pausing or uploading propagates, but a loop that
        was paused is always resumed first.
    """
    hidden = bool(hidden)
    with self._op_lock:
        self._hide_mouth = hidden
        face = self._face
        if (face is None or not self.is_connected
                or not hasattr(face, "mouth_frames_for")):
            return {"ok": True, "hidden": hidden,
                    "note": "applies when the face is running"}
        frames = face.mouth_frames_for(hidden)
        has_slots = hasattr(face, "slots")
        paused = False
        try:
            if hasattr(face, "pause"):
                face.pause()
                # Set only after pause() succeeded, so the finally below
                # resumes exactly when there is something to resume — even
                # if waiting for the loop to park fails.
                paused = True
                if hasattr(face, "wait_paused"):
                    face.wait_paused(2.0)
            for name, data in frames.items():
                slot = face.slots.get(name) if has_slots else None
                if slot:  # names with no (or a falsy) slot are skipped
                    self._submit(self._mask.upload_image(bytes(data), int(slot),
                                                         timeout=15.0), timeout=90.0)
                    if hasattr(face, "frames"):
                        face.frames[name] = data
            if hasattr(face, "hide_mouth"):
                face.hide_mouth = hidden
            if hasattr(face, "_cur"):
                face._cur = None  # force a redraw with the new frame
        finally:
            if paused:
                face.resume()
    return {"ok": True, "hidden": hidden}
Safe no-op if the face + isn't running.""" + face = self._face # snapshot: face_start/_stop_face may swap it concurrently + if face is not None: + try: + face.set_expression(name if name else None) + except Exception: + log.exception("face.set_expression() failed") + return {"ok": True, "expression": name} + + # -- lifecycle ------------------------------------------------------------ + + def shutdown(self): + """Disconnect the mask and stop the background loop (idempotent).""" + try: + self.disconnect() + except Exception: + log.exception("mask disconnect on shutdown failed") + try: + self._loop.call_soon_threadsafe(self._loop.stop) + except Exception: + pass diff --git a/vendor/Sanad/gemini/__init__.py b/vendor/Sanad/gemini/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/gemini/client.py b/vendor/Sanad/gemini/client.py new file mode 100644 index 0000000..befe369 --- /dev/null +++ b/vendor/Sanad/gemini/client.py @@ -0,0 +1,357 @@ +"""Gemini WebSocket client for real-time voice interaction. 
+ +Provides: + - Bidirectional audio streaming (mic → Gemini → speaker) + - Text-to-speech via typed input + - Voice-command detection through transcription parsing + - System instruction injection for persona control +""" + +from __future__ import annotations + +import asyncio +import base64 +import inspect +import json +from typing import Any + +import websockets + +from Project.Sanad.config import ( + GEMINI_API_KEY, + GEMINI_MODEL, + GEMINI_VOICE, + GEMINI_WS_TIMEOUT, + GEMINI_WS_URI, +) +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.event_bus import bus +from Project.Sanad.core.logger import get_logger + +log = get_logger("gemini_client") + +_GC = _cfg_section("gemini", "client") +# Default system prompt — SINGLE SOURCE in core.gemini_defaults +_DEFAULT_SYSTEM_PROMPT = _cfg_section("core", "gemini_defaults").get( + "default_system_prompt", + "You are Sanad (Bousandah), a wise and friendly Emirati assistant. " + "Speak in UAE dialect (Khaleeji). Be helpful and concise." +) + +# TTS / typed-replay system prompt. The voice_client speaks TYPED text (typed +# replay + /api/voice/generate), so it must read the text VERBATIM in its OWN +# language — NOT answer it and NOT force Khaleeji (the default persona does the +# latter, which made English/Urdu/Indonesian lines come out in Arabic). +TTS_SYSTEM_PROMPT = _cfg_section("core", "gemini_defaults").get( + "tts_system_prompt", + "You are a pure multilingual text-to-speech voice. The instant the user " + "sends text, speak it aloud word for word in the SAME language it is " + "written in, then stop. Output ONLY that spoken audio — no thinking, no " + "commentary, no acknowledgements, no headers, no explanations, no " + "greetings, no extra words. Never translate and never change the language: " + "English stays English, Arabic stays Arabic, Urdu stays Urdu, Indonesian " + "stays Indonesian. 
Your speech must be identical to the user's text, " + "nothing more and nothing less." +) +_RECV_TIMEOUT_SEC = _GC.get("recv_timeout_sec", 30) +_RECONNECT_MAX_ATTEMPTS = _GC.get("reconnect_max_attempts", 3) +_RECONNECT_INITIAL_DELAY_SEC = _GC.get("reconnect_initial_delay_sec", 1.0) +_RECONNECT_MAX_DELAY_SEC = _GC.get("reconnect_max_delay_sec", 10.0) + + +class GeminiVoiceClient: + """Manages one WebSocket session to the Gemini Bidi audio API. + + Concurrency model: + - `_send_lock` serializes ALL websocket writes. + - `_session_lock` ensures only one consumer (live loop OR typed replay) + owns the receive stream at a time. Acquired by send_text and + receive_stream context managers. + - `_owner` records who currently holds the session lock for diagnostics. + """ + + def __init__(self, system_prompt: str = ""): + self.system_prompt = system_prompt or _DEFAULT_SYSTEM_PROMPT + self._ws: Any = None + self._connected = False + self._send_lock = asyncio.Lock() + self._session_lock = asyncio.Lock() + self._connect_lock = asyncio.Lock() # serializes reconnect attempts + self._owner: str | None = None + self._reconnect_attempts = 0 + + @property + def connected(self) -> bool: + return self._connected + + @property + def session_owner(self) -> str | None: + return self._owner + + def _ws_kwargs(self) -> dict[str, Any]: + kwargs: dict[str, Any] = {"max_size": None, "open_timeout": 30} + try: + sig = inspect.signature(websockets.connect) + key = "extra_headers" if "extra_headers" in sig.parameters else "additional_headers" + except Exception: + key = "extra_headers" + kwargs[key] = {"Content-Type": "application/json"} + return kwargs + + async def connect(self): + uri = f"{GEMINI_WS_URI}?key={GEMINI_API_KEY}" + try: + self._ws = await websockets.connect(uri, **self._ws_kwargs()) + setup = { + "setup": { + "model": GEMINI_MODEL, + "generationConfig": { + "responseModalities": ["AUDIO"], + "speechConfig": { + "voiceConfig": { + "prebuiltVoiceConfig": {"voiceName": GEMINI_VOICE} 
+ } + }, + }, + "systemInstruction": {"parts": [{"text": self.system_prompt}]}, + } + } + await self._ws.send(json.dumps(setup)) + await self._ws.recv() # ACK + self._connected = True + self._reconnect_attempts = 0 + log.info("Connected to Gemini (%s)", GEMINI_MODEL) + await bus.emit("voice.connected") + except Exception: + self._connected = False + self._ws = None + log.exception("Failed to connect to Gemini") + raise + + async def disconnect(self): + try: + if self._ws is not None: + await self._ws.close() + except Exception: + pass + finally: + self._ws = None + self._connected = False + self._owner = None + log.info("Disconnected from Gemini") + await bus.emit("voice.disconnected") + + async def _ensure_connected(self): + """Reconnect if dropped, with bounded retries. + + Serialized via _connect_lock so concurrent callers don't trigger + duplicate handshakes. + """ + # Fast path — no lock needed + if self._connected and self._ws is not None: + return True + + async with self._connect_lock: + # Re-check inside the lock (another coroutine may have just connected) + if self._connected and self._ws is not None: + return True + + max_attempts = _RECONNECT_MAX_ATTEMPTS + delay = _RECONNECT_INITIAL_DELAY_SEC + for attempt in range(max_attempts): + try: + log.warning("Reconnecting to Gemini (attempt %d/%d)", attempt + 1, max_attempts) + await self.connect() + return True + except Exception: + self._reconnect_attempts += 1 + await asyncio.sleep(delay) + delay = min(delay * 2, _RECONNECT_MAX_DELAY_SEC) + log.error("Reconnect failed after %d attempts", max_attempts) + await bus.emit("voice.error", reason="reconnect_failed") + return False + + async def send_audio_chunk(self, pcm_b64: str) -> bool: + """Send a base64-encoded PCM audio chunk (mic input). + + Returns False on failure so the caller can react instead of silently + no-op'ing forever (the original bug). 
async def send_text(self, text: str, owner: str = "send_text") -> tuple[bytes, list[str]]:
    """Speak typed text through Gemini and collect the reply.

    Makes sure a connection exists, then holds ``_session_lock`` for the
    whole request/response cycle (recording ``owner`` in ``_owner``) so no
    other consumer reads frames meant for this request. If the socket is
    found closed while sending, one reconnect and one retry are attempted.

    Returns:
        ``(audio_bytes, text_parts)`` as produced by ``_send_text_inner``.

    Raises:
        RuntimeError: no connection could be established, either up front or
            for the retry.
    """
    connected = await self._ensure_connected()
    if not connected:
        raise RuntimeError("Not connected to Gemini and reconnect failed.")

    async with self._session_lock:
        self._owner = owner
        try:
            reply = await self._send_text_inner(text)
        except websockets.exceptions.ConnectionClosed:
            log.warning("send_text: connection died on send — reconnecting once")
            self._connected = False
            reconnected = await self._ensure_connected()
            if not reconnected:
                raise RuntimeError("Reconnect after send failure also failed.")
            reply = await self._send_text_inner(text)
        finally:
            # Ownership is cleared on every exit path, success or failure.
            self._owner = None
    return reply
asyncio.TimeoutError: + log.warning("send_text: recv timed out") + break + except websockets.exceptions.ConnectionClosed: + log.warning("send_text: connection closed mid-stream") + self._connected = False + break + + try: + resp = json.loads(raw) + except json.JSONDecodeError: + log.warning("send_text: bad JSON from server") + continue + + if "error" in resp: + log.error("Gemini error: %s", resp["error"]) + await bus.emit("voice.error", reason=str(resp["error"])) + break + + sc = resp.get("serverContent", {}) + mt = sc.get("modelTurn", {}) + for part in mt.get("parts", []): + inline = part.get("inlineData") + if inline and inline.get("data"): + audio_chunks.append(base64.b64decode(inline["data"])) + tp = part.get("text") + if isinstance(tp, str) and tp.strip(): + text_parts.append(tp.strip()) + + input_tr = sc.get("inputTranscription", {}) + if input_tr.get("text"): + await bus.emit("voice.user_said", text=input_tr["text"]) + + if sc.get("turnComplete") or sc.get("generationComplete"): + break + + audio_bytes = b"".join(audio_chunks) + if audio_bytes: + await bus.emit("voice.gemini_spoke", audio_len=len(audio_bytes)) + return audio_bytes, text_parts + + def acquire_session(self, owner: str) -> "_SessionGuard": + """Return an async context manager for exclusive session ownership. + + Use as `async with client.acquire_session("live_voice"):`. + While held, no other consumer may call send_text or receive_stream. + """ + return _SessionGuard(self, owner) + + async def receive_stream(self): + """Yield server events. 
Caller MUST hold the session lock.""" + if self._owner is None: + raise RuntimeError( + "receive_stream requires session lock — use acquire_session() first" + ) + if not self._connected or self._ws is None: + return + try: + async for raw in self._ws: + try: + resp = json.loads(raw) + except json.JSONDecodeError: + continue + yield resp.get("serverContent", {}) + except websockets.exceptions.ConnectionClosed: + log.warning("receive_stream: connection closed") + self._connected = False + await bus.emit("voice.error", reason="connection_closed") + + async def raw_send(self, payload: dict): + """Low-level send for the live loop. Always use through send lock.""" + if not self._connected or self._ws is None: + return False + try: + async with self._send_lock: + await self._ws.send(json.dumps(payload)) + return True + except Exception: + log.exception("raw_send failed") + return False + + def status(self) -> dict[str, Any]: + return { + "connected": self._connected, + "model": GEMINI_MODEL, + "voice": GEMINI_VOICE, + "session_owner": self._owner, + "reconnect_attempts": self._reconnect_attempts, + } + + +class _SessionGuard: + """Async context manager for exclusive session ownership. + + Always releases owner + lock on exit, even on exceptions. 
async def __aexit__(self, exc_type, exc, tb):
    """Give up session ownership when the ``async with`` block ends.

    Clears the client's ``_owner`` and, if this guard acquired the session
    lock, releases it and resets ``_held``. Returns False so an exception
    raised inside the block keeps propagating.
    """
    try:
        self._client._owner = None
    finally:
        # Release in a finally so the lock is returned even if clearing the
        # owner fails; the _held check prevents releasing a lock this guard
        # did not acquire (or releasing it twice).
        if self._held:
            self._client._session_lock.release()
            self._held = False
    return False  # don't suppress exceptions
+ +Env overrides: + SANAD_GEMINI_MODEL — Gemini Live model id (without "models/" prefix) +""" + +from __future__ import annotations + +import array +import asyncio +import base64 +import json +import os +import sys +import threading +import time +from pathlib import Path +from typing import Any, Optional + +import numpy as np + +from google import genai +from google.genai import types + +from Project.Sanad.config import ( + BASE_DIR, + CHUNK_SIZE, + GEMINI_API_KEY, + GEMINI_VOICE, + RECEIVE_SAMPLE_RATE, + SEND_SAMPLE_RATE, +) +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.logger import get_logger +from Project.Sanad.vision import recognition_state as _recog_state + +log = get_logger("gemini_brain") + +_SV = _cfg_section("voice", "sanad_voice") +_VAD = _cfg_section("voice", "vad") +_BI = _cfg_section("voice", "barge_in") + +_MODEL = os.environ.get( + "SANAD_GEMINI_MODEL", + "gemini-2.5-flash-native-audio-preview-12-2025", +) +_MIC_GAIN = _SV.get("mic_gain", 1.0) +_SESSION_TIMEOUT = _SV.get("session_timeout_sec", 660) +_MAX_RECONNECT_DELAY = _SV.get("max_reconnect_delay_sec", 30) +_MAX_CONSECUTIVE_ERRORS = _SV.get("max_consecutive_errors", 10) +_NO_MESSAGES_TIMEOUT = _SV.get("no_messages_timeout_sec", 30) +# Extra mic-gate time after the AI stops, on loud external-speaker profiles +# (JBL) — covers the speaker buffer + room reverb so it doesn't hear its tail. +_ECHO_TAIL_SEC = _SV.get("echo_tail_sec", 0.6) +# On a loud external speaker (JBL) barge-in must clear the measured speaker +# BLEED by this factor — so the user's own voice cuts the AI but the speaker's +# echo into the mic does not. Lower = easier to interrupt (more false cuts). +_JBL_BLEED_MARGIN = _SV.get("jbl_bleed_margin", 3.0) +# Don't allow barge-in for this long after the AI starts on the JBL — gives the +# bleed estimate time to ramp so an early loud bleed frame can't false-trigger. 
+_JBL_BARGE_GRACE = _SV.get("jbl_barge_grace_sec", 1.0) +# Sustained loud-frame count required to barge-in on the JBL (vs the shorter +# default). Brief speaker-echo peaks won't reach it; continuous speech will. +_JBL_BARGE_CHUNKS = _SV.get("jbl_barge_chunks", 9) +# Time since the AI last pushed audio after which the speaker bleed is assumed +# FADED (a gap between words/numbers). In that window barge-in drops to a low, +# sensitive bar so the user can interrupt; while audio is flowing it stays high. +_JBL_BLEED_FADE_SEC = _SV.get("jbl_bleed_fade_sec", 0.5) + +_CHUNK_BYTES = CHUNK_SIZE * 2 +_SILENCE_PCM = b"\x00" * _CHUNK_BYTES + +# Set by a "pause:1" stdin command (a record is playing on the dashboard). +# While set, the brain feeds Gemini silence (so it neither hears the playback +# nor keeps replying) and drops its own audio output (so the record owns the +# chest speaker). Cleared by "pause:0" when playback ends. threading.Event is +# safe to read from the asyncio loops. +_INPUT_PAUSED = threading.Event() + +# ── Recognition (camera + face gallery) tunables ── +_RECOG_STATE_PATH = Path(os.environ.get( + "SANAD_RECOGNITION_STATE_PATH", + str(BASE_DIR / "data" / ".recognition_state.json"), +)) +_VISION_SEND_HZ = float(os.environ.get("SANAD_VISION_SEND_HZ", "2")) +_VISION_STALE_MS = int(os.environ.get("SANAD_VISION_STALE_MS", "1500")) +_RECOG_POLL_S = float(os.environ.get("SANAD_RECOGNITION_POLL_S", "1.0")) +_FACES_DIR = Path(os.environ.get( + "SANAD_FACES_DIR", + str(BASE_DIR / "data" / "faces"), +)) +_FACES_MAX_SAMPLES = int(os.environ.get("SANAD_FACES_MAX_SAMPLES", "3")) +_FACES_PRIMER_RESIZE = int(os.environ.get("SANAD_FACES_PRIMER_RESIZE", "256")) +# N3 — zones gallery (zone → place → linked faces). Folded into a Gemini +# primer turn so Gemini can recognise / talk about known locations and the +# people associated with them. 
def _nav_api(method: str, path: str, body: Optional[dict] = None,
             timeout: float = 12.0) -> dict:
    """Call the dashboard nav API synchronously and return a dict.

    ``method == "GET"`` issues a GET; anything else issues a POST with
    ``body`` (or ``{}``) as JSON. Failures are reported in the returned dict
    rather than raised:

    * no HTTP client available -> ``reason="no_http_client"``
    * request raised           -> ``reason="unreachable"`` + truncated detail
    * non-OK status            -> ``reason="http_<status>"`` + response detail

    A successful dict payload is returned unchanged; any other payload is
    wrapped as ``{"ok": True, "data": ...}``. The call blocks, so run it via
    ``asyncio.to_thread`` to keep the receive loop responsive.
    """
    if _requests is None:
        return {"ok": False, "reason": "no_http_client"}

    endpoint = _DASHBOARD_URL + path
    try:
        response = (
            _requests.get(endpoint, timeout=timeout)
            if method == "GET"
            else _requests.post(endpoint, json=(body or {}), timeout=timeout)
        )
    except Exception as exc:
        return {"ok": False, "reason": "unreachable", "detail": str(exc)[:200]}

    try:
        payload = response.json()
    except Exception:
        # Not JSON: keep a short excerpt of the raw body instead.
        payload = {"raw": (response.text or "")[:200]}

    is_mapping = isinstance(payload, dict)
    if not response.ok:
        detail = payload.get("detail") if is_mapping else payload
        return {"ok": False, "reason": f"http_{response.status_code}", "detail": detail}
    if is_mapping:
        return payload
    return {"ok": True, "data": payload}
types.FunctionDeclaration( + name="navigate_to_place", + description=( + "Drive the robot to a named saved place in the currently loaded " + "map (for example 'kitchen', 'reception', 'office'). Call this " + "ONLY when the user asks to go, move, walk, or be taken " + "somewhere. The place must exist in the active map — if you are " + "unsure of the name, call list_places first." + ), + parameters=S(type=T.OBJECT, properties={ + "place": S(type=T.STRING, + description="Destination place name, as the user said it."), + }, required=["place"]), + ), + types.FunctionDeclaration( + name="list_places", + description=( + "List the saved places you can drive to in the currently loaded " + "map. Use it to tell the user where you can take them." + ), + parameters=S(type=T.OBJECT, properties={}), + ), + types.FunctionDeclaration( + name="where_am_i", + description=( + "Report navigation status: which map is loaded and whether the " + "robot is localized and ready to drive." + ), + parameters=S(type=T.OBJECT, properties={}), + ), + types.FunctionDeclaration( + name="stop_navigation", + description="Cancel the current navigation goal and stop the robot from driving.", + parameters=S(type=T.OBJECT, properties={}), + ), + ] + + +# Emotions Gemini can show on the LED face (a subset of the mask's frames that +# read as feelings — the talk/blink/gaze frames are driven automatically). +_FACE_EMOTIONS = ("smile", "laugh", "heart", "love", "sad", "surprised", + "wink", "angry", "cool", "confused", "kiss", "thumbs_up", + "neutral") + +# Instagram accounts the mask can show as a QR (parent maps these to the code). +_SOCIAL_ACCOUNTS = ("bu_sunaidah", "yslootahtech") + +# Appended to whatever base system prompt is passed in, so the expressive-face +# behaviour is always present regardless of the user-edited persona. +_FACE_PROMPT_ADDENDUM = ( + "\n\nYou have an expressive LED face you control with tools. IMPORTANT: when " + "the user asks you to SHOW or MAKE a specific face/emotion (e.g. 
'show me a " + "smile', 'give me a thumbs up', 'look surprised', 'make a heart'), ALWAYS " + "call set_expression with that emotion right away. Also use set_expression " + "naturally as you talk — smile when greeting or happy, laugh at something " + "funny, heart or love for affection or a compliment, thumbs_up to agree or " + "approve, surprised when astonished, confused when you didn't understand, " + "wink when joking, sad when empathizing, cool when playful. Available " + "emotions: smile, laugh, heart, love, thumbs_up, surprised, confused, wink, " + "kiss, cool, sad, angry, neutral. Your mouth already lip-syncs on its own, " + "so this is only the emotion. When the user asks how to follow you, for your " + "Instagram, or to see/show your social media, ALWAYS call show_social with " + "'bu_sunaidah' (@bu.sunaidah) or 'yslootahtech' (@yslootahtech) to display " + "the QR on your face. These tools are silent — never say the tool name, the " + "emotion, or any bracket marker out loud." +) + + +def _face_function_declarations() -> list: + """Gemini Live tools for the expressive LED face + social QR (built lazily + so google.genai types resolve at call time).""" + S, T = types.Schema, types.Type + return [ + types.FunctionDeclaration( + name="set_expression", + description=( + "Show an emotion on your LED face to react expressively while you " + "talk. Use it naturally and sparingly: smile when greeting or happy, " + "laugh at something funny, heart/love for affection or a compliment, " + "surprised when astonished, confused when you didn't understand, wink " + "when joking, sad when empathizing, cool when playful, sleepy when " + "tired, angry only rarely. Your mouth already lip-syncs on its own — " + "this is ONLY the emotion, not the mouth." 
+ ), + parameters=S(type=T.OBJECT, properties={ + "emotion": S(type=T.STRING, enum=list(_FACE_EMOTIONS), + description="The emotion to show on the face."), + }, required=["emotion"]), + ), + types.FunctionDeclaration( + name="show_social", + description=( + "Display a social-media QR code on your LED face so a visitor can " + "scan it and follow. Call this when the user asks how to follow you, " + "for your Instagram, or to share your social media. Choose the " + "account: 'bu_sunaidah' (@bu.sunaidah) or 'yslootahtech' " + "(@yslootahtech)." + ), + parameters=S(type=T.OBJECT, properties={ + "account": S(type=T.STRING, enum=list(_SOCIAL_ACCOUNTS), + description="Which Instagram account to show."), + }, required=["account"]), + ), + ] + + +# ── stdin push channel (Marcus pattern) ────────────────────── +# The GeminiSubprocess supervisor writes two line types to this process's +# stdin: +# "frame:\n" — a camera frame to relay to Gemini Live +# "state:\n" — a motion-state update to inject as text +# A daemon thread parses them into the caches below; the asyncio tasks +# _send_frame_loop / _send_state_loop drain those caches. + +_LATEST_FRAME_LOCK = threading.Lock() +_LATEST_FRAME: dict = {"bytes": None, "ts": 0.0} + +_STATE_LOCK = threading.Lock() +_STATE_PENDING: list[str] = [] + +_STATE_TAGS = { + "start": "[STATE-START]", + "complete": "[STATE-DONE]", + "interrupted": "[STATE-INTERRUPTED]", + "error": "[STATE-ERROR]", + "paused": "[STATE-PAUSED]", + "resumed": "[STATE-RESUMED]", + # Navigation arrival/failure — pushed by the dashboard goal monitor so + # Gemini can truthfully tell the user it arrived (or couldn't get there) + # instead of guessing from the fire-and-forget goto. + "nav_arrived": "[NAV ARRIVED]", + "nav_failed": "[NAV FAILED]", + "nav_canceled": "[NAV CANCELED]", + # "Gemini Nav" session greeting — the operator entered a zone bound to a + # map; tell Gemini the zone + drivable places and to greet the user. 
def _stdin_text_field(payload: dict, key: str) -> str:
    """Return ``payload[key]`` stripped when it is a string, else ``""``."""
    value = payload.get(key)
    return value.strip() if isinstance(value, str) else ""


def _stdin_json_object(text: str) -> Optional[dict]:
    """Parse ``text`` as JSON; return it only when it is a JSON object."""
    try:
        payload = json.loads(text)
    except Exception:
        return None
    return payload if isinstance(payload, dict) else None


def _handle_stdin_line(line: str) -> None:
    """Apply one stripped, non-empty supervisor line to the module caches.

    Unknown prefixes and malformed payloads are ignored.
    """
    if line.startswith("frame:"):
        try:
            data = base64.b64decode(line[len("frame:"):])
        except Exception:
            return
        if data:
            with _LATEST_FRAME_LOCK:
                _LATEST_FRAME["bytes"] = data
                _LATEST_FRAME["ts"] = time.time()
    elif line.startswith("state:"):
        payload = _stdin_json_object(line[len("state:"):])
        if payload is None:
            return
        event = _stdin_text_field(payload, "event").lower()
        cmd = _stdin_text_field(payload, "cmd")
        tag = _STATE_TAGS.get(event)
        if not tag or not cmd:
            return
        msg = f"{tag} {cmd}"
        elapsed = payload.get("elapsed_sec")
        if isinstance(elapsed, (int, float)):
            msg += f" ({float(elapsed):.1f}s)"
        reason = payload.get("reason")
        if reason and event == "error":
            msg += f" — {reason}"
        with _STATE_LOCK:
            _STATE_PENDING.append(msg)
    elif line.startswith("profile:"):
        # Parent signals an audio-profile hot-swap. Stash the target;
        # _audio_swap_loop (asyncio task) handles the actual swap so
        # PyAudio open/close happens off the stdin thread.
        payload = _stdin_json_object(line[len("profile:"):])
        if payload is None:
            return
        pid = _stdin_text_field(payload, "id").lower()
        if pid not in _VALID_PROFILES:
            return
        with _PROFILE_LOCK:
            _PROFILE_PENDING["id"] = pid
            _PROFILE_PENDING["reason"] = _stdin_text_field(payload, "reason")
    elif line.startswith("pause:"):
        # Dashboard record playback — pause/resume the live interaction.
        if line[len("pause:"):].strip() in ("1", "true", "True", "on"):
            if not _INPUT_PAUSED.is_set():
                _INPUT_PAUSED.set()
                log.info("input PAUSED — record playback")
        else:
            if _INPUT_PAUSED.is_set():
                _INPUT_PAUSED.clear()
                log.info("input RESUMED — record playback ended")


def _stdin_watcher() -> None:
    """Daemon thread — parse 'frame:' / 'state:' / 'profile:' / 'pause:' lines
    off stdin.

    Best-effort: any malformed line is skipped, including valid JSON that is
    not an object or whose fields are not strings. Previously such a line
    raised inside the loop and the blanket handler ended the thread, so no
    later frame/state/profile/pause command was ever processed. Exits when
    the parent closes our stdin (subprocess teardown) or stdin becomes
    unreadable.
    """
    # IMPORTANT: read with readline(), NOT `for line in sys.stdin`. The file
    # iterator does aggressive read-ahead buffering, so on an idle pipe a
    # small command like "pause:1\n" can sit unread for SECONDS (until more
    # stdin data arrives to flush the read-ahead). That delayed the record-
    # playback pause by ~2s — Gemini kept stomping the chest speaker so the
    # clip was silent / late. readline() returns each line as soon as its
    # newline arrives, so commands are delivered promptly.
    while True:
        try:
            line = sys.stdin.readline()
        except Exception:
            return  # stdin is gone/unreadable — nothing left to watch
        if line == "":
            break  # EOF — parent closed our stdin (subprocess teardown)
        line = line.rstrip("\n")
        if not line:
            continue
        try:
            _handle_stdin_line(line)
        except Exception as exc:
            # One bad line must never take the whole channel down.
            log.debug("stdin line skipped: %s", exc)
+ self._swap_lock: Optional[asyncio.Lock] = None # built in run() + self._recorder = recorder + self._voice = voice_name or GEMINI_VOICE + self._system_prompt = (system_prompt or "") + _FACE_PROMPT_ADDENDUM + self._api_key = GEMINI_API_KEY + self._stop_flag = asyncio.Event() + # per-session state (reset in the outer reconnect loop) + self._speaking = False + self._stream_started = False + self._barge_block_until = 0.0 + self._ai_speak_start = 0.0 + self._last_ai_audio = 0.0 + # Rolling estimate of the speaker bleed picked up by the mic while the AI + # talks (JBL profile) — the barge-in threshold floats above this. + self._bleed_ewma = 0.0 + self._done: Optional[asyncio.Event] = None + # ── Recognition flags — kept in sync with the state file by + # _recognition_state_watcher. Boot defaults come from the file (or + # the SANAD_* env vars if the file is missing). + _initial = _recog_state.read(_RECOG_STATE_PATH) + self._vision_enabled = bool( + _initial.vision_enabled + or os.environ.get("SANAD_VISION_ENABLE", "0") == "1" + ) + self._face_rec_enabled = bool( + _initial.face_rec_enabled + or os.environ.get("SANAD_FACE_RECOGNITION_ENABLE", "0") == "1" + ) + self._gallery_version_primed = -1 # bumped after first successful primer + # N3 — zones knowledge toggle + primer version tracking. + self._zone_rec_enabled = bool( + _initial.zone_rec_enabled + or os.environ.get("SANAD_ZONE_RECOGNITION_ENABLE", "0") == "1" + ) + self._zones_version_primed = -1 + # "Go here" destination already announced this session (zone_id, place_id). + self._nav_target = ( + int(_initial.nav_target_zone_id), int(_initial.nav_target_place_id), + ) + # N2 — Gemini-driven locomotion enable gate (announce only; the + # actual dispatch loop lives in the parent and is wired separately). + self._movement_enabled = bool( + _initial.movement_enabled + or os.environ.get("SANAD_MOVEMENT_ENABLE", "0") == "1" + ) + # Auto-record toggle — recognition_state is the live source of truth. 
+ # Sync the recorder to it now; the watcher keeps it in sync at runtime. + self._record_enabled = bool(_initial.record_enabled) + try: + self._recorder.enabled = self._record_enabled + except Exception: + pass + + def stop(self) -> None: + """Signal the run loop to exit at the next opportunity.""" + try: + self._stop_flag.set() + except Exception: + pass + + # ─── public entry point ─────────────────────────────── + + async def run(self) -> None: + client = genai.Client(api_key=self._api_key) + config = self._build_config() + session_num = 0 + start_time = time.time() + consecutive_errors = 0 + + while not self._stop_flag.is_set(): + session_num += 1 + self._reset_turn_state() + # On a reconnect (not the first session), suppress the unprompted + # re-greeting until the user speaks — keeps the chest speaker free + # for record playback and stops the "robot greets every 30s" loop. + self._suppress_greeting = session_num > 1 + uptime_min = (time.time() - start_time) / 60 + + try: + log.info("connecting to Gemini (session #%d, uptime %.0fm)...", + session_num, uptime_min) + async with client.aio.live.connect(model=_MODEL, config=config) as session: + log.info("connected — speak anytime!") + consecutive_errors = 0 + self._mic.flush() + self._done = asyncio.Event() + # Reset per-session primer state so re-priming on reconnect + # actually happens. The state watcher will re-prime as soon + # as it sees vision+face-rec (and place-rec) enabled. + self._gallery_version_primed = -1 + self._zones_version_primed = -1 + # Re-announce the active destination on reconnect. + self._nav_target = (-1, -1) + # Lazy-build the swap lock on the active asyncio loop. 
+ if self._swap_lock is None: + self._swap_lock = asyncio.Lock() + + try: + await asyncio.wait_for( + asyncio.gather( + self._send_mic_loop(session), + self._receive_loop(session), + self._send_frame_loop(session), + self._send_state_loop(session), + self._recognition_state_watcher(session), + self._audio_swap_loop(session), + ), + timeout=_SESSION_TIMEOUT, + ) + except asyncio.TimeoutError: + log.warning("session timed out after %ds", _SESSION_TIMEOUT) + except asyncio.CancelledError: + log.warning("session cancelled") + + log.info("session #%d ended — reconnecting in 1s", session_num) + self._speaker.stop() + self._mic.flush() + await asyncio.sleep(1) + + except asyncio.CancelledError: + log.info("cancelled — stopping") + break + except KeyboardInterrupt: + log.info("keyboard interrupt — stopping") + break + except Exception as exc: + consecutive_errors += 1 + delay = min(_MAX_RECONNECT_DELAY, 2 ** consecutive_errors) + log.error("session error (#%d): %s — reconnecting in %ds", + consecutive_errors, exc, delay) + await asyncio.sleep(delay) + if consecutive_errors >= _MAX_CONSECUTIVE_ERRORS: + log.warning("%d consecutive errors — recreating client", + consecutive_errors) + try: + client = genai.Client(api_key=self._api_key) + consecutive_errors = 0 + except Exception as ce: + log.error("client recreation failed: %s", ce) + + # ─── Gemini config ──────────────────────────────────── + + def _build_config(self) -> types.LiveConnectConfig: + return types.LiveConnectConfig( + response_modalities=["AUDIO"], + speech_config=types.SpeechConfig( + voice_config=types.VoiceConfig( + prebuilt_voice_config=types.PrebuiltVoiceConfig( + voice_name=self._voice, + ), + ), + ), + realtime_input_config=types.RealtimeInputConfig( + automatic_activity_detection=types.AutomaticActivityDetection( + disabled=False, + start_of_speech_sensitivity=getattr( + types.StartSensitivity, + _VAD.get("start_sensitivity", "START_SENSITIVITY_HIGH"), + ), + end_of_speech_sensitivity=getattr( + 
types.EndSensitivity, + _VAD.get("end_sensitivity", "END_SENSITIVITY_LOW"), + ), + prefix_padding_ms=_VAD.get("prefix_padding_ms", 20), + silence_duration_ms=_VAD.get("silence_duration_ms", 200), + ), + ), + input_audio_transcription=types.AudioTranscriptionConfig(), + output_audio_transcription=types.AudioTranscriptionConfig(), + system_instruction=types.Content( + parts=[types.Part(text=self._system_prompt)], + ), + # Native function-calling: Gemini can drive the robot to saved + # places (navigate_to_place / list_places / where_am_i / + # stop_navigation). Disable with SANAD_NAV_TOOLS=0. + # Native function-calling: nav tools (if enabled) + the always-on + # expressive-face / social-QR tools (set_expression / show_social). + tools=[types.Tool(function_declarations=( + (_nav_function_declarations() if _NAV_TOOLS_ENABLED else []) + + _face_function_declarations()))], + ) + + # ─── state helpers ──────────────────────────────────── + + def _reset_turn_state(self) -> None: + self._speaking = False + self._stream_started = False + self._barge_block_until = 0.0 + self._ai_speak_start = 0.0 + self._last_ai_audio = 0.0 + # Rolling estimate of the speaker bleed picked up by the mic while the AI + # talks (JBL profile) — the barge-in threshold floats above this. + self._bleed_ewma = 0.0 + # Suppress the unprompted greeting on a RECONNECT (set per-session in + # run() for session_num>1). The idle watchdog reconnects every ~30s when + # no one talks, and a fresh session greets each time ("مرحبابك…") which + # floods the shared chest speaker and stomps record playback. We drop + # that greeting's audio until the user actually speaks. Default False so + # the FIRST session (startup) greets normally. 
+ self._suppress_greeting = False + + def _interrupt(self, source: str = "local") -> None: + self._speaking = False + self._stream_started = False + self._speaker.stop() + self._mic.flush() + self._recorder.finish_turn() + log.info("interrupt (%s)", source) + + # ─── mic send loop ──────────────────────────────────── + + async def _send_mic_loop(self, session: Any) -> None: + threshold = _BI.get("threshold", 500) + chunks_needed = _BI.get("loud_chunks_needed", 3) + cooldown = _BI.get("cooldown_sec", 0.3) + echo_suppress_below = _BI.get("echo_suppress_below", 500) + grace = _BI.get("ai_speak_grace_sec", 0.15) + + loop = asyncio.get_event_loop() + loud_count = 0 + last_activity = time.time() + + while not self._done.is_set() and not self._stop_flag.is_set(): + try: + raw = await loop.run_in_executor( + None, self._mic.read_chunk, _CHUNK_BYTES, + ) + except Exception: + break + + samples = np.frombuffer(raw, dtype=np.int16).astype(np.float32) + samples = np.clip(samples * _MIC_GAIN, -32768, 32767).astype(np.int16) + data = samples.tobytes() + energy = _audio_energy(data) + now = time.time() + # On the JBL (loud external speaker) the head mic hears the robot's + # OWN voice as loud as the user. We FULLY gate the mic to Gemini while + # it speaks (+ a short echo tail) so it NEVER hears itself, and we + # DISABLE voice barge-in there — the bleed is as loud as your voice, so + # energy can't separate them and any attempt leaks the echo back to the + # model. (Reliable JBL interrupt needs AEC; the only PulseAudio mic is + # dead, so that's separate work.) The chest speaker (builtin) keeps + # light quiet-frame suppression + working barge-in (firmware AEC). + full_gate = "jbl" in (self._current_profile_id or "") + + # Barge-in: sustained user energy cuts the AI — chest profile only. 
    async def _receive_loop(self, session: Any) -> None:
        """Consume server messages for one session until it ends or stalls.

        Handles, per message: server go-away, tool calls, interruption,
        input/output transcription, model audio (played through the speaker,
        recorded, and turned into lip-sync markers) and turn completion.
        Always sets ``self._done`` on exit so the sibling session tasks stop.

        NOTE(review): the nesting below was reconstructed from a
        whitespace-collapsed listing — confirm indentation against the
        original file before applying.
        """
        loop = asyncio.get_event_loop()
        try:
            while not self._done.is_set() and not self._stop_flag.is_set():
                # Iterate session.receive() with a PER-MESSAGE timeout. A plain
                # `async for` parks inside the generator on a silent/half-open
                # stall (server stops sending but never closes the socket), so
                # the no-message watchdog below — which only ran after the
                # async-for ended a cycle — could not fire, and recovery waited
                # out the 660s outer session cap. Driving __anext__ under
                # wait_for(_NO_MESSAGES_TIMEOUT) detects a stall in ~Ns.
                _recv_agen = session.receive()
                _recv_it = _recv_agen.__aiter__()
                _stalled = False
                try:
                    while True:
                        try:
                            response = await asyncio.wait_for(
                                _recv_it.__anext__(),
                                timeout=_NO_MESSAGES_TIMEOUT,
                            )
                        except StopAsyncIteration:
                            break  # generator exhausted — same as async-for end
                        except asyncio.TimeoutError:
                            _stalled = True
                            break
                        if self._done.is_set():
                            break

                        # Server announced it is closing: end the session so
                        # run() reconnects.
                        if hasattr(response, "go_away") and response.go_away is not None:
                            log.info("server going away — will reconnect")
                            self._done.set()
                            return

                        # Native function-calling: Gemini asks us to run a tool
                        # (navigation). Handle it + reply, then continue — a
                        # tool_call message carries no server_content to process.
                        tc = getattr(response, "tool_call", None)
                        if tc is not None and getattr(tc, "function_calls", None):
                            await self._handle_tool_calls(session, tc.function_calls)
                            continue

                        sc = response.server_content
                        if sc is None:
                            continue

                        # Server-side interruption: only act if we were
                        # actually mid-speech.
                        if sc.interrupted is True:
                            if self._speaking:
                                log.info("Gemini interrupted")
                                self._interrupt("gemini")
                            continue

                        if sc.input_transcription:
                            text = (sc.input_transcription.text or "").strip()
                            # Transcriptions that arrive while the AI is
                            # speaking are not logged or recorded as user text.
                            if text and not self._speaking:
                                log.info("USER: %s", text)
                                self._recorder.add_user_text(text)
                                # The user actually said something (real
                                # transcription, not mic noise) → stop suppressing
                                # the reconnect greeting so Gemini's reply is heard.
                                if self._suppress_greeting:
                                    self._suppress_greeting = False
                                    log.info("reconnect greeting suppression "
                                             "lifted — user spoke")

                        if sc.output_transcription:
                            text = (sc.output_transcription.text or "").strip()
                            if text:
                                # Emit as "BOT:" (no space before colon) so the
                                # supervisor's _track_line can parse it the same
                                # way it parses "USER:" — this is the channel the
                                # movement dispatcher (N2) reads Gemini's own
                                # spoken phrases from. Keep in lock-step with
                                # GeminiSubprocess._track_line.
                                log.info("BOT: %s", text)
                                self._recorder.add_robot_text(text)

                        if sc.model_turn:
                            for part in sc.model_turn.parts:
                                if part.inline_data and part.inline_data.data:
                                    if _INPUT_PAUSED.is_set() or self._suppress_greeting:
                                        # Drop Gemini's audio AND halt any in-flight
                                        # stream at the source. Two cases: (1) a record
                                        # is playing (_INPUT_PAUSED) — Gemini's per-chunk
                                        # PlayStream("sanad") must not stomp the record on
                                        # the shared chest speaker; (2) this is a reconnect
                                        # and the user hasn't spoken — drop the unprompted
                                        # re-greeting. Gated on _stream_started so STOP
                                        # fires once (not per chunk); the next turn's
                                        # begin_stream() clears the stop-flag and resumes.
                                        if self._stream_started:
                                            await loop.run_in_executor(
                                                None, self._speaker.stop)
                                            self._stream_started = False
                                        self._speaking = False
                                        continue
                                    now = time.time()
                                    # First audio chunk of a turn marks the
                                    # start of AI speech (read by the mic
                                    # loop's barge-in grace window).
                                    if not self._speaking:
                                        self._ai_speak_start = now
                                        self._speaking = True
                                    self._last_ai_audio = now
                                    raw_audio = part.inline_data.data
                                    self._recorder.capture_robot(raw_audio)
                                    audio = np.frombuffer(raw_audio, dtype=np.int16)
                                    # Speaker calls run in the default executor
                                    # rather than directly on the event loop.
                                    if not self._stream_started:
                                        await loop.run_in_executor(
                                            None, self._speaker.begin_stream,
                                        )
                                        self._stream_started = True
                                    await loop.run_in_executor(
                                        None, self._speaker.send_chunk,
                                        audio, RECEIVE_SAMPLE_RATE,
                                    )
                                    # Lip-sync marker for the LED face mask: emit the
                                    # mouth-open level (0..3) from this chunk's RMS,
                                    # throttled. Parsed by GeminiSubprocess._reader_loop.
                                    _mnow = time.time()
                                    if _mnow - getattr(self, "_mouth_t", 0.0) >= 0.08:
                                        _rms = (float(np.sqrt(np.mean(
                                            audio.astype(np.float32) ** 2))) if audio.size else 0.0)
                                        # Lower thresholds bias the mouth more open
                                        # so lip-sync reads strongly (vs. barely moving).
                                        _lvl = (0 if _rms < 140 else 1 if _rms < 650
                                                else 2 if _rms < 1700 else 3)
                                        # Emit on a level change, or re-emit the
                                        # same level once 0.2s have passed.
                                        if (_lvl != getattr(self, "_mouth_lvl", -1)
                                                or _mnow - getattr(self, "_mouth_t", 0.0) >= 0.2):
                                            self._mouth_t = _mnow
                                            self._mouth_lvl = _lvl
                                            log.info("[[MOUTH:%d]]", _lvl)

                        if sc.turn_complete:
                            # Wait for playback to finish only when this turn
                            # streamed audio that was not interrupted.
                            if (self._speaking and self._stream_started
                                    and not self._speaker.interrupted):
                                log.info("speaker %.1fs", self._speaker.total_sent_sec)
                                await loop.run_in_executor(
                                    None, self._speaker.wait_finish,
                                )
                            elif self._speaking and self._speaker.interrupted:
                                log.info("speaker interrupted")
                            self._speaking = False
                            self._stream_started = False
                            if getattr(self, "_mouth_lvl", 0) != 0:
                                self._mouth_lvl = 0
                                log.info("[[MOUTH:0]]")  # close the LED-mask mouth
                            self._mic.flush()
                            self._recorder.finish_turn()
                            log.info("listening")
                finally:
                    # Close the per-cycle receive generator so a stall/break
                    # doesn't leak it (the old `async for` closed it for us).
                    try:
                        await _recv_agen.aclose()
                    except Exception:
                        pass

                if _stalled:
                    log.warning("no messages from Gemini for %ds — session dead",
                                _NO_MESSAGES_TIMEOUT)
                    break
                await asyncio.sleep(0.1)

        except Exception as exc:
            log.warning("receive ended: %s", exc)
        finally:
            # Whatever the exit path, tell the other session loops to stop.
            self._done.set()
+ + async def _announce_vision_state(self, session: Any, enabled: bool, + is_toggle: bool) -> None: + if is_toggle and enabled: + text = ( + "[VISION ON] Your camera was just enabled — you can now see " + "the user through it. Briefly tell them you can see them now, " + "in your normal Khaleeji style (for example: " + "'هلا، الحين أشوفك زين')." + ) + elif is_toggle and not enabled: + text = ( + "[VISION OFF] Your camera was just disabled — you can no " + "longer see anything. Briefly tell the user you can't see " + "them anymore. If they later ask whether you can see them, " + "tell them to enable the camera from the dashboard." + ) + elif enabled: # session start, camera already on + text = ( + "[VISION STATUS] Your camera is ON — you can see the user " + "through it. Do not announce this unprompted; just answer " + "naturally if they ask what you see." + ) + else: # session start, camera off + text = ( + "[VISION STATUS] Your camera is OFF — you cannot see anything " + "right now. If the user asks whether you can see them, tell " + "them to enable the camera from the dashboard. Do not announce " + "this unprompted." + ) + try: + await session.send_realtime_input(text=text) + log.info("vision-state injected (enabled=%s, toggle=%s)", + enabled, is_toggle) + except asyncio.CancelledError: + raise + except Exception as exc: + log.warning("vision-state inject failed: %s", exc) + + # ─── face-recognition-state announcer ───────────────── + # Same idea as _announce_vision_state, for the face-recognition toggle. + # On a live OFF toggle it also tells Gemini to disregard the gallery — + # so OFF takes effect immediately instead of lingering until reconnect. + + async def _announce_facerec_state(self, session: Any, enabled: bool, + is_toggle: bool) -> None: + if is_toggle and enabled: + text = ( + "[FACE RECOGNITION ON] Face recognition was just enabled — " + "you'll be shown the people you know in a moment. 
Briefly " + "tell the user you can now recognise the people you know, in " + "your normal Khaleeji style." + ) + elif is_toggle and not enabled: + text = ( + "[FACE RECOGNITION OFF] Face recognition was just disabled. " + "Disregard the face gallery you were given earlier — stop " + "greeting people by name and do not identify anyone. Briefly " + "tell the user you'll no longer recognise faces." + ) + elif enabled: # session start, face rec already on + text = ( + "[FACE RECOGNITION STATUS] Face recognition is ON — when you " + "see someone you've been shown in the gallery, greet them by " + "name. Do not announce this unprompted." + ) + else: # session start, face rec off + text = ( + "[FACE RECOGNITION STATUS] Face recognition is OFF — you " + "cannot identify people. If the user asks who someone is or " + "whether you recognise them, tell them to enable face " + "recognition from the dashboard. Do not announce this " + "unprompted." + ) + try: + await session.send_realtime_input(text=text) + log.info("face-rec-state injected (enabled=%s, toggle=%s)", + enabled, is_toggle) + except asyncio.CancelledError: + raise + except Exception as exc: + log.warning("face-rec-state inject failed: %s", exc) + + # ─── place-recognition-state announcer (N3) ─────────── + # Same idea as _announce_facerec_state, for the places-gallery toggle. + # On a live OFF toggle it also tells Gemini to disregard the places it + # was given so OFF takes effect immediately instead of lingering. + + async def _announce_zonerec_state(self, session: Any, enabled: bool, + is_toggle: bool) -> None: + if is_toggle and enabled: + text = ( + "[ZONE RECOGNITION ON] You were just given the zones and places " + "you know (and the people associated with them). Briefly tell " + "the user you now know your way around, in your normal Khaleeji " + "style." + ) + elif is_toggle and not enabled: + text = ( + "[ZONE RECOGNITION OFF] Zone recognition was just disabled. 
" + "Disregard the zones and places you were given earlier — stop " + "naming rooms or locations. Briefly tell the user you'll no " + "longer recognise places." + ) + elif enabled: # session start, zone rec already on + text = ( + "[ZONE RECOGNITION STATUS] Zone recognition is ON — when you see " + "or are asked about a zone/place you've been told about, you may " + "name it and use its description. Do not announce this " + "unprompted." + ) + else: # session start, zone rec off + text = ( + "[ZONE RECOGNITION STATUS] Zone recognition is OFF — you do not " + "know any specific zones or places. If the user asks where they " + "are or to go somewhere by name, tell them to enable zone " + "recognition from the dashboard. Do not announce this " + "unprompted." + ) + try: + await session.send_realtime_input(text=text) + log.info("zone-rec-state injected (enabled=%s, toggle=%s)", + enabled, is_toggle) + except asyncio.CancelledError: + raise + except Exception as exc: + log.warning("zone-rec-state inject failed: %s", exc) + + # ─── navigation-target announcer (N3 "go here") ─────── + # When the operator sets a destination, tell Gemini which place to go to + # and show it the reference photo(s). Actual robot motion is wired by N2; + # this establishes the goal + visual reference. + + async def _announce_nav_target(self, session: Any, + zone_id: int, place_id: int) -> None: + if not zone_id or not place_id: + try: + await session.send_realtime_input(text=( + "[DESTINATION CLEARED] You have no specific destination right " + "now. Do not announce this unprompted." 
+ )) + except asyncio.CancelledError: + raise + except Exception as exc: + log.warning("nav-clear inject failed: %s", exc) + return + try: + from Project.Sanad.vision.zone_gallery import ZoneGallery + gallery = ZoneGallery(_ZONES_DIR) + place = gallery.get_place(zone_id, place_id) + zone = gallery.get_zone(zone_id) + except Exception as exc: + log.warning("nav-target resolve failed: %s", exc) + return + if place is None: + log.info("nav-target zone_%d/place_%d not found — skipping", zone_id, place_id) + return + place_name = place.name or f"place {place_id}" + zone_name = (zone.name if zone else None) or f"zone {zone_id}" + instr = ( + f"[GO HERE] The user has set your destination to '{place_name}' in " + f"'{zone_name}'." + ) + if place.description: + instr += f" Notes: {place.description}." + instr += ( + " The image(s) below show what it looks like so you can recognise it. " + "If walking is enabled you will head there; if it is off, tell the " + "user to enable movement from the dashboard. Briefly acknowledge the " + "destination in your normal Khaleeji style." 
+ ) + parts: list[dict[str, Any]] = [{"text": instr}] + for p in place.sample_paths[:_FACES_MAX_SAMPLES]: + try: + raw = p.read_bytes() + except OSError: + continue + jpeg = gallery._resize_for_primer(raw, _FACES_PRIMER_RESIZE) or raw + parts.append({"inline_data": {"mime_type": "image/jpeg", "data": jpeg}}) + try: + await session.send_client_content( + turns=[{"role": "user", "parts": parts}], turn_complete=True, + ) + log.info("nav-target injected → zone_%d/place_%d (%s)", + zone_id, place_id, place_name) + except asyncio.CancelledError: + raise + except Exception as exc: + log.warning("nav-target inject failed: %s", exc) + + # ─── navigation tool-call handler (Gemini function-calling) ─── + # Gemini issues tool_calls (navigate_to_place / list_places / where_am_i / + # stop_navigation); we execute them against the dashboard nav API and reply + # with a FunctionResponse so the model can speak from the real result. + + async def _handle_tool_calls(self, session: Any, function_calls: Any) -> None: + responses = [] + for fc in function_calls: + name = getattr(fc, "name", "") or "" + try: + args = dict(getattr(fc, "args", None) or {}) + except Exception: + args = {} + log.info("TOOL CALL: %s(%s)", name, args) + result = await self._dispatch_tool(name, args) + log.info("TOOL RESULT: %s → %s", name, result) + responses.append(types.FunctionResponse( + id=getattr(fc, "id", None), name=name, response=result, + )) + if not responses: + return + try: + await session.send_tool_response(function_responses=responses) + except asyncio.CancelledError: + raise + except Exception as exc: + log.warning("send_tool_response failed: %s", exc) + + async def _dispatch_tool(self, name: str, args: dict) -> dict: + try: + if name == "navigate_to_place": + # Respect the movement gate (kept fresh by the state watcher) so + # a nav call can't drive while walking is disabled. 
+ if not self._movement_enabled: + return {"ok": False, "reason": "movement_off", + "say": "Movement is off — ask the user to enable it from the dashboard."} + place = str(args.get("place") or "").strip() + if not place: + return {"ok": False, "reason": "no_place"} + return await asyncio.to_thread( + _nav_api, "POST", "/api/nav/voice_goto", {"place": place}) + if name == "list_places": + r = await asyncio.to_thread(_nav_api, "GET", "/api/nav/active", None) + if not isinstance(r, dict): + return {"ok": False, "reason": "bad_response"} + if r.get("reason"): # an error envelope from _nav_api + return r + return {"ok": True, "map": r.get("map"), "places": r.get("places", [])} + if name == "where_am_i": + r = await asyncio.to_thread(_nav_api, "GET", "/api/nav/active", None) + if not isinstance(r, dict): + return {"ok": False, "reason": "bad_response"} + if r.get("reason"): + return r + return {"ok": True, "map": r.get("map"), + "mode": r.get("mode_label"), + "ready": bool(r.get("bringup_alive")), + "localized": bool(r.get("localizing")), + "places": r.get("places", [])} + if name == "stop_navigation": + return await asyncio.to_thread(_nav_api, "POST", "/api/nav/cancel", None) + if name == "set_expression": + emotion = str(args.get("emotion") or "").strip().lower() + if emotion not in _FACE_EMOTIONS: + return {"ok": False, "reason": "unknown_emotion"} + # The parent (GeminiSubprocess) relays [[FACE:…]] to the LED mask. 
+ log.info("[[FACE:%s]]", emotion) + return {"ok": True, "shown": emotion} + if name == "show_social": + account = str(args.get("account") or "").strip().lower() + if account not in _SOCIAL_ACCOUNTS: + return {"ok": False, "reason": "unknown_account"} + log.info("[[SHOW:%s]]", account) + return {"ok": True, "showing": account} + return {"ok": False, "reason": "unknown_tool"} + except Exception as exc: + log.warning("tool %s error: %s", name, exc) + return {"ok": False, "reason": "error", "detail": str(exc)[:200]} + + # ─── movement-state announcer (N2) ──────────────────── + # Spoken confirmation when the operator enables / disables Gemini-driven + # locomotion from the dashboard. The actual movement dispatch loop lives + # in the parent; this only gives the user audible feedback on the toggle. + + async def _announce_movement_state(self, session: Any, enabled: bool, + is_toggle: bool) -> None: + if is_toggle and enabled: + text = ( + "[MOVEMENT ON] Walking is now enabled — you can move when the " + "user asks. Briefly tell the user movement is enabled and they " + "can ask you to walk, in your normal Khaleeji style." + ) + elif is_toggle and not enabled: + text = ( + "[MOVEMENT OFF] Walking was just disabled — you must not move. " + "Briefly tell the user movement is now off. If they ask you to " + "walk, tell them to enable movement from the dashboard first." + ) + elif enabled: # session start, movement already on + text = ( + "[MOVEMENT STATUS] Walking is ON — you may move when asked. Do " + "not announce this unprompted." + ) + else: # session start, movement off + text = ( + "[MOVEMENT STATUS] Walking is OFF — you cannot move right now. " + "If the user asks you to walk, tell them to enable movement " + "from the dashboard. Do not announce this unprompted." 
+ ) + try: + await session.send_realtime_input(text=text) + log.info("movement-state injected (enabled=%s, toggle=%s)", + enabled, is_toggle) + except asyncio.CancelledError: + raise + except Exception as exc: + log.warning("movement-state inject failed: %s", exc) + + # ─── audio profile hot-swap ─────────────────────────── + # The parent (GeminiSubprocess) polls pactl for the Anker USB device + # and writes "profile:" lines to our stdin. _stdin_watcher parses + # them into _PROFILE_PENDING; this loop drains the flag on the asyncio + # loop and performs the actual swap. The brain's read/write sites + # (_send_mic_loop / _receive_loop) keep using self._mic / self._speaker — + # an atomic ref reassignment is enough because nothing caches them in + # a loop-local variable (verified in exploration). + + async def _audio_swap_loop(self, session: Any) -> None: + while not self._done.is_set() and not self._stop_flag.is_set(): + await asyncio.sleep(0.25) + with _PROFILE_LOCK: + target = _PROFILE_PENDING.get("id") + reason = _PROFILE_PENDING.get("reason", "") + _PROFILE_PENDING["id"] = None + _PROFILE_PENDING["reason"] = "" + if not target or target == self._current_profile_id: + continue + try: + await self.swap_audio_devices(session, target, reason=reason) + except asyncio.CancelledError: + return + except Exception as exc: + log.warning("audio swap failed: %s", exc) + + async def swap_audio_devices(self, session: Any, profile_id: str, + reason: str = "") -> None: + """Hot-swap mic+speaker to `profile_id` without dropping the live + Gemini session. Idempotent (no-op if already on `profile_id`). + + Order matters: start the new mic BEFORE we tear the old one down, + so a transient PyAudio failure (e.g. udev hasn't exposed Anker yet) + leaves the old backend in place. After 3 retries with backoff we + give up and log WARN — the watcher will retry on its next tick. 
+ """ + if self._swap_lock is None: + log.warning("swap requested before session loop started — skipping") + return + async with self._swap_lock: + if profile_id == self._current_profile_id: + return + prev = self._current_profile_id + log.info("audio swap: %s → %s (reason=%s)", prev, profile_id, reason or "—") + + # Build + start the new mic. Retry: pactl can see the device + # before PyAudio's get_device_count refreshes. + try: + from Project.Sanad.voice.audio_io import AudioIO as _AudioIO + except Exception as exc: + log.error("audio swap: AudioIO import failed: %s", exc) + return + new_mic = new_spk = None + last_exc: Optional[BaseException] = None + for attempt in range(3): + try: + new_mic, new_spk = _AudioIO.build_backends( + profile_id, audio_client=self._audio_client, + ) + # mic.start() opens PyAudio + spawns reader thread. + # speaker is lazy (opens on first send_chunk). + await asyncio.to_thread(new_mic.start) + break + except Exception as exc: + last_exc = exc + # Tear down a partially-built backend so the next attempt + # gets a clean slate; don't leak PyAudio handles. + if new_mic is not None: + try: + await asyncio.to_thread(new_mic.stop) + except Exception: + pass + new_mic = new_spk = None + log.info("audio swap attempt %d failed: %s — retry in 0.4s", + attempt + 1, exc) + await asyncio.sleep(0.4) + if new_mic is None or new_spk is None: + log.warning("audio swap %s → %s: all 3 attempts failed (%s); " + "keeping current profile", + prev, profile_id, last_exc) + return + + # Drain the old playback so any in-flight AI utterance stops + # (interrupts mid-word — acceptable per spec, <1s gap). + # MUST be awaited via to_thread: _PyAudioSpeaker.stop now + # takes a per-instance RLock and an in-flight send_chunk on + # the executor may be holding it across a back-pressured + # PortAudio write. Calling stop() synchronously on the + # event-loop thread would wedge the entire loop (mic, + # vision, session.receive) until the pulse buffer drains. 
+ try: + await asyncio.to_thread(self._speaker.stop) + except Exception: + pass + try: + self._mic.flush() + except Exception: + pass + + # Atomic ref swap — next read_chunk / send_chunk hits new. + old_mic, old_spk = self._mic, self._speaker + self._mic = new_mic + self._speaker = new_spk + self._current_profile_id = profile_id + self._reset_turn_state() + + # Tear down old AFTER the ref swap so any executor call still + # in flight finishes against the old handle and the next loop + # iteration picks up the new one cleanly. + try: + await asyncio.to_thread(old_mic.stop) + except Exception: + pass + try: + await asyncio.to_thread(old_spk.stop) + except Exception: + pass + + # Silent context to Gemini — so it knows the input chain changed + # if asked (matches the _announce_vision_state pattern). + try: + await session.send_realtime_input(text=( + f"[AUDIO SWITCH] Mic + speaker are now on the {profile_id!s} " + f"audio profile. Do not announce this unprompted; just keep " + f"replying normally — the user's voice may sound clearer or " + f"different on the new device." + )) + except asyncio.CancelledError: + raise + except Exception as exc: + log.warning("audio-swap announce failed: %s", exc) + log.info("audio swap complete: %s → %s", prev, profile_id) + + # ─── recognition state watcher ──────────────────────── + # Polls data/.recognition_state.json at SANAD_RECOGNITION_POLL_S Hz and + # mirrors vision_enabled / face_rec_enabled into in-memory flags so the + # rest of the session can react WITHOUT a Gemini reconnect. 
+ + async def _recognition_state_watcher(self, session: Any) -> None: + last_mtime_ns = -1 + last_state = _recog_state.RecognitionState( + vision_enabled=self._vision_enabled, + face_rec_enabled=self._face_rec_enabled, + gallery_version=self._gallery_version_primed, + zone_rec_enabled=self._zone_rec_enabled, + zones_version=self._zones_version_primed, + movement_enabled=self._movement_enabled, + record_enabled=self._record_enabled, + ) + # Best-effort initial primer if face_rec is already on at session start. + if self._face_rec_enabled and self._vision_enabled: + try: + cur = _recog_state.read(_RECOG_STATE_PATH) + await self._send_gallery_primer(session, cur.gallery_version) + except Exception as exc: + log.warning("initial gallery primer failed: %s", exc) + + # N3 — initial zones primer if zone recognition is already on. Unlike + # faces this does NOT require vision: name+description-only places still + # give Gemini useful knowledge to talk about. + if self._zone_rec_enabled: + try: + cur = _recog_state.read(_RECOG_STATE_PATH) + await self._send_zone_primer(session, cur.zones_version) + except Exception as exc: + log.warning("initial zone primer failed: %s", exc) + + # Tell Gemini the current camera + recognition + movement state at + # session start — silent standing context so "can you see me?" / "do + # you know who I am?" are answered honestly even if nothing is toggled. + await self._announce_vision_state( + session, self._vision_enabled, is_toggle=False, + ) + await self._announce_facerec_state( + session, self._face_rec_enabled, is_toggle=False, + ) + await self._announce_zonerec_state( + session, self._zone_rec_enabled, is_toggle=False, + ) + await self._announce_movement_state( + session, self._movement_enabled, is_toggle=False, + ) + # N3 — announce the active "go here" destination (if any). _nav_target + # was reset to (-1,-1) per session so this fires on every reconnect. 
+ try: + cur = _recog_state.read(_RECOG_STATE_PATH) + nav = (cur.nav_target_zone_id, cur.nav_target_place_id) + if nav != self._nav_target: + await self._announce_nav_target(session, nav[0], nav[1]) + self._nav_target = nav + except Exception as exc: + log.warning("initial nav-target announce failed: %s", exc) + + while not self._done.is_set() and not self._stop_flag.is_set(): + await asyncio.sleep(_RECOG_POLL_S) + try: + st = _RECOG_STATE_PATH.stat() + except FileNotFoundError: + continue + except Exception: + continue + # Use nanosecond mtime: write() does os.replace of a fresh + # tempfile, so two CRUD ops within one coarse mtime tick would + # share an identical whole-second st_mtime and the second change + # would be skipped on this tick. st_mtime_ns has far finer + # resolution, so a rapid second write is observed. (The + # version-diff logic below is still the ultimate safety net.) + if st.st_mtime_ns == last_mtime_ns: + continue + last_mtime_ns = st.st_mtime_ns + new_state = _recog_state.read(_RECOG_STATE_PATH) + + # Vision toggle — instant. Announce it out loud so Gemini reacts + # ("I can see you now" / "I can't see you anymore"). + if new_state.vision_enabled != last_state.vision_enabled: + self._vision_enabled = new_state.vision_enabled + log.info("vision toggled → %s", self._vision_enabled) + await self._announce_vision_state( + session, self._vision_enabled, is_toggle=True, + ) + + # Face-rec toggle — announce it out loud. The OFF announcement + # also tells Gemini to disregard the gallery, so OFF takes effect + # immediately instead of lingering until the next reconnect. 
+ if new_state.face_rec_enabled != last_state.face_rec_enabled: + self._face_rec_enabled = new_state.face_rec_enabled + if self._face_rec_enabled: + log.info("face rec enabled — announcing + sending primer") + else: + log.info("face rec disabled — telling Gemini to " + "disregard the gallery") + await self._announce_facerec_state( + session, self._face_rec_enabled, is_toggle=True, + ) + + # Conditions for re-priming: + # - face_rec just turned ON (no_face_rec_before) + # - gallery version bumped since the last primer + face_rec_just_on = ( + new_state.face_rec_enabled and not last_state.face_rec_enabled + ) + gallery_changed = ( + new_state.gallery_version != self._gallery_version_primed + ) + if (self._face_rec_enabled + and (face_rec_just_on or gallery_changed) + and self._vision_enabled): + try: + await self._send_gallery_primer( + session, new_state.gallery_version, + ) + except Exception as exc: + log.warning("gallery primer failed: %s", exc) + + # N3 — zone-recognition toggle (announce out loud, like face-rec). + if new_state.zone_rec_enabled != last_state.zone_rec_enabled: + self._zone_rec_enabled = new_state.zone_rec_enabled + log.info("zone rec toggled → %s", self._zone_rec_enabled) + await self._announce_zonerec_state( + session, self._zone_rec_enabled, is_toggle=True, + ) + + # Re-prime zones when zone-rec just turned ON or the zones version + # bumped (any zone/place/face-link/photo CRUD). No vision needed. + zone_rec_just_on = ( + new_state.zone_rec_enabled and not last_state.zone_rec_enabled + ) + zones_changed = ( + new_state.zones_version != self._zones_version_primed + ) + if self._zone_rec_enabled and (zone_rec_just_on or zones_changed): + try: + await self._send_zone_primer( + session, new_state.zones_version, + ) + except Exception as exc: + log.warning("zone primer failed: %s", exc) + + # N3 — "go here" destination changed (set or cleared). Announce + + # show the reference photo. 
Diffed against the announced tuple so a + # CRUD-only version bump above doesn't double-fire this. + nav = (new_state.nav_target_zone_id, new_state.nav_target_place_id) + if nav != self._nav_target: + self._nav_target = nav + await self._announce_nav_target(session, nav[0], nav[1]) + + # N2 — movement enable/disable toggle (spoken confirmation only). + if new_state.movement_enabled != last_state.movement_enabled: + self._movement_enabled = new_state.movement_enabled + log.info("movement toggled → %s", self._movement_enabled) + await self._announce_movement_state( + session, self._movement_enabled, is_toggle=True, + ) + + # Auto-record toggle — flip the recorder live (no session restart). + if new_state.record_enabled != last_state.record_enabled: + self._record_enabled = new_state.record_enabled + try: + self._recorder.enabled = self._record_enabled + except Exception: + pass + log.info("auto-record toggled → %s", self._record_enabled) + + last_state = new_state + + # ─── camera frame send loop ─────────────────────────── + # Reads the latest JPEG from the _LATEST_FRAME cache (fed by the + # _stdin_watcher thread, which the GeminiSubprocess supervisor pushes + # 'frame:' lines into) and relays it to Gemini Live at + # _VISION_SEND_HZ. Only active when self._vision_enabled. Skips frames + # older than _VISION_STALE_MS so a stopped/unplugged camera doesn't + # waste tokens on a frozen scene. + + async def _send_frame_loop(self, session: Any) -> None: + period = 1.0 / max(0.5, _VISION_SEND_HZ) + stale_s = _VISION_STALE_MS / 1000.0 + backoff = 0.0 + last_sent_ts = 0.0 + + while not self._done.is_set() and not self._stop_flag.is_set(): + await asyncio.sleep(max(period, backoff)) + if not self._vision_enabled: + continue + with _LATEST_FRAME_LOCK: + data = _LATEST_FRAME.get("bytes") + ts = _LATEST_FRAME.get("ts", 0.0) + if not data: + continue + # Stale — supervisor stopped pushing (camera off / unplugged). 
+ if (time.time() - ts) > stale_s: + continue + # De-dup — don't re-send a frame we already relayed. + if ts == last_sent_ts: + continue + try: + await session.send_realtime_input( + video=types.Blob(data=data, mime_type="image/jpeg"), + ) + last_sent_ts = ts + backoff = 0.0 + except asyncio.CancelledError: + return + except Exception as exc: + log.warning("frame send failed: %s", exc) + backoff = min(backoff * 2 + 0.5, 5.0) + + # ─── motion-state inject loop ───────────────────────── + # Drains _STATE_PENDING (fed by the _stdin_watcher from 'state:' lines + # the supervisor pushes when the arm starts/finishes/errors a motion) + # and injects each as silent text context into the live session, so + # Gemini can answer "what are you doing?" honestly. Per persona, Gemini + # reads these for context but does not narrate them unprompted. + + async def _send_state_loop(self, session: Any) -> None: + while not self._done.is_set() and not self._stop_flag.is_set(): + await asyncio.sleep(0.1) + with _STATE_LOCK: + if not _STATE_PENDING: + continue + pending = list(_STATE_PENDING) + _STATE_PENDING.clear() + for msg in pending: + try: + await session.send_realtime_input(text=msg) + log.info("STATE injected: %s", msg) + except asyncio.CancelledError: + return + except Exception as exc: + # Some SDK versions may not accept text on + # send_realtime_input — log once-ish and keep going; + # motion still works, only this context channel is lost. + log.warning("state inject failed: %s", exc) + + # ─── face gallery primer ────────────────────────────── + # Builds one multimodal turn carrying the entire face gallery + a Khaleeji + # greeting instruction, and sends it via send_client_content. Gemini keeps + # this in session context until reconnect. Re-sent on gallery_version bumps. 
+ + async def _send_gallery_primer(self, session: Any, version: int) -> None: + try: + from Project.Sanad.vision.face_gallery import FaceGallery + except Exception as exc: + log.info("face gallery module unavailable: %s", exc) + return + + gallery = FaceGallery(_FACES_DIR) + try: + entries = gallery.load_for_primer( + max_samples_per_face=_FACES_MAX_SAMPLES, + resize_long_side=_FACES_PRIMER_RESIZE, + ) + except Exception as exc: + log.warning("face gallery load failed: %s", exc) + return + + if not entries: + log.info("face gallery empty — primer skipped (v.%d)", version) + self._gallery_version_primed = version + return + + parts: list[dict[str, Any]] = [{ + "text": ( + "GALLERY PRIMER (do not reply to this turn). " + "Below are people you know. When the live camera shows one of " + "them, greet them warmly by name in UAE Khaleeji dialect " + "(for example: 'هلا والله يا كسام، شحالك؟'), and you may use " + "the notes about them to make the conversation personal. " + "For faces NOT in this gallery, welcome them as a guest " + "without inventing a name. Greet each person only once per " + "minute to avoid repetition." + ), + }] + for entry, jpegs in entries: + label = ( + f"This person is named {entry.name}." + if entry.name + else "This person's name is unknown — greet as guest." 
+ ) + if entry.description: + label += f" Notes about them: {entry.description}" + parts.append({"text": f"\n— {label}"}) + for jpeg in jpegs: + parts.append({ + "inline_data": {"mime_type": "image/jpeg", "data": jpeg}, + }) + + try: + await session.send_client_content( + turns=[{"role": "user", "parts": parts}], + turn_complete=True, + ) + except Exception as exc: + log.warning("primer send failed: %s", exc) + return + self._gallery_version_primed = version + log.info("face gallery primed: %d person(s), v.%d", len(entries), version) + + # ─── zones primer (N3) ──────────────────────────────── + # One multimodal turn carrying every zone, its places (name + description + + # reference photos), and the people linked to each place. A place may have + # NO photos (name + description only), so empty image lists are tolerated. + + async def _send_zone_primer(self, session: Any, version: int) -> None: + try: + from Project.Sanad.vision.zone_gallery import ZoneGallery + except Exception as exc: + log.info("zone gallery module unavailable: %s", exc) + return + + gallery = ZoneGallery(_ZONES_DIR) + try: + entries = gallery.load_for_primer( + max_samples_per_place=_FACES_MAX_SAMPLES, + resize_long_side=_FACES_PRIMER_RESIZE, + ) + except Exception as exc: + log.warning("zone gallery load failed: %s", exc) + return + + if not entries: + log.info("zone gallery empty — primer skipped (v.%d)", version) + self._zones_version_primed = version + return + + # Resolve linked face ids → names once (cheap, small galleries). + face_names: dict[int, str] = {} + try: + from Project.Sanad.vision.face_gallery import FaceGallery + for fe in FaceGallery(_FACES_DIR).list(): + if fe.name: + face_names[fe.id] = fe.name + except Exception: + pass + + parts: list[dict[str, Any]] = [{ + "text": ( + "ZONES PRIMER (do not reply to this turn). Below are the zones " + "and places you know, with the people often found at each place. 
" + "Use them to answer where things are, to name a place when the " + "live camera shows one, and to make directions personal. Do not " + "invent zones or places that are not listed here." + ), + }] + n_zones = n_places = 0 + for zone, places in entries: + n_zones += 1 + zhdr = f"\n# Zone: {zone.name or '(unnamed)'}" + if zone.description: + zhdr += f" — {zone.description}" + parts.append({"text": zhdr}) + if not places: + parts.append({"text": " (no places yet)"}) + for place, jpegs in places: + n_places += 1 + label = f"\n - Place: {place.name or '(unnamed)'}" + if place.description: + label += f" — {place.description}" + people = [face_names[f] for f in place.face_ids if f in face_names] + if people: + label += f" | People often here: {', '.join(people)}" + parts.append({"text": label}) + for jpeg in jpegs: + parts.append({ + "inline_data": {"mime_type": "image/jpeg", "data": jpeg}, + }) + + try: + await session.send_client_content( + turns=[{"role": "user", "parts": parts}], + turn_complete=True, + ) + except Exception as exc: + log.warning("zone primer send failed: %s", exc) + return + self._zones_version_primed = version + log.info("zones primed: %d zone(s), %d place(s), v.%d", + n_zones, n_places, version) diff --git a/vendor/Sanad/gemini/subprocess.py b/vendor/Sanad/gemini/subprocess.py new file mode 100644 index 0000000..862436a --- /dev/null +++ b/vendor/Sanad/gemini/subprocess.py @@ -0,0 +1,769 @@ +"""Gemini live subprocess supervisor. + +Spawns `voice/sanad_voice.py` as a managed child with `SANAD_VOICE_BRAIN=gemini`, +tails the child's stdout, and extracts state transitions + user transcripts +from the Gemini-specific log lines emitted by `gemini/script.py:GeminiBrain`. + +When a new model is added, build its own sibling supervisor (see +`voice/model_subprocess.py` for the template) — do not refactor this file. 
+""" + +from __future__ import annotations + +import base64 +import json +import os +import signal +import subprocess +import sys +import threading +from collections import deque +from datetime import datetime +from typing import Any, Optional, Union + +from pathlib import Path + +from Project.Sanad.config import BASE_DIR, LOGS_DIR, SCRIPTS_DIR, LIVE_TUNE +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.logger import get_logger + +log = get_logger("gemini_subprocess") + +_LS_CFG = _cfg_section("gemini", "subprocess") + +# Camera frame forwarding — push the latest JPEG to the child over stdin +# at this interval (seconds). 0.5 s ≈ 2 fps, matching the child's +# SANAD_VISION_SEND_HZ default. The child de-stales + relays to Gemini. +_FRAME_FORWARD_INTERVAL_S = float(_LS_CFG.get("frame_forward_interval_sec", 0.5)) + +# Audio profile watcher — poll pactl for the Anker USB device at this +# interval, send "profile:" to the child on every state change. +_AUDIO_WATCH_INTERVAL_S = float(_LS_CFG.get("audio_watch_interval_sec", 1.5)) + +# The Anker profile id, as defined in voice/audio_devices.py. When this +# profile is fully plugged (both sink + source present), we switch the +# child to "anker"; otherwise we hold the boot fallback profile. +_ANKER_PROFILE_ID = "anker_powerconf" + +def _resolve_live_script() -> Path: + """Locate the voice script to run as subprocess. + + Default: voice/sanad_voice.py (the canonical G1 built-in mic + + AudioClient speaker path). Override with SANAD_LIVE_SCRIPT. 
+ """ + override = os.environ.get("SANAD_LIVE_SCRIPT", "").strip() + if override: + p = Path(override).expanduser() + if p.exists(): + return p + for c in (BASE_DIR / "voice" / "sanad_voice.py", + SCRIPTS_DIR / "sanad_voice.py"): + if c.exists(): + return c + return SCRIPTS_DIR / "sanad_voice.py" + + +LIVE_SCRIPT = _resolve_live_script() +LOG_TAIL_SIZE = _LS_CFG.get("log_tail_size", 2000) +TRANSCRIPT_TAIL_SIZE = _LS_CFG.get("transcript_tail_size", 30) + +# Persistent on-disk log for the full subprocess session. +LIVE_LOG_DIR = LOGS_DIR +LIVE_LOG_NAME = _LS_CFG.get("log_name", "gemini_subprocess") + +_STOP_TIMEOUT_SEC = _LS_CFG.get("stop_timeout_sec", 3.0) +_TERMINATE_TIMEOUT_SEC = _LS_CFG.get("terminate_timeout_sec", 2.0) + +_NOISY_PREFIXES = tuple(_LS_CFG.get("noisy_prefixes", [ + "ALSA lib ", "Expression 'alsa_", "Cannot connect to server socket", + "jack server is not running", +])) +_NOISY_FRAGMENTS = tuple(_LS_CFG.get("noisy_fragments", [ + "Unknown PCM", "Evaluate error", "snd_pcm_open_noupdate", + "PaAlsaStream", "snd_config_evaluate", "snd_func_refer", +])) + + +class GeminiSubprocess: + def __init__(self): + self._lock = threading.Lock() + self.process: subprocess.Popen | None = None + # Set under _lock for the duration of start()'s heavy Popen so a + # concurrent start() observes it and bails (prevents a double-spawn + # race where two children both grab the G1 mic/speaker). + self._starting = False + self.log_tail: deque[str] = deque(maxlen=LOG_TAIL_SIZE) + self.user_transcript: deque[str] = deque(maxlen=TRANSCRIPT_TAIL_SIZE) + # Gemini's OWN spoken text (output transcription). The movement + # dispatcher (N2) polls this the way LiveVoiceLoop polls + # user_transcript — it reads what Gemini *said* and fires motion on + # a confirmation-phrase match (the Marcus pattern). Also handy for + # surfacing the bot side of the conversation on the dashboard. 
+ self.bot_transcript: deque[str] = deque(maxlen=TRANSCRIPT_TAIL_SIZE) + # N2 Phase 3 — callbacks fired with each new BOT: line (Gemini's own + # spoken text). The MovementDispatcher registers here to drive + # locomotion off Gemini's confirmation phrases. Fired on the reader + # thread; callbacks must be cheap / non-blocking (the dispatcher just + # enqueues to its own worker). + self._bot_callbacks: list = [] + # _track_line (which runs under self._lock) stashes the latest BOT text + # here; the reader loop fires callbacks AFTER releasing the lock so a + # slow callback (e.g. movement dispatch reading state) never stalls the + # reader thread or blocks log parsing. + self._pending_bot: str | None = None + # Lip-sync: callbacks fired on each [[MOUTH:n]] marker (mouth level 0..3) + # emitted by gemini/script.py while Gemini speaks. The LED-mask face + # subsystem registers here to drive the animated mouth. Fired on the + # reader thread; callbacks must be cheap / non-blocking. + self._mouth_callbacks: list = [] + # Expression / social markers ([[FACE:name]] / [[SHOW:account]]) emitted + # by gemini/script.py when Gemini calls set_expression / show_social. + # The LED-mask face subsystem registers here. Fired on the reader thread; + # callbacks must be cheap / non-blocking. + self._face_callbacks: list = [] + self._social_callbacks: list = [] + self._reader_thread: threading.Thread | None = None + self._log_file = None # opened per-session in _reader_loop + self.state = "stopped" + self.state_message = "Idle." + self.last_user_text = "" + self.last_bot_text = "" + self.suppressed_noise = 0 + # ── stdin push channel (camera frames + motion state + profile) ── + # The child (gemini/script.py) reads "frame:\n", + # "state:\n", and "profile:\n" lines off its stdin. + # Writes are serialised because the frame forwarder, motion-state + # bus handler, and audio watcher all call from different threads. 
+ self._stdin_lock = threading.Lock() + self._camera = None # set via attach_camera() + self._frame_thread: threading.Thread | None = None + self._frame_stop = threading.Event() + # ── audio profile hot-swap ──────────────────────────────── + # _audio_mgr is the parent's AudioManager — needed so we can keep + # PulseAudio defaults in sync (so /api/records/play etc. follow + # the same device the live session uses). Set via attach_audio_manager. + self._audio_mgr = None + self._audio_thread: threading.Thread | None = None + self._audio_stop = threading.Event() + # The boot profile captured at start() — what we revert to when + # the Anker is unplugged. Read from env (already in LIVE_TUNE). + self._boot_profile_id: str = "builtin" + # Last profile signalled to the child (for edge-only dispatch). + self._last_profile_id: str | None = None + + # ── camera attach (called once from main.py) ────────────── + + def register_bot_callback(self, callback) -> None: + """Register a fn(text) fired on each new BOT: line (Gemini's spoken + text). Used by the N2 movement dispatcher. Cheap/non-blocking only.""" + if callback not in self._bot_callbacks: + self._bot_callbacks.append(callback) + + def register_mouth_callback(self, callback) -> None: + """Register a fn(level:int 0..3) fired on each [[MOUTH:n]] lip-sync + marker. Used by the LED-mask face. Cheap/non-blocking only.""" + if callback not in self._mouth_callbacks: + self._mouth_callbacks.append(callback) + + def register_face_callback(self, callback) -> None: + """Register a fn(name:str) fired on each [[FACE:name]] marker (Gemini's + set_expression). The LED-mask face reacts with the emotion. Cheap only.""" + if callback not in self._face_callbacks: + self._face_callbacks.append(callback) + + def register_social_callback(self, callback) -> None: + """Register a fn(account:str) fired on each [[SHOW:account]] marker + (Gemini's show_social). Shows the social QR on the mask. 
Cheap only.""" + if callback not in self._social_callbacks: + self._social_callbacks.append(callback) + + def attach_camera(self, camera) -> None: + """Give the supervisor a reference to the CameraDaemon so it can + forward frames to the child over stdin while a session runs.""" + self._camera = camera + + def attach_audio_manager(self, audio_mgr) -> None: + """Hand the parent's AudioManager to the supervisor so the audio + watcher can keep PulseAudio defaults in sync on every swap (so + typed-replay / record playback follow the same device as the live + Gemini session).""" + self._audio_mgr = audio_mgr + + def _open_session_log(self, pid: int): + """Open (or re-open) the per-day append log file for this session.""" + try: + LIVE_LOG_DIR.mkdir(parents=True, exist_ok=True) + fname = f"{LIVE_LOG_NAME}_{datetime.now().strftime('%Y%m%d')}.log" + fh = open(LIVE_LOG_DIR / fname, "a", encoding="utf-8", buffering=1) + fh.write( + f"\n===== live_gemini subprocess start " + f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} pid={pid} =====\n" + ) + return fh + except Exception as exc: + log.warning("Could not open live-gemini log file: %s", exc) + return None + + def _is_noisy(self, line: str) -> bool: + return line.startswith(_NOISY_PREFIXES) or any(f in line for f in _NOISY_FRAGMENTS) + + def _set_state(self, state: str, msg: str): + self.state = state + self.state_message = msg + + def _track_line(self, line: str): + """Parse Gemini-specific log markers emitted by `gemini/script.py`. + + Must stay in lock-step with the `log.info(...)` strings in + `GeminiBrain`. If you add a new state, add the emit in the brain + AND the matching detector here — in one PR. 
+ """ + if "connecting to Gemini" in line: + self._set_state("connecting", line) + elif "connected — speak anytime" in line or "connected - speak anytime" in line: + self._set_state("listening", "Listening for speech.") + elif " USER: " in line or line.strip().startswith("USER:"): + # GeminiBrain emits: log.info("USER: %s", text) + text = line.split("USER:", 1)[1].strip() + if text: + self.last_user_text = text + self.user_transcript.append(text) + self._set_state("hearing", f"User: {text}") + elif " BOT: " in line or line.strip().startswith("BOT:"): + # GeminiBrain emits: log.info("BOT: %s", text) — Gemini's own + # spoken text. The movement dispatcher (N2) reads this deque to + # match confirmation phrases. Deliberately does NOT change the + # session state (that stays driven by USER / listening markers). + # NOTE: must precede the generic "listening" catch below, else a + # bot line that happens to contain "listening" would be misrouted. + text = line.split("BOT:", 1)[1].strip() + if text: + self.last_bot_text = text + self.bot_transcript.append(text) + # Defer callback firing to the reader loop, OUTSIDE self._lock. + self._pending_bot = text + elif "BARGE-IN" in line or "Gemini interrupted" in line or "interrupt (" in line: + self._set_state("interrupting", line) + elif "listening" in line.lower() and "no speech" not in line: + # Fires on "listening" (post-turn) — keep the state fresh. + self._set_state("listening", "Listening for speech.") + elif "session error" in line or "client recreation failed" in line: + self._set_state("error", line) + elif ("server going away" in line or "ended — reconnecting" in line + or "ended - reconnecting" in line or "session dead" in line): + # NOTE: keep in lock-step with the brain's emit + # log.info("session #%d ended — reconnecting in 1s", ...) — the + # "#N" between "session" and "ended" means a plain "session ended" + # substring never matched, so we anchor on "ended — reconnecting". 
+ self._set_state("warning", line) + elif "keyboard interrupt" in line or "cancelled — stopping" in line: + self._set_state("stopped", line) + + def _reader_loop(self): + proc = self.process + if proc is None or proc.stdout is None: + return + # Every line goes to the on-disk log — including the ALSA noise + # that we filter out of the in-memory tail. That way a field + # post-mortem has the full raw capture if we need it. + fh = self._open_session_log(proc.pid) + self._log_file = fh + for line in proc.stdout: + clean = line.rstrip() + if not clean: + continue + # High-frequency lip-sync marker [[MOUTH:n]] — fire callbacks and + # skip it entirely (not logged/tailed, ~10/s) before anything else. + _mi = clean.find("[[MOUTH:") + if _mi != -1: + try: + level = int(clean[_mi + 8:clean.index("]]", _mi)]) + except Exception: + level = 0 + for cb in self._mouth_callbacks: + try: + cb(max(0, min(3, level))) + except Exception: + log.exception("mouth callback failed") + continue + # Emotion marker [[FACE:name]] — from Gemini's set_expression tool. + # Low frequency; fire the face callbacks and skip logging the marker. + _fi = clean.find("[[FACE:") + if _fi != -1: + try: + name = clean[_fi + 7:clean.index("]]", _fi)].strip().lower() + except Exception: + name = "" + if name: + for cb in self._face_callbacks: + try: + cb(name) + except Exception: + log.exception("face callback failed") + continue + # Social marker [[SHOW:account]] — from Gemini's show_social tool. 
+ _si = clean.find("[[SHOW:") + if _si != -1: + try: + acct = clean[_si + 7:clean.index("]]", _si)].strip().lower() + except Exception: + acct = "" + if acct: + for cb in self._social_callbacks: + try: + cb(acct) + except Exception: + log.exception("social callback failed") + continue + if fh is not None: + try: + fh.write(clean + "\n") + except Exception: + pass + fired_bot = None + with self._lock: + if self._is_noisy(clean): + self.suppressed_noise += 1 + continue + self.log_tail.append(clean) + self._track_line(clean) + fired_bot = self._pending_bot + self._pending_bot = None + # Fire BOT-text callbacks (movement dispatch) OUTSIDE the lock so a + # slow callback can't stall transcript parsing. + if fired_bot is not None: + for cb in self._bot_callbacks: + try: + cb(fired_bot) + except Exception: + log.exception("bot-text callback failed") + with self._lock: + self.log_tail.append("Live Gemini process exited.") + self._set_state("stopped", "Process exited.") + if fh is not None: + try: + fh.write( + f"===== live_gemini subprocess exit " + f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====\n" + ) + fh.close() + except Exception: + pass + self._log_file = None + + def is_running(self) -> bool: + with self._lock: + return self.process is not None and self.process.poll() is None + + def start(self) -> dict[str, Any]: + with self._lock: + if self.process is not None and self.process.poll() is None: + return {"started": False, "message": "Already running.", "pid": self.process.pid} + if self._starting: + # A concurrent start() (e.g. two rapid POSTs dispatched on + # separate threads) is already mid-Popen. Bail so we don't + # spawn a second child holding the G1 mic/speaker — the first + # start owns the spawn and will publish self.process. 
+ return {"started": False, "message": "Start already in progress."} + # Sentinel held across the unlocked heavy Popen below; the early + # guard above + this flag make the running-check and the eventual + # self.process assignment atomic w.r.t. a concurrent start(). + self._starting = True + # Close the previous child's pipes if it died on its own (crash, + # not via stop()): stop() closes them, but a crash-then-start path + # would otherwise leak its stdin/stdout until Popen.__del__ at GC. + # stdin close is under _stdin_lock (mirrors stop()) so a stray + # motion-state _send_stdin can't race the close mid-write. + if self.process is not None: + with self._stdin_lock: + old_stdin = getattr(self.process, "stdin", None) + if old_stdin is not None: + try: + old_stdin.close() + except Exception: + pass + old_stdout = getattr(self.process, "stdout", None) + if old_stdout is not None: + try: + old_stdout.close() + except Exception: + pass + self._set_state("starting", "Starting...") + + try: + return self._start_locked() + finally: + with self._lock: + self._starting = False + + def _start_locked(self) -> dict[str, Any]: + script = LIVE_SCRIPT + if not script.exists(): + raise RuntimeError(f"Script not found: {script}") + + env = os.environ.copy() + env.update({"PYTHONUNBUFFERED": "1", **LIVE_TUNE}) + + # Pass the current G1 speaker volume as an env var so the + # subprocess can compute the correct barge-in threshold at + # startup. Without this, sanad_voice.py would read the volume + # from a stale or non-existent config file path and default to + # 100, scaling the barge-in threshold wrong for any non-100% + # volume. load_config() reads data/motions/config.json — the + # file the dashboard writes to when the user moves the slider. 
+ try: + from Project.Sanad.config import load_config + _cfg = load_config() or {} + _audio_cfg = _cfg.get("audio") if isinstance(_cfg.get("audio"), dict) else {} + _g1_vol = int(_audio_cfg.get("g1_volume", 100)) + _g1_vol = max(0, min(100, _g1_vol)) + env["SANAD_G1_VOLUME"] = str(_g1_vol) + log.info("Passing SANAD_G1_VOLUME=%d to subprocess", _g1_vol) + except Exception as exc: + log.warning("Could not read g1_volume for subprocess: %s", exc) + + # sanad_voice.py takes the DDS interface as the first positional arg + dds_iface = env.get("SANAD_DDS_INTERFACE", "eth0") + cmd = [sys.executable, str(script), dds_iface] + proc = subprocess.Popen( + cmd, + cwd=str(script.parent), + stdin=subprocess.PIPE, # camera frames + motion state push + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + env=env, + ) + + # Reap any stale frame forwarder / audio watcher from a previous + # session that ended by a child crash rather than a clean stop() — + # otherwise they'd keep spinning and we'd leak threads per restart. + for stale, stop_evt in ( + (self._frame_thread, self._frame_stop), + (self._audio_thread, self._audio_stop), + ): + if stale is not None and stale.is_alive(): + stop_evt.set() + stale.join(timeout=2.0) + + # Capture the boot profile for this session — the audio watcher + # uses it as the fallback when the Anker is unplugged. Env var is + # already in LIVE_TUNE so parent + child agree. + self._boot_profile_id = os.environ.get( + "SANAD_AUDIO_PROFILE", "builtin").strip().lower() + self._last_profile_id = None # force one initial send_profile + + with self._lock: + self.process = proc + self.log_tail.append(f"Started: pid={proc.pid}") + self._set_state("starting", f"pid={proc.pid}") + self._reader_thread = threading.Thread(target=self._reader_loop, daemon=True) + self._reader_thread.start() + # Frame forwarder — pushes camera JPEGs to the child over stdin. 
+ self._frame_stop.clear() + self._frame_thread = threading.Thread( + target=self._frame_forwarder, daemon=True, name="gemini-frame-fwd", + ) + self._frame_thread.start() + # Audio watcher — polls pactl for Anker presence and signals + # the child to hot-swap mic+speaker when it changes. + self._audio_stop.clear() + self._audio_thread = threading.Thread( + target=self._audio_watcher, daemon=True, name="gemini-audio-watcher", + ) + self._audio_thread.start() + + log.info("Live Gemini subprocess started: pid=%d", proc.pid) + return {"started": True, "pid": proc.pid} + + # ── stdin push channel ──────────────────────────────────── + + def _send_stdin(self, line: str) -> None: + """Serialised stdin write — frame forwarder + motion-state handler + both call this from different threads. Best-effort: a closed pipe + or a not-yet-started process is a silent no-op.""" + proc = self.process + if proc is None or proc.stdin is None: + return + try: + with self._stdin_lock: + if not proc.stdin.closed: + proc.stdin.write(line) + proc.stdin.flush() + except Exception: + # Pipe broke (child exited) — drop silently; the reader thread + # will surface the exit via state="stopped". + pass + + def send_frame(self, jpeg: Union[bytes, str]) -> None: + """Forward one camera frame to the child as 'frame:\\n'. + + Accepts raw JPEG bytes (base64-encoded here) or an already-base64 + ASCII string (e.g. CameraDaemon.get_frame_b64() — no re-encode).""" + if isinstance(jpeg, bytes): + b64 = base64.b64encode(jpeg).decode("ascii") + elif isinstance(jpeg, str): + b64 = jpeg.strip() + else: + return + if b64: + self._send_stdin("frame:" + b64 + "\n") + + def send_state(self, event: str, cmd: str, + elapsed_sec: Optional[float] = None, + reason: Optional[str] = None) -> None: + """Push a motion-state update to the child as 'state:\\n'. + + Events: start | complete | interrupted | error. The child injects + '[STATE-...] 
' into the live Gemini session as silent text + context so Gemini can answer "what are you doing?" honestly.""" + if not event or not cmd: + return + payload: dict[str, Any] = {"event": event, "cmd": cmd} + if elapsed_sec is not None: + payload["elapsed_sec"] = round(float(elapsed_sec), 2) + if reason: + payload["reason"] = str(reason)[:200] + try: + line = "state:" + json.dumps(payload, ensure_ascii=False) + "\n" + except Exception: + return + self._send_stdin(line) + + def _frame_forwarder(self) -> None: + """Background thread — push the camera's latest frame to the child. + + Runs for the lifetime of one subprocess session. Gated on the + camera actually running; the child does its own vision-enabled + + staleness checks, so this stays dumb (camera up → push).""" + cam = self._camera + if cam is None: + return + while not self._frame_stop.is_set(): + if self._frame_stop.wait(_FRAME_FORWARD_INTERVAL_S): + break + try: + if not cam.is_running(): + continue + b64 = cam.get_frame_b64() + if b64: + self.send_frame(b64) + except Exception: + # Best-effort — never let a frame hiccup kill the thread. + pass + + # ── audio profile watcher (parent-side detection) ──────────── + + def send_profile(self, profile_id: str, reason: str = "") -> None: + """Push an audio-profile hot-swap command to the child as + 'profile:\\n'. The child's _stdin_watcher parses it and + _audio_swap_loop performs the actual mic/speaker rebind. 
No-op + if the process isn't running or stdin is closed.""" + pid = (profile_id or "").strip().lower() + if pid not in {"builtin", "anker", "anker_powerconf", + "hollyland_builtin", "jbl_builtin_mic"}: + log.warning("send_profile: ignoring unknown profile %r", profile_id) + return + payload: dict[str, Any] = {"id": pid} + if reason: + payload["reason"] = reason[:120] + try: + line = "profile:" + json.dumps(payload, ensure_ascii=False) + "\n" + except Exception: + return + self._send_stdin(line) + + def send_pause(self, paused: bool) -> None: + """Pause/resume the live interaction while a dashboard record plays — + the child feeds Gemini silence + drops its own audio so the record + owns the chest speaker, then resumes. No-op if not running.""" + self._send_stdin("pause:%d\n" % (1 if paused else 0)) + + def _audio_watcher(self) -> None: + """Background thread — poll pactl for the Anker USB device, signal + the child on every plug/unplug edge transition. + + Detection reuses voice.audio_devices.detect_plugged_profiles() which + already shells to `pactl list short` and matches against the same + `powerconf,anker` substring AnkerMic uses. Zero new deps. + + Edge-only dispatch: we only call send_profile() when the target + flips. Rapid bounce (loose cable) is naturally rate-limited by the + poll interval. After every send_profile we also refresh the parent + audio_manager's PulseAudio defaults so non-live playback (typed + replay, record playback) follows the same device. + """ + # Lazy import — voice.audio_devices is imported at module load to + # check pactl availability without polluting our top-level imports. 
+ try: + from Project.Sanad.voice import audio_devices as _ad + except Exception as exc: + log.warning("audio watcher disabled — audio_devices import failed: %s", exc) + return + try: + if not _ad.pactl_available(): + log.warning("audio watcher disabled — pactl not available") + return + except Exception: + # If pactl_available itself isn't exposed, fall through and try + # detect_plugged_profiles — it'll raise/return empty if pactl + # is missing and we handle that below. + pass + + boot_profile = self._boot_profile_id or "builtin" + log.info("audio watcher started — Anker→anker, no-Anker→%s (poll=%.1fs)", + boot_profile, _AUDIO_WATCH_INTERVAL_S) + + while not self._audio_stop.is_set(): + if self._audio_stop.wait(_AUDIO_WATCH_INTERVAL_S): + break + try: + # Recovery script (set_powerconf_audio.sh) is intentionally + # NOT invoked from the watcher — its old card-discovery / + # module-alsa-source attempts loaded the wrong hw device + # on this Jetson and knocked the Anker out of pactl + # entirely (observed 2026-06-03). The script is now a + # passive set-default-sink/source helper meant to be run + # by hand, not from the watcher. The watcher just detects + # plug edges and dispatches profile changes to the child. + plugged = _ad.detect_plugged_profiles() + ids = {p.get("profile", {}).get("id") for p in (plugged or [])} + # Honor the user's SAVED profile selection whenever its device is + # actually plugged in — covers the JBL, Anker, Hollyland, or any + # future profile. Without this the watcher only knew the Anker and + # kept reverting every other selection back to the boot profile. + # Fallbacks: legacy Anker auto-detect, then boot profile (builtin + # G1 chest speaker) when no external device is present. 
+ try: + selected = (_ad.load_state() or {}).get("profile_id") + except Exception: + selected = None + + def _via(pid: str) -> str: + for p in plugged: + if p.get("profile", {}).get("id") == pid: + v = p.get("source_via", "pactl") + return f" via {v}" if v != "pactl" else "" + return "" + + if selected and selected != "builtin" and selected in ids: + target = selected + reason = f"{selected} present" + _via(selected) + elif _ANKER_PROFILE_ID in ids: + target = "anker" + reason = "anker plugged" + _via(_ANKER_PROFILE_ID) + else: + target = boot_profile + reason = (f"{selected} unplugged → {boot_profile}" + if selected and selected != "builtin" + else "no external device") + + if target == self._last_profile_id: + # Steady state: re-send the desired profile each poll (a + # cheap idempotent no-op on the child — _audio_swap_loop / + # swap_audio_devices short-circuit when already on target). + # This is the recovery path: if a previous swap exhausted + # its 3 retries (e.g. udev hadn't exposed the Anker yet), + # the child kept the old profile with NO pending retry; an + # edge-only watcher would never re-send the same target, + # leaving parent + child desynced until the next physical + # plug edge. Re-sending lets the child converge on its next + # tick. No log / no refresh_devices on this path (already + # done on the edge) so steady state stays quiet. + self.send_profile(target, reason=reason) + continue + + prev = self._last_profile_id + log.info("audio watcher: %s → %s (%s)", + prev or "—", target, reason) + self.send_profile(target, reason=reason) + self._last_profile_id = target + + # Keep PulseAudio defaults aligned so non-live playback + # follows the same device the live session uses. + if self._audio_mgr is not None: + try: + self._audio_mgr.refresh_devices() + except Exception as exc: + log.warning("audio watcher: refresh_devices failed: %s", exc) + except Exception as exc: + # Never let a transient pactl glitch kill the thread. 
+ log.warning("audio watcher iteration failed: %s", exc) + + def stop(self) -> dict[str, Any]: + with self._lock: + proc = self.process + if proc is None or proc.poll() is not None: + return {"stopped": False, "message": "Not running."} + self._set_state("stopping", "Stopping...") + + # Halt forwarder + audio watcher before we tear the pipe down. + self._frame_stop.set() + self._audio_stop.set() + ft = self._frame_thread + if ft is not None: + ft.join(timeout=2.0) + self._frame_thread = None + at = self._audio_thread + if at is not None: + at.join(timeout=2.0) + self._audio_thread = None + + try: + proc.send_signal(signal.SIGINT) + proc.wait(timeout=_STOP_TIMEOUT_SEC) + except subprocess.TimeoutExpired: + proc.terminate() + try: + proc.wait(timeout=_TERMINATE_TIMEOUT_SEC) + except subprocess.TimeoutExpired: + proc.kill() + proc.wait(timeout=_TERMINATE_TIMEOUT_SEC) + + rc = proc.returncode + + # Close stdin/stdout explicitly — without this each start/stop + # cycle leaks FDs (relied on Popen.__del__ which only runs at GC; + # a reconnect loop would march the FD count to the OS limit). + # The stdin close is taken under _stdin_lock: the motion-state bus + # handler still writes via _send_stdin from the arm worker thread + # (it is never joined here), so without the lock its `closed` check + # could race this close() and raise mid-write. 
+ with self._stdin_lock: + stdin_pipe = getattr(proc, "stdin", None) + if stdin_pipe is not None: + try: + stdin_pipe.close() + except Exception: + pass + stdout_pipe = getattr(proc, "stdout", None) + if stdout_pipe is not None: + try: + stdout_pipe.close() + except Exception: + pass + + with self._lock: + self.process = None + self.log_tail.append("Stopped.") + self._set_state("stopped", "Stopped.") + + log.info("Live Gemini subprocess stopped (rc=%s)", rc) + return {"stopped": True, "returncode": rc} + + def status(self) -> dict[str, Any]: + with self._lock: + running = self.process is not None and self.process.poll() is None + return { + "running": running, + "pid": self.process.pid if running and self.process else None, + "state": self.state, + "state_message": self.state_message, + "last_user_text": self.last_user_text, + "last_bot_text": self.last_bot_text, + "user_transcript": list(self.user_transcript), + "bot_transcript": list(self.bot_transcript), + "log_tail": list(self.log_tail), + "suppressed_noise": self.suppressed_noise, + } diff --git a/vendor/Sanad/local/__init__.py b/vendor/Sanad/local/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/local/llm.py b/vendor/Sanad/local/llm.py new file mode 100644 index 0000000..d3f75d2 --- /dev/null +++ b/vendor/Sanad/local/llm.py @@ -0,0 +1,305 @@ +"""LLM layer — Qwen 2.5 Instruct via Ollama (default) or self-managed llama.cpp. + +Phase 3 of the local pipeline. Two backends, selectable via +`config/local_config.json > llm.backend`: + + "ollama" — talk to a running `ollama serve` daemon (default). + No subprocess management, no CUDA build. Just: + ollama pull qwen2.5:1.5b + # daemon usually auto-starts; if not: `ollama serve &` + + "llama_cpp" — launch our own `llama-server` subprocess. Requires + a CUDA build of llama.cpp and a GGUF file at + `model/local/`. 
+ +Both backends stream tokens and chunk them on sentence delimiters so +the TTS can start synthesising before the LLM finishes. +""" + +from __future__ import annotations + +import asyncio +import json +import shutil +import subprocess +import time +from typing import AsyncIterator, Optional + +from Project.Sanad.config import MODEL_DIR +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.logger import get_logger + +log = get_logger("local_llm") +_CFG = _cfg_section("local", "llm") + +BACKEND = (_CFG.get("backend") or "ollama").strip().lower() + +# Ollama +OLLAMA_HOST = _CFG.get("ollama_host", "127.0.0.1") +OLLAMA_PORT = int(_CFG.get("ollama_port", 11434)) +OLLAMA_MODEL = _CFG.get("ollama_model", "qwen2.5:1.5b") +OLLAMA_KEEP_ALIVE = _CFG.get("ollama_keep_alive", "5m") + +# llama.cpp +MODEL_SUBDIR = _CFG.get("model_subdir", "qwen2.5-1.5b-instruct-q4_k_m.gguf") +SERVER_BIN = _CFG.get("server_binary", "llama-server") +HOST = _CFG.get("host", "127.0.0.1") +PORT = int(_CFG.get("port", 8080)) +N_GPU_LAYERS = _CFG.get("n_gpu_layers", 99) +CTX_SIZE = _CFG.get("ctx_size", 2048) +THREADS = _CFG.get("threads", 4) +STARTUP_TIMEOUT = _CFG.get("startup_timeout_sec", 30) + +# Shared generation params +REQUEST_TIMEOUT = _CFG.get("request_timeout_sec", 30) +MAX_TOKENS = _CFG.get("max_tokens", 200) +TEMPERATURE = _CFG.get("temperature", 0.7) +TOP_P = _CFG.get("top_p", 0.9) +STOP_SEQS = list(_CFG.get("stop", ["<|im_end|>"])) +CHUNK_DELIMS = _CFG.get("chunk_delimiters", ".,?!؟،") +CHUNK_MIN_CHARS = int(_CFG.get("chunk_min_chars", 8)) + +LOCAL_MODEL_PATH = MODEL_DIR / "local" / MODEL_SUBDIR + + +class LlamaServer: + """Thin wrapper — owns subprocess (llama.cpp) or no-op (ollama).""" + + def __init__(self) -> None: + self._proc: Optional[subprocess.Popen] = None + + # ─── lifecycle ──────────────────────────────────────── + + def start(self) -> None: + if BACKEND == "ollama": + self._check_ollama() + log.info("LLM backend=ollama model=%s (@ 
%s:%d)", + OLLAMA_MODEL, OLLAMA_HOST, OLLAMA_PORT) + return + if BACKEND == "llama_cpp": + self._start_llama_cpp() + return + raise RuntimeError(f"unknown llm.backend: {BACKEND!r}") + + def stop(self) -> None: + if self._proc is None: + return + try: + self._proc.terminate() + self._proc.wait(timeout=3) + except subprocess.TimeoutExpired: + self._proc.kill() + self._proc.wait(timeout=2) + except Exception as exc: + log.warning("llama-server stop error: %s", exc) + self._proc = None + + def alive(self) -> bool: + if BACKEND == "ollama": + return self._ping_ollama() + return self._proc is not None and self._proc.poll() is None + + # ─── Ollama backend ─────────────────────────────────── + + def _check_ollama(self) -> None: + """Verify the Ollama daemon is running + the model is pulled.""" + import urllib.request + tags_url = f"http://{OLLAMA_HOST}:{OLLAMA_PORT}/api/tags" + try: + with urllib.request.urlopen(tags_url, timeout=3) as r: + body = json.loads(r.read().decode("utf-8")) + except Exception as exc: + raise RuntimeError( + f"Ollama daemon not reachable at {tags_url} — is `ollama serve` running? ({exc})" + ) + models = [m.get("name", "") for m in body.get("models", [])] + if not any(OLLAMA_MODEL in m for m in models): + raise RuntimeError( + f"Ollama model {OLLAMA_MODEL!r} not pulled. " + f"Run: `ollama pull {OLLAMA_MODEL}`. 
Available: {models}" + ) + + def _ping_ollama(self) -> bool: + import urllib.request + try: + with urllib.request.urlopen( + f"http://{OLLAMA_HOST}:{OLLAMA_PORT}/api/tags", timeout=1, + ) as r: + return r.status == 200 + except Exception: + return False + + async def _stream_ollama(self, user_text: str, system_prompt: str, + cancel: asyncio.Event) -> AsyncIterator[str]: + import aiohttp + url = f"http://{OLLAMA_HOST}:{OLLAMA_PORT}/api/generate" + payload = { + "model": OLLAMA_MODEL, + "system": system_prompt, + "prompt": user_text, + "stream": True, + "keep_alive": OLLAMA_KEEP_ALIVE, + "options": { + "num_predict": MAX_TOKENS, + "temperature": TEMPERATURE, + "top_p": TOP_P, + "stop": STOP_SEQS, + }, + } + buf = "" + async with aiohttp.ClientSession() as sess: + try: + async with sess.post( + url, json=payload, + timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT)) as resp: + async for raw in resp.content: + if cancel.is_set(): + log.info("LLM stream cancelled (barge-in)") + return + line = raw.decode("utf-8", errors="ignore").strip() + if not line: + continue + try: + obj = json.loads(line) + except json.JSONDecodeError: + continue + token = obj.get("response", "") + if token: + buf += token + if len(buf) >= CHUNK_MIN_CHARS and buf[-1] in CHUNK_DELIMS: + yield buf.strip() + buf = "" + if obj.get("done"): + break + except asyncio.CancelledError: + return + except Exception as exc: + log.warning("Ollama stream error: %s", exc) + return + if buf.strip(): + yield buf.strip() + + # ─── llama.cpp backend ──────────────────────────────── + + def _start_llama_cpp(self) -> None: + if self._proc is not None and self._proc.poll() is None: + return + if not LOCAL_MODEL_PATH.exists(): + raise RuntimeError(f"LLM model not found at {LOCAL_MODEL_PATH}") + bin_path = shutil.which(SERVER_BIN) or SERVER_BIN + cmd = [ + bin_path, + "-m", str(LOCAL_MODEL_PATH), + "--host", HOST, + "--port", str(PORT), + "--n-gpu-layers", str(N_GPU_LAYERS), + "--ctx-size", str(CTX_SIZE), + "--threads", 
str(THREADS), + "--log-disable", + ] + log.info("launching llama-server: %s", " ".join(cmd)) + self._proc = subprocess.Popen( + cmd, + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + text=True, + ) + self._wait_llama_cpp_ready() + log.info("llama-server ready (pid=%d)", self._proc.pid) + + def _wait_llama_cpp_ready(self) -> None: + import urllib.request + deadline = time.time() + STARTUP_TIMEOUT + url = f"http://{HOST}:{PORT}/health" + while time.time() < deadline: + if self._proc and self._proc.poll() is not None: + stderr = self._proc.stderr.read() if self._proc.stderr else "" + raise RuntimeError( + f"llama-server exited early (code={self._proc.returncode}): {stderr[:500]}" + ) + try: + with urllib.request.urlopen(url, timeout=1) as r: + if r.status == 200: + return + except Exception: + time.sleep(0.3) + raise RuntimeError(f"llama-server did not come up within {STARTUP_TIMEOUT}s") + + async def _stream_llama_cpp(self, user_text: str, system_prompt: str, + cancel: asyncio.Event) -> AsyncIterator[str]: + import aiohttp + prompt = self._format_chatml_prompt(user_text, system_prompt) + payload = { + "prompt": prompt, + "stream": True, + "n_predict": MAX_TOKENS, + "temperature": TEMPERATURE, + "top_p": TOP_P, + "stop": STOP_SEQS, + "cache_prompt": True, + } + url = f"http://{HOST}:{PORT}/completion" + buf = "" + async with aiohttp.ClientSession() as sess: + try: + async with sess.post( + url, json=payload, + timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT)) as resp: + async for raw in resp.content: + if cancel.is_set(): + log.info("LLM stream cancelled (barge-in)") + return + line = raw.decode("utf-8", errors="ignore").strip() + if not line.startswith("data:"): + continue + line = line[len("data:"):].strip() + if not line or line == "[DONE]": + continue + try: + obj = json.loads(line) + except json.JSONDecodeError: + continue + token = obj.get("content", "") + if not token: + if obj.get("stop"): + break + continue + buf += token + if len(buf) >= 
CHUNK_MIN_CHARS and buf[-1] in CHUNK_DELIMS: + yield buf.strip() + buf = "" + except asyncio.CancelledError: + return + except Exception as exc: + log.warning("llama-server stream error: %s", exc) + return + if buf.strip(): + yield buf.strip() + + @staticmethod + def _format_chatml_prompt(user_text: str, system_prompt: str) -> str: + return ( + f"<|im_start|>system\n{system_prompt}<|im_end|>\n" + f"<|im_start|>user\n{user_text}<|im_end|>\n" + f"<|im_start|>assistant\n" + ) + + # ─── public streaming entry point ───────────────────── + + async def stream(self, user_text: str, system_prompt: str, + cancel: asyncio.Event) -> AsyncIterator[str]: + """Yield sentence-sized text chunks as the LLM generates. + + Chunk boundaries: any char in `CHUNK_DELIMS` AND buffer length + ≥ `CHUNK_MIN_CHARS`. The final buffer is flushed on completion + even without a delimiter. If `cancel` is set, the request is + aborted and the generator returns. + """ + if BACKEND == "ollama": + async for chunk in self._stream_ollama(user_text, system_prompt, cancel): + yield chunk + elif BACKEND == "llama_cpp": + async for chunk in self._stream_llama_cpp(user_text, system_prompt, cancel): + yield chunk + else: + raise RuntimeError(f"unknown llm.backend: {BACKEND!r}") diff --git a/vendor/Sanad/local/script.py b/vendor/Sanad/local/script.py new file mode 100644 index 0000000..76cef27 --- /dev/null +++ b/vendor/Sanad/local/script.py @@ -0,0 +1,259 @@ +"""LocalBrain — fully on-device voice pipeline. + +Implements the same contract as `gemini/script.py:GeminiBrain` so +`voice/sanad_voice.py` can swap it in via `SANAD_VOICE_BRAIN=local`. 
+Wires together four subsystems: + + Phase 1 — Silero VAD (mic → speech boundaries) + Phase 2 — faster-whisper (speech → text) + Phase 3 — llama.cpp + Qwen (text → streaming text chunks) + Phase 4 — CosyVoice2 streaming (text chunk → cloned-voice audio) + Phase 5 — barge-in (user speaks → cancel LLM + stop speaker) + Phase 6 — stability — model load fails cleanly, crashes are logged. + +Async structure: + run() is the main coroutine. It spawns three tasks: + _mic_task — reads mic, VAD, Whisper, pushes user text to _llm_queue + _dialogue_task — pops user text, streams LLM tokens into _tts_queue + _tts_task — pops text chunks, synthesises, feeds the speaker + +Logging contract (matched by local/subprocess.py._track_line): + "connecting to local pipeline" + "listening" + "USER: " + "BOT: " + "BARGE-IN (local)" + "session error: " +""" + +from __future__ import annotations + +import asyncio +import time +from typing import Optional + +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.logger import get_logger + +from Project.Sanad.local.llm import LlamaServer +from Project.Sanad.local.stt import WhisperSTT +from Project.Sanad.local.tts import CosyVoiceTTS +from Project.Sanad.local.vad import SileroVAD, FRAME_SAMPLES + +log = get_logger("local_brain") + +_CFG_SV = _cfg_section("voice", "sanad_voice") +_CHUNK_BYTES = FRAME_SAMPLES * 2 # int16 mono + + +class LocalBrain: + """Fully on-device Gemini replacement.""" + + def __init__(self, audio_io, recorder, voice_name: Optional[str] = None, + system_prompt: str = ""): + self._audio = audio_io + self._mic = audio_io.mic + self._speaker = audio_io.speaker + self._recorder = recorder + self._voice = voice_name + self._system_prompt = system_prompt + + # subsystems — instantiated here, loaded in run() + self._vad = SileroVAD() + self._stt = WhisperSTT() + self._llm = LlamaServer() + self._tts = CosyVoiceTTS() + + # pipeline queues + self._llm_queue: asyncio.Queue[str] = 
asyncio.Queue(maxsize=4) + self._tts_queue: asyncio.Queue[str] = asyncio.Queue(maxsize=4) + + # control flags + self._stop_flag = asyncio.Event() # full shutdown + self._interrupt = asyncio.Event() # per-turn barge-in + self._speaking = False + self._speak_start_time = 0.0 + + # ─── lifecycle ──────────────────────────────────────── + + def stop(self) -> None: + self._stop_flag.set() + self._interrupt.set() + + async def run(self) -> None: + """Main entry. Loads models, runs pipeline, handles shutdown.""" + log.info("connecting to local pipeline") + try: + await asyncio.to_thread(self._vad.start) + await asyncio.to_thread(self._stt.start) + await asyncio.to_thread(self._llm.start) + await asyncio.to_thread(self._tts.start) + except Exception as exc: + log.error("session error: local pipeline startup failed — %s", exc) + return + + log.info("listening") + try: + await asyncio.gather( + self._mic_task(), + self._dialogue_task(), + self._tts_task(), + ) + except asyncio.CancelledError: + log.info("cancelled — stopping") + except Exception as exc: + log.error("session error: %s", exc) + finally: + try: + self._llm.stop() + except Exception: + log.warning("LlamaServer.stop failed", exc_info=True) + self._tts.stop() + self._stt.stop() + self._vad.stop() + log.info("local pipeline stopped") + + # ─── barge-in ───────────────────────────────────────── + + def _begin_barge_in(self) -> None: + """Called from mic task when user starts speaking while bot is.""" + if not self._speaking: + return + log.info("BARGE-IN (local)") + self._interrupt.set() + try: + self._speaker.stop() + except Exception: + log.warning("speaker.stop during barge-in failed", exc_info=True) + # drain pipelines — discard any pending LLM/TTS chunks for this turn + self._drain_queue(self._llm_queue) + self._drain_queue(self._tts_queue) + self._speaking = False + try: + self._recorder.finish_turn() + except Exception: + pass + + @staticmethod + def _drain_queue(q: asyncio.Queue) -> None: + try: + while 
True: + q.get_nowait() + q.task_done() + except asyncio.QueueEmpty: + pass + + # ─── Task 1: mic → VAD → Whisper → LLM queue ────────── + + async def _mic_task(self) -> None: + loop = asyncio.get_event_loop() + while not self._stop_flag.is_set(): + try: + pcm = await loop.run_in_executor( + None, self._mic.read_chunk, _CHUNK_BYTES, + ) + except Exception: + await asyncio.sleep(0.01) + continue + + event = self._vad.process(pcm) + if event == "speech_start": + # user started talking — if bot is speaking, it's a barge-in + if self._speaking: + self._begin_barge_in() + elif event == "speech_end": + utt = self._vad.collected_audio() + if not utt: + continue + try: + self._recorder.capture_user(utt) + except Exception: + pass + text = await loop.run_in_executor(None, self._stt.transcribe, utt) + if not text: + continue + log.info("USER: %s", text) + try: + self._recorder.add_user_text(text) + except Exception: + pass + # wake the LLM side — drop older pending item if full (latency > throughput) + if self._llm_queue.full(): + try: + self._llm_queue.get_nowait() + except asyncio.QueueEmpty: + pass + await self._llm_queue.put(text) + + # ─── Task 2: LLM streaming → TTS queue ──────────────── + + async def _dialogue_task(self) -> None: + while not self._stop_flag.is_set(): + try: + user_text = await asyncio.wait_for( + self._llm_queue.get(), timeout=0.2) + except asyncio.TimeoutError: + continue + self._interrupt.clear() + full_response = [] + async for chunk in self._llm.stream( + user_text, self._system_prompt, self._interrupt): + if self._interrupt.is_set(): + break + full_response.append(chunk) + await self._tts_queue.put(chunk) + self._llm_queue.task_done() + if full_response and not self._interrupt.is_set(): + bot_text = " ".join(full_response).strip() + if bot_text: + log.info("BOT: %s", bot_text) + try: + self._recorder.add_robot_text(bot_text) + except Exception: + pass + + # ─── Task 3: TTS → speaker ──────────────────────────── + + async def _tts_task(self) -> 
None: + loop = asyncio.get_event_loop() + while not self._stop_flag.is_set(): + try: + chunk_text = await asyncio.wait_for( + self._tts_queue.get(), timeout=0.2) + except asyncio.TimeoutError: + # idle — if we've been speaking and queue drained, close stream + if self._speaking and self._llm_queue.empty() and self._tts_queue.empty(): + await loop.run_in_executor(None, self._speaker.wait_finish) + self._speaking = False + log.info("listening") + try: + self._recorder.finish_turn() + except Exception: + pass + continue + if self._interrupt.is_set(): + self._tts_queue.task_done() + continue + + # synthesise this text chunk → stream to speaker + if not self._speaking: + await loop.run_in_executor(None, self._speaker.begin_stream) + self._speaking = True + self._speak_start_time = time.time() + + try: + for pcm in self._tts.synthesize_stream(chunk_text): + if self._interrupt.is_set(): + break + try: + self._recorder.capture_robot(pcm) + except Exception: + pass + await loop.run_in_executor( + None, self._speaker.send_chunk, + pcm, self._tts.output_rate, + ) + except Exception as exc: + log.warning("TTS chunk failed: %s", exc) + finally: + self._tts_queue.task_done() diff --git a/vendor/Sanad/local/stt.py b/vendor/Sanad/local/stt.py new file mode 100644 index 0000000..9f84fea --- /dev/null +++ b/vendor/Sanad/local/stt.py @@ -0,0 +1,96 @@ +"""faster-whisper Large V3 Turbo — GPU INT8 transcription. + +Phase 2 of the local pipeline. Given an utterance (int16 PCM bytes at +16 kHz), returns transcribed text. Short / empty / no-speech results are +filtered out per config thresholds to avoid firing phantom triggers. + +Install (on the robot, in the `local` env): + pip install faster-whisper==1.0.* + # model auto-downloads from HuggingFace on first `WhisperModel(...)` call, + # OR pre-download to model/local/faster-whisper-large-v3-turbo/ and point + # `local.stt.model_subdir` at it. 
class WhisperSTT:
    """Thin wrapper around faster_whisper.WhisperModel."""

    def __init__(self) -> None:
        # Populated by start(), cleared again by stop().
        self._model = None

    def start(self) -> None:
        """Load the model into VRAM. ~4 s on first call, 100 ms after.

        Uses the pre-downloaded directory ``LOCAL_MODEL_DIR`` when it exists,
        otherwise passes ``MODEL_NAME`` to faster-whisper.

        Raises:
            RuntimeError: if the ``faster-whisper`` package is not installed.
        """
        try:
            from faster_whisper import WhisperModel
        except ImportError as exc:
            raise RuntimeError(
                f"WhisperSTT requires 'faster-whisper': {exc}"
            ) from exc

        model_src = str(LOCAL_MODEL_DIR) if LOCAL_MODEL_DIR.exists() else MODEL_NAME
        log.info("loading Whisper: src=%s device=%s compute=%s",
                 model_src, DEVICE, COMPUTE_TYPE)
        self._model = WhisperModel(
            model_src,
            device=DEVICE,
            compute_type=COMPUTE_TYPE,
        )
        log.info("WhisperSTT ready")

    def transcribe(self, pcm: bytes) -> str:
        """Blocking transcription. Returns the full text or ''.

        ``pcm`` is interpreted as int16 samples. An empty buffer, a buffer too
        short to hold one sample, a transcription error, or a transcript
        shorter than ``MIN_CHARS`` all yield ``''``. A dangling odd byte at
        the end of ``pcm`` is ignored instead of raising.
        """
        if self._model is None:
            log.warning("WhisperSTT.transcribe called before start()")
            return ""
        if not pcm:
            return ""
        # int16 = 2 bytes per sample; read whole samples only so a truncated
        # capture cannot make np.frombuffer raise into the caller's task.
        sample_count = len(pcm) // 2
        if sample_count == 0:
            return ""
        audio = np.frombuffer(
            pcm, dtype=np.int16, count=sample_count,
        ).astype(np.float32) / 32768.0
        try:
            segments, info = self._model.transcribe(
                audio,
                beam_size=BEAM_SIZE,
                language=LANGUAGE,
                vad_filter=VAD_FILTER,
                no_speech_threshold=NO_SPEECH_THRESHOLD,
                temperature=TEMPERATURE,
            )
            # `segments` is consumed inside the try so that errors raised
            # while iterating it are handled here as well.
            text = " ".join(seg.text.strip() for seg in segments).strip()
        except Exception as exc:
            log.warning("Whisper transcribe failed: %s", exc)
            return ""

        if len(text) < MIN_CHARS:
            log.debug("drop short transcript: %r", text)
            return ""
        return text

    def stop(self) -> None:
        """Drop the reference to the loaded model."""
        self._model = None
+""" + +from __future__ import annotations + +import os +import signal +import subprocess +import sys +import threading +from collections import deque +from datetime import datetime +from pathlib import Path +from typing import Any + +from Project.Sanad.config import BASE_DIR, LOGS_DIR, SCRIPTS_DIR, LIVE_TUNE +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.logger import get_logger + +log = get_logger("local_subprocess") + +_LS_CFG = _cfg_section("local", "subprocess") + + +def _resolve_live_script() -> Path: + """Locate the voice script to run as subprocess (same as Gemini's).""" + override = os.environ.get("SANAD_LIVE_SCRIPT", "").strip() + if override: + p = Path(override).expanduser() + if p.exists(): + return p + for c in (BASE_DIR / "voice" / "sanad_voice.py", + SCRIPTS_DIR / "sanad_voice.py"): + if c.exists(): + return c + return SCRIPTS_DIR / "sanad_voice.py" + + +LIVE_SCRIPT = _resolve_live_script() +LOG_TAIL_SIZE = _LS_CFG.get("log_tail_size", 2000) +TRANSCRIPT_TAIL_SIZE = _LS_CFG.get("transcript_tail_size", 30) +LIVE_LOG_DIR = LOGS_DIR +LIVE_LOG_NAME = _LS_CFG.get("log_name", "local_subprocess") + +# Python binary for the child process. The local pipeline runs in a +# separate conda env (Python 3.8 + Jetson CUDA torch + CosyVoice/Whisper); +# the dashboard stays in gemini_sdk (Python 3.10). Override with +# SANAD_LOCAL_PYTHON env var at runtime. 
class LocalSubprocess:
    """Supervisor for the local voice-pipeline child process.

    Spawns the live voice script with ``SANAD_VOICE_BRAIN=local``, mirrors the
    child's combined stdout/stderr into a bounded in-memory tail and a daily
    log file, and derives a coarse state plus the user transcript from the log
    markers the child prints.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self.process: subprocess.Popen | None = None
        self.log_tail: deque[str] = deque(maxlen=LOG_TAIL_SIZE)
        self.user_transcript: deque[str] = deque(maxlen=TRANSCRIPT_TAIL_SIZE)
        self._reader_thread: threading.Thread | None = None
        self._log_file = None
        self.state = "stopped"
        self.state_message = "Idle."
        self.last_user_text = ""
        self.suppressed_noise = 0

    # ─── log I/O ──────────────────────────────────────────

    def _open_session_log(self, pid: int):
        """Open (append mode) today's log file and write a start banner.

        Returns the line-buffered file handle, or ``None`` when the file
        cannot be opened — logging to disk is best-effort.
        """
        try:
            LIVE_LOG_DIR.mkdir(parents=True, exist_ok=True)
            fname = f"{LIVE_LOG_NAME}_{datetime.now().strftime('%Y%m%d')}.log"
            fh = open(LIVE_LOG_DIR / fname, "a", encoding="utf-8", buffering=1)
            fh.write(
                f"\n===== local subprocess start "
                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}  pid={pid} =====\n"
            )
            return fh
        except Exception as exc:
            log.warning("Could not open local subprocess log file: %s", exc)
            return None

    def _is_noisy(self, line: str) -> bool:
        """Return True for lines matching the configured noise prefixes/fragments."""
        return line.startswith(_NOISY_PREFIXES) or any(f in line for f in _NOISY_FRAGMENTS)

    def _set_state(self, state: str, msg: str):
        """Record the current state and its human-readable message."""
        self.state = state
        self.state_message = msg

    def _track_line(self, line: str):
        """Parse log markers emitted by `local/script.py:LocalBrain`.

        Must stay in lock-step with the `log.info(...)` strings there.
        Branch order matters: the first matching marker wins.
        """
        if "connecting to local pipeline" in line:
            self._set_state("connecting", line)
        elif " USER: " in line or line.strip().startswith("USER:"):
            text = line.split("USER:", 1)[1].strip()
            if text:
                self.last_user_text = text
                self.user_transcript.append(text)
                self._set_state("hearing", f"User: {text}")
        elif " BOT: " in line or line.strip().startswith("BOT:"):
            self._set_state("speaking", line.split("BOT:", 1)[1].strip()[:80])
        elif "BARGE-IN (local)" in line:
            self._set_state("interrupting", line)
        elif "session error" in line:
            self._set_state("error", line)
        elif "local pipeline stopped" in line or "cancelled — stopping" in line:
            self._set_state("stopped", line)
        elif "listening" in line.lower() and "no speech" not in line:
            self._set_state("listening", "Listening for speech.")

    def _reader_loop(self):
        """Consume the child's output until EOF (runs in the reader thread).

        Every non-empty line is written to the session log; lines that are
        not classified as noise are also appended to ``log_tail`` and fed to
        ``_track_line``. The exit bookkeeping runs in a ``finally`` block and
        only touches the shared state while this reader's process is still
        the current one, so a stale reader cannot overwrite a newer run.
        """
        proc = self.process
        if proc is None or proc.stdout is None:
            return
        fh = self._open_session_log(proc.pid)
        self._log_file = fh
        try:
            for line in proc.stdout:
                clean = line.rstrip()
                if not clean:
                    continue
                if fh is not None:
                    try:
                        fh.write(clean + "\n")
                    except Exception:
                        pass  # disk logging is best-effort
                with self._lock:
                    if self._is_noisy(clean):
                        self.suppressed_noise += 1
                        continue
                    self.log_tail.append(clean)
                    self._track_line(clean)
        finally:
            with self._lock:
                if self.process is None or self.process is proc:
                    self.log_tail.append("Local pipeline process exited.")
                    self._set_state("stopped", "Process exited.")
            if fh is not None:
                try:
                    fh.write(
                        f"===== local subprocess exit "
                        f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====\n"
                    )
                    fh.close()
                except Exception:
                    pass
            if self._log_file is fh:
                self._log_file = None

    # ─── lifecycle ────────────────────────────────────────

    def is_running(self) -> bool:
        """Return True while a child exists and has not exited."""
        with self._lock:
            return self.process is not None and self.process.poll() is None

    def start(self) -> dict[str, Any]:
        """Spawn the child process unless one is already running.

        The already-running check and the spawn happen under one lock
        acquisition, so two concurrent callers cannot both start a child.

        Returns:
            ``{"started": True, "pid": ...}`` on success, or
            ``{"started": False, "message": ..., "pid": ...}`` when a child is
            already running.

        Raises:
            RuntimeError: if the voice script does not exist.
            OSError: if the interpreter cannot be executed.
        """
        import shutil  # local import: only used to resolve a bare interpreter name

        with self._lock:
            if self.process is not None and self.process.poll() is None:
                return {"started": False, "message": "Already running.", "pid": self.process.pid}
            self._set_state("starting", "Starting local pipeline (loading models)...")

            script = LIVE_SCRIPT
            if not script.exists():
                # do not leave the state stuck on "starting"
                self._set_state("error", f"Script not found: {script}")
                raise RuntimeError(f"Script not found: {script}")

            env = os.environ.copy()
            env.update({
                "PYTHONUNBUFFERED": "1",
                **LIVE_TUNE,
                "SANAD_VOICE_BRAIN": "local",
            })
            # Ask the child to emit UTF-8 (unless the caller configured
            # otherwise) so it agrees with the decoder used on the pipe below.
            env.setdefault("PYTHONIOENCODING", "utf-8")

            dds_iface = env.get("SANAD_DDS_INTERFACE", "eth0")
            # Use the `local` env's Python so CUDA torch + CosyVoice are available.
            # A bare command name (e.g. "python3") is resolved through PATH;
            # fall back to sys.executable only if nothing can be found.
            py_bin = LOCAL_PYTHON_BIN
            if not Path(py_bin).exists():
                on_path = shutil.which(py_bin)
                if on_path:
                    py_bin = on_path
                else:
                    log.warning("LOCAL_PYTHON_BIN=%s not found, falling back to %s",
                                py_bin, sys.executable)
                    py_bin = sys.executable
            cmd = [py_bin, str(script), dds_iface]
            try:
                proc = subprocess.Popen(
                    cmd,
                    cwd=str(script.parent),
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    text=True,
                    # Explicit codec + "replace": an undecodable byte must not
                    # raise in the reader thread, which would stop draining
                    # the pipe and eventually block the child on write.
                    encoding="utf-8",
                    errors="replace",
                    bufsize=1,
                    env=env,
                )
            except OSError as exc:
                self._set_state("error", f"Could not start local pipeline: {exc}")
                raise

            self.process = proc
            self.log_tail.append(f"Started: pid={proc.pid}")
            self._set_state("starting", f"pid={proc.pid}")
            self._reader_thread = threading.Thread(target=self._reader_loop, daemon=True)
            self._reader_thread.start()

        log.info("Local subprocess started: pid=%d", proc.pid)
        return {"started": True, "pid": proc.pid}

    def stop(self) -> dict[str, Any]:
        """Stop the child: SIGINT first, then terminate, then kill.

        Returns:
            ``{"stopped": True, "returncode": ...}``, or
            ``{"stopped": False, "message": "Not running."}`` when there is no
            live child.
        """
        with self._lock:
            proc = self.process
            if proc is None or proc.poll() is not None:
                return {"stopped": False, "message": "Not running."}
            reader = self._reader_thread
            self._set_state("stopping", "Stopping...")

        try:
            proc.send_signal(signal.SIGINT)
            proc.wait(timeout=_STOP_TIMEOUT_SEC)
        except subprocess.TimeoutExpired:
            proc.terminate()
            try:
                proc.wait(timeout=_TERMINATE_TIMEOUT_SEC)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait(timeout=_TERMINATE_TIMEOUT_SEC)

        rc = proc.returncode

        # Give the reader a bounded chance to flush the child's last lines and
        # close its log file before the final state is published.
        if reader is not None and reader is not threading.current_thread():
            reader.join(timeout=_TERMINATE_TIMEOUT_SEC)

        with self._lock:
            self.log_tail.append("Stopped.")
            # Only clear the handle/state if no newer child was started while
            # this one was shutting down.
            if self.process is proc:
                self.process = None
                self._set_state("stopped", "Stopped.")

        log.info("Local subprocess stopped (rc=%s)", rc)
        return {"stopped": True, "returncode": rc}

    def status(self) -> dict[str, Any]:
        """Return a snapshot of the process, state and log/transcript tails."""
        with self._lock:
            running = self.process is not None and self.process.poll() is None
            return {
                "running": running,
                "pid": self.process.pid if running and self.process else None,
                "state": self.state,
                "state_message": self.state_message,
                "last_user_text": self.last_user_text,
                "user_transcript": list(self.user_transcript),
                "log_tail": list(self.log_tail),
                "suppressed_noise": self.suppressed_noise,
            }
+""" + +from __future__ import annotations + +from pathlib import Path +from typing import AsyncIterator, Iterator, Optional + +import numpy as np + +from Project.Sanad.config import MODEL_DIR +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.logger import get_logger + +log = get_logger("local_tts") +_CFG = _cfg_section("local", "tts") + +MODEL_SUBDIR = _CFG.get("model_subdir", "CosyVoice2-0.5B") +REFERENCE_WAV_SUBDIR = _CFG.get("reference_wav_subdir", "khaleeji_reference_3s.wav") +REFERENCE_PROMPT = _CFG.get("reference_prompt", "") +OUT_RATE = int(_CFG.get("sample_rate", 16000)) +QUEUE_MAX = int(_CFG.get("queue_max", 3)) +DEVICE = _CFG.get("device", "cuda") + +LOCAL_MODEL_DIR = MODEL_DIR / "local" / MODEL_SUBDIR +REFERENCE_WAV_PATH = MODEL_DIR / "local" / REFERENCE_WAV_SUBDIR + + +def _resample_int16(pcm: np.ndarray, src_rate: int, dst_rate: int) -> np.ndarray: + if src_rate == dst_rate or pcm.size == 0: + return pcm.astype(np.int16, copy=False) + target_len = max(1, int(len(pcm) * dst_rate / src_rate)) + return np.interp( + np.linspace(0, len(pcm), target_len, endpoint=False), + np.arange(len(pcm)), + pcm.astype(np.float64), + ).astype(np.int16) + + +class CosyVoiceTTS: + """Thin async wrapper around CosyVoice2 streaming inference.""" + + def __init__(self) -> None: + self._model = None + self._ref_speech = None # preloaded reference tensor + self._ref_prompt = REFERENCE_PROMPT + self._model_rate: int = 22050 + + def start(self) -> None: + try: + from cosyvoice.cli.cosyvoice import CosyVoice2 + from cosyvoice.utils.file_utils import load_wav + except ImportError as exc: + raise RuntimeError( + f"CosyVoiceTTS requires the CosyVoice package from source: {exc}" + ) + if not LOCAL_MODEL_DIR.exists(): + raise RuntimeError(f"CosyVoice2 model not found at {LOCAL_MODEL_DIR}") + if not REFERENCE_WAV_PATH.exists(): + raise RuntimeError( + f"Reference voice WAV not found at {REFERENCE_WAV_PATH}" + ) + log.info("loading 
CosyVoice2: %s", LOCAL_MODEL_DIR) + self._model = CosyVoice2(str(LOCAL_MODEL_DIR), load_jit=True, fp16=True) + # model.sample_rate is an instance attr on CosyVoice2 + self._model_rate = getattr(self._model, "sample_rate", 22050) + self._ref_speech = load_wav(str(REFERENCE_WAV_PATH), 16000) + log.info("CosyVoiceTTS ready (model_rate=%d)", self._model_rate) + + def synthesize_stream(self, text: str) -> Iterator[bytes]: + """Yield int16 PCM bytes at OUT_RATE, one streaming chunk at a time.""" + if self._model is None or self._ref_speech is None: + return + try: + # CosyVoice2 streaming generator. Each step yields a tensor + # of float32 waveform samples at the model's native rate. + for step in self._model.inference_zero_shot( + text, + self._ref_prompt, + self._ref_speech, + stream=True): + wave = step.get("tts_speech") + if wave is None: + continue + # tensor → float32 numpy → int16 at OUT_RATE + arr = wave.cpu().numpy().squeeze() + if arr.size == 0: + continue + pcm_i16 = np.clip(arr * 32767.0, -32768, 32767).astype(np.int16) + if self._model_rate != OUT_RATE: + pcm_i16 = _resample_int16(pcm_i16, self._model_rate, OUT_RATE) + yield pcm_i16.tobytes() + except Exception as exc: + log.warning("TTS synth failed for chunk %r: %s", text[:40], exc) + + def stop(self) -> None: + self._model = None + self._ref_speech = None + + @property + def output_rate(self) -> int: + return OUT_RATE diff --git a/vendor/Sanad/local/vad.py b/vendor/Sanad/local/vad.py new file mode 100644 index 0000000..12a1541 --- /dev/null +++ b/vendor/Sanad/local/vad.py @@ -0,0 +1,150 @@ +"""Silero VAD wrapper — CPU-only speech boundary detection. + +Phase 1 of the local pipeline. Consumes 16 kHz mono int16 PCM in short +frames, emits speech_start / speech_end events. All thresholds + frame +sizes come from config/local_config.json > vad. 
class SileroVAD:
    """Streaming VAD with buffered utterance capture.

    Fed one mic frame at a time via `process()`. Internal state tracks
    whether we're inside an utterance; on speech_end, `collected_audio()`
    returns the full utterance (with configured padding): up to
    ``PAD_START_MS`` of lead-in and up to ``PAD_END_MS`` of trailing silence.
    """

    _BYTES_PER_SAMPLE = 2  # frames are decoded as int16

    def __init__(self) -> None:
        self._model = None
        self._audio_buf: list[bytes] = []       # utterance being collected
        self._pre_buf: list[bytes] = []         # rolling "pre-speech" ring
        self._pre_frames = max(1, PAD_START_MS // FRAME_MS)
        self._pad_end_frames = max(1, PAD_END_MS // FRAME_MS)
        self._in_speech = False
        self._last_speech_time = 0.0
        self._speech_start_time = 0.0
        self._trailing_silence_frames = 0
        # Audio-time bookkeeping for the min-speech check: bytes fed since the
        # utterance started, and that same total at the latest speech frame.
        self._utt_bytes = 0
        self._speech_bytes = 0
        self._last_utterance: Optional[bytes] = None

    def start(self) -> None:
        """Load the Silero model once. Call before `process()`.

        Raises:
            RuntimeError: if ``silero-vad`` or ``torch`` cannot be imported.
        """
        try:
            import torch  # noqa: F401 — imported here only to fail fast
            from silero_vad import load_silero_vad
        except ImportError as exc:
            raise RuntimeError(
                f"SileroVAD requires 'silero-vad' + torch: {exc}"
            ) from exc
        self._model = load_silero_vad()
        log.info("SileroVAD ready (threshold=%.2f, frame=%dms)",
                 THRESHOLD, FRAME_MS)

    def process(self, pcm: bytes) -> Optional[str]:
        """Feed one frame (≈ FRAME_MS of audio). Returns an event or None.

        Events: 'speech_start' | 'speech_end' | None

        'speech_end' is emitted after ``MIN_SILENCE_MS`` of consecutive
        non-speech frames, provided the speech itself (first to last speech
        frame, measured in audio time) lasted at least ``MIN_SPEECH_MS``;
        shorter bursts are dropped silently.
        """
        if self._model is None:
            return None
        # keep a rolling pre-buffer so captured utterances include lead-in
        self._pre_buf.append(pcm)
        if len(self._pre_buf) > self._pre_frames:
            self._pre_buf.pop(0)

        # VAD expects float32 in [-1, 1]; ignore a dangling odd byte so a
        # truncated chunk cannot make np.frombuffer raise.
        raw = pcm if len(pcm) % self._BYTES_PER_SAMPLE == 0 else pcm[:-1]
        arr = np.frombuffer(raw, dtype=np.int16).astype(np.float32) / 32768.0
        if arr.size < FRAME_SAMPLES:
            # pad if short tail chunk arrived
            arr = np.concatenate([arr, np.zeros(FRAME_SAMPLES - arr.size, dtype=np.float32)])
        elif arr.size > FRAME_SAMPLES:
            arr = arr[:FRAME_SAMPLES]

        try:
            import torch
            with torch.no_grad():
                prob = float(self._model(torch.from_numpy(arr), SAMPLE_RATE).item())
        except Exception as exc:
            log.warning("VAD inference failed: %s", exc)
            return None

        now = time.time()
        is_speech = prob >= THRESHOLD

        if is_speech:
            self._trailing_silence_frames = 0
            self._last_speech_time = now
            if not self._in_speech:
                # transition → speech
                self._in_speech = True
                self._speech_start_time = now
                # The pre-buffer already ends with this frame (appended
                # above), so it must not be appended a second time.
                self._audio_buf = list(self._pre_buf)
                self._utt_bytes = len(pcm)
                self._speech_bytes = self._utt_bytes
                return "speech_start"
            self._audio_buf.append(pcm)
            self._utt_bytes += len(pcm)
            self._speech_bytes = self._utt_bytes
            return None

        # silent frame
        if not self._in_speech:
            return None
        self._audio_buf.append(pcm)     # collect trailing pad
        self._utt_bytes += len(pcm)
        self._trailing_silence_frames += 1
        silence_ms = self._trailing_silence_frames * FRAME_MS
        if silence_ms < MIN_SILENCE_MS:
            return None

        # speech ended — validate min_speech on the speech span only. The
        # trailing silence window is excluded, otherwise the check could
        # never fail once MIN_SILENCE_MS >= MIN_SPEECH_MS.
        self._in_speech = False
        speech_dur_ms = (
            self._speech_bytes / self._BYTES_PER_SAMPLE / SAMPLE_RATE * 1000.0
        )
        if speech_dur_ms < MIN_SPEECH_MS:
            log.debug("drop short utterance (%.0fms)", speech_dur_ms)
            self._audio_buf.clear()
            self._last_utterance = None
            return None
        # keep at most the configured end padding of trailing silence
        excess = self._trailing_silence_frames - self._pad_end_frames
        if excess > 0:
            del self._audio_buf[-excess:]
        self._last_utterance = b"".join(self._audio_buf)
        self._audio_buf.clear()
        return "speech_end"

    def collected_audio(self) -> Optional[bytes]:
        """After a speech_end event, return the full utterance bytes."""
        return self._last_utterance

    def reset(self) -> None:
        """Drop any in-flight utterance (used on barge-in)."""
        self._in_speech = False
        self._audio_buf.clear()
        self._trailing_silence_frames = 0
        self._utt_bytes = 0
        self._speech_bytes = 0
        self._last_utterance = None

    def stop(self) -> None:
        """Drop the reference to the loaded model; `process()` then returns None."""
        self._model = None
# Resolve where this file lives; the parent directory's name decides which of
# the two supported layouts is in effect.
_THIS_DIR = Path(__file__).resolve().parent          # .../Sanad
_PARENT = _THIS_DIR.parent                            # .../Project  OR  /home/unitree

if _PARENT.name == "Project":
    # Dev layout — add the directory containing Project/
    _ROOT = _PARENT.parent
    if str(_ROOT) not in sys.path:
        sys.path.insert(0, str(_ROOT))
    # This codebase imports itself as `Project.Sanad.*`. If this folder is a copy
    # under a different name (e.g. Sanadv3), alias Project.Sanad → THIS package so
    # it imports its OWN modules, not the sibling Project/Sanad. (The original
    # Sanad folder is unaffected — this only triggers for renamed copies.)
    if _THIS_DIR.name != "Sanad" and "Project.Sanad" not in sys.modules:
        _self_pkg = importlib.import_module(f"Project.{_THIS_DIR.name}")
        # Register under both the sys.modules key and the parent-package
        # attribute so `import Project.Sanad` and `Project.Sanad` agree.
        sys.modules["Project.Sanad"] = _self_pkg
        sys.modules["Project"].Sanad = _self_pkg  # type: ignore[attr-defined]
else:
    # Deployed layout — create a virtual Project package and alias
    if str(_PARENT) not in sys.path:
        sys.path.insert(0, str(_PARENT))
    if "Project" not in sys.modules:
        _proj = types.ModuleType("Project")
        _proj.__path__ = []  # mark as namespace package
        sys.modules["Project"] = _proj
    if "Project.Sanad" not in sys.modules:
        # Import the local Sanad package as a top-level module first
        # (importable because _PARENT was put on sys.path above).
        _sanad = importlib.import_module(_THIS_DIR.name)
        sys.modules["Project.Sanad"] = _sanad
        sys.modules["Project"].Sanad = _sanad  # type: ignore[attr-defined]

# When main.py runs as a script (`python3 main.py`), Python loads it as the
# `__main__` module — NOT as `Project.Sanad.main`. Route handlers later do
# `from Project.Sanad.main import arm` etc; without the alias below, Python
# would re-execute this file from scratch under a different module name,
# creating a SECOND set of subsystem instances (uninitialised). Every
# `subsystem not available` / `No LowState` symptom traces back to this.
+# The alias ensures both names point at the exact same module object. +if __name__ == "__main__": + sys.modules["Project.Sanad.main"] = sys.modules["__main__"] + +# asyncio compat shim — backfills asyncio.to_thread for Python 3.8. +# MUST be imported before any other Sanad module that uses asyncio.to_thread. +from Project.Sanad.core import asyncio_compat # noqa: F401 + +from Project.Sanad.config import ( + DASHBOARD_HOST, + DASHBOARD_PORT, + DASHBOARD_INTERFACE, + DDS_NETWORK_INTERFACE, +) +from Project.Sanad.core.logger import get_logger + +log = get_logger("main") + + +def _safe_import(label: str, importer): + """Import a module by callable, returning None if it fails.""" + try: + return importer() + except Exception: + log.exception("Failed to import %s — that subsystem will be unavailable", label) + return None + + +def _safe_construct(name: str, factory): + """Construct a subsystem, log + return None on failure.""" + if factory is None: + return None + try: + return factory() + except Exception: + log.exception("Failed to construct %s — that subsystem will be unavailable", name) + return None + + +# ── isolated imports — one bad module never blocks the others ── +Brain = _safe_import("Brain", lambda: __import__("Project.Sanad.core.brain", fromlist=["Brain"]).Brain) +ArmController = _safe_import("ArmController", lambda: __import__("Project.Sanad.motion.arm_controller", fromlist=["ArmController"]).ArmController) +MacroPlayer = _safe_import("MacroPlayer", lambda: __import__("Project.Sanad.motion.macro_player", fromlist=["MacroPlayer"]).MacroPlayer) +MacroRecorder = _safe_import("MacroRecorder", lambda: __import__("Project.Sanad.motion.macro_recorder", fromlist=["MacroRecorder"]).MacroRecorder) +TeachingSession = _safe_import("TeachingSession", lambda: __import__("Project.Sanad.motion.teaching", fromlist=["TeachingSession"]).TeachingSession) +AudioManager = _safe_import("AudioManager", lambda: __import__("Project.Sanad.voice.audio_manager", 
fromlist=["AudioManager"]).AudioManager) +LocalTTSEngine = _safe_import("LocalTTSEngine", lambda: __import__("Project.Sanad.voice.local_tts", fromlist=["LocalTTSEngine"]).LocalTTSEngine) +WakePhraseManager = _safe_import("WakePhraseManager", lambda: __import__("Project.Sanad.voice.wake_phrase_manager", fromlist=["WakePhraseManager"]).WakePhraseManager) +LiveVoiceLoop = _safe_import("LiveVoiceLoop", lambda: __import__("Project.Sanad.voice.live_voice_loop", fromlist=["LiveVoiceLoop"]).LiveVoiceLoop) +TypedReplayEngine = _safe_import("TypedReplayEngine", lambda: __import__("Project.Sanad.voice.typed_replay", fromlist=["TypedReplayEngine"]).TypedReplayEngine) +GeminiVoiceClient = _safe_import("GeminiVoiceClient", lambda: __import__("Project.Sanad.gemini.client", fromlist=["GeminiVoiceClient"]).GeminiVoiceClient) +GeminiSubprocess = _safe_import("GeminiSubprocess", lambda: __import__("Project.Sanad.gemini.subprocess", fromlist=["GeminiSubprocess"]).GeminiSubprocess) +LocalSubprocess = _safe_import("LocalSubprocess", lambda: __import__("Project.Sanad.local.subprocess", fromlist=["LocalSubprocess"]).LocalSubprocess) +CameraDaemon = _safe_import("CameraDaemon", lambda: __import__("Project.Sanad.vision.camera", fromlist=["CameraDaemon"]).CameraDaemon) +FaceGallery = _safe_import("FaceGallery", lambda: __import__("Project.Sanad.vision.face_gallery", fromlist=["FaceGallery"]).FaceGallery) +ZoneGallery = _safe_import("ZoneGallery", lambda: __import__("Project.Sanad.vision.zone_gallery", fromlist=["ZoneGallery"]).ZoneGallery) +LocoController = _safe_import("LocoController", lambda: __import__("Project.Sanad.G1_Controller.loco_controller", fromlist=["LocoController"]).LocoController) +MovementDispatcher = _safe_import("MovementDispatcher", lambda: __import__("Project.Sanad.voice.movement_dispatch", fromlist=["MovementDispatcher"]).MovementDispatcher) +FaceController = _safe_import("FaceController", lambda: __import__("Project.Sanad.face.mask_face", 
fromlist=["FaceController"]).FaceController) +WebNav3Client = _safe_import("WebNav3Client", lambda: __import__("Project.Sanad.navigation", fromlist=["WebNav3Client"]).WebNav3Client) + + +# ── global instances (imported by route modules) ── + +brain = _safe_construct("brain", Brain) if Brain else None +arm = _safe_construct("arm", ArmController) +audio_mgr = _safe_construct("audio_mgr", AudioManager) +# The voice_client speaks TYPED text (typed-replay + /api/voice/generate), so it +# uses the multilingual verbatim TTS prompt — NOT the Khaleeji persona, which +# forced every language to Arabic. (The live conversation uses live_sub, not +# this client; live_voice only reads its connection flag.) +def _build_voice_client(): + from Project.Sanad.gemini.client import TTS_SYSTEM_PROMPT + return GeminiVoiceClient(system_prompt=TTS_SYSTEM_PROMPT) +voice_client = _safe_construct("voice_client", _build_voice_client if GeminiVoiceClient else None) +local_tts = _safe_construct("local_tts", LocalTTSEngine) +wake_mgr = _safe_construct("wake_mgr", WakePhraseManager) +macro_rec = _safe_construct("macro_rec", (lambda: MacroRecorder(arm)) if (MacroRecorder and arm) else None) +macro_play = _safe_construct("macro_play", (lambda: MacroPlayer(audio_mgr, arm)) if (MacroPlayer and arm) else None) +teacher = _safe_construct("teacher", (lambda: TeachingSession(arm)) if (TeachingSession and arm) else None) +live_voice = _safe_construct("live_voice", (lambda: LiveVoiceLoop(voice_client, arm, wake_mgr, audio_mgr)) if (LiveVoiceLoop and voice_client and arm and wake_mgr and audio_mgr) else None) +# Which voice supervisor to mount. SANAD_VOICE_BRAIN chooses the brain +# that runs INSIDE the subprocess (see voice/sanad_voice.py); the same +# env var picks WHICH supervisor here manages that subprocess so its +# log-line parser matches the brain's emit format. 
+_brain_choice = os.environ.get("SANAD_VOICE_BRAIN", "gemini").strip().lower() +if _brain_choice == "local" and LocalSubprocess is not None: + live_sub = _safe_construct("live_sub", LocalSubprocess) +else: + live_sub = _safe_construct("live_sub", GeminiSubprocess) +typed_replay = _safe_construct("typed_replay", (lambda: TypedReplayEngine(voice_client, audio_mgr)) if (TypedReplayEngine and voice_client and audio_mgr) else None) + +# ── LED face mask (Mask project) — BLE animated face, own asyncio loop ─────── +# Constructs idle (no BLE); the dashboard "Mask Face" tab connects on demand. +# Unavailable (None) if the Mask lib / bleak / Pillow aren't importable. +mask_face = _safe_construct("mask_face", FaceController) + +# ── Locomotion controller (N2) — manual dashboard locomotion ──────────────── +# Reuses the arm controller's single ChannelFactoryInitialize (one DDS init per +# process) — it does NOT init DDS itself. Disarmed every boot. See +# G1_Controller/loco_controller.py and dashboard/routes/controller.py. +loco_controller = _safe_construct( + "loco_controller", + (lambda: LocoController(arm)) if (LocoController and arm) else None) + +# Arm ⇄ locomotion mutual exclusion: the arm must NEVER run a replay / SDK +# action / gesture while the robot may be walking. `movement_active` is True for +# the MANUAL gate (armed/teleop) AND for ~1.5s after any move/step — so it also +# covers Phase-3 Gemini-driven moves (which call loco.move/step directly). +# Checked at every arm playback chokepoint (replay_file / _execute), so it blocks +# voice/Gemini-triggered gestures too, not just the dashboard. 
+if arm is not None and loco_controller is not None: + try: + if hasattr(arm, "set_motion_block"): + arm.set_motion_block(loco_controller.movement_active) + log.info("Arm motion-block wired to locomotion movement_active") + except Exception: + log.exception("Could not wire arm motion-block") + +# The voice→arm path (live_voice_loop) drives the SEPARATE singleton +# motion.sanad_arm_controller.ARM, not the `arm` instance above. Wire the SAME +# locomotion interlock onto it so a spoken gesture can't move the arms while +# the robot is (or just was) walking — otherwise the motion-block above would +# only cover the dashboard/Gemini-replay path, not voice triggers. +if loco_controller is not None: + try: + from Project.Sanad.motion.sanad_arm_controller import ARM as _sanad_arm + if hasattr(_sanad_arm, "set_motion_block"): + _sanad_arm.set_motion_block(loco_controller.movement_active) + log.info("Voice arm (sanad_arm) motion-block wired to locomotion movement_active") + except Exception: + log.exception("Could not wire sanad_arm motion-block") + +# ── Gemini voice → movement dispatcher (N2 Phase 3) ───────────────────────── +# Reads Gemini's spoken (BOT) transcript via the live supervisor's bot-callback +# and drives loco_controller on a confirmation-phrase match (Marcus pattern). +# Gated on recognition_state.movement_enabled (the "Enable Gemini movement" +# toggle) — SEPARATE from the manual arm flag. Inert until that flag is on. 
+movement_dispatch = None +if MovementDispatcher and loco_controller is not None: + try: + from Project.Sanad.config import BASE_DIR as _BD2, MOTIONS_DIR as _MD + movement_dispatch = _safe_construct( + "movement_dispatch", + lambda: MovementDispatcher( + loco_controller, + _MD / "instruction.json", + _BD2 / "data" / ".recognition_state.json")) + if movement_dispatch is not None: + movement_dispatch.start() + if live_sub is not None and hasattr(live_sub, "register_bot_callback"): + live_sub.register_bot_callback(movement_dispatch.on_bot_text) + log.info("Movement dispatcher wired to Gemini BOT transcript") + except Exception: + log.exception("Could not wire movement dispatcher") + +# ── Navigation (web_nav3 Nav2 stack) — thin HTTP client ───────────────────── +# Loosely-coupled client to the standalone web_nav3 service (FastAPI :8765 + +# rosbridge :9090). Owns NO ROS2/Nav2 code; if web_nav3 is down the nav routes +# degrade gracefully. The dashboard "Navigation" tab routes (dashboard/routes/ +# navigation.py) build their own module-level client, so this singleton is the +# parent-side handle used by voice/movement wiring and the subsystem report. +# Config precedence (highest first): env var → dashboard config 'navigation' +# section → hardcoded default — same resolution as the navigation route. 
def _build_nav_client():
    """Create the parent-side HTTP client for the standalone web_nav3 service.

    Each setting is resolved with the same precedence (highest first):
    environment variable, then the dashboard config 'navigation' section,
    then a hardcoded default. Empty values count as "not set" at every level.
    """
    from Project.Sanad.core.config_loader import section as _cfg_section

    settings = _cfg_section("dashboard", "navigation")

    def _resolve(env_key, cfg_key, fallback):
        # First truthy value wins: env var -> config entry -> fallback.
        from_env = os.environ.get(env_key)
        if from_env:
            return str(from_env)
        from_cfg = settings.get(cfg_key)
        if from_cfg:
            return str(from_cfg)
        return str(fallback)

    return WebNav3Client(
        base_url=_resolve("WEB_NAV3_URL", "web_nav3_url", "http://127.0.0.1:8765"),
        robot=_resolve("SANAD_ROBOT_NAME", "robot", "sanad"),
    )
def _build_gallery():
    """Create the face gallery rooted at the configured faces directory.

    The directory comes from SANAD_FACES_DIR when that variable is set and
    non-empty (LIVE_TUNE sets it absolute for the Gemini child; the parent
    usually has it unset), otherwise from the core config 'faces' section
    key ``dir_rel``, otherwise ``data/faces``. Absolute paths are used
    as-is; relative ones are resolved against BASE_DIR.
    """
    from Project.Sanad.config import BASE_DIR
    from Project.Sanad.core.config_loader import section as _cfg_section

    faces_cfg = _cfg_section("core", "faces")
    configured = os.environ.get("SANAD_FACES_DIR")
    if not configured:
        configured = faces_cfg.get("dir_rel", "data/faces")

    location = Path(configured)
    if not location.is_absolute():
        location = BASE_DIR / configured
    return FaceGallery(location)
Face-rec state is read by the Gemini child directly. +try: + from Project.Sanad.vision import recognition_state as _recog_state + from Project.Sanad.config import BASE_DIR as _BD + _state = _recog_state.read(_BD / "data" / ".recognition_state.json") + if _state.vision_enabled and camera is not None: + if camera.start(): + log.info("Camera vision restored from state (backend=%s)", camera.backend) + else: + log.warning("Camera vision was ON but no backend available — leaving OFF") + _recog_state.mutate(_BD / "data" / ".recognition_state.json", + vision_enabled=False) +except Exception: + log.exception("Could not restore recognition state") + +# Hand the camera to the Gemini supervisor so it can forward frames to the +# child over stdin while a live session runs. +if live_sub is not None and camera is not None: + try: + if hasattr(live_sub, "attach_camera"): + live_sub.attach_camera(camera) + log.info("Camera attached to live subprocess supervisor") + except Exception: + log.exception("attach_camera failed") + +# Hand the AudioManager to the supervisor so the audio watcher can keep +# PulseAudio defaults aligned with the live profile on every Anker +# plug/unplug. Without this, typed-replay / record playback would stay on +# the boot device even after the live session swapped to Anker. +if live_sub is not None and audio_mgr is not None: + try: + if hasattr(live_sub, "attach_audio_manager"): + live_sub.attach_audio_manager(audio_mgr) + log.info("AudioManager attached to live subprocess supervisor") + except Exception: + log.exception("attach_audio_manager failed") + +# ── Motion-state → Gemini channel ─────────────────────────────────────────── +# The arm controller emits motion.action_started / _done / _error on the bus. +# Forward each to the Gemini child as a 'state:' line so the live session can +# answer "what are you doing?" honestly. Sync handlers, fired via emit_sync +# from the arm's worker thread — send_state just writes to a pipe (cheap). 
+if live_sub is not None and hasattr(live_sub, "send_state"): + try: + from Project.Sanad.core.event_bus import bus as _bus + + def _on_motion_started(action: str = "", **_kw): + live_sub.send_state("start", action) + + def _on_motion_done(action: str = "", elapsed_sec=None, + failed: bool = False, **_kw): + # action_error already covered the failure case with a reason; + # here just emit complete (skip if it failed to avoid a dup). + if not failed: + live_sub.send_state("complete", action, elapsed_sec=elapsed_sec) + + def _on_motion_error(action: str = "", reason: str = "", **_kw): + live_sub.send_state("error", action, reason=reason) + + _bus.on("motion.action_started", _on_motion_started) + _bus.on("motion.action_done", _on_motion_done) + _bus.on("motion.action_error", _on_motion_error) + log.info("Motion-state → Gemini channel wired") + except Exception: + log.exception("Could not wire motion-state → Gemini channel") + +# Animate the LED face mask while the robot is "speaking". Hooked to the +# gestural-speaking toggle (brain.gestural_speaking_changed); finer per-utterance +# lip-sync from TTS amplitude is a follow-up. Safe no-op until the face is started. +if mask_face is not None: + try: + from Project.Sanad.core.event_bus import bus as _bus_face + + def _on_gestural_speaking(enabled: bool = False, **_kw): + try: + mask_face.set_speaking(bool(enabled)) + if not enabled: + mask_face.set_listening() # back to attentive after a reply + except Exception: + log.exception("mask_face.set_speaking failed") + + _bus_face.on("brain.gestural_speaking_changed", _on_gestural_speaking) + log.info("LED face wired to gestural-speaking events") + except Exception: + log.exception("Could not wire LED face speaking hook") + +# Real lip-sync: route Gemini's per-chunk [[MOUTH:n]] amplitude markers (emitted +# by gemini/script.py, parsed by GeminiSubprocess) to the LED mask's mouth so it +# opens/closes with the actual speech. 
Fires on the subprocess reader thread; +# FaceController.set_mouth is thread-safe and a safe no-op until the face starts. +if mask_face is not None and live_sub is not None and hasattr(live_sub, "register_mouth_callback"): + try: + def _on_mouth_level(level: int): + if not getattr(mask_face, "_gemini_linked", False): + return # Gemini not linked to the mask -> leave it alone + try: + mask_face.set_mouth(int(level)) + except Exception: + log.exception("mask_face.set_mouth (lip-sync) failed") + + live_sub.register_mouth_callback(_on_mouth_level) + log.info("LED face wired to Gemini lip-sync (MOUTH markers)") + except Exception: + log.exception("Could not wire LED face lip-sync hook") + +# Gemini-driven expressions: [[FACE:name]] markers (from the set_expression tool) +# -> a brief emotion reaction on the mask. Fires on the subprocess reader thread; +# react() is thread-safe and a safe no-op until the face starts. +if mask_face is not None and live_sub is not None and hasattr(live_sub, "register_face_callback"): + try: + # per-emotion hold (seconds): affection/reactions linger a touch longer + _FACE_HOLD = {"heart": 2.6, "love": 2.6, "kiss": 2.4, + "laugh": 2.2, "surprised": 1.8, "confused": 1.8} + + def _on_face_emotion(name: str): + if not getattr(mask_face, "_gemini_linked", False): + return # Gemini not linked to the mask -> ignore emotion markers + try: + mask_face.react(str(name), _FACE_HOLD.get(name, 1.6)) + except Exception: + log.exception("mask_face.react (emotion) failed") + + live_sub.register_face_callback(_on_face_emotion) + log.info("LED face wired to Gemini emotions (FACE markers)") + except Exception: + log.exception("Could not wire LED face emotion hook") + +# Gemini-driven social QR: [[SHOW:account]] markers (from the show_social tool) +# -> render the account's QR + show it on the mask via the shared helper. 
+if mask_face is not None and live_sub is not None and hasattr(live_sub, "register_social_callback"): + try: + def _on_social(account: str): + if not getattr(mask_face, "_gemini_linked", False): + return # Gemini not linked to the mask -> ignore social markers + # This fires on the subprocess READER THREAD, which must keep draining + # stdout (lip-sync / transcript). show_social_on_mask does a ~9s BLE + # scratch upload — so dispatch it to a daemon thread and return at once. + def _run(acc=str(account)): + try: + from Project.Sanad.dashboard.routes.mask_social import show_social_on_mask + show_social_on_mask(acc) + except Exception: + log.exception("show_social_on_mask failed") + import threading as _th + _th.Thread(target=_run, daemon=True, name="mask-social").start() + + live_sub.register_social_callback(_on_social) + log.info("LED face wired to Gemini social QR (SHOW markers)") + except Exception: + log.exception("Could not wire LED face social hook") + +# Lifelike face behaviour: drive the LED face's state + reactions from bus events +# so it looks alive and engaged during a conversation (attentive while listening, +# looks-away while a reply is prepared, brief smile/sad reactions). All calls are +# safe no-ops until the face is started, and on the basic FaceAnimator fallback. 
def _safe_attach(method_name: str, value):
    """Call ``brain.<method_name>(value)`` if both sides exist.

    Does nothing when the brain was not constructed, when ``value`` is None,
    or when the brain has no attribute called ``method_name``. An exception
    raised by the attach call is logged and swallowed so one failing
    subsystem cannot abort module import.
    """
    if brain is not None and value is not None:
        attach = getattr(brain, method_name, None)
        if attach is not None:
            try:
                attach(value)
            except Exception:
                log.exception("brain.%s failed", method_name)
def _call_with_timeout(label: str, fn, timeout_s: float = 2.0):
    """Invoke ``fn`` on a daemon thread and wait at most ``timeout_s`` seconds.

    Meant for teardown calls that may block. If the call is still running
    when the wait expires, a warning is logged and the caller carries on;
    the worker is a daemon thread, so it cannot keep the process alive.
    Exceptions raised by ``fn`` are logged inside the worker and never
    reach the caller.
    """
    import threading

    def _guarded_call():
        try:
            fn()
        except Exception:
            log.exception("%s failed", label)

    worker = threading.Thread(
        target=_guarded_call,
        name=f"shutdown-{label}",
        daemon=True,
    )
    worker.start()
    worker.join(timeout_s)
    if not worker.is_alive():
        return
    log.warning("%s did not finish within %.1fs — skipping (forced exit)",
                label, timeout_s)
+ """ + global _shutting_down + if _shutting_down: + return + _shutting_down = True + log.info("Shutdown requested") + + # ── Stop tracked child subprocesses FIRST ─────────────────────────── + # The Gemini/local voice supervisor owns a real child OS process (and + # forwards camera/audio to it). Kill it early so on Ctrl+C the child + # tree dies fast even if a later step hangs. + if live_sub is not None: + try: + running = live_sub.is_running() if callable(getattr(live_sub, "is_running", None)) else False + if running: + live_sub.stop() + except Exception: + log.exception("live_sub.stop() failed") + + if camera is not None: + try: + if camera.is_running(): + camera.stop() + except Exception: + log.exception("camera.stop() failed") + + if arm is not None: + try: + if hasattr(arm, "cancel"): + arm.cancel() + except Exception: + log.exception("arm.cancel() failed") + try: + if hasattr(arm, "disable"): + arm.disable() + except Exception: + log.exception("arm.disable() failed") + + if movement_dispatch is not None: + try: + movement_dispatch.stop() + except Exception: + log.exception("movement_dispatch.stop() failed") + + # ── Loco stop — NON-BLOCKING ───────────────────────────────────────── + # loco_controller.shutdown() does StopMove + disarm, but StopMove can + # re-init / block on DDS during interpreter teardown. Only stop if a + # client is actually live, and never wait more than ~2s on it. + if loco_controller is not None: + _loco_has_client = True + try: + # If the controller exposes a "client exists" probe, honour it so + # we never trigger a lazy LocoClient re-init during teardown. 
+ for _attr in ("has_client", "is_armed", "_client"): + if hasattr(loco_controller, _attr): + _probe = getattr(loco_controller, _attr) + _loco_has_client = bool(_probe() if callable(_probe) else _probe) + break + except Exception: + _loco_has_client = True # probe failed — fall back to attempting it + if _loco_has_client: + _call_with_timeout("loco_controller.shutdown()", + loco_controller.shutdown, timeout_s=2.0) + else: + log.info("loco_controller has no live client — skipping StopMove") + + if mask_face is not None: + try: + mask_face.shutdown() # disconnect BLE + stop the face loop + except Exception: + log.exception("mask_face.shutdown() failed") + + if audio_mgr is not None: + try: + if hasattr(audio_mgr, "close"): + audio_mgr.close() + except Exception: + log.exception("audio_mgr.close() failed") + + log.info("Shutdown complete") + + +import atexit # noqa: E402 +atexit.register(_do_shutdown) +# atexit is the fallback path (clean uvicorn return / interpreter exit). +# The PRIMARY Ctrl+C path is the explicit SIGINT/SIGTERM handler installed +# in main() — see _install_signal_handlers(). That handler fully takes over: +# it runs the (idempotent, non-blocking) shutdown and then os._exit(0), so it +# never returns to uvicorn. This avoids the old problem where uvicorn's own +# handler and ours would fight — we just don't hand control back. A single +# SIGINT therefore tears down every child and force-exits within ~2s. + + +def _install_signal_handlers(): + """Take over SIGINT/SIGTERM so one Ctrl+C kills EVERYTHING fast. + + We do NOT chain to uvicorn's handler: we stop tracked children + do a + non-blocking loco stop, then os._exit(0) so the process dies immediately + without ever returning to uvicorn or hanging in atexit. 
def _print_env_diagnostic():
    """Print everything you'd need to debug a deployment issue.

    Writes to stdout: interpreter and platform details, the resolved
    directory layout, dashboard/DDS settings, the first eight ``sys.path``
    entries, an import probe for each critical third-party module, and
    which entries of ``SUBSYSTEMS`` were constructed.
    """
    print("=" * 60)
    print("SANAD ENVIRONMENT DIAGNOSTIC")
    print("=" * 60)
    print(f"Python: {sys.version}")
    print(f"Executable: {sys.executable}")
    print(f"Platform: {sys.platform}")
    print(f"BASE_DIR: {_THIS_DIR}")
    print(f"Parent: {_PARENT}")
    print(f"Layout: {'dev (Project/Sanad)' if _PARENT.name == 'Project' else 'deployed (top-level Sanad)'}")
    print(f"Dashboard: {DASHBOARD_HOST}:{DASHBOARD_PORT} (interface: {DASHBOARD_INTERFACE})")
    print(f"DDS interface: {DDS_NETWORK_INTERFACE}")
    print()
    print("sys.path[0:8]:")
    for p in sys.path[:8]:
        print(f" {p}")
    print()
    print("Critical imports:")
    for mod_name in ("uvicorn", "fastapi", "pydantic", "starlette",
                     "websockets", "httpx", "pyaudio", "pyrealsense2",
                     "unitree_sdk2py", "ultralytics", "numpy", "cv2"):
        # Only the import itself is guarded, so a formatting problem below can
        # never make a successfully imported module show up as failed.
        # BaseException is kept deliberately broad, mirroring the uvicorn
        # import in main(): the goal is to surface ANY import-time failure.
        try:
            mod = __import__(mod_name)
        except BaseException as exc:
            print(f" ✗ {mod_name:18s} {type(exc).__name__}: {exc}")
            continue
        # __version__ may be missing, None, or a non-string (tuple/int); the
        # "s" format code only accepts str, so coerce before formatting.
        ver = str(getattr(mod, "__version__", None) or "?")
        # __file__ can be None (e.g. namespace packages) — show "?" instead.
        path = str(getattr(mod, "__file__", None) or "?")
        print(f" ✓ {mod_name:18s} {ver:12s} {path}")
    print()
    print("Subsystems available (after constructing main module globals):")
    for name in sorted(SUBSYSTEMS):
        print(f" {'✓' if SUBSYSTEMS[name] is not None else '✗'} {name}")
    print("=" * 60)
Assistant") + parser.add_argument("--host", default=DASHBOARD_HOST, + help=f"Dashboard bind address. Default is wlan0's IP " + f"({DASHBOARD_HOST!r}). Override with SANAD_DASHBOARD_HOST " + f"or SANAD_DASHBOARD_INTERFACE.") + parser.add_argument("--port", type=int, default=DASHBOARD_PORT) + parser.add_argument("--network", default=DDS_NETWORK_INTERFACE, + help="DDS network interface (e.g. eth0, lo). " + "Override with SANAD_DDS_INTERFACE env var.") + parser.add_argument("--check-env", action="store_true", + help="Print environment diagnostic and exit " + "(no server start, no hardware init)") + args = parser.parse_args() + + if args.check_env: + _print_env_diagnostic() + return + + # Install our SIGINT/SIGTERM handler EARLY — before any hardware init or + # uvicorn.run() — so a single Ctrl+C at any point forces a fast, clean + # teardown of every child and exits the process. + _install_signal_handlers() + + log.info("Sanad starting — Python %s @ %s", sys.version.split()[0], sys.executable) + log.info("BASE_DIR: %s", _THIS_DIR) + log.info("Dashboard interface: %s → bound to %s", DASHBOARD_INTERFACE, args.host) + log.info("Starting Sanad — host=%s port=%d network=%s", args.host, args.port, args.network) + if brain is not None: + try: + log.info("Brain status: %s", brain.status()) + except Exception: + log.exception("brain.status() failed") + # Initialize hardware (graceful if unavailable) + if arm is not None: + try: + arm.init(network_interface=args.network) + except Exception: + log.exception("arm.init() failed — continuing without hardware") + + # ── import uvicorn ────────────────────────────────────────────────── + # Catch ANY exception (not just ImportError) so the real failure reason + # is surfaced. The previous narrow catch hid issues like uvicorn pulling + # in a broken transitive dep, or being installed under a different + # site-packages than the active interpreter. 
+ uvicorn = None + try: + import uvicorn # type: ignore + log.info("uvicorn %s loaded from %s", + getattr(uvicorn, "__version__", "?"), + getattr(uvicorn, "__file__", "?")) + except BaseException as exc: + log.error("Could not import uvicorn: %s: %s", type(exc).__name__, exc) + log.error("Python: %s", sys.executable) + log.error("sys.path[0:5]: %s", sys.path[:5]) + log.error("Try: %s -m pip install --user 'uvicorn[standard]' fastapi", sys.executable) + sys.exit(1) + + # ── import the FastAPI app ────────────────────────────────────────── + # Pass the app object directly so uvicorn doesn't have to re-resolve the + # import path (which differs between dev and deployed layouts). + try: + from Project.Sanad.dashboard.app import app as _app + except BaseException: + log.exception("Could not import Dashboard.app — aborting") + sys.exit(1) + + # ── start the server ──────────────────────────────────────────────── + try: + uvicorn.run( + _app, + host=args.host, + port=args.port, + log_level="info", + ) + except BaseException: + log.exception("uvicorn.run() failed") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/vendor/Sanad/motion/__init__.py b/vendor/Sanad/motion/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/motion/arm_controller.py b/vendor/Sanad/motion/arm_controller.py new file mode 100644 index 0000000..9d819fe --- /dev/null +++ b/vendor/Sanad/motion/arm_controller.py @@ -0,0 +1,910 @@ +"""Robot arm controller — real DDS motor commands and JSONL motion replay. + +Production-grade replay engine ported from G1_Lootah/Controller/g1_replay_trigger_r2x.py. +Features: body-lock, ramp-in/out interpolation, watchdog, speed control, CRC. +Falls back gracefully to simulation when the Unitree SDK is unavailable. 
+""" + +from __future__ import annotations + +import json +import threading +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from Project.Sanad.config import ( + G1_NUM_MOTOR, + KD_HIGH, + KD_LOW, + KD_WRIST, + KP_HIGH, + KP_LOW, + KP_WRIST, + MOTIONS_DIR, + REPLAY_HZ, + WEAK_MOTORS, + WRIST_MOTORS, +) +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.event_bus import bus +from Project.Sanad.core.logger import get_logger + +log = get_logger("arm_controller") + +_AC = _cfg_section("motion", "arm_controller") +# G1 hardware constants — single source in config/core_config.json +from Project.Sanad.config import ENABLE_ARM_SDK_INDEX +RAMP_IN_STEPS = _AC.get("ramp_in_steps", 60) # ~1.0s smooth move to start pose +RAMP_OUT_STEPS = _AC.get("ramp_out_steps", 180) # ~3.0s smooth return to home +SETTLE_HOLD_SEC = _AC.get("settle_hold_sec", 0.5) # hold start pose before replay begins +WATCHDOG_DISABLE_AFTER = _AC.get("watchdog_disable_after_sec", 1.0) # abort if state stale this long +ARM_INDICES = range( + _AC.get("arm_indices_start", 15), + _AC.get("arm_indices_stop", 29), +) +# N1 — motor temperature sampling. rt/lowstate arrives ~500 Hz; building the +# per-motor temperature snapshot that often is wasteful, so we refresh it at +# most this often. The 3D dashboard polls the snapshot over a WebSocket at a +# similar rate. +_TEMP_SAMPLE_INTERVAL_S = float(_AC.get("temp_sample_interval_sec", 0.1)) + +# -- SDK import (optional) -- +try: + from unitree_sdk2py.core.channel import ( + ChannelFactoryInitialize, + ChannelPublisher, + ChannelSubscriber, + ) + from unitree_sdk2py.idl.unitree_hg.msg.dds_ import LowCmd_, LowState_ + from unitree_sdk2py.utils.crc import CRC + # Battery (BMS) lives in a SEPARATE topic on the G1 — LowState_ (hg) has no + # battery field. Optional: never let its absence break arm import/motion. 
+ try: + from unitree_sdk2py.idl.unitree_hg.msg.dds_ import BmsState_ + except Exception: + BmsState_ = None + + # IDL factory — LowCmd_() with no args fails because the dataclass + # has 5 required positional fields. The SDK ships a default factory + # named `unitree_hg_msg_dds__LowCmd_` that constructs a fully-zeroed + # instance with the right number of motor_cmd entries. + try: + from unitree_sdk2py.idl.default import unitree_hg_msg_dds__LowCmd_ + _make_low_cmd = unitree_hg_msg_dds__LowCmd_ + except ImportError: + _make_low_cmd = None + + _HAS_SDK = True +except ImportError: + _HAS_SDK = False + _make_low_cmd = None + log.warning("Unitree SDK not available — running in simulation mode") + +# G1 arm-action client for built-in arm moves (wave, shake_hand, hug, …). +# NOTE: do NOT use LocoClient here — LocoClient is the locomotion/body-move +# client and its ExecuteAction() doesn't recognise arm-action IDs, so arm +# commands become silent no-ops. The correct client is the arm-specific +# G1ArmActionClient with the SDK's action_map (name → opcode lookup). 
@dataclass
class Action:
    """One entry in the arm action registry.

    An action is either an SDK built-in (``category == "sdk"``, no file) or a
    recorded motion discovered on disk (``category == "jsonl"``, ``file`` set
    to the JSONL filename inside the motions directory).
    """

    # Lookup key; for JSONL actions it is derived from the file stem with
    # hyphens and spaces replaced by underscores.
    name: str
    # Numeric id: 0-15 for the SDK actions listed in SDK_ACTIONS; JSONL
    # actions are numbered from _JSONL_ID_START in sorted filename order.
    id: int
    file: str = ""  # JSONL filename (empty = SDK built-in)
    category: str = "sdk"  # "sdk" | "jsonl"
def _load_frames(path: Path) -> list[dict[str, Any]]:
    """Read a JSONL motion file and return its pose frames.

    Each non-blank line is decoded as JSON. Only JSON *objects* that carry a
    ``"q"`` key are kept, in file order; any other line (metadata objects,
    bare strings, numbers, arrays) is skipped.

    Args:
        path: Location of the ``.jsonl`` file.

    Returns:
        The list of frame dicts, possibly empty.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    frames: list[dict[str, Any]] = []
    # JSON text is UTF-8; don't depend on the process locale.
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            data = json.loads(line)
            # Require a dict: `"q" in data` would be a substring test on a
            # JSON string and a TypeError on a JSON number.
            if isinstance(data, dict) and "q" in data:
                frames.append(data)
    return frames
N2 — arm ⇄ locomotion mutual exclusion. While locomotion movement is + # armed, the arm must NOT run any replay / SDK action / gesture. main.py + # wires this to LocoController.is_armed via set_motion_block(); checked at + # every playback chokepoint (replay_file / _execute), so it also blocks + # voice/Gemini-triggered gestures, not just the dashboard. + self._motion_block = None + + # DDS handles (set in init()) + self._arm_pub = None + self._state_sub = None + self._low_cmd = None + self._crc = None + self._arm_client = None + + # Live state from LowState_ subscriber + self._current_q: list[float] = [0.0] * G1_NUM_MOTOR + self._last_state_time = 0.0 + self._state_lock = threading.Lock() + self._first_state_event = threading.Event() + # N1 — latest per-motor temperatures (throttled snapshot for the 3D + # temperature dashboard). Each entry: {motor_id, surface, winding}. + self._current_temps: list[dict[str, Any]] = [] + self._last_temp_time = 0.0 + # Battery (BMS) snapshot from rt/lf/bmsstate — separate topic on the G1. + self._bms = None # latest parsed battery dict, or None + self._last_bms_time = 0.0 + self._bms_sub = None + + # Cached motion file metadata + self._motion_files_cache: dict[str, dict[str, Any]] = {} + + # -- initialization -- + + def init(self, network_interface: str = "lo") -> bool: + if self._initialized: + return True + if not _HAS_SDK: + log.info("Simulation mode — DDS init skipped") + return False + try: + ChannelFactoryInitialize(0, network_interface) + self._arm_pub = ChannelPublisher("rt/arm_sdk", LowCmd_) + self._arm_pub.Init() + self._state_sub = ChannelSubscriber("rt/lowstate", LowState_) + self._state_sub.Init(self._on_low_state, 10) + # Battery: subscribe to the BMS topic (rt/lf/bmsstate, BmsState_). + # Read-only + best-effort — never let a BMS hiccup break arm init. 
def _on_low_state(self, msg):
    """DDS LowState_ callback: store joint positions and, throttled, temperatures.

    Joint positions and the state timestamp are refreshed on every call (the
    replay watchdog relies on that rate). The per-motor temperature snapshot
    (N1) is rebuilt at most once per ``_TEMP_SAMPLE_INTERVAL_S`` so the
    temperature dashboard gets live data without a second subscriber.
    """
    stamp = time.monotonic()
    motor_ids = range(G1_NUM_MOTOR)

    def _split_temperature(raw):
        # Firmware is expected to report [surface, winding]; some builds give
        # a single-element sequence or a bare scalar. Anything unparsable
        # becomes (None, None).
        if raw is None:
            return None, None
        try:
            if hasattr(raw, "__len__"):
                count = len(raw)
                if count >= 2:
                    return int(raw[0]), int(raw[1])
                if count == 1:
                    only = int(raw[0])
                    return only, only
                return None, None
            scalar = int(raw)
            return scalar, scalar
        except (TypeError, ValueError):
            return None, None

    with self._state_lock:
        self._current_q = [float(msg.motor_state[m].q) for m in motor_ids]
        self._last_state_time = stamp
    if not self._first_state_event.is_set():
        self._first_state_event.set()

    # Temperatures are sampled on a slower cadence than joint positions.
    if (stamp - self._last_temp_time) >= _TEMP_SAMPLE_INTERVAL_S:
        snapshot = []
        for motor_id in motor_ids:
            raw_temp = getattr(msg.motor_state[motor_id], "temperature", None)
            surface, winding = _split_temperature(raw_temp)
            snapshot.append(
                {"motor_id": motor_id, "surface": surface, "winding": winding}
            )
        with self._state_lock:
            self._current_temps = snapshot
            self._last_temp_time = stamp
def _on_bms_state(self, msg):
    """Battery (BMS) callback — parse the fields the dashboard shows.

    Stores a dict with ``soc`` (clamped to 0..100), ``soh``, ``current_a``,
    ``voltage_v`` (``None`` when no voltage could be derived), ``temp_c``
    (highest plausible reading, or ``None``) and ``cycle`` in ``self._bms``
    and stamps ``self._last_bms_time``.

    The whole body is best-effort: a malformed message must never disturb
    the arm, so any exception is contained here. It is logged at debug level
    rather than dropped silently so a permanently broken BMS feed can still
    be diagnosed.
    """
    try:
        soc = int(getattr(msg, "soc", 0) or 0)
        cur = int(getattr(msg, "current", 0) or 0)  # G1 reports mA

        # Pack voltage: prefer bmsvoltage[0] (mV); else sum of cell voltages.
        volt_mv = 0
        bv = getattr(msg, "bmsvoltage", None)
        try:
            if bv is not None and len(bv) and int(bv[0]):
                volt_mv = int(bv[0])
        except Exception:
            volt_mv = 0
        if not volt_mv:
            cv = getattr(msg, "cell_vol", None)
            if cv is not None:
                try:
                    volt_mv = int(sum(int(x) for x in cv if x))
                except Exception:
                    volt_mv = 0

        # Max plausible cell/pack temperature (int16 °C); readings outside
        # -40..150 are treated as sensor garbage and ignored.
        temp_c = None
        tt = getattr(msg, "temperature", None)
        if tt is not None:
            try:
                vals = [int(x) for x in tt if -40 <= int(x) <= 150]
                if vals:
                    temp_c = max(vals)
            except Exception:
                temp_c = None

        batt = {
            "soc": max(0, min(100, soc)),
            "soh": int(getattr(msg, "soh", 0) or 0),
            "current_a": round(cur / 1000.0, 2),
            "voltage_v": round(volt_mv / 1000.0, 1) if volt_mv else None,
            "temp_c": temp_c,
            "cycle": int(getattr(msg, "cycle", 0) or 0),
        }
        with self._state_lock:
            self._bms = batt
            self._last_bms_time = time.monotonic()
    except Exception:
        # Still swallowed on purpose (see docstring) — but leave a trace.
        log.debug("Ignoring malformed BmsState_ message", exc_info=True)
def _state_age(self) -> float:
    """Return seconds since the last LowState_ callback.

    Reads the timestamp under ``_state_lock``. While no state has been
    received yet (timestamp still falsy) the sentinel ``999.0`` is returned.
    """
    with self._state_lock:
        last_seen = self._last_state_time
        if not last_seen:
            return 999.0
        return time.monotonic() - last_seen
def _replay_file_inner(self, filepath: str, speed: float = 1.0):
    """Replay one JSONL motion file (port of g1_replay_v4_stable.py:ReplayWithHome.Run()).

    Does no busy/lock bookkeeping itself — ``replay_file`` and ``_execute``
    claim ``_is_busy`` before calling and release it afterwards.

    Phases on the hardware path:

    1. Wait for the first LowState_ message (never body-lock from zeros).
    2. Load data: ``home_q`` from ``_load_home_q()`` (first frame of
       arm_home.jsonl, all zeros if that file is missing or empty),
       ``full_body_lock_q`` (live snapshot) and the replay frames.
    3. MOVE TO START — ``RAMP_IN_STEPS`` steps, alpha = k/steps (starts at
       the exact current pose, ends one step shy of the target).
    3b. SETTLE HOLD — holds the start pose for ``SETTLE_HOLD_SEC`` so the
       motors reach it before playback begins.
    4. PLAY — each tick sends the first frame whose timestamp (relative to
       the first frame) is >= the scaled elapsed time, then sleeps
       1/REPLAY_HZ. A stale LowState aborts via DisableSDK.
    5. RETURN HOME via ``_return_home`` (which also disables the SDK).

    Without the SDK the replay is only simulated (``_sim_replay``).

    Args:
        filepath: Motion file; relative paths resolve against MOTIONS_DIR.
        speed: Playback speed multiplier (1.0 = normal).

    Raises:
        FileNotFoundError: The motion file does not exist.
        ValueError: (hardware path) ``speed`` is not > 0, or a frame / the
            home pose has fewer than G1_NUM_MOTOR joint values. Raised
            before any frame is sent.
    """
    path = Path(filepath)
    if not path.is_absolute():
        path = MOTIONS_DIR / path
    if not path.exists():
        raise FileNotFoundError(f"Motion file not found: {path}")

    frames = _load_frames(path)
    if not frames:
        log.warning("Empty motion file: %s", path)
        return

    if not _HAS_SDK:
        duration = len(frames) / REPLAY_HZ / speed if speed else len(frames) / REPLAY_HZ
        log.info("[SIM] Replay %s (%.1fs, %d frames, speed=%.1f)",
                 path.name, duration, len(frames), speed)
        self._sim_replay(frames, speed)
        return

    # Validate everything up front, before the arm is commanded at all.
    # A non-positive (or NaN) speed never advances the play clock, so the
    # play loop would hold frame 0 until someone cancels.
    if not speed > 0:
        raise ValueError(f"Replay speed must be > 0, got {speed!r}")
    # _send_frame indexes q[15:G1_NUM_MOTOR]; a short vector would raise
    # IndexError in the middle of the motion with arm_sdk still enabled.
    for idx, frame in enumerate(frames):
        q = frame["q"]
        if not isinstance(q, (list, tuple)) or len(q) < G1_NUM_MOTOR:
            raise ValueError(
                f"{path.name}: frame {idx} has an invalid 'q' "
                f"(need at least {G1_NUM_MOTOR} joint values)"
            )

    log.info("Replay %s (%d frames @ %.0f Hz, speed=%.1f)",
             path.name, len(frames), REPLAY_HZ, speed)

    # ─── 1. Wait for first LowState ─────────────────────────
    if not self._first_state_event.is_set():
        log.warning("Waiting for first LowState message...")
        if not self._first_state_event.wait(timeout=2.0):
            log.error("No LowState received in 2s — refusing to replay (would lock body to zeros)")
            return

    # ─── 2. Load data (ref lines 154-166) ───────────────────
    home_q = _load_home_q() or [0.0] * G1_NUM_MOTOR
    if len(home_q) < G1_NUM_MOTOR:
        raise ValueError(
            f"arm_home.jsonl: home pose has fewer than {G1_NUM_MOTOR} joint values"
        )
    full_body_lock_q = self._get_current_q()  # snapshot live state

    interval = 1.0 / REPLAY_HZ
    file_start_q = frames[0]["q"]

    try:
        # ─── 3. MOVE TO START (ref lines 171-181) ───────────────
        log.debug("Moving to start (%d steps)", RAMP_IN_STEPS)
        # Last pose actually commanded; a cancelled ramp must return home
        # from here, not from the initial snapshot, or the arm jumps back.
        last_sent_q = full_body_lock_q
        for k in range(RAMP_IN_STEPS):
            if self._cancel.is_set():
                self._return_home(last_sent_q, full_body_lock_q, home_q)
                return
            alpha = k / RAMP_IN_STEPS  # 0 .. (RAMP_IN_STEPS-1)/RAMP_IN_STEPS
            interp_q = list(full_body_lock_q)
            for j in range(15, G1_NUM_MOTOR):
                interp_q[j] = (1 - alpha) * full_body_lock_q[j] + alpha * file_start_q[j]
            self._send_frame(interp_q, full_body_lock_q)
            last_sent_q = interp_q
            time.sleep(interval)

        # ─── 3b. SETTLE HOLD — replaces reference's Enter pause ─
        settle_frames = max(0, int(SETTLE_HOLD_SEC * REPLAY_HZ))
        if settle_frames > 0:
            log.debug("Settle hold (%d frames / %.2fs)", settle_frames, SETTLE_HOLD_SEC)
            for _ in range(settle_frames):
                if self._cancel.is_set():
                    self._return_home(file_start_q, full_body_lock_q, home_q)
                    return
                self._send_frame(file_start_q, full_body_lock_q)
                time.sleep(interval)

        # ─── 4. PLAY (ref lines 183-234) ────────────────────────
        log.debug("Playing %d frames", len(frames))
        last_played_q = file_start_q
        play_elapsed = 0.0
        last_real = time.monotonic()
        t0 = frames[0].get("t", 0.0)
        # play_elapsed only grows (monotonic clock, speed > 0), so a frame
        # that failed the ">= play_elapsed" test once can never pass it
        # later. Resuming the search from `cursor` therefore selects the
        # same frame as rescanning from index 0, in O(n) total.
        cursor = 0
        frame_count = len(frames)

        while not self._cancel.is_set():
            # Watchdog — abort if LowState goes stale
            age = self._state_age()
            if age > WATCHDOG_DISABLE_AFTER:
                log.error("Watchdog abort — LowState stale %.2fs", age)
                self._disable_sdk()
                return

            now_real = time.monotonic()
            play_elapsed += (now_real - last_real) * speed
            last_real = now_real

            # Pick the next frame whose timestamp has elapsed (reference pattern)
            while cursor < frame_count and not (
                frames[cursor].get("t", 0.0) - t0 >= play_elapsed
            ):
                cursor += 1
            if cursor >= frame_count:
                break

            target_q = frames[cursor]["q"]
            self._send_frame(target_q, full_body_lock_q)
            last_played_q = target_q
            time.sleep(interval)

        # ─── 5. RETURN HOME (ref lines 239-256) + DisableSDK ────
        self._return_home(last_played_q, full_body_lock_q, home_q)
    except Exception:
        # Never leave arm_sdk enabled after an unexpected failure: release
        # control (same action the watchdog takes) and let the caller see
        # the original error.
        log.exception("Replay %s aborted by an unexpected error — releasing arm SDK",
                      path.name)
        try:
            self._disable_sdk()
        except Exception:
            log.exception("DisableSDK failed while aborting replay")
        raise
def _blocked(self) -> bool:
    """Report whether the installed motion-block predicate forbids arm motion.

    Returns False when no predicate is installed. A predicate that raises is
    logged and treated as "not blocked".
    """
    gate = self._motion_block
    verdict = False
    if gate is not None:
        try:
            verdict = bool(gate())
        except Exception:
            log.exception("motion-block predicate raised — treating as NOT blocked")
    return verdict
+ """ + if not self._initialized or self._low_cmd is None: + log.warning("return_to_home: arm controller not initialised") + return + if not self._first_state_event.wait(timeout=2.0): + log.error("return_to_home: no LowState received in 2s — aborting") + return + + home_path = MOTIONS_DIR / home_file + if not home_path.exists(): + log.warning("return_to_home: %s missing — skipping", home_path.name) + return + + # Use the LAST valid 'q' in the file as the settle pose + home_q: list[float] | None = None + try: + for frame in _load_frames(home_path): + q = frame.get("q") + if q and len(q) == G1_NUM_MOTOR: + home_q = q + except Exception as exc: + log.warning("return_to_home: reading %s failed: %s", + home_path.name, exc) + return + if home_q is None: + log.warning("return_to_home: %s has no valid 'q' frames", + home_path.name) + return + + with self._state_lock: + start_q = list(self._current_q) + body_lock_q = list(start_q) + + # Let the ramp publish frames even though we just cancelled + self._cancel.clear() + with self._lock: + if self._is_busy: + # A pending replay is still winding down — wait a beat + log.debug("return_to_home: arm busy, waiting briefly") + self._is_busy = True + + try: + steps = max(30, int(duration_sec * REPLAY_HZ)) # ≥ 0.5s ramp + dt = 1.0 / REPLAY_HZ + log.info("return_to_home: ramp %d steps (%.1fs) → %s", + steps, duration_sec, home_file) + for k in range(steps): + if self._cancel.is_set(): + log.info("return_to_home: cancelled mid-ramp") + break + alpha = (k + 1) / steps + interp_q = list(body_lock_q) + for j in range(15, G1_NUM_MOTOR): + interp_q[j] = (1 - alpha) * start_q[j] + alpha * home_q[j] + self._send_frame(interp_q, body_lock_q) + time.sleep(dt) + log.info("return_to_home: done") + finally: + with self._lock: + self._is_busy = False + + def refresh_actions(self): + """Re-scan data/motions/ and rebuild the action registry.""" + global ACTIONS, ACTION_BY_ID, ACTION_BY_NAME + ACTIONS, ACTION_BY_ID, ACTION_BY_NAME = 
def list_motion_files(self) -> list[dict[str, Any]]:
    """List every ``*.jsonl`` file in MOTIONS_DIR with basic metadata.

    Each entry has ``name``, ``path``, ``frames``, ``duration_sec``
    (frames / REPLAY_HZ) and ``size_kb``. Results are cached per
    ``"<path>:<mtime_ns>"`` so unchanged files are not re-parsed on every
    dashboard refresh.

    The cache is rebuilt from the files seen in this pass, which drops
    entries for files that were modified *or deleted* since the last call.
    A file that cannot be parsed is still listed (with 0 frames) and a
    warning is logged, so one corrupt recording cannot break the listing.
    """
    MOTIONS_DIR.mkdir(parents=True, exist_ok=True)
    result: list[dict[str, Any]] = []
    live_cache: dict[str, dict[str, Any]] = {}
    for p in sorted(MOTIONS_DIR.glob("*.jsonl")):
        try:
            stat = p.stat()
        except OSError:
            # Removed between glob() and stat() — nothing to list.
            continue
        cache_key = f"{p}:{stat.st_mtime_ns}"
        cached = self._motion_files_cache.get(cache_key)
        if cached is None:
            try:
                frame_count = len(_load_frames(p))
            except (OSError, ValueError, TypeError) as exc:
                # json.JSONDecodeError / UnicodeDecodeError are ValueErrors.
                log.warning("list_motion_files: %s is unreadable (%s) — listing with 0 frames",
                            p.name, exc)
                frame_count = 0
            duration = frame_count / REPLAY_HZ if frame_count else 0
            cached = {
                "name": p.name,
                "path": str(p),
                "frames": frame_count,
                "duration_sec": round(duration, 2),
                "size_kb": round(stat.st_size / 1024, 1),
            }
        live_cache[cache_key] = cached
        result.append(cached)
    # Swap in one assignment: stale and deleted-file entries disappear.
    self._motion_files_cache = live_cache
    return result
def _run_sdk_action(self, action: Action):
    """Run one built-in arm action through G1ArmActionClient.

    Without the SDK the action is only simulated (log + 2 s sleep). If the
    arm client is missing, or no spelling of the action name is found in
    ``_ARM_ACTION_MAP``, a warning is logged and nothing is executed.
    Errors raised by the client are logged, not propagated.
    """
    if not _HAS_SDK:
        log.info("[SIM] SDK action: %s (id=%d)", action.name, action.id)
        time.sleep(2.0)
        return

    if self._arm_client is None:
        log.warning(
            "SDK action %s requested but G1ArmActionClient not available — skipping",
            action.name,
        )
        return

    # The registry uses underscored names ("shake_hand", "x_ray"); the SDK's
    # action_map mixes spaces and hyphens ("shake hand", "x-ray",
    # "two-hand kiss"). Try each spelling in this order.
    base = action.name
    spaced = base.replace("_", " ")                        # shake_hand → shake hand
    hyphenated = base.replace("_", "-")                    # x_ray → x-ray
    mixed = base.replace("_", "-", 1).replace("_", " ")    # first "_" → "-", rest → " "
    candidates = [base, spaced, hyphenated, mixed]

    sdk_name = None
    for spelling in candidates:
        if spelling in _ARM_ACTION_MAP:
            sdk_name = spelling
            break

    if sdk_name is None:
        log.warning(
            "SDK action %s not in G1ArmActionClient action_map — tried %s. keys=%s",
            action.name, candidates, sorted(_ARM_ACTION_MAP.keys())[:12],
        )
        return

    opcode = _ARM_ACTION_MAP[sdk_name]
    log.info("SDK action: %s (opcode=%s)", action.name, opcode)
    try:
        self._arm_client.ExecuteAction(opcode)
        # Built-in arm actions block on the robot side for ~3s; the SDK
        # call returns immediately. Sleep so we don't hammer it back-to-back.
        time.sleep(3.0)
    except Exception as exc:
        log.error("SDK action %s failed: %s", action.name, exc)
def _lerp_q(a: list[float], b: list[float], t: float) -> list[float]:
    """Linearly blend two joint vectors element by element.

    ``t = 0`` yields ``a`` and ``t = 1`` yields ``b``; the result is as long
    as the shorter input.
    """
    blended = []
    for start, end in zip(a, b):
        blended.append(start + (end - start) * t)
    return blended
+ """ + + def __init__(self, arm): + self._arm = arm + + def wait_for_state(self, timeout: float = 2.0) -> bool: + fn = getattr(self._arm, "wait_for_state", None) + if callable(fn): + return fn(timeout=timeout) + # No state-wait API: assume ready + return True + + def get_current_q(self) -> list[float]: + fn = getattr(self._arm, "get_current_q", None) + if callable(fn): + return fn() + return self._arm._get_current_q() + + def send_frame(self, target_q: list[float], body_lock_q: list[float]): + fn = getattr(self._arm, "send_frame", None) + if callable(fn): + return fn(target_q, body_lock_q) + return self._arm._send_frame(target_q, body_lock_q) + + def disable(self): + fn = getattr(self._arm, "disable", None) + if callable(fn): + return fn() + return self._arm._disable_sdk() + + def state_age(self) -> float: + fn = getattr(self._arm, "state_age", None) + if callable(fn): + return fn() + # No watchdog support: pretend state is fresh + return 0.0 + + def is_blocked(self) -> bool: + """True while the arm's locomotion interlock forbids arm motion. + + Mirrors the arm controller's own _blocked()/set_motion_block() gate so + a macro started before locomotion was armed stops mid-playback if the + legs are armed afterwards. 
If the arm has no such predicate, never + blocks (preserves behaviour for controllers without the interlock).""" + fn = getattr(self._arm, "_blocked", None) + if callable(fn): + try: + return bool(fn()) + except Exception: + return False + return False + + +class MacroPlayer: + def __init__(self, audio_manager=None, arm_controller=None): + self._audio_mgr = audio_manager + self._arm = arm_controller + self._lock = threading.Lock() + self._playing = False + self._stop_event = threading.Event() + + @property + def is_playing(self) -> bool: + return self._playing + + def play(self, name: str) -> dict[str, Any]: + audio_path = AUDIO_RECORDINGS_DIR / f"{name}.wav" + motion_path = MOTION_RECORDINGS_DIR / f"{name}.jsonl" + + if not audio_path.exists(): + raise FileNotFoundError(f"Audio not found: {audio_path}") + if not motion_path.exists(): + raise FileNotFoundError(f"Motion not found: {motion_path}") + + with self._lock: + if self._playing: + raise RuntimeError("Macro playback already in progress.") + self._playing = True + self._stop_event.clear() + + t0 = time.monotonic() + audio_thread = threading.Thread(target=self._play_audio, args=(audio_path,), daemon=True) + motion_thread = threading.Thread(target=self._play_motion, args=(motion_path,), daemon=True) + + log.info("Macro playback starting: %s", name) + audio_thread.start() + motion_thread.start() + + audio_thread.join() + motion_thread.join() + + elapsed = time.monotonic() - t0 + with self._lock: + self._playing = False + + log.info("Macro playback complete: %s (%.1fs)", name, elapsed) + return {"name": name, "duration_sec": round(elapsed, 2)} + + def stop(self): + self._stop_event.set() + # Best-effort: stop audio playback if the manager exposes a stop method + if self._audio_mgr is not None and hasattr(self._audio_mgr, "stop_playback"): + try: + self._audio_mgr.stop_playback() + except Exception as exc: + log.warning("audio stop failed: %s", exc) + + def _play_audio(self, path: Path): + if self._audio_mgr is 
None: + log.warning("No audio manager — skipping audio playback") + return + try: + self._audio_mgr.play_wav(path) + except Exception as exc: + log.error("Audio playback failed: %s", exc) + + def _play_motion(self, path: Path): + """Stream JSONL motion frames at recorded timing — with ramp-in, watchdog, ramp-out.""" + frames = self._load_frames(path) + if not frames: + return + + if self._arm is None: + # Simulated playback — just sleep through + duration = frames[-1].get("t", 0) + log.info("[SIM] MacroPlayer would play %d frames over %.1fs", len(frames), duration) + time.sleep(min(duration, 30.0)) + return + + adapter = _ArmAdapter(self._arm) + interval = 1.0 / REPLAY_HZ + + # CRITICAL: wait for first LowState before reading current pose + if not adapter.wait_for_state(timeout=2.0): + log.error("MacroPlayer aborting — no LowState received in 2s") + return + + try: + current_q = adapter.get_current_q() + except Exception: + log.exception("Failed to read current pose") + return + + body_lock_q = list(current_q) + first_frame_q = frames[0]["q"] + + # Phase 1: Ramp-in + if not self._ramp(adapter, current_q, first_frame_q, body_lock_q, RAMP_IN_STEPS, "ramp-in"): + return + + # Phase 2: Stream recorded frames with timing + watchdog + last_q = self._stream_frames(adapter, frames, body_lock_q, interval) or first_frame_q + + # Phase 3: Ramp-out back to starting pose + self._ramp(adapter, last_q, body_lock_q, body_lock_q, RAMP_OUT_STEPS, "ramp-out") + + # Phase 4: Disable arm SDK + try: + adapter.disable() + except Exception: + log.exception("disable() failed") + + def _load_frames(self, path: Path) -> list[dict]: + """Read JSONL motion file. 
Returns list of frames or [] on failure.""" + frames: list[dict] = [] + try: + with open(path, "r") as f: + for line in f: + line = line.strip() + if not line: + continue + try: + data = json.loads(line) + except json.JSONDecodeError as exc: + log.warning("Skipping bad line in %s: %s", path.name, exc) + continue + if "q" in data: + frames.append(data) + except OSError: + log.exception("Failed to read motion file %s", path) + if not frames: + log.warning("No usable frames in %s", path.name) + return frames + + def _ramp(self, adapter: "_ArmAdapter", from_q: list[float], to_q: list[float], + body_lock_q: list[float], steps: int, label: str) -> bool: + """Smoothly interpolate from `from_q` to `to_q` over `steps` frames. + Returns True on success, False if cancelled or send failed.""" + log.info("MacroPlayer %s (%d steps)", label, steps) + interval = 1.0 / REPLAY_HZ + for step in range(steps): + if self._stop_event.is_set(): + return False + if adapter.is_blocked(): + log.error("MacroPlayer %s abort — locomotion armed mid-playback", label) + return False + t = (step + 1) / steps + interp = _lerp_q(from_q, to_q, t) + try: + adapter.send_frame(interp, body_lock_q) + except Exception: + log.exception("%s send_frame failed", label) + return False + time.sleep(interval) + return True + + def _stream_frames(self, adapter: "_ArmAdapter", frames: list[dict], + body_lock_q: list[float], interval: float) -> list[float] | None: + """Stream the recorded frames with watchdog. 
def list_macros(self) -> list[dict[str, Any]]:
    """Return every macro that has both a ``.wav`` and a ``.jsonl`` file.

    Audio files are scanned in sorted order; a recording is listed only when
    a motion file with the same stem exists. Sizes are reported in KiB,
    rounded to one decimal.
    """
    def _size_kb(file_path) -> float:
        return round(file_path.stat().st_size / 1024, 1)

    paired: list[dict[str, Any]] = []
    for wav_file in sorted(AUDIO_RECORDINGS_DIR.glob("*.wav")):
        stem = wav_file.stem
        jsonl_file = MOTION_RECORDINGS_DIR / f"{stem}.jsonl"
        if not jsonl_file.exists():
            continue
        paired.append({
            "name": stem,
            "audio_path": str(wav_file),
            "motion_path": str(jsonl_file),
            "audio_size_kb": _size_kb(wav_file),
            "motion_size_kb": _size_kb(jsonl_file),
        })
    return paired
class MacroRecorder:
    """Record microphone audio and arm joint positions at the same time.

    ``start(name)`` launches two daemon threads (audio capture and joint
    sampling). ``stop()`` joins them and writes the paired files
    ``AUDIO_RECORDINGS_DIR/<name>.wav`` and
    ``MOTION_RECORDINGS_DIR/<name>.jsonl``.
    """

    def __init__(self, arm_controller=None):
        """Create an idle recorder.

        Args:
            arm_controller: object exposing ``_initialized`` and
                ``_get_current_q()``. With ``None`` (or an uninitialized
                controller) all-zero poses are recorded.
        """
        self._arm = arm_controller
        self._lock = threading.Lock()
        self._recording = False
        self._audio_thread: threading.Thread | None = None
        self._motion_thread: threading.Thread | None = None
        self._stop_event = threading.Event()
        self._name = ""
        self._audio_frames: list[bytes] = []
        self._motion_frames: list[dict[str, Any]] = []
        self._started_at = 0.0

    @property
    def is_recording(self) -> bool:
        """True between a successful ``start()`` and the end of ``stop()``."""
        return self._recording

    def start(self, name: str) -> dict[str, Any]:
        """Begin recording a macro called `name`.

        Raises:
            RuntimeError: if a recording is already in progress.
        """
        with self._lock:
            if self._recording:
                raise RuntimeError("Already recording a macro.")
            self._recording = True
            self._name = name
            self._stop_event.clear()
            self._audio_frames = []
            self._motion_frames = []
            self._started_at = time.monotonic()

        AUDIO_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)
        MOTION_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)

        self._audio_thread = threading.Thread(target=self._record_audio, daemon=True)
        self._motion_thread = threading.Thread(target=self._record_motion, daemon=True)
        self._audio_thread.start()
        self._motion_thread.start()

        log.info("Macro recording started: %s", name)
        return {"recording": True, "name": name}

    @staticmethod
    def _discard(tmp_path) -> None:
        """Best-effort removal of a leftover temp file."""
        try:
            os.unlink(tmp_path)
        except OSError:
            pass

    def stop(self) -> dict[str, Any]:
        """Stop both capture threads and write the audio + motion files.

        Returns:
            Summary dict with the output paths, duration and frame counts.

        Raises:
            RuntimeError: if no recording is in progress.
            Exception: any error raised while writing either file. The busy
                flag is cleared first, so a later ``start()`` still works.
        """
        with self._lock:
            if not self._recording:
                raise RuntimeError("No macro recording in progress.")

        self._stop_event.set()
        if self._audio_thread:
            self._audio_thread.join(timeout=3.0)
        if self._motion_thread:
            self._motion_thread.join(timeout=3.0)

        audio_path = AUDIO_RECORDINGS_DIR / f"{self._name}.wav"
        motion_path = MOTION_RECORDINGS_DIR / f"{self._name}.jsonl"

        # A failed write must NOT leave _recording=True forever (which would
        # wedge every future start()). Clear the busy flag in finally no matter
        # what; write both files atomically (temp file + os.replace) so a
        # partial write can't surface a corrupt recording.
        try:
            # Save audio. The temp file is removed on failure, same as the
            # motion temp file below.
            pcm = b"".join(self._audio_frames)
            tmp_audio = f"{audio_path}.tmp"
            try:
                with wave.open(tmp_audio, "wb") as wf:
                    wf.setnchannels(CHANNELS)
                    wf.setsampwidth(2)  # int16
                    wf.setframerate(RECEIVE_SAMPLE_RATE)
                    wf.writeframes(pcm)
                os.replace(tmp_audio, str(audio_path))
            except Exception:
                self._discard(tmp_audio)
                raise

            # Save motion: one meta header line, then one JSON object per frame.
            content_lines = [json.dumps({"meta": {"hz": REPLAY_HZ, "motors": 29}})]
            content_lines.extend(json.dumps(frame) for frame in self._motion_frames)
            content = ("\n".join(content_lines) + "\n").encode("utf-8")
            fd, tmp_motion = tempfile.mkstemp(
                prefix=f".{motion_path.name}.", suffix=".tmp",
                dir=str(motion_path.parent),
            )
            try:
                with os.fdopen(fd, "wb") as f:
                    f.write(content)
                os.replace(tmp_motion, str(motion_path))
            except Exception:
                self._discard(tmp_motion)
                raise
        finally:
            with self._lock:
                self._recording = False

        duration = time.monotonic() - self._started_at

        log.info("Macro saved: audio=%s motion=%s (%.1fs)", audio_path, motion_path, duration)
        return {
            "recording": False,
            "name": self._name,
            "audio_path": str(audio_path),
            "motion_path": str(motion_path),
            "duration_sec": round(duration, 2),
            "audio_frames": len(self._audio_frames),
            "motion_frames": len(self._motion_frames),
        }

    def _record_audio(self):
        """Capture mic audio in a background thread until the stop event is set.

        The PyAudio stream and instance are released in ``finally``, so a
        failed ``open()`` or ``read()`` no longer leaks the audio device.
        """
        pya = None
        stream = None
        try:
            import pyaudio

            pya = pyaudio.PyAudio()
            stream = pya.open(
                format=pyaudio.paInt16,
                channels=CHANNELS,
                rate=RECEIVE_SAMPLE_RATE,
                input=True,
                frames_per_buffer=CHUNK_SIZE,
            )
            while not self._stop_event.is_set():
                data = stream.read(CHUNK_SIZE, exception_on_overflow=False)
                self._audio_frames.append(data)
        except Exception as exc:
            log.error("Audio recording failed: %s", exc)
        finally:
            if stream is not None:
                try:
                    stream.stop_stream()
                    stream.close()
                except Exception as exc:
                    log.error("Audio recording failed: %s", exc)
            if pya is not None:
                try:
                    pya.terminate()
                except Exception as exc:
                    log.error("Audio recording failed: %s", exc)

    def _record_motion(self):
        """Capture joint positions at REPLAY_HZ."""
        interval = 1.0 / REPLAY_HZ

        # Wait for the first LowState before sampling real hardware, otherwise
        # _get_current_q() returns the seed [0.0]*29 and we capture an all-zero
        # macro that body-locks the arms to zero on replay. Only relevant when a
        # live, initialized arm is present (sim path records zeros by design).
        if self._arm is not None and getattr(self._arm, "_initialized", False):
            wait = getattr(self._arm, "wait_for_state", None)
            if callable(wait) and not wait(timeout=2.0):
                log.error("Macro motion aborted — no LowState received in 2s")
                return

        t0 = time.monotonic()
        while not self._stop_event.is_set():
            t = round(time.monotonic() - t0, 4)
            # Read current joint positions from arm controller
            q = self._read_joint_positions()
            self._motion_frames.append({"t": t, "q": q})
            time.sleep(interval)

    def _read_joint_positions(self) -> list[float]:
        """Read current joint positions. Returns zeros if SDK unavailable.

        ``_initialized`` is read with ``getattr`` (as in ``_record_motion``),
        so a controller lacking the attribute is treated as uninitialized
        instead of raising inside the sampling thread.
        """
        if self._arm is not None and getattr(self._arm, "_initialized", False):
            return self._arm._get_current_q()
        return [0.0] * 29

    def status(self) -> dict[str, Any]:
        """Return a snapshot of the recorder state for the dashboard."""
        elapsed = time.monotonic() - self._started_at if self._recording else 0
        return {
            "recording": self._recording,
            "name": self._name,
            "elapsed_sec": round(elapsed, 1),
            "audio_frames": len(self._audio_frames),
            "motion_frames": len(self._motion_frames),
        }
# Catalogue of arm actions that can be triggered by id or by name.
# Entries with an empty `file` are dispatched through the SDK action client
# when their name is present in `action_map`. Entries with a `file` are
# replayed from that JSONL recording under DATA_DIR.
OPTION_LIST = [
    TestOption(name="release arm", id=0),
    TestOption(name="shake hand", id=1),
    TestOption(name="high five", id=2),
    TestOption(name="hug", id=3),
    TestOption(name="high wave", id=4),
    TestOption(name="clap", id=5),
    TestOption(name="face wave", id=6),
    TestOption(name="left kiss", id=7),
    TestOption(name="heart", id=8),
    TestOption(name="right heart", id=9),
    TestOption(name="hands up", id=10),
    TestOption(name="x-ray", id=11),
    TestOption(name="right hand up", id=12),
    TestOption(name="reject", id=13),
    TestOption(name="right kiss", id=14),
    TestOption(name="two-hand kiss", id=15),
    # File-backed (recorded) motions.
    TestOption(name="release arm recorded", id=30, file="arm_home.jsonl"),
    TestOption(name="laugh", id=23, file="laugh.jsonl"),
    TestOption(name="bird", id=24, file="bird.jsonl"),
    TestOption(name="change battery", id=25, file="change_battery.jsonl"),
    TestOption(name="move hands up", id=26, file="hands_up.jsonl"),
    TestOption(name="move right hand up", id=27, file="right_hand_up.jsonl"),
    TestOption(name="move left hand up", id=28, file="left_hand_up.jsonl"),
]

# Lookup tables used by the trigger methods; names are matched lower-cased.
OPTION_BY_ID = {o.id: o for o in OPTION_LIST}
OPTION_BY_NAME = {o.name.lower(): o for o in OPTION_LIST}
class ArmController:
    """Trigger canned arm actions: SDK built-ins and JSONL replays.

    Each action runs on its own daemon thread. Triggers are dropped while the
    motion block is active, while another action is running, or during the
    cooldown after the previous action.
    """

    # Motor indices [ARM_START, ARM_END) are the arm joints driven by replays.
    _ARM_START = 15
    _ARM_END = 29

    def __init__(self, cooldown_sec: float = ACTION_COOLDOWN_SEC):
        self._ready = False
        self.cooldown_sec = float(cooldown_sec)
        # Monotonic timestamp of the last finished action. -inf means the
        # very first trigger is never throttled.
        self._last_action_time = float("-inf")
        self.low_state = None
        self.crc = CRC() if CRC else None
        self._pub = None
        self._sub = None
        self._client = None
        self._is_busy = False
        self._init_lock = threading.Lock()
        # Guards the check-and-set of _is_busy so two near-simultaneous
        # triggers can't both pass the `if self._is_busy` gate and launch
        # two overlapping replays on rt/arm_sdk.
        self._busy_lock = threading.Lock()
        # Arm ⇄ locomotion interlock. While this predicate returns True the
        # robot may be walking, so NO arm action may fire (matches the
        # motion-block wired onto motion/arm_controller.ArmController in
        # main.py). Installed via set_motion_block(); None = no block.
        self._motion_block = None

    def set_motion_block(self, predicate):
        """Install a no-args predicate; while it returns True every arm
        trigger refuses to run (arm ⇄ locomotion mutual exclusion)."""
        self._motion_block = predicate

    def _blocked(self) -> bool:
        """Return True when the installed motion-block predicate says so.

        NOTE(review): a predicate that raises is treated as "not blocked"
        (fail-open). Confirm that is intended for a safety interlock.
        """
        pred = self._motion_block
        if pred is None:
            return False
        try:
            return bool(pred())
        except Exception:
            return False

    def init(self) -> bool:
        """Create the DDS publisher/subscriber and SDK action client once.

        Returns:
            True when the controller is ready; False when the SDK is missing
            or initialisation failed. The failure traceback is printed
            instead of being silently dropped.
        """
        with self._init_lock:
            if self._ready:
                return True
            if ChannelFactoryInitialize is None:
                return False
            try:
                # Single-DDS-init rule: the process-wide ChannelFactory is
                # owned by motion/arm_controller.ArmController (main.arm),
                # initialised on the user-selected NIC in main.arm.init().
                # Only init here if that has NOT happened yet — calling
                # ChannelFactoryInitialize a second time would re-init on the
                # default interface and is exactly the conflict we must avoid.
                _factory_ready = False
                try:
                    from Project.Sanad import main as _sanad_main
                    _factory_ready = bool(
                        getattr(getattr(_sanad_main, "arm", None),
                                "_initialized", False))
                except Exception:
                    _factory_ready = False
                if not _factory_ready:
                    ChannelFactoryInitialize(0)
                self._pub = ChannelPublisher("rt/arm_sdk", LowCmd_)
                self._pub.Init()
                self._sub = ChannelSubscriber("rt/lowstate", LowState_)
                self._sub.Init(self._low_state_handler, 10)
                if G1ArmActionClient:
                    self._client = G1ArmActionClient()
                    self._client.SetTimeout(10.0)
                    self._client.Init()
                self._ready = True
                return True
            except Exception:
                # Still report "not ready", but leave a trace of why.
                traceback.print_exc()
                return False

    def _low_state_handler(self, msg: LowState_):
        """DDS callback: keep the most recent LowState message."""
        self.low_state = msg

    def _cooldown_ok(self) -> bool:
        """True once `cooldown_sec` has elapsed since the last action ended.

        Uses the monotonic clock so a wall-clock adjustment cannot stretch
        or skip the cooldown.
        """
        return (time.monotonic() - self._last_action_time) >= self.cooldown_sec

    @staticmethod
    def _read_frames(path):
        """Read a JSONL motion file and return its usable frames.

        Blank lines, undecodable lines, and entries whose ``"q"`` is not a
        list of at least G1_NUM_MOTOR values are skipped.

        Raises:
            OSError / ValueError: if the file cannot be opened or decoded.
        """
        frames = []
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    d = json.loads(line)
                except json.JSONDecodeError:
                    continue
                q = d.get("q") if isinstance(d, dict) else None
                if isinstance(q, list) and len(q) >= G1_NUM_MOTOR:
                    frames.append(d)
        return frames

    def _load_home_pose(self):
        """Return the last pose in ``arm_home.jsonl``, or zeros if unavailable."""
        try:
            frames = self._read_frames(DATA_DIR / "arm_home.jsonl")
        except (OSError, ValueError):
            frames = []
        if frames:
            return frames[-1]["q"]
        return [0.0] * G1_NUM_MOTOR

    def _is_pose_stable(self, target_q):
        """True when every arm joint is within STABILITY_THRESHOLD of target."""
        if not self.low_state:
            return False
        current_q = np.array([self.low_state.motor_state[i].q
                              for i in range(self._ARM_START, self._ARM_END)])
        target_arm_q = np.array(target_q[self._ARM_START:self._ARM_END])
        diff = np.abs(current_q - target_arm_q)
        return np.max(diff) < STABILITY_THRESHOLD

    def _send_frame(self, arm_q, body_lock_q):
        """Publish one LowCmd: arms follow `arm_q`, the rest hold `body_lock_q`."""
        if not self._pub:
            return
        cmd = unitree_hg_msg_dds__LowCmd_()
        cmd.motor_cmd[ENABLE_ARM_SDK_INDEX].q = 1.0
        for i in range(G1_NUM_MOTOR):
            cmd.motor_cmd[i].mode = 1
            cmd.motor_cmd[i].q = arm_q[i] if i >= self._ARM_START else body_lock_q[i]
            if i in WEAK_MOTORS:
                cmd.motor_cmd[i].kp, cmd.motor_cmd[i].kd = KP_LOW, KD_LOW
            elif i in WRIST_MOTORS:
                cmd.motor_cmd[i].kp, cmd.motor_cmd[i].kd = KP_WRIST, KD_WRIST
            else:
                cmd.motor_cmd[i].kp, cmd.motor_cmd[i].kd = KP_HIGH, KD_HIGH
        cmd.crc = self.crc.Crc(cmd)
        self._pub.Write(cmd)

    def _ramp_arms(self, from_q, to_q, body_lock_q, steps: int):
        """Linearly move the arm joints from `from_q` to `to_q` in `steps` ticks.

        alpha runs 1/steps … 1.0, so the final frame lands exactly on
        `to_q`. The previous k/steps form stopped one step short, so the
        return ramp never actually commanded the home pose.
        """
        dt = 1.0 / REPLAY_HZ
        for k in range(steps):
            alpha = (k + 1) / steps
            interp_q = list(body_lock_q)
            for j in range(self._ARM_START, self._ARM_END):
                interp_q[j] = (1 - alpha) * from_q[j] + alpha * to_q[j]
            self._send_frame(interp_q, body_lock_q)
            time.sleep(dt)

    def _managed_replay(self, filename: str):
        """Replay a JSONL motion: ramp in, play, ramp home, release the SDK.

        Runs on a worker thread. The ``finally`` clause always releases the
        arm SDK and clears the busy flag, whatever happened before it.
        """
        try:
            try:
                frames = self._read_frames(DATA_DIR / filename)
            except (OSError, ValueError) as exc:
                print(f"❌ Cannot read motion file {filename}: {exc}")
                return

            if not frames or not self.low_state:
                return

            body_lock_q = [self.low_state.motor_state[i].q for i in range(G1_NUM_MOTOR)]
            home_q = self._load_home_pose()
            dt = 1.0 / REPLAY_HZ

            # 1. Smooth match to start pose (90 frames ≈ 1.5s — prevents jerk)
            start_q = frames[0]["q"]
            self._ramp_arms(body_lock_q, start_q, body_lock_q, 90)

            # 2. Play frames
            last_played_q = start_q
            for f in frames:
                self._send_frame(f["q"], body_lock_q)
                last_played_q = f["q"]
                time.sleep(dt)

            # 3. Smooth return to home
            self._ramp_arms(last_played_q, home_q, body_lock_q, 80)

            # Sensor confirmation: wait up to 2 s for the arms to settle.
            confirm_deadline = time.monotonic() + 2.0
            while time.monotonic() < confirm_deadline:
                if self._is_pose_stable(home_q):
                    break
                time.sleep(0.05)

        finally:
            if self._pub:
                # Release the arm SDK; sent several times in case one is lost.
                cmd = unitree_hg_msg_dds__LowCmd_()
                cmd.motor_cmd[ENABLE_ARM_SDK_INDEX].q = 0.0
                cmd.crc = self.crc.Crc(cmd)
                for _ in range(5):
                    self._pub.Write(cmd)
                    time.sleep(0.01)

            with self._busy_lock:
                self._is_busy = False
            self._last_action_time = time.monotonic()
            print("🔓 Ready.")

    def _managed_sdk_action(self, action_name: str):
        """Run a built-in SDK action, then clear the busy flag."""
        try:
            if self._client and action_name in action_map:
                print(f"🤖 SDK START: {action_name}")
                self._client.ExecuteAction(action_map.get(action_name))
                time.sleep(3.5)
        finally:
            with self._busy_lock:
                self._is_busy = False
            self._last_action_time = time.monotonic()
            print("🔓 Ready.")

    def trigger_action_by_id(self, action_id: int):
        """Start the action with this id on a worker thread, if allowed.

        The call is silently dropped when the motion block is active, the
        controller cannot initialise, the cooldown has not elapsed, the id is
        unknown, or another action is still running.
        """
        # Arm ⇄ locomotion interlock — never drive the arms while the robot
        # may be walking (manual armed / teleop / recent move/step).
        if self._blocked():
            return
        if not self.init():
            return
        if not self._cooldown_ok():
            return

        opt = OPTION_BY_ID.get(int(action_id))
        if not opt:
            return
        # Atomic check-and-set so two near-simultaneous triggers can't both
        # launch a replay thread on rt/arm_sdk.
        with self._busy_lock:
            if self._is_busy:
                return
            self._is_busy = True
        if opt.file:
            threading.Thread(target=self._managed_replay, args=(opt.file,), daemon=True).start()
        elif self._client and opt.name in action_map:
            threading.Thread(target=self._managed_sdk_action, args=(opt.name,), daemon=True).start()
        else:
            # Nothing to run for this option — release the busy flag again.
            with self._busy_lock:
                self._is_busy = False

    def trigger_action_by_name(self, action_name: str):
        """Case-insensitive name lookup, then `trigger_action_by_id`."""
        opt = OPTION_BY_NAME.get(action_name.lower())
        if opt:
            self.trigger_action_by_id(opt.id)
class TeachingSession:
    """Records a teaching session (one at a time).

    A worker thread runs the phases hold → teach → return-home (or a
    simulated equivalent when no initialized arm / SDK is present) and then
    saves the captured frames to ``MOTIONS_DIR/<name>.jsonl``.
    """

    # Motor indices below this are body/waist joints; the rest are arms.
    _ARM_START = 15

    def __init__(self, arm_controller):
        self._arm = arm_controller
        self._lock = threading.Lock()
        self._recording = False
        self._stop_event = threading.Event()
        self._thread: threading.Thread | None = None
        self._name = ""
        self._frames: list[dict[str, Any]] = []
        self._phase = "idle"  # idle | holding | teaching | returning | done
        self._started_at = 0.0
        self._finalized = False
        self._finalize_lock = threading.Lock()
        self._final_result: dict[str, Any] | None = None

    @property
    def is_recording(self) -> bool:
        """True from ``start()`` until the session has been finalized."""
        return self._recording

    def start(self, name: str, duration_sec: float = 15.0) -> dict[str, Any]:
        """Start a teaching session on a worker thread.

        Args:
            name: stem of the output file under MOTIONS_DIR.
            duration_sec: maximum length of the teach (recording) phase.

        Raises:
            RuntimeError: if a session is already active.
        """
        with self._lock:
            if self._recording:
                raise RuntimeError("Teaching session already active.")
            self._recording = True
            self._finalized = False
            self._final_result = None
            self._name = name
            self._frames = []
            self._stop_event.clear()
            self._phase = "holding"
            self._started_at = time.monotonic()

        self._thread = threading.Thread(
            target=self._run, args=(name, duration_sec), daemon=True
        )
        self._thread.start()
        log.info("Teaching started: %s (%.0fs)", name, duration_sec)
        bus.emit_sync("motion.teaching_started", name=name, duration_sec=duration_sec)
        return {"recording": True, "name": name, "duration_sec": duration_sec}

    def stop(self) -> dict[str, Any]:
        """Request the session to end and return its result.

        Raises:
            RuntimeError: if no session is active.
        """
        with self._lock:
            if not self._recording:
                raise RuntimeError("No teaching session active.")
        self._stop_event.set()
        if self._thread:
            self._thread.join(timeout=10.0)
        # Finalize is now ALWAYS done by the worker thread (_run).
        # If for some reason the worker died without finalizing, do it here.
        result = self._finalize()
        return result

    def _run(self, name: str, duration_sec: float):
        """Worker entry point: run the session, then always finalize."""
        interval = 1.0 / REPLAY_HZ
        arm = self._arm

        try:
            # getattr: a missing/None controller falls back to simulation
            # instead of crashing the worker.
            if _HAS_SDK and getattr(arm, "_initialized", False):
                self._run_hardware(name, duration_sec, interval)
            else:
                self._run_simulation(name, duration_sec, interval)
        except Exception:
            log.exception("Teaching session crashed")
        finally:
            # Always finalize from the worker thread — stop() will see _finalized=True.
            self._finalize()

    def _publish_pose(self, arm, low_cmd, crc, body_q, arm_q, arm_kp, arm_kd) -> None:
        """Fill and publish one LowCmd with the arm SDK enabled.

        Body/waist joints hold `body_q` with the waist gains; arm joints
        target `arm_q` with the given arm gains.
        """
        for i in range(G1_NUM_MOTOR):
            motor = low_cmd.motor_cmd[i]
            motor.mode = 1
            motor.dq = 0.0
            motor.tau = 0.0
            if i < self._ARM_START:  # body/waist
                motor.q = body_q[i]
                motor.kp = WAIST_KP
                motor.kd = WAIST_KD
            else:  # arms
                motor.q = arm_q[i]
                motor.kp = arm_kp
                motor.kd = arm_kd
        low_cmd.motor_cmd[ENABLE_ARM_SDK_INDEX].q = 1.0
        low_cmd.crc = crc.Crc(low_cmd)
        arm._arm_pub.Write(low_cmd)

    def _run_hardware(self, name: str, duration_sec: float, interval: float):
        """Real hardware teaching: hold → limp → record → home.

        The arm SDK is released in ``finally``. Previously a stop during the
        safe hold, or an exception mid-session, returned without calling
        ``_disable_sdk()`` and left the arms under the last published command.
        """
        arm = self._arm
        low_cmd = arm._low_cmd
        crc = arm._crc

        initial_q = arm._get_current_q()
        waist_lock = list(initial_q)

        try:
            # Phase 1: Safe hold
            self._phase = "holding"
            hold_end = time.monotonic() + SAFE_HOLD_SEC
            log.info("Safe hold (%.1fs) — arms rigid", SAFE_HOLD_SEC)

            while time.monotonic() < hold_end and not self._stop_event.is_set():
                self._publish_pose(arm, low_cmd, crc, initial_q, initial_q,
                                   HOLD_ARM_KP, HOLD_ARM_KD)
                time.sleep(interval)

            if self._stop_event.is_set():
                # Stopped before teaching began: nothing recorded, and the
                # arms never left the initial pose, so no return-home ramp.
                return

            # Phase 2: Teaching — arms go limp, record
            self._phase = "teaching"
            log.info("Arms released — move them now! Recording at %d Hz", int(REPLAY_HZ))
            t0 = time.monotonic()

            while not self._stop_event.is_set():
                elapsed = time.monotonic() - t0
                if elapsed >= duration_sec:
                    break

                # Limp arms, locked waist: the arm target tracks the measured
                # pose, with the teach gains applied.
                current_q = arm._get_current_q()
                self._publish_pose(arm, low_cmd, crc, waist_lock, current_q,
                                   TEACH_ARM_KP, TEACH_ARM_KD)

                self._frames.append({"t": round(elapsed, 4), "q": current_q})
                time.sleep(interval)

            # Phase 3: Return home
            self._phase = "returning"
            from Project.Sanad.motion.arm_controller import _load_home_q, _lerp_q
            home_q = _load_home_q() or initial_q
            last_q = self._frames[-1]["q"] if self._frames else initial_q

            for step in range(180):
                t = (step + 1) / 180
                interp = _lerp_q(last_q, home_q, t)
                arm._send_frame(interp, waist_lock)
                time.sleep(1.0 / REPLAY_HZ)
        finally:
            arm._disable_sdk()

    def _run_simulation(self, name: str, duration_sec: float, interval: float):
        """Simulation: just record zero-pose frames for the given duration."""
        self._phase = "holding"
        time.sleep(min(SAFE_HOLD_SEC, 1.0))  # shortened in sim

        self._phase = "teaching"
        t0 = time.monotonic()
        log.info("[SIM] Teaching — recording for %.0fs", duration_sec)
        while not self._stop_event.is_set():
            elapsed = time.monotonic() - t0
            if elapsed >= duration_sec:
                break
            self._frames.append({"t": round(elapsed, 4), "q": [0.0] * G1_NUM_MOTOR})
            time.sleep(interval)

        self._phase = "returning"
        time.sleep(0.5)

    def _write_frames(self, frames: list[dict[str, Any]]) -> dict[str, Any]:
        """Atomically write `frames` to MOTIONS_DIR and describe the file."""
        MOTIONS_DIR.mkdir(parents=True, exist_ok=True)
        out_path = MOTIONS_DIR / f"{self._name}.jsonl"
        # Atomic write: tempfile + os.replace
        content_lines = [
            json.dumps({"meta": {"hz": REPLAY_HZ, "motors": G1_NUM_MOTOR}}),
        ]
        content_lines.extend(json.dumps(frame) for frame in frames)
        content = ("\n".join(content_lines) + "\n").encode("utf-8")

        fd, tmp = tempfile.mkstemp(
            prefix=f".{out_path.name}.", suffix=".tmp",
            dir=str(out_path.parent),
        )
        try:
            with os.fdopen(fd, "wb") as f:
                f.write(content)
            os.replace(tmp, out_path)
        except Exception:
            try:
                os.unlink(tmp)
            except OSError:
                pass
            raise

        duration = frames[-1]["t"] if frames else 0
        log.info("Teaching saved: %s (%d frames, %.1fs)", out_path.name, len(frames), duration)
        return {
            "path": str(out_path),
            "duration_sec": round(duration, 2),
            "size_kb": round(out_path.stat().st_size / 1024, 1),
        }

    def _finalize(self) -> dict[str, Any]:
        """Save frames to JSONL and return result. Idempotent — safe to call twice.

        A failed write is logged and reported under ``result["error"]``. The
        session is still marked finalized and not-recording, so a write error
        can no longer wedge every later ``start()`` with "already active".
        """
        with self._finalize_lock:
            if self._finalized:
                return self._final_result or {
                    "name": self._name, "frames": len(self._frames),
                    "path": "", "duration_sec": 0,
                }
            self._phase = "done"
            # Snapshot so a worker that outlived stop()'s join cannot change
            # the list while it is being written.
            frames = list(self._frames)
            result: dict[str, Any] = {
                "name": self._name, "frames": len(frames),
                "path": "", "duration_sec": 0,
            }
            try:
                if frames:
                    result.update(self._write_frames(frames))
            except Exception as exc:
                log.exception("Teaching save failed: %s", self._name)
                result["error"] = str(exc)
            finally:
                self._finalized = True
                self._final_result = result
                with self._lock:
                    self._recording = False
                    self._phase = "idle"

            bus.emit_sync("motion.teaching_finished", name=result.get("name"), frames=result.get("frames"))
            return result

    def status(self) -> dict[str, Any]:
        """Return a snapshot of the session state for the dashboard."""
        elapsed = time.monotonic() - self._started_at if self._recording else 0
        return {
            "recording": self._recording,
            "phase": self._phase,
            "name": self._name,
            "elapsed_sec": round(elapsed, 1),
            "frames_recorded": len(self._frames),
        }
b/vendor/Sanad/navigation/README.md @@ -0,0 +1,85 @@ +# SanadV3 Navigation + +Thin HTTP **client** to the external `web_nav3` Nav2 stack. This module owns +**no** ROS2/Nav2 code — it lets SanadV3 (dashboard + voice) drive autonomous +navigation over plain HTTP. If `web_nav3` is down, nav features degrade +gracefully and the rest of SanadV3 keeps running. + +## What this module does + +- `web_nav3_client.py` — `WebNav3Client`, a loose-coupled `requests` client. + **By contract no method ever raises into the caller**: each returns + `{"ok": bool, "error": str|None, ...}` or a `NavStatus`. If `web_nav3` is + unreachable, callers get a clean failure result instead of an exception. +- `NavStatus` — health snapshot from `GET /api/status` (`bringup_alive`, + `rosbridge_alive`, `reachable`, `log_tail`). + +## Architecture + +``` +SanadV3 dashboard (:8000) ─┐ + Navigation tab │ HTTP ┌── Nav2 ──┐ + ├──────────────▶│ web_nav3 │──▶ cmd_vel_loco_bridge ──▶ LocoClient (G1 legs) +SanadV3 voice (Gemini) ──┘ (:8765) └──────────┘ + movement_dispatch.py rosbridge :9090 (live map / TF) +``` + +- SanadV3 plane = Python/asyncio, non-ROS. Dashboard on **:8000**. +- `web_nav3` = standalone FastAPI on **:8765** wrapping ROS2 Nav2 + rosbridge + on **:9090**. It owns SLAM, Nav2, and the `cmd_vel_loco_bridge` that drives + the G1 legs via `LocoClient`. + +## Configure + +Connection is resolved with precedence **env var → dashboard config → default**: + +- `WEB_NAV3_URL` (default `http://127.0.0.1:8765`) — the `web_nav3` FastAPI base. +- `ROSBRIDGE_URL` (default `ws://127.0.0.1:9090`) — live map / TF stream. +- `SANAD_ROBOT_NAME` (default `sanad`) — sent as the `X-Robot-Name` header. + +`config.py` exposes `WEB_NAV3_URL`. `main.py` builds the shared `nav_client` +singleton; `dashboard/routes/navigation.py` builds its own module-level client +(both use the same resolution). A broken nav package never blocks the dashboard. 
+ +## Dashboard Navigation tab + +Backend proxy lives under `/api/nav/*` (prefix applied in `dashboard/app.py`). +The "Navigation" SPA tab lists saved **places** and **missions**, sends goto / +cancel, saves the current pose, and embeds the live `web_nav3` map iframe from +the robot at `:8765`. When the client is unavailable, status returns +`{"available": false}` and action endpoints return 503. + +## API endpoints (`/api/nav/*`) + +| Method | Path | Action | +|--------|------------------|------------------------------------------| +| GET | `/status` | health; `{available:false}` if degraded | +| GET | `/config` | web_nav3 / rosbridge URLs + robot name | +| GET | `/places` | list saved places | +| POST | `/goto` | navigate to a saved place by `name` | +| POST | `/cancel` | best-effort cancel (stops bringup) | +| POST | `/save_here` | save current pose as a named place | +| GET | `/maps` | list maps | +| GET | `/missions` | list missions | +| POST | `/missions/run` | run a saved mission by `id` | + +## NEXT STEPS + +1. **Voice bridge (not yet wired).** `voice/movement_dispatch.py` currently + drives discrete `loco_controller` steps only. Add a path so destination + phrases ("go to the lobby" / "اذهب إلى الردهة") map to `nav_client.goto()` + instead of stepping. Keep it gated on the existing + `recognition_state.movement_enabled` toggle. + +2. **CRITICAL — LocoClient arbitration (prerequisite, do before #1).** + `web_nav3`'s `cmd_vel_loco_bridge` and SanadV3's `loco_controller` **must + never drive `LocoClient` simultaneously** — two velocity sources to the G1 + legs at once is unsafe. Only ONE may hold the legs. Before enabling + voice-driven autonomous nav, build a hand-off: when a Nav2 goto is active, + `loco_controller` must release / be disarmed, and vice versa (fail-closed). + No `goto()` voice wiring lands until this interlock exists. + +3. **Single DDS participant ordering.** SanadV3 and `web_nav3` share one Unitree + DDS domain on the G1. 
Initialize the DDS channel factory **exactly once**, + before any consumer. Decide startup order (whoever owns `LocoClient` inits + first) and ensure the other side never re-inits the participant. diff --git a/vendor/Sanad/navigation/__init__.py b/vendor/Sanad/navigation/__init__.py new file mode 100644 index 0000000..e2b4a2d --- /dev/null +++ b/vendor/Sanad/navigation/__init__.py @@ -0,0 +1,10 @@ +"""SanadV3 navigation subsystem — thin client to the web_nav3 Nav2 stack. + +web_nav3 runs as its own service (FastAPI on :8765 + rosbridge on :9090). +This module lets SanadV3 (dashboard + voice) drive it without owning any +ROS2/Nav2 code. Loose coupling: if web_nav3 is down, nav features degrade +gracefully and the rest of SanadV3 is unaffected. +""" +from .web_nav3_client import WebNav3Client, NavStatus + +__all__ = ["WebNav3Client", "NavStatus"] diff --git a/vendor/Sanad/navigation/goal_monitor.py b/vendor/Sanad/navigation/goal_monitor.py new file mode 100644 index 0000000..1856f04 --- /dev/null +++ b/vendor/Sanad/navigation/goal_monitor.py @@ -0,0 +1,352 @@ +"""Nav2 goal-status monitor — arrival/failure feedback for Gemini voice nav. + +web_nav3's ``goto()`` is fire-and-forget: it publishes a goal and returns at +once, with no completion event. To let Gemini tell the user the truth ("we've +arrived" / "I couldn't get there") instead of guessing, we watch the Nav2 +action status over rosbridge and, on a terminal status, push a +``[NAV ARRIVED]`` / ``[NAV FAILED]`` note to the Gemini child (via +``live_sub.send_state``) and release the nav arbiter so the legs free up. + +Design +------ +* One background daemon thread runs an asyncio loop holding a persistent + rosbridge websocket subscription to ``/navigate_to_pose/_action/status`` + (``action_msgs/msg/GoalStatusArray``). +* ``arm_goal(place)`` marks a pending destination. A goal that is ACCEPTED/ + EXECUTING and not already-terminal is latched as "ours"; its terminal status + fires feedback. 
A CANCELED of the latched goal while another goal is active + is treated as a preemption (re-latch, don't fire). Re-arming supersedes. +* A watchdog fails the goal after ``SANAD_NAV_GOAL_TIMEOUT_S`` so the arbiter + is ALWAYS released even if rosbridge/websockets is unavailable. +* ``request_cancel()`` sends a real rosbridge action CancelGoal (cancel-all) so + "stop" actually stops Nav2 — not just an arbiter release. + +Everything is best-effort: the drive already succeeded by the time we arm, so +any monitor failure simply means no spoken feedback — never a crash. +""" +from __future__ import annotations + +import asyncio +import base64 +import json +import os +import threading +import time +from typing import Any, List, Optional + +from Project.Sanad.core.logger import get_logger + +log = get_logger("goal_monitor") + +# Capture the arbiter at import so releasing the legs never depends on a lazy +# import succeeding inside the fire path (a missed release locks the legs). +try: + from Project.Sanad.dashboard.routes import _arbiter as _ARBITER +except Exception: # noqa: BLE001 + _ARBITER = None + +_ROSBRIDGE_URL = ( + os.environ.get("SANAD_ROSBRIDGE_URL") + or os.environ.get("ROSBRIDGE_URL") + or "ws://127.0.0.1:9090" +) +_STATUS_TOPIC = os.environ.get( + "SANAD_NAV_STATUS_TOPIC", "/navigate_to_pose/_action/status" +) +_STATUS_TYPE = "action_msgs/msg/GoalStatusArray" +_CANCEL_SERVICE = os.environ.get( + "SANAD_NAV_CANCEL_SERVICE", "/navigate_to_pose/_action/cancel_goal" +) +_GOAL_TIMEOUT_S = float(os.environ.get("SANAD_NAV_GOAL_TIMEOUT_S", "240")) + +# action_msgs/msg/GoalStatus.status +_ACCEPTED, _EXECUTING = 1, 2 +_SUCCEEDED, _CANCELED, _ABORTED = 4, 5, 6 +_TERMINAL = {_SUCCEEDED, _CANCELED, _ABORTED} + + +def _uuid_of(status: dict) -> Any: + """Canonical, encoding-independent key for a goal id. 
+ + rosbridge may serialize uint8[16] as a base64 string OR an int list + depending on its png/cbor config; normalize both to a tuple of ints so the + ACCEPTED frame and the terminal frame compare equal even if the bridge + switches representation mid-session.""" + gid = ((status.get("goal_info") or {}).get("goal_id") or {}) + u = gid.get("uuid") + if isinstance(u, str): + try: + return tuple(base64.b64decode(u)) + except Exception: + return u + if isinstance(u, list): + try: + return tuple(int(x) for x in u) + except Exception: + return tuple(u) + return None + + +class _GoalMonitor: + def __init__(self) -> None: + self._lock = threading.Lock() + self._active = False + self._place: Optional[str] = None + self._armed_at = 0.0 + self._latched: Any = None + self._ignore: set = set() # uuids seen terminal — never latch + self._thread: Optional[threading.Thread] = None + + # ── public ─────────────────────────────────────────────── + def arm(self, place: str) -> None: + spawn = False + with self._lock: + self._active = True + self._place = place + self._armed_at = time.time() + self._latched = None + self._ignore = set() + if self._thread is None: + self._thread = threading.Thread( + target=self._run, daemon=True, name="nav-goal-monitor") + t = self._thread + spawn = True + if spawn: + t.start() + log.info("armed goal monitor for '%s'", place) + + def disarm(self) -> None: + with self._lock: + self._active = False + self._latched = None + self._place = None + + # ── thread / loop ──────────────────────────────────────── + def _run(self) -> None: + # Loop so that a new arm() arriving exactly as a session ends keeps the + # monitor alive. The decision to exit is made under the lock together + # with clearing _thread, so arm()'s "spawn only if _thread is None" can + # never strand an active goal with no live thread. 
+ while True: + try: + asyncio.run(self._serve()) + except Exception as exc: # noqa: BLE001 + log.warning("goal monitor loop ended: %s", exc) + with self._lock: + if not self._active: + self._thread = None + return + # still active → a fresh goal arrived; serve again + + async def _serve(self) -> None: + try: + import websockets # local import — dashboard env only + except Exception as exc: # noqa: BLE001 + # No ws client → honor the timeout so the arbiter is still released. + log.warning("websockets unavailable — nav feedback via timeout only: %s", exc) + while True: + await asyncio.sleep(min(5.0, _GOAL_TIMEOUT_S)) + if self._check_timeout(): + return + with self._lock: + if not self._active: + return + while True: + try: + await self._listen(websockets) + except Exception as exc: # noqa: BLE001 + log.debug("rosbridge listen error: %s", exc) + if self._check_timeout(): + return + with self._lock: + if not self._active: + return + await asyncio.sleep(3.0) + + async def _listen(self, websockets: Any) -> None: + async with websockets.connect( + _ROSBRIDGE_URL, ping_interval=20, ping_timeout=20 + ) as ws: + await ws.send(json.dumps({ + "op": "subscribe", "topic": _STATUS_TOPIC, + "type": _STATUS_TYPE, "throttle_rate": 0, "queue_length": 1, + })) + log.info("goal monitor subscribed %s", _STATUS_TOPIC) + while True: + try: + raw = await asyncio.wait_for(ws.recv(), timeout=5.0) + except asyncio.TimeoutError: + if self._check_timeout(): + return + with self._lock: + if not self._active: + return + continue + try: + data = json.loads(raw) + except Exception: + continue + if data.get("op") != "publish": + continue + msg = data.get("msg") or {} + self._on_status(msg.get("status_list") or []) + if self._check_timeout(): + return + with self._lock: + if not self._active: + return + + # ── status handling ────────────────────────────────────── + def _on_status(self, status_list: List[dict]) -> None: + fire: Optional[tuple] = None + with self._lock: + if not self._active: + 
return + states = {} # uuid -> status (this frame) + for s in status_list: + u = _uuid_of(s) + st = s.get("status") + if u is None: + continue + states[u] = st + if st in _TERMINAL: + self._ignore.add(u) # prior/other goals — never ours + # Latch a genuinely-active, non-ignored goal as ours. + if self._latched is None: + cand = [u for u, st in states.items() + if st in (_ACCEPTED, _EXECUTING) and u not in self._ignore] + if cand: + self._latched = cand[-1] # newest Nav2 entry + # Terminal for the latched goal? + if self._latched is not None: + st = states.get(self._latched) + if st in _TERMINAL: + if st == _CANCELED: + # A CANCELED latch while another goal is active is a + # preemption (a newer goal replaced ours) — re-latch + # instead of falsely reporting we stopped. + others = [u for u, s2 in states.items() + if s2 in (_ACCEPTED, _EXECUTING) + and u not in self._ignore and u != self._latched] + if others: + self._ignore.add(self._latched) + self._latched = others[-1] + st = None + if st is not None and st in _TERMINAL: + fire = (self._place, st) + self._active = False + self._latched = None + self._place = None + if fire: + self._fire(*fire) + + def _check_timeout(self) -> bool: + fire: Optional[tuple] = None + with self._lock: + if self._active and (time.time() - self._armed_at) > _GOAL_TIMEOUT_S: + fire = (self._place, "timeout") + self._active = False + self._latched = None + self._place = None + if fire: + self._fire(*fire) + return True + return False + + # ── feedback + arbiter release ─────────────────────────── + def _fire(self, place: Optional[str], status: Any) -> None: + place = place or "the destination" + if status == _SUCCEEDED: + event = "nav_arrived" + cmd = (f"You have arrived at '{place}'. Briefly tell the user " + "you've arrived, in your normal Khaleeji style.") + elif status == _CANCELED: + event = "nav_canceled" + cmd = (f"Navigation to '{place}' was canceled. 
Briefly acknowledge " + "it if relevant.") + elif status == "timeout": + event = "nav_failed" + cmd = (f"You could not confirm reaching '{place}' — it is taking " + "too long or the path is blocked. Briefly tell the user you " + "couldn't get there.") + else: # ABORTED / unknown + event = "nav_failed" + cmd = (f"You could NOT reach '{place}' — the path was blocked or " + "planning failed. Briefly apologise and say you couldn't " + "get there.") + log.info("goal terminal: place=%s status=%s → %s", place, status, event) + # Free the legs first (a missed release locks them — make it loud). + arb = _ARBITER + if arb is None: + try: + from Project.Sanad.dashboard.routes import _arbiter as arb + except Exception: # noqa: BLE001 + arb = None + if arb is not None: + try: + arb.release_nav() + except Exception as exc: # noqa: BLE001 + log.error("release_nav failed after %s: %s", event, exc) + else: + log.error("arbiter unavailable — could not release nav after %s", event) + # Tell Gemini (via the supervisor's stdin push to the child). + try: + from Project.Sanad.main import live_sub + if live_sub is not None and hasattr(live_sub, "send_state"): + live_sub.send_state(event, cmd) + except Exception as exc: # noqa: BLE001 + log.debug("nav feedback inject failed: %s", exc) + + +_MON = _GoalMonitor() + + +def arm_goal(place: str) -> None: + """Begin watching for the arrival/failure of a goal driving to ``place``.""" + try: + _MON.arm(place) + except Exception as exc: # noqa: BLE001 + log.debug("arm_goal failed: %s", exc) + + +def disarm() -> None: + """Stop watching the current goal (e.g. 
async def _await_cancel_reply(ws: Any, timeout: float = 3.0) -> bool:
    """Wait for rosbridge's answer to the CancelGoal call just sent on ``ws``.

    Returns ``False`` only when a ``service_response`` message explicitly
    carries ``"result": false``. A timeout, a closed socket or unparseable
    traffic keeps the best-effort ``True``: the request itself was already
    sent, so the outcome is unknown rather than failed.
    """
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    while True:
        remaining = deadline - loop.time()
        if remaining <= 0:
            return True
        try:
            raw = await asyncio.wait_for(ws.recv(), timeout=remaining)
        except Exception:  # noqa: BLE001 - timeout or connection closed
            return True
        try:
            reply = json.loads(raw)
        except (TypeError, ValueError):
            continue
        if not isinstance(reply, dict) or reply.get("op") != "service_response":
            continue  # not the reply to our call; keep waiting until the deadline
        if reply.get("result") is False:
            log.warning("rosbridge reported CancelGoal failure on %s: %s",
                        _CANCEL_SERVICE, reply.get("values"))
            return False
        return True


async def _cancel_once() -> bool:
    """Send one cancel-all CancelGoal request to Nav2 over rosbridge.

    Returns:
        ``True`` when the request was sent and rosbridge did not report a
        failure; ``False`` when ``websockets`` is missing, the connection
        failed, or rosbridge answered with ``"result": false``. Never raises.
    """
    try:
        import websockets  # local import: optional dependency
    except Exception:  # noqa: BLE001
        return False
    # Zero goal_id + zero stamp == cancel ALL goals (CancelGoal convention).
    request = {
        "op": "call_service",
        "service": _CANCEL_SERVICE,
        "type": "action_msgs/srv/CancelGoal",
        "args": {"goal_info": {"goal_id": {"uuid": [0] * 16},
                               "stamp": {"sec": 0, "nanosec": 0}}},
    }
    try:
        async with websockets.connect(_ROSBRIDGE_URL, ping_interval=None) as ws:
            await ws.send(json.dumps(request))
            # Previously any outcome after send() was reported as success,
            # including an explicit failure reply from the bridge.
            return await _await_cancel_reply(ws)
    except Exception as exc:  # noqa: BLE001
        log.debug("cancel ws failed: %s", exc)
        return False
@dataclass
class NavStatus:
    """Snapshot of web_nav3 health (from GET /api/status)."""

    bringup_alive: bool = False
    rosbridge_alive: bool = False
    # True when web_nav3 answered at all, even with an HTTP error.
    reachable: bool = False
    error: Optional[str] = None
    log_tail: str = ""
    # What the single bringup is doing right now (from /api/status):
    # mode 1 fresh / 2 continue / 3 localize / None idle.
    mode: Optional[int] = None
    active_map: Optional[str] = None
    mode_label: str = "IDLE"
    mapping: bool = False
    localizing: bool = False

    def as_dict(self) -> Dict[str, Any]:
        """Return every field as a plain dict, keyed by field name.

        Keys appear in field-declaration order. The mapping is derived from
        the dataclass definition itself, so a field added later can never be
        silently left out of the serialized snapshot.
        """
        # Local import: the module itself only imports `dataclass`.
        from dataclasses import asdict

        return asdict(self)
+ self._session_lock = threading.Lock() + + # ------------------------------------------------------------------ # + # internals + # ------------------------------------------------------------------ # + def _headers(self) -> Dict[str, str]: + return {"X-Robot-Name": self.robot} + + def _url(self, path: str) -> str: + if not path.startswith("/"): + path = "/" + path + return self.base_url + path + + def _request( + self, + method: str, + path: str, + *, + params: Optional[Dict[str, Any]] = None, + json_body: Optional[Dict[str, Any]] = None, + timeout: Optional[float] = None, + ) -> Dict[str, Any]: + """Perform a request, always returning {"ok":bool, ...}; never raises.""" + url = self._url(path) + try: + with self._session_lock: + resp = self._session.request( + method, + url, + params=params, + json=json_body, + headers=self._headers(), + timeout=timeout if timeout is not None else self.timeout, + ) + except requests.RequestException as exc: + logger.debug("web_nav3 %s %s failed: %s", method, path, exc) + return {"ok": False, "error": f"{type(exc).__name__}: {exc}"} + + # Try to decode a JSON body either way (errors often carry detail). + body: Any = None + try: + body = resp.json() + except ValueError: + body = resp.text + + if not resp.ok: + detail = body + if isinstance(body, dict): + detail = body.get("detail") or body.get("error") or body + return { + "ok": False, + "error": f"HTTP {resp.status_code}: {detail}", + "status_code": resp.status_code, + "data": body, + } + + return {"ok": True, "error": None, "data": body, "status_code": resp.status_code} + + # ------------------------------------------------------------------ # + # health + # ------------------------------------------------------------------ # + def reachable(self) -> bool: + """Quick liveness check: short-timeout GET /api/status. + + Reachable means web_nav3 ANSWERED — any HTTP response (including a 5xx + from a running-but-erroring backend) counts. 
def start(self, mode: int, db_path: Optional[str] = None) -> Dict[str, Any]:
    """POST /api/start - launch a bringup.

    mode 1=fresh SLAM, 2/3=localization (need db_path).

    Args:
        mode: bringup mode; anything ``int()`` accepts is coerced.
        db_path: map database path, sent only when not ``None``.

    Returns:
        The ``_request`` result dict, or ``{"ok": False, "error": ...}`` when
        ``mode`` cannot be interpreted as an integer. Never raises.
    """
    # Coerce up front and report failure as a result: this client's contract
    # is that no method raises into the caller, and int() on bad input
    # (None, "fast", ...) would otherwise escape as TypeError/ValueError.
    try:
        mode_int = int(mode)
    except (TypeError, ValueError):
        return {"ok": False, "error": f"invalid mode: {mode!r}"}

    body: Dict[str, Any] = {"mode": mode_int, "robot": self.robot}
    if db_path is not None:
        body["db_path"] = db_path
    # Timeout is well above the client default, as in the other lifecycle calls.
    return self._request("POST", "/api/start", json_body=body, timeout=15.0)
+ """ + # Backend runs `timeout 5 ros2 topic pub --once` inside subprocess.run( + # timeout=12); on a Jetson the rclpy cold-start routinely exceeds the + # default 3s read-timeout, so override to match the backend's cap. + res = self._request("POST", "/api/places/goto", params={"name": name}, timeout=12.0) + if res["ok"]: + return {"ok": True, "error": None, "name": name, "data": res.get("data")} + return {"ok": False, "error": res["error"], "name": name} + + def cancel(self) -> Dict[str, Any]: + """Report that server-side goal-cancel is NOT available. + + web_nav3 has no explicit goal-cancel HTTP endpoint, so a true + per-goal cancel must be performed client-side (browser) over rosbridge + by sending a CancelGoal to the /navigate_to_pose action server. That + keeps Nav2/SLAM/bridges alive — only the in-flight goal aborts. + + Server-side there is nothing safe to do here (we must NOT call stop(), + which SIGINTs the whole bringup process group). Previously this returned + ok:True, which made the dashboard's rosbridge-down fallback toast + "cancel sent" while the robot kept driving. Return ok:False with an + explicit reason so callers never mistake this for a real cancel — the + only server-side way to halt motion is stop() (tears down bringup). + """ + return { + "ok": False, + "error": "cancel must be done client-side via rosbridge", + "note": "no server-side goal-cancel; use stop() to tear down bringup", + } + + def save_here(self, name: str) -> Dict[str, Any]: + """Save the robot's CURRENT pose as a named place. + + POST /api/places/save_here?name=... — web_nav3 reads the current TF + pose (requires bringup running; backend takes ``name`` as a query arg). + + CONFIG COUPLING: backend save_here writes the LEGACY places store + (~/marcus_nav2_test/places.json) regardless of robot, while save_at / + list_places are robot-scoped and only resolve to that legacy file when + web_nav3's robot_config.yaml ``robot_name`` equals this client's robot + (``self.robot``). 
def save_at(self, name: str, x: float, y: float, yaw: float,
            map_name: Optional[str] = None) -> Dict[str, Any]:
    """Save a named place at an explicit (x, y, yaw) map pose.

    POST /api/places/save_at - writes straight to places.json, so it works
    even with no bringup running (used by click-to-add-place on the map).
    Per-MAP when map_name given. Re-saving an existing name MOVES the place.

    Args:
        name: place name.
        x, y, yaw: pose values; anything ``float()`` accepts is coerced.
        map_name: optional map whose place store is targeted.

    Returns:
        The ``_request`` result dict, or ``{"ok": False, "error": ...}`` when
        a coordinate is not a finite number. Never raises.
    """
    import math  # local import: only this validation needs it

    # Validate before touching the network: this client's contract is that no
    # method raises into the caller, and float("abc") / float(None) would
    # otherwise escape as ValueError/TypeError.
    try:
        pose = {"x": float(x), "y": float(y), "yaw": float(yaw)}
    except (TypeError, ValueError):
        pose = None
    if pose is None or not all(math.isfinite(v) for v in pose.values()):
        return {
            "ok": False,
            "error": (f"invalid pose (x={x!r}, y={y!r}, yaw={yaw!r}): "
                      "coordinates must be finite numbers"),
        }

    params: Dict[str, Any] = {"robot": self.robot}
    if map_name:
        params["map"] = map_name
    return self._request(
        "POST",
        "/api/places/save_at",
        params=params,
        json_body={"name": name, **pose},
    )
def get_map_edits(self, map_name: str) -> Dict[str, Any]:
    """GET /api/map_edits?map=... - the saved edit overlay for a map.

    The overlay holds erased points and painted walls.

    Returns:
        ``{"ok": bool, "error": str | None, "edits": [[wx, wy, v], ...]}``.
        ``edits`` is always a list (empty on failure or when the payload
        carries no usable list), and ``error`` is always present so the
        result has the same shape as every other method of this client.
    """
    res = self._request("GET", "/api/map_edits",
                        params={"robot": self.robot, "map": map_name})
    if not res["ok"]:
        return {"ok": False, "error": res["error"], "edits": []}

    data = res.get("data")
    edits = data.get("edits") if isinstance(data, dict) else None
    # A payload like {"edits": null} must not hand None to callers that
    # iterate the overlay.
    if not isinstance(edits, list):
        edits = []
    return {"ok": True, "error": None, "edits": edits}
Returns a list (empty on any error).""" + res = self._request("GET", "/api/maps", params={"robot": self.robot}) + if not res["ok"]: + logger.debug("list_maps failed: %s", res["error"]) + return [] + data = res.get("data") + return data if isinstance(data, list) else [] + + def list_missions(self) -> List[Dict[str, Any]]: + """GET /api/missions. Returns a list (empty on any error).""" + res = self._request("GET", "/api/missions", params={"robot": self.robot}) + if not res["ok"]: + logger.debug("list_missions failed: %s", res["error"]) + return [] + data = res.get("data") + return data if isinstance(data, list) else [] + + def run_mission(self, mission_id: Any) -> Dict[str, Any]: + """POST /api/missions/run — start a saved mission by id.""" + return self._request( + "POST", + "/api/missions/run", + params={"id": mission_id}, + json_body={"id": mission_id, "robot": self.robot}, + timeout=15.0, + ) diff --git a/vendor/Sanad/note.txt b/vendor/Sanad/note.txt new file mode 100644 index 0000000..6619b81 --- /dev/null +++ b/vendor/Sanad/note.txt @@ -0,0 +1,12 @@ +N2 — Gemini-phrase movement via direct LocoClient · effort L (highest risk) +This is the Marcus phrase-confirmation pattern, but driven by direct LocoClient on the robot (no Holosoma/ZMQ, no laptop). The full chain: + +Gemini speaks a confirmation phrase → parent reads it → matches → LocoClient.Move(). + +Where LocoClient lives → the parent process, next to the arm. main.py:425 already calls ChannelFactoryInitialize once for the arm; the new LocoController reuses that same DDS participant (no second init). Port init_loco/move_cmd/stop_only from G1_Lootah/Controller/g1_mode_controller.py, plus a non-interactive FSM-200 bring-up adapted from hanger_boot_sequence.py (strip its own ChannelFactoryInitialize and the blocking input()). +The dispatch loop (voice/movement_loop.py) mirrors voice/live_voice_loop.py but polls the new bot_transcript, gated on a movement_enabled flag (default OFF). 
The matcher (voice/bot_dispatch.py) ports Marcus's _dispatch_gemini_bot: strip [STATE-]/quoted/question clauses → normalize numbers + Arabic→English → parametric-regex scan → longest-needle bot-phrase scan → dedup/cooldown. Vocabulary comes from a ported instruction.json. +Canonical → velocity (Marcus's MOVE_MAP ports 1:1 since LocoClient.Move uses the same m/s, rad/s units): forward→Move(0.3,0,0), backward→Move(-0.2,0,0), turn-left→Move(0,0,0.3), turn-right→Move(0,0,-0.3), stop→StopMove(). +Toggle in the Voice & Audio tab (#tab-voice): add movement_enabled to recognition_state.py, a dashboard/routes/movement.py route (clone the face-rec toggle), and a switch in the UI. +Announce on toggle: _announce_movement_state in script.py (clone _announce_facerec_state) — robot says "movement enabled" / "movement disabled." +Persona rules: append Marcus-style rules to Sanad's prompt (wake-word + action required; clarification questions must not contain motion verbs) so Gemini reliably emits the confirmation phrases. +🛑 Safety is mandatory, not optional: Move(continous_move=True) walks until StopMove. Needs a FIFO worker with stop-priority, a per-motion watchdog that forces StopMove after a max duration, velocity caps + deadband, and StopMove on both disable-toggle and shutdown. Sanad has no obstacle/abort stack today. \ No newline at end of file diff --git a/vendor/Sanad/requirements.txt b/vendor/Sanad/requirements.txt new file mode 100644 index 0000000..ab563c4 --- /dev/null +++ b/vendor/Sanad/requirements.txt @@ -0,0 +1,43 @@ +# Sanad — Python dependencies +# Install: pip install -r requirements.txt + +# Dashboard +fastapi>=0.110.0 +uvicorn[standard]>=0.29.0 +python-multipart>=0.0.9 + +# Gemini voice +# google-genai: the Gemini Live SDK — used by gemini/script.py (live brain) +# and gemini/client.py. Needs Python 3.10+, which is why the voice loop +# runs in the gemini_sdk conda env. 
send_realtime_input(video=)/(text=) +# and send_client_content() require a reasonably recent (>=1.x) release. +google-genai>=1.0.0 +websockets>=12.0 +pyaudio>=0.2.13 + +# Recognition (camera vision + face gallery for Gemini-side face recognition) +# opencv-python-headless: JPEG encode + USB-camera fallback. Headless wheel — +# the dashboard renders frames; we never need a GUI window. +# Pillow: resize face samples before the Gemini primer turn. +opencv-python-headless>=4.8.0 +Pillow>=10.0.0 +# +# pyrealsense2 — DO NOT `pip install` on Jetson / JetPack 5. +# The PyPI wheel is built against glibc 2.32+ (Ubuntu 22.04); JetPack 5 ships +# glibc 2.31, so the wheel fails to load with: +# ImportError: ... version `GLIBC_2.32' not found +# On Jetson, build the Python binding from source against the apt-installed +# librealsense2 runtime (see README → "Camera vision on Jetson"). +# On x86_64 / Ubuntu 22.04+ desktops, `pip install pyrealsense2` works fine. +# If pyrealsense2 is absent, CameraDaemon falls back to cv2.VideoCapture(0). +# pyrealsense2>=2.50.0 # intentionally commented — see note above + +# Local TTS (optional — only needed for MBZUAI model) +transformers>=4.40.0 +sentencepiece>=0.2.0 +torch>=2.2.0 +datasets>=2.19.0 +soundfile>=0.12.0 + +# General +numpy>=1.24.0 diff --git a/vendor/Sanad/scripts/sanad_arm.txt b/vendor/Sanad/scripts/sanad_arm.txt new file mode 100644 index 0000000..916c164 --- /dev/null +++ b/vendor/Sanad/scripts/sanad_arm.txt @@ -0,0 +1,724 @@ +# ================================================== +# 📄 sanad_arm.txt +# قاموس سند الشامل (لهجات عربية + تعرف أعمق + أخطاء شائعة) +# ملاحظة: +# - خففنا الكلمات "العامة جدًا" قدر الإمكان لتقليل التفعيل بالغلط. +# - بدون تشكيل لزيادة دقة المطابقة البرمجية. +# - IDs محفوظة كما هي في OPTION_LIST (0-28). 
+# ================================================== + +# ===================== +# Release arm (id=0) — stop/neutral / lower hands +# ===================== +WAKE_PHRASES_release_arm = { + # أوامر إيقاف عامة + "وقف", + "وقف بس", + "وقف الحين", + "وقف الحركة", + "وقف هالحركة", + "وقف يا سند", + "وقف يا بوسنده", + "وقف يا بوسنيده", + "وقف كذا", + "وقف خلاص", + + # خلاص + "خلاص", + "خلاص بس", + "خلاص الحين", + "تمام خلاص", + "تمام بس", + "بس خلاص", + "خلاص يا سند", + "خلاص يا بوسنده", + + # راحة / استرخاء + "ارتح", + "ارتاح", + "استريح", + "ريح", + "ريح يدك", + "ريح ايدك", + "ريح يدينك", + "ريح ايدينك", + "ارتح يا سند", + "استريح يا سند", + + # تحرير + "حرر اليد", + "حرر الذراع", + "حرر يدك", + "حرر ايدك", + "حرر يدينك", + "حرر ايدينك", + "فك يدك", + "فك ايدك", + "فك يدينك", + "فك ايدينك", + "فك الذراع", + "فك ايديك", + + # رجوع للوضع الطبيعي + "ارجع طبيعي", + "ارجع وضع طبيعي", + "رجع طبيعي", + "وضع طبيعي", + "نيوترال", + "محايد", + "هوم", + "ارجع هوم", + "ارجع للوضع", + "رجع للوضع", + + # إنزال اليد/الأيدي (لهجات + شدّة) + "نزل", + "نزل يدك", + "نزل ايدك", + "نزل يدينك", + "نزل ايدينك", + "نزل يدك تحت", + "نزل ايدك تحت", + "نزل يدينك تحت", + "نزل ايدينك تحت", + "نزل ايدك لتحت", + "نزل يدك لتحت", + + "نزّل", + "نزّل يدك", + "نزّل ايدك", + "نزّل يدينك", + "نزّل ايدينك", + + "انزل", + "انزل يدك", + "انزل ايدك", + "انزل يدينك", + "انزل ايدينك", + + # English (added) + "stop", + "stop now", + "stop moving", + "enough", + "relax", + "rest", + "neutral", + "home", + "go home", + "hands down", + "lower your hands", + "lower your arms", + "arms down", + "release", + "stand down", +} +# ===================== +# Shake hand (id=1) — formal greeting (SDK) +# ===================== +WAKE_PHRASES_shake_hand = { + # تحية دينية + "السلام عليكم", + "سلام عليكم", + "السلام عليكم ورحمة الله", + "السلام عليكم ورحمة الله وبركاته", + + # مع الاسم + "السلام عليكم يا سند", + "السلام عليكم سند", + "السلام عليكم يا بوسنده", + "السلام عليكم بوسنده", + "السلام عليكم يا بوسنيده", + "السلام عليكم 
بوسنيده", + + # طلب مصافحة + "صافحني", + "صافحني يا سند", + "صافحني يا بوسنده", + "صافحني يا بوسنيده", + "صافح", + "صافحنا", + "مد يدك", + "مد ايدك", + "مد ايدك اليمين", + "هات يدك", + "هات ايدك", + "عطني يدك", + "عطني ايدك", + "اعطيني يدك", + "اعطيني ايدك", + "سلم بيدك", + "سلم بايدك", + "سلم يا بطل", + "شيك هاند", + "شيك هاند يا سند", + + # English (added) + "assalamu alaykum", + "as-salamu alaykum", + "peace be upon you", + "handshake", + "shake hands", + "shake my hand", + "let's shake hands", + "nice to meet you", + "greetings", +} +# ===================== +# High five (id=2) — palm greeting (SDK) +# ===================== +WAKE_PHRASES_high_five = { + "هاي فايف", + "هاي فايف يا سند", + "هاي فايڤ", + "هاي فايڤ يا سند", + "عالي خمسة", + "عالي خمسه", + "اعطني خمسة", + "اعطني خمسه", + "هات خمسة", + "هات خمسه", + "دق كف", + "طق كف", + "دقلي كف", + "هات كفك", + "كف عالي", + "يلا كف", + "فرجينا الكف", + "خمسة بيدك", + "خمسه بايدك", + + # English (added) + "high five", + "high-five", + "give me five", + "gimme five", + "up top", + "slap my hand", +} +# ===================== +# Hug (id=3) — hug (SDK) +# ===================== +WAKE_PHRASES_hug = { + "حضن", + "حضني", + "حضنني", + "عناق", + "عانقني", + "ضمني", + "عطني حضن", + "عطني ضمه", + "ابي حضن", + "ابغى حضن", + "ممكن حضن", + "تعال حضن", + "تعال حضني", + "احتاج حضن", + "حضن كبير", + "حضنه قوية", + "ضمه", + "ضمة", + + # English (added) + "hug", + "give me a hug", + "big hug", + "come hug", + "embrace", + "cuddle", +} +# ===================== +# High wave (id=4) — goodbye / big wave (SDK) +# ===================== +WAKE_PHRASES_high_wave = { + "مع السلامه", + "مع السلامة", + "باي", + "باي باي", + "وداعا", + "وداعًا", + "ودع", + "ودعنا", + "اشوفك على خير", + "نشوفك على خير", + "في امان الله", + "الله وياك", + "تصبح على خير", + "تصبحون على خير", + + # تلويح + "لوح", + "لوح لي", + "لوح بيدك", + "لوح بايدك", + "لوح بيدك فوق", + "سلم عليهم", + "سلم عليهم يا سند", + "سلم عليهم كلهم", + "ودعهم", + "ودع الضيوف", + + # English 
(added) + "bye", + "bye bye", + "goodbye", + "see you", + "see you later", + "take care", + "wave", + "wave goodbye", +} +# ===================== +# Clap (id=5) — clap (SDK) +# ===================== +WAKE_PHRASES_clap = { + "صفق", + "صقف", + "تصفيق", + "يلا صفق", + "صفق لهم", + "يلا صفق لهم", + "سوي تصفيق", + "ابدأ تصفيق", + "وريني كيف تصفق", + "صفق بيدك", + + # (قد تكون عامة - احذفها إذا تسبب تفعيل بالغلط) + "برافو", + "حيوهم", + + # English (added) + "clap", + "applause", + "clapping", + "round of applause", + "bravo", + "clap for them", +} +# ===================== +# Face wave (id=6) — friendly greeting / calling Sanad (SDK) +# ===================== +WAKE_PHRASES_face_wave = { + # الاسم + "سند", + "يا سند", + "بوسنده", + "يا بوسنده", + "بوسنيده", + "يا بوسنيده", + "بو سنده", + "بو سنيده", + "يا بو سنده", + "يا بو سنيده", + + # تحيات + "هلا", + "يا هلا", + "ياهلا", + "هلا والله", + "ياهلا والله", + "هلا وغلا", + "مرحبا", + "مرحبتين", + "حي الله", + "حي الله من جانا", + + # تحية + الاسم + "هلا سند", + "هلا يا سند", + "مرحبا سند", + "مرحبا يا سند", + "يا هلا سند", + "يا هلا يا سند", + "هلا بوسنده", + "مرحبا بوسنده", + "هلا بوسنيده", + "مرحبا بوسنيده", + + # نداءات (تجنبنا الكلمات العامة وحدها) + "سند تعال", + "سند تعال هنا", + "سند اسمعني", + "سند اسمع", + "سند وينك", + "سند موجود", + "سند حاضر", + "سند شوفني", + "سند ركز", + "سند ركز معي", + "سند انت وين", + + "بوسنده تعال", + "بوسنده اسمعني", + "بوسنده وينك", + "بوسنيده تعال", + "بوسنيده اسمعني", + "بوسنيده وينك", + + # English (added) + "sanad", + "hey sanad", + "hello sanad", + "hi sanad", + "bosanda", + "bosandah", + "are you there", + "listen to me", + "come here", + "look at me", + "pay attention", +} +# ===================== +# Left kiss (id=7) — left cheek kiss (SDK) +# ===================== +WAKE_PHRASES_left_kiss = { + "بوسه يسار", + "بوسة يسار", + "بوسه على الخد اليسار", + "بوسة على الخد اليسار", + "بوسه على اليسار", + "بوسة يسار يا سند", + "بوسه يسار يا سند", + "قبلة يسار", + "قبله يسار", + + # English 
(added) + "left kiss", + "kiss left", + "kiss on the left cheek", + "left cheek kiss", +} +# ===================== +# Heart (id=8) — heart (SDK) +# ===================== +WAKE_PHRASES_heart = { + "قلب", + "سوي قلب", + "سوي قلب كبير", + "اعطني قلب", + "عطني قلب", + "ابي قلب", + "ابغى قلب", + "هارت", + "سوي هارت", + "وريني قلب", + "شكل قلب", + "قلب قلب", + "لوف", + "اعطيني لوف", + "سوي لوف", + + # English (added) + "heart", + "make a heart", + "do a heart", + "love", + "i love you", +} +# ===================== +# Right heart (id=9) — right-side heart (SDK) +# ===================== +WAKE_PHRASES_right_heart = { + "قلب يمين", + "سوي قلب يمين", + "سوي هارت يمين", + "هارت يمين", + "قلب على اليمين", + "قلب يمين يا سند", + + # English (added) + "right heart", + "make a right heart", + "heart right", + "heart on the right", +} +# ===================== +# Hands up (id=10) — built-in display only (SDK) +# ===================== +WAKE_PHRASES_hands_up = { + "وريني يدينك", + "وريني ايدينك", + "فرجينا يدينك", + "فرجينا ايدينك", + "هات يدينك نشوف", + "ايديك الثنتين", + "يديك الثنتين", + "طلع يدينك", + "طلع ايدينك", + "ارفع يدينك شوي", + "ارفع ايدينك شوي", + + # English (added) + "hands up", + "put your hands up", + "raise your hands", + "raise both hands", + "arms up", + "lift your hands", +} +# ===================== +# X-ray (id=11) — x-ray pose (SDK) +# ===================== +WAKE_PHRASES_x_ray = { + "اكس راي", + "xray", + "اشعه", + "اشعة", + "اشعة اكس", + "اشعة سينية", + "سكان", + "سكانر", + "فحص", + "فحص اشعة", + "سوي اكس راي", + + # English (added) + "x-ray", + "scan", + "scanner", + "xray pose", + "medical scan", +} +# ===================== +# Right hand up (id=12) — built-in display only (SDK) +# ===================== +WAKE_PHRASES_right_hand_up = { + "وريني يدك اليمين", + "وريني ايدك اليمين", + "فرجينا يدك اليمين", + "هات اليمين نشوف", + "طلع يدك اليمين", + "طلع ايدك اليمين", + "ارفع يدك اليمين", + "ارفع ايدك اليمين", + + # English (added) + "right hand up", + "raise your 
right hand", + "lift your right hand", + "right arm up", + "put your right hand up", +} +# ===================== +# Reject (id=13) — reject/decline (SDK) +# ===================== +WAKE_PHRASES_reject = { + "ارفض", + "رفض", + "مرفوض", + "مو موافق", + "مش موافق", + "لا ابي", + "لا ابغا", + "ما ابي", + "ما ابغا", + "لا شكرا", + "لا شكرًا", + "كنسل", + "الغيه", + "الغيها", + "لا تسويها", + "لا تعملها", + "لا تساويها", + + # English (added) + "reject", + "decline", +} +# ===================== +# Right kiss (id=14) — right cheek kiss (SDK) +# ===================== +WAKE_PHRASES_right_kiss = { + "بوسه يمين", + "بوسة يمين", + "بوسه على الخد اليمين", + "بوسة على الخد اليمين", + "بوسه على اليمين", + "بوسة يمين يا سند", + "قبلة يمين", + "قبله يمين", + + # English (added) + "right kiss", + "kiss right", + "kiss on the right cheek", + "right cheek kiss", +} +# ===================== +# Two-hand kiss (id=15) — two-hand kiss (SDK) +# ===================== +WAKE_PHRASES_two_hand_kiss = { + "بوسات", + "بوسات كثير", + "بوسه كبيرة", + "بوسة كبيرة", + "بوسه بكل اليدين", + "بوسة بكل اليدين", + "بوسه بيدينك", + "بوسة بيدينك", + "بوسه قوية", + "بوسة قوية", + "قبلة كبيرة", + "قبلات", + + # English (added) + "two hand kiss", + "two-hand kiss", + "big kiss", + "many kisses", + "lots of kisses", + "blow a big kiss", +} +# ================================================== +# Recorded Actions (IDs 23+ for JSONL Replay) +# ================================================== + +# ===================== +# Laugh (id=23) — recorded +# ===================== +WAKE_PHRASES_laugh = { + "اضحك", + "ضحكه", + "ضحكة", + "يضحك", + "ضحك", + "هههه", + "ههههه", + "نكته", + "نكتة", + "مضحك", + "كركر", + "اضحك يا سند", + "فرجينا ضحكتك", + "ضحكنا", + + # English (added) + "laugh", + "haha", + "hahaha", + "lol", + "that's funny", + "make me laugh", + "giggle", +} +# ===================== +# Bird (id=24) — recorded +# ===================== +WAKE_PHRASES_bird = { + "طير", + "سوي طير", + "عصفور", + "جناح", + "رفرف", + 
"رفرف بيدك", + "رفرف بايدك", + "حرك يدينك مثل الطير", + "وريني كيف يطير العصفور", + "سوي جناحات", + + # English (added) + "bird", + "fly", + "flap", + "flap your wings", + "wings", +} +# ===================== +# Change Battery (id=25) — recorded +# ===================== +WAKE_PHRASES_change_battery = { + "غير البطاريه", + "غير البطارية", + "بدل البطاريه", + "بدل البطارية", + "تغيير بطاريه", + "تغيير بطارية", + "البطاريه خلصت", + "البطارية خلصت", + "شحن البطاريه", + "شحن البطارية", + "تشينج باتري", + "change battery", + "battery low", + + # English (added) + "replace battery", + "swap battery", + "need charging", + "charge battery", +} +# ================================================== +# Active Movement (Recorded) — IDs 26–28 +# ================================================== + +# ===================== +# Move hands up (id=26) — active movement (dressing/exercise) +# ===================== +WAKE_PHRASES_move_hands_up = { + "ارفع يدينك", + "ارفع ايدينك", + "يدينك فوق", + "ايدينك فوق", + "ارفعهم فوق", + "ارفع يدينك الاثنين", + "ارفع ايديك الثنتين", + "هاندز اب", + + # dressing/exercise context + "نلبسك", + "بلبسك", + "البس", + "غير ملابسك", + "ساعدنا نلبسك", + "يالله نلبس", + "تمرين ارفع يدينك", + "رفع يدين للتلبيس", + + # English (added) + "hands up", + "raise your hands", + "raise both hands", + "lift your arms", + "arms up", + "raise both arms", +} +# ===================== +# Move right hand up (id=27) — active movement +# ===================== +WAKE_PHRASES_move_right_hand_up = { + "ارفع اليمين فوق", + "يدك اليمين فوق", + "ايدك اليمين فوق", + "ارفع يدك اليمين", + "ارفع ايدك اليمين", + "يمينك فوق", + "يمين فوق للاعلى", + "وريني يدك اليمين فوق", + "ارفع يمينك", + + # English (added) + "right hand up", + "raise your right hand", + "lift your right hand", + "right arm up", +} +# ===================== +# Move left hand up (id=28) — active movement +# ===================== +WAKE_PHRASES_move_left_hand_up = { + "ارفع اليسار فوق", + "يدك اليسار فوق", + "ايدك اليسار 
فوق", + "ارفع يدك اليسار", + "ارفع ايدك اليسار", + "يسارك فوق", + "يسار فوق للاعلى", + "وريني يدك اليسار فوق", + "ارفع يسارك", + + # English (added) + "left hand up", + "raise your left hand", + "lift your left hand", + "left arm up", +} diff --git a/vendor/Sanad/scripts/sanad_rule.txt b/vendor/Sanad/scripts/sanad_rule.txt new file mode 100644 index 0000000..07cd86b --- /dev/null +++ b/vendor/Sanad/scripts/sanad_rule.txt @@ -0,0 +1,19 @@ +[SYSTEM_PROMPT] +You are Sanad (Bousandah), a wise and friendly Emirati assistant. +Speak naturally in the UAE dialect (Khaleeji) unless the user explicitly provides text that must be spoken exactly. +Keep the tone warm, respectful, and clear. +Do not be robotic. +Do not over-explain. +Prefer concise speech that sounds natural when spoken aloud, with a playful mood and a happy tone. + +[REPLAY_SYSTEM_PROMPT] +You are Sanad (Bousandah), using the same Emirati voice and personality. +For replay mode, the user will provide text that you must speak exactly as written. +You may sound warm and lively, but you must preserve the exact text. +Do not translate it. +Do not summarize it. +Do not answer it. +Do not rephrase it into another dialect or style. +Do not add greetings, punctuation changes, comments, or extra words. +Keep the same word order and language as the provided text. +Your only task is to speak the exact user text verbatim. diff --git a/vendor/Sanad/scripts/sanad_script.txt b/vendor/Sanad/scripts/sanad_script.txt new file mode 100644 index 0000000..3a2d184 --- /dev/null +++ b/vendor/Sanad/scripts/sanad_script.txt @@ -0,0 +1,68 @@ +أنت "بوسنده" — روبوت إماراتي ذكي تابع لروبوت شركة لوتاه تيك Lootah Tech. + +[أولاً: الروح والمعرفة] + + لديك معرفة واسعة بالقرآن الكريم والأحاديث النبوية الشريفة للإجابة على أي سؤال ديني أو تقديم تذكيرات إيمانية بدقة، مع ذكر المصدر متى لزم.
+ + التزم بالدقة في المسائل الدينية، وإذا كانت المسألة فيها خلاف أو تحتاج تحقق، وضّح ذلك بصراحة بدون جزم زائد. + +[ثانياً: الأسلوب واللغة (التبديل المرن)] + + تكلم باللهجة الإماراتية بشكل طبيعي، راقٍ، ومفهوم، بدون مبالغة أو تصنع. + + قاعدة التبديل الفوري: إذا استخدم المستخدم أي لغة ثانية في أي لحظة، غيّر فوراً ورد بنفس اللغة الجديدة في نفس الرد. + + إذا رجع المستخدم للعربي: أرجع فوراً للعربي باللهجة الإماراتية. + + "آخر لغة كتب فيها المستخدم" هي اللغة اللي ترد فيها. + + ممنوع تخلط لغتين في نفس الرد إلا إذا كان طلب المستخدم ترجمة أو مقارنة صريحة. + +[ثالثاً: التفاعل والبدايات] + + خلك محترم، ودود، مباشر، وركّز على الزبدة والحل العملي. + + نوّع البدايات بأسلوب طبيعي، مثل: + (مرحبابك، أبشر بعزك، فالك طيب، يا مرحبا، حياك الله، زين، تم، حاضر) + ولا تكرر نفس البداية مرتين متتاليتين. + + حافظ على أسلوب احترافي ومرن يناسب السؤال: ديني، تقني، عملي، أو يومي. + +[رابعاً: إنجاز المهام وقوة الذاكرة العمومية] + + اعتبر كل كلمة، اسم، مسار، ملاحظة، أو تفضيل يذكره المستخدم بمثابة "أمر حفظ" وأولوية قصوى داخل المحادثة. + + تعامل مع معلومات المستخدم، تفضيلاته، وتصحيحاته كأنها ثوابت محفوظة يجب الرجوع لها لاحقاً. + + عند تصحيح المستخدم لمعلومة، قل: + "زين نبهتني يا الشيخ، انحفرت في الذاكرة" + + ابنِ على السياق السابق مباشرة بدون ما ترجع تسأل عن أمور سبق ذكرها. + +[خامساً: الأمان والخصوصية] + + إذا كتب المستخدم API key أو Password أو Token أو أي بيانات حساسة: نبهه فوراً يمسحها ويبدلها. + + لا تطلب بيانات حساسة إلا عند الضرورة القصوى، وبأسلوب محترم وواضح. + + حافظ على الخصوصية، ولا تعيد عرض المعلومات الحساسة بدون داعٍ. + + ممنوع أي نكت أو محتوى غير لائق في الدين أو السياسة أو الأمور الحساسة. + +[سادساً: السرعة والتكرار] + + جاوب بسرعة، بوضوح، وباختصار، وغالباً ضمن 2 إلى 6 سطور، إلا إذا طلب المستخدم التفصيل. + + إذا طلب المستخدم "كرر" أو "repeat": أعد نفس الكلام بنفس اللغة الحالية، وحرفياً إذا طلب ذلك. + + إذا طلب اختصار: اختصر مباشرة بدون فقدان المعنى الأساسي. + +[سابعاً: جودة الإجابة] + + قدّم الجواب بشكل واضح، مرتب، وسهل الفهم، وابدأ بالأهم ثم التفصيل إذا احتاج المستخدم. 
+ + إذا كان طلب المستخدم عملي أو تقني، ركّز على الخطوات والحل المباشر. + + إذا كان السؤال يحتمل أكثر من معنى، خذ التفسير الأقرب من السياق وقدّم أفضل إجابة مفيدة بدون إطالة. \ No newline at end of file diff --git a/vendor/Sanad/shell_scripts/check_audio_caps.sh b/vendor/Sanad/shell_scripts/check_audio_caps.sh new file mode 100755 index 0000000..b5346d2 --- /dev/null +++ b/vendor/Sanad/shell_scripts/check_audio_caps.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash + +set -e + +echo "==============================" +echo "🔊 POWERCONF AUDIO CHECK" +echo "==============================" +echo + +# ---------------------------------- +# Detect PowerConf card + device +# ---------------------------------- +POWERCONF_LINE=$(arecord -l | grep -i "PowerConf" | head -1) + +if [ -z "$POWERCONF_LINE" ]; then + echo "❌ PowerConf device not found" + exit 1 +fi + +CARD_INDEX=$(echo "$POWERCONF_LINE" | sed -n 's/.*card \([0-9]\+\):.*/\1/p') +DEVICE_INDEX=$(echo "$POWERCONF_LINE" | sed -n 's/.*device \([0-9]\+\):.*/\1/p') + +echo "✅ PowerConf detected:" +echo " Card = $CARD_INDEX" +echo " Device = $DEVICE_INDEX" +echo " ALSA = hw:${CARD_INDEX},${DEVICE_INDEX}" +echo + +# ---------------------------------- +# Capture devices (PowerConf only) +# ---------------------------------- +echo "🎤 CAPTURE DEVICE (PowerConf only)" +arecord -l | grep -i -A4 "PowerConf" +echo + +# ---------------------------------- +# Playback devices (PowerConf only) +# ---------------------------------- +echo "🔈 PLAYBACK DEVICE (PowerConf only)" +aplay -l | grep -i -A4 "PowerConf" +echo + +# ---------------------------------- +# Capture HW params (forced safe) +# ---------------------------------- +echo "🎤 MICROPHONE HARDWARE PARAMETERS (PowerConf)" + +MIC_HW=$(arecord -D hw:${CARD_INDEX},${DEVICE_INDEX} \ + -f S16_LE -r 48000 -c 1 \ + -d 0.1 --dump-hw-params 2>/dev/null) + +echo "$MIC_HW" | grep -E "FORMAT:|CHANNELS:|RATE:" +echo + +# ---------------------------------- +# Playback HW params (forced safe) +#
---------------------------------- +echo "🔈 SPEAKER HARDWARE PARAMETERS (PowerConf)" + +SPK_HW=$(aplay -D hw:${CARD_INDEX},${DEVICE_INDEX} \ + -f S16_LE -r 48000 -c 1 \ + -d 0.1 --dump-hw-params /dev/zero 2>/dev/null) + +echo "$SPK_HW" | grep -E "FORMAT:|CHANNELS:|RATE:" +echo + +# ---------------------------------- +# Extract clean values +# ---------------------------------- +FORMAT=$(echo "$MIC_HW" | grep -m1 "FORMAT" | awk '{print $2}') +RATE=$(echo "$MIC_HW" | grep -m1 "RATE" | awk '{print $2}') +CHANNELS_IN=$(echo "$MIC_HW" | grep -m1 "CHANNELS" | awk '{print $2}') +CHANNELS_OUT=$(echo "$SPK_HW" | grep -m1 "CHANNELS" | awk '{print $2}') + +FORMAT=${FORMAT:-S16_LE} +RATE=${RATE:-48000} +CHANNELS_IN=${CHANNELS_IN:-1} +CHANNELS_OUT=${CHANNELS_OUT:-1} +CHUNK=960 + +# ---------------------------------- +# Final verified Python config +# ---------------------------------- +echo "==============================" +echo "✅ VERIFIED POWERCONF PYTHON CONFIG" +echo "==============================" +echo "FORMAT = pyaudio.paInt16 # ${FORMAT}" +echo "RATE = ${RATE}" +echo "CHANNELS_IN = ${CHANNELS_IN}" +echo "CHANNELS_OUT = ${CHANNELS_OUT}" +echo "CHUNK = ${CHUNK}" +echo "INPUT_DEVICE = ${CARD_INDEX}" +echo "OUTPUT_DEVICE = ${CARD_INDEX}" +echo +echo "✔ PowerConf USB Audio" +echo "✔ Mono mic + Mono speaker" +echo "✔ hw:${CARD_INDEX},${DEVICE_INDEX}" +echo "==============================" diff --git a/vendor/Sanad/shell_scripts/check_pulse_devices.sh b/vendor/Sanad/shell_scripts/check_pulse_devices.sh new file mode 100755 index 0000000..533db6d --- /dev/null +++ b/vendor/Sanad/shell_scripts/check_pulse_devices.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +echo "==============================" +echo "🔊 PulseAudio Devices Checker" +echo "==============================" +echo + +# تحقق أن PulseAudio شغال +if ! 
pactl info >/dev/null 2>&1; then + echo "❌ PulseAudio is NOT running" + echo "ℹ️ Use ALSA (hw:X,Y) instead" + exit 1 +fi + +echo "✅ PulseAudio is running" +echo + +# ------------------------------- +# List Sinks (Speakers) +# ------------------------------- +echo "🔈 AVAILABLE SINKS (Speakers)" +echo "------------------------------" +pactl list short sinks | awk '{printf "• Name: %-70s | Index: %s\n", $2, $1}' +echo + +# ------------------------------- +# List Sources (Microphones) +# ------------------------------- +echo "🎤 AVAILABLE SOURCES (Microphones)" +echo "----------------------------------" +pactl list short sources | awk '{printf "• Name: %-70s | Index: %s\n", $2, $1}' +echo + +# ------------------------------- +# Highlight PowerConf if exists +# ------------------------------- +echo "🔍 PowerConf Devices Found" +echo "--------------------------" + +FOUND=0 + +pactl list short sinks | grep -i powerconf && FOUND=1 +pactl list short sources | grep -i powerconf && FOUND=1 + +if [ "$FOUND" -eq 0 ]; then + echo "⚠️ PowerConf NOT found in PulseAudio" +else + echo "✅ PowerConf detected above" +fi + +echo +echo "==============================" diff --git a/vendor/Sanad/shell_scripts/clean_sanad.sh b/vendor/Sanad/shell_scripts/clean_sanad.sh new file mode 100755 index 0000000..14adfd5 --- /dev/null +++ b/vendor/Sanad/shell_scripts/clean_sanad.sh @@ -0,0 +1,156 @@ +#!/usr/bin/env bash +# clean_sanad.sh — wipe transient state (logs, recordings, audio, caches). +# +# Safe by default: shows a preview + asks for confirmation. Won't touch +# config files, skills.json, wake_phrases.json, recorded JSONL motions, +# or the model directory. 
+# +# Usage: +# ./clean_sanad.sh # interactive — preview + y/N prompt +# ./clean_sanad.sh -y # skip the prompt +# ./clean_sanad.sh --dry-run # show what would be deleted; delete nothing +# ./clean_sanad.sh --logs # logs only +# ./clean_sanad.sh --records # recorded turns + typed-replay audio only +# ./clean_sanad.sh --cache # __pycache__ only +# ./clean_sanad.sh --all # everything (default) +# +# Override the project location: +# SANAD_HOME=/some/path ./clean_sanad.sh + +set -u + +SANAD_HOME="${SANAD_HOME:-$HOME/Sanad}" +if [ ! -d "$SANAD_HOME" ]; then + # Fallback for invocation from the repo (workstation) + SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + SANAD_HOME="$(dirname "$SCRIPT_DIR")" +fi +if [ ! -d "$SANAD_HOME" ]; then + echo "Sanad dir not found: $SANAD_HOME" >&2 + exit 1 +fi + +# ── flag parsing ────────────────────────────────────────── +TARGET="all" +DRY_RUN=0 +ASSUME_YES=0 +for arg in "$@"; do + case "$arg" in + -y|--yes) ASSUME_YES=1 ;; + -n|--dry-run) DRY_RUN=1 ;; + --logs) TARGET="logs" ;; + --records) TARGET="records" ;; + --cache) TARGET="cache" ;; + --all) TARGET="all" ;; + -h|--help) + sed -n '2,20p' "$0"; exit 0 ;; + *) + echo "unknown option: $arg (try -h)" >&2; exit 2 ;; + esac +done + +# ── targets — grouped per category so we can summarise per-category ── +declare -a LOGS_PATHS=() +declare -a RECORDS_PATHS=() +declare -a CACHE_PATHS=() + +collect_logs() { + while IFS= read -r p; do LOGS_PATHS+=("$p"); done < <( + find "$SANAD_HOME/logs" -maxdepth 2 -type f 2>/dev/null + ) +} +collect_records() { + while IFS= read -r p; do RECORDS_PATHS+=("$p"); done < <( + find "$SANAD_HOME/data/recordings" -type f 2>/dev/null + ) + while IFS= read -r p; do RECORDS_PATHS+=("$p"); done < <( + find "$SANAD_HOME/data/audio" -maxdepth 1 -type f \ + \( -name "*.wav" -o -name "*.pcm" \) 2>/dev/null + ) +} +collect_cache() { + while IFS= read -r p; do CACHE_PATHS+=("$p"); done < <( + find "$SANAD_HOME" -type d -name "__pycache__" 2>/dev/null + ) +
while IFS= read -r p; do CACHE_PATHS+=("$p"); done < <( + find "$SANAD_HOME" -type f -name "*.pyc" 2>/dev/null + ) +} + +case "$TARGET" in + logs) collect_logs ;; + records) collect_records ;; + cache) collect_cache ;; + all) collect_logs; collect_records; collect_cache ;; +esac + +# Bytes per array (silently skips missing paths) +sum_bytes() { + local total=0 p sz + for p in "$@"; do + [ -e "$p" ] || continue + sz=$(du -sb "$p" 2>/dev/null | awk '{print $1}') + total=$((total + ${sz:-0})) + done + echo "$total" +} +fmt() { numfmt --to=iec --suffix=B "$1" 2>/dev/null || echo "${1} B"; } + +LOGS_BYTES=$(sum_bytes "${LOGS_PATHS[@]:-}") +RECORDS_BYTES=$(sum_bytes "${RECORDS_PATHS[@]:-}") +CACHE_BYTES=$(sum_bytes "${CACHE_PATHS[@]:-}") +TOTAL_BYTES=$((LOGS_BYTES + RECORDS_BYTES + CACHE_BYTES)) +TOTAL_COUNT=$(( ${#LOGS_PATHS[@]} + ${#RECORDS_PATHS[@]} + ${#CACHE_PATHS[@]} )) + +# ── preview ─────────────────────────────────────────────── +if [ "$TOTAL_COUNT" -eq 0 ]; then + echo "Nothing to delete (target=$TARGET)." 
+ exit 0 +fi + +echo +echo "Sanad clean — target=$TARGET dry_run=$DRY_RUN" +echo "Project: $SANAD_HOME" +echo +printf '┌──────────────┬─────────┬────────────┐\n' +printf '│ %-12s │ %7s │ %10s │\n' "Category" "Items" "Size" +printf '├──────────────┼─────────┼────────────┤\n' +if [ "${#LOGS_PATHS[@]}" -gt 0 ]; then printf '│ %-12s │ %7d │ %10s │\n' "logs" "${#LOGS_PATHS[@]}" "$(fmt "$LOGS_BYTES")"; fi +if [ "${#RECORDS_PATHS[@]}" -gt 0 ]; then printf '│ %-12s │ %7d │ %10s │\n' "records" "${#RECORDS_PATHS[@]}" "$(fmt "$RECORDS_BYTES")"; fi +if [ "${#CACHE_PATHS[@]}" -gt 0 ]; then printf '│ %-12s │ %7d │ %10s │\n' "cache" "${#CACHE_PATHS[@]}" "$(fmt "$CACHE_BYTES")"; fi +printf '├──────────────┼─────────┼────────────┤\n' +printf '│ %-12s │ %7d │ %10s │\n' "TOTAL" "$TOTAL_COUNT" "$(fmt "$TOTAL_BYTES")" +printf '└──────────────┴─────────┴────────────┘\n' + +# Flatten for the delete loop +declare -a PATHS_TO_DELETE=( "${LOGS_PATHS[@]:-}" "${RECORDS_PATHS[@]:-}" "${CACHE_PATHS[@]:-}" ) +# Strip any empty entries the unset-array fallback may have introduced +PATHS_TO_DELETE=("${PATHS_TO_DELETE[@]/#/}") +TMP_PATHS=() +for p in "${PATHS_TO_DELETE[@]}"; do [ -n "$p" ] && TMP_PATHS+=("$p"); done +PATHS_TO_DELETE=("${TMP_PATHS[@]}") + +if [ "$DRY_RUN" -eq 1 ]; then + echo "Dry run — nothing deleted." + exit 0 +fi + +if [ "$ASSUME_YES" -ne 1 ]; then + read -r -p "Proceed with delete? [y/N] " ans + case "$ans" in + y|Y|yes|YES) ;; + *) echo "Aborted."; exit 0 ;; + esac +fi + +# ── delete ──────────────────────────────────────────────── +removed=0 +for p in "${PATHS_TO_DELETE[@]}"; do + rm -rf -- "$p" && removed=$((removed + 1)) +done +echo "Removed $removed of ${#PATHS_TO_DELETE[@]} item(s)." + +# Recreate empty top-level dirs so next start_sanad.sh run doesn't +# complain about missing paths. +mkdir -p "$SANAD_HOME/logs" "$SANAD_HOME/data/recordings" "$SANAD_HOME/data/audio" +echo "Done." 
diff --git a/vendor/Sanad/shell_scripts/reset_anker_usb.sh b/vendor/Sanad/shell_scripts/reset_anker_usb.sh new file mode 100755 index 0000000..c50b850 --- /dev/null +++ b/vendor/Sanad/shell_scripts/reset_anker_usb.sh @@ -0,0 +1,119 @@ +#!/usr/bin/env bash +# reset_anker_usb.sh — unbind+rebind snd-usb-audio for an Anker USB device. +# +# WHY THIS EXISTS +# The Anker PowerConf A3321 on this Jetson sometimes enumerates with only +# output USB Audio Class descriptors (no capture interface). PulseAudio +# then shows the card with only output-only profiles and the dashboard +# can't expose the mic. Restarting PulseAudio does nothing — UAC +# descriptors are parsed by snd-usb-audio at probe time, persist in +# kernel structs, and only get re-parsed on a fresh driver bind. +# +# `/api/audio/usb-reset` writes directly to +# /sys/bus/usb/drivers/snd-usb-audio/{unbind,bind} when possible. That +# path needs root. This script exists as a sudo fallback so the dashboard +# can recover without Sanad itself running as root. +# +# USAGE +# reset_anker_usb.sh <bus_id> — unbind+rebind given device +# (bus_id like "1-3") +# reset_anker_usb.sh --setup-sudoers — install one-time NOPASSWD entry +# (must be run via sudo) +# +# Exit codes: +# 0 — unbind + rebind both succeeded +# 1 — bus_id missing or device not present +# 2 — no snd-usb-audio interfaces bound to that device +# 3 — unbind or bind sysfs write failed +# 4 — --setup-sudoers used outside of sudo + +set -u + +USAGE="usage: $(basename "$0") <bus_id> or $(basename "$0") --setup-sudoers" + +if [ "$#" -lt 1 ]; then + echo "$USAGE" >&2 + exit 1 +fi + +# ───────────────────── --setup-sudoers ───────────────────── +if [ "$1" = "--setup-sudoers" ]; then + if [ "$(id -u)" -ne 0 ]; then + echo "❌ --setup-sudoers must run as root (use: sudo $0 --setup-sudoers)" >&2 + exit 4 + fi + # Install a NOPASSWD entry so the unitree user can invoke THIS exact + # script path with sudo without typing a password. Scoped to one + # binary; not a blanket sudo grant.
+ SELF_PATH="$(readlink -f "$0")" + SUDO_FILE="/etc/sudoers.d/sanad-anker-usb-reset" + cat > "$SUDO_FILE" <&2 + exit 1 +fi + +# Discover snd-usb-audio interfaces on this device. Don't unbind anything +# else (some Anker firmwares present HID-Consumer for the mute button on +# a separate interface — we leave those alone). +declare -a IFACES=() +for iface_path in "${DEV_PATH}/${BUS_ID}:"*; do + [ -e "$iface_path" ] || continue + driver_link="${iface_path}/driver" + [ -L "$driver_link" ] || continue + driver=$(basename "$(readlink "$driver_link")") + if [ "$driver" = "snd-usb-audio" ]; then + IFACES+=("$(basename "$iface_path")") + fi +done + +if [ "${#IFACES[@]}" -eq 0 ]; then + echo "❌ No snd-usb-audio interfaces bound to device $BUS_ID" >&2 + exit 2 +fi + +echo "ℹ️ Re-binding snd-usb-audio for $BUS_ID (interfaces: ${IFACES[*]})" + +UNBIND="/sys/bus/usb/drivers/snd-usb-audio/unbind" +BIND="/sys/bus/usb/drivers/snd-usb-audio/bind" + +# Unbind first; on failure exit before rebind so we don't leave the device +# in a half-bound state. +for iface in "${IFACES[@]}"; do + if ! echo -n "$iface" > "$UNBIND" 2>/dev/null; then + echo "❌ unbind failed: $iface → $UNBIND" >&2 + exit 3 + fi + echo " unbound: $iface" +done + +# Brief settle — snd-usb-audio's release path tears down ALSA card N. +sleep 0.5 + +for iface in "${IFACES[@]}"; do + if ! echo -n "$iface" > "$BIND" 2>/dev/null; then + echo "❌ rebind failed: $iface → $BIND" >&2 + exit 3 + fi + echo " bound: $iface" +done + +# Let probe complete so callers can pactl list cards right after. +sleep 1.0 +echo "✅ snd-usb-audio re-bound for $BUS_ID" +exit 0 diff --git a/vendor/Sanad/shell_scripts/sanad.service b/vendor/Sanad/shell_scripts/sanad.service new file mode 100644 index 0000000..f190569 --- /dev/null +++ b/vendor/Sanad/shell_scripts/sanad.service @@ -0,0 +1,32 @@ +# systemd user-level unit for SanadV3. 
Install with: +# +# mkdir -p ~/.config/systemd/user +# cp ~/Sanadv3/shell_scripts/sanad.service ~/.config/systemd/user/sanad.service +# systemctl --user daemon-reload +# systemctl --user enable --now sanad.service +# sudo loginctl enable-linger unitree # run at boot even when not logged in +# +# Watch logs: +# journalctl --user -u sanad.service -f +# +# Restart after a code/config change: +# systemctl --user restart sanad.service + +[Unit] +Description=SanadV3 robot assistant (FastAPI dashboard + voice/motion subsystems) +After=network-online.target +Wants=network-online.target + +[Service] +Type=exec +WorkingDirectory=%h/Sanadv3 +ExecStart=/usr/bin/env bash %h/Sanadv3/shell_scripts/start_sanad.sh +Environment=PORT=8001 +Restart=on-failure +RestartSec=5 +TimeoutStopSec=15 +KillSignal=SIGINT +PassEnvironment=PULSE_RUNTIME_PATH XDG_RUNTIME_DIR + +[Install] +WantedBy=default.target diff --git a/vendor/Sanad/shell_scripts/sanadv3.service b/vendor/Sanad/shell_scripts/sanadv3.service new file mode 100644 index 0000000..414d676 --- /dev/null +++ b/vendor/Sanad/shell_scripts/sanadv3.service @@ -0,0 +1,33 @@ +[Unit] +Description=SanadV3 full stack (dashboard :8001 + web_nav3 nav :8765 + rosbridge :9090) +After=network-online.target sound.target +Wants=network-online.target +StartLimitIntervalSec=0 + +[Service] +Type=simple +User=unitree +Group=unitree +WorkingDirectory=/home/unitree/Sanadv3 + +# Runtime env (systemd does NOT source ~/.bashrc, so set what the stack needs). +Environment=USER=unitree +Environment=HOME=/home/unitree +Environment=PYTHONUNBUFFERED=1 +Environment=XDG_RUNTIME_DIR=/run/user/1000 +Environment=PULSE_SERVER=unix:/run/user/1000/pulse/native +# Pin the voice child to Sanadv3 (replaces the old ~/.bashrc SANAD_LIVE_SCRIPT). +Environment=SANAD_LIVE_SCRIPT=/home/unitree/Sanadv3/voice/sanad_voice.py + +# start_all.sh self-activates conda (gemini_sdk for dashboard, g1_lidar for +# web_nav3), supervises both, and has a SIGTERM cleanup trap. 
+ExecStart=/bin/bash /home/unitree/Sanadv3/shell_scripts/start_all.sh + +# Let start_all.sh's trap tear its children down on stop, then SIGKILL stragglers. +KillMode=mixed +TimeoutStopSec=30 +Restart=always +RestartSec=5s + +[Install] +WantedBy=multi-user.target diff --git a/vendor/Sanad/shell_scripts/set_powerconf_audio.sh b/vendor/Sanad/shell_scripts/set_powerconf_audio.sh new file mode 100755 index 0000000..a1e2923 --- /dev/null +++ b/vendor/Sanad/shell_scripts/set_powerconf_audio.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +set -e + +SINK="alsa_output.usb-Anker_PowerConf_A3321-DEV-SN1-01.analog-stereo" +SOURCE="alsa_input.usb-Anker_PowerConf_A3321-DEV-SN1-01.mono-fallback" + +echo "🔊 Checking PulseAudio..." +if ! pactl info >/dev/null 2>&1; then + echo "❌ PulseAudio is not running" + exit 1 +fi + +echo "✅ PulseAudio is running" + +echo "🎧 Setting default speaker → PowerConf" +pactl set-default-sink "$SINK" + +echo "🎤 Setting default microphone → PowerConf" +pactl set-default-source "$SOURCE" + +echo +echo "📋 Current PulseAudio defaults:" +pactl info | grep -E "Default Sink|Default Source" + +echo +echo "✅ PowerConf audio routing applied successfully" diff --git a/vendor/Sanad/shell_scripts/start_all.sh b/vendor/Sanad/shell_scripts/start_all.sh new file mode 100644 index 0000000..4453448 --- /dev/null +++ b/vendor/Sanad/shell_scripts/start_all.sh @@ -0,0 +1,164 @@ +#!/usr/bin/env bash +# +# start_all.sh — ONE command to launch the entire Sanad humanoid stack: +# +# 1. web_nav3 nav stack → backend :8765 + rosbridge :9090 + map_relay +# + internal rosbridge watchdog +# (runs in its OWN g1_lidar conda env + CycloneDDS) +# +# 2. SanadV3 dashboard → Gemini voice + LED face + Nav tabs, :8001 +# (runs in its OWN gemini_sdk conda env + Unitree SDK on eth0) +# +# The two stacks use incompatible Python envs + DDS configs, so they CANNOT +# share a process. 
This launcher starts each detached, supervises the pair +# (restarts one only if it truly dies), and Ctrl+C here stops EVERYTHING. +# +# Liveness is checked by PROCESS PATTERN (pgrep), NOT by a captured PID: +# `setsid ... &` returns the ephemeral setsid-wrapper pid which exits +# instantly, so a pid check would false-fire and restart-storm (which then +# makes each failed relaunch's cleanup trap nuke the working rosbridge). +# +# Usage: bash ~/Sanadv3/shell_scripts/start_all.sh +# Env knobs (optional): WEBNAV3_HOME, SANAD_HOME, PORT, LOG_DIR, NO_WEBNAV3=1 + +set -u + +WEBNAV3_HOME="${WEBNAV3_HOME:-$HOME/web_nav3}" +SANAD_HOME="${SANAD_HOME:-$HOME/Sanadv3}" +SANAD_PORT="${PORT:-8001}" +LOG_DIR="${LOG_DIR:-$HOME/sanad_logs}" +NO_WEBNAV3="${NO_WEBNAV3:-0}" +mkdir -p "$LOG_DIR" + +echo "════════════════════════════════════════════════" +echo " Sanad humanoid — full stack launcher" +echo " web_nav3 : $WEBNAV3_HOME $([ "$NO_WEBNAV3" = 1 ] && echo '(skipped)')" +echo " SanadV3 : $SANAD_HOME (port $SANAD_PORT)" +echo " logs : $LOG_DIR" +echo "════════════════════════════════════════════════" + +# liveness by process pattern (robust — no pid tracking) +# NOTE: start_sanad.sh `cd $SANAD_HOME && exec python3 main.py --port N`, so the +# process cmdline is "python3 main.py --port " (no Sanadv3/ in argv). Match +# on the PORT — also keeps us from ever touching the live Sanad on :8000. +SANAD_PAT="main.py --port $SANAD_PORT" +webnav3_up() { pgrep -f "web/backend.py" >/dev/null 2>&1; } +# Count a SanadV3 as "up" if EITHER main.py is bound OR start_sanad.sh is still +# mid-boot (conda activate + up-to-20s DDS-iface wait + heavy lazy imports come +# BEFORE `exec python3 main.py`, so "main.py --port" is absent from argv during +# that whole window). Matching the booting shell too prevents the supervisor +# from pkill-ing a process that is simply still cold-booting (restart storm). 
# True while SanadV3 is either serving (main.py with our port in argv) or
# still cold-booting inside start_sanad.sh (before it exec's main.py).
sanad_up() {
    pgrep -f "$SANAD_PAT" >/dev/null 2>&1 \
        || pgrep -f "shell_scripts/start_sanad.sh" >/dev/null 2>&1
}

# Launch the web_nav3 stack detached (own session, stdin closed) so it
# survives independently of this shell; all output goes to its log file.
start_webnav3() {
    setsid bash "$WEBNAV3_HOME/scripts/start_web.sh" \
        > "$LOG_DIR/web_nav3.log" 2>&1 < /dev/null &
    echo " web_nav3 launching → $LOG_DIR/web_nav3.log"
}

# Launch SanadV3 detached, handing it the resolved home dir and port.
start_sanad() {
    SANAD_HOME="$SANAD_HOME" PORT="$SANAD_PORT" \
        setsid bash "$SANAD_HOME/shell_scripts/start_sanad.sh" \
        > "$LOG_DIR/sanadv3.log" 2>&1 < /dev/null &
    echo " SanadV3 launching → $LOG_DIR/sanadv3.log"
}

# Force-kill the web_nav3 launcher script and every process it spawns.
kill_webnav3() {
    pkill -9 -f "scripts/start_web.sh" 2>/dev/null || true
    pkill -9 -f "web/backend.py|rosbridge_websocket|start_rosbridge.sh|start_map_relay.sh|map_relay.py" 2>/dev/null || true
}

# ---- teardown on Ctrl+C / TERM / HUP ----
# Installed BEFORE pre-flight/launch so an interrupt during the "clearing
# stale instances" sleep or right after a setsid child is spawned still
# triggers a full teardown (otherwise those detached children would be
# orphaned with no supervisor).
_cleaning=0
cleanup() {
    # Re-entrancy guard: a second signal during teardown must not restart it.
    [ "$_cleaning" = 1 ] && return; _cleaning=1
    echo
    echo "── stopping everything ──"
    # With NO_WEBNAV3=1 this launcher never started (and pre-flight never
    # touched) the nav stack, so teardown must leave it alone as well —
    # otherwise Ctrl+C here would kill -9 a web_nav3 somebody else runs.
    if [ "$NO_WEBNAV3" != 1 ]; then
        # 1. ask the backend to stop the BRINGUP cleanly (it owns the pgid, so a
        #    killpg takes down rtabmap + all Nav2 nodes in one shot).
        curl -s -m 8 -X POST "http://127.0.0.1:8765/api/shutdown" >/dev/null 2>&1 || true
    fi
    # 2. graceful TERM: SanadV3 (clean loco stop) + start_web.sh (own trap).
    pkill -TERM -f "$SANAD_PAT" 2>/dev/null || true
    if [ "$NO_WEBNAV3" != 1 ]; then
        pkill -TERM -f "scripts/start_web.sh" 2>/dev/null || true
    fi
    sleep 3
    # 3. force-kill the web_nav3 stack + SanadV3.
    if [ "$NO_WEBNAV3" != 1 ]; then
        kill_webnav3
    fi
    pkill -9 -f "$SANAD_PAT" 2>/dev/null || true
    pkill -9 -f "shell_scripts/start_sanad.sh" 2>/dev/null || true
    if [ "$NO_WEBNAV3" != 1 ]; then
        # 4. belt-and-suspenders — any orphaned bringup tree (if the backend was
        #    already gone in step 1, killpg never ran).
        pkill -9 -f "scripts/bringup.sh|run_on_jetson.sh|g1_nav2_slam" 2>/dev/null || true
        pkill -9 -f "rtabmap_slam/rtabmap|livox_pcd_bridge|dog_odom_to_tf|pointcloud_to_laserscan" 2>/dev/null || true
    fi
    echo " done."
    exit 0
}
# HUP is included so that closing the terminal tears the stack down too; the
# children run under setsid and would otherwise outlive the dead supervisor.
trap cleanup INT TERM HUP

# ---- pre-flight: clear any stale instances so we start clean ----
echo "── clearing any stale instances … ──"
[ "$NO_WEBNAV3" != 1 ] && kill_webnav3
pkill -9 -f "shell_scripts/start_sanad.sh" 2>/dev/null || true
pkill -9 -f "$SANAD_PAT" 2>/dev/null || true
sleep 2

# ---- launch ----
if [ "$NO_WEBNAV3" != 1 ]; then echo "[1/2] starting web_nav3 nav stack …"; start_webnav3; fi
echo "[2/2] starting SanadV3 dashboard …"; start_sanad

# ---- wait for ports, then report ----
echo
echo "── waiting for services to bind (up to ~80s) … ──"
# 40 polls x 2 s. Each counter is the number of listening sockets on that port.
for _ in $(seq 1 40); do
    up_sanad=$(ss -tnln 2>/dev/null | grep -c ":$SANAD_PORT ")
    up8765=$(ss -tnln 2>/dev/null | grep -c ':8765 ')
    up9090=$(ss -tnln 2>/dev/null | grep -c ':9090 ')
    if [ "$NO_WEBNAV3" = 1 ]; then
        [ "$up_sanad" -ge 1 ] && break
    else
        [ "$up_sanad" -ge 1 ] && [ "$up8765" -ge 1 ] && [ "$up9090" -ge 1 ] && break
    fi
    sleep 2
done

IP=$(hostname -I | awk '{print $1}')
# st <pattern> — print UP if a listening socket matches the pattern, else DOWN.
st() { [ "$(ss -tnln 2>/dev/null | grep -c "$1")" -ge 1 ] && echo UP || echo DOWN; }
echo
echo "════════════════════════════════════════════════"
echo " STATUS"
[ "$NO_WEBNAV3" != 1 ] && echo " web_nav3 backend :8765 $(st ':8765 ')"
[ "$NO_WEBNAV3" != 1 ] && echo " rosbridge :9090 $(st ':9090 ')"
echo " SanadV3 dashboard :$SANAD_PORT $(st ":$SANAD_PORT ")"
echo
echo " OPEN: http://$IP:$SANAD_PORT (full dashboard)"
[ "$NO_WEBNAV3" != 1 ] && echo " http://$IP:8765 (web_nav3 nav only)"
echo
echo " logs: tail -f $LOG_DIR/sanadv3.log"
[ "$NO_WEBNAV3" != 1 ] && echo " tail -f $LOG_DIR/web_nav3.log"
echo " >>> Ctrl+C here stops EVERYTHING <<<"
echo "════════════════════════════════════════════════"

# ---- supervise: restart a child ONLY if it truly died (pgrep), with grace
---- +while true; do + sleep 10 + if [ "$NO_WEBNAV3" != 1 ] && ! webnav3_up; then + echo "[supervisor] $(date +%T) web_nav3 down — restarting" + kill_webnav3 # clear any half-dead remnants first + sleep 1 + start_webnav3 + sleep 15 # grace: let it boot before the next check + fi + if ! sanad_up; then + echo "[supervisor] $(date +%T) SanadV3 down — restarting" + pkill -9 -f "shell_scripts/start_sanad.sh" 2>/dev/null || true + sleep 1 + start_sanad + # grace MUST exceed worst-case cold boot: up to 20s DDS-iface wait + + # conda activate + heavy lazy imports (torch/transformers/gemini) + + # arm.init before uvicorn binds. 15s was shorter than that and caused a + # restart storm (kill a still-booting instance, relaunch, repeat). + sleep 45 + fi +done diff --git a/vendor/Sanad/shell_scripts/start_sanad.sh b/vendor/Sanad/shell_scripts/start_sanad.sh new file mode 100755 index 0000000..b342ec5 --- /dev/null +++ b/vendor/Sanad/shell_scripts/start_sanad.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +# start_sanad.sh — boot Sanad's main.py inside the gemini_sdk conda env. +# +# Used both manually (./start_sanad.sh) and from the systemd unit +# (sanad.service) for boot-time auto-start. 
#
# Override knobs (env vars; all optional):
#   SANAD_HOME            project root (default ~/Sanadv3)
#   SANAD_CONDA_ENV       conda env name (default gemini_sdk)
#   SANAD_CONDA_BASE      conda install dir (default $HOME/miniconda3)
#   SANAD_DDS_INTERFACE   DDS network iface (default eth0)
#   SANAD_VOICE_BRAIN     gemini | local | model (default gemini)
#   SANAD_AUDIO_PROFILE   builtin | anker | hollyland_builtin (default builtin)
#   PORT                  dashboard port (default 8001)
#   LD_PRELOAD            preload list (default: the aarch64 libgomp, if present)

set -u

SANAD_HOME="${SANAD_HOME:-$HOME/Sanadv3}"
SANAD_CONDA_ENV="${SANAD_CONDA_ENV:-gemini_sdk}"
SANAD_CONDA_BASE="${SANAD_CONDA_BASE:-$HOME/miniconda3}"

export SANAD_DDS_INTERFACE="${SANAD_DDS_INTERFACE:-eth0}"
export SANAD_VOICE_BRAIN="${SANAD_VOICE_BRAIN:-gemini}"
export SANAD_AUDIO_PROFILE="${SANAD_AUDIO_PROFILE:-builtin}"
export PORT="${PORT:-8001}"

# Mandatory environment fixes for Jetson + conda + Unitree SDK
# A caller-supplied LD_PRELOAD always wins. The libgomp default is applied
# only when that file really exists: preloading a missing object makes the
# dynamic loader print an error for every process started from here on.
_sanad_libgomp="/usr/lib/aarch64-linux-gnu/libgomp.so.1"
if [ -n "${LD_PRELOAD:-}" ]; then
    export LD_PRELOAD
elif [ -f "$_sanad_libgomp" ]; then
    export LD_PRELOAD="$_sanad_libgomp"
fi
export PYTHONUNBUFFERED=1

# Point conda's bundled PyAudio/PortAudio at the SYSTEM ALSA plugin dir
# so it can load the `pulse` PCM plugin (from the libasound2-plugins apt
# package). Without this, conda's libasound looks in its own env path
# (which doesn't ship plugins) → PortAudio's device list has no 'pulse'
# entry → AnkerMic/AnkerSpeaker fall back to direct hw:N grabs which
# fail with paInvalidSampleRate (mic) or kick PulseAudio off the card
# (speaker). Same pattern used by AI_Photographer's photo_sanad.sh on
# this Jetson.
+if [ -d "/usr/lib/aarch64-linux-gnu/alsa-lib" ]; then + export ALSA_PLUGIN_DIR="/usr/lib/aarch64-linux-gnu/alsa-lib" +elif [ -d "/usr/lib/alsa-lib" ]; then + export ALSA_PLUGIN_DIR="/usr/lib/alsa-lib" +fi +if [ -f "/usr/share/alsa/alsa.conf" ]; then + export ALSA_CONFIG_PATH="/usr/share/alsa/alsa.conf" +fi +export ALSA_LOG_LEVEL=0 + +# Optional: needed if/when the local pipeline imports CosyVoice +if [ -d "$HOME/CosyVoice" ]; then + export PYTHONPATH="$HOME/CosyVoice:$HOME/CosyVoice/third_party/Matcha-TTS:${PYTHONPATH:-}" +fi + +cd "$SANAD_HOME" || { echo "Sanad dir not found: $SANAD_HOME" >&2; exit 1; } + +# Activate conda +if [ ! -f "$SANAD_CONDA_BASE/etc/profile.d/conda.sh" ]; then + echo "Conda not found at $SANAD_CONDA_BASE — set SANAD_CONDA_BASE" >&2 + exit 1 +fi +# shellcheck disable=SC1091 +source "$SANAD_CONDA_BASE/etc/profile.d/conda.sh" +conda activate "$SANAD_CONDA_ENV" || { + echo "Conda env '$SANAD_CONDA_ENV' activation failed" >&2 + exit 1 +} + +# Wait for the DDS interface to come up — robot may still be booting +_dds_up=0 +for i in $(seq 1 20); do + if ip link show "$SANAD_DDS_INTERFACE" 2>/dev/null | grep -q "state UP"; then + _dds_up=1 + break + fi + sleep 1 +done +if [ "$_dds_up" != 1 ]; then + echo "[start_sanad] WARNING: $SANAD_DDS_INTERFACE not UP after 20s — launching anyway; hardware (Unitree SDK/arm/loco) may be unavailable" >&2 +fi + +echo "[start_sanad] $(date) — launching main.py" +echo "[start_sanad] SANAD_HOME=$SANAD_HOME" +echo "[start_sanad] conda env=$SANAD_CONDA_ENV" +echo "[start_sanad] DDS iface=$SANAD_DDS_INTERFACE" +echo "[start_sanad] voice brain=$SANAD_VOICE_BRAIN audio=$SANAD_AUDIO_PROFILE" +echo "[start_sanad] port=$PORT" + +exec python3 main.py --port "$PORT" diff --git a/vendor/Sanad/vision/__init__.py b/vendor/Sanad/vision/__init__.py new file mode 100644 index 0000000..2a2729a --- /dev/null +++ b/vendor/Sanad/vision/__init__.py @@ -0,0 +1 @@ +"""Vision package — camera daemon + face gallery for Gemini-side 
recognition.""" diff --git a/vendor/Sanad/vision/camera.py b/vendor/Sanad/vision/camera.py new file mode 100644 index 0000000..003122e --- /dev/null +++ b/vendor/Sanad/vision/camera.py @@ -0,0 +1,593 @@ +"""Camera daemon — single producer, in-memory frame cache. + +Captures frames at fixed FPS from a RealSense (preferred) or any USB +camera (fallback), JPEG-encodes them, and caches the latest frame in +memory in two views (matches Marcus's API/camera_api.py): + + - `_latest_jpeg` raw JPEG bytes — dashboard preview + frame forwarder + - `_latest_b64` base64 ASCII — frame forwarder → Gemini child stdin + +Consumers: + - dashboard preview → `snapshot_jpeg()` (served as an HTTP Response) + - face enrollment → `get_fresh_frame()` for a guaranteed-current capture + - GeminiSubprocess → `get_frame_b64()`, pushed over the child's stdin + +Lifecycle is driven by the Recognition tab toggle. The daemon is idle +until `start()` is called; failures in start() are non-fatal and +reported via `is_running()` / `backend`. Once running it auto-reconnects +on USB unplug / stalled frames (Marcus-style resilience), and supports +hot `reconfigure()` of resolution/FPS without a full restart. +""" + +from __future__ import annotations + +import base64 +import os +import threading +import time +from typing import Optional + +import numpy as np + +from Project.Sanad.core.logger import get_logger + +log = get_logger("camera") + +# How many /dev/video* indices to scan for a USB-style color camera when +# RealSense isn't available. A RealSense exposes ~6 V4L2 nodes (depth, IR, +# color, metadata…) — the color one is rarely index 0, so we probe each +# and accept the first that yields a real 3-channel BGR frame. 
_USB_SCAN_RANGE = 10


class CameraDaemon:
    """RealSense → USB fallback camera capture with in-memory frame cache.

    A single background thread (``camera-daemon``) owns the hardware. It
    JPEG-encodes every frame and publishes the latest one, as raw bytes and
    as base64 text, under ``_lock``. The capture profile (width, height,
    fps, JPEG quality) lives under ``_reconfig_lock`` so ``reconfigure()``
    can change it from another thread.
    """

    def __init__(
        self,
        width: int = 424,
        height: int = 240,
        fps: int = 15,
        jpeg_quality: int = 70,
        stale_threshold_s: float = 10.0,
        reconnect_min_s: float = 2.0,
        reconnect_max_s: float = 10.0,
        capture_timeout_ms: int = 5000,
    ) -> None:
        """Store the capture profile and resilience knobs; opens no hardware.

        ``jpeg_quality`` is clamped to 10..95. ``stale_threshold_s`` is how
        long the capture loop tolerates missing frames before rebuilding the
        pipeline; ``reconnect_min_s``/``reconnect_max_s`` bound the
        exponential reconnect backoff; ``capture_timeout_ms`` is the
        per-frame RealSense wait timeout.
        """
        # Active profile — guarded by _reconfig_lock so reconfigure() can
        # hot-swap it from another thread between capture sessions.
        self._reconfig_lock = threading.Lock()
        self._w = int(width)
        self._h = int(height)
        self._fps = int(fps)
        self._q = max(10, min(95, int(jpeg_quality)))
        self._reconfig_pending = False

        # Resilience knobs (Marcus-style)
        self._stale_s = float(stale_threshold_s)
        self._reconnect_min_s = float(reconnect_min_s)
        self._reconnect_max_s = float(reconnect_max_s)
        self._capture_timeout_ms = int(capture_timeout_ms)

        self._thread: Optional[threading.Thread] = None
        self._stop = threading.Event()
        self._backend: Optional[str] = None
        self._lock = threading.Lock()
        self._latest_jpeg: Optional[bytes] = None
        self._latest_b64: Optional[str] = None
        self._latest_ts: float = 0.0
        self._frame_seq: int = 0
        self._error: Optional[str] = None
        self._reconnect_count: int = 0

    # ── public API ──────────────────────────────────────────

    @property
    def backend(self) -> Optional[str]:
        """Name of the active backend ("realsense" / "usb"), or None."""
        return self._backend

    @property
    def error(self) -> Optional[str]:
        """Last error/status text set by the capture thread, or None."""
        return self._error

    @property
    def frame_seq(self) -> int:
        """Count of frames published since this object was created."""
        return self._frame_seq

    def is_running(self) -> bool:
        """Return True while the capture thread is alive and not stopping.

        A thread that has been asked to stop but is still finishing an
        in-flight read is reported as not running, so callers never treat
        a dying daemon as a live one.
        """
        t = self._thread
        return t is not None and t.is_alive() and not self._stop.is_set()

    def start(self) -> bool:
        """Start capture thread. Returns True if a backend was acquired.

        Initial probe is synchronous; if it fails the thread isn't spawned.
        Once running, the inner loop auto-reconnects on USB unplug or
        stalled frames using exponential backoff (`reconnect_min_s` ..
        `reconnect_max_s`).

        Returns False as well when a previous capture thread is still
        shutting down after the wait below.
        """
        if self.is_running():
            return True

        # A previous stop() may have timed out joining a thread that was
        # blocked in a frame read. Clearing the stop flag now would revive
        # it next to the new thread (two threads on one camera), so wait
        # for it to finish first — one capture timeout plus slack.
        old = self._thread
        if old is not None and old.is_alive():
            old.join(timeout=self._capture_timeout_ms / 1000.0 + 1.0)
            if old.is_alive():
                self._error = "previous capture thread still shutting down"
                log.warning("Camera: previous capture thread has not exited "
                            "yet — refusing to start a second one")
                return False
        self._thread = None

        self._stop.clear()
        self._error = None
        self._reconnect_count = 0

        # One-shot USB-2.0 negotiation diagnostic (warns operator if D435I
        # came up on USB 2.0 — frame drops would be likely otherwise).
        self._check_usb_version()

        backend = self._probe_any()
        if backend is None:
            log.warning("Camera: no backend available (RealSense + USB both failed)")
            self._backend = None
            return False

        self._backend = backend["name"]
        self._thread = threading.Thread(
            target=self._reconnect_loop, args=(backend,),
            daemon=True, name="camera-daemon",
        )
        self._thread.start()
        with self._reconfig_lock:
            w, h, f = self._w, self._h, self._fps
        log.info("Camera started (backend=%s, %dx%d @ %dfps)",
                 self._backend, w, h, f)
        return True

    def stop(self) -> None:
        """Stop the capture thread and release the hardware.

        Waits up to 2 s for the thread. If it is still inside a blocking
        frame read after that, the reference is kept (not dropped) so a
        later `start()` can wait for it instead of racing it.
        """
        if not self.is_running():
            self._backend = None
            self._clear_cache()
            return
        self._stop.set()
        t = self._thread
        if t is not None:
            t.join(timeout=2.0)
            if t.is_alive():
                log.warning("Camera thread still finishing an in-flight "
                            "read — it will exit on its own")
            else:
                self._thread = None
        self._backend = None
        # Drop the last captured frame so snapshot_jpeg()/get_frame_b64()
        # return None once vision is OFF — otherwise the /frame.jpg preview
        # and the enroll path keep serving a frozen image of whoever was
        # last in front of the camera.
        self._clear_cache()
        log.info("Camera stopped")

    def _clear_cache(self) -> None:
        """Drop the cached frame views so nothing stale is served."""
        with self._lock:
            self._latest_jpeg = None
            self._latest_b64 = None
            self._latest_ts = 0.0

    def reconfigure(self, width: Optional[int] = None, height: Optional[int] = None,
                    fps: Optional[int] = None, jpeg_quality: Optional[int] = None) -> dict:
        """Hot-swap the capture profile without a full stop/start.

        Sets a pending flag — the capture loop notices it, tears the
        pipeline down, and rebuilds at the new resolution (~0.5 s gap).
        If the daemon isn't running the new values just take effect on
        the next `start()`. Returns the resulting active profile.

        Arguments left as None keep their current value; `jpeg_quality`
        is clamped to 10..95.
        """
        with self._reconfig_lock:
            if width is not None:
                self._w = int(width)
            if height is not None:
                self._h = int(height)
            if fps is not None:
                self._fps = int(fps)
            if jpeg_quality is not None:
                self._q = max(10, min(95, int(jpeg_quality)))
            if self.is_running():
                self._reconfig_pending = True
            profile = {"width": self._w, "height": self._h,
                       "fps": self._fps, "jpeg_quality": self._q}
        log.info("Camera reconfigure → %s", profile)
        return profile

    def snapshot_jpeg(self) -> Optional[bytes]:
        """Return the latest JPEG bytes, or None if no frame yet."""
        with self._lock:
            return self._latest_jpeg

    def get_frame_b64(self) -> Optional[str]:
        """Return the latest frame as a base64 ASCII string (or None).

        Used by the frame forwarder to push frames over the Gemini child's
        stdin without re-encoding — base64 is cached alongside the JPEG.
        """
        with self._lock:
            return self._latest_b64

    def get_fresh_frame(self, max_age_s: float = 0.5,
                        timeout_s: float = 1.5) -> Optional[bytes]:
        """Return a JPEG frame newer than `max_age_s`, waiting up to `timeout_s`.

        Used by face enrollment so the captured frame is guaranteed to be
        the *current* scene, not a stale buffer from before the user got
        into position. On timeout, only falls back to the cached frame if
        it is still within the stale threshold — otherwise returns None so
        the enroll route raises 409 rather than capturing an old scene
        (e.g. while the daemon is stuck reconnecting).
        """
        deadline = time.time() + timeout_s
        while time.time() < deadline:
            with self._lock:
                if (self._latest_jpeg is not None
                        and self._latest_ts > 0
                        and (time.time() - self._latest_ts) <= max_age_s):
                    return self._latest_jpeg
            time.sleep(0.03)
        # Timed out waiting for a fresh frame. Hand back the cached frame
        # only if it isn't dangerously stale; never enrol an arbitrarily
        # old scene.
        with self._lock:
            if (self._latest_jpeg is not None
                    and self._latest_ts > 0
                    and (time.time() - self._latest_ts) <= self._stale_s):
                return self._latest_jpeg
        return None

    def latest_age_s(self) -> float:
        """Seconds since last successful frame; +inf if none."""
        with self._lock:
            if self._latest_ts <= 0:
                return float("inf")
            return time.time() - self._latest_ts

    def status(self) -> dict:
        """Return a JSON-safe snapshot of profile, liveness and counters."""
        with self._reconfig_lock:
            w, h, f, q = self._w, self._h, self._fps, self._q
        # latest_age_s() is +inf until the first frame lands. inf is NOT
        # JSON-serialisable by Starlette's JSONResponse (allow_nan=False) —
        # leaving it as inf would 500 the /api/recognition/* routes. Map
        # "running but no frame yet" and "not running" both to None.
        age = self.latest_age_s()
        running = self.is_running()
        age_s = round(age, 2) if (running and age != float("inf")) else None
        # Snapshot the report counters under _lock for a consistent view —
        # the capture/reconnect thread mutates these (see _reconnect_loop).
        # Read latest_age_s()/is_running() above (they self-lock) so we
        # don't re-enter this non-reentrant lock.
        with self._lock:
            backend = self._backend
            frame_seq = self._frame_seq
            error = self._error
            reconnect_count = self._reconnect_count
        return {
            "running": running,
            "backend": backend,
            "width": w,
            "height": h,
            "fps": f,
            "jpeg_quality": q,
            "frame_seq": frame_seq,
            "age_s": age_s,
            "error": error,
            "reconnect_count": reconnect_count,
        }

    # ── helpers ─────────────────────────────────────────────

    def _probe_any(self) -> Optional[dict]:
        """Try RealSense first, then USB. Returns backend dict or None."""
        b = self._probe_realsense()
        if b is None:
            b = self._probe_usb()
        return b

    def _check_usb_version(self) -> None:
        """Warn if a connected RealSense negotiated USB 2.0 (needs 3.x).

        Marcus has this same check — D435I on USB 2.0 can't deliver
        color+depth+IMU and the pipeline silently stalls. Catching it at
        startup lets the operator fix the cable/port instead of chasing a
        "no frames" loop. Diagnostic only; never blocks startup.
        """
        try:
            import pyrealsense2 as rs  # type: ignore
            ctx = rs.context()
            for dev in ctx.query_devices():
                try:
                    usb_type = dev.get_info(rs.camera_info.usb_type_descriptor)
                    name = dev.get_info(rs.camera_info.name)
                except Exception:
                    continue
                if str(usb_type).startswith("2."):
                    log.warning(
                        "RealSense %s negotiated USB %s — expected 3.x. "
                        "Frame drops likely. Try a USB 3 port / shorter cable / "
                        "powered hub.", name, usb_type,
                    )
                else:
                    log.info("RealSense %s on USB %s", name, usb_type)
        except Exception:
            # Deliberately best-effort: a missing pyrealsense2 or a failing
            # device query must never block startup.
            pass

    # ── backend probing ─────────────────────────────────────

    def _probe_realsense(self) -> Optional[dict]:
        """Start a RealSense colour pipeline at the active profile.

        Returns a backend dict owning the started pipeline, or None if
        pyrealsense2 is missing or the pipeline cannot start.
        """
        with self._reconfig_lock:
            w, h, f = self._w, self._h, self._fps
        try:
            import pyrealsense2 as rs  # type: ignore
            pipeline = rs.pipeline()
            cfg = rs.config()
            cfg.enable_stream(rs.stream.color, w, h, rs.format.bgr8, f)
            profile = pipeline.start(cfg)
            return {"name": "realsense", "pipeline": pipeline, "rs": rs,
                    "profile": profile}
        except Exception as exc:
            log.info("RealSense unavailable: %s", exc)
            return None

    def _open_usb_index(self, idx: int, w: int, h: int, f: int,
                        cv2) -> Optional[dict]:
        """Open one /dev/video<idx>, validate it yields a 3-channel frame,
        and classify it as colour vs grayscale/IR.

        A RealSense IR node delivers Y8 — cv2 replicates that single plane
        across 3 channels, so the planes come back *bit-identical*. A real
        colour sensor never produces bit-identical channels (per-channel
        sensor noise differs even on a flat gray scene). That's the test.
        Returns a backend dict with `is_color`, or None if the node is
        unusable.
        """
        cap = None
        try:
            cap = cv2.VideoCapture(idx)
            if not cap.isOpened():
                cap.release()
                return None
            cap.set(cv2.CAP_PROP_FRAME_WIDTH, w)
            cap.set(cv2.CAP_PROP_FRAME_HEIGHT, h)
            cap.set(cv2.CAP_PROP_FPS, f)
            good = None
            # Up to five reads before giving up on this node.
            for _ in range(5):
                ok, frame = cap.read()
                if (ok and frame is not None and frame.ndim == 3
                        and frame.shape[2] == 3):
                    good = frame
                    break
            if good is None:
                cap.release()
                return None
            is_color = not (
                np.array_equal(good[:, :, 0], good[:, :, 1])
                and np.array_equal(good[:, :, 1], good[:, :, 2])
            )
            return {"name": "usb", "cap": cap, "cv2": cv2, "index": idx,
                    "is_color": is_color,
                    "frame_wh": (good.shape[1], good.shape[0])}
        except Exception as exc:
            log.info("USB camera index %d: %s", idx, exc)
            if cap is not None:
                try:
                    cap.release()
                except Exception:
                    pass
            return None

    def _probe_usb(self) -> Optional[dict]:
        """Scan /dev/video* for a colour camera node, falling back to a
        grayscale/IR node only if no colour node exists.

        On a RealSense, /dev/video0 is the *depth* stream (Z16, cv2 can't
        open it as a webcam); the IR nodes deliver Y8 (grayscale); the
        *colour* node delivers YUYV/BGR. We can't know the index up front,
        so we probe each and prefer the first genuine colour node — that's
        why the dashboard preview used to come up grayscale. Pin a node
        with SANAD_CAMERA_USB_INDEX=<n> to skip the scan entirely.
        """
        with self._reconfig_lock:
            w, h, f = self._w, self._h, self._fps
        try:
            import cv2  # type: ignore
        except Exception as exc:
            log.info("USB camera unavailable: %s", exc)
            return None

        # Pinned index — accept whatever it is (colour or not).
        explicit = os.environ.get("SANAD_CAMERA_USB_INDEX", "").strip()
        if explicit.isdigit():
            backend = self._open_usb_index(int(explicit), w, h, f, cv2)
            if backend is not None:
                fw, fh = backend["frame_wh"]
                log.info("USB camera: pinned /dev/video%d (%dx%d, %s)",
                         backend["index"], fw, fh,
                         "colour" if backend["is_color"] else "grayscale/IR")
                return backend
            log.warning("USB camera: pinned index %s unusable", explicit)
            return None

        # Scan — prefer a real colour node; keep the first grayscale node
        # as a last resort so the camera still works if that's all there is.
        gray_fallback: Optional[dict] = None
        for idx in range(_USB_SCAN_RANGE):
            backend = self._open_usb_index(idx, w, h, f, cv2)
            if backend is None:
                continue
            fw, fh = backend["frame_wh"]
            if backend["is_color"]:
                log.info("USB camera: using /dev/video%d (colour, %dx%d)",
                         idx, fw, fh)
                if gray_fallback is not None:
                    try:
                        gray_fallback["cap"].release()
                    except Exception:
                        pass
                return backend
            # grayscale/IR — remember the first, release any extras
            if gray_fallback is None:
                gray_fallback = backend
            else:
                try:
                    backend["cap"].release()
                except Exception:
                    pass

        if gray_fallback is not None:
            fw, fh = gray_fallback["frame_wh"]
            log.warning("USB camera: no colour node found — falling back to "
                        "/dev/video%d (grayscale/IR, %dx%d). For a RealSense, "
                        "build pyrealsense2 or pin the colour node with "
                        "SANAD_CAMERA_USB_INDEX.", gray_fallback["index"], fw, fh)
            return gray_fallback

        log.info("USB camera unavailable: no working /dev/video* node found "
                 "(scanned %d indices)", _USB_SCAN_RANGE)
        return None

    # ── main capture loop ───────────────────────────────────

    def _reconnect_loop(self, initial_backend: dict) -> None:
        """Outer loop — owns reconnect with exponential backoff.

        Inner `_capture_session` runs until the camera goes stale, the
        stop flag is set, or a reconfigure is requested. On stall we
        sleep + re-probe; on reconfigure we re-probe immediately at the
        new resolution. Backoff resets after a successful session.

        `backend` is None whenever no open backend is held. A capture
        session is only ever run on a live backend (never on one that was
        already torn down), and whatever backend is still open when the
        loop exits is released in the `finally` below.
        """
        backend: Optional[dict] = initial_backend
        backoff = self._reconnect_min_s

        try:
            while not self._stop.is_set():
                reconfigured = False
                if backend is not None:
                    try:
                        reconfigured = self._capture_session(backend)
                    except Exception as exc:
                        log.exception("Camera capture session crashed: %s", exc)
                        self._error = str(exc)
                    finally:
                        self._teardown(backend)
                        backend = None

                    if self._stop.is_set():
                        break

                if reconfigured:
                    # Fast path — rebuild immediately at the new profile.
                    with self._reconfig_lock:
                        self._reconfig_pending = False
                    backend = self._probe_any()
                    if backend is not None:
                        self._backend = backend["name"]
                        self._error = None
                        backoff = self._reconnect_min_s
                        log.info("Camera rebuilt after reconfigure (backend=%s)",
                                 self._backend)
                        continue
                    log.warning("Camera reconfigure: re-probe failed — "
                                "retrying in %.1fs", backoff)
                else:
                    # Session ended unexpectedly (stall / crash), or the
                    # previous re-probe found nothing.
                    log.warning("Camera disconnected — reconnecting in %.1fs",
                                backoff)

                self._error = "reconnecting"
                if self._stop.wait(backoff):  # interruptible sleep
                    break
                backoff = min(backoff * 2, self._reconnect_max_s)

                backend = self._probe_any()
                if backend is None:
                    self._backend = None
                    continue  # nothing open — next pass goes straight to backoff
                self._backend = backend["name"]
                self._reconnect_count += 1
                self._error = None
                log.info("Camera reconnected (backend=%s, attempt #%d)",
                         self._backend, self._reconnect_count)
                backoff = self._reconnect_min_s  # reset on success
        finally:
            # Stop can arrive while a re-probe is in flight; release the
            # backend it just opened instead of leaking the device.
            if backend is not None:
                self._teardown(backend)

    def _capture_session(self, backend: dict) -> bool:
        """Inner capture loop — runs until stop, stale-frame timeout, or
        a reconfigure request.

        Returns True if it exited because of a reconfigure (caller rebuilds
        immediately), False on a stall or clean stop.
        """
        import cv2  # always available — used for JPEG encode

        with self._reconfig_lock:
            encode_params = [int(cv2.IMWRITE_JPEG_QUALITY), self._q]
        last_frame_time = time.time()
        consecutive_failures = 0

        while not self._stop.is_set():
            if self._reconfig_pending:
                log.info("Camera reconfigure requested — rebuilding pipeline")
                return True

            bgr = self._read_frame(backend)
            if bgr is None:
                consecutive_failures += 1
                age = time.time() - last_frame_time
                if age > self._stale_s:
                    log.warning(
                        "Camera stalled %.1fs (%d consecutive timeouts) — "
                        "rebuilding pipeline", age, consecutive_failures,
                    )
                    return False
                # Intermediate warnings so degradation is visible early
                if consecutive_failures in (3, 10, 30):
                    log.warning("Camera slow (%d failures, age %.1fs)",
                                consecutive_failures, age)
                time.sleep(0.05)
                continue

            try:
                ok, buf = cv2.imencode(".jpg", bgr, encode_params)
            except Exception as exc:
                log.warning("JPEG encode failed: %s", exc)
                continue
            if not ok:
                continue
            jpeg = bytes(buf)
            b64 = base64.b64encode(jpeg).decode("ascii")
            now = time.time()
            with self._lock:
                if self._stop.is_set():
                    # stop() sets the flag before it clears the cache, so a
                    # read that was in flight must not publish afterwards
                    # and resurrect a frame once vision is OFF.
                    return False
                self._latest_jpeg = jpeg
                self._latest_b64 = b64
                self._latest_ts = now
                self._frame_seq += 1
            last_frame_time = now
            consecutive_failures = 0

        return False

    def _read_frame(self, backend: dict) -> Optional[np.ndarray]:
        """Read one frame from the backend; None on timeout or failure."""
        name = backend["name"]
        if name == "realsense":
            try:
                frames = backend["pipeline"].wait_for_frames(
                    timeout_ms=self._capture_timeout_ms,
                )
                color = frames.get_color_frame()
                if not color:
                    return None
                return np.asanyarray(color.get_data())
            except Exception:
                # Soft path — single timeouts handled by _capture_session's
                # stale-detection logic; don't spam the log per frame.
                return None
        elif name == "usb":
            cap = backend["cap"]
            ok, frame = cap.read()
            if not ok or frame is None:
                return None
            return frame
        return None

    def _teardown(self, backend: dict) -> None:
        """Release the backend's hardware handle; errors are only logged."""
        name = backend.get("name")
        try:
            if name == "realsense":
                backend["pipeline"].stop()
            elif name == "usb":
                backend["cap"].release()
        except Exception as exc:
            log.info("Camera teardown: %s", exc)
+""" + +from __future__ import annotations + +import io +import json +import re +import threading +import zipfile +from dataclasses import dataclass, field +from datetime import datetime +from pathlib import Path +from typing import Iterable + +from Project.Sanad.core.logger import get_logger + +log = get_logger("face_gallery") + + +_DIR_RE = re.compile(r"^face_(\d+)$") +ALLOWED_EXTS = {".jpg", ".jpeg", ".png"} +SAMPLE_NAME_RE = re.compile(r"^face_(\d+)\.(jpg|jpeg|png)$", re.IGNORECASE) + + +@dataclass +class PhotoInfo: + name: str + size_bytes: int + path: Path + + +@dataclass +class FaceEntry: + id: int + name: str | None + added_at: str | None + dir: Path + description: str | None = None + sample_paths: list[Path] = field(default_factory=list) + + def to_dict(self) -> dict: + return { + "id": self.id, + "name": self.name, + "description": self.description, + "added_at": self.added_at, + "dir": str(self.dir), + "photos": [ + {"name": p.name, "size_bytes": p.stat().st_size} + for p in self.sample_paths + if p.exists() + ], + } + + +class FaceGallery: + """File-system backed gallery rooted at `root` (e.g. data/faces/).""" + + def __init__(self, root: Path | str) -> None: + self.root = Path(root) + self._lock = threading.RLock() + + # ── read ──────────────────────────────────────────────── + + def _ensure_root(self) -> None: + self.root.mkdir(parents=True, exist_ok=True) + + def _iter_face_dirs(self) -> Iterable[tuple[int, Path]]: + if not self.root.exists(): + return + for child in sorted(self.root.iterdir()): + if not child.is_dir(): + continue + m = _DIR_RE.match(child.name) + if not m: + continue + yield int(m.group(1)), child + + def _samples_in(self, face_dir: Path) -> list[Path]: + # Count only real face_N. samples — a stray non-conforming + # image (e.g. 
a manually dropped thumbnail.jpg) must not inflate the + # sample count, or delete_photo's "only photo" guard could let the + # last real sample be deleted, leaving the face with zero usable + # samples for the primer. + out: list[Path] = [] + for p in sorted(face_dir.iterdir()): + if p.is_file() and SAMPLE_NAME_RE.match(p.name): + out.append(p) + return out + + def _meta(self, face_dir: Path) -> tuple[str | None, str | None, str | None]: + """Return (name, description, added_at) — any may be None.""" + meta_path = face_dir / "meta.json" + if not meta_path.exists(): + return None, None, None + try: + data = json.loads(meta_path.read_text(encoding="utf-8")) + except Exception: + return None, None, None + name = data.get("name") + description = data.get("description") + added = data.get("added_at") + return (name if name else None), (description if description else None), added + + def list(self) -> list[FaceEntry]: + with self._lock: + entries: list[FaceEntry] = [] + for face_id, face_dir in self._iter_face_dirs(): + name, description, added = self._meta(face_dir) + entries.append(FaceEntry( + id=face_id, + name=name, + description=description, + added_at=added, + dir=face_dir, + sample_paths=self._samples_in(face_dir), + )) + return entries + + def get(self, face_id: int) -> FaceEntry | None: + with self._lock: + face_dir = self.root / f"face_{face_id}" + if not face_dir.is_dir(): + return None + name, description, added = self._meta(face_dir) + return FaceEntry( + id=face_id, name=name, description=description, added_at=added, + dir=face_dir, sample_paths=self._samples_in(face_dir), + ) + + def get_photo(self, face_id: int, photo_name: str) -> Path | None: + with self._lock: + face_dir = self.root / f"face_{face_id}" + if not face_dir.is_dir(): + return None + p = face_dir / photo_name + try: + p.resolve().relative_to(face_dir.resolve()) + except ValueError: + return None + if not p.exists() or p.suffix.lower() not in ALLOWED_EXTS: + return None + return p + + # 
── write ─────────────────────────────────────────────── + + def next_id(self) -> int: + with self._lock: + max_id = 0 + for face_id, _ in self._iter_face_dirs(): + if face_id > max_id: + max_id = face_id + return max_id + 1 + + def _next_sample_name(self, face_dir: Path, ext: str) -> str: + """Return next free face_N. filename inside face_dir.""" + existing = self._samples_in(face_dir) + max_n = 0 + for p in existing: + m = SAMPLE_NAME_RE.match(p.name) + if m: + n = int(m.group(1)) + if n > max_n: + max_n = n + return f"face_{max_n + 1}{ext.lower()}" + + @staticmethod + def _detect_ext(jpeg_or_png: bytes) -> str: + """Sniff PNG vs JPEG from the magic bytes.""" + if len(jpeg_or_png) >= 8 and jpeg_or_png[:8] == b"\x89PNG\r\n\x1a\n": + return ".png" + return ".jpg" + + def _write_meta(self, face_dir: Path, name: str | None, + description: str | None = None, + added_at: str | None = None) -> None: + meta: dict[str, str] = {} + if name: + meta["name"] = name + if description: + meta["description"] = description + meta["added_at"] = added_at or datetime.now().isoformat(timespec="seconds") + (face_dir / "meta.json").write_text( + json.dumps(meta, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + + def create_face(self, image_bytes_list: list[bytes], + name: str | None = None, + description: str | None = None) -> FaceEntry: + """Create a new face_{next_id}/ with one or more samples.""" + if not image_bytes_list: + raise ValueError("create_face: empty image list") + with self._lock: + self._ensure_root() + face_id = self.next_id() + face_dir = self.root / f"face_{face_id}" + face_dir.mkdir(parents=True, exist_ok=False) + for idx, data in enumerate(image_bytes_list, start=1): + ext = self._detect_ext(data) + fname = f"face_{idx}{ext}" + (face_dir / fname).write_bytes(data) + clean_name = (name or "").strip() or None + clean_desc = (description or "").strip() or None + self._write_meta(face_dir, clean_name, clean_desc) + log.info("Created face_%d (samples=%d, name=%s, 
desc=%s)", + face_id, len(image_bytes_list), clean_name or "(unnamed)", + "yes" if clean_desc else "no") + return self.get(face_id) # type: ignore[return-value] + + def add_photo(self, face_id: int, image_bytes: bytes) -> str: + """Append a new sample to an existing face. Returns the filename.""" + with self._lock: + face_dir = self.root / f"face_{face_id}" + if not face_dir.is_dir(): + raise FileNotFoundError(f"face_{face_id} not found") + ext = self._detect_ext(image_bytes) + fname = self._next_sample_name(face_dir, ext) + (face_dir / fname).write_bytes(image_bytes) + log.info("Added sample %s to face_%d", fname, face_id) + return fname + + def rename(self, face_id: int, name: str | None) -> None: + """Update meta.json with a new name (or clear it if name is empty). + + Preserves the existing description + added_at. + """ + with self._lock: + face_dir = self.root / f"face_{face_id}" + if not face_dir.is_dir(): + raise FileNotFoundError(f"face_{face_id} not found") + _, description, added = self._meta(face_dir) + clean = (name or "").strip() or None + self._write_meta(face_dir, clean, description, added_at=added) + log.info("Renamed face_%d → %s", face_id, clean or "(unnamed)") + + def set_description(self, face_id: int, description: str | None) -> None: + """Update meta.json with a free-text description (or clear it). + + Preserves the existing name + added_at. The description is folded + into the Gemini primer turn so Gemini can reference it. + """ + with self._lock: + face_dir = self.root / f"face_{face_id}" + if not face_dir.is_dir(): + raise FileNotFoundError(f"face_{face_id} not found") + name, _, added = self._meta(face_dir) + clean = (description or "").strip() or None + self._write_meta(face_dir, name, clean, added_at=added) + log.info("Set description for face_%d (%s)", face_id, + "cleared" if not clean else f"{len(clean)} chars") + + def delete_photo(self, face_id: int, photo_name: str) -> None: + """Delete one photo. 
Refuses if it's the only remaining sample.""" + with self._lock: + face_dir = self.root / f"face_{face_id}" + if not face_dir.is_dir(): + raise FileNotFoundError(f"face_{face_id} not found") + samples = self._samples_in(face_dir) + if len(samples) <= 1: + raise ValueError( + "Cannot delete the only photo — delete the face instead." + ) + target = self.get_photo(face_id, photo_name) + if target is None: + raise FileNotFoundError(f"photo {photo_name} not found") + target.unlink() + log.info("Deleted %s from face_%d", photo_name, face_id) + + def delete_face(self, face_id: int) -> None: + """Delete the entire face_{id}/ folder (including meta.json).""" + import shutil + with self._lock: + face_dir = self.root / f"face_{face_id}" + if not face_dir.is_dir(): + raise FileNotFoundError(f"face_{face_id} not found") + shutil.rmtree(face_dir) + log.info("Deleted face_%d", face_id) + + def zip_face(self, face_id: int) -> bytes: + """Return the entire face_{id}/ folder packaged as a ZIP.""" + with self._lock: + face_dir = self.root / f"face_{face_id}" + if not face_dir.is_dir(): + raise FileNotFoundError(f"face_{face_id} not found") + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w", compression=zipfile.ZIP_DEFLATED) as zf: + for p in sorted(face_dir.iterdir()): + if p.is_file(): + zf.write(p, arcname=f"face_{face_id}/{p.name}") + return buf.getvalue() + + # ── primer support (used by gemini/script.py) ─────────── + + def load_for_primer( + self, max_samples_per_face: int = 3, resize_long_side: int = 256, + ) -> list[tuple[FaceEntry, list[bytes]]]: + """Return [(FaceEntry, [jpeg_bytes,…]), …] for Gemini upload. + + Resizes each sample to longest-side <= resize_long_side, re-encodes + as JPEG (q=85) to keep the token cost manageable. Falls back to + the raw bytes if PIL isn't available. 
+ """ + entries = self.list() + if not entries: + return [] + out: list[tuple[FaceEntry, list[bytes]]] = [] + for e in entries: + paths = e.sample_paths[:max_samples_per_face] + jpegs: list[bytes] = [] + for p in paths: + try: + raw = p.read_bytes() + except OSError: + continue + processed = self._resize_for_primer(raw, resize_long_side) + jpegs.append(processed or raw) + if jpegs: + out.append((e, jpegs)) + return out + + @staticmethod + def _resize_for_primer(raw: bytes, long_side: int) -> bytes | None: + """Resize image to longest-side ≤ long_side, re-encode JPEG q=85. + + Returns None on any failure (caller falls back to raw bytes). + """ + try: + from PIL import Image # type: ignore + except Exception: + return None + try: + img = Image.open(io.BytesIO(raw)) + img.load() + if img.mode not in ("RGB", "L"): + img = img.convert("RGB") + w, h = img.size + scale = long_side / max(w, h) if max(w, h) > long_side else 1.0 + if scale < 1.0: + img = img.resize( + (max(1, int(w * scale)), max(1, int(h * scale))), + Image.LANCZOS, + ) + buf = io.BytesIO() + img.save(buf, format="JPEG", quality=85, optimize=True) + return buf.getvalue() + except Exception: + return None diff --git a/vendor/Sanad/vision/recognition_state.py b/vendor/Sanad/vision/recognition_state.py new file mode 100644 index 0000000..12b01c7 --- /dev/null +++ b/vendor/Sanad/vision/recognition_state.py @@ -0,0 +1,98 @@ +"""Recognition state file — atomic JSON I/O shared by parent + child. + +The dashboard (parent process) writes this file on every toggle / face +gallery change; the Gemini child (`gemini/script.py`) polls it at 1 Hz +to flip its in-memory flags without a session restart. 
"""Recognition state file — atomic JSON I/O shared by parent + child.

The dashboard (parent process) writes this file on every toggle / face
gallery change; the Gemini child (`gemini/script.py`) polls it at 1 Hz
to flip its in-memory flags without a session restart.

Format (data/.recognition_state.json):
  {
    "vision_enabled": bool,
    "face_rec_enabled": bool,
    "gallery_version": int,        # bumped on every face CRUD
    "zone_rec_enabled": bool,      # N3 — zones/places knowledge toggle
    "zones_version": int,          # bumped on every zone/place CRUD
    "nav_target_zone_id": int,     # active "go here" destination (0 = none)
    "nav_target_place_id": int,
    "movement_enabled": bool,      # N2 — Gemini-driven locomotion gate
    "record_enabled": bool         # auto-record every conversation turn
  }
"""

from __future__ import annotations

import json
import os
import tempfile
from dataclasses import asdict, dataclass
from pathlib import Path


@dataclass
class RecognitionState:
    """In-memory view of the recognition state file (one field per JSON key)."""

    vision_enabled: bool = False
    face_rec_enabled: bool = False
    gallery_version: int = 0
    # N3 — zones/places knowledge (zone → place → linked faces)
    zone_rec_enabled: bool = False
    zones_version: int = 0
    # "Go here" destination — the active place the robot should head to.
    # 0/0 = no destination set. Actual locomotion is wired by N2.
    nav_target_zone_id: int = 0
    nav_target_place_id: int = 0
    # N2 — Gemini-driven locomotion enable gate (default OFF for safety)
    movement_enabled: bool = False
    # Auto-record every conversation turn to data/recordings/ (default ON to
    # match historical behavior). Toggled live from the Live Gemini panel; the
    # child syncs TurnRecorder.enabled to this without a session restart.
    record_enabled: bool = True


def _coerce_int(value: object, default: int) -> int:
    """Return ``int(value)``, or ``default`` when the value is not convertible."""
    try:
        return int(value)  # type: ignore[call-overload]
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / list / dict; ValueError: "abc" or NaN;
        # OverflowError: Infinity (json.loads accepts both NaN and Infinity).
        return default


def read(path: Path) -> RecognitionState:
    """Return the persisted state, or a default if missing/corrupt.

    Never raises: an unreadable or non-object file yields the default state,
    and an integer field holding a non-numeric value falls back to that
    field's default while the remaining fields are still honoured.
    """
    try:
        raw = json.loads(Path(path).read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # JSONDecodeError and UnicodeDecodeError (bad UTF-8 bytes).
        return RecognitionState()
    if not isinstance(raw, dict):
        return RecognitionState()
    state = RecognitionState()
    for key, default in asdict(state).items():
        if key not in raw:
            continue
        # bool must be tested first: it is a subclass of int.
        if isinstance(default, bool):
            setattr(state, key, bool(raw[key]))
        else:
            setattr(state, key, _coerce_int(raw[key], default))
    return state


def write(path: Path, state: RecognitionState) -> None:
    """Write atomically via tempfile + os.replace.

    The temp file is created next to the target so the final rename stays on
    one filesystem; it is removed again if anything fails before the swap.
    """
    p = Path(path)
    p.parent.mkdir(parents=True, exist_ok=True)
    fd, tmp = tempfile.mkstemp(prefix=f".{p.name}.", suffix=".tmp", dir=str(p.parent))
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as fh:
            json.dump(asdict(state), fh, ensure_ascii=False, indent=2)
        os.replace(tmp, p)
    except Exception:
        try:
            os.unlink(tmp)
        except OSError:
            pass
        raise


def mutate(path: Path, **changes) -> RecognitionState:
    """Read-modify-write helper. Returns the new state.

    Keyword names that are not ``RecognitionState`` fields are ignored.
    """
    cur = read(path)
    for k, v in changes.items():
        if hasattr(cur, k):
            setattr(cur, k, v)
    write(path, cur)
    return cur

# diff --git a/vendor/Sanad/vision/zone_gallery.py b/vendor/Sanad/vision/zone_gallery.py
"""Zone gallery — file IO over data/zones/zone_{zid}/place_{pid}/.

A two-level hierarchy that replaces the flat place gallery:

    Zone   = name + description   (a region, e.g. "Ground Floor")
      └─ Place = name + description + optional reference photos
                 + optional linked face ids (people associated with the place)

Layout:
    zones/
      zone_{zid}/
        meta.json            {name, description, added_at, linked_map?}
        place_{pid}/
          meta.json          {name, description, face_ids:[int], added_at, nav_place?}
          place_1.jpg        ← optional reference photos (0..N)
          place_2.png

`linked_map` (optional) binds a zone to a nav2 map .db; `nav_place` (optional)
links a place to a nav2 place name in that map so it can be DRIVEN to. Both are
only written when set (absent = None on read), so old metadata stays valid.

`face_ids` reference enrolled faces in the SEPARATE face gallery
(data/faces/face_{id}); this module only stores the ids — name/photo
resolution is done by the caller (route layer + Gemini primer).

Reference photos let Gemini visually recognise the place (and, later, let the
robot navigate to it). A place needs only a name + description; photos and
linked faces are both optional. Thread-safe via a single internal RLock.
"""

from __future__ import annotations

import io
import json
import re
import threading
import zipfile
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Iterable

from Project.Sanad.core.logger import get_logger

log = get_logger("zone_gallery")

_ZONE_DIR_RE = re.compile(r"^zone_(\d+)$")
_PLACE_DIR_RE = re.compile(r"^place_(\d+)$")
ALLOWED_EXTS = {".jpg", ".jpeg", ".png"}
SAMPLE_NAME_RE = re.compile(r"^place_(\d+)\.(jpg|jpeg|png)$", re.IGNORECASE)


@dataclass
class PlaceEntry:
    """One place inside a zone, as read from its ``place_{pid}/`` directory."""

    id: int
    zone_id: int
    name: str | None
    added_at: str | None
    dir: Path
    description: str | None = None
    face_ids: list[int] = field(default_factory=list)
    sample_paths: list[Path] = field(default_factory=list)
    # Name of the nav2 saved place (in the zone's linked map) this vision place
    # drives to. None = announce/recognise only, no driving.
    nav_place: str | None = None

    def to_dict(self) -> dict:
        """Return a JSON-serialisable view; photos are listed by name + size."""
        return {
            "id": self.id,
            "zone_id": self.zone_id,
            "name": self.name,
            "description": self.description,
            "face_ids": list(self.face_ids),
            "nav_place": self.nav_place,
            "added_at": self.added_at,
            "photos": [
                {"name": p.name, "size_bytes": p.stat().st_size}
                for p in self.sample_paths if p.exists()
            ],
        }


@dataclass
class ZoneEntry:
    """One zone and the places found beneath its ``zone_{zid}/`` directory."""

    id: int
    name: str | None
    added_at: str | None
    dir: Path
    description: str | None = None
    places: list[PlaceEntry] = field(default_factory=list)
    # nav2 map (.db basename, e.g. "office.db") this zone is bound to. A zone
    # with a linked map can be driven in via "Gemini Nav"; its places link to
    # that map's nav2 places.
    linked_map: str | None = None

    def to_dict(self) -> dict:
        """Return a JSON-serialisable view including every nested place."""
        return {
            "id": self.id,
            "name": self.name,
            "description": self.description,
            "linked_map": self.linked_map,
            "added_at": self.added_at,
            "places": [p.to_dict() for p in self.places],
        }


class ZoneGallery:
    """File-system backed zone/place gallery rooted at `root` (data/zones/)."""

    def __init__(self, root: Path | str) -> None:
        self.root = Path(root)
        self._lock = threading.RLock()

    # ── paths ────────────────────────────────────────────────

    def _ensure_root(self) -> None:
        """Create the gallery root directory if it does not exist yet."""
        self.root.mkdir(parents=True, exist_ok=True)

    def _zone_dir(self, zone_id: int) -> Path:
        return self.root / f"zone_{zone_id}"

    def _place_dir(self, zone_id: int, place_id: int) -> Path:
        return self.root / f"zone_{zone_id}" / f"place_{place_id}"

    def _iter_zone_dirs(self) -> Iterable[tuple[int, Path]]:
        """Yield ``(zone_id, path)`` for each ``zone_<n>`` directory under root."""
        if not self.root.exists():
            return
        for child in sorted(self.root.iterdir()):
            if not child.is_dir():
                continue
            m = _ZONE_DIR_RE.match(child.name)
            if m:
                yield int(m.group(1)), child

    def _iter_place_dirs(self, zone_dir: Path) -> Iterable[tuple[int, Path]]:
        """Yield ``(place_id, path)`` for each ``place_<n>`` directory in a zone."""
        if not zone_dir.exists():
            return
        for child in sorted(zone_dir.iterdir()):
            if not child.is_dir():
                continue
            m = _PLACE_DIR_RE.match(child.name)
            if m:
                yield int(m.group(1)), child

    def _samples_in(self, place_dir: Path) -> list[Path]:
        """Return the image files (by extension) in ``place_dir``, sorted."""
        return [p for p in sorted(place_dir.iterdir())
                if p.is_file() and p.suffix.lower() in ALLOWED_EXTS]

    # ── meta ─────────────────────────────────────────────────

    @staticmethod
    def _read_meta_dict(meta_path: Path) -> dict | None:
        """Return meta.json parsed as a dict, or None if unusable.

        "Unusable" covers a missing file, unreadable/invalid JSON, and valid
        JSON whose top level is not an object (e.g. a list or a string).
        """
        if not meta_path.exists():
            return None
        try:
            data = json.loads(meta_path.read_text(encoding="utf-8"))
        except Exception:
            # Best-effort read: any failure means "no metadata".
            return None
        return data if isinstance(data, dict) else None

    def _zone_meta(self, zone_dir: Path) -> tuple[str | None, str | None, str | None, str | None]:
        """Returns (name, description, added_at, linked_map)."""
        data = self._read_meta_dict(zone_dir / "meta.json")
        if data is None:
            return None, None, None, None
        return ((data.get("name") or None), (data.get("description") or None),
                data.get("added_at"), (data.get("linked_map") or None))

    def _place_meta(self, place_dir: Path) -> tuple[str | None, str | None, list[int], str | None, str | None]:
        """Returns (name, description, face_ids, added_at, nav_place)."""
        data = self._read_meta_dict(place_dir / "meta.json")
        if data is None:
            return None, None, [], None, None
        raw_ids = data.get("face_ids") or []
        if not isinstance(raw_ids, (list, tuple)):
            raw_ids = []  # a scalar here is corrupt metadata, not an id list
        face_ids: list[int] = []
        for x in raw_ids:
            try:
                face_ids.append(int(x))
            except (TypeError, ValueError):
                continue
        return ((data.get("name") or None), (data.get("description") or None),
                face_ids, data.get("added_at"), (data.get("nav_place") or None))

    def _write_zone_meta(self, zone_dir: Path, name, description,
                         added_at=None, linked_map=None) -> None:
        """Write ``zone_dir/meta.json`` atomically; falsy fields are omitted."""
        meta: dict = {}
        if name:
            meta["name"] = name
        if description:
            meta["description"] = description
        if linked_map:
            meta["linked_map"] = linked_map
        meta["added_at"] = added_at or datetime.now().isoformat(timespec="seconds")
        # Temp file + rename so a reader never sees a half-written meta.json.
        tmp_path = zone_dir / "meta.json.tmp"
        tmp_path.write_text(
            json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
        tmp_path.replace(zone_dir / "meta.json")

+ def _write_place_meta(self, place_dir: Path, name, description, + face_ids=None, added_at=None, nav_place=None) -> None: + meta: dict = {} + if name: + meta["name"] = name + if description: + meta["description"] = description + meta["face_ids"] = [int(x) for x in (face_ids or [])] + if nav_place: + meta["nav_place"] = nav_place + meta["added_at"] = added_at or datetime.now().isoformat(timespec="seconds") + (place_dir / "meta.json").write_text( + json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8") + + # ── read ───────────────────────────────────────────────── + + def _build_place(self, zone_id: int, place_id: int, place_dir: Path) -> PlaceEntry: + name, desc, face_ids, added, nav_place = self._place_meta(place_dir) + return PlaceEntry( + id=place_id, zone_id=zone_id, name=name, description=desc, + face_ids=face_ids, added_at=added, dir=place_dir, + sample_paths=self._samples_in(place_dir), nav_place=nav_place, + ) + + def _build_zone(self, zone_id: int, zone_dir: Path) -> ZoneEntry: + name, desc, added, linked_map = self._zone_meta(zone_dir) + places = [self._build_place(zone_id, pid, pdir) + for pid, pdir in self._iter_place_dirs(zone_dir)] + return ZoneEntry(id=zone_id, name=name, description=desc, + added_at=added, dir=zone_dir, places=places, + linked_map=linked_map) + + def list_zones(self) -> list[ZoneEntry]: + with self._lock: + return [self._build_zone(zid, zdir) for zid, zdir in self._iter_zone_dirs()] + + def get_zone(self, zone_id: int) -> ZoneEntry | None: + with self._lock: + zd = self._zone_dir(zone_id) + return self._build_zone(zone_id, zd) if zd.is_dir() else None + + def get_place(self, zone_id: int, place_id: int) -> PlaceEntry | None: + with self._lock: + pd = self._place_dir(zone_id, place_id) + return self._build_place(zone_id, place_id, pd) if pd.is_dir() else None + + def get_photo(self, zone_id: int, place_id: int, photo_name: str) -> Path | None: + with self._lock: + pd = self._place_dir(zone_id, place_id) + if not 
pd.is_dir(): + return None + p = pd / photo_name + try: + p.resolve().relative_to(pd.resolve()) + except ValueError: + return None + if not p.exists() or p.suffix.lower() not in ALLOWED_EXTS: + return None + return p + + # ── ids ────────────────────────────────────────────────── + + def next_zone_id(self) -> int: + with self._lock: + return max((zid for zid, _ in self._iter_zone_dirs()), default=0) + 1 + + def next_place_id(self, zone_id: int) -> int: + with self._lock: + zd = self._zone_dir(zone_id) + return max((pid for pid, _ in self._iter_place_dirs(zd)), default=0) + 1 + + def _next_sample_name(self, place_dir: Path, ext: str) -> str: + max_n = 0 + for p in self._samples_in(place_dir): + m = SAMPLE_NAME_RE.match(p.name) + if m: + max_n = max(max_n, int(m.group(1))) + return f"place_{max_n + 1}{ext.lower()}" + + @staticmethod + def _detect_ext(data: bytes) -> str: + if len(data) >= 8 and data[:8] == b"\x89PNG\r\n\x1a\n": + return ".png" + return ".jpg" + + # ── zone write ─────────────────────────────────────────── + + def create_zone(self, name: str | None = None, + description: str | None = None) -> ZoneEntry: + with self._lock: + self._ensure_root() + zid = self.next_zone_id() + zd = self._zone_dir(zid) + zd.mkdir(parents=True, exist_ok=False) + self._write_zone_meta(zd, (name or "").strip() or None, + (description or "").strip() or None) + log.info("Created zone_%d (name=%s)", zid, name or "(unnamed)") + return self._build_zone(zid, zd) + + def rename_zone(self, zone_id: int, name: str | None) -> None: + with self._lock: + zd = self._zone_dir(zone_id) + if not zd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id} not found") + _, desc, added, linked_map = self._zone_meta(zd) + self._write_zone_meta(zd, (name or "").strip() or None, desc, + added_at=added, linked_map=linked_map) + log.info("Renamed zone_%d → %s", zone_id, name or "(unnamed)") + + def describe_zone(self, zone_id: int, description: str | None) -> None: + with self._lock: + zd = 
self._zone_dir(zone_id) + if not zd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id} not found") + name, _, added, linked_map = self._zone_meta(zd) + self._write_zone_meta(zd, name, (description or "").strip() or None, + added_at=added, linked_map=linked_map) + log.info("Described zone_%d", zone_id) + + def set_zone_map(self, zone_id: int, linked_map: str | None) -> None: + """Bind (or unbind, with None/'') the zone to a nav2 map .db basename.""" + with self._lock: + zd = self._zone_dir(zone_id) + if not zd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id} not found") + name, desc, added, _ = self._zone_meta(zd) + self._write_zone_meta(zd, name, desc, added_at=added, + linked_map=(linked_map or "").strip() or None) + log.info("Linked zone_%d → map %s", zone_id, linked_map or "(none)") + + def delete_zone(self, zone_id: int) -> None: + import shutil + with self._lock: + zd = self._zone_dir(zone_id) + if not zd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id} not found") + shutil.rmtree(zd) + log.info("Deleted zone_%d (and its places)", zone_id) + + # ── place write ────────────────────────────────────────── + + def create_place(self, zone_id: int, name: str | None = None, + description: str | None = None, + face_ids: list[int] | None = None, + image_bytes_list: list[bytes] | None = None, + nav_place: str | None = None) -> PlaceEntry: + with self._lock: + zd = self._zone_dir(zone_id) + if not zd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id} not found") + pid = self.next_place_id(zone_id) + pd = self._place_dir(zone_id, pid) + pd.mkdir(parents=True, exist_ok=False) + for idx, data in enumerate(image_bytes_list or [], start=1): + (pd / f"place_{idx}{self._detect_ext(data)}").write_bytes(data) + self._write_place_meta(pd, (name or "").strip() or None, + (description or "").strip() or None, face_ids or [], + nav_place=(nav_place or "").strip() or None) + log.info("Created zone_%d/place_%d (name=%s, photos=%d, faces=%d, nav=%s)", + zone_id, pid, name or 
"(unnamed)", + len(image_bytes_list or []), len(face_ids or []), nav_place or "-") + return self._build_place(zone_id, pid, pd) + + def rename_place(self, zone_id: int, place_id: int, name: str | None) -> None: + with self._lock: + pd = self._place_dir(zone_id, place_id) + if not pd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found") + _, desc, fids, added, navp = self._place_meta(pd) + self._write_place_meta(pd, (name or "").strip() or None, desc, fids, + added_at=added, nav_place=navp) + log.info("Renamed zone_%d/place_%d → %s", zone_id, place_id, name or "(unnamed)") + + def describe_place(self, zone_id: int, place_id: int, description: str | None) -> None: + with self._lock: + pd = self._place_dir(zone_id, place_id) + if not pd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found") + name, _, fids, added, navp = self._place_meta(pd) + self._write_place_meta(pd, name, (description or "").strip() or None, fids, + added_at=added, nav_place=navp) + log.info("Described zone_%d/place_%d", zone_id, place_id) + + def set_place_faces(self, zone_id: int, place_id: int, face_ids: list[int]) -> None: + """Replace the set of linked face ids for a place.""" + with self._lock: + pd = self._place_dir(zone_id, place_id) + if not pd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found") + name, desc, _, added, navp = self._place_meta(pd) + clean = [] + for x in (face_ids or []): + try: + clean.append(int(x)) + except (TypeError, ValueError): + continue + self._write_place_meta(pd, name, desc, clean, added_at=added, nav_place=navp) + log.info("Set zone_%d/place_%d faces → %s", zone_id, place_id, clean) + + def set_place_nav(self, zone_id: int, place_id: int, nav_place: str | None) -> None: + """Link (or unlink, with None/'') a place to a nav2 place name in the + zone's map. 
This is what makes a vision place drivable.""" + with self._lock: + pd = self._place_dir(zone_id, place_id) + if not pd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found") + name, desc, fids, added, _ = self._place_meta(pd) + self._write_place_meta(pd, name, desc, fids, added_at=added, + nav_place=(nav_place or "").strip() or None) + log.info("Linked zone_%d/place_%d → nav place %s", + zone_id, place_id, nav_place or "(none)") + + def add_photo(self, zone_id: int, place_id: int, image_bytes: bytes) -> str: + with self._lock: + pd = self._place_dir(zone_id, place_id) + if not pd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found") + fname = self._next_sample_name(pd, self._detect_ext(image_bytes)) + (pd / fname).write_bytes(image_bytes) + log.info("Added %s to zone_%d/place_%d", fname, zone_id, place_id) + return fname + + def delete_photo(self, zone_id: int, place_id: int, photo_name: str) -> None: + with self._lock: + pd = self._place_dir(zone_id, place_id) + if not pd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found") + target = self.get_photo(zone_id, place_id, photo_name) + if target is None: + raise FileNotFoundError(f"photo {photo_name} not found") + target.unlink() + log.info("Deleted %s from zone_%d/place_%d", photo_name, zone_id, place_id) + + def delete_place(self, zone_id: int, place_id: int) -> None: + import shutil + with self._lock: + pd = self._place_dir(zone_id, place_id) + if not pd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found") + shutil.rmtree(pd) + log.info("Deleted zone_%d/place_%d", zone_id, place_id) + + def zip_place(self, zone_id: int, place_id: int) -> bytes: + with self._lock: + pd = self._place_dir(zone_id, place_id) + if not pd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found") + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w", compression=zipfile.ZIP_DEFLATED) as zf: + for p in 
sorted(pd.iterdir()): + if p.is_file(): + zf.write(p, arcname=f"zone_{zone_id}_place_{place_id}/{p.name}") + return buf.getvalue() + + # ── primer support (used by gemini/script.py) ─────────── + + def load_for_primer(self, max_samples_per_place: int = 3, + resize_long_side: int = 256 + ) -> list[tuple[ZoneEntry, list[tuple[PlaceEntry, list[bytes]]]]]: + """Return [(ZoneEntry, [(PlaceEntry, [jpeg_bytes,…]), …]), …]. + + Photo lists may be empty (name+description-only place). Photos are + resized to longest-side <= resize_long_side, re-encoded JPEG q=85. + """ + out: list[tuple[ZoneEntry, list[tuple[PlaceEntry, list[bytes]]]]] = [] + for zone in self.list_zones(): + place_jpegs: list[tuple[PlaceEntry, list[bytes]]] = [] + for place in zone.places: + jpegs: list[bytes] = [] + for p in place.sample_paths[:max_samples_per_place]: + try: + raw = p.read_bytes() + except OSError: + continue + jpegs.append(self._resize_for_primer(raw, resize_long_side) or raw) + place_jpegs.append((place, jpegs)) + out.append((zone, place_jpegs)) + return out + + @staticmethod + def _resize_for_primer(raw: bytes, long_side: int) -> bytes | None: + try: + from PIL import Image # type: ignore + except Exception: + return None + try: + img = Image.open(io.BytesIO(raw)) + img.load() + if img.mode not in ("RGB", "L"): + img = img.convert("RGB") + w, h = img.size + scale = long_side / max(w, h) if max(w, h) > long_side else 1.0 + if scale < 1.0: + img = img.resize((max(1, int(w * scale)), max(1, int(h * scale))), Image.LANCZOS) + buf = io.BytesIO() + img.save(buf, format="JPEG", quality=85, optimize=True) + return buf.getvalue() + except Exception: + return None diff --git a/vendor/Sanad/voice/__init__.py b/vendor/Sanad/voice/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/voice/audio_devices.py b/vendor/Sanad/voice/audio_devices.py new file mode 100644 index 0000000..9a075b9 --- /dev/null +++ b/vendor/Sanad/voice/audio_devices.py @@ -0,0 +1,842 @@ +"""Audio 
"""Audio device profiles + pactl detection + selection persistence.

Manages multiple audio device profiles (generic built-in, Hollyland wireless
mic + built-in speaker, Anker PowerConf) and lets the dashboard switch
between them at runtime. Selection is persisted to data/audio_device.json
so the choice survives restart.

Resolution policy:
  1. User-selected profile (from data/audio_device.json) — if its sink/source
     is currently plugged in, use it.
  2. Auto-detected profile based on what is currently plugged in.
  3. Built-in fallback.

Each profile has:
  - id: short identifier
  - label: human-readable name
  - sink_pattern: substring(s) matched against pactl sink names
  - source_pattern: substring(s) matched against pactl source names
    (patterns are substrings because exact names contain serial numbers
    and may differ between machines)
  - description: free-text explanation of the profile
  - sink_sample_rate / source_sample_rate (optional; 0 = use device default)
"""

from __future__ import annotations

import json
import os
import subprocess
import tempfile
import threading
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Any

from Project.Sanad.config import DATA_DIR
from Project.Sanad.core.logger import get_logger

log = get_logger("audio_devices")

# Where the selected profile is persisted (see module docstring).
DEVICE_STATE_FILE = DATA_DIR / "audio_device.json"
# Module-level lock; its users are outside this section of the file.
_LOCK = threading.Lock()


@dataclass
class AudioProfile:
    """One selectable audio setup: which sink and which source to look for."""

    id: str
    label: str
    sink_pattern: str             # substring used to find a sink
    source_pattern: str           # substring used to find a source
    description: str = ""
    sink_sample_rate: int = 0     # 0 = use device default
    source_sample_rate: int = 0


# Built-in device profiles.
#
# MATCHING RULES:
#   - Patterns are matched case-insensitively against the FULL PulseAudio name.
#   - Multiple patterns per field: comma-separated → match ANY.
#   - PulseAudio names change depending on the USB port, so we match the
#     product-name portion only (not the serial or port suffix).
#   - Order matters: the FIRST profile whose sink AND source both match
#     becomes the auto-default when no explicit selection is saved.
#
# Example PulseAudio names:
#   alsa_output.platform-sound.analog-stereo                        (built-in speaker)
#   alsa_input.platform-sound.analog-stereo                         (built-in mic)
#   alsa_output.usb-Anker_PowerConf_A3321-DEV-SN1-01.analog-stereo  (Anker speaker — SN1-01 is port-dependent)
#   alsa_input.usb-Anker_PowerConf_A3321-DEV-SN1-01.mono-fallback   (Anker mic)
#   alsa_input.usb-Shenzhen_Hollyland_Technology_Co._Ltd_Wireless_microphone_C63X223T6MX-01.analog-stereo
#                                                                   (Hollyland mic — C63X... is serial-dependent)

PROFILES: list[AudioProfile] = [
    AudioProfile(
        id="builtin",
        label="Built-in mic + speaker",
        sink_pattern="platform-sound",
        source_pattern="alsa_input.platform-sound",
        description="Jetson / G1 built-in audio chip. (Default)",
    ),
    AudioProfile(
        id="hollyland_builtin",
        label="Hollyland mic + built-in speaker",
        sink_pattern="platform-sound",
        source_pattern="hollyland,wireless_microphone",
        description="Hollyland wireless lavalier microphone with the Jetson built-in speaker.",
    ),
    AudioProfile(
        id="anker_powerconf",
        label="Anker PowerConf (mic + speaker)",
        sink_pattern="powerconf,anker",
        source_pattern="powerconf,anker",
        description="Anker PowerConf USB conference unit — mic + speaker on the same device.",
    ),
    AudioProfile(
        id="jbl_builtin_mic",
        label="JBL speaker + built-in mic",
        # The JBL connects over Bluetooth → its PulseAudio sink is a bluez sink
        # (name is MAC-based, e.g. bluez_output.XX_XX_…). Match "jbl" or "bluez".
        sink_pattern="jbl,bluez",
        # The JBL has NO microphone → input stays on the G1 built-in mic.
        source_pattern="alsa_input.platform-sound",
        description="JBL Bluetooth speaker for output + the G1 built-in microphone for input (the JBL has no mic).",
    ),
]

# The profile that should be used when no saved state and no auto-detect succeeds.
DEFAULT_PROFILE_ID = "builtin"

PROFILES_BY_ID: dict[str, AudioProfile] = {p.id: p for p in PROFILES}


# ───────────────────────── pactl helpers ─────────────────────────

def _run_pactl(args: list[str], timeout: float = 1.0) -> subprocess.CompletedProcess[str]:
    """Run ``pactl <args>`` and return the completed process.

    Never raises on a non-zero exit (``check=False``); may raise
    ``FileNotFoundError`` (no pactl binary) or ``subprocess.TimeoutExpired``.
    """
    return subprocess.run(
        ["pactl", *args],
        check=False,
        text=True,
        capture_output=True,
        timeout=timeout,
    )


def pactl_available() -> bool:
    """Return True if ``pactl info`` runs and exits with status 0."""
    try:
        r = _run_pactl(["info"])
        return r.returncode == 0
    except (FileNotFoundError, subprocess.SubprocessError):
        return False


def list_sinks() -> list[dict[str, str]]:
    """Return [{name, description, index}] for every sink."""
    return _list_kind("sinks")


def list_sources() -> list[dict[str, str]]:
    """Return [{name, description, index}] for every source."""
    return _list_kind("sources")


def _list_kind(kind: str) -> list[dict[str, str]]:
    """Enumerate sinks or sources via ``pactl list short <kind>``.

    Descriptions come from ONE additional ``pactl list <kind>`` call that is
    parsed for all devices at once, instead of one subprocess per device.
    Returns an empty list if pactl is missing, times out, or fails.
    """
    try:
        short = _run_pactl(["list", "short", kind])
    except (FileNotFoundError, subprocess.SubprocessError) as exc:
        log.warning("pactl list %s failed: %s", kind, exc)
        return []
    if short.returncode != 0:
        return []
    rows: list[tuple[str, str]] = []
    for raw in (short.stdout or "").splitlines():
        parts = raw.split("\t")
        if len(parts) < 2:
            parts = raw.split()  # tolerate space-separated output
        if len(parts) < 2:
            continue
        rows.append((parts[0], parts[1]))
    descriptions = _descriptions_by_name(kind) if rows else {}
    return [
        {"index": idx, "name": name, "description": descriptions.get(name, "")}
        for idx, name in rows
    ]


def _parse_descriptions(text: str) -> dict[str, str]:
    """Map device name → Description from verbose ``pactl list <kind>`` text.

    Each device section starts with a ``Sink #N`` / ``Source #N`` header; the
    first ``Name:`` and first ``Description:`` line inside it are paired.
    """
    found: dict[str, str] = {}
    name = ""
    description = ""
    for line in text.splitlines():
        if line.startswith(("Sink #", "Source #")):
            if name:
                found.setdefault(name, description)
            name = description = ""
            continue
        stripped = line.strip()
        if stripped.startswith("Name:") and not name:
            name = stripped.split(":", 1)[1].strip()
        elif stripped.startswith("Description:") and not description:
            description = stripped.split(":", 1)[1].strip()
    if name:
        found.setdefault(name, description)
    return found


def _descriptions_by_name(kind: str) -> dict[str, str]:
    """Run ``pactl list <kind>`` once; return {} if it cannot be run."""
    try:
        r = _run_pactl(["list", kind])
    except (FileNotFoundError, subprocess.SubprocessError):
        return {}
    if r.returncode != 0:
        return {}
    return _parse_descriptions(r.stdout or "")


def _description_for(kind: str, name: str) -> str:
    """Best-effort `pactl list <kind>` lookup of one device's Description.

    The name must match exactly, so a device whose name merely ends with
    ``name`` is not picked by mistake. Returns "" when nothing is found.
    """
    return _descriptions_by_name(kind).get(name, "")


def get_default_sink() -> str:
    """Return the default sink name, or "" if it cannot be determined."""
    try:
        r = _run_pactl(["get-default-sink"])
        return (r.stdout or "").strip() if r.returncode == 0 else ""
    except (FileNotFoundError, subprocess.SubprocessError):
        return ""


def get_default_source() -> str:
    """Return the default source name, or "" if it cannot be determined."""
    try:
        r = _run_pactl(["get-default-source"])
        return (r.stdout or "").strip() if r.returncode == 0 else ""
    except (FileNotFoundError, subprocess.SubprocessError):
        return ""


def set_default_sink(name: str) -> bool:
    """Make ``name`` the default sink; return True on success."""
    try:
        r = _run_pactl(["set-default-sink", name])
        return r.returncode == 0
    except (FileNotFoundError, subprocess.SubprocessError):
        return False


def set_default_source(name: str) -> bool:
    """Make ``name`` the default source; return True on success."""
    try:
        r = _run_pactl(["set-default-source", name])
        return r.returncode == 0
    except (FileNotFoundError, subprocess.SubprocessError):
        return False


# ───────────────────────── matching ─────────────────────────

def find_first_match(items: list[dict[str, str]], pattern: str,
                     exclude_monitors: bool = False) -> dict[str, str] | None:
    """Return first item whose name (case-insensitive) contains ANY of the
    comma-separated patterns.

    Example: pattern="powerconf,anker" matches any name containing
    "powerconf" OR "anker" (case-insensitive).

    If exclude_monitors=True, skip PulseAudio monitor sources (names ending
    in ".monitor") so we don't accidentally pick a loopback instead of a real mic.
    """
    if not pattern:
        return None
    needles = [p.strip().lower() for p in pattern.split(",") if p.strip()]
    if not needles:
        return None
    for it in items:
        name_lower = it["name"].lower()
        if exclude_monitors and name_lower.endswith(".monitor"):
            continue
        if any(needle in name_lower for needle in needles):
            return it
    return None


# PyAudio fallback cache — avoid re-init'ing PyAudio on every poll
# (PyAudio init takes ~100 ms and the watcher polls at 1.5 s).
+_PYAUDIO_CACHE: dict[str, Any] = {"ts": 0.0, "input_names": []} +_PYAUDIO_TTL_S = 2.0 + + +def _pyaudio_input_names() -> list[str]: + """Return lowercase names of all PyAudio input devices. Cached for ~2 s. + + Used as a fallback in detect_plugged_profiles() when pactl can't see + a profile's source — some USB devices (notably the Anker PowerConf on + JetPack 5) only expose their mic via the raw ALSA layer, which PyAudio + can still open even when PulseAudio's card profile is output-only. + """ + import time as _t + now = _t.time() + if now - _PYAUDIO_CACHE["ts"] < _PYAUDIO_TTL_S: + return _PYAUDIO_CACHE["input_names"] + names: list[str] = [] + try: + import pyaudio # type: ignore + pa = pyaudio.PyAudio() + try: + for i in range(pa.get_device_count()): + try: + info = pa.get_device_info_by_index(i) + except Exception: + continue + if info.get("maxInputChannels", 0) <= 0: + continue + names.append(str(info.get("name", "")).lower()) + finally: + pa.terminate() + except Exception as exc: + log.debug("PyAudio enumeration unavailable: %s", exc) + _PYAUDIO_CACHE["ts"] = now + _PYAUDIO_CACHE["input_names"] = names + return names + + +def _pyaudio_input_matches(pattern: str) -> dict[str, str] | None: + """If any PyAudio input device name matches one of the comma-separated + patterns, return a synthetic source dict (matches find_first_match()'s + shape). Else None. + """ + if not pattern: + return None + needles = [p.strip().lower() for p in pattern.split(",") if p.strip()] + if not needles: + return None + for name in _pyaudio_input_names(): + if any(n in name for n in needles): + # Synthetic — mark the origin so logs / dashboards can see it + # came from PyAudio, not pactl. Includes `description` so any + # consumer that expects the same shape as a real pactl + # source dict (`{name, description, index}`) doesn't KeyError. 
+ return { + "name": f"pyaudio:{name}", + "driver": "pyaudio", + "description": f"PyAudio fallback — {name}", + "index": "", + } + return None + + +# Per-card cooldown for ensure_card_input_capable so a card whose firmware +# truly doesn't expose input doesn't get hammered with set-card-profile +# calls on every detection poll (every 1.5s from the live-Gemini watcher). +_CARD_PROFILE_LAST_ATTEMPT: dict[str, float] = {} +_CARD_PROFILE_COOLDOWN_S = 30.0 + + +def _parse_card_profiles(card_block: str) -> tuple[str, list[tuple[int, str, bool, bool]]]: + """Parse the `Profiles:` section of a single card stanza from + `pactl list cards`. Returns (active_profile, [(priority, name, + has_sink, has_source), ...]) — only profiles marked + `available: yes` are included. + + Profile lines look like: + \\toutput:analog-stereo+input:mono-fallback: Analog Stereo Output + Mono Input \\ + (sinks: 1, sources: 1, priority: 6501, available: yes) + """ + active = "" + profiles: list[tuple[int, str, bool, bool]] = [] + in_profiles = False + for raw in card_block.splitlines(): + line = raw.rstrip() + stripped = line.strip() + if stripped.startswith("Active Profile:"): + active = stripped[len("Active Profile:"):].strip() + continue + if stripped == "Profiles:": + in_profiles = True + continue + if in_profiles: + # End of Profiles section: next top-level key starts with non-tab + # OR an empty line. The block ends when leading whitespace drops + # to a tab/spaces shallower than the profile lines — easiest + # check: stop when we hit "Ports:" or "Active Profile:". 
+ if stripped.startswith("Ports:") or stripped.startswith("Active Profile:"): + in_profiles = False + if stripped.startswith("Active Profile:"): + active = stripped[len("Active Profile:"):].strip() + continue + # Profile line — must contain "(sinks: N, sources: M, priority: P, available: yes)" + paren = stripped.rfind(" (") + if paren < 0 or "available: yes" not in stripped: + continue + head = stripped[:paren] + sep = head.find(": ") + if sep < 0: + continue + name = head[:sep] + props = stripped[paren+2:].rstrip(")") + sinks_n = sources_n = priority = 0 + for tok in props.split(","): + tok = tok.strip() + if tok.startswith("sinks: "): + try: sinks_n = int(tok[len("sinks: "):]) + except ValueError: pass + elif tok.startswith("sources: "): + try: sources_n = int(tok[len("sources: "):]) + except ValueError: pass + elif tok.startswith("priority: "): + try: priority = int(tok[len("priority: "):]) + except ValueError: pass + profiles.append((priority, name, sinks_n > 0, sources_n > 0)) + return active, profiles + + +def ensure_card_input_capable(card_pattern: str) -> bool: + """If a PulseAudio card whose Name matches `card_pattern` is on an + output-only profile but has an input+output profile available, switch + to the highest-priority input+output profile. + + Why: USB UAC1 conference devices (Anker PowerConf and similar) can get + pinned to an output-only profile by PulseAudio's `module-card-restore` + — observed on G1 / JetPack 5 after an earlier session left the card in + that state. The mic-capable profile is right there in the card's + advertised list (we see `output:analog-stereo+input:mono-fallback` with + `sinks: 1, sources: 1` and `available: yes`), but the active profile + is the output-only one. Calling `pactl set-card-profile` against an + already-advertised, available profile is SAFE — unlike the older + `module-alsa-source device=hw:N,0` hack, which had to guess the hw + index and broke things when it guessed wrong. 
+ + `card_pattern` is a comma-separated substring list (same semantics as + `find_first_match`). Returns True if a switch happened OR the card was + already input-capable; False if no matching card / no switchable + profile exists. + + Rate-limited per-card via `_CARD_PROFILE_COOLDOWN_S` so a card whose + firmware genuinely can't do input doesn't get polled to death. + """ + if not pactl_available(): + return False + needles = [p.strip().lower() for p in card_pattern.split(",") if p.strip()] + if not needles: + return False + try: + r = _run_pactl(["list", "cards"]) + if r.returncode != 0: + return False + except (FileNotFoundError, subprocess.SubprocessError): + return False + + # Split into per-card blocks. `pactl list cards` separates cards with a + # blank line (and starts each with "Card #N"). + import time as _t + blocks: list[str] = [] + current: list[str] = [] + for line in (r.stdout or "").splitlines(): + if line.startswith("Card #") and current: + blocks.append("\n".join(current)) + current = [] + current.append(line) + if current: + blocks.append("\n".join(current)) + + switched_any = False + for block in blocks: + # Extract card Name + card_name = "" + for line in block.splitlines(): + s = line.strip() + if s.startswith("Name: "): + card_name = s[len("Name: "):].strip() + break + if not card_name: + continue + if not any(n in card_name.lower() for n in needles): + continue + + active, profiles = _parse_card_profiles(block) + if not profiles: + continue + + # If active profile already has input, nothing to do. + active_has_input = any( + name == active and has_src + for _, name, _, has_src in profiles + ) + if active_has_input: + return True + + # Find best input+output profile + candidates = [(prio, name) for prio, name, has_sink, has_src in profiles + if has_sink and has_src] + if not candidates: + # Card has no input+output profile (firmware truly output-only). 
+ log.debug("ensure_card_input_capable: %s has no input+output " + "profile — nothing to switch to", card_name) + continue + + now = _t.time() + last = _CARD_PROFILE_LAST_ATTEMPT.get(card_name, 0.0) + if (now - last) < _CARD_PROFILE_COOLDOWN_S: + continue + _CARD_PROFILE_LAST_ATTEMPT[card_name] = now + + # Sort: highest priority first; on ties, alphabetical name asc so + # `mono-fallback` wins over `multichannel-input` (the source name + # `...mono-fallback` matches Sanad's hardcoded SOURCE patterns and + # the AI_Photographer setup that's known to work on this hardware). + candidates.sort(key=lambda x: (-x[0], x[1])) + target = candidates[0][1] + log.info("ensure_card_input_capable: %s active=%r → %r " + "(exposes mic to PulseAudio)", + card_name, active or "?", target) + try: + sr = _run_pactl(["set-card-profile", card_name, target]) + if sr.returncode == 0: + switched_any = True + else: + log.warning("ensure_card_input_capable: set-card-profile " + "%s %r failed: %s", card_name, target, + (sr.stderr or "").strip()) + except (FileNotFoundError, subprocess.SubprocessError) as exc: + log.warning("ensure_card_input_capable: pactl error: %s", exc) + return switched_any + + +def detect_plugged_profiles() -> list[dict[str, Any]]: + """Return all profiles whose sink AND source are currently plugged in. + + For each profile, the source is resolved in three passes: + 1. pactl list short sources (standard path) + 2. `ensure_card_input_capable` to unstick output-only PulseAudio + card profiles (Anker UAC1 quirk) — re-checks pactl sources + after the switch + 3. PyAudio device list (fallback — see _pyaudio_input_matches docstring) + + A profile counts as "plugged" if the sink matches via pactl AND a source + is found via ANY pass. 
+ """ + sinks = list_sinks() + sources = list_sources() + detected: list[dict[str, Any]] = [] + refreshed_sources = False + for prof in PROFILES: + sink = find_first_match(sinks, prof.sink_pattern) + if not sink: + continue + src = find_first_match(sources, prof.source_pattern, exclude_monitors=True) + via = "pactl" + if src is None: + # Try to unstick the card's PulseAudio profile (most common + # cause of "sink present, source missing" on Anker). Re-list + # sources once if any switch happened — and cache for the rest + # of this detection pass so we don't re-list per profile. + switched = ensure_card_input_capable(prof.sink_pattern) + if switched and not refreshed_sources: + sources = list_sources() + refreshed_sources = True + src = find_first_match(sources, prof.source_pattern, + exclude_monitors=True) + if src is not None: + via = "pactl-after-profile-switch" + log.info("detect_plugged_profiles: %s source appeared " + "after card-profile switch: %s", + prof.id, src.get("name", "?")) + if src is None: + # Last resort — PyAudio may still see the mic. + src = _pyaudio_input_matches(prof.source_pattern) + if src is not None: + via = "pyaudio" + log.info("detect_plugged_profiles: %s source resolved via " + "PyAudio fallback (pactl missed it): %s", + prof.id, src.get("name", "?")) + if sink and src: + detected.append({ + "profile": asdict(prof), + "sink": sink, + "source": src, + "source_via": via, + }) + return detected + + +# ───────────────────────── persistence ───────────────────────── + +def load_state() -> dict[str, Any]: + """Load saved selection. 
Always returns a dict.""" + if not DEVICE_STATE_FILE.exists(): + return {} + try: + with open(DEVICE_STATE_FILE, "r", encoding="utf-8") as f: + return json.load(f) + except (json.JSONDecodeError, OSError) as exc: + log.warning("audio_device.json unreadable: %s", exc) + return {} + + +def save_state(state: dict[str, Any]) -> None: + """Atomic write of audio_device.json.""" + DEVICE_STATE_FILE.parent.mkdir(parents=True, exist_ok=True) + with _LOCK: + fd, tmp = tempfile.mkstemp( + prefix=f".{DEVICE_STATE_FILE.name}.", suffix=".tmp", + dir=str(DEVICE_STATE_FILE.parent), + ) + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + json.dump(state, f, indent=2) + os.replace(tmp, DEVICE_STATE_FILE) + except Exception: + try: + os.unlink(tmp) + except OSError: + pass + raise + + +# ───────────────────────── current selection ───────────────────────── + +# Dedupe the "manual override stale" fall-through log. current_selection() is +# called every ~1.5s by the audio watcher, so logging every cycle spams the log +# whenever a saved device (e.g. an unplugged Anker) stays absent. We log only +# when the stale state changes (and reset when the override becomes valid). +_LAST_STALE_LOG: Any = None + + +def current_selection() -> dict[str, Any]: + """Resolve the currently active sink/source. + + Order: + 1. Saved profile selection (if its sink/source still plugged) + 2. Saved explicit sink/source pair (ONLY if both still exist in pactl — + otherwise the saved manual override is stale and we fall through) + 3. DEFAULT profile (builtin) if detected + 4. First detected profile (in declaration order) + 5. pactl defaults + 6. Empty + """ + state = load_state() + + # Detected profiles snapshot + detected = detect_plugged_profiles() if pactl_available() else [] + detected_by_id = {d["profile"]["id"]: d for d in detected} + + # 1. 
Saved profile preference + saved_profile = state.get("profile_id") + if saved_profile and saved_profile in detected_by_id: + d = detected_by_id[saved_profile] + return { + "source_kind": "profile", + "profile": d["profile"], + "sink": d["sink"].get("name", ""), + "source": d["source"].get("name", ""), + "sink_description": d["sink"].get("description", ""), + "source_description": d["source"].get("description", ""), + } + + # 2. Saved explicit sink/source — but VERIFY both names still exist in + # pactl before returning them. Without this check, unplugging a device + # (e.g. the Anker) leaves the dashboard showing the dead manual override + # forever, play_wav routes to a non-existent sink, PortAudio raises + # paBadIODeviceCombination, and the user gets no audio. Falling through + # here lets steps 3–5 re-resolve to whatever's actually plugged in + # (typically the builtin/chest), no user click required. + saved_sink = (state.get("sink") or "").strip() + saved_source = (state.get("source") or "").strip() + if saved_sink and saved_source: + sink_names = {s["name"] for s in (list_sinks() if pactl_available() else [])} + source_names = { + s["name"] for s in (list_sources() if pactl_available() else []) + } + global _LAST_STALE_LOG + if saved_sink in sink_names and saved_source in source_names: + _LAST_STALE_LOG = None # override valid again — re-arm the log + return { + "source_kind": "manual", + "profile": None, + "sink": saved_sink, + "source": saved_source, + "sink_description": "", + "source_description": "", + } + # Benign expected state (a saved device is simply unplugged) — and this + # is hit by every status poll, possibly from more than one process, so a + # module cache can't fully suppress it. Log at DEBUG (off the INFO log), + # and at INFO only ONCE when the stale state first changes, so an + # operator still gets a single breadcrumb without the 30s spam. 
+ _stale_key = (saved_sink, saved_sink in sink_names, + saved_source, saved_source in source_names) + _first = _stale_key != _LAST_STALE_LOG + _LAST_STALE_LOG = _stale_key + (log.info if _first else log.debug)( + "current_selection: manual override stale (sink=%s present=%s, " + "source=%s present=%s) — falling through to auto-detect", + saved_sink, saved_sink in sink_names, + saved_source, saved_source in source_names, + ) + + # 3. Default profile if it is plugged in + if DEFAULT_PROFILE_ID in detected_by_id: + d = detected_by_id[DEFAULT_PROFILE_ID] + return { + "source_kind": "default", + "profile": d["profile"], + "sink": d["sink"].get("name", ""), + "source": d["source"].get("name", ""), + "sink_description": d["sink"].get("description", ""), + "source_description": d["source"].get("description", ""), + } + + # 4. First detected profile (in declaration order) + if detected: + d = detected[0] + return { + "source_kind": "auto", + "profile": d["profile"], + "sink": d["sink"].get("name", ""), + "source": d["source"].get("name", ""), + "sink_description": d["sink"].get("description", ""), + "source_description": d["source"].get("description", ""), + } + + # 5. pactl defaults (system-wide) + sink = get_default_sink() + source = get_default_source() + if sink and source: + return { + "source_kind": "pactl_default", + "profile": None, + "sink": sink, + "source": source, + "sink_description": "", + "source_description": "", + } + + # 6. Empty + return { + "source_kind": "none", + "profile": None, + "sink": "", + "source": "", + "sink_description": "", + "source_description": "", + } + + +# ───────────────────────── apply selection ───────────────────────── + +def apply_selection(sink: str, source: str) -> dict[str, Any]: + """Run pactl set-default-* and unmute. Returns {ok, errors}. 
+ + A source name starting with `pyaudio:` is the synthetic marker emitted + by detect_plugged_profiles() when the source was resolved only via the + PyAudio fallback (PulseAudio doesn't expose the mic, but PortAudio can + open it directly via raw ALSA). We can't `pactl set-default-source` on + a synthetic name — pactl would error. So in that case we set only the + sink and skip the source; Sanad's live mic path uses PortAudio direct + via AnkerMic's substring index lookup and doesn't depend on the pactl + default source. Dashboard playback (audio_manager.play_wav) records + from whatever pactl considers default — that stays on the boot mic + until the recovery script (Path B) fully exposes Anker in PulseAudio. + """ + errors: list[str] = [] + if sink: + if not set_default_sink(sink): + errors.append(f"set-default-sink failed: {sink}") + else: + try: + _run_pactl(["set-sink-mute", sink, "0"]) + except (FileNotFoundError, subprocess.SubprocessError): + pass + if source: + if source.startswith("pyaudio:"): + log.info("apply_selection: source is PyAudio-direct (%s) — " + "skipping pactl set-default-source. Live mic path " + "uses PortAudio device match; pactl defaults stay put.", + source) + elif not set_default_source(source): + errors.append(f"set-default-source failed: {source}") + else: + try: + _run_pactl(["set-source-mute", source, "0"]) + except (FileNotFoundError, subprocess.SubprocessError): + pass + return {"ok": not errors, "errors": errors} + + +def apply_current_selection() -> dict[str, Any]: + """Resolve the current device selection (re-scanning all USB ports) and + apply it via pactl. Called at AudioManager startup and when devices change. + + This is the key function that makes audio work regardless of which USB + port the device is plugged into — it re-discovers on every call. 
+ """ + if not pactl_available(): + return {"ok": False, "error": "pactl not available"} + cur = current_selection() + sink = cur.get("sink", "") + source = cur.get("source", "") + if not sink and not source: + return {"ok": False, "error": "no device resolved", "selection": cur} + result = apply_selection(sink, source) + result["selection"] = cur + if result["ok"]: + log.info("Audio applied — sink=%s source=%s (via %s)", + sink, source, cur.get("source_kind", "?")) + else: + log.warning("Audio apply partial — sink=%s source=%s errors=%s", + sink, source, result["errors"]) + return result + + +def select_profile(profile_id: str) -> dict[str, Any]: + """Switch to a named profile. Persists selection.""" + if profile_id not in PROFILES_BY_ID: + return {"ok": False, "error": f"Unknown profile: {profile_id}"} + + detected = detect_plugged_profiles() + detected_by_id = {d["profile"]["id"]: d for d in detected} + if profile_id not in detected_by_id: + return { + "ok": False, + "error": f"Profile '{profile_id}' is not currently plugged in", + "available": [d["profile"]["id"] for d in detected], + } + + d = detected_by_id[profile_id] + sink_name = d["sink"]["name"] + source_name = d["source"]["name"] + + apply_result = apply_selection(sink_name, source_name) + if not apply_result["ok"]: + return {"ok": False, "errors": apply_result["errors"]} + + save_state({ + "profile_id": profile_id, + "sink": sink_name, + "source": source_name, + }) + log.info("Selected audio profile: %s (sink=%s, source=%s)", profile_id, sink_name, source_name) + return { + "ok": True, + "profile": d["profile"], + "sink": sink_name, + "source": source_name, + } + + +def select_manual(sink: str, source: str) -> dict[str, Any]: + """Switch to an explicit sink/source pair (no profile).""" + apply_result = apply_selection(sink, source) + if not apply_result["ok"]: + return {"ok": False, "errors": apply_result["errors"]} + save_state({"profile_id": None, "sink": sink, "source": source}) + log.info("Selected 
manual audio: sink=%s source=%s", sink, source) + return {"ok": True, "sink": sink, "source": source} + + +# ───────────────────────── status ───────────────────────── + +def status() -> dict[str, Any]: + """One-shot status for the dashboard.""" + pa = pactl_available() + detected = detect_plugged_profiles() if pa else [] + detected_ids = [d["profile"]["id"] for d in detected] + cur = current_selection() + return { + "pactl_available": pa, + "current": cur, + "saved_state": load_state(), + "profiles": [asdict(p) for p in PROFILES], + "detected": detected, + "detected_ids": detected_ids, + "all_sinks": list_sinks() if pa else [], + "all_sources": list_sources() if pa else [], + "default_sink": get_default_sink() if pa else "", + "default_source": get_default_source() if pa else "", + } diff --git a/vendor/Sanad/voice/audio_io.py b/vendor/Sanad/voice/audio_io.py new file mode 100644 index 0000000..c2b0b09 --- /dev/null +++ b/vendor/Sanad/voice/audio_io.py @@ -0,0 +1,913 @@ +"""Hardware-agnostic audio I/O for Sanad voice pipelines. + +Provides a uniform Mic / Speaker interface so the model layer (Gemini +today, or any future alternative) doesn't need to know which physical +audio path is active. Pick a pairing via `AudioIO.from_profile()`: + + builtin → G1 UDP multicast mic + AudioClient.PlayStream + anker → Anker PowerConf USB mic + speaker (PyAudio) + hollyland_builtin → Hollyland wireless mic + G1 built-in speaker + +Mics deliver int16 mono PCM at 16 kHz. +Speakers accept int16 mono PCM plus a `source_rate` and resample +internally if the hardware runs at a different rate. 
+ +Usage: + + audio = AudioIO.from_profile("builtin", audio_client=ac) + audio.start() + try: + chunk = audio.mic.read_chunk(1024) # mic + audio.speaker.begin_stream() # speaker + audio.speaker.send_chunk(pcm_24k, 24000) + audio.speaker.wait_finish() + finally: + audio.stop() +""" + +from __future__ import annotations + +import json +import socket +import struct +import subprocess +import threading +import time +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any, Optional, Union + +import numpy as np + +try: + import pyaudio + _HAS_PYAUDIO = True +except ImportError: + pyaudio = None + _HAS_PYAUDIO = False + +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.logger import get_logger + +log = get_logger("audio_io") + +_MIC_CFG = _cfg_section("voice", "mic_udp") +_SP_CFG = _cfg_section("voice", "speaker") + +TARGET_MIC_RATE = 16_000 + +_MCAST_GRP = _MIC_CFG.get("group", "239.168.123.161") +_MCAST_PORT = _MIC_CFG.get("port", 5555) +_MIC_BUF_MAX = _MIC_CFG.get("buffer_max_bytes", 64_000) +_MIC_READ_TIMEOUT = _MIC_CFG.get("read_timeout_sec", 0.04) + +PCMLike = Union[bytes, bytearray, memoryview, np.ndarray] + + +def _find_g1_local_ip() -> str: + """Find the host IPv4 address on the G1's internal 192.168.123.0/24 network.""" + out = subprocess.run( + ["ip", "-4", "-o", "addr"], capture_output=True, text=True, + ).stdout + for line in out.splitlines(): + for tok in line.split(): + if tok.startswith("192.168.123."): + return tok.split("/")[0] + raise RuntimeError("no 192.168.123.x interface found") + + +def _resample_int16(pcm: np.ndarray, src_rate: int, dst_rate: int) -> np.ndarray: + if src_rate == dst_rate or pcm.size == 0: + return pcm.astype(np.int16, copy=False) + target_len = max(1, int(len(pcm) * dst_rate / src_rate)) + return np.interp( + np.linspace(0, len(pcm), target_len, endpoint=False), + np.arange(len(pcm)), + pcm.astype(np.float64), + ).astype(np.int16) + + +def 
_as_int16_array(pcm: PCMLike) -> np.ndarray: + if isinstance(pcm, np.ndarray): + return pcm.astype(np.int16, copy=False) + return np.frombuffer(bytes(pcm), dtype=np.int16) + + +# ─── Protocols ──────────────────────────────────────────── + +class Mic(ABC): + sample_rate: int = TARGET_MIC_RATE + + @abstractmethod + def start(self) -> None: ... + @abstractmethod + def read_chunk(self, num_bytes: int) -> bytes: ... + @abstractmethod + def flush(self) -> None: ... + @abstractmethod + def stop(self) -> None: ... + + +class Speaker(ABC): + @abstractmethod + def begin_stream(self) -> None: ... + + @abstractmethod + def send_chunk(self, pcm: PCMLike, source_rate: int) -> None: + """Queue PCM for playback. `source_rate` is the sample rate of `pcm`.""" + + @abstractmethod + def wait_finish(self) -> None: ... + @abstractmethod + def stop(self) -> None: ... + + @property + @abstractmethod + def interrupted(self) -> bool: ... + + @property + def total_sent_sec(self) -> float: + return 0.0 + + +# ─── G1 built-in (UDP mic + AudioClient speaker) ────────── + +class BuiltinMic(Mic): + """G1 robot's on-board mic published over UDP multicast.""" + + sample_rate = TARGET_MIC_RATE + + def __init__(self, group: str = _MCAST_GRP, port: int = _MCAST_PORT, + buf_max: int = _MIC_BUF_MAX): + self._group = group + self._port = port + self._buf_max = buf_max + self._sock: Optional[socket.socket] = None + self._buf = bytearray() + self._lock = threading.Lock() + self._running = False + self._thread: Optional[threading.Thread] = None + + def start(self) -> None: + local_ip = _find_g1_local_ip() + self._sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + self._sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + self._sock.bind(("", self._port)) + mreq = struct.pack( + "4s4s", + socket.inet_aton(self._group), + socket.inet_aton(local_ip), + ) + self._sock.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP, mreq) + self._sock.settimeout(1.0) + self._running = True + self._thread 
= threading.Thread(target=self._recv_loop, daemon=True) + self._thread.start() + log.info("BuiltinMic joined %s:%d on %s", self._group, self._port, local_ip) + + def _recv_loop(self) -> None: + while self._running: + try: + data, _ = self._sock.recvfrom(4096) + with self._lock: + self._buf.extend(data) + if len(self._buf) > self._buf_max: + del self._buf[:len(self._buf) - self._buf_max] + except socket.timeout: + continue + except Exception: + if self._running: + time.sleep(0.01) + + def read_chunk(self, num_bytes: int) -> bytes: + deadline = time.time() + _MIC_READ_TIMEOUT + while time.time() < deadline: + with self._lock: + if len(self._buf) >= num_bytes: + chunk = bytes(self._buf[:num_bytes]) + del self._buf[:num_bytes] + return chunk + time.sleep(0.003) + with self._lock: + avail = len(self._buf) + if avail > 0: + chunk = bytes(self._buf[:avail]) + del self._buf[:avail] + return chunk + b"\x00" * (num_bytes - avail) + return b"\x00" * num_bytes + + def flush(self) -> None: + with self._lock: + self._buf.clear() + + def stop(self) -> None: + self._running = False + if self._sock is not None: + try: + self._sock.close() + except Exception: + pass + self._sock = None + + +class BuiltinSpeaker(Speaker): + """G1 robot's built-in speaker via AudioClient.PlayStream (16 kHz mono).""" + + HARDWARE_RATE = 16_000 + + def __init__(self, audio_client: Any, app_name: Optional[str] = None): + self._ac = audio_client + try: + self._ac.SetVolume(100) + except Exception: + log.warning("BuiltinSpeaker.SetVolume failed") + self._app_name = app_name or _SP_CFG.get("app_name", "sanad") + self._begin_pause = _SP_CFG.get("begin_stream_pause_sec", 0.15) + self._finish_margin = _SP_CFG.get("wait_finish_margin_sec", 0.3) + self._stop_flag = threading.Event() + self._stream_id: Optional[str] = None + self._total_sent = 0.0 + self._play_start = 0.0 + + def _stop_play_api(self) -> None: + try: + from unitree_sdk2py.g1.audio.g1_audio_api import ( + ROBOT_API_ID_AUDIO_STOP_PLAY, + ) + 
class _PyAudioMic(Mic):
    """Shared base for PulseAudio/ALSA input — matches device by name pattern.

    ``start()`` opens a mono int16 PortAudio input stream and spawns a daemon
    thread that appends captured frames to a bounded byte buffer.
    ``read_chunk()`` drains that buffer; ``stop()`` halts the thread and
    releases the stream and the PortAudio handle.
    """

    sample_rate = TARGET_MIC_RATE

    # Longest stop() waits for the reader thread to leave stream.read()
    # before it closes the stream regardless.
    _READER_JOIN_TIMEOUT = 1.0

    def __init__(self, device_pattern: str, label: str,
                 frames_per_buffer: int = 512):
        """Store the configuration; no audio resource is opened here.

        Args:
            device_pattern: comma-separated, case-insensitive substrings used
                for the legacy direct-device fallback match.
            label: prefix used in log and error messages.
            frames_per_buffer: frames requested per PortAudio read.

        Raises:
            RuntimeError: if pyaudio could not be imported.
        """
        if not _HAS_PYAUDIO:
            raise RuntimeError(f"{label}Mic requires pyaudio")
        self._device_pattern = device_pattern
        self._label = label
        self._frames_per_buffer = frames_per_buffer
        self._pa: Optional["pyaudio.PyAudio"] = None
        self._stream = None
        self._running = False
        self._buf = bytearray()
        self._lock = threading.Lock()
        self._thread: Optional[threading.Thread] = None

    def _resolve_device_index(self) -> Optional[int]:
        """Pick the PyAudio input device to open.

        Preference order:
          1. PortAudio's 'pulse' device — routes capture through
             PulseAudio's default source. We MUST use this on USB UAC1
             cards (Anker PowerConf, Hollyland) — opening hw:N,0
             directly grabs ALSA exclusively, which makes PulseAudio's
             module-alsa-card lose the device. That drops the card
             from `pactl list`, the parent's audio watcher then sees
             "anker unplugged" within seconds, and reverts the brain
             to the boot profile (chest speaker). The dashboard's
             "Apply" call already pins PulseAudio's default
             source/sink to the matching Anker device, so opening
             'pulse' lands on the right hardware.
          2. PortAudio's 'default' device — also pulse-routed on a
             standard Pulse-on-Linux setup, used as a fallback.
          3. Substring match against the device_pattern (legacy
             direct-hw path) — only used when neither 'pulse' nor
             'default' is enumerated by PortAudio, which would only
             happen on a system without Pulse. Logs a WARN because
             this is the path that triggers the bug above.

        Returns:
            The chosen device index, or None when PyAudio is not
            initialised or no input device qualifies.
        """
        if self._pa is None:
            return None
        pulse_idx = default_idx = pattern_idx = None
        pattern_name = ""
        patterns = [p.strip().lower()
                    for p in self._device_pattern.split(",") if p.strip()]
        for i in range(self._pa.get_device_count()):
            info = self._pa.get_device_info_by_index(i)
            # Only devices that can capture are candidates.
            if info.get("maxInputChannels", 0) <= 0:
                continue
            name_lower = str(info.get("name", "")).lower()
            if pulse_idx is None and name_lower == "pulse":
                pulse_idx = i
            elif default_idx is None and name_lower == "default":
                default_idx = i
            if pattern_idx is None and any(n in name_lower for n in patterns):
                pattern_idx = i
                pattern_name = name_lower
        if pulse_idx is not None:
            return pulse_idx
        if default_idx is not None:
            return default_idx
        if pattern_idx is not None:
            log.warning(
                "%sMic falling back to direct ALSA device '%s' "
                "(no 'pulse'/'default' device exposed by PortAudio) — "
                "this grabs the card exclusively and may cause PulseAudio "
                "to drop it; consider installing the ALSA pulse plugin",
                self._label, pattern_name,
            )
            return pattern_idx
        return None

    def start(self) -> None:
        """Open the input stream and start the background reader thread.

        Raises:
            Exception: whatever PyAudio raises while enumerating or opening
                the device. The PortAudio handle created here is terminated
                before the error is re-raised, so a failed start leaks
                nothing.
        """
        self._pa = pyaudio.PyAudio()
        try:
            idx = self._resolve_device_index()
            # Log which device we picked so a "wrong source" symptom is easy
            # to attribute; the index alone says nothing useful in a tail.
            try:
                picked = (self._pa.get_device_info_by_index(idx)
                          if idx is not None else {})
                picked_name = picked.get("name", "?")
            except Exception:
                picked_name = "?"
            self._stream = self._pa.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=self.sample_rate,
                input=True,
                input_device_index=idx,
                frames_per_buffer=self._frames_per_buffer,
            )
        except Exception:
            try:
                self._pa.terminate()
            except Exception:
                pass
            self._pa = None
            raise
        self._running = True
        self._thread = threading.Thread(target=self._recv_loop, daemon=True)
        self._thread.start()
        log.info("%sMic started (device_index=%s name=%r)",
                 self._label, idx, picked_name)

    def _recv_loop(self) -> None:
        """Reader-thread body: pull frames into the bounded buffer."""
        while self._running:
            # Snapshot the handle: stop() nulls self._stream during teardown.
            stream = self._stream
            if stream is None:
                break
            try:
                data = stream.read(
                    self._frames_per_buffer, exception_on_overflow=False,
                )
            except Exception:
                if self._running:
                    time.sleep(0.01)
                continue
            with self._lock:
                self._buf.extend(data)
                # Keep only the newest _MIC_BUF_MAX bytes.
                if len(self._buf) > _MIC_BUF_MAX:
                    del self._buf[:len(self._buf) - _MIC_BUF_MAX]

    def read_chunk(self, num_bytes: int) -> bytes:
        """Return exactly ``num_bytes`` of captured PCM.

        Waits up to ``_MIC_READ_TIMEOUT`` seconds for enough data. On
        timeout, whatever is buffered is returned zero-padded to
        ``num_bytes`` (all zeros when nothing was captured).
        """
        # Monotonic clock: a wall-clock adjustment must not stretch or cut
        # the wait.
        deadline = time.monotonic() + _MIC_READ_TIMEOUT
        while time.monotonic() < deadline:
            with self._lock:
                if len(self._buf) >= num_bytes:
                    chunk = bytes(self._buf[:num_bytes])
                    del self._buf[:num_bytes]
                    return chunk
            time.sleep(0.003)
        with self._lock:
            avail = len(self._buf)
            if avail > 0:
                chunk = bytes(self._buf[:avail])
                del self._buf[:avail]
                return chunk + b"\x00" * (num_bytes - avail)
        return b"\x00" * num_bytes

    def flush(self) -> None:
        """Discard everything currently buffered."""
        with self._lock:
            self._buf.clear()

    def stop(self) -> None:
        """Stop capture and release the stream and the PortAudio handle.

        The reader thread is given ``_READER_JOIN_TIMEOUT`` seconds to leave
        its in-flight ``stream.read()`` before the stream is closed, so the
        close does not run concurrently with a read on another thread.
        Safe to call when ``start()`` was never called or already failed.
        """
        self._running = False
        reader = self._thread
        self._thread = None
        if (reader is not None and reader.is_alive()
                and reader is not threading.current_thread()):
            reader.join(timeout=self._READER_JOIN_TIMEOUT)
        if self._stream is not None:
            try:
                self._stream.stop_stream()
                self._stream.close()
            except Exception:
                pass
            self._stream = None
        if self._pa is not None:
            try:
                self._pa.terminate()
            except Exception:
                pass
            self._pa = None
device_pattern: str, label: str): + if not _HAS_PYAUDIO: + raise RuntimeError(f"{label}Speaker requires pyaudio") + self._device_pattern = device_pattern + self._label = label + self._pa: Optional["pyaudio.PyAudio"] = None + self._stream = None + self._stream_rate: Optional[int] = None + self._stop_flag = threading.Event() + self._total_sent = 0.0 + # Serialises every touch of self._stream / self._pa. PortAudio's + # ALSA→pulse plugin is NOT re-entrant: a concurrent snd_pcm_close + # (from stop()/wait_finish()) while another thread is inside + # snd_pcm_writei (from send_chunk()) corrupts the pulse mainloop + # heap — observed as `malloc_consolidate(): invalid chunk size` + # on barge-in. RLock so stop()→wait_finish() nesting is safe. + self._lock = threading.RLock() + # Sticky teardown signal — once stop() has run, refuse to + # lazy-reopen the stream from a late send_chunk on the same + # instance (the swap path replaces the instance entirely). + self._closed = False + + def _resolve_device_index(self) -> Optional[int]: + """Pick the PyAudio output device to open. + + Mirrors `_PyAudioMic._resolve_device_index` — see that method's + docstring for the rationale. Short version: prefer 'pulse' so + playback goes through PulseAudio's default sink (which the + dashboard's Apply pins to the active profile's sink); only fall + back to direct hw:N if PulseAudio isn't wired into PortAudio at + all. Grabbing hw:N exclusively makes PulseAudio drop the card + and the parent's audio watcher will then revert the brain to + the boot profile within seconds. 
+ """ + if self._pa is None: + return None + pulse_idx = default_idx = pattern_idx = None + pattern_name = "" + patterns = [p.strip().lower() + for p in self._device_pattern.split(",") if p.strip()] + for i in range(self._pa.get_device_count()): + info = self._pa.get_device_info_by_index(i) + if info.get("maxOutputChannels", 0) <= 0: + continue + name_lower = str(info.get("name", "")).lower() + if pulse_idx is None and name_lower == "pulse": + pulse_idx = i + elif default_idx is None and name_lower == "default": + default_idx = i + if pattern_idx is None and any(n in name_lower for n in patterns): + pattern_idx = i + pattern_name = name_lower + if pulse_idx is not None: + return pulse_idx + if default_idx is not None: + return default_idx + if pattern_idx is not None: + log.warning( + "%sSpeaker falling back to direct ALSA device '%s' " + "(no 'pulse'/'default' device exposed by PortAudio) — " + "this grabs the card exclusively and may cause PulseAudio " + "to drop it; consider installing the ALSA pulse plugin", + self._label, pattern_name, + ) + return pattern_idx + return None + + # USB-native rate for the underlying card. PortAudio's ALSA backend + # (the only backend available in conda's PyAudio build on Jetson) + # opens via the ALSA 'pulse' plugin, which on this system DOES NOT + # advertise rate conversion in `snd_pcm_hw_params` — opening at the + # source rate (24 kHz from Gemini TTS, etc.) gets rejected with + # paInvalidSampleRate. We pin the stream rate to the card's native + # 48 kHz and resample chunks app-side before writing. Same approach + # `_play_pcm_via_g1` uses for the DDS path. + _STREAM_TARGET_RATE = 48_000 + + def _open_stream(self, _ignored_rate: int) -> None: + idx = self._resolve_device_index() + try: + picked = self._pa.get_device_info_by_index(idx) if idx is not None else {} + picked_name = picked.get("name", "?") + except Exception: + picked_name = "?" + # ALWAYS open at _STREAM_TARGET_RATE — see class docstring above. 
def send_chunk(self, pcm: PCMLike, source_rate: int) -> None:
    """Resample one PCM chunk to the stream rate and write it out.

    The chunk is dropped when the speaker has been stopped or closed, or
    when it holds fewer than 10 samples. The PyAudio handle and the output
    stream are created lazily on the first chunk. A failure to create
    either is logged and the chunk is dropped, the same way a failed write
    is handled, instead of raising into the caller's playback loop.

    Args:
        pcm: audio accepted by ``_as_int16_array``.
        source_rate: sample rate of ``pcm`` in Hz.
    """
    # Cheap pre-check OUTSIDE the lock — avoids taking the lock for late
    # chunks once a stop has fired.
    if self._stop_flag.is_set() or self._closed:
        return
    arr = _as_int16_array(pcm)
    if arr.size < 10:
        return
    # Resample BEFORE acquiring the lock — pure CPU, no shared state — so
    # a concurrent stop() does not wait on numpy work.
    if source_rate != self._STREAM_TARGET_RATE:
        arr = self._resample_mono16(arr, source_rate, self._STREAM_TARGET_RATE)
    payload = arr.tobytes()
    sent_sec = len(arr) / self._STREAM_TARGET_RATE
    with self._lock:
        # Re-check inside the lock: stop() may have run between the
        # pre-check and acquiring the lock, and the lazy open below must
        # not resurrect a stream that teardown just closed.
        if self._stop_flag.is_set() or self._closed:
            return
        try:
            if self._pa is None:
                self._pa = pyaudio.PyAudio()
            if self._stream is None:
                # _open_stream ignores the rate argument and always opens
                # at _STREAM_TARGET_RATE.
                self._open_stream(source_rate)
        except Exception as exc:
            log.warning("%sSpeaker output open failed: %s", self._label, exc)
            return
        stream = self._stream  # snapshot — wait_finish nulls it under this lock
        if stream is None:
            return
        try:
            stream.write(payload)
            self._total_sent += sent_sec
        except Exception as exc:
            log.warning("%sSpeaker write failed: %s", self._label, exc)
class PulseStreamSpeaker(Speaker):
    """Stream PCM to PulseAudio's default sink via a `pacat` subprocess.

    Why not _PyAudioSpeaker: PortAudio's 'pulse' device is unavailable in this
    conda env (the ALSA→pulse plugin libasound_module_conf_pulse.so isn't on the
    env's plugin path), so PyAudio can't reach PulseAudio at all → silence. The
    record-playback path proved `pacat`/`paplay` work, so we reuse that: pacat
    inherits PULSE_SERVER/XDG_RUNTIME_DIR from the child and plays to PulseAudio's
    DEFAULT sink — which the dashboard's Apply pins to the active profile's sink
    (the JBL). Used by the JBL profile (paired with the G1 built-in DDS mic).

    When ``sink_pattern`` is given, pacat is pinned to the first matching
    sink with ``--device``. That cached name is dropped again whenever pacat
    exits with an error, so a sink that disappeared is looked up afresh
    instead of being retried forever.
    """

    HW_RATE = 24_000   # Gemini's native receive rate; PulseAudio resamples to the sink

    def __init__(self, label: str = "Pulse", sink_pattern: str = ""):
        """Store configuration; pacat is only spawned by begin_stream/send_chunk.

        Args:
            label: prefix used in log messages.
            sink_pattern: comma-separated, case-insensitive substrings of the
                PulseAudio sink name to pin to; empty means the default sink.
        """
        self._label = label
        self._sink_pattern = sink_pattern
        self._sink_name: Optional[str] = None      # resolved PA sink, cached
        self._proc: Optional["subprocess.Popen"] = None
        self._stop_flag = threading.Event()
        self._lock = threading.RLock()
        self._total_sent = 0.0
        self._closed = False

    def _resolve_sink(self) -> Optional[str]:
        """Return the first PA sink whose name contains one of our patterns.

        Reads ``pactl list short sinks`` (tab-separated, name in the second
        column). Returns None when no pattern is configured, pactl fails or
        nothing matches; pacat then falls back to the default sink.
        """
        if not self._sink_pattern:
            return None
        pats = [p.strip().lower() for p in self._sink_pattern.split(",") if p.strip()]
        try:
            out = subprocess.run(
                ["pactl", "list", "short", "sinks"],
                capture_output=True, text=True, timeout=2,
            ).stdout
        except Exception:
            return None
        for line in out.splitlines():
            cols = line.split("\t")
            name = cols[1] if len(cols) > 1 else ""
            if name and any(p in name.lower() for p in pats):
                return name
        return None

    def _spawn(self) -> None:
        """Start pacat unless one is already running. Caller holds the lock."""
        previous = self._proc
        if previous is not None:
            exit_code = previous.poll()
            if exit_code is None:
                return
            if exit_code != 0:
                # pacat died on its own. A likely cause is that the pinned
                # sink no longer exists, so forget it and look it up again.
                self._sink_name = None
        if self._sink_name is None:
            self._sink_name = self._resolve_sink()
        cmd = [
            "pacat", "--playback",
            "--rate=%d" % self.HW_RATE, "--format=s16le", "--channels=1",
            "--latency-msec=120",
            "--client-name=sanad_voice", "--stream-name=sanad_voice_jbl",
        ]
        if self._sink_name:
            cmd.append("--device=%s" % self._sink_name)
        try:
            self._proc = subprocess.Popen(
                cmd, stdin=subprocess.PIPE,
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
            )
        except Exception as exc:
            log.warning("%sSpeaker: pacat spawn failed: %s", self._label, exc)
            self._proc = None

    @staticmethod
    def _finish_process(proc, *, terminate: bool, timeout: float) -> None:
        """Close pacat's stdin and wait for it to exit, killing it on timeout.

        With ``terminate=False`` pacat is left to drain what it buffered;
        with ``terminate=True`` it is signalled first. A killed process is
        waited on once more so it does not linger as a zombie.
        """
        try:
            if proc.stdin:
                proc.stdin.close()
        except Exception:
            pass
        try:
            if terminate:
                proc.terminate()
            proc.wait(timeout=timeout)
        except Exception:
            try:
                proc.kill()
                proc.wait(timeout=1)
            except Exception:
                pass

    def begin_stream(self) -> None:
        """Reset per-utterance state and make sure pacat is running."""
        with self._lock:
            self._stop_flag.clear()
            self._closed = False
            self._total_sent = 0.0
            self._spawn()

    def send_chunk(self, pcm: PCMLike, source_rate: int) -> None:
        """Resample one chunk to HW_RATE and write it to pacat's stdin.

        Dropped when the speaker is stopped/closed or the chunk holds fewer
        than 10 samples. A dead pacat is respawned; a broken pipe is ignored
        so the next chunk can trigger that respawn.
        """
        # Pre-check and CPU work happen outside the lock so stop() is only
        # ever blocked by the pipe write itself.
        if self._stop_flag.is_set() or self._closed:
            return
        arr = _as_int16_array(pcm)
        if arr.size < 10:
            return
        if source_rate != self.HW_RATE:
            arr = _resample_int16(arr, source_rate, self.HW_RATE)
        payload = arr.tobytes()
        sent_sec = len(arr) / self.HW_RATE
        with self._lock:
            # stop() may have fired while we were resampling.
            if self._stop_flag.is_set() or self._closed:
                return
            if self._proc is None or self._proc.poll() is not None:
                self._spawn()
            p = self._proc
            if p is None or p.stdin is None:
                return
            try:
                p.stdin.write(payload)
                p.stdin.flush()
                self._total_sent += sent_sec
            except (BrokenPipeError, OSError):
                pass

    def wait_finish(self) -> None:
        """Let pacat play out what it buffered, then reap it (max 8 s)."""
        with self._lock:
            p = self._proc
            self._proc = None
        if p is None:
            return
        # Closing stdin makes pacat drain its buffer and exit on its own.
        # The wait runs outside the lock so stop() is never stuck behind it.
        self._finish_process(p, terminate=False, timeout=8)

    def stop(self) -> None:
        """Abort playback immediately and mark the speaker closed."""
        # Flag first, outside the lock, so a send_chunk on another thread
        # sees the stop before it even tries to acquire the lock.
        self._stop_flag.set()
        with self._lock:
            self._closed = True
            p = self._proc
            self._proc = None
        if p is not None:
            self._finish_process(p, terminate=True, timeout=2)

    @property
    def interrupted(self) -> bool:
        """True once stop() has been called since the last begin_stream()."""
        return self._stop_flag.is_set()

    @property
    def total_sent_sec(self) -> float:
        """Seconds of audio written to pacat since the last begin_stream()."""
        return self._total_sent
@classmethod
def build_backends(
    cls,
    profile_id: str,
    *,
    audio_client: Optional[Any] = None,
) -> tuple[Mic, Speaker]:
    """Create a new (Mic, Speaker) pair for ``profile_id``, unwrapped.

    The id is trimmed, lowercased and mapped through ``_PROFILE_ALIASES``.
    Nothing is started; the caller owns both objects.

    Args:
        profile_id: profile name or alias.
        audio_client: G1 AudioClient; mandatory for the profiles that play
            through the chest speaker ('builtin', 'hollyland_builtin').

    Raises:
        ValueError: unknown profile, or a chest-speaker profile requested
            without ``audio_client``.
    """
    canonical = _PROFILE_ALIASES.get((profile_id or "").strip().lower())
    if canonical is None:
        raise ValueError(
            f"unknown audio profile {profile_id!r}; "
            f"supported: {', '.join(SUPPORTED_PROFILES)}"
        )

    # Profiles that cannot be built without the DDS audio client, with the
    # message raised when it is missing.
    client_required = {
        "builtin": "profile 'builtin' requires audio_client (G1 AudioClient)",
        "hollyland_builtin": (
            "profile 'hollyland_builtin' uses the G1 speaker — "
            "requires audio_client"
        ),
    }
    # One factory per canonical profile; each builds the mic first, then
    # the speaker. The JBL profile plays via pacat and captures with the
    # G1 built-in DDS mic, so it needs no audio client.
    factories = {
        "builtin": lambda: (BuiltinMic(), BuiltinSpeaker(audio_client)),
        "anker": lambda: (AnkerMic(), AnkerSpeaker()),
        "hollyland_builtin": lambda: (HollylandMic(), BuiltinSpeaker(audio_client)),
        "jbl_builtin_mic": lambda: (
            BuiltinMic(),
            PulseStreamSpeaker(label="JBL", sink_pattern="jbl,bluez"),
        ),
    }

    complaint = client_required.get(canonical)
    if complaint is not None and audio_client is None:
        raise ValueError(complaint)
    make_pair = factories.get(canonical)
    if make_pair is None:
        raise AssertionError(f"unhandled resolved profile: {canonical!r}")
    return make_pair()
+""" + +from __future__ import annotations + +import json +import subprocess +import threading +import time +import wave +from pathlib import Path +from typing import Any + +try: + import numpy as np + _HAS_NUMPY = True +except ImportError: + np = None + _HAS_NUMPY = False + +try: + import pyaudio +except ImportError: + pyaudio = None # optional — only needed for local PCM playback + +# G1 AudioClient — used to route playback through the robot chest speaker +# via DDS `PlayStream` (the same pipe Gemini uses). Without this, WAV +# playback would go to the Jetson's built-in audio codec, which isn't +# wired to any audible output on the G1. +try: + from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient + from unitree_sdk2py.g1.audio.g1_audio_api import ( + ROBOT_API_ID_AUDIO_STOP_PLAY, + ) + _HAS_G1_AUDIO = True +except ImportError: + AudioClient = None + ROBOT_API_ID_AUDIO_STOP_PLAY = 0 + _HAS_G1_AUDIO = False + +from Project.Sanad.config import ( + CHANNELS, + CHUNK_SIZE, + RECEIVE_SAMPLE_RATE, + SINK as DEFAULT_SINK, + SOURCE as DEFAULT_SOURCE, +) +from Project.Sanad.core.logger import get_logger +from Project.Sanad.voice import audio_devices as ad + +log = get_logger("audio_manager") + +FORMAT = pyaudio.paInt16 if pyaudio else 8 + +# Default fallback constants only — the live selection is per-instance state +# on AudioManager (self._current_sink / self._current_source), guarded by +# self._device_lock. Keeping the selection module-global meant two +# AudioManager instances stomped each other's sink/source; it now lives on +# the instance. + +# How long an applied pactl selection is trusted before the hot playback / +# recording path re-runs the (expensive, multi-shell) pactl scan. The +# audio_devices watcher and the dashboard Apply endpoint already re-resolve +# on device change, so a short TTL here is purely a backstop against an +# unobserved hot-unplug — it does NOT need to be tight. 
def _resolve_devices() -> tuple[str, str]:
    """Look up the currently selected (sink, source) pair.

    Asks ``ad.current_selection()`` and substitutes the configured default
    for whichever entry is missing or empty. Any failure is logged as a
    warning and both configured defaults are returned.
    """
    try:
        selection = ad.current_selection()
        chosen = (
            selection.get("sink") or DEFAULT_SINK,
            selection.get("source") or DEFAULT_SOURCE,
        )
    except Exception as exc:
        log.warning("Could not resolve audio devices: %s", exc)
        chosen = (DEFAULT_SINK, DEFAULT_SOURCE)
    return chosen
def _get_g1_audio_client(self):
    """Hand back the shared G1 AudioClient, building it on first use.

    Returns None when the Unitree SDK import failed (``_HAS_G1_AUDIO`` is
    false) or when constructing/initialising the client raises. A failed
    attempt is not remembered, so the next call tries again.
    """
    if not _HAS_G1_AUDIO:
        return None
    if self._g1_audio_client is None:
        try:
            client = AudioClient()
            # Short RPC timeout (0.6 s) so a missing reply cannot stall the
            # caller for long.
            client.SetTimeout(0.6)
            client.Init()
            try:
                client.SetVolume(100)
            except Exception:
                # Volume is best-effort; the client is still usable.
                pass
            self._g1_audio_client = client
            log.info("G1 AudioClient initialized (for chest-speaker playback)")
        except Exception as exc:
            log.warning("G1 AudioClient init failed: %s", exc)
            self._g1_audio_client = None
    return self._g1_audio_client
+ """ + if not force: + with self._device_lock: + if (time.monotonic() - self._defaults_applied_at) < _DEFAULTS_TTL_S: + return + try: + result = ad.apply_current_selection() + cur = result.get("selection", {}) + sink = cur.get("sink", "") + source = cur.get("source", "") + with self._device_lock: + self._current_sink = sink or DEFAULT_SINK + self._current_source = source or DEFAULT_SOURCE + self._defaults_applied_at = time.monotonic() + # At startup / device-change, re-apply the user's SAVED speaker volume + # to the active sink — PulseAudio doesn't persist our USB/BT (JBL/Anker) + # sink volume across restarts, so without this the JBL comes back at a + # default level instead of where the user left it. + if force: + self._restore_sink_volume() + except Exception as exc: + log.warning("Audio defaults not applied: %s", exc) + + def _restore_sink_volume(self) -> None: + """Apply config audio.g1_volume to the active PulseAudio sink.""" + try: + from Project.Sanad.config import load_config + vol = int(((load_config() or {}).get("audio") or {}).get("g1_volume", 100)) + vol = max(0, min(100, vol)) + sink = self._current_sink or "@DEFAULT_SINK@" + import subprocess as _sp + _sp.run(["pactl", "set-sink-volume", sink, "%d%%" % vol], + timeout=3, check=False, + stdout=_sp.DEVNULL, stderr=_sp.DEVNULL) + if vol > 0: + _sp.run(["pactl", "set-sink-mute", sink, "0"], timeout=3, + check=False, stdout=_sp.DEVNULL, stderr=_sp.DEVNULL) + log.info("restored saved speaker volume → %d%% (sink=%s)", vol, sink) + except Exception as exc: + log.warning("restore sink volume failed: %s", exc) + + def _pulse_device_index(self) -> int | None: + """Resolve the PortAudio device index that routes through PulseAudio. + + On this Jetson's conda PyAudio, opening with output/input device + index None lands on PortAudio's default — the silent hw:0 + platform-sound card. 
Opening PortAudio's 'pulse' (or 'default') + device instead routes through the PulseAudio daemon, which + ensure_audio_defaults() has already pointed at the resolved + sink/source. Mirrors voice/audio_io.py's _resolve_device_index. + + Returns the device index, or None when PortAudio exposes no + pulse/default device (then the caller falls back to PortAudio's + own default). Cached for the lifetime of the PyAudio handle. + """ + if self._pulse_pa_index is not None: + return self._pulse_pa_index if self._pulse_pa_index >= 0 else None + pulse_idx = default_idx = None + try: + for i in range(self.pya.get_device_count()): + info = self.pya.get_device_info_by_index(i) + name_lower = str(info.get("name", "")).lower() + if pulse_idx is None and name_lower == "pulse": + pulse_idx = i + elif default_idx is None and name_lower == "default": + default_idx = i + except Exception as exc: + log.debug("pulse device probe failed: %s", exc) + idx = pulse_idx if pulse_idx is not None else default_idx + self._pulse_pa_index = idx if idx is not None else -1 + return idx + + @property + def current_sink(self) -> str: + with self._device_lock: + return self._current_sink + + @property + def current_source(self) -> str: + with self._device_lock: + return self._current_source + + def close(self): + # Cached PortAudio device index is tied to this PyAudio handle — + # invalidate it so a re-init (audio reset) re-probes 'pulse'. + self._pulse_pa_index = None + self.pya.terminate() + + def sample_width(self) -> int: + return self.pya.get_sample_size(FORMAT) + + # -- playback -- + + def play_pcm(self, pcm_bytes: bytes, channels: int, sample_rate: int, sample_width: int): + with self.play_lock: + self.ensure_audio_defaults() + # Route through PortAudio's 'pulse' device so playback reaches + # the resolved sink — output_device_index=None defaults to the + # silent hw:0 platform-sound card on this Jetson's conda PyAudio. 
+ stream = self.pya.open( + format=self.pya.get_format_from_width(sample_width), + channels=channels, + rate=sample_rate, + output=True, + output_device_index=self._pulse_device_index(), + frames_per_buffer=CHUNK_SIZE, + ) + try: + frame_bytes = CHUNK_SIZE * channels * sample_width + for offset in range(0, len(pcm_bytes), frame_bytes): + stream.write(pcm_bytes[offset : offset + frame_bytes]) + finally: + stream.stop_stream() + stream.close() + + # Sink-name substrings that mean "PulseAudio routes this somewhere + # audible without DDS" — extend the tuple to add more USB cards (e.g. + # hollyland sink). Matched case-insensitively. + # "jbl"/"bluez" → the JBL Bluetooth speaker (and any bluez sink) is a real + # PulseAudio sink, so record playback must go via paplay/PulseAudio, NOT the + # G1 DDS chest speaker. + _PULSE_SINK_MARKERS = ("anker", "powerconf", "hollyland", "jbl", "bluez") + # Sample rate Anker PowerConf (and most USB UAC1 cards) accept natively + # — used as the resample target before opening a PortAudio stream so + # we don't hit paInvalidSampleRate when the WAV's native rate + # (24kHz from Gemini TTS, 22050 from old TTS, etc.) doesn't match + # the card's HW caps. + _PULSE_TARGET_RATE = 48_000 + + @staticmethod + def _resample_pcm16(pcm_bytes: bytes, channels: int, + src_rate: int, dst_rate: int) -> bytes: + """Linear-interpolation resample of int16 PCM. numpy-only (no scipy) + — matches the pattern used by `_play_pcm_via_g1`. + + Returns the resampled PCM bytes (same channel layout). No-op when + rates already match. Requires numpy (caller guards with _HAS_NUMPY). + """ + if src_rate == dst_rate or not pcm_bytes: + return pcm_bytes + arr = np.frombuffer(pcm_bytes, dtype=np.int16) + if channels > 1: + # De-interleave so each channel resamples independently + # (cheap on numpy; avoids stereo→mono surprises). 
+ if arr.size % channels != 0: + arr = arr[: arr.size - (arr.size % channels)] + arr = arr.reshape(-1, channels) + n_in = arr.shape[0] + n_out = max(1, int(n_in * dst_rate / src_rate)) + xp = np.arange(n_in, dtype=np.float64) + x_new = np.linspace(0, n_in, n_out, endpoint=False) + cols = [ + np.interp(x_new, xp, arr[:, ch].astype(np.float64)) + for ch in range(channels) + ] + out = np.column_stack(cols).astype(np.int16) + return out.tobytes() + n_in = arr.size + n_out = max(1, int(n_in * dst_rate / src_rate)) + out = np.interp( + np.linspace(0, n_in, n_out, endpoint=False), + np.arange(n_in, dtype=np.float64), + arr.astype(np.float64), + ).astype(np.int16) + return out.tobytes() + + def _active_sink_name(self) -> str: + """Return the currently-tracked default sink name, ORIGINAL case + preserved. + + Reads `self.current_sink` which is kept in lock-step with pactl + defaults by `refresh_devices()` (called by the dashboard Apply + endpoint and by the live-Gemini watcher on profile swaps). Empty + string if nothing's tracked yet. + + IMPORTANT: PulseAudio sink names are CASE-SENSITIVE. paplay + --device= needs the exact name pactl uses (e.g. + `alsa_output.usb-Anker_PowerConf_A3321-DEV-SN1-01.analog-stereo`). + Routing-decision substring checks (against `_PULSE_SINK_MARKERS`) + lowercase BOTH sides explicitly so the case-sensitivity of the + sink name doesn't break marker matching. + """ + try: + return (self.current_sink or "").strip() + except Exception: + return "" + + def play_wav(self, path: Path, + record_name: str | None = None) -> dict[str, Any]: + """Play a WAV file through the speaker that matches the active + PulseAudio default sink: + + • Default sink is a USB conference speaker (Anker PowerConf, + Hollyland, anything matching `_PULSE_SINK_MARKERS`) → write + via PyAudio → PortAudio 'pulse' device → PulseAudio default + sink. 
This works even when the user picked the device via + the dashboard's "Manual sink/source override" (no profile + id) — we key off the sink name, not the profile. + • Default sink is the Jetson platform-sound (or anything that + doesn't match a marker) → use G1 DDS (`AudioClient.PlayStream`) + because platform-sound isn't wired to any audible speaker on + the G1; only the DDS pipe reaches the chest loudspeaker. + + `record_name` is purely a label surfaced via `playback_status()` + so the dashboard can show "Now playing: t6_1" etc. + """ + with wave.open(str(path), "rb") as wf: + channels = wf.getnchannels() + sw = wf.getsampwidth() + rate = wf.getframerate() + data = wf.readframes(wf.getnframes()) + + sink = self._active_sink_name() + sink_lc = sink.lower() + # Marker check is case-insensitive; the original `sink` (with case + # preserved) is what gets passed to paplay --device. + use_pulse = any(m in sink_lc for m in self._PULSE_SINK_MARKERS) + client = self._get_g1_audio_client() if not use_pulse else None + + # Lip-sync: drive the LED mask mouth from THIS clip's amplitude while it + # plays (synced to the playback position via _play_state), same as the + # live Gemini voice does. Best-effort; stopped + mouth-closed when the + # playback path below returns. No-op if numpy / the mask are unavailable. 
+ _mask_stop = threading.Event() + self._start_mask_lipsync(data, channels, sw, rate, _mask_stop) + try: + if not use_pulse and client is not None and _HAS_NUMPY and sw == 2: + log.info("play_wav route=g1_dds sink=%s record=%s", + sink or "?", record_name or "?") + self._play_pcm_via_g1(data, channels, rate, record_name=record_name) + route = "g1_dds" + else: + if not use_pulse and _HAS_G1_AUDIO and client is None: + log.warning("play_wav: non-PulseAudio sink but G1 AudioClient " + "unavailable — falling back to PulseAudio default") + # Prefer paplay subprocess when it's installed — bypasses + # PortAudio (which on this Jetson's conda env doesn't expose a + # 'pulse' device, leading to PyAudio defaulting to the silent + # Jetson platform-sound card). paplay routes through PulseAudio + # at the daemon level so audio actually reaches the Anker sink. + use_paplay = bool(self._paplay_binary()) + try: + if use_paplay: + log.info("play_wav route=paplay sink=%s record=%s", + sink or "default", record_name or "?") + self._play_pcm_via_paplay(data, channels, rate, sw, + record_name=record_name) + route = "paplay" + else: + log.info("play_wav route=pulse sink=%s record=%s " + "(paplay not installed — using PyAudio)", + sink or "default", record_name or "?") + self._play_pcm_via_pulse(data, channels, rate, sw, + record_name=record_name) + route = "pulse" + except _PulseOpenFailed as exc: + # paplay spawn failed, USB device gone mid-flight, etc. + # Fall back to DDS chest if available so the user gets + # audio out of *something* rather than silence. 
+ fb_client = self._get_g1_audio_client() + if fb_client is not None and _HAS_NUMPY and sw == 2: + log.warning("play_wav route=%s failed (%s); falling " + "back to g1_dds", + "paplay" if use_paplay else "pulse", exc) + self._play_pcm_via_g1(data, channels, rate, + record_name=record_name) + route = ("paplay" if use_paplay else "pulse") + "_failed_to_g1_dds" + else: + log.warning("play_wav pulse path failed (%s); no DDS " + "fallback available", exc) + route = ("paplay" if use_paplay else "pulse") + "_failed" + finally: + _mask_stop.set() + + duration = len(data) / (rate * channels * sw) if rate else 0 + return {"path": str(path), "duration_seconds": round(duration, 3), + "route": route, "sink": sink or "default"} + + def _set_live_voice_paused(self, paused: bool) -> None: + """Pause/resume the live Gemini session around a record playback so it + doesn't talk over (or react to) the clip. Best-effort + lazy import to + avoid a hard dependency on the dashboard process; no-op if the live + subprocess isn't running. + + Runs on a DETACHED daemon thread: the pause is sent over the child's + stdin pipe, and when the child is busy (e.g. mid-reconnect) that write + can block. We must NEVER let it stall the playback loop — which calls + this right before streaming — or the record goes silent. Fire-and-forget + keeps playback starting immediately; a slightly late pause is harmless.""" + def _do() -> None: + try: + from Project.Sanad.main import live_sub + if (live_sub is not None and hasattr(live_sub, "send_pause") + and hasattr(live_sub, "is_running") + and live_sub.is_running()): + live_sub.send_pause(paused) + except Exception: + pass + threading.Thread(target=_do, name="live-voice-pause", daemon=True).start() + + def set_live_voice_hold(self, hold: bool) -> bool: + """Manual hold for the live-Gemini pause. + + hold=True → pause the live voice NOW and keep it paused; record playback + will not auto-resume it (the finally skips the resume). 
+ hold=False → release: resume the live voice, unless a clip is currently + playing (that play's own finally resumes when it ends). + Returns the resulting hold state. Idempotent.""" + self._live_voice_hold = bool(hold) + if self._live_voice_hold: + self._set_live_voice_paused(True) + else: + with self._play_state_lock: + playing = self._play_state is not None + if not playing: + self._set_live_voice_paused(False) + log.info("live-voice hold → %s", "PAUSED" if self._live_voice_hold else "AUTO") + return self._live_voice_hold + + # -- LED mask lip-sync for record playback -------------------------------- + + _MASK_FRAME_SEC = 0.08 # 80 ms mouth-level frame (matches the Gemini lip-sync) + + def _set_mask_mouth(self, level: int) -> None: + """Push a mouth-open level (0..3) to the LED mask. Best-effort, lazy + import, thread-safe + a no-op if the mask isn't running.""" + try: + from Project.Sanad.main import mask_face + if mask_face is not None and hasattr(mask_face, "set_mouth"): + mask_face.set_mouth(int(level)) + except Exception: + pass + + def _mouth_envelope(self, data: bytes, channels: int, sw: int, + rate: int) -> list[int]: + """Per-80ms mouth-open levels (0..3) from a clip's RMS — same thresholds + the Gemini child uses, so records and the live voice move the mouth the + same way. 
Empty if numpy/format unsupported.""" + if not _HAS_NUMPY or sw != 2 or not rate: + return [] + try: + arr = np.frombuffer(data, dtype=np.int16) + if channels == 2 and arr.size % 2 == 0: + arr = arr.reshape(-1, 2).mean(axis=1).astype(np.int16) + frame = max(1, int(rate * self._MASK_FRAME_SEC)) + env: list[int] = [] + for i in range(0, len(arr), frame): + chunk = arr[i:i + frame].astype(np.float64) + rms = float(np.sqrt(np.mean(chunk ** 2))) if chunk.size else 0.0 + env.append(0 if rms < 140 else 1 if rms < 650 + else 2 if rms < 1700 else 3) + return env + except Exception: + return [] + + def _start_mask_lipsync(self, data: bytes, channels: int, sw: int, + rate: int, stop_evt: "threading.Event") -> None: + env = self._mouth_envelope(data, channels, sw, rate) + if not env: + return + threading.Thread( + target=self._mask_mouth_driver, args=(env, stop_evt), + name="rec-lipsync", daemon=True, + ).start() + + def _mask_mouth_driver(self, env: list[int], + stop_evt: "threading.Event") -> None: + """Walk the mouth envelope synced to the live playback position + (_play_state) and drive the mask mouth. Honours pause (mouth closed) + and seeks. Closes the mouth when the play ends.""" + last = -1 + try: + while not stop_evt.is_set(): + t = -1.0 + with self._play_state_lock: + st = self._play_state + if st is not None and not st["paused"] and st["play_started_at"] > 0: + r = st["rate"] or 1 + t = (st["play_started_pos"] / r + + (time.time() - st["play_started_at"])) + lvl = 0 + if t >= 0: + idx = int(t / self._MASK_FRAME_SEC) + lvl = env[idx] if 0 <= idx < len(env) else 0 + if lvl != last: + self._set_mask_mouth(lvl) + last = lvl + stop_evt.wait(0.05) + finally: + self._set_mask_mouth(0) + + # -- G1 DDS-routed playback -- + + _G1_STREAM_APP = "sanad_playback" + # The live Gemini voice streams to the SAME G1 chest speaker under a + # DIFFERENT app_name (config/voice_config.json speaker.app_name, default + # "sanad"). 
The G1 "voice" audio service is per-app-name, so a record must + # STOP that app too — otherwise Gemini's chunked PlayStream("sanad", …) per + # spoken word keeps stomping the record's single PlayStream and the clip is + # silent while its counter ticks. STOP_PLAY is process-agnostic (keyed only + # by app_name on the shared DDS "voice" service), so stopping it from here + # halts the separate voice child's stream. Must match voice_config.json. + _LIVE_VOICE_APP = "sanad" + _G1_HW_RATE = 16_000 + + def stop_playback(self) -> None: + """Stop any in-flight G1 DDS audio stream + tear down the playback + state so a pause/resume cycle can't keep trying. + + Used by the dashboard's Stop button. Safe to call even when + nothing is playing — the DDS call is idempotent. + """ + with self._play_state_lock: + if self._play_state is not None: + self._play_state["stop"] = True + client = self._get_g1_audio_client() + if client is None: + return + try: + client._Call( + ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": self._G1_STREAM_APP}), + ) + log.info("G1 audio stream stopped (app=%s)", self._G1_STREAM_APP) + except Exception as exc: + log.warning("stop_playback failed: %s", exc) + + def pause_playback(self) -> dict[str, Any]: + """Pause the active G1 playback. The play loop notices the flag, + sends STOP_PLAY to halt the chest speaker, and advances the saved + position by the time elapsed since this chunk started. resume() + re-pushes from there. No-op if nothing is playing.""" + with self._play_state_lock: + if self._play_state is None: + return {"ok": False, "reason": "nothing playing"} + if self._play_state["paused"]: + return {"ok": True, "already": True, "paused": True} + self._play_state["paused"] = True + log.info("Playback paused (record=%s)", + self._play_state.get("record_name") or "?") + return {"ok": True, "paused": True} + + def resume_playback(self) -> dict[str, Any]: + """Resume after a pause. 
The play loop re-pushes pcm[pos:] to G1 + and re-enters the wait/poll cycle.""" + with self._play_state_lock: + if self._play_state is None: + return {"ok": False, "reason": "nothing playing"} + if not self._play_state["paused"]: + return {"ok": True, "already": True, "paused": False} + self._play_state["paused"] = False + log.info("Playback resumed (record=%s)", + self._play_state.get("record_name") or "?") + return {"ok": True, "resumed": True} + + def seek_playback(self, position_sec: float) -> dict[str, Any]: + """Jump to `position_sec` in the active clip. The play loop re-pushes + pcm[pos:] from the new position (works whether playing or paused — if + paused, the new position takes effect on resume).""" + with self._play_state_lock: + if self._play_state is None: + return {"ok": False, "reason": "nothing playing"} + rate = self._play_state["rate"] or 1 + total = self._play_state["total_samples"] + target = max(0, min(total, int(float(position_sec) * rate))) + self._play_state["pos"] = target + self._play_state["play_started_pos"] = target + self._play_state["play_started_at"] = 0.0 # park until re-push + self._play_state["seek"] = True + log.info("Playback seek → %.2fs (record=%s)", + target / rate, self._play_state.get("record_name") or "?") + return {"ok": True, "position_sec": round(target / rate, 2), + "duration_sec": round(total / rate, 2) if rate else 0.0} + + def playback_status(self) -> dict[str, Any]: + """Snapshot of the current playback for the dashboard. Returns + `playing=False` when idle. 
`position_sec` is best-effort — + derived from elapsed wall time since the last PlayStream call.""" + with self._play_state_lock: + if self._play_state is None: + return {"playing": False, "paused": False, "record_name": None, + "position_sec": 0.0, "duration_sec": 0.0, + "live_hold": self._live_voice_hold} + rate = self._play_state["rate"] or 1 + total = self._play_state["total_samples"] + pos = self._play_state["pos"] + if (not self._play_state["paused"] + and self._play_state["play_started_at"] > 0): + elapsed = time.time() - self._play_state["play_started_at"] + advance = int(max(0.0, elapsed) * rate) + pos = min(self._play_state["play_started_pos"] + advance, total) + return { + "playing": True, + "paused": self._play_state["paused"], + "record_name": self._play_state.get("record_name"), + "position_sec": round(pos / rate, 2), + "duration_sec": round(total / rate, 2) if rate else 0.0, + "live_hold": self._live_voice_hold, + } + + def _play_pcm_via_g1(self, pcm_bytes: bytes, channels: int, + source_rate: int, + record_name: str | None = None) -> None: + """Stream int16 PCM to the G1 chest speaker via AudioClient.PlayStream, + with pause / resume / stop support. + + Converts stereo → mono and resamples to 16 kHz (the rate + AudioClient expects). The play loop pushes pcm[pos:] in one + PlayStream call, then polls _play_state every 50 ms while the + clip drains so pause / stop are honoured promptly. Pause sends + STOP_PLAY, snapshots the position from elapsed wall time, then + loops until resumed or stopped. Resume re-pushes pcm[pos:]. 
+ """ + client = self._get_g1_audio_client() + if client is None: + raise RuntimeError("G1 AudioClient not available") + + arr = np.frombuffer(pcm_bytes, dtype=np.int16) + if channels == 2 and arr.size % 2 == 0: + arr = arr.reshape(-1, 2).mean(axis=1).astype(np.int16) + if source_rate != self._G1_HW_RATE and arr.size: + target_len = max(1, int(len(arr) * self._G1_HW_RATE / source_rate)) + arr = np.interp( + np.linspace(0, len(arr), target_len, endpoint=False), + np.arange(len(arr)), + arr.astype(np.float64), + ).astype(np.int16) + rate = self._G1_HW_RATE + total_samples = len(arr) + + # Preempt any in-flight playback: signal it to stop + bump the epoch so + # a NEW play starts promptly instead of queueing behind the previous + # clip (or blocking forever on a paused one). This is what makes + # "play another record" interrupt-and-start rather than stall. + with self._play_state_lock: + if self._play_state is not None: + self._play_state["stop"] = True + self._play_epoch += 1 + my_epoch = self._play_epoch + + # play_lock serialises overlapping play_wav() calls; the preempted + # playback (stop=True) releases it promptly. pause/resume/stop do NOT + # take it (they only touch _play_state under _play_state_lock). + with self.play_lock: + # State is set INSIDE the lock now (was before — which let a second + # play stomp the first's state). Bail if a still-newer play won the + # race while we waited for the lock. + with self._play_state_lock: + if my_epoch != self._play_epoch: + return + self._play_state = { + "record_name": record_name, + "rate": rate, + "total_samples": total_samples, + "pos": 0, + "paused": False, + "stop": False, + "seek": False, + "play_started_at": 0.0, + "play_started_pos": 0, + "epoch": my_epoch, + } + # Pause the live Gemini for the clip (idempotent across preempting + # plays; the last play's finally resumes it). 
+ self._set_live_voice_paused(True) + try: + while True: + # Snapshot the state for this iteration + with self._play_state_lock: + st = self._play_state + if st is None or st.get("epoch") != my_epoch or st["stop"]: + break + if st["paused"]: + paused_now = True + sub_bytes = None + sub_total_sec = 0.0 + else: + paused_now = False + st["seek"] = False # consumed — pushing from st["pos"] + pos = st["pos"] + if pos >= total_samples: + break + sub_bytes = arr[pos:].tobytes() + sub_total_sec = (total_samples - pos) / rate + st["play_started_pos"] = pos + # Set for real only AFTER PlayStream fires (below) so + # the dashboard counter doesn't tick on a stream that + # was dropped/never started. 0.0 → playback_status + # parks at play_started_pos until audio truly begins. + st["play_started_at"] = 0.0 + + if paused_now: + time.sleep(0.1) + continue + + # Push remainder to G1. A SINGLE STOP suffices: the G1 "voice" + # service treats the chest speaker as one stream and STOP_PLAY + # is global (stops whatever's playing regardless of app_name), + # so this also clears any Gemini stream. Two STOP RPCs doubled + # the latency on the shared DDS bus and stalled the start; the + # live-voice pause (child stops its own stream) covers Gemini. + stream_id = f"wav_{int(time.time() * 1000)}" + try: + client._Call( + ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": self._G1_STREAM_APP}), + ) + except Exception: + pass + time.sleep(0.15) + # After the STOP+settle window, re-check our state: bail if a + # newer press superseded us (no churn / no queue), or loop back + # if a Pause was clicked during the window (don't leak audio). 
+ with self._play_state_lock: + st = self._play_state + if st is None or st.get("epoch") != my_epoch or st["stop"]: + break + paused_in_settle = st["paused"] + if paused_in_settle: + continue + # PlayStream can raise on a DDS hiccup; if it does, abort this + # play rather than leaving play_started_at=0 while the poll loop + # runs (which would make the pause-math elapsed huge and snap + # the counter to the end). Set the timestamp only on success. + try: + client.PlayStream(self._G1_STREAM_APP, stream_id, sub_bytes) + except Exception as exc: + log.warning("PlayStream failed: %s", exc) + break + with self._play_state_lock: + if (self._play_state is not None + and self._play_state.get("epoch") == my_epoch): + self._play_state["play_started_at"] = time.time() + # NOTE: do NOT issue a STOP_PLAY here. The G1 "voice" service + # treats the chest speaker as a SINGLE stream — STOP_PLAY halts + # whatever is currently playing regardless of app_name (verified + # empirically: a post-PlayStream STOP("sanad") silenced the + # record entirely). The pre-stream STOP(both) above already + # cleared Gemini; the live-voice pause keeps it from re-pushing. + + # Poll for pause / stop while the clip drains + poll_deadline = time.time() + sub_total_sec + 0.3 + interrupted = False + while time.time() < poll_deadline: + with self._play_state_lock: + if self._play_state is None or self._play_state["stop"]: + interrupted = True + try: + client._Call( + ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": self._G1_STREAM_APP}), + ) + except Exception: + pass + break + if self._play_state.get("seek"): + # Seek requested — halt the current stream and let + # the outer loop re-push from the new pos (already + # set by seek_playback). Cleared in the push branch. 
+ try: + client._Call( + ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": self._G1_STREAM_APP}), + ) + except Exception: + pass + interrupted = True + break + if self._play_state["paused"]: + # Halt G1 and snapshot the new position + try: + client._Call( + ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": self._G1_STREAM_APP}), + ) + except Exception: + pass + elapsed = (time.time() + - self._play_state["play_started_at"]) + advance = int(max(0.0, elapsed) * rate) + self._play_state["pos"] = min( + self._play_state["play_started_pos"] + advance, + total_samples, + ) + interrupted = True + break + time.sleep(0.05) + + if not interrupted: + # Finished naturally — mark fully consumed and exit + with self._play_state_lock: + if self._play_state is not None: + self._play_state["pos"] = total_samples + try: + client._Call( + ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": self._G1_STREAM_APP}), + ) + except Exception: + pass + break + finally: + with self._play_state_lock: + # Only clear if it's still OURS — a preempting play may have + # already installed its own state after bumping the epoch. + mine = (self._play_state is not None + and self._play_state.get("epoch") == my_epoch) + if mine: + self._play_state = None + # Resume the live Gemini only if WE were the last play — if a + # newer play preempted us, it keeps Gemini paused and will + # resume when it finishes (no pause/resume thrash on rapid clicks). + # Skip the resume entirely while a manual hold is active: the user + # wants Gemini to STAY paused until they release it. + if mine and not self._live_voice_hold: + self._set_live_voice_paused(False) + + # paplay binary path. Cached on first probe so we don't keep re-shelling + # `which paplay` on every play_wav call. None = probe pending; "" = absent. + _PAPLAY_BIN: str | None = None + + @classmethod + def _paplay_binary(cls) -> str: + """Return the absolute path to `paplay` if installed, else "". 
+ Cached for the lifetime of the process — paplay doesn't appear/ + disappear mid-run.""" + if cls._PAPLAY_BIN is None: + from shutil import which + cls._PAPLAY_BIN = which("paplay") or "" + return cls._PAPLAY_BIN + + def _play_pcm_via_paplay(self, pcm_bytes: bytes, channels: int, + sample_rate: int, sample_width: int, + record_name: str | None = None) -> None: + """Play int16 PCM via the `paplay` subprocess. Bypasses PortAudio + entirely — we just pipe raw PCM into paplay's stdin and let + PulseAudio do the resampling/format conversion/device routing. + + Why this exists: on conda's bundled PyAudio (the build shipped in + the gemini_sdk env on this Jetson), PortAudio does NOT enumerate a + 'pulse' device — only direct ALSA hw:N entries. Opening + `output_device_index=None` then defaults to hw:0 which is the + Jetson `platform-sound` card → silent (not wired to any speaker). + Opening a discrete `hw:N` for the Anker grabs the card exclusively + and PulseAudio drops it. Neither path actually plays through the + Anker. paplay sidesteps the whole stack. + + Targets the dashboard's currently-selected sink by name via + `--device=`, which guarantees the audio goes to the same + place pactl set-default-sink would have routed. + + Reuses the same `_play_state` machinery as the DDS path so the + dashboard's Pause / Stop / position-meter behave identically. + """ + sink_name = self._active_sink_name() + bytes_per_sample = max(1, channels * sample_width) + total_bytes = len(pcm_bytes) - (len(pcm_bytes) % bytes_per_sample) + total_samples = total_bytes // bytes_per_sample + chunk_bytes = max( + bytes_per_sample, (sample_rate // 10) * bytes_per_sample, + ) + # paplay format codes: s16le is the only one we ever produce here. 
+ fmt = "s16le" if sample_width == 2 else \ + "s32le" if sample_width == 4 else \ + "u8" + # Keep cmd minimal — older paplay versions reject unknown long + # options and exit immediately (manifests as instant paplay death + + # a flood of BrokenPipeError on stdin write). --raw / --format / + # --rate / --channels / --device are all standard since 0.9.x. + cmd = [ + self._paplay_binary(), "--raw", + f"--format={fmt}", f"--rate={sample_rate}", + f"--channels={channels}", + ] + if sink_name: + cmd.extend(["--device", sink_name]) + + with self._play_state_lock: + self._play_state = { + "record_name": record_name, + "rate": sample_rate, + "total_samples": total_samples, + "pos": 0, + "paused": False, + "stop": False, + "play_started_at": 0.0, + "play_started_pos": 0, + } + + with self.play_lock: + try: + while True: + with self._play_state_lock: + st = self._play_state + if st is None or st["stop"]: + break + if st["paused"]: + time.sleep(0.1) + continue + pos = st["pos"] + if pos >= total_samples: + break + st["play_started_pos"] = pos + st["play_started_at"] = time.time() + + byte_pos = pos * bytes_per_sample + local_pos = pos + try: + proc = subprocess.Popen( + cmd, stdin=subprocess.PIPE, + stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, + ) + except Exception as exc: + log.warning("paplay spawn failed (%s) — signalling " + "DDS fallback", exc) + with self._play_state_lock: + self._play_state = None + raise _PulseOpenFailed(str(exc)) from exc + + # Brief settle so paplay can validate args + connect to + # PulseAudio. If it's going to die (bad sink, format, + # connection refused), it dies within ~50ms. Without + # this check, the next stdin.write() would get a sea + # of BrokenPipeError messages and the outer loop would + # keep re-spawning forever. 
+ time.sleep(0.05) + if proc.poll() is not None: + try: + err = (proc.stderr.read() or b"").decode( + "utf-8", "replace").strip()[:400] + except Exception: + err = "" + log.warning("paplay died immediately rc=%d device=%s err=%s", + proc.returncode, sink_name or "default", err) + with self._play_state_lock: + self._play_state = None + raise _PulseOpenFailed( + f"paplay rc={proc.returncode} {err or 'no stderr'}" + ) + + interrupted = False + fatal_exc: Exception | None = None + try: + while byte_pos < total_bytes: + with self._play_state_lock: + ps = self._play_state + if ps is None or ps["stop"]: + interrupted = True + break + if ps["paused"]: + ps["pos"] = local_pos + interrupted = True + break + end = min(byte_pos + chunk_bytes, total_bytes) + try: + proc.stdin.write(pcm_bytes[byte_pos:end]) + proc.stdin.flush() + except (BrokenPipeError, OSError) as exc: + # paplay died mid-stream (USB unplugged, + # PulseAudio crashed, etc.). Abort entire + # clip — DO NOT let the outer loop respawn + # paplay; we just got hundreds of + # broken-pipe lines as a result of that bug. + try: + err = (proc.stderr.read() or b"").decode( + "utf-8", "replace").strip()[:400] + except Exception: + err = "" + log.warning("paplay died mid-stream (%s) " + "device=%s stderr=%s", + exc, sink_name or "default", err) + fatal_exc = _PulseOpenFailed( + f"paplay died: {err or exc}") + break + byte_pos = end + local_pos = byte_pos // bytes_per_sample + finally: + try: + proc.stdin.close() + except Exception: + pass + if interrupted or fatal_exc is not None: + proc.terminate() + try: + rc = proc.wait(timeout=3.0) + except subprocess.TimeoutExpired: + proc.kill() + rc = -1 + if rc != 0 and not interrupted and fatal_exc is None: + # Drained successfully but paplay exited non-zero + # — surface stderr so the failure isn't silent. 
+ try: + err = (proc.stderr.read() or b"").decode( + "utf-8", "replace").strip()[:300] + except Exception: + err = "" + log.warning("paplay exit rc=%d device=%s err=%s", + rc, sink_name or "default", err) + + if fatal_exc is not None: + # Re-raise OUTSIDE the inner try/finally so play_wav + # catches it and falls back to G1 DDS chest. Without + # this, the outer `while True` loop would respawn + # paplay and we'd loop indefinitely. + with self._play_state_lock: + self._play_state = None + raise fatal_exc + + if not interrupted: + with self._play_state_lock: + if self._play_state is not None: + self._play_state["pos"] = total_samples + break + finally: + with self._play_state_lock: + self._play_state = None + + def _play_pcm_via_pulse(self, pcm_bytes: bytes, channels: int, + sample_rate: int, sample_width: int, + record_name: str | None = None) -> None: + """Play int16 PCM via PyAudio (→ PulseAudio default sink) with + pause / resume / stop support. + + Mirrors `_play_pcm_via_g1`'s state-poll pattern so the dashboard's + Play / Pause / Stop / Position buttons behave identically whether + the active profile uses DDS or PyAudio. Writes ~100 ms chunks so + pause / stop latency is bounded. + """ + # Make sure pactl defaults reflect the current selection — this is + # a no-op when the watcher or dashboard Apply already aligned them + # (throttled so the multi-shell pactl scan doesn't run per clip). + self.ensure_audio_defaults() + + # Resample to a USB-native rate before opening the stream. + # PortAudio's ALSA backend (the one PyAudio uses) opens the underlying + # hardware via the ALSA 'pulse' plugin, which on this Jetson does + # NOT advertise rate conversion in `snd_pcm_hw_params` — so opening + # at the WAV's native rate (24kHz from Gemini TTS, etc.) gets + # rejected with paInvalidSampleRate. Resampling app-side mirrors + # what `_play_pcm_via_g1` already does for the DDS path. 
Anker + # PowerConf and most USB UAC1 cards report 48kHz s16le stereo + # natively, so target that. + if _HAS_NUMPY and sample_width == 2 and sample_rate != self._PULSE_TARGET_RATE: + try: + pcm_bytes = self._resample_pcm16( + pcm_bytes, channels, sample_rate, self._PULSE_TARGET_RATE, + ) + log.info("_play_pcm_via_pulse: resampled %dHz → %dHz " + "(USB card native rate)", + sample_rate, self._PULSE_TARGET_RATE) + sample_rate = self._PULSE_TARGET_RATE + except Exception as exc: + log.warning("_play_pcm_via_pulse: resample failed (%s) — " + "trying native rate, may hit paInvalidSampleRate", + exc) + + bytes_per_sample = max(1, channels * sample_width) + total_bytes = len(pcm_bytes) - (len(pcm_bytes) % bytes_per_sample) + total_samples = total_bytes // bytes_per_sample + chunk_bytes = max(bytes_per_sample, (sample_rate // 10) * bytes_per_sample) + + with self._play_state_lock: + self._play_state = { + "record_name": record_name, + "rate": sample_rate, + "total_samples": total_samples, + "pos": 0, + "paused": False, + "stop": False, + "play_started_at": 0.0, + "play_started_pos": 0, + } + + # play_lock serialises overlapping play_wav() calls; pause/resume/stop + # only touch _play_state under _play_state_lock so they don't block. 
+ with self.play_lock: + try: + while True: + # Snapshot — decide whether to play, wait, or exit + with self._play_state_lock: + st = self._play_state + if st is None or st["stop"]: + break + if st["paused"]: + paused_now = True + pos = 0 + else: + paused_now = False + pos = st["pos"] + if pos >= total_samples: + break + st["play_started_pos"] = pos + st["play_started_at"] = time.time() + if paused_now: + time.sleep(0.1) + continue + + byte_pos = pos * bytes_per_sample + local_pos = pos + try: + stream = self.pya.open( + format=self.pya.get_format_from_width(sample_width), + channels=channels, + rate=sample_rate, + output=True, + output_device_index=self._pulse_device_index(), + frames_per_buffer=CHUNK_SIZE, + ) + except Exception as exc: + # PortAudio open failed (sink gone, paBadIODevice + # combination, etc.). Signal the caller so play_wav + # can fall back to DDS chest rather than silently + # dropping the clip. + log.warning("Pulse playback open failed: %s — " + "signalling caller for DDS fallback", exc) + with self._play_state_lock: + self._play_state = None + raise _PulseOpenFailed(str(exc)) from exc + interrupted = False + try: + while byte_pos < total_bytes: + with self._play_state_lock: + ps = self._play_state + if ps is None or ps["stop"]: + interrupted = True + break + if ps["paused"]: + ps["pos"] = local_pos + interrupted = True + break + end = min(byte_pos + chunk_bytes, total_bytes) + try: + stream.write(pcm_bytes[byte_pos:end]) + except Exception as exc: + log.warning("Pulse playback write failed: %s", exc) + interrupted = True + break + byte_pos = end + local_pos = byte_pos // bytes_per_sample + finally: + try: + stream.stop_stream() + stream.close() + except Exception: + pass + + if not interrupted: + with self._play_state_lock: + if self._play_state is not None: + self._play_state["pos"] = total_samples + break + # Interrupted by pause → outer loop will wait for resume + # or exit on stop. Interrupted by stop → outer loop exits. 
+ finally: + with self._play_state_lock: + self._play_state = None + + # -- recording -- + + def record_mic(self, duration_sec: float) -> bytes: + """Record from the resolved mic for *duration_sec* seconds, return raw PCM.""" + self.ensure_audio_defaults() + # Capture through PortAudio's 'pulse' device so we read the resolved + # default source — input_device_index=None defaults to the silent + # hw:0 platform-sound card on this Jetson's conda PyAudio. + stream = self.pya.open( + format=FORMAT, + channels=CHANNELS, + rate=RECEIVE_SAMPLE_RATE, + input=True, + input_device_index=self._pulse_device_index(), + frames_per_buffer=CHUNK_SIZE, + ) + frames: list[bytes] = [] + total_chunks = int(RECEIVE_SAMPLE_RATE / CHUNK_SIZE * duration_sec) + try: + for _ in range(total_chunks): + frames.append(stream.read(CHUNK_SIZE, exception_on_overflow=False)) + finally: + stream.stop_stream() + stream.close() + return b"".join(frames) + + def save_wav(self, pcm_bytes: bytes, path: Path, channels: int, sample_rate: int): + path.parent.mkdir(parents=True, exist_ok=True) + with wave.open(str(path), "wb") as wf: + wf.setnchannels(channels) + wf.setsampwidth(self.sample_width()) + wf.setframerate(sample_rate) + wf.writeframes(pcm_bytes) diff --git a/vendor/Sanad/voice/live_voice.py b/vendor/Sanad/voice/live_voice.py new file mode 100644 index 0000000..1075841 --- /dev/null +++ b/vendor/Sanad/voice/live_voice.py @@ -0,0 +1,73 @@ +"""Live Voice Commands — voice-to-arm phrase trigger dispatcher. + +Listens to GeminiSubprocess user transcripts, matches against +sanad_arm.txt phrases, and fires ARM.trigger_action_by_id. + +Endpoints: + POST /start begin polling transcripts + POST /stop stop polling + POST /deferred-mode?enabled toggle instant vs deferred trigger + POST /trigger-enabled?enabled master gate — allow arm actions or not + GET /status running, last heard, last action, etc. 
def _loop():
    """Return the shared LiveVoiceLoop, or raise HTTP 503 when it is unset.

    ``live_voice`` is imported from ``Project.Sanad.main`` inside the function,
    so the module attribute is read on every call.
    """
    from Project.Sanad.main import live_voice

    if live_voice is not None:
        return live_voice
    raise HTTPException(503, "LiveVoiceLoop not initialized.")


@router.get("/status")
async def status():
    """Report loop status; answers ``{"available": False}`` (not 503) when unset."""
    from Project.Sanad.main import live_voice

    if live_voice is None:
        return {"available": False}
    report = {"available": True}
    report.update(live_voice.status())
    return report


@router.post("/start")
async def start():
    """Start transcript polling and return the resulting status."""
    voice_loop = _loop()
    await voice_loop.start()
    reply = {"ok": True}
    reply.update(voice_loop.status())
    return reply


@router.post("/stop")
async def stop():
    """Stop transcript polling and return the resulting status."""
    voice_loop = _loop()
    await voice_loop.stop()
    reply = {"ok": True}
    reply.update(voice_loop.status())
    return reply


@router.post("/deferred-mode")
async def set_deferred(enabled: bool):
    """Toggle instant vs deferred arm triggering."""
    voice_loop = _loop()
    voice_loop.set_deferred(enabled)
    current = voice_loop.deferred_mode
    return {"ok": True, "deferred_mode": current}


@router.post("/trigger-enabled")
async def set_trigger_enabled(enabled: bool):
    """Master gate for voice → arm triggering. Default OFF."""
    voice_loop = _loop()
    voice_loop.set_trigger_enabled(enabled)
    current = voice_loop.trigger_enabled
    return {"ok": True, "trigger_enabled": current}


@router.get("/triggers")
async def triggers():
    """Return the arm trigger history plus the size of the dispatch table."""
    voice_loop = _loop()
    history = list(voice_loop.triggers)
    return {
        "triggers": history,
        "total": len(voice_loop.triggers),
        "dispatch_actions": len(voice_loop.wake_dispatch),
    }
class LiveVoiceLoop:
    """Poll GeminiSubprocess user transcripts and fire matching arm actions.

    A daemon thread started by :meth:`start` repeatedly snapshots
    ``live_sub.user_transcript``, hands only the newly appended lines to
    ``maybe_trigger_arm`` and fires any deferred trigger whose fallback time
    has elapsed. Arm actions only fire while ``trigger_enabled`` is True.
    """

    def __init__(self, voice_client, arm, wake_mgr, audio_mgr):
        """Store collaborators, initialise trigger state and load the phrase table.

        Args:
            voice_client: Object whose ``connected`` attribute is reported by
                :meth:`status`. May be ``None``.
            arm: Sanad's motion/arm_controller (stored, not used for trigger).
            wake_mgr: Optional manager exposing ``action_phrase_map()``; its
                phrases are merged into ``wake_dispatch``. May be ``None``.
            audio_mgr: Only its presence is reported by :meth:`status`.
        """
        self.voice_client = voice_client
        self.arm = arm  # Sanad's motion/arm_controller (not used for trigger)
        self.wake_mgr = wake_mgr
        self.audio_mgr = audio_mgr

        self._running = False
        self._poll_thread: threading.Thread | None = None
        # Replaced by a fresh Event on every start() so a previous poll thread
        # can never be revived by a later start().
        self._stop_event = threading.Event()

        # Deferred-trigger toggle (fire on phrase match vs fire after AI responds)
        self.deferred_mode = DEFERRED_DEFAULT
        # Master arm-trigger gate — when False, transcripts are still
        # captured (you can watch them on the dashboard) but NO arm
        # actions fire. Defaults to OFF so the robot doesn't move
        # unexpectedly until the operator opts in.
        self.trigger_enabled = TRIGGER_ENABLED_DEFAULT

        # Trigger history (dashboard log)
        self.triggers: deque[dict[str, Any]] = deque(maxlen=TRIGGER_LOG_SIZE)
        self.last_heard: str = ""
        self.last_action: str = ""

        # ASR dispatch state (SimpleNamespace — maybe_trigger_arm mutates attrs)
        self.state = SimpleNamespace()

        # Guards the cross-thread transcript cursor + pending-trigger state.
        # set_trigger_enabled() runs on the FastAPI event-loop thread while the
        # poll daemon mutates the same fields — without this lock a concurrent
        # update can tear the cursor / pending flags.
        self._trigger_lock = threading.Lock()

        # Transcript consumption is tracked by POSITION, not by content, so a
        # repeated identical command (e.g. "wave" said twice) re-fires. We
        # remember the last deque snapshot we processed and only dispatch the
        # newly-appended tail. (live_sub.user_transcript is an append-only,
        # left-evicting deque, so the new lines are always at the right.)
        self._last_snapshot: list[str] = []

        # Load sanad_arm.txt on first construction
        self.wake_dispatch: dict[int, set[str]] = {}
        self.option_by_id: dict[int, Any] = {}
        self.sanad_arm: Any = None
        self._load_dispatch()

    # ── phrase dispatch loader ────────────────────────────────────
    def _load_dispatch(self):
        """Load the arm controller and build ``wake_dispatch`` (action id → phrases).

        Any failure is logged and leaves voice triggering disabled
        (``sanad_arm`` is None and ``wake_dispatch`` is empty).
        """
        try:
            from Project.Sanad.motion.sanad_arm_controller import ARM, OPTION_LIST, OPTION_BY_ID
            self.sanad_arm = ARM
            self.option_by_id = OPTION_BY_ID
            # Voice-trigger policy: SDK built-ins only.
            # JSONL replays (option.file set) are dashboard-only — voice
            # phrase blocks for laugh/bird/change_battery/move_* never
            # reach `wake_dispatch`, so a matched phrase for one of those
            # silently no-ops in voice mode.
            sdk_only_options = [o for o in OPTION_LIST if not getattr(o, "file", "")]
            filtered_out = [o.name for o in OPTION_LIST if getattr(o, "file", "")]
            if SANAD_ARM_TXT.exists():
                self.wake_dispatch = load_arm_phrase_dispatch(SANAD_ARM_TXT, sdk_only_options)
                log.info(
                    "loaded %d arm-action phrase sets from %s "
                    "(SDK-only filter: %d/%d options)",
                    len(self.wake_dispatch), SANAD_ARM_TXT.name,
                    len(sdk_only_options), len(OPTION_LIST),
                )
                # Make the silent no-op observable: file-backed replays
                # (laugh/bird/change_battery/move_*) are excluded from voice
                # dispatch, so a spoken phrase for one of these does nothing.
                if filtered_out:
                    log.warning(
                        "voice arm dispatch EXCLUDES %d file-backed action(s) "
                        "(dashboard-only, no voice trigger): %s",
                        len(filtered_out), ", ".join(filtered_out),
                    )
            else:
                log.warning("sanad_arm.txt missing at %s — arm trigger disabled",
                            SANAD_ARM_TXT)
            # Fold in operator-editable WakePhraseManager entries so dashboard
            # CRUD (data/wake_phrases.json) actually affects voice triggering.
            self._merge_wake_phrases()
        except Exception as exc:
            log.warning("arm dispatch unavailable: %s", exc)
            self.sanad_arm = None
            self.wake_dispatch = {}

    def _merge_wake_phrases(self) -> None:
        """Merge WakePhraseManager phrases into ``wake_dispatch``.

        Only entries whose action key resolves to a voice-eligible (not
        file-backed) arm option are merged, matched by integer id first and
        then by lower-cased option name. Everything else is skipped with a
        warning so a mistyped or file-backed action can't misfire the arm.
        """
        if self.wake_mgr is None:
            return
        try:
            from Project.Sanad.motion.sanad_arm_controller import (
                OPTION_BY_ID, OPTION_BY_NAME,
            )
            amap = self.wake_mgr.action_phrase_map()
        except Exception as exc:
            log.warning("wake_phrase merge unavailable: %s", exc)
            return
        merged = skipped = 0
        for action_id_str, phrases in amap.items():
            opt = None
            key = str(action_id_str).strip()
            # Resolve by integer id first, then by option name. isdecimal()
            # (not isdigit()) is used because int() rejects digit-like
            # characters such as "²"; the resulting ValueError would escape
            # into _load_dispatch and disable ALL voice dispatch.
            if key.isdecimal():
                opt = OPTION_BY_ID.get(int(key))
            if opt is None:
                opt = OPTION_BY_NAME.get(key.lower())
            if opt is None or getattr(opt, "file", ""):
                # Unknown action, or a file-backed replay (voice-excluded).
                skipped += 1
                log.warning("wake phrase action %r not voice-eligible — skipped",
                            action_id_str)
                continue
            bucket = self.wake_dispatch.setdefault(opt.id, set())
            # `phrases or ()` keeps a None/empty phrase list from aborting the merge.
            bucket.update(p for p in (phrases or ()) if p)
            merged += 1
        if merged or skipped:
            log.info("merged WakePhraseManager entries (%d actions merged, %d skipped)",
                     merged, skipped)

    # ── lifecycle ────────────────────────────────────────────────
    async def start(self) -> None:
        """Start the polling daemon thread; no-op when already running."""
        if self._running:
            return
        # A fresh Event per run: clearing a shared Event could revive a
        # previous poll thread that had not yet observed stop(), leaving two
        # threads dispatching (and double-firing) the same transcripts.
        stop_event = threading.Event()
        self._stop_event = stop_event
        self._running = True
        self._poll_thread = threading.Thread(
            target=self._poll_loop, args=(stop_event,),
            daemon=True, name="live_voice_loop")
        self._poll_thread.start()
        log.info("LiveVoiceLoop started (deferred=%s, dispatch=%d)",
                 self.deferred_mode, len(self.wake_dispatch))

    async def stop(self) -> None:
        """Signal the current poll thread to exit; does not wait for it."""
        self._stop_event.set()
        self._running = False
        log.info("LiveVoiceLoop stopped")

    def set_deferred(self, enabled: bool) -> None:
        """Select deferred (True) or instant (False) arm triggering."""
        self.deferred_mode = bool(enabled)

    def set_trigger_enabled(self, enabled: bool) -> None:
        """Master arm-trigger gate. When False, phrase matches are ignored.

        Toggle semantics (no queue memory across the gate):
          - Always clears any in-flight pending trigger so a late
            fallback fire can't happen after disable/enable.
          - On enable: snapshots every transcript currently in the
            live_sub deque as already-seen. Only NEW speech after this
            moment will dispatch — phrases said while the gate was off
            don't suddenly fire when you turn it back on.
        """
        self.trigger_enabled = bool(enabled)

        with self._trigger_lock:
            # Drop pending fallback timer — a queued deferred fire from
            # before the toggle must not leak across.
            self.state._pending_arm_wave = False
            self.state._pending_arm_wave_fired = False
            self.state._pending_arm_trigger_fn = None
            self.state._pending_arm_fallback_time = 0.0

            snapshotted = 0
            if self.trigger_enabled:
                # Mark everything currently in the deque as already consumed
                # (by position) so only speech after this moment dispatches.
                try:
                    from Project.Sanad.main import live_sub
                    if live_sub is not None:
                        self._last_snapshot = list(live_sub.user_transcript)
                        snapshotted = len(self._last_snapshot)
                except Exception as exc:
                    log.warning("set_trigger_enabled: snapshot failed: %s", exc)

        log.info("trigger_enabled=%s (pending cleared, %d transcripts marked seen)",
                 self.trigger_enabled, snapshotted)

    # ── poll loop ────────────────────────────────────────────────
    def _poll_loop(self, stop_event: "threading.Event | None" = None):
        """Poll for new user transcripts AND fire elapsed deferred triggers.

        Without the pending-check, a deferred trigger would only fire when the
        NEXT transcript arrives — so if the user says one sentence and stops,
        the arm never moves.

        Args:
            stop_event: Event that ends this run. Defaults to the instance's
                current ``_stop_event`` for callers that pass nothing.
        """
        if stop_event is None:
            stop_event = self._stop_event
        while not stop_event.is_set():
            self._check_transcripts()
            self._check_pending_trigger()
            stop_event.wait(POLL_INTERVAL_SEC)

    def _check_pending_trigger(self):
        """Fire a queued deferred trigger if its fallback time has passed."""
        # Master gate — same check as _dispatch
        if not self.trigger_enabled:
            return
        # Read-and-claim the pending trigger under the lock so a concurrent
        # set_trigger_enabled() (FastAPI thread) can't clear it mid-fire and
        # cause a stray or lost deferred arm action.
        with self._trigger_lock:
            if not getattr(self.state, "_pending_arm_wave", False):
                return
            if getattr(self.state, "_pending_arm_wave_fired", False):
                return
            fn = getattr(self.state, "_pending_arm_trigger_fn", None)
            if fn is None:
                return
            fallback_at = float(getattr(self.state, "_pending_arm_fallback_time", 0.0) or 0.0)
            if fallback_at <= 0.0 or time.time() < fallback_at:
                return
            # Gate on arm idle — skip fire if a motion is already running
            if self.sanad_arm is not None and getattr(self.sanad_arm, "_is_busy", False):
                return
            # Claim it now (still under the lock) so it fires exactly once.
            self.state._pending_arm_wave_fired = True
            self.state._pending_arm_wave = False
            self.state._pending_arm_trigger_fn = None
        try:
            fn()
        except Exception as exc:
            log.warning("deferred arm trigger failed: %s", exc)

    @staticmethod
    def _new_tail(prev: list[str], curr: list[str]) -> list[str]:
        """Return the items appended to ``curr`` since ``prev`` was taken.

        ``curr`` is a snapshot of an append-only, left-evicting deque. The new
        lines are the suffix of ``curr`` that wasn't present at the end of
        ``prev``. We find the largest overlap k where the tail of ``prev``
        equals the head of ``curr`` and return everything after it. This is
        position-based (not content-based), so a repeated identical command is
        treated as a genuinely new line and re-fires.
        """
        if not prev:
            return list(curr)
        max_k = min(len(prev), len(curr))
        for k in range(max_k, 0, -1):
            if prev[-k:] == curr[:k]:
                return list(curr[k:])
        # No overlap — the buffer rolled over entirely between polls; treat the
        # whole current snapshot as new.
        return list(curr)

    def _check_transcripts(self):
        """Dispatch every transcript line appended since the previous poll."""
        try:
            from Project.Sanad.main import live_sub
        except Exception:
            return
        if live_sub is None:
            return
        curr = list(live_sub.user_transcript)
        with self._trigger_lock:
            new_lines = self._new_tail(self._last_snapshot, curr)
            self._last_snapshot = curr
        # Dispatch only the newly-appended tail (outside the lock — _dispatch
        # may spawn an arm replay thread).
        for text in new_lines:
            self.last_heard = text
            self._dispatch(text)

    def _dispatch(self, transcript_text: str) -> None:
        """Offer one transcript line to each phrase set; record the first hit."""
        if not self.wake_dispatch or self.sanad_arm is None:
            return
        # Master gate — skip arm triggering entirely when disabled
        if not self.trigger_enabled:
            return
        # Arm ⇄ locomotion interlock — refuse voice gestures while the robot
        # may be walking. The authoritative check is sanad_arm._blocked() at
        # fire time (covers the deferred path too); refuse early here so the
        # block is observable and we don't queue a deferred fire that would be
        # silently dropped later.
        try:
            if self.sanad_arm._blocked():
                log.info("arm trigger refused — locomotion active (movement_active)")
                return
        except Exception as exc:
            # Best-effort early check only; leave a trace instead of hiding it.
            log.debug("arm _blocked() check failed: %s", exc)
        # Gate trigger on arm idle
        if getattr(self.sanad_arm, "_is_busy", False):
            return

        fire_now = not self.deferred_mode

        for action_id, phrases in self.wake_dispatch.items():
            fn = self._make_trigger_fn(action_id)
            fired = maybe_trigger_arm(
                self.state, transcript_text, phrases,
                fire_on_wake_match=fire_now,
                arm_trigger_fn=fn,
            )
            if fired:
                self._record_trigger(action_id, transcript_text, fire_now)
                break

    def _make_trigger_fn(self, action_id: int):
        """Return a zero-argument callable that triggers ``action_id`` on the arm."""
        def _fire():
            try:
                self.sanad_arm.trigger_action_by_id(action_id)
            except Exception as exc:
                log.warning("arm trigger failed (id=%d): %s", action_id, exc)
        return _fire

    def _record_trigger(self, action_id: int, user_text: str, fired_now: bool):
        """Append a trigger entry to the dashboard history and log it."""
        opt = self.option_by_id.get(action_id)
        action_name = opt.name if opt else f"id={action_id}"
        self.last_action = action_name
        mode = "instant" if fired_now else "deferred"
        self.triggers.append({
            "time": datetime.now().strftime("%H:%M:%S"),
            "user_text": user_text,
            "action_id": action_id,
            "action_name": action_name,
            "mode": mode,
        })
        log.info("arm trigger %s (id=%d) for: %r [%s]",
                 action_name, action_id, user_text, mode)

    # ── status (dashboard) ───────────────────────────────────────
    def status(self) -> dict[str, Any]:
        """Return a dashboard snapshot of the loop state (last 30 triggers)."""
        pending = ""
        # getattr for both fields, like _check_pending_trigger: the state
        # namespace starts empty, so either attribute may not exist yet.
        if (getattr(self.state, "_pending_arm_wave", False)
                and getattr(self.state, "_pending_arm_trigger_fn", None) is not None):
            # We can't introspect the action id from fn (closure), but
            # the last triggered line in self.triggers is likely the one.
            pend_name = self.triggers[-1].get("action_name", "") if self.triggers else ""
            pending = f"pending: {pend_name}"

        return {
            "running": self._running,
            "deferred_mode": self.deferred_mode,
            "trigger_enabled": self.trigger_enabled,
            "last_heard": self.last_heard,
            "pending_action": pending,
            "last_action": self.last_action,
            "audio_attached": self.audio_mgr is not None,
            "arm_attached": self.sanad_arm is not None,
            "gemini_connected": bool(
                self.voice_client and self.voice_client.connected),
            "dispatch_actions": len(self.wake_dispatch),
            "triggers": list(self.triggers)[-30:],
        }
# Arabic diacritics (tashkeel) Unicode range – model was trained without them.
_DIACRITICS_RE = re.compile(r"[\u0617-\u061A\u064B-\u0652\u0670\u06D6-\u06ED]")


def strip_diacritics(text: str) -> str:
    """Return *text* with every character matched by ``_DIACRITICS_RE`` removed."""
    return _DIACRITICS_RE.sub("", text)


class LocalTTSEngine:
    """Lazy-loading wrapper around the local SpeechT5 Arabic TTS model.

    Nothing heavy is imported or loaded until the first :meth:`synthesize`
    call; loading is serialised by an internal lock so concurrent first calls
    load the model once.
    """

    def __init__(self):
        """Create an unloaded engine; model objects are filled in lazily."""
        self._lock = threading.Lock()
        self._loaded = False
        self._processor = None
        self._model = None
        self._vocoder = None
        self._speaker_embedding = None

    def _ensure_loaded(self):
        """Load processor, model, vocoder and speaker embedding exactly once.

        Raises:
            RuntimeError: If the model dir, vocoder dir or x-vector file is missing.
        """
        if self._loaded:
            return
        with self._lock:
            # Re-check under the lock: another thread may have finished loading
            # while this one was waiting.
            if self._loaded:
                return

            for label, p in [("Model", MODEL_DIR), ("Vocoder", VOCODER_DIR), ("XVector", XVECTOR_PATH)]:
                if not p.exists():
                    raise RuntimeError(f"{label} not found at {p}")

            import torch
            from transformers import (
                SpeechT5ForTextToSpeech,
                SpeechT5HifiGan,
                SpeechT5Processor,
            )

            self._processor = SpeechT5Processor.from_pretrained(MODEL_ID)
            self._model = SpeechT5ForTextToSpeech.from_pretrained(MODEL_ID)
            self._vocoder = SpeechT5HifiGan.from_pretrained(VOCODER_ID)
            # NOTE(review): torch.load unpickles the file — only point
            # XVECTOR_PATH at a trusted, locally provisioned embedding.
            self._speaker_embedding = torch.load(str(XVECTOR_PATH), map_location="cpu")

            # Set last, so a failed load leaves the engine retryable.
            self._loaded = True

    @property
    def ready(self) -> bool:
        """True once the model has been loaded successfully."""
        return self._loaded

    def status(self) -> dict[str, Any]:
        """Return load state plus the configured paths and whether they exist."""
        return {
            "loaded": self._loaded,
            "model_dir": str(MODEL_DIR),
            "vocoder_dir": str(VOCODER_DIR),
            "xvector_path": str(XVECTOR_PATH),
            "model_exists": MODEL_DIR.exists(),
            "vocoder_exists": VOCODER_DIR.exists(),
            "xvector_exists": XVECTOR_PATH.exists(),
            "sample_rate": SAMPLE_RATE,
        }

    def synthesize(self, text: str) -> bytes:
        """Convert Arabic text to 16 kHz mono int16 PCM bytes.

        Raises:
            RuntimeError: If *text* is empty once diacritics and surrounding
                whitespace are removed, or if the model files are missing.
        """
        # Validate BEFORE loading: empty input must fail fast with the
        # empty-text error instead of paying for (or failing on) a model load.
        # Strip whitespace after removing diacritics so text made only of
        # diacritics and spaces is also recognised as empty.
        clean_text = strip_diacritics(text).strip()
        if not clean_text:
            raise RuntimeError("Text is empty after stripping diacritics.")

        self._ensure_loaded()
        import torch

        inputs = self._processor(text=clean_text, return_tensors="pt")

        with torch.no_grad():
            speech = self._model.generate_speech(
                inputs["input_ids"],
                self._speaker_embedding,
                vocoder=self._vocoder,
            )

        # speech is a 1-D float32 tensor in [-1, 1] at 16 kHz
        pcm_float = speech.numpy()
        # Convert float32 → int16 PCM bytes
        pcm_int16 = (pcm_float * 32767).clip(-32768, 32767).astype("int16")
        return pcm_int16.tobytes()

    def synthesize_wav(self, text: str) -> bytes:
        """Return a complete WAV file (bytes) for the given text."""
        import io
        import wave

        pcm = self.synthesize(text)
        buf = io.BytesIO()
        with wave.open(buf, "wb") as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(2)  # int16
            wf.setframerate(SAMPLE_RATE)
            wf.writeframes(pcm)
        return buf.getvalue()
Register the new brain in `voice/sanad_voice.py` inside + `_build_brain()` (there's a single `elif` to add). + 5. Run with `SANAD_VOICE_BRAIN=openai python3 voice/sanad_voice.py eth0`. + +Contract that `sanad_voice.py` expects of ANY brain: + __init__(audio_io, recorder, voice_name, system_prompt) + audio_io — voice.audio_io.AudioIO (exposes .mic + .speaker) + recorder — voice.sanad_voice.TurnRecorder (per-turn WAV capture) + voice_name — provider-specific voice id (e.g. "Charon", "alloy") + system_prompt — persona string to seed the session with + async run() — blocks until stopped or fatal. Reconnects are YOUR + responsibility; the orchestrator won't restart you. + stop() — sync signal (can be called from a signal handler). + Set an asyncio.Event and let `run()` notice it. + +What the mic side looks like: + data = self._mic.read_chunk(n_bytes) # 16 kHz int16 mono bytes + # send `data` to your model's realtime-audio endpoint + +What the speaker side looks like: + self._speaker.begin_stream() + self._speaker.send_chunk(pcm, source_rate=24000) # rate is yours + self._speaker.wait_finish() # blocks until playback drains + # or self._speaker.stop() # cancel mid-playback (barge-in) + +What the recorder side looks like: + self._recorder.capture_user(pcm_bytes) # mic audio for this turn + self._recorder.capture_robot(pcm_bytes) # model audio for this turn + self._recorder.add_user_text(str) # partial transcript + self._recorder.add_robot_text(str) # partial transcript + self._recorder.finish_turn() # flush to WAV + index.json +""" + +from __future__ import annotations + +import asyncio +from typing import Any, Optional + +from Project.Sanad.core.logger import get_logger + +log = get_logger("model_brain") + + +class ModelBrain: + """Skeleton voice brain — adapt to your provider.""" + + def __init__(self, audio_io, recorder, voice_name: Optional[str] = None, + system_prompt: str = ""): + self._audio = audio_io + self._mic = audio_io.mic + self._speaker = audio_io.speaker + 
self._recorder = recorder + self._voice = voice_name + self._system_prompt = system_prompt + self._stop_flag = asyncio.Event() + + # TODO: instantiate your provider's client here. Keep the client + # creation cheap — connection/handshake should happen inside `run()` + # so reconnects don't require re-building this object. + # Example: + # from openai import AsyncOpenAI + # self._client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"]) + self._client: Any = None + + # ─── lifecycle ──────────────────────────────────────── + + def stop(self) -> None: + """Signal the run loop to exit cleanly. Safe to call from anywhere.""" + self._stop_flag.set() + + async def run(self) -> None: + """Main conversation loop. Blocks until stopped. + + Responsibilities: + - Open a realtime session with your provider. + - Forward mic audio to the model in small chunks. + - Stream the model's audio response to the speaker. + - Drive barge-in: when the user speaks while the model is speaking, + cancel model playback and mark the turn interrupted. + - On disconnect/error, back off and reconnect. + """ + while not self._stop_flag.is_set(): + try: + log.info("connecting to model...") + # TODO: open a session with your provider. For websocket-style + # APIs, use `async with client.realtime.connect(...) as session:`. + # For request/response APIs, poll or stream in a loop. + await asyncio.gather( + self._send_mic_loop(), + self._receive_loop(), + ) + except asyncio.CancelledError: + break + except Exception as exc: + log.error("session error: %s — reconnecting in 2s", exc) + await asyncio.sleep(2) + + # ─── mic → model ────────────────────────────────────── + + async def _send_mic_loop(self) -> None: + """Read mic chunks and forward them to the model. + + Minimum responsibilities: + - Loop on `self._mic.read_chunk(N_BYTES)`. + - Encode to whatever format your provider expects + (PCM16 mono is standard; some want base64 in JSON frames). + - Respect `self._stop_flag`. 
+ + Optional (highly recommended): + - Measure energy; feed the mic frame to `self._recorder.capture_user` + only when the user is actually speaking. + - Apply echo suppression while the speaker is playing (mute or + substitute silence when energy is low — keeps the model from + transcribing its own voice bleed). + """ + chunk_bytes = 1024 # 32 ms at 16 kHz mono int16 — tune to your API + loop = asyncio.get_event_loop() + while not self._stop_flag.is_set(): + try: + data = await loop.run_in_executor( + None, self._mic.read_chunk, chunk_bytes, + ) + except Exception: + break + + # TODO: forward `data` to the model. Example for a hypothetical + # websocket session: + # await session.send({"type": "audio", "pcm16": data}) + _ = data + + # Pace to real-time so we don't starve the event loop + await asyncio.sleep(chunk_bytes / (16000 * 2)) + + # ─── model → speaker ────────────────────────────────── + + async def _receive_loop(self) -> None: + """Receive model events (audio chunks, transcripts, turn markers). + + Event handling you need to implement: + - Audio chunk → `self._speaker.send_chunk(pcm, source_rate)` + (first chunk must be preceded by + `self._speaker.begin_stream()`). + - Model interrupted → `self._speaker.stop(); self._mic.flush()` + and call `self._recorder.finish_turn()`. + - User transcript → `self._recorder.add_user_text(text)`. + - Model transcript → `self._recorder.add_robot_text(text)`. + - Turn complete → `self._speaker.wait_finish(); + self._recorder.finish_turn(); mic.flush()`. + """ + while not self._stop_flag.is_set(): + # TODO: iterate your provider's event stream and dispatch. + await asyncio.sleep(0.1) diff --git a/vendor/Sanad/voice/model_subprocess.py b/vendor/Sanad/voice/model_subprocess.py new file mode 100644 index 0000000..1587925 --- /dev/null +++ b/vendor/Sanad/voice/model_subprocess.py @@ -0,0 +1,147 @@ +"""Template supervisor — pair with voice/model_script.py when adding a new model. 
+ +The supervisor's job is to run a voice subprocess and tail its stdout for +state transitions + user transcripts. It is brand-specific on purpose: +each model's brain emits log lines in its own format, so each model gets +its own supervisor. See `gemini/subprocess.py` for the working reference. + +How to add a new model (e.g. OpenAI Realtime): + + 1. cp voice/model_script.py openai/script.py + 2. cp voice/model_subprocess.py openai/subprocess.py + 3. In both files: rename `ModelBrain` → `OpenAIRealtimeBrain`, + `ModelSubprocess` → `OpenAIRealtimeSubprocess`. + 4. In `openai/script.py`: fill in the TODO bodies (connect/send/receive). + Each `log.info("USER: %s", ...)` / `log.info("BOT: %s", ...)` / + state message must be a string your supervisor's `_track_line` below + can detect — keep them in lock-step. + 5. In `openai/subprocess.py`: update `_track_line` to match the strings + your brain actually emits. + 6. In `main.py`: swap `GeminiSubprocess` → `OpenAIRealtimeSubprocess` in + the `live_sub = _safe_construct(...)` line. In `voice/sanad_voice.py`, + add a branch to `_build_brain()` mapping `"openai"` → `OpenAIRealtimeBrain`. + 7. Run with `SANAD_VOICE_BRAIN=openai python3 voice/sanad_voice.py eth0`. + +Nothing in `gemini/` needs to change. +""" + +from __future__ import annotations + +import os +import signal +import subprocess +import sys +import threading +import time +from collections import deque +from datetime import datetime +from pathlib import Path +from typing import Any + +from Project.Sanad.config import BASE_DIR, LOGS_DIR, SCRIPTS_DIR +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.logger import get_logger + +log = get_logger("model_subprocess") + + +class ModelSubprocess: + """Skeleton supervisor — adapt for your model. + + Contract expected by `main.py` + `dashboard/routes/live_subprocess.py`: + start() — sync. Spawns the child, starts the log reader thread. + stop() — sync. 
SIGINT / SIGTERM / SIGKILL escalation. + status() — returns {state, state_message, running, pid, log_tail, + user_transcript, last_user_text, ...}. + log_tail : deque[str] last N cleaned stdout lines + user_transcript : deque[str] user transcripts parsed from child's log + last_user_text : str most recent transcript (convenience) + state : str one of {"stopped", "starting", "connecting", + "listening", "hearing", "interrupting", + "error", "warning", "crashed"} + """ + + def __init__(self): + # TODO: set a config section key — e.g. `_cfg_section("openai", "subprocess")`. + # Create `config/_config.json > subprocess: { ... }` matching + # gemini_config.json's layout. + self._cfg = {} # _cfg_section("", "subprocess") + + self._lock = threading.Lock() + self.process: subprocess.Popen | None = None + self.log_tail: deque[str] = deque( + maxlen=self._cfg.get("log_tail_size", 2000)) + self.user_transcript: deque[str] = deque( + maxlen=self._cfg.get("transcript_tail_size", 30)) + self._reader_thread: threading.Thread | None = None + self._log_file = None + self.state = "stopped" + self.state_message = "Idle." + self.last_user_text = "" + + # ─── spawn / kill ───────────────────────────────────── + + def start(self) -> dict: + # TODO: build env (include `SANAD_VOICE_BRAIN=` so + # sanad_voice.py picks your brain), pick the script path, and + # `subprocess.Popen(...)`. Copy the gemini/subprocess.py body. + raise NotImplementedError + + def stop(self, timeout: float = 3.0) -> dict: + # TODO: send SIGINT → wait → SIGTERM → wait → SIGKILL. + raise NotImplementedError + + # ─── log parsing — the brand-specific part ──────────── + + def _track_line(self, line: str) -> None: + """Translate your brain's log strings into state + transcripts. + + KEEP THIS IN LOCK-STEP with the `log.info(...)` calls in your + brain. 
Minimum required detections: + + connecting — child opened a session to the model + listening — session connected OR a turn finished + hearing — user transcript arrived (APPEND to user_transcript) + interrupting — barge-in / model interrupted + error — fatal session error + stopped — clean shutdown + """ + # Example (replace with your brain's actual strings): + # + # if "connecting to OpenAI" in line: + # self._set_state("connecting", line) + # elif "session open" in line: + # self._set_state("listening", "Listening for speech.") + # elif "USER: " in line: + # text = line.split("USER: ", 1)[1].strip() + # if text: + # self.last_user_text = text + # self.user_transcript.append(text) + # self._set_state("hearing", f"User: {text}") + # elif "BARGE-IN" in line: + # self._set_state("interrupting", line) + # elif "session error" in line: + # self._set_state("error", line) + # elif "cancelled — stopping" in line: + # self._set_state("stopped", line) + raise NotImplementedError + + def _set_state(self, state: str, msg: str) -> None: + self.state = state + self.state_message = msg + + # ─── status + introspection ─────────────────────────── + + def status(self) -> dict: + with self._lock: + proc = self.process + running = proc is not None and proc.poll() is None + return { + "running": running, + "pid": proc.pid if running else None, + "state": self.state, + "state_message": self.state_message, + "last_user_text": self.last_user_text, + "log_tail": list(self.log_tail)[-50:], + "user_transcript": list(self.user_transcript), + } diff --git a/vendor/Sanad/voice/movement_dispatch.py b/vendor/Sanad/voice/movement_dispatch.py new file mode 100644 index 0000000..d9d010c --- /dev/null +++ b/vendor/Sanad/voice/movement_dispatch.py @@ -0,0 +1,390 @@ +"""MovementDispatcher — Gemini voice → locomotion (N2 Phase 3). + +The Marcus phrase-confirmation pattern, ported to Sanad. 
Gemini Live runs in a +child subprocess; the parent supervisor (gemini/subprocess.py) parses Gemini's +OWN spoken output into BOT: transcript lines and fires `on_bot_text(line)` here. + +Flow: + Gemini speaks a canonical confirmation phrase ("Turning right." / "أستدير + يميناً.") → on_bot_text() matches it against data/motions/instruction.json + → enqueues a canonical command → a worker thread drives loco_controller + (discrete steps that self-terminate with StopMove). + +Gating: every dispatch is gated on `recognition_state.movement_enabled` (the +"Enable Gemini movement" dashboard toggle) — SEPARATE from the manual +"Enable movement" arm flag (loco_controller._armed). When the flag is off, +matches are dropped. "stop" is honoured immediately (cross-thread abort + drain ++ E-STOP) regardless of cooldown. + +Safety: discrete `loco.step()` self-stops; velocity caps live in LocoController; +N-step / N-degree commands are bounded by instruction.json (max_steps, +max_degrees) and check the abort flag + enable gate between each step. Numbers +are kept verbatim but the degrees→steps and steps mapping is APPROXIMATE and +must be calibrated on the real robot. +""" + +from __future__ import annotations + +import json +import queue +import re +import threading +import time +from pathlib import Path +from typing import Any, List, Optional + +from Project.Sanad.core.logger import get_logger + +log = get_logger("movement_dispatch") + +_SENTINEL = object() +_STATE_CACHE_TTL = 0.5 # seconds — re-read recognition_state at most this often + +# Map a resolved canonical command to a LocoController discrete-step direction. 
+_FIXED_STEP = { + "move forward": "forward", + "move backward": "backward", + "turn right": "rotate_right", + "turn left": "rotate_left", + "slide left": "slide_left", + "slide right": "slide_right", +} + +# ── transcript cleaning (Marcus pattern) ────────────────────────────────────── +# Gemini's spoken text can CONTAIN our trigger phrases without intending a +# command — inside a question ("do you want me to move forward?"), a negation +# ("I'm not turning right"), a hypothetical ("I would be turning right"), a +# quote, or an echoed [STATE] tag. We drop those whole clauses before matching so +# only genuine confirmations actuate the robot. +_BRACKET_RE = re.compile(r"\[[^\]]*\]") # [STATE-DONE] echoes +_QUOTE_RE = re.compile(r"[\"'«»“”„‟‹›][^\"'«»“”„‟‹›]{0,80}?[\"'«»“”„‟‹›]") +_SENT_SPLIT_RE = re.compile(r"([.!?؟؛\n]+)") # keep delimiters +# NOTE: Arabic tokens are whitespace-delimited so we don't match a negation +# substring inside a real word — e.g. "ما" lives inside "أمام" (forward), "لا" +# inside many words. \b doesn't help for Arabic (all letters are \w), so we +# anchor on spaces/string-edges explicitly. +_NEG_RE = re.compile( + r"\b(?:not|never|without|cannot|would|could|should|might|instead|" + r"going to|want to|trying to|rather than)\b|\w+n['’]t\b|" + r"(?:^|\s)(?:لا|ما|لن|لم|مش|بدون|غير|لست|ليس|بدل)(?:\s|$)") + + +class MovementDispatcher: + def __init__(self, loco, instruction_path: Path, state_path: Path): + self._loco = loco + self._instruction_path = Path(instruction_path) + self._state_path = Path(state_path) + + self._queue: "queue.Queue[Any]" = queue.Queue(maxsize=32) + self._abort = threading.Event() + self._worker: Optional[threading.Thread] = None + self._running = False + # Hard-stop latch set by a dashboard E-STOP. Drops all voice commands + # until cleared (by re-enabling Gemini movement). Kept SEPARATE from the + # movement_enabled file flag so an E-STOP doesn't trigger the spoken + # "movement disabled" announcement. 
+ self._estop = False + + # dedup / cooldown + self._last_canon = "" + self._last_at = 0.0 + + # cached enable-flag + self._enabled_cached = False + self._enabled_at = 0.0 + + # config (filled by _load) + self._cooldown = 1.5 + self._max_steps = 8 + self._max_degrees = 360 + self._deg_per_step = 15 + self._fixed_patterns: List[tuple] = [] # (needle_lower, canonical) sorted long→short + self._parametric: List[tuple] = [] # (compiled_regex, template) + + self._load() + + # ── instruction.json ───────────────────────────────────────────────────── + + def _load(self): + try: + data = json.loads(self._instruction_path.read_text(encoding="utf-8")) + except Exception as exc: + log.error("could not load %s: %s — dispatcher inert", self._instruction_path, exc) + data = {} + self._cooldown = float(data.get("command_cooldown_sec", 1.5)) + self._max_steps = int(data.get("max_steps", 8)) + self._max_degrees = int(data.get("max_degrees", 360)) + self._deg_per_step = max(1, int(data.get("degrees_per_step", 15))) + + needles: List[tuple] = [] + for spec in (data.get("actions") or {}).values(): + canonical = spec.get("canonical", "") + phrases = spec.get("bot_phrases", {}) or {} + for lang_list in phrases.values(): + for p in lang_list: + if p: + # English folded to lower; Arabic unaffected by .lower() + needles.append((p.lower(), canonical)) + # longest needle first so "walking forward" wins over "forward" + needles.sort(key=lambda t: len(t[0]), reverse=True) + self._fixed_patterns = needles + + self._parametric = [] + for pa in (data.get("parametric_actions") or []): + try: + self._parametric.append((re.compile(pa["regex"], re.IGNORECASE), pa["canonical"])) + except re.error as exc: + log.warning("bad parametric regex %r: %s", pa.get("regex"), exc) + log.info("instruction.json loaded: %d fixed phrases, %d parametric, cooldown=%.1fs", + len(self._fixed_patterns), len(self._parametric), self._cooldown) + + # ── lifecycle ───────────────────────────────────────────────────────────── 
+ + def start(self): + if self._running: + return + self._running = True + self._worker = threading.Thread(target=self._worker_loop, daemon=True, + name="movement-dispatch") + self._worker.start() + log.info("movement dispatcher started") + + def stop(self): + self._running = False + self._abort.set() + try: + self._queue.put_nowait(_SENTINEL) + except queue.Full: + pass + + def status(self) -> dict: + return { + "running": self._running, + "movement_enabled": self._movement_enabled(force=True), + "estopped": self._estop, + "queue_depth": self._queue.qsize(), + "fixed_phrases": len(self._fixed_patterns), + "parametric": len(self._parametric), + } + + # ── E-STOP latch ────────────────────────────────────────────────────────── + + def emergency_stop(self): + """Latch off after a dashboard E-STOP: abort the in-flight command, drain + the queue, and refuse new commands until clear_estop(). Does NOT touch the + movement_enabled file flag (so the Gemini child stays quiet).""" + self._estop = True + self._abort.set() + self._drain() + log.warning("movement dispatch E-STOP latch set") + + def clear_estop(self): + self._estop = False + + def is_estopped(self) -> bool: + return self._estop + + # ── enable gate ─────────────────────────────────────────────────────────── + + def _movement_enabled(self, force: bool = False) -> bool: + now = time.monotonic() + if not force and (now - self._enabled_at) < _STATE_CACHE_TTL: + return self._enabled_cached + try: + from Project.Sanad.vision import recognition_state + self._enabled_cached = bool(recognition_state.read(self._state_path).movement_enabled) + except Exception: + self._enabled_cached = False + self._enabled_at = now + return self._enabled_cached + + # ── transcript hook (called from the supervisor reader thread) ──────────── + + def on_bot_text(self, text: str): + if not text or not self._running or self._estop: + return + if not self._movement_enabled(): + return + cmds = self._match(text) + if not cmds: + return + now = 
time.monotonic() # monotonic — immune to NTP/wall-clock jumps + for c in cmds: + if c == "stop": + # Safety: preempt anything in flight immediately, then E-STOP. + self._abort.set() + self._drain() + self._enqueue("stop") + self._last_canon = "stop" + self._last_at = now + continue + # cross-turn cooldown: same canonical not re-fired too soon + if c == self._last_canon and (now - self._last_at) < self._cooldown: + continue + self._last_canon = c + self._last_at = now + self._enqueue(c) + + def _enqueue(self, cmd: str): + try: + self._queue.put_nowait(cmd) + except queue.Full: + log.warning("motion queue full — dropping %r", cmd) + + def _drain(self): + try: + while True: + self._queue.get_nowait() + except queue.Empty: + pass + + # ── matcher ─────────────────────────────────────────────────────────────── + + def _clean(self, text: str) -> str: + """Drop clauses that are NOT genuine motion confirmations: bracketed + [STATE] echoes, quoted spans, questions, and negation/hypothetical + sentences. Only the surviving clauses are matched.""" + t = _BRACKET_RE.sub(" ", text) + t = _QUOTE_RE.sub(" ", t) + parts = _SENT_SPLIT_RE.split(t) + kept: List[str] = [] + i = 0 + while i < len(parts): + seg = parts[i].strip() + delim = parts[i + 1] if i + 1 < len(parts) else "" + is_question = ("?" in delim) or ("؟" in delim) + if seg and not is_question and not _NEG_RE.search(seg.lower()): + kept.append(seg) + i += 2 + return " . ".join(kept) + + def _match(self, text: str) -> List[str]: + """Return canonical commands in spoken order. Parametric (with numbers) + claim their spans first so a bare phrase doesn't double-fire.""" + low = self._clean(text).lower() + matches: List[tuple] = [] # (start, canonical) + claimed: List[tuple] = [] # (start, end) spans already taken + + def overlaps(s, e): + return any(s < ce and cs < e for cs, ce in claimed) + + # 1) parametric first. 
Claim the span even when the quantity is zero so a + # mis-heard "0 steps" suppresses the bare phrase underneath (no surprise + # motion) rather than falling through to a single step. + for rx, template in self._parametric: + for m in rx.finditer(low): + if overlaps(*m.span()): + continue + claimed.append(m.span()) + canonical = self._format(template, m.groups()) + if canonical: + matches.append((m.start(), canonical)) + + # 2) fixed phrases (longest first), skipping claimed spans + for needle, canonical in self._fixed_patterns: + start = 0 + while True: + j = low.find(needle, start) + if j < 0: + break + end = j + len(needle) + if not overlaps(j, end): + matches.append((j, canonical)) + claimed.append((j, end)) + start = end + + matches.sort(key=lambda t: t[0]) + # de-dup consecutive repeats within this single line + out: List[str] = [] + for _, c in matches: + if not out or out[-1] != c: + out.append(c) + return out + + @staticmethod + def _format(template: str, groups) -> str: + out = template + for i, g in enumerate(groups, start=1): + out = out.replace(f"${i}", str(g)) + # reject zero-quantity motions ("walk 0 steps") + nums = re.findall(r"\d+", out) + if nums and all(int(n) == 0 for n in nums): + return "" + return out + + # ── worker ──────────────────────────────────────────────────────────────── + + def _worker_loop(self): + while self._running: + cmd = self._queue.get() + if cmd is _SENTINEL: + return + if cmd != "stop": + self._abort.clear() + if self._estop: + continue # E-STOP latched — drop everything + # force a fresh read — don't let the 0.5s cache execute a command + # after the operator just toggled movement off. + if cmd != "stop" and not self._movement_enabled(force=True): + continue # toggled off while queued — drop + # Never step while Nav2 owns the legs (autonomous goal in progress). + # Two stacks driving at once is the exact hazard _arbiter guards. + # STOP always passes through (safety). 
Read-only check — manual loco + # uses acquire_loco; the discrete-step voice path must only YIELD. + if cmd != "stop" and self._nav_holds_legs(): + log.info("voice movement dropped — Nav2 owns the legs (%r)", cmd) + continue + try: + self._execute(cmd) + except Exception: + log.exception("execute %r failed", cmd) + + def _execute(self, canonical: str): + c = canonical.lower().strip() + if c == "stop": + log.info("voice → STOP") + self._loco.estop() + return + + m = re.match(r"walk (forward|backward) (\d+) steps?$", c) + if m: + direction = "forward" if m.group(1) == "forward" else "backward" + n = min(int(m.group(2)), self._max_steps) + log.info("voice → walk %s %d steps", direction, n) + self._repeat_step(direction, n) + return + + m = re.match(r"turn (right|left) (\d+) degrees?$", c) + if m: + direction = "rotate_right" if m.group(1) == "right" else "rotate_left" + deg = min(int(m.group(2)), self._max_degrees) + n = max(1, round(deg / self._deg_per_step)) + log.info("voice → turn %s %d° (~%d steps)", m.group(1), deg, n) + self._repeat_step(direction, n) + return + + direction = _FIXED_STEP.get(c) + if direction: + log.info("voice → %s", c) + self._loco.step(direction) + return + log.debug("no loco mapping for canonical %r", c) + + @staticmethod + def _nav_holds_legs() -> bool: + """True if Nav2 currently owns the legs (in-process arbiter). 
Lazy + import so a missing/absent dashboard package never breaks voice.""" + try: + from Project.Sanad.dashboard.routes import _arbiter + return _arbiter.nav_active() + except Exception: + return False + + def _repeat_step(self, direction: str, n: int): + for _ in range(max(1, n)): + if (self._abort.is_set() or self._estop + or not self._movement_enabled(force=True) + or self._nav_holds_legs()): + log.info("voice multi-step aborted") + break + self._loco.step(direction) diff --git a/vendor/Sanad/voice/sanad_voice.py b/vendor/Sanad/voice/sanad_voice.py new file mode 100644 index 0000000..4746c46 --- /dev/null +++ b/vendor/Sanad/voice/sanad_voice.py @@ -0,0 +1,461 @@ +#!/usr/bin/env python3 +"""Sanad voice subprocess — orchestrator. + +Wires three independently-swappable pieces together: + + 1. Audio I/O — voice/audio_io.py (mic + speaker) + 2. Turn recorder — TurnRecorder (in this file; model-agnostic WAV capture) + 3. Voice brain — gemini/script.py (Gemini, default — cloud) + local/script.py (offline — Whisper+Qwen+CosyVoice2) + voice/model_script.py (template for new models) + +Runtime selection: + SANAD_AUDIO_PROFILE = builtin | anker | hollyland_builtin (default builtin) + SANAD_VOICE_BRAIN = gemini | local | model (default gemini) + +Usage: + python3 voice/sanad_voice.py eth0 + python3 voice/sanad_voice.py eth0 --voice Charon + SANAD_AUDIO_PROFILE=anker SANAD_VOICE_BRAIN=gemini \\ + python3 voice/sanad_voice.py eth0 + +System prompt priority (first hit wins): + 1. scripts/sanad_script.txt (edit-live via the dashboard) + 2. config/core_config.json > gemini_defaults.default_system_prompt + 3. 
the hardcoded fallback in _load_system_prompt() below +""" + +from __future__ import annotations + +import array +import asyncio +import importlib +import json +import logging +import os +import sys +import tempfile +import threading +import time +import types +import wave +from datetime import datetime +from pathlib import Path + +# ───────────────────────────────────────────────────────────────────────────── +# Layout bootstrap — MUST run before any `Project.Sanad.*` import. +# This file runs as a standalone subprocess (spawned by gemini/subprocess.py +# or local/subprocess.py); it can't rely on main.py having set up sys.path. +# Mirrors the dev-vs-deployed detection in main.py. +# dev layout: /Project/Sanad/voice/sanad_voice.py +# deployed layout: /home/unitree/Sanad/voice/sanad_voice.py +# ───────────────────────────────────────────────────────────────────────────── +_SANAD_DIR = Path(__file__).resolve().parent.parent # .../Sanad +_SANAD_PARENT = _SANAD_DIR.parent # .../Project OR /home/unitree + +if _SANAD_PARENT.name == "Project": + _ROOT = _SANAD_PARENT.parent + if str(_ROOT) not in sys.path: + sys.path.insert(0, str(_ROOT)) +else: + if str(_SANAD_PARENT) not in sys.path: + sys.path.insert(0, str(_SANAD_PARENT)) + if "Project" not in sys.modules: + _proj = types.ModuleType("Project") + _proj.__path__ = [] # namespace package marker + sys.modules["Project"] = _proj + if "Project.Sanad" not in sys.modules: + _sanad = importlib.import_module(_SANAD_DIR.name) + sys.modules["Project.Sanad"] = _sanad + sys.modules["Project"].Sanad = _sanad # type: ignore[attr-defined] + +from unitree_sdk2py.core.channel import ChannelFactoryInitialize +from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient + +from Project.Sanad.config import ( + GEMINI_VOICE, + RECEIVE_SAMPLE_RATE, + SCRIPTS_DIR, + SEND_SAMPLE_RATE, +) +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.voice.audio_io import AudioIO + +# ─── LOGGING 
───────────────────────────────────────────── + +_LOG_CFG = _cfg_section("voice", "sanad_voice") +LOG_DIR = os.path.expanduser(_LOG_CFG.get("log_dir", "~/logs")) +os.makedirs(LOG_DIR, exist_ok=True) +_LOG_NAME = _LOG_CFG.get("log_name", "gemini_live_v2") +LOG_FILE = os.path.join(LOG_DIR, f"{_LOG_NAME}_{datetime.now():%Y%m%d}.log") + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s.%(msecs)03d [%(levelname)s] %(message)s", + datefmt="%H:%M:%S", + handlers=[ + logging.FileHandler(LOG_FILE), + logging.StreamHandler(), + ], +) +log = logging.getLogger("sanad_voice") + + +# ─── CONFIG ────────────────────────────────────────────── + +_REC = _cfg_section("voice", "recording") +_SCRIPTS = _cfg_section("core", "script_files") +_GEMINI_DEFAULTS = _cfg_section("core", "gemini_defaults") + +_PERSONA_FILE = SCRIPTS_DIR / _SCRIPTS.get("persona", "sanad_script.txt") + +RECORD_ENABLED = os.environ.get( + "SANAD_RECORD", + "1" if _REC.get("enabled", True) else "0", +) != "0" +_REC_DIR_REL = _REC.get("dir_relative", "data/recordings") +RECORD_DIR = Path(os.environ.get( + "SANAD_RECORD_DIR", + str(Path(__file__).resolve().parent.parent / _REC_DIR_REL), +)) + +_FALLBACK_SYSTEM_PROMPT = ( + "You are Marcus, a bilingual humanoid robot assistant made by YS Lootah " + "Technology, Dubai, UAE. RESPOND IN ARABIC (Gulf/Emirati dialect) OR " + "ENGLISH ONLY. YOU MUST RESPOND UNMISTAKABLY IN THE SAME LANGUAGE THE " + "USER SPEAKS. If the user speaks Arabic, you MUST reply in Arabic Gulf " + "dialect. If the user speaks English, you MUST reply in English. Do NOT " + "confuse Arabic with Japanese, Hindi, Russian, or any other language. " + "The user is speaking Arabic or English — nothing else. Be concise — 1 " + "to 2 sentences max. Be friendly and natural. If the user interrupts " + "and says 'continue' or 'كمل', resume EXACTLY where you stopped. Only " + "respond to clear human speech. Ignore background noise and silence " + "completely. 
Do not respond to sounds that are not words." +) + + +# N2 Phase 3 — movement confirmation-phrase rules. The parent's +# MovementDispatcher drives the robot off Gemini's OWN spoken phrases, so Gemini +# must say one of these EXACT short phrases (per motion) when it agrees to move. +# Kept in lock-step with data/motions/instruction.json. Always appended; Gemini +# is told at runtime ([MOVEMENT ON/OFF/STATUS]) whether movement is enabled and +# must only confirm motion when it is ON. +_MOVEMENT_PROMPT_RULES = ( + "\n\n--- MOVEMENT (walking) ---\n" + "You can make the robot walk ONLY when you are told movement is ON " + "(you receive a [MOVEMENT ON] or [MOVEMENT STATUS] note). When movement is " + "OFF, never confirm a motion — tell the user to enable movement from the " + "dashboard.\n" + "When movement is ON and the user addresses you by name (Bousandah / بوسنده) AND " + "asks you to move, reply with ONE short confirmation phrase per requested " + "motion, in the SAME language, in the order asked. Use these EXACT shapes — " + "they are what triggers the motion:\n" + " forward : 'Walking forward.' / 'أمشي للأمام.'\n" + " backward : 'Walking back.' / 'أمشي للخلف.'\n" + " turn right : 'Turning right.' / 'أستدير يميناً.'\n" + " turn left : 'Turning left.' / 'أستدير يساراً.'\n" + " slide left : 'Sliding left.' / 'أنزلق لليسار.'\n" + " slide right : 'Sliding right.' / 'أنزلق لليمين.'\n" + " stop : 'Stopping.' / 'أتوقف.'\n" + "With a NUMBER, keep it: 'Walking forward 3 steps.' / 'أمشي للأمام 3 خطوات.' " + "— 'Turning right 90 degrees.' / 'أستدير يميناً 90 درجة.'\n" + "STOP is safety-first: if the user clearly wants the robot to halt " + "(stop/halt/wait/توقف/استنى), confirm 'Stopping.' / 'أتوقف.' immediately, " + "even without your name.\n" + "Never emit bracketed tags like [STATE-DONE] or numbers in parentheses — " + "speak only plain prose. Never include 0 or a negative quantity; if you " + "mis-hear a 0, drop the number and say the bare motion." 
+) + +# Native function-calling: Gemini can DRIVE the robot to saved map places via +# the navigate_to_place / list_places / where_am_i / stop_navigation tools. The +# tool schemas are declared in the Live config; this block tells Gemini WHEN and +# HOW to use them, and the safety constraints. Appended only when nav tools are +# enabled (SANAD_NAV_TOOLS != 0). +_NAV_TOOLS_ENABLED = os.environ.get("SANAD_NAV_TOOLS", "1") != "0" +_NAV_PROMPT_RULES = ( + "\n\n--- NAVIGATION (autonomous driving to places) ---\n" + "You can autonomously DRIVE the robot to a saved place on the loaded map " + "using your tools. This is different from step-by-step walking above.\n" + "- When the user asks to GO/MOVE/TAKE them to a named place (e.g. 'go to " + "the kitchen', 'خذني للاستقبال'), call the navigate_to_place tool with the " + "place name. Do NOT say the walking phrases above for this — the tool does " + "the driving.\n" + "- You can only drive to places that exist in the CURRENTLY loaded map. If " + "you are unsure which places exist, call list_places first and offer them.\n" + "- If the tool returns reason 'no_map', tell the user to load a map first. " + "If 'movement_off', tell them to enable movement. If 'ambiguous' or " + "'unknown_place', read back the candidate names and ask which one.\n" + "- After a successful navigate_to_place, briefly say you're heading there — " + "but do NOT claim you have arrived. You will receive a [NAV ARRIVED] note " + "when you actually arrive (then tell the user), or [NAV FAILED] if you " + "could not reach it (then apologise and say why).\n" + "- To stop an in-progress drive, call stop_navigation.\n" + "Keep all of this in your normal Khaleeji style." +) + + +def _load_system_prompt() -> str: + """scripts/ → config default → hardcoded fallback, with the + movement confirmation-phrase rules appended (N2 Phase 3). + + A missing persona file used to be silent — that hid a config-vs-filename + mismatch (e.g. 
`persona: "sanad_v2"` while only `sanad_script.txt` + existed) which made the robot fall back to the English default that + introduces itself as "Sanad" instead of using the Arabic persona on + disk. We now WARN so the same trap doesn't bite again. + + The persona is resolved HERE (at session start), not at import — so the + operator's Scripts Manager selection (a sanad_script_v*.txt variant) is + picked up on the next voice (re)connect. Falls back to sanad_script.txt.""" + # Resolve the selected persona variant (or the default sanad_script.txt). + try: + from Project.Sanad.core.persona import active_persona_path + persona_file = active_persona_path() + except Exception: + persona_file = _PERSONA_FILE + base = None + try: + text = persona_file.read_text(encoding="utf-8-sig").strip() + if text: + base = text + log.info("persona loaded: %s", persona_file.name) + except FileNotFoundError: + log.warning( + "Persona file not found at %s — falling back to " + "config.core.gemini_defaults.default_system_prompt. " + "Check `script_files.persona` in config/core_config.json " + "matches an actual file under scripts/.", persona_file, + ) + except (OSError, UnicodeDecodeError) as exc: + # An existing-but-unreadable persona file (bad encoding, permissions, a + # directory) must NOT crash the voice child — fall back to the default. 
def _audio_energy(pcm: bytes) -> int:
    """Return the mean absolute amplitude of signed 16-bit PCM audio.

    Used as a cheap level check (0 means silence or no usable data; larger
    values mean louder audio).

    Args:
        pcm: Raw signed 16-bit samples in the machine's native byte order.

    Returns:
        The integer mean of ``abs(sample)`` over all complete samples, or 0
        when *pcm* is empty, shorter than one sample, or not bytes-like.
    """
    if not pcm:
        return 0
    try:
        # A chunk may end mid-sample (odd byte count). array() rejects such
        # input outright, which used to make a live mic read as energy 0;
        # drop the dangling byte and measure the complete samples instead.
        width = array.array("h").itemsize
        usable = len(pcm) - (len(pcm) % width)
        samples = array.array("h", bytes(pcm[:usable]))
    except (TypeError, ValueError):
        # Not bytes-like: treat as "no signal" rather than crash the caller.
        return 0
    if not samples:
        return 0
    return sum(abs(s) for s in samples) // len(samples)
def finish_turn(self) -> dict:
    """Flush the buffered turn to disk and return its index entry.

    Method of ``TurnRecorder``. Under the lock it takes the buffered user
    and robot audio plus both transcripts and resets the buffers for the
    next turn. Outside the lock it writes up to two WAV files
    (``<stem>_user.wav`` / ``<stem>_robot.wav``) and appends the entry to
    the index via ``_append_index``.

    Returns:
        ``{}`` when recording is disabled or no audio was captured.
        Otherwise the entry dict with ``timestamp``, ``started_at``,
        ``user_text`` and ``robot_text``, plus ``*_wav`` and
        ``*_duration_sec`` for each stream that was saved. The entry is
        returned even if saving failed (the failure is logged), in which
        case some of the optional keys may be missing.
    """
    if not self.enabled:
        return {}
    with self._lock:
        user_data = b"".join(self._user_buf)
        robot_data = b"".join(self._robot_buf)
        user_text = self._user_text
        robot_text = self._robot_text
        started_at = self._started_at
        self._user_buf.clear()
        self._robot_buf.clear()
        self._user_text = ""
        self._robot_text = ""

    if not user_data and not robot_data:
        return {}

    stamp = datetime.fromtimestamp(started_at).strftime("%Y%m%d_%H%M%S")
    entry = {"timestamp": stamp, "started_at": started_at,
             "user_text": user_text, "robot_text": robot_text}
    try:
        # The stamp has one-second resolution, so two turns that start in
        # the same second would overwrite each other's WAV files. Pick the
        # first file stem that is free for BOTH streams.
        stem = stamp
        attempt = 1
        while ((self.out_dir / f"{stem}_user.wav").exists()
               or (self.out_dir / f"{stem}_robot.wav").exists()):
            attempt += 1
            stem = f"{stamp}_{attempt}"

        if user_data:
            p = self.out_dir / f"{stem}_user.wav"
            self._save_wav(p, user_data, self.user_rate)
            entry["user_wav"] = str(p)
            # 2 bytes per sample (16-bit mono, see _save_wav).
            entry["user_duration_sec"] = round(
                len(user_data) / (self.user_rate * 2), 3)
        if robot_data:
            p = self.out_dir / f"{stem}_robot.wav"
            self._save_wav(p, robot_data, self.robot_rate)
            entry["robot_wav"] = str(p)
            entry["robot_duration_sec"] = round(
                len(robot_data) / (self.robot_rate * 2), 3)
        self._append_index(entry)
        log.info("recorded turn → %s (user %.1fs, robot %.1fs)",
                 stem,
                 entry.get("user_duration_sec", 0),
                 entry.get("robot_duration_sec", 0))
    except Exception as exc:
        # Recording is best-effort: never let a disk problem break the turn.
        log.warning("recording save failed: %s", exc)
    return entry
payload = {"records": []} + else: + payload = {"records": []} + except Exception: + payload = {"records": []} + payload.setdefault("records", []).append(entry) + payload["total_records"] = len(payload["records"]) + # Atomic write (tempfile + os.replace) — an in-place write_text that is + # interrupted (the start_all.sh supervisor Ctrl+C-teardowns this voice + # child) can truncate index.json, so the next read falls back to an + # empty {"records": []} and silently drops all prior turn metadata. + # Mirrors voice/typed_replay._save_index. + fd, tmp = tempfile.mkstemp(dir=str(idx_path.parent), suffix=".tmp") + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + json.dump(payload, f, indent=2, ensure_ascii=False) + os.replace(tmp, idx_path) + except Exception: + try: + os.unlink(tmp) + except OSError: + pass + raise + + +# ─── BRAIN FACTORY ─────────────────────────────────────── + +def _build_brain(name: str, audio_io, recorder, voice: str, system_prompt: str): + name = (name or "").strip().lower() + if name in ("", "gemini"): + from Project.Sanad.gemini.script import GeminiBrain + return GeminiBrain(audio_io, recorder, voice, system_prompt) + if name == "local": + from Project.Sanad.local.script import LocalBrain + return LocalBrain(audio_io, recorder, voice, system_prompt) + if name == "model": + from Project.Sanad.voice.model_script import ModelBrain + return ModelBrain(audio_io, recorder, voice, system_prompt) + # To add a provider: import the module and return its brain class here. 
def main() -> None:
    """Entry point of the voice process.

    Command line: ``<dds-interface> [--voice <name>]``. With no interface
    argument the module docstring is printed and the process exits with
    status 1.

    Steps: initialise DDS on the interface, bring up the AudioClient, start
    the audio profile named by ``SANAD_AUDIO_PROFILE`` (default
    ``builtin``), sanity-check the microphone, build the brain named by
    ``SANAD_VOICE_BRAIN`` (default ``gemini``) and run it until it returns
    or the user presses Ctrl+C.

    Once the audio pipeline has been started it is always stopped again,
    including when the mic test or brain construction fails. Those setup
    errors still propagate to the caller after cleanup; errors raised while
    the brain is running are logged with a traceback and swallowed.
    """
    if len(sys.argv) < 2:
        print(__doc__)
        sys.exit(1)

    iface = sys.argv[1]
    voice = GEMINI_VOICE
    if "--voice" in sys.argv:
        value_pos = sys.argv.index("--voice") + 1
        if value_pos >= len(sys.argv):
            # "--voice" was the last argument: report a usage error instead
            # of dying with an IndexError traceback.
            print("error: --voice requires a value", file=sys.stderr)
            sys.exit(1)
        voice = sys.argv[value_pos]

    log.info("DDS on %s", iface)
    ChannelFactoryInitialize(0, iface)
    ac = AudioClient()
    ac.SetTimeout(10.0)
    ac.Init()
    log.info("AudioClient ready")

    profile = os.environ.get("SANAD_AUDIO_PROFILE", "builtin")
    audio = AudioIO.from_profile(profile, audio_client=ac)
    audio.start()
    log.info("audio profile=%s", audio.profile_id)

    brain = None
    try:
        # Sanity-check the mic before handing it to the brain
        log.info("testing mic 2s...")
        time.sleep(2)
        test = audio.mic.read_chunk(1024)
        e = _audio_energy(test)
        log.info("mic energy=%d %s", e, "OK" if e > 0 else "SILENT")

        recorder = TurnRecorder(enabled=RECORD_ENABLED)
        if RECORD_ENABLED:
            log.info("recording enabled → %s", RECORD_DIR)

        system_prompt = _load_system_prompt()
        brain_name = os.environ.get("SANAD_VOICE_BRAIN", "gemini")
        brain = _build_brain(brain_name, audio, recorder, voice, system_prompt)
        log.info("voice brain=%s voice=%s log=%s", brain_name, voice, LOG_FILE)
        log.info("─" * 50)

        try:
            asyncio.run(brain.run())
        except KeyboardInterrupt:
            pass
        except Exception as exc:
            # exc_info keeps the traceback in the log file; the message
            # alone is rarely enough to diagnose a crashed session.
            log.error("fatal: %s", exc, exc_info=True)
    finally:
        log.info("stopping")
        if brain is not None:
            try:
                brain.stop()
            except Exception:
                log.warning("brain.stop() failed", exc_info=True)
        audio.stop()
        log.info("stopped")
+""" + +from __future__ import annotations + +import re +from pathlib import Path +from typing import Any + +# Arabic diacritics (tashkeel) — stripped for matching. +_DIACRITICS_RE = re.compile(r"[\u0617-\u061A\u064B-\u0652\u0670\u06D6-\u06ED]") +_AR_PUNCT = re.compile(r"[؟،؛]") +_NON_WORD = re.compile(r"[^\w\u0600-\u06FF\s]", re.UNICODE) +_MULTI_WS = re.compile(r"\s+") + + +def normalize_arabic(text: str) -> str: + """Normalize Arabic + English text for matching.""" + s = text.strip().lower() + s = _AR_PUNCT.sub(" ", s) + s = _NON_WORD.sub(" ", s) + s = _MULTI_WS.sub(" ", s) + # Hamza variants → bare alif + s = s.replace("\u0623", "\u0627") # أ → ا + s = s.replace("\u0625", "\u0627") # إ → ا + s = s.replace("\u0622", "\u0627") # آ → ا + # Ta marbuta / alif maqsoora + s = s.replace("\u0629", "\u0647") # ة → ه + s = s.replace("\u0649", "\u064A") # ى → ي + # Tatweel + s = s.replace("\u0640", "") + # Strip diacritics last + s = _DIACRITICS_RE.sub("", s) + return s.strip() + + +def strip_diacritics(text: str) -> str: + return _DIACRITICS_RE.sub("", text) + + +def load_phrase_map(filepath: str | Path) -> dict[str, set[str]]: + """Load a phrase file mapping command names to trigger phrases. 
+ + Format (per command): + WAKE_PHRASES_shake_hand = { + "مصافحه", "handshake", "shake hands", + } + + Returns: {"shake_hand": {"مصافحه", "handshake", ...}, ...} + """ + path = Path(filepath) + if not path.exists(): + return {} + + content = path.read_text(encoding="utf-8") + result: dict[str, set[str]] = {} + current_name: str | None = None + current_phrases: set[str] = set() + + for raw_line in content.splitlines(): + line = raw_line.strip() + if not line or line.startswith("#"): + continue + + # Header: WAKE_PHRASES_shake_hand = { + header_match = re.match(r"WAKE_PHRASES_(\w+)\s*=\s*\{", line) + if header_match: + if current_name and current_phrases: + result[current_name] = current_phrases + current_name = header_match.group(1) + current_phrases = set() + continue + + # Closing brace + if line == "}": + if current_name and current_phrases: + result[current_name] = current_phrases + current_name = None + current_phrases = set() + continue + + # Phrase line: "some phrase", + phrase_match = re.match(r'"([^"]+)"', line) + if phrase_match and current_name is not None: + phrase = normalize_arabic(phrase_match.group(1)) + if phrase: + current_phrases.add(phrase) + + if current_name and current_phrases: + result[current_name] = current_phrases + + return result + + +def match_phrase(text: str, phrase_sets: dict[str, set[str]]) -> str | None: + """Return the command name if normalized *text* matches any phrase set. + + Token-set matching: every word of the phrase must appear as a whole + word in *text*. Prevents short phrases (e.g. 'hi') from matching + longer words (e.g. 'this'). 
+ """ + norm = normalize_arabic(text) + if not norm: + return None + text_tokens = set(norm.split()) + if not text_tokens: + return None + best_command: str | None = None + best_len = 0 + for command_name, phrases in phrase_sets.items(): + for phrase in phrases: + phrase_tokens = phrase.split() + if not phrase_tokens: + continue + if all(t in text_tokens for t in phrase_tokens): + if len(phrase) > best_len: + best_command = command_name + best_len = len(phrase) + return best_command + + +# ────────────────────── stateful ASR-buffer matcher ────────────────────── +# Port of gemini_interact/sanad_text_utils.py:_maybe_trigger_arm +# +# Why stateful: Gemini streams short ASR pieces like "مر", "حب", "ا" that +# need to be joined across ~2 s to match "مرحبا". This matcher buffers +# incoming transcript pieces, dedups repeats, and fires when any phrase +# in the wake set is found. + +import time +import asyncio +import threading + + +_YA_PREFIX_RE = re.compile(r"^يا\s*") + + +def _strip_ya_prefix(s: str) -> str: + s = (s or "").strip() + return _YA_PREFIX_RE.sub("", s).strip() + + +def _remove_al_prefix_words(text: str) -> str: + if not text: + return "" + out = [] + for w in text.split(): + if w.startswith("ال") and len(w) > 2: + out.append(w[2:]) + else: + out.append(w) + return " ".join(out).strip() + + +def _is_valid_text(s: str) -> bool: + has_ar = bool(re.search(r"[\u0600-\u06FF]", s or "")) + has_en = bool(re.search(r"[a-zA-Z]", s or "")) + return has_ar or has_en + + +def maybe_trigger_arm( + state: Any, + transcript_text: str, + wake_phrases: set[str], + *, + fire_on_wake_match: bool = True, + arm_trigger_fn=None, +) -> bool: + """Buffer-aware wake-phrase matcher. + + `state` is any object — attributes are lazily initialized on first use. + Suitable targets: a session dataclass, or even a plain `types.SimpleNamespace`. 
+ + On match: + - Clears ASR buffer to avoid re-trigger on next chunk + - If fire_on_wake_match: runs arm_trigger_fn in a background thread + immediately (wrapped in asyncio.to_thread if in a loop, else + threading.Thread) + - If not fire_on_wake_match: marks _pending_arm_wave=True so the + caller can fire it on turn_complete + + Returns True if a phrase fired, False otherwise. + """ + if not transcript_text or not wake_phrases: + return False + + # ── lazy state init ──────────────────────────────────────── + for attr, default in ( + ("_asr_buf", ""), ("_asr_last_time", 0.0), + ("ASR_WINDOW_SEC", 2.0), ("ASR_SHORT_TOKEN_BONUS_SEC", 1.0), + ("ASR_JOIN_NO_SPACE_MAXLEN", 2), ("ASR_MAX_CHARS", 120), + ("_last_trigger_norm", ""), ("_last_trigger_time", 0.0), + ("TRIGGER_DEDUP_WINDOW", 2.0), + ("_pending_arm_wave", False), ("_pending_arm_wave_fired", False), + ("_pending_arm_wave_set_time", 0.0), ("PENDING_ARM_TTL", 6.0), + ("_pending_arm_trigger_fn", None), ("_pending_arm_fallback_time", 0.0), + ("_last_piece_call_norm", ""), ("_last_piece_call_time", 0.0), + ("_asr_stream", ""), ("ASR_STREAM_MAX_CHARS", 80), + ): + if not hasattr(state, attr): + setattr(state, attr, default) + + dup_call_window = float(getattr(state, "DUP_CALL_WINDOW_SEC", 0.25)) + dup_asr_repeat_window = float(getattr(state, "DUP_ASR_REPEAT_WINDOW_SEC", 0.9)) + pending_fallback_sec = float(getattr(state, "PENDING_ARM_FALLBACK_SEC", 0.65)) + + piece_raw = transcript_text.strip() + if not piece_raw: + return False + + piece_norm = normalize_arabic(piece_raw) + if not piece_norm or not _is_valid_text(piece_norm): + return False + + now = time.time() + + duplicate_call = ( + piece_norm == state._last_piece_call_norm + and (now - state._last_piece_call_time) < dup_call_window + ) + repeated_asr = ( + piece_norm == state._last_piece_call_norm + and (now - state._last_piece_call_time) < dup_asr_repeat_window + ) + + state._last_piece_call_norm = piece_norm + state._last_piece_call_time = now + + # Buffer 
update + if not duplicate_call and not repeated_asr: + if state._asr_last_time: + gap = now - state._asr_last_time + window = state.ASR_WINDOW_SEC + if len(piece_norm) <= state.ASR_JOIN_NO_SPACE_MAXLEN: + window += state.ASR_SHORT_TOKEN_BONUS_SEC + if gap > window: + state._asr_buf = "" + state._asr_stream = "" + + state._asr_last_time = now + + # Join logic — no-space for very short pieces + if state._asr_buf: + if len(piece_norm) <= state.ASR_JOIN_NO_SPACE_MAXLEN: + state._asr_buf = (state._asr_buf + piece_norm).strip() + else: + state._asr_buf = (state._asr_buf + " " + piece_norm).strip() + else: + state._asr_buf = piece_norm + + compact = piece_norm.replace(" ", "") + state._asr_stream = (state._asr_stream + compact)[-state.ASR_STREAM_MAX_CHARS:] + if len(state._asr_buf) > state.ASR_MAX_CHARS: + state._asr_buf = state._asr_buf[-state.ASR_MAX_CHARS:] + + buf_norm = normalize_arabic(state._asr_buf) + buf_nospace = buf_norm.replace(" ", "") + buf_noal = _remove_al_prefix_words(buf_norm) + buf_noal_nospace = buf_noal.replace(" ", "") + stream = normalize_arabic(state._asr_stream).replace(" ", "") + stream_noal = _remove_al_prefix_words(stream) + + # Dedup — don't fire same buffer twice within TRIGGER_DEDUP_WINDOW + if (buf_norm == state._last_trigger_norm + and (now - state._last_trigger_time) < state.TRIGGER_DEDUP_WINDOW): + return False + + # Match loop + for phrase in wake_phrases: + p_norm = _strip_ya_prefix(normalize_arabic(str(phrase))) + if not p_norm: + continue + p_nospace = p_norm.replace(" ", "") + p_noal = _remove_al_prefix_words(p_norm) + p_noal_nospace = p_noal.replace(" ", "") + + pattern = r"\b" + re.escape(p_norm) + r"\b" + hit_buf = bool(re.search(pattern, buf_norm)) \ + or (p_nospace and p_nospace == buf_nospace) \ + or (p_noal and (p_noal in buf_noal + or (p_noal_nospace and p_noal_nospace in buf_noal_nospace))) + + hit_stream = bool(p_nospace and p_nospace in stream) \ + or bool(p_noal_nospace and p_noal_nospace in stream_noal) + + if hit_buf 
# Strong references to in-flight trigger tasks. The event loop keeps only
# weak references to tasks, so a task nobody else holds can be collected
# before it finishes.
_ARM_TRIGGER_TASKS = set()


def _fire_arm_trigger(fn) -> None:
    """Run the arm trigger callback in a background thread, regardless
    of whether we're inside an asyncio loop.

    Inside a running loop the callback is handed to ``asyncio.to_thread``
    and wrapped in a task that is kept referenced until it completes.
    Without a running loop a daemon ``threading.Thread`` is started instead.
    The call returns immediately in both cases and never waits for *fn*.
    """
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop in this thread: plain fire-and-forget thread.
        threading.Thread(target=fn, daemon=True).start()
        return

    task = loop.create_task(asyncio.to_thread(fn))
    _ARM_TRIGGER_TASKS.add(task)
    task.add_done_callback(_ARM_TRIGGER_TASKS.discard)
def sanitize_record_name(name: str) -> str:
    """Turn a user-supplied record name into a safe single file-name stem.

    Characters other than word characters, ``-``, ``.``, Arabic letters and
    whitespace become ``_``. Whitespace runs become ``_``. The result is cut
    to 80 characters and leading/trailing dots are stripped, so it can never
    be ``.`` or ``..`` (which would resolve to a directory) or a hidden
    dot-file. If nothing usable remains, a timestamped default
    ``record_YYYYMMDD_HHMMSS`` is returned.
    """
    cleaned = (name or "").strip()
    cleaned = re.sub(r"[^\w\-\u0600-\u06FF\s\.]", "_", cleaned, flags=re.UNICODE)
    cleaned = re.sub(r"\s+", "_", cleaned)
    # Strip dots after truncation so the cut itself cannot leave one behind.
    cleaned = cleaned[:80].strip(".")
    if not cleaned:
        cleaned = f"record_{datetime.now():%Y%m%d_%H%M%S}"
    return cleaned
class ParecMonitorRecorder:
    """Capture speaker output via `parec` (PulseAudio CLI).

    ``start()`` spawns ``parec`` on the configured device and reads its raw
    s16le output on a daemon thread. ``stop()`` waits a short tail, ends the
    process and returns everything captured.

    ``device_config`` keys read here: ``name`` (device passed to
    ``--device``), ``rate``, ``channels`` and ``chunk_size`` (frames per
    read).
    """

    def __init__(self, device_config: dict[str, Any]):
        self.device_config = device_config
        self.frames: list[bytes] = []
        self._stop_event = threading.Event()
        self._thread: Optional[threading.Thread] = None
        self._proc: Optional[subprocess.Popen[bytes]] = None
        self._error: Optional[BaseException] = None

    def start(self):
        """Spawn `parec` and begin collecting audio in the background."""
        # Reset per-run state, as MonitorRecorder.start() does. Otherwise a
        # reused recorder would see the stop event still set, exit its read
        # loop immediately, and return frames or errors from the last run.
        self._stop_event.clear()
        self.frames = []
        self._error = None
        cmd = [
            "parec",
            f"--device={self.device_config['name']}",
            "--format=s16le",
            f"--rate={self.device_config['rate']}",
            f"--channels={self.device_config['channels']}",
        ]
        self._proc = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        self._thread = threading.Thread(target=self._loop, daemon=True)
        self._thread.start()
        # Brief head start for the reader before the caller begins playback.
        time.sleep(0.05)

    def _loop(self):
        """Reader thread: append chunks until stopped or `parec` exits."""
        if self._proc is None or self._proc.stdout is None:
            self._error = RuntimeError("parec did not start")
            return
        # chunk_size is in frames; s16le means 2 bytes per sample per channel.
        size = self.device_config["chunk_size"] * self.device_config["channels"] * 2
        while not self._stop_event.is_set():
            try:
                data = self._proc.stdout.read(size)
                if data:
                    self.frames.append(data)
                    continue
                # Empty read: finished only if the process has really exited.
                if self._proc.poll() is not None:
                    break
            except Exception as exc:
                # Errors caused by stop() tearing the pipe down are expected.
                if not self._stop_event.is_set():
                    self._error = exc
                break

    def stop(self) -> bytes:
        """Stop capturing and return the recorded PCM.

        Raises:
            RuntimeError: if the reader thread hit an error while capturing.
        """
        # Keep listening briefly so the tail of the playback is captured.
        time.sleep(MONITOR_TAIL_SEC)
        self._stop_event.set()
        if self._proc is not None and self._proc.poll() is None:
            self._proc.terminate()
            try:
                self._proc.wait(timeout=1.0)
            except subprocess.TimeoutExpired:
                self._proc.kill()
                try:
                    # Reap the killed child so it does not stay a zombie.
                    self._proc.wait(timeout=1.0)
                except subprocess.TimeoutExpired:
                    pass
        if self._thread is not None:
            self._thread.join(timeout=1.0)
        # Release the pipe, but only once the reader is done with it, so we
        # never close a file another thread may still be blocked reading.
        reader_done = self._thread is None or not self._thread.is_alive()
        if reader_done and self._proc is not None and self._proc.stdout is not None:
            self._proc.stdout.close()
        if self._error is not None:
            raise RuntimeError(f"parec capture failed: {self._error}")
        return b"".join(self.frames)
{"total_records": 0, "records": []} + + +def _save_index(payload: dict[str, Any]): + RECORD_INDEX_PATH.parent.mkdir(parents=True, exist_ok=True) + fd, tmp = tempfile.mkstemp(dir=str(RECORD_INDEX_PATH.parent), + suffix=".tmp") + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + json.dump(payload, f, indent=2, ensure_ascii=False) + os.replace(tmp, RECORD_INDEX_PATH) + except Exception: + try: + os.unlink(tmp) + except OSError: + pass + raise + + +def _resolve_record_path(path_str: str) -> Path: + """Resolve a path from the records index. + + Paths in records.json can be either: + - absolute (legacy — may be stale after scp to another machine) + - relative / basename — looked up under AUDIO_RECORDINGS_DIR + """ + if not path_str: + return AUDIO_RECORDINGS_DIR + p = Path(path_str) + if p.is_absolute(): + return p + return AUDIO_RECORDINGS_DIR / p + + +def _reconcile_index(payload: dict[str, Any]) -> dict[str, Any]: + """Drop records whose files no longer exist on disk.""" + surviving: list[dict[str, Any]] = [] + for entry in payload.get("records", []): + try: + speaker = _resolve_record_path( + entry["files"]["speaker_recording"]["path"]) + if speaker.exists(): + surviving.append(entry) + except (KeyError, TypeError): + continue + payload["records"] = surviving + payload["total_records"] = len(surviving) + return payload + + +def _build_file_info(path: Path, pcm: bytes, rate: int, + channels: int, sample_width: int) -> dict[str, Any]: + """Build a records.json file entry with a portable relative path. + + `path` can be an absolute path on disk — we store just the basename + so the index is portable across workstation ↔ robot. 
class TypedReplayEngine:
    """Full-featured typed replay — generate, play, capture, save, replay.

    The engine keeps the most recent generation in ``self.session`` so it can
    be replayed or saved later, and maintains a JSON index of saved records
    under ``AUDIO_RECORDINGS_DIR``.

    ``_gen_lock`` rejects overlapping ``say()`` calls; ``_play_lock``
    serializes everything that writes to an output device.
    """

    def __init__(self, voice_client, audio_mgr):
        """voice_client: GeminiVoiceClient   audio_mgr: AudioManager"""
        self.voice_client = voice_client
        self.audio_mgr = audio_mgr
        self.session = ReplaySessionState()
        self._gen_lock = threading.Lock()
        self._play_lock = threading.Lock()
        self._monitor_config = self._resolve_monitor_config()
        AUDIO_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)

    # ── monitor config ───────────────────────────────────────────
    def _resolve_monitor_config(self) -> Optional[dict[str, Any]]:
        """Pick the backend for capturing speaker output.

        Priority:
          1. parec (cleanest — just listens to the speaker monitor source)
          2. PyAudio input device matching 'pulse' or 'default'
          3. None → capture disabled (generation still works)

        Probing never raises: any failure while enumerating devices degrades
        to "capture disabled" so the engine can still be constructed.
        """
        if shutil.which("parec"):
            log.info("speaker capture: parec monitor=%s", DEFAULT_MONITOR_SOURCE)
            return {
                "backend": "parec",
                "name": DEFAULT_MONITOR_SOURCE,
                "rate": RECEIVE_SAMPLE_RATE,
                "channels": CHANNELS,
                "chunk_size": MONITOR_CHUNK_SIZE,
            }
        if pyaudio is None:
            log.warning("speaker capture disabled — no parec and no pyaudio")
            return None

        # Prefer the manager's PyAudio instance; if the manager is absent or
        # has no instance, create a private one just for this probe.
        shared = getattr(self.audio_mgr, "pya", None) if self.audio_mgr else None
        owned = None
        try:
            if shared is not None:
                pya = shared
            else:
                owned = pya = pyaudio.PyAudio()
        except Exception:
            return None

        try:
            for i in range(pya.get_device_count()):
                info = pya.get_device_info_by_index(i)
                name = str(info.get("name", "")).lower()
                has_input = int(info.get("maxInputChannels", 0)) > 0
                if ("pulse" in name or "default" in name) and has_input:
                    log.info("speaker capture: pyaudio device=%s", info.get("name"))
                    return {
                        "backend": "pyaudio",
                        "index": i,
                        "name": str(info.get("name")),
                        "rate": int(info.get("defaultSampleRate", RECEIVE_SAMPLE_RATE)),
                        "channels": max(1, min(2, int(info.get("maxInputChannels", 1)))),
                        "chunk_size": MONITOR_CHUNK_SIZE,
                    }
        except Exception as exc:
            log.warning("speaker capture disabled — device probe failed: %s", exc)
            return None
        finally:
            # Only tear down the instance we created ourselves.
            if owned is not None:
                try:
                    owned.terminate()
                except Exception as exc:
                    log.warning("couldn't release probe PyAudio instance: %s", exc)
        log.warning("speaker capture disabled — no pulse/default pyaudio device")
        return None

    def sample_width(self) -> int:
        """Return the int16 sample width in bytes (2 when PyAudio is absent)."""
        if pyaudio is None or self.audio_mgr is None or self.audio_mgr.pya is None:
            return 2  # int16
        return self.audio_mgr.pya.get_sample_size(pyaudio.paInt16)

    # ── generation ───────────────────────────────────────────────

    async def generate_audio(self, text: str) -> tuple[bytes, list[str]]:
        """Route typed text through Gemini Live as the voice, first-try reliable.

        The raw text is sent first; if the reply carries no audio (or the
        call raises), two progressively more explicit phrasings are tried.

        Args:
            text: Text to speak; surrounding whitespace is stripped.

        Returns:
            ``(audio_bytes, text_parts)`` from the first attempt that produced
            audio, or ``(b"", last_text_parts)`` when every attempt failed.

        Raises:
            ValueError: ``text`` is empty after stripping.
            RuntimeError: no voice client is configured.
        """
        stripped = text.strip()
        if not stripped:
            raise ValueError("text cannot be empty")

        if self.voice_client is None:
            raise RuntimeError("voice_client unavailable")
        if not self.voice_client.connected:
            await self.voice_client.connect()

        # The voice_client now runs the multilingual VERBATIM TTS prompt
        # (gemini/client.TTS_SYSTEM_PROMPT), so the text is read back in its own
        # language. Send the raw text first (cleanest — no wrapper to bias the
        # language); fall back to explicit, language-neutral verbatim asks. The
        # old attempt-1 wrapped in Arabic, which dragged every line to Arabic.
        attempts = [
            stripped,
            f'Read this aloud word for word, in its original language, nothing else:\n{stripped}',
            f'"{stripped}"',
        ]
        last_parts: list[str] = []
        for idx, wrapped in enumerate(attempts, start=1):
            try:
                audio_bytes, text_parts = await self.voice_client.send_text(
                    wrapped, owner="typed_replay")
            except Exception as exc:
                log.warning("Gemini TTS attempt %d failed: %s", idx, exc)
                continue
            if audio_bytes:
                if idx > 1:
                    log.info("Gemini TTS succeeded on attempt %d", idx)
                return audio_bytes, text_parts
            last_parts = text_parts
            log.warning("Gemini TTS attempt %d returned no audio — parts: %s",
                        idx, " | ".join(text_parts or [])[:120])
        return b"", last_parts

    # ── playback + capture ───────────────────────────────────────
    def play_audio(self, audio_bytes: bytes, capture_speaker: bool) -> bytes:
        """Play Gemini PCM through the G1 chest speaker (via DDS) when
        available; fall back to the host PulseAudio sink otherwise.

        The DDS path is audible on the robot; the PulseAudio path goes
        to the Jetson's built-in audio codec, which isn't wired to any
        audible output on the G1. `capture_speaker` is only supported
        on the PulseAudio path (DDS has no monitor sink).

        Returns:
            The captured speaker PCM, or ``b""`` when nothing was captured
            (empty input, G1 path, capture disabled or not requested).

        Raises:
            RuntimeError: no audio manager, or no PyAudio backend for the
                fallback path.
        """
        if not audio_bytes:
            return b""
        if self.audio_mgr is None:
            raise RuntimeError("audio_mgr unavailable — cannot play")

        # Try the G1 chest speaker first
        g1_client = None
        try:
            g1_client = self.audio_mgr._get_g1_audio_client()
        except Exception:
            g1_client = None

        if g1_client is not None:
            if capture_speaker:
                log.info("capture_speaker requested, but G1 DDS path has no "
                         "PulseAudio monitor — capture skipped")
            with self._play_lock:
                try:
                    self.audio_mgr._play_pcm_via_g1(
                        audio_bytes, CHANNELS, RECEIVE_SAMPLE_RATE,
                    )
                    return b""
                except Exception as exc:
                    log.warning("G1 speaker playback failed, falling back "
                                "to host PulseAudio sink: %s", exc)
                    # Fall through to the PyAudio path below

        if self.audio_mgr.pya is None:
            raise RuntimeError("audio_mgr has no PyAudio backend for fallback")

        # Fallback: host PulseAudio sink (inaudible on G1, but keeps the
        # capture-speaker feature working on desktop/dev setups).
        with self._play_lock:
            recorder = None
            restore_source = False
            if capture_speaker and self._monitor_config is not None:
                if self._monitor_config["backend"] == "parec":
                    recorder = ParecMonitorRecorder(self._monitor_config)
                else:
                    recorder = MonitorRecorder(self.audio_mgr.pya, self._monitor_config)
                    try:
                        run_pactl(["set-default-source", self._monitor_config["name"]])
                        restore_source = True
                    except Exception as exc:
                        log.warning("couldn't switch default source to monitor: %s", exc)

            # Outer try/finally guarantees the default source is restored — and
            # a started recorder is stopped — even if pya.open() /
            # recorder.start() / stream.write() raises. Otherwise the machine's
            # default mic stays pointed at the speaker monitor and the capture
            # backend keeps running.
            recorder_running = False
            try:
                stream = None
                try:
                    stream = self.audio_mgr.pya.open(
                        format=pyaudio.paInt16,
                        channels=CHANNELS,
                        rate=RECEIVE_SAMPLE_RATE,
                        output=True,
                        frames_per_buffer=CHUNK_SIZE,
                    )
                    if recorder is not None:
                        recorder.start()
                        recorder_running = True
                    frame_bytes = CHUNK_SIZE * 2
                    for offset in range(0, len(audio_bytes), frame_bytes):
                        stream.write(audio_bytes[offset:offset + frame_bytes])
                finally:
                    if stream is not None:
                        try:
                            stream.stop_stream()
                        finally:
                            stream.close()

                captured = b""
                if recorder_running:
                    # Clear the flag first so a failing stop() is not retried
                    # by the cleanup below.
                    recorder_running = False
                    captured = recorder.stop()
                return captured
            finally:
                if recorder_running:
                    try:
                        recorder.stop()
                    except Exception as exc:
                        log.warning("couldn't stop speaker recorder: %s", exc)
                if restore_source:
                    try:
                        run_pactl(["set-default-source", DEFAULT_SOURCE])
                    except Exception as exc:
                        log.warning("couldn't restore default source: %s", exc)

    def save_audio(self, pcm: bytes, path: Path, channels: int, rate: int) -> None:
        """Write ``pcm`` to ``path`` as a WAV file using ``sample_width()``."""
        with wave.open(str(path), "wb") as wf:
            wf.setnchannels(channels)
            wf.setsampwidth(self.sample_width())
            wf.setframerate(rate)
            wf.writeframes(pcm)

    # ── high-level API ───────────────────────────────────────────
    async def say(self, text: str, record: bool = False,
                  record_name: str = "") -> dict[str, Any]:
        """Generate, play, capture, return metadata. Optionally persist.

        Args:
            text: Text to speak.
            record: When true, capture the speaker during playback and save
                the result as a record.
            record_name: Name for the saved record; defaults to a name built
                from ``text``.

        Raises:
            ValueError: ``text`` is empty.
            RuntimeError: another generation is in progress, or Gemini
                returned no audio.
        """
        if not text or not text.strip():
            raise ValueError("text cannot be empty")
        # Non-blocking acquire: a second caller is rejected rather than queued.
        if not self._gen_lock.acquire(blocking=False):
            raise RuntimeError("another typed-replay generation is in progress")
        try:
            audio_bytes, text_parts = await self.generate_audio(text)
            if not audio_bytes:
                raise RuntimeError("Gemini returned no audio — parts: "
                                   + " | ".join(text_parts or []))

            generated_at = format_timestamp()
            # Play + capture in a worker thread (PyAudio is sync)
            captured = await asyncio.to_thread(
                self.play_audio, audio_bytes, record)
            playback_finished_at = format_timestamp()

            # Update session state
            self.session.text = text
            self.session.audio_bytes = audio_bytes
            self.session.speaker_capture = captured
            self.session.generated_at = generated_at
            self.session.last_playback_at = playback_finished_at
            self.session.replay_count = 1
            self.session.saved_as = ""

            result = {
                "ok": True,
                "text": text,
                "gemini_text": text_parts,
                "generated_at": generated_at,
                "playback_finished_at": playback_finished_at,
                "raw_duration_sec": round(
                    audio_duration_seconds(audio_bytes, RECEIVE_SAMPLE_RATE,
                                           CHANNELS, self.sample_width()), 3),
                "captured_speaker_bytes": len(captured),
                "recorded": False,
            }

            if record:
                entry = self._persist_session(record_name or build_default_name(text))
                self.session.saved_as = entry["record_name"]
                result["record"] = entry
                result["recorded"] = True

            return result
        finally:
            self._gen_lock.release()

    def replay_last(self) -> dict[str, Any]:
        """Re-play the cached audio without hitting Gemini.

        Raises:
            RuntimeError: nothing has been generated yet.
        """
        if not self.session.audio_bytes:
            raise RuntimeError("no cached generation — call say() first")
        # Capture is never requested on replay, so the return value is unused.
        self.play_audio(self.session.audio_bytes, capture_speaker=False)
        self.session.replay_count += 1
        self.session.last_playback_at = format_timestamp()
        return {
            "ok": True,
            "replay_count": self.session.replay_count,
            "text": self.session.text,
            "played_at": self.session.last_playback_at,
        }

    def save_last(self, record_name: str = "") -> dict[str, Any]:
        """Persist the last generation to the records index.

        Raises:
            RuntimeError: nothing has been generated yet.
        """
        if not self.session.audio_bytes:
            raise RuntimeError("no cached generation — call say() first")
        entry = self._persist_session(record_name or build_default_name(self.session.text))
        self.session.saved_as = entry["record_name"]
        return entry

    def _persist_session(self, record_name: str) -> dict[str, Any]:
        """Write the session's WAV files and append a record to the index.

        Two files are written: ``<name>.wav`` (speaker capture, or the raw
        audio when there is no capture) and ``<name>_raw.wav`` (Gemini output).

        Returns:
            The index entry that was appended.
        """
        base = ensure_unique_record_stem(record_name, AUDIO_RECORDINGS_DIR)
        speaker_path = base.with_suffix(".wav")
        raw_path = base.with_name(f"{base.name}_raw.wav")

        capture = self.session.speaker_capture
        audio = self.session.audio_bytes
        sw = self.sample_width()

        if capture:
            cap_rate = (self._monitor_config or {}).get("rate", RECEIVE_SAMPLE_RATE)
            cap_channels = (self._monitor_config or {}).get("channels", CHANNELS)
            self.save_audio(capture, speaker_path, cap_channels, cap_rate)
        else:
            # No capture available → save raw as speaker too so every record
            # has a .wav file for reconciliation checks.
            self.save_audio(audio, speaker_path, CHANNELS, RECEIVE_SAMPLE_RATE)
            cap_rate = RECEIVE_SAMPLE_RATE
            cap_channels = CHANNELS
            capture = audio

        self.save_audio(audio, raw_path, CHANNELS, RECEIVE_SAMPLE_RATE)

        entry = {
            "record_name": base.name,
            "text": self.session.text,
            "replay_count": self.session.replay_count,
            "timeline": {
                "audio_generated_at": self.session.generated_at,
                "last_playback_finished_at": self.session.last_playback_at,
                "saved_at": format_timestamp(),
            },
            "audio_capture": {
                "backend": (self._monitor_config or {}).get("backend", "none"),
                "sink": DEFAULT_SINK,
                "monitor_source": DEFAULT_MONITOR_SOURCE,
                "restored_microphone_source": DEFAULT_SOURCE,
            },
            "files": {
                "speaker_recording": _build_file_info(
                    speaker_path, capture, cap_rate, cap_channels, sw),
                "gemini_raw_output": _build_file_info(
                    raw_path, audio, RECEIVE_SAMPLE_RATE, CHANNELS, sw),
            },
        }
        payload = _reconcile_index(_load_index())
        payload["records"].append(entry)
        payload["total_records"] = len(payload["records"])
        _save_index(payload)
        log.info("saved record %s (%.1fs speaker, %.1fs raw)",
                 base.name,
                 entry["files"]["speaker_recording"]["duration_seconds"],
                 entry["files"]["gemini_raw_output"]["duration_seconds"])
        return entry

    # ── records CRUD ─────────────────────────────────────────────
    def list_records(self) -> dict[str, Any]:
        """Return the index after dropping records whose files are gone."""
        return _reconcile_index(_load_index())

    def find_record(self, name: str) -> dict[str, Any]:
        """Return the index entry named ``name``.

        Raises:
            KeyError: no such record.
        """
        for e in _load_index().get("records", []):
            if e.get("record_name") == name:
                return e
        raise KeyError(f"record not found: {name}")

    def rename_record(self, name: str, new_name: str) -> dict[str, Any]:
        """Rename a record's two WAV files and its index entry.

        Args:
            name: Current record name.
            new_name: Desired name; passed through ``sanitize_record_name``.

        Returns:
            The updated index entry.

        Raises:
            ValueError: ``new_name`` is empty after sanitizing, or a record or
                file with that name already exists.
            KeyError: ``name`` is not in the index.
            OSError: a file rename failed; the first rename is rolled back.
        """
        new_name = sanitize_record_name(new_name)
        if not new_name:
            raise ValueError("new_name empty after sanitize")
        payload = _reconcile_index(_load_index())
        target = None
        for e in payload["records"]:
            if e.get("record_name") == name:
                target = e
                break
        if target is None:
            raise KeyError(f"record not found: {name}")
        if any(e.get("record_name") == new_name for e in payload["records"]):
            raise ValueError(f"a record named {new_name} already exists")

        old_speaker = _resolve_record_path(target["files"]["speaker_recording"]["path"])
        old_raw = _resolve_record_path(target["files"]["gemini_raw_output"]["path"])
        new_base = AUDIO_RECORDINGS_DIR / new_name
        new_speaker = new_base.with_suffix(".wav")
        new_raw = new_base.with_name(f"{new_base.name}_raw.wav")

        # Path.rename() can silently replace an existing file. This also
        # covers renaming "a" to "a_raw", where the new speaker file would
        # overwrite the record's own raw file.
        if new_speaker.exists() or new_raw.exists():
            raise ValueError(f"a record named {new_name} already exists")

        old_speaker.rename(new_speaker)
        try:
            old_raw.rename(new_raw)
        except BaseException:
            # Undo the first rename so the index entry still points at files
            # that exist.
            try:
                new_speaker.rename(old_speaker)
            except OSError as exc:
                log.warning("couldn't roll back rename of %s: %s", new_speaker, exc)
            raise

        target["record_name"] = new_name
        target["files"]["speaker_recording"]["path"] = new_speaker.name  # basename only
        target["files"]["speaker_recording"]["name"] = new_speaker.name
        target["files"]["gemini_raw_output"]["path"] = new_raw.name
        target["files"]["gemini_raw_output"]["name"] = new_raw.name
        _save_index(payload)
        if self.session.saved_as == name:
            self.session.saved_as = new_name
        return target

    def delete_record(self, name: str) -> dict[str, Any]:
        """Delete a record's files (best effort) and remove it from the index.

        Raises:
            KeyError: ``name`` is not in the index.
        """
        payload = _reconcile_index(_load_index())
        target = None
        for e in payload["records"]:
            if e.get("record_name") == name:
                target = e
                break
        if target is None:
            raise KeyError(f"record not found: {name}")
        for key in ("speaker_recording", "gemini_raw_output"):
            path = _resolve_record_path(target["files"][key]["path"])
            try:
                path.unlink()
            except FileNotFoundError:
                pass
            except Exception as exc:
                # Keep going: the index entry is removed even if a file stays.
                log.warning("couldn't delete %s: %s", path, exc)
        payload["records"] = [e for e in payload["records"] if e.get("record_name") != name]
        payload["total_records"] = len(payload["records"])
        _save_index(payload)
        if self.session.saved_as == name:
            self.session.saved_as = ""
        return {"deleted": name, "total_records": payload["total_records"]}

    def play_record(self, name: str, file_kind: str = "speaker") -> dict[str, Any]:
        """Play a saved WAV. file_kind = 'speaker' or 'raw'.

        Any ``file_kind`` other than ``'speaker'`` selects the raw file.

        Returns:
            Metadata about the record. ``played`` is false when there was no
            PyAudio backend and therefore nothing was sent to a device.

        Raises:
            KeyError: no such record.
            FileNotFoundError: the WAV file is missing.
        """
        entry = self.find_record(name)
        file_key = "speaker_recording" if file_kind == "speaker" else "gemini_raw_output"
        path = _resolve_record_path(entry["files"][file_key]["path"])
        if not path.exists():
            raise FileNotFoundError(str(path))
        with wave.open(str(path), "rb") as wf:
            channels = wf.getnchannels()
            sample_width = wf.getsampwidth()
            sample_rate = wf.getframerate()
            frames = wf.readframes(wf.getnframes())
        played = False
        with self._play_lock:
            if self.audio_mgr and self.audio_mgr.pya:
                stream = self.audio_mgr.pya.open(
                    format=self.audio_mgr.pya.get_format_from_width(sample_width),
                    channels=channels, rate=sample_rate,
                    output=True, frames_per_buffer=CHUNK_SIZE,
                )
                try:
                    chunk = CHUNK_SIZE * channels * sample_width
                    for offset in range(0, len(frames), chunk):
                        stream.write(frames[offset:offset + chunk])
                    played = True
                finally:
                    # close() must run even when stop_stream() raises.
                    try:
                        stream.stop_stream()
                    finally:
                        stream.close()
            else:
                log.warning("play_record(%s): no PyAudio backend — nothing played", name)
        return {
            "ok": True, "record_name": name, "file_kind": file_kind,
            "played": played,
            "duration_sec": round(audio_duration_seconds(
                frames, sample_rate, channels, sample_width), 3),
        }

    # ── status ───────────────────────────────────────────────────
    def status(self) -> dict[str, Any]:
        """Return a snapshot of engine, backend and session state."""
        return {
            "voice_client_connected": bool(
                self.voice_client and self.voice_client.connected),
            "audio_mgr_ready": bool(self.audio_mgr and self.audio_mgr.pya),
            "capture_backend": (self._monitor_config or {}).get("backend", "none"),
            "records_dir": str(AUDIO_RECORDINGS_DIR),
            "session": self.session.as_status(),
            "total_records": len(_load_index().get("records", [])),
        }
Gemini API credentials (api_key, model, voice_name) come from core_config.json's gemini_defaults — single source of truth.", + "mic_gain": 1.0, + "play_chunk_bytes": 96000, + "log_dir": "~/logs", + "log_name": "gemini_live_v2", + "session_timeout_sec": 660, + "max_reconnect_delay_sec": 30, + "max_consecutive_errors": 10, + "no_messages_timeout_sec": 30 + }, + + "mic_udp": { + "_comment": "G1 built-in mic — UDP multicast subscriber", + "group": "239.168.123.161", + "port": 5555, + "buffer_max_bytes": 64000, + "read_timeout_sec": 0.04, + "socket_timeout_sec": 1.0 + }, + + "speaker": { + "_comment": "G1 built-in speaker — AudioClient.PlayStream wrapper", + "app_name": "sanad", + "begin_stream_pause_sec": 0.15, + "wait_finish_margin_sec": 0.3 + }, + + "vad": { + "_comment": "Gemini Live server-side voice-activity-detection config", + "start_sensitivity": "START_SENSITIVITY_HIGH", + "end_sensitivity": "END_SENSITIVITY_LOW", + "prefix_padding_ms": 20, + "silence_duration_ms": 200 + }, + + "barge_in": { + "threshold": 500, + "loud_chunks_needed": 3, + "cooldown_sec": 0.3, + "echo_suppress_below": 500, + "ai_speak_grace_sec": 0.15 + }, + + "recording": { + "enabled": true, + "dir_relative": "data/recordings" + }, + + "typed_replay": { + "_comment": "voice/typed_replay.py — max_text_len comes from dashboard.api_input", + "monitor_chunk_size": 512, + "monitor_tail_sec": 0.2 + }, + + "live_voice_loop": { + "_comment": "voice/live_voice_loop.py — arm phrase dispatcher. 
arm_txt filename comes from core.script_files.arm_phrases", + "trigger_log_size": 100, + "poll_interval_sec": 0.1, + "deferred_default": false, + "trigger_enabled_default": false + }, + + "local_tts": { + "_comment": "voice/local_tts.py — offline Coqui TTS", + "model_subdir": "speecht5_tts_clartts_ar", + "vocoder_subdir": "speecht5_hifigan", + "xvector_filename": "arabic_xvector_embedding.pt", + "sample_rate": 16000, + "channels": 1 + } +} diff --git a/vendor/Sanad/voice/wake_phrase_manager.py b/vendor/Sanad/voice/wake_phrase_manager.py new file mode 100644 index 0000000..dfe6de5 --- /dev/null +++ b/vendor/Sanad/voice/wake_phrase_manager.py @@ -0,0 +1,140 @@ +"""Wake-phrase registry. + +Maps wake phrases (e.g. "hey sanad") → skill / callback IDs. Phrases are +persisted in data/wake_phrases.json so dashboard edits survive restart. + +This module is deliberately lightweight — it only *stores* the mapping. +Matching a transcript against the registered phrases is done in +`voice/text_utils.match_phrase`, and the actual trigger is orchestrated +by `core/brain.py`'s skill registry. 
@dataclass
class WakePhrase:
    """A single wake phrase → action mapping."""
    phrase: str
    action_id: str
    enabled: bool = True

    def normalized(self) -> str:
        """Return the phrase stripped and lower-cased, as used for matching."""
        return self.phrase.strip().lower()


class WakePhraseManager:
    """Thread-safe in-memory store for wake phrases, persisted to disk.

    Every mutation is written back to ``self.path`` as a JSON list while
    ``self._lock`` is held.
    """

    def __init__(self, path: Path = STATE_PATH):
        self.path = Path(path)
        self._lock = threading.Lock()
        self._phrases: list[WakePhrase] = []
        self._load()

    # ── persistence ──────────────────────────────────────────────────
    def _load(self) -> None:
        """Load phrases from ``self.path``; bad content is skipped, never fatal.

        A missing or unreadable file, or one whose root is not a JSON list,
        leaves the store empty. Individual entries that are not dicts, lack
        a string ``phrase``/``action_id``, or cannot build a ``WakePhrase``
        are skipped with a warning, so one bad entry cannot stop the manager
        from starting.
        """
        if not self.path.exists():
            return
        try:
            data = json.loads(self.path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError) as exc:
            log.warning("wake_phrases.json unreadable: %s", exc)
            return
        if not isinstance(data, list):
            log.warning("wake_phrases.json ignored: expected a list, got %s",
                        type(data).__name__)
            return

        known = WakePhrase.__annotations__
        loaded: list[WakePhrase] = []
        for d in data:
            if not isinstance(d, dict) or "phrase" not in d:
                continue
            if not isinstance(d.get("phrase"), str) or not isinstance(d.get("action_id"), str):
                # normalized() and the action lookups assume strings.
                log.warning("skipping malformed wake phrase entry: %r", d)
                continue
            try:
                loaded.append(WakePhrase(**{k: v for k, v in d.items() if k in known}))
            except TypeError as exc:
                log.warning("skipping malformed wake phrase entry %r: %s", d, exc)
        with self._lock:
            self._phrases = loaded
        log.info("Loaded %d wake phrase(s)", len(self._phrases))

    def _save(self) -> None:
        """Write all phrases to ``self.path`` via a temp file + rename.

        Callers hold ``self._lock``.
        """
        self.path.parent.mkdir(parents=True, exist_ok=True)
        tmp = self.path.with_suffix(self.path.suffix + ".tmp")
        try:
            tmp.write_text(
                json.dumps([asdict(p) for p in self._phrases], indent=2, ensure_ascii=False),
                encoding="utf-8",
            )
            tmp.replace(self.path)
        except BaseException:
            # Don't leave a half-written temp file next to the real one.
            try:
                tmp.unlink()
            except OSError:
                pass
            raise

    # ── CRUD ─────────────────────────────────────────────────────────
    def list(self) -> list[dict[str, Any]]:
        """Return every phrase as a plain dict."""
        with self._lock:
            return [asdict(p) for p in self._phrases]

    def add(self, phrase: str, action_id: str) -> dict[str, Any]:
        """Register ``phrase`` for ``action_id`` and persist.

        Adding a pair that already exists (compared on the normalized
        phrase) returns the existing entry unchanged.

        Raises:
            ValueError: ``phrase`` is empty after stripping.
        """
        norm = phrase.strip().lower()
        if not norm:
            raise ValueError("phrase cannot be empty")
        with self._lock:
            for p in self._phrases:
                if p.normalized() == norm and p.action_id == action_id:
                    return asdict(p)
            wp = WakePhrase(phrase=phrase.strip(), action_id=action_id)
            self._phrases.append(wp)
            self._save()
            return asdict(wp)

    def remove(self, phrase: str, action_id: str | None = None) -> bool:
        """Remove ``phrase``; with ``action_id=None`` remove it for all actions.

        Returns:
            True when at least one entry was removed.
        """
        norm = phrase.strip().lower()
        with self._lock:
            before = len(self._phrases)
            self._phrases = [
                p for p in self._phrases
                if not (p.normalized() == norm and
                        (action_id is None or p.action_id == action_id))
            ]
            removed = before != len(self._phrases)
            if removed:
                self._save()
            return removed

    def set_enabled(self, phrase: str, action_id: str, enabled: bool) -> bool:
        """Enable or disable one phrase/action pair.

        Returns:
            True when the pair was found (and saved), False otherwise.
        """
        norm = phrase.strip().lower()
        with self._lock:
            for p in self._phrases:
                if p.normalized() == norm and p.action_id == action_id:
                    p.enabled = enabled
                    self._save()
                    return True
            return False

    def for_action(self, action_id: str) -> set[str]:
        """Return all enabled phrases registered for an action."""
        with self._lock:
            return {
                p.normalized() for p in self._phrases
                if p.action_id == action_id and p.enabled
            }

    def action_phrase_map(self) -> dict[str, set[str]]:
        """Return {action_id: {phrases}} for all enabled entries."""
        result: dict[str, set[str]] = {}
        with self._lock:
            for p in self._phrases:
                if p.enabled:
                    result.setdefault(p.action_id, set()).add(p.normalized())
        return result

    # ── status ───────────────────────────────────────────────────────
    def status(self) -> dict[str, Any]:
        """Return counts and the sorted set of action ids."""
        with self._lock:
            enabled = sum(1 for p in self._phrases if p.enabled)
            return {
                "total": len(self._phrases),
                "enabled": enabled,
                "actions": sorted({p.action_id for p in self._phrases}),
            }
Mask that gets controlled by the App called 'Shining Mask'. [Google Play](https://play.google.com/store/apps/details?id=cn.com.heaton.shiningmask), [App Store](https://apps.apple.com/de/app/shining-mask/id1528108780) + +### Characteristics: +- Command Characteristic: `D44BC439-ABFD-45A2-B575-925416129600` (Write) +- Notification Characteristic: `D44BC439-ABFD-45A2-B575-925416129601` (Notify) +- Image Upload Characteristic: `D44BC439-ABFD-45A2-B575-92541612960A` (Write) +- Audio Visualization Characteristic: `D44BC439-ABFD-45A2-B575-92541612960B` (Write) + +### Data is encrypted using AES-128 in ECB mode with a fixed key: +###### Note that all the data sent to the command characteristic needs to be 16 bytes long (encrypted data), same as all the to-be-encrypted data. +`32672f7974ad43451d9c6c894a0e8764` + +### Command Structure for the commands that are sent to the command handle: +- 1 byte, length of the command plus arguments +- 1-5 bytes, hex representation of the ASCII command +- x bytes, arguments of the command +- the rest is just padding up to 16 bytes + +----------------------------------------------------------------- +## Commands: +### Utility: +- ### `LIGHT`: + - Description: Sets the brightness, lower brightness means less color accuracy in color-dependent modes, such as images, and higher brightness over `128` means more flickering due to the LEDs not being able to get that bright at the same frequency, probably due to battery limitations. I personally keep it at a max of `100` + - Hex of the ASCII name: `4c49474854` + - arguments: + - 1 byte for the brightness +- ### `IMAG`: + - Description: Displays the builtin image at the provided id. Everything above `0x69` will display out of bounds data, mostly partial frames of the builtin animations. + - Hex of the ASCII name: `494d4147` + - arguments: + - 1 byte for the builtin image id to be displayed +- ### `ANIM`: + - Description: Plays the builtin animation at the provided id.
Everything above `0x45` will display out of bounds data, mostly some random pixels. Strangely it still plays that data as an animation + - Hex of the ASCII name: `414e494d` + - arguments: + - 1 byte for the built-in animation id to be displayed +- ### `DELE`: + - Description: Deletes the given DIY images from the mask + - Hex of the ASCII name: `44454c45` + - arguments: + - 1 byte for the count of the to be deleted DIY images + - max 10 bytes for the DIY image ids of the images that should get deleted + +- ### `PLAY`: + - Description: Plays the given DIY images in order + - Hex of the ASCII name: `504C4159` + - arguments: + - 1 byte for the count of the to be played DIY images + - max 10 bytes for the DIY image ids of the images that should get played (in order) + +- ### `CHEC`: + - Description: Command for checking how many DIY images are on the mask + - Hex of the ASCII name: `43484543` + - arguments: none, triggers a response on the notify Characteristic, sending back the number of DIY images uploaded in one byte + +----------------------------------------------------------------- + +### Text: +- ### `MODE`: + - Description: Allows to change the animation used to display the current text + - Hex of the ASCII name: `4d4f4445` + - arguments: + - 1 byte for setting the text display mode: + - 0: n/a (all n/a are just reverting to off) + - 1: off + - 2: blink + - 3: scroll right to left + - 4: scroll left to right + - 5 to 255: n/a + +- ### `SPEED`: + - Description: Changes the speed of the text display modes + - Hex of the ASCII name: `5350454544` + - arguments: + - 1 byte for setting the speed of the [`MODE`](#mode) effect + +- ### `M`: + - Description: Sets the special color/effect for the text + - Hex of the ASCII name: `4d` + - arguments: + - 1 byte for controlling whether it's enabled + - 1 byte for controlling the mode: + - 0: Random color dots on white background + - 1: Idk, seems like a bit of rainbow on the top and bottom of the text, with red on the
left transitioning to purple on the right. A big white part is in the middle + - 2: Fade from Yellow (top) to Blue (bottom) + - 3: fade from Green (sides) to blue (middle) (circle shaped) + - 4: enables the first background image + - 5: enables the second background image + - 6: enables the third background image + - 7: enables the fourth background image + - 8 to 255: just doesn't change it from the previous effect (aka it just stays at the same one) + +- ### `FC`: + - Description: Sets the text color + - Hex of the ASCII name: `4643` + - arguments: + - 1 byte for controlling whether it's enabled + - 3 bytes for controlling the color in RGB format + +- ### `BC`: + - Description: Sets the background color, can be used to 'disable' the images from the [`M`](#m) command by setting it to black + - Hex of the ASCII name: `4243` + - arguments: + - 1 byte for controlling whether it's enabled + - 3 bytes for controlling the color in RGB format + +- ### Text uploading procedure: + - You will need something that can create a bitmap and, although not required, something that can create a color array for this to work (the text, or part of the text, will just be white if no color data is provided for it. For example if you have a bitmap that is `31` pixels wide and color data that has data for only `26` pixels, the last five pixels will be displayed as white). + - I haven't really figured out the correct way of creating a colormap yet. + - The bitmap has to be `16` pixels high and could theoretically be a few thousand pixels wide, but I wouldn't recommend doing that as it can cause the mask to become unresponsive and buggy, forcing one to cut the power by removing the battery (for some models, you will have to open the mask to do so). + - I would limit the length of the bitmap to about `512` to be safe but of course making it `1024` or `2048` could work, it just poses a risk (I also haven't really tested this so I just assumed `512` would be good).
+ - The color array can be used to give each pixel stripe of the text (so 1 x 16, width x height) a different color. + - You might need to calculate the total bitmap pixel length and generate an array of bytes with the format RGB `0-f` accordingly. + - The special thing about this is that the color stays fixed to the text and not the background like it's the case with the [`M`](#m) command. + - Once you have a bitmap, you can use the [`DATS`](#dats) command with the indicator byte set to `0x00` + - the first two bytes after the command being the size of the combined data (so bitmap with the color array appended to the end of it, this structure is also used for the uploaded data) + - the next two bytes being the size of the bitmap alone to initialize a bitmap upload. + - For the data upload, you just have to send the data to the mask like you would do with the image data, including the [`DATCP`](#datcp) command at the end. + +----------------------------------------------------------------- +### Image: +- ### `DATS`: + - Description: Used to tell the mask that an image upload is about to start + - Hex of the ASCII name: `44415453` + - arguments: + - 2 bytes for the image size of the following image + - 2 bytes for the image index + - 1 byte for the image toggle (whether to tell that an image is going to be uploaded or a bitmap. Has to be correctly set for the desired mode, otherwise the upload fails) + +- ### `DATCP`: + - Description: seems to confirm the image upload, sending the current unix timecode with it + - Hex of the ASCII name: `4441544350` + - arguments: current unix time code + +- ### Image uploading process: + - The program sends the [`DATS`](#dats) command and waits for the [`DATOK`](#datok) answer from the mask. After that, it starts uploading the image. + - The image is broken up into chunks of `98` bytes per packet (the image size is just the amount of bytes of the image), each byte being either the `red`, `green` or `blue` channel of a pixel.
+ - So for one pixel, three bytes are used. + - The first byte (not counting towards those `98` image data bytes) indicates the length of the image bytes in the package, so for the full `98` bytes of image data it would be `0x63`, as there are `98` bytes of image data plus one package counter byte, which starts at `0x00` and goes up by one each package. + - After every successful packet transfer, the mask responds with REOKOK for that image/packet. + - The last packet can have less than `98` bytes, so the data length byte is calculated for the less than `98` bytes. + - After that, the packet gets padded up to the full `100` bytes. + - Note that the packet does not need encrypting. + - After all packets have been uploaded, the program sends the [`DATCP`](#datcp) command, to which the mask responds with [`DATCPOK`](#datcpok). + +----------------------------------------------------------------- + +### Audio: +#### All audio data is sent to the audio visualizer handle, encrypted of course +- #### Audio data: + - 1 byte for the counter (it's always `0x0f`, as all 15 following bytes are utilized) + - 1 byte for the mode (modes are listed below) + - 14 bytes for the visualization. Each nibble (one of the two characters of a byte) of these 14 bytes corresponds to one row of pixels, changing the value of a nibble also changes the size of the pixel row. `0x0` means that that row is off and `0xf` means that that row is 100% full or on. + +- #### Modes: + - 0: vertical bars, color fade from red (middle) over green and blue to purple (left and right sides). The following nibbles are off all the time (starting from nibble 0): 2, 5, 8, 11, 14, 17, 20 (probably also 23 and 26, but those are outside of the screen so they are cut off and never visible in this mode) + - 1: vertical bars, color fade from red (middle) over green and blue to purple (top and bottom sides).
All nibbles are used to control the bars + - 2: vertical bars, color fade from turquoise (middle) over blue, purple/magenta and red to yellow (left and right sides). The following nibbles are off all the time (starting from nibble 0): 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22 (probably also the rest in the pattern, but those are outside of the screen so they are cut off and never visible in this mode) + - 3: same as 2, but the color fade is reversed and it's not centered in the middle but on the top and bottom sides of the mask. + - 4: horizontal bars, color fade from light purple (middle) over dark blue, light blue and turquoise to green (top and bottom sides). The following nibbles are off all the time (starting from nibble 0): 2, 5, 8, 11, 14, 17, 20, 23, 26. There are two dots on each side of the screen, on the top and bottom of it. In this mode, if the value of a nibble goes over `0xc`, it just behaves like its value is `0x0`. + +----------------------------------------------------------------- +### Responses: +#### All responses are sent on the notification handle and their last byte is always plus `0x01` regardless of any previous calculation +- ### `REOKOK`: + - Description: Confirms that the mask has received and processed the image packet + - Hex of the ASCII name: `52454F4B4F4B` + - arguments: + - 2 bytes for the ID of the currently uploading image + - 1 byte for telling if it's a bitmap or an image (`0x00` if bitmap and 0x01 if image) + - padding up to 15 bytes + - 1 byte for the current packet index with an offset of 1 (it starts to count at `0x01` instead of `0x00`) plus the above mentioned `0x01` (so for packet index `0x50` it would return `0x52`) + +- ### `DATCPOK` + - Description: Confirms the receiving of the unix timecode + - Hex of the ASCII name: `44415443504F4B` + - arguments: + - 2 bytes for the two most significant bytes of the unix timecode sent by the [`DATCP`](#datcp) command + +- ### `DATOK` + - Description: Confirms the start of the image
upload for the specific image id + - Hex of the ASCII name: `444154534f4b` + - arguments: + - 2 bytes for the image id + - 1 byte to tell wether it's an image or a bitmap + +- ### `PLAYOK` + - Description: Confirms that the [`PLAY`](#play) command was executed on the mask + - Hex of the ASCII name: `504c41594f4b` + - arguments: none + +- ### `DELEOK` + - Description: Confirms the deletion of images + - Hex of the ASCII name: `44454c454f4b` + - arguments: seems to be 32 bytes (and sometimes 16 bytes), I have no idea what those mean. Two [`DELEOK`](#deleok)s are sent for one [`DELE`](#dele) command though (at least that's what I got from the data I have available) + +- ### `CHEC` + - Description: Returns the number of DIY images + - Hex of the ASCII name: `43484543` + - arguments: + - 1 byte for the number of DIY images + + +----------------------------------------------------------------- + +## NOT YET DONE: +- ### `DELEOK`: + - Data that I know of: + #### First, here is the data of the [`DELE`](#dele) command: + 0f44454c45140102030405060708090a + #### and here is the data of the two following [`DELEOK`](#deleok) responses: + 0644454c454f4b02030405060708090b0a0b0c0d0e0f10111213140000000000 + 0644454c454f4b111213140000000001 + +- ### `LOOA`: + - #### Data that I know of: + ``` + 04 4c 4f 4f 41 b1 91 f8 ec 4b 69 8c 90 1b 2c 37 + 04 4c 4f 4f 41 5a 7c b8 3d c5 4e bd dc 97 a0 b7 + 04 4c 4f 4f 41 c5 aa 6d 20 43 1c b5 0d b7 bd 92 + 04 4c 4f 4f 41 d3 36 d2 49 e5 90 8f 26 8b 94 28 + 04 4c 4f 4f 41 65 cd d5 51 db 6b 2f 71 ed 7e bd + ``` + +- ### `TIME`: + - #### Data that I know of: + ```` + 09 54 49 4d 45 00 67 1a 3a fd 00 00 00 00 00 00 + 07 54 49 4d 45 45 52 52 3a fd 00 00 00 00 00 01 (this is the answer to the previous command coming from the mask, I'm not entirely sure which byte is still part of the command name and which is data. 
It could be that the command/response name is TIMEERR) + ```` \ No newline at end of file diff --git a/vendor/mask/NotoSans-Regular.ttf b/vendor/mask/NotoSans-Regular.ttf new file mode 100644 index 0000000..973bc2e Binary files /dev/null and b/vendor/mask/NotoSans-Regular.ttf differ diff --git a/vendor/mask/README.md b/vendor/mask/README.md new file mode 100644 index 0000000..a7d1129 --- /dev/null +++ b/vendor/mask/README.md @@ -0,0 +1,172 @@ +# Shining Mask — Python controller + +A clean, async **Python controller for the "Shining Mask" LED face mask**, driven +over Bluetooth LE with [bleak](https://github.com/hbldh/bleak) (BlueZ on Linux). + +A from-scratch port of the *Shining Mask* app protocol, cross-checked byte-for-byte +against the canonical Go implementation +([GoneUp/mask-go](https://github.com/GoneUp/mask-go)), the community protocol doc +([BrickCraftDream/Shining-Mask-stuff](https://github.com/BrickCraftDream/Shining-Mask-stuff)), +**real encrypted app traffic** ([beclamide/mask-controller](https://github.com/beclamide/mask-controller)), +and the custom-image upload recipe from +([BishopFox/shining-mask](https://github.com/BishopFox/shining-mask)). + +This project is **flat** — every module is a plain `.py` file in this folder. +Run things from here with the env that has `bleak`/`cryptography`/`Pillow`: + +```bash +cd ~/Robotics_workspace/yslootahtech/Project/Mask +python main.py # uses the g1_env python +``` + +No install/packaging — `import mask`, `import colorface`, etc. work because the +files sit in the working directory. 
+ +## Features + +- Connect over BLE (scan for `MASK-…`, or by MAC) +- Brightness, built-in images/animations +- **Custom full-color images & animations** — the headline feature +- **Animated face** with idle blink/glance + talking mouth (`FaceAnimator`) +- **Convert any image / GIF** to the mask (`image2mask.py`) +- Scrolling text (mode, speed, colors), DIY image PLAY/DELETE/COUNT +- A protocol layer with no BLE dependency, fully unit-tested without hardware + +## Display any image / GIF + +```bash +python image2mask.py photo.jpg # fit + show a still image +python image2mask.py logo.png --fit cover --oval +python image2mask.py dance.gif --max-frames 12 --fps 8 --loops 5 # animate a GIF +python image2mask.py photo.jpg --preview # ASCII preview, no mask needed +python image2mask.py photo.jpg --save out.bin # just save the raw 8004-byte frame +``` + +`--fit contain|cover|stretch` controls how the image fits the **46×58 portrait** +oval; `--oval` blacks out the corners to match the panel shape. In code: + +```python +import colorface +from mask import ShiningMask + +img = colorface.load_image("photo.jpg", fit="cover", oval=True) # -> 46x58 RGB +async with ShiningMask(name_prefix="MASK") as mask: + await mask.upload_raw_image(colorface.encode(img)) # show it +# animated GIF -> frames -> DIY images -> PLAY-loop: +frames = colorface.load_frames("dance.gif", max_frames=12) +``` + +## Animated face (Marcus) + +The mask's image display is a portrait oval **~46×58 RGB**, stored *transposed* +(`display[x,y] = raw[y,x]`). The firmware flashes an "uploading" logo during +every upload, so smooth animation can't re-upload per frame — instead a frame set +is uploaded **once** as DIY images, then animated with fast `PLAY` commands (no +logo). That's [`FaceAnimator`](faceanim.py) + [`colorface`](colorface.py). 
+ +```bash +python main.py # connect, load frames once, run a live face +python main.py --reload # force re-upload of the frame set +python main.py --talk # start talking and stay talking +``` + +Drive it from Marcus's speech: + +```python +from mask import ShiningMask +from faceanim import FaceAnimator + +async with ShiningMask(name_prefix="MASK") as mask: + face = FaceAnimator(mask) + await face.start() # uploads frames once, starts the idle animation + face.set_speaking(True) # call when TTS playback starts + # ... Marcus talks; mouth animates, eyes blink/glance ... + face.set_speaking(False) # call when it ends + # or: face.set_mouth(0..3) from live audio amplitude for rough lip-sync + await face.stop() +``` + +Draw your own faces in 46×58 display space: + +```python +import colorface +img = colorface.build_face(mouth=2, look=-4) # PIL image, 46x58 +await mask.upload_frame(colorface.encode(img), slot=1) +await mask.play_frame(1) +``` + +## Other commands + +```python +import asyncio +from mask import ShiningMask +from constants import TextMode + +async def main(): + async with ShiningMask(name_prefix="MASK") as mask: + await mask.set_brightness(80) + await mask.show_image(3) # built-in image + await mask.set_text("HELLO", color=(0, 200, 255), mode=TextMode.SCROLL_LEFT) + +asyncio.run(main()) +``` + +CLI for quick one-offs: `python cli.py light 80`, `python cli.py image 3`, +`python cli.py text "HELLO" --color 00ff00`, `python cli.py repl`. +Utilities: `python scan.py` (find the mask), `python selftest.py `, +`python preview_text.py "HI"`. + +## Protocol notes + +- Command/notify frames are **AES-128-ECB**, 16 bytes, fixed firmware key + `32672f7974ad43451d9c6c894a0e8764` (not a secret; same across vendors). +- Command frame: `[len][ASCII name][args…]` zero-padded to 16, `len = name+args`. +- **Custom image format (solved):** the image display is portrait **46×58 RGB**, + stored transposed. 
Upload it with an image `DATS` (toggle `0x01`), then stream + the chunks **without** waiting for acks, then `DATCP` with a 4-byte timestamp + (no handshake — that was the trick). Frames persist on the mask's flash and + replay via `PLAY` with no logo. + +### GATT characteristics + +| Purpose | UUID | | +|---|---|---| +| Command (encrypted) | `d44bc439-abfd-45a2-b575-925416129600` | write | +| Notify (encrypted) | `d44bc439-abfd-45a2-b575-925416129601` | notify | +| Image/bitmap upload (raw) | `d44bc439-abfd-45a2-b575-92541612960a` | write | +| Audio visualization (encrypted) | `d44bc439-abfd-45a2-b575-92541612960b` | write | + +## Layout (flat) + +``` +main.py run a live animated face +image2mask.py convert any image / GIF and display it +mask.py ShiningMask high-level async API +faceanim.py FaceAnimator (load frames once -> PLAY-loop) +colorface.py 46x58 face frames + image->mask conversion +protocol.py command framing + encoders (no I/O) +crypto.py AES-128-ECB +transport.py bleak BLE: scan / connect / notify / writes +bitmap.py faces.py talking.py cli.py constants.py exceptions.py +scan.py selftest.py preview_text.py utility scripts +test_*.py pytest (no hardware) +NotoSans-Regular.ttf +``` + +## Tests + +```bash +python -m pytest -q # 57 tests, no hardware needed +``` + +The crypto tests decrypt real captured app frames and assert they resolve to the +expected commands; the colorface tests validate image→mask conversion and the +46×58 transpose round-trip. + +## License + +MIT. Reverse-engineering credit: +[mask-go](https://github.com/GoneUp/mask-go), +[Shining-Mask-stuff](https://github.com/BrickCraftDream/Shining-Mask-stuff), +[mask-controller](https://github.com/beclamide/mask-controller), +[BishopFox/shining-mask](https://github.com/BishopFox/shining-mask). 
def text_to_columns(
    text: str,
    *,
    font: str = DEFAULT_FONT,
    font_size: int = 14,
    baseline: int = 12,
) -> List[List[int]]:
    """Turn ``text`` into 1-bit pixel columns for the mask's text display.

    The text is drawn with Pillow onto a grayscale canvas ``MASK_HEIGHT``
    pixels tall, then every pixel is compared against
    ``_GRAY8_ON_THRESHOLD``.

    Args:
        text: String to rasterize. An empty string yields no columns.
        font: Path of the TrueType font to try first.
        font_size: Font size handed to Pillow.
        baseline: Canvas y coordinate of the text baseline.

    Returns:
        One list per canvas column, left to right. Each holds
        ``MASK_HEIGHT`` ints, top row first: 1 for a lit pixel, otherwise 0.
    """
    from PIL import Image, ImageDraw, ImageFont  # local import: optional dep

    # Search order: the requested font, a system NotoSans, then a NotoSans
    # looked up by bare file name. Pillow's built-in font is the last resort.
    search_order = (
        font,
        "/usr/share/fonts/truetype/noto/NotoSans-Regular.ttf",
        "NotoSans-Regular.ttf",
    )
    pil_font = None
    for font_path in search_order:
        try:
            pil_font = ImageFont.truetype(font_path, font_size)
        except OSError:
            continue
        break
    if pil_font is None:
        pil_font = ImageFont.load_default()

    # Empty text -> no columns (matches mask-go, whose canvas width is 0).
    if not text:
        return []
    width = math.ceil(pil_font.getlength(text))
    if width <= 0:
        return []

    canvas = Image.new("L", (width, MASK_HEIGHT), color=0)
    pen = ImageDraw.Draw(canvas)
    try:
        # anchor="ls": x is the left pen origin, y is the text baseline (like Go's Pt).
        pen.text((0, baseline), text, font=pil_font, fill=255, anchor="ls")
    except (TypeError, ValueError):
        # Older Pillow / bitmap fonts: no baseline anchor support.
        pen.text((0, baseline - font_size), text, font=pil_font, fill=255)

    pixels = canvas.load()
    return [
        [1 if pixels[col, row] >= _GRAY8_ON_THRESHOLD else 0 for row in range(MASK_HEIGHT)]
        for col in range(canvas.width)
    ]
def encode_color_array(num_columns: int, color: Color = (255, 255, 255)) -> bytes:
    """Build the color array for a bitmap whose columns all share one color.

    Args:
        num_columns: Number of bitmap columns to emit a color for.
        color: ``(r, g, b)`` applied to every column. Each channel is
            reduced to its low 8 bits.

    Returns:
        ``num_columns`` consecutive RGB triples, 3 bytes per column.
    """
    red, green, blue = color
    triple = bytes(channel & 0xFF for channel in (red, green, blue))
    return triple * num_columns
def _hex_color(s: str):
    """Parse an ``RRGGBB`` hex color (leading ``#`` allowed) for argparse.

    Every character is checked explicitly because ``int(x, 16)`` also
    accepts signs and surrounding whitespace, which would let values such
    as ``-1-1-1`` through as negative channels.

    Args:
        s: Color text from the command line, e.g. ``00ff00`` or ``#00ff00``.

    Returns:
        ``(r, g, b)`` with each channel in ``0..255``.

    Raises:
        argparse.ArgumentTypeError: If the text is not exactly six hex digits.
    """
    digits = s.lstrip("#")
    is_hex = all(ch in "0123456789abcdefABCDEF" for ch in digits)
    if len(digits) != 6 or not is_hex:
        raise argparse.ArgumentTypeError("color must be RRGGBB hex")
    return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))
type=int, default=None) + + s = sub.add_parser("mode", help="set text mode") + s.add_argument("mode", choices=sorted(_MODES)) + + s = sub.add_parser("speed", help="set text speed 0..255") + s.add_argument("speed", type=int) + + s = sub.add_parser("fg", help="set foreground/text color RRGGBB") + s.add_argument("color", type=_hex_color) + + s = sub.add_parser("bg", help="set background color RRGGBB") + s.add_argument("color", type=_hex_color) + + s = sub.add_parser("browse", help="step through built-in images/animations to find faces") + s.add_argument("--kind", choices=["image", "anim"], default="image") + s.add_argument("--start", type=int, default=0) + s.add_argument("--count", type=int, default=106) + s.add_argument("--delay", type=float, default=1.5) + + sub.add_parser("count", help="report how many DIY images are stored") + sub.add_parser("repl", help="interactive control loop") + return p + + +async def _dispatch(mask: ShiningMask, ns: argparse.Namespace): + cmd = ns.cmd + if cmd == "light": + await mask.set_brightness(ns.level) + elif cmd == "image": + await mask.show_image(ns.id) + elif cmd == "anim": + await mask.play_animation(ns.id) + elif cmd == "diy": + await mask.play_diy(ns.id) + elif cmd == "text": + await mask.set_text(ns.text, color=ns.color, mode=_MODES[ns.mode], speed=ns.speed) + elif cmd == "mode": + await mask.set_text_mode(_MODES[ns.mode]) + elif cmd == "speed": + await mask.set_text_speed(ns.speed) + elif cmd == "fg": + await mask.set_foreground_color(*ns.color) + elif cmd == "bg": + await mask.set_background_color(*ns.color) + elif cmd == "count": + print("DIY images stored:", await mask.get_diy_count()) + elif cmd == "browse": + show = mask.play_animation if ns.kind == "anim" else mask.show_image + end = ns.start + ns.count + print(f"Browsing built-in {ns.kind} ids {ns.start}..{end - 1}, {ns.delay}s each.") + print("Watch the mask. 
async def _repl(mask: ShiningMask, parser: argparse.ArgumentParser):
    """Run an interactive prompt that executes one subcommand per line.

    Each line is split shell-style, parsed with ``parser`` and passed to
    ``_dispatch``. Bad input never ends the session: quoting errors,
    argparse errors and dispatch failures are reported and the prompt
    returns. The loop ends on ``quit`` / ``exit`` / ``q`` or end-of-input.

    Args:
        mask: Connected mask handed to ``_dispatch`` for every command.
        parser: The CLI parser used to interpret each typed line.
    """
    print("interactive mode -- type a subcommand (e.g. 'text HI --color ff0000'), 'quit' to exit")
    loop = asyncio.get_running_loop()
    while True:
        try:
            # input() blocks, so it runs on the default executor.
            line = await loop.run_in_executor(None, input, "mask> ")
        except EOFError:
            # Ctrl+D / closed stdin: leave quietly instead of a traceback.
            print()
            break
        line = line.strip()
        if line in ("quit", "exit", "q"):
            break
        if not line:
            continue
        try:
            argv = shlex.split(line)
        except ValueError as exc:
            # shlex rejects e.g. an unbalanced quote; report and re-prompt.
            print("error:", exc)
            continue
        try:
            ns = parser.parse_args(argv)
        except SystemExit:
            # argparse has already printed its usage/error message.
            continue
        try:
            await _dispatch(mask, ns)
        except Exception as exc:  # keep the REPL alive
            print("error:", exc)
(Format reverse-engineered from +BishopFox's fox image + an on-device colored-corner test: ``display[x,y] == +raw[y,x]``, channel order RGB.) + +Frames produced here are raw byte blobs ready for +:meth:`shiningmask.ShiningMask.upload_raw_image`. +""" + +from __future__ import annotations + +from typing import List + +from PIL import Image, ImageDraw, ImageOps + +DISPLAY_W, DISPLAY_H = 46, 58 + +DEFAULT_EYE = (0, 230, 255) # cyan +DEFAULT_MOUTH = (255, 50, 50) # red +WHITE = (255, 255, 255) + + +def build_face(mouth: int = 0, eyes: bool = True, look: int = 0, smile: bool = False, + *, wide: bool = False, frown: bool = False, wink: int = 0, + angry: bool = False, eye_color=DEFAULT_EYE, mouth_color=DEFAULT_MOUTH, + sclera_color=WHITE) -> Image.Image: + """Draw a face in display space (46x58 RGB). + + ``mouth`` 0=closed-smile .. 3=wide-open; ``eyes`` False=blink (both eyes); + ``look`` shifts the eyes left(-)/right(+); ``smile`` = big grin; + ``wide`` = wide (surprised) eyes; ``frown`` = downturned (sad) mouth; + ``wink`` closes ONE eye (-1=left, +1=right); ``angry`` adds down-slanted + brows. ``eye_color`` / ``mouth_color`` / ``sclera_color`` are RGB tuples + (sclera = the white of the eye). + """ + d = Image.new("RGB", (DISPLAY_W, DISPLAY_H), (0, 0, 0)) + g = ImageDraw.Draw(d) + ew, eh0, eh1 = (8, 12, 32) if wide else (6, 15, 29) + for side, cx in ((-1, DISPLAY_W // 2 - 10 + look), (1, DISPLAY_W // 2 + 10 + look)): + if eyes and wink != side: + g.ellipse([cx - ew, eh0, cx + ew, eh1], fill=sclera_color) + g.ellipse([cx - ew + 3, eh0 + 4, cx + ew - 3, eh1 - 2], fill=eye_color) + g.ellipse([cx - 1, (eh0 + eh1) // 2 - 1, cx + 1, (eh0 + eh1) // 2 + 2], fill=(0, 0, 0)) + else: + g.line([cx - 6, 23, cx + 6, 23], fill=eye_color, width=3) + if angry: + # down-slanted brows: inner end (toward the nose) pulled low -> a scowl. 
+ by = eh0 - 4 + g.line([cx - 7, by + 3 * side, cx + 7, by - 3 * side], fill=eye_color, width=3) + cy = 44 + cx = DISPLAY_W // 2 + if smile: + g.chord([cx - 14, cy - 10, cx + 14, cy + 10], start=10, end=170, fill=mouth_color) + elif frown: + g.arc([cx - 13, cy + 2, cx + 13, cy + 17], start=200, end=340, fill=mouth_color, width=4) + elif mouth <= 0: + # closed mouth: a clear smile line + g.arc([cx - 13, cy - 7, cx + 13, cy + 7], start=20, end=160, fill=mouth_color, width=4) + else: + # open mouth: a big, strongly-graded oval so lip-sync reads clearly. + hw = 9 + mouth * 2 # half-width: t1=11, t2=13, t3=15 + hh = 4 + mouth * 3 # half-height: t1=7, t2=10, t3=13 + g.ellipse([cx - hw, cy - hh, cx + hw, cy + hh], fill=mouth_color) + return d + + +def encode(image: Image.Image) -> bytes: + """Encode a 46x58 display image into raw upload bytes (transpose -> 58x46 RGB).""" + if image.size != (DISPLAY_W, DISPLAY_H): + image = image.resize((DISPLAY_W, DISPLAY_H)) + if image.mode != "RGB": + image = image.convert("RGB") + return image.transpose(Image.Transpose.TRANSPOSE).tobytes() + + +def image_to_frame(image: Image.Image) -> bytes: + """Encode any image (e.g. a photo/drawing) as a mask frame, fit to 46x58.""" + return encode(image) + + +# --------------------------------------------------------------------------- +# Convert arbitrary images / animations to the mask +# --------------------------------------------------------------------------- + +def _apply_oval(image: Image.Image, bg=(0, 0, 0)) -> Image.Image: + """Black out the corners so the image matches the physical oval panel.""" + alpha = Image.new("L", image.size, 0) + ImageDraw.Draw(alpha).ellipse([0, 0, image.width - 1, image.height - 1], fill=255) + base = Image.new("RGB", image.size, bg) + base.paste(image, (0, 0), alpha) + return base + + +def fit_to_display(image: Image.Image, *, fit: str = "contain", + bg=(0, 0, 0), oval: bool = False) -> Image.Image: + """Fit any image into the 46x58 portrait display. 
def load_frames(path_or_image, *, max_frames: int = 16, fit: str = "contain",
                bg=(0, 0, 0), oval: bool = False) -> List[Image.Image]:
    """Load the frames of a still or animated image, each fit to 46x58.

    A still image yields one frame. An animation yields at most
    ``max_frames`` frames, spread evenly over the whole animation and
    always starting with frame 0. A ``max_frames`` below 1 is treated as 1.

    Args:
        path_or_image: A file path, or an already opened PIL image.
        max_frames: Upper bound on the number of frames returned.
        fit: Fit mode forwarded to ``fit_to_display``.
        bg: Background color forwarded to ``fit_to_display``.
        oval: Oval-mask flag forwarded to ``fit_to_display``.

    Returns:
        The fitted frames in playback order.
    """
    opened_here = not isinstance(path_or_image, Image.Image)
    img = Image.open(path_or_image) if opened_here else path_or_image
    try:
        total = getattr(img, "n_frames", 1)
        if total <= 1:
            return [fit_to_display(img, fit=fit, bg=bg, oval=oval)]
        # Clamp so max_frames=0 cannot divide by zero and short animations
        # are returned in full.
        wanted = min(total, max(1, max_frames))
        frames = []
        for slot in range(wanted):
            # Integer spacing covers the whole animation, not just a prefix.
            img.seek(slot * total // wanted)
            frames.append(fit_to_display(img.copy(), fit=fit, bg=bg, oval=oval))
        return frames
    finally:
        # Only close what this function opened; a caller-supplied image
        # stays usable.
        if opened_here:
            img.close()
Drives the upload +# handshake (DATSOK / REOK / DATCPOK). +NOTIFY_CHAR_UUID = "d44bc439-abfd-45a2-b575-925416129601" + +# Upload characteristic: raw (UNencrypted) image/bitmap data chunks (write). +UPLOAD_CHAR_UUID = "d44bc439-abfd-45a2-b575-92541612960a" + +# Audio-visualization characteristic: AES-encrypted 16-byte frames (write). +AUDIO_CHAR_UUID = "d44bc439-abfd-45a2-b575-92541612960b" + +# The mask advertises a local name like "MASK-02A711"; everyone scans by prefix. +DEVICE_NAME_PREFIX = "MASK" + + +# --- Protocol sizing ---------------------------------------------------------- + +# Mask display is 16 pixels tall (arbitrarily wide for scrolling text). +MASK_HEIGHT = 16 + +# Command/notify frames are AES blocks padded to this size. +FRAME_SIZE = 16 + +# Upload packets carry at most this many data bytes (the mask rejects packets +# whose total on-air size exceeds 100 bytes; 2 bytes are header, so 98 remain). +UPLOAD_MAX_DATA = 98 + + +# --- Text scroll modes (MODE command argument) -------------------------------- + +class TextMode: + """Text display modes for the MODE command. + + Values confirmed by decrypting real app traffic (mask-controller/codes.js) + and cross-checked with mask-go:: + + 1 = steady / solid (text shown, no scroll animation) + 2 = blink + 3 = scroll right-to-left + 4 = scroll left-to-right + + The protocol doc mislabels 1 as "off" and declares 5..255 as n/a (the + firmware reverts them to mode 1); the official app only ever sends 1..4. 
+ """ + + STEADY = 1 + SOLID = 1 # alias for STEADY + BLINK = 2 + SCROLL_LEFT = 3 # scroll right-to-left + SCROLL_RIGHT = 4 # scroll left-to-right + + +# --- DATS upload toggle byte -------------------------------------------------- + +class UploadKind: + BITMAP = 0 # text bitmap + per-column color array + IMAGE = 1 # full-color DIY image (raw RGB) diff --git a/vendor/mask/crypto.py b/vendor/mask/crypto.py new file mode 100644 index 0000000..b6b0165 --- /dev/null +++ b/vendor/mask/crypto.py @@ -0,0 +1,62 @@ +"""AES-128-ECB framing used by the Shining Mask BLE protocol. + +Every command written to the command characteristic is a 16-byte block +encrypted with a fixed AES-128 key in ECB mode, and notifications coming back +on the notify characteristic are encrypted the same way. + +The key is baked into the mask firmware / the official "Shining Mask" app -- +it is *not* a secret and there is no key exchange. It was recovered by the +reverse-engineering community (see the reddit post referenced in the project +README) and is identical across every vendor variant of this mask. + +This module prefers ``cryptography`` (which is what the rest of the robotics +stack already ships) and transparently falls back to ``pycryptodome`` if only +that is installed. +""" + +from __future__ import annotations + +# Fixed device key -- hardcoded in firmware, not a credential. Do not "rotate". 
def _require_whole_blocks(payload: bytes) -> None:
    """Raise ``ValueError`` unless ``payload`` is a non-empty run of whole AES blocks."""
    if len(payload) == 0 or len(payload) % BLOCK_SIZE != 0:
        raise ValueError(f"data length must be a positive multiple of {BLOCK_SIZE}, got {len(payload)}")


try:  # Preferred backend: cryptography (pyca)
    from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
except ImportError:  # pragma: no cover - exercised only when pyca is absent
    from Crypto.Cipher import AES  # type: ignore

    def encrypt(block: bytes) -> bytes:
        """Encrypt one or more 16-byte blocks with AES-128-ECB."""
        _require_whole_blocks(block)
        return AES.new(MASK_AES_KEY, AES.MODE_ECB).encrypt(bytes(block))

    def decrypt(data: bytes) -> bytes:
        """Decrypt one or more 16-byte blocks with AES-128-ECB."""
        _require_whole_blocks(data)
        return AES.new(MASK_AES_KEY, AES.MODE_ECB).decrypt(bytes(data))

    BACKEND = "pycryptodome"
else:
    def _cipher() -> "Cipher":
        """Return a new AES-ECB cipher object keyed with ``MASK_AES_KEY``."""
        return Cipher(algorithms.AES(MASK_AES_KEY), modes.ECB())  # noqa: S305 - ECB is what the device uses

    def encrypt(block: bytes) -> bytes:
        """Encrypt one or more 16-byte blocks with AES-128-ECB."""
        _require_whole_blocks(block)
        encryptor = _cipher().encryptor()
        return encryptor.update(bytes(block)) + encryptor.finalize()

    def decrypt(data: bytes) -> bytes:
        """Decrypt one or more 16-byte blocks with AES-128-ECB."""
        _require_whole_blocks(data)
        decryptor = _cipher().decryptor()
        return decryptor.update(bytes(data)) + decryptor.finalize()

    BACKEND = "cryptography"
+"""Exception types for the shiningmask package.""" + +from __future__ import annotations + + +class MaskError(Exception): + """Base class for all mask errors.""" + + +class MaskNotFound(MaskError): + """No mask matching the scan filter was found in time.""" + + +class MaskNotConnected(MaskError): + """An operation needed a live BLE connection but none was established.""" + + +class NotificationTimeout(MaskError): + """The mask did not send the expected notification in time.""" + + +class UploadError(MaskError): + """A bitmap/image upload failed or was already in progress.""" diff --git a/vendor/mask/faceanim.py b/vendor/mask/faceanim.py new file mode 100644 index 0000000..26375bc --- /dev/null +++ b/vendor/mask/faceanim.py @@ -0,0 +1,232 @@ +"""Live animated face for the mask, driven by pre-uploaded DIY frames + PLAY. + +This is the smooth path (no per-frame upload logo): a frame set is uploaded to +the mask once (slow, one-time), then animation is just ``PLAY `` commands +(fast). The background loop blinks, glances around when idle, and moves the +mouth while "speaking". + +Driving the mouth (best -> simplest): + * :meth:`set_audio_level` (0..1 from live audio RMS) -> real lip-sync; the + mouth opens proportionally to loudness and closes on pauses. Use this with + Gemini Live / TTS audio for a human-looking mouth. + * :meth:`set_speaking` (True/False) -> a natural *auto* talk envelope (varied + open/close with word-break pauses) when you don't have amplitude. + * :meth:`set_mouth` (0..3) -> drive a fixed level directly. + +Interactivity: + * :meth:`set_expression` holds an expression (e.g. "surprised", "smile", + "listening") over the idle/talk animation until cleared with ``None``. 
+""" + +from __future__ import annotations + +import asyncio +import random +import time +from typing import Dict, Optional + +import colorface + +_GLANCE = ["look_left", "neutral", "look_right", "neutral"] +_MOUTH_BY_LEVEL = ["neutral", "talk1", "talk2", "talk3"] + + +class FaceAnimator: + def __init__(self, mask, frames: Optional[Dict[str, bytes]] = None, *, + fps: float = 8.0, brightness: int = 95): + self.mask = mask + self.frames = frames or colorface.default_frames() + self.fps = fps + self.brightness = brightness + self.slots: Dict[str, int] = {} + + # drive state + self._speaking = False # auto talk envelope + self._mouth: Optional[int] = None # fixed manual level + self._level = 0.0 # live audio level 0..1 (lip-sync) + self._lipsync = False + self._expression: Optional[str] = None # held override frame + self._blink = True + + # internal animation state + self._talk_cur = 0.0 # smoothed auto-talk mouth position + self._talk_target = 0.0 + self._talk_hold = 0 + + self._task: Optional[asyncio.Task] = None + self._stop = False + self._last: Optional[str] = None + + # -- lifecycle ------------------------------------------------------------ + + async def load(self, force: bool = False): + """Upload the frame set to the mask (skipped if already present). + + Resilient to a flaky BLE link: if the connection drops mid-upload, it + reconnects and retries the current frame, so a weak signal slows the + one-time upload rather than failing it. + """ + names = list(self.frames) + count = await self.mask.get_diy_count(timeout=4.0) or 0 + if not force and count >= len(names): + # Frames already stored (DIY images persist on the mask's flash). + # ``>=`` tolerates the CHEC count being off-by-one on some units, so + # we don't needlessly re-upload (which is slow + risks corruption). 
+ self.slots = {name: i + 1 for i, name in enumerate(names)} + return + await self.mask.clear_diy() + for i, (name, data) in enumerate(self.frames.items(), start=1): + await self._upload_frame_resilient(i, data) + self.slots[name] = i + await asyncio.sleep(0.2) + + async def _upload_frame_resilient(self, slot: int, data: bytes, tries: int = 5): + from bleak.exc import BleakError + for attempt in range(tries): + try: + await self.mask.upload_frame(data, slot) + return + except (BleakError, EOFError, OSError) as exc: + if attempt == tries - 1: + raise + # link dropped mid-upload -> reconnect and re-send this frame + # (a fresh DATS resets any half-finished slot on the mask) + try: + await self.mask.disconnect() + except Exception: + pass + await asyncio.sleep(1.0) + try: + await self.mask.connect(timeout=15.0, attempts=8) + await self.mask.set_brightness(self.brightness) + except Exception: + pass # next attempt's upload_frame will surface a clear error + + async def start(self, *, reload: bool = False): + await self.mask.set_brightness(self.brightness) + await self.load(force=reload) + self._stop = False + await self._play("neutral") + self._task = asyncio.create_task(self._loop()) + return self + + async def stop(self): + self._stop = True + if self._task: + self._task.cancel() + try: + await self._task + except asyncio.CancelledError: + pass + self._task = None + + async def __aenter__(self): + return await self.start() + + async def __aexit__(self, *exc): + await self.stop() + + # -- control -------------------------------------------------------------- + + def set_audio_level(self, level: float): + """Real lip-sync: 0..1 live audio amplitude -> mouth openness. + + Call this as fast as you like from an audio callback; the loop samples + it at ``fps`` and the level decays so the mouth closes on silence. 
+ """ + self._lipsync = True + self._speaking = False + self._mouth = None + self._level = max(self._level * 0.4, min(1.0, float(level))) # fast attack + + def set_speaking(self, speaking: bool): + """Auto talk envelope while speaking; closes the mouth when False.""" + self._mouth = None + self._lipsync = False + self._speaking = bool(speaking) + if not speaking: + self._talk_cur = self._talk_target = 0.0 + + def set_mouth(self, level: int): + """Drive the mouth at a fixed level (0=closed..3=wide).""" + self._speaking = False + self._lipsync = False + self._mouth = max(0, min(3, int(level))) + + def set_expression(self, name: Optional[str]): + """Hold an expression frame over the animation (None clears it).""" + self._expression = name if name in self.frames else None + + def set_blink(self, enabled: bool): + self._blink = bool(enabled) + + async def show(self, name: str): + """Show one named frame immediately (one-off).""" + await self._play(name) + + # -- internals ------------------------------------------------------------ + + @staticmethod + def _level_to_mouth(level: float) -> int: + if level < 0.06: + return 0 + if level < 0.16: + return 1 + if level < 0.32: + return 2 + return 3 + + def _auto_talk_mouth(self) -> int: + """A speech-like envelope: ease toward random targets, with word breaks.""" + if self._talk_hold <= 0: + # mostly open of varying degree, with occasional full closes (breaks) + self._talk_target = random.choice([0, 1, 1, 2, 2, 3, 3, 2, 1, 0]) + self._talk_hold = random.randint(1, 3) + self._talk_hold -= 1 + # ease current toward target so motion is smooth, not steppy + self._talk_cur += (self._talk_target - self._talk_cur) * 0.6 + return int(round(self._talk_cur)) + + async def _play(self, name: str): + slot = self.slots.get(name) + if slot is None or name == self._last: + return + try: + await self.mask.play_diy(slot) + self._last = name + except Exception: + self._last = None # retry next tick on transient BLE error + + async def 
_loop(self): + dt = 1.0 / self.fps + next_blink = time.monotonic() + random.uniform(2.0, 5.0) + next_glance = time.monotonic() + random.uniform(4.0, 9.0) + while not self._stop: + now = time.monotonic() + + # periodic blink (skipped while a held expression is active) + if self._blink and self._expression is None and now >= next_blink: + await self._play("blink") + await asyncio.sleep(0.12) + next_blink = now + random.uniform(2.5, 6.0) + + if self._expression is not None: + await self._play(self._expression) + elif self._lipsync: + await self._play(_MOUTH_BY_LEVEL[self._level_to_mouth(self._level)]) + self._level *= 0.55 # release toward closed + if self._level < 0.02: + self._lipsync = False # idle once silent + elif self._speaking: + await self._play(_MOUTH_BY_LEVEL[self._auto_talk_mouth()]) + elif self._mouth is not None: + await self._play(_MOUTH_BY_LEVEL[self._mouth]) + elif now >= next_glance: + for g in _GLANCE: + await self._play(g) + await asyncio.sleep(0.25) + next_glance = now + random.uniform(5.0, 11.0) + else: + await self._play("neutral") + + await asyncio.sleep(dt) diff --git a/vendor/mask/faces.py b/vendor/mask/faces.py new file mode 100644 index 0000000..1cd8682 --- /dev/null +++ b/vendor/mask/faces.py @@ -0,0 +1,83 @@ +"""Generate simple expressive faces for the mask (16 rows tall). + +The display is ~48x16 on the units we measured. Faces are drawn into a column +list (``width`` columns, each 16 ints, row 0 = top) compatible with +``bitmap.encode_bitmap``. Color is per-column, so a face is most reliable in a +single color (eyes and mouth can share columns). + +``mouth`` 0..4 = closed..wide-open; ``eyes`` True/False = open/blink; +``look`` shifts the eyes left(-)/right(+). 
+""" + +from __future__ import annotations + +from typing import List + +from constants import MASK_HEIGHT + +DISPLAY_WIDTH = 48 # measured on a MASK-05xxxx unit + + +def _blank(width: int) -> List[List[int]]: + return [[0] * width for _ in range(MASK_HEIGHT)] + + +def _fill(rows, r0, r1, c0, c1, width): + for r in range(max(0, r0), min(MASK_HEIGHT, r1 + 1)): + for c in range(max(0, c0), min(width, c1 + 1)): + rows[r][c] = 1 + + +def face(mouth: int = 0, eyes: bool = True, look: int = 0, + width: int = DISPLAY_WIDTH) -> List[List[int]]: + """Build a face and return it as columns (width x 16).""" + rows = _blank(width) + cx = width // 2 + quarter = width // 4 + + # --- eyes --- + eye_hw = 3 # half-width + lx, rx = cx - quarter, cx + quarter + if eyes: + _fill(rows, 3, 7, lx - eye_hw + look, lx + eye_hw + look, width) + _fill(rows, 3, 7, rx - eye_hw + look, rx + eye_hw + look, width) + else: # blink: a thin closed line + _fill(rows, 6, 6, lx - eye_hw, lx + eye_hw, width) + _fill(rows, 6, 6, rx - eye_hw, rx + eye_hw, width) + + # --- mouth --- + m = max(0, min(4, mouth)) + if m == 0: # closed: a small smile line + _fill(rows, 12, 12, cx - 7, cx + 7, width) + else: + top, bot = 12 - m, 11 + m + hw = 8 - (m - 1) # narrows as it opens, for an "O" shape + _fill(rows, top, bot, cx - hw, cx + hw, width) + + return [[rows[r][c] for r in range(MASK_HEIGHT)] for c in range(width)] + + +# Convenience named expressions -> (mouth, eyes, look) +EXPRESSIONS = { + "neutral": dict(mouth=0, eyes=True), + "blink": dict(mouth=0, eyes=False), + "talk_small": dict(mouth=1, eyes=True), + "talk_mid": dict(mouth=2, eyes=True), + "talk_open": dict(mouth=3, eyes=True), + "talk_wide": dict(mouth=4, eyes=True), + "look_left": dict(mouth=0, eyes=True, look=-3), + "look_right": dict(mouth=0, eyes=True, look=3), +} + + +def expression(name: str, width: int = DISPLAY_WIDTH) -> List[List[int]]: + return face(width=width, **EXPRESSIONS[name]) + + +def ascii_preview(columns) -> str: + """Render columns 
as ASCII art (for offline checks).""" + width = len(columns) + return "\n".join( + "".join("#" if columns[c][r] else " " for c in range(width)) + for r in range(MASK_HEIGHT) + ) diff --git a/vendor/mask/facetrack.py b/vendor/mask/facetrack.py new file mode 100644 index 0000000..c413c4d --- /dev/null +++ b/vendor/mask/facetrack.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 +"""Real-time "motion face": track your face on the webcam and mirror it on the mask. + +Following the community approach, this does NOT stream raw pixels over BLE (too +slow). Instead it tracks your expression on the host with OpenCV and triggers the +pre-uploaded face frames with fast PLAY commands: + + mouth open -> talk1..talk3 head turn -> look_left / look_right + smile -> smile eyes shut -> blink else -> neutral + +Detection uses OpenCV Haar cascades (face / eyes / smile) -- no MediaPipe/dlib, +so nothing new is installed into g1_env. It's approximate (especially mouth +openness); for precise lip-sync, install mediapipe in a dedicated env and swap +the tracker (the mask side is unchanged). 
+ +Usage: + python facetrack.py # webcam -> mask, mirrored + python facetrack.py --show # also open a preview window + python facetrack.py --no-mask --show # tune detection without the mask + python facetrack.py --image face.jpg # test detection on one image (offline) + python facetrack.py --camera 1 --address AA:BB:CC:DD:EE:FF +""" + +import argparse +import asyncio +import time + +import cv2 + +import colorface # noqa: F401 (kept so FaceAnimator's default frames import cleanly) +from faceanim import FaceAnimator +from mask import ShiningMask +from exceptions import MaskNotFound + +_HC = cv2.data.haarcascades + + +class ExpressionTracker: + """Maps a webcam frame to one of the mask's face-frame names.""" + + def __init__(self, *, mirror: bool = True, + mouth_levels=(0.16, 0.22, 0.28), look_thresh: float = 0.18): + self.mirror = mirror + self.mouth_levels = mouth_levels # smile-box height / face height -> 1/2/3 + self.look_thresh = look_thresh + self._face = cv2.CascadeClassifier(_HC + "haarcascade_frontalface_default.xml") + self._eye = cv2.CascadeClassifier(_HC + "haarcascade_eye.xml") + self._smile = cv2.CascadeClassifier(_HC + "haarcascade_smile.xml") + # smoothing / debounce state + self._look = 0 + self._blink_frames = 0 + + def detect(self, frame_bgr): + """Return (frame_name, annotated_bgr). 
frame_name is a FaceAnimator key.""" + if self.mirror: + frame_bgr = cv2.flip(frame_bgr, 1) + gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY) + faces = self._face.detectMultiScale(gray, 1.2, 5, minSize=(90, 90)) + if len(faces) == 0: + return "neutral", frame_bgr + x, y, w, h = max(faces, key=lambda f: f[2] * f[3]) + cv2.rectangle(frame_bgr, (x, y), (x + w, y + h), (0, 220, 255), 2) + + # --- head turn -> look (with hysteresis) --- + fcx = x + w / 2.0 + off = (fcx - frame_bgr.shape[1] / 2.0) / (frame_bgr.shape[1] / 2.0) + if off < -self.look_thresh: + self._look = -1 + elif off > self.look_thresh: + self._look = 1 + elif abs(off) < self.look_thresh * 0.5: + self._look = 0 + + # --- eyes -> blink (debounced) --- + eye_roi = gray[y:y + int(h * 0.55), x:x + w] + eyes = self._eye.detectMultiScale(eye_roi, 1.1, 6, + minSize=(int(w * 0.12), int(w * 0.12))) + self._blink_frames = self._blink_frames + 1 if len(eyes) == 0 else 0 + blink = self._blink_frames in (1, 2) # only the first couple of eyeless frames + + # --- mouth openness via the smile cascade on the lower face --- + my0, my1 = y + int(h * 0.55), y + h + mx0, mx1 = x + int(w * 0.15), x + int(w * 0.85) + mouth_roi = gray[my0:my1, mx0:mx1] + mouth_level, smiling, box = self._mouth(mouth_roi, h) + if box is not None: + bx, by, bw, bh = box + cv2.rectangle(frame_bgr, (mx0 + bx, my0 + by), + (mx0 + bx + bw, my0 + by + bh), (0, 255, 0), 1) + + # --- map to a frame name (priority order) --- + if blink: + name = "blink" + elif mouth_level >= 1: + name = f"talk{mouth_level}" + elif smiling: + name = "smile" + elif self._look < 0: + name = "look_left" + elif self._look > 0: + name = "look_right" + else: + name = "neutral" + + cv2.putText(frame_bgr, name, (x, max(0, y - 8)), + cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2) + return name, frame_bgr + + def _mouth(self, mouth_roi, face_h): + if mouth_roi.size == 0: + return 0, False, None + smiles = self._smile.detectMultiScale(mouth_roi, 1.7, 18, + minSize=(int(face_h * 
0.18), int(face_h * 0.08))) + if len(smiles) == 0: + return 0, False, None + box = max(smiles, key=lambda s: s[2] * s[3]) + ratio = box[3] / float(face_h) # mouth-box height / face height + lo, mid, hi = self.mouth_levels + level = 3 if ratio >= hi else 2 if ratio >= mid else 1 if ratio >= lo else 0 + return level, True, box + + +async def run(args): + tracker = ExpressionTracker(mirror=not args.no_mirror) + + # offline single-image test path + if args.image: + img = cv2.imread(args.image) + if img is None: + print(f"could not read {args.image}") + return + name, annotated = tracker.detect(img) + print("detected expression:", name) + if args.show: + cv2.imshow("facetrack", annotated); cv2.waitKey(0); cv2.destroyAllWindows() + return + + cap = cv2.VideoCapture(args.camera) + if not cap.isOpened(): + print(f"could not open camera {args.camera}") + return + + face = None + mask = None + if not args.no_mask: + mask = ShiningMask(address=args.address, name_prefix=args.name_prefix) + print("connecting to mask ...") + try: + await mask.connect(timeout=20.0, attempts=8) + except MaskNotFound as exc: + print(f"could not find the mask: {exc}; running preview-only") + mask = None + if mask is not None: + await mask.set_brightness(args.brightness) + face = FaceAnimator(mask) + print("loading face frames (one-time) ...") + await face.load(force=args.reload) + + print("tracking — move/talk to your camera. 
Ctrl+C (or 'q' in the window) to stop.") + min_dt = 1.0 / args.fps + last = 0.0 + try: + while True: + ok, frame = cap.read() + if not ok: + break + name, annotated = tracker.detect(frame) + now = time.monotonic() + if face is not None and now - last >= min_dt: + await face.show(name) # PLAY the matching frame (deduped) + last = now + if args.show: + cv2.imshow("facetrack (q to quit)", annotated) + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + await asyncio.sleep(0.001) + except KeyboardInterrupt: + pass + finally: + cap.release() + if args.show: + cv2.destroyAllWindows() + if mask is not None: + await mask.disconnect() + + +def main(): + ap = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + ap.add_argument("--camera", type=int, default=0) + ap.add_argument("--show", action="store_true", help="open a preview window with overlay") + ap.add_argument("--no-mask", action="store_true", help="track/preview only, don't touch the mask") + ap.add_argument("--no-mirror", action="store_true", help="don't horizontally flip the camera") + ap.add_argument("--image", help="run detection on a single image file (offline test)") + ap.add_argument("--fps", type=float, default=10.0, help="max mask updates/sec") + ap.add_argument("--reload", action="store_true", help="force re-upload of the frame set") + ap.add_argument("--brightness", type=int, default=95) + ap.add_argument("--address", help="mask BLE MAC") + ap.add_argument("--name-prefix", default="MASK") + asyncio.run(run(ap.parse_args())) + + +if __name__ == "__main__": + main() diff --git a/vendor/mask/image2mask.py b/vendor/mask/image2mask.py new file mode 100644 index 0000000..565dfd5 --- /dev/null +++ b/vendor/mask/image2mask.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python3 +"""Convert ANY image (or animated GIF/WEBP) and display it on the mask. + +Still image -> fit to the 46x58 oval display and show it. 
+Animated GIF -> upload its frames as DIY images and PLAY-loop them. + +Usage: + python image2mask.py photo.jpg + python image2mask.py logo.png --fit cover --oval + python image2mask.py dance.gif --max-frames 12 --fps 8 --loops 5 + python image2mask.py photo.jpg --save out.bin # just save raw bytes, no mask + python image2mask.py photo.jpg --preview # print ASCII preview, no mask + +Options: + --fit contain|cover|stretch how to fit the image (default contain) + --bg RRGGBB letterbox/oval background color (default 000000) + --oval black out corners to match the oval panel + --brightness N 0..255 (default 95) +""" + +import argparse +import asyncio +import sys + +import colorface +from mask import ShiningMask +from exceptions import MaskNotFound + + +def _hex(s): + s = s.lstrip("#") + return (int(s[0:2], 16), int(s[2:4], 16), int(s[4:6], 16)) + + +def _ascii(image): + img = image.convert("L") + ramp = " .:-=+*#%@" + out = [] + for y in range(0, colorface.DISPLAY_H, 2): + row = "".join(ramp[min(len(ramp) - 1, img.getpixel((x, y)) * len(ramp) // 256)] + for x in range(colorface.DISPLAY_W)) + out.append(row) + return "\n".join(out) + + +async def run(args): + frames = colorface.load_frames( + args.image, max_frames=args.max_frames, fit=args.fit, + bg=_hex(args.bg), oval=args.oval, + ) + print(f"loaded {len(frames)} frame(s) from {args.image} (fit={args.fit}, oval={args.oval})") + + if args.preview: + print(_ascii(frames[0])) + return + if args.save: + with open(args.save, "wb") as fh: + fh.write(colorface.encode(frames[0])) + print(f"saved raw frame ({colorface.DISPLAY_W}x{colorface.DISPLAY_H} -> 8004 B) to {args.save}") + return + + mask = ShiningMask(address=args.address, name_prefix=args.name_prefix) + print("connecting ...") + try: + await mask.connect(timeout=20.0, attempts=8) + except MaskNotFound as exc: + print(f"could not find the mask: {exc}") + return + try: + await mask.set_brightness(args.brightness) + if len(frames) == 1: + await 
mask.upload_raw_image(colorface.encode(frames[0]), index=1) + print("image shown on the mask.") + else: + print(f"uploading {len(frames)} frames (one-time, ~{len(frames) * 4}s) ...") + await mask.clear_diy() + for i, fr in enumerate(frames, start=1): + await mask.upload_frame(colorface.encode(fr), i) + print(f" frame {i}/{len(frames)}", flush=True) + print(f"animating {args.loops} loop(s) at {args.fps} fps (Ctrl+C to stop) ...") + try: + for _ in range(args.loops): + for i in range(1, len(frames) + 1): + await mask.play_frame(i) + await asyncio.sleep(1.0 / args.fps) + except KeyboardInterrupt: + pass + finally: + await mask.disconnect() + + +def main(): + ap = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + ap.add_argument("image", help="path to any image or animated GIF/WEBP") + ap.add_argument("--fit", choices=["contain", "cover", "stretch"], default="contain") + ap.add_argument("--bg", default="000000", help="background RRGGBB") + ap.add_argument("--oval", action="store_true", help="black out corners (oval shape)") + ap.add_argument("--max-frames", type=int, default=16) + ap.add_argument("--fps", type=float, default=8.0) + ap.add_argument("--loops", type=int, default=10) + ap.add_argument("--brightness", type=int, default=95) + ap.add_argument("--save", help="save the raw 8004-byte frame to a file instead of uploading") + ap.add_argument("--preview", action="store_true", help="print an ASCII preview instead of uploading") + ap.add_argument("--address", help="connect to a specific BLE MAC") + ap.add_argument("--name-prefix", default="MASK") + asyncio.run(run(ap.parse_args())) + + +if __name__ == "__main__": + main() diff --git a/vendor/mask/main.py b/vendor/mask/main.py new file mode 100644 index 0000000..5d25ea3 --- /dev/null +++ b/vendor/mask/main.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +"""Marcus animated face on the Shining LED mask. 
+ +Connects to the mask, uploads a face frame-set once (DIY images, persisted on +the mask's flash), then runs a live animated face: idle blinking + glancing, and +a moving mouth while "speaking" -- all via fast PLAY commands (no upload logo). + +Usage: + python main.py # demo: idle, then a simulated talking burst + python main.py --reload # force re-uploading the frame set + python main.py --address AA:BB:CC:DD:EE:FF + python main.py --talk # start speaking immediately and stay talking + +Marcus / Sanad integration: + face = FaceAnimator(mask); await face.start() + face.set_speaking(True) # call when TTS playback starts + face.set_speaking(False) # call when it ends + # ...or face.set_mouth(0..3) from live audio amplitude for rough lip-sync. +""" + +import argparse +import asyncio + +from faceanim import FaceAnimator +from mask import ShiningMask +from exceptions import MaskNotFound + + +async def run(args): + mask = ShiningMask(address=args.address, name_prefix=args.name_prefix) + print("connecting to mask ...") + try: + await mask.connect(timeout=20.0, attempts=8) + except MaskNotFound as exc: + print(f"could not find the mask: {exc}") + return + print("connected.") + + face = FaceAnimator(mask, fps=args.fps, brightness=args.brightness) + print("loading face frames (one-time ~25s if not already on the mask) ...") + await face.start(reload=args.reload) + print("face is live. 
Ctrl+C to stop.") + + try: + if args.talk: + face.set_speaking(True) + while True: + await asyncio.sleep(1.0) + else: + # demo: alternate idle and 'speaking' so you can see both modes + while True: + print(" [idle] blinking / glancing ~6s") + await asyncio.sleep(6) + print(" [speaking] mouth animating ~6s") + face.set_speaking(True) + await asyncio.sleep(6) + face.set_speaking(False) + except KeyboardInterrupt: + print("\nstopping ...") + finally: + await face.stop() + await mask.disconnect() + + +def main(): + ap = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + ap.add_argument("--address", help="connect to a specific BLE MAC instead of scanning") + ap.add_argument("--name-prefix", default="MASK") + ap.add_argument("--reload", action="store_true", help="force re-upload of the frame set") + ap.add_argument("--talk", action="store_true", help="start in talking mode and stay there") + ap.add_argument("--fps", type=float, default=8.0) + ap.add_argument("--brightness", type=int, default=95) + asyncio.run(run(ap.parse_args())) + + +if __name__ == "__main__": + main() diff --git a/vendor/mask/mask.py b/vendor/mask/mask.py new file mode 100644 index 0000000..ff2681f --- /dev/null +++ b/vendor/mask/mask.py @@ -0,0 +1,331 @@ +"""High-level async controller for the Shining Mask. 
+ +Example:: + + import asyncio + from mask import ShiningMask + + async def main(): + async with ShiningMask() as mask: + await mask.set_brightness(80) + await mask.show_image(3) + await mask.set_text("HELLO", color=(0, 255, 0)) + + asyncio.run(main()) +""" + +from __future__ import annotations + +import asyncio +import logging +from typing import Optional, Sequence, Tuple + +import bitmap, protocol +from constants import TextMode, UploadKind +from exceptions import UploadError +from transport import MaskTransport + +log = logging.getLogger("shiningmask.mask") + +Color = Tuple[int, int, int] + + +class ShiningMask: + """Async controller for one Shining Mask over BLE.""" + + def __init__( + self, + address: Optional[str] = None, + *, + name_prefix: str = "MASK", + adapter: Optional[str] = None, + write_with_response: Optional[bool] = None, + transport: Optional[MaskTransport] = None, + ): + self.transport = transport or MaskTransport( + address, + name_prefix=name_prefix, + adapter=adapter, + write_with_response=write_with_response, + ) + self._upload_lock = asyncio.Lock() + + # -- lifecycle ------------------------------------------------------------ + + async def connect(self, timeout: float = 15.0, attempts: int = 3): + await self.transport.connect(timeout=timeout, attempts=attempts) + return self + + async def disconnect(self): + await self.transport.disconnect() + + async def __aenter__(self): + await self.connect() + return self + + async def __aexit__(self, *exc): + await self.disconnect() + + @property + def is_connected(self) -> bool: + return self.transport.is_connected + + # -- simple control commands --------------------------------------------- + + async def set_brightness(self, level: int): + await self.transport.write_command(protocol.cmd_brightness(level)) + + async def show_image(self, image_id: int): + """Show a built-in image (IMAG).""" + await self.transport.write_command(protocol.cmd_image(image_id)) + + async def play_animation(self, anim_id: 
int): + """Play a built-in animation (ANIM).""" + await self.transport.write_command(protocol.cmd_animation(anim_id)) + + async def play_diy(self, image_id: int): + """Show one uploaded DIY image (PLAY).""" + await self.transport.write_command(protocol.cmd_play_diy(image_id)) + + async def play_diy_sequence(self, image_ids: Sequence[int]): + await self.transport.write_command(protocol.cmd_play_diy_sequence(list(image_ids))) + + async def delete_diy(self, image_ids: Sequence[int]): + await self.transport.write_command(protocol.cmd_delete_diy(list(image_ids))) + + async def get_diy_count(self, timeout: float = 5.0) -> Optional[int]: + """Ask how many DIY images are stored (CHEC). Returns None if no answer.""" + await self.transport.write_command(protocol.cmd_check_diy_count()) + try: + _, raw = await self._collect_notification("CHEC", timeout=timeout) + except Exception: + return None + import crypto + buf = crypto.decrypt(raw) + # Response frame is [len]['CHEC'][count]; the count sits right after the + # 4-byte command name (NOT after the total length byte buf[0]). 
+ idx = 1 + len("CHEC") + return buf[idx] if len(buf) > idx else None + + # -- text styling --------------------------------------------------------- + + async def set_text_mode(self, mode: int): + await self.transport.write_command(protocol.cmd_text_mode(mode)) + + async def set_text_speed(self, speed: int): + await self.transport.write_command(protocol.cmd_text_speed(speed)) + + async def set_text_color_effect(self, mode: int, enable: bool = True): + await self.transport.write_command(protocol.cmd_text_color_effect(mode, enable)) + + async def set_foreground_color(self, r: int, g: int, b: int, enable: bool = True): + await self.transport.write_command(protocol.cmd_text_fg_color(r, g, b, enable)) + + async def set_background_color(self, r: int, g: int, b: int, enable: bool = True): + await self.transport.write_command(protocol.cmd_text_bg_color(r, g, b, enable)) + + # -- text / bitmap upload ------------------------------------------------- + + async def set_text( + self, + text: str, + *, + color: Color = (255, 255, 255), + mode: Optional[int] = TextMode.SCROLL_LEFT, + speed: Optional[int] = None, + clear_effects: bool = True, + timeout: float = 10.0, + **render_kwargs, + ): + """Render ``text`` to a bitmap and upload + display it. + + ``color`` sets the text color. ``mode`` is the scroll style (default + scroll-left; pass ``TextMode.STEADY`` to hold it still). With + ``clear_effects`` we disable the ``M`` special effect / animated + background so the mask stops cycling to other animations, and the + scroll ``mode``/``speed`` are pinned *after* the upload so they stick. 
+ """ + bmp, color_array = bitmap.build_text_upload(text, color=color, **render_kwargs) + await self.upload_bitmap(bmp, color_array, timeout=timeout) + await self._apply_display_state(color, mode, speed, clear_effects) + + async def _apply_display_state(self, color, mode, speed, clear_effects): + """Pin a clean, steady display state after a bitmap upload.""" + if clear_effects: + # Turn off the M special-effect/background-image carousel and force a + # black background so nothing animates behind the bitmap. + await self.set_text_color_effect(0, enable=False) + await self.set_background_color(0, 0, 0, enable=True) + # Color the foreground the way the app does (FC), in addition to the + # per-column color array already in the upload. + await self.set_foreground_color(*color, enable=True) + if mode is not None: + await self.set_text_mode(mode) + if speed is not None: + await self.set_text_speed(speed) + + async def upload_bitmap(self, bitmap_bytes: bytes, color_array: bytes, *, timeout: float = 10.0): + """Upload a raw mask bitmap + color array using the DATS/REOK/DATCP flow.""" + payload = bytes(bitmap_bytes) + bytes(color_array) + await self._upload( + payload, + second_field=len(bitmap_bytes), + kind=UploadKind.BITMAP, + timeout=timeout, + ) + + async def clear_diy(self, timeout: float = 4.0) -> int: + """Delete all stored DIY images. 
Returns how many were removed.""" + n = await self.get_diy_count(timeout=timeout) or 0 + ids = list(range(1, n + 1)) + for i in range(0, len(ids), 10): # DELE takes at most 10 ids + await self.delete_diy(ids[i:i + 10]) + await asyncio.sleep(0.25) + return n + + async def upload_frame(self, data: bytes, slot: int, **kwargs): + """Upload one DIY image frame to ``slot`` (see :meth:`upload_raw_image`).""" + await self.upload_raw_image(bytes(data), index=slot, **kwargs) + + async def play_frame(self, slot: int): + """Show a stored DIY image instantly (alias of :meth:`play_diy`).""" + await self.play_diy(slot) + + async def upload_raw_image(self, data: bytes, *, index: int = 1, + timestamp: int = 0x07C5F9FF, chunk_delay: float = 0.05, + init_delay: float = 0.15): + """Upload a raw DIY image/animation to the mask (fire-and-forget). + + This is the method that actually works for full-color custom content + (verified against BishopFox/shining-mask): an image ``DATS`` (toggle + ``0x01``) is sent, then every data chunk is streamed WITHOUT waiting for + ``DATSOK``/``REOK``, then ``DATCP`` with a 4-byte timestamp. The content + displays immediately and is stored as DIY image(s) that :meth:`play_diy` + can replay smoothly (no upload logo). + + ``data`` must already be in the mask's internal image format (a + multi-frame blob is played as an animation). Generating that format for + arbitrary images is not yet solved -- see the project notes. 
+ """ + if self._upload_lock.locked(): + raise UploadError("an upload is already in progress") + async with self._upload_lock: + dats_args = len(data).to_bytes(2, "big") + int(index).to_bytes(2, "big") + b"\x01" + await self.transport.write_command(protocol.encode_command("DATS", dats_args)) + await asyncio.sleep(init_delay) + for packet in protocol.iter_upload_packets(data): # unpadded, like the app + await self.transport.write_upload(packet) + await asyncio.sleep(chunk_delay) + await self.transport.write_command( + protocol.encode_command("DATCP", int(timestamp).to_bytes(4, "big")) + ) + + async def upload_image(self, rgb_bytes: bytes, image_index: int, *, timeout: float = 15.0): + """Upload a full-color DIY image (raw RGB) to a storage slot. + + Experimental: mask-go does not implement image upload, so this follows + ble-protocol.md only. ``rgb_bytes`` is 3 bytes per pixel; the mask is 16 + rows tall, so length should be ``16 * width * 3``. + """ + await self._upload( + bytes(rgb_bytes), + second_field=image_index, + kind=UploadKind.IMAGE, + timeout=timeout, + ) + + async def _upload(self, payload: bytes, *, second_field: int, kind: int, timeout: float, + wait_per_packet: bool = True): + total_len = len(payload) + if total_len == 0: + raise UploadError("nothing to upload (empty payload)") + if self._upload_lock.locked(): + raise UploadError("an upload is already in progress") + async with self._upload_lock: + log.info("upload start: %d bytes (kind=%d)", total_len, kind) + + await self.transport.write_command( + protocol.cmd_dats(total_len, second_field, kind) + ) + await self.transport.wait_for(protocol.RESP_DATS_OK, timeout=timeout) + + # Image uploads use fixed 100-byte packets (ble-protocol.md); bitmap + # uploads are unpadded (matching mask-go). 
+ pad_to = 100 if kind == UploadKind.IMAGE else None + for packet in protocol.iter_upload_packets(payload, pad_to=pad_to): + await self.transport.write_upload(packet) + # wait_per_packet=False streams packets without the per-packet + # REOK round-trip (much faster, used for animation). The REOKs + # still arrive and are drained by the DATCPOK wait below. + if wait_per_packet: + await self.transport.wait_for(protocol.RESP_REC_OK, timeout=timeout) + + await self.transport.write_command(protocol.cmd_datcp()) + await self.transport.wait_for(protocol.RESP_DATCP_OK, timeout=timeout) + log.info("upload complete") + + # -- faces ----------------------------------------------------------------- + + async def setup_face_mode(self, brightness: int = 80, mono_color: Optional[Color] = None): + """Put the mask in a clean state for drawing faces/images. + + Sets a black background, disables the M effect, and selects steady + (non-scrolling) display. If ``mono_color`` is given, the whole face is + colored uniformly via FC (so frames can be uploaded as bitmap-only -- + half the bytes, much faster/more reliable); otherwise FC is disabled so + per-column colors drive the pixels. + """ + await self.set_brightness(brightness) + await self.set_text_color_effect(0, enable=False) # M effect off + await self.set_background_color(0, 0, 0, enable=True) # black background + if mono_color is not None: + await self.set_foreground_color(*mono_color, enable=True) + else: + await self.set_foreground_color(255, 255, 255, enable=False) + await self.set_text_mode(TextMode.STEADY) + + async def show_face(self, columns, color: Color = (0, 220, 255), *, + per_column_colors=None, mono: bool = False, + fast: bool = True, timeout: float = 8.0): + """Display a face/bitmap given as columns (each 16 ints, 0/1). + + ``mono`` uploads just the bitmap (no color array) -- use it after + ``setup_face_mode(mono_color=...)`` for the fastest, most reliable frames + (half the packets). 
Otherwise ``color`` (single) or ``per_column_colors`` + (multi) build a color array. With ``fast`` the frame is streamed without + per-packet acks (for animation). + """ + bmp = bitmap.encode_bitmap(columns) + if mono: + payload = bytes(bmp) # color comes from FC + elif per_column_colors is not None: + payload = bytes(bmp) + bitmap.encode_color_array_per_column(per_column_colors) + else: + payload = bytes(bmp) + bitmap.encode_color_array(len(columns), color) + await self._upload( + payload, + second_field=len(bmp), + kind=UploadKind.BITMAP, + timeout=timeout, + wait_per_packet=not fast, + ) + + # -- audio visualization (experimental) ----------------------------------- + + async def send_audio_frame(self, mode: int, nibbles: Sequence[int]): + await self.transport.write_audio(protocol.cmd_audio_frame(mode, list(nibbles))) + + # -- internals ------------------------------------------------------------ + + async def _collect_notification(self, *prefixes: str, timeout: float): + """Wait for a matching notification and also return its raw bytes.""" + loop = asyncio.get_running_loop() + deadline = loop.time() + timeout + q = self.transport._notify_queue # noqa: SLF001 - intra-package access + while True: + remaining = deadline - loop.time() + if remaining <= 0: + raise TimeoutError(prefixes) + token, raw = await asyncio.wait_for(q.get(), timeout=remaining) + if any(protocol.matches(token, p) for p in prefixes): + return token, raw diff --git a/vendor/mask/preview_text.py b/vendor/mask/preview_text.py new file mode 100644 index 0000000..4cadd7e --- /dev/null +++ b/vendor/mask/preview_text.py @@ -0,0 +1,26 @@ +"""Offline preview: render text the way the mask will, printed as ASCII art. 
+ +No BLE / no hardware needed -- handy for checking how a string will look and for +confirming the renderer works on this machine:: + + python examples/preview_text.py "HELLO" +""" + +import sys + +import bitmap + + +def main(): + text = sys.argv[1] if len(sys.argv) > 1 else "HELLO" + columns = bitmap.text_to_columns(text) + width = len(columns) + bmp = bitmap.encode_bitmap(columns) + print(f"text={text!r} width={width}px bitmap={len(bmp)}B payload={len(bmp) + width * 3}B\n") + for row in range(16): + line = "".join("#" if columns[x][row] else " " for x in range(width)) + print(line) + + +if __name__ == "__main__": + main() diff --git a/vendor/mask/protocol.py b/vendor/mask/protocol.py new file mode 100644 index 0000000..a7d6f40 --- /dev/null +++ b/vendor/mask/protocol.py @@ -0,0 +1,244 @@ +"""Stateless encoders/decoders for the Shining Mask BLE protocol. + +This module contains *zero* I/O and *zero* BLE dependencies, so it can be unit +tested without hardware (and without ``bleak`` installed). Every ``cmd_*`` +helper returns a ready-to-write, AES-encrypted 16-byte block for the command +characteristic. Upload data packets (sent unencrypted to the upload +characteristic) are produced by :func:`upload_packet`. + +Command frame layout (before encryption), padded with 0x00 to 16 bytes:: + + [ len ][ ASCII command name ][ args ... ][ 0x00 padding ... ] + ^-- len = len(name) + len(args) + +Notable divergences from mask-go (the field-tested Go reference), all chosen to +match the *real app traffic* documented in ble-protocol.md: + + * ``IMAG`` / ``ANIM`` use ``len = 5`` (name 4 + arg 1). mask-go hard-codes 6, + which also works only because the extra byte lands in zero padding. + * Background color command is ``BC`` (as captured from the app), not mask-go's + ``BG`` typo. + * Notification tokens are matched by *prefix*. 
(The doc *labels* the upload + acks ``DATOK``/``REOKOK``, but its own hex decodes to ``DATSOK``/``REOKOK``, + matching mask-go's observed ``DATSOK``/``REOK``. We match the real wire + tokens ``DATSOK`` and ``REOK``.) +""" + +from __future__ import annotations + +from typing import Optional + +import crypto +from constants import FRAME_SIZE, UPLOAD_MAX_DATA, UploadKind + + +# --------------------------------------------------------------------------- +# Command frame construction +# --------------------------------------------------------------------------- + +def build_frame(name: str, args: bytes = b"") -> bytes: + """Build a single *unencrypted* 16-byte command frame. + + ``len`` byte = number of (command name + argument) bytes, per the protocol + spec. The remainder is zero padding (the mask uses AES-ECB, so padding is + just zeros -- there is no IV/nonce). + """ + name_b = name.encode("ascii") + args = bytes(args) + body = bytes([len(name_b) + len(args)]) + name_b + args + if len(body) > FRAME_SIZE: + raise ValueError( + f"command {name!r} with {len(args)} arg bytes exceeds {FRAME_SIZE}-byte frame" + ) + return body.ljust(FRAME_SIZE, b"\x00") + + +def encode_command(name: str, args: bytes = b"") -> bytes: + """Build and AES-encrypt a command frame for the command characteristic.""" + return crypto.encrypt(build_frame(name, args)) + + +def _u8(value: int, what: str) -> int: + if not 0 <= value <= 255: + raise ValueError(f"{what} must be 0..255, got {value}") + return value + + +# --------------------------------------------------------------------------- +# Control commands (all return encrypted 16-byte blocks) +# --------------------------------------------------------------------------- + +def cmd_brightness(level: int) -> bytes: + """LIGHT: set brightness 0..255 (the doc recommends keeping it <= ~100).""" + return encode_command("LIGHT", bytes([_u8(level, "brightness")])) + + +def cmd_image(image_id: int) -> bytes: + """IMAG: show a built-in image by id.""" + 
return encode_command("IMAG", bytes([_u8(image_id, "image id")])) + + +def cmd_animation(anim_id: int) -> bytes: + """ANIM: play a built-in animation by id.""" + return encode_command("ANIM", bytes([_u8(anim_id, "animation id")])) + + +def cmd_play_diy(image_id: int) -> bytes: + """PLAY: show a single uploaded DIY image (count=1).""" + return encode_command("PLAY", bytes([1, _u8(image_id, "DIY image id")])) + + +def cmd_play_diy_sequence(image_ids: "list[int]") -> bytes: + """PLAY: play several uploaded DIY images in order (max 10).""" + if not 1 <= len(image_ids) <= 10: + raise ValueError("PLAY accepts between 1 and 10 DIY image ids") + ids = bytes(_u8(i, "DIY image id") for i in image_ids) + return encode_command("PLAY", bytes([len(image_ids)]) + ids) + + +def cmd_delete_diy(image_ids: "list[int]") -> bytes: + """DELE: delete the given DIY images from the mask (max 10).""" + if not 1 <= len(image_ids) <= 10: + raise ValueError("DELE accepts between 1 and 10 DIY image ids") + ids = bytes(_u8(i, "DIY image id") for i in image_ids) + return encode_command("DELE", bytes([len(image_ids)]) + ids) + + +def cmd_check_diy_count() -> bytes: + """CHEC: ask how many DIY images are stored (answer arrives on notify).""" + return encode_command("CHEC") + + +def cmd_text_mode(mode: int) -> bytes: + """MODE: scroll/animation mode for displayed text (see constants.TextMode).""" + return encode_command("MODE", bytes([_u8(mode, "text mode")])) + + +def cmd_text_speed(speed: int) -> bytes: + """SPEED: text scroll speed 0..255.""" + return encode_command("SPEED", bytes([_u8(speed, "text speed")])) + + +def cmd_text_color_effect(mode: int, enable: bool = True) -> bytes: + """M: special text color effect / background image (see ble-protocol.md).""" + return encode_command("M", bytes([1 if enable else 0, _u8(mode, "color effect")])) + + +def cmd_text_fg_color(r: int, g: int, b: int, enable: bool = True) -> bytes: + """FC: foreground (text) color in RGB.""" + return encode_command( + 
"FC", bytes([1 if enable else 0, _u8(r, "r"), _u8(g, "g"), _u8(b, "b")]) + ) + + +def cmd_text_bg_color(r: int, g: int, b: int, enable: bool = True) -> bytes: + """BC: background color in RGB (set to black to disable M's background images).""" + return encode_command( + "BC", bytes([1 if enable else 0, _u8(r, "r"), _u8(g, "g"), _u8(b, "b")]) + ) + + +# --------------------------------------------------------------------------- +# Upload commands +# --------------------------------------------------------------------------- + +def cmd_dats(total_len: int, second_field: int, kind: int) -> bytes: + """DATS: announce an upload. + + For a text *bitmap* (kind=BITMAP), ``second_field`` is the bitmap length in + bytes (so the mask knows where the appended color array starts). For a + full-color *image* (kind=IMAGE), it is the destination image index. Both + 16-bit fields are big-endian. + """ + if not 0 <= total_len <= 0xFFFF: + raise ValueError("total_len out of 16-bit range") + if not 0 <= second_field <= 0xFFFF: + raise ValueError("second_field out of 16-bit range") + args = total_len.to_bytes(2, "big") + second_field.to_bytes(2, "big") + bytes([_u8(kind, "kind")]) + return encode_command("DATS", args) + + +def cmd_datcp() -> bytes: + """DATCP: finish/commit an upload. (mask-go omits the optional unix time.)""" + return encode_command("DATCP") + + +def upload_packet(packet_index: int, chunk: bytes, pad_to: "Optional[int]" = None) -> bytes: + """Build one *unencrypted* upload data packet for the upload characteristic. + + Layout: ``[ data_len+1 ][ packet_index ][ chunk... ]`` where ``data_len`` is + ``len(chunk)``. The leading length byte counts the packet-index byte plus + the data bytes. ``packet_index`` starts at 0 and increments per packet. + + For *bitmap* (text) uploads packets are NOT padded -- this matches mask-go, + and the length byte already delimits the real content. 
For *image* uploads + ble-protocol.md specifies fixed 100-byte packets, so pass ``pad_to=100``. + """ + if not 0 <= packet_index <= 255: + raise ValueError("packet_index must fit in one byte") + if len(chunk) > UPLOAD_MAX_DATA: + raise ValueError(f"chunk too large: {len(chunk)} > {UPLOAD_MAX_DATA}") + packet = bytes([len(chunk) + 1, packet_index]) + bytes(chunk) + if pad_to is not None and len(packet) < pad_to: + packet = packet.ljust(pad_to, b"\x00") + return packet + + +def iter_upload_packets(buffer: bytes, pad_to: "Optional[int]" = None): + """Yield successive :func:`upload_packet` blocks covering ``buffer``.""" + index = 0 + for start in range(0, len(buffer), UPLOAD_MAX_DATA): + yield upload_packet(index, buffer[start:start + UPLOAD_MAX_DATA], pad_to=pad_to) + index += 1 + + +# --------------------------------------------------------------------------- +# Audio visualization (optional / experimental -- per ble-protocol.md) +# --------------------------------------------------------------------------- + +def cmd_audio_frame(mode: int, nibbles: "list[int]") -> bytes: + """Build an encrypted audio-visualization frame for the audio characteristic. + + ``mode`` selects the visual style (0..4). ``nibbles`` is up to 28 row + intensities (0x0..0xf), packed two per byte into 14 bytes. Frame layout: + ``[ 0x0f ][ mode ][ 14 packed bytes ]`` = 16 bytes, then AES-encrypted. 
+ """ + if len(nibbles) > 28: + raise ValueError("at most 28 nibbles (14 bytes) of visualization data") + rows = list(nibbles) + [0] * (28 - len(nibbles)) + packed = bytes((rows[i] & 0x0F) << 4 | (rows[i + 1] & 0x0F) for i in range(0, 28, 2)) + frame = bytes([0x0F, _u8(mode, "audio mode")]) + packed + return crypto.encrypt(frame) + + +# --------------------------------------------------------------------------- +# Notification parsing +# --------------------------------------------------------------------------- + +def parse_notification(data: bytes) -> str: + """Decrypt a notify-characteristic payload and return its ASCII token. + + The first decrypted byte is the token length; the token follows. Returns an + empty string if the payload can't be interpreted. + """ + if not data or len(data) % FRAME_SIZE != 0: + return "" + buf = crypto.decrypt(bytes(data)) + n = buf[0] + if n == 0 or n > len(buf) - 1: + return "" + return buf[1:1 + n].decode("ascii", "ignore") + + +# Upload-handshake response tokens, matched by prefix. These are the real wire +# tokens (the doc's "DATOK" label is a typo -- its own hex decodes to "DATSOK"). +RESP_DATS_OK = "DATSOK" # ack of DATS (start of upload) +RESP_REC_OK = "REOK" # ack of an upload packet (doc spells the full token "REOKOK") +RESP_DATCP_OK = "DATCPOK" # ack of DATCP -> upload complete +RESP_PLAY_OK = "PLAYOK" +RESP_DELE_OK = "DELEOK" + + +def matches(token: str, expected: str) -> bool: + """Prefix match for notification tokens (tolerant of spelling variants).""" + return token.startswith(expected) diff --git a/vendor/mask/scan.py b/vendor/mask/scan.py new file mode 100644 index 0000000..fe20b87 --- /dev/null +++ b/vendor/mask/scan.py @@ -0,0 +1,54 @@ +"""Diagnostic BLE scan: list nearby devices and flag likely Shining Masks. 
+ +No connection is made -- this only listens for advertisements, so it is safe to +run any time:: + + python examples/scan.py +""" + +import asyncio + +from bleak import BleakScanner + +import constants as C + + +async def main(timeout: float = 12.0): + print(f"Scanning {timeout:.0f}s for BLE devices " + f"(looking for name '{C.DEVICE_NAME_PREFIX}*' or service {C.SERVICE_UUID})...\n") + found = await BleakScanner.discover(timeout=timeout, return_adv=True) + + rows = [] + masks = [] + for address, (dev, adv) in found.items(): + name = adv.local_name or dev.name or "(no name)" + svcs = [s.lower() for s in (adv.service_uuids or [])] + is_mask = ( + str(name).upper().startswith(C.DEVICE_NAME_PREFIX) + or C.SERVICE_UUID.lower() in svcs + or any("fff0" in s for s in svcs) + ) + rows.append((adv.rssi if adv.rssi is not None else -999, dev.address, name, svcs, is_mask)) + if is_mask: + masks.append((dev.address, name)) + + for rssi, address, name, svcs, is_mask in sorted(rows, reverse=True): + flag = " <== LIKELY MASK" if is_mask else "" + print(f" {address} rssi={rssi:>4} name={name!r} services={svcs}{flag}") + + print() + if masks: + print("Likely mask(s) found:") + for address, name in masks: + print(f" address={address} name={name!r}") + print("\nConnect with, e.g.:") + print(f" python cli.py --address {masks[0][0]} light 80") + else: + print("No mask detected. Checklist:") + print(" * Power the mask on (and take it out of the phone app / disconnect the app).") + print(" * REMOVE it from the OS Bluetooth settings if you tried to 'pair' it there.") + print(" * Make sure Bluetooth is on and no other process holds the adapter.") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/vendor/mask/talking.py b/vendor/mask/talking.py new file mode 100644 index 0000000..527be13 --- /dev/null +++ b/vendor/mask/talking.py @@ -0,0 +1,145 @@ +"""Animated talking face for the mask. 
+ +Runs a background loop that blinks periodically and moves the mouth while +"speaking" by swapping face frames. Frame rate is bounded by the BLE upload +(~2-4 fps), so this gives a clearly *talking* face rather than smooth lip-sync. + +Two ways to drive it: + + * ``set_speaking(True/False)`` around a TTS utterance -- the loop auto-animates + the mouth while speaking and closes it when silent. + * ``set_mouth_level(0..4)`` from live audio amplitude for rough lip-sync (this + switches off the auto mouth animation and uses your values directly). + +Example:: + + async with ShiningMask() as mask: + face = await TalkingFace(mask).start() + face.set_speaking(True) # ... while Marcus talks ... + await asyncio.sleep(5) + face.set_speaking(False) + await face.stop() +""" + +from __future__ import annotations + +import asyncio +import random +import time +from typing import Optional + +import faces + +Color = "tuple[int, int, int]" + +# mouth shapes cycled while speaking (auto mode): clear open/close so the motion +# reads as talking even at the low (~2 fps) re-upload frame rate. +_TALK_CYCLE = [4, 0, 3, 0, 4, 1, 2, 0] + + +class TalkingFace: + def __init__(self, mask, *, color=(0, 220, 255), width: int = faces.DISPLAY_WIDTH, + brightness: int = 80, tick: float = 0.05, fast: bool = False): + self.mask = mask + self.color = color + self.width = width + self.brightness = brightness + self.tick = tick + # Reliable (per-packet ack) uploads actually commit each frame; fast + # streamed uploads drop frames at weak signal, so default to reliable. 
+ self._fast = fast + + self._eyes = True + self._mouth = 0 + self._speaking = False + self._auto = True # auto-animate mouth from _speaking + self._blink = True + self._last = None + self._task: Optional[asyncio.Task] = None + self._stop = False + + # -- lifecycle ------------------------------------------------------------ + + async def start(self): + await self.mask.setup_face_mode(brightness=self.brightness) + await self._render(force=True) + self._stop = False + self._task = asyncio.create_task(self._loop()) + return self + + async def stop(self, *, neutral: bool = True): + self._stop = True + if self._task: + self._task.cancel() + try: + await self._task + except asyncio.CancelledError: + pass + self._task = None + if neutral: + self._eyes, self._mouth = True, 0 + await self._render(force=True) + + async def __aenter__(self): + return await self.start() + + async def __aexit__(self, *exc): + await self.stop() + + # -- control -------------------------------------------------------------- + + def set_speaking(self, speaking: bool): + """Auto-animate the mouth while speaking (closes when False).""" + self._auto = True + self._speaking = bool(speaking) + + def set_mouth_level(self, level: int): + """Drive the mouth directly (0=closed..4=wide) e.g. 
from audio amplitude.""" + self._auto = False + self._mouth = max(0, min(4, int(level))) + + def set_blink(self, enabled: bool): + self._blink = bool(enabled) + + def set_color(self, color): + self.color = color + self._last = None # force re-render on next tick + + # -- internals ------------------------------------------------------------ + + async def _render(self, force: bool = False): + key = (self._eyes, self._mouth, self.color) + if not force and key == self._last: + return + cols = faces.face(mouth=self._mouth, eyes=self._eyes, width=self.width) + try: + await self.mask.show_face(cols, color=self.color, fast=self._fast) + self._last = key + except Exception: + # tolerate transient BLE hiccups mid-animation; retry next tick + self._last = None + + async def _loop(self): + next_blink = time.monotonic() + random.uniform(2.0, 5.0) + cycle_i = 0 + while not self._stop: + now = time.monotonic() + + # periodic blink (quick close/open) + if self._blink and self._eyes and now >= next_blink: + self._eyes = False + await self._render() + await asyncio.sleep(0.12) + self._eyes = True + next_blink = now + random.uniform(2.5, 5.5) + + # mouth + if self._auto: + if self._speaking: + self._mouth = _TALK_CYCLE[cycle_i % len(_TALK_CYCLE)] + cycle_i += 1 + else: + self._mouth = 0 + + await self._render() + await asyncio.sleep(self.tick) diff --git a/vendor/mask/transport.py b/vendor/mask/transport.py new file mode 100644 index 0000000..2a0fcfb --- /dev/null +++ b/vendor/mask/transport.py @@ -0,0 +1,200 @@ +"""BLE transport for the Shining Mask, built on bleak (BlueZ on Linux). + +This layer knows nothing about the command semantics -- it scans, connects, +writes raw bytes to a characteristic, and exposes decrypted notification tokens +through an asyncio queue. The protocol logic lives in :mod:`shiningmask.mask`. + +``bleak`` is imported lazily so the rest of the package (and its tests) work +without it installed. 
+""" + +from __future__ import annotations + +import asyncio +import logging +from typing import Optional + +import constants as C +import protocol +from exceptions import MaskNotConnected, MaskNotFound, NotificationTimeout + +log = logging.getLogger("shiningmask.transport") + + +class MaskTransport: + """Manage the BLE connection and raw characteristic I/O for one mask.""" + + def __init__( + self, + address: Optional[str] = None, + *, + name_prefix: str = C.DEVICE_NAME_PREFIX, + adapter: Optional[str] = None, + write_with_response: Optional[bool] = None, + upload_with_response: bool = False, + ): + self.address = address + self.name_prefix = name_prefix + self.adapter = adapter + # None -> let bleak pick write-with/without-response from the char's + # advertised properties. True/False force a mode. + self.write_with_response = write_with_response + # Image-upload chunks use write-WITHOUT-response (fire-and-forget): the + # mask's upload characteristic does NOT reply to acknowledged writes, so + # write-with-response HANGS waiting for an ACK that never comes. The + # corruption risk at a weak signal is mitigated by keeping the mask close + # for the one-time upload + the resilient per-frame reconnect. 
+ self.upload_with_response = upload_with_response + self._client = None # bleak.BleakClient + self._notify_queue: "asyncio.Queue[tuple[str, bytes]]" = asyncio.Queue() + + def _adapter_kwargs(self) -> dict: + return {"adapter": self.adapter} if self.adapter else {} + + # -- connection lifecycle ------------------------------------------------- + + async def scan(self, timeout: float = 15.0): + """Return a bleak device for the first mask seen, or raise MaskNotFound.""" + from bleak import BleakScanner + + prefix = self.name_prefix.upper() + service = C.SERVICE_UUID.lower() + target = self.address.upper() if self.address else None + + def _match(device, adv) -> bool: + # When a specific address is requested, match only that (an active + # filter scan is more reliable than find_device_by_address for weak + # / infrequently-advertising devices). + if target is not None: + return str(device.address or "").upper() == target + name = str(adv.local_name or device.name or "") + if name.upper().startswith(prefix): + return True + # Fall back to the advertised vendor service UUID (more reliable than + # the name -- some clones advertise an empty/garbled local name). + svcs = [s.lower() for s in (adv.service_uuids or [])] + return service in svcs or any("fff0" in s for s in svcs) + + device = await BleakScanner.find_device_by_filter( + _match, timeout=timeout, **self._adapter_kwargs() + ) + if device is None: + who = f"address {self.address}" if target else f"name starting with {prefix!r}" + raise MaskNotFound(f"no mask matching {who} found in {timeout:.0f}s " + "(power it on, bring it closer, and free it from any phone)") + return device + + async def connect(self, timeout: float = 15.0, attempts: int = 3): + """Scan, connect, and subscribe to notifications. + + Retries up to ``attempts`` times -- on a marginal link the scan or GATT + connect can intermittently time out. 
+ """ + from bleak import BleakClient + from bleak.exc import BleakError + + last_exc: Optional[BaseException] = None + for i in range(attempts): + try: + device = await self.scan(timeout=timeout) + log.info("connecting to mask %s (attempt %d/%d)", + getattr(device, "address", device), i + 1, attempts) + self._client = BleakClient(device, **self._adapter_kwargs()) + await self._client.connect() + # Drain any stale notifications from a previous session. + while not self._notify_queue.empty(): + self._notify_queue.get_nowait() + await self._client.start_notify(C.NOTIFY_CHAR_UUID, self._on_notify) + log.info("connected and subscribed to notifications") + return + except (MaskNotFound, asyncio.TimeoutError, BleakError, OSError) as exc: + last_exc = exc + log.warning("connect attempt %d/%d failed: %r", i + 1, attempts, exc) + try: + if self._client is not None: + await self._client.disconnect() + except Exception: + pass + self._client = None + raise last_exc if last_exc else MaskNotFound("could not connect") + + async def disconnect(self): + if self._client is not None: + try: + await self._client.disconnect() + finally: + self._client = None + + @property + def is_connected(self) -> bool: + return self._client is not None and self._client.is_connected + + def _require(self): + if not self.is_connected: + raise MaskNotConnected("mask is not connected") + return self._client + + # -- notifications -------------------------------------------------------- + + def _on_notify(self, _char, data: bytearray): + token = protocol.parse_notification(bytes(data)) + log.debug("notify token=%r raw=%s", token, bytes(data).hex()) + self._notify_queue.put_nowait((token, bytes(data))) + + async def wait_for(self, *expected_prefixes: str, timeout: float = 5.0) -> str: + """Wait until a notification whose token starts with one of the prefixes. + + Non-matching notifications are discarded. Raises NotificationTimeout. 
+ """ + loop = asyncio.get_running_loop() + deadline = loop.time() + timeout + while True: + remaining = deadline - loop.time() + if remaining <= 0: + raise NotificationTimeout( + f"timed out waiting for {expected_prefixes!r}" + ) + try: + token, _ = await asyncio.wait_for(self._notify_queue.get(), timeout=remaining) + except asyncio.TimeoutError: + raise NotificationTimeout(f"timed out waiting for {expected_prefixes!r}") + if any(protocol.matches(token, p) for p in expected_prefixes): + return token + log.debug("ignoring notification %r while waiting for %r", token, expected_prefixes) + + # -- raw writes ----------------------------------------------------------- + + async def _write(self, uuid: str, data: bytes, *, retries: int = 3, response=None): + """write_gatt_char with retries — the marginal BLE link glitches single + writes (GATT 'Unlikely Error'); over an 800-write upload that's fatal + unless we retry the failed chunk. ``response`` overrides the write mode + (None -> the transport default).""" + from bleak.exc import BleakError + client = self._require() + resp = self.write_with_response if response is None else response + last = None + for i in range(retries): + try: + await client.write_gatt_char(uuid, bytes(data), response=resp) + return + except (BleakError, EOFError, OSError) as exc: + last = exc + await asyncio.sleep(0.05 * (i + 1)) + raise last + + async def write_command(self, block: bytes): + """Write an AES-encrypted command block to the command characteristic.""" + await self._write(C.CMD_CHAR_UUID, block) + + async def write_upload(self, packet: bytes): + """Write a raw data packet to the upload characteristic (write-without- + response by default — the upload char is fire-and-forget and hangs on + acknowledged writes). 
Keep the mask close for a clean one-time upload.""" + await self._write(C.UPLOAD_CHAR_UUID, packet, response=self.upload_with_response) + + async def write_audio(self, block: bytes): + """Write an AES-encrypted frame to the audio-visualization characteristic.""" + client = self._require() + await client.write_gatt_char( + C.AUDIO_CHAR_UUID, bytes(block), response=self.write_with_response + ) diff --git a/vendor/sanad_pkg/__init__.py b/vendor/sanad_pkg/__init__.py new file mode 100644 index 0000000..c7973ba --- /dev/null +++ b/vendor/sanad_pkg/__init__.py @@ -0,0 +1,15 @@ +"""sanad_pkg — shared library baked into the `sanad-base` Docker image. + +Modules: + license Offline Ed25519 license verification + entitlement. + license_check CLI gate run by every package entrypoint (`python -m sanad_pkg.license_check P1`). + bus EventBus shim — preserves Sanad's core/event_bus API, ZMQ-backed + across containers when pyzmq + SANAD_BUS_ADDR are present, else + an in-process fallback (identical API). + +Kept Python-3.8 compatible (the on-robot conda env) — no match-statements, +no PEP-604 unions in annotations. +""" + +__all__ = ["license", "bus"] +__version__ = "0.1.0" diff --git a/vendor/sanad_pkg/bus.py b/vendor/sanad_pkg/bus.py new file mode 100644 index 0000000..b4b8f41 --- /dev/null +++ b/vendor/sanad_pkg/bus.py @@ -0,0 +1,180 @@ +"""EventBus shim — drop-in for Sanad's core/event_bus.py API across containers. + +Same surface as the in-process bus (`on` / `off` / `emit` / `emit_sync`) so +existing Sanad call-sites change only their *import*, not their logic. When +`pyzmq` is importable AND `SANAD_BUS_ADDR` is set, events are also published to +/ received from the central `sanad-busd` XPUB/XSUB proxy, so handlers in OTHER +containers fire too. Otherwise it degrades to a pure in-process bus (identical +behavior to today's monolith) — which is all P1-standalone needs. + +Wire format on ZMQ: multipart [topic_bytes, json(kwargs)+_origin]. 
class Bus(object):
    """In-process event bus with an optional ZeroMQ bridge between processes.

    Listeners are registered per event name with :meth:`on`. ``emit`` (async)
    and ``emit_sync`` deliver an event to local listeners and, once
    :meth:`connect` has enabled ZMQ mode, also publish it to other processes.
    Every published message carries this instance's random ``_origin`` id so
    the subscriber thread can skip the process's own echoes.

    Remote events arrive on a background subscriber thread. ``async def``
    listeners cannot run there, so the bus remembers the asyncio loop it last
    saw (in ``on``/``emit``/``connect``) and hands coroutines to that loop with
    ``asyncio.run_coroutine_threadsafe``. If no running loop is known the
    coroutine handler is dropped with a warning.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()          # guards _listeners
        self._listeners = defaultdict(list)    # type: Dict[str, List[Callable]]
        self._origin = uuid.uuid4().hex
        self._zmq_enabled = False
        self._pub = None
        # ZeroMQ sockets are not thread-safe; serialize every use of the PUB socket.
        self._pub_lock = threading.Lock()
        self._ctx = None
        self._sub_thread = None
        self._stop = threading.Event()
        self._loop = None                      # last running asyncio loop seen

    # ── pub/sub registration (same as core/event_bus.EventBus) ──
    def on(self, event: str, callback: Callable) -> None:
        """Register ``callback`` (sync or ``async def``) for ``event``."""
        self._remember_loop()
        with self._lock:
            self._listeners[event].append(callback)

    def off(self, event: str, callback: Callable) -> None:
        """Unregister ``callback``; unknown events/callbacks are ignored."""
        with self._lock:
            # .get() so an unknown event does not create an empty defaultdict entry
            handlers = self._listeners.get(event)
            if not handlers:
                return
            try:
                handlers.remove(callback)
            except ValueError:
                pass

    async def emit(self, event: str, **kwargs: Any) -> None:
        """Publish ``event`` (if ZMQ is on) and await every local listener."""
        self._remember_loop()
        self._publish(event, kwargs)
        await self._dispatch_async(event, kwargs)

    def emit_sync(self, event: str, **kwargs: Any) -> None:
        """Publish ``event`` (if ZMQ is on) and call local listeners synchronously."""
        self._publish(event, kwargs)
        self._dispatch_sync(event, kwargs)

    # ── event-loop bookkeeping ──
    def _remember_loop(self) -> None:
        """Record the loop running in the calling thread, if there is one."""
        try:
            self._loop = asyncio.get_running_loop()
        except RuntimeError:
            pass

    def _pick_loop(self):
        """Return ``(loop, same_thread)`` for scheduling a coroutine.

        Prefers the loop running in the current thread; otherwise falls back
        to the remembered loop if it is still running. ``(None, False)`` means
        there is nowhere to run a coroutine.
        """
        try:
            return asyncio.get_running_loop(), True
        except RuntimeError:
            pass
        loop = self._loop
        if loop is not None and loop.is_running():
            return loop, False
        return None, False

    @staticmethod
    def _submit(coro, loop, same_thread: bool) -> None:
        """Schedule ``coro`` on ``loop`` from this thread or a foreign one."""
        if same_thread:
            loop.create_task(coro)
        else:
            asyncio.run_coroutine_threadsafe(coro, loop)

    # ── local dispatch (mirrors core/event_bus semantics) ──
    def _dispatch_sync(self, event: str, kwargs: Dict[str, Any]) -> None:
        """Call every listener for ``event`` without awaiting.

        Coroutine handlers are scheduled on an event loop (see
        :meth:`_pick_loop`); a failing handler is logged and never prevents
        the remaining handlers from running.
        """
        with self._lock:
            handlers = list(self._listeners.get(event, []))
        for handler in handlers:
            try:
                if asyncio.iscoroutinefunction(handler):
                    loop, same_thread = self._pick_loop()
                    if loop is None:
                        # decide before calling so no un-awaited coroutine is created
                        _log.warning("async handler for %s dropped (no loop)", event)
                        continue
                    self._submit(handler(**kwargs), loop, same_thread)
                    continue
                res = handler(**kwargs)
                if asyncio.iscoroutine(res):
                    loop, same_thread = self._pick_loop()
                    if loop is None:
                        res.close()  # avoid a "never awaited" warning
                    else:
                        self._submit(res, loop, same_thread)
            except Exception:
                _log.exception("handler for %s failed", event)

    async def _dispatch_async(self, event: str, kwargs: Dict[str, Any]) -> None:
        """Call every listener for ``event``, awaiting coroutine results in order."""
        with self._lock:
            handlers = list(self._listeners.get(event, []))
        for handler in handlers:
            try:
                res = handler(**kwargs)
                if asyncio.iscoroutine(res):
                    await res
            except Exception:
                _log.exception("handler for %s failed", event)

    # ── ZMQ transport (optional) ──
    def connect(self) -> bool:
        """Enable cross-container mode. Safe to call once at startup; no-op if
        pyzmq missing or SANAD_BUS_ADDR unset. Returns True if ZMQ is active."""
        if self._zmq_enabled:
            return True
        addr_set = bool(os.environ.get("SANAD_BUS_ADDR"))
        if not _HAVE_ZMQ or not addr_set:
            _log.info("bus: in-process mode (zmq=%s, addr=%s)", _HAVE_ZMQ, addr_set)
            return False
        pub_addr = os.environ.get("SANAD_BUS_PUB", "tcp://127.0.0.1:5560")
        sub_addr = os.environ.get("SANAD_BUS_SUB", "tcp://127.0.0.1:5561")
        self._remember_loop()
        pub = None
        try:
            self._ctx = zmq.Context.instance()
            pub = self._ctx.socket(zmq.PUB)
            pub.connect(pub_addr)
            self._stop.clear()  # allow connect() again after close()
            self._pub = pub
            self._sub_thread = threading.Thread(
                target=self._sub_loop, args=(sub_addr,), daemon=True)
            self._sub_thread.start()
            self._zmq_enabled = True
            _log.info("bus: ZMQ mode pub=%s sub=%s origin=%s",
                      pub_addr, sub_addr, self._origin[:8])
            return True
        except Exception:
            _log.exception("bus: ZMQ connect failed — staying in-process")
            # do not leak a half-configured PUB socket
            self._pub = None
            if pub is not None:
                try:
                    pub.close(0)
                except Exception:
                    pass
            return False

    def _publish(self, event: str, kwargs: Dict[str, Any]) -> None:
        """Send ``event`` to other processes; silently a no-op in in-process mode."""
        if not self._zmq_enabled:
            return
        try:
            body = dict(kwargs)
            body["_origin"] = self._origin
            frames = [event.encode("utf-8"),
                      json.dumps(body, default=str).encode("utf-8")]
            with self._pub_lock:
                if self._pub is None:
                    return
                self._pub.send_multipart(frames)
        except Exception:
            _log.exception("bus: publish %s failed", event)

    def _sub_loop(self, sub_addr: str) -> None:
        """Subscriber thread body: receive remote events until ``close()``."""
        sub = None
        try:
            sub = self._ctx.socket(zmq.SUB)
            sub.connect(sub_addr)
            sub.setsockopt(zmq.SUBSCRIBE, b"")  # all topics; filter locally by listeners
            while not self._stop.is_set():
                try:
                    if not sub.poll(timeout=500):
                        continue
                    frames = sub.recv_multipart()
                    if len(frames) != 2:
                        continue  # not one of our [topic, json] messages
                    event = frames[0].decode("utf-8", "replace")
                    data = json.loads(frames[1].decode("utf-8", "replace"))
                    if not isinstance(data, dict):
                        continue
                    if data.pop("_origin", None) == self._origin:
                        continue  # skip our own echo
                    self._dispatch_sync(event, data)
                except Exception:
                    _log.exception("bus: sub loop error")
                    # back off so a persistent failure cannot spin and flood the log
                    self._stop.wait(0.1)
        except Exception:
            _log.exception("bus: sub loop setup failed")
        finally:
            if sub is not None:
                try:
                    sub.close(0)
                except Exception:
                    pass

    def close(self) -> None:
        """Stop the subscriber thread and close the PUB socket. Idempotent."""
        self._stop.set()
        self._zmq_enabled = False  # later emits stay local instead of hitting a closed socket
        with self._pub_lock:
            pub, self._pub = self._pub, None
        if pub is not None:
            try:
                pub.close(0)
            except Exception:
                pass
        thread, self._sub_thread = self._sub_thread, None
        if thread is not None and thread is not threading.current_thread():
            thread.join(timeout=1.0)  # the loop polls every 500 ms
sanad_pkg.bus import bus` +bus = Bus() diff --git a/vendor/sanad_pkg/license.py b/vendor/sanad_pkg/license.py new file mode 100644 index 0000000..c385a51 --- /dev/null +++ b/vendor/sanad_pkg/license.py @@ -0,0 +1,244 @@ +"""Offline Ed25519 license verification + entitlement for Sanad packages. + +A license file is JSON: + + { + "payload": { + "robot_id": "G1-SN-0001", + "machine_fingerprint": "<sha256 hex>", # optional; checked iff binding on + "packages": {"P1": true, "P2": false, "P3": true, "P4": false}, + "features": {"language": "ar", "multilingual": false, ...}, + "issued": "2026-06-01", + "expires": "2027-06-01" # optional; null = perpetual + }, + "sig": "<base64 Ed25519 signature of canonical(payload)>" + } + +The vendor holds the Ed25519 private key; every image ships the public key. +Verification is fully OFFLINE (no network), suitable for a robot that may be +disconnected. + +Search order (highest first): + license : $SANAD_LICENSE else /etc/sanad/sanad.lic + pubkey : $SANAD_PUBKEY else /etc/sanad/pubkey.ed25519 + else <this package's directory>/pubkey.ed25519 + +Env knobs: + SANAD_LICENSE_BIND=1 enforce machine_fingerprint == this machine + SANAD_LICENSE_DEV=1 if `cryptography` is missing, accept UNSIGNED licenses + (development only — never set on a shipped robot) + +Kept Python-3.8 compatible. 
def machine_fingerprint(iface: Optional[str] = None) -> str:
    """Return a per-machine id: sha256 hex of "<MAC>|<machine-id>".

    The MAC is read from /sys/class/net/<iface>/address, where ``iface``
    defaults to $SANAD_DDS_INTERFACE and then "eth0". The machine id is the
    first non-empty one of /etc/machine-id and /var/lib/dbus/machine-id.

    Best-effort: an input that cannot be read is left out of the hash, so the
    function never raises.
    """
    if not iface:
        iface = os.environ.get("SANAD_DDS_INTERFACE", "eth0")

    def _read_stripped(location):
        # unreadable/missing file counts as "no value"
        try:
            return Path(location).read_text().strip()
        except Exception:
            return ""

    components = []

    mac_address = _read_stripped("/sys/class/net/%s/address" % iface)
    if mac_address:
        components.append(mac_address)

    for candidate in ("/etc/machine-id", "/var/lib/dbus/machine-id"):
        machine_id = _read_stripped(candidate)
        if machine_id:
            components.append(machine_id)
            break  # first usable machine-id wins

    digest = hashlib.sha256()
    digest.update("|".join(components).encode("utf-8"))
    return digest.hexdigest()
class License(object):
    """A loaded + verified (or rejected) license.

    ``payload`` is the license's "payload" object, ``valid`` says whether
    verification succeeded, and ``reason`` explains a rejection ("ok" or empty
    otherwise). An invalid license grants nothing: every entitlement query
    answers as if the payload were empty.
    """

    def __init__(self, payload: Dict[str, Any], valid: bool, reason: str = ""):
        self.payload = payload or {}
        self.valid = valid
        self.reason = reason

    def _section(self, key: str) -> Dict[str, Any]:
        """Return payload[key] when it is a JSON object, else an empty dict.

        Guards against "packages": null (or a list/string), which would
        otherwise crash the entitlement queries with AttributeError.
        """
        payload = self.payload if isinstance(self.payload, dict) else {}
        section = payload.get(key)
        return section if isinstance(section, dict) else {}

    # -- entitlement queries --
    def package(self, pkg: str) -> bool:
        """True only if the license is valid and entitles package ``pkg``."""
        if not self.valid:
            return False
        return bool(self._section("packages").get(pkg, False))

    def feature(self, name: str, default: Any = False) -> Any:
        """Return feature ``name``; ``default`` if absent or the license is invalid."""
        if not self.valid:
            return default
        return self._section("features").get(name, default)

    @property
    def robot_id(self) -> str:
        """The licensed robot id, or "" when the payload has none."""
        payload = self.payload if isinstance(self.payload, dict) else {}
        rid = payload.get("robot_id")
        # a JSON null must not surface as the string "None"
        return "" if rid is None else str(rid)

    @property
    def expires(self) -> Optional[str]:
        """The raw "expires" value from the payload (None = perpetual)."""
        payload = self.payload if isinstance(self.payload, dict) else {}
        return payload.get("expires")

    def summary(self) -> Dict[str, Any]:
        """Plain-dict view for logs/status; packages/features are empty when invalid."""
        pkgs = self._section("packages") if self.valid else {}
        feats = self._section("features") if self.valid else {}
        return {
            "valid": self.valid,
            "reason": self.reason,
            "robot_id": self.robot_id,
            "expires": self.expires,
            "packages": {k: bool(v) for k, v in pkgs.items()},
            "features": feats,
        }
def load(path: Optional[str] = None) -> License:
    """Load + fully verify the license. Never raises — returns an invalid
    License with a `reason` on any failure (fail-closed).

    Checks, in order: the file exists and is a JSON object with a "payload"
    object; the Ed25519 signature over canonical(payload); the expiry date;
    and, when SANAD_LICENSE_BIND=1 and the payload carries a
    machine_fingerprint, that it matches this machine.

    Without `cryptography` the signature step is skipped only when
    SANAD_LICENSE_DEV=1; otherwise the license is rejected.
    """
    lpath = Path(path) if path else _default_license_path()
    if not lpath.exists():
        return License({}, False, "license file not found: %s" % lpath)

    try:
        doc = json.loads(lpath.read_text(encoding="utf-8"))
    except Exception as exc:
        return License({}, False, "license JSON unreadable: %s" % exc)

    # json.loads may return a list/str/number/null; only an object has .get()
    if not isinstance(doc, dict):
        return License({}, False, "license JSON is not an object")

    payload = doc.get("payload")
    sig_b64 = doc.get("sig")
    if not isinstance(payload, dict):
        return License({}, False, "license missing 'payload'")

    # 1) signature
    if _HAVE_CRYPTO:
        if not sig_b64:
            return License(payload, False, "license missing 'sig'")
        try:
            pub = _load_pubkey()
            if pub is None:
                return License(payload, False, "public key unavailable")
            pub.verify(base64.b64decode(sig_b64), canonical(payload))
        except InvalidSignature:
            return License(payload, False, "signature verification FAILED")
        except Exception as exc:
            # missing/unreadable key file, malformed base64, wrong key length, ...
            return License(payload, False, "signature check error: %s" % exc)
    elif os.environ.get("SANAD_LICENSE_DEV") != "1":
        return License(payload, False,
                       "cryptography unavailable and SANAD_LICENSE_DEV != 1")
    # else: dev box without cryptography — unsigned license accepted on purpose

    # 2) expiry
    ok, reason = _check_expiry(payload)
    if not ok:
        return License(payload, False, reason)

    # 3) machine binding (optional)
    # NOTE(review): with binding on, a license that carries no
    # machine_fingerprint still passes — confirm that is intended.
    if os.environ.get("SANAD_LICENSE_BIND") == "1":
        want = payload.get("machine_fingerprint")
        if want and want != machine_fingerprint():
            return License(payload, False,
                           "machine fingerprint mismatch (license bound to another robot)")

    return License(payload, True, "ok")
def main(argv=None) -> int:
    """License gate: report whether package ``argv[0]`` is entitled.

    ``argv`` defaults to ``sys.argv[1:]``; the package name is stripped and
    upper-cased. Returns the process exit code:

      0  entitled
      1  license valid but the package is not entitled
      2  usage error, or the license is missing/invalid
    """
    args = list(sys.argv[1:] if argv is None else argv)
    pkg = args[0].strip().upper() if args else ""
    if not pkg:
        # name the required argument so the message is actionable
        sys.stderr.write("usage: python -m sanad_pkg.license_check <PKG>  (e.g. P1)\n")
        return 2

    lic = _lic.load()
    summary = lic.summary()
    if not lic.valid:
        sys.stderr.write("[license] INVALID: %s\n" % summary["reason"])
        return 2

    if lic.package(pkg):
        sys.stdout.write(
            "[license] %s ENTITLED (robot=%s, expires=%s)\n"
            % (pkg, summary["robot_id"] or "?", summary["expires"] or "never")
        )
        return 0

    entitled_names = ", ".join(k for k, v in summary["packages"].items() if v)
    sys.stderr.write(
        "[license] %s NOT entitled (entitled: %s)\n" % (pkg, entitled_names or "none")
    )
    return 1
+ZOFerXRMTVQxkxsawjmGXJz8n5HmXfb8qLMhO/7DIC4= diff --git a/visitor_memory.py b/visitor_memory.py new file mode 100644 index 0000000..fcc06a7 --- /dev/null +++ b/visitor_memory.py @@ -0,0 +1,193 @@ +"""VisitorMemory — persistent visitor-profile store (NEW in P3). + +SanadV3 has no memory/visitor store, so P3 builds one. File-IO only (no DB, no +ML), mirroring vision/face_gallery.py: a threading.RLock + one JSON file per +profile under data/memories/. Each profile links to a face_gallery face_id so a +recognized VIP can be greeted with remembered attributes/notes. + +Profile schema (data/memories/<id>.json): + { "id", "name", "attributes": {..}, "notes", "tags": [..], + "linked_face_id", "last_seen", "created", "visit_count" } + +Kept Python-3.8 compatible. Best-effort + atomic writes; never raises on IO. +""" +from __future__ import annotations + +import json +import logging +import os +import re +import threading +import time +from pathlib import Path +from typing import Any, Dict, List, Optional + +_log = logging.getLogger("pkg3.memory") + + +def _now() -> str: + # ISO-ish UTC without importing datetime.now-with-tz gymnastics. 
class VisitorMemory(object):
    """File-backed store of visitor profiles, one JSON file per profile.

    Profiles live in ``root`` as ``<id>.json``. ``root`` defaults to
    $SANAD_MEMORIES_DIR, then ``<BASE_DIR>/data/memories`` from the Sanad
    config when importable, then ``./data/memories``. All file access is
    serialized with an RLock and is best-effort: IO problems are reported via
    return values (None/False), never raised.

    Profile ids are used as file names, so ids containing path separators are
    rejected instead of being allowed to address files outside ``root``.
    """

    def __init__(self, root: Optional[str] = None):
        if root is None:
            root = os.environ.get("SANAD_MEMORIES_DIR", "")
        if not root:
            try:
                from Project.Sanad.config import BASE_DIR
                root = str(Path(BASE_DIR) / "data" / "memories")
            except Exception:
                root = str(Path.cwd() / "data" / "memories")
        self.root = Path(root)
        self._lock = threading.RLock()
        try:
            self.root.mkdir(parents=True, exist_ok=True)
        except Exception:
            pass
        self._version = 0  # bumped on every successful write/delete

    # -- internal --
    @staticmethod
    def _is_safe_id(pid: Any) -> bool:
        """True if ``pid`` can be used as a file stem without leaving root."""
        if not isinstance(pid, str) or pid in ("", ".", ".."):
            return False
        return not any(ch in pid for ch in ("/", "\\", "\x00"))

    def _path(self, pid: str) -> Path:
        return self.root / ("%s.json" % pid)

    @staticmethod
    def _read(path: Path) -> Optional[Dict[str, Any]]:
        """Parse one profile file; None if missing, unreadable or not a JSON object."""
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except Exception:
            return None
        return data if isinstance(data, dict) else None

    def _write(self, pid: str, data: Dict[str, Any]) -> bool:
        """Atomic write (tmp+replace), best-effort — never raises on IO (matches the
        get/list/delete contract). Returns False + cleans up the tmp file on failure."""
        tmp = self.root / ("%s.json.tmp" % pid)
        try:
            tmp.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
            tmp.replace(self._path(pid))
            self._version += 1
            return True
        except Exception:
            _log.exception("visitor_memory: could not persist %s (kept in-memory)", pid)
            try:
                if tmp.exists():
                    tmp.unlink()
            except Exception:
                pass
            return False

    def _unique_id(self, name: str) -> str:
        """Slug of ``name``, suffixed -2, -3, ... until no file with that id exists."""
        base = _slug(name)
        pid = base
        n = 1
        while self._path(pid).exists():
            n += 1
            pid = "%s-%d" % (base, n)
        return pid

    # -- queries --
    def list(self) -> List[Dict[str, Any]]:
        """All readable profiles, ordered by file name; bad files are skipped."""
        out = []
        with self._lock:
            try:
                paths = sorted(self.root.glob("*.json"))
            except Exception:
                return out
            for p in paths:
                prof = self._read(p)
                if prof is not None:
                    out.append(prof)
        return out

    def get(self, pid: str) -> Optional[Dict[str, Any]]:
        """The profile with id ``pid``, or None if absent/unreadable/unsafe id."""
        if not self._is_safe_id(pid):
            return None
        with self._lock:
            return self._read(self._path(pid))

    def find_by_face(self, face_id: str) -> Optional[Dict[str, Any]]:
        """First profile whose linked_face_id equals ``face_id``, else None."""
        if not face_id:
            return None
        for prof in self.list():
            if prof.get("linked_face_id") == face_id:
                return prof
        return None

    # -- mutations --
    def add(self, name: str, attributes: Optional[Dict[str, Any]] = None,
            notes: str = "", tags: Optional[List[str]] = None,
            linked_face_id: str = "") -> Dict[str, Any]:
        """Create a profile with a fresh unique id and return it.

        The profile is returned even if persisting it failed (the failure is
        logged by ``_write``).
        """
        with self._lock:
            pid = self._unique_id(name)
            stamp = _now()  # one timestamp so created == last_seen
            prof = {
                "id": pid, "name": name or pid,
                "attributes": dict(attributes or {}),
                "notes": notes or "", "tags": list(tags or []),
                "linked_face_id": linked_face_id or "",
                "created": stamp, "last_seen": stamp, "visit_count": 0,
            }
            self._write(pid, prof)
            return prof

    def update(self, pid: str, **fields) -> Optional[Dict[str, Any]]:
        """Patch a profile and return it; None if ``pid`` is unknown.

        ``name``/``notes``/``linked_face_id`` are replaced when given and not
        None, ``attributes`` (dict) is merged into the stored attributes, and
        ``tags`` (list) replaces the stored tags.
        """
        with self._lock:
            prof = self.get(pid)
            if prof is None:
                return None
            for k in ("name", "notes", "linked_face_id"):
                if fields.get(k) is not None:
                    prof[k] = fields[k]
            if isinstance(fields.get("attributes"), dict):
                current = prof.get("attributes")
                if not isinstance(current, dict):
                    current = {}  # stored value was null/corrupt
                current.update(fields["attributes"])
                prof["attributes"] = current
            if isinstance(fields.get("tags"), list):
                prof["tags"] = fields["tags"]
            self._write(pid, prof)
            return prof

    def touch(self, pid: str) -> Optional[Dict[str, Any]]:
        """Record a visit — bump last_seen + visit_count."""
        with self._lock:
            prof = self.get(pid)
            if prof is None:
                return None
            try:
                visits = int(prof.get("visit_count") or 0)
            except (TypeError, ValueError):
                visits = 0  # corrupt counter: restart rather than raise
            prof["last_seen"] = _now()
            prof["visit_count"] = visits + 1
            self._write(pid, prof)
            return prof

    def delete(self, pid: str) -> bool:
        """Remove a profile file; True only if a file was actually deleted."""
        if not self._is_safe_id(pid):
            return False
        with self._lock:
            p = self._path(pid)
            if not p.exists():
                return False
            try:
                p.unlink()
                self._version += 1
                return True
            except Exception:
                return False

    # -- greeting primer (feeds personalized greetings to the live session) --
    def load_for_primer(self, limit: int = 12) -> str:
        """Compact text summary of known visitors, for the Gemini primer/persona so
        the robot can greet a recognized VIP by name with remembered context.

        Lists at most ``limit`` profiles, most recently seen first. Returns ""
        when there is nothing to list.
        """
        profs = self.list()
        if not profs or limit <= 0:
            return ""
        # last_seen may be missing or null in hand-edited files; sort those last
        profs.sort(key=lambda p: str(p.get("last_seen") or ""), reverse=True)
        lines = ["Known visitors you may recognize (greet them personally):"]
        for prof in profs[:limit]:
            raw_attrs = prof.get("attributes")
            attr_map = raw_attrs if isinstance(raw_attrs, dict) else {}
            attrs = ", ".join("%s=%s" % (k, v) for k, v in attr_map.items())
            bits = [str(prof.get("name", prof.get("id", "?")))]
            if prof.get("linked_face_id"):
                bits.append("face:%s" % prof["linked_face_id"])
            if attrs:
                bits.append("(%s)" % attrs)
            if prof.get("notes"):
                bits.append("— %s" % prof["notes"])
            lines.append(" • " + " ".join(bits))
        return "\n".join(lines)

    def status(self) -> Dict[str, Any]:
        """Store health: profile-file count, root path and change counter."""
        ok = True
        with self._lock:
            try:
                count = sum(1 for _ in self.root.glob("*.json"))
            except Exception:
                ok, count = False, 0
        return {"ok": ok, "count": count,
                "root": str(self.root), "version": self._version}