diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..97ecb64 --- /dev/null +++ b/.env.example @@ -0,0 +1,25 @@ +# Copy to .env — docker compose reads it automatically from this directory. + +# Signed license for THIS robot (default = the bundled example, entitles P1). +SANAD_LICENSE_FILE=./license/sanad.lic.example +# Enforce machine-fingerprint binding (1 on a delivered robot). When 1, also +# uncomment the /etc/machine-id mount in docker-compose.yml so the in-container +# fingerprint matches the host. +SANAD_LICENSE_BIND=0 + +# Audio: builtin (G1 chest mic+speaker over DDS) | plugged (USB e.g. Anker via PulseAudio) +SANAD_AUDIO_PROFILE=builtin +# DDS interface to the G1 firmware. +SANAD_DDS_INTERFACE=eth0 + +# Conversation language (overrides the license `language` feature if set). +# e.g. ar, en, hi, ur, zh, ru, fr ... (any Gemini-supported language) +SANAD_LANGUAGE= + +# Bundle the chest-audio Unitree SDK into the image at build time +# (1 = yes, default; 0 = leaner USB/plugged-only image). +WITH_UNITREE_SDK=1 +# Base image (override only for a GPU build). +BASE_OS_IMAGE=python:3.10-slim-bookworm +# Image name/tag (e.g. a registry path for pull-and-run). +# SANAD_IMAGE=sanad-p1:latest diff --git a/.gitignore b/.gitignore index c02264b..0807d19 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,20 @@ +# Python caches __pycache__/ *.pyc -Logs/ + +# Logs *.log +Logs/ + +# Customer license — NEVER commit a real signed license; ship only the example. +license/sanad.lic + +# Runtime data (keep the seed structure + config; ignore generated media). +data/recordings/* +data/audio/* +data/faces/* +data/photos/* +!data/**/.gitkeep + +# NOTE: ./vendor IS committed on purpose — it is the vendored Sanad engine that +# makes this package build standalone. Only its caches are ignored (above). 
diff --git a/Dockerfile b/Dockerfile index cdc3b13..665c2df 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,39 +1,98 @@ # syntax=docker/dockerfile:1 # ───────────────────────────────────────────────────────────────────────────── # Sanad Package 1 — Basic Communication. -# BUILD CONTEXT MUST BE Packages/ (FROM the prebuilt sanad-base): -# docker build -f Sanad_Package_1/Dockerfile -t sanad-p1:latest . -# (the top-level Packages/docker-compose.yml sets context: . for this service.) +# +# SELF-CONTAINED: builds from a public base image with NO dependency on a +# `sanad-base` image or a sibling `Sanad/` / `sanad_pkg/` checkout. The Sanad +# engine is vendored at ./vendor/Sanad and the license/bus lib at +# ./vendor/sanad_pkg, so the package repo builds and runs entirely on its own. +# +# Build context MUST be THIS package directory: +# docker build -t sanad-p1:latest . +# (docker-compose.yml uses `context: .`. On a Jetson Docker without buildx: +# DOCKER_BUILDKIT=0 docker build -t sanad-p1:latest .) # ───────────────────────────────────────────────────────────────────────────── -ARG BASE_IMAGE=sanad-base:latest -FROM ${BASE_IMAGE} +ARG BASE_OS_IMAGE=python:3.10-slim-bookworm +FROM ${BASE_OS_IMAGE} -# P1 (comms) extra system deps — PortAudio + a C toolchain so pyaudio's C -# extension compiles on the slim base (python:3.10-slim has no compiler). +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONPATH=/app +WORKDIR /app + +# System deps: shared (audio) + P1 (PortAudio + a C toolchain so pyaudio's +# extension compiles on the slim base). RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates libsndfile1 alsa-utils pulseaudio-utils \ portaudio19-dev libportaudio2 build-essential python3-dev \ && rm -rf /var/lib/apt/lists/* -# P1 Python deps (sanad-base is python:3.10 → google-genai installs cleanly). 
-COPY Sanad_Package_1/requirements-p1.txt /tmp/requirements-p1.txt -RUN python3 -m pip install --no-cache-dir -r /tmp/requirements-p1.txt +# Python deps (base + P1 merged). +COPY requirements.txt /tmp/requirements.txt +RUN python3 -m pip install --no-cache-dir --upgrade pip \ + && python3 -m pip install --no-cache-dir -r /tmp/requirements.txt -# P1 launcher + routes + entrypoint + config (Sanad source baked into sanad-base). -COPY Sanad_Package_1/app_p1.py /app/app_p1.py -COPY Sanad_Package_1/routes_p1.py /app/routes_p1.py -COPY Sanad_Package_1/entrypoint.sh /app/entrypoint.sh -COPY Sanad_Package_1/config /app/pkg1_config -COPY Sanad_Package_1/static /app/pkg1_static +# ── Optional: Unitree SDK — G1 chest (builtin) audio over DDS ───────────────── +# WITH_UNITREE_SDK=1 (default) builds CycloneDDS + installs unitree_sdk2_python so +# the chest mic/speaker work out of the box. Wrapped so a failure NEVER breaks the +# image — chest audio is then unavailable (use SANAD_AUDIO_PROFILE=plugged); USB +# (plugged) audio always works without the SDK. Set =0 for a leaner image. +# NOTE: build the FULL CycloneDDS (do NOT pass -DBUILD_IDLC=NO) — the `cyclonedds` +# Python binding's find_package(CycloneDDS) needs idlc, else it fails with +# "Could not locate cyclonedds". Pin the binding to match the 0.10.x C library. 
+ARG WITH_UNITREE_SDK=1 +ENV CYCLONEDDS_HOME=/usr/local \ + LD_LIBRARY_PATH=/usr/local/lib +RUN if [ "$WITH_UNITREE_SDK" = "1" ]; then \ + ( set -eux; \ + apt-get update; \ + apt-get install -y --no-install-recommends git cmake build-essential; \ + git clone --depth 1 -b releases/0.10.x https://github.com/eclipse-cyclonedds/cyclonedds /tmp/cyclonedds; \ + cmake -S /tmp/cyclonedds -B /tmp/cyclonedds/build -DCMAKE_INSTALL_PREFIX=/usr/local; \ + cmake --build /tmp/cyclonedds/build --target install -j"$(nproc)"; \ + CYCLONEDDS_HOME=/usr/local CMAKE_PREFIX_PATH=/usr/local python3 -m pip install --no-cache-dir "cyclonedds==0.10.2"; \ + git clone --depth 1 https://github.com/unitreerobotics/unitree_sdk2_python /opt/unitree_sdk2_python; \ + python3 -m pip install --no-cache-dir -e /opt/unitree_sdk2_python; \ + python3 -c "import unitree_sdk2py; print('unitree_sdk2py OK')"; \ + rm -rf /tmp/cyclonedds /var/lib/apt/lists/*; \ + ) || echo "WARN[P1]: Unitree SDK build failed — chest (builtin) audio unavailable; use SANAD_AUDIO_PROFILE=plugged"; \ + else echo "WITH_UNITREE_SDK=0 — skipping Unitree SDK (USB/plugged audio only)"; fi + +# License/bus shim + PUBLIC verification key (vendored — no sanad_pkg sibling). +COPY vendor/sanad_pkg /app/sanad_pkg +RUN mkdir -p /etc/sanad && cp /app/sanad_pkg/pubkey.ed25519 /etc/sanad/pubkey.ed25519 + +# Canonical Sanad engine (vendored — no Sanad/ sibling, no sanad-base). +COPY vendor/Sanad /app/Sanad + +# P1 launcher + routes + entrypoint + config + static. +COPY app_p1.py /app/app_p1.py +COPY routes_p1.py /app/routes_p1.py +COPY entrypoint.sh /app/entrypoint.sh +COPY config /app/pkg1_config +COPY static /app/pkg1_static RUN chmod +x /app/entrypoint.sh -# Ship KEYLESS — strip any Gemini key baked into the Sanad config so the vendor -# key never ships in the image; the customer adds their own via the dashboard. 
-COPY Sanad_Package_1/strip_key.py /tmp/strip_key.py +# Ship KEYLESS — blank any Gemini key baked into the vendored Sanad config so the +# vendor key never ships; the customer adds their own via the dashboard. +COPY strip_key.py /tmp/strip_key.py RUN python3 /tmp/strip_key.py && rm -f /tmp/strip_key.py +# Sanity: the vendored namespace imports cleanly. +RUN python3 - <<'PY' +import importlib.util as u, sys +mods = ("sanad_pkg.license", "sanad_pkg.bus", "Sanad") +ok = all(u.find_spec(m) for m in mods) +print("P1 self-contained: vendored modules importable:", ok) +sys.exit(0 if ok else 1) +PY + ENV SANAD_PACKAGE=P1 \ SANAD_DASHBOARD_PORT=8011 \ SANAD_DASHBOARD_HOST=0.0.0.0 \ - SANAD_P1_STATIC=/app/pkg1_static + SANAD_P1_STATIC=/app/pkg1_static \ + SANAD_LICENSE=/etc/sanad/sanad.lic \ + SANAD_PUBKEY=/etc/sanad/pubkey.ed25519 EXPOSE 8011 ENTRYPOINT ["/app/entrypoint.sh"] diff --git a/NEW_ROBOT_SETUP.md b/NEW_ROBOT_SETUP.md index 726b56f..45d526d 100644 --- a/NEW_ROBOT_SETUP.md +++ b/NEW_ROBOT_SETUP.md @@ -9,8 +9,8 @@ Roles: - **Vendor side** (your workstation) — holds the Ed25519 **private** key (`licensing/privkey.ed25519`) and signs licenses. Never goes on the robot. - **Robot side** (the new Jetson) — runs the container; carries only the signed - `sanad.lic`. The **public** key is already baked into every image - (`sanad-base` copies it to `/etc/sanad/pubkey.ed25519`). + `sanad.lic`. The **public** key is already baked into the image (the Dockerfile + copies `vendor/sanad_pkg/pubkey.ed25519` to `/etc/sanad/pubkey.ed25519`). --- @@ -24,38 +24,36 @@ sudo usermod -aG docker "$USER" # Optional: plug a USB speaker/mic (Anker) if you'll use the `plugged` profile. ``` -## 1. Get the code onto the robot (so it can build) +## 1. Get the package onto the robot (so it can build) -`sanad-base` bakes the canonical `Sanad/` source, so the build needs **both** -`Sanad/` and `Packages/` present. 
From the workstation: +P1 is **self-contained** — it vendors the Sanad engine under `Sanad_Package_1/vendor`, +so you only copy the **package folder** (no `Sanad/` sibling, no `sanad-base`). +From the workstation: ```bash rsync -az --exclude __pycache__ \ - Project/Sanad Project/Packages \ + Project/Packages/Sanad_Package_1 \ unitree@<NEWROBOT>:~/sanad_deploy/ ``` -This lays out `~/sanad_deploy/Sanad` + `~/sanad_deploy/Packages` (siblings — the -compose file mounts `../Sanad/data`, so they must sit side by side). +Everything P1 needs to build and run lives under `~/sanad_deploy/Sanad_Package_1`. -## 2. Build the images on the robot +## 2. Build the image on the robot ```bash -cd ~/sanad_deploy/Packages -# If this Docker has buildx (modern): -docker compose --profile base build # shared base (incl. chest-audio SDK) -docker compose --profile p1 build # P1 image - -# If this Docker has NO buildx (e.g. Jetson Docker 26.x) use legacy build: -DOCKER_BUILDKIT=0 docker build -f sanad-base/Dockerfile -t sanad-base:latest .. # context = ~/sanad_deploy -DOCKER_BUILDKIT=0 docker build -f Sanad_Package_1/Dockerfile -t sanad-p1:latest . # context = Packages/ +cd ~/sanad_deploy/Sanad_Package_1 +# Modern Docker (buildx): +docker compose build +# Jetson Docker without buildx: +DOCKER_BUILDKIT=0 docker build -t sanad-p1:latest . ``` +The build vendors the engine and (by default, `WITH_UNITREE_SDK=1`) compiles the +chest-audio SDK — first build takes a few minutes; later builds are cached. -> **Alternative — registry (build once, deploy to many robots):** on an x86 box -> with buildx + QEMU run -> `SANAD_REGISTRY=/ SANAD_TAG=1.0.0 PUSH=1 ./scripts/build_and_push.sh base p1`, -> then on each robot set `SANAD_REGISTRY`/`SANAD_TAG` in `.env` and -> `docker compose --profile p1 pull`. 
+> **Alternative — registry (build once, deploy to many robots):** build the +> `linux/arm64` image on an x86 box with buildx + QEMU, push to a registry, then +> on each robot set `SANAD_IMAGE=<registry>/sanad-p1:<tag>` in `.env` and +> `docker compose pull` instead of building. ## 3. License THIS robot @@ -74,8 +72,8 @@ the host's machine-id (step 4 does this) and compute the fingerprint the same wa 1. **Read the new robot's fingerprint on the host** (matches what the container will see once `/etc/machine-id` is mounted): ```bash - cd ~/sanad_deploy/Packages - PYTHONPATH=. python3 -c 'from sanad_pkg import license as L; print(L.machine_fingerprint())' + cd ~/sanad_deploy/Sanad_Package_1 + PYTHONPATH=vendor python3 -c 'from sanad_pkg import license as L; print(L.machine_fingerprint())' ``` 2. **On the workstation**, write `claims.json`: ```json @@ -94,34 +92,34 @@ the host's machine-id (step 4 does this) and compute the fingerprint the same wa --key licensing/privkey.ed25519 --in claims.json --out sanad.lic ``` (First time only, if no keypair yet: `python licensing/sign_license.py gen-keys --out-dir licensing` - — then rebuild images so the new `pubkey.ed25519` is baked in.) + in the monorepo, then run `./sync_vendor.sh` so the new `pubkey.ed25519` is + vendored into the package, and rebuild the image so it's baked in.) 4. **Copy to the robot:** ```bash - scp sanad.lic unitree@<NEWROBOT>:~/sanad_deploy/Packages/licensing/sanad.lic + scp sanad.lic unitree@<NEWROBOT>:~/sanad_deploy/Sanad_Package_1/license/sanad.lic ``` ## 4. 
Configure `.env` and run -On the robot, `cd ~/sanad_deploy/Packages` and create `.env` from `.env.example`: +On the robot, `cd ~/sanad_deploy/Sanad_Package_1` and create `.env` from `.env.example`: ```ini -SANAD_LICENSE_FILE=./licensing/sanad.lic # the one you signed (3b) — or sanad.lic.example -SANAD_LICENSE_BIND=1 # 1 to enforce the fingerprint; 0 = unbound -SANAD_AUDIO_PROFILE=builtin # chest mic+speaker | plugged for USB/Anker -SANAD_LANGUAGE=ar # optional; else license feature / persona +SANAD_LICENSE_FILE=./license/sanad.lic # the one you signed (3b) — or sanad.lic.example +SANAD_LICENSE_BIND=1 # 1 to enforce the fingerprint; 0 = unbound +SANAD_AUDIO_PROFILE=builtin # chest mic+speaker | plugged for USB/Anker +SANAD_LANGUAGE=ar # optional; else license feature / persona ``` -**If using a bound license (3b), add the host machine-id mount** to the `p1` -service in `docker-compose.yml` (under `volumes:`) so the container's fingerprint -matches the host's: +**If using a bound license (3b), uncomment the host machine-id mount** in the `p1` +service of `docker-compose.yml` so the container's fingerprint matches the host's: ```yaml - "/etc/machine-id:/etc/machine-id:ro" ``` Then: ```bash -docker compose --profile p1 up -d -docker compose --profile p1 logs -f p1 # should print "[P1] entitled — lang=… port=8011" +docker compose up -d +docker compose logs -f # should print "[P1] entitled — lang=… port=8011" ``` Dashboard: **http://<NEWROBOT>:8011** @@ -152,17 +150,15 @@ through the chest (or USB) speaker. > enabled at boot. Pick **one** of the two options below. 
### Option A — Docker-native (simplest, no extra files) +The compose file already sets `restart: unless-stopped`, so you only need the +Docker daemon to start at boot: ```bash -# 1) make the daemon start at boot: sudo systemctl enable docker -# 2) make P1 survive reboot + crash (overrides the on-failure policy on the -# live container — no compose edit needed): -docker update --restart unless-stopped sanad-p1 +docker compose up -d # once; the container then returns on every boot ``` -(Equivalent permanent form: change the `p1` service's `restart: on-failure` → -`restart: unless-stopped` in `docker-compose.yml`, then `docker compose --profile -p1 up -d`.) After this, `docker compose --profile p1 up -d` once and the container -returns on every boot until you explicitly `docker compose --profile p1 down`. +(If the policy was ever switched back to `on-failure`, restore boot-survival with +`docker update --restart unless-stopped sanad-p1`.) The container keeps coming +back until you explicitly `docker compose down`. ### Option B — systemd unit (clean start/stop/status, mirrors the Sanad unit) Create `/etc/systemd/system/sanad-p1.service` (adjust `User=` and the path to @@ -177,9 +173,9 @@ Wants=network-online.target [Service] Type=simple User=unitree -WorkingDirectory=/home/unitree/sanad_deploy/Packages -ExecStart=/usr/bin/docker compose --profile p1 up -ExecStop=/usr/bin/docker compose --profile p1 down +WorkingDirectory=/home/unitree/sanad_deploy/Sanad_Package_1 +ExecStart=/usr/bin/docker compose up +ExecStop=/usr/bin/docker compose down Restart=on-failure RestartSec=5 TimeoutStopSec=30 diff --git a/README.md b/README.md index 10f43d9..45c16da 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,34 @@ Hands-free conversation in **one operator-selected language** (Gemini Live), audio via the **G1 chest** or **any plugged USB mic/speaker (Anker)**. **No** voice-command motion, vision, recognition, or navigation. Dashboard on **:8011**. 
+This package is **self-contained** — it vendors the Sanad engine under `vendor/`, +so a clone of this folder builds and runs with no sibling folders or `sanad-base`. + +## Quick start (how to start) + +On the robot, **from this folder** (`Sanad_Package_1/`) — nothing else needed: +```bash +cp .env.example .env # optional: set language / audio / license path +docker compose up -d --build # build (vendored engine) + run +# Jetson Docker without buildx: DOCKER_BUILDKIT=0 docker compose up -d --build +``` +Open **http://<robot-ip>:8011** and (the image ships **keyless**): +1. **Gemini API key** card → paste your key. +2. **Persona** card → set who the robot is + the language/dialect it speaks (saving restarts the live session). +3. **Audio** card → pick chest vs USB/Anker speaker, volume, mute. +4. Press **Start** in the **Conversation** card and talk. + +Manage it: +```bash +docker compose logs -f # live logs +docker compose down # stop +./test_p1.sh :8011 # smoke test (expect 11/11 PASS) +``` +- **Auto-start on boot:** `sudo systemctl enable docker` — compose already runs P1 + `restart: unless-stopped`, so it returns after a reboot. +- **A brand-new G1** (build, sign a license, bind to the robot) → **`NEW_ROBOT_SETUP.md`**. +- **No Docker?** dev mode in the `gemini_sdk` conda env → `./p1ctl.sh start` (runs against `./vendor`). + ## What it ships - `app_p1.py` — launcher: bootstraps the `Project.Sanad` namespace, constructs ONLY the comms subsystems (`brain`, `audio_mgr`, `voice_client`, `local_tts`, @@ -13,38 +41,48 @@ voice-command motion, vision, recognition, or navigation. Dashboard on **:8011** the logs websocket. Serves the real Sanad SPA with non-P1 tabs hidden. - `entrypoint.sh` — license gate (`license_check P1`; clean exit if unlicensed), resolves language/audio/port (env > license feature > `config/p1_config.json`). -- `Dockerfile` / `requirements-p1.txt` — `FROM sanad-base`, adds PortAudio + - `google-genai`. 
+- `Dockerfile` / `requirements.txt` — **SELF-CONTAINED**: `FROM python:3.10-slim`, + installs all deps, and bakes the vendored engine — **no `sanad-base`**. +- `vendor/Sanad` + `vendor/sanad_pkg` — the vendored Sanad engine + license/bus + lib. Refresh from a monorepo checkout with `./sync_vendor.sh`. - `config/p1_config.json` — defaults (language, audio profile, port, tab set). -- `docker-compose.p1.yml` — standalone run; top-level compose wires `--profile p1`. +- `docker-compose.yml` — standalone build + run (`context: .`); the top-level + `Packages/docker-compose.yml` can still run P1 in the fleet via `--profile p1`. +- `license/` — public verification key + an example license for the default mount. -It does **not** fork Sanad — it reuses the canonical source baked into -`sanad-base`. +It does **not** fork Sanad — it **vendors** the canonical source under `vendor/` +(re-synced by `sync_vendor.sh`), so the package builds and runs entirely on its +own with **no sibling folders**. ## Run & stop P1 -**A) Docker (the productized way)** — from `Project/Packages` on the robot: +**A) Docker (the productized way)** — **self-contained**; from this package dir +(`Sanad_Package_1/`) on the robot, nothing else required: ```bash -docker compose --profile base build # build sanad-base once -docker compose --profile p1 up -d --build # run -> http://<robot-ip>:8011 -docker compose --profile p1 logs -f p1 # view logs -docker compose --profile p1 down # stop -# audio: SANAD_AUDIO_PROFILE=builtin (chest) | plugged (USB/Anker) -# language: license `language` feature, or SANAD_LANGUAGE=en docker compose --profile p1 up -d +docker compose up -d --build # build (vendored engine) + run -> http://<robot-ip>:8011 +docker compose logs -f # view logs +docker compose down # stop +# Jetson Docker without buildx: DOCKER_BUILDKIT=0 docker compose up -d --build +# audio: SANAD_AUDIO_PROFILE=builtin (chest) | plugged (USB/Anker) +# language: license `language` feature, or SANAD_LANGUAGE=en docker compose up -d 
--build ``` +No `sanad-base` image and no sibling `Sanad/` checkout are needed — the engine is +vendored under `vendor/`. (The top-level `Packages/docker-compose.yml` can still +run P1 in the multi-package fleet via `--profile p1`.) **B) Dev mode (no Docker)** — run P1 in the robot's `gemini_sdk` conda env via the -control script (deployed to `~/sanad_deploy/Packages/Sanad_Package_1/p1ctl.sh`): +control script. It runs against the vendored engine in `./vendor`, so only the +package folder is needed: ```bash -cd ~/sanad_deploy/Packages/Sanad_Package_1 -./p1ctl.sh start # launch on :8011 (coexists with Sanad on :8000) +cd ~/sanad_deploy/Sanad_Package_1 +./p1ctl.sh start # launch on :8011 (runs against ./vendor) ./p1ctl.sh status # process + /api/health ./p1ctl.sh logs 80 # tail the P1 log ./p1ctl.sh restart ./p1ctl.sh stop ``` Deploy/update from the workstation first: -`rsync -az --exclude __pycache__ Project/Packages Project/Sanad unitree@:~/sanad_deploy/` +`rsync -az --exclude __pycache__ Project/Packages/Sanad_Package_1 unitree@:~/sanad_deploy/` **Logs:** the dashboard's **Logs** card streams live (`/ws/logs`) and the **⬇ Download** button saves the full bundle (`/api/logs/bundle`) as `sanad_p1_logs_.txt`. @@ -113,13 +151,15 @@ of `SANAD_GEMINI_API_KEY` env and `core_config.json`). ## Plug-and-play status - **Base:** `python:3.10-slim` (multi-arch) → `google-genai` installs cleanly, no - CUDA needed. Build on the Jetson (or x86) with `docker compose --profile base build`. + CUDA needed. Build on the Jetson (or x86) with `docker compose up -d --build`. - **Works out of the box** with a plugged USB speaker/mic. The entrypoint runs a **preflight** (python / google-genai / pyaudio / Unitree-SDK / audio profile) and prints clear guidance if something's missing. - **Language** is set via the **Persona** card (put the dialect/language directive in the system prompt — saving applies it to the live session immediately). 
-- **Pending for true "pull-and-run":** prebuilt `linux/arm64` image in a registry; - bundling `unitree_sdk2_python` for turnkey chest (`builtin`) audio (today: use - `plugged`, or mount the SDK). In a multi-package deployment, audio output later - routes through the `Sanad_Core` hwbroker audio-lock (P1 standalone speaks directly). +- **Self-contained:** the Sanad engine is vendored under `vendor/` and chest-audio + `unitree_sdk2_python` is built into the image (`WITH_UNITREE_SDK=1`), so a clone + of this repo builds and runs with no sibling folders. For pull-and-run at fleet + scale, publish the `linux/arm64` image to a registry and `docker compose pull`. + In a multi-package deployment, audio output can route through the `Sanad_Core` + hwbroker audio-lock (P1 standalone speaks directly). diff --git a/data/audio/.gitkeep b/data/audio/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/audio_device.json b/data/audio_device.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/data/audio_device.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/data/camera_device.json b/data/camera_device.json new file mode 100644 index 0000000..7930d09 --- /dev/null +++ b/data/camera_device.json @@ -0,0 +1,5 @@ +{ + "profile_serial_assignments": { + "realsense_primary": "" + } +} \ No newline at end of file diff --git a/data/faces/.gitkeep b/data/faces/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/motions/config.json b/data/motions/config.json new file mode 100644 index 0000000..40a0170 --- /dev/null +++ b/data/motions/config.json @@ -0,0 +1,21 @@ +{ + "gemini": { + "api_key": "", + "model": "models/gemini-2.5-flash-native-audio-preview-12-2025", + "voice_name": "Charon" + }, + "audio": { + "send_sample_rate": 16000, + "receive_sample_rate": 24000, + "chunk_size": 512, + "g1_volume": 100 + }, + "motion": { + "action_cooldown_sec": 1.0, + "replay_hz": 60.0 + }, + "dashboard": { + "host": "0.0.0.0", + "port": 
8000 + } +} \ No newline at end of file diff --git a/data/photos/.gitkeep b/data/photos/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/recordings/.gitkeep b/data/recordings/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docker-compose.p1.yml b/docker-compose.p1.yml deleted file mode 100644 index 89f148d..0000000 --- a/docker-compose.p1.yml +++ /dev/null @@ -1,38 +0,0 @@ -# Standalone compose for Package 1 (Basic Communication). -# Prereq: build the base image first: -# docker build -f sanad-base/Dockerfile -t sanad-base:latest .. -# Then from Packages/: -# docker compose -f Sanad_Package_1/docker-compose.p1.yml up --build -# -# (The top-level Packages/docker-compose.yml wires this under the `p1` profile.) -services: - p1: - build: - context: .. # = Project/Packages - dockerfile: Sanad_Package_1/Dockerfile - args: - BASE_IMAGE: sanad-base:latest - image: sanad-p1:latest - container_name: sanad-p1 - # Host networking is REQUIRED — the G1 DDS link + Gemini cloud + chest audio. 
- network_mode: host - restart: on-failure - environment: - SANAD_PACKAGE: P1 - SANAD_DASHBOARD_PORT: "8011" - SANAD_DASHBOARD_HOST: "0.0.0.0" - SANAD_VOICE_BRAIN: gemini - SANAD_AUDIO_PROFILE: "${SANAD_AUDIO_PROFILE:-builtin}" # builtin (chest) | plugged (USB/Anker) - SANAD_DDS_INTERFACE: "${SANAD_DDS_INTERFACE:-eth0}" - SANAD_LICENSE: /etc/sanad/sanad.lic - SANAD_PUBKEY: /etc/sanad/pubkey.ed25519 - SANAD_LICENSE_BIND: "${SANAD_LICENSE_BIND:-0}" - # SANAD_LANGUAGE overrides the license `language` feature if set: - SANAD_LANGUAGE: "${SANAD_LANGUAGE:-}" - devices: - - "/dev/snd:/dev/snd" # USB/plugged audio (Anker) via ALSA/Pulse - volumes: - - "${SANAD_LICENSE_FILE:-./licensing/sanad.lic.example}:/etc/sanad/sanad.lic:ro" - - "../Sanad/data:/app/Sanad/data" # faces/recordings/state persist on host - # Optional chest-audio over DDS — mount the vendored SDK if using 'builtin': - # - "${UNITREE_SDK_DIR:-/home/unitree/unitree_sdk2_python}:/opt/unitree_sdk2_python:ro" diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..12faa37 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,46 @@ +# Self-contained compose for Sanad Package 1 (Basic Communication). +# NO sibling folders, NO sanad-base — the Sanad engine is vendored in ./vendor. +# This file + this directory are all you need to build and run P1. +# +# docker compose up -d --build # build + run -> http://:8011 +# docker compose logs -f # view logs +# docker compose down # stop +# # Jetson Docker without buildx: DOCKER_BUILDKIT=0 docker compose up -d --build +# +# Audio: SANAD_AUDIO_PROFILE=builtin (G1 chest, needs the Unitree SDK baked at +# build) | plugged (USB/Anker via PulseAudio, no SDK needed) +# License: point SANAD_LICENSE_FILE at your signed sanad.lic (default = example). +# Language: set SANAD_LANGUAGE, or via the license `language` feature, or persona. +services: + p1: + build: + context: . 
+ dockerfile: Dockerfile + args: + BASE_OS_IMAGE: "${BASE_OS_IMAGE:-python:3.10-slim-bookworm}" + WITH_UNITREE_SDK: "${WITH_UNITREE_SDK:-1}" + image: "${SANAD_IMAGE:-sanad-p1:latest}" + container_name: sanad-p1 + # Host networking REQUIRED — the G1 DDS link + Gemini cloud + chest audio. + network_mode: host + # `unless-stopped` survives reboot (with `sudo systemctl enable docker`). + restart: unless-stopped + environment: + SANAD_PACKAGE: P1 + SANAD_DASHBOARD_PORT: "8011" + SANAD_DASHBOARD_HOST: "0.0.0.0" + SANAD_VOICE_BRAIN: gemini + SANAD_AUDIO_PROFILE: "${SANAD_AUDIO_PROFILE:-builtin}" # builtin (chest) | plugged (USB/Anker) + SANAD_DDS_INTERFACE: "${SANAD_DDS_INTERFACE:-eth0}" + SANAD_LICENSE: /etc/sanad/sanad.lic + SANAD_PUBKEY: /etc/sanad/pubkey.ed25519 + SANAD_LICENSE_BIND: "${SANAD_LICENSE_BIND:-0}" # 1 = enforce machine fingerprint + SANAD_LANGUAGE: "${SANAD_LANGUAGE:-}" + devices: + - "/dev/snd:/dev/snd" # USB/plugged audio (Anker) via ALSA/Pulse + volumes: + - "${SANAD_LICENSE_FILE:-./license/sanad.lic.example}:/etc/sanad/sanad.lic:ro" + - "./data:/app/Sanad/data" # persist persona/recordings/config on host + # Bound license (SANAD_LICENSE_BIND=1) also needs the host machine-id so the + # in-container fingerprint matches the host — uncomment: + # - "/etc/machine-id:/etc/machine-id:ro" diff --git a/license/pubkey.ed25519 b/license/pubkey.ed25519 new file mode 100644 index 0000000..f622571 --- /dev/null +++ b/license/pubkey.ed25519 @@ -0,0 +1 @@ +ZOFerXRMTVQxkxsawjmGXJz8n5HmXfb8qLMhO/7DIC4= diff --git a/license/sanad.lic.example b/license/sanad.lic.example new file mode 100644 index 0000000..3763666 --- /dev/null +++ b/license/sanad.lic.example @@ -0,0 +1,27 @@ +{ + "payload": { + "robot_id": "G1-SN-DEMO-0001", + "machine_fingerprint": null, + "packages": { + "P1": true, + "P2": false, + "P3": true, + "P4": false + }, + "features": { + "language": "ar", + "multilingual": false, + "voice_command_motion": false, + "lipsync": false, + "mask": false, + 
"face_rec": true, + "places": true, + "memory": true, + "guide_tour": false, + "navigation": false + }, + "issued": "2026-06-01", + "expires": "2030-01-01" + }, + "sig": "cww/6qRfRsZhMa7G6D7A3V5MrdqU3Mg/nKTed/q1wHLcBOv7qKkeisPZRMcynj4E6RAcpAV1iiN2GbrlutVCCA==" +} diff --git a/p1ctl.sh b/p1ctl.sh index 12d4d0b..478b4f7 100755 --- a/p1ctl.sh +++ b/p1ctl.sh @@ -8,27 +8,30 @@ # ./p1ctl.sh status # process + /api/health # ./p1ctl.sh logs [N] # tail N lines of the P1 log # -# Overridable env: SANAD_DEPLOY_ROOT (default ~/sanad_deploy), SANAD_P1_PY, -# SANAD_DASHBOARD_PORT (8011), SANAD_AUDIO_PROFILE (builtin), SANAD_DDS_INTERFACE (eth0). +# Self-contained: runs against the vendored engine in ./vendor — no sibling +# Sanad/ or Packages/ checkout needed. Overridable env: SANAD_P1_PY, +# SANAD_DASHBOARD_PORT (8011), SANAD_AUDIO_PROFILE (builtin), SANAD_DDS_INTERFACE (eth0), +# SANAD_LICENSE / SANAD_PUBKEY. set -u -ROOT="${SANAD_DEPLOY_ROOT:-$HOME/sanad_deploy}" +PKG_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PY="${SANAD_P1_PY:-$HOME/miniconda3/envs/gemini_sdk/bin/python}" PORT="${SANAD_DASHBOARD_PORT:-8011}" -APP="$ROOT/Packages/Sanad_Package_1/app_p1.py" -LOG="$ROOT/p1.log" +APP="$PKG_DIR/app_p1.py" +LOG="$PKG_DIR/p1.log" +LIC="${SANAD_LICENSE:-$PKG_DIR/license/sanad.lic}"; [ -f "$LIC" ] || LIC="$PKG_DIR/license/sanad.lic.example" _start() { if pgrep -f app_p1.py >/dev/null 2>&1; then echo "P1 already running on :$PORT"; return 0 fi [ -f "$APP" ] || { echo "ERROR: $APP not found (deploy first)"; return 1; } - cd "$ROOT" - export SANAD_APP_DIR="$ROOT" \ - SANAD_LICENSE="$ROOT/Packages/licensing/sanad.lic.example" \ - SANAD_PUBKEY="$ROOT/Packages/sanad_pkg/pubkey.ed25519" \ - SANAD_P1_STATIC="$ROOT/Packages/Sanad_Package_1/static" \ - PYTHONPATH="$ROOT:$ROOT/Packages" \ + cd "$PKG_DIR" + export SANAD_APP_DIR="$PKG_DIR/vendor" \ + SANAD_LICENSE="$LIC" \ + SANAD_PUBKEY="${SANAD_PUBKEY:-$PKG_DIR/license/pubkey.ed25519}" \ + SANAD_P1_STATIC="$PKG_DIR/static" \ + 
PYTHONPATH="$PKG_DIR/vendor" \ SANAD_DASHBOARD_PORT="$PORT" SANAD_DASHBOARD_HOST="0.0.0.0" \ SANAD_VOICE_BRAIN="gemini" \ SANAD_AUDIO_PROFILE="${SANAD_AUDIO_PROFILE:-builtin}" \ diff --git a/requirements-p1.txt b/requirements-p1.txt deleted file mode 100644 index 3992f64..0000000 --- a/requirements-p1.txt +++ /dev/null @@ -1,10 +0,0 @@ -# Sanad Package 1 (Basic Communication) extra deps, on top of sanad-base. -# Comms-minimal subset of Sanad/requirements.txt. sanad-base is python:3.10-slim, -# so google-genai installs cleanly (no version gymnastics). -google-genai>=1.0.0 -pyaudio -soundfile -requests -# unitree_sdk2py is NOT on PyPI — needed only for the 'builtin' (chest) -# audio profile over DDS. Provide it via the vendored unitree_sdk2_python -# (COPY/volume) or run P1 with SANAD_AUDIO_PROFILE=plugged (PulseAudio). diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..9be62d5 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,29 @@ +# Sanad Package 1 — Basic Communication — self-contained dependency set. +# python:3.10-slim base, so google-genai installs cleanly with no version +# gymnastics. NO torch / transformers / opencv / pyrealsense2 — P1 is comms +# only (no local-TTS model, no vision, no CUDA). + +# Web dashboard (FastAPI SPA + websockets) +fastapi +uvicorn[standard] +pydantic +python-multipart +websockets + +# IPC bus shim + offline license verification +pyzmq +cryptography + +# Audio framing +numpy + +# Gemini Live voice + audio I/O +google-genai>=1.0.0 +pyaudio +soundfile +requests + +# NOTE: unitree_sdk2py is NOT on PyPI. The 'builtin' (G1 chest) audio profile +# over DDS needs it — the Dockerfile builds it from source when +# WITH_UNITREE_SDK=1 (default). With SANAD_AUDIO_PROFILE=plugged (USB/Anker) it +# is not required. 
#!/usr/bin/env bash
# Refresh the vendored Sanad engine + sanad_pkg from a full monorepo checkout.
# P1 ships a SELF-CONTAINED copy of the Sanad source under ./vendor so the repo
# builds standalone. When Sanad/ changes upstream, run this from a checkout that
# has Project/Sanad + Project/Packages, then commit the updated ./vendor.
#
#   ./sync_vendor.sh [/path/to/Project]   # default: ../../  (Packages/.. = Project/)
#
# Excludes runtime data (recordings/audio/faces), Logs, caches, the 53M temp3d
# 3D viewer (P1 hides that tab), and tests — keeps code + config + dashboard SPA.
set -euo pipefail
HERE="$(cd "$(dirname "$0")" && pwd)"
PROJECT="${1:-$(cd "$HERE/../.." && pwd)}"   # Packages/Sanad_Package_1 -> ../../ = Project/
SRC_SANAD="$PROJECT/Sanad"
SRC_PKG="$PROJECT/Packages/sanad_pkg"
SRC_LIC="$PROJECT/Packages/licensing"

# Validate EVERY required input before the destructive `rm -rf vendor` below, so
# a missing file can never leave the committed ./vendor tree wiped or half-built.
[ -d "$SRC_SANAD" ] || { echo "ERROR: no Sanad/ at $SRC_SANAD (pass the Project/ path)"; exit 1; }
[ -d "$SRC_PKG" ]   || { echo "ERROR: no sanad_pkg at $SRC_PKG"; exit 1; }
[ -f "$SRC_SANAD/data/motions/config.json" ] \
  || { echo "ERROR: no data/motions/config.json under $SRC_SANAD"; exit 1; }
[ -f "$SRC_LIC/pubkey.ed25519" ] \
  || { echo "ERROR: no pubkey.ed25519 at $SRC_LIC"; exit 1; }

echo ">> vendoring Sanad engine from $SRC_SANAD"
rm -rf "$HERE/vendor"; mkdir -p "$HERE/vendor"
rsync -a \
  --exclude 'data/' --exclude 'Logs/' --exclude '__pycache__/' --exclude '*.pyc' \
  --exclude '.git/' --exclude 'dashboard/static/temp3d/' --exclude 'tests/' \
  "$SRC_SANAD/" "$HERE/vendor/Sanad/"

echo ">> seeding minimal data/"
mkdir -p "$HERE/vendor/Sanad/data/motions"
cp "$SRC_SANAD/data/motions/config.json" "$HERE/vendor/Sanad/data/motions/config.json"
# Optional device presets: copied only when the checkout has them.
for j in audio_device.json camera_device.json; do
  if [ -f "$SRC_SANAD/data/$j" ]; then
    cp "$SRC_SANAD/data/$j" "$HERE/vendor/Sanad/data/$j"
  fi
done
for d in recordings audio faces photos; do
  mkdir -p "$HERE/vendor/Sanad/data/$d"
  touch "$HERE/vendor/Sanad/data/$d/.gitkeep"
done

echo ">> vendoring sanad_pkg + public key"
rm -rf "$HERE/vendor/sanad_pkg"; cp -r "$SRC_PKG" "$HERE/vendor/sanad_pkg"
find "$HERE/vendor/sanad_pkg" -name __pycache__ -type d -prune -exec rm -rf {} + 2>/dev/null || true
mkdir -p "$HERE/license"
cp "$SRC_LIC/pubkey.ed25519" "$HERE/license/pubkey.ed25519"

echo ">> ship keyless (blank any baked Gemini key in the seed)"
# A seed file that is absent is fine (nothing to blank). A seed file that exists
# but cannot be read/parsed is a HARD error: silently skipping it could ship a
# real API key. The non-zero exit aborts the whole sync via `set -e`.
python3 - "$HERE" <<'PY'
import json
import sys

h = sys.argv[1]
for p, sec in ((h + "/vendor/Sanad/config/core_config.json", "gemini_defaults"),
               (h + "/vendor/Sanad/data/motions/config.json", "gemini")):
    try:
        with open(p, encoding="utf-8") as f:
            d = json.load(f)
    except FileNotFoundError:
        continue
    except (OSError, ValueError) as exc:
        sys.exit(f"ERROR: cannot read/parse {p} ({exc}) - refusing to vendor a "
                 f"seed that may still contain an API key")
    s = d.get(sec) if isinstance(d, dict) else None
    if isinstance(s, dict) and s.get("api_key"):
        s["api_key"] = ""
        with open(p, "w", encoding="utf-8") as f:
            json.dump(d, f, ensure_ascii=False, indent=2)
        print("   blanked", sec, "in", p)
PY

echo ">> refresh ./data seed mirror (keep structure, drop runtime media)"
rsync -a --delete \
  --exclude 'recordings/*' --exclude 'audio/*' --exclude 'faces/*' --exclude 'photos/*' \
  "$HERE/vendor/Sanad/data/" "$HERE/data/"
for d in recordings audio faces photos; do
  mkdir -p "$HERE/data/$d"
  touch "$HERE/data/$d/.gitkeep"
done

echo ">> done. vendor: $(du -sh "$HERE/vendor" | cut -f1) — review & commit ./vendor ./data ./license"
+ +`LocoController` wraps the Unitree `LocoClient` + `MotionSwitcherClient` for +operator-driven walking, postures and a discrete step pad. It reuses the arm +controller's single process-wide DDS init (one `ChannelFactoryInitialize`) and +is gated behind an in-memory "Enable movement" arm flag that defaults OFF every +boot. See dashboard/routes/controller.py for the REST surface. +""" + +from Project.Sanad.G1_Controller.loco_controller import LocoController + +__all__ = ["LocoController"] diff --git a/vendor/Sanad/G1_Controller/loco_controller.py b/vendor/Sanad/G1_Controller/loco_controller.py new file mode 100644 index 0000000..f2746db --- /dev/null +++ b/vendor/Sanad/G1_Controller/loco_controller.py @@ -0,0 +1,567 @@ +"""LocoController — manual G1 locomotion via the Unitree LocoClient (N2 Phase 1). + +Ported from the proven scripts in G1_Lootah/Controller (g1_mode_controller.py, +keyboard_controller.py, hanger_boot_sequence.py). Design notes: + +* **One DDS init per process.** The arm controller owns the single + `ChannelFactoryInitialize(0, nic)` (motion/arm_controller.py). This class + NEVER initialises DDS — it lazily builds its `LocoClient` / + `MotionSwitcherClient` only after `arm._initialized` is True. +* **Default DISARMED.** `_armed` starts False every boot and gates every WRITE + method. Reads (status / fsm / joints), E-STOP and disarm are ALWAYS allowed. +* **StopMove watchdog.** Continuous `Move(..., True)` never self-terminates, so a + daemon thread StopMoves if no `move()` refresh arrives within + `watchdog_timeout_sec`. The frontend re-sends setpoints at ~10 Hz, so a tab + close / network drop trips the watchdog within the timeout. +* **Velocity caps.** Symmetric clamp on vx/vy/vyaw — Walk 0.6, Run 1.2. +* **Allow-anytime-warn.** move/step never hard-block on FSM; if not walk-ready + they still execute but return a `warning`. 
+* **Sim fallback.** When `unitree_sdk2py` is absent (workstation), every write + returns `{"simulated": True}` (never raises) so the whole UI is testable. + +SDK facts confirmed from source — do not "fix" them: +* `LocoClient.Move(vx, vy, vyaw, True)` — the continuous-mode kwarg is misspelled + `continous_move` (one n); we pass it POSITIONALLY to avoid a TypeError. +* `LocoClient` has NO StandUp()/Squat() — use SetFsmId(4)/SetFsmId(2). +* FSM id / mode are read via the private RPC `bot._Call(7001/7002, "{}")`. +""" + +from __future__ import annotations + +import json +import threading +import time +from typing import Any, Optional + +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.logger import get_logger + +log = get_logger("loco_controller") + +# -- SDK import (optional) ----------------------------------------------------- +try: + from unitree_sdk2py.g1.loco.g1_loco_client import LocoClient + from unitree_sdk2py.comm.motion_switcher.motion_switcher_client import ( + MotionSwitcherClient, + ) + _HAS_SDK = True +except ImportError: + LocoClient = None + MotionSwitcherClient = None + _HAS_SDK = False + log.warning("Unitree SDK not available — LocoController in simulation mode") + +# LocoClient general RPC api-ids for FSM read-back (stable across SDK builds). +ROBOT_API_ID_LOCO_GET_FSM_ID = 7001 +ROBOT_API_ID_LOCO_GET_FSM_MODE = 7002 + +# G1 29-DoF joint names for indices 12-28 (0-11 legs, 12-14 waist, 15-21 left +# arm, 22-28 right arm). Used by the Diagnostics joint read-out. 
JOINT_NAMES = {
    12: "WAIST_YAW", 13: "WAIST_ROLL", 14: "WAIST_PITCH",
    15: "L_SHOULDER_PITCH", 16: "L_SHOULDER_ROLL", 17: "L_SHOULDER_YAW",
    18: "L_ELBOW", 19: "L_WRIST_ROLL", 20: "L_WRIST_PITCH", 21: "L_WRIST_YAW",
    22: "R_SHOULDER_PITCH", 23: "R_SHOULDER_ROLL", 24: "R_SHOULDER_YAW",
    25: "R_ELBOW", 26: "R_WRIST_ROLL", 27: "R_WRIST_PITCH", 28: "R_WRIST_YAW",
}

# Discrete step pad — (vx, vy, vyaw) sign per direction; magnitude is
# step_speed_frac * cap_walk (a gentle single step).
_STEP_DIRS = {
    "forward": (1.0, 0.0, 0.0),
    "backward": (-1.0, 0.0, 0.0),
    "slide_left": (0.0, 1.0, 0.0),
    "slide_right": (0.0, -1.0, 0.0),
    "rotate_left": (0.0, 0.0, 1.0),
    "rotate_right": (0.0, 0.0, -1.0),
}

# Posture names accepted by LocoController.posture().
_POSTURES = (
    "zero_torque", "damp", "stand_up", "squat", "sit",
    "low_stand", "high_stand", "lie_to_stand",
)


class LocoController:
    """Thread-safe manual locomotion control with a simulation fallback.

    Every public method returns a plain dict (``ok`` plus context keys) and
    never raises on an SDK failure: SDK calls go through ``_safe_call``, which
    logs and reports failure instead. When the client cannot be built (SDK
    missing or shared arm not initialised) write methods return
    ``{"simulated": True}``.

    NOTE(review): the methods in this class do not test ``_armed`` themselves;
    the "Enable movement" gate is presumably enforced by the caller (REST
    routes / voice dispatch) — confirm against dashboard/routes/controller.py.
    """

    def __init__(self, arm=None):
        """Store the shared arm controller and load tunables from config.

        Args:
            arm: shared ArmController; its ``_initialized`` flag decides when
                the loco clients may be built. ``None`` keeps the controller
                in simulation mode.
        """
        self._arm = arm                 # shared ArmController (owns the ONE DDS init)
        self._bot = None                # LocoClient (lazy)
        self._msc = None                # MotionSwitcherClient (lazy)
        self._lc_ready = False
        self._lock = threading.RLock()  # serialise all loco client WRITE calls
        self._armed = False             # in-memory MANUAL gate — OFF every boot

        self._cur_v = (0.0, 0.0, 0.0)   # last commanded (vx, vy, vyaw)
        self._teleop_active = False
        self._last_msc_mode: Optional[str] = None

        # watchdog
        self._last_move_ts = 0.0
        self._wd_thread: Optional[threading.Thread] = None
        self._wd_stop = threading.Event()
        self._wd_stop.set()             # not running until armed
        # Monotonic stop-generation counter, bumped under _lock by
        # estop/stop/disarm. move()/step()/prep_mode() capture it at start and
        # bail the instant it changes — so E-STOP preempts an in-flight motion
        # immediately AND can never be silently "un-cancelled" by a concurrent
        # command (a lock-free Event clear() could; an int compare under the
        # lock cannot).
        self._stop_gen = 0
        # Serializes the discrete blocking operations (step/prep_mode) so two
        # can't overlap and interleave Move commands. Continuous teleop move()
        # is intentionally NOT guarded by this.
        self._discrete_busy = False

        cfg = _cfg_section("motion", "loco_controller")
        self._cap_walk = float(cfg.get("cap_walk", 0.6))
        self._cap_run = float(cfg.get("cap_run", 1.2))
        self._lin_step = float(cfg.get("lin_step", 0.05))
        self._ang_step = float(cfg.get("ang_step", 0.2))
        self._wd_timeout = float(cfg.get("watchdog_timeout_sec", 0.5))
        self._block_window = float(cfg.get("arm_block_window_sec", 1.5))
        self._step_dur = float(cfg.get("step_duration_sec", 0.6))
        self._step_frac = float(cfg.get("step_speed_frac", 0.5))
        self._loco_timeout = float(cfg.get("loco_timeout_sec", 10.0))
        self._msc_timeout = float(cfg.get("msc_timeout_sec", 5.0))

    # ── client lifecycle ─────────────────────────────────────────────────────

    def _ensure_client(self) -> bool:
        """Lazily build LocoClient + MotionSwitcherClient. Returns readiness.

        Never initialises DDS — requires the shared arm to have already run the
        single ChannelFactoryInitialize.
        """
        if not _HAS_SDK:
            return False
        if self._lc_ready:
            return True
        if self._arm is None or not getattr(self._arm, "_initialized", False):
            return False
        with self._lock:
            # Re-check under the lock: another thread may have finished the
            # build while this one was waiting.
            if self._lc_ready:
                return True
            try:
                bot = LocoClient()
                bot.SetTimeout(self._loco_timeout)
                bot.Init()
                msc = MotionSwitcherClient()
                msc.SetTimeout(self._msc_timeout)
                msc.Init()
                # Publish both clients only after both initialised.
                self._bot = bot
                self._msc = msc
                self._lc_ready = True
                log.info("LocoClient + MotionSwitcherClient ready")
            except Exception as exc:
                log.error("LocoClient init failed: %s", exc)
                self._lc_ready = False
        return self._lc_ready

    def _safe_call(self, name: str, fn, *a, **kw):
        """Call ``fn(*a, **kw)``; return ``(True, result)`` or ``(False, None)``.

        Any exception is logged under ``name`` and reported as failure instead
        of propagating.
        """
        try:
            return True, fn(*a, **kw)
        except Exception as exc:
            log.error("%s failed: %s", name, exc)
            return False, None

    def _rpc_get_int(self, api_id: int):
        """Read the ``data`` field of a LocoClient RPC reply, or ``None``.

        Returns ``None`` when there is no client, the return code is non-zero,
        the payload is empty, or the call / JSON decode raises.
        """
        bot = self._bot
        if bot is None:
            return None
        try:
            code, data = bot._Call(api_id, "{}")
            if code == 0 and data:
                return json.loads(data).get("data")
        except Exception:
            # Best-effort read-back: a failed read is reported as "unknown".
            pass
        return None

    @staticmethod
    def _clamp(v: float, cap: float) -> float:
        """Clamp ``v`` symmetrically to ``[-cap, +cap]`` and return a float."""
        return max(-cap, min(cap, float(v)))

    # ── FSM / readiness ──────────────────────────────────────────────────────

    def fsm_id(self):
        """Return the FSM id read via RPC 7001, or ``None`` if unavailable."""
        return self._rpc_get_int(ROBOT_API_ID_LOCO_GET_FSM_ID)

    def fsm_mode(self):
        """Return the FSM mode read via RPC 7002, or ``None`` if unavailable."""
        return self._rpc_get_int(ROBOT_API_ID_LOCO_GET_FSM_MODE)

    def _walk_ready_warning(self) -> Optional[str]:
        """allow-anytime-warn: None when ready, else a human message.

        Also returns ``None`` while the client is not ready, so simulated
        commands carry no warning. "Ready" here means FSM id 200 with a mode
        that is neither ``None`` nor 2.
        """
        if not self._lc_ready:
            return None
        fid = self.fsm_id()
        fmode = self.fsm_mode()
        if fid == 200 and fmode not in (None, 2):
            return None
        return (f"Robot not in walk-ready FSM (id={fid}, mode={fmode}). "
                f"Command sent anyway.")

    # ── arm flag + watchdog ──────────────────────────────────────────────────

    def is_armed(self) -> bool:
        """Return the in-memory manual-movement flag."""
        return self._armed

    def movement_active(self) -> bool:
        """True when the robot may be walking: manual armed, teleop active, OR a
        move/step issued within the block window. Used as the arm's motion-block
        predicate so the arm never replays while the robot is (or just was)
        moving — regardless of whether the MANUAL gate or the GEMINI gate
        (Phase 3 voice dispatch, which calls move/step directly) triggered it."""
        if self._armed or self._teleop_active:
            return True
        return (time.monotonic() - self._last_move_ts) < self._block_window

    def arm_movement(self) -> dict:
        """Unlock manual control. Cancels any in-flight arm motion first so the
        arm and locomotion are never active simultaneously (movement wins)."""
        try:
            if self._arm is not None and getattr(self._arm, "is_busy", False):
                log.info("arming movement — cancelling in-flight arm motion")
                self._arm.cancel()
        except Exception:
            # A failed cancel is logged but does not block arming.
            log.exception("arm.cancel() on arm_movement failed")
        with self._lock:
            self._armed = True
        self._start_watchdog()
        log.info("movement ARMED")
        return {"ok": True, "armed": True}

    def disarm_movement(self) -> dict:
        """Clear the arm flag, stop the watchdog and issue a StopMove."""
        with self._lock:
            self._stop_gen += 1          # break any in-flight step/prep/move
            self._armed = False
            self._teleop_active = False
        self._wd_stop.set()
        try:
            self._raw_stop()
        except Exception:
            log.exception("StopMove on disarm failed")
        log.info("movement DISARMED")
        return {"ok": True, "armed": False}

    def _start_watchdog(self):
        """Clear the stop event and start the watchdog thread if not alive."""
        self._wd_stop.clear()
        if self._wd_thread is None or not self._wd_thread.is_alive():
            self._wd_thread = threading.Thread(
                target=self._watchdog_loop, daemon=True, name="loco-watchdog")
            self._wd_thread.start()

    def _watchdog_loop(self):
        """StopMove when teleop is active but no setpoint arrived in time.

        Polls at half the timeout, bounded to 0.02–0.1 s, until ``_wd_stop``
        is set.
        """
        period = max(0.02, min(0.1, self._wd_timeout / 2.0))
        while not self._wd_stop.is_set():
            fire = False
            # Read-and-decide under the lock (atomic check-then-act); the actual
            # StopMove runs after release so the critical section stays tiny.
            with self._lock:
                if self._teleop_active and (time.monotonic() - self._last_move_ts) > self._wd_timeout:
                    self._teleop_active = False
                    fire = True
            if fire:
                log.warning("watchdog: teleop setpoint stale (>%.2fs) — StopMove",
                            self._wd_timeout)
                try:
                    self._raw_stop()
                except Exception:
                    log.exception("watchdog StopMove failed")
            self._wd_stop.wait(period)

    def _raw_stop(self) -> bool:
        """Issue StopMove if the client is up; no-op in sim. Lock-light."""
        if not self._lc_ready or self._bot is None:
            return False
        with self._lock:
            ok, _ = self._safe_call("StopMove", self._bot.StopMove)
        return ok

    # ── movement ─────────────────────────────────────────────────────────────

    def move(self, vx: float, vy: float, vyaw: float, run: bool = False) -> dict:
        """Send one continuous-velocity setpoint (refresh it to keep moving).

        Args:
            vx, vy, vyaw: requested velocities; each is clamped to the active cap.
            run: use the run cap instead of the walk cap.

        Returns:
            dict with ``ok``, ``sent`` (clamped values), ``capped``, ``warning``
            and ``simulated``; ``cancelled`` is set when a stop landed between
            entry and the actual send.
        """
        cap = self._cap_run if run else self._cap_walk
        cvx, cvy, cvyaw = self._clamp(vx, cap), self._clamp(vy, cap), self._clamp(vyaw, cap)
        capped = (cvx, cvy, cvyaw) != (float(vx), float(vy), float(vyaw))
        warning = self._walk_ready_warning()
        sent = {"vx": cvx, "vy": cvy, "vyaw": cvyaw}

        with self._lock:
            my_gen = self._stop_gen      # capture under lock

        if not self._ensure_client():
            with self._lock:             # sim: record intent for UI/watchdog
                self._cur_v = (cvx, cvy, cvyaw)
                self._last_move_ts = time.monotonic()
                self._teleop_active = True
            self._start_watchdog()
            return {"ok": True, "sent": sent, "capped": capped,
                    "warning": warning, "simulated": True}
        with self._lock:
            # If an E-STOP / stop / disarm landed since we captured my_gen, do NOT
            # (re)command velocity — and do NOT stamp the motion flags (so a
            # cancelled tick doesn't extend the arm-block window).
            if self._stop_gen != my_gen:
                return {"ok": False, "cancelled": True, "sent": sent,
                        "capped": capped, "warning": warning, "simulated": False}
            self._cur_v = (cvx, cvy, cvyaw)
            self._last_move_ts = time.monotonic()
            self._teleop_active = True
            self._safe_call("SetBalanceMode", self._bot.SetBalanceMode, 1)
            # Continuous-mode flag is passed positionally (see module notes).
            ok, _ = self._safe_call("Move", self._bot.Move, cvx, cvy, cvyaw, True)
        self._start_watchdog()
        return {"ok": bool(ok), "sent": sent, "capped": capped,
                "warning": warning, "simulated": False}

    def stop_move(self) -> dict:
        """Halt translation/rotation. Allowed even when disarmed."""
        with self._lock:
            self._stop_gen += 1
            self._teleop_active = False
        if not self._ensure_client():
            return {"ok": True, "simulated": True}
        ok = self._raw_stop()
        return {"ok": bool(ok), "simulated": False}

    def estop(self) -> dict:
        """Emergency stop = StopMove only (no Damp / FSM change → keeps posture).
        ALWAYS allowed, even disarmed and in sim. Bumps the stop generation so any
        in-flight move()/step()/prep_mode() bails immediately (no lock wait)."""
        with self._lock:
            self._stop_gen += 1
            self._teleop_active = False
            self._cur_v = (0.0, 0.0, 0.0)
        if not self._ensure_client():
            log.warning("E-STOP (sim)")
            return {"ok": True, "simulated": True}
        ok = self._raw_stop()
        log.warning("E-STOP — StopMove issued")
        return {"ok": bool(ok), "simulated": False}

    def step(self, direction: str) -> dict:
        """Discrete one-step pad: Move for step_duration then StopMove.
        Blocking (~step_duration); call via asyncio.to_thread from the route.

        The sleep loop does NOT hold self._lock, so E-STOP / StopMove (which take
        the lock briefly) preempt it immediately; the loop also bails the moment
        the stop generation changes.

        Args:
            direction: one of the keys of ``_STEP_DIRS``.

        Returns:
            dict with ``ok`` and, for a known direction, ``dir``, ``warning``
            and ``simulated``; ``reason`` is set for an unknown direction or
            when another discrete operation is running.
        """
        if direction not in _STEP_DIRS:
            return {"ok": False, "reason": f"unknown direction: {direction}"}
        sx, sy, syaw = _STEP_DIRS[direction]
        k = self._cap_walk * self._step_frac
        vx, vy, vyaw = sx * k, sy * k, syaw * k
        warning = self._walk_ready_warning()
        with self._lock:
            if self._discrete_busy:
                return {"ok": False, "dir": direction, "reason": "busy",
                        "warning": warning, "simulated": not self._lc_ready}
            self._discrete_busy = True
            my_gen = self._stop_gen
            self._last_move_ts = time.monotonic()
            self._teleop_active = True
            self._start_watchdog()
        if not self._ensure_client():
            with self._lock:
                self._teleop_active = False
                self._discrete_busy = False
            return {"ok": True, "dir": direction, "warning": warning, "simulated": True}
        try:
            with self._lock:
                if self._stop_gen != my_gen:        # stopped before we began
                    return {"ok": False, "dir": direction, "cancelled": True,
                            "warning": warning, "simulated": False}
                self._safe_call("SetBalanceMode", self._bot.SetBalanceMode, 1)
                self._safe_call("Move", self._bot.Move, vx, vy, vyaw, True)
            t_end = time.monotonic() + self._step_dur
            while time.monotonic() < t_end:
                if self._stop_gen != my_gen:
                    break
                with self._lock:
                    self._last_move_ts = time.monotonic()  # keep watchdog fed
                time.sleep(0.05)
        finally:
            # Always end the step with a StopMove and release the busy flag.
            with self._lock:
                self._safe_call("StopMove", self._bot.StopMove)
                self._teleop_active = False
                self._discrete_busy = False
        return {"ok": True, "dir": direction, "warning": warning, "simulated": False}

    # ── postures / modes ─────────────────────────────────────────────────────

    def prep_mode(self) -> dict:
        """PREP — StopMove → Damp → StandUp(FSM4) → height ramp → BalanceStand(0).
        Exact order from g1_mode_controller.prep_mode, minus the blocking input().
        Blocking (~1s); call via asyncio.to_thread.

        Returns:
            dict with ``ok``, ``mode`` ("prep") and ``simulated``; ``reason``
            is "busy" when another discrete operation is running, and
            ``cancelled`` is set when a stop arrives during the height ramp.
        """
        if not self._ensure_client():
            return {"ok": True, "mode": "prep", "simulated": True}
        with self._lock:
            if self._discrete_busy:
                return {"ok": False, "mode": "prep", "reason": "busy", "simulated": False}
            self._discrete_busy = True
            my_gen = self._stop_gen
            self._safe_call("StopMove", self._bot.StopMove)
            self._safe_call("Damp", self._bot.Damp)
            self._safe_call("SetFsmId(4)", self._bot.SetFsmId, 4)
        try:
            # Height ramp OUTSIDE the lock so E-STOP can preempt at any time.
            h = 0.02
            while h <= 0.5 + 1e-9:
                if self._stop_gen != my_gen:
                    log.warning("PREP cancelled (E-STOP)")
                    return {"ok": False, "mode": "prep", "cancelled": True, "simulated": False}
                with self._lock:
                    self._safe_call("SetStandHeight", self._bot.SetStandHeight, round(h, 3))
                time.sleep(0.03)
                h += 0.02
            with self._lock:
                self._safe_call("BalanceStand", self._bot.BalanceStand, 0)
                self._safe_call("SetStandHeight", self._bot.SetStandHeight, 0.22)
        finally:
            with self._lock:
                self._discrete_busy = False
        log.info("PREP complete")
        return {"ok": True, "mode": "prep", "simulated": False}

    def ready_start_mode(self) -> dict:
        """READY = PREP then Start (FSM 200 / balance engaged).

        Start is sent only when PREP succeeded and no E-STOP / stop / disarm
        landed since this call began. A cancelled or busy PREP aborts here
        instead of engaging balance on a robot the operator just stopped.

        Returns:
            dict with ``ok``, ``mode`` ("ready") and ``simulated``; on abort
            ``ok`` is False and ``cancelled`` and/or ``reason`` explain why.
        """
        with self._lock:
            my_gen = self._stop_gen      # capture before the blocking PREP
        prep = self.prep_mode()
        if not prep.get("ok"):
            # PREP did not complete (E-STOP mid-ramp, or another discrete
            # operation is running) — do NOT proceed to Start.
            out = {"ok": False, "mode": "ready", "simulated": False}
            if prep.get("cancelled"):
                out["cancelled"] = True
            if prep.get("reason"):
                out["reason"] = prep["reason"]
            return out
        if not self._ensure_client():
            return {"ok": True, "mode": "ready", "simulated": True}
        with self._lock:
            # Same stop-generation check as move()/step()/prep_mode(): a stop
            # that arrived after PREP finished still wins over Start.
            if self._stop_gen != my_gen:
                return {"ok": False, "mode": "ready", "cancelled": True,
                        "simulated": False}
            if hasattr(self._bot, "Start"):
                ok, _ = self._safe_call("Start", self._bot.Start)
            else:
                ok, _ = self._safe_call("SetFsmId(200)", self._bot.SetFsmId, 200)
        log.info("READY/START complete")
        return {"ok": bool(ok), "mode": "ready", "simulated": False}

    def posture(self, name: str) -> dict:
        """Command one of the named postures in ``_POSTURES``.

        Returns:
            dict with ``ok``, ``posture`` and ``simulated``; an unknown name
            yields ``{"ok": False, "reason": ...}`` without touching the client.
        """
        if name not in _POSTURES:
            return {"ok": False, "reason": f"unknown posture: {name}"}
        if not self._ensure_client():
            return {"ok": True, "posture": name, "simulated": True}
        bot = self._bot
        with self._lock:
            if name == "zero_torque":
                ok, _ = self._safe_call("ZeroTorque", bot.ZeroTorque)
            elif name == "damp":
                ok, _ = self._safe_call("Damp", bot.Damp)
            elif name == "stand_up":
                # LocoClient has no StandUp(); FSM 4 is used instead.
                ok, _ = self._safe_call("SetFsmId(4)", bot.SetFsmId, 4)
            elif name == "squat":
                # LocoClient has no Squat(); FSM 2 is used instead.
                ok, _ = self._safe_call("SetFsmId(2)", bot.SetFsmId, 2)
            elif name == "sit":
                ok, _ = self._safe_call("Sit", bot.Sit)
            elif name == "low_stand":
                ok, _ = self._safe_call("LowStand", bot.LowStand)
            elif name == "high_stand":
                ok, _ = self._safe_call("HighStand", bot.HighStand)
            elif name == "lie_to_stand":
                # Fall back to the raw FSM id when the client lacks the helper.
                if hasattr(bot, "Lie2StandUp"):
                    ok, _ = self._safe_call("Lie2StandUp", bot.Lie2StandUp)
                else:
                    ok, _ = self._safe_call("SetFsmId(702)", bot.SetFsmId, 702)
            else:  # unreachable (guarded above)
                ok = False
        return {"ok": bool(ok), "posture": name, "simulated": False}

    def set_balance_mode(self, mode: int) -> dict:
        """Forward ``mode`` (coerced to int) to ``SetBalanceMode``."""
        if not self._ensure_client():
            return {"ok": True, "balance_mode": int(mode), "simulated": True}
        with self._lock:
            ok, _ = self._safe_call("SetBalanceMode", self._bot.SetBalanceMode, int(mode))
        return {"ok": bool(ok), "balance_mode": int(mode), "simulated": False}

    def set_stand_height(self, h: float) -> dict:
        """Forward ``h`` (coerced to float) to ``SetStandHeight``."""
        if not self._ensure_client():
            return {"ok": True, "height": float(h), "simulated": True}
        with self._lock:
            ok, _ = self._safe_call("SetStandHeight", self._bot.SetStandHeight, float(h))
        return {"ok": bool(ok), "height": float(h), "simulated": False}

    # ── MotionSwitcher ───────────────────────────────────────────────────────

    def msc_check(self) -> dict:
        """Query the MotionSwitcher mode name and cache it for ``status()``.

        Accepts either a ``(code, dict)`` tuple or a bare dict from
        ``CheckMode()``; ``mode_name`` is ``None`` when it cannot be read.
        """
        if not self._ensure_client() or self._msc is None:
            return {"mode_name": None, "simulated": not self._lc_ready}
        try:
            ret = self._msc.CheckMode()
            name = None
            if isinstance(ret, tuple) and len(ret) >= 2 and isinstance(ret[1], dict):
                name = ret[1].get("name")
            elif isinstance(ret, dict):
                name = ret.get("name")
            self._last_msc_mode = name
            return {"mode_name": name}
        except Exception as exc:
            log.error("msc_check failed: %s", exc)
            return {"mode_name": None}

    def msc_select_ai(self) -> dict:
        """Ask the MotionSwitcher to select the "ai" mode."""
        if not self._ensure_client() or self._msc is None:
            return {"ok": True, "simulated": True}
        with self._lock:
            ok, _ = self._safe_call("SelectMode(ai)", self._msc.SelectMode, "ai")
        return {"ok": bool(ok), "simulated": False}

    def msc_release(self) -> dict:
        """Ask the MotionSwitcher to release the current mode."""
        if not self._ensure_client() or self._msc is None:
            return {"ok": True, "simulated": True}
        with self._lock:
            ok, _ = self._safe_call("ReleaseMode", self._msc.ReleaseMode)
        return {"ok": bool(ok), "simulated": False}

    def reconnect(self) -> dict:
        """Drop and rebuild Loco + MSC clients (does NOT re-init the DDS factory)."""
        with self._lock:
            self._bot = None
            self._msc = None
            self._lc_ready = False
        ok = self._ensure_client()
        return {"ok": bool(ok), "lc_ready": self._lc_ready}

    # ── reads ────────────────────────────────────────────────────────────────

    def joints(self) -> dict:
        """Return positions for joint indices 12–28 as ``{"joints": [...]}``.

        Values come from ``arm.get_current_q()``; a missing arm, a failed
        read, or a short vector yields ``0.0`` for the affected joints.
        """
        q: list = []
        try:
            if self._arm is not None:
                q = self._arm.get_current_q()
        except Exception:
            q = []
        out = []
        for idx in range(12, 29):
            val = q[idx] if idx < len(q) else 0.0
            out.append({"idx": idx, "name": JOINT_NAMES.get(idx, f"motor_{idx}"),
                        "q": float(val)})
        return {"joints": out}

    def status(self) -> dict:
        """Return a snapshot of controller state for the dashboard."""
        # Polling /status lazily brings up the client once arm DDS is ready.
        self._ensure_client()
        fid = self.fsm_id() if self._lc_ready else None
        fmode = self.fsm_mode() if self._lc_ready else None
        walk_ready = bool(self._lc_ready and fid == 200 and fmode not in (None, 2))
        return {
            "sdk_available": _HAS_SDK,
            "lc_ready": self._lc_ready,
            "armed": self._armed,
            "fsm_id": fid,
            "fsm_mode": fmode,
            "walk_ready": walk_ready,
            "msc_mode": self._last_msc_mode,
            "teleop_active": self._teleop_active,
            "last_velocity": {"vx": self._cur_v[0], "vy": self._cur_v[1], "vyaw": self._cur_v[2]},
            "caps": {"walk": self._cap_walk, "run": self._cap_run},
            "arm_initialized": bool(self._arm is not None and getattr(self._arm, "_initialized", False)),
        }

    # ── shutdown helper ──────────────────────────────────────────────────────

    def shutdown(self):
        """Best-effort StopMove + disarm for process shutdown."""
        try:
            self.estop()
        finally:
            # Disarm even if the E-STOP path raised.
            self.disarm_movement()
+ +``` +┌──────────────────────────────────────────────────────────────────────┐ +│ Dashboard (FastAPI) ── http://:8000 │ +│ ├─ Operations Quick-fire arm actions + gestural-speaking │ +│ ├─ Voice & Audio Live Gemini, Typed Replay, Wake Phrases, Audio │ +│ ├─ Motion & Replay SDK actions, JSONL replays, macros, teaching │ +│ ├─ Controller Locomotion teleop, postures, FSM modes, E-STOP │ +│ ├─ Recognition Camera vision + face gallery + zones/places │ +│ ├─ Recordings Skill registry, saved Gemini turns │ +│ ├─ Temperature Live 3D motor-temperature heatmap (three.js) │ +│ ├─ Terminal In-browser shell (PTY) to the robot │ +│ └─ Settings & Logs System info, tail/stream live logs │ +└──────────────────────────────────────────────────────────────────────┘ + │ + ├─ voice/sanad_voice.py (subprocess — model-agnostic voice loop) + │ ├─ gemini/script.py (Gemini Live brain — audio+video+state) + │ └─ local/script.py (offline brain — VAD→STT→LLM→TTS) + ├─ gemini/client.py (short-session client for Typed Replay) + ├─ gemini/subprocess.py (spawns+supervises sanad_voice.py; + │ pushes camera frames + motion state + │ to the child over its stdin) + ├─ voice/movement_dispatch.py(Gemini spoken phrase → locomotion) + ├─ vision/camera.py (RealSense/USB capture daemon) + ├─ vision/face_gallery.py (data/faces/ CRUD for the primer turn) + ├─ vision/zone_gallery.py (data/zones/ places + "go here" targets) + ├─ motion/arm_controller.py (G1 arm DDS publisher — owns DDS init) + ├─ G1_Controller/loco_controller.py (G1 locomotion via LocoClient) + ├─ voice/audio_io.py (mic + speaker abstraction — 3 profiles) + └─ core/brain.py (skill dispatcher, event bus) +``` + +### Camera + face/place recognition data flow + +``` +CameraDaemon (parent, in-memory JPEG+b64 cache) + ├─→ dashboard /api/recognition/frame.jpg ── snapshot_jpeg() + └─→ GeminiSubprocess._frame_forwarder ── get_frame_b64() + │ "frame:\n" over stdin +ArmController ─emit→ event bus ─→ main.py ─→ live_sub.send_state() + │ "state:\n" over 
stdin + ▼ + gemini/script.py _stdin_watcher thread + ├─ frame: → _LATEST_FRAME → _send_frame_loop → + │ session.send_realtime_input(video=Blob) + └─ state: → _STATE_PENDING → _send_state_loop → + session.send_realtime_input(text=…) + +Recognition toggles (vision / face-rec / zone-rec / movement) are written by the +dashboard to data/.recognition_state.json and POLLED by the Gemini child at 1 Hz +— so flipping a toggle takes effect mid-session with NO restart. +``` + + +## Quick start (on the robot) + +```bash +conda activate gemini_sdk +cd ~/Sanad +python3 main.py +``` + +Then open `http://ROBOT_IP:8000` in a browser, replacing `ROBOT_IP` with the robot's address. (The dashboard binds to the +`wlan0` IP by default — see *Runtime selection* to override.) + +Fully-offline brain (no cloud): `SANAD_VOICE_BRAIN=local python3 main.py` +(requires `ollama serve` + the local model env — see *Voice brains*). + +> **Gemini API key — required, none ships with the repo.** The `api_key` +> fields in `config/core_config.json` (`gemini_defaults`) and +> `data/motions/config.json` (`gemini`) are intentionally empty (`""`). +> The voice loop cannot connect until you supply one, by any of: +> - **Dashboard** → *Voice & Audio → Gemini API Key* — paste + save, hot-swaps live (no restart). Persists to `data/motions/config.json`. +> - **Env var** — `export SANAD_GEMINI_API_KEY=AIza...` before `python3 main.py`. +> - **Config file** — set `gemini_defaults.api_key` in `config/core_config.json`. +> +> Precedence (highest first): `data/motions/config.json` → `SANAD_GEMINI_API_KEY` → `config/core_config.json`. Get a key from Google AI Studio (https://aistudio.google.com/apikey). + + +## Dashboard features + +### Operations +Quick-fire SDK + JSONL arm actions (chip buttons), gestural-speaking toggle. + +### Voice & Audio +- **Live Voice Commands** — fire arm gestures from the *user's* transcript + (wake-phrase → arm action). Master gate + Deferred-trigger toggle. +- **Live Gemini Process** — start/stop the voice conversation subprocess, tail + its log. 
Choose the Gemini cloud brain or the offline brain via + `SANAD_VOICE_BRAIN`. +- **Typed Replay** — Gemini reads typed text aloud (wrapped with a + "repeat verbatim" prompt); optionally records the clip. +- **Gemini API Key** — hot-swap the key without restart. +- **Wake Phrase Manager** — add/remove phrase → action bindings. +- **Audio Controls** — mic/speaker mute, G1 chest-speaker volume (DDS), device + profile selection, PulseAudio soft-reset and Anker USB hard-reset. + +### Motion & Replay +- **Motion Control** — list SDK (built-in) + JSONL (recorded) actions, select + + play. Cancel smoothly returns to `arm_home.jsonl`. +- **Replay Manager** — upload `.jsonl` files, test-play with speed, Teaching + Mode (kinesthetic record — limp the arm and hand-guide it). +- **Macro Recorder** — record a new audio+motion pair, OR pick any WAV + any + motion (SDK or JSONL) and play them in parallel. + +### Controller *(locomotion)* +Manual teleoperation of the G1's **legs** via the Unitree `LocoClient`. +**Disarmed every boot**; all motion writes require Arm first. +- **Move / Step** — continuous teleop (vx/vy/vyaw) or discrete one-shot steps. +- **Postures & FSM modes** — zero-torque, damp, squat, sit, stand, balance, + stand-height; prep/ready sequences; MotionSwitcher select-AI/release. +- **Gemini Movement** — toggle voice-driven walking: the `MovementDispatcher` + parses Gemini's *own spoken confirmation phrases* ("Turning right." / + "أستدير يميناً.") and drives the legs (gated on this toggle + an E-STOP latch). +- **E-STOP** — always available; `StopMove` + disarm + latch the dispatcher. + +> **Safety:** the arm and locomotion are **mutually exclusive** — +> `arm.set_motion_block(loco.movement_active)` makes every arm +> replay/gesture refuse while the robot is (or just was, within ~1.5 s) walking. + +### Recognition +Camera vision + Gemini-side **face** and **zone/place** recognition. 
All are +**off by default**; each is a **hot toggle** (≈1 s to take effect, no restart). +- **Camera Vision** — `CameraDaemon` captures from a RealSense (preferred) or + USB camera; the supervisor streams JPEG frames to Gemini Live so it can answer + "what do you see?". Live preview panel. Auto-reconnects on USB unplug/stall + and warns if a RealSense negotiated USB 2.0 (Marcus-ported resilience). +- **Face Recognition** — manage `data/faces/face_{id}/` galleries: enroll from + the live camera or upload photos, rename, describe, download (per-photo or + ZIP), delete. On session start (and on any gallery change) the child sends a + **primer turn** carrying every enrolled face + a Khaleeji greeting + instruction — **Gemini matches in-context, so there is no local + face-recognition model**. Recognition needs vision on. +- **Zones & Places** — `data/zones/zone_{zid}/place_{pid}/` two-level gallery: + reference photos per place, optional linked face_ids, and a **"go here"** nav + target (`nav_target_zone/place_id` in the recognition-state file) for + place-aware navigation. +- **Sync Gallery** — force-resend the face/zone primer to the live session. + +### Recordings +Skill Registry (predefined audio+motion+callback skills from `skills.json`) + +Saved Records (captured Gemini turn recordings; play/pause/stop/rename/delete). + +### Temperature +Live **3D motor-temperature heatmap** — a standalone three.js viewer +(`dashboard/static/temp3d/`) loads the G1 29-DoF URDF + STL meshes and colors +each joint blue→red from the arm controller's throttled `rt/lowstate` snapshot, +streamed over `/ws/motor-temps` at ~8 fps. No second DDS subscriber. + +### Terminal +In-browser **PTY shell** to the robot (`/ws/terminal`, xterm.js) — a `bash -i` +as the dashboard's user, with resize + backpressure, bounded to 4 sessions. +(See *Security* — this is full shell access to whoever reaches the URL.) 
+ +### Settings & Logs +System info (host, network interfaces, DDS interface, bound dashboard host/port, +per-subsystem status, audio devices), live log stream (`/ws/logs`), per-file +tail, snapshot, and a one-blob "Copy All Logs" bundle. + + +## Directory layout + +| Path | Contents | +|---|---| +| `main.py` | Entry point — fault-isolated boot of all subsystems + the dashboard. Doubles as the service container (route handlers `import` its module globals). | +| `config.py` | Runtime constants + layout-agnostic path resolution; layers `data/motions/config.json` over the JSON config at import. | +| `config/` | Per-subsystem JSON: `core`, `voice`, `gemini`, `local`, `motion`, `dashboard`. | +| `core/` | `brain.py` (skill dispatcher), `event_bus.py`, `skill_registry.py`, `config_loader.py`, `logger.py` (rotating + WS push), `asyncio_compat.py` (3.8 `to_thread` shim). | +| `gemini/` | Gemini Live — `client.py` (one-shot), `script.py` (live brain: audio + video + motion-state), `subprocess.py` (supervisor + stdin frame/state push). | +| `local/` | Fully-offline brain — `vad.py` (Silero), `stt.py` (faster-whisper), `llm.py` (Qwen via Ollama/llama.cpp), `tts.py` (CosyVoice2), `script.py` (the brain), `subprocess.py` (supervisor). Opt-in via `SANAD_VOICE_BRAIN=local`. | +| `voice/` | `sanad_voice.py` (subprocess entry, model-agnostic), `audio_io.py` / `audio_manager.py` / `audio_devices.py` (mic/speaker), `local_tts.py` (SpeechT5 Arabic TTS), `live_voice_loop.py` (user-transcript → arm gesture), `movement_dispatch.py` (Gemini-phrase → locomotion), `typed_replay.py`, `wake_phrase_manager.py`, `text_utils.py` (Arabic normalization + phrase matching), `model_script.py` / `model_subprocess.py` (brain templates). | +| `motion/` | `arm_controller.py` (production 5-phase JSONL replay engine, owns the single DDS init), `macro_player.py`, `macro_recorder.py`, `teaching.py`. (`sanad_arm_controller.py` is a legacy alternate — not wired by `main.py`.) 
| +| `G1_Controller/` | `loco_controller.py` — locomotion via Unitree `LocoClient` (move/step/postures/FSM/E-STOP); reuses the arm's DDS participant. | +| `vision/` | `camera.py` (RealSense/USB daemon, auto-reconnect), `face_gallery.py`, `zone_gallery.py`, `recognition_state.py` (atomic-JSON toggle IPC). | +| `dashboard/` | `app.py` (FastAPI factory + fault-isolated router registration), `routes/*.py` (20 REST routers), `websockets/*.py` (logs, motor-temps, terminal), `static/index.html` (single-page UI), `static/temp3d/` (3D viewer). | +| `scripts/` | Persona files — `sanad_script.txt` (voice persona "Bousandah"), `sanad_rule.txt`, `sanad_arm.txt` (voice→arm phrases). | +| `data/` | Runtime state — `motions/*.jsonl` (arm trajectories) + `instruction.json` (locomotion phrase map) + `skills.json` + `config.json` (dashboard-editable), `recordings/` (captured turns + macros), `faces/face_{id}/` + `zones/zone_{zid}/place_{pid}/` (galleries), `audio/` (typed-replay WAVs + records index), `.recognition_state.json` (toggle IPC). | +| `model/` | Local SpeechT5 / Whisper / CosyVoice2 weights when using the offline pipeline. | +| `logs/` | Per-module rotating logs. | + + +## Voice brains + +The child `voice/sanad_voice.py` is model-agnostic and selects a brain via +`SANAD_VOICE_BRAIN`. Every brain implements the same contract +(`__init__(audio_io, recorder, voice, system_prompt)`, `async run()`, `stop()`) +and ships a sibling supervisor that spawns the child and parses its +`USER:` / `BOT:` / state log markers. + +| Value | Brain | Pipeline | +|---|---|---| +| `gemini` *(default)* | `gemini/script.py` | Gemini Live native-audio (full-duplex speech-to-speech, server-side VAD, vision frames, face/zone primers, voice→movement). Cloud. | +| `local` | `local/script.py` | Silero VAD → faster-whisper (large-v3-turbo, CUDA int8) → Qwen2.5 (Ollama/llama.cpp) → CosyVoice2 streaming TTS. Fully on-device. 
| +| `model` | `voice/model_script.py` | Template/stub for adding a new provider (OpenAI Realtime, Claude Voice, …). | + +To add a brain: drop a file in `voice/` or a new `/` folder and add a +branch to `voice/sanad_voice.py:_build_brain()`; ship a supervisor modeled on +`voice/model_subprocess.py`. + + +## Runtime selection (env vars) + +| Var | Values | Default | Effect | +|---|---|---|---| +| `SANAD_VOICE_BRAIN` | `gemini`, `local`, `model` | `gemini` | Which brain the subprocess loads (see `voice/sanad_voice.py:_build_brain`). | +| `SANAD_AUDIO_PROFILE` | `builtin`, `anker`, `hollyland_builtin` | `builtin` | Mic + speaker pair. `builtin` = G1 UDP mic + G1 chest speaker via DDS. | +| `SANAD_DDS_INTERFACE` | network iface | `eth0` | DDS network for G1 low-level comms (arm + locomotion + speaker). | +| `SANAD_DASHBOARD_HOST` / `_INTERFACE` | IP / iface | `wlan0` IP | Dashboard bind address. | +| `SANAD_GEMINI_API_KEY` | string | `""` (empty) | Gemini API key. No key ships in the repo — set this, paste one in the dashboard (**Voice & Audio → Gemini API Key**), or fill `gemini_defaults.api_key` in `config/core_config.json`. See [Quick start](#quick-start-on-the-robot). | +| `SANAD_GEMINI_MODEL` / `_VOICE` | string | reads config | Override the Gemini model id / prebuilt voice. | +| `SANAD_G1_VOLUME` | `0`–`100` | `100` | G1 chest-speaker volume; also scales the barge-in threshold. | +| `SANAD_LIVE_SCRIPT` | path | auto | Override the subprocess entry script path. | +| `SANAD_RECORD` | `0` or `1` | `1` | Record every Gemini turn to `data/recordings/`. | +| `SANAD_AEC_ENABLE` | `0` or `1` | `1` | Enable WebRTC AEC3 (if the Python binding is installed). | +| `SANAD_VISION_ENABLE` | `0` or `1` | `0` | Boot default for camera vision. **Runtime truth is the Recognition-tab toggle** → `data/.recognition_state.json`, hot-applied without a restart. | +| `SANAD_FACE_RECOGNITION_ENABLE` | `0` or `1` | `0` | Boot default for Gemini-side face recognition. Also a hot toggle. 
| +| `SANAD_VISION_SEND_HZ` | float | `2` | Frames/sec the Gemini child relays to Live. | +| `SANAD_CAMERA_WIDTH` / `_HEIGHT` / `_FPS` | int | `424` / `240` / `15` | Capture profile. Also settable per-deploy in `config/core_config.json > camera`. | +| `SANAD_CAMERA_USB_INDEX` | int | auto | Pin a `/dev/videoN` node (avoids picking a RealSense IR stream). | +| `SANAD_FACES_MAX_SAMPLES` | int | `3` | Max photos per person fed into the gallery primer turn (token budget). | +| `SANAD_PROJECT_ROOT` | path | auto | Override the project root (see *Dynamic paths*). | + +> All `SANAD_VISION_*` / `SANAD_CAMERA_*` / `SANAD_FACE_*` vars are **boot +> defaults** forwarded to the Gemini child via `LIVE_TUNE`. Once running, the +> Recognition tab's toggles (vision / face-rec / zone-rec / movement) are the +> live source of truth in `data/.recognition_state.json`, polled at 1 Hz. + +CLI flags: `python3 main.py --host --port 8000 --network `; +`--check-env` prints a subsystem/environment diagnostic and exits. + + +## API surface + +All routes are registered defensively — a router whose import fails is recorded +(`GET /api/_dashboard_status`) and the server still boots without it. + +**REST** (prefix → controls): `/api` health · `/api/system` info · +`/api/voice` Gemini/local generate+connect+key · `/api/motion` arm actions · +`/api/skills` skill registry · `/api/macros` record/play · `/api/replay` JSONL +CRUD + teaching · `/api/audio` mute/volume/devices/reset · `/api/scripts` +persona files · `/api/records` saved WAVs · `/api/prompt` system prompt · +`/api/wake-phrases` bindings · `/api/live-voice` arm-phrase dispatcher · +`/api/live-subprocess` Gemini child · `/api/typed-replay` TTS · `/api/recognition` +vision + face gallery · `/api/zones` zones/places + nav target · `/api/temp` +motor map + snapshot · `/api/controller` locomotion (move/step/postures/modes/ +E-STOP). 
+ +**WebSockets**: `/ws/logs` (live log stream + 500-line replay) · +`/ws/motor-temps` (3D heatmap data, ~8 fps) · `/ws/terminal` (PTY shell). + + +## Architecture notes + +- **Subprocess isolation**: `voice/sanad_voice.py` runs as a child of `main.py` + via the supervisor. If the voice loop crashes, the dashboard + arm + legs stay + up. +- **Single DDS init**: `motion/arm_controller.py` owns the one + `ChannelFactoryInitialize`; `LocoController` and the audio routes reuse that + participant rather than re-initializing. +- **Brain contract**: see `voice/model_script.py` — any new model implements + `__init__(audio_io, recorder, voice, system_prompt)`, `async run()`, `stop()`. +- **Supervisor contract**: each brain ships a sibling supervisor (e.g. + `gemini/subprocess.py`) that spawns `sanad_voice.py` with its + `SANAD_VOICE_BRAIN` and parses the brain's log markers. Template: + `voice/model_subprocess.py`. +- **Locomotion safety**: `LocoController` is disarmed every boot, has velocity + caps + a `StopMove` watchdog, and is mutually exclusive with the arm. + Voice-driven movement is **off by default** and gated by the Controller + toggle. Distances/degrees in `data/motions/instruction.json` are + **approximate and must be calibrated on the real robot** — there is no + obstacle/abort stack. +- **Audio routing**: the G1's platform-sound PulseAudio sink is NOT wired to a + physical speaker. All dashboard-triggered playback (`play_wav`, typed-replay + audio, record playback) routes through DDS `AudioClient.PlayStream` via + `audio_manager._play_pcm_via_g1`. The PyAudio path is a desktop/dev fallback. +- **Arm replay**: `motion/arm_controller.py:_replay_file_inner()` is a port of + `G1_Lootah/Manual_Recorder/g1_replay_v4_stable.py:Run()` — ramp-in → settle + hold → playback → smooth return → disable SDK. Body motors (0–14) lock to a + live snapshot while arm motors (15–28) follow the file at 60 Hz. 
`_return_home()` + runs unconditionally after a cancel for a jerk-free return. +- **Camera frame transport (stdin push)**: the `CameraDaemon` lives in the + parent and caches frames in memory. `GeminiSubprocess` base64-encodes the + latest frame to the child's stdin (~2 fps); the child's `_stdin_watcher` + relays it to Gemini Live with a staleness guard. Chosen over a file drop so + the parent owns the camera once and the dashboard preview reads the same cache. +- **Motion-state channel**: `arm_controller._execute()` emits + `motion.action_started` / `_done` / `_error` on the event bus. `main.py` + forwards each to the child as `state:\n`, injected to Gemini Live as + silent `[STATE-START] wave_hand` / `[STATE-DONE] wave_hand (2.3s)` text so it + can honestly answer "what are you doing?". +- **Recognition is Gemini-side**: no dlib/insightface/onnxruntime. Galleries are + pure file IO; `gemini/script.py:_send_gallery_primer()` builds one multimodal + `send_client_content` turn — every enrolled face/place's photos + a greeting + instruction — and Gemini matches incoming frames against it in-context. + + +## Camera vision on Jetson + +The Recognition tab needs `pyrealsense2` to talk to the Intel RealSense. +**Do not `pip install pyrealsense2` on JetPack 5** — the PyPI wheel is built +against glibc 2.32+ (Ubuntu 22.04) and fails to load on JetPack 5's glibc +2.31 with `ImportError: ... version 'GLIBC_2.32' not found`. + +The native runtime is already there (`apt`-installed `librealsense2`). 
Build +just the Python binding from source against it, into the `gemini_sdk` env: + +```bash +rs-enumerate-devices # confirm the D435I shows up at OS level first + +source ~/miniconda3/etc/profile.d/conda.sh && conda activate gemini_sdk +pip uninstall -y pyrealsense2 # remove the broken wheel if present +sudo apt install -y cmake build-essential git python3-dev libusb-1.0-0-dev pkg-config libssl-dev + +cd /tmp && rm -rf librealsense +git clone --depth=1 --branch v2.56.5 https://github.com/IntelRealSense/librealsense.git +cd librealsense && mkdir -p build && cd build +cmake .. -DBUILD_PYTHON_BINDINGS=ON -DPYTHON_EXECUTABLE=$(which python3) \ + -DBUILD_EXAMPLES=OFF -DBUILD_GRAPHICAL_EXAMPLES=OFF \ + -DBUILD_UNIT_TESTS=OFF -DCHECK_FOR_UPDATES=OFF -DCMAKE_BUILD_TYPE=Release +make -j$(nproc) pyrealsense2 +SITE=$(python3 -c "import sysconfig; print(sysconfig.get_paths()['purelib'])") +mkdir -p "$SITE/pyrealsense2" +cp wrappers/python/pyrealsense2*.so "$SITE/pyrealsense2/" +cp ../wrappers/python/pyrealsense2/__init__.py "$SITE/pyrealsense2/" 2>/dev/null || true + +python3 -c 'import pyrealsense2 as rs; print([d.get_info(rs.camera_info.name) for d in rs.context().query_devices()])' +``` + +Match the `--branch` tag to the installed runtime (`dpkg -l | grep librealsense2`). +If the build isn't worth it, `CameraDaemon` falls back to `cv2.VideoCapture(0)` +automatically — fine for a plain USB webcam, but note a RealSense exposes its +*depth* stream at `/dev/video0`, not RGB, so a real USB cam is the cleaner +fallback (or pin `SANAD_CAMERA_USB_INDEX`). On x86_64 / Ubuntu 22.04+ desktops, +`pip install pyrealsense2` just works. + + +## Dynamic paths + +Every path is derived at runtime — no hard-coded `/home/...` anywhere. +Resolution order for `BASE_DIR` in `config.py`: + +1. `SANAD_PROJECT_ROOT` env var (if set). +2. `PROJECT_BASE + PROJECT_NAME` from a `.env` file in `Sanad/` or its parent. +3. `Path(__file__).resolve().parent` — auto-detected. 
+ +The project runs unchanged from either layout: +- dev: `/Project/Sanad/` +- deployed: `/home/unitree/Sanad/` + + +## Deployment (workstation → robot) + +```bash +rsync -av --delete \ + --exclude=__pycache__ --exclude=logs --exclude=model --exclude=.git \ + /path/to/Sanad/ \ + unitree@192.168.123.164:/home/unitree/Sanad/ +``` + +Then on the robot: `Ctrl+C` the running `main.py` and re-run. + + +## Security + +The dashboard has **no authentication**. Anyone who can reach +`http://{robot-ip}:8000` gets full robot control — locomotion, arm, audio, file +upload/delete — and, via the **Terminal tab**, an interactive shell as the +dashboard's user. Bind it to a **trusted LAN only**; add auth before any wider +exposure. + + +## Troubleshooting + +| Symptom | Fix | +|---|---| +| `No LowState received in 2s — refusing to replay` | `main.py` was re-executed as both `__main__` and `Project.Sanad.main`, creating two arm instances. Fix lives in the `sys.modules` alias near the top of `main.py`. Restart. | +| `G1ArmActionClient not available — skipping` for SDK actions | Same duplicate-init issue as above. | +| `No module named 'Project'` in subprocess | Bootstrap preamble in `voice/sanad_voice.py:~30` synthesises the `Project.Sanad` namespace when run as `__main__`. | +| Controller moves rejected (409) | The Controller is **disarmed by default** — hit Arm first. Reads + E-STOP are always allowed. | +| Arm action refused while "movement armed" | Arm ↔ locomotion are mutually exclusive. Disarm/stop locomotion, then trigger the arm. | +| Voice-driven walking does nothing | "Gemini Movement" toggle off, or E-STOP latched. Toggle on; clear E-STOP. Distances are uncalibrated. | +| Arm jumps at start of JSONL replay | `SETTLE_HOLD_SEC` (in `config/motion_config.json > arm_controller`) too low — try `0.7` or `1.0`. | +| Record playback silent | `audio_mgr.play_wav` only routes to G1 DDS if the Unitree SDK is importable; on desktop it falls back to the PulseAudio sink.
| +| Live Voice Commands transcript stuck | Deferred trigger was queued but `trigger_enabled` toggle was off. Toggle on — or the pending-trigger poll fires it automatically once enabled. | +| Gemini "no audio" on Typed Replay | Non-deterministic; the retry chain in `voice/typed_replay.py:generate_audio` tries three prompt variants. For reliable TTS, use the offline `local_tts` SpeechT5 path. | +| Local brain exits immediately | `ollama serve` not running / model not pulled, or weights missing under `model/`. Check `logs/local_subprocess.log`. The Gemini brain is the safe default. | +| Recognition tab: "Camera could not start (no backend)" | No camera backend acquired. Check `rs-enumerate-devices` (RealSense at OS level) and `python3 -c 'import pyrealsense2'` in the `gemini_sdk` env. The glibc `ImportError` means the pip wheel is incompatible — see "Camera vision on Jetson" above. | +| Camera badge stuck on "reconnecting…" | `CameraDaemon` lost the device and is retrying with exponential backoff. Re-seat the USB 3 cable; check `logs/camera.log` for the USB-2.0 warning. | +| Gemini doesn't greet an enrolled face | Face Recognition toggle on? Vision on? (Face rec needs frames.) Check `logs/gemini_brain.log` for `face gallery primed: N person(s)`. Hit "Sync Gallery" to force a re-prime. | +| Gemini unaware of motion state | The `motion.action_*` → `send_state` chain only runs when Live Gemini is up. Check `logs/gemini_subprocess.log` and `logs/gemini_brain.log` for `STATE injected:` lines. | + + +## License / attribution + +Internal project for YS Lootah Technology. 
Reuses/ports patterns from: +- `G1_Lootah/Manual_Recorder/g1_replay_v4_stable.py` (arm replay math) +- `SanadVoice/gemini_interact` (arm-phrase dispatch, skill registry) +- `SanadVoice/gemini_voice_v2` (local SpeechT5 TTS) +- `Project/Marcus` — camera→Gemini stdin-push transport, motion-state + injection, camera daemon resilience (auto-reconnect, USB-2.0 warning), the + `API/camera_api.py` cache shape (`get_frame_b64` / `get_fresh_frame`), and the + confirmation-phrase → locomotion pattern (`movement_dispatch`). +- Unitree `unitree_sdk2py` (G1 low-level SDK, `LocoClient`, `G1ArmActionClient`, + `AudioClient.PlayStream`). diff --git a/vendor/Sanad/__init__.py b/vendor/Sanad/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/config.py b/vendor/Sanad/config.py new file mode 100644 index 0000000..a25c673 --- /dev/null +++ b/vendor/Sanad/config.py @@ -0,0 +1,465 @@ +"""Centralized configuration for the Sanad robot assistant. + +Resolution order for BASE_DIR (highest priority first): + 1. SANAD_PROJECT_ROOT environment variable + 2. PROJECT_BASE + PROJECT_NAME from .env file (or env vars) + 3. Path(__file__).resolve().parent (auto-detected: the directory containing this file) + +Every other directory is derived from BASE_DIR — never hardcode an absolute path. +""" + +from __future__ import annotations + +import json +import os +from pathlib import Path +from typing import Any + + +def _read_env_file(env_path: Path) -> dict[str, str]: + """Minimal .env reader (no python-dotenv dependency).""" + out: dict[str, str] = {} + if not env_path.exists(): + return out + try: + for raw in env_path.read_text(encoding="utf-8").splitlines(): + line = raw.strip() + if not line or line.startswith("#") or "=" not in line: + continue + k, v = line.split("=", 1) + out[k.strip()] = v.strip().strip('"').strip("'") + except OSError: + pass + return out + + +def _resolve_base_dir() -> Path: + """Resolve the Sanad project root with override support.""" + # 1.
Direct env override + override = os.environ.get("SANAD_PROJECT_ROOT", "").strip() + if override: + p = Path(override).expanduser().resolve() + if p.exists(): + return p + + # 2. PROJECT_BASE + PROJECT_NAME pattern + _here = Path(__file__).resolve().parent # Sanad/ + env_files = [ + _here / ".env", # Sanad/.env + _here.parent / ".env", # Project/.env + ] + for env_path in env_files: + env = _read_env_file(env_path) + base = env.get("PROJECT_BASE") or os.environ.get("PROJECT_BASE", "") + name = env.get("PROJECT_NAME") or os.environ.get("PROJECT_NAME", "") + if base and name: + candidate = Path(base).expanduser().resolve() / name + if candidate.exists(): + return candidate + + # 3. Auto-detect — this file lives at Sanad/config.py, so parent = Sanad/ + return _here + + +BASE_DIR = _resolve_base_dir() +DATA_DIR = BASE_DIR / "data" +LOGS_DIR = BASE_DIR / "logs" +SCRIPTS_DIR = BASE_DIR / "scripts" +MODEL_DIR = BASE_DIR / "model" + +# Audio recordings (typed-replay, etc.) live under data/audio +AUDIO_RECORDINGS_DIR = DATA_DIR / "audio" +# Motion macro recordings (paired with audio) live under data/recordings/motion +MOTION_RECORDINGS_DIR = DATA_DIR / "recordings" / "motion" +# Motion JSONL macros (auto-discovered as actions) +MOTIONS_DIR = DATA_DIR / "motions" + +SKILLS_FILE = MOTIONS_DIR / "skills.json" +CONFIG_FILE = MOTIONS_DIR / "config.json" + +# ─── Load baseline defaults from config/core_config.json ─── +# Single source of truth. Runtime overrides via: +# 1. env vars (SANAD_GEMINI_API_KEY, SANAD_GEMINI_MODEL, ...) +# 2. data/motions/config.json (dashboard-editable — see load_config()) +# 3. 
config/core_config.json (this file) +def _load_core_config() -> dict[str, Any]: + cfg_path = BASE_DIR / "config" / "core_config.json" + if not cfg_path.exists(): + return {} + try: + raw = json.loads(cfg_path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + return {} + # Strip _comment / _description noise + return {k: v for k, v in raw.items() if not k.startswith("_")} + + +_CORE_CFG = _load_core_config() +_GEMINI = _CORE_CFG.get("gemini_defaults", {}) +_AUDIO = _CORE_CFG.get("audio_defaults", {}) + +# -- Gemini defaults (override via data/motions/config.json or env) -- +GEMINI_API_KEY = os.environ.get( + "SANAD_GEMINI_API_KEY", + _GEMINI.get("api_key", "")) +GEMINI_MODEL = os.environ.get( + "SANAD_GEMINI_MODEL", + "models/" + _GEMINI.get("model_live", "gemini-2.5-flash-native-audio-preview-12-2025")) +GEMINI_VOICE = os.environ.get( + "SANAD_GEMINI_VOICE", + _GEMINI.get("voice_name", "Charon")) +GEMINI_WS_URI = _GEMINI.get( + "model_ws_uri", + "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent") +GEMINI_WS_TIMEOUT = _GEMINI.get("ws_timeout_sec", 30) + +# -- Audio defaults -- +SEND_SAMPLE_RATE = _AUDIO.get("send_sample_rate", 16000) +RECEIVE_SAMPLE_RATE = _AUDIO.get("receive_sample_rate", 24000) +CHUNK_SIZE = _AUDIO.get("chunk_size", 512) +CHANNELS = _AUDIO.get("channels", 1) + +# -- PulseAudio hardware IDs -- +SINK = _AUDIO.get("sink", "alsa_output.usb-Anker_PowerConf_A3321-DEV-SN1-01.analog-stereo") +SOURCE = _AUDIO.get("source", "alsa_input.usb-Anker_PowerConf_A3321-DEV-SN1-01.mono-fallback") +MONITOR_SOURCE = f"{SINK}.monitor" + +# -- Dashboard -- +# Default: bind to wlan0's IP (auto-detected at startup) so the dashboard is +# reachable on the wireless network. Falls back to 0.0.0.0 (all interfaces) +# if wlan0 isn't present. +# +# Resolution order (highest priority first): +# 1. SANAD_DASHBOARD_HOST env var (explicit IP or hostname) +# 2. 
SANAD_DASHBOARD_INTERFACE env var → that interface's IP +# 3. wlan0 interface IP (default) +# 4. 0.0.0.0 (bind to all) +# +# Override via --host CLI flag too. +DASHBOARD_INTERFACE = os.environ.get("SANAD_DASHBOARD_INTERFACE", "wlan0") + + +def _get_interface_ip(iface: str) -> str | None: + """Return the IPv4 address bound to `iface`, or None if not present. + + Tries multiple strategies in order — different Linux setups expose + interface info via different mechanisms. + """ + # Strategy 1: fcntl SIOCGIFADDR (fastest, no subprocess) + ip = _get_iface_ip_fcntl(iface) + if ip: + return ip + # Strategy 2: parse `ip -4 -o addr show {iface}` (works on Ubuntu/Jetson) + ip = _get_iface_ip_via_ip_cmd(iface) + if ip: + return ip + # Strategy 3: `hostname -I` (last resort; NOT interface-specific) + ip = _get_iface_ip_via_proc(iface) + if ip: + return ip + return None + + +def _get_iface_ip_fcntl(iface: str) -> str | None: + try: + import fcntl + import socket + import struct + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + try: + ifname = iface[:15].encode("utf-8") + packed = fcntl.ioctl( + s.fileno(), + 0x8915, # SIOCGIFADDR + struct.pack("256s", ifname), + ) + return socket.inet_ntoa(packed[20:24]) + finally: + s.close() + except Exception: + return None + + +def _get_iface_ip_via_ip_cmd(iface: str) -> str | None: + try: + import subprocess + r = subprocess.run( + ["ip", "-4", "-o", "addr", "show", iface], + capture_output=True, text=True, timeout=2.0, + ) + if r.returncode != 0: + return None + # Output: "5: wlan0 inet 10.255.254.86/24 brd ..." + for line in r.stdout.splitlines(): + parts = line.split() + for i, p in enumerate(parts): + if p == "inet" and i + 1 < len(parts): + return parts[i + 1].split("/")[0] + except Exception: + return None + return None + + +def _get_iface_ip_via_proc(iface: str) -> str | None: + """Fallback: first non-loopback IPv4 reported by `hostname -I` (`iface` is not used). + + Despite the name, nothing is read from /proc; the address may belong to another interface.
+ """ + try: + import subprocess + # Try `hostname -I` as a final fallback (returns space-separated IPs) + r = subprocess.run(["hostname", "-I"], capture_output=True, text=True, timeout=1.0) + if r.returncode == 0: + ips = (r.stdout or "").strip().split() + # Return first non-loopback IPv4 + for ip in ips: + if "." in ip and not ip.startswith("127."): + return ip + except Exception: + return None + return None + + +def list_network_interfaces() -> list[dict]: + """Return [{name, ip, is_up}] for every interface on the box. + + Used by the dashboard's system-info panel. + """ + out: list[dict] = [] + try: + import socket + for idx, name in socket.if_nameindex(): + ip = _get_interface_ip(name) + out.append({ + "name": name, + "index": idx, + "ip": ip or "", + "is_up": ip is not None, + }) + except Exception: + pass + return out + + +def _resolve_dashboard_host() -> str: + """Resolve the host the dashboard should bind to. + + Order: + 1. SANAD_DASHBOARD_HOST env var (explicit IP/hostname) + 2. SANAD_DASHBOARD_INTERFACE → that interface's IP + 3. wlan0's IP (default) + 4. First non-loopback IP from `hostname -I` + 5. 0.0.0.0 (bind everywhere) + """ + explicit = os.environ.get("SANAD_DASHBOARD_HOST", "").strip() + if explicit: + return explicit + iface_ip = _get_interface_ip(DASHBOARD_INTERFACE) + if iface_ip: + return iface_ip + # Try `hostname -I` as a final non-loopback fallback + try: + import subprocess + r = subprocess.run(["hostname", "-I"], capture_output=True, text=True, timeout=1.0) + if r.returncode == 0: + for ip in (r.stdout or "").strip().split(): + if "." 
in ip and not ip.startswith("127."): + return ip + except Exception: + pass + return "0.0.0.0" + + +DASHBOARD_HOST = _resolve_dashboard_host() +DASHBOARD_PORT = 8000 + +# -- Local TTS -- +LOCAL_TTS_MODEL = "MBZUAI/speecht5_tts_clartts_ar" +LOCAL_TTS_MODEL_PATH = str(MODEL_DIR / "speecht5_tts_clartts_ar") +LOCAL_TTS_HIFIGAN_PATH = str(MODEL_DIR / "speecht5_hifigan") +LOCAL_TTS_XVECTOR_PATH = str(MODEL_DIR / "arabic_xvector_embedding.pt") + +# -- Motion -- +_G1 = _CORE_CFG.get("g1_hardware", {}) +REPLAY_HZ = _G1.get("replay_hz", 60.0) +G1_NUM_MOTOR = _G1.get("num_motor", 29) +ENABLE_ARM_SDK_INDEX = _G1.get("enable_arm_sdk_index", 29) +KP_HIGH = 300.0 +KD_HIGH = 3.0 +KP_LOW = 80.0 +KD_LOW = 3.0 +KP_WRIST = 40.0 +KD_WRIST = 1.5 +WEAK_MOTORS = {4, 10, 15, 16, 17, 18, 22, 23, 24, 25} +WRIST_MOTORS = {19, 20, 21, 26, 27, 28} + +# -- Live Gemini subprocess tuning -- +LIVE_TUNE: dict[str, str] = { + "SANAD_REQUIRED_LOUD_CHUNKS": "5", + "SANAD_PREBUFFER_CHUNKS": "3", + "SANAD_PLAYBACK_TIMEOUT": "0.25", + "SANAD_BARGE_IN_COOLDOWN": "1.0", + "SANAD_AI_SPEAK_GRACE": "0.5", + # ECHO_GUARD_SEC suppresses USER SAID log lines for this many seconds + # after the robot finishes a chunk. Previously 1.2 — caused a visible + # lag where "robot finished talking" was followed by silence in the + # log even though Gemini was transcribing the user's new speech + # immediately. Lowered to 0.3 to match typical room reverb tail; the + # real echo protection is the silence-during-speaking gate, not this. + "SANAD_ECHO_GUARD_SEC": "0.3", + "SANAD_SPEAKING_ENERGY_GATE": "0.90", + "SANAD_CALIBRATION_CHUNKS": "30", + "SANAD_THRESHOLD_MULTIPLIER": "4.0", + # Base barge-in threshold calibrated at the REFERENCE volume (50%). + # At runtime, scaled QUADRATICALLY with actual G1 volume: + # scale = (actual_vol / ref_vol) ** 2 + # + # Physical reason: doubling digital speaker volume doubles sample + # amplitude, which means RECEIVED energy at the mic quadruples + # (energy ~ amplitude²). 
Linear scaling under-thresholds echo at + # high volumes, which caused "robot listening to himself" feedback. + # + # Measured on Hollyland + G1 speaker at 100% volume: + # echo peak (no user) up to ~15700 + # voice peak (user) 25000-32000+ (often saturates 32767) + # Safe threshold at 100% vol: ~18000, above echo / below voice. + # + # Working back with quadratic scale: base × (100/50)² = 18000 + # base × 4 = 18000 → base = 4500 at 50% ref volume. + "SANAD_MIN_THRESHOLD": "800", + "SANAD_PLAYBACK_BARGE_MIN": "2500", + "SANAD_PLAYBACK_BARGE_MULT": "1.5", + # Sustained-chunk requirement for barge-in. Balance: + # higher = fewer false triggers from echo bursts + # lower = quicker response to short commands ("stop", "توقف") + # 5 chunks = ~160ms sustained voice; the value set below is 2 (~64ms). + # Real speech sustains that long; single-chunk echo spikes don't. + "SANAD_PLAYBACK_REQUIRED_CHUNKS": "2", + "SANAD_SILENCE_AFTER_SPEECH": "1.2", + "SANAD_SPEECH_THRESHOLD": "300", + "SANAD_DDS_INTERFACE": os.environ.get("SANAD_DDS_INTERFACE", "eth0"), + # G1 built-in mic — UDP multicast 239.168.123.161:5555. + # Requires wake-up conversation mode ON in Unitree app. + "SANAD_USE_G1_MIC": "1", + + # ── Recognition (camera vision + face recognition) ── + # All of these are BOOT defaults. The runtime source of truth is the + # state file data/.recognition_state.json — toggled live from the + # Recognition tab and polled by the Gemini child at 1 Hz.
+ "SANAD_VISION_ENABLE": "0", + "SANAD_VISION_SEND_HZ": "2", + "SANAD_VISION_STALE_MS": "1500", + "SANAD_CAMERA_WIDTH": "424", + "SANAD_CAMERA_HEIGHT": "240", + "SANAD_CAMERA_FPS": "15", + "SANAD_CAMERA_JPEG_QUALITY": "70", + "SANAD_FACE_RECOGNITION_ENABLE": "0", + "SANAD_FACES_DIR": str(DATA_DIR / "faces"), + "SANAD_FACES_MAX_SAMPLES": "3", + "SANAD_FACES_PRIMER_RESIZE": "256", + "SANAD_RECOGNITION_STATE_PATH": str(DATA_DIR / ".recognition_state.json"), + "SANAD_RECOGNITION_POLL_S": "1.0", +} + +# -- Camera -- +CAMERA_SERVICE_PORT = 8091 +DIRECT_CAMERA_URL = f"http://127.0.0.1:{CAMERA_SERVICE_PORT}" + +# -- DDS / hardware -- +# Jetson G1 default is eth0 (the robot's internal network). +# Override with SANAD_DDS_INTERFACE=lo for desktop/sim development. +DDS_NETWORK_INTERFACE = os.environ.get("SANAD_DDS_INTERFACE", "eth0") + + +def _ensure_dirs() -> list[str]: + """Create runtime directories. Failures are collected, not raised. + + Returns the list of directories that failed to create — caller can decide + whether to log/abort. The module import never crashes due to a single + permission error on a single directory. + """ + failed: list[str] = [] + for d in (DATA_DIR, LOGS_DIR, SCRIPTS_DIR, AUDIO_RECORDINGS_DIR, + MOTION_RECORDINGS_DIR, MOTIONS_DIR): + try: + d.mkdir(parents=True, exist_ok=True) + except OSError: + failed.append(str(d)) + return failed + + +# Best-effort: create dirs at import. Ignore failures here — individual +# subsystems will handle missing dirs at usage time and isolation prevents +# cascading import failures. 
+_DIRS_FAILED = _ensure_dirs() + + +def load_config() -> dict[str, Any]: + """Load runtime config overrides from CONFIG_FILE (if present).""" + if CONFIG_FILE.exists(): + try: + with open(CONFIG_FILE, "r", encoding="utf-8") as f: + return json.load(f) + except (json.JSONDecodeError, OSError): + return {} + return {} + + +def save_config(cfg: dict[str, Any]): + CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True) + import os, tempfile + fd, tmp = tempfile.mkstemp( + prefix=f".{CONFIG_FILE.name}.", suffix=".tmp", + dir=str(CONFIG_FILE.parent), + ) + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + json.dump(cfg, f, ensure_ascii=False, indent=2) + os.replace(tmp, CONFIG_FILE) + except Exception: + try: + os.unlink(tmp) + except OSError: + pass + raise + + +# Apply config.json overrides on top of module constants (was previously dead code). +def _apply_overrides(): + cfg = load_config() + if not cfg: + return + g = globals() + gemini = cfg.get("gemini", {}) + if isinstance(gemini, dict): + if "api_key" in gemini and gemini["api_key"]: + g["GEMINI_API_KEY"] = gemini["api_key"] + if "model" in gemini: + g["GEMINI_MODEL"] = gemini["model"] + if "voice" in gemini: + g["GEMINI_VOICE"] = gemini["voice"] + audio = cfg.get("audio", {}) + if isinstance(audio, dict): + if "send_sample_rate" in audio: + g["SEND_SAMPLE_RATE"] = int(audio["send_sample_rate"]) + if "receive_sample_rate" in audio: + g["RECEIVE_SAMPLE_RATE"] = int(audio["receive_sample_rate"]) + if "chunk_size" in audio: + g["CHUNK_SIZE"] = int(audio["chunk_size"]) + if "sink" in audio: + g["SINK"] = audio["sink"] + if "source" in audio: + g["SOURCE"] = audio["source"] + dashboard = cfg.get("dashboard", {}) + if isinstance(dashboard, dict): + if "host" in dashboard: + g["DASHBOARD_HOST"] = dashboard["host"] + if "port" in dashboard: + g["DASHBOARD_PORT"] = int(dashboard["port"]) + + +try: + _apply_overrides() +except Exception: + # Never let a malformed config.json kill module import. 
+ pass diff --git a/vendor/Sanad/config/core_config.json b/vendor/Sanad/config/core_config.json new file mode 100644 index 0000000..a7126bc --- /dev/null +++ b/vendor/Sanad/config/core_config.json @@ -0,0 +1,89 @@ +{ + "_description": "Tunables for core/* modules. Loaded via core.config_loader.load('core').", + "brain": { + "allowed_callback_prefixes": [ + "Project.Sanad.motion.", + "Project.Sanad.voice.", + "motion.", + "voice." + ], + "gestural_speaking_default": false + }, + "logger": { + "log_level": "INFO", + "format": "%(asctime)s [%(name)s] %(levelname)-7s %(message)s", + "datefmt": "%Y-%m-%d %H:%M:%S", + "file_max_bytes": 10485760, + "file_backup_count": 7 + }, + "event_bus": { + "emit_timeout_sec": 0.5 + }, + "paths": { + "_comment": "Path roots — resolved against BASE_DIR in core/config.py", + "data": "data", + "logs": "logs", + "scripts": "scripts", + "model": "model", + "audio_recordings": "data/audio", + "motion_recordings": "data/recordings/motion", + "motions": "data/motions" + }, + "gemini_defaults": { + "_comment": "Baseline Gemini API config — SINGLE SOURCE OF TRUTH. All voice modules read from here.", + "api_key": "", + "model_live": "gemini-2.5-flash-native-audio-preview-12-2025", + "model_ws_uri": "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent", + "voice_name": "Charon", + "ws_timeout_sec": 30, + "default_system_prompt": "You are Bousandah, a wise and friendly Emirati assistant. Speak strictly in the UAE dialect (Khaleeji). Be helpful, concise, and use local greetings like 'Marhaba' and 'Ya Khoy'." 
+ }, + "g1_hardware": { + "_comment": "G1 humanoid hardware constants — shared by every motion/voice module that talks to the arm.", + "num_motor": 29, + "enable_arm_sdk_index": 29, + "replay_hz": 60.0 + }, + "script_files": { + "_comment": "Filenames (under scripts/) used across voice + dashboard", + "persona": "sanad_script.txt", + "rules": "sanad_rule.txt", + "arm_phrases": "sanad_arm.txt" + }, + "dashboard_defaults": { + "host": null, + "port": 8000, + "interface": "wlan0" + }, + "audio_defaults": { + "_comment": "Host PulseAudio fallback only — the G1 deployment uses UDP multicast mic + AudioClient.PlayStream speaker (see SANAD_USE_G1_MIC in config.py LIVE_TUNE). Default here is the Jetson/G1 built-in platform-sound chip.", + "send_sample_rate": 16000, + "receive_sample_rate": 24000, + "chunk_size": 512, + "channels": 1, + "sink": "alsa_output.platform-sound.analog-stereo", + "source": "alsa_input.platform-sound.analog-stereo" + }, + "dds": { + "network_interface_default": "eth0" + }, + "camera": { + "_comment": "Recognition tab camera daemon (parent process reads this). width/height/fps/jpeg_quality + the reconnect knobs configure CameraDaemon. Frames are cached in memory and pushed to the Gemini child over its stdin (no file drop). send_hz/stale_ms are read by the Gemini child via SANAD_VISION_SEND_HZ / SANAD_VISION_STALE_MS env vars (LIVE_TUNE).", + "width": 424, + "height": 240, + "fps": 15, + "jpeg_quality": 70, + "send_hz": 2, + "stale_ms": 1500, + "stale_threshold_s": 10.0, + "reconnect_min_s": 2.0, + "reconnect_max_s": 10.0, + "capture_timeout_ms": 5000 + }, + "faces": { + "_comment": "Face gallery for Gemini-side recognition. Folder layout: data/faces/face_{id}/{face_1.jpg, ...} + optional meta.json {\"name\": \"...\"}. 
Gemini does the matching — no local ML model.", + "dir_rel": "data/faces", + "max_samples_per_face": 3, + "primer_resize_long_side": 256 + } +} \ No newline at end of file diff --git a/vendor/Sanad/config/dashboard_config.json b/vendor/Sanad/config/dashboard_config.json new file mode 100644 index 0000000..e11ecdc --- /dev/null +++ b/vendor/Sanad/config/dashboard_config.json @@ -0,0 +1,49 @@ +{ + "_description": "Tunables for dashboard/* modules. Loaded via core.config_loader.load('dashboard').", + + "app": { + "_comment": "dashboard/app.py — FastAPI app", + "title": "Sanad Dashboard", + "version": "1.0.0", + "static_subdir": "dashboard/static" + }, + + "api_input": { + "_comment": "Shared by every route that accepts user text input / uploads. Single source of truth.", + "max_text_len": 2000, + "max_upload_bytes": 8388608 + }, + + "voice_route": { + "_comment": "dashboard/routes/voice.py — reads max_text_len from api_input above", + "api_key_mask_visible": 4 + }, + + "typed_replay_route": { + "_comment": "dashboard/routes/typed_replay.py — reads max_text_len from api_input above" + }, + + "records_route": { + "_comment": "dashboard/routes/records.py", + "index_filename": "records.json" + }, + + "prompt_route": { + "_comment": "dashboard/routes/prompt.py — script/rule filenames come from core.script_files; default prompt from core.gemini_defaults.default_system_prompt" + }, + + "logs_route": { + "_comment": "dashboard/routes/logs.py", + "default_tail_lines": 200, + "max_tail_lines": 5000 + }, + + "scripts_route": { + "_comment": "dashboard/routes/scripts.py — max_script_bytes reads from api_input.max_upload_bytes" + }, + + "live_subprocess_route": { + "_comment": "dashboard/routes/live_subprocess.py", + "tail_default_lines": 100 + } +} diff --git a/vendor/Sanad/config/gemini_config.json b/vendor/Sanad/config/gemini_config.json new file mode 100644 index 0000000..39c84ca --- /dev/null +++ b/vendor/Sanad/config/gemini_config.json @@ -0,0 +1,35 @@ +{ + "_description": 
"Tunables for gemini/* modules. Loaded via core.config_loader.load('gemini'). API credentials (api_key, model, voice_name) still live in core_config.json > gemini_defaults — single source of truth shared with config.py.", + + "client": { + "_comment": "gemini/client.py — short-session WebSocket client used by dashboard /generate + typed replay. default_system_prompt comes from core.gemini_defaults.", + "recv_timeout_sec": 30, + "reconnect_max_attempts": 3, + "reconnect_initial_delay_sec": 1.0, + "reconnect_max_delay_sec": 10.0 + }, + + "subprocess": { + "_comment": "gemini/subprocess.py — GeminiSubprocess supervisor. Spawns voice/sanad_voice.py as a child, tails stdout for Gemini-specific log markers, pushes camera frames + motion state to the child over its stdin, exposes transcript + state to the dashboard.", + "log_tail_size": 2000, + "transcript_tail_size": 30, + "log_name": "gemini_subprocess", + "stop_timeout_sec": 3.0, + "terminate_timeout_sec": 2.0, + "frame_forward_interval_sec": 0.5, + "noisy_prefixes": [ + "ALSA lib ", + "Expression 'alsa_", + "Cannot connect to server socket", + "jack server is not running" + ], + "noisy_fragments": [ + "Unknown PCM", + "Evaluate error", + "snd_pcm_open_noupdate", + "PaAlsaStream", + "snd_config_evaluate", + "snd_func_refer" + ] + } +} diff --git a/vendor/Sanad/config/local_config.json b/vendor/Sanad/config/local_config.json new file mode 100644 index 0000000..c542d7d --- /dev/null +++ b/vendor/Sanad/config/local_config.json @@ -0,0 +1,92 @@ +{ + "_description": "Tunables for local/* — fully on-device voice pipeline (Silero VAD → Whisper → Qwen via llama.cpp → CosyVoice2). Loaded via core.config_loader.load('local').", + + "subprocess": { + "_comment": "local/subprocess.py — LocalSubprocess supervisor. Mirrors gemini/subprocess.py. 
IMPORTANT: python_bin points at the `local` conda env (Python 3.8 + Jetson CUDA torch) so CosyVoice+Whisper run with GPU, while the dashboard/Gemini stack stays in gemini_sdk (Python 3.10).", + "python_bin": "/home/unitree/miniconda3/envs/local/bin/python", + "log_tail_size": 2000, + "transcript_tail_size": 30, + "log_name": "local_subprocess", + "stop_timeout_sec": 5.0, + "terminate_timeout_sec": 3.0, + "noisy_prefixes": [ + "ALSA lib ", + "Expression 'alsa_", + "Cannot connect to server socket", + "jack server is not running" + ], + "noisy_fragments": [ + "Unknown PCM", + "Evaluate error", + "snd_pcm_open_noupdate", + "PaAlsaStream" + ] + }, + + "vad": { + "_comment": "Silero VAD — CPU. Emits speech_start / speech_end events.", + "sample_rate": 16000, + "frame_ms": 32, + "threshold": 0.55, + "min_silence_ms": 400, + "min_speech_ms": 250, + "pad_start_ms": 200, + "pad_end_ms": 200, + "device": "cpu" + }, + + "stt": { + "_comment": "faster-whisper Large V3 Turbo, INT8 on GPU.", + "model_name": "large-v3-turbo", + "model_subdir": "faster-whisper-large-v3-turbo", + "device": "cuda", + "compute_type": "int8_float16", + "beam_size": 1, + "language": null, + "vad_filter": false, + "no_speech_threshold": 0.6, + "min_utterance_chars": 2, + "temperature": 0.0 + }, + + "llm": { + "_comment": "Qwen 2.5 Instruct via Ollama (default) OR self-managed llama.cpp. Set backend to pick.", + "backend": "ollama", + + "_ollama_comment": "Ollama daemon — assumes `ollama serve` is running; `ollama pull qwen2.5:1.5b` to fetch.", + "ollama_host": "127.0.0.1", + "ollama_port": 11434, + "ollama_model": "qwen2.5:1.5b", + "ollama_keep_alive": "5m", + + "_llamacpp_comment": "Self-managed llama-server subprocess. 
Only used when backend='llama_cpp'.", + "model_subdir": "qwen2.5-1.5b-instruct-q4_k_m.gguf", + "server_binary": "llama-server", + "host": "127.0.0.1", + "port": 8080, + "n_gpu_layers": 99, + "ctx_size": 2048, + "threads": 4, + "startup_timeout_sec": 30, + + "_shared_comment": "Generation params — both backends.", + "request_timeout_sec": 30, + "max_tokens": 200, + "temperature": 0.7, + "top_p": 0.9, + "stop": ["<|im_end|>", "\n\n\n"], + "chunk_delimiters": ".,?!؟،", + "chunk_min_chars": 8 + }, + + "tts": { + "_comment": "CosyVoice2 0.5B streaming — GPU. Uses a 3s reference WAV for voice cloning.", + "model_subdir": "CosyVoice2-0.5B", + "reference_wav_subdir": "khaleeji_reference_3s.wav", + "reference_prompt": "", + "stream_chunk_sec": 0.25, + "sample_rate": 16000, + "queue_max": 3, + "device": "cuda" + } +} diff --git a/vendor/Sanad/config/motion_config.json b/vendor/Sanad/config/motion_config.json new file mode 100644 index 0000000..9070df1 --- /dev/null +++ b/vendor/Sanad/config/motion_config.json @@ -0,0 +1,70 @@ +{ + "_description": "Tunables for motion/* modules. Loaded via core.config_loader.load('motion').", + + "arm_controller": { + "_comment": "motion/arm_controller.py — enable_arm_sdk_index + replay_hz come from core.g1_hardware", + "ramp_in_steps": 60, + "ramp_out_steps": 180, + "settle_hold_sec": 0.5, + "watchdog_timeout_sec": 0.25, + "watchdog_disable_after_sec": 1.0, + "arm_indices_start": 15, + "arm_indices_stop": 29, + "jsonl_id_start": 100 + }, + + "loco_controller": { + "_comment": "G1_Controller/loco_controller.py — manual locomotion. 
NIC is shared from the arm's DDS init (config core.dds / SANAD_DDS_INTERFACE), not set here.", + "cap_walk": 0.6, + "cap_run": 1.2, + "lin_step": 0.05, + "ang_step": 0.2, + "watchdog_timeout_sec": 0.5, + "arm_block_window_sec": 1.5, + "step_duration_sec": 0.6, + "step_speed_frac": 0.5, + "loco_timeout_sec": 10.0, + "msc_timeout_sec": 5.0 + }, + + "macro_player": { + "_comment": "motion/macro_player.py — JSONL playback", + "ramp_in_steps": 60, + "ramp_out_steps": 60, + "watchdog_disable_after_sec": 1.0 + }, + + "macro_recorder": { + "_comment": "motion/macro_recorder.py — record arm trajectories", + "sample_rate_hz": 60.0, + "smoothing_window": 5 + }, + + "teaching": { + "_comment": "motion/teaching.py — teach-by-demo", + "safe_hold_sec": 3.0, + "waist_kp": 60.0, + "waist_kd": 4.0, + "hold_arm_kp": 60.0, + "hold_arm_kd": 4.0, + "teach_arm_kp": 0.0, + "teach_arm_kd": 2.0 + }, + + "sanad_arm_controller": { + "_comment": "motion/sanad_arm_controller.py — g1_num_motor + enable_arm_sdk_index + replay_hz come from core.g1_hardware", + "action_cooldown_sec": 1.0, + "stability_threshold": 0.06, + "gains": { + "kp_high": 300.0, + "kd_high": 3.0, + "kp_low": 80.0, + "kd_low": 3.0, + "kp_wrist": 40.0, + "kd_wrist": 1.5 + }, + "weak_motors": [4, 10, 15, 16, 17, 18, 22, 23, 24, 25], + "wrist_motors": [19, 20, 21, 26, 27, 28], + "data_subdir": "DataG1" + } +} diff --git a/vendor/Sanad/config/voice_config.json b/vendor/Sanad/config/voice_config.json new file mode 100644 index 0000000..00e151d --- /dev/null +++ b/vendor/Sanad/config/voice_config.json @@ -0,0 +1,75 @@ +{ + "_description": "Tunables for voice/* modules. Loaded via core.config_loader.load('voice').", + + "sanad_voice": { + "_comment": "voice/sanad_voice.py — main live voice subprocess. 
Gemini API credentials (api_key, model, voice_name) come from core_config.json's gemini_defaults — single source of truth.", + "mic_gain": 1.0, + "play_chunk_bytes": 96000, + "log_dir": "~/logs", + "log_name": "gemini_live_v2", + "session_timeout_sec": 660, + "max_reconnect_delay_sec": 30, + "max_consecutive_errors": 10, + "no_messages_timeout_sec": 30 + }, + + "mic_udp": { + "_comment": "G1 built-in mic — UDP multicast subscriber", + "group": "239.168.123.161", + "port": 5555, + "buffer_max_bytes": 64000, + "read_timeout_sec": 0.04, + "socket_timeout_sec": 1.0 + }, + + "speaker": { + "_comment": "G1 built-in speaker — AudioClient.PlayStream wrapper", + "app_name": "sanad", + "begin_stream_pause_sec": 0.15, + "wait_finish_margin_sec": 0.3 + }, + + "vad": { + "_comment": "Gemini Live server-side voice-activity-detection config", + "start_sensitivity": "START_SENSITIVITY_HIGH", + "end_sensitivity": "END_SENSITIVITY_LOW", + "prefix_padding_ms": 20, + "silence_duration_ms": 200 + }, + + "barge_in": { + "threshold": 500, + "loud_chunks_needed": 3, + "cooldown_sec": 0.3, + "echo_suppress_below": 500, + "ai_speak_grace_sec": 0.15 + }, + + "recording": { + "enabled": true, + "dir_relative": "data/recordings" + }, + + "typed_replay": { + "_comment": "voice/typed_replay.py — max_text_len comes from dashboard.api_input", + "monitor_chunk_size": 512, + "monitor_tail_sec": 0.2 + }, + + "live_voice_loop": { + "_comment": "voice/live_voice_loop.py — arm phrase dispatcher. 
arm_txt filename comes from core.script_files.arm_phrases", + "trigger_log_size": 100, + "poll_interval_sec": 0.1, + "deferred_default": false, + "trigger_enabled_default": false + }, + + "local_tts": { + "_comment": "voice/local_tts.py — offline Coqui TTS", + "model_subdir": "speecht5_tts_clartts_ar", + "vocoder_subdir": "speecht5_hifigan", + "xvector_filename": "arabic_xvector_embedding.pt", + "sample_rate": 16000, + "channels": 1 + } +} diff --git a/vendor/Sanad/core/__init__.py b/vendor/Sanad/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/core/asyncio_compat.py b/vendor/Sanad/core/asyncio_compat.py new file mode 100644 index 0000000..d4debaf --- /dev/null +++ b/vendor/Sanad/core/asyncio_compat.py @@ -0,0 +1,33 @@ +"""asyncio compatibility shim for Python 3.8. + +`asyncio.to_thread` only exists from Python 3.9. The Jetson runs 3.8, so we +backfill it via run_in_executor on the default thread pool. + +Usage: + from Project.Sanad.core.asyncio_compat import to_thread + result = await to_thread(blocking_fn, arg1, arg2, kw=val) +""" + +from __future__ import annotations + +import asyncio +import functools +import sys +from typing import Any, Callable, TypeVar + +_T = TypeVar("_T") + +if sys.version_info >= (3, 9): + # Native implementation + to_thread = asyncio.to_thread # type: ignore[attr-defined] +else: + async def to_thread(func: Callable[..., _T], /, *args: Any, **kwargs: Any) -> _T: + """Backport of asyncio.to_thread for Python 3.8.""" + loop = asyncio.get_event_loop() + ctx = functools.partial(func, *args, **kwargs) + return await loop.run_in_executor(None, ctx) + +# Also patch the asyncio module so existing `asyncio.to_thread` calls work +# without rewriting every consumer file. Done lazily — only if missing. 
+if not hasattr(asyncio, "to_thread"): + asyncio.to_thread = to_thread # type: ignore[attr-defined] diff --git a/vendor/Sanad/core/brain.py b/vendor/Sanad/core/brain.py new file mode 100644 index 0000000..d4fdc90 --- /dev/null +++ b/vendor/Sanad/core/brain.py @@ -0,0 +1,272 @@ +"""The Brain — central orchestrator for the Sanad robot assistant. + +Responsibilities: + 1. Owns the SkillRegistry, resolves callbacks at runtime. + 2. Coordinates voice → motion → vision pipelines. + 3. Executes skills (audio + motion + callback) with configurable sync modes. + 4. Exposes a thread-safe API consumed by the FastAPI dashboard. +""" + +from __future__ import annotations + +import asyncio +import importlib +import time +from pathlib import Path +from typing import Any, Callable + +from Project.Sanad.config import ( + AUDIO_RECORDINGS_DIR, + MOTIONS_DIR, + MOTION_RECORDINGS_DIR, +) +from Project.Sanad.core.event_bus import bus +from Project.Sanad.core.logger import get_logger +from Project.Sanad.core.skill_registry import Skill, SkillRegistry + +log = get_logger("brain") + +# Whitelist of module path prefixes allowed for skill callbacks. +# Prevents arbitrary code execution via dashboard-editable skills.json. +from Project.Sanad.core.config_loader import section as _cfg_section +_BRAIN_CFG = _cfg_section("core", "brain") +ALLOWED_CALLBACK_PREFIXES = tuple(_BRAIN_CFG.get("allowed_callback_prefixes", [ + "Project.Sanad.motion.", + "Project.Sanad.voice.", + "motion.", + "voice.", +])) + + +class Brain: + """Singleton-style manager that bridges all subsystems.""" + + def __init__(self): + self.registry = SkillRegistry() + self._lock = asyncio.Lock() + + # Sub-modules are injected after construction so imports stay lazy. 
+ self._voice = None # gemini.client.GeminiVoiceClient + self._audio_mgr = None # voice.audio_manager.AudioManager + self._arm = None # motion.arm_controller.ArmController + self._macro_rec = None # motion.macro_recorder.MacroRecorder + self._macro_play = None # motion.macro_player.MacroPlayer + self._live_voice = None # voice.live_voice_loop.LiveVoiceLoop + + self.gestural_speaking = False # toggle: move while Gemini speaks + self._running_skill: str | None = None + + # -- dependency injection -- + + def attach_voice(self, client): + self._voice = client + log.info("Voice client attached") + + def attach_audio_manager(self, mgr): + self._audio_mgr = mgr + log.info("Audio manager attached") + + def attach_arm(self, arm): + self._arm = arm + log.info("Arm controller attached") + + def attach_macro_recorder(self, rec): + self._macro_rec = rec + + def attach_macro_player(self, player): + self._macro_play = player + + def attach_live_voice(self, lv): + self._live_voice = lv + log.info("LiveVoiceLoop attached") + + # -- callback resolution -- + + def _resolve_callback(self, callback_str: str) -> Callable | None: + """Resolve 'module.submodule:function_name' → callable. + + SECURITY: only modules under ALLOWED_CALLBACK_PREFIXES may be imported. + Skill JSON is dashboard-editable and otherwise an arbitrary-import RCE. 
+ + Examples: + "Project.Sanad.motion.arm_controller:wave_hand" + "motion.arm_controller:wave_hand" + """ + if not callback_str: + return None + if ":" not in callback_str: + log.error("Invalid callback (missing ':'): %s", callback_str) + return None + module_path, func_name = callback_str.rsplit(":", 1) + if not any(module_path.startswith(prefix) or module_path == prefix.rstrip(".") + for prefix in ALLOWED_CALLBACK_PREFIXES): + log.error( + "Callback %s rejected — module '%s' not in whitelist", + callback_str, module_path, + ) + return None + try: + mod = importlib.import_module(module_path) + return getattr(mod, func_name) + except Exception: + log.exception("Cannot resolve callback '%s'", callback_str) + return None + + # -- skill execution -- + + async def execute_skill(self, skill_id: str) -> dict[str, Any]: + """Run a skill: play audio + execute motion + fire callback.""" + skill = self.registry.get(skill_id) + if skill is None: + raise KeyError(f"Skill not found: {skill_id}") + if not skill.enabled: + raise RuntimeError(f"Skill '{skill_id}' is disabled.") + + async with self._lock: + if self._running_skill: + raise RuntimeError(f"Skill '{self._running_skill}' is already running.") + self._running_skill = skill_id + + t0 = time.monotonic() + result: dict[str, Any] = {"skill_id": skill_id, "ok": True} + + try: + await bus.emit("skill.started", skill_id=skill_id) + + # Validate required attachments before partial execution + if skill.audio_file and self._audio_mgr is None: + raise RuntimeError("AudioManager not attached but skill requires audio") + if skill.motion_file and self._arm is None: + raise RuntimeError("ArmController not attached but skill requires motion") + + if skill.sync_mode == "parallel": + await self._exec_parallel(skill, result) + elif skill.sync_mode == "audio_first": + await self._exec_audio_first(skill, result) + elif skill.sync_mode == "motion_first": + await self._exec_motion_first(skill, result) + else: + await self._exec_parallel(skill, 
result) + + # Fire callback — run blocking callbacks in a thread to avoid stalling the loop + cb = self._resolve_callback(skill.callback) + if cb is not None: + if asyncio.iscoroutinefunction(cb): + cb_result = await cb() + else: + cb_result = await asyncio.to_thread(cb) + result["callback_result"] = str(cb_result) if cb_result else "ok" + + except Exception as exc: + result["ok"] = False + result["error"] = str(exc) + log.exception("Skill %s failed", skill_id) + finally: + elapsed = time.monotonic() - t0 + result["elapsed_sec"] = round(elapsed, 3) + async with self._lock: + self._running_skill = None + await bus.emit("skill.finished", skill_id=skill_id, result=result) + + return result + + async def cancel_skill(self) -> dict[str, Any]: + """Cancel any running skill — sends cancel to arm controller.""" + cancelled = self._running_skill + if self._arm is not None and hasattr(self._arm, "cancel"): + try: + self._arm.cancel() + except Exception: + log.exception("arm.cancel() failed") + if self._audio_mgr is not None and hasattr(self._audio_mgr, "stop_playback"): + try: + self._audio_mgr.stop_playback() + except Exception: + pass + return {"cancelled": cancelled} + + async def _exec_parallel(self, skill: Skill, result: dict): + tasks = [] + if skill.audio_file: + tasks.append(asyncio.create_task(self._play_audio(skill.audio_file, result))) + if skill.motion_file: + tasks.append(asyncio.create_task(self._play_motion(skill.motion_file, result))) + if tasks: + await asyncio.gather(*tasks) + + async def _exec_audio_first(self, skill: Skill, result: dict): + if skill.audio_file: + await self._play_audio(skill.audio_file, result) + if skill.motion_file: + await self._play_motion(skill.motion_file, result) + + async def _exec_motion_first(self, skill: Skill, result: dict): + if skill.motion_file: + await self._play_motion(skill.motion_file, result) + if skill.audio_file: + await self._play_audio(skill.audio_file, result) + + async def _play_audio(self, audio_file: str, 
result: dict): + path = Path(audio_file) + if not path.is_absolute(): + path = AUDIO_RECORDINGS_DIR / path + if not path.exists(): + result["audio_error"] = f"File not found: {path}" + log.warning("Audio file missing: %s", path) + return + if self._audio_mgr is not None: + await asyncio.to_thread(self._audio_mgr.play_wav, path) + result["audio_played"] = str(path) + else: + result["audio_error"] = "AudioManager not attached" + + async def _play_motion(self, motion_file: str, result: dict): + path = Path(motion_file) + if not path.is_absolute(): + path = MOTIONS_DIR / path + if not path.exists(): + result["motion_error"] = f"File not found: {path}" + log.warning("Motion file missing: %s", path) + return + if self._arm is not None: + await asyncio.to_thread(self._arm.replay_file, str(path)) + result["motion_played"] = str(path) + else: + result["motion_error"] = "ArmController not attached" + + # -- macro recording -- + + async def start_macro_recording(self, name: str) -> dict[str, Any]: + if self._macro_rec is None: + raise RuntimeError("MacroRecorder not attached.") + return await asyncio.to_thread(self._macro_rec.start, name) + + async def stop_macro_recording(self) -> dict[str, Any]: + if self._macro_rec is None: + raise RuntimeError("MacroRecorder not attached.") + return await asyncio.to_thread(self._macro_rec.stop) + + async def play_macro(self, name: str) -> dict[str, Any]: + if self._macro_play is None: + raise RuntimeError("MacroPlayer not attached.") + return await asyncio.to_thread(self._macro_play.play, name) + + # -- gestural speaking toggle -- + + def set_gestural_speaking(self, enabled: bool): + self.gestural_speaking = enabled + bus.emit_sync("brain.gestural_speaking_changed", enabled=enabled) + log.info("Gestural speaking: %s", "ON" if enabled else "OFF") + + # -- status -- + + def status(self) -> dict[str, Any]: + return { + "voice_attached": self._voice is not None, + "arm_attached": self._arm is not None, + "audio_manager_attached": 
self._audio_mgr is not None, + "live_voice_attached": self._live_voice is not None, + "gestural_speaking": self.gestural_speaking, + "running_skill": self._running_skill, + "total_skills": len(self.registry.list_skills()), + } diff --git a/vendor/Sanad/core/config_loader.py b/vendor/Sanad/core/config_loader.py new file mode 100644 index 0000000..5cfe859 --- /dev/null +++ b/vendor/Sanad/core/config_loader.py @@ -0,0 +1,124 @@ +"""Single-source config loader for all Sanad subsystems. + +Each subsystem (core, voice, motion, dashboard, gemini, local) has its own JSON file at +`config/{subsystem}_config.json`. This module loads them on demand, caches +the result, and exposes helpers for pulling nested sections. + +Usage: + from Project.Sanad.core.config_loader import load, get + + cfg = load("voice") # full voice config dict + threshold = get("voice", "barge_in.threshold", 500) + rates = get("voice", "sanad_voice", {}) # whole section + +Why JSON (not TOML/YAML): standard library only, editable in any text +editor, commented via "_comment" keys. No third-party dep. 
+""" + +from __future__ import annotations + +import json +import threading +from pathlib import Path +from typing import Any + +from Project.Sanad.core.logger import get_logger + +log = get_logger("config_loader") + +# Resolved at first-load time (avoids circular import with config.py) +_BASE_DIR: Path | None = None +_CONFIG_DIR: Path | None = None + +_CACHE: dict[str, dict[str, Any]] = {} +_LOCK = threading.Lock() + + +def _resolve_dirs() -> tuple[Path, Path]: + """Find Sanad's root and config/ directory (lazy + cached).""" + global _BASE_DIR, _CONFIG_DIR + if _BASE_DIR is not None and _CONFIG_DIR is not None: + return _BASE_DIR, _CONFIG_DIR + here = Path(__file__).resolve().parent # Sanad/core + base = here.parent # Sanad/ + _BASE_DIR = base + _CONFIG_DIR = base / "config" + return _BASE_DIR, _CONFIG_DIR + + +def _strip_comments(d: Any) -> Any: + """Recursively remove every key starting with "_" (e.g. "_comment"/"_description") — noise for callers.""" + if isinstance(d, dict): + return { + k: _strip_comments(v) for k, v in d.items() + if not (isinstance(k, str) and k.startswith("_")) + } + if isinstance(d, list): + return [_strip_comments(x) for x in d] + return d + + +def load(subsystem: str) -> dict[str, Any]: + """Load + cache config/{subsystem}_config.json. + + Returns a dict with all leading-underscore keys stripped. Missing + file returns an empty dict (callers supply their own defaults via + `get(..., default)`). 
+ """ + with _LOCK: + if subsystem in _CACHE: + return _CACHE[subsystem] + + _, cfg_dir = _resolve_dirs() + path = cfg_dir / f"{subsystem}_config.json" + if not path.exists(): + log.warning("config file missing: %s — using empty dict", path) + _CACHE[subsystem] = {} + return _CACHE[subsystem] + + try: + raw = json.loads(path.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + log.error("config file %s unreadable: %s", path, exc) + _CACHE[subsystem] = {} + return _CACHE[subsystem] + + cleaned = _strip_comments(raw) + _CACHE[subsystem] = cleaned + return cleaned + + +def get(subsystem: str, dotted_key: str, default: Any = None) -> Any: + """Fetch a nested key. Supports dotted-paths: 'barge_in.threshold'.""" + cfg = load(subsystem) + parts = dotted_key.split(".") + cur: Any = cfg + for p in parts: + if not isinstance(cur, dict) or p not in cur: + return default + cur = cur[p] + return cur + + +def section(subsystem: str, name: str) -> dict[str, Any]: + """Convenience — load one top-level section, always returning a dict. + + Example: `section("voice", "sanad_voice")` → dict of that section. + """ + s = get(subsystem, name, {}) + return s if isinstance(s, dict) else {} + + +def reload(subsystem: str | None = None) -> None: + """Drop cached config so next load() re-reads from disk.""" + with _LOCK: + if subsystem is None: + _CACHE.clear() + else: + _CACHE.pop(subsystem, None) + + +def config_dir() -> Path: + """Absolute path to Sanad/config/.""" + _, d = _resolve_dirs() + return d diff --git a/vendor/Sanad/core/event_bus.py b/vendor/Sanad/core/event_bus.py new file mode 100644 index 0000000..6c80ca6 --- /dev/null +++ b/vendor/Sanad/core/event_bus.py @@ -0,0 +1,91 @@ +"""Lightweight in-process event bus for inter-module communication. 
class EventBus:
    """Thread-safe, in-process publish/subscribe hub.

    Handlers are plain callables (sync or async) registered per event name.
    A failing handler is logged and never prevents the remaining handlers
    from running.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self._listeners: dict[str, list[Callable]] = defaultdict(list)
        # Strong references to fire-and-forget tasks created by emit_sync().
        # The event loop only keeps weak references, so an unreferenced task
        # may be garbage-collected before it finishes.
        self._tasks: set[asyncio.Task] = set()

    @staticmethod
    def _name(handler: Any) -> str:
        """Return a printable handler name.

        ``functools.partial`` objects and callable instances have no
        ``__qualname__``; fall back to ``repr`` instead of raising.
        """
        return getattr(handler, "__qualname__", None) or repr(handler)

    def _snapshot(self, event: str) -> list[Callable]:
        """Copy the handler list so handlers run without holding the lock."""
        with self._lock:
            return list(self._listeners.get(event, []))

    def _track(self, task: asyncio.Task, name: str, event: str) -> None:
        """Keep *task* alive until done and log any exception it raised."""
        self._tasks.add(task)

        def _done(finished: asyncio.Task) -> None:
            self._tasks.discard(finished)
            if finished.cancelled():
                return
            exc = finished.exception()
            if exc is not None:
                log.error(
                    "Handler %s for event '%s' failed", name, event,
                    exc_info=exc,
                )

        task.add_done_callback(_done)

    def on(self, event: str, callback: Callable):
        """Subscribe *callback* (sync or async callable) to *event*."""
        with self._lock:
            self._listeners[event].append(callback)
        log.debug("Subscribed %s → %s", event, self._name(callback))

    def off(self, event: str, callback: Callable):
        """Unsubscribe *callback* from *event*; unknown pairs are ignored."""
        with self._lock:
            # .get() avoids materialising an empty list for unknown events.
            handlers = self._listeners.get(event)
            if not handlers:
                return
            try:
                handlers.remove(callback)
            except ValueError:
                return
            if not handlers:
                del self._listeners[event]

    async def emit(self, event: str, **kwargs: Any):
        """Call every handler of *event* in order, awaiting coroutine results."""
        for handler in self._snapshot(event):
            try:
                result = handler(**kwargs)
                if asyncio.iscoroutine(result):
                    await result
            except Exception:
                log.exception(
                    "Handler %s for event '%s' failed", self._name(handler), event
                )

    def emit_sync(self, event: str, **kwargs: Any):
        """Fire-and-forget from a sync context.

        Async handlers are scheduled on the running event loop if one exists.
        Otherwise they are dropped with a warning. Scheduled tasks are
        retained until completion and their failures are logged.
        """
        handlers = self._snapshot(event)
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = None

        for handler in handlers:
            name = self._name(handler)
            try:
                if asyncio.iscoroutinefunction(handler):
                    if loop is None:
                        # Do not call the handler: that would create a
                        # coroutine nobody can ever await.
                        log.warning(
                            "Async handler %s for '%s' dropped — no running loop",
                            name, event,
                        )
                        continue
                    self._track(loop.create_task(handler(**kwargs)), name, event)
                    continue

                result = handler(**kwargs)
                if asyncio.iscoroutine(result):
                    # Sync handler returned a coroutine — schedule it
                    if loop is None:
                        result.close()
                        log.warning(
                            "Coroutine result from %s for '%s' dropped — no running loop",
                            name, event,
                        )
                    else:
                        self._track(loop.create_task(result), name, event)
            except Exception:
                log.exception("Handler %s for event '%s' failed", name, event)


bus = EventBus()
_ws_push_fn = None


def set_ws_push(fn):
    """Install the callable that receives each formatted log line.

    Called by dashboard.websockets.log_stream; pass ``None`` to detach.
    """
    global _ws_push_fn
    _ws_push_fn = fn


class _WSHandler(logging.Handler):
    """Logging handler that relays formatted records to the WebSocket stream."""

    def emit(self, record: logging.LogRecord):
        if _ws_push_fn is None:
            return
        try:
            _ws_push_fn(self.format(record))
        except Exception:
            # Best effort: a broken stream must never break logging itself.
            pass


def get_logger(name: str, *, to_console: bool = True) -> logging.Logger:
    """Return the ``sanad.<name>`` logger, configuring it on first use.

    First use attaches a rotating file handler writing to
    ``LOGS_DIR/<name>.log`` (DEBUG), an optional stdout handler (INFO) and
    the WebSocket relay handler (INFO). A logger that already has handlers
    is returned untouched, so ``to_console`` only matters on the first call.
    """
    logger = logging.getLogger(f"sanad.{name}")
    if not logger.handlers:
        logger.setLevel(logging.DEBUG)
        logger.propagate = False

        LOGS_DIR.mkdir(parents=True, exist_ok=True)
        rotating = RotatingFileHandler(
            LOGS_DIR / f"{name}.log", maxBytes=_MAX_BYTES, backupCount=_BACKUP_COUNT
        )
        sinks = [(rotating, logging.DEBUG)]
        if to_console:
            sinks.append((logging.StreamHandler(sys.stdout), logging.INFO))
        # WebSocket stream handler always goes last.
        sinks.append((_WSHandler(), logging.INFO))

        for sink, level in sinks:
            sink.setFormatter(_formatter)
            sink.setLevel(level)
            logger.addHandler(sink)
    return logger
@dataclass
class Skill:
    """One registry entry tying an audio clip to a motion file and a callback."""

    id: str
    audio_file: str = ""
    motion_file: str = ""
    callback: str = ""
    sync_mode: str = "parallel"
    enabled: bool = True
    description: str = ""
    meta: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict copy suitable for JSON serialisation."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Skill:
        """Build a Skill from *data*, silently dropping unknown keys."""
        known = {f.name for f in cls.__dataclass_fields__.values()}
        filtered = {k: v for k, v in data.items() if k in known}
        return cls(**filtered)


class SkillRegistry:
    """Thread-safe, JSON-backed registry of skills.

    Every mutating call persists the whole registry atomically. If the
    write fails, the in-memory change is rolled back before the exception
    propagates, so memory never diverges from what is on disk.
    """

    _VALID_SYNC_MODES = {"parallel", "audio_first", "motion_first"}
    # Only real dataclass fields may be changed through update(). A plain
    # hasattr() check would also accept method names such as "to_dict" and
    # overwrite them on the instance. The id is the registry key: immutable.
    _UPDATABLE_FIELDS = frozenset(Skill.__dataclass_fields__) - {"id"}

    def __init__(self, path: Path = SKILLS_FILE):
        self._path = path
        self._lock = threading.Lock()
        self._skills: dict[str, Skill] = {}
        self._load()

    # -- persistence --

    def _load(self):
        """Populate the registry from disk; a bad file is moved aside."""
        if not self._path.exists():
            self._skills = {}
            return
        try:
            with open(self._path, "r", encoding="utf-8") as f:
                payload = json.load(f)
            for entry in payload.get("skills", []):
                skill = Skill.from_dict(entry)
                self._skills[skill.id] = skill
            log.info("Loaded %d skills from %s", len(self._skills), self._path)
        except Exception as exc:
            log.warning("Could not load skills: %s", exc)
            # Backup corrupt file rather than silently nuking
            try:
                self._path.rename(self._path.with_suffix(".json.corrupt"))
                log.warning("Backed up corrupt skills to %s.corrupt", self._path)
            except OSError:
                pass
            self._skills = {}

    def _save(self):
        """Write the registry to disk (caller must hold the lock)."""
        self._path.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "version": 1,
            "total": len(self._skills),
            "skills": [s.to_dict() for s in self._skills.values()],
        }
        # Atomic write: tempfile + os.replace
        fd, tmp = tempfile.mkstemp(
            prefix=f".{self._path.name}.", suffix=".tmp",
            dir=str(self._path.parent),
        )
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(payload, f, ensure_ascii=False, indent=2)
            os.replace(tmp, self._path)
        except Exception:
            try:
                os.unlink(tmp)
            except OSError:
                pass
            raise

    # -- CRUD --

    def list_skills(self) -> list[dict[str, Any]]:
        """Return every skill as a plain dict."""
        with self._lock:
            return [s.to_dict() for s in self._skills.values()]

    def get(self, skill_id: str) -> Skill | None:
        """Return the skill with *skill_id*, or None."""
        with self._lock:
            return self._skills.get(skill_id)

    def add(self, skill: Skill) -> Skill:
        """Register *skill* and persist it.

        An empty ``skill.id`` is replaced by a generated 12-hex-char id.

        Raises:
            ValueError: invalid ``sync_mode`` or duplicate id.
        """
        if skill.sync_mode not in self._VALID_SYNC_MODES:
            raise ValueError(
                f"Invalid sync_mode '{skill.sync_mode}' (allowed: {sorted(self._VALID_SYNC_MODES)})"
            )
        with self._lock:
            if not skill.id:
                skill.id = uuid.uuid4().hex[:12]
            elif skill.id in self._skills:
                raise ValueError(f"Skill id already exists: {skill.id}")
            self._skills[skill.id] = skill
            try:
                self._save()
            except Exception:
                self._skills.pop(skill.id, None)
                raise
        log.info("Added skill %s (%s)", skill.id, skill.description)
        return skill

    def update(self, skill_id: str, updates: dict[str, Any]) -> Skill | None:
        """Apply *updates* to an existing skill and persist.

        Keys that are not Skill fields, and ``id``, are ignored. Returns the
        updated skill, or None when *skill_id* is unknown.

        Raises:
            ValueError: ``updates["sync_mode"]`` is not an allowed mode.
        """
        with self._lock:
            existing = self._skills.get(skill_id)
            if existing is None:
                return None
            if "sync_mode" in updates and updates["sync_mode"] not in self._VALID_SYNC_MODES:
                raise ValueError(
                    f"Invalid sync_mode '{updates['sync_mode']}'"
                )
            changes = {
                key: value for key, value in updates.items()
                if key in self._UPDATABLE_FIELDS
            }
            previous = {key: getattr(existing, key) for key in changes}
            for key, value in changes.items():
                setattr(existing, key, value)
            try:
                self._save()
            except Exception:
                for key, value in previous.items():
                    setattr(existing, key, value)
                raise
        log.info("Updated skill %s", skill_id)
        return existing

    def delete(self, skill_id: str) -> dict[str, Any] | None:
        """Remove a skill; return its last state as a dict, or None."""
        with self._lock:
            skill = self._skills.pop(skill_id, None)
            if skill is None:
                return None
            try:
                self._save()
            except Exception:
                self._skills[skill_id] = skill
                raise
        log.info("Deleted skill %s", skill_id)
        return skill.to_dict()

    def find_by_audio(self, audio_file: str) -> list[Skill]:
        """Find all enabled skills linked to a given audio file."""
        with self._lock:
            return [s for s in self._skills.values() if s.audio_file == audio_file and s.enabled]

    def find_by_callback(self, callback: str) -> list[Skill]:
        """Find all enabled skills whose callback string equals *callback*."""
        with self._lock:
            return [s for s in self._skills.values() if s.callback == callback and s.enabled]
# Imported only for its side effect (the compat backfill); the name is unused.
from Project.Sanad.core import asyncio_compat  # noqa: F401

from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles

from Project.Sanad.config import BASE_DIR
from Project.Sanad.core.logger import get_logger

log = get_logger("dashboard.app")

from Project.Sanad.core.config_loader import section as _cfg_section
# "app" section of the dashboard config; every lookup below has a fallback.
_APP_CFG = _cfg_section("dashboard", "app")
app = FastAPI(
    title=_APP_CFG.get("title", "Sanad Dashboard"),
    version=_APP_CFG.get("version", "1.0.0"),
)


# -- isolated route registration --

_REST_ROUTES: list[tuple[str, str, str]] = [
    # (module_name, prefix, tag)
    ("health", "/api", "health"),
    ("system", "/api/system", "system"),
    ("voice", "/api/voice", "voice"),
    ("motion", "/api/motion", "motion"),
    ("skills", "/api/skills", "skills"),
    ("macros", "/api/macros", "macros"),
    ("logs", "/api/logs", "logs"),
    ("replay", "/api/replay", "replay"),
    ("audio_control", "/api/audio", "audio"),
    ("scripts", "/api/scripts", "scripts"),
    ("records", "/api/records", "records"),
    ("prompt", "/api/prompt", "prompt"),
    ("wake_phrases", "/api/wake-phrases", "wake-phrases"),
    ("live_voice", "/api/live-voice", "live-voice"),
    ("live_subprocess", "/api/live-subprocess", "live-subprocess"),
    ("typed_replay", "/api/typed-replay", "typed-replay"),
    ("recognition", "/api/recognition", "recognition"),
    ("zones", "/api/zones", "zones"),
    ("temp_monitor", "/api/temp", "temperature"),
    ("controller", "/api/controller", "controller"),
]

# WebSocket router modules; registered below without a prefix.
_WS_ROUTES: list[str] = ["log_stream", "motor_temps", "terminal"]

# Outcome of registration, reported by "/" and "/api/_dashboard_status".
_loaded_routes: list[str] = []
_failed_routes: dict[str, str] = {}  # module name -> str(exception)


def _register_router(module_name: str, prefix: str | None = None, tag: str | None = None,
                     package: str = "Project.Sanad.dashboard.routes"):
    """Import + register one router. Failures are logged, never raised.

    Args:
        module_name: Module inside *package* that must expose a ``router``.
        prefix: URL prefix handed to ``include_router``; omitted when None.
        tag: Single tag handed to ``include_router``; omitted when None.
        package: Dotted package the module is imported from.

    On success the module name is appended to ``_loaded_routes``; on any
    exception it is recorded in ``_failed_routes`` with the error text.
    """
    full_name = f"{package}.{module_name}"
    try:
        mod = importlib.import_module(full_name)
        if not hasattr(mod, "router"):
            raise AttributeError(f"{full_name} has no 'router' attribute")
        # Only pass the options that were actually supplied.
        kwargs: dict = {}
        if prefix is not None:
            kwargs["prefix"] = prefix
        if tag is not None:
            kwargs["tags"] = [tag]
        app.include_router(mod.router, **kwargs)
        _loaded_routes.append(module_name)
        log.info("Registered router: %s", module_name)
    except Exception as exc:
        # Broad on purpose: one broken router must not stop the others.
        _failed_routes[module_name] = str(exc)
        log.exception("Failed to register router %s — skipping", module_name)


# REST routes
for mod_name, prefix, tag in _REST_ROUTES:
    _register_router(mod_name, prefix=prefix, tag=tag)

# WebSocket routes
for mod_name in _WS_ROUTES:
    _register_router(
        mod_name,
        package="Project.Sanad.dashboard.websockets",
        tag="websocket",
    )


# -- Static files (dashboard UI) — best effort --
STATIC_DIR = BASE_DIR / _APP_CFG.get("static_subdir", "dashboard/static")
try:
    # Create the directory first so the mount has something to point at.
    STATIC_DIR.mkdir(parents=True, exist_ok=True)
    app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
    log.info("Static dir mounted: %s", STATIC_DIR)
except Exception:
    log.exception("Could not mount static dir %s — serving without it", STATIC_DIR)


@app.get("/")
async def root():
    """Serve the dashboard SPA.

    Returns ``index.html`` from ``STATIC_DIR`` as an HTML response. When the
    file is missing, returns a JSON payload listing loaded and failed
    routers; when it exists but cannot be read, returns a JSON error.
    """
    index = STATIC_DIR / "index.html"
    if index.exists():
        from fastapi.responses import HTMLResponse
        try:
            return HTMLResponse(index.read_text(encoding="utf-8"))
        except OSError as exc:
            return {"error": f"Could not read index.html: {exc}"}
    return {
        "message": "Sanad Dashboard — index.html not found",
        "loaded_routes": _loaded_routes,
        "failed_routes": _failed_routes,
    }


@app.get("/api/_dashboard_status")
async def dashboard_load_status():
    """Diagnostic — which routers loaded, which failed (with counts)."""
    return {
        "loaded": _loaded_routes,
        "failed": _failed_routes,
        "total_loaded": len(_loaded_routes),
        "total_failed": len(_failed_routes),
    }
def atomic_write_bytes(path: Path, data: bytes) -> None:
    """Write *data* to *path* atomically via tempfile + os.replace.

    The temp file is created in the destination directory so the final
    rename stays on one filesystem. The data is flushed and fsynced before
    the rename, so after a crash or power loss *path* holds either the old
    or the new content, never a truncated file. Parent directories are
    created as needed.

    Raises:
        OSError: the file could not be created, written or renamed.
        TypeError: *data* is not bytes-like.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    fd, tmp = tempfile.mkstemp(prefix=f".{path.name}.", suffix=".tmp", dir=str(path.parent))
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(data)
            f.flush()
            os.fsync(f.fileno())
        os.replace(tmp, path)
    except BaseException:
        # BaseException so KeyboardInterrupt/SystemExit also clean up the
        # temp file; the original exception is always re-raised.
        try:
            os.unlink(tmp)
        except OSError:
            pass
        raise


def atomic_write_text(path: Path, text: str, encoding: str = "utf-8") -> None:
    """Encode *text* with *encoding* and write it atomically to *path*."""
    atomic_write_bytes(path, text.encode(encoding))


def atomic_write_json(path: Path, payload: Any, indent: int = 2) -> None:
    """Serialise *payload* as JSON (non-ASCII kept verbatim) and write atomically.

    Serialisation happens before the file is touched, so an unserialisable
    payload leaves any existing file unchanged.
    """
    atomic_write_text(path, json.dumps(payload, ensure_ascii=False, indent=indent))
_g1_audio_client = None
_g1_audio_lock = threading.Lock()   # serialises SetVolume calls
_g1_init_lock = threading.Lock()    # serialises the one-time client init
_g1_current_volume: int = 100  # what's actually on the hardware right now
_g1_user_volume: int = 100     # the user's preferred "unmuted" level
_g1_init_error: str = ""


def _load_persisted_g1_volume() -> int:
    """Read the saved G1 volume (``audio.g1_volume``) from the config.

    Returns the stored level clamped to 0..100, or 100 when nothing usable
    is stored or the config cannot be loaded.
    """
    try:
        from Project.Sanad.config import load_config
        cfg = load_config() or {}
        audio = cfg.get("audio") or {}
        vol = int(audio.get("g1_volume", 100))
        return max(0, min(100, vol))
    except Exception:
        return 100


def _save_persisted_g1_volume(level: int) -> None:
    """Persist the user's volume choice (clamped to 0..100) so it survives restart.

    Best effort: failures are logged as a warning and never raised.
    """
    try:
        from Project.Sanad.config import load_config, save_config
        cfg = load_config() or {}
        audio = cfg.get("audio") if isinstance(cfg.get("audio"), dict) else {}
        audio["g1_volume"] = max(0, min(100, int(level)))
        cfg["audio"] = audio
        save_config(cfg)
    except Exception as exc:
        log.warning("could not persist g1_volume: %s", exc)


# Initialize user volume from the persisted value so the dashboard shows
# the correct level on first load even if no one has touched it yet.
_g1_user_volume = _load_persisted_g1_volume()
_g1_current_volume = _g1_user_volume


def _get_g1_audio_client():
    """Return the shared AudioClient, creating it on first use.

    Safe to call concurrently: routes run this from ``asyncio.to_thread``
    workers, so initialisation is guarded by a dedicated lock and re-checked
    after acquiring it. Returns None when the SDK is missing or init fails;
    the reason is left in ``_g1_init_error`` and the next call retries.
    """
    global _g1_audio_client, _g1_init_error
    if _g1_audio_client is not None:
        return _g1_audio_client
    with _g1_init_lock:
        # Another thread may have finished init while we waited.
        if _g1_audio_client is not None:
            return _g1_audio_client
        try:
            from unitree_sdk2py.core.channel import ChannelFactoryInitialize
            from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient
        except ImportError as exc:
            _g1_init_error = f"unitree_sdk2py not installed: {exc}"
            return None
        iface = os.environ.get("SANAD_DDS_INTERFACE", "eth0")
        # ChannelFactoryInitialize can only be called once per process. The
        # arm controller normally calls it first at startup — the second call
        # either no-ops or raises, so wrap it defensively.
        try:
            ChannelFactoryInitialize(0, iface)
        except Exception as exc:
            log.debug("ChannelFactoryInitialize already called or failed: %s", exc)
        try:
            client = AudioClient()
            client.SetTimeout(5.0)
            client.Init()
        except Exception as exc:
            _g1_init_error = f"AudioClient init failed: {exc}"
            log.warning("G1 AudioClient init failed: %s", exc)
            return None
        _g1_audio_client = client
        _g1_init_error = ""  # clear any error left by an earlier failed attempt
        log.info("G1 AudioClient initialized for dashboard mute control (iface=%s)", iface)
        return client


def _pactl(args: list[str]) -> subprocess.CompletedProcess[str]:
    """Run ``pactl <args>``; raises CalledProcessError on a non-zero exit."""
    return subprocess.run(["pactl", *args], check=True, text=True, capture_output=True)


def _get_muted(kind: str, name: str) -> bool:
    """Return the mute state of a source (``kind == "source"``) or sink.

    Returns False for an empty *name* and when pactl is missing or fails.
    """
    if not name:
        return False
    try:
        cmd = "get-source-mute" if kind == "source" else "get-sink-mute"
        r = _pactl([cmd, name])
        return (r.stdout or "").strip().lower().endswith("yes")
    except (FileNotFoundError, subprocess.CalledProcessError):
        return False


def _set_muted(kind: str, name: str, muted: bool) -> bool:
    """Set the mute state and return the state re-read afterwards.

    Returns False without calling pactl when *name* is empty. pactl errors
    from the set call propagate to the caller.
    """
    if not name:
        return False
    cmd = "set-source-mute" if kind == "source" else "set-sink-mute"
    _pactl([cmd, name, "1" if muted else "0"])
    return _get_muted(kind, name)


def _current_sink_source() -> tuple[str, str]:
    """Return ``(sink, source)`` of the current selection ("" when unset)."""
    cur = ad.current_selection()
    return cur.get("sink", ""), cur.get("source", "")
@router.get("/status")
async def audio_status():
    """Return current device + mute state + G1 speaker volume.

    `speaker_muted` is the EFFECTIVE mute state — True if either the
    PulseAudio sink is muted OR the G1 built-in speaker volume is 0.
    `pulse_sink_muted` and `g1_speaker_muted` are the per-path states.
    `g1_current_volume` = what's live on the hardware.
    `g1_user_volume`    = the user's preferred unmuted level (what we
                          restore to when they un-mute).
    """
    def _do():
        sink, source = _current_sink_source()
        cur = ad.current_selection()
        pulse_muted = _get_muted("sink", sink)
        # Read cached state — avoid DDS GetVolume round-trips on every poll
        g1_muted = _g1_current_volume == 0
        return {
            "mic_muted": _get_muted("source", source),
            # Effective (OR of both paths) — the badge the user sees
            "speaker_muted": pulse_muted or g1_muted,
            # Per-path breakdown so the UI can distinguish
            "pulse_sink_muted": pulse_muted,
            "g1_speaker_muted": g1_muted,
            "g1_current_volume": _g1_current_volume,
            "g1_user_volume": _g1_user_volume,
            "g1_available": _g1_audio_client is not None or (_g1_init_error == ""),
            "sink": sink,
            "source": source,
            "current": cur,
            "pactl_available": ad.pactl_available(),
        }
    return await asyncio.to_thread(_do)


@router.post("/mic/mute")
async def toggle_mic(muted: bool | None = None):
    """Mute/unmute the selected source; ``muted=None`` toggles.

    Responds 503 when no source is selected and 500 when pactl fails.
    """
    def _do():
        _, source = _current_sink_source()
        if not source:
            raise HTTPException(503, "No source device selected")
        target = muted if muted is not None else not _get_muted("source", source)
        try:
            actual = _set_muted("source", source, target)
        except (FileNotFoundError, subprocess.CalledProcessError) as exc:
            raise HTTPException(500, f"pactl failed: {exc}")
        return {"mic_muted": actual, "source": source}
    return await asyncio.to_thread(_do)


@router.post("/speaker/mute")
async def toggle_speaker(muted: bool | None = None):
    """Mute/unmute the SPEAKER — both the PulseAudio sink AND the G1
    built-in speaker, so the effect is audible regardless of which
    playback path is currently active.

    Each of the two paths is attempted independently; the response
    reports which one(s) succeeded under `pulse` and `g1`.
    `speaker_muted` is the effective state AFTER the attempt, computed the
    same way as in /status: True if the sink reads back as muted OR the
    cached G1 volume is 0. It can therefore stay True after an unmute
    request whose G1 path failed, or whose restore level is 0.
    """
    def _do():
        global _g1_current_volume
        sink, _ = _current_sink_source()

        # Decide target state. With muted=None this is a toggle of the
        # EFFECTIVE state: if either path is currently muted, unmute both;
        # only when both are live, mute both.
        if muted is None:
            pulse_cur = _get_muted("sink", sink) if sink else False
            g1_cur = _g1_current_volume == 0
            target = not (pulse_cur or g1_cur)
        else:
            target = bool(muted)

        result = {"speaker_muted": target, "pulse": None, "g1": None}

        # ── Path 1: PulseAudio sink (Anker PowerConf, USB, etc.) ──
        if sink:
            try:
                actual_pulse = _set_muted("sink", sink, target)
                result["pulse"] = {"ok": True, "muted": actual_pulse, "sink": sink}
            except (FileNotFoundError, subprocess.CalledProcessError) as exc:
                result["pulse"] = {"ok": False, "error": f"pactl failed: {exc}"}
        else:
            result["pulse"] = {"ok": False, "error": "no sink selected"}

        # ── Path 2: G1 built-in speaker via DDS AudioClient ──
        # Mute = SetVolume(0). Unmute = SetVolume(_g1_user_volume) so the
        # user's chosen level is restored (instead of always jumping back
        # to 100).
        client = _get_g1_audio_client()
        if client is None:
            result["g1"] = {"ok": False, "error": _g1_init_error or "AudioClient unavailable"}
        else:
            volume = 0 if target else _g1_user_volume
            try:
                with _g1_audio_lock:
                    code = client.SetVolume(volume)
                    _g1_current_volume = volume
                result["g1"] = {
                    "ok": True, "muted": volume == 0,
                    "volume": volume, "code": code,
                }
                log.info("G1 speaker volume set to %d (rc=%s)", volume, code)
            except Exception as exc:
                result["g1"] = {"ok": False, "error": f"SetVolume failed: {exc}"}

        # Final effective state — either path counts as muted. A failed G1
        # call leaves _g1_current_volume untouched, so the cached value
        # reflects the hardware rather than the request.
        pulse_muted = bool(result["pulse"].get("muted", False))
        g1_muted = _g1_current_volume == 0
        result["speaker_muted"] = pulse_muted or g1_muted
        result["sink"] = sink
        result["g1_current_volume"] = _g1_current_volume
        result["g1_user_volume"] = _g1_user_volume
        return result
    return await asyncio.to_thread(_do)
# ─────────────────────── G1 speaker volume (0-100) ───────────────────────


class G1VolumePayload(BaseModel):
    """Request body for POST /g1-speaker/volume."""

    level: int  # 0..100


@router.get("/g1-speaker/volume")
async def get_g1_volume():
    """Report the cached G1 speaker volume state.

    Keys: `available`, `current_volume` (live on hardware), `user_volume`
    (preferred unmuted level), `muted` (current_volume == 0), `persisted`
    (value read back from config) and `init_error`.
    """
    def _snapshot():
        client_usable = _g1_audio_client is not None or (_g1_init_error == "")
        return {
            "available": client_usable,
            "current_volume": _g1_current_volume,
            "user_volume": _g1_user_volume,
            "muted": _g1_current_volume == 0,
            "persisted": _load_persisted_g1_volume(),
            "init_error": _g1_init_error,
        }
    return await asyncio.to_thread(_snapshot)


@router.post("/g1-speaker/volume")
async def set_g1_volume(payload: G1VolumePayload):
    """Set the G1 built-in speaker volume via DDS AudioClient.

    Body: `{"level": 0..100}`

    The level is applied with AudioClient.SetVolume. A non-zero level also
    becomes the preferred unmuted level; level 0 is a soft mute that keeps
    the preference. The preferred level is then saved under
    `audio.g1_volume`. Responds 400 for an out-of-range level, 503 when the
    AudioClient is unavailable and 500 when SetVolume raises.
    """
    def _apply():
        global _g1_current_volume, _g1_user_volume
        requested = int(payload.level)
        if requested < 0 or requested > 100:
            raise HTTPException(400, "level must be 0..100")

        audio_client = _get_g1_audio_client()
        if audio_client is None:
            raise HTTPException(
                503,
                f"G1 AudioClient unavailable: {_g1_init_error or 'unknown'}",
            )
        try:
            with _g1_audio_lock:
                rc = audio_client.SetVolume(requested)
                _g1_current_volume = requested
                # Zero is a mute, not a preference: keep the old restore level.
                if requested > 0:
                    _g1_user_volume = requested
        except Exception as exc:
            raise HTTPException(500, f"SetVolume failed: {exc}")

        # Store the preferred level (not the live one) so that a mute
        # followed by a restart comes back at the preferred level.
        _save_persisted_g1_volume(_g1_user_volume)
        log.info("G1 volume → %d (user_pref=%d, rc=%s)",
                 requested, _g1_user_volume, rc)
        return {
            "ok": True,
            "current_volume": requested,
            "user_volume": _g1_user_volume,
            "muted": requested == 0,
            "return_code": rc,
            "persisted": True,
        }
    return await asyncio.to_thread(_apply)


# ─────────────────────── device profiles ───────────────────────


def _notify_audio_manager() -> None:
    """Best-effort: ask the running audio manager to refresh its devices."""
    try:
        from Project.Sanad.main import audio_mgr
        if audio_mgr is not None and hasattr(audio_mgr, "refresh_devices"):
            audio_mgr.refresh_devices()
    except Exception:
        pass


@router.get("/devices")
async def list_devices():
    """Full device + profile listing for the dashboard picker."""
    return await asyncio.to_thread(ad.status)


@router.get("/profiles")
async def list_profiles():
    """Named profiles, each flagged with whether it is currently plugged in."""
    def _collect():
        from dataclasses import asdict
        if ad.pactl_available():
            plugged = ad.detect_plugged_profiles()
        else:
            plugged = []
        plugged_ids = {entry["profile"]["id"] for entry in plugged}
        rows = []
        for profile in ad.PROFILES:
            row = dict(asdict(profile))
            row["available"] = profile.id in plugged_ids
            rows.append(row)
        return {"profiles": rows, "detected_ids": list(plugged_ids)}
    return await asyncio.to_thread(_collect)


class ProfileSelect(BaseModel):
    """Request body for POST /select-profile."""

    profile_id: str


@router.post("/select-profile")
async def select_profile(payload: ProfileSelect):
    """Activate a named profile; responds 409 when selection is refused."""
    def _select():
        outcome = ad.select_profile(payload.profile_id)
        if not outcome.get("ok"):
            raise HTTPException(409, outcome.get("error") or "Could not select profile")
        _notify_audio_manager()
        return outcome
    return await asyncio.to_thread(_select)


class ManualSelect(BaseModel):
    """Request body for POST /select-manual."""

    sink: str
    source: str


@router.post("/select-manual")
async def select_manual(payload: ManualSelect):
    """Select an explicit sink/source pair.

    Responds 400 when both names are empty and 500 when selection fails.
    """
    def _select():
        if not (payload.sink or payload.source):
            raise HTTPException(400, "At least one of sink/source required")
        outcome = ad.select_manual(payload.sink, payload.source)
        if not outcome.get("ok"):
            raise HTTPException(500, str(outcome.get("errors") or "Selection failed"))
        _notify_audio_manager()
        return outcome
    return await asyncio.to_thread(_select)


@router.post("/refresh")
async def refresh_devices():
    """Re-scan plugged devices and re-resolve current selection."""
    return await asyncio.to_thread(ad.status)
# One lock per reset flavour.
_RESET_LOCK = threading.Lock()
_USB_RESET_LOCK = threading.Lock()

# Anker PowerConf A3321 — used both for VID:PID matching in sysfs and for
# logging. Extend this tuple to support another USB conference device.
_USB_RESET_TARGETS = (
    {"vid": "291a", "pid": "3301", "label": "Anker PowerConf"},
)


def _refuse_if_busy() -> None:
    """Raise HTTPException(409) if Live Gemini is active or a record is playing.

    Both checks are best-effort: when the live subprocess handle or the
    audio manager cannot be imported, or their status call fails, that check
    is skipped rather than blocking the reset.
    """
    # -- check 1: live session --
    try:
        from Project.Sanad.main import live_sub
    except Exception:
        live_sub = None
    if live_sub is not None:
        try:
            live_status = live_sub.status() or {}
        except Exception:
            live_status = {}
        state = (live_status.get("state") or "").lower()
        idle = state in ("", "stopped", "error")
        if live_status.get("running") or not idle:
            raise HTTPException(
                409, f"Stop Live Gemini before resetting audio (state={state or '?'}).",
            )

    # -- check 2: record playback --
    try:
        from Project.Sanad.main import audio_mgr
    except Exception:
        audio_mgr = None
    if audio_mgr is None or not hasattr(audio_mgr, "playback_status"):
        return
    try:
        playback = audio_mgr.playback_status() or {}
        if playback.get("playing"):
            raise HTTPException(
                409, "Stop the active playback before resetting audio.",
            )
    except HTTPException:
        # Our own refusal (or one raised by the manager) must reach the client.
        raise
    except Exception:
        pass


def _detect_pa_flavour() -> str:
    """Return 'pipewire' if pipewire-pulse is the active daemon, else 'pulse'.

    Detection runs ``pgrep -x pipewire-pulse`` with a one second timeout; a
    missing pgrep or any subprocess error counts as 'pulse'.
    """
    probe = ["pgrep", "-x", "pipewire-pulse"]
    try:
        proc = subprocess.run(
            probe, check=False, capture_output=True, text=True, timeout=1.0,
        )
    except (FileNotFoundError, subprocess.SubprocessError):
        return "pulse"
    found = proc.returncode == 0 and bool((proc.stdout or "").strip())
    return "pipewire" if found else "pulse"
Non-zero exit is a + soft warning (some installs return 1 when there's no daemon to kill).""" + if flavour == "pipewire": + cmd = ["systemctl", "--user", "restart", "pipewire-pulse.service"] + else: + cmd = ["pulseaudio", "-k"] + try: + r = subprocess.run(cmd, check=False, capture_output=True, + text=True, timeout=5.0) + info = {"cmd": " ".join(cmd), "returncode": r.returncode, + "stderr": (r.stderr or "").strip()[:300]} + if r.returncode != 0: + log.warning("audio reset: %s exited %d (%s)", + cmd[0], r.returncode, info["stderr"]) + return info + except FileNotFoundError as exc: + return {"cmd": " ".join(cmd), "returncode": -1, + "stderr": f"binary missing: {exc}"} + except subprocess.TimeoutExpired: + return {"cmd": " ".join(cmd), "returncode": -1, + "stderr": "timeout (>5s)"} + + +def _wait_for_pactl(deadline_s: float = 5.0, interval_s: float = 0.2) -> bool: + """Poll `pactl info` until it returns 0 or the deadline expires.""" + import time as _time + end = _time.monotonic() + deadline_s + while _time.monotonic() < end: + if ad.pactl_available(): + return True + _time.sleep(interval_s) + return False + + +@router.post("/reset") +async def reset_audio_subsystem(): + """SOFT reset — restart pulseaudio/pipewire-pulse and re-resolve devices. + + Use when devices look stuck, pactl is unavailable, or the wrong sink + is being selected. **Does NOT recover a kernel-side missing USB capture + descriptor** — for that symptom use /api/audio/usb-reset. 
+ """ + if os.geteuid() == 0: + raise HTTPException( + 403, "Refusing to reset audio as root — Sanad must run as the " + "unitree user so the per-user PulseAudio session is reachable.", + ) + if not _RESET_LOCK.acquire(blocking=False): + raise HTTPException(429, "Reset already in progress.") + try: + _refuse_if_busy() + log.info( + "audio reset requested (uid=%s PULSE_RUNTIME_PATH=%s XDG_RUNTIME_DIR=%s)", + os.geteuid(), + os.environ.get("PULSE_RUNTIME_PATH") or "-", + os.environ.get("XDG_RUNTIME_DIR") or "-", + ) + try: + from Project.Sanad.main import audio_mgr + except Exception: + audio_mgr = None + + def _do() -> dict: + before = {"pactl_available": ad.pactl_available(), + "selection": ad.current_selection()} + + # Quiesce AudioManager so the next play_wav rebinds cleanly. + pya_closed = False + if audio_mgr is not None: + play_lock = getattr(audio_mgr, "play_lock", None) + acquired = False + if play_lock is not None: + acquired = play_lock.acquire(timeout=2.0) + try: + try: + audio_mgr.close() + pya_closed = True + except Exception as exc: + log.warning("audio reset: audio_mgr.close failed: %s", exc) + finally: + if acquired and play_lock is not None: + play_lock.release() + + flavour = _detect_pa_flavour() + kill_info = _kill_audio_daemon(flavour) + came_back = _wait_for_pactl(deadline_s=5.0) + if not came_back and flavour == "pulse": + # autospawn may be disabled — try an explicit start. 
+ try: + subprocess.run(["pulseaudio", "--start"], check=False, + capture_output=True, text=True, timeout=3.0) + except (FileNotFoundError, subprocess.SubprocessError) as exc: + log.warning("audio reset: pulseaudio --start failed: %s", exc) + came_back = _wait_for_pactl(deadline_s=2.0) + if not came_back: + raise HTTPException(500, { + "error": "audio daemon did not return within ~7s", + "flavour": flavour, "kill": kill_info, + }) + + apply_result: dict = {} + try: + apply_result = ad.apply_current_selection() or {} + except Exception as exc: + log.warning("audio reset: apply_current_selection failed: %s", exc) + apply_result = {"error": str(exc)} + + if audio_mgr is not None: + try: + import pyaudio + audio_mgr.pya = pyaudio.PyAudio() + audio_mgr.refresh_devices() + except Exception as exc: + log.error("audio reset: PyAudio re-init failed: %s", exc) + raise HTTPException( + 500, f"PortAudio re-init failed after daemon restart: {exc}") + + after_sel = ad.current_selection() or {} + detected = ad.detect_plugged_profiles() or [] + after = { + "pactl_available": ad.pactl_available(), + "selection": after_sel, + "detected_profiles": [p.get("profile", {}).get("id") for p in detected], + } + return { + "ok": True, "best_effort": True, "flavour": flavour, + "kill": kill_info, "pya_reinitialized": pya_closed, + "apply_result": apply_result, + "input_recovered": bool(after_sel.get("source")), + "output_recovered": bool(after_sel.get("sink")), + "before": before, "after": after, + "hint": ("Soft reset only fixes Pulse-side state. If " + "input_recovered is False, try POST /api/audio/usb-reset " + "or physically replug the dongle."), + } + return await asyncio.to_thread(_do) + finally: + _RESET_LOCK.release() + + +def _find_usb_devices_by_vid_pid(vid: str, pid: str) -> list[str]: + """Return sysfs bus-id strings (e.g. '1-3') for every USB device whose + idVendor/idProduct match. Empty list when nothing matches. 
+ + We read /sys/bus/usb/devices/* — every USB *device* (not interface) has + idVendor/idProduct files. Interfaces (paths with a colon, e.g. '1-3:1.1') + do not, so they're naturally skipped. + """ + import glob + hits: list[str] = [] + for path in glob.glob("/sys/bus/usb/devices/*"): + name = os.path.basename(path) + if ":" in name: + continue + try: + with open(os.path.join(path, "idVendor")) as f: + v = f.read().strip().lower() + with open(os.path.join(path, "idProduct")) as f: + p = f.read().strip().lower() + except OSError: + continue + if v == vid.lower() and p == pid.lower(): + hits.append(name) + return hits + + +def _snd_usb_interfaces_for_device(bus_id: str) -> list[str]: + """For USB device `bus_id` (e.g. '1-3'), return all interface names that + are currently bound to the snd-usb-audio driver (e.g. ['1-3:1.0']). + + Used so we unbind ONLY the audio interfaces and don't touch HID / HUB + interfaces on the same composite device. + """ + import glob + bound: list[str] = [] + base = f"/sys/bus/usb/devices/{bus_id}" + for iface in glob.glob(f"{base}/{bus_id}:*"): + driver_link = os.path.join(iface, "driver") + if not os.path.islink(driver_link): + continue + try: + driver = os.path.basename(os.readlink(driver_link)) + except OSError: + continue + if driver == "snd-usb-audio": + bound.append(os.path.basename(iface)) + return bound + + +def _write_sysfs(path: str, value: str) -> tuple[bool, str]: + """Write `value` to a sysfs file. Returns (success, error_message). + + Writes to /sys/bus/usb/drivers/snd-usb-audio/{bind,unbind} usually + require root. If permission denied, the caller should fall back to + invoking shell_scripts/reset_anker_usb.sh via sudo (one-time sudoers + setup documented in that script's header). 
+ """ + try: + with open(path, "w") as f: + f.write(value) + return True, "" + except PermissionError as exc: + return False, f"permission denied: {path} ({exc})" + except OSError as exc: + return False, f"write failed: {path} ({exc})" + + +@router.post("/usb-reset") +async def usb_reset_anker(): + """HARD reset — unbind+rebind snd-usb-audio for the Anker (VID:PID + 291a:3301). Forces the kernel to re-parse the USB Audio Class + descriptors, which is the only way to recover a missing capture profile + on this Jetson without a physical replug. + + Tries two paths: + 1. Direct sysfs write (no sudo) — works if a udev rule has set + `audio` group ownership / world-write on the snd-usb-audio bind + files, or if Sanad runs as root (it shouldn't). + 2. Fallback to `sudo shell_scripts/reset_anker_usb.sh` — works after + a one-time sudoers entry; see that script's header for setup. + + Refuses while Live Gemini or a record playback is in flight (same + guard as the soft reset). + """ + if not _USB_RESET_LOCK.acquire(blocking=False): + raise HTTPException(429, "USB reset already in progress.") + try: + _refuse_if_busy() + + # Find candidate Anker USB devices currently enumerated. + candidates: list[dict] = [] + for tgt in _USB_RESET_TARGETS: + for bus_id in _find_usb_devices_by_vid_pid(tgt["vid"], tgt["pid"]): + candidates.append({"bus_id": bus_id, **tgt}) + if not candidates: + wanted = ", ".join( + "{}:{}".format(t["vid"], t["pid"]) for t in _USB_RESET_TARGETS + ) + raise HTTPException( + 404, + f"No matching USB device found (looked for {wanted}). 
" + "Plug the Anker dongle and try again.", + ) + + log.info("usb reset: candidates=%s", candidates) + + def _do() -> dict: + before_detected = [ + p.get("profile", {}).get("id") + for p in (ad.detect_plugged_profiles() or []) + ] + results: list[dict] = [] + for cand in candidates: + bus = cand["bus_id"] + ifaces = _snd_usb_interfaces_for_device(bus) + attempt = {"bus_id": bus, "label": cand["label"], + "snd_interfaces": ifaces, "method": None, + "ok": False, "error": ""} + if not ifaces: + attempt["error"] = ("no snd-usb-audio interfaces bound " + "to this device — already unbound or " + "kernel didn't claim it") + results.append(attempt) + continue + + # ─── Path 1: direct sysfs write ─── + unbind_path = "/sys/bus/usb/drivers/snd-usb-audio/unbind" + bind_path = "/sys/bus/usb/drivers/snd-usb-audio/bind" + direct_ok = True + direct_err = "" + for iface in ifaces: + ok, err = _write_sysfs(unbind_path, iface) + if not ok: + direct_ok = False + direct_err = err + break + if direct_ok: + import time as _time + _time.sleep(0.5) + for iface in ifaces: + ok, err = _write_sysfs(bind_path, iface) + if not ok: + direct_ok = False + direct_err = err + break + if direct_ok: + attempt.update({"method": "direct-sysfs", "ok": True}) + results.append(attempt) + continue + + # ─── Path 2: sudo helper script ─── + from pathlib import Path as _Path + helper = (_Path(__file__).resolve().parent.parent.parent + / "shell_scripts" / "reset_anker_usb.sh") + if not helper.exists(): + attempt.update({"method": "direct-sysfs", + "error": f"{direct_err}; helper not present " + f"at {helper}"}) + results.append(attempt) + continue + try: + r = subprocess.run( + ["sudo", "-n", str(helper), bus], + check=False, capture_output=True, text=True, timeout=10.0, + ) + attempt["method"] = "sudo-helper" + if r.returncode == 0: + attempt["ok"] = True + else: + attempt["error"] = ( + f"sudo helper exited {r.returncode}: " + f"{(r.stderr or r.stdout or '').strip()[:300]}" + ) + except 
subprocess.TimeoutExpired: + attempt["error"] = "sudo helper timed out (>10s)" + except FileNotFoundError as exc: + attempt["error"] = f"sudo not available: {exc}" + results.append(attempt) + + # Settle, then re-detect + import time as _time + _time.sleep(1.0) + try: + ad.apply_current_selection() + except Exception: + pass + try: + from Project.Sanad.main import audio_mgr + if audio_mgr is not None and hasattr(audio_mgr, "refresh_devices"): + audio_mgr.refresh_devices() + except Exception: + pass + + after_detected = [ + p.get("profile", {}).get("id") + for p in (ad.detect_plugged_profiles() or []) + ] + any_ok = any(r.get("ok") for r in results) + mic_now = any( + "anker" in (p.get("profile", {}).get("id") or "").lower() + for p in (ad.detect_plugged_profiles() or []) + ) + + return { + "ok": any_ok, + "candidates": results, + "before_detected_profiles": before_detected, + "after_detected_profiles": after_detected, + "input_recovered": mic_now, + "hint": ( + "If ok is False, the unbind/rebind path needs sudo. " + "Run `bash shell_scripts/reset_anker_usb.sh --setup-sudoers` " + "once on the robot to install the sudoers entry, then retry." + ) if not any_ok else None, + } + + return await asyncio.to_thread(_do) + finally: + _USB_RESET_LOCK.release() diff --git a/vendor/Sanad/dashboard/routes/controller.py b/vendor/Sanad/dashboard/routes/controller.py new file mode 100644 index 0000000..f0b998d --- /dev/null +++ b/vendor/Sanad/dashboard/routes/controller.py @@ -0,0 +1,295 @@ +"""Controller tab — manual dashboard locomotion control (N2 Phase 1/2). + +Routes live under /api/controller. All WRITE actions (move / step / postures / +modes / MotionSwitcher) require the in-memory "Enable movement" arm flag and +return 409 when disarmed. Reads (/status, /joints, /msc, /status/summary), +E-STOP and the arm toggle are ALWAYS available. + +`/status/summary` is the aggregate the dashboard polls for the global subsystem +status strip (Camera / Face / Place / Movement). 
It is kept under /api/controller +(final path /api/controller/status/summary) so no second router is needed; note +/api/status (no /summary) is already used by the SPA, so the suffix matters. +""" + +from __future__ import annotations + +import asyncio + +from fastapi import APIRouter, HTTPException, Query +from pydantic import BaseModel + +from Project.Sanad.config import BASE_DIR +from Project.Sanad.core.logger import get_logger +from Project.Sanad.vision import recognition_state + +log = get_logger("controller_routes") + +router = APIRouter() + +STATE_PATH = BASE_DIR / "data" / ".recognition_state.json" + + +# ── lazy subsystem accessors ──────────────────────────────── + +def _get_loco(): + try: + from Project.Sanad.main import loco_controller # type: ignore + return loco_controller + except Exception: + return None + + +def _get_camera(): + try: + from Project.Sanad.main import camera # type: ignore + return camera + except Exception: + return None + + +def _get_live_sub(): + try: + from Project.Sanad.main import live_sub # type: ignore + return live_sub + except Exception: + return None + + +def _get_dispatch(): + try: + from Project.Sanad.main import movement_dispatch # type: ignore + return movement_dispatch + except Exception: + return None + + +def _require_loco(): + lc = _get_loco() + if lc is None: + raise HTTPException(503, "Locomotion controller subsystem unavailable.") + return lc + + +def _require_armed(lc): + if not lc.is_armed(): + raise HTTPException(409, "Movement is disarmed. 
Enable movement first.") + + +# ── reads ─────────────────────────────────────────────────── + +@router.get("/status") +async def get_status(): + lc = _require_loco() + return await asyncio.to_thread(lc.status) + + +@router.get("/joints") +async def get_joints(): + lc = _require_loco() + return await asyncio.to_thread(lc.joints) + + +@router.get("/msc") +async def get_msc(): + lc = _require_loco() + return await asyncio.to_thread(lc.msc_check) + + +# ── arm flag / E-STOP (always available) ──────────────────── + +@router.post("/arm") +async def set_arm(on: bool = Query(...)): + lc = _require_loco() + res = await asyncio.to_thread(lc.arm_movement if on else lc.disarm_movement) + return res + + +@router.post("/gemini-movement") +async def set_gemini_movement(on: bool = Query(...)): + """Enable / disable Gemini voice-driven locomotion (N2 Phase 3 gate). + + Writes recognition_state.movement_enabled — SEPARATE from the manual arm + flag. The Gemini child announces the toggle (spoken), and the parent + MovementDispatcher starts/stops acting on confirmation phrases. Default OFF. + """ + st = await asyncio.to_thread(recognition_state.mutate, STATE_PATH, + movement_enabled=bool(on)) + # Enabling Gemini movement also clears any E-STOP latch on the dispatcher. + if on: + md = _get_dispatch() + if md is not None: + try: + md.clear_estop() + except Exception: + log.exception("clear_estop failed") + log.info("gemini-movement %s", "ON" if on else "OFF") + return {"ok": True, "movement_enabled": st.movement_enabled} + + +@router.post("/estop") +async def estop(): + lc = _require_loco() + res = await asyncio.to_thread(lc.estop) + # Full stop: drop the manual arm flag AND latch the voice dispatcher off, so + # no source (teleop, step, or voice dispatch) can keep driving the robot. The + # dispatcher latch is used instead of flipping movement_enabled so the Gemini + # child does not deliver a spoken "movement disabled" line during an E-STOP. 
+ try: + await asyncio.to_thread(lc.disarm_movement) + except Exception: + log.exception("estop disarm failed") + md = _get_dispatch() + if md is not None: + try: + md.emergency_stop() + except Exception: + log.exception("estop dispatcher latch failed") + return {"ok": True, **res} + + +@router.post("/stop") +async def stop(): + lc = _require_loco() + # Allowed even when disarmed — StopMove is always safe. + res = await asyncio.to_thread(lc.stop_move) + return res + + +# ── movement (armed) ──────────────────────────────────────── + +class MoveBody(BaseModel): + vx: float = 0.0 + vy: float = 0.0 + vyaw: float = 0.0 + run: bool = False + + +@router.post("/move") +async def move(body: MoveBody): + lc = _require_loco() + _require_armed(lc) + return await asyncio.to_thread(lc.move, body.vx, body.vy, body.vyaw, body.run) + + +@router.post("/step") +async def step(dir: str = Query(...)): + lc = _require_loco() + _require_armed(lc) + res = await asyncio.to_thread(lc.step, dir) + if not res.get("ok"): + raise HTTPException(400, res.get("reason", "step failed")) + return res + + +# ── modes / postures (armed) ──────────────────────────────── + +@router.post("/mode/prep") +async def mode_prep(): + lc = _require_loco() + _require_armed(lc) + return await asyncio.to_thread(lc.prep_mode) + + +@router.post("/mode/ready") +async def mode_ready(): + lc = _require_loco() + _require_armed(lc) + return await asyncio.to_thread(lc.ready_start_mode) + + +@router.post("/posture/{name}") +async def posture(name: str): + lc = _require_loco() + _require_armed(lc) + res = await asyncio.to_thread(lc.posture, name) + if not res.get("ok") and res.get("reason"): + raise HTTPException(400, res["reason"]) + return res + + +@router.post("/balance") +async def balance(mode: int = Query(...)): + lc = _require_loco() + _require_armed(lc) + return await asyncio.to_thread(lc.set_balance_mode, mode) + + +@router.post("/height") +async def height(h: float = Query(...)): + lc = _require_loco() + 
_require_armed(lc) + return await asyncio.to_thread(lc.set_stand_height, h) + + +# ── MotionSwitcher / reconnect (armed) ────────────────────── + +@router.post("/msc/select-ai") +async def msc_select_ai(): + lc = _require_loco() + _require_armed(lc) + return await asyncio.to_thread(lc.msc_select_ai) + + +@router.post("/msc/release") +async def msc_release(): + lc = _require_loco() + _require_armed(lc) + return await asyncio.to_thread(lc.msc_release) + + +@router.post("/reconnect") +async def reconnect(): + lc = _require_loco() + _require_armed(lc) + return await asyncio.to_thread(lc.reconnect) + + +# ── aggregate subsystem summary (always available) ────────── + +@router.get("/status/summary") +async def status_summary(): + """Live on/off state for the header status strip. Never raises.""" + try: + st = recognition_state.read(STATE_PATH) + except Exception: + st = recognition_state.RecognitionState() + + cam = _get_camera() + camera_running = False + try: + camera_running = bool(cam is not None and cam.is_running()) + except Exception: + camera_running = False + + lc = _get_loco() + movement_armed = False + try: + movement_armed = bool(lc is not None and lc.is_armed()) + except Exception: + movement_armed = False + + sub = _get_live_sub() + gemini_running = False + try: + runner = getattr(sub, "is_running", None) + gemini_running = bool(callable(runner) and runner()) + except Exception: + gemini_running = False + + # Effective Gemini-movement = the file flag AND not latched off by an E-STOP. 
+ md = _get_dispatch() + estopped = False + try: + estopped = bool(md is not None and md.is_estopped()) + except Exception: + estopped = False + + return { + "vision_enabled": st.vision_enabled, + "camera_running": camera_running, + "face_rec_enabled": st.face_rec_enabled, + "zone_rec_enabled": st.zone_rec_enabled, + "movement_armed": movement_armed, + "gemini_movement_enabled": st.movement_enabled and not estopped, + "gemini_running": gemini_running, + } diff --git a/vendor/Sanad/dashboard/routes/health.py b/vendor/Sanad/dashboard/routes/health.py new file mode 100644 index 0000000..f685209 --- /dev/null +++ b/vendor/Sanad/dashboard/routes/health.py @@ -0,0 +1,51 @@ +"""Health and status endpoints.""" + +from __future__ import annotations + +from fastapi import APIRouter + +from Project.Sanad.core.logger import get_logger + +log = get_logger("health_route") + +router = APIRouter() + + +def _safe_status(component, name: str) -> dict: + """Get status without crashing the whole endpoint if one component fails.""" + if component is None: + return {"available": False} + try: + if hasattr(component, "status") and callable(component.status): + return component.status() + return {"available": True} + except Exception as exc: + log.warning("status() failed for %s: %s", name, exc) + return {"available": True, "error": str(exc)} + + +@router.get("/health") +async def health(): + from Project.Sanad.main import brain + return { + "status": "ok", + "brain": _safe_status(brain, "brain"), + } + + +@router.get("/status") +async def full_status(): + from Project.Sanad.main import ( + brain, arm, voice_client, macro_rec, macro_play, + live_voice, live_sub, wake_mgr, + ) + return { + "brain": _safe_status(brain, "brain"), + "voice": _safe_status(voice_client, "voice"), + "arm": _safe_status(arm, "arm"), + "macro_recorder": _safe_status(macro_rec, "macro_rec"), + "macro_player": _safe_status(macro_play, "macro_play"), + "live_voice": _safe_status(live_voice, "live_voice"), + 
"live_subprocess": _safe_status(live_sub, "live_sub"), + "wake_manager": _safe_status(wake_mgr, "wake_mgr"), + } diff --git a/vendor/Sanad/dashboard/routes/live_subprocess.py b/vendor/Sanad/dashboard/routes/live_subprocess.py new file mode 100644 index 0000000..831789c --- /dev/null +++ b/vendor/Sanad/dashboard/routes/live_subprocess.py @@ -0,0 +1,38 @@ +"""Live Gemini Subprocess control endpoints.""" + +from __future__ import annotations + +import asyncio + +from fastapi import APIRouter, HTTPException + +router = APIRouter() + + +def _sub_or_503(): + from Project.Sanad.main import live_sub + if live_sub is None: + raise HTTPException(503, "Live subprocess not available") + return live_sub + + +@router.get("/status") +async def subprocess_status(): + from Project.Sanad.main import live_sub + if live_sub is None: + return {"available": False, "state": "unavailable"} + return live_sub.status() + + +@router.post("/start") +async def start_subprocess(): + live_sub = _sub_or_503() + try: + return await asyncio.to_thread(live_sub.start) + except RuntimeError as exc: + raise HTTPException(404, str(exc)) + + +@router.post("/stop") +async def stop_subprocess(): + return await asyncio.to_thread(_sub_or_503().stop) diff --git a/vendor/Sanad/dashboard/routes/live_voice.py b/vendor/Sanad/dashboard/routes/live_voice.py new file mode 100644 index 0000000..1075841 --- /dev/null +++ b/vendor/Sanad/dashboard/routes/live_voice.py @@ -0,0 +1,73 @@ +"""Live Voice Commands — voice-to-arm phrase trigger dispatcher. + +Listens to GeminiSubprocess user transcripts, matches against +sanad_arm.txt phrases, and fires ARM.trigger_action_by_id. + +Endpoints: + POST /start begin polling transcripts + POST /stop stop polling + POST /deferred-mode?enabled toggle instant vs deferred trigger + POST /trigger-enabled?enabled master gate — allow arm actions or not + GET /status running, last heard, last action, etc. 
+ GET /triggers arm trigger history (log) +""" + +from __future__ import annotations + +from fastapi import APIRouter, HTTPException + +router = APIRouter() + + +def _loop(): + from Project.Sanad.main import live_voice + if live_voice is None: + raise HTTPException(503, "LiveVoiceLoop not initialized.") + return live_voice + + +@router.get("/status") +async def status(): + from Project.Sanad.main import live_voice + if live_voice is None: + return {"available": False} + return {"available": True, **live_voice.status()} + + +@router.post("/start") +async def start(): + loop = _loop() + await loop.start() + return {"ok": True, **loop.status()} + + +@router.post("/stop") +async def stop(): + loop = _loop() + await loop.stop() + return {"ok": True, **loop.status()} + + +@router.post("/deferred-mode") +async def set_deferred(enabled: bool): + loop = _loop() + loop.set_deferred(enabled) + return {"ok": True, "deferred_mode": loop.deferred_mode} + + +@router.post("/trigger-enabled") +async def set_trigger_enabled(enabled: bool): + """Master gate for voice → arm triggering. 
Default OFF.""" + loop = _loop() + loop.set_trigger_enabled(enabled) + return {"ok": True, "trigger_enabled": loop.trigger_enabled} + + +@router.get("/triggers") +async def triggers(): + loop = _loop() + return { + "triggers": list(loop.triggers), + "total": len(loop.triggers), + "dispatch_actions": len(loop.wake_dispatch), + } diff --git a/vendor/Sanad/dashboard/routes/logs.py b/vendor/Sanad/dashboard/routes/logs.py new file mode 100644 index 0000000..7eccd04 --- /dev/null +++ b/vendor/Sanad/dashboard/routes/logs.py @@ -0,0 +1,203 @@ +"""Log viewing and snapshot endpoints.""" + +from __future__ import annotations + +import asyncio +import json +import platform +import shutil +import socket +import sys +from collections import deque +from datetime import datetime + +from fastapi import APIRouter, HTTPException +from fastapi.responses import PlainTextResponse + +from Project.Sanad.config import BASE_DIR, LOGS_DIR +from Project.Sanad.dashboard.routes._safe_io import safe_path_under + +router = APIRouter() + + +def _list_logs_sync(): + LOGS_DIR.mkdir(parents=True, exist_ok=True) + files = [] + for p in sorted(LOGS_DIR.glob("*.log*")): + files.append({ + "name": p.name, + "size_bytes": p.stat().st_size, + }) + return files + + +@router.get("/") +async def list_logs(): + files = await asyncio.to_thread(_list_logs_sync) + return {"logs_dir": str(LOGS_DIR), "files": files} + + +def _tail_sync(path, lines: int) -> list[str]: + with open(path, "r", encoding="utf-8", errors="replace") as f: + tail = deque(f, maxlen=lines) + return [l.rstrip("\n") for l in tail] + + +@router.get("/tail/{filename}") +async def tail_log(filename: str, lines: int = 200): + path = safe_path_under(LOGS_DIR, filename) + if not path.exists(): + raise HTTPException(404, "File not found") + lines_out = await asyncio.to_thread(_tail_sync, path, lines) + return {"filename": path.name, "lines": lines_out} + + +def _snapshot_sync(ts: str): + saved = [] + for p in LOGS_DIR.glob("*.log"): + # Skip prior 
snapshots to avoid recursive growth + if "_snapshot_" in p.stem: + continue + dest = LOGS_DIR / f"{p.stem}_snapshot_{ts}.log" + shutil.copy2(p, dest) + saved.append({"source": p.name, "snapshot": dest.name, "size_bytes": dest.stat().st_size}) + return saved + + +@router.post("/snapshot") +async def save_log_snapshot(): + """Save timestamped copy of all log files.""" + LOGS_DIR.mkdir(parents=True, exist_ok=True) + ts = datetime.now().strftime("%Y%m%d_%H%M%S") + saved = await asyncio.to_thread(_snapshot_sync, ts) + return {"ok": True, "saved_at": ts, "snapshots": saved} + + +# ─────────────────────── full bundle (everything in one text blob) ─────────────────────── + +def _build_bundle_sync(lines_per_file: int, include_system: bool) -> str: + """Build the full text bundle — header, subsystem status, all logs. + + Returns a single string safe to copy directly into a bug report. + """ + out: list[str] = [] + ts = datetime.now().isoformat(timespec="seconds") + out.append("=" * 72) + out.append(f"SANAD LOG BUNDLE — {ts}") + out.append("=" * 72) + out.append(f"Hostname : {socket.gethostname()}") + out.append(f"Platform : {platform.platform()}") + out.append(f"Python : {sys.version.split()[0]}") + out.append(f"Executable: {sys.executable}") + out.append(f"BASE_DIR : {BASE_DIR}") + out.append(f"LOGS_DIR : {LOGS_DIR}") + + # Subsystems — pull live status from main.SUBSYSTEMS + if include_system: + out.append("") + out.append("-" * 72) + out.append("SUBSYSTEMS") + out.append("-" * 72) + try: + from Project.Sanad.main import SUBSYSTEMS + except Exception as exc: + out.append(f" could not import SUBSYSTEMS: {exc}") + SUBSYSTEMS = {} + + for name in sorted(SUBSYSTEMS): + comp = SUBSYSTEMS[name] + if comp is None: + out.append(f" ✗ {name:15s} unavailable") + continue + status: dict = {} + if hasattr(comp, "status") and callable(comp.status): + try: + s = comp.status() + if isinstance(s, dict): + status = s + else: + status = {"raw": str(s)} + except Exception as exc: + status = 
{"status_error": str(exc)} + try: + status_str = json.dumps(status, ensure_ascii=False, default=str) + except Exception: + status_str = str(status) + out.append(f" ✓ {name:15s} {status_str}") + + # Dashboard router load state + out.append("") + out.append("-" * 72) + out.append("DASHBOARD ROUTERS") + out.append("-" * 72) + try: + from Project.Sanad.dashboard.app import _loaded_routes, _failed_routes + out.append(f" loaded ({len(_loaded_routes)}): {', '.join(_loaded_routes)}") + if _failed_routes: + out.append(f" failed ({len(_failed_routes)}):") + for name, err in _failed_routes.items(): + out.append(f" - {name}: {err}") + else: + out.append(" failed (0): —") + except Exception as exc: + out.append(f" could not read dashboard state: {exc}") + + # All log files — tail N lines each, skip snapshots + out.append("") + out.append("-" * 72) + out.append(f"LOG FILES (last {lines_per_file} lines each)") + out.append("-" * 72) + + LOGS_DIR.mkdir(parents=True, exist_ok=True) + log_paths = sorted(LOGS_DIR.glob("*.log*")) + files_included = 0 + for p in log_paths: + if "_snapshot_" in p.stem: + continue # skip stale snapshots + try: + size = p.stat().st_size + except OSError: + size = 0 + out.append("") + out.append(f"=== {p.name} ({size} bytes) ===") + try: + with open(p, "r", encoding="utf-8", errors="replace") as f: + tail = deque(f, maxlen=lines_per_file) + for raw in tail: + out.append(raw.rstrip("\n")) + files_included += 1 + except OSError as exc: + out.append(f" ") + + out.append("") + out.append("=" * 72) + out.append(f"END OF BUNDLE — {files_included} log file(s) included") + out.append("=" * 72) + return "\n".join(out) + + +@router.get("/bundle") +async def logs_bundle(lines: int = 1000, include_system: bool = True): + """Return a single plain-text dump of everything useful for debugging. 
+ + Includes: + - Timestamp, hostname, platform, Python, BASE_DIR, LOGS_DIR + - Live status of every subsystem in main.SUBSYSTEMS + - Dashboard router load/fail state + - Tail of every .log file in LOGS_DIR (configurable per-file limit) + + Response is `text/plain` so it's safe to copy straight to clipboard + or pipe into a file. Intended use: dashboard "Copy All Logs" button + and manual `curl ... > sanad_bundle.txt` debugging. + """ + # Clamp lines to keep the payload sane + lines = max(10, min(int(lines), 50000)) + text = await asyncio.to_thread(_build_bundle_sync, lines, include_system) + return PlainTextResponse( + text, + headers={ + "Content-Disposition": ( + f'inline; filename="sanad_bundle_{datetime.now().strftime("%Y%m%d_%H%M%S")}.txt"' + ), + }, + ) diff --git a/vendor/Sanad/dashboard/routes/macros.py b/vendor/Sanad/dashboard/routes/macros.py new file mode 100644 index 0000000..56c3042 --- /dev/null +++ b/vendor/Sanad/dashboard/routes/macros.py @@ -0,0 +1,238 @@ +"""Macro recording and playback endpoints.""" + +from __future__ import annotations + +import asyncio +from pathlib import Path + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from Project.Sanad.config import AUDIO_RECORDINGS_DIR, MOTIONS_DIR +from Project.Sanad.core.logger import get_logger + +log = get_logger("macros_route") +router = APIRouter() + + +def _block_if_movement_armed(): + """409 when locomotion movement is armed — arm motion is mutually exclusive + with walking. The arm controller's motion-block is the safety net.""" + try: + from Project.Sanad.main import loco_controller # type: ignore + armed = loco_controller is not None and loco_controller.is_armed() + except HTTPException: + raise + except Exception: + return + if armed: + raise HTTPException( + 409, "Arm actions are disabled while movement is enabled. 
" + "Disable movement in the Controller tab first.") + + +class MacroName(BaseModel): + name: str + + +class ComboPlayPayload(BaseModel): + audio_file: str = "" # filename under data/audio/ (or empty for none) + motion_file: str = "" # DEPRECATED — use action_id. Still accepted for bare JSONL by filename. + action_id: int | None = None # arm_controller action id (SDK built-in OR JSONL) — preferred + speed: float = 1.0 + + +@router.get("/") +async def list_macros(): + from Project.Sanad.main import macro_play + if macro_play is None: + return {"macros": []} + return {"macros": macro_play.list_macros()} + + +@router.get("/status") +async def macro_status(): + from Project.Sanad.main import macro_rec, macro_play + return { + "recorder": macro_rec.status() if macro_rec else {}, + "player": macro_play.status() if macro_play else {}, + } + + +@router.post("/record/start") +async def start_recording(payload: MacroName): + from Project.Sanad.main import macro_rec + if macro_rec is None: + raise HTTPException(503, "Macro recorder not available.") + return macro_rec.start(payload.name) + + +@router.post("/record/stop") +async def stop_recording(): + import asyncio + from Project.Sanad.main import macro_rec + if macro_rec is None: + raise HTTPException(503, "Macro recorder not available.") + return await asyncio.to_thread(macro_rec.stop) + + +@router.post("/play") +async def play_macro(payload: MacroName): + from Project.Sanad.main import brain + _block_if_movement_armed() + return await brain.play_macro(payload.name) + + +@router.post("/stop") +async def stop_macro(): + from Project.Sanad.main import macro_play + if macro_play: + macro_play.stop() + return {"ok": True} + + +# ─── Ad-hoc audio + motion combined playback ───────────────────────── +# List the two catalogues so the dashboard can populate dropdowns, then +# play the chosen pair in parallel (asyncio.gather) — same scheme the +# Brain uses for `parallel`-mode skills, but ad-hoc instead of predefined. 
def _catalogue(directory: Path, pattern: str) -> dict:
    """Create *directory* if missing and describe the files matching *pattern*.

    Returns ``{"files": [{"name", "size_kb"}, ...], "dir": str(directory)}``
    with entries in sorted path order; files whose ``stat()`` fails are
    left out.
    """
    directory.mkdir(parents=True, exist_ok=True)
    listing = []
    for item in sorted(directory.glob(pattern)):
        try:
            size_kb = round(item.stat().st_size / 1024, 1)
        except OSError:
            # stat() failed for this entry — leave it out of the listing.
            continue
        listing.append({"name": item.name, "size_kb": size_kb})
    return {"files": listing, "dir": str(directory)}


@router.get("/audio-files")
async def list_audio_files():
    """Enumerate the ``*.wav`` files found in ``AUDIO_RECORDINGS_DIR``."""
    return _catalogue(AUDIO_RECORDINGS_DIR, "*.wav")


@router.get("/motion-files")
async def list_motion_files():
    """Enumerate the ``*.jsonl`` motion files found in ``MOTIONS_DIR``.

    Thin wrapper so the Macro Recorder dropdown does not have to call the
    replay route.
    """
    return _catalogue(MOTIONS_DIR, "*.jsonl")
@router.post("/play-combined")
async def play_combined(payload: ComboPlayPayload):
    """Fire a user-picked audio clip and arm action in parallel.

    Motion is dispatched through ``arm.trigger_by_id(action_id, speed)``
    when ``action_id`` is given, otherwise through the legacy
    ``arm.replay_file`` path for a bare JSONL file name under
    ``MOTIONS_DIR``. Audio goes through ``audio_mgr.play_wav``. Either
    side may be omitted, but not both.

    Raises:
        HTTPException: 400 when nothing was picked or ``audio_file``
            escapes ``AUDIO_RECORDINGS_DIR``; 404 when the audio file does
            not exist; 409 when a motion is requested while locomotion is
            armed; 503 when the required manager/controller is missing.

    Returns:
        ``{"ok": True, ...}`` echoing the request plus
        ``audio_played`` / ``motion_played`` or ``audio_error`` /
        ``motion_error`` for each side that ran.
    """
    from Project.Sanad.main import audio_mgr, arm

    has_audio = bool(payload.audio_file)
    has_motion = payload.action_id is not None or bool(payload.motion_file)
    if not has_audio and not has_motion:
        raise HTTPException(400, "pick at least one of audio_file / action_id / motion_file")
    if has_motion:
        _block_if_movement_armed()  # audio-only combos still allowed while armed

    # Same bounds as the /motion/trigger route: never hand the arm
    # controller a zero, negative or absurdly large playback speed.
    speed = max(0.1, min(payload.speed, 5.0))

    # Collect coroutine *functions*, not coroutine objects: if a later
    # validation step raises, nothing is left created-but-never-awaited.
    jobs = []
    result: dict = {
        "audio_file": payload.audio_file,
        "action_id": payload.action_id,
        "motion_file": payload.motion_file,
    }

    if has_audio:
        if audio_mgr is None:
            raise HTTPException(503, "AudioManager not available")
        audio_path = (AUDIO_RECORDINGS_DIR / payload.audio_file).resolve()
        try:
            audio_path.relative_to(AUDIO_RECORDINGS_DIR.resolve())
        except ValueError:
            raise HTTPException(400, "audio_file path traversal denied")
        if not audio_path.exists():
            raise HTTPException(404, f"audio not found: {payload.audio_file}")

        async def _play_audio():
            try:
                await asyncio.to_thread(audio_mgr.play_wav, audio_path)
                result["audio_played"] = audio_path.name
            except Exception as exc:
                log.exception("combined play: audio failed")
                result["audio_error"] = str(exc)
        jobs.append(_play_audio)

    if has_motion:
        if arm is None:
            raise HTTPException(503, "ArmController not available")

        async def _play_motion():
            try:
                if payload.action_id is not None:
                    # SDK built-in OR JSONL — arm.trigger_by_id handles both
                    await asyncio.to_thread(arm.trigger_by_id,
                                            int(payload.action_id),
                                            speed)
                    result["motion_played"] = f"action_id={payload.action_id}"
                else:
                    # Legacy path: bare JSONL filename
                    motion_path = (MOTIONS_DIR / payload.motion_file).resolve()
                    try:
                        motion_path.relative_to(MOTIONS_DIR.resolve())
                    except ValueError:
                        result["motion_error"] = "motion_file path traversal denied"
                        return
                    if not motion_path.exists():
                        result["motion_error"] = f"motion not found: {payload.motion_file}"
                        return
                    await asyncio.to_thread(arm.replay_file, str(motion_path), speed)
                    result["motion_played"] = motion_path.name
            except Exception as exc:
                log.exception("combined play: motion failed")
                result["motion_error"] = str(exc)
        jobs.append(_play_motion)

    await asyncio.gather(*(job() for job in jobs))
    return {"ok": True, **result}
@router.post("/trigger")
async def trigger_action(payload: TriggerPayload):
    """Run one arm action, selected by ``action_id`` or ``action_name``.

    ``action_id`` wins when both are supplied. The requested speed is
    clamped to 0.1..5.0 before it is handed to the arm controller, and the
    controller call runs in a worker thread.

    Raises:
        HTTPException: 503 when no arm controller is attached; 409 (via
            ``_block_if_movement_armed``) while locomotion is armed; 404
            when the controller raises ``KeyError`` for the action; 400
            when neither selector is provided.
    """
    from Project.Sanad.main import arm
    if arm is None:
        raise HTTPException(503, "Arm controller not attached.")
    _block_if_movement_armed()

    speed = max(0.1, min(payload.speed, 5.0))

    # NOTE: TOCTOU on arm.is_busy is unavoidable from the route layer.
    # The internal arm controller has its own _lock + _is_busy guard inside
    # _execute() that returns silently if busy. We rely on that.
    if payload.action_id is not None:
        field, target = "action_id", payload.action_id
    elif payload.action_name:
        field, target = "action_name", payload.action_name
    else:
        raise HTTPException(400, "Provide action_id or action_name.")

    try:
        dispatch = arm.trigger_by_id if field == "action_id" else arm.trigger_by_name
        await asyncio.to_thread(dispatch, target, speed)
    except KeyError as exc:
        raise HTTPException(404, str(exc))
    return {"ok": True, field: target, "speed": speed}
def _load_rule_prompts() -> dict[str, str]:
    """Read the ``[SECTION]``-style rule file into the two prompts it feeds.

    A line whose stripped form starts with ``[`` and ends with ``]`` opens a
    section named by the text between the brackets; following lines
    (right-stripped) belong to it until the next header. A repeated header
    starts that section over. Text before the first header is ignored.

    Returns:
        ``{"system_prompt": ..., "replay_prompt": ...}`` taken from the
        ``SYSTEM_PROMPT`` and ``REPLAY_SYSTEM_PROMPT`` sections. When the
        file is missing or the system section is empty, ``system_prompt``
        falls back to ``_load_system_prompt()``.
    """
    prompts = {"system_prompt": "", "replay_prompt": ""}

    try:
        text = RULE_PROMPT_PATH.read_text(encoding="utf-8-sig").strip()
    except FileNotFoundError:
        text = None

    if text is not None:
        blocks: dict[str, list[str]] = {}
        active = None
        for raw_line in text.splitlines():
            token = raw_line.strip()
            if token.startswith("[") and token.endswith("]"):
                active = token[1:-1].strip()
                blocks[active] = []
                continue
            if active is None:
                continue
            blocks[active].append(raw_line.rstrip())
        prompts["system_prompt"] = "\n".join(blocks.get("SYSTEM_PROMPT", [])).strip()
        prompts["replay_prompt"] = "\n".join(blocks.get("REPLAY_SYSTEM_PROMPT", [])).strip()

    if not prompts["system_prompt"]:
        prompts["system_prompt"] = _load_system_prompt()
    return prompts
f"Could not write prompt: {exc}") + return {"ok": True, "path": str(SCRIPT_PROMPT_PATH), "length": len(payload.content)} + + +@router.post("/reload") +async def reload_prompts(): + rules = _load_rule_prompts() + return { + "ok": True, + "system_prompt": rules["system_prompt"], + "replay_prompt": rules["replay_prompt"], + "script_path": str(SCRIPT_PROMPT_PATH), + "rule_path": str(RULE_PROMPT_PATH), + } diff --git a/vendor/Sanad/dashboard/routes/recognition.py b/vendor/Sanad/dashboard/routes/recognition.py new file mode 100644 index 0000000..b1f9d60 --- /dev/null +++ b/vendor/Sanad/dashboard/routes/recognition.py @@ -0,0 +1,457 @@ +"""Recognition tab — camera vision + face gallery + hot toggles. + +Single router covering: + - Vision / Face Recognition toggles (hot — no Gemini restart needed) + - Live camera preview (latest JPEG drop) + - Face gallery CRUD: enroll, upload, capture, rename, delete, ZIP + - Per-photo download + delete + +Toggle changes write data/.recognition_state.json atomically. The Gemini +child polls that file at 1 Hz and applies changes mid-session. 
+""" + +from __future__ import annotations + +import io +from typing import Optional + +from fastapi import APIRouter, File, HTTPException, Query, UploadFile +from fastapi.responses import FileResponse, Response, StreamingResponse +from pydantic import BaseModel + +from Project.Sanad.config import BASE_DIR +from Project.Sanad.core.logger import get_logger +from Project.Sanad.dashboard.routes._safe_io import check_upload_size +from Project.Sanad.vision import recognition_state + +log = get_logger("recognition_routes") + +router = APIRouter() + + +# ── paths (resolved from BASE_DIR) ────────────────────────── + +STATE_PATH = BASE_DIR / "data" / ".recognition_state.json" +FACES_DIR = BASE_DIR / "data" / "faces" + +ALLOWED_IMAGE_EXTS = {".jpg", ".jpeg", ".png"} + + +# ── helpers ───────────────────────────────────────────────── + +def _get_camera(): + """Lazy import to avoid circular import on dashboard load.""" + try: + from Project.Sanad.main import camera # type: ignore + return camera + except Exception: + return None + + +def _get_gallery(): + """Lazy import — same reason.""" + try: + from Project.Sanad.main import gallery # type: ignore + return gallery + except Exception: + return None + + +def _bump_and_write_state(**changes) -> recognition_state.RecognitionState: + """Apply changes (vision_enabled, face_rec_enabled) and persist.""" + return recognition_state.mutate(STATE_PATH, **changes) + + +def _bump_gallery_version() -> int: + cur = recognition_state.read(STATE_PATH) + new_version = cur.gallery_version + 1 + recognition_state.mutate(STATE_PATH, gallery_version=new_version) + return new_version + + +# ── state + toggles ───────────────────────────────────────── + +@router.get("/state") +async def get_state(): + """Return the current toggle/camera/gallery state.""" + st = recognition_state.read(STATE_PATH) + cam = _get_camera() + gallery = _get_gallery() + faces_count = 0 + photos_count = 0 + if gallery is not None: + try: + entries = gallery.list() + 
faces_count = len(entries) + photos_count = sum(len(e.sample_paths) for e in entries) + except Exception: + pass + return { + "vision_enabled": st.vision_enabled, + "face_rec_enabled": st.face_rec_enabled, + "gallery_version": st.gallery_version, + "camera": cam.status() if cam is not None else { + "running": False, "backend": None, "error": "camera subsystem unavailable" + }, + "faces_count": faces_count, + "photos_count": photos_count, + } + + +@router.post("/vision") +async def set_vision(on: bool = Query(...)): + """Enable / disable camera vision (hot — no Gemini restart).""" + cam = _get_camera() + if cam is None: + log.warning("vision toggle requested but camera subsystem unavailable") + raise HTTPException(503, "Camera subsystem not available.") + + if on and not cam.is_running(): + ok = cam.start() + if not ok: + log.warning("vision ON requested but camera.start() failed: %s", + cam.error or "no backend") + _bump_and_write_state(vision_enabled=False) + raise HTTPException(503, + f"Camera could not start (no backend). {cam.error or ''}") + elif (not on) and cam.is_running(): + cam.stop() + + st = _bump_and_write_state(vision_enabled=bool(on)) + log.info("vision %s (backend=%s)", "ON" if on else "OFF", + cam.backend if cam.is_running() else "none") + return {"ok": True, "vision_enabled": st.vision_enabled, + "camera": cam.status()} + + +@router.post("/face-rec") +async def set_face_rec(on: bool = Query(...)): + """Enable / disable face recognition (hot — no Gemini restart). + + The Gemini child picks the change up within ~1 s: ON re-sends the + gallery primer and tells Gemini it can recognise people; OFF tells + Gemini to disregard the gallery and stop identifying anyone. Both + take effect on the live session — no reconnect needed. 
+ """ + st = _bump_and_write_state(face_rec_enabled=bool(on)) + log.info("face recognition %s", "ON" if on else "OFF") + return {"ok": True, "face_rec_enabled": st.face_rec_enabled} + + +@router.post("/sync") +async def sync_gallery(): + """Bump gallery_version so the child re-sends the primer if face-rec is ON.""" + v = _bump_gallery_version() + log.info("gallery sync requested → v.%d", v) + return {"ok": True, "gallery_version": v} + + +# ── live preview ──────────────────────────────────────────── + +@router.get("/frame.jpg") +async def latest_frame(): + """Serve the most recent camera frame straight from the daemon's + in-memory cache (no file drop — frames are also pushed to the Gemini + child over its stdin).""" + cam = _get_camera() + if cam is None: + raise HTTPException(503, "Camera subsystem unavailable.") + jpeg = cam.snapshot_jpeg() + if not jpeg: + raise HTTPException(404, "No frame captured yet.") + return Response( + content=jpeg, + media_type="image/jpeg", + headers={"Cache-Control": "no-store, must-revalidate"}, + ) + + +# ── camera resolution / quality ───────────────────────────── + +class CameraConfigPayload(BaseModel): + width: Optional[int] = None + height: Optional[int] = None + fps: Optional[int] = None + jpeg_quality: Optional[int] = None + + +@router.post("/camera-config") +async def set_camera_config(payload: CameraConfigPayload): + """Hot-swap the camera capture profile (resolution / fps / JPEG quality). + + If the camera is running, CameraDaemon.reconfigure() rebuilds the + pipeline at the new profile (~0.5 s gap). If idle, the values just + take effect on the next start. 
Bounds are sanity-checked here so a + fat-fingered value can't wedge the daemon.""" + cam = _get_camera() + if cam is None: + raise HTTPException(503, "Camera subsystem unavailable.") + if payload.width is not None and not (160 <= payload.width <= 1920): + raise HTTPException(400, "width out of range (160–1920)") + if payload.height is not None and not (120 <= payload.height <= 1080): + raise HTTPException(400, "height out of range (120–1080)") + if payload.fps is not None and not (1 <= payload.fps <= 60): + raise HTTPException(400, "fps out of range (1–60)") + if payload.jpeg_quality is not None and not (10 <= payload.jpeg_quality <= 95): + raise HTTPException(400, "jpeg_quality out of range (10–95)") + profile = cam.reconfigure( + width=payload.width, height=payload.height, + fps=payload.fps, jpeg_quality=payload.jpeg_quality, + ) + log.info("camera reconfigured via dashboard → %s", profile) + return {"ok": True, "profile": profile, "camera": cam.status()} + + +# ── face gallery routes ───────────────────────────────────── + +def _validate_image(content: bytes, filename: str | None = None) -> None: + """Reject non-JPEG/PNG content + oversize uploads.""" + check_upload_size(content) + if len(content) < 16: + raise HTTPException(400, "Image too small / empty.") + is_jpeg = content[:3] == b"\xff\xd8\xff" + is_png = content[:8] == b"\x89PNG\r\n\x1a\n" + if not (is_jpeg or is_png): + raise HTTPException( + 400, + f"Only JPEG/PNG accepted (got {filename or 'unknown'}).", + ) + + +def _entry_to_dict(entry) -> dict: + photos = [] + for p in entry.sample_paths: + try: + photos.append({"name": p.name, "size_bytes": p.stat().st_size}) + except OSError: + continue + return { + "id": entry.id, + "name": entry.name, + "description": entry.description, + "added_at": entry.added_at, + "photos": photos, + } + + +@router.get("/faces") +async def list_faces(): + gallery = _get_gallery() + if gallery is None: + raise HTTPException(503, "Face gallery subsystem unavailable.") + 
entries = gallery.list() + return {"faces": [_entry_to_dict(e) for e in entries], + "total": len(entries)} + + +class RenamePayload(BaseModel): + name: Optional[str] = None + + +class DescribePayload(BaseModel): + description: Optional[str] = None + + +@router.post("/faces/enroll") +async def enroll_from_camera(name: Optional[str] = Query(default=None), + description: Optional[str] = Query(default=None)): + """Create a new face from the camera's latest snapshot.""" + gallery = _get_gallery() + if gallery is None: + raise HTTPException(503, "Face gallery subsystem unavailable.") + cam = _get_camera() + if cam is None or not cam.is_running(): + raise HTTPException(409, "Camera is not running. Toggle Vision ON first.") + # get_fresh_frame waits briefly for a current frame so the enrolled + # photo is the scene the user is posing for, not a stale buffer. + jpeg = cam.get_fresh_frame(max_age_s=0.5, timeout_s=1.5) + if not jpeg: + raise HTTPException(409, "Camera has no frame yet. Wait a moment and retry.") + entry = gallery.create_face( + [jpeg], + name=name.strip() if name else None, + description=description.strip() if description else None, + ) + v = _bump_gallery_version() + log.info("enrolled face_%d via camera (name=%s, desc=%s, v.%d)", + entry.id, name or "(unnamed)", + "yes" if description else "no", v) + return {"ok": True, "face": _entry_to_dict(entry)} + + +@router.post("/faces/upload") +async def enroll_from_upload( + files: list[UploadFile] = File(...), + name: Optional[str] = Query(default=None), + description: Optional[str] = Query(default=None), +): + """Create a new face from uploaded image file(s).""" + gallery = _get_gallery() + if gallery is None: + raise HTTPException(503, "Face gallery subsystem unavailable.") + if not files: + raise HTTPException(400, "At least one image file required.") + image_bytes: list[bytes] = [] + for f in files: + content = await f.read() + _validate_image(content, f.filename) + image_bytes.append(content) + entry = 
@router.post("/faces/{face_id}/upload")
async def upload_to_face(face_id: int, files: list[UploadFile] = File(...)):
    """Add one or more uploaded samples to an existing face.

    Every upload is read and validated before anything is written, so a
    single bad file rejects the whole request instead of leaving some
    photos stored without a gallery-version bump.

    Raises:
        HTTPException: 503 when the gallery subsystem is unavailable; 404
            when the face does not exist (either up front or when
            ``gallery.add_photo`` raises ``FileNotFoundError``); whatever
            ``_validate_image`` raises for a rejected upload.

    Returns:
        ``{"ok": True, "added": [...], "face": {...}}``.
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    if gallery.get(face_id) is None:
        raise HTTPException(404, f"face_{face_id} not found")

    # Phase 1: read + validate everything (same approach as enroll_from_upload).
    image_bytes: list[bytes] = []
    for f in files:
        content = await f.read()
        _validate_image(content, f.filename)
        image_bytes.append(content)

    # Phase 2: store. If the face disappears midway, still bump the
    # version for the photos that were already written before reporting 404.
    added: list[str] = []
    try:
        for content in image_bytes:
            added.append(gallery.add_photo(face_id, content))
    except FileNotFoundError as exc:
        if added:
            _bump_gallery_version()
        raise HTTPException(404, str(exc)) from exc

    v = _bump_gallery_version()
    log.info("uploaded %d photo(s) to face_%d (v.%d)", len(added), face_id, v)
    return {"ok": True, "added": added,
            "face": _entry_to_dict(gallery.get(face_id))}
names, no traversal + if "/" in photo_name or ".." in photo_name or "\x00" in photo_name: + raise HTTPException(400, "Invalid photo name.") + try: + gallery.delete_photo(face_id, photo_name) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + except ValueError as exc: + raise HTTPException(400, str(exc)) + v = _bump_gallery_version() + log.info("deleted photo %s from face_%d (v.%d)", photo_name, face_id, v) + return {"ok": True, "deleted": photo_name} + + +@router.get("/faces/{face_id}/photo/{photo_name}") +async def get_photo(face_id: int, photo_name: str, + download: int = Query(default=0)): + """Serve a single photo. Add ?download=1 for attachment disposition.""" + gallery = _get_gallery() + if gallery is None: + raise HTTPException(503, "Face gallery subsystem unavailable.") + if "/" in photo_name or ".." in photo_name or "\x00" in photo_name: + raise HTTPException(400, "Invalid photo name.") + path = gallery.get_photo(face_id, photo_name) + if path is None: + raise HTTPException(404, "Photo not found.") + media = "image/png" if path.suffix.lower() == ".png" else "image/jpeg" + headers = {} + if download: + headers["Content-Disposition"] = ( + f'attachment; filename="face_{face_id}_{photo_name}"' + ) + return FileResponse(path, media_type=media, headers=headers) + + +@router.get("/faces/{face_id}/download.zip") +async def download_face_zip(face_id: int): + gallery = _get_gallery() + if gallery is None: + raise HTTPException(503, "Face gallery subsystem unavailable.") + try: + data = gallery.zip_face(face_id) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + return StreamingResponse( + io.BytesIO(data), + media_type="application/zip", + headers={ + "Content-Disposition": f'attachment; filename="face_{face_id}.zip"', + "Content-Length": str(len(data)), + }, + ) diff --git a/vendor/Sanad/dashboard/routes/records.py b/vendor/Sanad/dashboard/routes/records.py new file mode 100644 index 0000000..8846e6d --- /dev/null +++ 
b/vendor/Sanad/dashboard/routes/records.py @@ -0,0 +1,230 @@ +"""Saved records management — list, play, pause, resume, stop, rename, delete. + +Manages WAV recordings saved via the typed replay engine. +""" + +from __future__ import annotations + +import json +import threading +from pathlib import Path +from typing import Any + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from Project.Sanad.config import AUDIO_RECORDINGS_DIR +from Project.Sanad.dashboard.routes._safe_io import ( + safe_filename, safe_path_under, atomic_write_json, +) + +router = APIRouter() + +RECORDS_INDEX = AUDIO_RECORDINGS_DIR / "records.json" +_INDEX_LOCK = threading.Lock() + + +def _load_index() -> dict[str, Any]: + if not RECORDS_INDEX.exists(): + return {"records": [], "total_records": 0, "last_updated": ""} + try: + with open(RECORDS_INDEX, "r", encoding="utf-8") as f: + return json.load(f) + except (json.JSONDecodeError, OSError): + # Backup corrupt file rather than nuking it + try: + RECORDS_INDEX.rename(RECORDS_INDEX.with_suffix(".json.corrupt")) + except OSError: + pass + return {"records": [], "total_records": 0, "last_updated": ""} + + +def _save_index(payload: dict[str, Any]): + AUDIO_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True) + payload["total_records"] = len(payload.get("records", [])) + atomic_write_json(RECORDS_INDEX, payload) + + +def _resolve_path(path_str: str) -> Path: + """Resolve record path — basename / relative / absolute. + + Legacy records stored absolute paths. New records store basenames. + Both flavors resolve to a real file under AUDIO_RECORDINGS_DIR. 
# Strong references to in-flight playback tasks. The event loop keeps only
# weak references to tasks, so a fire-and-forget task that nothing else
# references may be garbage-collected before it completes.
_PLAYBACK_TASKS: set = set()


@router.post("/play")
async def play_record(payload: RecordPlay):
    """Start playing one of a record's WAV files in the background.

    ``payload.file_kind == "speaker"`` selects the ``speaker_recording`` file;
    any other value selects ``gemini_raw_output``.

    Raises:
        HTTPException 404: the record is not in the index, the index entry
            has no usable path for the requested kind, or the file is
            missing on disk.
        HTTPException 400: the stored path resolves outside
            ``AUDIO_RECORDINGS_DIR``.

    Returns immediately with the resolved path; playback continues in a
    worker thread.
    """
    with _INDEX_LOCK:
        index = _load_index()
        entry = next(
            (r for r in index.get("records", [])
             if r.get("record_name") == payload.record_name),
            None,
        )
    if entry is None:
        raise HTTPException(404, f"Record not found: {payload.record_name}")

    file_key = "speaker_recording" if payload.file_kind == "speaker" else "gemini_raw_output"
    try:
        stored_path = entry["files"][file_key]["path"]
    except (KeyError, TypeError):
        stored_path = None
    if not stored_path or not isinstance(stored_path, str):
        # A malformed index entry is a missing file, not a server error.
        raise HTTPException(404, f"File not found: {payload.record_name} ({file_key})")

    raw_path = _resolve_path(stored_path).resolve()
    base = AUDIO_RECORDINGS_DIR.resolve()
    try:
        raw_path.relative_to(base)
    except ValueError:
        raise HTTPException(400, "Record path outside recordings directory.")
    # is_file() rather than exists(): a directory is not a playable clip.
    if not raw_path.is_file():
        raise HTTPException(404, f"File not found: {raw_path.name}")

    from Project.Sanad.main import audio_mgr
    import asyncio
    # Fire-and-forget — play_wav blocks for the clip duration on the G1
    # DDS path, and the dashboard's pause / resume / stop / status calls
    # need to be served while it's running. Without this, /play wouldn't
    # return until the clip finished and the UI couldn't interact with
    # the in-flight playback.
    task = asyncio.create_task(asyncio.to_thread(
        audio_mgr.play_wav, raw_path, payload.record_name,
    ))
    # Hold the task until it finishes, then drop the reference.
    _PLAYBACK_TASKS.add(task)
    task.add_done_callback(_PLAYBACK_TASKS.discard)
    return {"ok": True, "record_name": payload.record_name,
            "file_kind": payload.file_kind, "path": str(raw_path)}
@router.post("/delete")
async def delete_record(payload: RecordDelete):
    """Remove a record from the index and delete its audio files.

    Only the first index entry whose ``record_name`` matches is removed.
    Stored paths are resolved with ``_resolve_path`` so that basename-style
    entries are looked up under ``AUDIO_RECORDINGS_DIR`` (not the process
    working directory), the same way the play and rename handlers do.
    Files that resolve outside the recordings directory are never touched.

    Raises:
        HTTPException 404: no record with that name exists.

    Returns:
        ``{"ok", "deleted", "deleted_files"}`` where ``deleted_files`` lists
        the absolute paths that were actually unlinked.
    """
    with _INDEX_LOCK:
        index = _load_index()
        kept = []
        deleted_entry = None
        for r in index.get("records", []):
            if r.get("record_name") == payload.record_name and deleted_entry is None:
                deleted_entry = r
            else:
                kept.append(r)

        if deleted_entry is None:
            raise HTTPException(404, f"Record not found: {payload.record_name}")

        base = AUDIO_RECORDINGS_DIR.resolve()
        deleted_files = []
        files = deleted_entry.get("files")
        for fi in (files.values() if isinstance(files, dict) else ()):
            stored = fi.get("path") if isinstance(fi, dict) else None
            # An empty path would resolve to the recordings directory itself.
            if not isinstance(stored, str) or not stored:
                continue
            try:
                p = _resolve_path(stored).resolve()
                p.relative_to(base)  # never delete files outside recordings dir
            except (ValueError, OSError):
                continue
            if p.is_file():
                p.unlink()
                deleted_files.append(str(p))

        index["records"] = kept
        _save_index(index)
    return {"ok": True, "deleted": payload.record_name, "deleted_files": deleted_files}
+""" + +from __future__ import annotations + +import asyncio + +from fastapi import APIRouter, HTTPException, UploadFile, File +from fastapi.responses import FileResponse +from pydantic import BaseModel + +from Project.Sanad.config import MOTIONS_DIR +from Project.Sanad.core.logger import get_logger +from Project.Sanad.dashboard.routes._safe_io import ( + safe_path_under, check_upload_size, atomic_write_bytes, +) + +log = get_logger("replay_route") +router = APIRouter() + + +def _block_if_movement_armed(): + """409 when locomotion movement is armed — arm motion (replay / teaching) is + mutually exclusive with walking.""" + try: + from Project.Sanad.main import loco_controller # type: ignore + armed = loco_controller is not None and loco_controller.is_armed() + except HTTPException: + raise + except Exception: + return + if armed: + raise HTTPException( + 409, "Arm actions are disabled while movement is enabled. " + "Disable movement in the Controller tab first.") + + +# -- models -- + +class ReplayRequest(BaseModel): + name: str + speed: float = 1.0 + +class RenameRequest(BaseModel): + old_name: str + new_name: str + +class TeachRequest(BaseModel): + name: str + duration_sec: float = 15.0 + + +# -- motion file CRUD -- + +@router.get("/files") +async def list_motion_files(): + from Project.Sanad.main import arm + return {"files": arm.list_motion_files()} + + +@router.get("/files/{filename}") +async def download_motion_file(filename: str): + path = safe_path_under(MOTIONS_DIR, filename) + if not path.exists(): + raise HTTPException(404, "File not found.") + return FileResponse(path, filename=path.name, media_type="application/json") + + +@router.post("/files/upload") +async def upload_motion_file(file: UploadFile = File(...)): + if not file.filename or not file.filename.lower().endswith(".jsonl"): + raise HTTPException(400, "Only .jsonl files accepted.") + MOTIONS_DIR.mkdir(parents=True, exist_ok=True) + dest = safe_path_under(MOTIONS_DIR, file.filename) + content = 
await file.read() + check_upload_size(content) + atomic_write_bytes(dest, content) + return {"ok": True, "name": dest.name, "size_bytes": len(content)} + + +@router.post("/files/rename") +async def rename_motion_file(payload: RenameRequest): + old = safe_path_under(MOTIONS_DIR, payload.old_name) + new = safe_path_under(MOTIONS_DIR, payload.new_name) + if not old.exists(): + raise HTTPException(404, f"File not found: {payload.old_name}") + if new.exists(): + raise HTTPException(409, f"File already exists: {payload.new_name}") + old.rename(new) + return {"ok": True, "old_name": old.name, "new_name": new.name} + + +@router.delete("/files/{filename}") +async def delete_motion_file(filename: str): + path = safe_path_under(MOTIONS_DIR, filename) + if not path.exists(): + raise HTTPException(404, "File not found.") + path.unlink() + return {"ok": True, "deleted": path.name} + + +# -- test replay -- + +_BG_TASKS: set[asyncio.Task] = set() + + +@router.post("/test") +async def test_replay(payload: ReplayRequest): + """Test-play a motion file at the given speed.""" + from Project.Sanad.main import arm + _block_if_movement_armed() + if arm.is_busy: + raise HTTPException(409, "Arm is busy.") + path = safe_path_under(MOTIONS_DIR, payload.name) + if not path.exists(): + raise HTTPException(404, f"Motion file not found: {path.name}") + + async def _run(): + try: + await asyncio.to_thread(arm.replay_file, str(path), payload.speed) + except Exception: + log.exception("Test replay failed") + + task = asyncio.create_task(_run()) + _BG_TASKS.add(task) + task.add_done_callback(_BG_TASKS.discard) + return {"ok": True, "name": path.name, "speed": payload.speed} + + +@router.post("/cancel") +async def cancel_replay(): + """Stop the current replay — the smooth return-to-home runs as the + final phase of the replay itself. + + Matches g1_replay_v4_stable.py's behaviour: the play loop breaks on + the cancel flag, then the same Run() function executes its + return-home ramp + DisableSDK. 
@router.post("/teach/start")
async def start_teaching(payload: TeachRequest):
    """Begin a teaching (motion recording) session.

    Raises:
        HTTPException 503: the teaching module is not available.
        HTTPException 409: a session is already recording, or a motion file
            named ``<name>.jsonl`` already exists. ``_block_if_movement_armed``
            may also raise 409 when locomotion is armed.

    Returns whatever ``teacher.start(name, duration_sec)`` returns.
    """
    from Project.Sanad.main import teacher
    if teacher is None:
        raise HTTPException(503, "Teaching module not available.")
    _block_if_movement_armed()
    if teacher.is_recording:
        raise HTTPException(409, "Teaching session already active.")
    # Build the target through safe_path_under like the other motion-file
    # handlers in this module, instead of joining the client-supplied name
    # onto MOTIONS_DIR directly.
    # NOTE(review): presumably safe_path_under rejects names that escape
    # MOTIONS_DIR — confirm against _safe_io.
    existing = safe_path_under(MOTIONS_DIR, f"{payload.name}.jsonl")
    if existing.exists():
        raise HTTPException(409, f"Motion file already exists: {payload.name}.jsonl")
    return teacher.start(payload.name, payload.duration_sec)
def _safe_path(name: str) -> Path:
    """Map a client-supplied script name to a path inside ``SCRIPTS_DIR``.

    Args:
        name: Bare file name; surrounding whitespace is stripped.

    Returns:
        The resolved absolute path of the script.

    Raises:
        HTTPException 400: the name is empty, is ``.`` or ``..``, contains a
            path separator or NUL byte, or resolves outside ``SCRIPTS_DIR``.
    """
    cleaned = name.strip()
    if (
        not cleaned
        or "/" in cleaned
        or "\\" in cleaned
        or "\0" in cleaned  # resolve() raises ValueError on embedded NUL
        or cleaned in {".", ".."}
    ):
        raise HTTPException(400, "Invalid script name.")
    base = SCRIPTS_DIR.resolve()
    path = (SCRIPTS_DIR / cleaned).resolve()
    # Compare path components, not string prefixes: a prefix test would
    # accept a sibling such as "<SCRIPTS_DIR>_old/x".
    try:
        path.relative_to(base)
    except ValueError:
        raise HTTPException(400, "Path traversal denied.")
    return path
raise HTTPException(413, f"Content too large (max {MAX_SCRIPT_BYTES} bytes).") + path = _safe_path(payload.name) + if path.exists(): + raise HTTPException(409, f"File already exists: {payload.name}") + SCRIPTS_DIR.mkdir(parents=True, exist_ok=True) + atomic_write_text(path, payload.content) + return {"ok": True, "name": path.name} + + +class ScriptRename(BaseModel): + old_name: str + new_name: str + +@router.post("/rename") +async def rename_script(payload: ScriptRename): + old = _safe_path(payload.old_name) + new = _safe_path(payload.new_name) + if not old.exists(): + raise HTTPException(404, f"Not found: {payload.old_name}") + if new.exists(): + raise HTTPException(409, f"Already exists: {payload.new_name}") + old.rename(new) + return {"ok": True, "old_name": payload.old_name, "new_name": new.name} + + +class ScriptDelete(BaseModel): + name: str + +@router.post("/delete") +async def delete_script(payload: ScriptDelete): + path = _safe_path(payload.name) + if not path.exists(): + raise HTTPException(404, f"Not found: {payload.name}") + path.unlink() + return {"ok": True, "deleted": payload.name} diff --git a/vendor/Sanad/dashboard/routes/skills.py b/vendor/Sanad/dashboard/routes/skills.py new file mode 100644 index 0000000..693215c --- /dev/null +++ b/vendor/Sanad/dashboard/routes/skills.py @@ -0,0 +1,101 @@ +"""Skill registry CRUD endpoints + skill execution.""" + +from __future__ import annotations + +from fastapi import APIRouter, HTTPException, UploadFile, File +from pydantic import BaseModel + +from Project.Sanad.config import AUDIO_RECORDINGS_DIR +from Project.Sanad.dashboard.routes._safe_io import ( + safe_path_under, check_upload_size, atomic_write_bytes, +) + +router = APIRouter() + + +class SkillCreate(BaseModel): + id: str = "" + audio_file: str = "" + motion_file: str = "" + callback: str = "" + sync_mode: str = "parallel" + enabled: bool = True + description: str = "" + + +class SkillUpdate(BaseModel): + audio_file: str | None = None + motion_file: 
str | None = None + callback: str | None = None + sync_mode: str | None = None + enabled: bool | None = None + description: str | None = None + + +@router.get("/") +async def list_skills(): + from Project.Sanad.main import brain + return {"skills": brain.registry.list_skills()} + + +@router.get("/{skill_id}") +async def get_skill(skill_id: str): + from Project.Sanad.main import brain + skill = brain.registry.get(skill_id) + if skill is None: + raise HTTPException(404, f"Skill not found: {skill_id}") + return skill.to_dict() + + +@router.post("/") +async def create_skill(payload: SkillCreate): + from Project.Sanad.main import brain + from Project.Sanad.core.skill_registry import Skill + try: + skill = Skill(**payload.model_dump()) + created = brain.registry.add(skill) + except ValueError as exc: + raise HTTPException(400, str(exc)) + return {"ok": True, "skill": created.to_dict()} + + +@router.put("/{skill_id}") +async def update_skill(skill_id: str, payload: SkillUpdate): + from Project.Sanad.main import brain + updates = {k: v for k, v in payload.model_dump().items() if v is not None} + try: + updated = brain.registry.update(skill_id, updates) + except ValueError as exc: + raise HTTPException(400, str(exc)) + if updated is None: + raise HTTPException(404, f"Skill not found: {skill_id}") + return {"ok": True, "skill": updated.to_dict()} + + +@router.delete("/{skill_id}") +async def delete_skill(skill_id: str): + from Project.Sanad.main import brain + deleted = brain.registry.delete(skill_id) + if not deleted: + raise HTTPException(404, f"Skill not found: {skill_id}") + return {"ok": True, "deleted": deleted} + + +@router.post("/{skill_id}/execute") +async def execute_skill(skill_id: str): + from Project.Sanad.main import brain + result = await brain.execute_skill(skill_id) + return result + + +@router.post("/upload-audio") +async def upload_audio(file: UploadFile = File(...)): + """Upload a .wav file for skill binding.""" + if not file.filename or not 
file.filename.lower().endswith(".wav"): + raise HTTPException(400, "Only .wav files are accepted.") + AUDIO_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True) + dest = safe_path_under(AUDIO_RECORDINGS_DIR, file.filename) + content = await file.read() + check_upload_size(content) + atomic_write_bytes(dest, content) + return {"ok": True, "path": str(dest), "size_bytes": len(content)} diff --git a/vendor/Sanad/dashboard/routes/system.py b/vendor/Sanad/dashboard/routes/system.py new file mode 100644 index 0000000..4839579 --- /dev/null +++ b/vendor/Sanad/dashboard/routes/system.py @@ -0,0 +1,133 @@ +"""System information endpoints — network, subsystems, dashboard binding.""" + +from __future__ import annotations + +import asyncio +import os +import platform +import socket +import sys +from typing import Any + +from fastapi import APIRouter + +from Project.Sanad.config import ( + BASE_DIR, + DASHBOARD_HOST, + DASHBOARD_INTERFACE, + DASHBOARD_PORT, + DDS_NETWORK_INTERFACE, + list_network_interfaces, +) +from Project.Sanad.core.logger import get_logger + +log = get_logger("system_route") + +router = APIRouter() + + +def _safe_status(component, name: str) -> dict[str, Any]: + if component is None: + return {"available": False} + try: + if hasattr(component, "status") and callable(component.status): + s = component.status() + if not isinstance(s, dict): + s = {"raw": str(s)} + s.setdefault("available", True) + return s + return {"available": True} + except Exception as exc: + log.warning("status() failed for %s: %s", name, exc) + return {"available": True, "error": str(exc)} + + +@router.get("/info") +async def system_info(): + """One-shot system snapshot for the dashboard system panel.""" + def _do(): + # Subsystems + try: + from Project.Sanad.main import SUBSYSTEMS + except Exception: + SUBSYSTEMS = {} + + subsystem_list = [] + for name in sorted(SUBSYSTEMS): + comp = SUBSYSTEMS[name] + entry = { + "name": name, + "connected": comp is not None, + } + if comp is not None 
and hasattr(comp, "status") and callable(comp.status): + try: + s = comp.status() + if isinstance(s, dict): + entry["status"] = s + except Exception as exc: + entry["status_error"] = str(exc) + subsystem_list.append(entry) + + connected_count = sum(1 for s in subsystem_list if s["connected"]) + + # Audio device current selection (best-effort) + audio_info = {} + try: + from Project.Sanad.voice import audio_devices as ad + audio_info = { + "pactl_available": ad.pactl_available(), + "current": ad.current_selection(), + "detected_profile_ids": [ + d["profile"]["id"] for d in ad.detect_plugged_profiles() + ] if ad.pactl_available() else [], + } + except Exception as exc: + audio_info = {"error": str(exc)} + + # Network interfaces + try: + interfaces = list_network_interfaces() + except Exception: + interfaces = [] + + # Determine the URL the dashboard is reachable at + bound_host = DASHBOARD_HOST + if bound_host == "0.0.0.0": + # Try to find the wlan0 IP for display purposes + up_ifaces = [i for i in interfaces if i["is_up"] and i["ip"] and not i["ip"].startswith("127.")] + display_host = up_ifaces[0]["ip"] if up_ifaces else bound_host + else: + display_host = bound_host + + return { + "host": { + "hostname": socket.gethostname(), + "platform": platform.platform(), + "python": sys.version.split()[0], + "executable": sys.executable, + "base_dir": str(BASE_DIR), + "pid": os.getpid(), + }, + "dashboard": { + "interface": DASHBOARD_INTERFACE, + "bound_host": bound_host, + "display_host": display_host, + "port": DASHBOARD_PORT, + "url": f"http://{display_host}:{DASHBOARD_PORT}", + }, + "dds": { + "interface": DDS_NETWORK_INTERFACE, + }, + "network": { + "interfaces": interfaces, + }, + "subsystems": { + "total": len(subsystem_list), + "connected": connected_count, + "disconnected": len(subsystem_list) - connected_count, + "list": subsystem_list, + }, + "audio": audio_info, + } + + return await asyncio.to_thread(_do) diff --git a/vendor/Sanad/dashboard/routes/temp_monitor.py 
b/vendor/Sanad/dashboard/routes/temp_monitor.py new file mode 100644 index 0000000..266e7b4 --- /dev/null +++ b/vendor/Sanad/dashboard/routes/temp_monitor.py @@ -0,0 +1,67 @@ +"""REST endpoints backing the 3D motor-temperature dashboard (N1). + +Serves the motor name/mesh mapping + thresholds, and a one-shot temperature +snapshot (the front-end's initial fetch fallback). The live stream is over +/ws/motor-temps (dashboard/websockets/motor_temps.py). The 3D view itself is +the static page at /static/temp3d/index.html. +""" + +from __future__ import annotations + +import time + +from fastapi import APIRouter + +from Project.Sanad.dashboard.temp_motor_map import ( + MOTOR_NAMES, + MOTOR_TO_MESH, + TEMP_HOT_THRESHOLD, + TEMP_MAX, + TEMP_MIN, + TEMP_WARM_THRESHOLD, + build_payload, +) + +router = APIRouter() + + +def _get_arm(): + """Lazy import — avoids a circular import on dashboard load.""" + try: + from Project.Sanad.main import arm # type: ignore + return arm + except Exception: + return None + + +@router.get("/mapping") +async def motor_mapping(): + """Motor id → name / mesh map + the temperature gradient thresholds.""" + return { + "motor_names": MOTOR_NAMES, + "motor_to_mesh": MOTOR_TO_MESH, + "thresholds": { + "min": TEMP_MIN, + "max": TEMP_MAX, + "warm": TEMP_WARM_THRESHOLD, + "hot": TEMP_HOT_THRESHOLD, + }, + } + + +@router.get("/motors") +async def motors_snapshot(): + """One-shot motor temperature + position snapshot (Marcus payload shape).""" + arm = _get_arm() + temps: list = [] + positions: list = [] + if arm is not None: + try: + temps = arm.get_motor_temps() + except Exception: + temps = [] + try: + positions = arm.get_current_q() + except Exception: + positions = [] + return build_payload(temps, positions, time.time()) diff --git a/vendor/Sanad/dashboard/routes/typed_replay.py b/vendor/Sanad/dashboard/routes/typed_replay.py new file mode 100644 index 0000000..efc0c44 --- /dev/null +++ b/vendor/Sanad/dashboard/routes/typed_replay.py @@ -0,0 +1,146 @@ 
+"""Typed Replay dashboard endpoints. + +Full CRUD over the records index: + POST /say generate + play + optionally record + POST /replay-last re-play cached audio + POST /save-last persist cached generation + GET /records list + GET /records/{name} get one + POST /records/{name}/play play saved WAV (speaker or raw) + POST /records/{name}/rename rename + DELETE /records/{name} delete + GET /status engine + session state +""" + +from __future__ import annotations + +import asyncio +from typing import Literal + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from Project.Sanad.core.config_loader import section as _cfg_section +router = APIRouter() + +# MAX_TEXT_LEN — SINGLE SOURCE in dashboard.api_input +MAX_TEXT_LEN = _cfg_section("dashboard", "api_input").get("max_text_len", 2000) + + +class SayPayload(BaseModel): + text: str + record: bool = False + record_name: str = "" + + +class SaveLastPayload(BaseModel): + record_name: str = "" + + +class RenamePayload(BaseModel): + new_name: str + + +class PlayRecordPayload(BaseModel): + file_kind: Literal["speaker", "raw"] = "speaker" + + +def _engine(): + from Project.Sanad.main import typed_replay + if typed_replay is None: + raise HTTPException(503, "TypedReplayEngine not initialized.") + return typed_replay + + +# ───────────────────── generate / replay ───────────────────── + +@router.post("/say") +async def say(payload: SayPayload): + if not payload.text or not payload.text.strip(): + raise HTTPException(400, "text cannot be empty") + if len(payload.text) > MAX_TEXT_LEN: + raise HTTPException(413, f"text too long (max {MAX_TEXT_LEN})") + eng = _engine() + try: + return await eng.say(payload.text, record=payload.record, + record_name=payload.record_name) + except ValueError as exc: + raise HTTPException(400, str(exc)) + except RuntimeError as exc: + raise HTTPException(503, str(exc)) + + +@router.post("/replay-last") +async def replay_last(): + eng = _engine() + try: + return await 
asyncio.to_thread(eng.replay_last) + except RuntimeError as exc: + raise HTTPException(400, str(exc)) + + +@router.post("/save-last") +async def save_last(payload: SaveLastPayload): + eng = _engine() + try: + return {"ok": True, "record": eng.save_last(payload.record_name)} + except RuntimeError as exc: + raise HTTPException(400, str(exc)) + + +# ───────────────────── record CRUD ─────────────────────────── + +@router.get("/records") +async def list_records(): + return _engine().list_records() + + +@router.get("/records/{name}") +async def get_record(name: str): + try: + return _engine().find_record(name) + except KeyError: + raise HTTPException(404, f"record not found: {name}") + + +@router.post("/records/{name}/play") +async def play_record(name: str, payload: PlayRecordPayload): + eng = _engine() + try: + return await asyncio.to_thread(eng.play_record, name, payload.file_kind) + except KeyError: + raise HTTPException(404, f"record not found: {name}") + except FileNotFoundError as exc: + raise HTTPException(410, f"file missing on disk: {exc}") + except RuntimeError as exc: + raise HTTPException(503, str(exc)) + + +@router.post("/records/{name}/rename") +async def rename_record(name: str, payload: RenamePayload): + eng = _engine() + try: + return {"ok": True, "record": eng.rename_record(name, payload.new_name)} + except KeyError: + raise HTTPException(404, f"record not found: {name}") + except ValueError as exc: + raise HTTPException(400, str(exc)) + + +@router.delete("/records/{name}") +async def delete_record(name: str): + eng = _engine() + try: + return {"ok": True, **eng.delete_record(name)} + except KeyError: + raise HTTPException(404, f"record not found: {name}") + + +# ───────────────────── status ──────────────────────────────── + +@router.get("/status") +async def status(): + from Project.Sanad.main import typed_replay + if typed_replay is None: + return {"available": False} + return {"available": True, **typed_replay.status()} diff --git 
a/vendor/Sanad/dashboard/routes/voice.py b/vendor/Sanad/dashboard/routes/voice.py new file mode 100644 index 0000000..a3d1b88 --- /dev/null +++ b/vendor/Sanad/dashboard/routes/voice.py @@ -0,0 +1,237 @@ +"""Voice endpoints — Gemini interaction, local TTS, prompt management.""" + +from __future__ import annotations + +import asyncio + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.logger import get_logger + +log = get_logger("voice_route") + +router = APIRouter() + +_VR = _cfg_section("dashboard", "voice_route") +_API = _cfg_section("dashboard", "api_input") +# MAX_TEXT_LEN — SINGLE SOURCE in dashboard.api_input +MAX_TEXT_LEN = _API.get("max_text_len", 2000) +_API_KEY_MASK_VISIBLE = _VR.get("api_key_mask_visible", 4) + + +def _mask_api_key(key: str) -> str: + """Mask an API key for display — keeps 4 chars on each end. + + Examples: + "" → "" + "AIza123" → "*******" (≤8 chars = full mask) + "AIzaSy...kqf8" → "AIza***...kqf8" (>8 chars = partial mask) + """ + if not key: + return "" + if len(key) <= 8: + return "*" * len(key) + return f"{key[:4]}{'*' * (len(key) - 8)}{key[-4:]}" + + +class TextPayload(BaseModel): + text: str + engine: str = "gemini" # "gemini" | "local" + + +@router.get("/status") +async def voice_status(): + from Project.Sanad.main import voice_client, local_tts + return { + "gemini": voice_client.status() if voice_client else {}, + "local_tts": local_tts.status() if local_tts else {}, + } + + +@router.post("/generate") +async def generate_speech(payload: TextPayload): + """Generate speech from text using Gemini or local TTS.""" + if not payload.text.strip(): + raise HTTPException(400, "Text cannot be empty.") + if len(payload.text) > MAX_TEXT_LEN: + raise HTTPException(413, f"Text too long (max {MAX_TEXT_LEN} chars).") + + from Project.Sanad.main import voice_client, local_tts, audio_mgr + + if payload.engine == "local": + 
if local_tts is None: + raise HTTPException(503, "Local TTS not available.") + pcm = await asyncio.to_thread(local_tts.synthesize, payload.text) + if audio_mgr: + await asyncio.to_thread(audio_mgr.play_pcm, pcm, 1, 16000, 2) + return { + "ok": True, + "engine": "local", + "duration_sec": round(len(pcm) / (16000 * 2), 3), + } + else: + if voice_client is None: + raise HTTPException(503, "Voice client not initialized.") + if not voice_client.connected: + try: + await voice_client.connect() + except Exception: + log.exception("Gemini reconnect failed in /generate") + raise HTTPException(503, "Gemini not connected and reconnect failed.") + # Check session ownership — TypedReplay or live loop may hold it + if voice_client.session_owner is not None: + raise HTTPException( + 409, + f"Voice session busy (owned by {voice_client.session_owner})", + ) + try: + audio_bytes, text_parts = await voice_client.send_text( + payload.text, owner="voice_route" + ) + except RuntimeError as exc: + raise HTTPException(503, str(exc)) + except Exception as exc: + raise HTTPException(502, f"Gemini communication error: {exc}") + if audio_bytes and audio_mgr: + await asyncio.to_thread(audio_mgr.play_pcm, audio_bytes, 1, 24000, 2) + return { + "ok": True, + "engine": "gemini", + "has_audio": bool(audio_bytes), + "text_response": text_parts, + } + + +@router.post("/connect") +async def connect_gemini(): + from Project.Sanad.main import voice_client + if voice_client is None: + raise HTTPException(503, "Voice client not initialized.") + try: + await voice_client.connect() + except Exception as exc: + raise HTTPException(502, f"Gemini connection failed: {exc}") + return {"connected": voice_client.connected} + + +@router.post("/disconnect") +async def disconnect_gemini(): + from Project.Sanad.main import voice_client + if voice_client: + await voice_client.disconnect() + return {"connected": False} + + +# ─────────────────────── Gemini API key management ─────────────────────── + +class 
ApiKeyPayload(BaseModel): + api_key: str + + +@router.get("/api-key") +async def get_api_key(): + """Return the current Gemini API key in masked form. + + Never returns the full key. Response: + { + "has_key": true, + "masked": "AIza***...kqf8", + "length": 39, + "source": "config_file" | "default" + } + """ + import Project.Sanad.config as cfg_mod + key = getattr(cfg_mod, "GEMINI_API_KEY", "") or "" + # Detect where the value came from (persisted override vs module default) + try: + from Project.Sanad.config import load_config + stored = load_config().get("gemini", {}) or {} + source = "config_file" if stored.get("api_key") else "default" + except Exception: + source = "default" + return { + "has_key": bool(key), + "masked": _mask_api_key(key), + "length": len(key), + "source": source, + } + + +@router.post("/api-key") +async def update_api_key(payload: ApiKeyPayload): + """Update the Gemini API key — persists to data/motions/config.json and + hot-swaps the in-memory value so the next Gemini connect uses it. + + Also disconnects any currently-connected Gemini session so that the + next reconnect picks up the new key cleanly. Returns the NEW masked + key + a flag telling the dashboard to trigger a reconnect. + """ + key = payload.api_key.strip() + if not key: + raise HTTPException(400, "API key cannot be empty.") + if len(key) < 20: + raise HTTPException(400, "API key looks too short.") + if not key.startswith("AIza"): + raise HTTPException( + 400, + "Gemini API keys normally start with 'AIza'. 
" + "Double-check you're pasting a Google AI Studio key.", + ) + + # Persist to data/motions/config.json (atomic temp-then-replace) + try: + from Project.Sanad.config import load_config, save_config + cfg = load_config() or {} + gemini_cfg = cfg.get("gemini") if isinstance(cfg.get("gemini"), dict) else {} + gemini_cfg["api_key"] = key + cfg["gemini"] = gemini_cfg + save_config(cfg) + except Exception as exc: + log.exception("Failed to persist API key to config.json") + raise HTTPException(500, f"Could not save config: {exc}") + + # Hot-swap the in-memory module globals. + # Both Project.Sanad.config AND Project.Sanad.gemini.client + # have their OWN reference to GEMINI_API_KEY (the latter was created + # at `from Project.Sanad.config import GEMINI_API_KEY` at import time). + # Python's `from X import Y` binds a local name — updating config.Y + # alone does NOT propagate to the importer, so we must patch both. + try: + import Project.Sanad.config as _cfg_mod + _cfg_mod.GEMINI_API_KEY = key + except Exception: + log.exception("could not patch config.GEMINI_API_KEY") + + try: + import Project.Sanad.gemini.client as _gc + _gc.GEMINI_API_KEY = key + except Exception: + log.exception("could not patch gemini.client.GEMINI_API_KEY") + + # Disconnect any live session so reconnect uses the new key. + from Project.Sanad.main import voice_client + was_connected = False + if voice_client is not None: + was_connected = bool(getattr(voice_client, "connected", False)) + if was_connected: + try: + await voice_client.disconnect() + except Exception: + log.exception("disconnect during api-key swap failed") + + log.info("Gemini API key updated (length=%d) source=config_file", len(key)) + + return { + "ok": True, + "masked": _mask_api_key(key), + "length": len(key), + "source": "config_file", + "was_connected": was_connected, + "message": ( + "API key saved. Click 'Connect' to reopen the Gemini session with " + "the new key. 
Any running Live Gemini subprocess must be restarted " + "separately (Stop → Start) to pick up the new key." + ), + } diff --git a/vendor/Sanad/dashboard/routes/wake_phrases.py b/vendor/Sanad/dashboard/routes/wake_phrases.py new file mode 100644 index 0000000..66b65a2 --- /dev/null +++ b/vendor/Sanad/dashboard/routes/wake_phrases.py @@ -0,0 +1,72 @@ +"""Wake-phrase CRUD endpoints. + +Lets the dashboard edit the wake-phrase → action mapping stored in +data/wake_phrases.json. +""" + +from __future__ import annotations + +from typing import Optional + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +router = APIRouter() + + +class WakePhrasePayload(BaseModel): + phrase: str + action_id: str + + +class EnablePayload(BaseModel): + phrase: str + action_id: str + enabled: bool + + +def _mgr(): + from Project.Sanad.main import wake_mgr + if wake_mgr is None: + raise HTTPException(503, "WakePhraseManager not initialized.") + return wake_mgr + + +@router.get("/") +async def list_phrases(): + m = _mgr() + return { + "status": m.status(), + "phrases": m.list(), + } + + +@router.post("/") +async def add_phrase(payload: WakePhrasePayload): + m = _mgr() + try: + entry = m.add(payload.phrase, payload.action_id) + except ValueError as exc: + raise HTTPException(400, str(exc)) + return {"ok": True, "entry": entry} + + +@router.delete("/") +async def remove_phrase(phrase: str, action_id: Optional[str] = None): + m = _mgr() + removed = m.remove(phrase, action_id) + return {"ok": True, "removed": removed} + + +@router.post("/enable") +async def set_enabled(payload: EnablePayload): + m = _mgr() + ok = m.set_enabled(payload.phrase, payload.action_id, payload.enabled) + if not ok: + raise HTTPException(404, "phrase+action_id not found") + return {"ok": True} + + +@router.get("/status") +async def status(): + return _mgr().status() diff --git a/vendor/Sanad/dashboard/routes/zones.py b/vendor/Sanad/dashboard/routes/zones.py new file mode 100644 index 
0000000..395077b --- /dev/null +++ b/vendor/Sanad/dashboard/routes/zones.py @@ -0,0 +1,421 @@ +"""Zones tab — zone → place → linked-faces management + "go here" destination. + +Hierarchy (replaces the old flat places): + Zone (name + description) + └─ Place (name + description + optional reference photos + linked face ids) + +Routes live under /api/zones. Toggle + CRUD changes write +data/.recognition_state.json (the SAME file faces use); the Gemini child polls +it at 1 Hz and re-primes / announces mid-session. The "go here" endpoints set a +navigation target the robot will head to once N2 locomotion is wired — for now +they just record the target and feed Gemini the place's reference. +""" + +from __future__ import annotations + +import io +from typing import Optional + +from fastapi import APIRouter, File, HTTPException, Query, UploadFile +from fastapi.responses import FileResponse, StreamingResponse +from pydantic import BaseModel + +from Project.Sanad.config import BASE_DIR +from Project.Sanad.core.logger import get_logger +from Project.Sanad.dashboard.routes._safe_io import check_upload_size +from Project.Sanad.vision import recognition_state + +log = get_logger("zones_routes") + +router = APIRouter() + +STATE_PATH = BASE_DIR / "data" / ".recognition_state.json" + + +# ── lazy subsystem accessors ──────────────────────────────── + +def _get_camera(): + try: + from Project.Sanad.main import camera # type: ignore + return camera + except Exception: + return None + + +def _get_zone_gallery(): + try: + from Project.Sanad.main import zone_gallery # type: ignore + return zone_gallery + except Exception: + return None + + +def _get_face_gallery(): + try: + from Project.Sanad.main import gallery # type: ignore + return gallery + except Exception: + return None + + +def _require_zones(): + g = _get_zone_gallery() + if g is None: + raise HTTPException(503, "Zone gallery subsystem unavailable.") + return g + + +def _bump_zones_version() -> int: + cur = 
recognition_state.read(STATE_PATH) + v = cur.zones_version + 1 + recognition_state.mutate(STATE_PATH, zones_version=v) + return v + + +def _validate_image(content: bytes, filename: str | None = None) -> None: + check_upload_size(content) + if len(content) < 16: + raise HTTPException(400, "Image too small / empty.") + if not (content[:3] == b"\xff\xd8\xff" or content[:8] == b"\x89PNG\r\n\x1a\n"): + raise HTTPException(400, f"Only JPEG/PNG accepted (got {filename or 'unknown'}).") + + +def _safe_photo_name(name: str) -> None: + if "/" in name or ".." in name or "\x00" in name: + raise HTTPException(400, "Invalid photo name.") + + +def _resolve_faces(face_ids: list[int]) -> list[dict]: + """Turn linked face ids into [{id, name}] using the face gallery.""" + g = _get_face_gallery() + out = [] + for fid in face_ids: + name = None + if g is not None: + try: + e = g.get(fid) + name = e.name if e else None + except Exception: + name = None + out.append({"id": fid, "name": name}) + return out + + +def _place_to_dict(p) -> dict: + d = p.to_dict() + d["faces"] = _resolve_faces(p.face_ids) + return d + + +def _zone_to_dict(z) -> dict: + return { + "id": z.id, "name": z.name, "description": z.description, + "added_at": z.added_at, + "places": [_place_to_dict(p) for p in z.places], + } + + +def _nav_target_dict(st, gallery) -> Optional[dict]: + zid, pid = st.nav_target_zone_id, st.nav_target_place_id + if not zid or not pid: + return None + zone_name = place_name = None + if gallery is not None: + try: + z = gallery.get_zone(zid) + zone_name = z.name if z else None + p = gallery.get_place(zid, pid) + place_name = p.name if p else None + except Exception: + pass + return {"zone_id": zid, "place_id": pid, + "zone_name": zone_name, "place_name": place_name} + + +# ── state + toggle ────────────────────────────────────────── + +@router.get("/state") +async def get_state(): + st = recognition_state.read(STATE_PATH) + g = _get_zone_gallery() + zones_count = places_count = 0 + if g is 
not None: + try: + zones = g.list_zones() + zones_count = len(zones) + places_count = sum(len(z.places) for z in zones) + except Exception: + pass + return { + "zone_rec_enabled": st.zone_rec_enabled, + "zones_version": st.zones_version, + "zones_count": zones_count, + "places_count": places_count, + "nav_target": _nav_target_dict(st, g), + } + + +@router.post("/zone-rec") +async def set_zone_rec(on: bool = Query(...)): + """Enable / disable the robot's knowledge of zones & places (hot).""" + st = recognition_state.mutate(STATE_PATH, zone_rec_enabled=bool(on)) + log.info("zone recognition %s", "ON" if on else "OFF") + return {"ok": True, "zone_rec_enabled": st.zone_rec_enabled} + + +@router.post("/sync") +async def sync_zones(): + v = _bump_zones_version() + log.info("zones sync requested → v.%d", v) + return {"ok": True, "zones_version": v} + + +# ── zones CRUD ────────────────────────────────────────────── + +class NamePayload(BaseModel): + name: Optional[str] = None + + +class DescribePayload(BaseModel): + description: Optional[str] = None + + +class FacesPayload(BaseModel): + face_ids: list[int] = [] + + +@router.get("") +async def list_zones(): + g = _require_zones() + zones = g.list_zones() + return {"zones": [_zone_to_dict(z) for z in zones], "total": len(zones)} + + +@router.post("/create") +async def create_zone(name: Optional[str] = Query(default=None), + description: Optional[str] = Query(default=None)): + g = _require_zones() + if not (name or "").strip() and not (description or "").strip(): + raise HTTPException(400, "A zone needs at least a name or a description.") + z = g.create_zone(name=name, description=description) + _bump_zones_version() + return {"ok": True, "zone": _zone_to_dict(z)} + + +@router.post("/{zone_id}/rename") +async def rename_zone(zone_id: int, payload: NamePayload): + g = _require_zones() + try: + g.rename_zone(zone_id, payload.name) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + _bump_zones_version() + 
return {"ok": True, "zone": _zone_to_dict(g.get_zone(zone_id))} + + +@router.post("/{zone_id}/describe") +async def describe_zone(zone_id: int, payload: DescribePayload): + g = _require_zones() + try: + g.describe_zone(zone_id, payload.description) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + _bump_zones_version() + return {"ok": True, "zone": _zone_to_dict(g.get_zone(zone_id))} + + +@router.delete("/{zone_id}") +async def delete_zone(zone_id: int): + g = _require_zones() + try: + g.delete_zone(zone_id) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + # If the active destination was inside this zone, clear it. + st = recognition_state.read(STATE_PATH) + if st.nav_target_zone_id == zone_id: + recognition_state.mutate(STATE_PATH, nav_target_zone_id=0, nav_target_place_id=0) + _bump_zones_version() + return {"ok": True, "deleted": zone_id} + + +# ── places CRUD (within a zone) ───────────────────────────── + +@router.post("/{zone_id}/places/create") +async def create_place( + zone_id: int, + name: Optional[str] = Query(default=None), + description: Optional[str] = Query(default=None), + face_ids: list[int] = Query(default=[]), + files: Optional[list[UploadFile]] = File(default=None), +): + g = _require_zones() + if g.get_zone(zone_id) is None: + raise HTTPException(404, f"zone_{zone_id} not found") + if not (name or "").strip() and not (description or "").strip(): + raise HTTPException(400, "A place needs at least a name or a description.") + image_bytes: list[bytes] = [] + for f in (files or []): + content = await f.read() + if not content: + continue + _validate_image(content, f.filename) + image_bytes.append(content) + p = g.create_place(zone_id, name=name, description=description, + face_ids=face_ids, image_bytes_list=image_bytes or None) + _bump_zones_version() + return {"ok": True, "place": _place_to_dict(p)} + + +@router.post("/{zone_id}/places/{place_id}/rename") +async def rename_place(zone_id: int, 
place_id: int, payload: NamePayload): + g = _require_zones() + try: + g.rename_place(zone_id, place_id, payload.name) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + _bump_zones_version() + return {"ok": True, "place": _place_to_dict(g.get_place(zone_id, place_id))} + + +@router.post("/{zone_id}/places/{place_id}/describe") +async def describe_place(zone_id: int, place_id: int, payload: DescribePayload): + g = _require_zones() + try: + g.describe_place(zone_id, place_id, payload.description) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + _bump_zones_version() + return {"ok": True, "place": _place_to_dict(g.get_place(zone_id, place_id))} + + +@router.post("/{zone_id}/places/{place_id}/faces") +async def set_place_faces(zone_id: int, place_id: int, payload: FacesPayload): + """Replace the set of saved faces linked to this place.""" + g = _require_zones() + try: + g.set_place_faces(zone_id, place_id, payload.face_ids) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + _bump_zones_version() + return {"ok": True, "place": _place_to_dict(g.get_place(zone_id, place_id))} + + +@router.post("/{zone_id}/places/{place_id}/capture") +async def capture_to_place(zone_id: int, place_id: int): + g = _require_zones() + cam = _get_camera() + if cam is None or not cam.is_running(): + raise HTTPException(409, "Camera is not running. 
Toggle Vision ON first.") + jpeg = cam.get_fresh_frame(max_age_s=0.5, timeout_s=1.5) + if not jpeg: + raise HTTPException(409, "Camera has no frame yet.") + try: + fname = g.add_photo(zone_id, place_id, jpeg) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + _bump_zones_version() + return {"ok": True, "added": fname, "place": _place_to_dict(g.get_place(zone_id, place_id))} + + +@router.post("/{zone_id}/places/{place_id}/upload") +async def upload_to_place(zone_id: int, place_id: int, + files: list[UploadFile] = File(...)): + g = _require_zones() + if g.get_place(zone_id, place_id) is None: + raise HTTPException(404, f"zone_{zone_id}/place_{place_id} not found") + added: list[str] = [] + for f in files: + content = await f.read() + _validate_image(content, f.filename) + try: + added.append(g.add_photo(zone_id, place_id, content)) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + _bump_zones_version() + return {"ok": True, "added": added, "place": _place_to_dict(g.get_place(zone_id, place_id))} + + +@router.delete("/{zone_id}/places/{place_id}") +async def delete_place(zone_id: int, place_id: int): + g = _require_zones() + try: + g.delete_place(zone_id, place_id) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + st = recognition_state.read(STATE_PATH) + if st.nav_target_zone_id == zone_id and st.nav_target_place_id == place_id: + recognition_state.mutate(STATE_PATH, nav_target_zone_id=0, nav_target_place_id=0) + _bump_zones_version() + return {"ok": True, "deleted": place_id} + + +@router.delete("/{zone_id}/places/{place_id}/photo/{photo_name}") +async def delete_place_photo(zone_id: int, place_id: int, photo_name: str): + g = _require_zones() + _safe_photo_name(photo_name) + try: + g.delete_photo(zone_id, place_id, photo_name) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + _bump_zones_version() + return {"ok": True, "deleted": photo_name} + + 
+@router.get("/{zone_id}/places/{place_id}/photo/{photo_name}") +async def get_place_photo(zone_id: int, place_id: int, photo_name: str, + download: int = Query(default=0)): + g = _require_zones() + _safe_photo_name(photo_name) + path = g.get_photo(zone_id, place_id, photo_name) + if path is None: + raise HTTPException(404, "Photo not found.") + media = "image/png" if path.suffix.lower() == ".png" else "image/jpeg" + headers = {} + if download: + headers["Content-Disposition"] = ( + f'attachment; filename="zone_{zone_id}_place_{place_id}_{photo_name}"') + return FileResponse(path, media_type=media, headers=headers) + + +@router.get("/{zone_id}/places/{place_id}/download.zip") +async def download_place_zip(zone_id: int, place_id: int): + g = _require_zones() + try: + data = g.zip_place(zone_id, place_id) + except FileNotFoundError as exc: + raise HTTPException(404, str(exc)) + return StreamingResponse( + io.BytesIO(data), media_type="application/zip", + headers={ + "Content-Disposition": f'attachment; filename="zone_{zone_id}_place_{place_id}.zip"', + "Content-Length": str(len(data)), + }, + ) + + +# ── "go here" navigation target ───────────────────────────── + +@router.post("/{zone_id}/places/{place_id}/go") +async def go_to_place(zone_id: int, place_id: int): + """Set this place as the active destination. Records the target and lets + the Gemini child pick it up (reference photo + goal). 
Actual robot motion + is wired by N2 locomotion — until then this just establishes the goal.""" + g = _require_zones() + p = g.get_place(zone_id, place_id) + if p is None: + raise HTTPException(404, f"zone_{zone_id}/place_{place_id} not found") + recognition_state.mutate(STATE_PATH, + nav_target_zone_id=zone_id, + nav_target_place_id=place_id) + log.info("nav target set → zone_%d/place_%d (%s)", zone_id, place_id, + p.name or "(unnamed)") + return {"ok": True, "nav_target": {"zone_id": zone_id, "place_id": place_id, + "place_name": p.name}} + + +@router.post("/nav/clear") +async def clear_nav_target(): + recognition_state.mutate(STATE_PATH, nav_target_zone_id=0, nav_target_place_id=0) + log.info("nav target cleared") + return {"ok": True, "nav_target": None} diff --git a/vendor/Sanad/dashboard/static/index.html b/vendor/Sanad/dashboard/static/index.html new file mode 100644 index 0000000..754fac6 --- /dev/null +++ b/vendor/Sanad/dashboard/static/index.html @@ -0,0 +1,2347 @@ + + + + + + Sanad Dashboard + + + +
+ + +
+

Sanad Dashboard

+
+ + + + + + Connecting... + +
+
+ + +
+ CAM + FACE + PLACE + MOVE +
+ + +
+
Operations
+
Voice & Audio
+
Motion & Replay
+
Controller
+
Recognition
+
Recordings
+
Temperature
+
Terminal
+
Settings & Logs
+
+ + +
+
+ + +
+

Quick Voice

+
+ +
+ + + +
+
+
+ + +
+

System Info

+
+
Loading...
+
+
+ Network interfaces +
+
+
+ Subsystems (connected / disconnected) +
+
+
+ + +
+

Audio Control

+
+

+

+
+
+
+ + +
+
+ + + +
+
+ Persisted in data/motions/config.json · applies live via DDS +
+
+
+ +
+ + + + + +
+
+
+
+ Manual sink / source override +
+ +
+
+ +
+
+ +
+
+
+
+ + +
+

Quick Actions

+
+ + + + +
+ +
+
+
+ +
+
+ + +
+
+ + +
+

Live Voice Commands

+
+ + + + + +
+
+
+ + +
+
+ + +
+
+
+
Last heard: --
+
Pending action: --
+
Audio attached: -- | Arm attached: -- | Gemini: --
+
+
+
+ + +
+
+
+ + +
+

Live Gemini Process

+
+ + + + + + +
+
+
State: --
+
User: --
+
+
+ + +
+
+
+ + +
+

Gemini API Key

+
+ The key used by GeminiVoiceClient and the Live Gemini subprocess. + Saved to data/motions/config.json. Get a free key at + aistudio.google.com/app/apikey. +
+
+ + + + +
+
+ + + + +
+
+
+ + +
+

Typed Replay Engine

+
+
+ +
+ + + +
+
+ + + +
+
+
+ +
+
+
+
+ + +
+

Wake Phrase Manager

+
+
+
+ + +
+
+ +
+
+ + +
+
🔒 Arm actions are disabled while movement is enabled (Controller tab). Disable movement to replay / trigger / teach.
+
+ + +
+

Motion Control

+
+ + + + + + +
+
+
+
+ + +
+
+
+
+
+ + +
+
+
+
+
+ + +
+

Replay Manager

+
+
+ +
+
+ + +
+
+
+ +
+ + + + +
+ +
+ + + + +
+
+
+
+
+ + +
+

Macro Recorder (Audio + Motion)

+ + + +
+ + + +
+ + + +
+
+
Voice (WAV)
+ +
+
+
Motion (JSONL)
+ +
+
+
Speed
+ +
+
+ + + +
+
+ +
+
+
+ +
+
+ + +
+
+ + +
+

Camera Vision & Face Recognition

+
+
+ + + -- +
+
+ + + -- +
+ +
+
+ Toggles take effect within ~1 second on the running Gemini session — no restart required. +
+
+ + +
+

Live Preview

+
+ +
Camera off — toggle Vision ON to see the live feed.
+
+
--
+
+
Resolution / FPS
+
+ + + + + + +
+
JPEG Quality
+
+ + + +
+
+
+ Each button rebuilds the capture pipeline (~0.5 s). Modes match the + RealSense D435I colour sensor — on USB 2.x, stick to 424×240 or 640×480. + If the feed is grayscale/IR, pin the colour node with SANAD_CAMERA_USB_INDEX. +
+
+ + +
+

Add New Face

+
+ + +
+
+ + +
+
+ + +
+
+ Tip: add 2–3 photos / different angles per person for best recognition. + The description is sent to Gemini with the photos — it can then greet + and talk about the person using what you wrote. +
+
+ + +
+

Enrolled Faces

+
+ + +
+
Loading…
+
+ + +
+

Zones & Places

+
+
+ + + -- +
+ + +
+
+ Destination: + none + +
+
+ Group locations into zones, add places inside each (name + description + + optional reference photos), and link saved faces to a place. “Go here” sets a + destination and shows Gemini the place — the robot drives there once movement + (locomotion) is enabled. +
+
+ + +
+

Add New Zone

+
+ + + +
+
+ + +
+

Zones

+
+ +
+
Loading…
+
+ +
+
+ + +
+
+ +
+
+ Live motor surface/winding temperatures from rt/lowstate on the full + G1 (29 DOF). Blue ≈ 30°C → red ≈ 120°C. Drag to orbit, scroll to zoom. + Streamed over /ws/motor-temps — no second DDS subscriber. +
+
+ + +
+ + +
+
+
+ FSM — + unknown + MSC — + SDK — +
+
+ + + +
+
+ +
+ CAM + FACE + PLACE + MOVE + GEMINI-MOVE + EXPLORE · soon +
+
+ Manual operator control. Robot is assumed standing in walking mode — use Ready/Start only if needed. + All controls below are locked until Enable movement is on; E-STOP always works. + While movement is on, arm replays/actions are disabled (and vice-versa). +
+
+ +
+ +
+

Locomotion / Teleop

+
Discrete step pad
+
+ + + + + + + + + +
+
+ + +
+
vx 0.00 · vy 0.00 · ω 0.00
+
W/S forward·back · Q/E strafe · A/D rotate · Space halt
+
+ + +
+

Postures & Modes

+
+ + + + + + + + + + +
+
+ + +
+

MotionSwitcher / Low-Level

+
+ + + + + + +
+
+ + +
+

Diagnostics — joints 12–28

+

+    
+
+
+ + +
+
+ + +
+

Skill Registry

+
No skills configured
+ +
+ + +
+

Saved Records

+ + +
No records saved
+ +
+ +
+
+ + + +
+ + + +
+
+

Terminal — unitree@robot

+ disconnected + + + +
+
+ Runs as the dashboard's user on the robot (typically unitree). No SSH handshake — the dashboard is already on the robot. Works on whichever Wi-Fi the robot is connected to. +
+
+
+
+ + +
+
+ + +
+

Scripts Manager

+
+ +
+ + + + +
+
+ + +
+

Prompt Management

+
+ +
+ + +
+
+ + +
+

Live Logs

+
+ + + + + +
+
+
+ +
+
+ + + + diff --git a/vendor/Sanad/dashboard/temp_motor_map.py b/vendor/Sanad/dashboard/temp_motor_map.py new file mode 100644 index 0000000..281951e --- /dev/null +++ b/vendor/Sanad/dashboard/temp_motor_map.py @@ -0,0 +1,90 @@ +"""G1 29-DoF motor → name / mesh mapping for the 3D temperature dashboard. + +Ported verbatim from Marcus/Features/TempMonitor/config_g1.py so the copied +three.js front-end (static/temp3d/index.html) binds temperature colours to the +correct STL meshes. `build_payload()` turns the arm controller's raw lowstate +snapshot into the exact 'motor_update' payload shape that front-end expects. +""" + +from __future__ import annotations + +from typing import Any, Optional + +# Motor ID → human name (29 motors = 29 DOF) +MOTOR_NAMES: dict[int, str] = { + 0: "Left Hip Pitch", 1: "Left Hip Roll", 2: "Left Hip Yaw", 3: "Left Knee", + 4: "Left Ankle Pitch", 5: "Left Ankle Roll", + 6: "Right Hip Pitch", 7: "Right Hip Roll", 8: "Right Hip Yaw", 9: "Right Knee", + 10: "Right Ankle Pitch", 11: "Right Ankle Roll", + 12: "Waist Yaw", 13: "Waist Roll", 14: "Waist Pitch", + 15: "Left Shoulder Pitch", 16: "Left Shoulder Roll", 17: "Left Shoulder Yaw", + 18: "Left Elbow", 19: "Left Wrist Roll", 20: "Left Wrist Pitch", 21: "Left Wrist Yaw", + 22: "Right Shoulder Pitch", 23: "Right Shoulder Roll", 24: "Right Shoulder Yaw", + 25: "Right Elbow", 26: "Right Wrist Roll", 27: "Right Wrist Pitch", 28: "Right Wrist Yaw", +} + +# Motor ID → URDF link / STL mesh name +MOTOR_TO_MESH: dict[int, str] = { + 0: "left_hip_pitch_link", 1: "left_hip_roll_link", 2: "left_hip_yaw_link", + 3: "left_knee_link", 4: "left_ankle_pitch_link", 5: "left_ankle_roll_link", + 6: "right_hip_pitch_link", 7: "right_hip_roll_link", 8: "right_hip_yaw_link", + 9: "right_knee_link", 10: "right_ankle_pitch_link", 11: "right_ankle_roll_link", + 12: "waist_yaw_link", 13: "waist_roll_link", 14: "torso_link", + 15: "left_shoulder_pitch_link", 16: "left_shoulder_roll_link", 17: "left_shoulder_yaw_link", + 
18: "left_elbow_link", 19: "left_wrist_roll_link", 20: "left_wrist_pitch_link", + 21: "left_wrist_yaw_link", + 22: "right_shoulder_pitch_link", 23: "right_shoulder_roll_link", 24: "right_shoulder_yaw_link", + 25: "right_elbow_link", 26: "right_wrist_roll_link", 27: "right_wrist_pitch_link", + 28: "right_wrist_yaw_link", +} + +# Temperature thresholds (°C) — the three.js gradient maps MIN→MAX (blue→red). +TEMP_MIN = 30 +TEMP_MAX = 120 +TEMP_WARM_THRESHOLD = 45 +TEMP_HOT_THRESHOLD = 60 + + +def _coerce(v: Optional[int]) -> float: + """Temperatures default to 0 when the firmware didn't report one, so the + front-end's Math.max / .toFixed never sees null/NaN.""" + return float(v) if v is not None else 0.0 + + +def build_payload(temps: list[dict[str, Any]], + positions: list[float], + timestamp: float) -> dict[str, Any]: + """Build the Marcus-compatible 'motor_update' payload. + + `temps` — arm.get_motor_temps(): [{motor_id, surface, winding}] + `positions` — arm.get_current_q(): joint angles indexed by motor id + """ + temperatures: list[dict[str, Any]] = [] + for t in temps or []: + i = t.get("motor_id") + surface = t.get("surface") + winding = t.get("winding") + if surface is not None and winding is not None: + avg = (_coerce(surface) + _coerce(winding)) / 2.0 + else: + avg = _coerce(surface if surface is not None else winding) + entry: dict[str, Any] = { + "motor_id": i, + "motor_name": MOTOR_NAMES.get(i, f"Motor {i}"), + "mesh_name": MOTOR_TO_MESH.get(i, ""), + "surface": _coerce(surface), + "winding": _coerce(winding), + "temp1": _coerce(surface), + "temp2": _coerce(winding), + "avg": avg, + } + if positions and isinstance(i, int) and i < len(positions): + entry["position"] = float(positions[i]) + temperatures.append(entry) + + pos_list: list[dict[str, Any]] = [ + {"motor_id": i, "position": float(q), "link_name": MOTOR_TO_MESH.get(i)} + for i, q in enumerate(positions or []) + ] + return {"temperatures": temperatures, "positions": pos_list, + "timestamp": 
timestamp} diff --git a/vendor/Sanad/dashboard/websockets/__init__.py b/vendor/Sanad/dashboard/websockets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/dashboard/websockets/log_stream.py b/vendor/Sanad/dashboard/websockets/log_stream.py new file mode 100644 index 0000000..17b9af3 --- /dev/null +++ b/vendor/Sanad/dashboard/websockets/log_stream.py @@ -0,0 +1,80 @@ +"""WebSocket endpoint for real-time log streaming. + +Clients connect to /ws/logs and receive live log lines from all modules. +""" + +from __future__ import annotations + +import asyncio +import threading +from collections import deque + +from fastapi import APIRouter, WebSocket, WebSocketDisconnect + +from Project.Sanad.core.logger import set_ws_push + +router = APIRouter() + +MAX_WATCHERS = 50 + +# Ring buffer of recent log lines (shared across connections). +_recent: deque[str] = deque(maxlen=500) +_watchers: set[asyncio.Queue] = set() +_watchers_lock = threading.Lock() + + +def push_log_line(line: str): + """Called from the logging system to feed new lines. + + May be called from any thread (logging is multi-threaded), so we + snapshot the watchers under a lock before iterating. + """ + _recent.append(line) + with _watchers_lock: + snapshot = list(_watchers) + for q in snapshot: + try: + q.put_nowait(line) + except asyncio.QueueFull: + # Drop on overflow rather than block — logs are not critical data + pass + + +# Register with the logger so all log records are pushed to WS clients. +# Wrap so a logger registration failure doesn't break Dashboard import. 
+try: + set_ws_push(push_log_line) +except Exception: + pass + + +@router.websocket("/ws/logs") +async def log_ws(ws: WebSocket): + await ws.accept() + + with _watchers_lock: + if len(_watchers) >= MAX_WATCHERS: + await ws.close(code=1013, reason="Too many log watchers") + return + queue: asyncio.Queue[str] = asyncio.Queue(maxsize=200) + _watchers.add(queue) + + try: + # Send recent history + for line in list(_recent): + await ws.send_text(line) + + while True: + line = await queue.get() + await ws.send_text(line) + except WebSocketDisconnect: + pass + except Exception: + # Any other error closes the connection cleanly + try: + await ws.close() + except Exception: + pass + finally: + with _watchers_lock: + _watchers.discard(queue) diff --git a/vendor/Sanad/dashboard/websockets/motor_temps.py b/vendor/Sanad/dashboard/websockets/motor_temps.py new file mode 100644 index 0000000..c70e32f --- /dev/null +++ b/vendor/Sanad/dashboard/websockets/motor_temps.py @@ -0,0 +1,81 @@ +"""WebSocket endpoint streaming G1 motor temperatures to the 3D dashboard (N1). + +Polls the arm controller's throttled rt/lowstate snapshot (arm.get_motor_temps +/ arm.get_current_q — NO second DDS subscriber, no second ChannelFactoryInitialize) +and pushes a Marcus-compatible 'motor_update' payload to each connected client. + +Front-end: dashboard/static/temp3d/index.html (ported three.js view), which +opens this socket via a tiny shim in place of socket.io. 
+""" + +from __future__ import annotations + +import asyncio +import threading +import time + +from fastapi import APIRouter, WebSocket, WebSocketDisconnect + +from Project.Sanad.core.logger import get_logger +from Project.Sanad.dashboard.temp_motor_map import build_payload + +log = get_logger("motor_temps_ws") + +router = APIRouter() + +MAX_WATCHERS = 20 +PUSH_HZ = 8.0 # ~8 fps is plenty for a temperature heatmap + +_count = 0 +_count_lock = threading.Lock() + + +def _get_arm(): + """Lazy import — avoids a circular import on dashboard load.""" + try: + from Project.Sanad.main import arm # type: ignore + return arm + except Exception: + return None + + +@router.websocket("/ws/motor-temps") +async def motor_temps_ws(ws: WebSocket): + await ws.accept() + + global _count + with _count_lock: + if _count >= MAX_WATCHERS: + await ws.close(code=1013, reason="Too many temperature watchers") + return + _count += 1 + + period = 1.0 / PUSH_HZ + try: + while True: + arm = _get_arm() + temps: list = [] + positions: list = [] + if arm is not None: + try: + temps = arm.get_motor_temps() + except Exception: + temps = [] + try: + positions = arm.get_current_q() + except Exception: + positions = [] + payload = build_payload(temps, positions, time.time()) + await ws.send_json(payload) + await asyncio.sleep(period) + except WebSocketDisconnect: + pass + except Exception: + # Any other error (client gone mid-send, serialise issue) closes cleanly. + try: + await ws.close() + except Exception: + pass + finally: + with _count_lock: + _count -= 1 diff --git a/vendor/Sanad/dashboard/websockets/terminal.py b/vendor/Sanad/dashboard/websockets/terminal.py new file mode 100644 index 0000000..af27838 --- /dev/null +++ b/vendor/Sanad/dashboard/websockets/terminal.py @@ -0,0 +1,323 @@ +"""WebSocket → PTY bridge for the dashboard's Terminal tab. + +Spawns a shell (bash by default) inside a pseudo-terminal on the robot and +relays stdin/stdout to a browser xterm.js instance over WebSocket. 
From the +operator's seat this is functionally identical to an in-browser +`ssh unitree@` — except no SSH handshake is needed because the +dashboard process already runs as unitree on the robot. The Terminal tab +connects to ws:///ws/terminal and you land in unitree's shell +directly. + +PROTOCOL — text frames only. Control vs. keystrokes are disambiguated by +the leading byte: + client → server: + "\\x1f" + json-encoded control object (init / resize) + e.g. "\\x1f{\\"type\\":\\"init\\",\\"cols\\":80,\\"rows\\":24}" + keystrokes — written to PTY + server → client: + PTY stdout/stderr chunks + +The \\x1f prefix (ASCII Unit Separator) is the disambiguator. If we just +JSON-sniffed every message, a user pasting `{"type":"resize",...}` into +their shell would silently resize the PTY instead of pasting the text. + +SECURITY NOTE: anyone who can reach the dashboard URL gets shell access +as the unitree user. The dashboard already exposes equally-powerful +endpoints (E-STOP, motion replay, audio mute, etc.) so this isn't a new +threat class — but it IS a single-bullet kill switch for the robot. Bind +the dashboard to a trusted network only. +""" + +from __future__ import annotations + +import asyncio +import fcntl +import json +import os +import pty +import select +import shutil +import signal +import struct +import termios +import threading + +from fastapi import APIRouter, WebSocket, WebSocketDisconnect + +from Project.Sanad.core.logger import get_logger + +log = get_logger("terminal_ws") + +router = APIRouter() + +# Magic prefix that distinguishes control messages from raw keystrokes. +# ASCII 0x1F (Unit Separator) — not produced by normal keyboard input, +# so user-pasted JSON can never spoof a control frame. +_CTRL_PREFIX = "\x1f" + +# Concurrent-session cap so a runaway tab can't spawn 50 bashes on the robot. +_MAX_SESSIONS = 4 +_active: set[int] = set() +_active_lock = threading.Lock() + +# Bounded queue depth between the PTY reader thread and the WS sender. 
+# A chatty shell command (e.g. `yes`, `cat /dev/urandom`) at gigabytes/sec +# would otherwise pile up unbounded asyncio tasks + string refs. Past the +# cap we drop chunks and surface a single drop notice — ANSI may corrupt +# briefly but the session stays alive. +_SEND_QUEUE_MAX = 64 + + +def _resolve_shell() -> list[str]: + """Pick a sensible shell. SHELL env first, then /bin/bash, then sh.""" + sh = os.environ.get("SHELL", "") + if sh and shutil.which(sh): + return [sh, "-i"] + if shutil.which("/bin/bash"): + return ["/bin/bash", "-i"] + return ["/bin/sh", "-i"] + + +def _set_pty_size(fd: int, cols: int, rows: int) -> None: + """Inform the PTY of its new window size so curses-style apps (htop, + less, vim) lay out correctly.""" + try: + # TIOCSWINSZ payload: rows, cols, xpixel, ypixel (xpixel/ypixel + # unused, kept 0). + fcntl.ioctl(fd, termios.TIOCSWINSZ, + struct.pack("HHHH", rows, cols, 0, 0)) + except Exception as exc: + log.debug("TIOCSWINSZ failed (cols=%s rows=%s): %s", cols, rows, exc) + + +async def _reap_child(pid: int) -> None: + """SIGHUP → wait briefly → SIGKILL → wait briefly → giveup. + + Earlier version SIGKILLed unconditionally because the WNOHANG check + happened immediately after SIGHUP (which never returns true that fast). + Now we poll for up to ~1.5s after SIGHUP before escalating. + """ + async def _wait_exit(timeout_s: float, interval_s: float = 0.1) -> bool: + end = asyncio.get_running_loop().time() + timeout_s + while asyncio.get_running_loop().time() < end: + try: + done_pid, _ = os.waitpid(pid, os.WNOHANG) + except ChildProcessError: + return True # already reaped + except OSError: + return False + if done_pid: + return True + await asyncio.sleep(interval_s) + return False + + # 1. Polite request + try: + os.kill(pid, signal.SIGHUP) + except ProcessLookupError: + return + except OSError as exc: + log.debug("SIGHUP pid=%d: %s", pid, exc) + return + + if await _wait_exit(1.5): + return + + # 2. 
Force + try: + os.kill(pid, signal.SIGKILL) + except ProcessLookupError: + return + except OSError as exc: + log.debug("SIGKILL pid=%d: %s", pid, exc) + return + + if not await _wait_exit(1.0): + log.warning("terminal child pid=%d failed to exit after SIGKILL", pid) + + +@router.websocket("/ws/terminal") +async def terminal_ws(ws: WebSocket) -> None: + """Bridge a browser xterm.js to a shell PTY on the robot.""" + await ws.accept() + + # Concurrent-session guard. + with _active_lock: + if len(_active) >= _MAX_SESSIONS: + await ws.send_text( + f"\r\n[terminal] Refused — already have {_MAX_SESSIONS} " + f"open sessions. Close another tab and reconnect.\r\n" + ) + await ws.close(code=1008) + return + + # Fork + exec the shell. Parent gets the master fd; child becomes the + # shell with stdin/stdout/stderr wired to the slave end. + cmd = _resolve_shell() + try: + pid, fd = pty.fork() + except OSError as exc: + log.error("pty.fork failed: %s", exc) + await ws.send_text(f"\r\n[terminal] pty.fork failed: {exc}\r\n") + await ws.close(code=1011) + return + + if pid == 0: + # CHILD — set env so the shell is interactive and looks right. + os.environ.setdefault("TERM", "xterm-256color") + os.environ.setdefault("LANG", os.environ.get("LANG", "en_US.UTF-8")) + try: + os.execvp(cmd[0], cmd) + except OSError as exc: + # exec failed — printing to fd 2 reaches the parent via the + # PTY so the browser sees the error before we _exit. + os.write(2, f"[terminal] exec failed: {exc}\n".encode()) + os._exit(127) + return # unreachable in child + + # PARENT + with _active_lock: + _active.add(pid) + log.info("terminal session started pid=%d cmd=%s", pid, cmd[0]) + + loop = asyncio.get_running_loop() + closed = asyncio.Event() + # Bounded queue + dedicated sender task = backpressure. If the queue + # fills up we drop the chunk and bump _dropped so we can surface a + # short notice in the stream. 
+ send_q: asyncio.Queue[str] = asyncio.Queue(maxsize=_SEND_QUEUE_MAX) + dropped = 0 + + def _reader_thread() -> None: + """Drain PTY master fd → queue. Runs in a daemon thread because + select.select on a pipe blocks; asyncio has no portable + equivalent for arbitrary fds on Windows (and we want one code + path).""" + nonlocal dropped + try: + while not closed.is_set(): + try: + r, _, _ = select.select([fd], [], [], 0.1) + except (OSError, ValueError): + break + if not r: + continue + try: + data = os.read(fd, 4096) + except OSError: + break + if not data: # EOF — child exited / PTY closed + break + try: + text = data.decode("utf-8", errors="replace") + except Exception: + continue + # put_nowait raises on full — we drop and count. + try: + loop.call_soon_threadsafe(_enqueue, text) + except RuntimeError: + # loop closed — bail + break + finally: + loop.call_soon_threadsafe(closed.set) + + def _enqueue(text: str) -> None: + nonlocal dropped + try: + send_q.put_nowait(text) + except asyncio.QueueFull: + dropped += 1 + + async def _sender_task() -> None: + """Drains send_q → WebSocket. Single producer, single consumer + means no extra locking needed. Backoff on send failure and let + the closed flag end the session.""" + nonlocal dropped + while not closed.is_set(): + try: + text = await asyncio.wait_for(send_q.get(), timeout=0.5) + except asyncio.TimeoutError: + continue + try: + await ws.send_text(text) + except Exception as exc: + log.info("terminal ws.send failed (likely client gone): %s", exc) + closed.set() + return + # If we dropped chunks since the last successful send, tell + # the user once so the ANSI corruption isn't mysterious. 
+ if dropped: + d = dropped + dropped = 0 + try: + await ws.send_text( + f"\r\n\x1b[2m[term: dropped {d} chunk(s) — slow client]" + f"\x1b[0m\r\n", + ) + except Exception: + closed.set() + return + + reader = threading.Thread(target=_reader_thread, daemon=True, + name=f"terminal-rx-{pid}") + reader.start() + sender = asyncio.create_task(_sender_task()) + + # Initial sizing — xterm.js will send a {type:"init",...} control + # frame right after onopen with the actual window size. + _set_pty_size(fd, 80, 24) + + try: + while not closed.is_set(): + try: + msg = await asyncio.wait_for(ws.receive_text(), timeout=0.5) + except asyncio.TimeoutError: + continue + except WebSocketDisconnect: + break + + if not msg: + continue + + # Control frame? Must start with the magic prefix. User-typed + # / pasted text can never spoof this — \x1f isn't producible + # by normal keyboard input. + if msg[:1] == _CTRL_PREFIX: + try: + ctrl = json.loads(msg[1:]) + except (json.JSONDecodeError, ValueError): + ctrl = None + if isinstance(ctrl, dict) and ctrl.get("type") in ("init", "resize"): + cols = int(ctrl.get("cols") or 80) + rows = int(ctrl.get("rows") or 24) + _set_pty_size(fd, cols, rows) + # Either way, control frames are NEVER forwarded to PTY. + continue + + # Plain keystrokes — write to PTY master. 
+ try: + os.write(fd, msg.encode("utf-8", errors="replace")) + except OSError as exc: + log.info("terminal pty write failed (child likely exited): %s", exc) + break + finally: + closed.set() + try: + sender.cancel() + except Exception: + pass + try: + await _reap_child(pid) + except Exception as exc: + log.debug("reap_child pid=%d: %s", pid, exc) + try: + os.close(fd) + except OSError: + pass + with _active_lock: + _active.discard(pid) + log.info("terminal session ended pid=%d", pid) + try: + await ws.close() + except Exception: + pass diff --git a/vendor/Sanad/data/audio/.gitkeep b/vendor/Sanad/data/audio/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/data/audio_device.json b/vendor/Sanad/data/audio_device.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/vendor/Sanad/data/audio_device.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/vendor/Sanad/data/camera_device.json b/vendor/Sanad/data/camera_device.json new file mode 100644 index 0000000..7930d09 --- /dev/null +++ b/vendor/Sanad/data/camera_device.json @@ -0,0 +1,5 @@ +{ + "profile_serial_assignments": { + "realsense_primary": "" + } +} \ No newline at end of file diff --git a/vendor/Sanad/data/faces/.gitkeep b/vendor/Sanad/data/faces/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/data/motions/config.json b/vendor/Sanad/data/motions/config.json new file mode 100644 index 0000000..40a0170 --- /dev/null +++ b/vendor/Sanad/data/motions/config.json @@ -0,0 +1,21 @@ +{ + "gemini": { + "api_key": "", + "model": "models/gemini-2.5-flash-native-audio-preview-12-2025", + "voice_name": "Charon" + }, + "audio": { + "send_sample_rate": 16000, + "receive_sample_rate": 24000, + "chunk_size": 512, + "g1_volume": 100 + }, + "motion": { + "action_cooldown_sec": 1.0, + "replay_hz": 60.0 + }, + "dashboard": { + "host": "0.0.0.0", + "port": 8000 + } +} \ No newline at end of file diff --git a/vendor/Sanad/data/photos/.gitkeep 
b/vendor/Sanad/data/photos/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/data/recordings/.gitkeep b/vendor/Sanad/data/recordings/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/examples/voice_example.py b/vendor/Sanad/examples/voice_example.py new file mode 100644 index 0000000..92bf2cd --- /dev/null +++ b/vendor/Sanad/examples/voice_example.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 +"""voice_example.py — demos for each voice subsystem in isolation. + +Each subcommand exercises one component so you can debug pieces without +running the full Sanad stack. + +Usage: + python3 voice_example.py gemini "hello" # one-shot Gemini text→audio + python3 voice_example.py local_tts "hello" # local Coqui TTS + python3 voice_example.py typed_replay "hello" # typed replay engine + python3 voice_example.py live # spawn GeminiSubprocess + python3 voice_example.py status # show status of all subsystems + +Assumes Project.Sanad is importable (run from repo root or with PYTHONPATH set). 
+""" + +from __future__ import annotations + +import argparse +import asyncio +import sys + + +def _demo_gemini(text: str) -> None: + """One-shot: connect Gemini, send text, play reply.""" + from Project.Sanad.gemini.client import GeminiVoiceClient + from Project.Sanad.voice.audio_manager import AudioManager + + async def run(): + client = GeminiVoiceClient() + audio = AudioManager() + await client.connect() + try: + audio_bytes, text_parts = await client.send_text(text, owner="example") + print(f"[gemini] got {len(audio_bytes)} bytes audio, text={text_parts}") + if audio_bytes: + await asyncio.to_thread(audio.play_pcm, audio_bytes, 1, 24000, 2) + finally: + await client.disconnect() + + asyncio.run(run()) + + +def _demo_local_tts(text: str) -> None: + """Synthesize with local Coqui TTS and play.""" + from Project.Sanad.voice.local_tts import LocalTTSEngine + from Project.Sanad.voice.audio_manager import AudioManager + + tts = LocalTTSEngine() + audio = AudioManager() + pcm = tts.synthesize(text) + print(f"[local_tts] generated {len(pcm)} bytes") + audio.play_pcm(pcm, 1, 16000, 2) + + +def _demo_typed_replay(text: str) -> None: + """Exercise the TypedReplayEngine end-to-end.""" + from Project.Sanad.gemini.client import GeminiVoiceClient + from Project.Sanad.voice.audio_manager import AudioManager + from Project.Sanad.voice.typed_replay import TypedReplayEngine + + async def run(): + client = GeminiVoiceClient() + await client.connect() + audio = AudioManager() + engine = TypedReplayEngine(client, audio) + result = await engine.say(text) + print(f"[typed_replay] {result}") + await client.disconnect() + + asyncio.run(run()) + + +def _demo_live() -> None: + """Spawn the live voice subprocess — same as dashboard /api/live-subprocess.""" + from Project.Sanad.gemini.subprocess import GeminiSubprocess + + mgr = GeminiSubprocess() + info = mgr.start() + print(f"[live] {info}") + print("Running. 
Ctrl+C to stop.") + try: + while True: + import time + time.sleep(1) + except KeyboardInterrupt: + print("\n[live] stopping...") + print(mgr.stop()) + + +def _demo_status() -> None: + """Print status of all voice subsystems.""" + from Project.Sanad.gemini.client import GeminiVoiceClient + try: + from Project.Sanad.voice.local_tts import LocalTTSEngine + except Exception: + LocalTTSEngine = None + + client = GeminiVoiceClient() + print("[gemini]", client.status()) + if LocalTTSEngine: + try: + tts = LocalTTSEngine() + print("[local_tts]", tts.status()) + except Exception as exc: + print(f"[local_tts] unavailable: {exc}") + else: + print("[local_tts] not installed") + + +def main(): + ap = argparse.ArgumentParser(description=__doc__) + sub = ap.add_subparsers(dest="cmd", required=True) + + for name in ("gemini", "local_tts", "typed_replay"): + sp = sub.add_parser(name, help=f"demo {name}") + sp.add_argument("text", help="text to speak") + + sub.add_parser("live", help="spawn live voice subprocess") + sub.add_parser("status", help="print subsystem status") + + args = ap.parse_args() + if args.cmd == "gemini": + _demo_gemini(args.text) + elif args.cmd == "local_tts": + _demo_local_tts(args.text) + elif args.cmd == "typed_replay": + _demo_typed_replay(args.text) + elif args.cmd == "live": + _demo_live() + elif args.cmd == "status": + _demo_status() + + +if __name__ == "__main__": + main() diff --git a/vendor/Sanad/gemini/__init__.py b/vendor/Sanad/gemini/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/gemini/client.py b/vendor/Sanad/gemini/client.py new file mode 100644 index 0000000..b300da0 --- /dev/null +++ b/vendor/Sanad/gemini/client.py @@ -0,0 +1,341 @@ +"""Gemini WebSocket client for real-time voice interaction. 
+ +Provides: + - Bidirectional audio streaming (mic → Gemini → speaker) + - Text-to-speech via typed input + - Voice-command detection through transcription parsing + - System instruction injection for persona control +""" + +from __future__ import annotations + +import asyncio +import base64 +import inspect +import json +from typing import Any + +import websockets + +from Project.Sanad.config import ( + GEMINI_API_KEY, + GEMINI_MODEL, + GEMINI_VOICE, + GEMINI_WS_TIMEOUT, + GEMINI_WS_URI, +) +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.event_bus import bus +from Project.Sanad.core.logger import get_logger + +log = get_logger("gemini_client") + +_GC = _cfg_section("gemini", "client") +# Default system prompt — SINGLE SOURCE in core.gemini_defaults +_DEFAULT_SYSTEM_PROMPT = _cfg_section("core", "gemini_defaults").get( + "default_system_prompt", + "You are Sanad (Bousandah), a wise and friendly Emirati assistant. " + "Speak in UAE dialect (Khaleeji). Be helpful and concise." +) +_RECV_TIMEOUT_SEC = _GC.get("recv_timeout_sec", 30) +_RECONNECT_MAX_ATTEMPTS = _GC.get("reconnect_max_attempts", 3) +_RECONNECT_INITIAL_DELAY_SEC = _GC.get("reconnect_initial_delay_sec", 1.0) +_RECONNECT_MAX_DELAY_SEC = _GC.get("reconnect_max_delay_sec", 10.0) + + +class GeminiVoiceClient: + """Manages one WebSocket session to the Gemini Bidi audio API. + + Concurrency model: + - `_send_lock` serializes ALL websocket writes. + - `_session_lock` ensures only one consumer (live loop OR typed replay) + owns the receive stream at a time. Acquired by send_text and + receive_stream context managers. + - `_owner` records who currently holds the session lock for diagnostics. 
+ """ + + def __init__(self, system_prompt: str = ""): + self.system_prompt = system_prompt or _DEFAULT_SYSTEM_PROMPT + self._ws: Any = None + self._connected = False + self._send_lock = asyncio.Lock() + self._session_lock = asyncio.Lock() + self._connect_lock = asyncio.Lock() # serializes reconnect attempts + self._owner: str | None = None + self._reconnect_attempts = 0 + + @property + def connected(self) -> bool: + return self._connected + + @property + def session_owner(self) -> str | None: + return self._owner + + def _ws_kwargs(self) -> dict[str, Any]: + kwargs: dict[str, Any] = {"max_size": None, "open_timeout": 30} + try: + sig = inspect.signature(websockets.connect) + key = "extra_headers" if "extra_headers" in sig.parameters else "additional_headers" + except Exception: + key = "extra_headers" + kwargs[key] = {"Content-Type": "application/json"} + return kwargs + + async def connect(self): + uri = f"{GEMINI_WS_URI}?key={GEMINI_API_KEY}" + try: + self._ws = await websockets.connect(uri, **self._ws_kwargs()) + setup = { + "setup": { + "model": GEMINI_MODEL, + "generationConfig": { + "responseModalities": ["AUDIO"], + "speechConfig": { + "voiceConfig": { + "prebuiltVoiceConfig": {"voiceName": GEMINI_VOICE} + } + }, + }, + "systemInstruction": {"parts": [{"text": self.system_prompt}]}, + } + } + await self._ws.send(json.dumps(setup)) + await self._ws.recv() # ACK + self._connected = True + self._reconnect_attempts = 0 + log.info("Connected to Gemini (%s)", GEMINI_MODEL) + await bus.emit("voice.connected") + except Exception: + self._connected = False + self._ws = None + log.exception("Failed to connect to Gemini") + raise + + async def disconnect(self): + try: + if self._ws is not None: + await self._ws.close() + except Exception: + pass + finally: + self._ws = None + self._connected = False + self._owner = None + log.info("Disconnected from Gemini") + await bus.emit("voice.disconnected") + + async def _ensure_connected(self): + """Reconnect if dropped, with 
bounded retries. + + Serialized via _connect_lock so concurrent callers don't trigger + duplicate handshakes. + """ + # Fast path — no lock needed + if self._connected and self._ws is not None: + return True + + async with self._connect_lock: + # Re-check inside the lock (another coroutine may have just connected) + if self._connected and self._ws is not None: + return True + + max_attempts = _RECONNECT_MAX_ATTEMPTS + delay = _RECONNECT_INITIAL_DELAY_SEC + for attempt in range(max_attempts): + try: + log.warning("Reconnecting to Gemini (attempt %d/%d)", attempt + 1, max_attempts) + await self.connect() + return True + except Exception: + self._reconnect_attempts += 1 + await asyncio.sleep(delay) + delay = min(delay * 2, _RECONNECT_MAX_DELAY_SEC) + log.error("Reconnect failed after %d attempts", max_attempts) + await bus.emit("voice.error", reason="reconnect_failed") + return False + + async def send_audio_chunk(self, pcm_b64: str) -> bool: + """Send a base64-encoded PCM audio chunk (mic input). + + Returns False on failure so the caller can react instead of silently + no-op'ing forever (the original bug). + """ + if not self._connected or self._ws is None: + return False + msg = { + "realtimeInput": { + "mediaChunks": [ + {"mimeType": "audio/pcm;rate=16000", "data": pcm_b64} + ] + } + } + try: + async with self._send_lock: + await self._ws.send(json.dumps(msg)) + return True + except websockets.exceptions.ConnectionClosed: + log.warning("send_audio_chunk: connection closed") + self._connected = False + await bus.emit("voice.error", reason="connection_closed") + return False + except Exception: + log.exception("send_audio_chunk failed") + return False + + async def send_text(self, text: str, owner: str = "send_text") -> tuple[bytes, list[str]]: + """Send text, receive audio response. Returns (audio_bytes, text_parts). + + Acquires the session lock for the entire request/response cycle so + no other consumer can steal frames from the receive side. 
+ If the connection drops mid-request, reconnects once and retries. + """ + if not await self._ensure_connected(): + raise RuntimeError("Not connected to Gemini and reconnect failed.") + + async with self._session_lock: + self._owner = owner + try: + return await self._send_text_inner(text) + except websockets.exceptions.ConnectionClosed: + log.warning("send_text: connection died on send — reconnecting once") + self._connected = False + if not await self._ensure_connected(): + raise RuntimeError("Reconnect after send failure also failed.") + return await self._send_text_inner(text) + finally: + self._owner = None + + async def _send_text_inner(self, text: str) -> tuple[bytes, list[str]]: + """Inner send/receive loop — caller must hold _session_lock.""" + request = { + "client_content": { + "turns": [{"role": "user", "parts": [{"text": text}]}], + "turn_complete": True, + } + } + async with self._send_lock: + await self._ws.send(json.dumps(request)) + + audio_chunks: list[bytes] = [] + text_parts: list[str] = [] + + while True: + try: + raw = await asyncio.wait_for(self._ws.recv(), timeout=GEMINI_WS_TIMEOUT) + except asyncio.TimeoutError: + log.warning("send_text: recv timed out") + break + except websockets.exceptions.ConnectionClosed: + log.warning("send_text: connection closed mid-stream") + self._connected = False + break + + try: + resp = json.loads(raw) + except json.JSONDecodeError: + log.warning("send_text: bad JSON from server") + continue + + if "error" in resp: + log.error("Gemini error: %s", resp["error"]) + await bus.emit("voice.error", reason=str(resp["error"])) + break + + sc = resp.get("serverContent", {}) + mt = sc.get("modelTurn", {}) + for part in mt.get("parts", []): + inline = part.get("inlineData") + if inline and inline.get("data"): + audio_chunks.append(base64.b64decode(inline["data"])) + tp = part.get("text") + if isinstance(tp, str) and tp.strip(): + text_parts.append(tp.strip()) + + input_tr = sc.get("inputTranscription", {}) + if 
input_tr.get("text"): + await bus.emit("voice.user_said", text=input_tr["text"]) + + if sc.get("turnComplete") or sc.get("generationComplete"): + break + + audio_bytes = b"".join(audio_chunks) + if audio_bytes: + await bus.emit("voice.gemini_spoke", audio_len=len(audio_bytes)) + return audio_bytes, text_parts + + def acquire_session(self, owner: str) -> "_SessionGuard": + """Return an async context manager for exclusive session ownership. + + Use as `async with client.acquire_session("live_voice"):`. + While held, no other consumer may call send_text or receive_stream. + """ + return _SessionGuard(self, owner) + + async def receive_stream(self): + """Yield server events. Caller MUST hold the session lock.""" + if self._owner is None: + raise RuntimeError( + "receive_stream requires session lock — use acquire_session() first" + ) + if not self._connected or self._ws is None: + return + try: + async for raw in self._ws: + try: + resp = json.loads(raw) + except json.JSONDecodeError: + continue + yield resp.get("serverContent", {}) + except websockets.exceptions.ConnectionClosed: + log.warning("receive_stream: connection closed") + self._connected = False + await bus.emit("voice.error", reason="connection_closed") + + async def raw_send(self, payload: dict): + """Low-level send for the live loop. Always use through send lock.""" + if not self._connected or self._ws is None: + return False + try: + async with self._send_lock: + await self._ws.send(json.dumps(payload)) + return True + except Exception: + log.exception("raw_send failed") + return False + + def status(self) -> dict[str, Any]: + return { + "connected": self._connected, + "model": GEMINI_MODEL, + "voice": GEMINI_VOICE, + "session_owner": self._owner, + "reconnect_attempts": self._reconnect_attempts, + } + + +class _SessionGuard: + """Async context manager for exclusive session ownership. + + Always releases owner + lock on exit, even on exceptions. 
+ """ + + def __init__(self, client: GeminiVoiceClient, owner: str): + self._client = client + self._owner = owner + self._held = False + + async def __aenter__(self): + await self._client._session_lock.acquire() + self._held = True + self._client._owner = self._owner + return self._client + + async def __aexit__(self, exc_type, exc, tb): + try: + self._client._owner = None + finally: + if self._held: + self._client._session_lock.release() + self._held = False + return False # don't suppress exceptions diff --git a/vendor/Sanad/gemini/script.py b/vendor/Sanad/gemini/script.py new file mode 100644 index 0000000..569e486 --- /dev/null +++ b/vendor/Sanad/gemini/script.py @@ -0,0 +1,1290 @@ +"""Gemini brain — live conversation loop using the google-genai SDK. + +Implements the VoiceBrain contract documented in `voice/model_script.py`: + + __init__(audio_io, recorder, voice_name, system_prompt) + async run() + stop() + +Owns everything Gemini-specific: the `genai.Client`, `LiveConnectConfig`, +the session connect/receive loop, VAD-based barge-in, echo suppression, +reconnect backoff. Hardware I/O is delegated to `audio_io` and per-turn +WAV capture to `recorder` — both are model-agnostic. 
+ +Env overrides: + SANAD_GEMINI_MODEL — Gemini Live model id (without "models/" prefix) +""" + +from __future__ import annotations + +import array +import asyncio +import base64 +import json +import os +import sys +import threading +import time +from pathlib import Path +from typing import Any, Optional + +import numpy as np + +from google import genai +from google.genai import types + +from Project.Sanad.config import ( + BASE_DIR, + CHUNK_SIZE, + GEMINI_API_KEY, + GEMINI_VOICE, + RECEIVE_SAMPLE_RATE, + SEND_SAMPLE_RATE, +) +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.logger import get_logger +from Project.Sanad.vision import recognition_state as _recog_state + +log = get_logger("gemini_brain") + +_SV = _cfg_section("voice", "sanad_voice") +_VAD = _cfg_section("voice", "vad") +_BI = _cfg_section("voice", "barge_in") + +_MODEL = os.environ.get( + "SANAD_GEMINI_MODEL", + "gemini-2.5-flash-native-audio-preview-12-2025", +) +_MIC_GAIN = _SV.get("mic_gain", 1.0) +_SESSION_TIMEOUT = _SV.get("session_timeout_sec", 660) +_MAX_RECONNECT_DELAY = _SV.get("max_reconnect_delay_sec", 30) +_MAX_CONSECUTIVE_ERRORS = _SV.get("max_consecutive_errors", 10) +_NO_MESSAGES_TIMEOUT = _SV.get("no_messages_timeout_sec", 30) + +_CHUNK_BYTES = CHUNK_SIZE * 2 +_SILENCE_PCM = b"\x00" * _CHUNK_BYTES + +# ── Recognition (camera + face gallery) tunables ── +_RECOG_STATE_PATH = Path(os.environ.get( + "SANAD_RECOGNITION_STATE_PATH", + str(BASE_DIR / "data" / ".recognition_state.json"), +)) +_VISION_SEND_HZ = float(os.environ.get("SANAD_VISION_SEND_HZ", "2")) +_VISION_STALE_MS = int(os.environ.get("SANAD_VISION_STALE_MS", "1500")) +_RECOG_POLL_S = float(os.environ.get("SANAD_RECOGNITION_POLL_S", "1.0")) +_FACES_DIR = Path(os.environ.get( + "SANAD_FACES_DIR", + str(BASE_DIR / "data" / "faces"), +)) +_FACES_MAX_SAMPLES = int(os.environ.get("SANAD_FACES_MAX_SAMPLES", "3")) +_FACES_PRIMER_RESIZE = int(os.environ.get("SANAD_FACES_PRIMER_RESIZE", "256")) 
# N3 — zones gallery (zone → place → linked faces). Folded into a Gemini
# primer turn so Gemini can recognise / talk about known locations and the
# people associated with them.
_ZONES_DIR = Path(os.environ.get(
    "SANAD_ZONES_DIR",
    str(BASE_DIR / "data" / "zones"),
))


# ── stdin push channel (Marcus pattern) ──────────────────────
# The GeminiSubprocess supervisor writes three line types to this process's
# stdin:
#   "frame:<base64>\n"  — a camera frame to relay to Gemini Live
#   "state:<json>\n"    — a motion-state update to inject as text
#   "profile:<json>\n"  — an audio-profile hot-swap request
# A daemon thread parses them into the caches below; the asyncio tasks
# _send_frame_loop / _send_state_loop / _audio_swap_loop drain those caches.

# Most recent decoded camera frame and the wall-clock time it arrived.
_LATEST_FRAME_LOCK = threading.Lock()
_LATEST_FRAME: dict = {"bytes": None, "ts": 0.0}

# Formatted "[STATE-…] cmd" messages waiting to be injected as text.
_STATE_LOCK = threading.Lock()
_STATE_PENDING: list[str] = []

# Maps the lower-cased "event" field of a state payload to its message tag.
# Events that are not listed here are dropped.
_STATE_TAGS = {
    "start": "[STATE-START]",
    "complete": "[STATE-DONE]",
    "interrupted": "[STATE-INTERRUPTED]",
    "error": "[STATE-ERROR]",
    "paused": "[STATE-PAUSED]",
    "resumed": "[STATE-RESUMED]",
}

# Pending audio-profile swap signalled by the parent over "profile:" stdin
# lines. _audio_swap_loop drains it inside the brain's asyncio loop.
_PROFILE_LOCK = threading.Lock()
_PROFILE_PENDING: dict = {"id": None, "reason": ""}

_VALID_PROFILES = {"builtin", "anker", "hollyland_builtin"}


def _stdin_json_object(raw: str) -> dict | None:
    """Parse *raw* as JSON; return it only when it is a JSON object.

    Valid-but-wrong-shape JSON (a list, number, string, null) yields None so
    callers can skip the line instead of failing on ``.get``.
    """
    try:
        payload = json.loads(raw)
    except Exception:
        return None
    return payload if isinstance(payload, dict) else None


def _stdin_text_field(payload: dict, key: str) -> str:
    """Return ``payload[key]`` stripped when it is a string, else ``""``."""
    value = payload.get(key)
    return value.strip() if isinstance(value, str) else ""


def _stdin_on_frame(b64: str) -> None:
    """Decode a base64 camera frame and publish it as the latest frame."""
    try:
        data = base64.b64decode(b64)
    except Exception:
        return
    if not data:
        return
    with _LATEST_FRAME_LOCK:
        _LATEST_FRAME["bytes"] = data
        _LATEST_FRAME["ts"] = time.time()


def _stdin_on_state(raw: str) -> None:
    """Turn a motion-state JSON payload into a tagged message and queue it."""
    payload = _stdin_json_object(raw)
    if payload is None:
        return
    event = _stdin_text_field(payload, "event").lower()
    cmd = _stdin_text_field(payload, "cmd")
    tag = _STATE_TAGS.get(event)
    if not tag or not cmd:
        return
    msg = f"{tag} {cmd}"
    elapsed = payload.get("elapsed_sec")
    if isinstance(elapsed, (int, float)):
        msg += f" ({float(elapsed):.1f}s)"
    reason = payload.get("reason")
    # The reason is only appended for error events.
    if reason and event == "error":
        msg += f" — {reason}"
    with _STATE_LOCK:
        _STATE_PENDING.append(msg)


def _stdin_on_profile(raw: str) -> None:
    """Stash a requested audio-profile swap for _audio_swap_loop.

    Only the target is recorded here; the swap itself happens in the asyncio
    task so PyAudio open/close stays off the stdin thread.
    """
    payload = _stdin_json_object(raw)
    if payload is None:
        return
    pid = _stdin_text_field(payload, "id").lower()
    if pid not in _VALID_PROFILES:
        return
    with _PROFILE_LOCK:
        _PROFILE_PENDING["id"] = pid
        _PROFILE_PENDING["reason"] = _stdin_text_field(payload, "reason")


# Line prefix → handler; the handler receives the text after the prefix.
_STDIN_HANDLERS = (
    ("frame:", _stdin_on_frame),
    ("state:", _stdin_on_state),
    ("profile:", _stdin_on_profile),
)


def _stdin_watcher() -> None:
    """Daemon thread — parse 'frame:' / 'state:' / 'profile:' lines off stdin.

    Best-effort: any malformed line is skipped, including valid JSON of the
    wrong shape (non-object payloads, non-string fields). Such a line must
    never terminate the watcher, because every later push from the parent
    would then be silently lost. Exits when the parent closes our stdin
    (subprocess teardown) or when reading stdin itself fails.
    """
    try:
        for line in sys.stdin:
            line = line.rstrip("\n")
            if not line:
                continue
            for prefix, handler in _STDIN_HANDLERS:
                if not line.startswith(prefix):
                    continue
                try:
                    handler(line[len(prefix):])
                except Exception:
                    # Deliberate best-effort: one bad line (e.g. an
                    # elapsed_sec too large for float) is dropped, the
                    # watcher keeps running.
                    pass
                break
    except Exception:
        # Stream-level failure (closed or undecodable stdin): stop watching.
        return


# Start the watcher at import time — it blocks harmlessly on sys.stdin
# until the supervisor sends something. Daemon so it never blocks exit.
+threading.Thread(target=_stdin_watcher, daemon=True, name="stdin-watcher").start() + + +def _audio_energy(pcm: bytes) -> int: + try: + samples = array.array("h", pcm) + return sum(abs(s) for s in samples) // len(samples) if samples else 0 + except Exception: + return 0 + + +class GeminiBrain: + """Gemini Live conversation brain — reconnect-safe.""" + + def __init__(self, audio_io, recorder, voice_name: Optional[str] = None, + system_prompt: str = ""): + self._audio = audio_io + self._mic = audio_io.mic + self._speaker = audio_io.speaker + # Kept on the brain so swap_audio_devices() can rebuild profiles that + # need DDS (`builtin`, `hollyland_builtin`) without re-init'ing. + self._audio_client = getattr(audio_io, "_audio_client", None) + # Current profile id (driven by the parent's "profile:" stdin push). + # Defaults to whatever audio_io was constructed with — `from_profile` + # sets profile_id; if SANAD_AUDIO_PROFILE override is in env, that + # value matches. + self._current_profile_id = getattr(audio_io, "profile_id", None) \ + or os.environ.get("SANAD_AUDIO_PROFILE", "builtin").strip().lower() + # Coordinates concurrent swap requests so two pending profile + # changes don't interleave mid-tear-down. + self._swap_lock: Optional[asyncio.Lock] = None # built in run() + self._recorder = recorder + self._voice = voice_name or GEMINI_VOICE + self._system_prompt = system_prompt + self._api_key = GEMINI_API_KEY + self._stop_flag = asyncio.Event() + # per-session state (reset in the outer reconnect loop) + self._speaking = False + self._stream_started = False + self._barge_block_until = 0.0 + self._ai_speak_start = 0.0 + self._last_ai_audio = 0.0 + self._done: Optional[asyncio.Event] = None + # ── Recognition flags — kept in sync with the state file by + # _recognition_state_watcher. Boot defaults come from the file (or + # the SANAD_* env vars if the file is missing). 
+ _initial = _recog_state.read(_RECOG_STATE_PATH) + self._vision_enabled = bool( + _initial.vision_enabled + or os.environ.get("SANAD_VISION_ENABLE", "0") == "1" + ) + self._face_rec_enabled = bool( + _initial.face_rec_enabled + or os.environ.get("SANAD_FACE_RECOGNITION_ENABLE", "0") == "1" + ) + self._gallery_version_primed = -1 # bumped after first successful primer + # N3 — zones knowledge toggle + primer version tracking. + self._zone_rec_enabled = bool( + _initial.zone_rec_enabled + or os.environ.get("SANAD_ZONE_RECOGNITION_ENABLE", "0") == "1" + ) + self._zones_version_primed = -1 + # "Go here" destination already announced this session (zone_id, place_id). + self._nav_target = ( + int(_initial.nav_target_zone_id), int(_initial.nav_target_place_id), + ) + # N2 — Gemini-driven locomotion enable gate (announce only; the + # actual dispatch loop lives in the parent and is wired separately). + self._movement_enabled = bool( + _initial.movement_enabled + or os.environ.get("SANAD_MOVEMENT_ENABLE", "0") == "1" + ) + + def stop(self) -> None: + """Signal the run loop to exit at the next opportunity.""" + try: + self._stop_flag.set() + except Exception: + pass + + # ─── public entry point ─────────────────────────────── + + async def run(self) -> None: + client = genai.Client(api_key=self._api_key) + config = self._build_config() + session_num = 0 + start_time = time.time() + consecutive_errors = 0 + + while not self._stop_flag.is_set(): + session_num += 1 + self._reset_turn_state() + uptime_min = (time.time() - start_time) / 60 + + try: + log.info("connecting to Gemini (session #%d, uptime %.0fm)...", + session_num, uptime_min) + async with client.aio.live.connect(model=_MODEL, config=config) as session: + log.info("connected — speak anytime!") + consecutive_errors = 0 + self._mic.flush() + self._done = asyncio.Event() + # Reset per-session primer state so re-priming on reconnect + # actually happens. 
The state watcher will re-prime as soon + # as it sees vision+face-rec (and place-rec) enabled. + self._gallery_version_primed = -1 + self._zones_version_primed = -1 + # Re-announce the active destination on reconnect. + self._nav_target = (-1, -1) + # Lazy-build the swap lock on the active asyncio loop. + if self._swap_lock is None: + self._swap_lock = asyncio.Lock() + + try: + await asyncio.wait_for( + asyncio.gather( + self._send_mic_loop(session), + self._receive_loop(session), + self._send_frame_loop(session), + self._send_state_loop(session), + self._recognition_state_watcher(session), + self._audio_swap_loop(session), + ), + timeout=_SESSION_TIMEOUT, + ) + except asyncio.TimeoutError: + log.warning("session timed out after %ds", _SESSION_TIMEOUT) + except asyncio.CancelledError: + log.warning("session cancelled") + + log.info("session #%d ended — reconnecting in 1s", session_num) + self._speaker.stop() + self._mic.flush() + await asyncio.sleep(1) + + except asyncio.CancelledError: + log.info("cancelled — stopping") + break + except KeyboardInterrupt: + log.info("keyboard interrupt — stopping") + break + except Exception as exc: + consecutive_errors += 1 + delay = min(_MAX_RECONNECT_DELAY, 2 ** consecutive_errors) + log.error("session error (#%d): %s — reconnecting in %ds", + consecutive_errors, exc, delay) + await asyncio.sleep(delay) + if consecutive_errors >= _MAX_CONSECUTIVE_ERRORS: + log.warning("%d consecutive errors — recreating client", + consecutive_errors) + try: + client = genai.Client(api_key=self._api_key) + consecutive_errors = 0 + except Exception as ce: + log.error("client recreation failed: %s", ce) + + # ─── Gemini config ──────────────────────────────────── + + def _build_config(self) -> types.LiveConnectConfig: + return types.LiveConnectConfig( + response_modalities=["AUDIO"], + speech_config=types.SpeechConfig( + voice_config=types.VoiceConfig( + prebuilt_voice_config=types.PrebuiltVoiceConfig( + voice_name=self._voice, + ), + ), + ), + 
realtime_input_config=types.RealtimeInputConfig( + automatic_activity_detection=types.AutomaticActivityDetection( + disabled=False, + start_of_speech_sensitivity=getattr( + types.StartSensitivity, + _VAD.get("start_sensitivity", "START_SENSITIVITY_HIGH"), + ), + end_of_speech_sensitivity=getattr( + types.EndSensitivity, + _VAD.get("end_sensitivity", "END_SENSITIVITY_LOW"), + ), + prefix_padding_ms=_VAD.get("prefix_padding_ms", 20), + silence_duration_ms=_VAD.get("silence_duration_ms", 200), + ), + ), + input_audio_transcription=types.AudioTranscriptionConfig(), + output_audio_transcription=types.AudioTranscriptionConfig(), + system_instruction=types.Content( + parts=[types.Part(text=self._system_prompt)], + ), + ) + + # ─── state helpers ──────────────────────────────────── + + def _reset_turn_state(self) -> None: + self._speaking = False + self._stream_started = False + self._barge_block_until = 0.0 + self._ai_speak_start = 0.0 + self._last_ai_audio = 0.0 + + def _interrupt(self, source: str = "local") -> None: + self._speaking = False + self._stream_started = False + self._speaker.stop() + self._mic.flush() + self._recorder.finish_turn() + log.info("interrupt (%s)", source) + + # ─── mic send loop ──────────────────────────────────── + + async def _send_mic_loop(self, session: Any) -> None: + threshold = _BI.get("threshold", 500) + chunks_needed = _BI.get("loud_chunks_needed", 3) + cooldown = _BI.get("cooldown_sec", 0.3) + echo_suppress_below = _BI.get("echo_suppress_below", 500) + grace = _BI.get("ai_speak_grace_sec", 0.15) + + loop = asyncio.get_event_loop() + loud_count = 0 + last_activity = time.time() + + while not self._done.is_set() and not self._stop_flag.is_set(): + try: + raw = await loop.run_in_executor( + None, self._mic.read_chunk, _CHUNK_BYTES, + ) + except Exception: + break + + samples = np.frombuffer(raw, dtype=np.int16).astype(np.float32) + samples = np.clip(samples * _MIC_GAIN, -32768, 32767).astype(np.int16) + data = samples.tobytes() + energy 
= _audio_energy(data) + now = time.time() + + # Barge-in: after AI starts speaking, sustained user energy cuts it. + if self._speaking and now >= self._barge_block_until: + if (now - self._ai_speak_start) >= grace: + if energy > threshold: + loud_count += 1 + else: + loud_count = max(0, loud_count - 1) + if loud_count > chunks_needed: + log.info("BARGE-IN (e=%d)", energy) + self._interrupt("barge-in") + loud_count = 0 + self._barge_block_until = now + cooldown + + # Echo suppression: while AI is speaking, mask quiet frames so the + # mic doesn't feed the model its own voice bleed. + send_data = data + if self._speaking and energy < echo_suppress_below: + send_data = _SILENCE_PCM + + # Record user audio when clearly speaking and AI isn't. + if energy > 250 and not self._speaking: + self._recorder.capture_user(data) + + # Keep-alive watchdog + if energy > 250: + last_activity = now + elif now - last_activity > 10: + log.info("alive (no speech %.0fs, e=%d)", + now - last_activity, energy) + last_activity = now + + try: + await session.send_realtime_input( + audio=types.Blob( + data=send_data, + mime_type=f"audio/pcm;rate={SEND_SAMPLE_RATE}", + ), + ) + except asyncio.CancelledError: + return + except Exception as exc: + log.warning("mic send failed: %s — ending session", exc) + self._done.set() + return + + await asyncio.sleep(CHUNK_SIZE / SEND_SAMPLE_RATE) + + log.info("send_mic task ended") + + # ─── receive loop ───────────────────────────────────── + + async def _receive_loop(self, session: Any) -> None: + loop = asyncio.get_event_loop() + try: + last_recv = time.time() + while not self._done.is_set() and not self._stop_flag.is_set(): + async for response in session.receive(): + last_recv = time.time() + if self._done.is_set(): + break + + if hasattr(response, "go_away") and response.go_away is not None: + log.info("server going away — will reconnect") + self._done.set() + return + + sc = response.server_content + if sc is None: + continue + + if sc.interrupted is 
True: + if self._speaking: + log.info("Gemini interrupted") + self._interrupt("gemini") + continue + + if sc.input_transcription: + text = (sc.input_transcription.text or "").strip() + if text and not self._speaking: + log.info("USER: %s", text) + self._recorder.add_user_text(text) + + if sc.output_transcription: + text = (sc.output_transcription.text or "").strip() + if text: + # Emit as "BOT:" (no space before colon) so the + # supervisor's _track_line can parse it the same + # way it parses "USER:" — this is the channel the + # movement dispatcher (N2) reads Gemini's own + # spoken phrases from. Keep in lock-step with + # GeminiSubprocess._track_line. + log.info("BOT: %s", text) + self._recorder.add_robot_text(text) + + if sc.model_turn: + for part in sc.model_turn.parts: + if part.inline_data and part.inline_data.data: + now = time.time() + if not self._speaking: + self._ai_speak_start = now + self._speaking = True + self._last_ai_audio = now + raw_audio = part.inline_data.data + self._recorder.capture_robot(raw_audio) + audio = np.frombuffer(raw_audio, dtype=np.int16) + if not self._stream_started: + await loop.run_in_executor( + None, self._speaker.begin_stream, + ) + self._stream_started = True + await loop.run_in_executor( + None, self._speaker.send_chunk, + audio, RECEIVE_SAMPLE_RATE, + ) + + if sc.turn_complete: + if (self._speaking and self._stream_started + and not self._speaker.interrupted): + log.info("speaker %.1fs", self._speaker.total_sent_sec) + await loop.run_in_executor( + None, self._speaker.wait_finish, + ) + elif self._speaking and self._speaker.interrupted: + log.info("speaker interrupted") + self._speaking = False + self._stream_started = False + self._mic.flush() + self._recorder.finish_turn() + log.info("listening") + + if time.time() - last_recv > _NO_MESSAGES_TIMEOUT: + log.warning("no messages from Gemini for %ds — session dead", + _NO_MESSAGES_TIMEOUT) + break + await asyncio.sleep(0.1) + + except Exception as exc: + 
log.warning("receive ended: %s", exc) + finally: + self._done.set() + + # ─── vision-state announcer ─────────────────────────── + # Injects the camera state into the live session as text context. + # On a live toggle Gemini is told to say so out loud ("I can see you + # now" / "I can't see you anymore"); at session start it's silent + # standing context so "can you see me?" is answered honestly. + + async def _announce_vision_state(self, session: Any, enabled: bool, + is_toggle: bool) -> None: + if is_toggle and enabled: + text = ( + "[VISION ON] Your camera was just enabled — you can now see " + "the user through it. Briefly tell them you can see them now, " + "in your normal Khaleeji style (for example: " + "'هلا، الحين أشوفك زين')." + ) + elif is_toggle and not enabled: + text = ( + "[VISION OFF] Your camera was just disabled — you can no " + "longer see anything. Briefly tell the user you can't see " + "them anymore. If they later ask whether you can see them, " + "tell them to enable the camera from the dashboard." + ) + elif enabled: # session start, camera already on + text = ( + "[VISION STATUS] Your camera is ON — you can see the user " + "through it. Do not announce this unprompted; just answer " + "naturally if they ask what you see." + ) + else: # session start, camera off + text = ( + "[VISION STATUS] Your camera is OFF — you cannot see anything " + "right now. If the user asks whether you can see them, tell " + "them to enable the camera from the dashboard. Do not announce " + "this unprompted." + ) + try: + await session.send_realtime_input(text=text) + log.info("vision-state injected (enabled=%s, toggle=%s)", + enabled, is_toggle) + except asyncio.CancelledError: + raise + except Exception as exc: + log.warning("vision-state inject failed: %s", exc) + + # ─── face-recognition-state announcer ───────────────── + # Same idea as _announce_vision_state, for the face-recognition toggle. 
# On a live OFF toggle it also tells Gemini to disregard the gallery —
# so OFF takes effect immediately instead of lingering until reconnect.

async def _announce_facerec_state(self, session: Any, enabled: bool,
                                  is_toggle: bool) -> None:
    """Inject the face-recognition state into the live session as text.

    The prompt is chosen from the (is_toggle, enabled) pair: a live toggle
    asks Gemini to acknowledge the change out loud, while the session-start
    variants are standing context that must not be announced unprompted.

    Args:
        session: Live session object; only ``send_realtime_input`` is used.
        enabled: Whether face recognition is currently on.
        is_toggle: True for a live toggle, False for the session-start
            status message.

    Cancellation is re-raised; any other send failure is logged as a
    warning and swallowed so the caller keeps running.
    """
    # One prompt per (is_toggle, enabled) combination.
    prompts = {
        (True, True): (
            "[FACE RECOGNITION ON] Face recognition was just enabled — "
            "you'll be shown the people you know in a moment. Briefly "
            "tell the user you can now recognise the people you know, in "
            "your normal Khaleeji style."
        ),
        (True, False): (
            "[FACE RECOGNITION OFF] Face recognition was just disabled. "
            "Disregard the face gallery you were given earlier — stop "
            "greeting people by name and do not identify anyone. Briefly "
            "tell the user you'll no longer recognise faces."
        ),
        # Session start, face rec already on.
        (False, True): (
            "[FACE RECOGNITION STATUS] Face recognition is ON — when you "
            "see someone you've been shown in the gallery, greet them by "
            "name. Do not announce this unprompted."
        ),
        # Session start, face rec off.
        (False, False): (
            "[FACE RECOGNITION STATUS] Face recognition is OFF — you "
            "cannot identify people. If the user asks who someone is or "
            "whether you recognise them, tell them to enable face "
            "recognition from the dashboard. Do not announce this "
            "unprompted."
        ),
    }
    message = prompts[(bool(is_toggle), bool(enabled))]
    try:
        await session.send_realtime_input(text=message)
        log.info("face-rec-state injected (enabled=%s, toggle=%s)",
                 enabled, is_toggle)
    except asyncio.CancelledError:
        raise
    except Exception as err:
        log.warning("face-rec-state inject failed: %s", err)

# ─── zone-recognition-state announcer (N3) ────────────
# Same idea as _announce_facerec_state, for the zone-recognition toggle.
# On a live OFF toggle it also tells Gemini to disregard the zones and
# places it was given, so OFF takes effect immediately instead of lingering.

async def _announce_zonerec_state(self, session: Any, enabled: bool,
                                  is_toggle: bool) -> None:
    """Inject the zone-recognition state into the live session as text.

    The prompt is chosen from the (is_toggle, enabled) pair: a live toggle
    asks Gemini to acknowledge the change out loud, while the session-start
    variants are standing context that must not be announced unprompted.

    Args:
        session: Live session object; only ``send_realtime_input`` is used.
        enabled: Whether zone recognition is currently on.
        is_toggle: True for a live toggle, False for the session-start
            status message.

    Cancellation is re-raised; any other send failure is logged as a
    warning and swallowed so the caller keeps running.
    """
    # One prompt per (is_toggle, enabled) combination.
    prompts = {
        (True, True): (
            "[ZONE RECOGNITION ON] You were just given the zones and places "
            "you know (and the people associated with them). Briefly tell "
            "the user you now know your way around, in your normal Khaleeji "
            "style."
        ),
        (True, False): (
            "[ZONE RECOGNITION OFF] Zone recognition was just disabled. "
            "Disregard the zones and places you were given earlier — stop "
            "naming rooms or locations. Briefly tell the user you'll no "
            "longer recognise places."
        ),
        # Session start, zone rec already on.
        (False, True): (
            "[ZONE RECOGNITION STATUS] Zone recognition is ON — when you see "
            "or are asked about a zone/place you've been told about, you may "
            "name it and use its description. Do not announce this "
            "unprompted."
        ),
        # Session start, zone rec off.
        (False, False): (
            "[ZONE RECOGNITION STATUS] Zone recognition is OFF — you do not "
            "know any specific zones or places. If the user asks where they "
            "are or to go somewhere by name, tell them to enable zone "
            "recognition from the dashboard. Do not announce this "
            "unprompted."
        ),
    }
    message = prompts[(bool(is_toggle), bool(enabled))]
    try:
        await session.send_realtime_input(text=message)
        log.info("zone-rec-state injected (enabled=%s, toggle=%s)",
                 enabled, is_toggle)
    except asyncio.CancelledError:
        raise
    except Exception as err:
        log.warning("zone-rec-state inject failed: %s", err)

# ─── navigation-target announcer (N3 "go here") ───────
# When the operator sets a destination, tell Gemini which place to go to
# and show it the reference photo(s). Actual robot motion is wired by N2;
# this establishes the goal + visual reference.
+ + async def _announce_nav_target(self, session: Any, + zone_id: int, place_id: int) -> None: + if not zone_id or not place_id: + try: + await session.send_realtime_input(text=( + "[DESTINATION CLEARED] You have no specific destination right " + "now. Do not announce this unprompted." + )) + except asyncio.CancelledError: + raise + except Exception as exc: + log.warning("nav-clear inject failed: %s", exc) + return + try: + from Project.Sanad.vision.zone_gallery import ZoneGallery + gallery = ZoneGallery(_ZONES_DIR) + place = gallery.get_place(zone_id, place_id) + zone = gallery.get_zone(zone_id) + except Exception as exc: + log.warning("nav-target resolve failed: %s", exc) + return + if place is None: + log.info("nav-target zone_%d/place_%d not found — skipping", zone_id, place_id) + return + place_name = place.name or f"place {place_id}" + zone_name = (zone.name if zone else None) or f"zone {zone_id}" + instr = ( + f"[GO HERE] The user has set your destination to '{place_name}' in " + f"'{zone_name}'." + ) + if place.description: + instr += f" Notes: {place.description}." + instr += ( + " The image(s) below show what it looks like so you can recognise it. " + "If walking is enabled you will head there; if it is off, tell the " + "user to enable movement from the dashboard. Briefly acknowledge the " + "destination in your normal Khaleeji style." 
+ ) + parts: list[dict[str, Any]] = [{"text": instr}] + for p in place.sample_paths[:_FACES_MAX_SAMPLES]: + try: + raw = p.read_bytes() + except OSError: + continue + jpeg = gallery._resize_for_primer(raw, _FACES_PRIMER_RESIZE) or raw + parts.append({"inline_data": {"mime_type": "image/jpeg", "data": jpeg}}) + try: + await session.send_client_content( + turns=[{"role": "user", "parts": parts}], turn_complete=True, + ) + log.info("nav-target injected → zone_%d/place_%d (%s)", + zone_id, place_id, place_name) + except asyncio.CancelledError: + raise + except Exception as exc: + log.warning("nav-target inject failed: %s", exc) + + # ─── movement-state announcer (N2) ──────────────────── + # Spoken confirmation when the operator enables / disables Gemini-driven + # locomotion from the dashboard. The actual movement dispatch loop lives + # in the parent; this only gives the user audible feedback on the toggle. + + async def _announce_movement_state(self, session: Any, enabled: bool, + is_toggle: bool) -> None: + if is_toggle and enabled: + text = ( + "[MOVEMENT ON] Walking is now enabled — you can move when the " + "user asks. Briefly tell the user movement is enabled and they " + "can ask you to walk, in your normal Khaleeji style." + ) + elif is_toggle and not enabled: + text = ( + "[MOVEMENT OFF] Walking was just disabled — you must not move. " + "Briefly tell the user movement is now off. If they ask you to " + "walk, tell them to enable movement from the dashboard first." + ) + elif enabled: # session start, movement already on + text = ( + "[MOVEMENT STATUS] Walking is ON — you may move when asked. Do " + "not announce this unprompted." + ) + else: # session start, movement off + text = ( + "[MOVEMENT STATUS] Walking is OFF — you cannot move right now. " + "If the user asks you to walk, tell them to enable movement " + "from the dashboard. Do not announce this unprompted." 
+ ) + try: + await session.send_realtime_input(text=text) + log.info("movement-state injected (enabled=%s, toggle=%s)", + enabled, is_toggle) + except asyncio.CancelledError: + raise + except Exception as exc: + log.warning("movement-state inject failed: %s", exc) + + # ─── audio profile hot-swap ─────────────────────────── + # The parent (GeminiSubprocess) polls pactl for the Anker USB device + # and writes "profile:" lines to our stdin. _stdin_watcher parses + # them into _PROFILE_PENDING; this loop drains the flag on the asyncio + # loop and performs the actual swap. The brain's read/write sites + # (_send_mic_loop / _receive_loop) keep using self._mic / self._speaker — + # an atomic ref reassignment is enough because nothing caches them in + # a loop-local variable (verified in exploration). + + async def _audio_swap_loop(self, session: Any) -> None: + while not self._done.is_set() and not self._stop_flag.is_set(): + await asyncio.sleep(0.25) + with _PROFILE_LOCK: + target = _PROFILE_PENDING.get("id") + reason = _PROFILE_PENDING.get("reason", "") + _PROFILE_PENDING["id"] = None + _PROFILE_PENDING["reason"] = "" + if not target or target == self._current_profile_id: + continue + try: + await self.swap_audio_devices(session, target, reason=reason) + except asyncio.CancelledError: + return + except Exception as exc: + log.warning("audio swap failed: %s", exc) + + async def swap_audio_devices(self, session: Any, profile_id: str, + reason: str = "") -> None: + """Hot-swap mic+speaker to `profile_id` without dropping the live + Gemini session. Idempotent (no-op if already on `profile_id`). + + Order matters: start the new mic BEFORE we tear the old one down, + so a transient PyAudio failure (e.g. udev hasn't exposed Anker yet) + leaves the old backend in place. After 3 retries with backoff we + give up and log WARN — the watcher will retry on its next tick. 
+ """ + if self._swap_lock is None: + log.warning("swap requested before session loop started — skipping") + return + async with self._swap_lock: + if profile_id == self._current_profile_id: + return + prev = self._current_profile_id + log.info("audio swap: %s → %s (reason=%s)", prev, profile_id, reason or "—") + + # Build + start the new mic. Retry: pactl can see the device + # before PyAudio's get_device_count refreshes. + try: + from Project.Sanad.voice.audio_io import AudioIO as _AudioIO + except Exception as exc: + log.error("audio swap: AudioIO import failed: %s", exc) + return + new_mic = new_spk = None + last_exc: Optional[BaseException] = None + for attempt in range(3): + try: + new_mic, new_spk = _AudioIO.build_backends( + profile_id, audio_client=self._audio_client, + ) + # mic.start() opens PyAudio + spawns reader thread. + # speaker is lazy (opens on first send_chunk). + await asyncio.to_thread(new_mic.start) + break + except Exception as exc: + last_exc = exc + # Tear down a partially-built backend so the next attempt + # gets a clean slate; don't leak PyAudio handles. + if new_mic is not None: + try: + await asyncio.to_thread(new_mic.stop) + except Exception: + pass + new_mic = new_spk = None + log.info("audio swap attempt %d failed: %s — retry in 0.4s", + attempt + 1, exc) + await asyncio.sleep(0.4) + if new_mic is None or new_spk is None: + log.warning("audio swap %s → %s: all 3 attempts failed (%s); " + "keeping current profile", + prev, profile_id, last_exc) + return + + # Drain the old playback so any in-flight AI utterance stops + # (interrupts mid-word — acceptable per spec, <1s gap). + # MUST be awaited via to_thread: _PyAudioSpeaker.stop now + # takes a per-instance RLock and an in-flight send_chunk on + # the executor may be holding it across a back-pressured + # PortAudio write. Calling stop() synchronously on the + # event-loop thread would wedge the entire loop (mic, + # vision, session.receive) until the pulse buffer drains. 
+ try: + await asyncio.to_thread(self._speaker.stop) + except Exception: + pass + try: + self._mic.flush() + except Exception: + pass + + # Atomic ref swap — next read_chunk / send_chunk hits new. + old_mic, old_spk = self._mic, self._speaker + self._mic = new_mic + self._speaker = new_spk + self._current_profile_id = profile_id + self._reset_turn_state() + + # Tear down old AFTER the ref swap so any executor call still + # in flight finishes against the old handle and the next loop + # iteration picks up the new one cleanly. + try: + await asyncio.to_thread(old_mic.stop) + except Exception: + pass + try: + await asyncio.to_thread(old_spk.stop) + except Exception: + pass + + # Silent context to Gemini — so it knows the input chain changed + # if asked (matches the _announce_vision_state pattern). + try: + await session.send_realtime_input(text=( + f"[AUDIO SWITCH] Mic + speaker are now on the {profile_id!s} " + f"audio profile. Do not announce this unprompted; just keep " + f"replying normally — the user's voice may sound clearer or " + f"different on the new device." + )) + except asyncio.CancelledError: + raise + except Exception as exc: + log.warning("audio-swap announce failed: %s", exc) + log.info("audio swap complete: %s → %s", prev, profile_id) + + # ─── recognition state watcher ──────────────────────── + # Polls data/.recognition_state.json at SANAD_RECOGNITION_POLL_S Hz and + # mirrors vision_enabled / face_rec_enabled into in-memory flags so the + # rest of the session can react WITHOUT a Gemini reconnect. 
+ + async def _recognition_state_watcher(self, session: Any) -> None: + last_mtime = 0.0 + last_state = _recog_state.RecognitionState( + vision_enabled=self._vision_enabled, + face_rec_enabled=self._face_rec_enabled, + gallery_version=self._gallery_version_primed, + zone_rec_enabled=self._zone_rec_enabled, + zones_version=self._zones_version_primed, + movement_enabled=self._movement_enabled, + ) + # Best-effort initial primer if face_rec is already on at session start. + if self._face_rec_enabled and self._vision_enabled: + try: + cur = _recog_state.read(_RECOG_STATE_PATH) + await self._send_gallery_primer(session, cur.gallery_version) + except Exception as exc: + log.warning("initial gallery primer failed: %s", exc) + + # N3 — initial zones primer if zone recognition is already on. Unlike + # faces this does NOT require vision: name+description-only places still + # give Gemini useful knowledge to talk about. + if self._zone_rec_enabled: + try: + cur = _recog_state.read(_RECOG_STATE_PATH) + await self._send_zone_primer(session, cur.zones_version) + except Exception as exc: + log.warning("initial zone primer failed: %s", exc) + + # Tell Gemini the current camera + recognition + movement state at + # session start — silent standing context so "can you see me?" / "do + # you know who I am?" are answered honestly even if nothing is toggled. + await self._announce_vision_state( + session, self._vision_enabled, is_toggle=False, + ) + await self._announce_facerec_state( + session, self._face_rec_enabled, is_toggle=False, + ) + await self._announce_zonerec_state( + session, self._zone_rec_enabled, is_toggle=False, + ) + await self._announce_movement_state( + session, self._movement_enabled, is_toggle=False, + ) + # N3 — announce the active "go here" destination (if any). _nav_target + # was reset to (-1,-1) per session so this fires on every reconnect. 
+ try: + cur = _recog_state.read(_RECOG_STATE_PATH) + nav = (cur.nav_target_zone_id, cur.nav_target_place_id) + if nav != self._nav_target: + await self._announce_nav_target(session, nav[0], nav[1]) + self._nav_target = nav + except Exception as exc: + log.warning("initial nav-target announce failed: %s", exc) + + while not self._done.is_set() and not self._stop_flag.is_set(): + await asyncio.sleep(_RECOG_POLL_S) + try: + st = _RECOG_STATE_PATH.stat() + except FileNotFoundError: + continue + except Exception: + continue + if st.st_mtime == last_mtime: + continue + last_mtime = st.st_mtime + new_state = _recog_state.read(_RECOG_STATE_PATH) + + # Vision toggle — instant. Announce it out loud so Gemini reacts + # ("I can see you now" / "I can't see you anymore"). + if new_state.vision_enabled != last_state.vision_enabled: + self._vision_enabled = new_state.vision_enabled + log.info("vision toggled → %s", self._vision_enabled) + await self._announce_vision_state( + session, self._vision_enabled, is_toggle=True, + ) + + # Face-rec toggle — announce it out loud. The OFF announcement + # also tells Gemini to disregard the gallery, so OFF takes effect + # immediately instead of lingering until the next reconnect. 
+ if new_state.face_rec_enabled != last_state.face_rec_enabled: + self._face_rec_enabled = new_state.face_rec_enabled + if self._face_rec_enabled: + log.info("face rec enabled — announcing + sending primer") + else: + log.info("face rec disabled — telling Gemini to " + "disregard the gallery") + await self._announce_facerec_state( + session, self._face_rec_enabled, is_toggle=True, + ) + + # Conditions for re-priming: + # - face_rec just turned ON (no_face_rec_before) + # - gallery version bumped since the last primer + face_rec_just_on = ( + new_state.face_rec_enabled and not last_state.face_rec_enabled + ) + gallery_changed = ( + new_state.gallery_version != self._gallery_version_primed + ) + if (self._face_rec_enabled + and (face_rec_just_on or gallery_changed) + and self._vision_enabled): + try: + await self._send_gallery_primer( + session, new_state.gallery_version, + ) + except Exception as exc: + log.warning("gallery primer failed: %s", exc) + + # N3 — zone-recognition toggle (announce out loud, like face-rec). + if new_state.zone_rec_enabled != last_state.zone_rec_enabled: + self._zone_rec_enabled = new_state.zone_rec_enabled + log.info("zone rec toggled → %s", self._zone_rec_enabled) + await self._announce_zonerec_state( + session, self._zone_rec_enabled, is_toggle=True, + ) + + # Re-prime zones when zone-rec just turned ON or the zones version + # bumped (any zone/place/face-link/photo CRUD). No vision needed. + zone_rec_just_on = ( + new_state.zone_rec_enabled and not last_state.zone_rec_enabled + ) + zones_changed = ( + new_state.zones_version != self._zones_version_primed + ) + if self._zone_rec_enabled and (zone_rec_just_on or zones_changed): + try: + await self._send_zone_primer( + session, new_state.zones_version, + ) + except Exception as exc: + log.warning("zone primer failed: %s", exc) + + # N3 — "go here" destination changed (set or cleared). Announce + + # show the reference photo. 
Diffed against the announced tuple so a + # CRUD-only version bump above doesn't double-fire this. + nav = (new_state.nav_target_zone_id, new_state.nav_target_place_id) + if nav != self._nav_target: + self._nav_target = nav + await self._announce_nav_target(session, nav[0], nav[1]) + + # N2 — movement enable/disable toggle (spoken confirmation only). + if new_state.movement_enabled != last_state.movement_enabled: + self._movement_enabled = new_state.movement_enabled + log.info("movement toggled → %s", self._movement_enabled) + await self._announce_movement_state( + session, self._movement_enabled, is_toggle=True, + ) + + last_state = new_state + + # ─── camera frame send loop ─────────────────────────── + # Reads the latest JPEG from the _LATEST_FRAME cache (fed by the + # _stdin_watcher thread, which the GeminiSubprocess supervisor pushes + # 'frame:' lines into) and relays it to Gemini Live at + # _VISION_SEND_HZ. Only active when self._vision_enabled. Skips frames + # older than _VISION_STALE_MS so a stopped/unplugged camera doesn't + # waste tokens on a frozen scene. + + async def _send_frame_loop(self, session: Any) -> None: + period = 1.0 / max(0.5, _VISION_SEND_HZ) + stale_s = _VISION_STALE_MS / 1000.0 + backoff = 0.0 + last_sent_ts = 0.0 + + while not self._done.is_set() and not self._stop_flag.is_set(): + await asyncio.sleep(max(period, backoff)) + if not self._vision_enabled: + continue + with _LATEST_FRAME_LOCK: + data = _LATEST_FRAME.get("bytes") + ts = _LATEST_FRAME.get("ts", 0.0) + if not data: + continue + # Stale — supervisor stopped pushing (camera off / unplugged). + if (time.time() - ts) > stale_s: + continue + # De-dup — don't re-send a frame we already relayed. 
+ if ts == last_sent_ts: + continue + try: + await session.send_realtime_input( + video=types.Blob(data=data, mime_type="image/jpeg"), + ) + last_sent_ts = ts + backoff = 0.0 + except asyncio.CancelledError: + return + except Exception as exc: + log.warning("frame send failed: %s", exc) + backoff = min(backoff * 2 + 0.5, 5.0) + + # ─── motion-state inject loop ───────────────────────── + # Drains _STATE_PENDING (fed by the _stdin_watcher from 'state:' lines + # the supervisor pushes when the arm starts/finishes/errors a motion) + # and injects each as silent text context into the live session, so + # Gemini can answer "what are you doing?" honestly. Per persona, Gemini + # reads these for context but does not narrate them unprompted. + + async def _send_state_loop(self, session: Any) -> None: + while not self._done.is_set() and not self._stop_flag.is_set(): + await asyncio.sleep(0.1) + with _STATE_LOCK: + if not _STATE_PENDING: + continue + pending = list(_STATE_PENDING) + _STATE_PENDING.clear() + for msg in pending: + try: + await session.send_realtime_input(text=msg) + log.info("STATE injected: %s", msg) + except asyncio.CancelledError: + return + except Exception as exc: + # Some SDK versions may not accept text on + # send_realtime_input — log once-ish and keep going; + # motion still works, only this context channel is lost. + log.warning("state inject failed: %s", exc) + + # ─── face gallery primer ────────────────────────────── + # Builds one multimodal turn carrying the entire face gallery + a Khaleeji + # greeting instruction, and sends it via send_client_content. Gemini keeps + # this in session context until reconnect. Re-sent on gallery_version bumps. 
+ + async def _send_gallery_primer(self, session: Any, version: int) -> None: + try: + from Project.Sanad.vision.face_gallery import FaceGallery + except Exception as exc: + log.info("face gallery module unavailable: %s", exc) + return + + gallery = FaceGallery(_FACES_DIR) + try: + entries = gallery.load_for_primer( + max_samples_per_face=_FACES_MAX_SAMPLES, + resize_long_side=_FACES_PRIMER_RESIZE, + ) + except Exception as exc: + log.warning("face gallery load failed: %s", exc) + return + + if not entries: + log.info("face gallery empty — primer skipped (v.%d)", version) + self._gallery_version_primed = version + return + + parts: list[dict[str, Any]] = [{ + "text": ( + "GALLERY PRIMER (do not reply to this turn). " + "Below are people you know. When the live camera shows one of " + "them, greet them warmly by name in UAE Khaleeji dialect " + "(for example: 'هلا والله يا كسام، شحالك؟'), and you may use " + "the notes about them to make the conversation personal. " + "For faces NOT in this gallery, welcome them as a guest " + "without inventing a name. Greet each person only once per " + "minute to avoid repetition." + ), + }] + for entry, jpegs in entries: + label = ( + f"This person is named {entry.name}." + if entry.name + else "This person's name is unknown — greet as guest." 
+ ) + if entry.description: + label += f" Notes about them: {entry.description}" + parts.append({"text": f"\n— {label}"}) + for jpeg in jpegs: + parts.append({ + "inline_data": {"mime_type": "image/jpeg", "data": jpeg}, + }) + + try: + await session.send_client_content( + turns=[{"role": "user", "parts": parts}], + turn_complete=True, + ) + except Exception as exc: + log.warning("primer send failed: %s", exc) + return + self._gallery_version_primed = version + log.info("face gallery primed: %d person(s), v.%d", len(entries), version) + + # ─── zones primer (N3) ──────────────────────────────── + # One multimodal turn carrying every zone, its places (name + description + + # reference photos), and the people linked to each place. A place may have + # NO photos (name + description only), so empty image lists are tolerated. + + async def _send_zone_primer(self, session: Any, version: int) -> None: + try: + from Project.Sanad.vision.zone_gallery import ZoneGallery + except Exception as exc: + log.info("zone gallery module unavailable: %s", exc) + return + + gallery = ZoneGallery(_ZONES_DIR) + try: + entries = gallery.load_for_primer( + max_samples_per_place=_FACES_MAX_SAMPLES, + resize_long_side=_FACES_PRIMER_RESIZE, + ) + except Exception as exc: + log.warning("zone gallery load failed: %s", exc) + return + + if not entries: + log.info("zone gallery empty — primer skipped (v.%d)", version) + self._zones_version_primed = version + return + + # Resolve linked face ids → names once (cheap, small galleries). + face_names: dict[int, str] = {} + try: + from Project.Sanad.vision.face_gallery import FaceGallery + for fe in FaceGallery(_FACES_DIR).list(): + if fe.name: + face_names[fe.id] = fe.name + except Exception: + pass + + parts: list[dict[str, Any]] = [{ + "text": ( + "ZONES PRIMER (do not reply to this turn). Below are the zones " + "and places you know, with the people often found at each place. 
" + "Use them to answer where things are, to name a place when the " + "live camera shows one, and to make directions personal. Do not " + "invent zones or places that are not listed here." + ), + }] + n_zones = n_places = 0 + for zone, places in entries: + n_zones += 1 + zhdr = f"\n# Zone: {zone.name or '(unnamed)'}" + if zone.description: + zhdr += f" — {zone.description}" + parts.append({"text": zhdr}) + if not places: + parts.append({"text": " (no places yet)"}) + for place, jpegs in places: + n_places += 1 + label = f"\n - Place: {place.name or '(unnamed)'}" + if place.description: + label += f" — {place.description}" + people = [face_names[f] for f in place.face_ids if f in face_names] + if people: + label += f" | People often here: {', '.join(people)}" + parts.append({"text": label}) + for jpeg in jpegs: + parts.append({ + "inline_data": {"mime_type": "image/jpeg", "data": jpeg}, + }) + + try: + await session.send_client_content( + turns=[{"role": "user", "parts": parts}], + turn_complete=True, + ) + except Exception as exc: + log.warning("zone primer send failed: %s", exc) + return + self._zones_version_primed = version + log.info("zones primed: %d zone(s), %d place(s), v.%d", + n_zones, n_places, version) diff --git a/vendor/Sanad/gemini/subprocess.py b/vendor/Sanad/gemini/subprocess.py new file mode 100644 index 0000000..7ed8060 --- /dev/null +++ b/vendor/Sanad/gemini/subprocess.py @@ -0,0 +1,604 @@ +"""Gemini live subprocess supervisor. + +Spawns `voice/sanad_voice.py` as a managed child with `SANAD_VOICE_BRAIN=gemini`, +tails the child's stdout, and extracts state transitions + user transcripts +from the Gemini-specific log lines emitted by `gemini/script.py:GeminiBrain`. + +When a new model is added, build its own sibling supervisor (see +`voice/model_subprocess.py` for the template) — do not refactor this file. 
+""" + +from __future__ import annotations + +import base64 +import json +import os +import signal +import subprocess +import sys +import threading +from collections import deque +from datetime import datetime +from typing import Any, Optional, Union + +from pathlib import Path + +from Project.Sanad.config import BASE_DIR, LOGS_DIR, SCRIPTS_DIR, LIVE_TUNE +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.logger import get_logger + +log = get_logger("gemini_subprocess") + +_LS_CFG = _cfg_section("gemini", "subprocess") + +# Camera frame forwarding — push the latest JPEG to the child over stdin +# at this interval (seconds). 0.5 s ≈ 2 fps, matching the child's +# SANAD_VISION_SEND_HZ default. The child de-stales + relays to Gemini. +_FRAME_FORWARD_INTERVAL_S = float(_LS_CFG.get("frame_forward_interval_sec", 0.5)) + +# Audio profile watcher — poll pactl for the Anker USB device at this +# interval, send "profile:" to the child on every state change. +_AUDIO_WATCH_INTERVAL_S = float(_LS_CFG.get("audio_watch_interval_sec", 1.5)) + +# The Anker profile id, as defined in voice/audio_devices.py. When this +# profile is fully plugged (both sink + source present), we switch the +# child to "anker"; otherwise we hold the boot fallback profile. +_ANKER_PROFILE_ID = "anker_powerconf" + +def _resolve_live_script() -> Path: + """Locate the voice script to run as subprocess. + + Default: voice/sanad_voice.py (the canonical G1 built-in mic + + AudioClient speaker path). Override with SANAD_LIVE_SCRIPT. 
class GeminiSubprocess:
    """Supervisor for one Gemini live voice child process.

    Spawns the voice script, tails its combined stdout/stderr on a reader
    thread, derives a coarse session state plus user/bot transcripts from
    the log lines, and pushes camera frames, motion state and audio-profile
    changes to the child over its stdin.
    """

    # Profile ids that send_profile() is willing to forward to the child.
    _KNOWN_PROFILES = frozenset({"builtin", "anker", "hollyland_builtin"})

    def __init__(self):
        self._lock = threading.Lock()
        self.process: subprocess.Popen | None = None
        self.log_tail: deque[str] = deque(maxlen=LOG_TAIL_SIZE)
        self.user_transcript: deque[str] = deque(maxlen=TRANSCRIPT_TAIL_SIZE)
        # Gemini's OWN spoken text (output transcription). The movement
        # dispatcher (N2) polls this the way LiveVoiceLoop polls
        # user_transcript — it reads what Gemini *said* and fires motion on
        # a confirmation-phrase match (the Marcus pattern). Also handy for
        # surfacing the bot side of the conversation on the dashboard.
        self.bot_transcript: deque[str] = deque(maxlen=TRANSCRIPT_TAIL_SIZE)
        # N2 Phase 3 — callbacks fired with each new BOT: line (Gemini's own
        # spoken text). The MovementDispatcher registers here to drive
        # locomotion off Gemini's confirmation phrases. Fired on the reader
        # thread; callbacks must be cheap / non-blocking (the dispatcher just
        # enqueues to its own worker).
        self._bot_callbacks: list = []
        # _track_line (which runs under self._lock) stashes the latest BOT text
        # here; the reader loop fires callbacks AFTER releasing the lock so a
        # slow callback (e.g. movement dispatch reading state) never stalls the
        # reader thread or blocks log parsing.
        self._pending_bot: str | None = None
        self._reader_thread: threading.Thread | None = None
        self._log_file = None  # opened per-session in _reader_loop
        self.state = "stopped"
        self.state_message = "Idle."
        self.last_user_text = ""
        self.last_bot_text = ""
        self.suppressed_noise = 0
        # ── stdin push channel (camera frames + motion state + profile) ──
        # The child (gemini/script.py) reads "frame:<base64>\n",
        # "state:<json>\n", and "profile:<json>\n" lines off its stdin.
        # Writes are serialised because the frame forwarder, motion-state
        # bus handler, and audio watcher all call from different threads.
        self._stdin_lock = threading.Lock()
        self._camera = None  # set via attach_camera()
        self._frame_thread: threading.Thread | None = None
        self._frame_stop = threading.Event()
        # ── audio profile hot-swap ────────────────────────────────
        # _audio_mgr is the parent's AudioManager — needed so we can keep
        # PulseAudio defaults in sync (so /api/records/play etc. follow
        # the same device the live session uses). Set via attach_audio_manager.
        self._audio_mgr = None
        self._audio_thread: threading.Thread | None = None
        self._audio_stop = threading.Event()
        # The boot profile captured at start() — what we revert to when
        # the Anker is unplugged.
        self._boot_profile_id: str = "builtin"
        # Last profile signalled to the child (for edge-only dispatch).
        self._last_profile_id: str | None = None

    # ── attach points (called once from main.py) ──────────────

    def register_bot_callback(self, callback) -> None:
        """Register a fn(text) fired on each new BOT: line (Gemini's spoken
        text). Used by the N2 movement dispatcher. Cheap/non-blocking only.
        Registering the same callable twice is a no-op."""
        if callback not in self._bot_callbacks:
            self._bot_callbacks.append(callback)

    def attach_camera(self, camera) -> None:
        """Give the supervisor a reference to the CameraDaemon so it can
        forward frames to the child over stdin while a session runs."""
        self._camera = camera

    def attach_audio_manager(self, audio_mgr) -> None:
        """Hand the parent's AudioManager to the supervisor so the audio
        watcher can keep PulseAudio defaults in sync on every swap (so
        typed-replay / record playback follow the same device as the live
        Gemini session)."""
        self._audio_mgr = audio_mgr

    # ── log parsing ───────────────────────────────────────────

    def _open_session_log(self, pid: int):
        """Open (or re-open) the per-day append log file for this session.

        Returns the open file handle, or None when the file cannot be
        opened or the session banner cannot be written (logged, not raised).
        """
        fh = None
        try:
            LIVE_LOG_DIR.mkdir(parents=True, exist_ok=True)
            fname = f"{LIVE_LOG_NAME}_{datetime.now().strftime('%Y%m%d')}.log"
            fh = open(LIVE_LOG_DIR / fname, "a", encoding="utf-8", buffering=1)
            fh.write(
                f"\n===== live_gemini subprocess start "
                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} pid={pid} =====\n"
            )
            return fh
        except Exception as exc:
            log.warning("Could not open live-gemini log file: %s", exc)
            # Don't leak the handle when the open succeeded but the banner
            # write failed.
            if fh is not None:
                try:
                    fh.close()
                except Exception:
                    pass
            return None

    def _is_noisy(self, line: str) -> bool:
        """True when the line matches a configured noise prefix/fragment."""
        return line.startswith(_NOISY_PREFIXES) or any(f in line for f in _NOISY_FRAGMENTS)

    def _set_state(self, state: str, msg: str):
        """Record the session state and its human-readable message."""
        self.state = state
        self.state_message = msg

    def _track_line(self, line: str):
        """Parse Gemini-specific log markers emitted by `gemini/script.py`.

        Must stay in lock-step with the `log.info(...)` strings in
        `GeminiBrain`. If you add a new state, add the emit in the brain
        AND the matching detector here — in one PR.

        The first matching branch wins, so branch order is significant.
        """
        if "connecting to Gemini" in line:
            self._set_state("connecting", line)
        elif "connected — speak anytime" in line or "connected - speak anytime" in line:
            self._set_state("listening", "Listening for speech.")
        elif " USER: " in line or line.strip().startswith("USER:"):
            # GeminiBrain emits: log.info("USER: %s", text)
            text = line.split("USER:", 1)[1].strip()
            if text:
                self.last_user_text = text
                self.user_transcript.append(text)
                self._set_state("hearing", f"User: {text}")
        elif " BOT: " in line or line.strip().startswith("BOT:"):
            # GeminiBrain emits: log.info("BOT: %s", text) — Gemini's own
            # spoken text. The movement dispatcher (N2) reads this deque to
            # match confirmation phrases. Deliberately does NOT change the
            # session state (that stays driven by USER / listening markers).
            # NOTE: must precede the generic "listening" catch below, else a
            # bot line that happens to contain "listening" would be misrouted.
            text = line.split("BOT:", 1)[1].strip()
            if text:
                self.last_bot_text = text
                self.bot_transcript.append(text)
                # Defer callback firing to the reader loop, OUTSIDE self._lock.
                self._pending_bot = text
        elif "BARGE-IN" in line or "Gemini interrupted" in line or "interrupt (" in line:
            self._set_state("interrupting", line)
        elif "listening" in line.lower() and "no speech" not in line:
            # Fires on "listening" (post-turn) — keep the state fresh.
            self._set_state("listening", "Listening for speech.")
        elif "session error" in line or "client recreation failed" in line:
            self._set_state("error", line)
        elif "server going away" in line or "session ended" in line or "session dead" in line:
            self._set_state("warning", line)
        elif "keyboard interrupt" in line or "cancelled — stopping" in line:
            self._set_state("stopped", line)

    def _reader_loop(self):
        """Reader-thread body: tail the child's output until it ends.

        Every non-empty line goes to the on-disk log; non-noisy lines also
        go to the in-memory tail and through _track_line. The exit
        bookkeeping (state, log trailer, file close) runs even when reading
        fails, e.g. because stop() closed the pipe under us.
        """
        proc = self.process
        if proc is None or proc.stdout is None:
            return
        # Every line goes to the on-disk log — including the ALSA noise
        # that we filter out of the in-memory tail. That way a field
        # post-mortem has the full raw capture if we need it.
        fh = self._open_session_log(proc.pid)
        self._log_file = fh
        try:
            for line in proc.stdout:
                clean = line.rstrip()
                if not clean:
                    continue
                if fh is not None:
                    try:
                        fh.write(clean + "\n")
                    except Exception:
                        pass
                fired_bot = None
                with self._lock:
                    if self._is_noisy(clean):
                        self.suppressed_noise += 1
                        continue
                    self.log_tail.append(clean)
                    self._track_line(clean)
                    fired_bot = self._pending_bot
                    self._pending_bot = None
                # Fire BOT-text callbacks (movement dispatch) OUTSIDE the lock
                # so a slow callback can't stall transcript parsing. Iterate a
                # snapshot so a registration during dispatch is harmless.
                if fired_bot is not None:
                    for cb in tuple(self._bot_callbacks):
                        try:
                            cb(fired_bot)
                        except Exception:
                            log.exception("bot-text callback failed")
        except (ValueError, OSError) as exc:
            # Reading a pipe that was closed from another thread raises
            # ValueError; treat any read failure as end-of-stream.
            log.warning("Live Gemini reader stopped on read error: %s", exc)
        finally:
            with self._lock:
                self.log_tail.append("Live Gemini process exited.")
                self._set_state("stopped", "Process exited.")
            if fh is not None:
                try:
                    fh.write(
                        f"===== live_gemini subprocess exit "
                        f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====\n"
                    )
                    fh.close()
                except Exception:
                    pass
            self._log_file = None

    # ── lifecycle ─────────────────────────────────────────────

    @staticmethod
    def _join_quietly(thread, timeout: float) -> None:
        """Join ``thread`` unless it is missing or is the calling thread
        (joining yourself raises RuntimeError)."""
        if thread is not None and thread is not threading.current_thread():
            thread.join(timeout=timeout)

    def is_running(self) -> bool:
        """True while a child exists and has not exited."""
        with self._lock:
            return self.process is not None and self.process.poll() is None

    def start(self) -> dict[str, Any]:
        """Spawn the voice child and its helper threads.

        Returns:
            ``{"started": True, "pid": ...}`` on success, or
            ``{"started": False, "message": "Already running.", "pid": ...}``
            when a child is already alive.

        Raises:
            RuntimeError: the voice script does not exist.
            Exception: whatever ``subprocess.Popen`` raises. In both failure
                cases the state is set to "error" before raising.
        """
        with self._lock:
            if self.process is not None and self.process.poll() is None:
                return {"started": False, "message": "Already running.", "pid": self.process.pid}
            self._set_state("starting", "Starting...")

        script = LIVE_SCRIPT
        if not script.exists():
            # Don't leave the dashboard showing "starting" forever.
            with self._lock:
                self._set_state("error", f"Script not found: {script}")
            raise RuntimeError(f"Script not found: {script}")

        env = os.environ.copy()
        env.update({"PYTHONUNBUFFERED": "1", **LIVE_TUNE})

        # Pass the current G1 speaker volume as an env var so the
        # subprocess can compute the correct barge-in threshold at
        # startup. Without this, sanad_voice.py would read the volume
        # from a stale or non-existent config file path and default to
        # 100, scaling the barge-in threshold wrong for any non-100%
        # volume. load_config() reads data/motions/config.json — the
        # file the dashboard writes to when the user moves the slider.
        try:
            from Project.Sanad.config import load_config
            _cfg = load_config() or {}
            _audio_cfg = _cfg.get("audio") if isinstance(_cfg.get("audio"), dict) else {}
            _g1_vol = int(_audio_cfg.get("g1_volume", 100))
            _g1_vol = max(0, min(100, _g1_vol))
            env["SANAD_G1_VOLUME"] = str(_g1_vol)
            log.info("Passing SANAD_G1_VOLUME=%d to subprocess", _g1_vol)
        except Exception as exc:
            log.warning("Could not read g1_volume for subprocess: %s", exc)

        # sanad_voice.py takes the DDS interface as the first positional arg
        dds_iface = env.get("SANAD_DDS_INTERFACE", "eth0")
        cmd = [sys.executable, str(script), dds_iface]
        try:
            proc = subprocess.Popen(
                cmd,
                cwd=str(script.parent),
                stdin=subprocess.PIPE,  # camera frames + motion state push
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                # Explicit codec: transcripts and JSON payloads carry
                # non-ASCII text, and an undecodable byte must not kill the
                # reader thread.
                encoding="utf-8",
                errors="replace",
                bufsize=1,
                env=env,
            )
        except Exception as exc:
            with self._lock:
                self._set_state("error", f"Failed to launch: {exc}")
            raise

        # Reap any stale frame forwarder / audio watcher from a previous
        # session that ended by a child crash rather than a clean stop() —
        # otherwise they'd keep spinning and we'd leak threads per restart.
        for stale, stop_evt in (
            (self._frame_thread, self._frame_stop),
            (self._audio_thread, self._audio_stop),
        ):
            if stale is not None and stale.is_alive():
                stop_evt.set()
                self._join_quietly(stale, 2.0)

        # Capture the boot profile for this session — the audio watcher
        # uses it as the fallback when the Anker is unplugged. Read it from
        # the merged child environment (os.environ + LIVE_TUNE) so parent
        # and child agree on the value.
        self._boot_profile_id = str(
            env.get("SANAD_AUDIO_PROFILE", "builtin")).strip().lower()
        self._last_profile_id = None  # force one initial send_profile

        with self._lock:
            self.process = proc
            self.log_tail.append(f"Started: pid={proc.pid}")
            self._set_state("starting", f"pid={proc.pid}")
            self._reader_thread = threading.Thread(target=self._reader_loop, daemon=True)
            self._reader_thread.start()
            # Frame forwarder — pushes camera JPEGs to the child over stdin.
            self._frame_stop.clear()
            self._frame_thread = threading.Thread(
                target=self._frame_forwarder, daemon=True, name="gemini-frame-fwd",
            )
            self._frame_thread.start()
            # Audio watcher — polls pactl for Anker presence and signals
            # the child to hot-swap mic+speaker when it changes.
            self._audio_stop.clear()
            self._audio_thread = threading.Thread(
                target=self._audio_watcher, daemon=True, name="gemini-audio-watcher",
            )
            self._audio_thread.start()

        log.info("Live Gemini subprocess started: pid=%d", proc.pid)
        return {"started": True, "pid": proc.pid}

    # ── stdin push channel ────────────────────────────────────

    def _send_stdin(self, line: str) -> None:
        """Serialised stdin write — the frame forwarder, motion-state
        handler and audio watcher call this from different threads.
        Best-effort: a closed pipe or a not-yet-started process is a
        silent no-op."""
        proc = self.process
        if proc is None or proc.stdin is None:
            return
        try:
            with self._stdin_lock:
                if not proc.stdin.closed:
                    proc.stdin.write(line)
                    proc.stdin.flush()
        except Exception:
            # Pipe broke (child exited) — drop silently; the reader thread
            # will surface the exit via state="stopped".
            pass

    def send_frame(self, jpeg: Union[bytes, str]) -> None:
        """Forward one camera frame to the child as 'frame:<base64>\\n'.

        Accepts raw JPEG bytes (base64-encoded here; any bytes-like object
        works) or an already-base64 ASCII string (e.g.
        CameraDaemon.get_frame_b64() — no re-encode). Empty input and
        unsupported types are ignored."""
        if isinstance(jpeg, (bytes, bytearray, memoryview)):
            b64 = base64.b64encode(jpeg).decode("ascii")
        elif isinstance(jpeg, str):
            b64 = jpeg.strip()
        else:
            return
        if b64:
            self._send_stdin("frame:" + b64 + "\n")

    def send_state(self, event: str, cmd: str,
                   elapsed_sec: Optional[float] = None,
                   reason: Optional[str] = None) -> None:
        """Push a motion-state update to the child as 'state:<json>\\n'.

        Events: start | complete | interrupted | error. The child injects
        the update into the live Gemini session as silent text context so
        Gemini can answer "what are you doing?" honestly.

        ``elapsed_sec`` is rounded to 2 decimals and ``reason`` is truncated
        to 200 characters. A falsy ``event`` or ``cmd`` sends nothing."""
        if not event or not cmd:
            return
        payload: dict[str, Any] = {"event": event, "cmd": cmd}
        if elapsed_sec is not None:
            payload["elapsed_sec"] = round(float(elapsed_sec), 2)
        if reason:
            payload["reason"] = str(reason)[:200]
        try:
            line = "state:" + json.dumps(payload, ensure_ascii=False) + "\n"
        except Exception:
            return
        self._send_stdin(line)

    def _frame_forwarder(self) -> None:
        """Background thread — push the camera's latest frame to the child.

        Runs for the lifetime of one subprocess session. Gated on the
        camera actually running; the child does its own vision-enabled +
        staleness checks, so this stays dumb (camera up → push). Exits
        immediately when no camera was attached."""
        cam = self._camera
        if cam is None:
            return
        while not self._frame_stop.is_set():
            if self._frame_stop.wait(_FRAME_FORWARD_INTERVAL_S):
                break
            try:
                if not cam.is_running():
                    continue
                b64 = cam.get_frame_b64()
                if b64:
                    self.send_frame(b64)
            except Exception:
                # Best-effort — never let a frame hiccup kill the thread.
                pass

    # ── audio profile watcher (parent-side detection) ────────────

    def send_profile(self, profile_id: str, reason: str = "") -> None:
        """Push an audio-profile hot-swap command to the child as
        'profile:<json>\\n'. The child's _stdin_watcher parses it and
        _audio_swap_loop performs the actual mic/speaker rebind. No-op
        if the process isn't running or stdin is closed.

        The id is lower-cased and stripped; unknown ids are logged and
        ignored. ``reason`` is truncated to 120 characters."""
        pid = (profile_id or "").strip().lower()
        if pid not in self._KNOWN_PROFILES:
            log.warning("send_profile: ignoring unknown profile %r", profile_id)
            return
        payload: dict[str, Any] = {"id": pid}
        if reason:
            payload["reason"] = reason[:120]
        try:
            line = "profile:" + json.dumps(payload, ensure_ascii=False) + "\n"
        except Exception:
            return
        self._send_stdin(line)

    def _audio_watcher(self) -> None:
        """Background thread — poll pactl for the Anker USB device, signal
        the child on every plug/unplug edge transition.

        Detection reuses voice.audio_devices.detect_plugged_profiles() which
        already shells to `pactl list short` and matches against the same
        `powerconf,anker` substring AnkerMic uses. Zero new deps.

        Edge-only dispatch: we only call send_profile() when the target
        flips. Rapid bounce (loose cable) is naturally rate-limited by the
        poll interval. After every send_profile we also refresh the parent
        audio_manager's PulseAudio defaults so non-live playback (typed
        replay, record playback) follows the same device.
        """
        # Lazy import — keeps voice.audio_devices out of this module's
        # top-level imports; the watcher is simply disabled if it is missing.
        try:
            from Project.Sanad.voice import audio_devices as _ad
        except Exception as exc:
            log.warning("audio watcher disabled — audio_devices import failed: %s", exc)
            return
        try:
            if not _ad.pactl_available():
                log.warning("audio watcher disabled — pactl not available")
                return
        except Exception:
            # If pactl_available itself isn't exposed, fall through and try
            # detect_plugged_profiles — it'll raise/return empty if pactl
            # is missing and we handle that below.
            pass

        boot_profile = self._boot_profile_id or "builtin"
        log.info("audio watcher started — Anker→anker, no-Anker→%s (poll=%.1fs)",
                 boot_profile, _AUDIO_WATCH_INTERVAL_S)

        while not self._audio_stop.is_set():
            if self._audio_stop.wait(_AUDIO_WATCH_INTERVAL_S):
                break
            try:
                # Recovery script (set_powerconf_audio.sh) is intentionally
                # NOT invoked from the watcher — its old card-discovery /
                # module-alsa-source attempts loaded the wrong hw device
                # on this Jetson and knocked the Anker out of pactl
                # entirely (observed 2026-06-03). The script is now a
                # passive set-default-sink/source helper meant to be run
                # by hand, not from the watcher. The watcher just detects
                # plug edges and dispatches profile changes to the child.
                plugged = _ad.detect_plugged_profiles()
                ids = {p.get("profile", {}).get("id") for p in (plugged or [])}
                anker_present = _ANKER_PROFILE_ID in ids
                target = "anker" if anker_present else boot_profile
                reason = "anker plugged" if anker_present else "anker unplugged"
                # Surface which detection path succeeded (Path A vs pactl)
                if anker_present:
                    for p in plugged:
                        if p.get("profile", {}).get("id") == _ANKER_PROFILE_ID:
                            via = p.get("source_via", "pactl")
                            if via != "pactl":
                                reason += f" via {via}"
                            break

                if target == self._last_profile_id:
                    continue  # edge-only

                prev = self._last_profile_id
                log.info("audio watcher: %s → %s (%s)",
                         prev or "—", target, reason)
                self.send_profile(target, reason=reason)
                self._last_profile_id = target

                # Keep PulseAudio defaults aligned so non-live playback
                # follows the same device the live session uses.
                if self._audio_mgr is not None:
                    try:
                        self._audio_mgr.refresh_devices()
                    except Exception as exc:
                        log.warning("audio watcher: refresh_devices failed: %s", exc)
            except Exception as exc:
                # Never let a transient pactl glitch kill the thread.
                log.warning("audio watcher iteration failed: %s", exc)

    def stop(self) -> dict[str, Any]:
        """Stop the child: SIGINT, then terminate, then kill.

        Returns:
            ``{"stopped": True, "returncode": rc}`` after a shutdown attempt
            (``rc`` is None if the child survived even SIGKILL within the
            timeout), or ``{"stopped": False, "message": "Not running."}``.
        """
        with self._lock:
            proc = self.process
            if proc is None or proc.poll() is not None:
                return {"stopped": False, "message": "Not running."}
            self._set_state("stopping", "Stopping...")

        # Halt forwarder + audio watcher before we tear the pipe down.
        self._frame_stop.set()
        self._audio_stop.set()
        self._join_quietly(self._frame_thread, 2.0)
        self._frame_thread = None
        self._join_quietly(self._audio_thread, 2.0)
        self._audio_thread = None

        try:
            proc.send_signal(signal.SIGINT)
            proc.wait(timeout=_STOP_TIMEOUT_SEC)
        except subprocess.TimeoutExpired:
            proc.terminate()
            try:
                proc.wait(timeout=_TERMINATE_TIMEOUT_SEC)
            except subprocess.TimeoutExpired:
                proc.kill()
                try:
                    proc.wait(timeout=_TERMINATE_TIMEOUT_SEC)
                except subprocess.TimeoutExpired:
                    # Still finish the teardown below instead of leaving
                    # the supervisor wedged in "stopping".
                    log.error("Live Gemini subprocess pid=%s did not exit after kill",
                              proc.pid)

        rc = proc.returncode

        # Let the reader drain to EOF and write its log trailer before the
        # pipe is closed underneath it.
        self._join_quietly(self._reader_thread, 2.0)

        # Close stdin/stdout explicitly — without this each start/stop
        # cycle leaks FDs (relied on Popen.__del__ which only runs at GC;
        # a reconnect loop would march the FD count to the OS limit).
        for pipe in (getattr(proc, "stdin", None), getattr(proc, "stdout", None)):
            if pipe is not None:
                try:
                    pipe.close()
                except Exception:
                    pass

        with self._lock:
            self.process = None
            self.log_tail.append("Stopped.")
            self._set_state("stopped", "Stopped.")

        log.info("Live Gemini subprocess stopped (rc=%s)", rc)
        return {"stopped": True, "returncode": rc}

    def status(self) -> dict[str, Any]:
        """Return a snapshot of process, state and transcript data (taken
        under the lock; the deques are copied to lists)."""
        with self._lock:
            running = self.process is not None and self.process.poll() is None
            return {
                "running": running,
                "pid": self.process.pid if running and self.process else None,
                "state": self.state,
                "state_message": self.state_message,
                "last_user_text": self.last_user_text,
                "last_bot_text": self.last_bot_text,
                "user_transcript": list(self.user_transcript),
                "bot_transcript": list(self.bot_transcript),
                "log_tail": list(self.log_tail),
                "suppressed_noise": self.suppressed_noise,
            }
class LlamaServer:
    """Thin wrapper — owns subprocess (llama.cpp) or no-op (ollama).

    With the ``ollama`` backend nothing is spawned; start() only verifies
    the daemon and model. With ``llama_cpp`` this object launches and owns
    a ``llama-server`` child process.
    """

    # How many trailing stderr lines of the child are kept for diagnostics.
    _STDERR_TAIL_LINES = 40

    def __init__(self) -> None:
        self._proc: Optional[subprocess.Popen] = None
        # Last stderr lines of the llama-server child, filled by a drain
        # thread so the pipe can never fill up and block the child.
        self._stderr_tail: list = []
        self._stderr_thread = None

    # ─── lifecycle ────────────────────────────────────────

    def start(self) -> None:
        """Bring the configured backend up.

        Raises:
            RuntimeError: backend unknown, Ollama unreachable / model not
                pulled, or llama-server failed to start.
        """
        if BACKEND == "ollama":
            self._check_ollama()
            log.info("LLM backend=ollama model=%s (@ %s:%d)",
                     OLLAMA_MODEL, OLLAMA_HOST, OLLAMA_PORT)
            return
        if BACKEND == "llama_cpp":
            self._start_llama_cpp()
            return
        raise RuntimeError(f"unknown llm.backend: {BACKEND!r}")

    def stop(self) -> None:
        """Terminate the owned llama-server, if any. Never raises; the
        process handle is always cleared and its stderr pipe closed."""
        proc = self._proc
        if proc is None:
            return
        try:
            proc.terminate()
            proc.wait(timeout=3)
        except subprocess.TimeoutExpired:
            proc.kill()
            try:
                proc.wait(timeout=2)
            except subprocess.TimeoutExpired:
                log.warning("llama-server did not exit after kill (pid=%s)", proc.pid)
        except Exception as exc:
            log.warning("llama-server stop error: %s", exc)
        finally:
            # Let the drain thread reach EOF before closing the pipe.
            drain = self._stderr_thread
            if drain is not None:
                drain.join(timeout=1.0)
            if proc.stderr is not None:
                try:
                    proc.stderr.close()
                except Exception:
                    pass
            self._stderr_thread = None
            self._proc = None

    def alive(self) -> bool:
        """True when the backend is reachable (ollama) or the owned child
        process is still running (any other backend)."""
        if BACKEND == "ollama":
            return self._ping_ollama()
        return self._proc is not None and self._proc.poll() is None

    # ─── Ollama backend ───────────────────────────────────

    def _check_ollama(self) -> None:
        """Verify the Ollama daemon is running + the model is pulled.

        Raises:
            RuntimeError: daemon unreachable, or no listed model name
                contains ``OLLAMA_MODEL``.
        """
        import urllib.request
        tags_url = f"http://{OLLAMA_HOST}:{OLLAMA_PORT}/api/tags"
        try:
            with urllib.request.urlopen(tags_url, timeout=3) as r:
                body = json.loads(r.read().decode("utf-8"))
        except Exception as exc:
            raise RuntimeError(
                f"Ollama daemon not reachable at {tags_url} — is `ollama serve` running? ({exc})"
            ) from exc
        models = [m.get("name", "") for m in body.get("models", [])]
        if not any(OLLAMA_MODEL in m for m in models):
            raise RuntimeError(
                f"Ollama model {OLLAMA_MODEL!r} not pulled. "
                f"Run: `ollama pull {OLLAMA_MODEL}`. Available: {models}"
            )

    def _ping_ollama(self) -> bool:
        """True when the Ollama tags endpoint answers 200 within 1 s."""
        import urllib.request
        try:
            with urllib.request.urlopen(
                f"http://{OLLAMA_HOST}:{OLLAMA_PORT}/api/tags", timeout=1,
            ) as r:
                return r.status == 200
        except Exception:
            return False

    @staticmethod
    def _chunk_ready(buf: str) -> bool:
        """True when ``buf`` is long enough and ends on a chunk delimiter."""
        return len(buf) >= CHUNK_MIN_CHARS and buf[-1] in CHUNK_DELIMS

    async def _stream_ollama(self, user_text: str, system_prompt: str,
                             cancel: asyncio.Event) -> AsyncIterator[str]:
        """Stream one completion from Ollama's /api/generate endpoint,
        yielding stripped text chunks. Errors are logged and end the
        stream; a set ``cancel`` event ends it without flushing."""
        import aiohttp
        url = f"http://{OLLAMA_HOST}:{OLLAMA_PORT}/api/generate"
        payload = {
            "model": OLLAMA_MODEL,
            "system": system_prompt,
            "prompt": user_text,
            "stream": True,
            "keep_alive": OLLAMA_KEEP_ALIVE,
            "options": {
                "num_predict": MAX_TOKENS,
                "temperature": TEMPERATURE,
                "top_p": TOP_P,
                "stop": STOP_SEQS,
            },
        }
        buf = ""
        async with aiohttp.ClientSession() as sess:
            try:
                async with sess.post(
                        url, json=payload,
                        timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT)) as resp:
                    # Surface HTTP errors in the log instead of parsing the
                    # error body as an empty token stream.
                    resp.raise_for_status()
                    async for raw in resp.content:
                        if cancel.is_set():
                            log.info("LLM stream cancelled (barge-in)")
                            return
                        line = raw.decode("utf-8", errors="ignore").strip()
                        if not line:
                            continue
                        try:
                            obj = json.loads(line)
                        except json.JSONDecodeError:
                            continue
                        token = obj.get("response", "")
                        if token:
                            buf += token
                            if self._chunk_ready(buf):
                                yield buf.strip()
                                buf = ""
                        if obj.get("done"):
                            break
            except asyncio.CancelledError:
                # NOTE(review): this ends the generator quietly instead of
                # propagating the cancellation — confirm callers rely on it.
                return
            except Exception as exc:
                log.warning("Ollama stream error: %s", exc)
                return
        if buf.strip():
            yield buf.strip()

    # ─── llama.cpp backend ────────────────────────────────

    def _drain_stderr(self, stream, tail: list) -> None:
        """Drain-thread body: keep the last few stderr lines in ``tail``."""
        try:
            for line in stream:
                tail.append(line)
                if len(tail) > self._STDERR_TAIL_LINES:
                    del tail[0]
        except (ValueError, OSError):
            # Pipe closed during shutdown — nothing left to read.
            pass

    def _start_llama_cpp(self) -> None:
        """Launch llama-server and block until its /health endpoint is up.

        No-op when the owned server is already running.

        Raises:
            RuntimeError: model file missing, the server exited early, or it
                did not become healthy in time. On a failed start the child
                is stopped again before the error propagates.
        """
        import threading

        if self._proc is not None and self._proc.poll() is None:
            return
        if not LOCAL_MODEL_PATH.exists():
            raise RuntimeError(f"LLM model not found at {LOCAL_MODEL_PATH}")
        bin_path = shutil.which(SERVER_BIN) or SERVER_BIN
        cmd = [
            bin_path,
            "-m", str(LOCAL_MODEL_PATH),
            "--host", HOST,
            "--port", str(PORT),
            "--n-gpu-layers", str(N_GPU_LAYERS),
            "--ctx-size", str(CTX_SIZE),
            "--threads", str(THREADS),
            "--log-disable",
        ]
        log.info("launching llama-server: %s", " ".join(cmd))
        self._proc = subprocess.Popen(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True,
        )
        # An undrained stderr pipe eventually blocks the child once the OS
        # buffer fills, so read it continuously on a daemon thread.
        tail: list = []
        self._stderr_tail = tail
        self._stderr_thread = threading.Thread(
            target=self._drain_stderr, args=(self._proc.stderr, tail),
            daemon=True, name="llama-server-stderr",
        )
        self._stderr_thread.start()
        try:
            self._wait_llama_cpp_ready()
        except Exception:
            # Don't leave a half-started server behind.
            self.stop()
            raise
        log.info("llama-server ready (pid=%d)", self._proc.pid)

    def _wait_llama_cpp_ready(self) -> None:
        """Poll /health until it answers 200.

        Raises:
            RuntimeError: the child exited, or ``STARTUP_TIMEOUT`` elapsed.
        """
        import urllib.request
        # Monotonic clock: a wall-clock adjustment must not stretch or cut
        # the startup window.
        deadline = time.monotonic() + STARTUP_TIMEOUT
        url = f"http://{HOST}:{PORT}/health"
        while time.monotonic() < deadline:
            proc = self._proc
            if proc is not None and proc.poll() is not None:
                drain = self._stderr_thread
                if drain is not None:
                    drain.join(timeout=1.0)
                stderr = "".join(self._stderr_tail)
                raise RuntimeError(
                    f"llama-server exited early (code={proc.returncode}): {stderr[-500:]}"
                )
            try:
                with urllib.request.urlopen(url, timeout=1) as r:
                    if r.status == 200:
                        return
            except Exception:
                pass
            # Sleep on every unsuccessful probe so a non-200 success status
            # cannot turn this into a busy loop.
            time.sleep(0.3)
        raise RuntimeError(f"llama-server did not come up within {STARTUP_TIMEOUT}s")

    async def _stream_llama_cpp(self, user_text: str, system_prompt: str,
                                cancel: asyncio.Event) -> AsyncIterator[str]:
        """Stream one completion from llama-server's /completion endpoint
        (server-sent ``data:`` lines), yielding stripped text chunks.
        Errors are logged and end the stream; a set ``cancel`` event ends
        it without flushing."""
        import aiohttp
        prompt = self._format_chatml_prompt(user_text, system_prompt)
        payload = {
            "prompt": prompt,
            "stream": True,
            "n_predict": MAX_TOKENS,
            "temperature": TEMPERATURE,
            "top_p": TOP_P,
            "stop": STOP_SEQS,
            "cache_prompt": True,
        }
        url = f"http://{HOST}:{PORT}/completion"
        buf = ""
        async with aiohttp.ClientSession() as sess:
            try:
                async with sess.post(
                        url, json=payload,
                        timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT)) as resp:
                    # Surface HTTP errors in the log instead of silently
                    # yielding nothing.
                    resp.raise_for_status()
                    async for raw in resp.content:
                        if cancel.is_set():
                            log.info("LLM stream cancelled (barge-in)")
                            return
                        line = raw.decode("utf-8", errors="ignore").strip()
                        if not line.startswith("data:"):
                            continue
                        line = line[len("data:"):].strip()
                        if not line or line == "[DONE]":
                            continue
                        try:
                            obj = json.loads(line)
                        except json.JSONDecodeError:
                            continue
                        token = obj.get("content", "")
                        if not token:
                            if obj.get("stop"):
                                break
                            continue
                        buf += token
                        if self._chunk_ready(buf):
                            yield buf.strip()
                            buf = ""
            except asyncio.CancelledError:
                # NOTE(review): this ends the generator quietly instead of
                # propagating the cancellation — confirm callers rely on it.
                return
            except Exception as exc:
                log.warning("llama-server stream error: %s", exc)
                return
        if buf.strip():
            yield buf.strip()

    @staticmethod
    def _format_chatml_prompt(user_text: str, system_prompt: str) -> str:
        """Render a single-turn ChatML prompt ending at the assistant tag."""
        return (
            f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
            f"<|im_start|>user\n{user_text}<|im_end|>\n"
            f"<|im_start|>assistant\n"
        )

    # ─── public streaming entry point ─────────────────────

    async def stream(self, user_text: str, system_prompt: str,
                     cancel: asyncio.Event) -> AsyncIterator[str]:
        """Yield sentence-sized text chunks as the LLM generates.

        Chunk boundaries: any char in `CHUNK_DELIMS` AND buffer length
        ≥ `CHUNK_MIN_CHARS`. The final buffer is flushed on completion
        even without a delimiter. If `cancel` is set, the request is
        aborted and the generator returns.

        Raises:
            RuntimeError: the configured backend is unknown.
        """
        if BACKEND == "ollama":
            async for chunk in self._stream_ollama(user_text, system_prompt, cancel):
                yield chunk
        elif BACKEND == "llama_cpp":
            async for chunk in self._stream_llama_cpp(user_text, system_prompt, cancel):
                yield chunk
        else:
            raise RuntimeError(f"unknown llm.backend: {BACKEND!r}")
class LocalBrain:
    """Fully on-device Gemini replacement.

    Owns the four local subsystems (VAD, STT, LLM, TTS) and three
    cooperating coroutines connected by two bounded queues:

        _mic_task      mic -> VAD -> STT -> ``_llm_queue``
        _dialogue_task ``_llm_queue`` -> LLM stream -> ``_tts_queue``
        _tts_task      ``_tts_queue`` -> TTS -> speaker

    The ``log.info`` / ``log.error`` strings emitted here are parsed by the
    subprocess supervisor, so their wording must not change.
    """

    def __init__(self, audio_io, recorder, voice_name: Optional[str] = None,
                 system_prompt: str = ""):
        self._audio = audio_io
        self._mic = audio_io.mic
        self._speaker = audio_io.speaker
        self._recorder = recorder
        self._voice = voice_name
        self._system_prompt = system_prompt

        # subsystems — instantiated here, loaded in run()
        self._vad = SileroVAD()
        self._stt = WhisperSTT()
        self._llm = LlamaServer()
        self._tts = CosyVoiceTTS()

        # pipeline queues
        self._llm_queue: asyncio.Queue[str] = asyncio.Queue(maxsize=4)
        self._tts_queue: asyncio.Queue[str] = asyncio.Queue(maxsize=4)

        # control flags
        self._stop_flag = asyncio.Event()   # full shutdown
        self._interrupt = asyncio.Event()   # per-turn barge-in
        self._speaking = False
        self._speak_start_time = 0.0

    # ─── lifecycle ────────────────────────────────────────

    def stop(self) -> None:
        """Request a full shutdown; also interrupts the turn in progress."""
        self._stop_flag.set()
        self._interrupt.set()

    async def run(self) -> None:
        """Main entry. Loads models, runs the pipeline, handles shutdown.

        Never raises for pipeline failures: they are logged with the
        ``session error:`` marker and the coroutine returns.
        """
        log.info("connecting to local pipeline")
        try:
            await asyncio.to_thread(self._vad.start)
            await asyncio.to_thread(self._stt.start)
            await asyncio.to_thread(self._llm.start)
            await asyncio.to_thread(self._tts.start)
        except Exception as exc:
            log.error("session error: local pipeline startup failed — %s", exc)
            # A late stage failing must not leave earlier stages loaded —
            # in particular the llama-server child process.
            self._stop_subsystems()
            return

        log.info("listening")
        tasks = [
            asyncio.ensure_future(self._mic_task()),
            asyncio.ensure_future(self._dialogue_task()),
            asyncio.ensure_future(self._tts_task()),
        ]
        try:
            await asyncio.gather(*tasks)
        except asyncio.CancelledError:
            log.info("cancelled — stopping")
        except Exception as exc:
            log.error("session error: %s", exc)
        finally:
            try:
                # gather() does not cancel the siblings when one task
                # fails; stop them before the models go away under them.
                for task in tasks:
                    task.cancel()
                await asyncio.gather(*tasks, return_exceptions=True)
            finally:
                self._stop_subsystems()
                log.info("local pipeline stopped")

    def _stop_subsystems(self) -> None:
        """Stop every subsystem; one failing stop() never skips the rest."""
        for label, subsystem in (
            ("LlamaServer", self._llm),
            ("CosyVoiceTTS", self._tts),
            ("WhisperSTT", self._stt),
            ("SileroVAD", self._vad),
        ):
            try:
                subsystem.stop()
            except Exception:
                log.warning("%s.stop failed", label, exc_info=True)

    def _record(self, method: str, *args) -> None:
        """Best-effort recorder call — recording must never break a turn."""
        try:
            getattr(self._recorder, method)(*args)
        except Exception:
            log.debug("recorder.%s failed", method, exc_info=True)

    # ─── barge-in ─────────────────────────────────────────

    def _begin_barge_in(self) -> None:
        """Called from mic task when user starts speaking while bot is."""
        if not self._speaking:
            return
        log.info("BARGE-IN (local)")
        self._interrupt.set()
        try:
            self._speaker.stop()
        except Exception:
            log.warning("speaker.stop during barge-in failed", exc_info=True)
        # drain pipelines — discard any pending LLM/TTS chunks for this turn
        self._drain_queue(self._llm_queue)
        self._drain_queue(self._tts_queue)
        self._speaking = False
        self._record("finish_turn")

    @staticmethod
    def _drain_queue(q: asyncio.Queue) -> None:
        """Discard everything currently queued, keeping task accounting balanced."""
        try:
            while True:
                q.get_nowait()
                q.task_done()
        except asyncio.QueueEmpty:
            pass

    # ─── Task 1: mic → VAD → Whisper → LLM queue ──────────

    async def _mic_task(self) -> None:
        """Read mic frames, detect utterances, transcribe, enqueue user text."""
        loop = asyncio.get_running_loop()
        while not self._stop_flag.is_set():
            try:
                pcm = await loop.run_in_executor(
                    None, self._mic.read_chunk, _CHUNK_BYTES,
                )
            except Exception:
                # transient mic error — back off briefly and retry
                await asyncio.sleep(0.01)
                continue

            event = self._vad.process(pcm)
            if event == "speech_start":
                # user started talking — if bot is speaking, it's a barge-in
                if self._speaking:
                    self._begin_barge_in()
                continue
            if event != "speech_end":
                continue

            utt = self._vad.collected_audio()
            if not utt:
                continue
            self._record("capture_user", utt)
            text = await loop.run_in_executor(None, self._stt.transcribe, utt)
            if not text:
                continue
            log.info("USER: %s", text)
            self._record("add_user_text", text)
            # wake the LLM side — drop older pending item if full
            # (latency > throughput)
            if self._llm_queue.full():
                try:
                    self._llm_queue.get_nowait()
                    # balance the get() so the unfinished-task count
                    # stays correct for the dropped item
                    self._llm_queue.task_done()
                except asyncio.QueueEmpty:
                    pass
            await self._llm_queue.put(text)

    # ─── Task 2: LLM streaming → TTS queue ────────────────

    async def _dialogue_task(self) -> None:
        """Pop user text, stream the LLM reply chunk by chunk into the TTS queue."""
        while not self._stop_flag.is_set():
            try:
                user_text = await asyncio.wait_for(
                    self._llm_queue.get(), timeout=0.2)
            except asyncio.TimeoutError:
                continue
            self._interrupt.clear()
            full_response = []
            try:
                async for chunk in self._llm.stream(
                        user_text, self._system_prompt, self._interrupt):
                    if self._interrupt.is_set():
                        break
                    full_response.append(chunk)
                    await self._tts_queue.put(chunk)
            finally:
                # always account for the item, even if the stream raised
                self._llm_queue.task_done()
            if full_response and not self._interrupt.is_set():
                bot_text = " ".join(full_response).strip()
                if bot_text:
                    log.info("BOT: %s", bot_text)
                    self._record("add_robot_text", bot_text)

    # ─── Task 3: TTS → speaker ────────────────────────────

    async def _tts_task(self) -> None:
        """Pop text chunks, synthesise them and feed the speaker."""
        from concurrent.futures import ThreadPoolExecutor

        loop = asyncio.get_running_loop()
        # One dedicated worker: every step of a synthesis generator runs on
        # the same thread, and the event loop stays free for the mic task.
        synth_pool = ThreadPoolExecutor(
            max_workers=1, thread_name_prefix="local-tts")
        try:
            while not self._stop_flag.is_set():
                try:
                    chunk_text = await asyncio.wait_for(
                        self._tts_queue.get(), timeout=0.2)
                except asyncio.TimeoutError:
                    # idle — if we've been speaking and queue drained,
                    # close stream
                    if (self._speaking and self._llm_queue.empty()
                            and self._tts_queue.empty()):
                        await loop.run_in_executor(
                            None, self._speaker.wait_finish)
                        self._speaking = False
                        log.info("listening")
                        self._record("finish_turn")
                    continue
                if self._interrupt.is_set():
                    self._tts_queue.task_done()
                    continue

                # synthesise this text chunk → stream to speaker
                if not self._speaking:
                    await loop.run_in_executor(
                        None, self._speaker.begin_stream)
                    self._speaking = True
                    self._speak_start_time = time.time()

                try:
                    await self._speak_chunk(loop, synth_pool, chunk_text)
                except Exception as exc:
                    log.warning("TTS chunk failed: %s", exc)
                finally:
                    self._tts_queue.task_done()
        finally:
            synth_pool.shutdown(wait=False)

    async def _speak_chunk(self, loop, synth_pool, chunk_text: str) -> None:
        """Synthesise one text chunk and play it, honouring barge-in."""
        synth = self._tts.synthesize_stream(chunk_text)
        while not self._interrupt.is_set():
            # Pull each audio chunk off-loop: synthesis is blocking work and
            # would otherwise stall mic reads (and therefore barge-in).
            pcm = await loop.run_in_executor(synth_pool, next, synth, None)
            if pcm is None or self._interrupt.is_set():
                break
            self._record("capture_robot", pcm)
            await loop.run_in_executor(
                None, self._speaker.send_chunk,
                pcm, self._tts.output_rate,
            )
class WhisperSTT:
    """Thin wrapper around faster_whisper.WhisperModel."""

    def __init__(self) -> None:
        self._model = None

    def start(self) -> None:
        """Load the model (local directory if present, else by model name).

        Raises:
            RuntimeError: if the ``faster-whisper`` package is not installed.
        """
        try:
            from faster_whisper import WhisperModel
        except ImportError as exc:
            raise RuntimeError(
                f"WhisperSTT requires 'faster-whisper': {exc}"
            ) from exc

        model_src = str(LOCAL_MODEL_DIR) if LOCAL_MODEL_DIR.exists() else MODEL_NAME
        log.info("loading Whisper: src=%s device=%s compute=%s",
                 model_src, DEVICE, COMPUTE_TYPE)
        self._model = WhisperModel(
            model_src,
            device=DEVICE,
            compute_type=COMPUTE_TYPE,
        )
        log.info("WhisperSTT ready")

    def transcribe(self, pcm: bytes) -> str:
        """Blocking transcription of int16 PCM bytes.

        Returns the joined segment text, or ``''`` when the model is not
        loaded, the input holds no complete sample, transcription fails, or
        the text is shorter than ``MIN_CHARS``. Never raises.
        """
        if self._model is None:
            log.warning("WhisperSTT.transcribe called before start()")
            return ""
        if not pcm:
            return ""
        # int16 samples are 2 bytes each. np.frombuffer rejects a buffer
        # whose size is not a multiple of the item size, so drop a stray
        # trailing byte instead of letting ValueError escape to the caller.
        usable = len(pcm) - (len(pcm) % 2)
        if usable == 0:
            return ""
        samples = np.frombuffer(memoryview(pcm)[:usable], dtype=np.int16)
        audio = samples.astype(np.float32) / 32768.0
        try:
            segments, _info = self._model.transcribe(
                audio,
                beam_size=BEAM_SIZE,
                language=LANGUAGE,
                vad_filter=VAD_FILTER,
                no_speech_threshold=NO_SPEECH_THRESHOLD,
                temperature=TEMPERATURE,
            )
            # the join stays inside the try so errors raised while
            # consuming the segments are handled here as well
            text = " ".join(seg.text.strip() for seg in segments).strip()
        except Exception as exc:
            log.warning("Whisper transcribe failed: %s", exc)
            return ""

        if len(text) < MIN_CHARS:
            log.debug("drop short transcript: %r", text)
            return ""
        return text

    def stop(self) -> None:
        """Drop the model reference."""
        self._model = None
+""" + +from __future__ import annotations + +import os +import signal +import subprocess +import sys +import threading +from collections import deque +from datetime import datetime +from pathlib import Path +from typing import Any + +from Project.Sanad.config import BASE_DIR, LOGS_DIR, SCRIPTS_DIR, LIVE_TUNE +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.logger import get_logger + +log = get_logger("local_subprocess") + +_LS_CFG = _cfg_section("local", "subprocess") + + +def _resolve_live_script() -> Path: + """Locate the voice script to run as subprocess (same as Gemini's).""" + override = os.environ.get("SANAD_LIVE_SCRIPT", "").strip() + if override: + p = Path(override).expanduser() + if p.exists(): + return p + for c in (BASE_DIR / "voice" / "sanad_voice.py", + SCRIPTS_DIR / "sanad_voice.py"): + if c.exists(): + return c + return SCRIPTS_DIR / "sanad_voice.py" + + +LIVE_SCRIPT = _resolve_live_script() +LOG_TAIL_SIZE = _LS_CFG.get("log_tail_size", 2000) +TRANSCRIPT_TAIL_SIZE = _LS_CFG.get("transcript_tail_size", 30) +LIVE_LOG_DIR = LOGS_DIR +LIVE_LOG_NAME = _LS_CFG.get("log_name", "local_subprocess") + +# Python binary for the child process. The local pipeline runs in a +# separate conda env (Python 3.8 + Jetson CUDA torch + CosyVoice/Whisper); +# the dashboard stays in gemini_sdk (Python 3.10). Override with +# SANAD_LOCAL_PYTHON env var at runtime. 
class LocalSubprocess:
    """Supervisor for the local voice pipeline child process.

    Spawns the voice script with ``SANAD_VOICE_BRAIN=local``, tails its
    combined stdout/stderr on a daemon thread, and derives a coarse state
    plus the recent user transcript from the log markers it sees.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self.process: subprocess.Popen | None = None
        self.log_tail: deque[str] = deque(maxlen=LOG_TAIL_SIZE)
        self.user_transcript: deque[str] = deque(maxlen=TRANSCRIPT_TAIL_SIZE)
        self._reader_thread: threading.Thread | None = None
        self._log_file = None
        self.state = "stopped"
        self.state_message = "Idle."
        self.last_user_text = ""
        self.suppressed_noise = 0

    # ─── log I/O ──────────────────────────────────────────

    def _open_session_log(self, pid: int):
        """Open today's session log for appending; return None on failure."""
        try:
            LIVE_LOG_DIR.mkdir(parents=True, exist_ok=True)
            fname = f"{LIVE_LOG_NAME}_{datetime.now().strftime('%Y%m%d')}.log"
            fh = open(LIVE_LOG_DIR / fname, "a", encoding="utf-8", buffering=1)
            fh.write(
                f"\n===== local subprocess start "
                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} pid={pid} =====\n"
            )
            return fh
        except Exception as exc:
            log.warning("Could not open local subprocess log file: %s", exc)
            return None

    def _is_noisy(self, line: str) -> bool:
        """True for known audio-stack noise lines kept out of the tail."""
        return line.startswith(_NOISY_PREFIXES) or any(f in line for f in _NOISY_FRAGMENTS)

    def _set_state(self, state: str, msg: str):
        self.state = state
        self.state_message = msg

    def _track_line(self, line: str):
        """Parse log markers emitted by `local/script.py:LocalBrain`.

        Must stay in lock-step with the `log.info(...)` strings there.
        """
        if "connecting to local pipeline" in line:
            self._set_state("connecting", line)
        elif " USER: " in line or line.strip().startswith("USER:"):
            text = line.split("USER:", 1)[1].strip()
            if text:
                self.last_user_text = text
                self.user_transcript.append(text)
                self._set_state("hearing", f"User: {text}")
        elif " BOT: " in line or line.strip().startswith("BOT:"):
            self._set_state("speaking", line.split("BOT:", 1)[1].strip()[:80])
        elif "BARGE-IN (local)" in line:
            self._set_state("interrupting", line)
        elif "session error" in line:
            self._set_state("error", line)
        elif "local pipeline stopped" in line or "cancelled — stopping" in line:
            self._set_state("stopped", line)
        elif "listening" in line.lower() and "no speech" not in line:
            self._set_state("listening", "Listening for speech.")

    def _reader_loop(self):
        """Tail the child's output until EOF, then mark the session stopped."""
        proc = self.process
        if proc is None or proc.stdout is None:
            return
        fh = self._open_session_log(proc.pid)
        self._log_file = fh
        try:
            for line in proc.stdout:
                clean = line.rstrip()
                if not clean:
                    continue
                if fh is not None:
                    try:
                        fh.write(clean + "\n")
                    except Exception:
                        pass
                with self._lock:
                    if self._is_noisy(clean):
                        self.suppressed_noise += 1
                        continue
                    self.log_tail.append(clean)
                    self._track_line(clean)
        except ValueError:
            # the pipe was closed while we were iterating it
            pass
        finally:
            with self._lock:
                # Only report the exit if no newer session has replaced
                # this process in the meantime.
                if self.process is None or self.process is proc:
                    self.log_tail.append("Local pipeline process exited.")
                    self._set_state("stopped", "Process exited.")
            if fh is not None:
                try:
                    fh.write(
                        f"===== local subprocess exit "
                        f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====\n"
                    )
                    fh.close()
                except Exception:
                    pass
            if self._log_file is fh:
                self._log_file = None
            try:
                proc.stdout.close()
            except Exception:
                pass

    # ─── lifecycle ────────────────────────────────────────

    def is_running(self) -> bool:
        with self._lock:
            return self.process is not None and self.process.poll() is None

    def start(self) -> dict[str, Any]:
        """Spawn the pipeline child unless one is already running.

        Returns ``{"started": True, "pid": ...}`` on success, or
        ``{"started": False, ...}`` when a child is already alive.

        Raises:
            RuntimeError: if the voice script does not exist.
            OSError: if the child process cannot be spawned.
        """
        import shutil

        # The whole check-and-spawn sequence runs under the lock so that
        # two concurrent start() calls cannot both launch a child.
        with self._lock:
            if self.process is not None and self.process.poll() is None:
                return {"started": False, "message": "Already running.", "pid": self.process.pid}

            script = LIVE_SCRIPT
            if not script.exists():
                self._set_state("error", f"Script not found: {script}")
                raise RuntimeError(f"Script not found: {script}")
            self._set_state("starting", "Starting local pipeline (loading models)...")

            env = os.environ.copy()
            env.update({
                "PYTHONUNBUFFERED": "1",
                **LIVE_TUNE,
                "SANAD_VOICE_BRAIN": "local",
            })
            # The reader below decodes as UTF-8; ask the child to match
            # unless the environment already chose an encoding.
            env.setdefault("PYTHONIOENCODING", "utf-8")

            dds_iface = env.get("SANAD_DDS_INTERFACE", "eth0")
            # Use the `local` env's Python so CUDA torch + CosyVoice are
            # available. shutil.which accepts both an explicit path and a
            # bare command name found on PATH. Fall back to sys.executable
            # only if the configured bin cannot be resolved.
            py_bin = shutil.which(str(LOCAL_PYTHON_BIN))
            if py_bin is None:
                log.warning("LOCAL_PYTHON_BIN=%s not found, falling back to %s",
                            LOCAL_PYTHON_BIN, sys.executable)
                py_bin = sys.executable
            cmd = [py_bin, str(script), dds_iface]
            try:
                proc = subprocess.Popen(
                    cmd,
                    cwd=str(script.parent),
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    # Text mode with an explicit, lossy decoder: an
                    # undecodable byte must not kill the reader thread
                    # (the child would then block on a full pipe).
                    encoding="utf-8",
                    errors="replace",
                    bufsize=1,
                    env=env,
                )
            except OSError as exc:
                self._set_state("error", f"Failed to start: {exc}")
                raise

            self.process = proc
            self.log_tail.append(f"Started: pid={proc.pid}")
            self._set_state("starting", f"pid={proc.pid}")
            self._reader_thread = threading.Thread(target=self._reader_loop, daemon=True)
            self._reader_thread.start()

        log.info("Local subprocess started: pid=%d", proc.pid)
        return {"started": True, "pid": proc.pid}

    def stop(self) -> dict[str, Any]:
        """Stop the child: SIGINT first, then terminate, then kill."""
        with self._lock:
            proc = self.process
            if proc is None or proc.poll() is not None:
                return {"stopped": False, "message": "Not running."}
            self._set_state("stopping", "Stopping...")

        try:
            proc.send_signal(signal.SIGINT)
            proc.wait(timeout=_STOP_TIMEOUT_SEC)
        except subprocess.TimeoutExpired:
            proc.terminate()
            try:
                proc.wait(timeout=_TERMINATE_TIMEOUT_SEC)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait(timeout=_TERMINATE_TIMEOUT_SEC)

        rc = proc.returncode

        with self._lock:
            self.process = None
            self.log_tail.append("Stopped.")
            self._set_state("stopped", "Stopped.")

        log.info("Local subprocess stopped (rc=%s)", rc)
        return {"stopped": True, "returncode": rc}

    def status(self) -> dict[str, Any]:
        """Return a snapshot of process, state, transcript and log tail."""
        with self._lock:
            running = self.process is not None and self.process.poll() is None
            return {
                "running": running,
                "pid": self.process.pid if running and self.process else None,
                "state": self.state,
                "state_message": self.state_message,
                "last_user_text": self.last_user_text,
                "user_transcript": list(self.user_transcript),
                "log_tail": list(self.log_tail),
                "suppressed_noise": self.suppressed_noise,
            }
class CosyVoiceTTS:
    """Wrapper around CosyVoice2 zero-shot streaming inference.

    ``start()`` loads the model and the reference voice clip;
    ``synthesize_stream()`` then turns one text chunk into a sequence of
    int16 PCM byte chunks at ``OUT_RATE``.
    """

    def __init__(self) -> None:
        self._model = None
        self._ref_speech = None          # preloaded reference tensor
        self._ref_prompt = REFERENCE_PROMPT
        self._model_rate: int = 22050

    def start(self) -> None:
        """Load CosyVoice2 and the reference clip.

        Raises:
            RuntimeError: if the CosyVoice package, the model directory or
                the reference WAV is missing.
        """
        try:
            from cosyvoice.cli.cosyvoice import CosyVoice2
            from cosyvoice.utils.file_utils import load_wav
        except ImportError as exc:
            raise RuntimeError(
                f"CosyVoiceTTS requires the CosyVoice package from source: {exc}"
            )

        model_dir_missing = not LOCAL_MODEL_DIR.exists()
        if model_dir_missing:
            raise RuntimeError(f"CosyVoice2 model not found at {LOCAL_MODEL_DIR}")
        reference_missing = not REFERENCE_WAV_PATH.exists()
        if reference_missing:
            raise RuntimeError(
                f"Reference voice WAV not found at {REFERENCE_WAV_PATH}"
            )

        log.info("loading CosyVoice2: %s", LOCAL_MODEL_DIR)
        engine = CosyVoice2(str(LOCAL_MODEL_DIR), load_jit=True, fp16=True)
        self._model = engine
        # use the rate the model reports, defaulting to 22050 if absent
        self._model_rate = getattr(engine, "sample_rate", 22050)
        self._ref_speech = load_wav(str(REFERENCE_WAV_PATH), 16000)
        log.info("CosyVoiceTTS ready (model_rate=%d)", self._model_rate)

    def synthesize_stream(self, text: str) -> Iterator[bytes]:
        """Yield int16 PCM bytes at OUT_RATE, one streaming chunk at a time.

        Yields nothing when the engine has not been started. Any error
        during synthesis is logged and ends the stream early.
        """
        not_ready = self._model is None or self._ref_speech is None
        if not_ready:
            return
        try:
            steps = self._model.inference_zero_shot(
                text,
                self._ref_prompt,
                self._ref_speech,
                stream=True,
            )
            for step in steps:
                waveform = step.get("tts_speech")
                if waveform is None:
                    continue
                # tensor → numpy → scaled/clipped int16
                samples = waveform.cpu().numpy().squeeze()
                if samples.size == 0:
                    continue
                scaled = np.clip(samples * 32767.0, -32768, 32767)
                frame = scaled.astype(np.int16)
                needs_resample = self._model_rate != OUT_RATE
                if needs_resample:
                    frame = _resample_int16(frame, self._model_rate, OUT_RATE)
                yield frame.tobytes()
        except Exception as exc:
            log.warning("TTS synth failed for chunk %r: %s", text[:40], exc)

    def stop(self) -> None:
        """Release the model and the reference clip."""
        self._model = None
        self._ref_speech = None

    @property
    def output_rate(self) -> int:
        """Sample rate of the PCM produced by ``synthesize_stream``."""
        return OUT_RATE
class SileroVAD:
    """Streaming VAD with buffered utterance capture.

    Fed one mic frame at a time via `process()`. Internal state tracks
    whether we're inside an utterance; on speech_end, `collected_audio()`
    returns the full utterance: up to ``PAD_START_MS`` of lead-in, the
    speech itself, and up to ``PAD_END_MS`` of trailing silence.
    """

    def __init__(self) -> None:
        self._model = None
        self._audio_buf: list[bytes] = []   # utterance being collected
        self._pre_buf: list[bytes] = []     # rolling lead-in + current frame
        self._pre_frames = max(1, PAD_START_MS // FRAME_MS)
        self._pad_end_frames = max(1, PAD_END_MS // FRAME_MS)
        self._in_speech = False
        self._last_speech_time = 0.0
        self._speech_start_time = 0.0
        self._trailing_silence_frames = 0
        self._utt_frames = 0                # frames since speech_start, inclusive
        self._last_utterance: Optional[bytes] = None

    def start(self) -> None:
        """Load the Silero model once. Call before `process()`.

        Raises:
            RuntimeError: if ``silero-vad`` or ``torch`` is not installed.
        """
        try:
            import torch  # noqa: F401 — fail early if torch is missing
            from silero_vad import load_silero_vad
        except ImportError as exc:
            raise RuntimeError(
                f"SileroVAD requires 'silero-vad' + torch: {exc}"
            ) from exc
        self._model = load_silero_vad()
        log.info("SileroVAD ready (threshold=%.2f, frame=%dms)",
                 THRESHOLD, FRAME_MS)

    def process(self, pcm: bytes) -> Optional[str]:
        """Feed one frame (≈ FRAME_MS of audio). Returns an event or None.

        Events: 'speech_start' | 'speech_end' | None
        """
        if self._model is None:
            return None
        # Rolling buffer of the last `_pre_frames` frames PLUS the current
        # one, so an utterance can be seeded with its lead-in in one copy.
        self._pre_buf.append(pcm)
        if len(self._pre_buf) > self._pre_frames + 1:
            self._pre_buf.pop(0)

        try:
            import torch
            # VAD expects float32 in [-1, 1]
            arr = np.frombuffer(pcm, dtype=np.int16).astype(np.float32) / 32768.0
            if arr.size < FRAME_SAMPLES:
                # pad if short tail chunk arrived
                arr = np.concatenate(
                    [arr, np.zeros(FRAME_SAMPLES - arr.size, dtype=np.float32)])
            elif arr.size > FRAME_SAMPLES:
                arr = arr[:FRAME_SAMPLES]
            with torch.no_grad():
                prob = float(self._model(torch.from_numpy(arr), SAMPLE_RATE).item())
        except Exception as exc:
            # covers malformed frames as well as model errors
            log.warning("VAD inference failed: %s", exc)
            return None

        now = time.time()
        if prob >= THRESHOLD:
            self._trailing_silence_frames = 0
            self._last_speech_time = now
            if self._in_speech:
                self._audio_buf.append(pcm)
                self._utt_frames += 1
                return None
            # transition → speech
            self._in_speech = True
            self._speech_start_time = now
            # `_pre_buf` already ends with this frame; appending it again
            # would duplicate the first speech frame in the utterance.
            self._audio_buf = list(self._pre_buf)
            self._utt_frames = 1
            return "speech_start"

        # silent frame
        if not self._in_speech:
            return None
        self._audio_buf.append(pcm)
        self._utt_frames += 1
        self._trailing_silence_frames += 1
        if self._trailing_silence_frames * FRAME_MS < MIN_SILENCE_MS:
            return None

        # speech ended — validate min_speech. Measure from the first to the
        # last speech frame in audio time: including the trailing silence
        # (always >= MIN_SILENCE_MS here) would let every blip through.
        self._in_speech = False
        speech_dur_ms = (self._utt_frames - self._trailing_silence_frames) * FRAME_MS
        if speech_dur_ms < MIN_SPEECH_MS:
            log.debug("drop short utterance (%.0fms)", speech_dur_ms)
            self._audio_buf.clear()
            self._last_utterance = None
            return None
        # keep only the configured end pad, not the whole silence window
        excess = self._trailing_silence_frames - self._pad_end_frames
        if excess > 0:
            del self._audio_buf[-excess:]
        self._last_utterance = b"".join(self._audio_buf)
        self._audio_buf.clear()
        return "speech_end"

    def collected_audio(self) -> Optional[bytes]:
        """After a speech_end event, return the full utterance bytes."""
        return self._last_utterance

    def reset(self) -> None:
        """Drop any in-flight utterance (used on barge-in)."""
        self._in_speech = False
        self._audio_buf.clear()
        self._trailing_silence_frames = 0
        self._utt_frames = 0
        self._last_utterance = None

    def stop(self) -> None:
        """Drop the model reference; `process()` then returns None."""
        self._model = None
# ─────────────────────────────────────────────────────────────────────────────
# Resolve this file's directory and its parent; the parent's NAME decides
# which of the two layouts we are running from.
_THIS_DIR = Path(__file__).resolve().parent          # .../Sanad
_PARENT = _THIS_DIR.parent                           # .../Project OR /home/unitree

if _PARENT.name == "Project":
    # Dev layout — add the directory containing Project/ to sys.path so
    # that `Project.Sanad.*` resolves as a normal package import.
    _ROOT = _PARENT.parent
    if str(_ROOT) not in sys.path:
        sys.path.insert(0, str(_ROOT))
else:
    # Deployed layout — create a virtual Project package and alias.
    # The parent directory goes on sys.path so this directory can be
    # imported as a top-level package by its own name.
    if str(_PARENT) not in sys.path:
        sys.path.insert(0, str(_PARENT))
    if "Project" not in sys.modules:
        _proj = types.ModuleType("Project")
        _proj.__path__ = []  # mark as namespace package
        sys.modules["Project"] = _proj
    if "Project.Sanad" not in sys.modules:
        # Import the local Sanad package as a top-level module first, then
        # register the SAME module object under the `Project.Sanad` name and
        # as an attribute of the synthetic `Project` module.
        _sanad = importlib.import_module(_THIS_DIR.name)
        sys.modules["Project.Sanad"] = _sanad
        sys.modules["Project"].Sanad = _sanad  # type: ignore[attr-defined]

# When main.py runs as a script (`python3 main.py`), Python loads it as the
# `__main__` module — NOT as `Project.Sanad.main`. Route handlers later do
# `from Project.Sanad.main import arm` etc; without the alias below, Python
# would re-execute this file from scratch under a different module name,
# creating a SECOND set of subsystem instances (uninitialised). Every
# `subsystem not available` / `No LowState` symptom traces back to this.
# The alias ensures both names point at the exact same module object.
if __name__ == "__main__":
    sys.modules["Project.Sanad.main"] = sys.modules["__main__"]

# asyncio compat shim — backfills asyncio.to_thread for Python 3.8.
# MUST be imported before any other Sanad module that uses asyncio.to_thread.
# Imported only for its side effect (asyncio.to_thread backfill); it must stay
# ahead of every other Sanad import.
from Project.Sanad.core import asyncio_compat  # noqa: F401

from Project.Sanad.config import (
    DASHBOARD_HOST,
    DASHBOARD_PORT,
    DASHBOARD_INTERFACE,
    DDS_NETWORK_INTERFACE,
)
from Project.Sanad.core.logger import get_logger

log = get_logger("main")


def _safe_import(label: str, importer):
    """Run *importer* and return its result, or None if it raises.

    Args:
        label: Human-readable name used in the failure log line.
        importer: Zero-argument callable that performs the import.

    Returns:
        Whatever *importer* returns, or None after logging the exception.
    """
    try:
        return importer()
    except Exception:
        log.exception("Failed to import %s — that subsystem will be unavailable", label)
        return None


def _safe_construct(name: str, factory):
    """Call *factory* to build a subsystem; log and return None on failure.

    Args:
        name: Subsystem name used in the failure log line.
        factory: Zero-argument callable (class or lambda), or None when a
            prerequisite is missing — in which case nothing is constructed.

    Returns:
        The constructed object, or None if *factory* is None or raises.
    """
    if factory is None:
        return None
    try:
        return factory()
    except Exception:
        log.exception("Failed to construct %s — that subsystem will be unavailable", name)
        return None


def _import_attr(module_path: str, attr: str):
    """Import *module_path* and return its attribute *attr*, or None on failure.

    Replaces eighteen copy-pasted ``__import__`` lambdas; the import call and
    the log label (the attribute name) are the same as before.
    """
    return _safe_import(
        attr,
        lambda: getattr(__import__(module_path, fromlist=[attr]), attr),
    )


# ── isolated imports — one bad module never blocks the others ──
Brain = _import_attr("Project.Sanad.core.brain", "Brain")
ArmController = _import_attr("Project.Sanad.motion.arm_controller", "ArmController")
MacroPlayer = _import_attr("Project.Sanad.motion.macro_player", "MacroPlayer")
MacroRecorder = _import_attr("Project.Sanad.motion.macro_recorder", "MacroRecorder")
TeachingSession = _import_attr("Project.Sanad.motion.teaching", "TeachingSession")
AudioManager = _import_attr("Project.Sanad.voice.audio_manager", "AudioManager")
LocalTTSEngine = _import_attr("Project.Sanad.voice.local_tts", "LocalTTSEngine")
WakePhraseManager = _import_attr("Project.Sanad.voice.wake_phrase_manager", "WakePhraseManager")
LiveVoiceLoop = _import_attr("Project.Sanad.voice.live_voice_loop", "LiveVoiceLoop")
TypedReplayEngine = _import_attr("Project.Sanad.voice.typed_replay", "TypedReplayEngine")
GeminiVoiceClient = _import_attr("Project.Sanad.gemini.client", "GeminiVoiceClient")
GeminiSubprocess = _import_attr("Project.Sanad.gemini.subprocess", "GeminiSubprocess")
LocalSubprocess = _import_attr("Project.Sanad.local.subprocess", "LocalSubprocess")
CameraDaemon = _import_attr("Project.Sanad.vision.camera", "CameraDaemon")
FaceGallery = _import_attr("Project.Sanad.vision.face_gallery", "FaceGallery")
ZoneGallery = _import_attr("Project.Sanad.vision.zone_gallery", "ZoneGallery")
LocoController = _import_attr("Project.Sanad.G1_Controller.loco_controller", "LocoController")
MovementDispatcher = _import_attr("Project.Sanad.voice.movement_dispatch", "MovementDispatcher")


# ── global instances (imported by route modules) ──

# _safe_construct already returns None for a missing class, so no extra guard
# is needed on the simple (no-dependency) subsystems.
brain = _safe_construct("brain", Brain)
arm = _safe_construct("arm", ArmController)
audio_mgr = _safe_construct("audio_mgr", AudioManager)
voice_client = _safe_construct("voice_client", GeminiVoiceClient)
local_tts = _safe_construct("local_tts", LocalTTSEngine)
wake_mgr = _safe_construct("wake_mgr", WakePhraseManager)

# Dependent subsystems: the factory is None (→ subsystem None) unless the class
# imported AND every listed prerequisite instance exists.
macro_rec = _safe_construct(
    "macro_rec",
    (lambda: MacroRecorder(arm)) if (MacroRecorder and arm) else None)
macro_play = _safe_construct(
    "macro_play",
    (lambda: MacroPlayer(audio_mgr, arm)) if (MacroPlayer and arm) else None)
teacher = _safe_construct(
    "teacher",
    (lambda: TeachingSession(arm)) if (TeachingSession and arm) else None)
live_voice = _safe_construct(
    "live_voice",
    (lambda: LiveVoiceLoop(voice_client, arm, wake_mgr, audio_mgr))
    if (LiveVoiceLoop and voice_client and arm and wake_mgr and audio_mgr) else None)

# Which voice supervisor to mount. SANAD_VOICE_BRAIN chooses the brain
# that runs INSIDE the subprocess (see voice/sanad_voice.py); the same
# env var picks WHICH supervisor here manages that subprocess so its
# log-line parser matches the brain's emit format.
_brain_choice = os.environ.get("SANAD_VOICE_BRAIN", "gemini").strip().lower()
if _brain_choice == "local" and LocalSubprocess is not None:
    live_sub = _safe_construct("live_sub", LocalSubprocess)
else:
    if _brain_choice == "local":
        # The local supervisor failed to import; the Gemini one is mounted
        # instead. Say so loudly — its parser is written for the other brain.
        log.warning("SANAD_VOICE_BRAIN=local but LocalSubprocess is unavailable — "
                    "falling back to the Gemini supervisor")
    live_sub = _safe_construct("live_sub", GeminiSubprocess)

typed_replay = _safe_construct(
    "typed_replay",
    (lambda: TypedReplayEngine(voice_client, audio_mgr))
    if (TypedReplayEngine and voice_client and audio_mgr) else None)

# ── Locomotion controller (N2) — manual dashboard locomotion ────────────────
# Reuses the arm controller's single ChannelFactoryInitialize (one DDS init per
# process) — it does NOT init DDS itself. Disarmed every boot. See
# G1_Controller/loco_controller.py and dashboard/routes/controller.py.
loco_controller = _safe_construct(
    "loco_controller",
    (lambda: LocoController(arm)) if (LocoController and arm) else None)

# Arm ⇄ locomotion mutual exclusion: the arm must NEVER run a replay / SDK
# action / gesture while the robot may be walking. `movement_active` is True for
# the MANUAL gate (armed/teleop) AND for ~1.5s after any move/step — so it also
# covers Phase-3 Gemini-driven moves (which call loco.move/step directly).
# Checked at every arm playback chokepoint (replay_file / _execute), so it blocks
# voice/Gemini-triggered gestures too, not just the dashboard.
if arm is not None and loco_controller is not None:
    try:
        if hasattr(arm, "set_motion_block"):
            arm.set_motion_block(loco_controller.movement_active)
            log.info("Arm motion-block wired to locomotion movement_active")
    except Exception:
        log.exception("Could not wire arm motion-block")

# ── Gemini voice → movement dispatcher (N2 Phase 3) ─────────────────────────
# Reads Gemini's spoken (BOT) transcript via the live supervisor's bot-callback
# and drives loco_controller on a confirmation-phrase match (Marcus pattern).
# Gated on recognition_state.movement_enabled (the "Enable Gemini movement"
# toggle) — SEPARATE from the manual arm flag. Inert until that flag is on.
movement_dispatch = None
if MovementDispatcher and loco_controller is not None:
    try:
        from Project.Sanad.config import BASE_DIR as _BD2, MOTIONS_DIR as _MD
        movement_dispatch = _safe_construct(
            "movement_dispatch",
            lambda: MovementDispatcher(
                loco_controller,
                _MD / "instruction.json",
                _BD2 / "data" / ".recognition_state.json"))
        if movement_dispatch is not None:
            movement_dispatch.start()
            if live_sub is not None and hasattr(live_sub, "register_bot_callback"):
                live_sub.register_bot_callback(movement_dispatch.on_bot_text)
                log.info("Movement dispatcher wired to Gemini BOT transcript")
    except Exception:
        log.exception("Could not wire movement dispatcher")

# ── Recognition (camera + face gallery) ─────────────────────────────────────
# Camera is idle until the dashboard toggles vision on; face gallery is pure
# file IO and always available if the import succeeded.
#
# Config precedence (highest first): explicit env var → config/core_config.json
# section → hardcoded default. The parent process normally has no SANAD_CAMERA_*
# env vars (LIVE_TUNE is only forwarded to the Gemini child), so in practice the
# core_config.json `camera` / `faces` sections are the live source here.
def _build_camera():
    """Construct the CameraDaemon from env vars / core_config `camera` section.

    Each setting is resolved env var → config value → hardcoded default. A
    value that cannot be converted to the default's type is skipped with a
    warning instead of raising, so one typo no longer disables the camera.
    """
    from Project.Sanad.core.config_loader import section as _cfg_section
    cam_cfg = _cfg_section("core", "camera")

    def _knob(env_key, cfg_key, default):
        """Resolve one setting, coerced to ``type(default)``.

        *env_key* may be None for settings that have no env override.
        """
        cast = type(default)
        candidates = []
        env_val = os.environ.get(env_key) if env_key else None
        if env_val is not None and env_val != "":
            candidates.append((env_key, env_val))
        candidates.append(("core_config camera." + cfg_key,
                           cam_cfg.get(cfg_key, default)))
        for origin, raw in candidates:
            try:
                return cast(raw)
            except (TypeError, ValueError):
                log.warning("Ignoring invalid camera setting %s=%r — falling back",
                            origin, raw)
        return default

    # Frames are cached in memory and pushed to the Gemini child over its
    # stdin (see GeminiSubprocess._frame_forwarder) — no file drop.
    return CameraDaemon(
        width=_knob("SANAD_CAMERA_WIDTH", "width", 424),
        height=_knob("SANAD_CAMERA_HEIGHT", "height", 240),
        fps=_knob("SANAD_CAMERA_FPS", "fps", 15),
        jpeg_quality=_knob("SANAD_CAMERA_JPEG_QUALITY", "jpeg_quality", 70),
        stale_threshold_s=_knob(None, "stale_threshold_s", 10.0),
        reconnect_min_s=_knob(None, "reconnect_min_s", 2.0),
        reconnect_max_s=_knob(None, "reconnect_max_s", 10.0),
        capture_timeout_ms=_knob(None, "capture_timeout_ms", 5000),
    )


def _resolve_data_dir(env_key: str, cfg_name: str, default_rel: str):
    """Return the data directory for a gallery.

    Precedence: env var *env_key* → ``dir_rel`` in the core_config section
    *cfg_name* → *default_rel*. Absolute paths are honoured as-is; relative
    ones are anchored at BASE_DIR.
    """
    from Project.Sanad.config import BASE_DIR
    from Project.Sanad.core.config_loader import section as _cfg_section
    cfg = _cfg_section("core", cfg_name)
    raw = os.environ.get(env_key) or cfg.get("dir_rel", default_rel)
    candidate = Path(raw)
    return candidate if candidate.is_absolute() else (BASE_DIR / raw)


def _build_gallery():
    """Construct the FaceGallery rooted at the resolved faces directory."""
    # SANAD_FACES_DIR is set absolute by LIVE_TUNE (the Gemini child reads the
    # same var). In the parent it's usually unset → fall back to the JSON's
    # dir_rel, then the hardcoded default.
    return FaceGallery(_resolve_data_dir("SANAD_FACES_DIR", "faces", "data/faces"))


def _build_zone_gallery():
    """Construct the ZoneGallery rooted at the resolved zones directory."""
    # N3 — zones gallery (zone → place → linked faces). Honours SANAD_ZONES_DIR
    # (absolute) then the core_config 'zones' section dir_rel, then a default.
    return ZoneGallery(_resolve_data_dir("SANAD_ZONES_DIR", "zones", "data/zones"))


camera = _safe_construct("camera", _build_camera if CameraDaemon else None)
gallery = _safe_construct("gallery", _build_gallery if FaceGallery else None)
zone_gallery = _safe_construct("zone_gallery", _build_zone_gallery if ZoneGallery else None)

# Restore persisted vision_enabled at boot — start camera if the user left
# it on across a reboot. Face-rec state is read by the Gemini child directly.
try:
    from Project.Sanad.vision import recognition_state as _recog_state
    from Project.Sanad.config import BASE_DIR as _BD
    _recog_state_path = _BD / "data" / ".recognition_state.json"
    _state = _recog_state.read(_recog_state_path)
    if _state.vision_enabled and camera is not None:
        if camera.start():
            log.info("Camera vision restored from state (backend=%s)", camera.backend)
        else:
            log.warning("Camera vision was ON but no backend available — leaving OFF")
            # Persist the OFF state so the flag matches the camera.
            _recog_state.mutate(_recog_state_path, vision_enabled=False)
except Exception:
    log.exception("Could not restore recognition state")

# Hand the camera to the Gemini supervisor so it can forward frames to the
# child over stdin while a live session runs.
if live_sub is not None and camera is not None:
    try:
        if hasattr(live_sub, "attach_camera"):
            live_sub.attach_camera(camera)
            log.info("Camera attached to live subprocess supervisor")
    except Exception:
        log.exception("attach_camera failed")

# Hand the AudioManager to the supervisor so the audio watcher can keep
# PulseAudio defaults aligned with the live profile on every Anker
# plug/unplug. Without this, typed-replay / record playback would stay on
# the boot device even after the live session swapped to Anker.
if live_sub is not None and audio_mgr is not None:
    try:
        if hasattr(live_sub, "attach_audio_manager"):
            live_sub.attach_audio_manager(audio_mgr)
            log.info("AudioManager attached to live subprocess supervisor")
    except Exception:
        log.exception("attach_audio_manager failed")

# ── Motion-state → Gemini channel ───────────────────────────────────────────
# The arm controller emits motion.action_started / _done / _error on the bus.
# Forward each to the Gemini child as a 'state:' line so the live session can
# answer "what are you doing?" honestly. Sync handlers, fired via emit_sync
# from the arm's worker thread — send_state just writes to a pipe (cheap).
if live_sub is not None and hasattr(live_sub, "send_state"):
    try:
        from Project.Sanad.core.event_bus import bus as _bus

        def _on_motion_started(action: str = "", **_kw):
            """Forward an action-start event to the live supervisor."""
            live_sub.send_state("start", action)

        def _on_motion_done(action: str = "", elapsed_sec=None,
                            failed: bool = False, **_kw):
            """Forward a successful completion; failures are left to the error event."""
            # action_error already covered the failure case with a reason;
            # emitting "complete" as well would produce a duplicate.
            if failed:
                return
            live_sub.send_state("complete", action, elapsed_sec=elapsed_sec)

        def _on_motion_error(action: str = "", reason: str = "", **_kw):
            """Forward an action failure together with its reason."""
            live_sub.send_state("error", action, reason=reason)

        for _event, _handler in (
            ("motion.action_started", _on_motion_started),
            ("motion.action_done", _on_motion_done),
            ("motion.action_error", _on_motion_error),
        ):
            _bus.on(_event, _handler)
        log.info("Motion-state → Gemini channel wired")
    except Exception:
        log.exception("Could not wire motion-state → Gemini channel")


# Wire everything into the Brain (only what was constructed)
def _safe_attach(method_name: str, value):
    """Call ``brain.<method_name>(value)`` if both exist; log any failure."""
    if brain is None or value is None:
        return
    attach = getattr(brain, method_name, None)
    if attach is None:
        return
    try:
        attach(value)
    except Exception:
        log.exception("brain.%s failed", method_name)


for _attach_method, _attach_target in (
    ("attach_voice", voice_client),
    ("attach_audio_manager", audio_mgr),
    ("attach_arm", arm),
    ("attach_macro_recorder", macro_rec),
    ("attach_macro_player", macro_play),
    ("attach_live_voice", live_voice),
):
    _safe_attach(_attach_method, _attach_target)


# ── Runtime sanity report ────────────────────────────────────────────────
# Insertion order is the order the names appear in the log lines below.
SUBSYSTEMS = dict(
    brain=brain,
    arm=arm,
    audio_mgr=audio_mgr,
    voice_client=voice_client,
    local_tts=local_tts,
    macro_rec=macro_rec,
    macro_play=macro_play,
    teacher=teacher,
    wake_mgr=wake_mgr,
    live_voice=live_voice,
    live_sub=live_sub,
    typed_replay=typed_replay,
    camera=camera,
    gallery=gallery,
    zone_gallery=zone_gallery,
    loco_controller=loco_controller,
    movement_dispatch=movement_dispatch,
)

# Critical subsystems — if any of these are None, log an error at startup
CRITICAL_SUBSYSTEMS = ("brain",)

for _name in CRITICAL_SUBSYSTEMS:
    if SUBSYSTEMS.get(_name) is None:
        log.error("CRITICAL subsystem '%s' is None — application will be unusable", _name)

_available, _missing = [], []
for _key, _instance in SUBSYSTEMS.items():
    (_missing if _instance is None else _available).append(_key)
log.info("Subsystems available (%d): %s", len(_available), ", ".join(_available))
if _missing:
    log.warning("Subsystems unavailable (%d): %s", len(_missing), ", ".join(_missing))


_already_shut_down = False


def _do_shutdown(from_signal: bool = False):
    """Clean shutdown — release hardware, stop background tasks. Idempotent.

    Each step is best-effort: a failure is logged and the remaining steps
    still run. Order: arm cancel → arm disable → movement dispatcher →
    locomotion → live supervisor → audio → camera.

    Args:
        from_signal: Accepted for callers that pass it; not read here.
    """
    global _already_shut_down
    if _already_shut_down:
        return
    _already_shut_down = True
    log.info("Shutdown requested")

    def _attempt(failure_message, step):
        # Run one shutdown step; never let it abort the rest.
        try:
            step()
        except Exception:
            log.exception(failure_message)

    def _call_if_present(owner, method_name):
        # Invoke owner.<method_name>() only when the attribute exists.
        if hasattr(owner, method_name):
            getattr(owner, method_name)()

    def _stop_live_sub():
        # Only stop a supervisor that reports itself as running.
        running = live_sub.is_running() if callable(getattr(live_sub, "is_running", None)) else False
        if running:
            live_sub.stop()

    def _stop_camera():
        if camera.is_running():
            camera.stop()

    if arm is not None:
        _attempt("arm.cancel() failed", lambda: _call_if_present(arm, "cancel"))
        _attempt("arm.disable() failed", lambda: _call_if_present(arm, "disable"))

    if movement_dispatch is not None:
        _attempt("movement_dispatch.stop() failed", lambda: movement_dispatch.stop())

    if loco_controller is not None:
        # StopMove (no FSM change) + disarm
        _attempt("loco_controller.shutdown() failed", lambda: loco_controller.shutdown())

    if live_sub is not None:
        _attempt("live_sub.stop() failed", _stop_live_sub)

    if audio_mgr is not None:
        _attempt("audio_mgr.close() failed", lambda: _call_if_present(audio_mgr, "close"))

    if camera is not None:
        _attempt("camera.stop() failed", _stop_camera)

    log.info("Shutdown complete")


import atexit  # noqa: E402
atexit.register(_do_shutdown)
# NOTE: Do NOT install custom SIGINT/SIGTERM handlers here.
# Uvicorn installs its own signal handlers for graceful shutdown.
# If we override them, Ctrl+C never reaches uvicorn and the server
# keeps running forever. Our _do_shutdown runs via atexit instead.
+ + +def _print_env_diagnostic(): + """Print everything you'd need to debug a deployment issue.""" + print("=" * 60) + print("SANAD ENVIRONMENT DIAGNOSTIC") + print("=" * 60) + print(f"Python: {sys.version}") + print(f"Executable: {sys.executable}") + print(f"Platform: {sys.platform}") + print(f"BASE_DIR: {_THIS_DIR}") + print(f"Parent: {_PARENT}") + print(f"Layout: {'dev (Project/Sanad)' if _PARENT.name == 'Project' else 'deployed (top-level Sanad)'}") + print(f"Dashboard: {DASHBOARD_HOST}:{DASHBOARD_PORT} (interface: {DASHBOARD_INTERFACE})") + print(f"DDS interface: {DDS_NETWORK_INTERFACE}") + print() + print("sys.path[0:8]:") + for p in sys.path[:8]: + print(f" {p}") + print() + print("Critical imports:") + for mod_name in ("uvicorn", "fastapi", "pydantic", "starlette", + "websockets", "httpx", "pyaudio", "pyrealsense2", + "unitree_sdk2py", "ultralytics", "numpy", "cv2"): + try: + mod = __import__(mod_name) + ver = getattr(mod, "__version__", "?") + path = getattr(mod, "__file__", "?") + print(f" ✓ {mod_name:18s} {ver:12s} {path}") + except BaseException as exc: + print(f" ✗ {mod_name:18s} {type(exc).__name__}: {exc}") + print() + print("Subsystems available (after constructing main module globals):") + for name in sorted(SUBSYSTEMS): + print(f" {'✓' if SUBSYSTEMS[name] is not None else '✗'} {name}") + print("=" * 60) + + +def main(): + parser = argparse.ArgumentParser(description="Sanad Robot Assistant") + parser.add_argument("--host", default=DASHBOARD_HOST, + help=f"Dashboard bind address. Default is wlan0's IP " + f"({DASHBOARD_HOST!r}). Override with SANAD_DASHBOARD_HOST " + f"or SANAD_DASHBOARD_INTERFACE.") + parser.add_argument("--port", type=int, default=DASHBOARD_PORT) + parser.add_argument("--network", default=DDS_NETWORK_INTERFACE, + help="DDS network interface (e.g. eth0, lo). 
" + "Override with SANAD_DDS_INTERFACE env var.") + parser.add_argument("--check-env", action="store_true", + help="Print environment diagnostic and exit " + "(no server start, no hardware init)") + args = parser.parse_args() + + if args.check_env: + _print_env_diagnostic() + return + + log.info("Sanad starting — Python %s @ %s", sys.version.split()[0], sys.executable) + log.info("BASE_DIR: %s", _THIS_DIR) + log.info("Dashboard interface: %s → bound to %s", DASHBOARD_INTERFACE, args.host) + log.info("Starting Sanad — host=%s port=%d network=%s", args.host, args.port, args.network) + if brain is not None: + try: + log.info("Brain status: %s", brain.status()) + except Exception: + log.exception("brain.status() failed") + # Initialize hardware (graceful if unavailable) + if arm is not None: + try: + arm.init(network_interface=args.network) + except Exception: + log.exception("arm.init() failed — continuing without hardware") + + # ── import uvicorn ────────────────────────────────────────────────── + # Catch ANY exception (not just ImportError) so the real failure reason + # is surfaced. The previous narrow catch hid issues like uvicorn pulling + # in a broken transitive dep, or being installed under a different + # site-packages than the active interpreter. + uvicorn = None + try: + import uvicorn # type: ignore + log.info("uvicorn %s loaded from %s", + getattr(uvicorn, "__version__", "?"), + getattr(uvicorn, "__file__", "?")) + except BaseException as exc: + log.error("Could not import uvicorn: %s: %s", type(exc).__name__, exc) + log.error("Python: %s", sys.executable) + log.error("sys.path[0:5]: %s", sys.path[:5]) + log.error("Try: %s -m pip install --user 'uvicorn[standard]' fastapi", sys.executable) + sys.exit(1) + + # ── import the FastAPI app ────────────────────────────────────────── + # Pass the app object directly so uvicorn doesn't have to re-resolve the + # import path (which differs between dev and deployed layouts). 
+ try: + from Project.Sanad.dashboard.app import app as _app + except BaseException: + log.exception("Could not import Dashboard.app — aborting") + sys.exit(1) + + # ── start the server ──────────────────────────────────────────────── + try: + uvicorn.run( + _app, + host=args.host, + port=args.port, + log_level="info", + ) + except BaseException: + log.exception("uvicorn.run() failed") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/vendor/Sanad/motion/__init__.py b/vendor/Sanad/motion/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/motion/arm_controller.py b/vendor/Sanad/motion/arm_controller.py new file mode 100644 index 0000000..a996600 --- /dev/null +++ b/vendor/Sanad/motion/arm_controller.py @@ -0,0 +1,832 @@ +"""Robot arm controller — real DDS motor commands and JSONL motion replay. + +Production-grade replay engine ported from G1_Lootah/Controller/g1_replay_trigger_r2x.py. +Features: body-lock, ramp-in/out interpolation, watchdog, speed control, CRC. +Falls back gracefully to simulation when the Unitree SDK is unavailable. 
+""" + +from __future__ import annotations + +import json +import threading +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from Project.Sanad.config import ( + G1_NUM_MOTOR, + KD_HIGH, + KD_LOW, + KD_WRIST, + KP_HIGH, + KP_LOW, + KP_WRIST, + MOTIONS_DIR, + REPLAY_HZ, + WEAK_MOTORS, + WRIST_MOTORS, +) +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.event_bus import bus +from Project.Sanad.core.logger import get_logger + +log = get_logger("arm_controller") + +_AC = _cfg_section("motion", "arm_controller") +# G1 hardware constants — single source in config/core_config.json +from Project.Sanad.config import ENABLE_ARM_SDK_INDEX +RAMP_IN_STEPS = _AC.get("ramp_in_steps", 60) # ~1.0s smooth move to start pose +RAMP_OUT_STEPS = _AC.get("ramp_out_steps", 180) # ~3.0s smooth return to home +SETTLE_HOLD_SEC = _AC.get("settle_hold_sec", 0.5) # hold start pose before replay begins +WATCHDOG_TIMEOUT = _AC.get("watchdog_timeout_sec", 0.25) # hold last pose if state stale +WATCHDOG_DISABLE_AFTER = _AC.get("watchdog_disable_after_sec", 1.0) # abort if state stale this long +ARM_INDICES = range( + _AC.get("arm_indices_start", 15), + _AC.get("arm_indices_stop", 29), +) +# N1 — motor temperature sampling. rt/lowstate arrives ~500 Hz; building the +# per-motor temperature snapshot that often is wasteful, so we refresh it at +# most this often. The 3D dashboard polls the snapshot over a WebSocket at a +# similar rate. +_TEMP_SAMPLE_INTERVAL_S = float(_AC.get("temp_sample_interval_sec", 0.1)) + +# -- SDK import (optional) -- +try: + from unitree_sdk2py.core.channel import ( + ChannelFactoryInitialize, + ChannelPublisher, + ChannelSubscriber, + ) + from unitree_sdk2py.idl.unitree_hg.msg.dds_ import LowCmd_, LowState_ + from unitree_sdk2py.utils.crc import CRC + + # IDL factory — LowCmd_() with no args fails because the dataclass + # has 5 required positional fields. 
The SDK ships a default factory + # named `unitree_hg_msg_dds__LowCmd_` that constructs a fully-zeroed + # instance with the right number of motor_cmd entries. + try: + from unitree_sdk2py.idl.default import unitree_hg_msg_dds__LowCmd_ + _make_low_cmd = unitree_hg_msg_dds__LowCmd_ + except ImportError: + _make_low_cmd = None + + _HAS_SDK = True +except ImportError: + _HAS_SDK = False + _make_low_cmd = None + log.warning("Unitree SDK not available — running in simulation mode") + +# G1 arm-action client for built-in arm moves (wave, shake_hand, hug, …). +# NOTE: do NOT use LocoClient here — LocoClient is the locomotion/body-move +# client and its ExecuteAction() doesn't recognise arm-action IDs, so arm +# commands become silent no-ops. The correct client is the arm-specific +# G1ArmActionClient with the SDK's action_map (name → opcode lookup). +try: + from unitree_sdk2py.g1.arm.g1_arm_action_client import ( + G1ArmActionClient, + action_map as _ARM_ACTION_MAP, + ) + _HAS_ARM_CLIENT = True +except ImportError: + G1ArmActionClient = None + _ARM_ACTION_MAP = {} + _HAS_ARM_CLIENT = False + + +@dataclass +class Action: + name: str + id: int + file: str = "" # JSONL filename (empty = SDK built-in) + category: str = "sdk" # "sdk" | "jsonl" + + +# -- SDK actions (fixed — built into Unitree firmware) -- +SDK_ACTIONS: list[Action] = [ + Action("release_arm", 0, category="sdk"), + Action("shake_hand", 1, category="sdk"), + Action("high_five", 2, category="sdk"), + Action("hug", 3, category="sdk"), + Action("high_wave", 4, category="sdk"), + Action("clap", 5, category="sdk"), + Action("face_wave", 6, category="sdk"), + Action("left_kiss", 7, category="sdk"), + Action("heart", 8, category="sdk"), + Action("right_heart", 9, category="sdk"), + Action("hands_up", 10, category="sdk"), + Action("x_ray", 11, category="sdk"), + Action("right_hand_up", 12, category="sdk"), + Action("reject", 13, category="sdk"), + Action("right_kiss", 14, category="sdk"), + Action("two_hand_kiss", 15, 
category="sdk"), +] + +# Next auto-ID for JSONL actions starts after SDK range. +_JSONL_ID_START = _AC.get("jsonl_id_start", 100) + + +def _scan_jsonl_actions() -> list[Action]: + """Auto-discover all .jsonl files in data/motions/ and create actions. + + Called at startup and whenever the dashboard requests a refresh. + The action name is derived from the filename (without extension), + with underscores replacing hyphens/spaces. + """ + MOTIONS_DIR.mkdir(parents=True, exist_ok=True) + actions = [] + for idx, path in enumerate(sorted(MOTIONS_DIR.glob("*.jsonl"))): + name = path.stem.replace("-", "_").replace(" ", "_") + actions.append(Action( + name=name, + id=_JSONL_ID_START + idx, + file=path.name, + category="jsonl", + )) + return actions + + +def rebuild_action_registry() -> tuple[list[Action], dict[int, Action], dict[str, Action]]: + """Rebuild the full action list from SDK + disk scan. Called on startup and refresh.""" + jsonl_actions = _scan_jsonl_actions() + all_actions = list(SDK_ACTIONS) + jsonl_actions + by_id = {a.id: a for a in all_actions} + by_name = {a.name: a for a in all_actions} + log.info("Action registry: %d SDK + %d JSONL = %d total", + len(SDK_ACTIONS), len(jsonl_actions), len(all_actions)) + return all_actions, by_id, by_name + + +# Initial build +ACTIONS, ACTION_BY_ID, ACTION_BY_NAME = rebuild_action_registry() + + +def _lerp_q(q_start: list[float], q_end: list[float], t: float) -> list[float]: + """Linear interpolation between two joint-position vectors, t in [0,1].""" + return [a + (b - a) * t for a, b in zip(q_start, q_end)] + + +def _load_frames(path: Path) -> list[dict[str, Any]]: + """Read JSONL file, return list of frames with 't' and 'q' keys.""" + frames = [] + with open(path, "r") as f: + for line in f: + line = line.strip() + if not line: + continue + data = json.loads(line) + if "q" in data: + frames.append(data) + return frames + + +def _load_home_q(home_file: str = "arm_home.jsonl") -> list[float] | None: + path = MOTIONS_DIR / 
home_file + if not path.exists(): + return None + frames = _load_frames(path) + return frames[0]["q"] if frames else None + + +class ArmController: + """Thread-safe arm controller with real DDS replay and simulation fallback.""" + + def __init__(self): + self._lock = threading.Lock() + self._cancel = threading.Event() + self._is_busy = False + self._last_action_time = 0.0 + self.cooldown_sec = 1.0 + self._initialized = False + # N2 — arm ⇄ locomotion mutual exclusion. While locomotion movement is + # armed, the arm must NOT run any replay / SDK action / gesture. main.py + # wires this to LocoController.is_armed via set_motion_block(); checked at + # every playback chokepoint (replay_file / _execute), so it also blocks + # voice/Gemini-triggered gestures, not just the dashboard. + self._motion_block = None + + # DDS handles (set in init()) + self._arm_pub = None + self._state_sub = None + self._low_cmd = None + self._crc = None + self._arm_client = None + + # Live state from LowState_ subscriber + self._current_q: list[float] = [0.0] * G1_NUM_MOTOR + self._last_state_time = 0.0 + self._state_lock = threading.Lock() + self._first_state_event = threading.Event() + # N1 — latest per-motor temperatures (throttled snapshot for the 3D + # temperature dashboard). Each entry: {motor_id, surface, winding}. 
+ self._current_temps: list[dict[str, Any]] = [] + self._last_temp_time = 0.0 + + # Cached motion file metadata + self._motion_files_cache: dict[str, dict[str, Any]] = {} + + # -- initialization -- + + def init(self, network_interface: str = "lo") -> bool: + if self._initialized: + return True + if not _HAS_SDK: + log.info("Simulation mode — DDS init skipped") + return False + try: + ChannelFactoryInitialize(0, network_interface) + self._arm_pub = ChannelPublisher("rt/arm_sdk", LowCmd_) + self._arm_pub.Init() + self._state_sub = ChannelSubscriber("rt/lowstate", LowState_) + self._state_sub.Init(self._on_low_state, 10) + # IDL types need the SDK's default factory — bare LowCmd_() fails + # because the dataclass has 5 required positional fields. + if _make_low_cmd is not None: + self._low_cmd = _make_low_cmd() + else: + # Last-resort: try a few constructor signatures + try: + self._low_cmd = LowCmd_() + except TypeError: + # Build with explicit zeroed fields + from unitree_sdk2py.idl.unitree_hg.msg.dds_ import MotorCmd_ + try: + from unitree_sdk2py.idl.default import ( + unitree_hg_msg_dds__MotorCmd_ as _make_motor_cmd, + ) + except ImportError: + _make_motor_cmd = lambda: MotorCmd_( + mode=0, q=0.0, dq=0.0, tau=0.0, kp=0.0, kd=0.0, reserve=0, + ) + self._low_cmd = LowCmd_( + mode_pr=0, + mode_machine=0, + motor_cmd=[_make_motor_cmd() for _ in range(35)], + reserve=[0, 0, 0, 0], + crc=0, + ) + self._crc = CRC() + + # Arm-specific action client for built-in moves + if _HAS_ARM_CLIENT: + try: + self._arm_client = G1ArmActionClient() + self._arm_client.SetTimeout(10.0) + self._arm_client.Init() + log.info("G1ArmActionClient initialized (%d actions) — built-in moves available", + len(_ARM_ACTION_MAP)) + except Exception as exc: + log.warning("G1ArmActionClient init failed: %s — built-in actions disabled", exc) + self._arm_client = None + + self._initialized = True + log.info("DDS initialized on %s", network_interface) + except Exception as exc: + log.error("DDS init 
failed: %s", exc) + return self._initialized + + def _on_low_state(self, msg): + """Callback from DDS subscriber — updates current joint positions. + + Also refreshes the per-motor temperature snapshot (N1) at most every + _TEMP_SAMPLE_INTERVAL_S so the 3D temperature dashboard has live data + without a second DDS subscriber. + """ + now = time.monotonic() + with self._state_lock: + self._current_q = [float(msg.motor_state[i].q) for i in range(G1_NUM_MOTOR)] + self._last_state_time = now + if not self._first_state_event.is_set(): + self._first_state_event.set() + + # Throttled temperature capture (separate from q, which we keep at the + # full callback rate for the replay watchdog). + if (now - self._last_temp_time) >= _TEMP_SAMPLE_INTERVAL_S: + temps = [] + for i in range(G1_NUM_MOTOR): + ms = msg.motor_state[i] + surface = winding = None + t = getattr(ms, "temperature", None) + try: + # G1 firmware reports [surface, winding]; some builds give + # a single value or a scalar — handle all three defensively + # (matches Marcus/Features/TempMonitor low_state_callback). + if t is not None and hasattr(t, "__len__"): + if len(t) >= 2: + surface, winding = int(t[0]), int(t[1]) + elif len(t) == 1: + surface = winding = int(t[0]) + elif t is not None: + surface = winding = int(t) + except (TypeError, ValueError): + surface = winding = None + temps.append({"motor_id": i, "surface": surface, "winding": winding}) + with self._state_lock: + self._current_temps = temps + self._last_temp_time = now + + def wait_for_state(self, timeout: float = 2.0) -> bool: + """Block until first LowState_ callback fires (or timeout). 
Returns True if state received.""" + return self._first_state_event.wait(timeout=timeout) + + # -- internal API exposed for teaching/macro_player (encapsulation boundary) -- + + def get_current_q(self) -> list[float]: + """Public read of current joint positions.""" + return self._get_current_q() + + def get_motor_temps(self) -> list[dict[str, Any]]: + """Public read of the latest per-motor temperature snapshot (N1). + + Returns a list of {motor_id, surface, winding} (values may be None if + the firmware didn't report a temperature). Empty until the first + LowState_ callback fires. + """ + with self._state_lock: + return list(self._current_temps) + + def send_frame(self, arm_target_q: list[float], body_lock_q: list[float]): + """Public single-frame send. Use only inside a controlled playback loop.""" + self._send_frame(arm_target_q, body_lock_q) + + def disable(self): + """Public disable — releases arm SDK control.""" + self._disable_sdk() + + def state_age(self) -> float: + """Seconds since last LowState_ callback.""" + return self._state_age() + + def _get_current_q(self) -> list[float]: + with self._state_lock: + return list(self._current_q) + + def _state_age(self) -> float: + with self._state_lock: + return time.monotonic() - self._last_state_time if self._last_state_time else 999.0 + + # -- frame sending (real DDS with CRC) -- + + def _send_frame(self, arm_target_q: list[float], body_lock_q: list[float]): + """Send one motor frame via DDS. 
def _replay_file_inner(self, filepath: str, speed: float = 1.0):
    """Replay a JSONL motion file (port of g1_replay_v4_stable.py:ReplayWithHome.Run()).

    Phases:

    1. Wait for the first LowState_ message (never body-lock from zeros).
    2. Load data: home_q, full_body_lock_q (live snapshot) and the frames.
    3. MOVE TO START — RAMP_IN_STEPS steps at REPLAY_HZ, alpha = k/steps.
    3b. SETTLE HOLD — hold the start pose for SETTLE_HOLD_SEC so the
        motors reach it before playback begins.
    4. PLAY — each tick sends the first frame whose relative timestamp
       is >= the elapsed (speed-scaled) play time.
    5. RETURN HOME via ``_return_home`` (which also disables the SDK).

    Args:
        filepath: Motion file; a relative path is resolved under MOTIONS_DIR.
        speed: Playback speed multiplier (1.0 = normal). Must be > 0 when
            driving real hardware.

    Raises:
        FileNotFoundError: the motion file does not exist.
        ValueError: ``speed`` is not a positive number on the hardware path
            (it would otherwise hold the first frame forever).
    """
    path = Path(filepath)
    if not path.is_absolute():
        path = MOTIONS_DIR / path
    if not path.exists():
        raise FileNotFoundError(f"Motion file not found: {path}")

    frames = _load_frames(path)
    if not frames:
        log.warning("Empty motion file: %s", path)
        return

    if not _HAS_SDK:
        duration = len(frames) / REPLAY_HZ / speed if speed else len(frames) / REPLAY_HZ
        log.info("[SIM] Replay %s (%.1fs, %d frames, speed=%.1f)",
                 path.name, duration, len(frames), speed)
        self._sim_replay(frames, speed)
        return

    # With speed <= 0 (or NaN) the play clock never advances, so the play
    # loop would re-send frame 0 until cancelled. Reject it before moving.
    if not speed > 0:
        raise ValueError(f"Replay speed must be > 0, got {speed!r}")

    log.info("Replay %s (%d frames @ %.0f Hz, speed=%.1f)",
             path.name, len(frames), REPLAY_HZ, speed)

    # ─── 1. Wait for first LowState ─────────────────────────
    if not self._first_state_event.is_set():
        log.warning("Waiting for first LowState message...")
        if not self._first_state_event.wait(timeout=2.0):
            log.error("No LowState received in 2s — refusing to replay (would lock body to zeros)")
            return

    # ─── 2. Load data ───────────────────────────────────────
    home_q = _load_home_q() or [0.0] * G1_NUM_MOTOR
    full_body_lock_q = self._get_current_q()  # snapshot live state

    interval = 1.0 / REPLAY_HZ
    file_start_q = frames[0]["q"]

    # ─── 3. MOVE TO START ───────────────────────────────────
    log.debug("Moving to start (%d steps)", RAMP_IN_STEPS)
    # Track the pose actually commanded so a cancel mid-ramp returns home
    # from where the arm is, not from the pre-ramp pose (which would make
    # the first return-home frame a step jump).
    last_cmd_q = full_body_lock_q
    for k in range(RAMP_IN_STEPS):
        if self._cancel.is_set():
            self._return_home(last_cmd_q, full_body_lock_q, home_q)
            return
        alpha = k / RAMP_IN_STEPS  # 0 .. (RAMP_IN_STEPS-1)/RAMP_IN_STEPS
        interp_q = list(full_body_lock_q)
        for j in range(15, G1_NUM_MOTOR):
            interp_q[j] = (1 - alpha) * full_body_lock_q[j] + alpha * file_start_q[j]
        self._send_frame(interp_q, full_body_lock_q)
        last_cmd_q = interp_q
        time.sleep(interval)

    # ─── 3b. SETTLE HOLD ────────────────────────────────────
    settle_frames = max(0, int(SETTLE_HOLD_SEC * REPLAY_HZ))
    if settle_frames > 0:
        log.debug("Settle hold (%d frames / %.2fs)", settle_frames, SETTLE_HOLD_SEC)
        for _ in range(settle_frames):
            if self._cancel.is_set():
                self._return_home(file_start_q, full_body_lock_q, home_q)
                return
            self._send_frame(file_start_q, full_body_lock_q)
            time.sleep(interval)

    # ─── 4. PLAY ────────────────────────────────────────────
    log.debug("Playing %d frames", len(frames))
    last_played_q = file_start_q
    play_elapsed = 0.0
    last_real = time.monotonic()
    t0 = frames[0].get("t", 0.0)
    total = len(frames)
    cursor = 0  # index of the first frame not yet overtaken by play_elapsed

    while not self._cancel.is_set():
        # Watchdog — abort if LowState goes stale
        age = self._state_age()
        if age > WATCHDOG_DISABLE_AFTER:
            log.error("Watchdog abort — LowState stale %.2fs", age)
            self._disable_sdk()
            return

        now_real = time.monotonic()
        play_elapsed += (now_real - last_real) * speed
        last_real = now_real

        # Select the first frame whose timestamp has not yet elapsed.
        # play_elapsed never decreases, so every frame skipped on an
        # earlier tick stays skipped: resuming from `cursor` picks the
        # same frame as a scan from index 0, without the O(n^2) cost.
        while cursor < total and frames[cursor].get("t", 0.0) - t0 < play_elapsed:
            cursor += 1
        if cursor >= total:
            break

        target_q = frames[cursor]["q"]
        self._send_frame(target_q, full_body_lock_q)
        last_played_q = target_q
        time.sleep(interval)

    # ─── 5. RETURN HOME + DisableSDK ────────────────────────
    self._return_home(last_played_q, full_body_lock_q, home_q)
def _unused_return_to_home(self, duration_sec: float = 3.0,
                           home_file: str = "arm_home.jsonl") -> None:
    """Deprecated — ramp the arm joints to the pose stored in ``home_file``.

    Replay's own ``_return_home`` already runs when cancel breaks the play
    loop; this is kept only to preserve any external caller.

    Unlike the earlier version, this really waits for a replay that is
    still winding down, and skips (with a warning) if the arm stays busy,
    instead of taking over ``_is_busy`` and clearing the other replay's
    cancel flag while it is still publishing frames.

    Args:
        duration_sec: Ramp duration; at least 30 steps are always used.
        home_file: JSONL file under MOTIONS_DIR; its LAST frame carrying a
            ``q`` of length G1_NUM_MOTOR is the target pose.
    """
    if not self._initialized or self._low_cmd is None:
        log.warning("return_to_home: arm controller not initialised")
        return
    if not self._first_state_event.wait(timeout=2.0):
        log.error("return_to_home: no LowState received in 2s — aborting")
        return

    home_path = MOTIONS_DIR / home_file
    if not home_path.exists():
        log.warning("return_to_home: %s missing — skipping", home_path.name)
        return

    # Use the LAST valid 'q' in the file as the settle pose
    home_q: list[float] | None = None
    try:
        for frame in _load_frames(home_path):
            q = frame.get("q")
            if q and len(q) == G1_NUM_MOTOR:
                home_q = q
    except Exception as exc:
        log.warning("return_to_home: reading %s failed: %s",
                    home_path.name, exc)
        return
    if home_q is None:
        log.warning("return_to_home: %s has no valid 'q' frames",
                    home_path.name)
        return

    # Claim the arm. A cancelled replay still runs its own return ramp
    # (RAMP_OUT_STEPS frames at REPLAY_HZ) plus the disable frames, so
    # allow that long, plus a margin, before giving up.
    busy_deadline = time.monotonic() + RAMP_OUT_STEPS / REPLAY_HZ + 1.0
    announced = False
    while True:
        with self._lock:
            if not self._is_busy:
                self._is_busy = True
                # Let the ramp publish frames even though we just cancelled
                self._cancel.clear()
                break
        if not announced:
            log.debug("return_to_home: arm busy, waiting briefly")
            announced = True
        if time.monotonic() >= busy_deadline:
            log.warning("return_to_home: arm still busy — skipping")
            return
        time.sleep(0.05)

    try:
        # Snapshot after the claim so the ramp starts from where the
        # previous replay actually left the arm.
        with self._state_lock:
            start_q = list(self._current_q)
        body_lock_q = list(start_q)

        steps = max(30, int(duration_sec * REPLAY_HZ))  # ≥ 0.5s ramp
        dt = 1.0 / REPLAY_HZ
        log.info("return_to_home: ramp %d steps (%.1fs) → %s",
                 steps, duration_sec, home_file)
        for k in range(steps):
            if self._cancel.is_set():
                log.info("return_to_home: cancelled mid-ramp")
                break
            alpha = (k + 1) / steps
            interp_q = list(body_lock_q)
            for j in range(15, G1_NUM_MOTOR):
                interp_q[j] = (1 - alpha) * start_q[j] + alpha * home_q[j]
            self._send_frame(interp_q, body_lock_q)
            time.sleep(dt)
        log.info("return_to_home: done")
    finally:
        with self._lock:
            self._is_busy = False
def list_motion_files(self) -> list[dict[str, Any]]:
    """Describe every ``*.jsonl`` file in MOTIONS_DIR, sorted by path.

    Each entry holds name, path, frame count, duration (frames / REPLAY_HZ)
    and size in KiB. Entries are cached under a ``"<path>:<mtime_ns>"`` key
    so an unchanged file is not re-parsed on every dashboard refresh; when
    a file's mtime changes, its older cache entries are dropped.
    """
    cache = self._motion_files_cache
    MOTIONS_DIR.mkdir(parents=True, exist_ok=True)

    listing = []
    for motion_path in sorted(MOTIONS_DIR.glob("*.jsonl")):
        info = motion_path.stat()
        path_prefix = f"{motion_path}:"
        key = f"{path_prefix}{info.st_mtime_ns}"

        entry = cache.get(key)
        if entry is not None:
            listing.append(entry)
            continue

        loaded = _load_frames(motion_path)
        seconds = len(loaded) / REPLAY_HZ if loaded else 0
        entry = {
            "name": motion_path.name,
            "path": str(motion_path),
            "frames": len(loaded),
            "duration_sec": round(seconds, 2),
            "size_kb": round(info.st_size / 1024, 1),
        }
        # Forget entries recorded for earlier versions of this same file.
        for old_key in [k for k in cache if k.startswith(path_prefix)]:
            cache.pop(old_key, None)
        cache[key] = entry
        listing.append(entry)
    return listing
def _run_sdk_action(self, action: Action):
    """Run one built-in arm action through the G1ArmActionClient.

    In simulation (no SDK) this only logs and sleeps 2 s. On hardware the
    registry name is mapped to an SDK ``action_map`` key, the opcode is
    executed, and the call then sleeps 3 s because the SDK call returns
    while the robot is still performing the action.

    A missing client, an unknown action name, an exception from the SDK,
    or a non-zero SDK return code are all logged; none of them raise.
    """
    if not _HAS_SDK:
        log.info("[SIM] SDK action: %s (id=%d)", action.name, action.id)
        time.sleep(2.0)
        return
    if self._arm_client is None:
        log.warning(
            "SDK action %s requested but G1ArmActionClient not available — skipping",
            action.name,
        )
        return

    # Sanad's registry uses underscored names ("shake_hand", "x_ray");
    # the SDK's action_map is keyed by human-readable forms that mix
    # spaces and hyphens ("shake hand", "x-ray", "two-hand kiss").
    # Try each spelling in turn; dict.fromkeys drops repeats (a name with
    # no underscore yields the same string four times) but keeps order.
    name = action.name
    candidates = list(dict.fromkeys([
        name,
        name.replace("_", " "),   # shake_hand → shake hand
        name.replace("_", "-"),   # x_ray → x-ray
        # first underscore becomes a hyphen, the rest spaces
        # (matches the SDK's "two-hand kiss" pattern)
        name.replace("_", "-", 1).replace("_", " "),
    ]))
    sdk_name = next((c for c in candidates if c in _ARM_ACTION_MAP), None)
    if sdk_name is None:
        log.warning(
            "SDK action %s not in G1ArmActionClient action_map — tried %s. keys=%s",
            action.name, candidates, sorted(_ARM_ACTION_MAP.keys())[:12],
        )
        return

    opcode = _ARM_ACTION_MAP[sdk_name]
    log.info("SDK action: %s (opcode=%s)", action.name, opcode)
    try:
        code = self._arm_client.ExecuteAction(opcode)
    except Exception as exc:
        log.error("SDK action %s failed: %s", action.name, exc)
        return
    # NOTE(review): ExecuteAction is assumed to return the RPC status code
    # (0 on success) — confirm against the installed unitree_sdk2py.
    if code not in (None, 0):
        log.error("SDK action %s returned error code %s", action.name, code)
    # Built-in arm actions block on the robot side for ~3s; the SDK
    # call returns immediately. Sleep so we don't hammer it back-to-back.
    time.sleep(3.0)
def _lerp_q(a: list[float], b: list[float], t: float) -> list[float]:
    """Linearly blend pose ``a`` towards pose ``b``, element by element.

    ``t = 0`` yields ``a`` and ``t = 1`` yields ``b``. The result is as long
    as the shorter of the two inputs.
    """
    return [start + (end - start) * t for start, end in zip(a, b)]
+ """ + + def __init__(self, arm): + self._arm = arm + + def wait_for_state(self, timeout: float = 2.0) -> bool: + fn = getattr(self._arm, "wait_for_state", None) + if callable(fn): + return fn(timeout=timeout) + # No state-wait API: assume ready + return True + + def get_current_q(self) -> list[float]: + fn = getattr(self._arm, "get_current_q", None) + if callable(fn): + return fn() + return self._arm._get_current_q() + + def send_frame(self, target_q: list[float], body_lock_q: list[float]): + fn = getattr(self._arm, "send_frame", None) + if callable(fn): + return fn(target_q, body_lock_q) + return self._arm._send_frame(target_q, body_lock_q) + + def disable(self): + fn = getattr(self._arm, "disable", None) + if callable(fn): + return fn() + return self._arm._disable_sdk() + + def state_age(self) -> float: + fn = getattr(self._arm, "state_age", None) + if callable(fn): + return fn() + # No watchdog support: pretend state is fresh + return 0.0 + + +class MacroPlayer: + def __init__(self, audio_manager=None, arm_controller=None): + self._audio_mgr = audio_manager + self._arm = arm_controller + self._lock = threading.Lock() + self._playing = False + self._stop_event = threading.Event() + + @property + def is_playing(self) -> bool: + return self._playing + + def play(self, name: str) -> dict[str, Any]: + audio_path = AUDIO_RECORDINGS_DIR / f"{name}.wav" + motion_path = MOTION_RECORDINGS_DIR / f"{name}.jsonl" + + if not audio_path.exists(): + raise FileNotFoundError(f"Audio not found: {audio_path}") + if not motion_path.exists(): + raise FileNotFoundError(f"Motion not found: {motion_path}") + + with self._lock: + if self._playing: + raise RuntimeError("Macro playback already in progress.") + self._playing = True + self._stop_event.clear() + + t0 = time.monotonic() + audio_thread = threading.Thread(target=self._play_audio, args=(audio_path,), daemon=True) + motion_thread = threading.Thread(target=self._play_motion, args=(motion_path,), daemon=True) + + log.info("Macro 
playback starting: %s", name) + audio_thread.start() + motion_thread.start() + + audio_thread.join() + motion_thread.join() + + elapsed = time.monotonic() - t0 + with self._lock: + self._playing = False + + log.info("Macro playback complete: %s (%.1fs)", name, elapsed) + return {"name": name, "duration_sec": round(elapsed, 2)} + + def stop(self): + self._stop_event.set() + # Best-effort: stop audio playback if the manager exposes a stop method + if self._audio_mgr is not None and hasattr(self._audio_mgr, "stop_playback"): + try: + self._audio_mgr.stop_playback() + except Exception as exc: + log.warning("audio stop failed: %s", exc) + + def _play_audio(self, path: Path): + if self._audio_mgr is None: + log.warning("No audio manager — skipping audio playback") + return + try: + self._audio_mgr.play_wav(path) + except Exception as exc: + log.error("Audio playback failed: %s", exc) + + def _play_motion(self, path: Path): + """Stream JSONL motion frames at recorded timing — with ramp-in, watchdog, ramp-out.""" + frames = self._load_frames(path) + if not frames: + return + + if self._arm is None: + # Simulated playback — just sleep through + duration = frames[-1].get("t", 0) + log.info("[SIM] MacroPlayer would play %d frames over %.1fs", len(frames), duration) + time.sleep(min(duration, 30.0)) + return + + adapter = _ArmAdapter(self._arm) + interval = 1.0 / REPLAY_HZ + + # CRITICAL: wait for first LowState before reading current pose + if not adapter.wait_for_state(timeout=2.0): + log.error("MacroPlayer aborting — no LowState received in 2s") + return + + try: + current_q = adapter.get_current_q() + except Exception: + log.exception("Failed to read current pose") + return + + body_lock_q = list(current_q) + first_frame_q = frames[0]["q"] + + # Phase 1: Ramp-in + if not self._ramp(adapter, current_q, first_frame_q, body_lock_q, RAMP_IN_STEPS, "ramp-in"): + return + + # Phase 2: Stream recorded frames with timing + watchdog + last_q = self._stream_frames(adapter, frames, 
body_lock_q, interval) or first_frame_q + + # Phase 3: Ramp-out back to starting pose + self._ramp(adapter, last_q, body_lock_q, body_lock_q, RAMP_OUT_STEPS, "ramp-out") + + # Phase 4: Disable arm SDK + try: + adapter.disable() + except Exception: + log.exception("disable() failed") + + def _load_frames(self, path: Path) -> list[dict]: + """Read JSONL motion file. Returns list of frames or [] on failure.""" + frames: list[dict] = [] + try: + with open(path, "r") as f: + for line in f: + line = line.strip() + if not line: + continue + try: + data = json.loads(line) + except json.JSONDecodeError as exc: + log.warning("Skipping bad line in %s: %s", path.name, exc) + continue + if "q" in data: + frames.append(data) + except OSError: + log.exception("Failed to read motion file %s", path) + if not frames: + log.warning("No usable frames in %s", path.name) + return frames + + def _ramp(self, adapter: "_ArmAdapter", from_q: list[float], to_q: list[float], + body_lock_q: list[float], steps: int, label: str) -> bool: + """Smoothly interpolate from `from_q` to `to_q` over `steps` frames. + Returns True on success, False if cancelled or send failed.""" + log.info("MacroPlayer %s (%d steps)", label, steps) + interval = 1.0 / REPLAY_HZ + for step in range(steps): + if self._stop_event.is_set(): + return False + t = (step + 1) / steps + interp = _lerp_q(from_q, to_q, t) + try: + adapter.send_frame(interp, body_lock_q) + except Exception: + log.exception("%s send_frame failed", label) + return False + time.sleep(interval) + return True + + def _stream_frames(self, adapter: "_ArmAdapter", frames: list[dict], + body_lock_q: list[float], interval: float) -> list[float] | None: + """Stream the recorded frames with watchdog. 
Returns the last successfully sent q.""" + t0 = time.monotonic() + last_q: list[float] | None = None + for frame in frames: + if self._stop_event.is_set(): + break + + age = adapter.state_age() + if age > WATCHDOG_DISABLE_AFTER: + log.error("MacroPlayer watchdog abort — state stale %.2fs", age) + break + + target_t = frame.get("t", 0) + elapsed = time.monotonic() - t0 + sleep_time = target_t - elapsed + if sleep_time > 0: + time.sleep(sleep_time) + + try: + adapter.send_frame(frame["q"], body_lock_q) + last_q = frame["q"] + except Exception: + log.exception("send_frame failed mid-replay") + return last_q + + def list_macros(self) -> list[dict[str, Any]]: + """List available macro recordings (paired audio + motion).""" + macros = [] + for audio_path in sorted(AUDIO_RECORDINGS_DIR.glob("*.wav")): + name = audio_path.stem + motion_path = MOTION_RECORDINGS_DIR / f"{name}.jsonl" + if motion_path.exists(): + macros.append({ + "name": name, + "audio_path": str(audio_path), + "motion_path": str(motion_path), + "audio_size_kb": round(audio_path.stat().st_size / 1024, 1), + "motion_size_kb": round(motion_path.stat().st_size / 1024, 1), + }) + return macros + + def status(self) -> dict[str, Any]: + return {"playing": self._playing} diff --git a/vendor/Sanad/motion/macro_recorder.py b/vendor/Sanad/motion/macro_recorder.py new file mode 100644 index 0000000..9697ece --- /dev/null +++ b/vendor/Sanad/motion/macro_recorder.py @@ -0,0 +1,163 @@ +"""Macro Recorder — simultaneously captures audio + robot joint positions. + +Produces a paired set of files: + recordings/audio/.wav — microphone or Gemini output audio + recordings/motion/.jsonl — timestamped joint positions + +These can be replayed in sync via MacroPlayer. 
class MacroRecorder:
    """Records microphone audio and robot joint positions simultaneously.

    ``start`` launches one audio thread and one motion thread; ``stop``
    ends both and writes ``<name>.wav`` and ``<name>.jsonl``.
    """

    # Joint count written to the motion-file header and used for the
    # all-zero fallback pose when no initialised arm controller is attached.
    _NUM_MOTORS = 29

    def __init__(self, arm_controller=None):
        self._arm = arm_controller
        self._lock = threading.Lock()
        self._recording = False
        self._audio_thread: threading.Thread | None = None
        self._motion_thread: threading.Thread | None = None
        self._stop_event = threading.Event()
        self._name = ""
        self._audio_frames: list[bytes] = []
        self._motion_frames: list[dict[str, Any]] = []
        self._started_at = 0.0

    @property
    def is_recording(self) -> bool:
        """True between a successful ``start`` and the end of ``stop``."""
        return self._recording

    def start(self, name: str) -> dict[str, Any]:
        """Begin recording macro ``name``.

        Raises:
            RuntimeError: a recording is already in progress.
        """
        # NOTE(review): `name` is used verbatim as a file stem in stop() —
        # confirm callers validate it (path separators are not rejected here).
        with self._lock:
            if self._recording:
                raise RuntimeError("Already recording a macro.")
            self._recording = True
            self._name = name
            self._stop_event.clear()
            self._audio_frames = []
            self._motion_frames = []
            self._started_at = time.monotonic()

        AUDIO_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)
        MOTION_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)

        self._audio_thread = threading.Thread(target=self._record_audio, daemon=True)
        self._motion_thread = threading.Thread(target=self._record_motion, daemon=True)
        self._audio_thread.start()
        self._motion_thread.start()

        log.info("Macro recording started: %s", name)
        return {"recording": True, "name": name}

    def stop(self) -> dict[str, Any]:
        """Stop recording and write the audio and motion files.

        The recording flag is cleared even when saving fails, so a write
        error cannot leave the recorder permanently "recording".

        Returns:
            Summary dict with file paths, duration and frame counts.

        Raises:
            RuntimeError: no recording is in progress.
        """
        with self._lock:
            if not self._recording:
                raise RuntimeError("No macro recording in progress.")

        self._stop_event.set()
        # Measured at the stop request so it reflects the recording itself,
        # not the time spent joining threads and writing files.
        duration = time.monotonic() - self._started_at

        try:
            if self._audio_thread:
                self._audio_thread.join(timeout=3.0)
            if self._motion_thread:
                self._motion_thread.join(timeout=3.0)

            # Snapshot: a worker that outlived its join timeout may still append.
            audio_frames = list(self._audio_frames)
            motion_frames = list(self._motion_frames)

            # Save audio
            audio_path = AUDIO_RECORDINGS_DIR / f"{self._name}.wav"
            with wave.open(str(audio_path), "wb") as wf:
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(2)  # int16
                wf.setframerate(RECEIVE_SAMPLE_RATE)
                wf.writeframes(b"".join(audio_frames))

            # Save motion: one metadata record, then one record per sample.
            motion_path = MOTION_RECORDINGS_DIR / f"{self._name}.jsonl"
            with open(motion_path, "w", encoding="utf-8") as f:
                f.write(json.dumps({"meta": {"hz": REPLAY_HZ, "motors": self._NUM_MOTORS}}) + "\n")
                f.writelines(json.dumps(frame) + "\n" for frame in motion_frames)
        finally:
            with self._lock:
                self._recording = False

        log.info("Macro saved: audio=%s motion=%s (%.1fs)", audio_path, motion_path, duration)
        return {
            "recording": False,
            "name": self._name,
            "audio_path": str(audio_path),
            "motion_path": str(motion_path),
            "duration_sec": round(duration, 2),
            "audio_frames": len(audio_frames),
            "motion_frames": len(motion_frames),
        }

    def _record_audio(self):
        """Capture mic audio in background thread until stop is requested.

        Failures are logged, not raised. The stream and the PyAudio
        instance are released on every exit path.
        """
        pya = None
        stream = None
        try:
            import pyaudio

            pya = pyaudio.PyAudio()
            stream = pya.open(
                format=pyaudio.paInt16,
                channels=CHANNELS,
                rate=RECEIVE_SAMPLE_RATE,
                input=True,
                frames_per_buffer=CHUNK_SIZE,
            )
            while not self._stop_event.is_set():
                data = stream.read(CHUNK_SIZE, exception_on_overflow=False)
                self._audio_frames.append(data)
        except Exception as exc:
            log.error("Audio recording failed: %s", exc)
        finally:
            if stream is not None:
                try:
                    stream.stop_stream()
                    stream.close()
                except Exception as exc:
                    log.warning("Audio stream close failed: %s", exc)
            if pya is not None:
                pya.terminate()

    def _record_motion(self):
        """Capture joint positions at REPLAY_HZ until stop is requested.

        Each sample is ``{"t": seconds since this thread started, "q": [...]}``.
        Ticks are scheduled against a running deadline so the sampling work
        does not accumulate as drift, and the wait is on the stop event so
        stop() wakes the thread immediately.
        """
        interval = 1.0 / REPLAY_HZ
        t0 = time.monotonic()
        next_tick = t0
        while not self._stop_event.is_set():
            t = round(time.monotonic() - t0, 4)
            # Read current joint positions from arm controller
            q = self._read_joint_positions()
            self._motion_frames.append({"t": t, "q": q})
            # If we fell behind, restart the schedule from now instead of
            # bursting samples to catch up.
            next_tick = max(next_tick + interval, time.monotonic())
            self._stop_event.wait(max(0.0, next_tick - time.monotonic()))

    def _read_joint_positions(self) -> list[float]:
        """Read current joint positions. Returns zeros if SDK unavailable."""
        arm = self._arm
        if arm is None or not getattr(arm, "_initialized", False):
            return [0.0] * self._NUM_MOTORS
        # Prefer the public accessor; fall back to the private one for
        # controllers that do not expose it.
        reader = getattr(arm, "get_current_q", None)
        if not callable(reader):
            reader = arm._get_current_q
        return reader()

    def status(self) -> dict[str, Any]:
        """Return recording state, macro name, elapsed seconds and frame counts."""
        elapsed = time.monotonic() - self._started_at if self._recording else 0
        return {
            "recording": self._recording,
            "name": self._name,
            "elapsed_sec": round(elapsed, 1),
            "audio_frames": len(self._audio_frames),
            "motion_frames": len(self._motion_frames),
        }
int + file: str = "" + +OPTION_LIST = [ + TestOption(name="release arm", id=0), + TestOption(name="shake hand", id=1), + TestOption(name="high five", id=2), + TestOption(name="hug", id=3), + TestOption(name="high wave", id=4), + TestOption(name="clap", id=5), + TestOption(name="face wave", id=6), + TestOption(name="left kiss", id=7), + TestOption(name="heart", id=8), + TestOption(name="right heart", id=9), + TestOption(name="hands up", id=10), + TestOption(name="x-ray", id=11), + TestOption(name="right hand up", id=12), + TestOption(name="reject", id=13), + TestOption(name="right kiss", id=14), + TestOption(name="two-hand kiss", id=15), + TestOption(name="release arm recorded", id=30, file="arm_home.jsonl"), + TestOption(name="laugh", id=23, file="laugh.jsonl"), + TestOption(name="bird", id=24, file="bird.jsonl"), + TestOption(name="change battery", id=25, file="change_battery.jsonl"), + TestOption(name="move hands up", id=26, file="hands_up.jsonl"), + TestOption(name="move right hand up", id=27, file="right_hand_up.jsonl"), + TestOption(name="move left hand up", id=28, file="left_hand_up.jsonl"), +] + +OPTION_BY_ID = {o.id: o for o in OPTION_LIST} +OPTION_BY_NAME = {o.name.lower(): o for o in OPTION_LIST} + +# ================================================== +# 🦾 Unitree SDK Configuration +# ================================================== +try: + from unitree_sdk2py.core.channel import ChannelFactoryInitialize, ChannelPublisher, ChannelSubscriber + from unitree_sdk2py.g1.arm.g1_arm_action_client import G1ArmActionClient, action_map + from unitree_sdk2py.idl.default import unitree_hg_msg_dds__LowCmd_ + from unitree_sdk2py.idl.unitree_hg.msg.dds_ import LowCmd_, LowState_ + from unitree_sdk2py.utils.crc import CRC + _ROBOT_SDK_AVAILABLE = True +except Exception: + ChannelFactoryInitialize = None + G1ArmActionClient = None + action_map = {} + LowCmd_ = LowState_ = None + unitree_hg_msg_dds__LowCmd_ = None + CRC = None + _ROBOT_SDK_AVAILABLE = False + 
_UNITREE_IMPORT_ERR = traceback.format_exc() + +# G1 hardware constants — single source in config/core_config.json +from Project.Sanad.config import G1_NUM_MOTOR, ENABLE_ARM_SDK_INDEX, REPLAY_HZ +DATA_DIR = BASE_DIR / _MCFG["data_subdir"] + +ACTION_COOLDOWN_SEC = _MCFG["action_cooldown_sec"] +STABILITY_THRESHOLD = _MCFG["stability_threshold"] + +_G = _MCFG["gains"] +KP_HIGH, KD_HIGH = _G["kp_high"], _G["kd_high"] +KP_LOW, KD_LOW = _G["kp_low"], _G["kd_low"] +KP_WRIST, KD_WRIST = _G["kp_wrist"], _G["kd_wrist"] +WEAK_MOTORS = _MCFG["weak_motors"] +WRIST_MOTORS = _MCFG["wrist_motors"] + + +class ArmController: + def __init__(self, cooldown_sec: float = ACTION_COOLDOWN_SEC): + self._ready = False + self.cooldown_sec = float(cooldown_sec) + self._last_action_time = 0.0 + self.low_state = None + self.crc = CRC() if CRC else None + self._pub = None + self._sub = None + self._client = None + self._is_busy = False + self._init_lock = threading.Lock() + + def init(self) -> bool: + with self._init_lock: + if self._ready: + return True + if ChannelFactoryInitialize is None: + return False + try: + ChannelFactoryInitialize(0) + self._pub = ChannelPublisher("rt/arm_sdk", LowCmd_) + self._pub.Init() + self._sub = ChannelSubscriber("rt/lowstate", LowState_) + self._sub.Init(self._low_state_handler, 10) + if G1ArmActionClient: + self._client = G1ArmActionClient() + self._client.SetTimeout(10.0) + self._client.Init() + self._ready = True + return True + except Exception: + return False + + def _low_state_handler(self, msg: LowState_): + self.low_state = msg + + def _cooldown_ok(self) -> bool: + now = time.time() + return (now - self._last_action_time) >= self.cooldown_sec + + def _load_home_pose(self): + path = DATA_DIR / "arm_home.jsonl" + try: + last_q = [0.0] * G1_NUM_MOTOR + with open(path, "r", encoding="utf-8") as f: + for line in f: + d = json.loads(line) + if "q" in d: + last_q = d["q"] + return last_q + except Exception: + return [0.0] * G1_NUM_MOTOR + + def 
_is_pose_stable(self, target_q): + if not self.low_state: + return False + current_q = np.array([self.low_state.motor_state[i].q for i in range(15, 29)]) + target_arm_q = np.array(target_q[15:29]) + diff = np.abs(current_q - target_arm_q) + return np.max(diff) < STABILITY_THRESHOLD + + def _send_frame(self, arm_q, body_lock_q): + if not self._pub: + return + cmd = unitree_hg_msg_dds__LowCmd_() + cmd.motor_cmd[ENABLE_ARM_SDK_INDEX].q = 1.0 + for i in range(G1_NUM_MOTOR): + cmd.motor_cmd[i].mode = 1 + cmd.motor_cmd[i].q = arm_q[i] if i >= 15 else body_lock_q[i] + if i in WEAK_MOTORS: + cmd.motor_cmd[i].kp, cmd.motor_cmd[i].kd = KP_LOW, KD_LOW + elif i in WRIST_MOTORS: + cmd.motor_cmd[i].kp, cmd.motor_cmd[i].kd = KP_WRIST, KD_WRIST + else: + cmd.motor_cmd[i].kp, cmd.motor_cmd[i].kd = KP_HIGH, KD_HIGH + cmd.crc = self.crc.Crc(cmd) + self._pub.Write(cmd) + + def _managed_replay(self, filename: str): + try: + path = DATA_DIR / filename + frames = [] + with open(path, "r", encoding="utf-8") as f: + for line in f: + d = json.loads(line) + if "q" in d: + frames.append(d) + + if not frames or not self.low_state: + return + + body_lock_q = [self.low_state.motor_state[i].q for i in range(G1_NUM_MOTOR)] + home_q = self._load_home_pose() + + # 1. Smooth match to start pose (90 frames ≈ 1.5s — prevents jerk) + start_q = frames[0]["q"] + ramp_in = 90 + for k in range(ramp_in): + alpha = k / ramp_in + interp_q = list(body_lock_q) + for j in range(15, 29): + interp_q[j] = (1 - alpha) * body_lock_q[j] + alpha * start_q[j] + self._send_frame(interp_q, body_lock_q) + time.sleep(1.0 / REPLAY_HZ) + + # 2. Play frames + last_played_q = start_q + for f in frames: + self._send_frame(f["q"], body_lock_q) + last_played_q = f["q"] + time.sleep(1.0 / REPLAY_HZ) + + # 3. 
Smooth return to home + for k in range(80): + alpha = k / 80 + interp_home_q = list(body_lock_q) + for j in range(15, 29): + interp_home_q[j] = (1 - alpha) * last_played_q[j] + alpha * home_q[j] + self._send_frame(interp_home_q, body_lock_q) + time.sleep(1.0 / REPLAY_HZ) + + # Sensor confirmation + confirm_start = time.time() + while time.time() - confirm_start < 2.0: + if self._is_pose_stable(home_q): + break + time.sleep(0.05) + + finally: + if self._pub: + cmd = unitree_hg_msg_dds__LowCmd_() + cmd.motor_cmd[ENABLE_ARM_SDK_INDEX].q = 0.0 + cmd.crc = self.crc.Crc(cmd) + for _ in range(5): + self._pub.Write(cmd) + time.sleep(0.01) + + self._is_busy = False + self._last_action_time = time.time() + print("🔓 Ready.") + + def _managed_sdk_action(self, action_name: str): + try: + if self._client and action_name in action_map: + print(f"🤖 SDK START: {action_name}") + self._client.ExecuteAction(action_map.get(action_name)) + time.sleep(3.5) + finally: + self._is_busy = False + self._last_action_time = time.time() + print("🔓 Ready.") + + def trigger_action_by_id(self, action_id: int): + if not self.init(): + return + if self._is_busy: + return + if not self._cooldown_ok(): + return + + opt = OPTION_BY_ID.get(int(action_id)) + if opt: + self._is_busy = True + if opt.file: + threading.Thread(target=self._managed_replay, args=(opt.file,), daemon=True).start() + elif self._client and opt.name in action_map: + threading.Thread(target=self._managed_sdk_action, args=(opt.name,), daemon=True).start() + else: + self._is_busy = False + + def trigger_action_by_name(self, action_name: str): + opt = OPTION_BY_NAME.get(action_name.lower()) + if opt: + self.trigger_action_by_id(opt.id) + + +ARM = ArmController() diff --git a/vendor/Sanad/motion/teaching.py b/vendor/Sanad/motion/teaching.py new file mode 100644 index 0000000..9cfa82a --- /dev/null +++ b/vendor/Sanad/motion/teaching.py @@ -0,0 +1,275 @@ +"""Teaching mode — safe hold → limp arms → record joint positions. 
+ +Ported from G1_Lootah/Manual_Recorder/g1_teach_v4_stable.py. + +Sequence: + 1. Safe hold (3s): Arms rigid at current pose, waist locked. + 2. Teach phase: Arms go limp (KP=0), user physically moves them. + Joint positions are recorded at 60 Hz. + 3. Return home: Smooth interpolation back to arm_home.jsonl. + 4. Save: Writes JSONL to data/motions/.jsonl. +""" + +from __future__ import annotations + +import json +import os +import tempfile +import threading +import time +from pathlib import Path +from typing import Any + +from Project.Sanad.config import G1_NUM_MOTOR, MOTIONS_DIR, REPLAY_HZ +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.event_bus import bus +from Project.Sanad.core.logger import get_logger + +log = get_logger("teaching") + +_T = _cfg_section("motion", "teaching") +SAFE_HOLD_SEC = _T.get("safe_hold_sec", 3.0) +WAIST_KP = _T.get("waist_kp", 60.0) +WAIST_KD = _T.get("waist_kd", 4.0) +HOLD_ARM_KP = _T.get("hold_arm_kp", 60.0) +HOLD_ARM_KD = _T.get("hold_arm_kd", 4.0) +TEACH_ARM_KP = _T.get("teach_arm_kp", 0.0) # limp — no stiffness +TEACH_ARM_KD = _T.get("teach_arm_kd", 2.0) # damping only + +try: + from unitree_sdk2py.idl.unitree_hg.msg.dds_ import LowCmd_ + from unitree_sdk2py.utils.crc import CRC + _HAS_SDK = True +except ImportError: + _HAS_SDK = False + + +class TeachingSession: + """Records a teaching session (one at a time).""" + + def __init__(self, arm_controller): + self._arm = arm_controller + self._lock = threading.Lock() + self._recording = False + self._stop_event = threading.Event() + self._thread: threading.Thread | None = None + self._name = "" + self._frames: list[dict[str, Any]] = [] + self._phase = "idle" # idle | holding | teaching | returning | done + self._started_at = 0.0 + self._finalized = False + self._finalize_lock = threading.Lock() + self._final_result: dict[str, Any] | None = None + + @property + def is_recording(self) -> bool: + return self._recording + + def start(self, 
name: str, duration_sec: float = 15.0) -> dict[str, Any]: + with self._lock: + if self._recording: + raise RuntimeError("Teaching session already active.") + self._recording = True + self._finalized = False + self._final_result = None + self._name = name + self._frames = [] + self._stop_event.clear() + self._phase = "holding" + self._started_at = time.monotonic() + + self._thread = threading.Thread( + target=self._run, args=(name, duration_sec), daemon=True + ) + self._thread.start() + log.info("Teaching started: %s (%.0fs)", name, duration_sec) + bus.emit_sync("motion.teaching_started", name=name, duration_sec=duration_sec) + return {"recording": True, "name": name, "duration_sec": duration_sec} + + def stop(self) -> dict[str, Any]: + with self._lock: + if not self._recording: + raise RuntimeError("No teaching session active.") + self._stop_event.set() + if self._thread: + self._thread.join(timeout=10.0) + # Finalize is now ALWAYS done by the worker thread (_run). + # If for some reason the worker died without finalizing, do it here. + result = self._finalize() + return result + + def _run(self, name: str, duration_sec: float): + interval = 1.0 / REPLAY_HZ + arm = self._arm + + try: + if _HAS_SDK and arm._initialized: + self._run_hardware(name, duration_sec, interval) + else: + self._run_simulation(name, duration_sec, interval) + except Exception: + log.exception("Teaching session crashed") + finally: + # Always finalize from the worker thread — stop() will see _finalized=True. 
+ self._finalize() + + def _run_hardware(self, name: str, duration_sec: float, interval: float): + """Real hardware teaching: hold → limp → record → home.""" + arm = self._arm + low_cmd = arm._low_cmd + crc = arm._crc + + initial_q = arm._get_current_q() + waist_lock = list(initial_q) + + # Phase 1: Safe hold + self._phase = "holding" + hold_end = time.monotonic() + SAFE_HOLD_SEC + log.info("Safe hold (%.1fs) — arms rigid", SAFE_HOLD_SEC) + + while time.monotonic() < hold_end and not self._stop_event.is_set(): + for i in range(G1_NUM_MOTOR): + low_cmd.motor_cmd[i].mode = 1 + low_cmd.motor_cmd[i].q = initial_q[i] + low_cmd.motor_cmd[i].dq = 0.0 + low_cmd.motor_cmd[i].tau = 0.0 + if i < 15: # body/waist + low_cmd.motor_cmd[i].kp = WAIST_KP + low_cmd.motor_cmd[i].kd = WAIST_KD + else: # arms + low_cmd.motor_cmd[i].kp = HOLD_ARM_KP + low_cmd.motor_cmd[i].kd = HOLD_ARM_KD + low_cmd.motor_cmd[29].q = 1.0 + low_cmd.crc = crc.Crc(low_cmd) + arm._arm_pub.Write(low_cmd) + time.sleep(interval) + + if self._stop_event.is_set(): + return + + # Phase 2: Teaching — arms go limp, record + self._phase = "teaching" + log.info("Arms released — move them now! 
Recording at %d Hz", int(REPLAY_HZ)) + t0 = time.monotonic() + + while not self._stop_event.is_set(): + elapsed = time.monotonic() - t0 + if elapsed >= duration_sec: + break + + # Limp arms, locked waist + current_q = arm._get_current_q() + for i in range(G1_NUM_MOTOR): + low_cmd.motor_cmd[i].mode = 1 + low_cmd.motor_cmd[i].dq = 0.0 + low_cmd.motor_cmd[i].tau = 0.0 + if i < 15: + low_cmd.motor_cmd[i].q = waist_lock[i] + low_cmd.motor_cmd[i].kp = WAIST_KP + low_cmd.motor_cmd[i].kd = WAIST_KD + else: + low_cmd.motor_cmd[i].q = current_q[i] + low_cmd.motor_cmd[i].kp = TEACH_ARM_KP + low_cmd.motor_cmd[i].kd = TEACH_ARM_KD + low_cmd.motor_cmd[29].q = 1.0 + low_cmd.crc = crc.Crc(low_cmd) + arm._arm_pub.Write(low_cmd) + + self._frames.append({"t": round(elapsed, 4), "q": current_q}) + time.sleep(interval) + + # Phase 3: Return home + self._phase = "returning" + from Project.Sanad.motion.arm_controller import _load_home_q, _lerp_q + home_q = _load_home_q() or initial_q + last_q = self._frames[-1]["q"] if self._frames else initial_q + + for step in range(180): + t = (step + 1) / 180 + interp = _lerp_q(last_q, home_q, t) + arm._send_frame(interp, waist_lock) + time.sleep(1.0 / REPLAY_HZ) + arm._disable_sdk() + + def _run_simulation(self, name: str, duration_sec: float, interval: float): + """Simulation: just record zero-pose frames for the given duration.""" + self._phase = "holding" + time.sleep(min(SAFE_HOLD_SEC, 1.0)) # shortened in sim + + self._phase = "teaching" + t0 = time.monotonic() + log.info("[SIM] Teaching — recording for %.0fs", duration_sec) + while not self._stop_event.is_set(): + elapsed = time.monotonic() - t0 + if elapsed >= duration_sec: + break + self._frames.append({"t": round(elapsed, 4), "q": [0.0] * G1_NUM_MOTOR}) + time.sleep(interval) + + self._phase = "returning" + time.sleep(0.5) + + def _finalize(self) -> dict[str, Any]: + """Save frames to JSONL and return result. 
Idempotent — safe to call twice.""" + with self._finalize_lock: + if self._finalized: + return self._final_result or { + "name": self._name, "frames": len(self._frames), + "path": "", "duration_sec": 0, + } + self._phase = "done" + result: dict[str, Any] = {"name": self._name, "frames": len(self._frames)} + + if self._frames: + MOTIONS_DIR.mkdir(parents=True, exist_ok=True) + out_path = MOTIONS_DIR / f"{self._name}.jsonl" + # Atomic write: tempfile + os.replace + content_lines = [ + json.dumps({"meta": {"hz": REPLAY_HZ, "motors": G1_NUM_MOTOR}}), + ] + for frame in self._frames: + content_lines.append(json.dumps(frame)) + content = ("\n".join(content_lines) + "\n").encode("utf-8") + + fd, tmp = tempfile.mkstemp( + prefix=f".{out_path.name}.", suffix=".tmp", + dir=str(out_path.parent), + ) + try: + with os.fdopen(fd, "wb") as f: + f.write(content) + os.replace(tmp, out_path) + except Exception: + try: + os.unlink(tmp) + except OSError: + pass + raise + + duration = self._frames[-1]["t"] if self._frames else 0 + result["path"] = str(out_path) + result["duration_sec"] = round(duration, 2) + result["size_kb"] = round(out_path.stat().st_size / 1024, 1) + log.info("Teaching saved: %s (%d frames, %.1fs)", out_path.name, len(self._frames), duration) + else: + result["path"] = "" + result["duration_sec"] = 0 + + self._finalized = True + self._final_result = result + + with self._lock: + self._recording = False + self._phase = "idle" + bus.emit_sync("motion.teaching_finished", name=result.get("name"), frames=result.get("frames")) + return result + + def status(self) -> dict[str, Any]: + elapsed = time.monotonic() - self._started_at if self._recording else 0 + return { + "recording": self._recording, + "phase": self._phase, + "name": self._name, + "elapsed_sec": round(elapsed, 1), + "frames_recorded": len(self._frames), + } diff --git a/vendor/Sanad/note.txt b/vendor/Sanad/note.txt new file mode 100644 index 0000000..6619b81 --- /dev/null +++ b/vendor/Sanad/note.txt @@ -0,0 
+1,12 @@ +N2 — Gemini-phrase movement via direct LocoClient · effort L (highest risk) +This is the Marcus phrase-confirmation pattern, but driven by direct LocoClient on the robot (no Holosoma/ZMQ, no laptop). The full chain: + +Gemini speaks a confirmation phrase → parent reads it → matches → LocoClient.Move(). + +Where LocoClient lives → the parent process, next to the arm. main.py:425 already calls ChannelFactoryInitialize once for the arm; the new LocoController reuses that same DDS participant (no second init). Port init_loco/move_cmd/stop_only from G1_Lootah/Controller/g1_mode_controller.py, plus a non-interactive FSM-200 bring-up adapted from hanger_boot_sequence.py (strip its own ChannelFactoryInitialize and the blocking input()). +The dispatch loop (voice/movement_loop.py) mirrors voice/live_voice_loop.py but polls the new bot_transcript, gated on a movement_enabled flag (default OFF). The matcher (voice/bot_dispatch.py) ports Marcus's _dispatch_gemini_bot: strip [STATE-]/quoted/question clauses → normalize numbers + Arabic→English → parametric-regex scan → longest-needle bot-phrase scan → dedup/cooldown. Vocabulary comes from a ported instruction.json. +Canonical → velocity (Marcus's MOVE_MAP ports 1:1 since LocoClient.Move uses the same m/s, rad/s units): forward→Move(0.3,0,0), backward→Move(-0.2,0,0), turn-left→Move(0,0,0.3), turn-right→Move(0,0,-0.3), stop→StopMove(). +Toggle in the Voice & Audio tab (#tab-voice): add movement_enabled to recognition_state.py, a dashboard/routes/movement.py route (clone the face-rec toggle), and a switch in the UI. +Announce on toggle: _announce_movement_state in script.py (clone _announce_facerec_state) — robot says "movement enabled" / "movement disabled." +Persona rules: append Marcus-style rules to Sanad's prompt (wake-word + action required; clarification questions must not contain motion verbs) so Gemini reliably emits the confirmation phrases. 
+🛑 Safety is mandatory, not optional: Move(continous_move=True) walks until StopMove. Needs a FIFO worker with stop-priority, a per-motion watchdog that forces StopMove after a max duration, velocity caps + deadband, and StopMove on both disable-toggle and shutdown. Sanad has no obstacle/abort stack today. \ No newline at end of file diff --git a/vendor/Sanad/requirements.txt b/vendor/Sanad/requirements.txt new file mode 100644 index 0000000..ab563c4 --- /dev/null +++ b/vendor/Sanad/requirements.txt @@ -0,0 +1,43 @@ +# Sanad — Python dependencies +# Install: pip install -r requirements.txt + +# Dashboard +fastapi>=0.110.0 +uvicorn[standard]>=0.29.0 +python-multipart>=0.0.9 + +# Gemini voice +# google-genai: the Gemini Live SDK — used by gemini/script.py (live brain) +# and gemini/client.py. Needs Python 3.10+, which is why the voice loop +# runs in the gemini_sdk conda env. send_realtime_input(video=)/(text=) +# and send_client_content() require a reasonably recent (>=1.x) release. +google-genai>=1.0.0 +websockets>=12.0 +pyaudio>=0.2.13 + +# Recognition (camera vision + face gallery for Gemini-side face recognition) +# opencv-python-headless: JPEG encode + USB-camera fallback. Headless wheel — +# the dashboard renders frames; we never need a GUI window. +# Pillow: resize face samples before the Gemini primer turn. +opencv-python-headless>=4.8.0 +Pillow>=10.0.0 +# +# pyrealsense2 — DO NOT `pip install` on Jetson / JetPack 5. +# The PyPI wheel is built against glibc 2.32+ (Ubuntu 22.04); JetPack 5 ships +# glibc 2.31, so the wheel fails to load with: +# ImportError: ... version `GLIBC_2.32' not found +# On Jetson, build the Python binding from source against the apt-installed +# librealsense2 runtime (see README → "Camera vision on Jetson"). +# On x86_64 / Ubuntu 22.04+ desktops, `pip install pyrealsense2` works fine. +# If pyrealsense2 is absent, CameraDaemon falls back to cv2.VideoCapture(0). 
+# pyrealsense2>=2.50.0 # intentionally commented — see note above + +# Local TTS (optional — only needed for MBZUAI model) +transformers>=4.40.0 +sentencepiece>=0.2.0 +torch>=2.2.0 +datasets>=2.19.0 +soundfile>=0.12.0 + +# General +numpy>=1.24.0 diff --git a/vendor/Sanad/scripts/sanad_arm.txt b/vendor/Sanad/scripts/sanad_arm.txt new file mode 100644 index 0000000..916c164 --- /dev/null +++ b/vendor/Sanad/scripts/sanad_arm.txt @@ -0,0 +1,724 @@ +# ================================================== +# 📄 sanad_arm.txt +# قاموس سند الشامل (لهجات عربية + تعرف أعمق + أخطاء شائعة) +# ملاحظة: +# - خففنا الكلمات "العامة جدًا" قدر الإمكان لتقليل التفعيل بالغلط. +# - بدون تشكيل لزيادة دقة المطابقة البرمجية. +# - IDs محفوظة كما هي في OPTION_LIST (0-28). +# ================================================== + +# ===================== +# Release arm (id=0) — stop/neutral / lower hands +# ===================== +WAKE_PHRASES_release_arm = { + # أوامر إيقاف عامة + "وقف", + "وقف بس", + "وقف الحين", + "وقف الحركة", + "وقف هالحركة", + "وقف يا سند", + "وقف يا بوسنده", + "وقف يا بوسنيده", + "وقف كذا", + "وقف خلاص", + + # خلاص + "خلاص", + "خلاص بس", + "خلاص الحين", + "تمام خلاص", + "تمام بس", + "بس خلاص", + "خلاص يا سند", + "خلاص يا بوسنده", + + # راحة / استرخاء + "ارتح", + "ارتاح", + "استريح", + "ريح", + "ريح يدك", + "ريح ايدك", + "ريح يدينك", + "ريح ايدينك", + "ارتح يا سند", + "استريح يا سند", + + # تحرير + "حرر اليد", + "حرر الذراع", + "حرر يدك", + "حرر ايدك", + "حرر يدينك", + "حرر ايدينك", + "فك يدك", + "فك ايدك", + "فك يدينك", + "فك ايدينك", + "فك الذراع", + "فك ايديك", + + # رجوع للوضع الطبيعي + "ارجع طبيعي", + "ارجع وضع طبيعي", + "رجع طبيعي", + "وضع طبيعي", + "نيوترال", + "محايد", + "هوم", + "ارجع هوم", + "ارجع للوضع", + "رجع للوضع", + + # إنزال اليد/الأيدي (لهجات + شدّة) + "نزل", + "نزل يدك", + "نزل ايدك", + "نزل يدينك", + "نزل ايدينك", + "نزل يدك تحت", + "نزل ايدك تحت", + "نزل يدينك تحت", + "نزل ايدينك تحت", + "نزل ايدك لتحت", + "نزل يدك لتحت", + + "نزّل", + "نزّل يدك", + "نزّل 
ايدك", + "نزّل يدينك", + "نزّل ايدينك", + + "انزل", + "انزل يدك", + "انزل ايدك", + "انزل يدينك", + "انزل ايدينك", + + # English (added) + "stop", + "stop now", + "stop moving", + "enough", + "relax", + "rest", + "neutral", + "home", + "go home", + "hands down", + "lower your hands", + "lower your arms", + "arms down", + "release", + "stand down", +} +# ===================== +# Shake hand (id=1) — formal greeting (SDK) +# ===================== +WAKE_PHRASES_shake_hand = { + # تحية دينية + "السلام عليكم", + "سلام عليكم", + "السلام عليكم ورحمة الله", + "السلام عليكم ورحمة الله وبركاته", + + # مع الاسم + "السلام عليكم يا سند", + "السلام عليكم سند", + "السلام عليكم يا بوسنده", + "السلام عليكم بوسنده", + "السلام عليكم يا بوسنيده", + "السلام عليكم بوسنيده", + + # طلب مصافحة + "صافحني", + "صافحني يا سند", + "صافحني يا بوسنده", + "صافحني يا بوسنيده", + "صافح", + "صافحنا", + "مد يدك", + "مد ايدك", + "مد ايدك اليمين", + "هات يدك", + "هات ايدك", + "عطني يدك", + "عطني ايدك", + "اعطيني يدك", + "اعطيني ايدك", + "سلم بيدك", + "سلم بايدك", + "سلم يا بطل", + "شيك هاند", + "شيك هاند يا سند", + + # English (added) + "assalamu alaykum", + "as-salamu alaykum", + "peace be upon you", + "handshake", + "shake hands", + "shake my hand", + "let's shake hands", + "nice to meet you", + "greetings", +} +# ===================== +# High five (id=2) — palm greeting (SDK) +# ===================== +WAKE_PHRASES_high_five = { + "هاي فايف", + "هاي فايف يا سند", + "هاي فايڤ", + "هاي فايڤ يا سند", + "عالي خمسة", + "عالي خمسه", + "اعطني خمسة", + "اعطني خمسه", + "هات خمسة", + "هات خمسه", + "دق كف", + "طق كف", + "دقلي كف", + "هات كفك", + "كف عالي", + "يلا كف", + "فرجينا الكف", + "خمسة بيدك", + "خمسه بايدك", + + # English (added) + "high five", + "high-five", + "give me five", + "gimme five", + "up top", + "slap my hand", +} +# ===================== +# Hug (id=3) — hug (SDK) +# ===================== +WAKE_PHRASES_hug = { + "حضن", + "حضني", + "حضنني", + "عناق", + "عانقني", + "ضمني", + "عطني حضن", + "عطني 
ضمه", + "ابي حضن", + "ابغى حضن", + "ممكن حضن", + "تعال حضن", + "تعال حضني", + "احتاج حضن", + "حضن كبير", + "حضنه قوية", + "ضمه", + "ضمة", + + # English (added) + "hug", + "give me a hug", + "big hug", + "come hug", + "embrace", + "cuddle", +} +# ===================== +# High wave (id=4) — goodbye / big wave (SDK) +# ===================== +WAKE_PHRASES_high_wave = { + "مع السلامه", + "مع السلامة", + "باي", + "باي باي", + "وداعا", + "وداعًا", + "ودع", + "ودعنا", + "اشوفك على خير", + "نشوفك على خير", + "في امان الله", + "الله وياك", + "تصبح على خير", + "تصبحون على خير", + + # تلويح + "لوح", + "لوح لي", + "لوح بيدك", + "لوح بايدك", + "لوح بيدك فوق", + "سلم عليهم", + "سلم عليهم يا سند", + "سلم عليهم كلهم", + "ودعهم", + "ودع الضيوف", + + # English (added) + "bye", + "bye bye", + "goodbye", + "see you", + "see you later", + "take care", + "wave", + "wave goodbye", +} +# ===================== +# Clap (id=5) — clap (SDK) +# ===================== +WAKE_PHRASES_clap = { + "صفق", + "صقف", + "تصفيق", + "يلا صفق", + "صفق لهم", + "يلا صفق لهم", + "سوي تصفيق", + "ابدأ تصفيق", + "وريني كيف تصفق", + "صفق بيدك", + + # (قد تكون عامة - احذفها إذا تسبب تفعيل بالغلط) + "برافو", + "حيوهم", + + # English (added) + "clap", + "applause", + "clapping", + "round of applause", + "bravo", + "clap for them", +} +# ===================== +# Face wave (id=6) — friendly greeting / calling Sanad (SDK) +# ===================== +WAKE_PHRASES_face_wave = { + # الاسم + "سند", + "يا سند", + "بوسنده", + "يا بوسنده", + "بوسنيده", + "يا بوسنيده", + "بو سنده", + "بو سنيده", + "يا بو سنده", + "يا بو سنيده", + + # تحيات + "هلا", + "يا هلا", + "ياهلا", + "هلا والله", + "ياهلا والله", + "هلا وغلا", + "مرحبا", + "مرحبتين", + "حي الله", + "حي الله من جانا", + + # تحية + الاسم + "هلا سند", + "هلا يا سند", + "مرحبا سند", + "مرحبا يا سند", + "يا هلا سند", + "يا هلا يا سند", + "هلا بوسنده", + "مرحبا بوسنده", + "هلا بوسنيده", + "مرحبا بوسنيده", + + # نداءات (تجنبنا الكلمات العامة وحدها) + "سند تعال", + "سند تعال هنا", + 
"سند اسمعني", + "سند اسمع", + "سند وينك", + "سند موجود", + "سند حاضر", + "سند شوفني", + "سند ركز", + "سند ركز معي", + "سند انت وين", + + "بوسنده تعال", + "بوسنده اسمعني", + "بوسنده وينك", + "بوسنيده تعال", + "بوسنيده اسمعني", + "بوسنيده وينك", + + # English (added) + "sanad", + "hey sanad", + "hello sanad", + "hi sanad", + "bosanda", + "bosandah", + "are you there", + "listen to me", + "come here", + "look at me", + "pay attention", +} +# ===================== +# Left kiss (id=7) — left cheek kiss (SDK) +# ===================== +WAKE_PHRASES_left_kiss = { + "بوسه يسار", + "بوسة يسار", + "بوسه على الخد اليسار", + "بوسة على الخد اليسار", + "بوسه على اليسار", + "بوسة يسار يا سند", + "بوسه يسار يا سند", + "قبلة يسار", + "قبله يسار", + + # English (added) + "left kiss", + "kiss left", + "kiss on the left cheek", + "left cheek kiss", +} +# ===================== +# Heart (id=8) — heart (SDK) +# ===================== +WAKE_PHRASES_heart = { + "قلب", + "سوي قلب", + "سوي قلب كبير", + "اعطني قلب", + "عطني قلب", + "ابي قلب", + "ابغى قلب", + "هارت", + "سوي هارت", + "وريني قلب", + "شكل قلب", + "قلب قلب", + "لوف", + "اعطيني لوف", + "سوي لوف", + + # English (added) + "heart", + "make a heart", + "do a heart", + "love", + "i love you", +} +# ===================== +# Right heart (id=9) — right-side heart (SDK) +# ===================== +WAKE_PHRASES_right_heart = { + "قلب يمين", + "سوي قلب يمين", + "سوي هارت يمين", + "هارت يمين", + "قلب على اليمين", + "قلب يمين يا سند", + + # English (added) + "right heart", + "make a right heart", + "heart right", + "heart on the right", +} +# ===================== +# Hands up (id=10) — built-in display only (SDK) +# ===================== +WAKE_PHRASES_hands_up = { + "وريني يدينك", + "وريني ايدينك", + "فرجينا يدينك", + "فرجينا ايدينك", + "هات يدينك نشوف", + "ايديك الثنتين", + "يديك الثنتين", + "طلع يدينك", + "طلع ايدينك", + "ارفع يدينك شوي", + "ارفع ايدينك شوي", + + # English (added) + "hands up", + "put your hands up", + "raise your hands", + 
"raise both hands", + "arms up", + "lift your hands", +} +# ===================== +# X-ray (id=11) — x-ray pose (SDK) +# ===================== +WAKE_PHRASES_x_ray = { + "اكس راي", + "xray", + "اشعه", + "اشعة", + "اشعة اكس", + "اشعة سينية", + "سكان", + "سكانر", + "فحص", + "فحص اشعة", + "سوي اكس راي", + + # English (added) + "x-ray", + "scan", + "scanner", + "xray pose", + "medical scan", +} +# ===================== +# Right hand up (id=12) — built-in display only (SDK) +# ===================== +WAKE_PHRASES_right_hand_up = { + "وريني يدك اليمين", + "وريني ايدك اليمين", + "فرجينا يدك اليمين", + "هات اليمين نشوف", + "طلع يدك اليمين", + "طلع ايدك اليمين", + "ارفع يدك اليمين", + "ارفع ايدك اليمين", + + # English (added) + "right hand up", + "raise your right hand", + "lift your right hand", + "right arm up", + "put your right hand up", +} +# ===================== +# Reject (id=13) — reject/decline (SDK) +# ===================== +WAKE_PHRASES_reject = { + "ارفض", + "رفض", + "مرفوض", + "مو موافق", + "مش موافق", + "لا ابي", + "لا ابغا", + "ما ابي", + "ما ابغا", + "لا شكرا", + "لا شكرًا", + "كنسل", + "الغيه", + "الغيها", + "لا تسويها", + "لا تعملها", + "لا تساويها", + + # English (added) + "reject", + "decline", +} +# ===================== +# Right kiss (id=14) — right cheek kiss (SDK) +# ===================== +WAKE_PHRASES_right_kiss = { + "بوسه يمين", + "بوسة يمين", + "بوسه على الخد اليمين", + "بوسة على الخد اليمين", + "بوسه على اليمين", + "بوسة يمين يا سند", + "قبلة يمين", + "قبله يمين", + + # English (added) + "right kiss", + "kiss right", + "kiss on the right cheek", + "right cheek kiss", +} +# ===================== +# Two-hand kiss (id=15) — two-hand kiss (SDK) +# ===================== +WAKE_PHRASES_two_hand_kiss = { + "بوسات", + "بوسات كثير", + "بوسه كبيرة", + "بوسة كبيرة", + "بوسه بكل اليدين", + "بوسة بكل اليدين", + "بوسه بيدينك", + "بوسة بيدينك", + "بوسه قوية", + "بوسة قوية", + "قبلة كبيرة", + "قبلات", + + # English (added) + "two hand kiss", + "two-hand kiss", + 
"big kiss", + "many kisses", + "lots of kisses", + "blow a big kiss", +} +# ================================================== +# Recorded Actions (IDs 23+ for JSONL Replay) +# ================================================== + +# ===================== +# Laugh (id=23) — recorded +# ===================== +WAKE_PHRASES_laugh = { + "اضحك", + "ضحكه", + "ضحكة", + "يضحك", + "ضحك", + "هههه", + "ههههه", + "نكته", + "نكتة", + "مضحك", + "كركر", + "اضحك يا سند", + "فرجينا ضحكتك", + "ضحكنا", + + # English (added) + "laugh", + "haha", + "hahaha", + "lol", + "that's funny", + "make me laugh", + "giggle", +} +# ===================== +# Bird (id=24) — recorded +# ===================== +WAKE_PHRASES_bird = { + "طير", + "سوي طير", + "عصفور", + "جناح", + "رفرف", + "رفرف بيدك", + "رفرف بايدك", + "حرك يدينك مثل الطير", + "وريني كيف يطير العصفور", + "سوي جناحات", + + # English (added) + "bird", + "fly", + "flap", + "flap your wings", + "wings", +} +# ===================== +# Change Battery (id=25) — recorded +# ===================== +WAKE_PHRASES_change_battery = { + "غير البطاريه", + "غير البطارية", + "بدل البطاريه", + "بدل البطارية", + "تغيير بطاريه", + "تغيير بطارية", + "البطاريه خلصت", + "البطارية خلصت", + "شحن البطاريه", + "شحن البطارية", + "تشينج باتري", + "change battery", + "battery low", + + # English (added) + "replace battery", + "swap battery", + "need charging", + "charge battery", +} +# ================================================== +# Active Movement (Recorded) — IDs 26–28 +# ================================================== + +# ===================== +# Move hands up (id=26) — active movement (dressing/exercise) +# ===================== +WAKE_PHRASES_move_hands_up = { + "ارفع يدينك", + "ارفع ايدينك", + "يدينك فوق", + "ايدينك فوق", + "ارفعهم فوق", + "ارفع يدينك الاثنين", + "ارفع ايديك الثنتين", + "هاندز اب", + + # dressing/exercise context + "نلبسك", + "بلبسك", + "البس", + "غير ملابسك", + "ساعدنا نلبسك", + "يالله نلبس", + "تمرين ارفع يدينك", + "رفع يدين للتلبيس", 
+ + # English (added) + "hands up", + "raise your hands", + "raise both hands", + "lift your arms", + "arms up", + "raise both arms", +} +# ===================== +# Move right hand up (id=27) — active movement +# ===================== +WAKE_PHRASES_move_right_hand_up = { + "ارفع اليمين فوق", + "يدك اليمين فوق", + "ايدك اليمين فوق", + "ارفع يدك اليمين", + "ارفع ايدك اليمين", + "يمينك فوق", + "يمين فوق للاعلى", + "وريني يدك اليمين فوق", + "ارفع يمينك", + + # English (added) + "right hand up", + "raise your right hand", + "lift your right hand", + "right arm up", +} +# ===================== +# Move left hand up (id=28) — active movement +# ===================== +WAKE_PHRASES_move_left_hand_up = { + "ارفع اليسار فوق", + "يدك اليسار فوق", + "ايدك اليسار فوق", + "ارفع يدك اليسار", + "ارفع ايدك اليسار", + "يسارك فوق", + "يسار فوق للاعلى", + "وريني يدك اليسار فوق", + "ارفع يسارك", + + # English (added) + "left hand up", + "raise your left hand", + "lift your left hand", + "left arm up", +} diff --git a/vendor/Sanad/scripts/sanad_rule.txt b/vendor/Sanad/scripts/sanad_rule.txt new file mode 100644 index 0000000..07cd86b --- /dev/null +++ b/vendor/Sanad/scripts/sanad_rule.txt @@ -0,0 +1,19 @@ +[SYSTEM_PROMPT] +You are Sanad (Bousandah), a wise and friendly Emirati assistant. +Speak naturally in the UAE dialect (Khaleeji) unless the user explicitly provides text that must be spoken exactly. +Keep the tone warm, respectful, and clear. +Do not be robotic. +Do not over-explain. +Prefer concise speech that sounds natural when spoken aloud funny mode and happy sound. + +[REPLAY_SYSTEM_PROMPT] +You are Sanad (Bousandah), using the same Emirati voice and personality. +For replay mode, the user will provide text that you must speak exactly as written. +You may sound warm and lively, but you must preserve the exact text. +Do not translate it. +Do not summarize it. +Do not answer it. +Do not rephrase it into another dialect or style. 
+Do not add greetings, punctuation changes, comments, or extra words. +Keep the same word order and language as the provided text. +Your only task is to speak the exact user text verbatim. diff --git a/vendor/Sanad/scripts/sanad_script.txt b/vendor/Sanad/scripts/sanad_script.txt new file mode 100644 index 0000000..3a2d184 --- /dev/null +++ b/vendor/Sanad/scripts/sanad_script.txt @@ -0,0 +1,68 @@ +أنت "بوسنده" — روبوت إماراتي ذكي تابع لروبوت شركة لوتاه تيك Lootah Tech. + +[أولاً: الروح والمعرفة] + + لديك معرفة واسعة بالقرآن الكريم والأحاديث النبوية الشريفة للإجابة على أي سؤال ديني أو تقديم تذكيرات إيمانية بدقة، مع ذكر المصدر متى لزم. + + إذا سألك المستخدم عن آية، حديث، أو حكم شرعي، جاوب بوقار، ووضوح، وتبسيط، وبلهجتك الإماراتية. + + التزم بالدقة في المسائل الدينية، وإذا كانت المسألة فيها خلاف أو تحتاج تحقق، وضّح ذلك بصراحة بدون جزم زائد. + +[ثانياً: الأسلوب واللغة (التبديل المرن)] + + تكلم باللهجة الإماراتية بشكل طبيعي، راقٍ، ومفهوم، بدون مبالغة أو تصنع. + + قاعدة التبديل الفوري: إذا استخدم المستخدم أي لغة ثانية في أي لحظة، غيّر فوراً ورد بنفس اللغة الجديدة في نفس الرد. + + إذا رجع المستخدم للعربي: أرجع فوراً للعربي باللهجة الإماراتية. + + "آخر لغة كتب فيها المستخدم" هي اللغة اللي ترد فيها. + + ممنوع تخلط لغتين في نفس الرد إلا إذا كان طلب المستخدم ترجمة أو مقارنة صريحة. + +[ثالثاً: التفاعل والبدايات] + + خلك محترم، ودود، مباشر، وركّز على الزبدة والحل العملي. + + نوّع البدايات بأسلوب طبيعي، مثل: + (مرحبابك، أبشر بعزك، فالك طيب، يا مرحبا، حياك الله، زين، تم، حاضر) + ولا تكرر نفس البداية مرتين متتاليتين. + + حافظ على أسلوب احترافي ومرن يناسب السؤال: ديني، تقني، عملي، أو يومي. + +[رابعاً: إنجاز المهام وقوة الذاكرة العمومية] + + اعتبر كل كلمة، اسم، مسار، ملاحظة، أو تفضيل يذكره المستخدم بمثابة "أمر حفظ" وأولوية قصوى داخل المحادثة. + + تعامل مع معلومات المستخدم، تفضيلاته، وتصحيحاته كأنها ثوابت محفوظة يجب الرجوع لها لاحقاً. + + عند تصحيح المستخدم لمعلومة، قل: + "زين نبهتني يا الشيخ، انحفرت في الذاكرة" + + ابنِ على السياق السابق مباشرة بدون ما ترجع تسأل عن أمور سبق ذكرها. 
+ +[خامساً: الأمان والخصوصية] + + إذا كتب المستخدم API key أو Password أو Token أو أي بيانات حساسة: نبهه فوراً يمسحها ويبدلها. + + لا تطلب بيانات حساسة إلا عند الضرورة القصوى، وبأسلوب محترم وواضح. + + حافظ على الخصوصية، ولا تعيد عرض المعلومات الحساسة بدون داعٍ. + + ممنوع أي نكت أو محتوى غير لائق في الدين أو السياسة أو الأمور الحساسة. + +[سادساً: السرعة والتكرار] + + جاوب بسرعة، بوضوح، وباختصار، وغالباً ضمن 2 إلى 6 سطور، إلا إذا طلب المستخدم التفصيل. + + إذا طلب المستخدم "كرر" أو "repeat": أعد نفس الكلام بنفس اللغة الحالية، وحرفياً إذا طلب ذلك. + + إذا طلب اختصار: اختصر مباشرة بدون فقدان المعنى الأساسي. + +[سابعاً: جودة الإجابة] + + قدّم الجواب بشكل واضح، مرتب، وسهل الفهم، وابدأ بالأهم ثم التفصيل إذا احتاج المستخدم. + + إذا كان طلب المستخدم عملي أو تقني، ركّز على الخطوات والحل المباشر. + + إذا كان السؤال يحتمل أكثر من معنى، خذ التفسير الأقرب من السياق وقدّم أفضل إجابة مفيدة بدون إطالة. \ No newline at end of file diff --git a/vendor/Sanad/shell_scripts/check_audio_caps.sh b/vendor/Sanad/shell_scripts/check_audio_caps.sh new file mode 100755 index 0000000..b5346d2 --- /dev/null +++ b/vendor/Sanad/shell_scripts/check_audio_caps.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash + +set -e + +echo "==============================" +echo "🔊 POWERCONF AUDIO CHECK" +echo "==============================" +echo + +# ---------------------------------- +# Detect PowerConf card + device +# ---------------------------------- +POWERCONF_LINE=$(arecord -l | grep -i "PowerConf" | head -1) + +if [ -z "$POWERCONF_LINE" ]; then + echo "❌ PowerConf device not found" + exit 1 +fi + +CARD_INDEX=$(echo "$POWERCONF_LINE" | sed -n 's/.*card \([0-9]\+\):.*/\1/p') +DEVICE_INDEX=$(echo "$POWERCONF_LINE" | sed -n 's/.*device \([0-9]\+\):.*/\1/p') + +echo "✅ PowerConf detected:" +echo " Card = $CARD_INDEX" +echo " Device = $DEVICE_INDEX" +echo " ALSA = hw:${CARD_INDEX},${DEVICE_INDEX}" +echo + +# ---------------------------------- +# Capture devices (PowerConf only) +# ---------------------------------- 
+echo "🎤 CAPTURE DEVICE (PowerConf only)" +arecord -l | grep -i -A4 "PowerConf" +echo + +# ---------------------------------- +# Playback devices (PowerConf only) +# ---------------------------------- +echo "🔈 PLAYBACK DEVICE (PowerConf only)" +aplay -l | grep -i -A4 "PowerConf" +echo + +# ---------------------------------- +# Capture HW params (forced safe) +# ---------------------------------- +echo "🎤 MICROPHONE HARDWARE PARAMETERS (PowerConf)" + +MIC_HW=$(arecord -D hw:${CARD_INDEX},${DEVICE_INDEX} \ + -f S16_LE -r 48000 -c 1 \ + -d 0.1 --dump-hw-params 2>/dev/null) + +echo "$MIC_HW" | grep -E "FORMAT:|CHANNELS:|RATE:" +echo + +# ---------------------------------- +# Playback HW params (forced safe) +# ---------------------------------- +echo "🔈 SPEAKER HARDWARE PARAMETERS (PowerConf)" + +SPK_HW=$(aplay -D hw:${CARD_INDEX},${DEVICE_INDEX} \ + -f S16_LE -r 48000 -c 1 \ + -d 0.1 --dump-hw-params /dev/zero 2>/dev/null) + +echo "$SPK_HW" | grep -E "FORMAT:|CHANNELS:|RATE:" +echo + +# ---------------------------------- +# Extract clean values +# ---------------------------------- +FORMAT=$(echo "$MIC_HW" | grep -m1 "FORMAT" | awk '{print $2}') +RATE=$(echo "$MIC_HW" | grep -m1 "RATE" | awk '{print $2}') +CHANNELS_IN=$(echo "$MIC_HW" | grep -m1 "CHANNELS" | awk '{print $2}') +CHANNELS_OUT=$(echo "$SPK_HW" | grep -m1 "CHANNELS" | awk '{print $2}') + +FORMAT=${FORMAT:-S16_LE} +RATE=${RATE:-48000} +CHANNELS_IN=${CHANNELS_IN:-1} +CHANNELS_OUT=${CHANNELS_OUT:-1} +CHUNK=960 + +# ---------------------------------- +# Final verified Python config +# ---------------------------------- +echo "==============================" +echo "✅ VERIFIED POWERCONF PYTHON CONFIG" +echo "==============================" +echo "FORMAT = pyaudio.paInt16 # ${FORMAT}" +echo "RATE = ${RATE}" +echo "CHANNELS_IN = ${CHANNELS_IN}" +echo "CHANNELS_OUT = ${CHANNELS_OUT}" +echo "CHUNK = ${CHUNK}" +echo "INPUT_DEVICE = ${CARD_INDEX}" +echo "OUTPUT_DEVICE = ${CARD_INDEX}" +echo +echo "✔ PowerConf USB 
Audio" +echo "✔ Mono mic + Mono speaker" +echo "✔ hw:${CARD_INDEX},${DEVICE_INDEX}" +echo "==============================" diff --git a/vendor/Sanad/shell_scripts/check_pulse_devices.sh b/vendor/Sanad/shell_scripts/check_pulse_devices.sh new file mode 100755 index 0000000..533db6d --- /dev/null +++ b/vendor/Sanad/shell_scripts/check_pulse_devices.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +echo "==============================" +echo "🔊 PulseAudio Devices Checker" +echo "==============================" +echo + +# تحقق أن PulseAudio شغال +if ! pactl info >/dev/null 2>&1; then + echo "❌ PulseAudio is NOT running" + echo "ℹ️ Use ALSA (hw:X,Y) instead" + exit 1 +fi + +echo "✅ PulseAudio is running" +echo + +# ------------------------------- +# List Sinks (Speakers) +# ------------------------------- +echo "🔈 AVAILABLE SINKS (Speakers)" +echo "------------------------------" +pactl list short sinks | awk '{printf "• Name: %-70s | Index: %s\n", $2, $1}' +echo + +# ------------------------------- +# List Sources (Microphones) +# ------------------------------- +echo "🎤 AVAILABLE SOURCES (Microphones)" +echo "----------------------------------" +pactl list short sources | awk '{printf "• Name: %-70s | Index: %s\n", $2, $1}' +echo + +# ------------------------------- +# Highlight PowerConf if exists +# ------------------------------- +echo "🔍 PowerConf Devices Found" +echo "--------------------------" + +FOUND=0 + +pactl list short sinks | grep -i powerconf && FOUND=1 +pactl list short sources | grep -i powerconf && FOUND=1 + +if [ "$FOUND" -eq 0 ]; then + echo "⚠️ PowerConf NOT found in PulseAudio" +else + echo "✅ PowerConf detected above" +fi + +echo +echo "==============================" diff --git a/vendor/Sanad/shell_scripts/clean_sanad.sh b/vendor/Sanad/shell_scripts/clean_sanad.sh new file mode 100755 index 0000000..14adfd5 --- /dev/null +++ b/vendor/Sanad/shell_scripts/clean_sanad.sh @@ -0,0 +1,156 @@ +#!/usr/bin/env bash +# clean_sanad.sh — wipe transient state 
(logs, recordings, audio, caches). +# +# Safe by default: shows a preview + asks for confirmation. Won't touch +# config files, skills.json, wake_phrases.json, recorded JSONL motions, +# or the model directory. +# +# Usage: +# ./clean_sanad.sh # interactive — preview + y/N prompt +# ./clean_sanad.sh -y # skip the prompt +# ./clean_sanad.sh --dry-run # show what would be deleted; delete nothing +# ./clean_sanad.sh --logs # logs only +# ./clean_sanad.sh --records # recorded turns + typed-replay audio only +# ./clean_sanad.sh --cache # __pycache__ only +# ./clean_sanad.sh --all # everything (default) +# +# Override the project location: +# SANAD_HOME=/some/path ./clean_sanad.sh + +set -u + +SANAD_HOME="${SANAD_HOME:-$HOME/Sanad}" +if [ ! -d "$SANAD_HOME" ]; then + # Fallback for invocation from the repo (workstation) + SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + SANAD_HOME="$(dirname "$SCRIPT_DIR")" +fi +if [ ! -d "$SANAD_HOME" ]; then + echo "Sanad dir not found: $SANAD_HOME" >&2 + exit 1 +fi + +# ── flag parsing ────────────────────────────────────────── +TARGET="all" +DRY_RUN=0 +ASSUME_YES=0 +for arg in "$@"; do + case "$arg" in + -y|--yes) ASSUME_YES=1 ;; + -n|--dry-run) DRY_RUN=1 ;; + --logs) TARGET="logs" ;; + --records) TARGET="records" ;; + --cache) TARGET="cache" ;; + --all) TARGET="all" ;; + -h|--help) + sed -n '2,20p' "$0"; exit 0 ;; + *) + echo "unknown option: $arg (try -h)" >&2; exit 2 ;; + esac +done + +# ── targets — grouped per category so we can summarise per-category ── +declare -a LOGS_PATHS=() +declare -a RECORDS_PATHS=() +declare -a CACHE_PATHS=() + +collect_logs() { + while IFS= read -r p; do LOGS_PATHS+=("$p"); done < <( + find "$SANAD_HOME/logs" -maxdepth 2 -type f 2>/dev/null + ) +} +collect_records() { + while IFS= read -r p; do RECORDS_PATHS+=("$p"); done < <( + find "$SANAD_HOME/data/recordings" -type f 2>/dev/null + ) + while IFS= read -r p; do RECORDS_PATHS+=("$p"); done < <( + find "$SANAD_HOME/data/audio" -maxdepth 1 
-type f \ + \( -name "*.wav" -o -name "*.pcm" \) 2>/dev/null + ) +} +collect_cache() { + while IFS= read -r p; do CACHE_PATHS+=("$p"); done < <( + find "$SANAD_HOME" -type d -name "__pycache__" 2>/dev/null + ) + while IFS= read -r p; do CACHE_PATHS+=("$p"); done < <( + find "$SANAD_HOME" -type f -name "*.pyc" 2>/dev/null + ) +} + +case "$TARGET" in + logs) collect_logs ;; + records) collect_records ;; + cache) collect_cache ;; + all) collect_logs; collect_records; collect_cache ;; +esac + +# Bytes per array (silently skips missing paths) +sum_bytes() { + local total=0 p sz + for p in "$@"; do + [ -e "$p" ] || continue + sz=$(du -sb "$p" 2>/dev/null | awk '{print $1}') + total=$((total + ${sz:-0})) + done + echo "$total" +} +fmt() { numfmt --to=iec --suffix=B "$1" 2>/dev/null || echo "${1} B"; } + +LOGS_BYTES=$(sum_bytes "${LOGS_PATHS[@]:-}") +RECORDS_BYTES=$(sum_bytes "${RECORDS_PATHS[@]:-}") +CACHE_BYTES=$(sum_bytes "${CACHE_PATHS[@]:-}") +TOTAL_BYTES=$((LOGS_BYTES + RECORDS_BYTES + CACHE_BYTES)) +TOTAL_COUNT=$(( ${#LOGS_PATHS[@]} + ${#RECORDS_PATHS[@]} + ${#CACHE_PATHS[@]} )) + +# ── preview ─────────────────────────────────────────────── +if [ "$TOTAL_COUNT" -eq 0 ]; then + echo "Nothing to delete (target=$TARGET)." 
+ exit 0 +fi + +echo +echo "Sanad clean — target=$TARGET dry_run=$DRY_RUN" +echo "Project: $SANAD_HOME" +echo +printf '┌──────────────┬─────────┬────────────┐\n' +printf '│ %-12s │ %7s │ %10s │\n' "Category" "Items" "Size" +printf '├──────────────┼─────────┼────────────┤\n' +if [ "${#LOGS_PATHS[@]}" -gt 0 ]; then printf '│ %-12s │ %7d │ %10s │\n' "logs" "${#LOGS_PATHS[@]}" "$(fmt "$LOGS_BYTES")"; fi +if [ "${#RECORDS_PATHS[@]}" -gt 0 ]; then printf '│ %-12s │ %7d │ %10s │\n' "records" "${#RECORDS_PATHS[@]}" "$(fmt "$RECORDS_BYTES")"; fi +if [ "${#CACHE_PATHS[@]}" -gt 0 ]; then printf '│ %-12s │ %7d │ %10s │\n' "cache" "${#CACHE_PATHS[@]}" "$(fmt "$CACHE_BYTES")"; fi +printf '├──────────────┼─────────┼────────────┤\n' +printf '│ %-12s │ %7d │ %10s │\n' "TOTAL" "$TOTAL_COUNT" "$(fmt "$TOTAL_BYTES")" +printf '└──────────────┴─────────┴────────────┘\n' + +# Flatten for the delete loop +declare -a PATHS_TO_DELETE=( "${LOGS_PATHS[@]:-}" "${RECORDS_PATHS[@]:-}" "${CACHE_PATHS[@]:-}" ) +# Strip any empty entries the unset-array fallback may have introduced +PATHS_TO_DELETE=("${PATHS_TO_DELETE[@]/#/}") +TMP_PATHS=() +for p in "${PATHS_TO_DELETE[@]}"; do [ -n "$p" ] && TMP_PATHS+=("$p"); done +PATHS_TO_DELETE=("${TMP_PATHS[@]}") + +if [ "$DRY_RUN" -eq 1 ]; then + echo "Dry run — nothing deleted." + exit 0 +fi + +if [ "$ASSUME_YES" -ne 1 ]; then + read -r -p "Proceed with delete? [y/N] " ans + case "$ans" in + y|Y|yes|YES) ;; + *) echo "Aborted."; exit 0 ;; + esac +fi + +# ── delete ──────────────────────────────────────────────── +removed=0 +for p in "${PATHS_TO_DELETE[@]}"; do + rm -rf -- "$p" && removed=$((removed + 1)) +done +echo "Removed $removed of ${#PATHS_TO_DELETE[@]} item(s)." + +# Recreate empty top-level dirs so next start_sanad.sh run doesn't +# complain about missing paths. +mkdir -p "$SANAD_HOME/logs" "$SANAD_HOME/data/recordings" "$SANAD_HOME/data/audio" +echo "Done." 
diff --git a/vendor/Sanad/shell_scripts/reset_anker_usb.sh b/vendor/Sanad/shell_scripts/reset_anker_usb.sh new file mode 100755 index 0000000..c50b850 --- /dev/null +++ b/vendor/Sanad/shell_scripts/reset_anker_usb.sh @@ -0,0 +1,119 @@ +#!/usr/bin/env bash +# reset_anker_usb.sh — unbind+rebind snd-usb-audio for an Anker USB device. +# +# WHY THIS EXISTS +# The Anker PowerConf A3321 on this Jetson sometimes enumerates with only +# output USB Audio Class descriptors (no capture interface). PulseAudio +# then shows the card with only output-only profiles and the dashboard +# can't expose the mic. Restarting PulseAudio does nothing — UAC +# descriptors are parsed by snd-usb-audio at probe time, persist in +# kernel structs, and only get re-parsed on a fresh driver bind. +# +# `/api/audio/usb-reset` writes directly to +# /sys/bus/usb/drivers/snd-usb-audio/{unbind,bind} when possible. That +# path needs root. This script exists as a sudo fallback so the dashboard +# can recover without Sanad itself running as root. +# +# USAGE +# reset_anker_usb.sh — unbind+rebind given device +# (bus_id like "1-3") +# reset_anker_usb.sh --setup-sudoers — install one-time NOPASSWD entry +# (must be run via sudo) +# +# Exit codes: +# 0 — unbind + rebind both succeeded +# 1 — bus_id missing or device not present +# 2 — no snd-usb-audio interfaces bound to that device +# 3 — unbind or bind sysfs write failed +# 4 — --setup-sudoers used outside of sudo + +set -u + +USAGE="usage: $(basename "$0") or $(basename "$0") --setup-sudoers" + +if [ "$#" -lt 1 ]; then + echo "$USAGE" >&2 + exit 1 +fi + +# ───────────────────── --setup-sudoers ───────────────────── +if [ "$1" = "--setup-sudoers" ]; then + if [ "$(id -u)" -ne 0 ]; then + echo "❌ --setup-sudoers must run as root (use: sudo $0 --setup-sudoers)" >&2 + exit 4 + fi + # Install a NOPASSWD entry so the unitree user can invoke THIS exact + # script path with sudo without typing a password. Scoped to one + # binary; not a blanket sudo grant. 
+ SELF_PATH="$(readlink -f "$0")" + SUDO_FILE="/etc/sudoers.d/sanad-anker-usb-reset" + cat > "$SUDO_FILE" <&2 + exit 1 +fi + +# Discover snd-usb-audio interfaces on this device. Don't unbind anything +# else (some Anker firmwares present HID-Consumer for the mute button on +# a separate interface — we leave those alone). +declare -a IFACES=() +for iface_path in "${DEV_PATH}/${BUS_ID}:"*; do + [ -e "$iface_path" ] || continue + driver_link="${iface_path}/driver" + [ -L "$driver_link" ] || continue + driver=$(basename "$(readlink "$driver_link")") + if [ "$driver" = "snd-usb-audio" ]; then + IFACES+=("$(basename "$iface_path")") + fi +done + +if [ "${#IFACES[@]}" -eq 0 ]; then + echo "❌ No snd-usb-audio interfaces bound to device $BUS_ID" >&2 + exit 2 +fi + +echo "ℹ️ Re-binding snd-usb-audio for $BUS_ID (interfaces: ${IFACES[*]})" + +UNBIND="/sys/bus/usb/drivers/snd-usb-audio/unbind" +BIND="/sys/bus/usb/drivers/snd-usb-audio/bind" + +# Unbind first; on failure exit before rebind so we don't leave the device +# in a half-bound state. +for iface in "${IFACES[@]}"; do + if ! echo -n "$iface" > "$UNBIND" 2>/dev/null; then + echo "❌ unbind failed: $iface → $UNBIND" >&2 + exit 3 + fi + echo " unbound: $iface" +done + +# Brief settle — snd-usb-audio's release path tears down ALSA card N. +sleep 0.5 + +for iface in "${IFACES[@]}"; do + if ! echo -n "$iface" > "$BIND" 2>/dev/null; then + echo "❌ rebind failed: $iface → $BIND" >&2 + exit 3 + fi + echo " bound: $iface" +done + +# Let probe complete so callers can pactl list cards right after. +sleep 1.0 +echo "✅ snd-usb-audio re-bound for $BUS_ID" +exit 0 diff --git a/vendor/Sanad/shell_scripts/sanad.service b/vendor/Sanad/shell_scripts/sanad.service new file mode 100644 index 0000000..b5c561a --- /dev/null +++ b/vendor/Sanad/shell_scripts/sanad.service @@ -0,0 +1,31 @@ +# systemd user-level unit for Sanad. 
Install with: +# +# mkdir -p ~/.config/systemd/user +# cp ~/Sanad/shell_scripts/sanad.service ~/.config/systemd/user/sanad.service +# systemctl --user daemon-reload +# systemctl --user enable --now sanad.service +# sudo loginctl enable-linger unitree # run at boot even when not logged in +# +# Watch logs: +# journalctl --user -u sanad.service -f +# +# Restart after a code/config change: +# systemctl --user restart sanad.service + +[Unit] +Description=Sanad robot assistant (FastAPI dashboard + voice/motion subsystems) +After=network-online.target +Wants=network-online.target + +[Service] +Type=exec +WorkingDirectory=%h/Sanad +ExecStart=/usr/bin/env bash %h/Sanad/shell_scripts/start_sanad.sh +Restart=on-failure +RestartSec=5 +TimeoutStopSec=15 +KillSignal=SIGINT +PassEnvironment=PULSE_RUNTIME_PATH XDG_RUNTIME_DIR + +[Install] +WantedBy=default.target diff --git a/vendor/Sanad/shell_scripts/set_powerconf_audio.sh b/vendor/Sanad/shell_scripts/set_powerconf_audio.sh new file mode 100755 index 0000000..a1e2923 --- /dev/null +++ b/vendor/Sanad/shell_scripts/set_powerconf_audio.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +set -e + +SINK="alsa_output.usb-Anker_PowerConf_A3321-DEV-SN1-01.analog-stereo" +SOURCE="alsa_input.usb-Anker_PowerConf_A3321-DEV-SN1-01.mono-fallback" + +echo "🔊 Checking PulseAudio..." +if ! 
pactl info >/dev/null 2>&1; then + echo "❌ PulseAudio is not running" + exit 1 +fi + +echo "✅ PulseAudio is running" + +echo "🎧 Setting default speaker → PowerConf" +pactl set-default-sink "$SINK" + +echo "🎤 Setting default microphone → PowerConf" +pactl set-default-source "$SOURCE" + +echo +echo "📋 Current PulseAudio defaults:" +pactl info | grep -E "Default Sink|Default Source" + +echo +echo "✅ PowerConf audio routing applied successfully" diff --git a/vendor/Sanad/shell_scripts/start_sanad.sh b/vendor/Sanad/shell_scripts/start_sanad.sh new file mode 100755 index 0000000..7997815 --- /dev/null +++ b/vendor/Sanad/shell_scripts/start_sanad.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash +# start_sanad.sh — boot Sanad's main.py inside the gemini_sdk conda env. +# +# Used both manually (./start_sanad.sh) and from the systemd unit +# (sanad.service) for boot-time auto-start. +# +# Override knobs (env vars; all optional): +# SANAD_HOME project root (default ~/Sanad) +# SANAD_CONDA_ENV conda env name (default gemini_sdk) +# SANAD_CONDA_BASE conda install dir (default $HOME/miniconda3) +# SANAD_DDS_INTERFACE DDS network iface (default eth0) +# SANAD_VOICE_BRAIN gemini | local | model (default gemini) +# SANAD_AUDIO_PROFILE builtin | anker | hollyland_builtin (default builtin) +# PORT dashboard port (default 8000) + +set -u + +SANAD_HOME="${SANAD_HOME:-$HOME/Sanad}" +SANAD_CONDA_ENV="${SANAD_CONDA_ENV:-gemini_sdk}" +SANAD_CONDA_BASE="${SANAD_CONDA_BASE:-$HOME/miniconda3}" + +export SANAD_DDS_INTERFACE="${SANAD_DDS_INTERFACE:-eth0}" +export SANAD_VOICE_BRAIN="${SANAD_VOICE_BRAIN:-gemini}" +export SANAD_AUDIO_PROFILE="${SANAD_AUDIO_PROFILE:-builtin}" +export PORT="${PORT:-8000}" + +# Mandatory environment fixes for Jetson + conda + Unitree SDK +export LD_PRELOAD="${LD_PRELOAD:-/usr/lib/aarch64-linux-gnu/libgomp.so.1}" +export PYTHONUNBUFFERED=1 + +# Point conda's bundled PyAudio/PortAudio at the SYSTEM ALSA plugin dir +# so it can load the `pulse` PCM plugin (from the 
libasound2-plugins apt +# package). Without this, conda's libasound looks in its own env path +# (which doesn't ship plugins) → PortAudio's device list has no 'pulse' +# entry → AnkerMic/AnkerSpeaker fall back to direct hw:N grabs which +# fail with paInvalidSampleRate (mic) or kick PulseAudio off the card +# (speaker). Same pattern used by AI_Photographer's photo_sanad.sh on +# this Jetson. +if [ -d "/usr/lib/aarch64-linux-gnu/alsa-lib" ]; then + export ALSA_PLUGIN_DIR="/usr/lib/aarch64-linux-gnu/alsa-lib" +elif [ -d "/usr/lib/alsa-lib" ]; then + export ALSA_PLUGIN_DIR="/usr/lib/alsa-lib" +fi +if [ -f "/usr/share/alsa/alsa.conf" ]; then + export ALSA_CONFIG_PATH="/usr/share/alsa/alsa.conf" +fi +export ALSA_LOG_LEVEL=0 + +# Optional: needed if/when the local pipeline imports CosyVoice +if [ -d "$HOME/CosyVoice" ]; then + export PYTHONPATH="$HOME/CosyVoice:$HOME/CosyVoice/third_party/Matcha-TTS:${PYTHONPATH:-}" +fi + +cd "$SANAD_HOME" || { echo "Sanad dir not found: $SANAD_HOME" >&2; exit 1; } + +# Activate conda +if [ ! 
-f "$SANAD_CONDA_BASE/etc/profile.d/conda.sh" ]; then + echo "Conda not found at $SANAD_CONDA_BASE — set SANAD_CONDA_BASE" >&2 + exit 1 +fi +# shellcheck disable=SC1091 +source "$SANAD_CONDA_BASE/etc/profile.d/conda.sh" +conda activate "$SANAD_CONDA_ENV" || { + echo "Conda env '$SANAD_CONDA_ENV' activation failed" >&2 + exit 1 +} + +# Wait for the DDS interface to come up — robot may still be booting +for i in $(seq 1 20); do + if ip link show "$SANAD_DDS_INTERFACE" 2>/dev/null | grep -q "state UP"; then + break + fi + sleep 1 +done + +echo "[start_sanad] $(date) — launching main.py" +echo "[start_sanad] SANAD_HOME=$SANAD_HOME" +echo "[start_sanad] conda env=$SANAD_CONDA_ENV" +echo "[start_sanad] DDS iface=$SANAD_DDS_INTERFACE" +echo "[start_sanad] voice brain=$SANAD_VOICE_BRAIN audio=$SANAD_AUDIO_PROFILE" +echo "[start_sanad] port=$PORT" + +exec python3 main.py --port "$PORT" diff --git a/vendor/Sanad/vision/__init__.py b/vendor/Sanad/vision/__init__.py new file mode 100644 index 0000000..2a2729a --- /dev/null +++ b/vendor/Sanad/vision/__init__.py @@ -0,0 +1 @@ +"""Vision package — camera daemon + face gallery for Gemini-side recognition.""" diff --git a/vendor/Sanad/vision/camera.py b/vendor/Sanad/vision/camera.py new file mode 100644 index 0000000..7f150f8 --- /dev/null +++ b/vendor/Sanad/vision/camera.py @@ -0,0 +1,560 @@ +"""Camera daemon — single producer, in-memory frame cache. 
+ +Captures frames at fixed FPS from a RealSense (preferred) or any USB +camera (fallback), JPEG-encodes them, and caches the latest frame in +memory in two views (matches Marcus's API/camera_api.py): + + - `_latest_jpeg` raw JPEG bytes — dashboard preview + frame forwarder + - `_latest_b64` base64 ASCII — frame forwarder → Gemini child stdin + +Consumers: + - dashboard preview → `snapshot_jpeg()` (served as an HTTP Response) + - face enrollment → `get_fresh_frame()` for a guaranteed-current capture + - GeminiSubprocess → `get_frame_b64()`, pushed over the child's stdin + +Lifecycle is driven by the Recognition tab toggle. The daemon is idle +until `start()` is called; failures in start() are non-fatal and +reported via `is_running()` / `backend`. Once running it auto-reconnects +on USB unplug / stalled frames (Marcus-style resilience), and supports +hot `reconfigure()` of resolution/FPS without a full restart. +""" + +from __future__ import annotations + +import base64 +import os +import threading +import time +from typing import Optional + +import numpy as np + +from Project.Sanad.core.logger import get_logger + +log = get_logger("camera") + +# How many /dev/video* indices to scan for a USB-style color camera when +# RealSense isn't available. A RealSense exposes ~6 V4L2 nodes (depth, IR, +# color, metadata…) — the color one is rarely index 0, so we probe each +# and accept the first that yields a real 3-channel BGR frame. +_USB_SCAN_RANGE = 10 + + +class CameraDaemon: + """RealSense → USB fallback camera capture with in-memory frame cache.""" + + def __init__( + self, + width: int = 424, + height: int = 240, + fps: int = 15, + jpeg_quality: int = 70, + stale_threshold_s: float = 10.0, + reconnect_min_s: float = 2.0, + reconnect_max_s: float = 10.0, + capture_timeout_ms: int = 5000, + ) -> None: + # Active profile — guarded by _reconfig_lock so reconfigure() can + # hot-swap it from another thread between capture sessions. 
+ self._reconfig_lock = threading.Lock() + self._w = int(width) + self._h = int(height) + self._fps = int(fps) + self._q = max(10, min(95, int(jpeg_quality))) + self._reconfig_pending = False + + # Resilience knobs (Marcus-style) + self._stale_s = float(stale_threshold_s) + self._reconnect_min_s = float(reconnect_min_s) + self._reconnect_max_s = float(reconnect_max_s) + self._capture_timeout_ms = int(capture_timeout_ms) + + self._thread: Optional[threading.Thread] = None + self._stop = threading.Event() + self._backend: Optional[str] = None + self._lock = threading.Lock() + self._latest_jpeg: Optional[bytes] = None + self._latest_b64: Optional[str] = None + self._latest_ts: float = 0.0 + self._frame_seq: int = 0 + self._error: Optional[str] = None + self._reconnect_count: int = 0 + + # ── public API ────────────────────────────────────────── + + @property + def backend(self) -> Optional[str]: + return self._backend + + @property + def error(self) -> Optional[str]: + return self._error + + @property + def frame_seq(self) -> int: + return self._frame_seq + + def is_running(self) -> bool: + return self._thread is not None and self._thread.is_alive() + + def start(self) -> bool: + """Start capture thread. Returns True if a backend was acquired. + + Initial probe is synchronous; if it fails the thread isn't spawned. + Once running, the inner loop auto-reconnects on USB unplug or + stalled frames using exponential backoff (`reconnect_min_s` .. + `reconnect_max_s`). + """ + if self.is_running(): + return True + self._stop.clear() + self._error = None + self._reconnect_count = 0 + + # One-shot USB-2.0 negotiation diagnostic (warns operator if D435I + # came up on USB 2.0 — frame drops would be likely otherwise). 
+ self._check_usb_version() + + backend = self._probe_any() + if backend is None: + log.warning("Camera: no backend available (RealSense + USB both failed)") + self._backend = None + return False + + self._backend = backend["name"] + self._thread = threading.Thread( + target=self._reconnect_loop, args=(backend,), + daemon=True, name="camera-daemon", + ) + self._thread.start() + with self._reconfig_lock: + w, h, f = self._w, self._h, self._fps + log.info("Camera started (backend=%s, %dx%d @ %dfps)", + self._backend, w, h, f) + return True + + def stop(self) -> None: + """Stop the capture thread and release the hardware.""" + if not self.is_running(): + self._backend = None + return + self._stop.set() + t = self._thread + if t is not None: + t.join(timeout=2.0) + self._thread = None + self._backend = None + log.info("Camera stopped") + + def reconfigure(self, width: Optional[int] = None, height: Optional[int] = None, + fps: Optional[int] = None, jpeg_quality: Optional[int] = None) -> dict: + """Hot-swap the capture profile without a full stop/start. + + Sets a pending flag — the capture loop notices it, tears the + pipeline down, and rebuilds at the new resolution (~0.5 s gap). + If the daemon isn't running the new values just take effect on + the next `start()`. Returns the resulting active profile. 
+ """ + with self._reconfig_lock: + if width is not None: + self._w = int(width) + if height is not None: + self._h = int(height) + if fps is not None: + self._fps = int(fps) + if jpeg_quality is not None: + self._q = max(10, min(95, int(jpeg_quality))) + if self.is_running(): + self._reconfig_pending = True + profile = {"width": self._w, "height": self._h, + "fps": self._fps, "jpeg_quality": self._q} + log.info("Camera reconfigure → %s", profile) + return profile + + def snapshot_jpeg(self) -> Optional[bytes]: + """Return the latest JPEG bytes, or None if no frame yet.""" + with self._lock: + return self._latest_jpeg + + def get_frame_b64(self) -> Optional[str]: + """Return the latest frame as a base64 ASCII string (or None). + + Used by the frame forwarder to push frames over the Gemini child's + stdin without re-encoding — base64 is cached alongside the JPEG. + """ + with self._lock: + return self._latest_b64 + + def get_fresh_frame(self, max_age_s: float = 0.5, + timeout_s: float = 1.5) -> Optional[bytes]: + """Return a JPEG frame newer than `max_age_s`, waiting up to `timeout_s`. + + Used by face enrollment so the captured frame is guaranteed to be + the *current* scene, not a stale buffer from before the user got + into position. Falls back to whatever's cached on timeout. + """ + deadline = time.time() + timeout_s + while time.time() < deadline: + with self._lock: + if (self._latest_jpeg is not None + and self._latest_ts > 0 + and (time.time() - self._latest_ts) <= max_age_s): + return self._latest_jpeg + time.sleep(0.03) + with self._lock: + return self._latest_jpeg + + def latest_age_s(self) -> float: + """Seconds since last successful frame; +inf if none.""" + with self._lock: + if self._latest_ts <= 0: + return float("inf") + return time.time() - self._latest_ts + + def status(self) -> dict: + with self._reconfig_lock: + w, h, f, q = self._w, self._h, self._fps, self._q + # latest_age_s() is +inf until the first frame lands. 
inf is NOT + # JSON-serialisable by Starlette's JSONResponse (allow_nan=False) — + # leaving it as inf would 500 the /api/recognition/* routes. Map + # "running but no frame yet" and "not running" both to None. + age = self.latest_age_s() + age_s = round(age, 2) if (self.is_running() and age != float("inf")) else None + return { + "running": self.is_running(), + "backend": self._backend, + "width": w, + "height": h, + "fps": f, + "jpeg_quality": q, + "frame_seq": self._frame_seq, + "age_s": age_s, + "error": self._error, + "reconnect_count": self._reconnect_count, + } + + # ── helpers ───────────────────────────────────────────── + + def _probe_any(self) -> Optional[dict]: + """Try RealSense first, then USB. Returns backend dict or None.""" + b = self._probe_realsense() + if b is None: + b = self._probe_usb() + return b + + def _check_usb_version(self) -> None: + """Warn if a connected RealSense negotiated USB 2.0 (needs 3.x). + + Marcus has this same check — D435I on USB 2.0 can't deliver + color+depth+IMU and the pipeline silently stalls. Catching it at + startup lets the operator fix the cable/port instead of chasing a + "no frames" loop. Diagnostic only; never blocks startup. + """ + try: + import pyrealsense2 as rs # type: ignore + ctx = rs.context() + for dev in ctx.query_devices(): + try: + usb_type = dev.get_info(rs.camera_info.usb_type_descriptor) + name = dev.get_info(rs.camera_info.name) + except Exception: + continue + if str(usb_type).startswith("2."): + log.warning( + "RealSense %s negotiated USB %s — expected 3.x. " + "Frame drops likely. 
def _probe_realsense(self) -> Optional[dict]:
    """Try to start a RealSense colour stream at the configured profile.

    Width, height and fps are snapshotted under the reconfigure lock, then
    pyrealsense2 is imported and a pipeline is started with a single BGR8
    colour stream.

    Returns:
        A backend dict with keys "name" ("realsense"), "pipeline", "rs"
        (the imported module) and "profile"; or None if the import, the
        stream configuration or the pipeline start raised (the error is
        logged at info level).
    """
    with self._reconfig_lock:
        width, height, fps = self._w, self._h, self._fps
    try:
        import pyrealsense2 as rs  # type: ignore
        pipe = rs.pipeline()
        stream_cfg = rs.config()
        stream_cfg.enable_stream(rs.stream.color, width, height,
                                 rs.format.bgr8, fps)
        started = pipe.start(stream_cfg)
    except Exception as exc:
        log.info("RealSense unavailable: %s", exc)
        return None
    return {"name": "realsense", "pipeline": pipe, "rs": rs,
            "profile": started}
+ """ + cap = None + try: + cap = cv2.VideoCapture(idx) + if not cap.isOpened(): + cap.release() + return None + cap.set(cv2.CAP_PROP_FRAME_WIDTH, w) + cap.set(cv2.CAP_PROP_FRAME_HEIGHT, h) + cap.set(cv2.CAP_PROP_FPS, f) + good = None + for _ in range(5): + ok, frame = cap.read() + if (ok and frame is not None and frame.ndim == 3 + and frame.shape[2] == 3): + good = frame + break + if good is None: + cap.release() + return None + is_color = not ( + np.array_equal(good[:, :, 0], good[:, :, 1]) + and np.array_equal(good[:, :, 1], good[:, :, 2]) + ) + return {"name": "usb", "cap": cap, "cv2": cv2, "index": idx, + "is_color": is_color, + "frame_wh": (good.shape[1], good.shape[0])} + except Exception as exc: + log.info("USB camera index %d: %s", idx, exc) + if cap is not None: + try: + cap.release() + except Exception: + pass + return None + + def _probe_usb(self) -> Optional[dict]: + """Scan /dev/video* for a colour camera node, falling back to a + grayscale/IR node only if no colour node exists. + + On a RealSense, /dev/video0 is the *depth* stream (Z16, cv2 can't + open it as a webcam); the IR nodes deliver Y8 (grayscale); the + *colour* node delivers YUYV/BGR. We can't know the index up front, + so we probe each and prefer the first genuine colour node — that's + why the dashboard preview used to come up grayscale. Pin a node + with SANAD_CAMERA_USB_INDEX= to skip the scan entirely. + """ + with self._reconfig_lock: + w, h, f = self._w, self._h, self._fps + try: + import cv2 # type: ignore + except Exception as exc: + log.info("USB camera unavailable: %s", exc) + return None + + # Pinned index — accept whatever it is (colour or not). 
+ explicit = os.environ.get("SANAD_CAMERA_USB_INDEX", "").strip() + if explicit.isdigit(): + backend = self._open_usb_index(int(explicit), w, h, f, cv2) + if backend is not None: + fw, fh = backend["frame_wh"] + log.info("USB camera: pinned /dev/video%d (%dx%d, %s)", + backend["index"], fw, fh, + "colour" if backend["is_color"] else "grayscale/IR") + return backend + log.warning("USB camera: pinned index %s unusable", explicit) + return None + + # Scan — prefer a real colour node; keep the first grayscale node + # as a last resort so the camera still works if that's all there is. + gray_fallback: Optional[dict] = None + for idx in range(_USB_SCAN_RANGE): + backend = self._open_usb_index(idx, w, h, f, cv2) + if backend is None: + continue + fw, fh = backend["frame_wh"] + if backend["is_color"]: + log.info("USB camera: using /dev/video%d (colour, %dx%d)", + idx, fw, fh) + if gray_fallback is not None: + try: + gray_fallback["cap"].release() + except Exception: + pass + return backend + # grayscale/IR — remember the first, release any extras + if gray_fallback is None: + gray_fallback = backend + else: + try: + backend["cap"].release() + except Exception: + pass + + if gray_fallback is not None: + fw, fh = gray_fallback["frame_wh"] + log.warning("USB camera: no colour node found — falling back to " + "/dev/video%d (grayscale/IR, %dx%d). For a RealSense, " + "build pyrealsense2 or pin the colour node with " + "SANAD_CAMERA_USB_INDEX.", gray_fallback["index"], fw, fh) + return gray_fallback + + log.info("USB camera unavailable: no working /dev/video* node found " + "(scanned %d indices)", _USB_SCAN_RANGE) + return None + + # ── main capture loop ─────────────────────────────────── + + def _reconnect_loop(self, initial_backend: dict) -> None: + """Outer loop — owns reconnect with exponential backoff. + + Inner `_capture_session` runs until the camera goes stale, the + stop flag is set, or a reconfigure is requested. 
def _capture_session(self, backend: dict) -> bool:
    """Inner capture loop — runs until stop, stale-frame timeout, or
    a reconfigure request.

    Each iteration reads one frame from `backend`, JPEG-encodes it and
    publishes the JPEG, its base64 form, the timestamp and a bumped
    sequence number under `self._lock`.

    Args:
        backend: Backend dict handed to `self._read_frame`.

    Returns:
        True if it exited because of a reconfigure (caller rebuilds
        immediately), False on a stall or clean stop.
    """
    import cv2  # always available — used for JPEG encode

    # JPEG quality is snapshotted once per session; a later quality change
    # only takes effect after `_reconfig_pending` ends this session.
    with self._reconfig_lock:
        encode_params = [int(cv2.IMWRITE_JPEG_QUALITY), self._q]
    # Stall age is measured from session start until the first frame lands,
    # so a backend that never delivers trips the stale check too.
    last_frame_time = time.time()
    consecutive_failures = 0

    while not self._stop.is_set():
        # Checked before every read so a reconfigure is picked up within
        # one read/sleep cycle.
        if self._reconfig_pending:
            log.info("Camera reconfigure requested — rebuilding pipeline")
            return True

        bgr = self._read_frame(backend)
        if bgr is None:
            consecutive_failures += 1
            age = time.time() - last_frame_time
            if age > self._stale_s:
                log.warning(
                    "Camera stalled %.1fs (%d consecutive timeouts) — "
                    "rebuilding pipeline", age, consecutive_failures,
                )
                return False
            # Intermediate warnings so degradation is visible early
            if consecutive_failures in (3, 10, 30):
                log.warning("Camera slow (%d failures, age %.1fs)",
                            consecutive_failures, age)
            # Short pause so a failing read does not spin the loop.
            time.sleep(0.05)
            continue

        # NOTE(review): an encode failure skips the frame without sleeping
        # and without resetting the stall counters — confirm that is the
        # intended behaviour if imencode can fail repeatedly.
        try:
            ok, buf = cv2.imencode(".jpg", bgr, encode_params)
        except Exception as exc:
            log.warning("JPEG encode failed: %s", exc)
            continue
        if not ok:
            continue
        jpeg = bytes(buf)
        # base64 is computed once here and cached next to the JPEG.
        b64 = base64.b64encode(jpeg).decode("ascii")
        now = time.time()
        # Publish all four fields together so readers holding `_lock`
        # never see a JPEG paired with another frame's timestamp.
        with self._lock:
            self._latest_jpeg = jpeg
            self._latest_b64 = b64
            self._latest_ts = now
            self._frame_seq += 1
        last_frame_time = now
        consecutive_failures = 0

    # Stop flag was set: clean stop.
    return False
def _teardown(self, backend: dict) -> None:
    """Release the device held by `backend`; never raises.

    A "usb" backend has its capture released, a "realsense" backend has
    its pipeline stopped, and any other (or missing) name is a no-op.
    Errors raised while closing are logged at info level and swallowed.
    """
    kind = backend.get("name")
    try:
        if kind == "usb":
            backend["cap"].release()
        elif kind == "realsense":
            backend["pipeline"].stop()
    except Exception as exc:
        log.info("Camera teardown: %s", exc)
+""" + +from __future__ import annotations + +import io +import json +import re +import threading +import zipfile +from dataclasses import dataclass, field +from datetime import datetime +from pathlib import Path +from typing import Iterable + +from Project.Sanad.core.logger import get_logger + +log = get_logger("face_gallery") + + +_DIR_RE = re.compile(r"^face_(\d+)$") +ALLOWED_EXTS = {".jpg", ".jpeg", ".png"} +SAMPLE_NAME_RE = re.compile(r"^face_(\d+)\.(jpg|jpeg|png)$", re.IGNORECASE) + + +@dataclass +class PhotoInfo: + name: str + size_bytes: int + path: Path + + +@dataclass +class FaceEntry: + id: int + name: str | None + added_at: str | None + dir: Path + description: str | None = None + sample_paths: list[Path] = field(default_factory=list) + + def to_dict(self) -> dict: + return { + "id": self.id, + "name": self.name, + "description": self.description, + "added_at": self.added_at, + "dir": str(self.dir), + "photos": [ + {"name": p.name, "size_bytes": p.stat().st_size} + for p in self.sample_paths + if p.exists() + ], + } + + +class FaceGallery: + """File-system backed gallery rooted at `root` (e.g. 
data/faces/).""" + + def __init__(self, root: Path | str) -> None: + self.root = Path(root) + self._lock = threading.RLock() + + # ── read ──────────────────────────────────────────────── + + def _ensure_root(self) -> None: + self.root.mkdir(parents=True, exist_ok=True) + + def _iter_face_dirs(self) -> Iterable[tuple[int, Path]]: + if not self.root.exists(): + return + for child in sorted(self.root.iterdir()): + if not child.is_dir(): + continue + m = _DIR_RE.match(child.name) + if not m: + continue + yield int(m.group(1)), child + + def _samples_in(self, face_dir: Path) -> list[Path]: + out: list[Path] = [] + for p in sorted(face_dir.iterdir()): + if p.is_file() and p.suffix.lower() in ALLOWED_EXTS: + out.append(p) + return out + + def _meta(self, face_dir: Path) -> tuple[str | None, str | None, str | None]: + """Return (name, description, added_at) — any may be None.""" + meta_path = face_dir / "meta.json" + if not meta_path.exists(): + return None, None, None + try: + data = json.loads(meta_path.read_text(encoding="utf-8")) + except Exception: + return None, None, None + name = data.get("name") + description = data.get("description") + added = data.get("added_at") + return (name if name else None), (description if description else None), added + + def list(self) -> list[FaceEntry]: + with self._lock: + entries: list[FaceEntry] = [] + for face_id, face_dir in self._iter_face_dirs(): + name, description, added = self._meta(face_dir) + entries.append(FaceEntry( + id=face_id, + name=name, + description=description, + added_at=added, + dir=face_dir, + sample_paths=self._samples_in(face_dir), + )) + return entries + + def get(self, face_id: int) -> FaceEntry | None: + with self._lock: + face_dir = self.root / f"face_{face_id}" + if not face_dir.is_dir(): + return None + name, description, added = self._meta(face_dir) + return FaceEntry( + id=face_id, name=name, description=description, added_at=added, + dir=face_dir, sample_paths=self._samples_in(face_dir), + ) + + 
def get_photo(self, face_id: int, photo_name: str) -> Path | None: + with self._lock: + face_dir = self.root / f"face_{face_id}" + if not face_dir.is_dir(): + return None + p = face_dir / photo_name + try: + p.resolve().relative_to(face_dir.resolve()) + except ValueError: + return None + if not p.exists() or p.suffix.lower() not in ALLOWED_EXTS: + return None + return p + + # ── write ─────────────────────────────────────────────── + + def next_id(self) -> int: + with self._lock: + max_id = 0 + for face_id, _ in self._iter_face_dirs(): + if face_id > max_id: + max_id = face_id + return max_id + 1 + + def _next_sample_name(self, face_dir: Path, ext: str) -> str: + """Return next free face_N. filename inside face_dir.""" + existing = self._samples_in(face_dir) + max_n = 0 + for p in existing: + m = SAMPLE_NAME_RE.match(p.name) + if m: + n = int(m.group(1)) + if n > max_n: + max_n = n + return f"face_{max_n + 1}{ext.lower()}" + + @staticmethod + def _detect_ext(jpeg_or_png: bytes) -> str: + """Sniff PNG vs JPEG from the magic bytes.""" + if len(jpeg_or_png) >= 8 and jpeg_or_png[:8] == b"\x89PNG\r\n\x1a\n": + return ".png" + return ".jpg" + + def _write_meta(self, face_dir: Path, name: str | None, + description: str | None = None, + added_at: str | None = None) -> None: + meta: dict[str, str] = {} + if name: + meta["name"] = name + if description: + meta["description"] = description + meta["added_at"] = added_at or datetime.now().isoformat(timespec="seconds") + (face_dir / "meta.json").write_text( + json.dumps(meta, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + + def create_face(self, image_bytes_list: list[bytes], + name: str | None = None, + description: str | None = None) -> FaceEntry: + """Create a new face_{next_id}/ with one or more samples.""" + if not image_bytes_list: + raise ValueError("create_face: empty image list") + with self._lock: + self._ensure_root() + face_id = self.next_id() + face_dir = self.root / f"face_{face_id}" + 
face_dir.mkdir(parents=True, exist_ok=False) + for idx, data in enumerate(image_bytes_list, start=1): + ext = self._detect_ext(data) + fname = f"face_{idx}{ext}" + (face_dir / fname).write_bytes(data) + clean_name = (name or "").strip() or None + clean_desc = (description or "").strip() or None + self._write_meta(face_dir, clean_name, clean_desc) + log.info("Created face_%d (samples=%d, name=%s, desc=%s)", + face_id, len(image_bytes_list), clean_name or "(unnamed)", + "yes" if clean_desc else "no") + return self.get(face_id) # type: ignore[return-value] + + def add_photo(self, face_id: int, image_bytes: bytes) -> str: + """Append a new sample to an existing face. Returns the filename.""" + with self._lock: + face_dir = self.root / f"face_{face_id}" + if not face_dir.is_dir(): + raise FileNotFoundError(f"face_{face_id} not found") + ext = self._detect_ext(image_bytes) + fname = self._next_sample_name(face_dir, ext) + (face_dir / fname).write_bytes(image_bytes) + log.info("Added sample %s to face_%d", fname, face_id) + return fname + + def rename(self, face_id: int, name: str | None) -> None: + """Update meta.json with a new name (or clear it if name is empty). + + Preserves the existing description + added_at. + """ + with self._lock: + face_dir = self.root / f"face_{face_id}" + if not face_dir.is_dir(): + raise FileNotFoundError(f"face_{face_id} not found") + _, description, added = self._meta(face_dir) + clean = (name or "").strip() or None + self._write_meta(face_dir, clean, description, added_at=added) + log.info("Renamed face_%d → %s", face_id, clean or "(unnamed)") + + def set_description(self, face_id: int, description: str | None) -> None: + """Update meta.json with a free-text description (or clear it). + + Preserves the existing name + added_at. The description is folded + into the Gemini primer turn so Gemini can reference it. 
+ """ + with self._lock: + face_dir = self.root / f"face_{face_id}" + if not face_dir.is_dir(): + raise FileNotFoundError(f"face_{face_id} not found") + name, _, added = self._meta(face_dir) + clean = (description or "").strip() or None + self._write_meta(face_dir, name, clean, added_at=added) + log.info("Set description for face_%d (%s)", face_id, + "cleared" if not clean else f"{len(clean)} chars") + + def delete_photo(self, face_id: int, photo_name: str) -> None: + """Delete one photo. Refuses if it's the only remaining sample.""" + with self._lock: + face_dir = self.root / f"face_{face_id}" + if not face_dir.is_dir(): + raise FileNotFoundError(f"face_{face_id} not found") + samples = self._samples_in(face_dir) + if len(samples) <= 1: + raise ValueError( + "Cannot delete the only photo — delete the face instead." + ) + target = self.get_photo(face_id, photo_name) + if target is None: + raise FileNotFoundError(f"photo {photo_name} not found") + target.unlink() + log.info("Deleted %s from face_%d", photo_name, face_id) + + def delete_face(self, face_id: int) -> None: + """Delete the entire face_{id}/ folder (including meta.json).""" + import shutil + with self._lock: + face_dir = self.root / f"face_{face_id}" + if not face_dir.is_dir(): + raise FileNotFoundError(f"face_{face_id} not found") + shutil.rmtree(face_dir) + log.info("Deleted face_%d", face_id) + + def zip_face(self, face_id: int) -> bytes: + """Return the entire face_{id}/ folder packaged as a ZIP.""" + with self._lock: + face_dir = self.root / f"face_{face_id}" + if not face_dir.is_dir(): + raise FileNotFoundError(f"face_{face_id} not found") + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w", compression=zipfile.ZIP_DEFLATED) as zf: + for p in sorted(face_dir.iterdir()): + if p.is_file(): + zf.write(p, arcname=f"face_{face_id}/{p.name}") + return buf.getvalue() + + # ── primer support (used by gemini/script.py) ─────────── + + def load_for_primer( + self, max_samples_per_face: int = 3, 
resize_long_side: int = 256, + ) -> list[tuple[FaceEntry, list[bytes]]]: + """Return [(FaceEntry, [jpeg_bytes,…]), …] for Gemini upload. + + Resizes each sample to longest-side <= resize_long_side, re-encodes + as JPEG (q=85) to keep the token cost manageable. Falls back to + the raw bytes if PIL isn't available. + """ + entries = self.list() + if not entries: + return [] + out: list[tuple[FaceEntry, list[bytes]]] = [] + for e in entries: + paths = e.sample_paths[:max_samples_per_face] + jpegs: list[bytes] = [] + for p in paths: + try: + raw = p.read_bytes() + except OSError: + continue + processed = self._resize_for_primer(raw, resize_long_side) + jpegs.append(processed or raw) + if jpegs: + out.append((e, jpegs)) + return out + + @staticmethod + def _resize_for_primer(raw: bytes, long_side: int) -> bytes | None: + """Resize image to longest-side ≤ long_side, re-encode JPEG q=85. + + Returns None on any failure (caller falls back to raw bytes). + """ + try: + from PIL import Image # type: ignore + except Exception: + return None + try: + img = Image.open(io.BytesIO(raw)) + img.load() + if img.mode not in ("RGB", "L"): + img = img.convert("RGB") + w, h = img.size + scale = long_side / max(w, h) if max(w, h) > long_side else 1.0 + if scale < 1.0: + img = img.resize( + (max(1, int(w * scale)), max(1, int(h * scale))), + Image.LANCZOS, + ) + buf = io.BytesIO() + img.save(buf, format="JPEG", quality=85, optimize=True) + return buf.getvalue() + except Exception: + return None diff --git a/vendor/Sanad/vision/recognition_state.py b/vendor/Sanad/vision/recognition_state.py new file mode 100644 index 0000000..ccd4dd3 --- /dev/null +++ b/vendor/Sanad/vision/recognition_state.py @@ -0,0 +1,93 @@ +"""Recognition state file — atomic JSON I/O shared by parent + child. + +The dashboard (parent process) writes this file on every toggle / face +gallery change; the Gemini child (`gemini/script.py`) polls it at 1 Hz +to flip its in-memory flags without a session restart. 
def read(path: Path) -> RecognitionState:
    """Return the persisted state, or defaults where it is missing/corrupt.

    Never raises on bad file content: an unreadable file, invalid UTF-8,
    invalid JSON or a non-object top level all yield a default
    `RecognitionState`. Inside a valid object, an integer field whose
    value cannot be converted (null, "abc", a list, Infinity) falls back
    to 0 on its own, so one damaged field does not discard the other
    toggles.
    """
    try:
        raw = json.loads(Path(path).read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError (its subclasses).
        return RecognitionState()
    if not isinstance(raw, dict):
        return RecognitionState()

    def as_int(key: str) -> int:
        # int() raises TypeError on None/containers, ValueError on
        # non-numeric strings and OverflowError on an infinite float;
        # none of those may escape to the 1 Hz poller.
        try:
            return int(raw.get(key, 0))
        except (TypeError, ValueError, OverflowError):
            return 0

    return RecognitionState(
        vision_enabled=bool(raw.get("vision_enabled", False)),
        face_rec_enabled=bool(raw.get("face_rec_enabled", False)),
        gallery_version=as_int("gallery_version"),
        zone_rec_enabled=bool(raw.get("zone_rec_enabled", False)),
        zones_version=as_int("zones_version"),
        nav_target_zone_id=as_int("nav_target_zone_id"),
        nav_target_place_id=as_int("nav_target_place_id"),
        movement_enabled=bool(raw.get("movement_enabled", False)),
    )
"Ground Floor") + └─ Place = name + description + optional reference photos + + optional linked face ids (people associated with the place) + +Layout: + zones/ + zone_{zid}/ + meta.json {name, description, added_at} + place_{pid}/ + meta.json {name, description, face_ids:[int], added_at} + place_1.jpg ← optional reference photos (0..N) + place_2.png + +`face_ids` reference enrolled faces in the SEPARATE face gallery +(data/faces/face_{id}); this module only stores the ids — name/photo +resolution is done by the caller (route layer + Gemini primer). + +Reference photos let Gemini visually recognise the place (and, later, let the +robot navigate to it). A place needs only a name + description; photos and +linked faces are both optional. Thread-safe via a single internal RLock. +""" + +from __future__ import annotations + +import io +import json +import re +import threading +import zipfile +from dataclasses import dataclass, field +from datetime import datetime +from pathlib import Path +from typing import Iterable + +from Project.Sanad.core.logger import get_logger + +log = get_logger("zone_gallery") + +_ZONE_DIR_RE = re.compile(r"^zone_(\d+)$") +_PLACE_DIR_RE = re.compile(r"^place_(\d+)$") +ALLOWED_EXTS = {".jpg", ".jpeg", ".png"} +SAMPLE_NAME_RE = re.compile(r"^place_(\d+)\.(jpg|jpeg|png)$", re.IGNORECASE) + + +@dataclass +class PlaceEntry: + id: int + zone_id: int + name: str | None + added_at: str | None + dir: Path + description: str | None = None + face_ids: list[int] = field(default_factory=list) + sample_paths: list[Path] = field(default_factory=list) + + def to_dict(self) -> dict: + return { + "id": self.id, + "zone_id": self.zone_id, + "name": self.name, + "description": self.description, + "face_ids": list(self.face_ids), + "added_at": self.added_at, + "photos": [ + {"name": p.name, "size_bytes": p.stat().st_size} + for p in self.sample_paths if p.exists() + ], + } + + +@dataclass +class ZoneEntry: + id: int + name: str | None + added_at: str | None + dir: 
Path + description: str | None = None + places: list[PlaceEntry] = field(default_factory=list) + + def to_dict(self) -> dict: + return { + "id": self.id, + "name": self.name, + "description": self.description, + "added_at": self.added_at, + "places": [p.to_dict() for p in self.places], + } + + +class ZoneGallery: + """File-system backed zone/place gallery rooted at `root` (data/zones/).""" + + def __init__(self, root: Path | str) -> None: + self.root = Path(root) + self._lock = threading.RLock() + + # ── paths ──────────────────────────────────────────────── + + def _ensure_root(self) -> None: + self.root.mkdir(parents=True, exist_ok=True) + + def _zone_dir(self, zone_id: int) -> Path: + return self.root / f"zone_{zone_id}" + + def _place_dir(self, zone_id: int, place_id: int) -> Path: + return self.root / f"zone_{zone_id}" / f"place_{place_id}" + + def _iter_zone_dirs(self) -> Iterable[tuple[int, Path]]: + if not self.root.exists(): + return + for child in sorted(self.root.iterdir()): + if not child.is_dir(): + continue + m = _ZONE_DIR_RE.match(child.name) + if m: + yield int(m.group(1)), child + + def _iter_place_dirs(self, zone_dir: Path) -> Iterable[tuple[int, Path]]: + if not zone_dir.exists(): + return + for child in sorted(zone_dir.iterdir()): + if not child.is_dir(): + continue + m = _PLACE_DIR_RE.match(child.name) + if m: + yield int(m.group(1)), child + + def _samples_in(self, place_dir: Path) -> list[Path]: + return [p for p in sorted(place_dir.iterdir()) + if p.is_file() and p.suffix.lower() in ALLOWED_EXTS] + + # ── meta ───────────────────────────────────────────────── + + def _zone_meta(self, zone_dir: Path) -> tuple[str | None, str | None, str | None]: + meta_path = zone_dir / "meta.json" + if not meta_path.exists(): + return None, None, None + try: + data = json.loads(meta_path.read_text(encoding="utf-8")) + except Exception: + return None, None, None + return (data.get("name") or None), (data.get("description") or None), data.get("added_at") + + 
def _place_meta(self, place_dir: Path) -> tuple[str | None, str | None, list[int], str | None]: + meta_path = place_dir / "meta.json" + if not meta_path.exists(): + return None, None, [], None + try: + data = json.loads(meta_path.read_text(encoding="utf-8")) + except Exception: + return None, None, [], None + raw_ids = data.get("face_ids") or [] + face_ids: list[int] = [] + for x in raw_ids: + try: + face_ids.append(int(x)) + except (TypeError, ValueError): + continue + return (data.get("name") or None), (data.get("description") or None), face_ids, data.get("added_at") + + def _write_zone_meta(self, zone_dir: Path, name, description, added_at=None) -> None: + meta: dict = {} + if name: + meta["name"] = name + if description: + meta["description"] = description + meta["added_at"] = added_at or datetime.now().isoformat(timespec="seconds") + (zone_dir / "meta.json").write_text( + json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8") + + def _write_place_meta(self, place_dir: Path, name, description, + face_ids=None, added_at=None) -> None: + meta: dict = {} + if name: + meta["name"] = name + if description: + meta["description"] = description + meta["face_ids"] = [int(x) for x in (face_ids or [])] + meta["added_at"] = added_at or datetime.now().isoformat(timespec="seconds") + (place_dir / "meta.json").write_text( + json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8") + + # ── read ───────────────────────────────────────────────── + + def _build_place(self, zone_id: int, place_id: int, place_dir: Path) -> PlaceEntry: + name, desc, face_ids, added = self._place_meta(place_dir) + return PlaceEntry( + id=place_id, zone_id=zone_id, name=name, description=desc, + face_ids=face_ids, added_at=added, dir=place_dir, + sample_paths=self._samples_in(place_dir), + ) + + def _build_zone(self, zone_id: int, zone_dir: Path) -> ZoneEntry: + name, desc, added = self._zone_meta(zone_dir) + places = [self._build_place(zone_id, pid, pdir) + for pid, pdir in 
self._iter_place_dirs(zone_dir)] + return ZoneEntry(id=zone_id, name=name, description=desc, + added_at=added, dir=zone_dir, places=places) + + def list_zones(self) -> list[ZoneEntry]: + with self._lock: + return [self._build_zone(zid, zdir) for zid, zdir in self._iter_zone_dirs()] + + def get_zone(self, zone_id: int) -> ZoneEntry | None: + with self._lock: + zd = self._zone_dir(zone_id) + return self._build_zone(zone_id, zd) if zd.is_dir() else None + + def get_place(self, zone_id: int, place_id: int) -> PlaceEntry | None: + with self._lock: + pd = self._place_dir(zone_id, place_id) + return self._build_place(zone_id, place_id, pd) if pd.is_dir() else None + + def get_photo(self, zone_id: int, place_id: int, photo_name: str) -> Path | None: + with self._lock: + pd = self._place_dir(zone_id, place_id) + if not pd.is_dir(): + return None + p = pd / photo_name + try: + p.resolve().relative_to(pd.resolve()) + except ValueError: + return None + if not p.exists() or p.suffix.lower() not in ALLOWED_EXTS: + return None + return p + + # ── ids ────────────────────────────────────────────────── + + def next_zone_id(self) -> int: + with self._lock: + return max((zid for zid, _ in self._iter_zone_dirs()), default=0) + 1 + + def next_place_id(self, zone_id: int) -> int: + with self._lock: + zd = self._zone_dir(zone_id) + return max((pid for pid, _ in self._iter_place_dirs(zd)), default=0) + 1 + + def _next_sample_name(self, place_dir: Path, ext: str) -> str: + max_n = 0 + for p in self._samples_in(place_dir): + m = SAMPLE_NAME_RE.match(p.name) + if m: + max_n = max(max_n, int(m.group(1))) + return f"place_{max_n + 1}{ext.lower()}" + + @staticmethod + def _detect_ext(data: bytes) -> str: + if len(data) >= 8 and data[:8] == b"\x89PNG\r\n\x1a\n": + return ".png" + return ".jpg" + + # ── zone write ─────────────────────────────────────────── + + def create_zone(self, name: str | None = None, + description: str | None = None) -> ZoneEntry: + with self._lock: + self._ensure_root() + 
zid = self.next_zone_id() + zd = self._zone_dir(zid) + zd.mkdir(parents=True, exist_ok=False) + self._write_zone_meta(zd, (name or "").strip() or None, + (description or "").strip() or None) + log.info("Created zone_%d (name=%s)", zid, name or "(unnamed)") + return self._build_zone(zid, zd) + + def rename_zone(self, zone_id: int, name: str | None) -> None: + with self._lock: + zd = self._zone_dir(zone_id) + if not zd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id} not found") + _, desc, added = self._zone_meta(zd) + self._write_zone_meta(zd, (name or "").strip() or None, desc, added_at=added) + log.info("Renamed zone_%d → %s", zone_id, name or "(unnamed)") + + def describe_zone(self, zone_id: int, description: str | None) -> None: + with self._lock: + zd = self._zone_dir(zone_id) + if not zd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id} not found") + name, _, added = self._zone_meta(zd) + self._write_zone_meta(zd, name, (description or "").strip() or None, added_at=added) + log.info("Described zone_%d", zone_id) + + def delete_zone(self, zone_id: int) -> None: + import shutil + with self._lock: + zd = self._zone_dir(zone_id) + if not zd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id} not found") + shutil.rmtree(zd) + log.info("Deleted zone_%d (and its places)", zone_id) + + # ── place write ────────────────────────────────────────── + + def create_place(self, zone_id: int, name: str | None = None, + description: str | None = None, + face_ids: list[int] | None = None, + image_bytes_list: list[bytes] | None = None) -> PlaceEntry: + with self._lock: + zd = self._zone_dir(zone_id) + if not zd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id} not found") + pid = self.next_place_id(zone_id) + pd = self._place_dir(zone_id, pid) + pd.mkdir(parents=True, exist_ok=False) + for idx, data in enumerate(image_bytes_list or [], start=1): + (pd / f"place_{idx}{self._detect_ext(data)}").write_bytes(data) + self._write_place_meta(pd, (name or "").strip() or None, 
+ (description or "").strip() or None, face_ids or []) + log.info("Created zone_%d/place_%d (name=%s, photos=%d, faces=%d)", + zone_id, pid, name or "(unnamed)", + len(image_bytes_list or []), len(face_ids or [])) + return self._build_place(zone_id, pid, pd) + + def rename_place(self, zone_id: int, place_id: int, name: str | None) -> None: + with self._lock: + pd = self._place_dir(zone_id, place_id) + if not pd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found") + _, desc, fids, added = self._place_meta(pd) + self._write_place_meta(pd, (name or "").strip() or None, desc, fids, added_at=added) + log.info("Renamed zone_%d/place_%d → %s", zone_id, place_id, name or "(unnamed)") + + def describe_place(self, zone_id: int, place_id: int, description: str | None) -> None: + with self._lock: + pd = self._place_dir(zone_id, place_id) + if not pd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found") + name, _, fids, added = self._place_meta(pd) + self._write_place_meta(pd, name, (description or "").strip() or None, fids, added_at=added) + log.info("Described zone_%d/place_%d", zone_id, place_id) + + def set_place_faces(self, zone_id: int, place_id: int, face_ids: list[int]) -> None: + """Replace the set of linked face ids for a place.""" + with self._lock: + pd = self._place_dir(zone_id, place_id) + if not pd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found") + name, desc, _, added = self._place_meta(pd) + clean = [] + for x in (face_ids or []): + try: + clean.append(int(x)) + except (TypeError, ValueError): + continue + self._write_place_meta(pd, name, desc, clean, added_at=added) + log.info("Set zone_%d/place_%d faces → %s", zone_id, place_id, clean) + + def add_photo(self, zone_id: int, place_id: int, image_bytes: bytes) -> str: + with self._lock: + pd = self._place_dir(zone_id, place_id) + if not pd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found") + 
fname = self._next_sample_name(pd, self._detect_ext(image_bytes)) + (pd / fname).write_bytes(image_bytes) + log.info("Added %s to zone_%d/place_%d", fname, zone_id, place_id) + return fname + + def delete_photo(self, zone_id: int, place_id: int, photo_name: str) -> None: + with self._lock: + pd = self._place_dir(zone_id, place_id) + if not pd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found") + target = self.get_photo(zone_id, place_id, photo_name) + if target is None: + raise FileNotFoundError(f"photo {photo_name} not found") + target.unlink() + log.info("Deleted %s from zone_%d/place_%d", photo_name, zone_id, place_id) + + def delete_place(self, zone_id: int, place_id: int) -> None: + import shutil + with self._lock: + pd = self._place_dir(zone_id, place_id) + if not pd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found") + shutil.rmtree(pd) + log.info("Deleted zone_%d/place_%d", zone_id, place_id) + + def zip_place(self, zone_id: int, place_id: int) -> bytes: + with self._lock: + pd = self._place_dir(zone_id, place_id) + if not pd.is_dir(): + raise FileNotFoundError(f"zone_{zone_id}/place_{place_id} not found") + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w", compression=zipfile.ZIP_DEFLATED) as zf: + for p in sorted(pd.iterdir()): + if p.is_file(): + zf.write(p, arcname=f"zone_{zone_id}_place_{place_id}/{p.name}") + return buf.getvalue() + + # ── primer support (used by gemini/script.py) ─────────── + + def load_for_primer(self, max_samples_per_place: int = 3, + resize_long_side: int = 256 + ) -> list[tuple[ZoneEntry, list[tuple[PlaceEntry, list[bytes]]]]]: + """Return [(ZoneEntry, [(PlaceEntry, [jpeg_bytes,…]), …]), …]. + + Photo lists may be empty (name+description-only place). Photos are + resized to longest-side <= resize_long_side, re-encoded JPEG q=85. 
+ """ + out: list[tuple[ZoneEntry, list[tuple[PlaceEntry, list[bytes]]]]] = [] + for zone in self.list_zones(): + place_jpegs: list[tuple[PlaceEntry, list[bytes]]] = [] + for place in zone.places: + jpegs: list[bytes] = [] + for p in place.sample_paths[:max_samples_per_place]: + try: + raw = p.read_bytes() + except OSError: + continue + jpegs.append(self._resize_for_primer(raw, resize_long_side) or raw) + place_jpegs.append((place, jpegs)) + out.append((zone, place_jpegs)) + return out + + @staticmethod + def _resize_for_primer(raw: bytes, long_side: int) -> bytes | None: + try: + from PIL import Image # type: ignore + except Exception: + return None + try: + img = Image.open(io.BytesIO(raw)) + img.load() + if img.mode not in ("RGB", "L"): + img = img.convert("RGB") + w, h = img.size + scale = long_side / max(w, h) if max(w, h) > long_side else 1.0 + if scale < 1.0: + img = img.resize((max(1, int(w * scale)), max(1, int(h * scale))), Image.LANCZOS) + buf = io.BytesIO() + img.save(buf, format="JPEG", quality=85, optimize=True) + return buf.getvalue() + except Exception: + return None diff --git a/vendor/Sanad/voice/__init__.py b/vendor/Sanad/voice/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vendor/Sanad/voice/audio_devices.py b/vendor/Sanad/voice/audio_devices.py new file mode 100644 index 0000000..9c747a6 --- /dev/null +++ b/vendor/Sanad/voice/audio_devices.py @@ -0,0 +1,807 @@ +"""Audio device profiles + pactl detection + selection persistence. + +Manages multiple audio device profiles (generic built-in, Hollyland wireless +mic + built-in speaker, Anker PowerConf) and lets the dashboard switch +between them at runtime. Selection is persisted to data/audio_device.json +so the choice survives restart. + +Resolution policy: + 1. User-selected profile (from data/audio_device.json) — if its sink/source + is currently plugged in, use it. + 2. Auto-detected profile based on what is currently plugged in. + 3. Built-in fallback. 
+ +Each profile has: + - id: short identifier + - label: human-readable name + - match: substring used to find the actual pactl name (since exact names + contain serial numbers and may differ between machines) + - sink_pattern: substring matched against pactl sink names + - source_pattern: substring matched against pactl source names + - sample_rate / channels (optional defaults — read by AudioManager) +""" + +from __future__ import annotations + +import json +import os +import subprocess +import tempfile +import threading +from dataclasses import dataclass, asdict +from pathlib import Path +from typing import Any + +from Project.Sanad.config import DATA_DIR +from Project.Sanad.core.logger import get_logger + +log = get_logger("audio_devices") + +DEVICE_STATE_FILE = DATA_DIR / "audio_device.json" +_LOCK = threading.Lock() + + +@dataclass +class AudioProfile: + id: str + label: str + sink_pattern: str # substring used to find a sink + source_pattern: str # substring used to find a source + description: str = "" + sink_sample_rate: int = 0 # 0 = use device default + source_sample_rate: int = 0 + + +# Built-in device profiles. +# +# MATCHING RULES: +# - Patterns are matched case-insensitively against the FULL PulseAudio name. +# - Multiple patterns per field: comma-separated → match ANY. +# - PulseAudio names change depending on the USB port, so we match the +# product-name portion only (not the serial or port suffix). +# - Order matters: the FIRST profile whose sink AND source both match +# becomes the auto-default when no explicit selection is saved. 
+# +# Example PulseAudio names: +# alsa_output.platform-sound.analog-stereo (built-in speaker) +# alsa_input.platform-sound.analog-stereo (built-in mic) +# alsa_output.usb-Anker_PowerConf_A3321-DEV-SN1-01.analog-stereo (Anker speaker — SN1-01 is port-dependent) +# alsa_input.usb-Anker_PowerConf_A3321-DEV-SN1-01.mono-fallback (Anker mic) +# alsa_input.usb-Shenzhen_Hollyland_Technology_Co._Ltd_Wireless_microphone_C63X223T6MX-01.analog-stereo +# (Hollyland mic — C63X... is serial-dependent) + +PROFILES: list[AudioProfile] = [ + AudioProfile( + id="builtin", + label="Built-in mic + speaker", + sink_pattern="platform-sound", + source_pattern="alsa_input.platform-sound", + description="Jetson / G1 built-in audio chip. (Default)", + ), + AudioProfile( + id="anker_powerconf", + label="Anker PowerConf (mic + speaker)", + sink_pattern="powerconf,anker", + source_pattern="powerconf,anker", + description="Anker PowerConf USB conference unit — mic + speaker on the same device.", + ), +] + +# The profile that should be used when no saved state and no auto-detect succeeds. 
DEFAULT_PROFILE_ID = "builtin"

# Lookup table: profile id → AudioProfile, built once from PROFILES.
PROFILES_BY_ID: dict[str, AudioProfile] = {p.id: p for p in PROFILES}


# ───────────────────────── pactl helpers ─────────────────────────

def _run_pactl(args: list[str], timeout: float = 1.0) -> subprocess.CompletedProcess[str]:
    """Run `pactl <args>` and return the completed process.

    Output is captured as text and a non-zero exit status does NOT raise
    (`check=False`) — callers inspect `returncode` themselves.

    Raises:
        FileNotFoundError: the `pactl` binary is not installed / not on PATH.
        subprocess.TimeoutExpired: pactl did not finish within `timeout`
            seconds (a subclass of `subprocess.SubprocessError`, which is
            what the callers in this module catch).
    """
    return subprocess.run(
        ["pactl", *args],
        check=False,
        text=True,
        capture_output=True,
        timeout=timeout,
    )


def pactl_available() -> bool:
    """Return True when `pactl info` runs and exits with status 0."""
    try:
        return _run_pactl(["info"]).returncode == 0
    except (FileNotFoundError, subprocess.SubprocessError):
        return False


def list_sinks() -> list[dict[str, str]]:
    """Return [{name, description, index}] for every sink."""
    return _list_kind("sinks")


def list_sources() -> list[dict[str, str]]:
    """Return [{name, description, index}] for every source."""
    return _list_kind("sources")


def _parse_descriptions(listing: str) -> dict[str, str]:
    """Map device name → Description from verbose `pactl list <kind>` output.

    The listing is split into stanzas at every line starting with "Sink #" or
    "Source #". Within a stanza the first "Name:" line gives the key and the
    first "Description:" line gives the value ("" when the stanza has none).
    If the same name appears in several stanzas, the first one wins.
    """
    stanzas: list[list[str]] = []
    for line in listing.splitlines():
        if line.startswith(("Sink #", "Source #")) or not stanzas:
            stanzas.append([])
        stanzas[-1].append(line.strip())

    by_name: dict[str, str] = {}
    for stanza in stanzas:
        name = next(
            (s[len("Name:"):].strip() for s in stanza if s.startswith("Name:")),
            None,
        )
        if name is None:
            continue
        description = next(
            (s.split(":", 1)[1].strip() for s in stanza
             if s.startswith("Description:")),
            "",
        )
        by_name.setdefault(name, description)
    return by_name


def _descriptions_for_kind(kind: str) -> dict[str, str]:
    """Run `pactl list <kind>` ONCE and return {name: description}.

    Best-effort: returns an empty dict when pactl is missing, times out, or
    exits non-zero, so callers degrade to empty descriptions.
    """
    try:
        r = _run_pactl(["list", kind])
    except (FileNotFoundError, subprocess.SubprocessError):
        return {}
    if r.returncode != 0:
        return {}
    return _parse_descriptions(r.stdout or "")


def _list_kind(kind: str) -> list[dict[str, str]]:
    """Return [{index, name, description}] for every sink or source.

    `kind` is the pactl plural noun ("sinks" or "sources"). Index and name
    come from `pactl list short <kind>`; descriptions come from a single
    verbose `pactl list <kind>` call shared by all rows (previously the
    verbose listing was re-run for every device, i.e. 1+N subprocesses per
    call). Returns an empty list when pactl fails.
    """
    try:
        short = _run_pactl(["list", "short", kind])
    except (FileNotFoundError, subprocess.SubprocessError) as exc:
        log.warning("pactl list %s failed: %s", kind, exc)
        return []
    if short.returncode != 0:
        return []

    rows: list[tuple[str, str]] = []
    for raw in (short.stdout or "").splitlines():
        # Short listings are tab-separated; fall back to any whitespace.
        parts = raw.split("\t")
        if len(parts) < 2:
            parts = raw.split()
        if len(parts) < 2:
            continue
        rows.append((parts[0], parts[1]))
    if not rows:
        return []

    descriptions = _descriptions_for_kind(kind)
    return [
        {"index": idx, "name": name, "description": descriptions.get(name, "")}
        for idx, name in rows
    ]


def _description_for(kind: str, name: str) -> str:
    """Best-effort Description of one device from `pactl list <kind>`.

    The name must match the stanza's "Name:" value exactly (a suffix match
    could return the description of a different device whose name merely
    ends with `name`). Returns "" when pactl fails or the name is unknown.
    """
    return _descriptions_for_kind(kind).get(name, "")


def get_default_sink() -> str:
    """Return the name printed by `pactl get-default-sink`, or "" on failure."""
    try:
        r = _run_pactl(["get-default-sink"])
    except (FileNotFoundError, subprocess.SubprocessError):
        return ""
    return (r.stdout or "").strip() if r.returncode == 0 else ""


def get_default_source() -> str:
    """Return the name printed by `pactl get-default-source`, or "" on failure."""
    try:
        r = _run_pactl(["get-default-source"])
    except (FileNotFoundError, subprocess.SubprocessError):
        return ""
    return (r.stdout or "").strip() if r.returncode == 0 else ""


def set_default_sink(name: str) -> bool:
    """Run `pactl set-default-sink <name>`; True when it exits with status 0."""
    try:
        return _run_pactl(["set-default-sink", name]).returncode == 0
    except (FileNotFoundError, subprocess.SubprocessError):
        return False


def set_default_source(name: str) -> bool:
    """Run `pactl set-default-source <name>`; True when it exits with status 0."""
    try:
        return _run_pactl(["set-default-source", name]).returncode == 0
    except (FileNotFoundError, subprocess.SubprocessError):
        return False


# ───────────────────────── matching ─────────────────────────

def find_first_match(items: list[dict[str, str]], pattern: str,
                     exclude_monitors: bool = False) -> dict[str, str] | None:
    """Return first item whose name (case-insensitive) contains ANY of the
    comma-separated patterns.

    Example: pattern="powerconf,anker" matches any name containing
    "powerconf" OR "anker" (case-insensitive).

    If exclude_monitors=True, skip PulseAudio monitor sources (names ending
    in ".monitor") so we don't accidentally pick a loopback instead of a real mic.

    Returns None when `pattern` is empty, contains only separators/blanks,
    or nothing matches. Every item must carry a "name" key.
    """
    if not pattern:
        return None
    needles = [p.strip().lower() for p in pattern.split(",") if p.strip()]
    if not needles:
        return None
    for it in items:
        name_lower = it["name"].lower()
        if exclude_monitors and name_lower.endswith(".monitor"):
            continue
        if any(needle in name_lower for needle in needles):
            return it
    return None


# PyAudio fallback cache — avoid re-init'ing PyAudio on every poll
# (PyAudio init takes ~100 ms and the watcher polls at 1.5 s).
+_PYAUDIO_CACHE: dict[str, Any] = {"ts": 0.0, "input_names": []} +_PYAUDIO_TTL_S = 2.0 + + +def _pyaudio_input_names() -> list[str]: + """Return lowercase names of all PyAudio input devices. Cached for ~2 s. + + Used as a fallback in detect_plugged_profiles() when pactl can't see + a profile's source — some USB devices (notably the Anker PowerConf on + JetPack 5) only expose their mic via the raw ALSA layer, which PyAudio + can still open even when PulseAudio's card profile is output-only. + """ + import time as _t + now = _t.time() + if now - _PYAUDIO_CACHE["ts"] < _PYAUDIO_TTL_S: + return _PYAUDIO_CACHE["input_names"] + names: list[str] = [] + try: + import pyaudio # type: ignore + pa = pyaudio.PyAudio() + try: + for i in range(pa.get_device_count()): + try: + info = pa.get_device_info_by_index(i) + except Exception: + continue + if info.get("maxInputChannels", 0) <= 0: + continue + names.append(str(info.get("name", "")).lower()) + finally: + pa.terminate() + except Exception as exc: + log.debug("PyAudio enumeration unavailable: %s", exc) + _PYAUDIO_CACHE["ts"] = now + _PYAUDIO_CACHE["input_names"] = names + return names + + +def _pyaudio_input_matches(pattern: str) -> dict[str, str] | None: + """If any PyAudio input device name matches one of the comma-separated + patterns, return a synthetic source dict (matches find_first_match()'s + shape). Else None. + """ + if not pattern: + return None + needles = [p.strip().lower() for p in pattern.split(",") if p.strip()] + if not needles: + return None + for name in _pyaudio_input_names(): + if any(n in name for n in needles): + # Synthetic — mark the origin so logs / dashboards can see it + # came from PyAudio, not pactl. Includes `description` so any + # consumer that expects the same shape as a real pactl + # source dict (`{name, description, index}`) doesn't KeyError. 
+ return { + "name": f"pyaudio:{name}", + "driver": "pyaudio", + "description": f"PyAudio fallback — {name}", + "index": "", + } + return None + + +# Per-card cooldown for ensure_card_input_capable so a card whose firmware +# truly doesn't expose input doesn't get hammered with set-card-profile +# calls on every detection poll (every 1.5s from the live-Gemini watcher). +_CARD_PROFILE_LAST_ATTEMPT: dict[str, float] = {} +_CARD_PROFILE_COOLDOWN_S = 30.0 + + +def _parse_card_profiles(card_block: str) -> tuple[str, list[tuple[int, str, bool, bool]]]: + """Parse the `Profiles:` section of a single card stanza from + `pactl list cards`. Returns (active_profile, [(priority, name, + has_sink, has_source), ...]) — only profiles marked + `available: yes` are included. + + Profile lines look like: + \\toutput:analog-stereo+input:mono-fallback: Analog Stereo Output + Mono Input \\ + (sinks: 1, sources: 1, priority: 6501, available: yes) + """ + active = "" + profiles: list[tuple[int, str, bool, bool]] = [] + in_profiles = False + for raw in card_block.splitlines(): + line = raw.rstrip() + stripped = line.strip() + if stripped.startswith("Active Profile:"): + active = stripped[len("Active Profile:"):].strip() + continue + if stripped == "Profiles:": + in_profiles = True + continue + if in_profiles: + # End of Profiles section: next top-level key starts with non-tab + # OR an empty line. The block ends when leading whitespace drops + # to a tab/spaces shallower than the profile lines — easiest + # check: stop when we hit "Ports:" or "Active Profile:". 
+ if stripped.startswith("Ports:") or stripped.startswith("Active Profile:"): + in_profiles = False + if stripped.startswith("Active Profile:"): + active = stripped[len("Active Profile:"):].strip() + continue + # Profile line — must contain "(sinks: N, sources: M, priority: P, available: yes)" + paren = stripped.rfind(" (") + if paren < 0 or "available: yes" not in stripped: + continue + head = stripped[:paren] + sep = head.find(": ") + if sep < 0: + continue + name = head[:sep] + props = stripped[paren+2:].rstrip(")") + sinks_n = sources_n = priority = 0 + for tok in props.split(","): + tok = tok.strip() + if tok.startswith("sinks: "): + try: sinks_n = int(tok[len("sinks: "):]) + except ValueError: pass + elif tok.startswith("sources: "): + try: sources_n = int(tok[len("sources: "):]) + except ValueError: pass + elif tok.startswith("priority: "): + try: priority = int(tok[len("priority: "):]) + except ValueError: pass + profiles.append((priority, name, sinks_n > 0, sources_n > 0)) + return active, profiles + + +def ensure_card_input_capable(card_pattern: str) -> bool: + """If a PulseAudio card whose Name matches `card_pattern` is on an + output-only profile but has an input+output profile available, switch + to the highest-priority input+output profile. + + Why: USB UAC1 conference devices (Anker PowerConf and similar) can get + pinned to an output-only profile by PulseAudio's `module-card-restore` + — observed on G1 / JetPack 5 after an earlier session left the card in + that state. The mic-capable profile is right there in the card's + advertised list (we see `output:analog-stereo+input:mono-fallback` with + `sinks: 1, sources: 1` and `available: yes`), but the active profile + is the output-only one. Calling `pactl set-card-profile` against an + already-advertised, available profile is SAFE — unlike the older + `module-alsa-source device=hw:N,0` hack, which had to guess the hw + index and broke things when it guessed wrong. 
+ + `card_pattern` is a comma-separated substring list (same semantics as + `find_first_match`). Returns True if a switch happened OR the card was + already input-capable; False if no matching card / no switchable + profile exists. + + Rate-limited per-card via `_CARD_PROFILE_COOLDOWN_S` so a card whose + firmware genuinely can't do input doesn't get polled to death. + """ + if not pactl_available(): + return False + needles = [p.strip().lower() for p in card_pattern.split(",") if p.strip()] + if not needles: + return False + try: + r = _run_pactl(["list", "cards"]) + if r.returncode != 0: + return False + except (FileNotFoundError, subprocess.SubprocessError): + return False + + # Split into per-card blocks. `pactl list cards` separates cards with a + # blank line (and starts each with "Card #N"). + import time as _t + blocks: list[str] = [] + current: list[str] = [] + for line in (r.stdout or "").splitlines(): + if line.startswith("Card #") and current: + blocks.append("\n".join(current)) + current = [] + current.append(line) + if current: + blocks.append("\n".join(current)) + + switched_any = False + for block in blocks: + # Extract card Name + card_name = "" + for line in block.splitlines(): + s = line.strip() + if s.startswith("Name: "): + card_name = s[len("Name: "):].strip() + break + if not card_name: + continue + if not any(n in card_name.lower() for n in needles): + continue + + active, profiles = _parse_card_profiles(block) + if not profiles: + continue + + # If active profile already has input, nothing to do. + active_has_input = any( + name == active and has_src + for _, name, _, has_src in profiles + ) + if active_has_input: + return True + + # Find best input+output profile + candidates = [(prio, name) for prio, name, has_sink, has_src in profiles + if has_sink and has_src] + if not candidates: + # Card has no input+output profile (firmware truly output-only). 
+ log.debug("ensure_card_input_capable: %s has no input+output " + "profile — nothing to switch to", card_name) + continue + + now = _t.time() + last = _CARD_PROFILE_LAST_ATTEMPT.get(card_name, 0.0) + if (now - last) < _CARD_PROFILE_COOLDOWN_S: + continue + _CARD_PROFILE_LAST_ATTEMPT[card_name] = now + + # Sort: highest priority first; on ties, alphabetical name asc so + # `mono-fallback` wins over `multichannel-input` (the source name + # `...mono-fallback` matches Sanad's hardcoded SOURCE patterns and + # the AI_Photographer setup that's known to work on this hardware). + candidates.sort(key=lambda x: (-x[0], x[1])) + target = candidates[0][1] + log.info("ensure_card_input_capable: %s active=%r → %r " + "(exposes mic to PulseAudio)", + card_name, active or "?", target) + try: + sr = _run_pactl(["set-card-profile", card_name, target]) + if sr.returncode == 0: + switched_any = True + else: + log.warning("ensure_card_input_capable: set-card-profile " + "%s %r failed: %s", card_name, target, + (sr.stderr or "").strip()) + except (FileNotFoundError, subprocess.SubprocessError) as exc: + log.warning("ensure_card_input_capable: pactl error: %s", exc) + return switched_any + + +def detect_plugged_profiles() -> list[dict[str, Any]]: + """Return all profiles whose sink AND source are currently plugged in. + + For each profile, the source is resolved in three passes: + 1. pactl list short sources (standard path) + 2. `ensure_card_input_capable` to unstick output-only PulseAudio + card profiles (Anker UAC1 quirk) — re-checks pactl sources + after the switch + 3. PyAudio device list (fallback — see _pyaudio_input_matches docstring) + + A profile counts as "plugged" if the sink matches via pactl AND a source + is found via ANY pass. 
+ """ + sinks = list_sinks() + sources = list_sources() + detected: list[dict[str, Any]] = [] + refreshed_sources = False + for prof in PROFILES: + sink = find_first_match(sinks, prof.sink_pattern) + if not sink: + continue + src = find_first_match(sources, prof.source_pattern, exclude_monitors=True) + via = "pactl" + if src is None: + # Try to unstick the card's PulseAudio profile (most common + # cause of "sink present, source missing" on Anker). Re-list + # sources once if any switch happened — and cache for the rest + # of this detection pass so we don't re-list per profile. + switched = ensure_card_input_capable(prof.sink_pattern) + if switched and not refreshed_sources: + sources = list_sources() + refreshed_sources = True + src = find_first_match(sources, prof.source_pattern, + exclude_monitors=True) + if src is not None: + via = "pactl-after-profile-switch" + log.info("detect_plugged_profiles: %s source appeared " + "after card-profile switch: %s", + prof.id, src.get("name", "?")) + if src is None: + # Last resort — PyAudio may still see the mic. + src = _pyaudio_input_matches(prof.source_pattern) + if src is not None: + via = "pyaudio" + log.info("detect_plugged_profiles: %s source resolved via " + "PyAudio fallback (pactl missed it): %s", + prof.id, src.get("name", "?")) + if sink and src: + detected.append({ + "profile": asdict(prof), + "sink": sink, + "source": src, + "source_via": via, + }) + return detected + + +# ───────────────────────── persistence ───────────────────────── + +def load_state() -> dict[str, Any]: + """Load saved selection. 
Always returns a dict.""" + if not DEVICE_STATE_FILE.exists(): + return {} + try: + with open(DEVICE_STATE_FILE, "r", encoding="utf-8") as f: + return json.load(f) + except (json.JSONDecodeError, OSError) as exc: + log.warning("audio_device.json unreadable: %s", exc) + return {} + + +def save_state(state: dict[str, Any]) -> None: + """Atomic write of audio_device.json.""" + DEVICE_STATE_FILE.parent.mkdir(parents=True, exist_ok=True) + with _LOCK: + fd, tmp = tempfile.mkstemp( + prefix=f".{DEVICE_STATE_FILE.name}.", suffix=".tmp", + dir=str(DEVICE_STATE_FILE.parent), + ) + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + json.dump(state, f, indent=2) + os.replace(tmp, DEVICE_STATE_FILE) + except Exception: + try: + os.unlink(tmp) + except OSError: + pass + raise + + +# ───────────────────────── current selection ───────────────────────── + +def current_selection() -> dict[str, Any]: + """Resolve the currently active sink/source. + + Order: + 1. Saved profile selection (if its sink/source still plugged) + 2. Saved explicit sink/source pair (ONLY if both still exist in pactl — + otherwise the saved manual override is stale and we fall through) + 3. DEFAULT profile (builtin) if detected + 4. First detected profile (in declaration order) + 5. pactl defaults + 6. Empty + """ + state = load_state() + + # Detected profiles snapshot + detected = detect_plugged_profiles() if pactl_available() else [] + detected_by_id = {d["profile"]["id"]: d for d in detected} + + # 1. Saved profile preference + saved_profile = state.get("profile_id") + if saved_profile and saved_profile in detected_by_id: + d = detected_by_id[saved_profile] + return { + "source_kind": "profile", + "profile": d["profile"], + "sink": d["sink"].get("name", ""), + "source": d["source"].get("name", ""), + "sink_description": d["sink"].get("description", ""), + "source_description": d["source"].get("description", ""), + } + + # 2. 
Saved explicit sink/source — but VERIFY both names still exist in + # pactl before returning them. Without this check, unplugging a device + # (e.g. the Anker) leaves the dashboard showing the dead manual override + # forever, play_wav routes to a non-existent sink, PortAudio raises + # paBadIODeviceCombination, and the user gets no audio. Falling through + # here lets steps 3–5 re-resolve to whatever's actually plugged in + # (typically the builtin/chest), no user click required. + saved_sink = (state.get("sink") or "").strip() + saved_source = (state.get("source") or "").strip() + if saved_sink and saved_source: + sink_names = {s["name"] for s in (list_sinks() if pactl_available() else [])} + source_names = { + s["name"] for s in (list_sources() if pactl_available() else []) + } + if saved_sink in sink_names and saved_source in source_names: + return { + "source_kind": "manual", + "profile": None, + "sink": saved_sink, + "source": saved_source, + "sink_description": "", + "source_description": "", + } + log.info( + "current_selection: manual override stale (sink=%s present=%s, " + "source=%s present=%s) — falling through to auto-detect", + saved_sink, saved_sink in sink_names, + saved_source, saved_source in source_names, + ) + + # 3. Default profile if it is plugged in + if DEFAULT_PROFILE_ID in detected_by_id: + d = detected_by_id[DEFAULT_PROFILE_ID] + return { + "source_kind": "default", + "profile": d["profile"], + "sink": d["sink"].get("name", ""), + "source": d["source"].get("name", ""), + "sink_description": d["sink"].get("description", ""), + "source_description": d["source"].get("description", ""), + } + + # 4. First detected profile (in declaration order) + if detected: + d = detected[0] + return { + "source_kind": "auto", + "profile": d["profile"], + "sink": d["sink"].get("name", ""), + "source": d["source"].get("name", ""), + "sink_description": d["sink"].get("description", ""), + "source_description": d["source"].get("description", ""), + } + + # 5. 
pactl defaults (system-wide) + sink = get_default_sink() + source = get_default_source() + if sink and source: + return { + "source_kind": "pactl_default", + "profile": None, + "sink": sink, + "source": source, + "sink_description": "", + "source_description": "", + } + + # 6. Empty + return { + "source_kind": "none", + "profile": None, + "sink": "", + "source": "", + "sink_description": "", + "source_description": "", + } + + +# ───────────────────────── apply selection ───────────────────────── + +def apply_selection(sink: str, source: str) -> dict[str, Any]: + """Run pactl set-default-* and unmute. Returns {ok, errors}. + + A source name starting with `pyaudio:` is the synthetic marker emitted + by detect_plugged_profiles() when the source was resolved only via the + PyAudio fallback (PulseAudio doesn't expose the mic, but PortAudio can + open it directly via raw ALSA). We can't `pactl set-default-source` on + a synthetic name — pactl would error. So in that case we set only the + sink and skip the source; Sanad's live mic path uses PortAudio direct + via AnkerMic's substring index lookup and doesn't depend on the pactl + default source. Dashboard playback (audio_manager.play_wav) records + from whatever pactl considers default — that stays on the boot mic + until the recovery script (Path B) fully exposes Anker in PulseAudio. + """ + errors: list[str] = [] + if sink: + if not set_default_sink(sink): + errors.append(f"set-default-sink failed: {sink}") + else: + try: + _run_pactl(["set-sink-mute", sink, "0"]) + except (FileNotFoundError, subprocess.SubprocessError): + pass + if source: + if source.startswith("pyaudio:"): + log.info("apply_selection: source is PyAudio-direct (%s) — " + "skipping pactl set-default-source. 
Live mic path " + "uses PortAudio device match; pactl defaults stay put.", + source) + elif not set_default_source(source): + errors.append(f"set-default-source failed: {source}") + else: + try: + _run_pactl(["set-source-mute", source, "0"]) + except (FileNotFoundError, subprocess.SubprocessError): + pass + return {"ok": not errors, "errors": errors} + + +def apply_current_selection() -> dict[str, Any]: + """Resolve the current device selection (re-scanning all USB ports) and + apply it via pactl. Called at AudioManager startup and when devices change. + + This is the key function that makes audio work regardless of which USB + port the device is plugged into — it re-discovers on every call. + """ + if not pactl_available(): + return {"ok": False, "error": "pactl not available"} + cur = current_selection() + sink = cur.get("sink", "") + source = cur.get("source", "") + if not sink and not source: + return {"ok": False, "error": "no device resolved", "selection": cur} + result = apply_selection(sink, source) + result["selection"] = cur + if result["ok"]: + log.info("Audio applied — sink=%s source=%s (via %s)", + sink, source, cur.get("source_kind", "?")) + else: + log.warning("Audio apply partial — sink=%s source=%s errors=%s", + sink, source, result["errors"]) + return result + + +def select_profile(profile_id: str) -> dict[str, Any]: + """Switch to a named profile. 
Persists selection.""" + if profile_id not in PROFILES_BY_ID: + return {"ok": False, "error": f"Unknown profile: {profile_id}"} + + detected = detect_plugged_profiles() + detected_by_id = {d["profile"]["id"]: d for d in detected} + if profile_id not in detected_by_id: + return { + "ok": False, + "error": f"Profile '{profile_id}' is not currently plugged in", + "available": [d["profile"]["id"] for d in detected], + } + + d = detected_by_id[profile_id] + sink_name = d["sink"]["name"] + source_name = d["source"]["name"] + + apply_result = apply_selection(sink_name, source_name) + if not apply_result["ok"]: + return {"ok": False, "errors": apply_result["errors"]} + + save_state({ + "profile_id": profile_id, + "sink": sink_name, + "source": source_name, + }) + log.info("Selected audio profile: %s (sink=%s, source=%s)", profile_id, sink_name, source_name) + return { + "ok": True, + "profile": d["profile"], + "sink": sink_name, + "source": source_name, + } + + +def select_manual(sink: str, source: str) -> dict[str, Any]: + """Switch to an explicit sink/source pair (no profile).""" + apply_result = apply_selection(sink, source) + if not apply_result["ok"]: + return {"ok": False, "errors": apply_result["errors"]} + save_state({"profile_id": None, "sink": sink, "source": source}) + log.info("Selected manual audio: sink=%s source=%s", sink, source) + return {"ok": True, "sink": sink, "source": source} + + +# ───────────────────────── status ───────────────────────── + +def status() -> dict[str, Any]: + """One-shot status for the dashboard.""" + pa = pactl_available() + detected = detect_plugged_profiles() if pa else [] + detected_ids = [d["profile"]["id"] for d in detected] + cur = current_selection() + return { + "pactl_available": pa, + "current": cur, + "saved_state": load_state(), + "profiles": [asdict(p) for p in PROFILES], + "detected": detected, + "detected_ids": detected_ids, + "all_sinks": list_sinks() if pa else [], + "all_sources": list_sources() if pa else [], + 
"default_sink": get_default_sink() if pa else "", + "default_source": get_default_source() if pa else "", + } diff --git a/vendor/Sanad/voice/audio_io.py b/vendor/Sanad/voice/audio_io.py new file mode 100644 index 0000000..7b0bdc3 --- /dev/null +++ b/vendor/Sanad/voice/audio_io.py @@ -0,0 +1,761 @@ +"""Hardware-agnostic audio I/O for Sanad voice pipelines. + +Provides a uniform Mic / Speaker interface so the model layer (Gemini +today, or any future alternative) doesn't need to know which physical +audio path is active. Pick a pairing via `AudioIO.from_profile()`: + + builtin → G1 UDP multicast mic + AudioClient.PlayStream + anker → Anker PowerConf USB mic + speaker (PyAudio) + hollyland_builtin → Hollyland wireless mic + G1 built-in speaker + +Mics deliver int16 mono PCM at 16 kHz. +Speakers accept int16 mono PCM plus a `source_rate` and resample +internally if the hardware runs at a different rate. + +Usage: + + audio = AudioIO.from_profile("builtin", audio_client=ac) + audio.start() + try: + chunk = audio.mic.read_chunk(1024) # mic + audio.speaker.begin_stream() # speaker + audio.speaker.send_chunk(pcm_24k, 24000) + audio.speaker.wait_finish() + finally: + audio.stop() +""" + +from __future__ import annotations + +import json +import socket +import struct +import subprocess +import threading +import time +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any, Optional, Union + +import numpy as np + +try: + import pyaudio + _HAS_PYAUDIO = True +except ImportError: + pyaudio = None + _HAS_PYAUDIO = False + +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.logger import get_logger + +log = get_logger("audio_io") + +_MIC_CFG = _cfg_section("voice", "mic_udp") +_SP_CFG = _cfg_section("voice", "speaker") + +TARGET_MIC_RATE = 16_000 + +_MCAST_GRP = _MIC_CFG.get("group", "239.168.123.161") +_MCAST_PORT = _MIC_CFG.get("port", 5555) +_MIC_BUF_MAX = _MIC_CFG.get("buffer_max_bytes", 
64_000) +_MIC_READ_TIMEOUT = _MIC_CFG.get("read_timeout_sec", 0.04) + +PCMLike = Union[bytes, bytearray, memoryview, np.ndarray] + + +def _find_g1_local_ip() -> str: + """Find the host IPv4 address on the G1's internal 192.168.123.0/24 network.""" + out = subprocess.run( + ["ip", "-4", "-o", "addr"], capture_output=True, text=True, + ).stdout + for line in out.splitlines(): + for tok in line.split(): + if tok.startswith("192.168.123."): + return tok.split("/")[0] + raise RuntimeError("no 192.168.123.x interface found") + + +def _resample_int16(pcm: np.ndarray, src_rate: int, dst_rate: int) -> np.ndarray: + if src_rate == dst_rate or pcm.size == 0: + return pcm.astype(np.int16, copy=False) + target_len = max(1, int(len(pcm) * dst_rate / src_rate)) + return np.interp( + np.linspace(0, len(pcm), target_len, endpoint=False), + np.arange(len(pcm)), + pcm.astype(np.float64), + ).astype(np.int16) + + +def _as_int16_array(pcm: PCMLike) -> np.ndarray: + if isinstance(pcm, np.ndarray): + return pcm.astype(np.int16, copy=False) + return np.frombuffer(bytes(pcm), dtype=np.int16) + + +# ─── Protocols ──────────────────────────────────────────── + +class Mic(ABC): + sample_rate: int = TARGET_MIC_RATE + + @abstractmethod + def start(self) -> None: ... + @abstractmethod + def read_chunk(self, num_bytes: int) -> bytes: ... + @abstractmethod + def flush(self) -> None: ... + @abstractmethod + def stop(self) -> None: ... + + +class Speaker(ABC): + @abstractmethod + def begin_stream(self) -> None: ... + + @abstractmethod + def send_chunk(self, pcm: PCMLike, source_rate: int) -> None: + """Queue PCM for playback. `source_rate` is the sample rate of `pcm`.""" + + @abstractmethod + def wait_finish(self) -> None: ... + @abstractmethod + def stop(self) -> None: ... + + @property + @abstractmethod + def interrupted(self) -> bool: ... 
+ + @property + def total_sent_sec(self) -> float: + return 0.0 + + +# ─── G1 built-in (UDP mic + AudioClient speaker) ────────── + +class BuiltinMic(Mic): + """G1 robot's on-board mic published over UDP multicast.""" + + sample_rate = TARGET_MIC_RATE + + def __init__(self, group: str = _MCAST_GRP, port: int = _MCAST_PORT, + buf_max: int = _MIC_BUF_MAX): + self._group = group + self._port = port + self._buf_max = buf_max + self._sock: Optional[socket.socket] = None + self._buf = bytearray() + self._lock = threading.Lock() + self._running = False + self._thread: Optional[threading.Thread] = None + + def start(self) -> None: + local_ip = _find_g1_local_ip() + self._sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + self._sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + self._sock.bind(("", self._port)) + mreq = struct.pack( + "4s4s", + socket.inet_aton(self._group), + socket.inet_aton(local_ip), + ) + self._sock.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP, mreq) + self._sock.settimeout(1.0) + self._running = True + self._thread = threading.Thread(target=self._recv_loop, daemon=True) + self._thread.start() + log.info("BuiltinMic joined %s:%d on %s", self._group, self._port, local_ip) + + def _recv_loop(self) -> None: + while self._running: + try: + data, _ = self._sock.recvfrom(4096) + with self._lock: + self._buf.extend(data) + if len(self._buf) > self._buf_max: + del self._buf[:len(self._buf) - self._buf_max] + except socket.timeout: + continue + except Exception: + if self._running: + time.sleep(0.01) + + def read_chunk(self, num_bytes: int) -> bytes: + deadline = time.time() + _MIC_READ_TIMEOUT + while time.time() < deadline: + with self._lock: + if len(self._buf) >= num_bytes: + chunk = bytes(self._buf[:num_bytes]) + del self._buf[:num_bytes] + return chunk + time.sleep(0.003) + with self._lock: + avail = len(self._buf) + if avail > 0: + chunk = bytes(self._buf[:avail]) + del self._buf[:avail] + return chunk + b"\x00" * (num_bytes - 
avail) + return b"\x00" * num_bytes + + def flush(self) -> None: + with self._lock: + self._buf.clear() + + def stop(self) -> None: + self._running = False + if self._sock is not None: + try: + self._sock.close() + except Exception: + pass + self._sock = None + + +class BuiltinSpeaker(Speaker): + """G1 robot's built-in speaker via AudioClient.PlayStream (16 kHz mono).""" + + HARDWARE_RATE = 16_000 + + def __init__(self, audio_client: Any, app_name: Optional[str] = None): + self._ac = audio_client + try: + self._ac.SetVolume(100) + except Exception: + log.warning("BuiltinSpeaker.SetVolume failed") + self._app_name = app_name or _SP_CFG.get("app_name", "sanad") + self._begin_pause = _SP_CFG.get("begin_stream_pause_sec", 0.15) + self._finish_margin = _SP_CFG.get("wait_finish_margin_sec", 0.3) + self._stop_flag = threading.Event() + self._stream_id: Optional[str] = None + self._total_sent = 0.0 + self._play_start = 0.0 + + def _stop_play_api(self) -> None: + try: + from unitree_sdk2py.g1.audio.g1_audio_api import ( + ROBOT_API_ID_AUDIO_STOP_PLAY, + ) + self._ac._Call( + ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": self._app_name}), + ) + except Exception: + log.warning("BuiltinSpeaker AUDIO_STOP_PLAY failed") + + def begin_stream(self) -> None: + self._stop_flag.clear() + self._stop_play_api() + time.sleep(self._begin_pause) + self._stream_id = f"s_{int(time.time() * 1000)}" + self._total_sent = 0.0 + self._play_start = time.time() + + def send_chunk(self, pcm: PCMLike, source_rate: int) -> None: + if self._stop_flag.is_set(): + return + arr = _as_int16_array(pcm) + if arr.size < 10: + return + hw = _resample_int16(arr, source_rate, self.HARDWARE_RATE) + self._ac.PlayStream(self._app_name, self._stream_id, hw.tobytes()) + self._total_sent += len(hw) / self.HARDWARE_RATE + + def wait_finish(self) -> None: + elapsed = time.time() - self._play_start + remaining = self._total_sent - elapsed + self._finish_margin + waited = 0.0 + while waited < remaining and not 
self._stop_flag.is_set(): + time.sleep(0.1) + waited += 0.1 + self._stop_play_api() + + def stop(self) -> None: + self._stop_flag.set() + self._stop_play_api() + + @property + def interrupted(self) -> bool: + return self._stop_flag.is_set() + + @property + def total_sent_sec(self) -> float: + return self._total_sent + + +# ─── PyAudio-backed mic/speaker ─────────────────────────── + +class _PyAudioMic(Mic): + """Shared base for PulseAudio/ALSA input — matches device by name pattern.""" + + sample_rate = TARGET_MIC_RATE + + def __init__(self, device_pattern: str, label: str, + frames_per_buffer: int = 512): + if not _HAS_PYAUDIO: + raise RuntimeError(f"{label}Mic requires pyaudio") + self._device_pattern = device_pattern + self._label = label + self._frames_per_buffer = frames_per_buffer + self._pa: Optional["pyaudio.PyAudio"] = None + self._stream = None + self._running = False + self._buf = bytearray() + self._lock = threading.Lock() + self._thread: Optional[threading.Thread] = None + + def _resolve_device_index(self) -> Optional[int]: + """Pick the PyAudio input device to open. + + Preference order: + 1. PortAudio's 'pulse' device — routes capture through + PulseAudio's default source. We MUST use this on USB UAC1 + cards (Anker PowerConf, Hollyland) — opening hw:N,0 + directly grabs ALSA exclusively, which makes PulseAudio's + module-alsa-card lose the device. That drops the card + from `pactl list`, the parent's audio watcher then sees + "anker unplugged" within seconds, and reverts the brain + to the boot profile (chest speaker). The dashboard's + "Apply" call already pins PulseAudio's default + source/sink to the matching Anker device, so opening + 'pulse' lands on the right hardware. + 2. PortAudio's 'default' device — also pulse-routed on a + standard Pulse-on-Linux setup, used as a fallback. + 3. 
Substring match against the device_pattern (legacy + direct-hw path) — only used when neither 'pulse' nor + 'default' is enumerated by PortAudio, which would only + happen on a system without Pulse. Logs a WARN because + this is the path that triggers the bug above. + """ + if self._pa is None: + return None + pulse_idx = default_idx = pattern_idx = None + pattern_name = "" + patterns = [p.strip().lower() + for p in self._device_pattern.split(",") if p.strip()] + for i in range(self._pa.get_device_count()): + info = self._pa.get_device_info_by_index(i) + if info.get("maxInputChannels", 0) <= 0: + continue + name_lower = str(info.get("name", "")).lower() + if pulse_idx is None and name_lower == "pulse": + pulse_idx = i + elif default_idx is None and name_lower == "default": + default_idx = i + if pattern_idx is None and any(n in name_lower for n in patterns): + pattern_idx = i + pattern_name = name_lower + if pulse_idx is not None: + return pulse_idx + if default_idx is not None: + return default_idx + if pattern_idx is not None: + log.warning( + "%sMic falling back to direct ALSA device '%s' " + "(no 'pulse'/'default' device exposed by PortAudio) — " + "this grabs the card exclusively and may cause PulseAudio " + "to drop it; consider installing the ALSA pulse plugin", + self._label, pattern_name, + ) + return pattern_idx + return None + + def start(self) -> None: + self._pa = pyaudio.PyAudio() + idx = self._resolve_device_index() + # Log which device we picked so a "wrong sink" symptom is easy + # to attribute. Includes the device name (e.g. 'pulse' vs hw:N) + # since the index alone tells you nothing useful in a tail. + try: + picked = self._pa.get_device_info_by_index(idx) if idx is not None else {} + picked_name = picked.get("name", "?") + except Exception: + picked_name = "?" 
+ self._stream = self._pa.open( + format=pyaudio.paInt16, + channels=1, + rate=self.sample_rate, + input=True, + input_device_index=idx, + frames_per_buffer=self._frames_per_buffer, + ) + self._running = True + self._thread = threading.Thread(target=self._recv_loop, daemon=True) + self._thread.start() + log.info("%sMic started (device_index=%s name=%r)", + self._label, idx, picked_name) + + def _recv_loop(self) -> None: + while self._running: + try: + data = self._stream.read( + self._frames_per_buffer, exception_on_overflow=False, + ) + with self._lock: + self._buf.extend(data) + if len(self._buf) > _MIC_BUF_MAX: + del self._buf[:len(self._buf) - _MIC_BUF_MAX] + except Exception: + if self._running: + time.sleep(0.01) + + def read_chunk(self, num_bytes: int) -> bytes: + deadline = time.time() + _MIC_READ_TIMEOUT + while time.time() < deadline: + with self._lock: + if len(self._buf) >= num_bytes: + chunk = bytes(self._buf[:num_bytes]) + del self._buf[:num_bytes] + return chunk + time.sleep(0.003) + with self._lock: + avail = len(self._buf) + if avail > 0: + chunk = bytes(self._buf[:avail]) + del self._buf[:avail] + return chunk + b"\x00" * (num_bytes - avail) + return b"\x00" * num_bytes + + def flush(self) -> None: + with self._lock: + self._buf.clear() + + def stop(self) -> None: + self._running = False + if self._stream is not None: + try: + self._stream.stop_stream() + self._stream.close() + except Exception: + pass + self._stream = None + if self._pa is not None: + try: + self._pa.terminate() + except Exception: + pass + self._pa = None + + +class AnkerMic(_PyAudioMic): + def __init__(self): + super().__init__(device_pattern="powerconf,anker", label="Anker") + + +class HollylandMic(_PyAudioMic): + def __init__(self): + super().__init__( + device_pattern="hollyland,wireless_microphone", + label="Hollyland", + ) + + +class _PyAudioSpeaker(Speaker): + """PulseAudio/ALSA output — opens a fresh output stream per begin_stream().""" + + def __init__(self, 
device_pattern: str, label: str): + if not _HAS_PYAUDIO: + raise RuntimeError(f"{label}Speaker requires pyaudio") + self._device_pattern = device_pattern + self._label = label + self._pa: Optional["pyaudio.PyAudio"] = None + self._stream = None + self._stream_rate: Optional[int] = None + self._stop_flag = threading.Event() + self._total_sent = 0.0 + # Serialises every touch of self._stream / self._pa. PortAudio's + # ALSA→pulse plugin is NOT re-entrant: a concurrent snd_pcm_close + # (from stop()/wait_finish()) while another thread is inside + # snd_pcm_writei (from send_chunk()) corrupts the pulse mainloop + # heap — observed as `malloc_consolidate(): invalid chunk size` + # on barge-in. RLock so stop()→wait_finish() nesting is safe. + self._lock = threading.RLock() + # Sticky teardown signal — once stop() has run, refuse to + # lazy-reopen the stream from a late send_chunk on the same + # instance (the swap path replaces the instance entirely). + self._closed = False + + def _resolve_device_index(self) -> Optional[int]: + """Pick the PyAudio output device to open. + + Mirrors `_PyAudioMic._resolve_device_index` — see that method's + docstring for the rationale. Short version: prefer 'pulse' so + playback goes through PulseAudio's default sink (which the + dashboard's Apply pins to the active profile's sink); only fall + back to direct hw:N if PulseAudio isn't wired into PortAudio at + all. Grabbing hw:N exclusively makes PulseAudio drop the card + and the parent's audio watcher will then revert the brain to + the boot profile within seconds. 
+ """ + if self._pa is None: + return None + pulse_idx = default_idx = pattern_idx = None + pattern_name = "" + patterns = [p.strip().lower() + for p in self._device_pattern.split(",") if p.strip()] + for i in range(self._pa.get_device_count()): + info = self._pa.get_device_info_by_index(i) + if info.get("maxOutputChannels", 0) <= 0: + continue + name_lower = str(info.get("name", "")).lower() + if pulse_idx is None and name_lower == "pulse": + pulse_idx = i + elif default_idx is None and name_lower == "default": + default_idx = i + if pattern_idx is None and any(n in name_lower for n in patterns): + pattern_idx = i + pattern_name = name_lower + if pulse_idx is not None: + return pulse_idx + if default_idx is not None: + return default_idx + if pattern_idx is not None: + log.warning( + "%sSpeaker falling back to direct ALSA device '%s' " + "(no 'pulse'/'default' device exposed by PortAudio) — " + "this grabs the card exclusively and may cause PulseAudio " + "to drop it; consider installing the ALSA pulse plugin", + self._label, pattern_name, + ) + return pattern_idx + return None + + # USB-native rate for the underlying card. PortAudio's ALSA backend + # (the only backend available in conda's PyAudio build on Jetson) + # opens via the ALSA 'pulse' plugin, which on this system DOES NOT + # advertise rate conversion in `snd_pcm_hw_params` — opening at the + # source rate (24 kHz from Gemini TTS, etc.) gets rejected with + # paInvalidSampleRate. We pin the stream rate to the card's native + # 48 kHz and resample chunks app-side before writing. Same approach + # `_play_pcm_via_g1` uses for the DDS path. + _STREAM_TARGET_RATE = 48_000 + + def _open_stream(self, _ignored_rate: int) -> None: + idx = self._resolve_device_index() + try: + picked = self._pa.get_device_info_by_index(idx) if idx is not None else {} + picked_name = picked.get("name", "?") + except Exception: + picked_name = "?" + # ALWAYS open at _STREAM_TARGET_RATE — see class docstring above. 
+ self._stream = self._pa.open( + format=pyaudio.paInt16, + channels=1, + rate=self._STREAM_TARGET_RATE, + output=True, + output_device_index=idx, + ) + self._stream_rate = self._STREAM_TARGET_RATE + log.info("%sSpeaker output opened (device_index=%s name=%r, rate=%d " + "— chunks resampled to this rate)", + self._label, idx, picked_name, self._STREAM_TARGET_RATE) + + def begin_stream(self) -> None: + # Hold the lock so a concurrent stop()/wait_finish() (from the + # barge-in path or a swap drain) cannot interleave with the + # flag clear + PyAudio init — which would otherwise re-enable + # writes against a stream the teardown is about to close. + with self._lock: + if self._closed: + # Speaker was torn down for swap or session end; do not + # revive on the same instance. Caller swap_audio_devices + # replaces the instance entirely. + return + self._stop_flag.clear() + self._total_sent = 0.0 + if self._pa is None: + self._pa = pyaudio.PyAudio() + + def _resample_mono16(self, arr, src_rate: int, dst_rate: int): + """Linear interp resample of mono int16. Returns ndarray (int16). + No-op when rates match. numpy-only — matches _play_pcm_via_g1.""" + import numpy as _np # local — keep top-level import surface unchanged + if src_rate == dst_rate or arr.size == 0: + return arr + n_out = max(1, int(arr.size * dst_rate / src_rate)) + return _np.interp( + _np.linspace(0, arr.size, n_out, endpoint=False), + _np.arange(arr.size, dtype=_np.float64), + arr.astype(_np.float64), + ).astype(_np.int16) + + def send_chunk(self, pcm: PCMLike, source_rate: int) -> None: + # Cheap pre-check OUTSIDE the lock — avoids ever taking the + # lock for empty/late chunks once a stop has fired. Event + + # bool reads are atomic. 
+ if self._stop_flag.is_set() or self._closed: + return + arr = _as_int16_array(pcm) + if arr.size < 10: + return + # Resample BEFORE acquiring the lock — pure CPU, no shared + # state, keeps the critical section to just the PortAudio write + # so a concurrent stop() doesn't wait on numpy work. + if source_rate != self._STREAM_TARGET_RATE: + arr = self._resample_mono16(arr, source_rate, self._STREAM_TARGET_RATE) + payload = arr.tobytes() + sent_sec = len(arr) / self._STREAM_TARGET_RATE + with self._lock: + # CRITICAL re-check inside the lock: stop() may have run + # between our pre-check and acquiring the lock. Without + # this, the lazy-open below would resurrect a stream that + # barge-in just tore down — defeating the whole fix. + if self._stop_flag.is_set() or self._closed: + return + if self._pa is None: + self._pa = pyaudio.PyAudio() + if self._stream is None: + # Pass any rate — _open_stream ignores it and always + # opens at _STREAM_TARGET_RATE. + self._open_stream(source_rate) + stream = self._stream # snapshot — wait_finish nulls under same lock + if stream is None: # _open_stream failed + return + try: + stream.write(payload) + self._total_sent += sent_sec + except Exception as exc: + log.warning("%sSpeaker write failed: %s", self._label, exc) + + def wait_finish(self) -> None: + with self._lock: + stream = self._stream + # Null the ref BEFORE close so a racing send_chunk (waiting + # on the lock) re-checks and bails instead of touching a + # half-closed handle. Double-close-safe: if another caller + # already nulled it, we do nothing. + self._stream = None + self._stream_rate = None + if stream is not None: + try: + stream.stop_stream() + except Exception: + pass + try: + stream.close() + except Exception: + pass + + def stop(self) -> None: + # Set the flag FIRST (outside the lock — Event is atomic) so a + # concurrent send_chunk on another thread sees teardown ASAP + # even before it tries to acquire the lock. 
Then take the + # RLock and finish teardown; wait_finish re-enters the RLock + # safely. + self._stop_flag.set() + with self._lock: + self._closed = True + self.wait_finish() + + @property + def interrupted(self) -> bool: + return self._stop_flag.is_set() + + @property + def total_sent_sec(self) -> float: + return self._total_sent + + +class AnkerSpeaker(_PyAudioSpeaker): + def __init__(self): + super().__init__(device_pattern="powerconf,anker", label="Anker") + + +# ─── Factory ────────────────────────────────────────────── + +_PROFILE_ALIASES = { + "builtin": "builtin", + "g1_builtin": "builtin", + "g1": "builtin", + "anker": "anker", + "anker_powerconf": "anker", + "hollyland": "hollyland_builtin", + "hollyland_builtin": "hollyland_builtin", +} + +SUPPORTED_PROFILES = ("builtin", "anker", "hollyland_builtin") + + +@dataclass +class AudioIO: + mic: Mic + speaker: Speaker + profile_id: str = field(default="builtin") + # Kept on the instance so the brain can rebuild a profile that needs + # the DDS handle (`builtin`, `hollyland_builtin`) during a hot-swap — + # without re-init'ing the channel. `repr=False` keeps it out of logs. + _audio_client: Optional[Any] = field(default=None, repr=False, compare=False) + + def start(self) -> None: + self.mic.start() + + def stop(self) -> None: + try: + self.speaker.stop() + except Exception: + log.warning("AudioIO speaker.stop failed", exc_info=True) + try: + self.mic.stop() + except Exception: + log.warning("AudioIO mic.stop failed", exc_info=True) + + @classmethod + def build_backends( + cls, + profile_id: str, + *, + audio_client: Optional[Any] = None, + ) -> tuple[Mic, Speaker]: + """Return a fresh (Mic, Speaker) pair for a profile WITHOUT wrapping + in an AudioIO. Used by GeminiBrain.swap_audio_devices() for the + hot-swap path: build a new pair, switch refs, tear down the old. + + Same validation as from_profile(). `audio_client` is required for + profiles that route playback through the G1 chest speaker. 
+ """ + raw = (profile_id or "").strip().lower() + resolved = _PROFILE_ALIASES.get(raw) + if resolved is None: + raise ValueError( + f"unknown audio profile {profile_id!r}; " + f"supported: {', '.join(SUPPORTED_PROFILES)}" + ) + if resolved == "builtin": + if audio_client is None: + raise ValueError( + "profile 'builtin' requires audio_client (G1 AudioClient)" + ) + return BuiltinMic(), BuiltinSpeaker(audio_client) + if resolved == "anker": + return AnkerMic(), AnkerSpeaker() + if resolved == "hollyland_builtin": + if audio_client is None: + raise ValueError( + "profile 'hollyland_builtin' uses the G1 speaker — " + "requires audio_client" + ) + return HollylandMic(), BuiltinSpeaker(audio_client) + raise AssertionError(f"unhandled resolved profile: {resolved!r}") + + @classmethod + def from_profile( + cls, + profile_id: str, + *, + audio_client: Optional[Any] = None, + ) -> "AudioIO": + """Build an AudioIO for the requested profile. + + `audio_client` is the initialised `unitree_sdk2py` `AudioClient` and + is required for any profile that speaks through the G1's on-board + speaker (`builtin`, `hollyland_builtin`). It's also retained on the + returned AudioIO so a later hot-swap can rebuild without re-init. + """ + raw = (profile_id or "").strip().lower() + resolved = _PROFILE_ALIASES.get(raw) + if resolved is None: + raise ValueError( + f"unknown audio profile {profile_id!r}; " + f"supported: {', '.join(SUPPORTED_PROFILES)}" + ) + mic, speaker = cls.build_backends(resolved, audio_client=audio_client) + return cls(mic=mic, speaker=speaker, profile_id=resolved, + _audio_client=audio_client) diff --git a/vendor/Sanad/voice/audio_manager.py b/vendor/Sanad/voice/audio_manager.py new file mode 100644 index 0000000..bff89cf --- /dev/null +++ b/vendor/Sanad/voice/audio_manager.py @@ -0,0 +1,931 @@ +"""Audio I/O manager — recording and playback via PyAudio. + +Handles microphone capture, speaker playback, and speaker-monitor recording. 
+Thread-safe; one playback at a time via play_lock. + +Device selection is dynamic — read from voice.audio_devices on each refresh. +""" + +from __future__ import annotations + +import json +import subprocess +import threading +import time +import wave +from pathlib import Path +from typing import Any + +try: + import numpy as np + _HAS_NUMPY = True +except ImportError: + np = None + _HAS_NUMPY = False + +try: + import pyaudio +except ImportError: + pyaudio = None # optional — only needed for local PCM playback + +# G1 AudioClient — used to route playback through the robot chest speaker +# via DDS `PlayStream` (the same pipe Gemini uses). Without this, WAV +# playback would go to the Jetson's built-in audio codec, which isn't +# wired to any audible output on the G1. +try: + from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient + from unitree_sdk2py.g1.audio.g1_audio_api import ( + ROBOT_API_ID_AUDIO_STOP_PLAY, + ) + _HAS_G1_AUDIO = True +except ImportError: + AudioClient = None + ROBOT_API_ID_AUDIO_STOP_PLAY = 0 + _HAS_G1_AUDIO = False + +from Project.Sanad.config import ( + CHANNELS, + CHUNK_SIZE, + RECEIVE_SAMPLE_RATE, + SINK as DEFAULT_SINK, + SOURCE as DEFAULT_SOURCE, + MONITOR_SOURCE, +) +from Project.Sanad.core.logger import get_logger +from Project.Sanad.voice import audio_devices as ad + +log = get_logger("audio_manager") + +FORMAT = pyaudio.paInt16 if pyaudio else 8 + +# Cached current selection — updated by refresh_devices() +_DEVICE_LOCK = threading.Lock() +_current_sink = DEFAULT_SINK +_current_source = DEFAULT_SOURCE + + +def _run_pactl(args: list[str]) -> subprocess.CompletedProcess[str]: + return subprocess.run(["pactl", *args], check=True, text=True, capture_output=True) + + +def _resolve_devices() -> tuple[str, str]: + """Return current (sink, source) — falls back to config defaults.""" + try: + cur = ad.current_selection() + sink = cur.get("sink") or DEFAULT_SINK + source = cur.get("source") or DEFAULT_SOURCE + return sink, source + 
except Exception as exc: + log.warning("Could not resolve audio devices: %s", exc) + return DEFAULT_SINK, DEFAULT_SOURCE + + +def ensure_audio_defaults(): + """Re-scan all USB ports, resolve the active profile, set pactl defaults. + + This is called at startup AND before every playback/recording so that + even if the user unplugs/re-plugs a device into a different port, the + correct sink/source is always used. + """ + try: + result = ad.apply_current_selection() + cur = result.get("selection", {}) + sink = cur.get("sink", "") + source = cur.get("source", "") + with _DEVICE_LOCK: + global _current_sink, _current_source + _current_sink = sink or DEFAULT_SINK + _current_source = source or DEFAULT_SOURCE + except Exception as exc: + log.warning("Audio defaults not applied: %s", exc) + + +class _PulseOpenFailed(RuntimeError): + """Signal from `_play_pcm_via_pulse` that PortAudio refused to open the + output stream (sink gone, bad I/O combination, etc.) — lets `play_wav` + fall back to G1 DDS chest playback so the user still hears the clip.""" + + +class AudioManager: + def __init__(self): + if pyaudio is None: + raise RuntimeError( + "pyaudio not installed — AudioManager cannot play local PCM. " + "Install with `pip install pyaudio` (needs portaudio headers), " + "or rely on the G1 speaker via AudioClient.PlayStream." + ) + self.pya = pyaudio.PyAudio() + self.play_lock = threading.Lock() + # Lazily-initialised G1 DDS audio client (for play_wav → chest speaker) + self._g1_audio_client: Any = None + # G1 playback state — present during an active play_wav() call, + # None when idle. Mutated by pause_playback/resume_playback/stop_playback + # from other threads while _play_pcm_via_g1 holds play_lock. 
+ self._play_state_lock = threading.Lock() + self._play_state: dict[str, Any] | None = None + # Resolve devices and set PulseAudio defaults at startup + self.refresh_devices() + ensure_audio_defaults() + + def _get_g1_audio_client(self): + """Return a cached G1 AudioClient (DDS) — creates on first use. + + Assumes `ChannelFactoryInitialize` has already been called (our + ArmController does this at startup on eth0). Returns None if the + Unitree SDK is unavailable or init fails. + """ + if not _HAS_G1_AUDIO: + return None + if self._g1_audio_client is not None: + return self._g1_audio_client + try: + c = AudioClient() + c.SetTimeout(5.0) + c.Init() + try: + c.SetVolume(100) + except Exception: + pass + self._g1_audio_client = c + log.info("G1 AudioClient initialized (for chest-speaker playback)") + except Exception as exc: + log.warning("G1 AudioClient init failed: %s", exc) + self._g1_audio_client = None + return self._g1_audio_client + + def refresh_devices(self) -> dict[str, str]: + """Re-read selected sink/source from audio_devices module.""" + sink, source = _resolve_devices() + with _DEVICE_LOCK: + global _current_sink, _current_source + _current_sink, _current_source = sink, source + log.info("AudioManager devices refreshed: sink=%s source=%s", sink, source) + return {"sink": sink, "source": source} + + @property + def current_sink(self) -> str: + with _DEVICE_LOCK: + return _current_sink + + @property + def current_source(self) -> str: + with _DEVICE_LOCK: + return _current_source + + def close(self): + self.pya.terminate() + + def sample_width(self) -> int: + return self.pya.get_sample_size(FORMAT) + + # -- playback -- + + def play_pcm(self, pcm_bytes: bytes, channels: int, sample_rate: int, sample_width: int): + with self.play_lock: + ensure_audio_defaults() + stream = self.pya.open( + format=self.pya.get_format_from_width(sample_width), + channels=channels, + rate=sample_rate, + output=True, + frames_per_buffer=CHUNK_SIZE, + ) + try: + frame_bytes = 
CHUNK_SIZE * channels * sample_width + for offset in range(0, len(pcm_bytes), frame_bytes): + stream.write(pcm_bytes[offset : offset + frame_bytes]) + finally: + stream.stop_stream() + stream.close() + + # Sink-name substrings that mean "PulseAudio routes this somewhere + # audible without DDS" — extend the tuple to add more USB cards (e.g. + # hollyland sink). Matched case-insensitively. + _PULSE_SINK_MARKERS = ("anker", "powerconf", "hollyland") + # Sample rate Anker PowerConf (and most USB UAC1 cards) accept natively + # — used as the resample target before opening a PortAudio stream so + # we don't hit paInvalidSampleRate when the WAV's native rate + # (24kHz from Gemini TTS, 22050 from old TTS, etc.) doesn't match + # the card's HW caps. + _PULSE_TARGET_RATE = 48_000 + + @staticmethod + def _resample_pcm16(pcm_bytes: bytes, channels: int, + src_rate: int, dst_rate: int) -> bytes: + """Linear-interpolation resample of int16 PCM. numpy-only (no scipy) + — matches the pattern used by `_play_pcm_via_g1`. + + Returns the resampled PCM bytes (same channel layout). No-op when + rates already match. Requires numpy (caller guards with _HAS_NUMPY). + """ + if src_rate == dst_rate or not pcm_bytes: + return pcm_bytes + arr = np.frombuffer(pcm_bytes, dtype=np.int16) + if channels > 1: + # De-interleave so each channel resamples independently + # (cheap on numpy; avoids stereo→mono surprises). 
+ if arr.size % channels != 0: + arr = arr[: arr.size - (arr.size % channels)] + arr = arr.reshape(-1, channels) + n_in = arr.shape[0] + n_out = max(1, int(n_in * dst_rate / src_rate)) + xp = np.arange(n_in, dtype=np.float64) + x_new = np.linspace(0, n_in, n_out, endpoint=False) + cols = [ + np.interp(x_new, xp, arr[:, ch].astype(np.float64)) + for ch in range(channels) + ] + out = np.column_stack(cols).astype(np.int16) + return out.tobytes() + n_in = arr.size + n_out = max(1, int(n_in * dst_rate / src_rate)) + out = np.interp( + np.linspace(0, n_in, n_out, endpoint=False), + np.arange(n_in, dtype=np.float64), + arr.astype(np.float64), + ).astype(np.int16) + return out.tobytes() + + def _active_sink_name(self) -> str: + """Return the currently-tracked default sink name, ORIGINAL case + preserved. + + Reads `self.current_sink` which is kept in lock-step with pactl + defaults by `refresh_devices()` (called by the dashboard Apply + endpoint and by the live-Gemini watcher on profile swaps). Empty + string if nothing's tracked yet. + + IMPORTANT: PulseAudio sink names are CASE-SENSITIVE. paplay + --device= needs the exact name pactl uses (e.g. + `alsa_output.usb-Anker_PowerConf_A3321-DEV-SN1-01.analog-stereo`). + Routing-decision substring checks (against `_PULSE_SINK_MARKERS`) + lowercase BOTH sides explicitly so the case-sensitivity of the + sink name doesn't break marker matching. + """ + try: + return (self.current_sink or "").strip() + except Exception: + return "" + + def play_wav(self, path: Path, + record_name: str | None = None) -> dict[str, Any]: + """Play a WAV file through the speaker that matches the active + PulseAudio default sink: + + • Default sink is a USB conference speaker (Anker PowerConf, + Hollyland, anything matching `_PULSE_SINK_MARKERS`) → write + via PyAudio → PortAudio 'pulse' device → PulseAudio default + sink. 
This works even when the user picked the device via + the dashboard's "Manual sink/source override" (no profile + id) — we key off the sink name, not the profile. + • Default sink is the Jetson platform-sound (or anything that + doesn't match a marker) → use G1 DDS (`AudioClient.PlayStream`) + because platform-sound isn't wired to any audible speaker on + the G1; only the DDS pipe reaches the chest loudspeaker. + + `record_name` is purely a label surfaced via `playback_status()` + so the dashboard can show "Now playing: t6_1" etc. + """ + with wave.open(str(path), "rb") as wf: + channels = wf.getnchannels() + sw = wf.getsampwidth() + rate = wf.getframerate() + data = wf.readframes(wf.getnframes()) + + sink = self._active_sink_name() + sink_lc = sink.lower() + # Marker check is case-insensitive; the original `sink` (with case + # preserved) is what gets passed to paplay --device. + use_pulse = any(m in sink_lc for m in self._PULSE_SINK_MARKERS) + client = self._get_g1_audio_client() if not use_pulse else None + + if not use_pulse and client is not None and _HAS_NUMPY and sw == 2: + log.info("play_wav route=g1_dds sink=%s record=%s", + sink or "?", record_name or "?") + self._play_pcm_via_g1(data, channels, rate, record_name=record_name) + route = "g1_dds" + else: + if not use_pulse and _HAS_G1_AUDIO and client is None: + log.warning("play_wav: non-PulseAudio sink but G1 AudioClient " + "unavailable — falling back to PulseAudio default") + # Prefer paplay subprocess when it's installed — bypasses + # PortAudio (which on this Jetson's conda env doesn't expose a + # 'pulse' device, leading to PyAudio defaulting to the silent + # Jetson platform-sound card). paplay routes through PulseAudio + # at the daemon level so audio actually reaches the Anker sink. 
+ use_paplay = bool(self._paplay_binary()) + try: + if use_paplay: + log.info("play_wav route=paplay sink=%s record=%s", + sink or "default", record_name or "?") + self._play_pcm_via_paplay(data, channels, rate, sw, + record_name=record_name) + route = "paplay" + else: + log.info("play_wav route=pulse sink=%s record=%s " + "(paplay not installed — using PyAudio)", + sink or "default", record_name or "?") + self._play_pcm_via_pulse(data, channels, rate, sw, + record_name=record_name) + route = "pulse" + except _PulseOpenFailed as exc: + # paplay spawn failed, USB device gone mid-flight, etc. + # Fall back to DDS chest if available so the user gets + # audio out of *something* rather than silence. + fb_client = self._get_g1_audio_client() + if fb_client is not None and _HAS_NUMPY and sw == 2: + log.warning("play_wav route=%s failed (%s); falling " + "back to g1_dds", + "paplay" if use_paplay else "pulse", exc) + self._play_pcm_via_g1(data, channels, rate, + record_name=record_name) + route = ("paplay" if use_paplay else "pulse") + "_failed_to_g1_dds" + else: + log.warning("play_wav pulse path failed (%s); no DDS " + "fallback available", exc) + route = ("paplay" if use_paplay else "pulse") + "_failed" + + duration = len(data) / (rate * channels * sw) if rate else 0 + return {"path": str(path), "duration_seconds": round(duration, 3), + "route": route, "sink": sink or "default"} + + # -- G1 DDS-routed playback -- + + _G1_STREAM_APP = "sanad_playback" + _G1_HW_RATE = 16_000 + + def stop_playback(self) -> None: + """Stop any in-flight G1 DDS audio stream + tear down the playback + state so a pause/resume cycle can't keep trying. + + Used by the dashboard's Stop button. Safe to call even when + nothing is playing — the DDS call is idempotent. 
+ """ + with self._play_state_lock: + if self._play_state is not None: + self._play_state["stop"] = True + client = self._get_g1_audio_client() + if client is None: + return + try: + client._Call( + ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": self._G1_STREAM_APP}), + ) + log.info("G1 audio stream stopped (app=%s)", self._G1_STREAM_APP) + except Exception as exc: + log.warning("stop_playback failed: %s", exc) + + def pause_playback(self) -> dict[str, Any]: + """Pause the active G1 playback. The play loop notices the flag, + sends STOP_PLAY to halt the chest speaker, and advances the saved + position by the time elapsed since this chunk started. resume() + re-pushes from there. No-op if nothing is playing.""" + with self._play_state_lock: + if self._play_state is None: + return {"ok": False, "reason": "nothing playing"} + if self._play_state["paused"]: + return {"ok": True, "already": True, "paused": True} + self._play_state["paused"] = True + log.info("Playback paused (record=%s)", + self._play_state.get("record_name") or "?") + return {"ok": True, "paused": True} + + def resume_playback(self) -> dict[str, Any]: + """Resume after a pause. The play loop re-pushes pcm[pos:] to G1 + and re-enters the wait/poll cycle.""" + with self._play_state_lock: + if self._play_state is None: + return {"ok": False, "reason": "nothing playing"} + if not self._play_state["paused"]: + return {"ok": True, "already": True, "paused": False} + self._play_state["paused"] = False + log.info("Playback resumed (record=%s)", + self._play_state.get("record_name") or "?") + return {"ok": True, "resumed": True} + + def playback_status(self) -> dict[str, Any]: + """Snapshot of the current playback for the dashboard. Returns + `playing=False` when idle. 
`position_sec` is best-effort — + derived from elapsed wall time since the last PlayStream call.""" + with self._play_state_lock: + if self._play_state is None: + return {"playing": False, "paused": False, "record_name": None, + "position_sec": 0.0, "duration_sec": 0.0} + rate = self._play_state["rate"] or 1 + total = self._play_state["total_samples"] + pos = self._play_state["pos"] + if (not self._play_state["paused"] + and self._play_state["play_started_at"] > 0): + elapsed = time.time() - self._play_state["play_started_at"] + advance = int(max(0.0, elapsed) * rate) + pos = min(self._play_state["play_started_pos"] + advance, total) + return { + "playing": True, + "paused": self._play_state["paused"], + "record_name": self._play_state.get("record_name"), + "position_sec": round(pos / rate, 2), + "duration_sec": round(total / rate, 2) if rate else 0.0, + } + + def _play_pcm_via_g1(self, pcm_bytes: bytes, channels: int, + source_rate: int, + record_name: str | None = None) -> None: + """Stream int16 PCM to the G1 chest speaker via AudioClient.PlayStream, + with pause / resume / stop support. + + Converts stereo → mono and resamples to 16 kHz (the rate + AudioClient expects). The play loop pushes pcm[pos:] in one + PlayStream call, then polls _play_state every 50 ms while the + clip drains so pause / stop are honoured promptly. Pause sends + STOP_PLAY, snapshots the position from elapsed wall time, then + loops until resumed or stopped. Resume re-pushes pcm[pos:]. 
+ """ + client = self._get_g1_audio_client() + if client is None: + raise RuntimeError("G1 AudioClient not available") + + arr = np.frombuffer(pcm_bytes, dtype=np.int16) + if channels == 2 and arr.size % 2 == 0: + arr = arr.reshape(-1, 2).mean(axis=1).astype(np.int16) + if source_rate != self._G1_HW_RATE and arr.size: + target_len = max(1, int(len(arr) * self._G1_HW_RATE / source_rate)) + arr = np.interp( + np.linspace(0, len(arr), target_len, endpoint=False), + np.arange(len(arr)), + arr.astype(np.float64), + ).astype(np.int16) + rate = self._G1_HW_RATE + total_samples = len(arr) + + with self._play_state_lock: + self._play_state = { + "record_name": record_name, + "rate": rate, + "total_samples": total_samples, + "pos": 0, + "paused": False, + "stop": False, + "play_started_at": 0.0, + "play_started_pos": 0, + } + + # play_lock serialises overlapping play_wav() calls; pause/resume/stop + # do NOT take it (they only touch _play_state under _play_state_lock). + with self.play_lock: + try: + while True: + # Snapshot the state for this iteration + with self._play_state_lock: + st = self._play_state + if st is None or st["stop"]: + break + if st["paused"]: + paused_now = True + sub_bytes = None + sub_total_sec = 0.0 + else: + paused_now = False + pos = st["pos"] + if pos >= total_samples: + break + sub_bytes = arr[pos:].tobytes() + sub_total_sec = (total_samples - pos) / rate + st["play_started_pos"] = pos + st["play_started_at"] = time.time() + + if paused_now: + time.sleep(0.1) + continue + + # Push remainder to G1 + stream_id = f"wav_{int(time.time() * 1000)}" + try: + client._Call( + ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": self._G1_STREAM_APP}), + ) + except Exception: + pass + time.sleep(0.15) + client.PlayStream(self._G1_STREAM_APP, stream_id, sub_bytes) + + # Poll for pause / stop while the clip drains + poll_deadline = time.time() + sub_total_sec + 0.3 + interrupted = False + while time.time() < poll_deadline: + with self._play_state_lock: + if 
self._play_state is None or self._play_state["stop"]: + interrupted = True + try: + client._Call( + ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": self._G1_STREAM_APP}), + ) + except Exception: + pass + break + if self._play_state["paused"]: + # Halt G1 and snapshot the new position + try: + client._Call( + ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": self._G1_STREAM_APP}), + ) + except Exception: + pass + elapsed = (time.time() + - self._play_state["play_started_at"]) + advance = int(max(0.0, elapsed) * rate) + self._play_state["pos"] = min( + self._play_state["play_started_pos"] + advance, + total_samples, + ) + interrupted = True + break + time.sleep(0.05) + + if not interrupted: + # Finished naturally — mark fully consumed and exit + with self._play_state_lock: + if self._play_state is not None: + self._play_state["pos"] = total_samples + try: + client._Call( + ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": self._G1_STREAM_APP}), + ) + except Exception: + pass + break + finally: + with self._play_state_lock: + self._play_state = None + + # paplay binary path. Cached on first probe so we don't keep re-shelling + # `which paplay` on every play_wav call. None = probe pending; "" = absent. + _PAPLAY_BIN: str | None = None + + @classmethod + def _paplay_binary(cls) -> str: + """Return the absolute path to `paplay` if installed, else "". + Cached for the lifetime of the process — paplay doesn't appear/ + disappear mid-run.""" + if cls._PAPLAY_BIN is None: + from shutil import which + cls._PAPLAY_BIN = which("paplay") or "" + return cls._PAPLAY_BIN + + def _play_pcm_via_paplay(self, pcm_bytes: bytes, channels: int, + sample_rate: int, sample_width: int, + record_name: str | None = None) -> None: + """Play int16 PCM via the `paplay` subprocess. Bypasses PortAudio + entirely — we just pipe raw PCM into paplay's stdin and let + PulseAudio do the resampling/format conversion/device routing. 
+ + Why this exists: on conda's bundled PyAudio (the build shipped in + the gemini_sdk env on this Jetson), PortAudio does NOT enumerate a + 'pulse' device — only direct ALSA hw:N entries. Opening + `output_device_index=None` then defaults to hw:0 which is the + Jetson `platform-sound` card → silent (not wired to any speaker). + Opening a discrete `hw:N` for the Anker grabs the card exclusively + and PulseAudio drops it. Neither path actually plays through the + Anker. paplay sidesteps the whole stack. + + Targets the dashboard's currently-selected sink by name via + `--device=`, which guarantees the audio goes to the same + place pactl set-default-sink would have routed. + + Reuses the same `_play_state` machinery as the DDS path so the + dashboard's Pause / Stop / position-meter behave identically. + """ + sink_name = self._active_sink_name() + bytes_per_sample = max(1, channels * sample_width) + total_bytes = len(pcm_bytes) - (len(pcm_bytes) % bytes_per_sample) + total_samples = total_bytes // bytes_per_sample + chunk_bytes = max( + bytes_per_sample, (sample_rate // 10) * bytes_per_sample, + ) + # paplay format codes: s16le is the only one we ever produce here. + fmt = "s16le" if sample_width == 2 else \ + "s32le" if sample_width == 4 else \ + "u8" + # Keep cmd minimal — older paplay versions reject unknown long + # options and exit immediately (manifests as instant paplay death + + # a flood of BrokenPipeError on stdin write). --raw / --format / + # --rate / --channels / --device are all standard since 0.9.x. 
+ cmd = [ + self._paplay_binary(), "--raw", + f"--format={fmt}", f"--rate={sample_rate}", + f"--channels={channels}", + ] + if sink_name: + cmd.extend(["--device", sink_name]) + + with self._play_state_lock: + self._play_state = { + "record_name": record_name, + "rate": sample_rate, + "total_samples": total_samples, + "pos": 0, + "paused": False, + "stop": False, + "play_started_at": 0.0, + "play_started_pos": 0, + } + + with self.play_lock: + try: + while True: + with self._play_state_lock: + st = self._play_state + if st is None or st["stop"]: + break + if st["paused"]: + time.sleep(0.1) + continue + pos = st["pos"] + if pos >= total_samples: + break + st["play_started_pos"] = pos + st["play_started_at"] = time.time() + + byte_pos = pos * bytes_per_sample + local_pos = pos + try: + proc = subprocess.Popen( + cmd, stdin=subprocess.PIPE, + stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, + ) + except Exception as exc: + log.warning("paplay spawn failed (%s) — signalling " + "DDS fallback", exc) + with self._play_state_lock: + self._play_state = None + raise _PulseOpenFailed(str(exc)) from exc + + # Brief settle so paplay can validate args + connect to + # PulseAudio. If it's going to die (bad sink, format, + # connection refused), it dies within ~50ms. Without + # this check, the next stdin.write() would get a sea + # of BrokenPipeError messages and the outer loop would + # keep re-spawning forever. 
+ time.sleep(0.05) + if proc.poll() is not None: + try: + err = (proc.stderr.read() or b"").decode( + "utf-8", "replace").strip()[:400] + except Exception: + err = "" + log.warning("paplay died immediately rc=%d device=%s err=%s", + proc.returncode, sink_name or "default", err) + with self._play_state_lock: + self._play_state = None + raise _PulseOpenFailed( + f"paplay rc={proc.returncode} {err or 'no stderr'}" + ) + + interrupted = False + fatal_exc: Exception | None = None + try: + while byte_pos < total_bytes: + with self._play_state_lock: + ps = self._play_state + if ps is None or ps["stop"]: + interrupted = True + break + if ps["paused"]: + ps["pos"] = local_pos + interrupted = True + break + end = min(byte_pos + chunk_bytes, total_bytes) + try: + proc.stdin.write(pcm_bytes[byte_pos:end]) + proc.stdin.flush() + except (BrokenPipeError, OSError) as exc: + # paplay died mid-stream (USB unplugged, + # PulseAudio crashed, etc.). Abort entire + # clip — DO NOT let the outer loop respawn + # paplay; we just got hundreds of + # broken-pipe lines as a result of that bug. + try: + err = (proc.stderr.read() or b"").decode( + "utf-8", "replace").strip()[:400] + except Exception: + err = "" + log.warning("paplay died mid-stream (%s) " + "device=%s stderr=%s", + exc, sink_name or "default", err) + fatal_exc = _PulseOpenFailed( + f"paplay died: {err or exc}") + break + byte_pos = end + local_pos = byte_pos // bytes_per_sample + finally: + try: + proc.stdin.close() + except Exception: + pass + if interrupted or fatal_exc is not None: + proc.terminate() + try: + rc = proc.wait(timeout=3.0) + except subprocess.TimeoutExpired: + proc.kill() + rc = -1 + if rc != 0 and not interrupted and fatal_exc is None: + # Drained successfully but paplay exited non-zero + # — surface stderr so the failure isn't silent. 
+ try: + err = (proc.stderr.read() or b"").decode( + "utf-8", "replace").strip()[:300] + except Exception: + err = "" + log.warning("paplay exit rc=%d device=%s err=%s", + rc, sink_name or "default", err) + + if fatal_exc is not None: + # Re-raise OUTSIDE the inner try/finally so play_wav + # catches it and falls back to G1 DDS chest. Without + # this, the outer `while True` loop would respawn + # paplay and we'd loop indefinitely. + with self._play_state_lock: + self._play_state = None + raise fatal_exc + + if not interrupted: + with self._play_state_lock: + if self._play_state is not None: + self._play_state["pos"] = total_samples + break + finally: + with self._play_state_lock: + self._play_state = None + + def _play_pcm_via_pulse(self, pcm_bytes: bytes, channels: int, + sample_rate: int, sample_width: int, + record_name: str | None = None) -> None: + """Play int16 PCM via PyAudio (→ PulseAudio default sink) with + pause / resume / stop support. + + Mirrors `_play_pcm_via_g1`'s state-poll pattern so the dashboard's + Play / Pause / Stop / Position buttons behave identically whether + the active profile uses DDS or PyAudio. Writes ~100 ms chunks so + pause / stop latency is bounded. + """ + # Make sure pactl defaults reflect the current selection — this is + # a no-op when the watcher or dashboard Apply already aligned them. + ensure_audio_defaults() + + # Resample to a USB-native rate before opening the stream. + # PortAudio's ALSA backend (the one PyAudio uses) opens the underlying + # hardware via the ALSA 'pulse' plugin, which on this Jetson does + # NOT advertise rate conversion in `snd_pcm_hw_params` — so opening + # at the WAV's native rate (24kHz from Gemini TTS, etc.) gets + # rejected with paInvalidSampleRate. Resampling app-side mirrors + # what `_play_pcm_via_g1` already does for the DDS path. Anker + # PowerConf and most USB UAC1 cards report 48kHz s16le stereo + # natively, so target that. 
+ if _HAS_NUMPY and sample_width == 2 and sample_rate != self._PULSE_TARGET_RATE: + try: + pcm_bytes = self._resample_pcm16( + pcm_bytes, channels, sample_rate, self._PULSE_TARGET_RATE, + ) + log.info("_play_pcm_via_pulse: resampled %dHz → %dHz " + "(USB card native rate)", + sample_rate, self._PULSE_TARGET_RATE) + sample_rate = self._PULSE_TARGET_RATE + except Exception as exc: + log.warning("_play_pcm_via_pulse: resample failed (%s) — " + "trying native rate, may hit paInvalidSampleRate", + exc) + + bytes_per_sample = max(1, channels * sample_width) + total_bytes = len(pcm_bytes) - (len(pcm_bytes) % bytes_per_sample) + total_samples = total_bytes // bytes_per_sample + chunk_bytes = max(bytes_per_sample, (sample_rate // 10) * bytes_per_sample) + + with self._play_state_lock: + self._play_state = { + "record_name": record_name, + "rate": sample_rate, + "total_samples": total_samples, + "pos": 0, + "paused": False, + "stop": False, + "play_started_at": 0.0, + "play_started_pos": 0, + } + + # play_lock serialises overlapping play_wav() calls; pause/resume/stop + # only touch _play_state under _play_state_lock so they don't block. + with self.play_lock: + try: + while True: + # Snapshot — decide whether to play, wait, or exit + with self._play_state_lock: + st = self._play_state + if st is None or st["stop"]: + break + if st["paused"]: + paused_now = True + pos = 0 + else: + paused_now = False + pos = st["pos"] + if pos >= total_samples: + break + st["play_started_pos"] = pos + st["play_started_at"] = time.time() + if paused_now: + time.sleep(0.1) + continue + + byte_pos = pos * bytes_per_sample + local_pos = pos + try: + stream = self.pya.open( + format=self.pya.get_format_from_width(sample_width), + channels=channels, + rate=sample_rate, + output=True, + frames_per_buffer=CHUNK_SIZE, + ) + except Exception as exc: + # PortAudio open failed (sink gone, paBadIODevice + # combination, etc.). 
Signal the caller so play_wav + # can fall back to DDS chest rather than silently + # dropping the clip. + log.warning("Pulse playback open failed: %s — " + "signalling caller for DDS fallback", exc) + with self._play_state_lock: + self._play_state = None + raise _PulseOpenFailed(str(exc)) from exc + interrupted = False + try: + while byte_pos < total_bytes: + with self._play_state_lock: + ps = self._play_state + if ps is None or ps["stop"]: + interrupted = True + break + if ps["paused"]: + ps["pos"] = local_pos + interrupted = True + break + end = min(byte_pos + chunk_bytes, total_bytes) + try: + stream.write(pcm_bytes[byte_pos:end]) + except Exception as exc: + log.warning("Pulse playback write failed: %s", exc) + interrupted = True + break + byte_pos = end + local_pos = byte_pos // bytes_per_sample + finally: + try: + stream.stop_stream() + stream.close() + except Exception: + pass + + if not interrupted: + with self._play_state_lock: + if self._play_state is not None: + self._play_state["pos"] = total_samples + break + # Interrupted by pause → outer loop will wait for resume + # or exit on stop. Interrupted by stop → outer loop exits. 
+ finally: + with self._play_state_lock: + self._play_state = None + + # -- recording -- + + def record_mic(self, duration_sec: float) -> bytes: + """Record from default mic for *duration_sec* seconds, return raw PCM.""" + ensure_audio_defaults() + stream = self.pya.open( + format=FORMAT, + channels=CHANNELS, + rate=RECEIVE_SAMPLE_RATE, + input=True, + frames_per_buffer=CHUNK_SIZE, + ) + frames: list[bytes] = [] + total_chunks = int(RECEIVE_SAMPLE_RATE / CHUNK_SIZE * duration_sec) + try: + for _ in range(total_chunks): + frames.append(stream.read(CHUNK_SIZE, exception_on_overflow=False)) + finally: + stream.stop_stream() + stream.close() + return b"".join(frames) + + def save_wav(self, pcm_bytes: bytes, path: Path, channels: int, sample_rate: int): + path.parent.mkdir(parents=True, exist_ok=True) + with wave.open(str(path), "wb") as wf: + wf.setnchannels(channels) + wf.setsampwidth(self.sample_width()) + wf.setframerate(sample_rate) + wf.writeframes(pcm_bytes) diff --git a/vendor/Sanad/voice/live_voice.py b/vendor/Sanad/voice/live_voice.py new file mode 100644 index 0000000..1075841 --- /dev/null +++ b/vendor/Sanad/voice/live_voice.py @@ -0,0 +1,73 @@ +"""Live Voice Commands — voice-to-arm phrase trigger dispatcher. + +Listens to GeminiSubprocess user transcripts, matches against +sanad_arm.txt phrases, and fires ARM.trigger_action_by_id. + +Endpoints: + POST /start begin polling transcripts + POST /stop stop polling + POST /deferred-mode?enabled toggle instant vs deferred trigger + POST /trigger-enabled?enabled master gate — allow arm actions or not + GET /status running, last heard, last action, etc. 
def _loop():
    """Return the application's LiveVoiceLoop instance.

    Imported at call time from `Project.Sanad.main`, so the value current
    at request time is used.

    Raises:
        HTTPException: 503 when the loop has not been initialised.
    """
    from Project.Sanad.main import live_voice
    if live_voice is not None:
        return live_voice
    raise HTTPException(503, "LiveVoiceLoop not initialized.")
class LiveVoiceLoop:
    """Poll the voice supervisor's user transcripts and fire arm actions.

    A daemon thread (``_poll_loop``) repeatedly reads
    ``Project.Sanad.main.live_sub.user_transcript``, offers every new line to
    ``maybe_trigger_arm`` for each phrase set in ``wake_dispatch``, and fires
    a queued deferred trigger once its fallback time has passed.

    Transcripts are de-duplicated by text: a line is dispatched at most once
    for as long as it stays in the supervisor's transcript deque.  Known
    limitation: the exact same sentence repeated while the first copy is
    still in the deque is not dispatched a second time.
    """

    def __init__(self, voice_client, arm, wake_mgr, audio_mgr):
        """Store collaborators, set config defaults and load the phrase table.

        Args:
            voice_client: object exposing ``.connected`` (read by ``status``).
            arm: Sanad's motion/arm_controller (kept, not used for triggering).
            wake_mgr: stored as-is; not used by this class.
            audio_mgr: stored as-is; only its presence is reported by ``status``.
        """
        self.voice_client = voice_client
        self.arm = arm  # Sanad's motion/arm_controller (not used for trigger)
        self.wake_mgr = wake_mgr
        self.audio_mgr = audio_mgr

        self._running = False
        self._poll_thread: threading.Thread | None = None
        self._stop_event = threading.Event()

        # Deferred-trigger toggle (fire on phrase match vs fire after AI responds)
        self.deferred_mode = DEFERRED_DEFAULT
        # Master arm-trigger gate — when False, transcripts are still
        # captured (you can watch them on the dashboard) but NO arm
        # actions fire. Defaults to OFF so the robot doesn't move
        # unexpectedly until the operator opts in.
        self.trigger_enabled = TRIGGER_ENABLED_DEFAULT

        # Trigger history (dashboard log)
        self.triggers: deque[dict[str, Any]] = deque(maxlen=TRIGGER_LOG_SIZE)
        self.last_heard: str = ""
        self.last_action: str = ""

        # ASR dispatch state (SimpleNamespace — maybe_trigger_arm mutates attrs)
        self.state = SimpleNamespace()

        # Load sanad_arm.txt on first construction
        self.wake_dispatch: dict[int, set[str]] = {}
        self.option_by_id: dict[int, Any] = {}
        self.sanad_arm: Any = None
        self._load_dispatch()

        # Texts already processed, so a transcript line never re-fires.
        # Guarded by _seen_lock because both the poll thread and
        # set_trigger_enabled() (caller's thread) update it.
        self._seen_transcripts: set[str] = set()
        self._seen_lock = threading.Lock()

    # ── phrase dispatch loader ────────────────────────────────────
    def _load_dispatch(self):
        """Populate ``sanad_arm``, ``option_by_id`` and ``wake_dispatch``.

        Any failure (import error, parse error) is logged and leaves the
        loop with no arm and an empty dispatch table, i.e. triggering off.
        """
        try:
            from Project.Sanad.motion.sanad_arm_controller import ARM, OPTION_LIST, OPTION_BY_ID
            self.sanad_arm = ARM
            self.option_by_id = OPTION_BY_ID
            # Voice-trigger policy: SDK built-ins only.
            # JSONL replays (option.file set) are dashboard-only — voice
            # phrase blocks for laugh/bird/change_battery/move_* never
            # reach `wake_dispatch`, so a matched phrase for one of those
            # silently no-ops in voice mode.
            sdk_only_options = [o for o in OPTION_LIST if not getattr(o, "file", "")]
            if SANAD_ARM_TXT.exists():
                self.wake_dispatch = load_arm_phrase_dispatch(SANAD_ARM_TXT, sdk_only_options)
                log.info(
                    "loaded %d arm-action phrase sets from %s "
                    "(SDK-only filter: %d/%d options)",
                    len(self.wake_dispatch), SANAD_ARM_TXT.name,
                    len(sdk_only_options), len(OPTION_LIST),
                )
            else:
                log.warning("sanad_arm.txt missing at %s — arm trigger disabled",
                            SANAD_ARM_TXT)
        except Exception as exc:
            log.warning("arm dispatch unavailable: %s", exc)
            self.sanad_arm = None
            self.wake_dispatch = {}

    # ── lifecycle ────────────────────────────────────────────────
    async def start(self) -> None:
        """Start the polling daemon thread (no-op when already running)."""
        if self._running:
            return
        self._stop_event.clear()
        self._running = True
        self._poll_thread = threading.Thread(
            target=self._poll_loop, daemon=True, name="live_voice_loop")
        self._poll_thread.start()
        log.info("LiveVoiceLoop started (deferred=%s, dispatch=%d)",
                 self.deferred_mode, len(self.wake_dispatch))

    async def stop(self) -> None:
        """Signal the poll thread to exit; does not wait for it to finish."""
        self._stop_event.set()
        self._running = False
        log.info("LiveVoiceLoop stopped")

    def set_deferred(self, enabled: bool) -> None:
        """Switch between deferred (True) and instant (False) trigger mode."""
        self.deferred_mode = bool(enabled)

    def set_trigger_enabled(self, enabled: bool) -> None:
        """Master arm-trigger gate. When False, phrase matches are ignored.

        Toggle semantics (no queue memory across the gate):
          - Always clears any in-flight pending trigger so a late
            fallback fire can't happen after disable/enable.
          - On enable: snapshots every transcript currently in the
            live_sub deque as already-seen. Only NEW speech after this
            moment will dispatch — phrases said while the gate was off
            don't suddenly fire when you turn it back on.
        """
        snapshotted = 0
        # Hold the seen-set lock for the whole toggle so the poll thread
        # cannot prune the snapshot between the flip and the marking.
        with self._seen_lock:
            self.trigger_enabled = bool(enabled)

            # Drop pending fallback timer — a queued "fire in 0.6s" from
            # before the toggle must not leak across.
            self.state._pending_arm_wave = False
            self.state._pending_arm_wave_fired = False
            self.state._pending_arm_trigger_fn = None
            self.state._pending_arm_fallback_time = 0.0

            if self.trigger_enabled:
                try:
                    from Project.Sanad.main import live_sub
                    if live_sub is not None:
                        for txt in list(live_sub.user_transcript):
                            if txt and txt not in self._seen_transcripts:
                                self._seen_transcripts.add(txt)
                                snapshotted += 1
                except Exception as exc:
                    log.warning("set_trigger_enabled: snapshot failed: %s", exc)

        log.info("trigger_enabled=%s (pending cleared, %d transcripts marked seen)",
                 self.trigger_enabled, snapshotted)

    # ── poll loop ────────────────────────────────────────────────
    def _poll_loop(self):
        """Poll GeminiSubprocess.user_transcript for new user texts AND
        fire any deferred-mode arm trigger whose fallback timer elapsed.

        Without the pending-check, a deferred trigger (`fire_on_wake_match=
        False`) would only fire when the NEXT transcript arrives — so if
        the user says one sentence and stops, the arm never moves.
        """
        while not self._stop_event.is_set():
            self._check_transcripts()
            self._check_pending_trigger()
            self._stop_event.wait(POLL_INTERVAL_SEC)

    def _check_pending_trigger(self):
        """Fire a queued deferred trigger if its fallback time has passed."""
        # Master gate — same check as _dispatch
        if not self.trigger_enabled:
            return
        if not getattr(self.state, "_pending_arm_wave", False):
            return
        if getattr(self.state, "_pending_arm_wave_fired", False):
            return
        fn = getattr(self.state, "_pending_arm_trigger_fn", None)
        if fn is None:
            return
        fallback_at = float(getattr(self.state, "_pending_arm_fallback_time", 0.0) or 0.0)
        if fallback_at <= 0.0 or time.time() < fallback_at:
            return
        # Gate on arm idle — skip fire if a motion is already running
        # (the pending trigger stays queued and is retried on the next poll).
        if self.sanad_arm is not None and getattr(self.sanad_arm, "_is_busy", False):
            return
        try:
            fn()
        except Exception as exc:
            log.warning("deferred arm trigger failed: %s", exc)
        finally:
            # Consume the pending trigger whether or not fn() succeeded.
            self.state._pending_arm_wave_fired = True
            self.state._pending_arm_wave = False
            self.state._pending_arm_trigger_fn = None

    def _mark_new(self, texts):
        """Return the not-yet-seen entries of *texts* (in order) and sync the seen-set.

        Every returned text is added to ``_seen_transcripts``; afterwards the
        set is pruned to the texts present in *texts*, so it can never grow
        beyond the supervisor's transcript deque.  A text that has left the
        deque can only come back as new speech, so forgetting it is safe.
        Callers that share the set across threads must hold ``_seen_lock``.
        """
        fresh = []
        for text in texts:
            if text not in self._seen_transcripts:
                self._seen_transcripts.add(text)
                fresh.append(text)
        self._seen_transcripts.intersection_update(texts)
        return fresh

    def _check_transcripts(self):
        """Dispatch every transcript line that has not been processed yet."""
        try:
            from Project.Sanad.main import live_sub
        except Exception:
            # Best effort: main module not importable (yet) — try next poll.
            return
        if live_sub is None:
            return
        # Snapshot the deque inside the lock so a concurrent
        # set_trigger_enabled() snapshot is never pruned away.
        with self._seen_lock:
            fresh = self._mark_new(list(live_sub.user_transcript))
        for text in fresh:
            self.last_heard = text
            self._dispatch(text)

    def _dispatch(self, transcript_text: str) -> None:
        """Offer one transcript to every phrase set; record the first that fires."""
        if not self.wake_dispatch or self.sanad_arm is None:
            return
        # Master gate — skip arm triggering entirely when disabled
        if not self.trigger_enabled:
            return
        # Gate trigger on arm idle
        if getattr(self.sanad_arm, "_is_busy", False):
            return

        fire_now = not self.deferred_mode

        for action_id, phrases in self.wake_dispatch.items():
            fn = self._make_trigger_fn(action_id)
            fired = maybe_trigger_arm(
                self.state, transcript_text, phrases,
                fire_on_wake_match=fire_now,
                arm_trigger_fn=fn,
            )
            if fired:
                self._record_trigger(action_id, transcript_text, fire_now)
                break

    def _make_trigger_fn(self, action_id: int):
        """Return a zero-arg callable that triggers *action_id* and logs failures."""
        def _fire():
            try:
                self.sanad_arm.trigger_action_by_id(action_id)
            except Exception as exc:
                log.warning("arm trigger failed (id=%d): %s", action_id, exc)
        return _fire

    def _record_trigger(self, action_id: int, user_text: str, fired_now: bool):
        """Append a trigger entry to the dashboard history and log it."""
        opt = self.option_by_id.get(action_id)
        action_name = opt.name if opt else f"id={action_id}"
        mode = "instant" if fired_now else "deferred"
        self.last_action = action_name
        self.triggers.append({
            "time": datetime.now().strftime("%H:%M:%S"),
            "user_text": user_text,
            "action_id": action_id,
            "action_name": action_name,
            "mode": mode,
        })
        log.info("arm trigger %s (id=%d) for: %r [%s]",
                 action_name, action_id, user_text, mode)

    # ── status (dashboard) ───────────────────────────────────────
    def status(self) -> dict[str, Any]:
        """Return a JSON-friendly snapshot for the dashboard."""
        pending = ""
        # getattr on both flags: `state` starts as an empty namespace and the
        # attributes only exist once a trigger or a toggle has set them.
        has_pending = (
            getattr(self.state, "_pending_arm_wave", False)
            and getattr(self.state, "_pending_arm_trigger_fn", None) is not None
        )
        if has_pending:
            # We can't introspect the action id from fn (closure), but
            # the last triggered line in self.triggers is likely the one.
            pend_name = self.triggers[-1].get("action_name", "") if self.triggers else ""
            pending = f"pending: {pend_name}"

        return {
            "running": self._running,
            "deferred_mode": self.deferred_mode,
            "trigger_enabled": self.trigger_enabled,
            "last_heard": self.last_heard,
            "pending_action": pending,
            "last_action": self.last_action,
            "audio_attached": self.audio_mgr is not None,
            "arm_attached": self.sanad_arm is not None,
            "gemini_connected": bool(
                self.voice_client and self.voice_client.connected),
            "dispatch_actions": len(self.wake_dispatch),
            "triggers": list(self.triggers)[-30:],
        }
# ── Local paths (all pre-downloaded under model/) — sourced from config ──
try:
    from Project.Sanad.core.config_loader import section as _cfg_section
    _TTS = _cfg_section("voice", "local_tts")
except Exception:
    # Config loader unavailable — fall back to the built-in defaults below.
    _TTS = {}

_PROJECT_DIR = Path(__file__).resolve().parent.parent  # Sanad/
_MODEL_ROOT = _PROJECT_DIR / "model"
MODEL_DIR = _MODEL_ROOT / _TTS.get("model_subdir", "speecht5_tts_clartts_ar")
VOCODER_DIR = _MODEL_ROOT / _TTS.get("vocoder_subdir", "speecht5_hifigan")
XVECTOR_PATH = _MODEL_ROOT / _TTS.get("xvector_filename", "arabic_xvector_embedding.pt")

MODEL_ID = str(MODEL_DIR)
VOCODER_ID = str(VOCODER_DIR)
SAMPLE_RATE = _TTS.get("sample_rate", 16000)
CHANNELS = _TTS.get("channels", 1)

# Arabic diacritics (tashkeel) Unicode range – model was trained without them.
_DIACRITICS_RE = re.compile(r"[\u0617-\u061A\u064B-\u0652\u0670\u06D6-\u06ED]")


def strip_diacritics(text: str) -> str:
    """Return *text* with every character matched by ``_DIACRITICS_RE`` removed."""
    stripped = _DIACRITICS_RE.sub("", text)
    return stripped


class LocalTTSEngine:
    """Lazy-loading SpeechT5 text-to-speech engine backed by local model files."""

    def __init__(self):
        # Nothing heavy happens here; the model is loaded on first synthesis.
        self._processor = None
        self._model = None
        self._vocoder = None
        self._speaker_embedding = None
        self._loaded = False
        self._lock = threading.Lock()

    def _ensure_loaded(self):
        """Load processor, model, vocoder and speaker embedding exactly once.

        Raises:
            RuntimeError: when one of the required local paths is missing.
        """
        if self._loaded:
            return
        with self._lock:
            # Re-check under the lock: another thread may have finished loading.
            if self._loaded:
                return

            required = (
                ("Model", MODEL_DIR),
                ("Vocoder", VOCODER_DIR),
                ("XVector", XVECTOR_PATH),
            )
            missing = next(
                ((label, where) for label, where in required if not where.exists()),
                None,
            )
            if missing is not None:
                raise RuntimeError(f"{missing[0]} not found at {missing[1]}")

            # Heavy imports are deferred so importing this module stays cheap.
            import torch
            from transformers import (
                SpeechT5ForTextToSpeech,
                SpeechT5HifiGan,
                SpeechT5Processor,
            )

            self._processor = SpeechT5Processor.from_pretrained(MODEL_ID)
            self._model = SpeechT5ForTextToSpeech.from_pretrained(MODEL_ID)
            self._vocoder = SpeechT5HifiGan.from_pretrained(VOCODER_ID)
            self._speaker_embedding = torch.load(str(XVECTOR_PATH), map_location="cpu")

            self._loaded = True

    @property
    def ready(self) -> bool:
        """True once the model files have been loaded."""
        return self._loaded

    def status(self) -> dict[str, Any]:
        """Report load state, configured paths and whether each path exists."""
        report: dict[str, Any] = {"loaded": self._loaded}
        report["model_dir"] = str(MODEL_DIR)
        report["vocoder_dir"] = str(VOCODER_DIR)
        report["xvector_path"] = str(XVECTOR_PATH)
        report["model_exists"] = MODEL_DIR.exists()
        report["vocoder_exists"] = VOCODER_DIR.exists()
        report["xvector_exists"] = XVECTOR_PATH.exists()
        report["sample_rate"] = SAMPLE_RATE
        return report

    def synthesize(self, text: str) -> bytes:
        """Convert Arabic text to 16 kHz mono int16 PCM bytes.

        Raises:
            RuntimeError: when the model files are missing or the text is
                empty once whitespace and diacritics are removed.
        """
        self._ensure_loaded()
        import torch

        cleaned = strip_diacritics(text.strip())
        if not cleaned:
            raise RuntimeError("Text is empty after stripping diacritics.")

        encoded = self._processor(text=cleaned, return_tensors="pt")

        with torch.no_grad():
            waveform = self._model.generate_speech(
                encoded["input_ids"],
                self._speaker_embedding,
                vocoder=self._vocoder,
            )

        # waveform is a 1-D float32 tensor in [-1, 1] at 16 kHz;
        # scale, clamp to the int16 range and serialise.
        scaled = waveform.numpy() * 32767
        return scaled.clip(-32768, 32767).astype("int16").tobytes()

    def synthesize_wav(self, text: str) -> bytes:
        """Return a complete WAV file (bytes) for the given text."""
        import io
        import wave

        frames = self.synthesize(text)
        sink = io.BytesIO()
        with wave.open(sink, "wb") as writer:
            writer.setnchannels(CHANNELS)
            writer.setsampwidth(2)  # int16
            writer.setframerate(SAMPLE_RATE)
            writer.writeframes(frames)
        return sink.getvalue()
class ModelBrain:
    """Skeleton voice brain — adapt to your provider.

    Holds the audio endpoints and the turn recorder, and runs two
    cooperating coroutines (mic → model, model → speaker) until ``stop()``
    is called.
    """

    def __init__(self, audio_io, recorder, voice_name: Optional[str] = None,
                 system_prompt: str = ""):
        """Keep references to the audio I/O, recorder and session settings.

        Args:
            audio_io: object exposing ``.mic`` and ``.speaker``.
            recorder: per-turn recorder (stored, not used by the skeleton).
            voice_name: provider-specific voice id, or None.
            system_prompt: persona string to seed the session with.
        """
        self._audio = audio_io
        self._mic = audio_io.mic
        self._speaker = audio_io.speaker
        self._recorder = recorder
        self._voice = voice_name
        self._system_prompt = system_prompt
        self._stop_flag = asyncio.Event()

        # TODO: instantiate your provider's client here. Keep the client
        # creation cheap — connection/handshake should happen inside `run()`
        # so reconnects don't require re-building this object.
        # Example:
        #   from openai import AsyncOpenAI
        #   self._client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
        self._client: Any = None

    # ─── lifecycle ────────────────────────────────────────

    def stop(self) -> None:
        """Signal the run loop to exit cleanly.

        The loops in this class poll ``is_set()`` rather than awaiting the
        event, so setting the flag is all that is needed to end them.
        """
        self._stop_flag.set()

    async def run(self) -> None:
        """Main conversation loop. Blocks until stopped.

        Responsibilities:
          - Open a realtime session with your provider.
          - Forward mic audio to the model in small chunks.
          - Stream the model's audio response to the speaker.
          - Drive barge-in: when the user speaks while the model is speaking,
            cancel model playback and mark the turn interrupted.
          - On disconnect/error, back off and reconnect.
        """
        while not self._stop_flag.is_set():
            try:
                log.info("connecting to model...")
                # TODO: open a session with your provider. For websocket-style
                # APIs, use `async with client.realtime.connect(...) as session:`.
                # For request/response APIs, poll or stream in a loop.
                await asyncio.gather(
                    self._send_mic_loop(),
                    self._receive_loop(),
                )
            except asyncio.CancelledError:
                break
            except Exception as exc:
                log.error("session error: %s — reconnecting in 2s", exc)
                await asyncio.sleep(2)

    # ─── mic → model ──────────────────────────────────────

    async def _send_mic_loop(self) -> None:
        """Read mic chunks and forward them to the model.

        Minimum responsibilities:
          - Loop on `self._mic.read_chunk(N_BYTES)`.
          - Encode to whatever format your provider expects
            (PCM16 mono is standard; some want base64 in JSON frames).
          - Respect `self._stop_flag`.

        Optional (highly recommended):
          - Measure energy; feed the mic frame to `self._recorder.capture_user`
            only when the user is actually speaking.
          - Apply echo suppression while the speaker is playing (mute or
            substitute silence when energy is low — keeps the model from
            transcribing its own voice bleed).
        """
        chunk_bytes = 1024  # 32 ms at 16 kHz mono int16 — tune to your API
        # We are inside a coroutine, so the running loop is the right one.
        loop = asyncio.get_running_loop()
        while not self._stop_flag.is_set():
            try:
                # read_chunk blocks, so run it off the event loop thread.
                data = await loop.run_in_executor(
                    None, self._mic.read_chunk, chunk_bytes,
                )
            except Exception as exc:
                # Leave the loop, but say why — a silent exit here looks
                # exactly like a model that stopped hearing the user.
                log.warning("mic read failed — leaving mic loop: %s", exc)
                break

            # TODO: forward `data` to the model. Example for a hypothetical
            # websocket session:
            #   await session.send({"type": "audio", "pcm16": data})
            _ = data

            # Pace to real-time so we don't starve the event loop
            await asyncio.sleep(chunk_bytes / (16000 * 2))

    # ─── model → speaker ──────────────────────────────────

    async def _receive_loop(self) -> None:
        """Receive model events (audio chunks, transcripts, turn markers).

        Event handling you need to implement:
          - Audio chunk        → `self._speaker.send_chunk(pcm, source_rate)`
                                 (first chunk must be preceded by
                                 `self._speaker.begin_stream()`).
          - Model interrupted  → `self._speaker.stop(); self._mic.flush()`
                                 and call `self._recorder.finish_turn()`.
          - User transcript    → `self._recorder.add_user_text(text)`.
          - Model transcript   → `self._recorder.add_robot_text(text)`.
          - Turn complete      → `self._speaker.wait_finish();
                                  self._recorder.finish_turn(); mic.flush()`.
        """
        while not self._stop_flag.is_set():
            # TODO: iterate your provider's event stream and dispatch.
            await asyncio.sleep(0.1)
class ModelSubprocess:
    """Skeleton supervisor — adapt for your model.

    Contract expected by `main.py` + `dashboard/routes/live_subprocess.py`:
        start()  — sync. Spawns the child, starts the log reader thread.
        stop()   — sync. SIGINT / SIGTERM / SIGKILL escalation.
        status() — returns {state, state_message, running, pid, log_tail,
                             user_transcript, last_user_text, ...}.
        log_tail         : deque[str]  last N cleaned stdout lines
        user_transcript  : deque[str]  user transcripts parsed from child's log
        last_user_text   : str         most recent transcript (convenience)
        state            : str         one of {"stopped", "starting", "connecting",
                                        "listening", "hearing", "interrupting",
                                        "error", "warning", "crashed"}
    """

    def __init__(self):
        # TODO: set a config section key — e.g. `_cfg_section("openai", "subprocess")`.
        # Create `config/_config.json > subprocess: { ... }` matching
        # gemini_config.json's layout.
        self._cfg = {}  # _cfg_section("", "subprocess")

        # Buffer sizes come from the config section, with template defaults.
        tail_len = self._cfg.get("log_tail_size", 2000)
        transcript_len = self._cfg.get("transcript_tail_size", 30)

        self._lock = threading.Lock()
        self.process: subprocess.Popen | None = None
        self.log_tail: deque[str] = deque(maxlen=tail_len)
        self.user_transcript: deque[str] = deque(maxlen=transcript_len)
        self._reader_thread: threading.Thread | None = None
        self._log_file = None
        self.state = "stopped"
        self.state_message = "Idle."
        self.last_user_text = ""

    # ─── spawn / kill ─────────────────────────────────────

    def start(self) -> dict:
        """Spawn the voice child process (template: not implemented)."""
        # TODO: build env (include `SANAD_VOICE_BRAIN=` so
        # sanad_voice.py picks your brain), pick the script path, and
        # `subprocess.Popen(...)`. Copy the gemini/subprocess.py body.
        raise NotImplementedError

    def stop(self, timeout: float = 3.0) -> dict:
        """Terminate the child process (template: not implemented)."""
        # TODO: send SIGINT → wait → SIGTERM → wait → SIGKILL.
        raise NotImplementedError

    # ─── log parsing — the brand-specific part ────────────

    def _track_line(self, line: str) -> None:
        """Translate your brain's log strings into state + transcripts.

        KEEP THIS IN LOCK-STEP with the `log.info(...)` calls in your
        brain. Minimum required detections:

            connecting   — child opened a session to the model
            listening    — session connected OR a turn finished
            hearing      — user transcript arrived (APPEND to user_transcript)
            interrupting — barge-in / model interrupted
            error        — fatal session error
            stopped      — clean shutdown
        """
        # Example (replace with your brain's actual strings):
        #
        #   if "connecting to OpenAI" in line:
        #       self._set_state("connecting", line)
        #   elif "session open" in line:
        #       self._set_state("listening", "Listening for speech.")
        #   elif "USER: " in line:
        #       text = line.split("USER: ", 1)[1].strip()
        #       if text:
        #           self.last_user_text = text
        #           self.user_transcript.append(text)
        #           self._set_state("hearing", f"User: {text}")
        #   elif "BARGE-IN" in line:
        #       self._set_state("interrupting", line)
        #   elif "session error" in line:
        #       self._set_state("error", line)
        #   elif "cancelled — stopping" in line:
        #       self._set_state("stopped", line)
        raise NotImplementedError

    def _set_state(self, state: str, msg: str) -> None:
        """Record the current state name and its human-readable message."""
        self.state, self.state_message = state, msg

    # ─── status + introspection ───────────────────────────

    def status(self) -> dict:
        """Return a snapshot of the child process and the parsed log state."""
        with self._lock:
            child = self.process
        # A child counts as running only while poll() still returns None.
        if child is None:
            alive = False
        else:
            alive = child.poll() is None

        snapshot = {"running": alive, "pid": child.pid if alive else None}
        snapshot["state"] = self.state
        snapshot["state_message"] = self.state_message
        snapshot["last_user_text"] = self.last_user_text
        snapshot["log_tail"] = list(self.log_tail)[-50:]
        snapshot["user_transcript"] = list(self.user_transcript)
        return snapshot
Gemini Live runs in a +child subprocess; the parent supervisor (gemini/subprocess.py) parses Gemini's +OWN spoken output into BOT: transcript lines and fires `on_bot_text(line)` here. + +Flow: + Gemini speaks a canonical confirmation phrase ("Turning right." / "أستدير + يميناً.") → on_bot_text() matches it against data/motions/instruction.json + → enqueues a canonical command → a worker thread drives loco_controller + (discrete steps that self-terminate with StopMove). + +Gating: every dispatch is gated on `recognition_state.movement_enabled` (the +"Enable Gemini movement" dashboard toggle) — SEPARATE from the manual +"Enable movement" arm flag (loco_controller._armed). When the flag is off, +matches are dropped. "stop" is honoured immediately (cross-thread abort + drain ++ E-STOP) regardless of cooldown. + +Safety: discrete `loco.step()` self-stops; velocity caps live in LocoController; +N-step / N-degree commands are bounded by instruction.json (max_steps, +max_degrees) and check the abort flag + enable gate between each step. Numbers +are kept verbatim but the degrees→steps and steps mapping is APPROXIMATE and +must be calibrated on the real robot. +""" + +from __future__ import annotations + +import json +import queue +import re +import threading +import time +from pathlib import Path +from typing import Any, List, Optional + +from Project.Sanad.core.logger import get_logger + +log = get_logger("movement_dispatch") + +_SENTINEL = object() +_STATE_CACHE_TTL = 0.5 # seconds — re-read recognition_state at most this often + +# Map a resolved canonical command to a LocoController discrete-step direction. 
_FIXED_STEP = {
    "move forward": "forward",
    "move backward": "backward",
    "turn right": "rotate_right",
    "turn left": "rotate_left",
    "slide left": "slide_left",
    "slide right": "slide_right",
}

# ── transcript cleaning (Marcus pattern) ──────────────────────────────────────
# Gemini's spoken text can CONTAIN our trigger phrases without intending a
# command — inside a question ("do you want me to move forward?"), a negation
# ("I'm not turning right"), a hypothetical ("I would be turning right"), a
# quote, or an echoed [STATE] tag. We drop those whole clauses before matching so
# only genuine confirmations actuate the robot.
_BRACKET_RE = re.compile(r"\[[^\]]*\]")  # [STATE-DONE] echoes
_QUOTE_RE = re.compile(r"[\"'«»“”„‟‹›][^\"'«»“”„‟‹›]{0,80}?[\"'«»“”„‟‹›]")
_SENT_SPLIT_RE = re.compile(r"([.!?؟؛\n]+)")  # keep delimiters
# NOTE: Arabic tokens are whitespace-delimited so we don't match a negation
# substring inside a real word — e.g. "ما" lives inside "أمام" (forward), "لا"
# inside many words. \b doesn't help for Arabic (all letters are \w), so we
# anchor on spaces/string-edges explicitly.
_NEG_RE = re.compile(
    r"\b(?:not|never|without|cannot|would|could|should|might|instead|"
    r"going to|want to|trying to|rather than)\b|\w+n['’]t\b|"
    r"(?:^|\s)(?:لا|ما|لن|لم|مش|بدون|غير|لست|ليس|بدل)(?:\s|$)")


class MovementDispatcher:
    """Turn the model's spoken confirmation phrases into locomotion commands.

    ``on_bot_text`` (called from the supervisor's reader thread) matches a
    transcript line against the phrases loaded from ``instruction.json`` and
    enqueues canonical commands; a worker thread executes them through the
    loco controller.  Every dispatch is gated on the ``movement_enabled``
    flag read from ``state_path`` and on the E-STOP latch.
    """

    def __init__(self, loco, instruction_path: Path, state_path: Path):
        """Store collaborators and load the phrase table.

        Args:
            loco: controller exposing ``step(direction)`` and ``estop()``.
            instruction_path: JSON file with phrases, regexes and limits.
            state_path: file passed to ``recognition_state.read`` for the
                ``movement_enabled`` flag.
        """
        self._loco = loco
        self._instruction_path = Path(instruction_path)
        self._state_path = Path(state_path)

        self._queue: "queue.Queue[Any]" = queue.Queue(maxsize=32)
        self._abort = threading.Event()
        self._worker: Optional[threading.Thread] = None
        self._running = False
        # Hard-stop latch set by a dashboard E-STOP. Drops all voice commands
        # until cleared (by re-enabling Gemini movement). Kept SEPARATE from the
        # movement_enabled file flag so an E-STOP doesn't trigger the spoken
        # "movement disabled" announcement.
        self._estop = False

        # dedup / cooldown
        self._last_canon = ""
        self._last_at = 0.0

        # cached enable-flag
        self._enabled_cached = False
        self._enabled_at = 0.0

        # config (filled by _load)
        self._cooldown = 1.5
        self._max_steps = 8
        self._max_degrees = 360
        self._deg_per_step = 15
        self._fixed_patterns: List[tuple] = []  # (needle_lower, canonical) sorted long→short
        self._parametric: List[tuple] = []      # (compiled_regex, template)

        self._load()

    # ── instruction.json ─────────────────────────────────────────────────────

    def _load(self):
        """Read instruction.json into limits, fixed phrases and parametric regexes.

        A missing/unparseable file, or a JSON root that is not an object,
        is logged and leaves the dispatcher with defaults and no phrases.
        """
        try:
            data = json.loads(self._instruction_path.read_text(encoding="utf-8"))
        except Exception as exc:
            log.error("could not load %s: %s — dispatcher inert", self._instruction_path, exc)
            data = {}
        if not isinstance(data, dict):
            # e.g. a top-level JSON list — treat like a failed load instead
            # of crashing the constructor on `.get`.
            log.error("could not load %s: %s — dispatcher inert",
                      self._instruction_path, "top-level JSON value is not an object")
            data = {}
        self._cooldown = float(data.get("command_cooldown_sec", 1.5))
        self._max_steps = int(data.get("max_steps", 8))
        self._max_degrees = int(data.get("max_degrees", 360))
        self._deg_per_step = max(1, int(data.get("degrees_per_step", 15)))

        needles: List[tuple] = []
        for spec in (data.get("actions") or {}).values():
            canonical = spec.get("canonical", "")
            phrases = spec.get("bot_phrases", {}) or {}
            for lang_list in phrases.values():
                for p in lang_list:
                    if p:
                        # English folded to lower; Arabic unaffected by .lower()
                        needles.append((p.lower(), canonical))
        # longest needle first so "walking forward" wins over "forward"
        needles.sort(key=lambda t: len(t[0]), reverse=True)
        self._fixed_patterns = needles

        self._parametric = []
        for pa in (data.get("parametric_actions") or []):
            try:
                self._parametric.append((re.compile(pa["regex"], re.IGNORECASE), pa["canonical"]))
            except re.error as exc:
                log.warning("bad parametric regex %r: %s", pa.get("regex"), exc)
        log.info("instruction.json loaded: %d fixed phrases, %d parametric, cooldown=%.1fs",
                 len(self._fixed_patterns), len(self._parametric), self._cooldown)

    # ── lifecycle ─────────────────────────────────────────────────────────────

    def start(self):
        """Start the worker thread (no-op when already running)."""
        if self._running:
            return
        # A previous stop() can leave its sentinel (or never-executed
        # commands) in the queue when the old worker exited through the
        # `while self._running` check. Without this drain the new worker
        # would consume the stale sentinel and die immediately, or replay
        # old motions after a restart.
        self._drain()
        self._running = True
        self._worker = threading.Thread(target=self._worker_loop, daemon=True,
                                        name="movement-dispatch")
        self._worker.start()
        log.info("movement dispatcher started")

    def stop(self):
        """Ask the worker to exit: clear the run flag, abort, wake it with a sentinel."""
        self._running = False
        self._abort.set()
        try:
            self._queue.put_nowait(_SENTINEL)
        except queue.Full:
            # Worker is busy draining a full queue; it exits on the
            # `while self._running` check instead.
            pass

    def status(self) -> dict:
        """Return a snapshot for the dashboard (forces a fresh enable-flag read)."""
        return {
            "running": self._running,
            "movement_enabled": self._movement_enabled(force=True),
            "estopped": self._estop,
            "queue_depth": self._queue.qsize(),
            "fixed_phrases": len(self._fixed_patterns),
            "parametric": len(self._parametric),
        }

    # ── E-STOP latch ──────────────────────────────────────────────────────────

    def emergency_stop(self):
        """Latch off after a dashboard E-STOP: abort the in-flight command, drain
        the queue, and refuse new commands until clear_estop(). Does NOT touch the
        movement_enabled file flag (so the Gemini child stays quiet)."""
        self._estop = True
        self._abort.set()
        self._drain()
        log.warning("movement dispatch E-STOP latch set")

    def clear_estop(self):
        """Release the E-STOP latch so voice commands are accepted again."""
        self._estop = False

    def is_estopped(self) -> bool:
        """Return True while the E-STOP latch is set."""
        return self._estop

    # ── enable gate ───────────────────────────────────────────────────────────

    def _movement_enabled(self, force: bool = False) -> bool:
        """Return the movement_enabled flag, re-reading it at most every
        ``_STATE_CACHE_TTL`` seconds unless *force* is set.

        Any read failure counts as disabled.
        """
        now = time.monotonic()
        if not force and (now - self._enabled_at) < _STATE_CACHE_TTL:
            return self._enabled_cached
        try:
            from Project.Sanad.vision import recognition_state
            self._enabled_cached = bool(recognition_state.read(self._state_path).movement_enabled)
        except Exception:
            self._enabled_cached = False
        self._enabled_at = now
        return self._enabled_cached

    # ── transcript hook (called from the supervisor reader thread) ────────────

    def on_bot_text(self, text: str):
        """Match one bot transcript line and enqueue the resulting commands.

        Ignored while stopped, E-STOP-latched or movement-disabled.  "stop"
        bypasses the cooldown and preempts whatever is queued or running.
        """
        if not text or not self._running or self._estop:
            return
        if not self._movement_enabled():
            return
        cmds = self._match(text)
        if not cmds:
            return
        now = time.monotonic()  # monotonic — immune to NTP/wall-clock jumps
        for c in cmds:
            if c == "stop":
                # Safety: preempt anything in flight immediately, then E-STOP.
                self._abort.set()
                self._drain()
                self._enqueue("stop")
                self._last_canon = "stop"
                self._last_at = now
                continue
            # cross-turn cooldown: same canonical not re-fired too soon
            if c == self._last_canon and (now - self._last_at) < self._cooldown:
                continue
            self._last_canon = c
            self._last_at = now
            self._enqueue(c)

    def _enqueue(self, cmd: str):
        """Queue *cmd* without blocking; drop it (with a warning) when full."""
        try:
            self._queue.put_nowait(cmd)
        except queue.Full:
            log.warning("motion queue full — dropping %r", cmd)

    def _drain(self):
        """Discard everything currently in the queue."""
        try:
            while True:
                self._queue.get_nowait()
        except queue.Empty:
            pass

    # ── matcher ───────────────────────────────────────────────────────────────

    def _clean(self, text: str) -> str:
        """Drop clauses that are NOT genuine motion confirmations: bracketed
        [STATE] echoes, quoted spans, questions, and negation/hypothetical
        sentences. Only the surviving clauses are matched."""
        t = _BRACKET_RE.sub(" ", text)
        t = _QUOTE_RE.sub(" ", t)
        # The split keeps delimiters, so parts alternate: clause, delimiter, ...
        parts = _SENT_SPLIT_RE.split(t)
        kept: List[str] = []
        i = 0
        while i < len(parts):
            seg = parts[i].strip()
            delim = parts[i + 1] if i + 1 < len(parts) else ""
            is_question = ("?" in delim) or ("؟" in delim)
            if seg and not is_question and not _NEG_RE.search(seg.lower()):
                kept.append(seg)
            i += 2
        return " . ".join(kept)

    def _match(self, text: str) -> List[str]:
        """Return canonical commands in spoken order. Parametric (with numbers)
        claim their spans first so a bare phrase doesn't double-fire."""
        low = self._clean(text).lower()
        matches: List[tuple] = []   # (start, canonical)
        claimed: List[tuple] = []   # (start, end) spans already taken

        def overlaps(s, e):
            return any(s < ce and cs < e for cs, ce in claimed)

        # 1) parametric first. Claim the span even when the quantity is zero so a
        #    mis-heard "0 steps" suppresses the bare phrase underneath (no surprise
        #    motion) rather than falling through to a single step.
        for rx, template in self._parametric:
            for m in rx.finditer(low):
                if overlaps(*m.span()):
                    continue
                claimed.append(m.span())
                canonical = self._format(template, m.groups())
                if canonical:
                    matches.append((m.start(), canonical))

        # 2) fixed phrases (longest first), skipping claimed spans
        for needle, canonical in self._fixed_patterns:
            start = 0
            while True:
                j = low.find(needle, start)
                if j < 0:
                    break
                end = j + len(needle)
                if not overlaps(j, end):
                    matches.append((j, canonical))
                    claimed.append((j, end))
                start = end

        matches.sort(key=lambda t: t[0])
        # de-dup consecutive repeats within this single line
        out: List[str] = []
        for _, c in matches:
            if not out or out[-1] != c:
                out.append(c)
        return out

    @staticmethod
    def _format(template: str, groups) -> str:
        """Fill ``$1``, ``$2``, … in *template* from the regex *groups*.

        Each placeholder is resolved by its full index, so ``$10`` is never
        mistaken for ``$1`` followed by ``0``.  A group that did not
        participate in the match (``None``) becomes an empty string, and a
        placeholder with no corresponding group is left untouched.

        Returns:
            The filled template, or ``""`` when it contains numbers and all
            of them are zero (a zero-quantity motion such as "walk 0 steps").
        """
        def _fill(match):
            idx = int(match.group(1))
            if 1 <= idx <= len(groups):
                value = groups[idx - 1]
                return "" if value is None else str(value)
            return match.group(0)

        out = re.sub(r"\$(\d+)", _fill, template)
        # reject zero-quantity motions ("walk 0 steps")
        nums = re.findall(r"\d+", out)
        if nums and all(int(n) == 0 for n in nums):
            return ""
        return out

    # ── worker ────────────────────────────────────────────────────────────────

    def _worker_loop(self):
        """Execute queued commands until stopped or the sentinel arrives."""
        while self._running:
            cmd = self._queue.get()
            if cmd is _SENTINEL:
                return
            if cmd != "stop":
                # A new motion command re-arms the abort flag set by an
                # earlier "stop" / stop().
                self._abort.clear()
            if self._estop:
                continue  # E-STOP latched — drop everything
            # force a fresh read — don't let the 0.5s cache execute a command
            # after the operator just toggled movement off.
            if cmd != "stop" and not self._movement_enabled(force=True):
                continue  # toggled off while queued — drop
            try:
                self._execute(cmd)
            except Exception:
                log.exception("execute %r failed", cmd)

    def _execute(self, canonical: str):
        """Map one canonical command onto loco-controller calls."""
        c = canonical.lower().strip()
        if c == "stop":
            log.info("voice → STOP")
            self._loco.estop()
            return

        m = re.match(r"walk (forward|backward) (\d+) steps?$", c)
        if m:
            direction = "forward" if m.group(1) == "forward" else "backward"
            n = min(int(m.group(2)), self._max_steps)
            log.info("voice → walk %s %d steps", direction, n)
            self._repeat_step(direction, n)
            return

        m = re.match(r"turn (right|left) (\d+) degrees?$", c)
        if m:
            direction = "rotate_right" if m.group(1) == "right" else "rotate_left"
            deg = min(int(m.group(2)), self._max_degrees)
            # Degrees are converted to a whole number of discrete steps.
            n = max(1, round(deg / self._deg_per_step))
            log.info("voice → turn %s %d° (~%d steps)", m.group(1), deg, n)
            self._repeat_step(direction, n)
            return

        direction = _FIXED_STEP.get(c)
        if direction:
            log.info("voice → %s", c)
            self._loco.step(direction)
            return
        log.debug("no loco mapping for canonical %r", c)

    def _repeat_step(self, direction: str, n: int):
        """Issue up to *n* (at least one) steps, re-checking abort, E-STOP and
        the enable gate before each step."""
        for _ in range(max(1, n)):
            if self._abort.is_set() or self._estop or not self._movement_enabled(force=True):
                log.info("voice multi-step aborted")
                break
            self._loco.step(direction)
Voice brain — gemini/script.py (Gemini, default — cloud) + local/script.py (offline — Whisper+Qwen+CosyVoice2) + voice/model_script.py (template for new models) + +Runtime selection: + SANAD_AUDIO_PROFILE = builtin | anker | hollyland_builtin (default builtin) + SANAD_VOICE_BRAIN = gemini | local | model (default gemini) + +Usage: + python3 voice/sanad_voice.py eth0 + python3 voice/sanad_voice.py eth0 --voice Charon + SANAD_AUDIO_PROFILE=anker SANAD_VOICE_BRAIN=gemini \\ + python3 voice/sanad_voice.py eth0 + +System prompt priority (first hit wins): + 1. scripts/sanad_script.txt (edit-live via the dashboard) + 2. config/core_config.json > gemini_defaults.default_system_prompt + 3. the hardcoded fallback in _load_system_prompt() below +""" + +from __future__ import annotations + +import array +import asyncio +import importlib +import json +import logging +import os +import sys +import threading +import time +import types +import wave +from datetime import datetime +from pathlib import Path + +# ───────────────────────────────────────────────────────────────────────────── +# Layout bootstrap — MUST run before any `Project.Sanad.*` import. +# This file runs as a standalone subprocess (spawned by gemini/subprocess.py +# or local/subprocess.py); it can't rely on main.py having set up sys.path. +# Mirrors the dev-vs-deployed detection in main.py. 
+# dev layout: /Project/Sanad/voice/sanad_voice.py +# deployed layout: /home/unitree/Sanad/voice/sanad_voice.py +# ───────────────────────────────────────────────────────────────────────────── +_SANAD_DIR = Path(__file__).resolve().parent.parent # .../Sanad +_SANAD_PARENT = _SANAD_DIR.parent # .../Project OR /home/unitree + +if _SANAD_PARENT.name == "Project": + _ROOT = _SANAD_PARENT.parent + if str(_ROOT) not in sys.path: + sys.path.insert(0, str(_ROOT)) +else: + if str(_SANAD_PARENT) not in sys.path: + sys.path.insert(0, str(_SANAD_PARENT)) + if "Project" not in sys.modules: + _proj = types.ModuleType("Project") + _proj.__path__ = [] # namespace package marker + sys.modules["Project"] = _proj + if "Project.Sanad" not in sys.modules: + _sanad = importlib.import_module(_SANAD_DIR.name) + sys.modules["Project.Sanad"] = _sanad + sys.modules["Project"].Sanad = _sanad # type: ignore[attr-defined] + +from unitree_sdk2py.core.channel import ChannelFactoryInitialize +from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient + +from Project.Sanad.config import ( + GEMINI_VOICE, + RECEIVE_SAMPLE_RATE, + SCRIPTS_DIR, + SEND_SAMPLE_RATE, +) +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.voice.audio_io import AudioIO + +# ─── LOGGING ───────────────────────────────────────────── + +_LOG_CFG = _cfg_section("voice", "sanad_voice") +LOG_DIR = os.path.expanduser(_LOG_CFG.get("log_dir", "~/logs")) +os.makedirs(LOG_DIR, exist_ok=True) +_LOG_NAME = _LOG_CFG.get("log_name", "gemini_live_v2") +LOG_FILE = os.path.join(LOG_DIR, f"{_LOG_NAME}_{datetime.now():%Y%m%d}.log") + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s.%(msecs)03d [%(levelname)s] %(message)s", + datefmt="%H:%M:%S", + handlers=[ + logging.FileHandler(LOG_FILE), + logging.StreamHandler(), + ], +) +log = logging.getLogger("sanad_voice") + + +# ─── CONFIG ────────────────────────────────────────────── + +_REC = _cfg_section("voice", "recording") +_SCRIPTS = 
_cfg_section("core", "script_files") +_GEMINI_DEFAULTS = _cfg_section("core", "gemini_defaults") + +_PERSONA_FILE = SCRIPTS_DIR / _SCRIPTS.get("persona", "sanad_script.txt") + +RECORD_ENABLED = os.environ.get( + "SANAD_RECORD", + "1" if _REC.get("enabled", True) else "0", +) != "0" +_REC_DIR_REL = _REC.get("dir_relative", "data/recordings") +RECORD_DIR = Path(os.environ.get( + "SANAD_RECORD_DIR", + str(Path(__file__).resolve().parent.parent / _REC_DIR_REL), +)) + +_FALLBACK_SYSTEM_PROMPT = ( + "You are Marcus, a bilingual humanoid robot assistant made by YS Lootah " + "Technology, Dubai, UAE. RESPOND IN ARABIC (Gulf/Emirati dialect) OR " + "ENGLISH ONLY. YOU MUST RESPOND UNMISTAKABLY IN THE SAME LANGUAGE THE " + "USER SPEAKS. If the user speaks Arabic, you MUST reply in Arabic Gulf " + "dialect. If the user speaks English, you MUST reply in English. Do NOT " + "confuse Arabic with Japanese, Hindi, Russian, or any other language. " + "The user is speaking Arabic or English — nothing else. Be concise — 1 " + "to 2 sentences max. Be friendly and natural. If the user interrupts " + "and says 'continue' or 'كمل', resume EXACTLY where you stopped. Only " + "respond to clear human speech. Ignore background noise and silence " + "completely. Do not respond to sounds that are not words." +) + + +# N2 Phase 3 — movement confirmation-phrase rules. The parent's +# MovementDispatcher drives the robot off Gemini's OWN spoken phrases, so Gemini +# must say one of these EXACT short phrases (per motion) when it agrees to move. +# Kept in lock-step with data/motions/instruction.json. Always appended; Gemini +# is told at runtime ([MOVEMENT ON/OFF/STATUS]) whether movement is enabled and +# must only confirm motion when it is ON. +_MOVEMENT_PROMPT_RULES = ( + "\n\n--- MOVEMENT (walking) ---\n" + "You can make the robot walk ONLY when you are told movement is ON " + "(you receive a [MOVEMENT ON] or [MOVEMENT STATUS] note). 
When movement is " + "OFF, never confirm a motion — tell the user to enable movement from the " + "dashboard.\n" + "When movement is ON and the user addresses you by name (Bousandah / بوسنده) AND " + "asks you to move, reply with ONE short confirmation phrase per requested " + "motion, in the SAME language, in the order asked. Use these EXACT shapes — " + "they are what triggers the motion:\n" + " forward : 'Moving forward.' / 'أمشي للأمام.'\n" + " backward : 'Moving back.' / 'أمشي للخلف.'\n" + " turn right : 'Turning right.' / 'أستدير يميناً.'\n" + " turn left : 'Turning left.' / 'أستدير يساراً.'\n" + " slide left : 'Sliding left.' / 'أنزلق لليسار.'\n" + " slide right : 'Sliding right.' / 'أنزلق لليمين.'\n" + " stop : 'Stopping.' / 'أتوقف.'\n" + "With a NUMBER, keep it: 'Walking forward 3 steps.' / 'أمشي للأمام 3 خطوات.' " + "— 'Turning right 90 degrees.' / 'أستدير يميناً 90 درجة.'\n" + "STOP is safety-first: if the user clearly wants the robot to halt " + "(stop/halt/wait/توقف/استنى), confirm 'Stopping.' / 'أتوقف.' immediately, " + "even without your name.\n" + "Never emit bracketed tags like [STATE-DONE] or numbers in parentheses — " + "speak only plain prose. Never include 0 or a negative quantity; if you " + "mis-hear a 0, drop the number and say the bare motion." +) + + +def _load_system_prompt() -> str: + """scripts/ → config default → hardcoded fallback, with the + movement confirmation-phrase rules appended (N2 Phase 3). + + A missing persona file used to be silent — that hid a config-vs-filename + mismatch (e.g. `persona: "sanad_v2"` while only `sanad_script.txt` + existed) which made the robot fall back to the English default that + introduces itself as "Sanad" instead of using the Arabic persona on + disk. 
def _audio_energy(pcm: bytes) -> int:
    """Return the mean absolute amplitude of 16-bit PCM audio.

    *pcm* is interpreted as native-endian signed 16-bit samples. A trailing
    odd byte (a half sample) is ignored rather than making the whole buffer
    count as silence. Returns 0 for empty, too-short or unusable input.
    """
    try:
        if isinstance(pcm, (bytes, bytearray)) and len(pcm) % 2:
            # array("h", ...) rejects a byte count that is not a multiple of
            # the item size; drop the dangling byte and measure the rest.
            pcm = pcm[:-1]
        samples = array.array("h", pcm)
    except (TypeError, ValueError):
        # Not bytes-like / not convertible: treat as "no signal".
        return 0
    if not samples:
        return 0
    return sum(map(abs, samples)) // len(samples)
+ """ + + def __init__(self, enabled: bool = True, out_dir: Path = RECORD_DIR, + user_rate: int = SEND_SAMPLE_RATE, + robot_rate: int = RECEIVE_SAMPLE_RATE): + self.enabled = enabled + self.out_dir = out_dir + self.user_rate = user_rate + self.robot_rate = robot_rate + if self.enabled: + self.out_dir.mkdir(parents=True, exist_ok=True) + self._lock = threading.Lock() + self._user_buf: list[bytes] = [] + self._robot_buf: list[bytes] = [] + self._user_text = "" + self._robot_text = "" + self._started_at: float = 0.0 + + def capture_user(self, pcm: bytes) -> None: + if not self.enabled or not pcm: + return + with self._lock: + if not self._user_buf and not self._robot_buf: + self._started_at = time.time() + self._user_buf.append(pcm) + + def capture_robot(self, pcm: bytes) -> None: + if not self.enabled or not pcm: + return + with self._lock: + if not self._user_buf and not self._robot_buf: + self._started_at = time.time() + self._robot_buf.append(pcm) + + def add_user_text(self, text: str) -> None: + if text and self.enabled: + with self._lock: + self._user_text = (self._user_text + " " + text).strip() + + def add_robot_text(self, text: str) -> None: + if text and self.enabled: + with self._lock: + self._robot_text = (self._robot_text + " " + text).strip() + + def finish_turn(self) -> dict: + if not self.enabled: + return {} + with self._lock: + user_data = b"".join(self._user_buf) + robot_data = b"".join(self._robot_buf) + user_text = self._user_text + robot_text = self._robot_text + started_at = self._started_at + self._user_buf.clear() + self._robot_buf.clear() + self._user_text = "" + self._robot_text = "" + + if not user_data and not robot_data: + return {} + + stamp = datetime.fromtimestamp(started_at).strftime("%Y%m%d_%H%M%S") + entry = {"timestamp": stamp, "started_at": started_at, + "user_text": user_text, "robot_text": robot_text} + try: + if user_data: + p = self.out_dir / f"{stamp}_user.wav" + self._save_wav(p, user_data, self.user_rate) + 
entry["user_wav"] = str(p) + entry["user_duration_sec"] = round( + len(user_data) / (self.user_rate * 2), 3) + if robot_data: + p = self.out_dir / f"{stamp}_robot.wav" + self._save_wav(p, robot_data, self.robot_rate) + entry["robot_wav"] = str(p) + entry["robot_duration_sec"] = round( + len(robot_data) / (self.robot_rate * 2), 3) + self._append_index(entry) + log.info("recorded turn → %s (user %.1fs, robot %.1fs)", + stamp, + entry.get("user_duration_sec", 0), + entry.get("robot_duration_sec", 0)) + except Exception as exc: + log.warning("recording save failed: %s", exc) + return entry + + @staticmethod + def _save_wav(path: Path, pcm: bytes, rate: int) -> None: + with wave.open(str(path), "wb") as wf: + wf.setnchannels(1) + wf.setsampwidth(2) + wf.setframerate(rate) + wf.writeframes(pcm) + + def _append_index(self, entry: dict) -> None: + idx_path = self.out_dir / "index.json" + try: + if idx_path.exists(): + payload = json.loads(idx_path.read_text(encoding="utf-8")) + if not isinstance(payload, dict): + payload = {"records": []} + else: + payload = {"records": []} + except Exception: + payload = {"records": []} + payload.setdefault("records", []).append(entry) + payload["total_records"] = len(payload["records"]) + idx_path.write_text( + json.dumps(payload, indent=2, ensure_ascii=False), + encoding="utf-8", + ) + + +# ─── BRAIN FACTORY ─────────────────────────────────────── + +def _build_brain(name: str, audio_io, recorder, voice: str, system_prompt: str): + name = (name or "").strip().lower() + if name in ("", "gemini"): + from Project.Sanad.gemini.script import GeminiBrain + return GeminiBrain(audio_io, recorder, voice, system_prompt) + if name == "local": + from Project.Sanad.local.script import LocalBrain + return LocalBrain(audio_io, recorder, voice, system_prompt) + if name == "model": + from Project.Sanad.voice.model_script import ModelBrain + return ModelBrain(audio_io, recorder, voice, system_prompt) + # To add a provider: import the module and return 
def main() -> None:
    """Run the voice subprocess until the brain exits or is interrupted.

    Command line: ``sanad_voice.py <dds-interface> [--voice <name>]``.
    Exits with status 1 (after printing usage) when the interface argument is
    missing, and with status 1 when ``--voice`` is given without a value.

    Steps: initialise the DDS channel and ``AudioClient``, start the audio
    profile named by ``SANAD_AUDIO_PROFILE`` (default ``builtin``), log a
    short mic energy check, build the brain named by ``SANAD_VOICE_BRAIN``
    (default ``gemini``) and run it. On the way out the brain and the audio
    I/O are both stopped, each on a best-effort basis.
    """
    if len(sys.argv) < 2:
        print(__doc__)
        sys.exit(1)

    iface = sys.argv[1]
    voice = GEMINI_VOICE
    if "--voice" in sys.argv:
        value_at = sys.argv.index("--voice") + 1
        if value_at >= len(sys.argv):
            # A bare trailing --voice used to die with an IndexError traceback.
            print("error: --voice requires a value", file=sys.stderr)
            sys.exit(1)
        voice = sys.argv[value_at]

    log.info("DDS on %s", iface)
    ChannelFactoryInitialize(0, iface)
    ac = AudioClient()
    ac.SetTimeout(10.0)
    ac.Init()
    log.info("AudioClient ready")

    profile = os.environ.get("SANAD_AUDIO_PROFILE", "builtin")
    audio = AudioIO.from_profile(profile, audio_client=ac)
    audio.start()
    log.info("audio profile=%s", audio.profile_id)

    # Sanity-check the mic before handing it to the brain
    log.info("testing mic 2s...")
    time.sleep(2)
    test = audio.mic.read_chunk(1024)
    e = _audio_energy(test)
    log.info("mic energy=%d %s", e, "OK" if e > 0 else "SILENT")

    recorder = TurnRecorder(enabled=RECORD_ENABLED)
    if RECORD_ENABLED:
        log.info("recording enabled → %s", RECORD_DIR)

    system_prompt = _load_system_prompt()
    brain_name = os.environ.get("SANAD_VOICE_BRAIN", "gemini")
    brain = _build_brain(brain_name, audio, recorder, voice, system_prompt)
    log.info("voice brain=%s voice=%s log=%s", brain_name, voice, LOG_FILE)
    log.info("─" * 50)

    try:
        asyncio.run(brain.run())
    except KeyboardInterrupt:
        pass
    except Exception as exc:
        # log.exception keeps the traceback in the log file; the message
        # prefix is unchanged for anything that greps for it.
        log.exception("fatal: %s", exc)
    finally:
        log.info("stopping")
        try:
            brain.stop()
        except Exception:
            log.warning("brain.stop() failed", exc_info=True)
        try:
            audio.stop()
        except Exception:
            # Same best-effort treatment as brain.stop(): a failing audio
            # teardown must not hide the "stopped" line or the real error.
            log.warning("audio.stop() failed", exc_info=True)
        log.info("stopped")
def load_phrase_map(filepath: str | Path) -> dict[str, set[str]]:
    """Load a phrase file mapping command names to trigger phrases.

    Format (per command):
        WAKE_PHRASES_shake_hand = {
            "مصافحه", "handshake", "shake hands",
        }

    Any number of quoted phrases may appear on one line, including on the
    header line itself (``WAKE_PHRASES_wave = {"wave"}``). Blank lines and
    lines starting with ``#`` are skipped; a ``#`` outside quotes starts a
    trailing comment. Each phrase is passed through ``normalize_arabic`` and
    dropped if it normalizes to an empty string. Commands with no phrases are
    omitted.

    Returns: {"shake_hand": {"مصافحه", "handshake", ...}, ...}
    Returns an empty dict when *filepath* does not exist.
    """
    path = Path(filepath)
    if not path.exists():
        return {}

    # utf-8-sig reads plain UTF-8 too, and keeps a BOM from hiding a header
    # that sits on the very first line.
    content = path.read_text(encoding="utf-8-sig")
    result: dict[str, set[str]] = {}
    current_name: str | None = None
    current_phrases: set[str] = set()

    for raw_line in content.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue

        # Header: WAKE_PHRASES_shake_hand = {
        header_match = re.match(r"WAKE_PHRASES_(\w+)\s*=\s*\{", line)
        if header_match:
            if current_name and current_phrases:
                result[current_name] = current_phrases
            current_name = header_match.group(1)
            current_phrases = set()
            # Keep scanning whatever follows the opening brace on this line.
            line = line[header_match.end():]

        # Walk the line token by token: quoted phrase, comment start, or
        # closing brace. Quoted text is matched first, so "#" or "}" inside a
        # phrase is never mistaken for syntax.
        for token in re.finditer(r'"([^"]*)"|([#}])', line):
            marker = token.group(2)
            if marker == "#":
                break
            if marker == "}":
                if current_name and current_phrases:
                    result[current_name] = current_phrases
                current_name = None
                current_phrases = set()
                break
            if current_name is not None:
                phrase = normalize_arabic(token.group(1))
                if phrase:
                    current_phrases.add(phrase)

    # File ended without a closing brace: keep what was collected.
    if current_name and current_phrases:
        result[current_name] = current_phrases

    return result
def _remove_al_prefix_words(text: str) -> str:
    """Strip the leading "ال" from every whitespace-separated word of *text*.

    A word that is exactly "ال" (nothing after the prefix) is kept unchanged.
    Words are re-joined with single spaces; empty or falsy input gives "".
    """
    if not text:
        return ""
    article = "ال"
    trimmed = (
        word[len(article):]
        if len(word) > len(article) and word.startswith(article)
        else word
        for word in text.split()
    )
    return " ".join(trimmed).strip()
+ + On match: + - Clears ASR buffer to avoid re-trigger on next chunk + - If fire_on_wake_match: runs arm_trigger_fn in a background thread + immediately (wrapped in asyncio.to_thread if in a loop, else + threading.Thread) + - If not fire_on_wake_match: marks _pending_arm_wave=True so the + caller can fire it on turn_complete + + Returns True if a phrase fired, False otherwise. + """ + if not transcript_text or not wake_phrases: + return False + + # ── lazy state init ──────────────────────────────────────── + for attr, default in ( + ("_asr_buf", ""), ("_asr_last_time", 0.0), + ("ASR_WINDOW_SEC", 2.0), ("ASR_SHORT_TOKEN_BONUS_SEC", 1.0), + ("ASR_JOIN_NO_SPACE_MAXLEN", 2), ("ASR_MAX_CHARS", 120), + ("_last_trigger_norm", ""), ("_last_trigger_time", 0.0), + ("TRIGGER_DEDUP_WINDOW", 2.0), + ("_pending_arm_wave", False), ("_pending_arm_wave_fired", False), + ("_pending_arm_wave_set_time", 0.0), ("PENDING_ARM_TTL", 6.0), + ("_pending_arm_trigger_fn", None), ("_pending_arm_fallback_time", 0.0), + ("_last_piece_call_norm", ""), ("_last_piece_call_time", 0.0), + ("_asr_stream", ""), ("ASR_STREAM_MAX_CHARS", 80), + ): + if not hasattr(state, attr): + setattr(state, attr, default) + + dup_call_window = float(getattr(state, "DUP_CALL_WINDOW_SEC", 0.25)) + dup_asr_repeat_window = float(getattr(state, "DUP_ASR_REPEAT_WINDOW_SEC", 0.9)) + pending_fallback_sec = float(getattr(state, "PENDING_ARM_FALLBACK_SEC", 0.65)) + + piece_raw = transcript_text.strip() + if not piece_raw: + return False + + piece_norm = normalize_arabic(piece_raw) + if not piece_norm or not _is_valid_text(piece_norm): + return False + + now = time.time() + + duplicate_call = ( + piece_norm == state._last_piece_call_norm + and (now - state._last_piece_call_time) < dup_call_window + ) + repeated_asr = ( + piece_norm == state._last_piece_call_norm + and (now - state._last_piece_call_time) < dup_asr_repeat_window + ) + + state._last_piece_call_norm = piece_norm + state._last_piece_call_time = now + + # Buffer 
update + if not duplicate_call and not repeated_asr: + if state._asr_last_time: + gap = now - state._asr_last_time + window = state.ASR_WINDOW_SEC + if len(piece_norm) <= state.ASR_JOIN_NO_SPACE_MAXLEN: + window += state.ASR_SHORT_TOKEN_BONUS_SEC + if gap > window: + state._asr_buf = "" + state._asr_stream = "" + + state._asr_last_time = now + + # Join logic — no-space for very short pieces + if state._asr_buf: + if len(piece_norm) <= state.ASR_JOIN_NO_SPACE_MAXLEN: + state._asr_buf = (state._asr_buf + piece_norm).strip() + else: + state._asr_buf = (state._asr_buf + " " + piece_norm).strip() + else: + state._asr_buf = piece_norm + + compact = piece_norm.replace(" ", "") + state._asr_stream = (state._asr_stream + compact)[-state.ASR_STREAM_MAX_CHARS:] + if len(state._asr_buf) > state.ASR_MAX_CHARS: + state._asr_buf = state._asr_buf[-state.ASR_MAX_CHARS:] + + buf_norm = normalize_arabic(state._asr_buf) + buf_nospace = buf_norm.replace(" ", "") + buf_noal = _remove_al_prefix_words(buf_norm) + buf_noal_nospace = buf_noal.replace(" ", "") + stream = normalize_arabic(state._asr_stream).replace(" ", "") + stream_noal = _remove_al_prefix_words(stream) + + # Dedup — don't fire same buffer twice within TRIGGER_DEDUP_WINDOW + if (buf_norm == state._last_trigger_norm + and (now - state._last_trigger_time) < state.TRIGGER_DEDUP_WINDOW): + return False + + # Match loop + for phrase in wake_phrases: + p_norm = _strip_ya_prefix(normalize_arabic(str(phrase))) + if not p_norm: + continue + p_nospace = p_norm.replace(" ", "") + p_noal = _remove_al_prefix_words(p_norm) + p_noal_nospace = p_noal.replace(" ", "") + + pattern = r"\b" + re.escape(p_norm) + r"\b" + hit_buf = bool(re.search(pattern, buf_norm)) \ + or (p_nospace and p_nospace == buf_nospace) \ + or (p_noal and (p_noal in buf_noal + or (p_noal_nospace and p_noal_nospace in buf_noal_nospace))) + + hit_stream = bool(p_nospace and p_nospace in stream) \ + or bool(p_noal_nospace and p_noal_nospace in stream_noal) + + if hit_buf 
def _fire_arm_trigger(fn) -> None:
    """Run the arm trigger callback *fn* in a background thread.

    Inside a running asyncio loop the call is handed to the loop's default
    executor, with the current context copied. Outside a loop a daemon thread
    is started. Either way this returns immediately and the result of *fn* is
    not collected.
    """
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop in this thread.
        threading.Thread(target=fn, daemon=True).start()
        return
    # Submit straight to the executor instead of wrapping to_thread() in a
    # task whose reference is immediately dropped: there is no Task object
    # left for the caller to keep alive.
    import contextvars  # local: not among this module's top-level imports
    loop.run_in_executor(None, contextvars.copy_context().run, fn)
def format_timestamp(dt: Optional[datetime] = None) -> str:
    """Format *dt* as ``YYYY-MM-DD HH:MM:SS``; use the current local time when *dt* is omitted."""
    moment = dt if dt else datetime.now()
    return f"{moment:%Y-%m-%d %H:%M:%S}"
def ensure_unique_record_stem(base_name: str, out_dir: Path) -> Path:
    """Return a path stem under *out_dir* whose WAV files do not exist yet.

    *out_dir* is created if needed. The stem is the sanitized *base_name*
    with dots replaced by underscores; if ``<stem>.wav`` or
    ``<stem>_raw.wav`` already exists, ``_1``, ``_2``, … is appended until
    both names are free. The returned path has no extension.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    # Dots must not survive into the stem: Path.with_suffix() treats the text
    # after the last dot as an extension, so "v1.0", "v1.0_1", "v1.0_2" all
    # mapped to the same "v1.wav" — the loop below could never find a free
    # name once that file existed, and the speaker/raw names disagreed.
    stem = sanitize_record_name(base_name).replace(".", "_")
    candidate = out_dir / stem
    counter = 0
    while True:
        speaker = candidate.with_name(f"{candidate.name}.wav")
        raw = candidate.with_name(f"{candidate.name}_raw.wav")
        if not speaker.exists() and not raw.exists():
            return candidate
        counter += 1
        candidate = out_dir / f"{stem}_{counter}"
channels=self.device_config["channels"], + rate=self.device_config["rate"], + input=True, + input_device_index=self.device_config["index"], + frames_per_buffer=self.device_config["chunk_size"], + ) + self._thread = threading.Thread(target=self._loop, daemon=True) + self._thread.start() + time.sleep(0.05) + + def _loop(self): + while not self._stop_event.is_set(): + try: + data = self._stream.read( + self.device_config["chunk_size"], exception_on_overflow=False) + self.frames.append(data) + except Exception as exc: + if not self._stop_event.is_set(): + self._error = exc + break + + def stop(self) -> bytes: + time.sleep(MONITOR_TAIL_SEC) + self._stop_event.set() + if self._stream is not None: + try: + self._stream.stop_stream() + except Exception: + pass + try: + self._stream.close() + except Exception: + pass + if self._thread is not None: + self._thread.join(timeout=1.0) + if self._error is not None: + raise RuntimeError(f"Speaker capture failed: {self._error}") + return b"".join(self.frames) + + +class ParecMonitorRecorder: + """Capture speaker output via `parec` (PulseAudio CLI).""" + + def __init__(self, device_config: dict[str, Any]): + self.device_config = device_config + self.frames: list[bytes] = [] + self._stop_event = threading.Event() + self._thread: Optional[threading.Thread] = None + self._proc: Optional[subprocess.Popen[bytes]] = None + self._error: Optional[BaseException] = None + + def start(self): + cmd = [ + "parec", + f"--device={self.device_config['name']}", + "--format=s16le", + f"--rate={self.device_config['rate']}", + f"--channels={self.device_config['channels']}", + ] + self._proc = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) + self._thread = threading.Thread(target=self._loop, daemon=True) + self._thread.start() + time.sleep(0.05) + + def _loop(self): + if self._proc is None or self._proc.stdout is None: + self._error = RuntimeError("parec did not start") + return + size = self.device_config["chunk_size"] * 
self.device_config["channels"] * 2 + while not self._stop_event.is_set(): + try: + data = self._proc.stdout.read(size) + if data: + self.frames.append(data) + continue + if self._proc.poll() is not None: + break + except Exception as exc: + if not self._stop_event.is_set(): + self._error = exc + break + + def stop(self) -> bytes: + time.sleep(MONITOR_TAIL_SEC) + self._stop_event.set() + if self._proc is not None and self._proc.poll() is None: + self._proc.terminate() + try: + self._proc.wait(timeout=1.0) + except subprocess.TimeoutExpired: + self._proc.kill() + if self._thread is not None: + self._thread.join(timeout=1.0) + if self._error is not None: + raise RuntimeError(f"parec capture failed: {self._error}") + return b"".join(self.frames) + + +# ─── session state ────────────────────────────────────────────────── + +@dataclass +class ReplaySessionState: + """Last generation kept in memory for replay/save-last.""" + text: str = "" + audio_bytes: bytes = b"" + speaker_capture: bytes = b"" + generated_at: str = "" + last_playback_at: str = "" + replay_count: int = 0 + saved_as: str = "" + + def as_status(self) -> dict[str, Any]: + return { + "text": self.text, + "has_audio": bool(self.audio_bytes), + "has_capture": bool(self.speaker_capture), + "generated_at": self.generated_at, + "last_playback_at": self.last_playback_at, + "replay_count": self.replay_count, + "saved_as": self.saved_as, + } + + +# ─── record index ─────────────────────────────────────────────────── + +def _load_index() -> dict[str, Any]: + if not RECORD_INDEX_PATH.exists(): + return {"total_records": 0, "records": []} + try: + payload = json.loads(RECORD_INDEX_PATH.read_text(encoding="utf-8")) + if not isinstance(payload, dict) or not isinstance(payload.get("records"), list): + raise ValueError("bad index structure") + payload.setdefault("total_records", len(payload["records"])) + return payload + except Exception as exc: + log.warning("record index unreadable, resetting: %s", exc) + return 
{"total_records": 0, "records": []} + + +def _save_index(payload: dict[str, Any]): + RECORD_INDEX_PATH.parent.mkdir(parents=True, exist_ok=True) + fd, tmp = tempfile.mkstemp(dir=str(RECORD_INDEX_PATH.parent), + suffix=".tmp") + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + json.dump(payload, f, indent=2, ensure_ascii=False) + os.replace(tmp, RECORD_INDEX_PATH) + except Exception: + try: + os.unlink(tmp) + except OSError: + pass + raise + + +def _resolve_record_path(path_str: str) -> Path: + """Resolve a path from the records index. + + Paths in records.json can be either: + - absolute (legacy — may be stale after scp to another machine) + - relative / basename — looked up under AUDIO_RECORDINGS_DIR + """ + if not path_str: + return AUDIO_RECORDINGS_DIR + p = Path(path_str) + if p.is_absolute(): + return p + return AUDIO_RECORDINGS_DIR / p + + +def _reconcile_index(payload: dict[str, Any]) -> dict[str, Any]: + """Drop records whose files no longer exist on disk.""" + surviving: list[dict[str, Any]] = [] + for entry in payload.get("records", []): + try: + speaker = _resolve_record_path( + entry["files"]["speaker_recording"]["path"]) + if speaker.exists(): + surviving.append(entry) + except (KeyError, TypeError): + continue + payload["records"] = surviving + payload["total_records"] = len(surviving) + return payload + + +def _build_file_info(path: Path, pcm: bytes, rate: int, + channels: int, sample_width: int) -> dict[str, Any]: + """Build a records.json file entry with a portable relative path. + + `path` can be an absolute path on disk — we store just the basename + so the index is portable across workstation ↔ robot. 
+ """ + return { + "name": path.name, + "path": path.name, # basename only — resolved via _resolve_record_path + "size_bytes": len(pcm), + "sample_rate": rate, + "channels": channels, + "sample_width_bytes": sample_width, + "duration_seconds": round( + audio_duration_seconds(pcm, rate, channels, sample_width), 3), + } + + +# ─── engine ───────────────────────────────────────────────────────── + +class TypedReplayEngine: + """Full-featured typed replay — generate, play, capture, save, replay.""" + + def __init__(self, voice_client, audio_mgr): + """voice_client: GeminiVoiceClient audio_mgr: AudioManager""" + self.voice_client = voice_client + self.audio_mgr = audio_mgr + self.session = ReplaySessionState() + self._gen_lock = threading.Lock() + self._play_lock = threading.Lock() + self._monitor_config = self._resolve_monitor_config() + AUDIO_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True) + + # ── monitor config ─────────────────────────────────────────── + def _resolve_monitor_config(self) -> Optional[dict[str, Any]]: + """Pick the backend for capturing speaker output. + + Priority: + 1. parec (cleanest — just listens to the speaker monitor source) + 2. PyAudio input device matching 'pulse' or 'default' + 3. 
None → capture disabled (generation still works) + """ + if shutil.which("parec"): + log.info("speaker capture: parec monitor=%s", DEFAULT_MONITOR_SOURCE) + return { + "backend": "parec", + "name": DEFAULT_MONITOR_SOURCE, + "rate": RECEIVE_SAMPLE_RATE, + "channels": CHANNELS, + "chunk_size": MONITOR_CHUNK_SIZE, + } + if pyaudio is None: + log.warning("speaker capture disabled — no parec and no pyaudio") + return None + try: + pya = self.audio_mgr.pya if self.audio_mgr else pyaudio.PyAudio() + except Exception: + return None + for i in range(pya.get_device_count()): + info = pya.get_device_info_by_index(i) + name = str(info.get("name", "")).lower() + if ("pulse" in name or "default" in name) and int(info.get("maxInputChannels", 0)) > 0: + log.info("speaker capture: pyaudio device=%s", info.get("name")) + return { + "backend": "pyaudio", + "index": i, + "name": str(info.get("name")), + "rate": int(info.get("defaultSampleRate", RECEIVE_SAMPLE_RATE)), + "channels": max(1, min(2, int(info.get("maxInputChannels", 1)))), + "chunk_size": MONITOR_CHUNK_SIZE, + } + log.warning("speaker capture disabled — no pulse/default pyaudio device") + return None + + def sample_width(self) -> int: + if pyaudio is None or self.audio_mgr is None or self.audio_mgr.pya is None: + return 2 # int16 + return self.audio_mgr.pya.get_sample_size(pyaudio.paInt16) + + # ── generation ─────────────────────────────────────────────── + + async def generate_audio(self, text: str) -> tuple[bytes, list[str]]: + """Route typed text through Gemini Live as the voice, first-try reliable. + + The session's system-prompt sets a persona ("You are Sanad…"), + so the prompt that most reliably gets audio out is a direct + address to the persona with the quoted text. A transparent + retry chain covers the edge cases where the model still + replies with text only. 
+ """ + stripped = text.strip() + if not stripped: + raise ValueError("text cannot be empty") + + if self.voice_client is None: + raise RuntimeError("voice_client unavailable") + if not self.voice_client.connected: + await self.voice_client.connect() + + # Ordered by empirical reliability — first variant wins ~95% of turns. + # The quoted-phrase form is the most consistent trigger for an + # audio-only response with the current Sanad persona prompt. + attempts = [ + f'قل هذا بالضبط وبدون إضافات: "{stripped}"', # Arabic: "Say this exactly, no additions" + f'Say this exactly, nothing else: "{stripped}"', + f'"{stripped}"', + ] + last_parts: list[str] = [] + for idx, wrapped in enumerate(attempts, start=1): + try: + audio_bytes, text_parts = await self.voice_client.send_text( + wrapped, owner="typed_replay") + except Exception as exc: + log.warning("Gemini TTS attempt %d failed: %s", idx, exc) + continue + if audio_bytes: + if idx > 1: + log.info("Gemini TTS succeeded on attempt %d", idx) + return audio_bytes, text_parts + last_parts = text_parts + log.warning("Gemini TTS attempt %d returned no audio — parts: %s", + idx, " | ".join(text_parts or [])[:120]) + return b"", last_parts + + # ── playback + capture ─────────────────────────────────────── + def play_audio(self, audio_bytes: bytes, capture_speaker: bool) -> bytes: + """Play Gemini PCM through the G1 chest speaker (via DDS) when + available; fall back to the host PulseAudio sink otherwise. + + The DDS path is audible on the robot; the PulseAudio path goes + to the Jetson's built-in audio codec, which isn't wired to any + audible output on the G1. `capture_speaker` is only supported + on the PulseAudio path (DDS has no monitor sink). 
+ """ + if not audio_bytes: + return b"" + if self.audio_mgr is None: + raise RuntimeError("audio_mgr unavailable — cannot play") + + # Try the G1 chest speaker first + g1_client = None + try: + g1_client = self.audio_mgr._get_g1_audio_client() + except Exception: + g1_client = None + + if g1_client is not None: + if capture_speaker: + log.info("capture_speaker requested, but G1 DDS path has no " + "PulseAudio monitor — capture skipped") + with self._play_lock: + try: + self.audio_mgr._play_pcm_via_g1( + audio_bytes, CHANNELS, RECEIVE_SAMPLE_RATE, + ) + return b"" + except Exception as exc: + log.warning("G1 speaker playback failed, falling back " + "to host PulseAudio sink: %s", exc) + # Fall through to the PyAudio path below + + if self.audio_mgr.pya is None: + raise RuntimeError("audio_mgr has no PyAudio backend for fallback") + + # Fallback: host PulseAudio sink (inaudible on G1, but keeps the + # capture-speaker feature working on desktop/dev setups). + with self._play_lock: + recorder = None + restore_source = False + if capture_speaker and self._monitor_config is not None: + if self._monitor_config["backend"] == "parec": + recorder = ParecMonitorRecorder(self._monitor_config) + else: + recorder = MonitorRecorder(self.audio_mgr.pya, self._monitor_config) + try: + run_pactl(["set-default-source", self._monitor_config["name"]]) + restore_source = True + except Exception as exc: + log.warning("couldn't switch default source to monitor: %s", exc) + + stream = None + try: + stream = self.audio_mgr.pya.open( + format=pyaudio.paInt16, + channels=CHANNELS, + rate=RECEIVE_SAMPLE_RATE, + output=True, + frames_per_buffer=CHUNK_SIZE, + ) + if recorder is not None: + recorder.start() + frame_bytes = CHUNK_SIZE * 2 + for offset in range(0, len(audio_bytes), frame_bytes): + stream.write(audio_bytes[offset:offset + frame_bytes]) + finally: + if stream is not None: + try: + stream.stop_stream() + finally: + stream.close() + + captured = b"" + try: + if recorder is not None: + 
captured = recorder.stop() + finally: + if restore_source: + try: + run_pactl(["set-default-source", DEFAULT_SOURCE]) + except Exception as exc: + log.warning("couldn't restore default source: %s", exc) + + return captured + + def save_audio(self, pcm: bytes, path: Path, channels: int, rate: int) -> None: + with wave.open(str(path), "wb") as wf: + wf.setnchannels(channels) + wf.setsampwidth(self.sample_width()) + wf.setframerate(rate) + wf.writeframes(pcm) + + # ── high-level API ─────────────────────────────────────────── + async def say(self, text: str, record: bool = False, + record_name: str = "") -> dict[str, Any]: + """Generate, play, capture, return metadata. Optionally persist.""" + if not text or not text.strip(): + raise ValueError("text cannot be empty") + if not self._gen_lock.acquire(blocking=False): + raise RuntimeError("another typed-replay generation is in progress") + try: + audio_bytes, text_parts = await self.generate_audio(text) + if not audio_bytes: + raise RuntimeError("Gemini returned no audio — parts: " + + " | ".join(text_parts or [])) + + generated_at = format_timestamp() + # Play + capture in a worker thread (PyAudio is sync) + captured = await asyncio.to_thread( + self.play_audio, audio_bytes, record) + playback_finished_at = format_timestamp() + + # Update session state + self.session.text = text + self.session.audio_bytes = audio_bytes + self.session.speaker_capture = captured + self.session.generated_at = generated_at + self.session.last_playback_at = playback_finished_at + self.session.replay_count = 1 + self.session.saved_as = "" + + result = { + "ok": True, + "text": text, + "gemini_text": text_parts, + "generated_at": generated_at, + "playback_finished_at": playback_finished_at, + "raw_duration_sec": round( + audio_duration_seconds(audio_bytes, RECEIVE_SAMPLE_RATE, + CHANNELS, self.sample_width()), 3), + "captured_speaker_bytes": len(captured), + "recorded": False, + } + + if record: + entry = self._persist_session(record_name or 
build_default_name(text)) + self.session.saved_as = entry["record_name"] + result["record"] = entry + result["recorded"] = True + + return result + finally: + self._gen_lock.release() + + def replay_last(self) -> dict[str, Any]: + """Re-play the cached audio without hitting Gemini.""" + if not self.session.audio_bytes: + raise RuntimeError("no cached generation — call say() first") + captured = self.play_audio(self.session.audio_bytes, capture_speaker=False) + self.session.replay_count += 1 + self.session.last_playback_at = format_timestamp() + return { + "ok": True, + "replay_count": self.session.replay_count, + "text": self.session.text, + "played_at": self.session.last_playback_at, + } + + def save_last(self, record_name: str = "") -> dict[str, Any]: + """Persist the last generation to the records index.""" + if not self.session.audio_bytes: + raise RuntimeError("no cached generation — call say() first") + entry = self._persist_session(record_name or build_default_name(self.session.text)) + self.session.saved_as = entry["record_name"] + return entry + + def _persist_session(self, record_name: str) -> dict[str, Any]: + base = ensure_unique_record_stem(record_name, AUDIO_RECORDINGS_DIR) + speaker_path = base.with_suffix(".wav") + raw_path = base.with_name(f"{base.name}_raw.wav") + + capture = self.session.speaker_capture + audio = self.session.audio_bytes + sw = self.sample_width() + + if capture: + cap_rate = (self._monitor_config or {}).get("rate", RECEIVE_SAMPLE_RATE) + cap_channels = (self._monitor_config or {}).get("channels", CHANNELS) + self.save_audio(capture, speaker_path, cap_channels, cap_rate) + else: + # No capture available → save raw as speaker too so every record + # has a .wav file for reconciliation checks. 
+ self.save_audio(audio, speaker_path, CHANNELS, RECEIVE_SAMPLE_RATE) + cap_rate = RECEIVE_SAMPLE_RATE + cap_channels = CHANNELS + capture = audio + + self.save_audio(audio, raw_path, CHANNELS, RECEIVE_SAMPLE_RATE) + + entry = { + "record_name": base.name, + "text": self.session.text, + "replay_count": self.session.replay_count, + "timeline": { + "audio_generated_at": self.session.generated_at, + "last_playback_finished_at": self.session.last_playback_at, + "saved_at": format_timestamp(), + }, + "audio_capture": { + "backend": (self._monitor_config or {}).get("backend", "none"), + "sink": DEFAULT_SINK, + "monitor_source": DEFAULT_MONITOR_SOURCE, + "restored_microphone_source": DEFAULT_SOURCE, + }, + "files": { + "speaker_recording": _build_file_info( + speaker_path, capture, cap_rate, cap_channels, sw), + "gemini_raw_output": _build_file_info( + raw_path, audio, RECEIVE_SAMPLE_RATE, CHANNELS, sw), + }, + } + payload = _reconcile_index(_load_index()) + payload["records"].append(entry) + payload["total_records"] = len(payload["records"]) + _save_index(payload) + log.info("saved record %s (%.1fs speaker, %.1fs raw)", + base.name, + entry["files"]["speaker_recording"]["duration_seconds"], + entry["files"]["gemini_raw_output"]["duration_seconds"]) + return entry + + # ── records CRUD ───────────────────────────────────────────── + def list_records(self) -> dict[str, Any]: + return _reconcile_index(_load_index()) + + def find_record(self, name: str) -> dict[str, Any]: + for e in _load_index().get("records", []): + if e.get("record_name") == name: + return e + raise KeyError(f"record not found: {name}") + + def rename_record(self, name: str, new_name: str) -> dict[str, Any]: + new_name = sanitize_record_name(new_name) + if not new_name: + raise ValueError("new_name empty after sanitize") + payload = _reconcile_index(_load_index()) + target = None + for e in payload["records"]: + if e.get("record_name") == name: + target = e + break + if target is None: + raise 
KeyError(f"record not found: {name}") + if any(e.get("record_name") == new_name for e in payload["records"]): + raise ValueError(f"a record named {new_name} already exists") + + old_speaker = _resolve_record_path(target["files"]["speaker_recording"]["path"]) + old_raw = _resolve_record_path(target["files"]["gemini_raw_output"]["path"]) + new_base = AUDIO_RECORDINGS_DIR / new_name + new_speaker = new_base.with_suffix(".wav") + new_raw = new_base.with_name(f"{new_base.name}_raw.wav") + + old_speaker.rename(new_speaker) + old_raw.rename(new_raw) + + target["record_name"] = new_name + target["files"]["speaker_recording"]["path"] = new_speaker.name # basename only + target["files"]["speaker_recording"]["name"] = new_speaker.name + target["files"]["gemini_raw_output"]["path"] = new_raw.name + target["files"]["gemini_raw_output"]["name"] = new_raw.name + _save_index(payload) + if self.session.saved_as == name: + self.session.saved_as = new_name + return target + + def delete_record(self, name: str) -> dict[str, Any]: + payload = _reconcile_index(_load_index()) + target = None + for e in payload["records"]: + if e.get("record_name") == name: + target = e + break + if target is None: + raise KeyError(f"record not found: {name}") + for key in ("speaker_recording", "gemini_raw_output"): + path = _resolve_record_path(target["files"][key]["path"]) + try: + path.unlink() + except FileNotFoundError: + pass + except Exception as exc: + log.warning("couldn't delete %s: %s", path, exc) + payload["records"] = [e for e in payload["records"] if e.get("record_name") != name] + payload["total_records"] = len(payload["records"]) + _save_index(payload) + if self.session.saved_as == name: + self.session.saved_as = "" + return {"deleted": name, "total_records": payload["total_records"]} + + def play_record(self, name: str, file_kind: str = "speaker") -> dict[str, Any]: + """Play a saved WAV. 
file_kind = 'speaker' or 'raw'.""" + entry = self.find_record(name) + file_key = "speaker_recording" if file_kind == "speaker" else "gemini_raw_output" + path = _resolve_record_path(entry["files"][file_key]["path"]) + if not path.exists(): + raise FileNotFoundError(str(path)) + with wave.open(str(path), "rb") as wf: + channels = wf.getnchannels() + sample_width = wf.getsampwidth() + sample_rate = wf.getframerate() + frames = wf.readframes(wf.getnframes()) + with self._play_lock: + if self.audio_mgr and self.audio_mgr.pya: + stream = self.audio_mgr.pya.open( + format=self.audio_mgr.pya.get_format_from_width(sample_width), + channels=channels, rate=sample_rate, + output=True, frames_per_buffer=CHUNK_SIZE, + ) + try: + chunk = CHUNK_SIZE * channels * sample_width + for offset in range(0, len(frames), chunk): + stream.write(frames[offset:offset + chunk]) + finally: + stream.stop_stream() + stream.close() + return { + "ok": True, "record_name": name, "file_kind": file_kind, + "duration_sec": round(audio_duration_seconds( + frames, sample_rate, channels, sample_width), 3), + } + + # ── status ─────────────────────────────────────────────────── + def status(self) -> dict[str, Any]: + return { + "voice_client_connected": bool( + self.voice_client and self.voice_client.connected), + "audio_mgr_ready": bool(self.audio_mgr and self.audio_mgr.pya), + "capture_backend": (self._monitor_config or {}).get("backend", "none"), + "records_dir": str(AUDIO_RECORDINGS_DIR), + "session": self.session.as_status(), + "total_records": len(_load_index().get("records", [])), + } diff --git a/vendor/Sanad/voice/voice_config.json b/vendor/Sanad/voice/voice_config.json new file mode 100644 index 0000000..00e151d --- /dev/null +++ b/vendor/Sanad/voice/voice_config.json @@ -0,0 +1,75 @@ +{ + "_description": "Tunables for voice/* modules. Loaded via core.config_loader.load('voice').", + + "sanad_voice": { + "_comment": "voice/sanad_voice.py — main live voice subprocess. 
Gemini API credentials (api_key, model, voice_name) come from core_config.json's gemini_defaults — single source of truth.", + "mic_gain": 1.0, + "play_chunk_bytes": 96000, + "log_dir": "~/logs", + "log_name": "gemini_live_v2", + "session_timeout_sec": 660, + "max_reconnect_delay_sec": 30, + "max_consecutive_errors": 10, + "no_messages_timeout_sec": 30 + }, + + "mic_udp": { + "_comment": "G1 built-in mic — UDP multicast subscriber", + "group": "239.168.123.161", + "port": 5555, + "buffer_max_bytes": 64000, + "read_timeout_sec": 0.04, + "socket_timeout_sec": 1.0 + }, + + "speaker": { + "_comment": "G1 built-in speaker — AudioClient.PlayStream wrapper", + "app_name": "sanad", + "begin_stream_pause_sec": 0.15, + "wait_finish_margin_sec": 0.3 + }, + + "vad": { + "_comment": "Gemini Live server-side voice-activity-detection config", + "start_sensitivity": "START_SENSITIVITY_HIGH", + "end_sensitivity": "END_SENSITIVITY_LOW", + "prefix_padding_ms": 20, + "silence_duration_ms": 200 + }, + + "barge_in": { + "threshold": 500, + "loud_chunks_needed": 3, + "cooldown_sec": 0.3, + "echo_suppress_below": 500, + "ai_speak_grace_sec": 0.15 + }, + + "recording": { + "enabled": true, + "dir_relative": "data/recordings" + }, + + "typed_replay": { + "_comment": "voice/typed_replay.py — max_text_len comes from dashboard.api_input", + "monitor_chunk_size": 512, + "monitor_tail_sec": 0.2 + }, + + "live_voice_loop": { + "_comment": "voice/live_voice_loop.py — arm phrase dispatcher. 
arm_txt filename comes from core.script_files.arm_phrases", + "trigger_log_size": 100, + "poll_interval_sec": 0.1, + "deferred_default": false, + "trigger_enabled_default": false + }, + + "local_tts": { + "_comment": "voice/local_tts.py — offline Coqui TTS", + "model_subdir": "speecht5_tts_clartts_ar", + "vocoder_subdir": "speecht5_hifigan", + "xvector_filename": "arabic_xvector_embedding.pt", + "sample_rate": 16000, + "channels": 1 + } +} diff --git a/vendor/Sanad/voice/wake_phrase_manager.py b/vendor/Sanad/voice/wake_phrase_manager.py new file mode 100644 index 0000000..dfe6de5 --- /dev/null +++ b/vendor/Sanad/voice/wake_phrase_manager.py @@ -0,0 +1,140 @@ +"""Wake-phrase registry. + +Maps wake phrases (e.g. "hey sanad") → skill / callback IDs. Phrases are +persisted in data/wake_phrases.json so dashboard edits survive restart. + +This module is deliberately lightweight — it only *stores* the mapping. +Matching a transcript against the registered phrases is done in +`voice/text_utils.match_phrase`, and the actual trigger is orchestrated +by `core/brain.py`'s skill registry. 
+""" + +from __future__ import annotations + +import json +import threading +from dataclasses import asdict, dataclass, field +from pathlib import Path +from typing import Any + +from Project.Sanad.core.logger import get_logger +from Project.Sanad.config import DATA_DIR + +log = get_logger("wake_phrase_manager") + +STATE_PATH = DATA_DIR / "wake_phrases.json" + + +@dataclass +class WakePhrase: + """A single wake phrase → action mapping.""" + phrase: str + action_id: str + enabled: bool = True + + def normalized(self) -> str: + return self.phrase.strip().lower() + + +class WakePhraseManager: + """Thread-safe in-memory store for wake phrases, persisted to disk.""" + + def __init__(self, path: Path = STATE_PATH): + self.path = Path(path) + self._lock = threading.Lock() + self._phrases: list[WakePhrase] = [] + self._load() + + # ── persistence ────────────────────────────────────────────────── + def _load(self) -> None: + if not self.path.exists(): + return + try: + data = json.loads(self.path.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError) as exc: + log.warning("wake_phrases.json unreadable: %s", exc) + return + with self._lock: + self._phrases = [ + WakePhrase(**{k: v for k, v in d.items() if k in WakePhrase.__annotations__}) + for d in data if isinstance(d, dict) and "phrase" in d + ] + log.info("Loaded %d wake phrase(s)", len(self._phrases)) + + def _save(self) -> None: + self.path.parent.mkdir(parents=True, exist_ok=True) + tmp = self.path.with_suffix(self.path.suffix + ".tmp") + tmp.write_text( + json.dumps([asdict(p) for p in self._phrases], indent=2, ensure_ascii=False), + encoding="utf-8", + ) + tmp.replace(self.path) + + # ── CRUD ───────────────────────────────────────────────────────── + def list(self) -> list[dict[str, Any]]: + with self._lock: + return [asdict(p) for p in self._phrases] + + def add(self, phrase: str, action_id: str) -> dict[str, Any]: + norm = phrase.strip().lower() + if not norm: + raise ValueError("phrase cannot be 
empty") + with self._lock: + for p in self._phrases: + if p.normalized() == norm and p.action_id == action_id: + return asdict(p) + wp = WakePhrase(phrase=phrase.strip(), action_id=action_id) + self._phrases.append(wp) + self._save() + return asdict(wp) + + def remove(self, phrase: str, action_id: str | None = None) -> bool: + norm = phrase.strip().lower() + with self._lock: + before = len(self._phrases) + self._phrases = [ + p for p in self._phrases + if not (p.normalized() == norm and + (action_id is None or p.action_id == action_id)) + ] + removed = before != len(self._phrases) + if removed: + self._save() + return removed + + def set_enabled(self, phrase: str, action_id: str, enabled: bool) -> bool: + norm = phrase.strip().lower() + with self._lock: + for p in self._phrases: + if p.normalized() == norm and p.action_id == action_id: + p.enabled = enabled + self._save() + return True + return False + + def for_action(self, action_id: str) -> set[str]: + """Return all enabled phrases registered for an action.""" + with self._lock: + return { + p.normalized() for p in self._phrases + if p.action_id == action_id and p.enabled + } + + def action_phrase_map(self) -> dict[str, set[str]]: + """Return {action_id: {phrases}} for all enabled entries.""" + result: dict[str, set[str]] = {} + with self._lock: + for p in self._phrases: + if p.enabled: + result.setdefault(p.action_id, set()).add(p.normalized()) + return result + + # ── status ─────────────────────────────────────────────────────── + def status(self) -> dict[str, Any]: + with self._lock: + enabled = sum(1 for p in self._phrases if p.enabled) + return { + "total": len(self._phrases), + "enabled": enabled, + "actions": sorted({p.action_id for p in self._phrases}), + } diff --git a/vendor/sanad_pkg/__init__.py b/vendor/sanad_pkg/__init__.py new file mode 100644 index 0000000..c7973ba --- /dev/null +++ b/vendor/sanad_pkg/__init__.py @@ -0,0 +1,15 @@ +"""sanad_pkg — shared library baked into the `sanad-base` Docker 
image. + +Modules: + license Offline Ed25519 license verification + entitlement. + license_check CLI gate run by every package entrypoint (`python -m sanad_pkg.license_check P1`). + bus EventBus shim — preserves Sanad's core/event_bus API, ZMQ-backed + across containers when pyzmq + SANAD_BUS_ADDR are present, else + an in-process fallback (identical API). + +Kept Python-3.8 compatible (the on-robot conda env) — no match-statements, +no PEP-604 unions in annotations. +""" + +__all__ = ["license", "bus"] +__version__ = "0.1.0" diff --git a/vendor/sanad_pkg/bus.py b/vendor/sanad_pkg/bus.py new file mode 100644 index 0000000..b4b8f41 --- /dev/null +++ b/vendor/sanad_pkg/bus.py @@ -0,0 +1,180 @@ +"""EventBus shim — drop-in for Sanad's core/event_bus.py API across containers. + +Same surface as the in-process bus (`on` / `off` / `emit` / `emit_sync`) so +existing Sanad call-sites change only their *import*, not their logic. When +`pyzmq` is importable AND `SANAD_BUS_ADDR` is set, events are also published to +/ received from the central `sanad-busd` XPUB/XSUB proxy, so handlers in OTHER +containers fire too. Otherwise it degrades to a pure in-process bus (identical +behavior to today's monolith) — which is all P1-standalone needs. + +Wire format on ZMQ: multipart [topic_bytes, json(kwargs)+_origin]. Each process +tags messages with a random origin id and ignores its own echoes. + +Env: + SANAD_BUS_PUB address this process PUBLISHES to (default tcp://127.0.0.1:5560) + SANAD_BUS_SUB address this process SUBSCRIBES from (default tcp://127.0.0.1:5561) + SANAD_BUS_ADDR if set (any value), enable ZMQ mode using the two above + +Kept Python-3.8 compatible. 
class Bus(object):
    """In-process event bus with an optional ZMQ bridge to other processes.

    Handlers are registered per event name with `on` / `off` and are called
    with the emitted keyword arguments. `emit` awaits coroutine results;
    `emit_sync` schedules them on the running event loop when there is one.
    After a successful `connect()`, every emitted event is also published
    over ZMQ, and events received from other processes are dispatched to the
    local handlers via `_dispatch_sync`.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._listeners = defaultdict(list)  # type: Dict[str, List[Callable]]
        # Random per-process tag used to recognise (and drop) our own echoes.
        self._origin = uuid.uuid4().hex
        self._zmq_enabled = False
        self._pub = None
        self._ctx = None
        self._sub_thread = None
        self._stop = threading.Event()

    # ── pub/sub registration (same as core/event_bus.EventBus) ──
    def on(self, event: str, callback: Callable) -> None:
        """Register `callback` for `event`."""
        with self._lock:
            self._listeners[event].append(callback)

    def off(self, event: str, callback: Callable) -> None:
        """Unregister `callback` from `event`; unknown pairs are ignored."""
        with self._lock:
            # .get() so that an unknown event does not create an empty entry
            # in the defaultdict.
            handlers = self._listeners.get(event)
            if not handlers:
                return
            try:
                handlers.remove(callback)
            except ValueError:
                pass

    async def emit(self, event: str, **kwargs: Any) -> None:
        """Publish `event` (if ZMQ is active) and await the local handlers."""
        self._publish(event, kwargs)
        await self._dispatch_async(event, kwargs)

    def emit_sync(self, event: str, **kwargs: Any) -> None:
        """Publish `event` (if ZMQ is active) and run local handlers now."""
        self._publish(event, kwargs)
        self._dispatch_sync(event, kwargs)

    # ── local dispatch (mirrors core/event_bus semantics) ──
    def _dispatch_sync(self, event: str, kwargs: Dict[str, Any]) -> None:
        """Call every handler for `event` from synchronous code.

        Coroutine handlers are scheduled on the running loop; when no loop is
        running in the calling thread they are dropped (with a warning for
        coroutine functions). A failing handler is logged and does not stop
        the remaining handlers.
        """
        with self._lock:
            # Snapshot so handlers may call on()/off() without deadlocking.
            handlers = list(self._listeners.get(event, []))
        for h in handlers:
            try:
                if asyncio.iscoroutinefunction(h):
                    try:
                        loop = asyncio.get_running_loop()
                        loop.create_task(h(**kwargs))
                    except RuntimeError:
                        _log.warning("async handler for %s dropped (no loop)", event)
                    continue
                res = h(**kwargs)
                if asyncio.iscoroutine(res):
                    try:
                        asyncio.get_running_loop().create_task(res)
                    except RuntimeError:
                        # No loop to run it on: close the coroutine so it does
                        # not trigger a "never awaited" warning.
                        res.close()
            except Exception:
                _log.exception("handler for %s failed", event)

    async def _dispatch_async(self, event: str, kwargs: Dict[str, Any]) -> None:
        """Call every handler for `event`, awaiting coroutine results in order."""
        with self._lock:
            handlers = list(self._listeners.get(event, []))
        for h in handlers:
            try:
                res = h(**kwargs)
                if asyncio.iscoroutine(res):
                    await res
            except Exception:
                _log.exception("handler for %s failed", event)

    # ── ZMQ transport (optional) ──
    def connect(self) -> bool:
        """Enable cross-container mode. Safe to call once at startup; no-op if
        pyzmq missing or SANAD_BUS_ADDR unset. Returns True if ZMQ is active."""
        if self._zmq_enabled:
            return True
        if not _HAVE_ZMQ or not os.environ.get("SANAD_BUS_ADDR"):
            _log.info("bus: in-process mode (zmq=%s, addr=%s)",
                      _HAVE_ZMQ, bool(os.environ.get("SANAD_BUS_ADDR")))
            return False
        pub_addr = os.environ.get("SANAD_BUS_PUB", "tcp://127.0.0.1:5560")
        sub_addr = os.environ.get("SANAD_BUS_SUB", "tcp://127.0.0.1:5561")
        pub = None
        try:
            # A previous close() leaves the stop flag set; clear it so the new
            # subscriber thread does not exit immediately.
            self._stop.clear()
            self._ctx = zmq.Context.instance()
            pub = self._ctx.socket(zmq.PUB)
            pub.connect(pub_addr)
            self._pub = pub
            self._sub_thread = threading.Thread(
                target=self._sub_loop, args=(sub_addr,), daemon=True)
            self._sub_thread.start()
            self._zmq_enabled = True
            _log.info("bus: ZMQ mode pub=%s sub=%s origin=%s",
                      pub_addr, sub_addr, self._origin[:8])
            return True
        except Exception:
            _log.exception("bus: ZMQ connect failed — staying in-process")
            # Roll back partial setup so nothing leaks and _publish stays off.
            self._stop.set()
            self._zmq_enabled = False
            self._pub = None
            self._sub_thread = None
            if pub is not None:
                try:
                    pub.close(0)
                except Exception:
                    pass
            return False

    def _publish(self, event: str, kwargs: Dict[str, Any]) -> None:
        """Send `event` to the proxy as [topic, json(kwargs + _origin)]."""
        if not self._zmq_enabled or self._pub is None:
            return
        try:
            body = dict(kwargs)
            body["_origin"] = self._origin
            # default=str: values that are not JSON-serialisable are sent as
            # their str() form rather than failing the publish.
            self._pub.send_multipart(
                [event.encode("utf-8"), json.dumps(body, default=str).encode("utf-8")])
        except Exception:
            _log.exception("bus: publish %s failed", event)

    def _sub_loop(self, sub_addr: str) -> None:
        """Subscriber thread body: receive remote events until `close()`."""
        sub = None
        try:
            sub = self._ctx.socket(zmq.SUB)
            sub.connect(sub_addr)
            sub.setsockopt(zmq.SUBSCRIBE, b"")  # all topics; filter locally by listeners
            while not self._stop.is_set():
                try:
                    # Poll with a timeout so the stop flag is re-checked.
                    if not sub.poll(timeout=500):
                        continue
                    frames = sub.recv_multipart()
                    if len(frames) != 2:
                        _log.warning("bus: dropped malformed message (%d frames)",
                                     len(frames))
                        continue
                    topic, raw = frames
                    event = topic.decode("utf-8", "replace")
                    try:
                        data = json.loads(raw.decode("utf-8", "replace"))
                    except ValueError:
                        _log.warning("bus: dropped %s (payload is not JSON)", event)
                        continue
                    if not isinstance(data, dict):
                        _log.warning("bus: dropped %s (payload is not an object)", event)
                        continue
                    if data.pop("_origin", None) == self._origin:
                        continue  # skip our own echo
                    self._dispatch_sync(event, data)
                except Exception:
                    _log.exception("bus: sub loop error")
                    # Back off briefly so a persistent socket error cannot
                    # turn this loop into a busy spin.
                    self._stop.wait(0.5)
        except Exception:
            _log.exception("bus: subscriber setup failed (%s)", sub_addr)
        finally:
            if sub is not None:
                try:
                    sub.close(0)
                except Exception:
                    pass

    def close(self) -> None:
        """Stop the subscriber thread and close the PUB socket.

        The bus keeps working in-process afterwards; `connect()` may be called
        again to re-enable ZMQ mode.
        """
        self._stop.set()
        # Disable publishing before the socket goes away so a concurrent or
        # later emit does not try to send on a closed socket.
        self._zmq_enabled = False
        pub, self._pub = self._pub, None
        if pub is not None:
            try:
                pub.close(0)
            except Exception:
                pass
        thread, self._sub_thread = self._sub_thread, None
        # Never join ourselves (close() called from a handler that is running
        # on the subscriber thread).
        if thread is not None and thread is not threading.current_thread():
            thread.join(timeout=1.0)
def machine_fingerprint(iface: Optional[str] = None) -> str:
    """Return a hex SHA-256 fingerprint built from this machine's identifiers.

    The hashed text is the MAC address of `iface` and the first non-empty
    machine-id file, joined with "|". `iface` defaults to
    $SANAD_DDS_INTERFACE, falling back to "eth0". Inputs that cannot be read
    are left out, so the result is best-effort rather than guaranteed unique.
    """
    if not iface:
        iface = os.environ.get("SANAD_DDS_INTERFACE", "eth0")

    def _read_stripped(location):
        # Unreadable or missing files count as "no value".
        try:
            return Path(location).read_text().strip()
        except Exception:
            return ""

    try:
        mac_address = Path("/sys/class/net/%s/address" % iface).read_text().strip()
    except Exception:
        mac_address = ""

    # Lazy scan: the dbus copy is only read when /etc/machine-id gave nothing.
    id_files = ("/etc/machine-id", "/var/lib/dbus/machine-id")
    machine_id = next((text for text in map(_read_stripped, id_files) if text), "")

    components = [value for value in (mac_address, machine_id) if value]
    digest = hashlib.sha256("|".join(components).encode("utf-8"))
    return digest.hexdigest()
class License(object):
    """A loaded + verified (or rejected) license.

    `payload` is the decoded license body, `valid` says whether verification
    succeeded, and `reason` carries the failure text (or "ok"). All
    entitlement queries answer "no" / the caller's default when the license
    is not valid.
    """

    def __init__(self, payload: Dict[str, Any], valid: bool, reason: str = ""):
        self.payload = payload or {}
        self.valid = valid
        self.reason = reason

    def __repr__(self) -> str:
        return "License(robot_id=%r, valid=%r, reason=%r)" % (
            self.robot_id, self.valid, self.reason)

    def _section(self, name: str) -> Dict[str, Any]:
        """Return payload[name] if it is a JSON object, else an empty dict.

        Guards against `"packages": null` or a list/string in that slot, which
        would otherwise raise AttributeError in the queries below.
        """
        section = self.payload.get(name)
        return section if isinstance(section, dict) else {}

    # -- entitlement queries --
    def package(self, pkg: str) -> bool:
        """True only when the license is valid and `pkg` is truthy in it."""
        if not self.valid:
            return False
        return bool(self._section("packages").get(pkg, False))

    def feature(self, name: str, default: Any = False) -> Any:
        """Return feature `name`, or `default` if missing or license invalid."""
        if not self.valid:
            return default
        return self._section("features").get(name, default)

    @property
    def robot_id(self) -> str:
        return str(self.payload.get("robot_id", ""))

    @property
    def expires(self) -> Optional[str]:
        return self.payload.get("expires")

    def summary(self) -> Dict[str, Any]:
        """Return a plain-dict report; entitlements are blank when invalid."""
        pkgs = self._section("packages") if self.valid else {}
        return {
            "valid": self.valid,
            "reason": self.reason,
            "robot_id": self.robot_id,
            "expires": self.expires,
            "packages": {k: bool(v) for k, v in pkgs.items()},
            "features": self._section("features") if self.valid else {},
        }
def load(path: Optional[str] = None) -> License:
    """Load + fully verify the license. Never raises — returns an invalid
    License with a `reason` on any failure (fail-closed).

    Checks run in order: file readable and a JSON object, Ed25519 signature
    over `canonical(payload)`, expiry, then (only when SANAD_LICENSE_BIND=1
    and the license carries a fingerprint) the machine binding.

    `path` overrides the default license location.
    """
    lpath = Path(path) if path else _default_license_path()
    try:
        # exists() is inside the guard too, so an OS-level error while probing
        # the path becomes an invalid License rather than an exception.
        if not lpath.exists():
            return License({}, False, "license file not found: %s" % lpath)
        doc = json.loads(lpath.read_text(encoding="utf-8"))
    except Exception as exc:
        return License({}, False, "license JSON unreadable: %s" % exc)

    # Valid JSON need not be an object: a list/string/number has no .get().
    payload = doc.get("payload") if isinstance(doc, dict) else None
    if not isinstance(payload, dict):
        return License({}, False, "license missing 'payload'")
    sig_b64 = doc.get("sig")

    # 1) signature
    if _HAVE_CRYPTO:
        if not sig_b64:
            return License(payload, False, "license missing 'sig'")
        try:
            pub = _load_pubkey()
            if pub is None:
                return License(payload, False, "public key unavailable")
            pub.verify(base64.b64decode(sig_b64), canonical(payload))
        except InvalidSignature:
            return License(payload, False, "signature verification FAILED")
        except Exception as exc:
            # Missing/garbled key file, bad base64, wrong key length, ...
            return License(payload, False, "signature check error: %s" % exc)
    elif os.environ.get("SANAD_LICENSE_DEV") != "1":
        return License(payload, False,
                       "cryptography unavailable and SANAD_LICENSE_DEV != 1")
    # else: dev box without cryptography — the UNSIGNED license is accepted.
    # NOTE(review): the result is reported as plain "ok"; nothing in the
    # returned License marks it as unsigned.

    # 2) expiry
    ok, reason = _check_expiry(payload)
    if not ok:
        return License(payload, False, reason)

    # 3) machine binding (optional) — a license without a fingerprint is
    # treated as unbound and passes even when binding is switched on.
    if os.environ.get("SANAD_LICENSE_BIND") == "1":
        want = payload.get("machine_fingerprint")
        if want:
            have = machine_fingerprint()
            if want != have:
                return License(payload, False,
                               "machine fingerprint mismatch (license bound to another robot)")

    return License(payload, True, "ok")
def main(argv=None) -> int:
    """Run the license gate for one package and return the process exit code.

    `argv` is the argument list without the program name (defaults to
    `sys.argv[1:]`); its first item is the package id, matched
    case-insensitively after stripping whitespace.

    Returns 0 when the package is entitled, 1 when the license is valid but
    does not include it, and 2 on a usage error or an invalid license.
    """
    args = list(sys.argv[1:] if argv is None else argv)
    pkg = args[0].strip().upper() if args else ""
    if not pkg:
        # A missing or blank package id is a usage error, not a lookup for "".
        sys.stderr.write(
            "usage: python -m sanad_pkg.license_check <PACKAGE>  (e.g. P1)\n")
        return 2

    lic = _lic.load()
    summary = lic.summary()
    if not lic.valid:
        sys.stderr.write("[license] INVALID: %s\n" % summary["reason"])
        return 2

    if lic.package(pkg):
        sys.stdout.write(
            "[license] %s ENTITLED (robot=%s, expires=%s)\n"
            % (pkg, summary["robot_id"] or "?", summary["expires"] or "never")
        )
        return 0

    entitled_names = [name for name, granted in summary["packages"].items() if granted]
    sys.stderr.write(
        "[license] %s NOT entitled (entitled: %s)\n"
        % (pkg, ", ".join(entitled_names) or "none")
    )
    return 1
+ZOFerXRMTVQxkxsawjmGXJz8n5HmXfb8qLMhO/7DIC4=