Update 2026-06-17 18:58:35

This commit is contained in:
kassam 2026-06-17 18:58:36 +04:00
parent e43581b089
commit b71130dc50
133 changed files with 25152 additions and 148 deletions

25
.env.example Normal file
View File

@ -0,0 +1,25 @@
# Copy to .env — docker compose reads it automatically from this directory.
# Signed license for THIS robot (default = the bundled example, entitles P1).
SANAD_LICENSE_FILE=./license/sanad.lic.example
# Enforce machine-fingerprint binding (1 on a delivered robot). When 1, also
# uncomment the /etc/machine-id mount in docker-compose.yml so the in-container
# fingerprint matches the host.
SANAD_LICENSE_BIND=0
# Audio: builtin (G1 chest mic+speaker over DDS) | plugged (USB e.g. Anker via PulseAudio)
SANAD_AUDIO_PROFILE=builtin
# DDS interface to the G1 firmware.
SANAD_DDS_INTERFACE=eth0
# Conversation language (overrides the license `language` feature if set).
# e.g. ar, en, hi, ur, zh, ru, fr ... (any Gemini-supported language)
SANAD_LANGUAGE=
# Bundle the chest-audio Unitree SDK into the image at build time
# (1 = yes, default; 0 = leaner USB/plugged-only image).
WITH_UNITREE_SDK=1
# Base image (override only for a GPU build).
BASE_OS_IMAGE=python:3.10-slim-bookworm
# Image name/tag (e.g. a registry path for pull-and-run).
# SANAD_IMAGE=sanad-p1:latest

18
.gitignore vendored
View File

@ -1,4 +1,20 @@
# Python caches
__pycache__/
*.pyc
Logs/
# Logs
*.log
Logs/
# Customer license — NEVER commit a real signed license; ship only the example.
license/sanad.lic
# Runtime data (keep the seed structure + config; ignore generated media).
data/recordings/*
data/audio/*
data/faces/*
data/photos/*
!data/**/.gitkeep
# NOTE: ./vendor IS committed on purpose — it is the vendored Sanad engine that
# makes this package build standalone. Only its caches are ignored (above).

View File

@ -1,39 +1,98 @@
# syntax=docker/dockerfile:1
# ─────────────────────────────────────────────────────────────────────────────
# Sanad Package 1 — Basic Communication.
# BUILD CONTEXT MUST BE Packages/ (FROM the prebuilt sanad-base):
# docker build -f Sanad_Package_1/Dockerfile -t sanad-p1:latest .
# (the top-level Packages/docker-compose.yml sets context: . for this service.)
#
# SELF-CONTAINED: builds from a public base image with NO dependency on a
# `sanad-base` image or a sibling `Sanad/` / `sanad_pkg/` checkout. The Sanad
# engine is vendored at ./vendor/Sanad and the license/bus lib at
# ./vendor/sanad_pkg, so the package repo builds and runs entirely on its own.
#
# Build context MUST be THIS package directory:
# docker build -t sanad-p1:latest .
# (docker-compose.yml uses `context: .`. On a Jetson Docker without buildx:
# DOCKER_BUILDKIT=0 docker build -t sanad-p1:latest .)
# ─────────────────────────────────────────────────────────────────────────────
ARG BASE_IMAGE=sanad-base:latest
FROM ${BASE_IMAGE}
ARG BASE_OS_IMAGE=python:3.10-slim-bookworm
FROM ${BASE_OS_IMAGE}
# P1 (comms) extra system deps — PortAudio + a C toolchain so pyaudio's C
# extension compiles on the slim base (python:3.10-slim has no compiler).
ENV DEBIAN_FRONTEND=noninteractive \
PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
PYTHONPATH=/app
WORKDIR /app
# System deps: shared (audio) + P1 (PortAudio + a C toolchain so pyaudio's
# extension compiles on the slim base).
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates libsndfile1 alsa-utils pulseaudio-utils \
portaudio19-dev libportaudio2 build-essential python3-dev \
&& rm -rf /var/lib/apt/lists/*
# P1 Python deps (sanad-base is python:3.10 → google-genai installs cleanly).
COPY Sanad_Package_1/requirements-p1.txt /tmp/requirements-p1.txt
RUN python3 -m pip install --no-cache-dir -r /tmp/requirements-p1.txt
# Python deps (base + P1 merged).
COPY requirements.txt /tmp/requirements.txt
RUN python3 -m pip install --no-cache-dir --upgrade pip \
&& python3 -m pip install --no-cache-dir -r /tmp/requirements.txt
# P1 launcher + routes + entrypoint + config (Sanad source baked into sanad-base).
COPY Sanad_Package_1/app_p1.py /app/app_p1.py
COPY Sanad_Package_1/routes_p1.py /app/routes_p1.py
COPY Sanad_Package_1/entrypoint.sh /app/entrypoint.sh
COPY Sanad_Package_1/config /app/pkg1_config
COPY Sanad_Package_1/static /app/pkg1_static
# ── Optional: Unitree SDK — G1 chest (builtin) audio over DDS ─────────────────
# WITH_UNITREE_SDK=1 (default) builds CycloneDDS + installs unitree_sdk2_python so
# the chest mic/speaker work out of the box. Best-effort: a failure NEVER breaks
# the image — chest audio is then unavailable (use SANAD_AUDIO_PROFILE=plugged);
# USB (plugged) audio always works without the SDK. Set =0 for a leaner image.
# NOTE: build the FULL CycloneDDS (do NOT pass -DBUILD_IDLC=NO) — the `cyclonedds`
# Python binding's find_package(CycloneDDS) needs idlc, else it fails with
# "Could not locate cyclonedds". Pin the binding to match the 0.10.x C library.
# NOTE: the steps are chained with `&&` on purpose. A `( … ) || …` subshell runs
# in a context where the shell ignores errexit, so `set -e` inside it has no
# effect: a failed clone/build would fall through to the trailing `rm -rf`
# (exit 0) and the WARN below would never be printed. With `&&` the first failing
# step ends the chain, the subshell exits non-zero, and the fallback runs.
ARG WITH_UNITREE_SDK=1
ENV CYCLONEDDS_HOME=/usr/local \
    LD_LIBRARY_PATH=/usr/local/lib
RUN if [ "$WITH_UNITREE_SDK" = "1" ]; then \
      ( set -ux \
        && apt-get update \
        && apt-get install -y --no-install-recommends git cmake build-essential \
        && git clone --depth 1 -b releases/0.10.x https://github.com/eclipse-cyclonedds/cyclonedds /tmp/cyclonedds \
        && cmake -S /tmp/cyclonedds -B /tmp/cyclonedds/build -DCMAKE_INSTALL_PREFIX=/usr/local \
        && cmake --build /tmp/cyclonedds/build --target install -j"$(nproc)" \
        && CYCLONEDDS_HOME=/usr/local CMAKE_PREFIX_PATH=/usr/local python3 -m pip install --no-cache-dir "cyclonedds==0.10.2" \
        && git clone --depth 1 https://github.com/unitreerobotics/unitree_sdk2_python /opt/unitree_sdk2_python \
        && python3 -m pip install --no-cache-dir -e /opt/unitree_sdk2_python \
        && python3 -c "import unitree_sdk2py; print('unitree_sdk2py OK')" \
        && rm -rf /tmp/cyclonedds /var/lib/apt/lists/* \
      ) || { \
        echo "WARN[P1]: Unitree SDK build failed — chest (builtin) audio unavailable; use SANAD_AUDIO_PROFILE=plugged"; \
        rm -rf /tmp/cyclonedds /var/lib/apt/lists/*; \
      }; \
    else echo "WITH_UNITREE_SDK=0 — skipping Unitree SDK (USB/plugged audio only)"; fi
# License/bus shim + PUBLIC verification key (vendored — no sanad_pkg sibling).
COPY vendor/sanad_pkg /app/sanad_pkg
RUN mkdir -p /etc/sanad && cp /app/sanad_pkg/pubkey.ed25519 /etc/sanad/pubkey.ed25519
# Canonical Sanad engine (vendored — no Sanad/ sibling, no sanad-base).
COPY vendor/Sanad /app/Sanad
# P1 launcher + routes + entrypoint + config + static.
COPY app_p1.py /app/app_p1.py
COPY routes_p1.py /app/routes_p1.py
COPY entrypoint.sh /app/entrypoint.sh
COPY config /app/pkg1_config
COPY static /app/pkg1_static
RUN chmod +x /app/entrypoint.sh
# Ship KEYLESS — strip any Gemini key baked into the Sanad config so the vendor
# key never ships in the image; the customer adds their own via the dashboard.
COPY Sanad_Package_1/strip_key.py /tmp/strip_key.py
# Ship KEYLESS — blank any Gemini key baked into the vendored Sanad config so the
# vendor key never ships; the customer adds their own via the dashboard.
COPY strip_key.py /tmp/strip_key.py
RUN python3 /tmp/strip_key.py && rm -f /tmp/strip_key.py
# Sanity: the vendored namespace imports cleanly.
RUN python3 - <<'PY'
import importlib.util as u, sys
mods = ("sanad_pkg.license", "sanad_pkg.bus", "Sanad")
ok = all(u.find_spec(m) for m in mods)
print("P1 self-contained: vendored modules importable:", ok)
sys.exit(0 if ok else 1)
PY
ENV SANAD_PACKAGE=P1 \
SANAD_DASHBOARD_PORT=8011 \
SANAD_DASHBOARD_HOST=0.0.0.0 \
SANAD_P1_STATIC=/app/pkg1_static
SANAD_P1_STATIC=/app/pkg1_static \
SANAD_LICENSE=/etc/sanad/sanad.lic \
SANAD_PUBKEY=/etc/sanad/pubkey.ed25519
EXPOSE 8011
ENTRYPOINT ["/app/entrypoint.sh"]

View File

@ -9,8 +9,8 @@ Roles:
- **Vendor side** (your workstation) — holds the Ed25519 **private** key
(`licensing/privkey.ed25519`) and signs licenses. Never goes on the robot.
- **Robot side** (the new Jetson) — runs the container; carries only the signed
`sanad.lic`. The **public** key is already baked into every image
(`sanad-base` copies it to `/etc/sanad/pubkey.ed25519`).
`sanad.lic`. The **public** key is already baked into the image (the Dockerfile
copies `vendor/sanad_pkg/pubkey.ed25519` to `/etc/sanad/pubkey.ed25519`).
---
@ -24,38 +24,36 @@ sudo usermod -aG docker "$USER"
# Optional: plug a USB speaker/mic (Anker) if you'll use the `plugged` profile.
```
## 1. Get the code onto the robot (so it can build)
## 1. Get the package onto the robot (so it can build)
`sanad-base` bakes the canonical `Sanad/` source, so the build needs **both**
`Sanad/` and `Packages/` present. From the workstation:
P1 is **self-contained** — it vendors the Sanad engine under `Sanad_Package_1/vendor`,
so you only copy the **package folder** (no `Sanad/` sibling, no `sanad-base`).
From the workstation:
```bash
rsync -az --exclude __pycache__ \
Project/Sanad Project/Packages \
Project/Packages/Sanad_Package_1 \
unitree@<NEWROBOT>:~/sanad_deploy/
```
This lays out `~/sanad_deploy/Sanad` + `~/sanad_deploy/Packages` (siblings — the
compose file mounts `../Sanad/data`, so they must sit side by side).
Everything P1 needs to build and run lives under `~/sanad_deploy/Sanad_Package_1`.
## 2. Build the images on the robot
## 2. Build the image on the robot
```bash
cd ~/sanad_deploy/Packages
# If this Docker has buildx (modern):
docker compose --profile base build # shared base (incl. chest-audio SDK)
docker compose --profile p1 build # P1 image
# If this Docker has NO buildx (e.g. Jetson Docker 26.x) use legacy build:
DOCKER_BUILDKIT=0 docker build -f sanad-base/Dockerfile -t sanad-base:latest .. # context = ~/sanad_deploy
DOCKER_BUILDKIT=0 docker build -f Sanad_Package_1/Dockerfile -t sanad-p1:latest . # context = Packages/
cd ~/sanad_deploy/Sanad_Package_1
# Modern Docker (buildx):
docker compose build
# Jetson Docker without buildx:
DOCKER_BUILDKIT=0 docker build -t sanad-p1:latest .
```
The build vendors the engine and (by default, `WITH_UNITREE_SDK=1`) compiles the
chest-audio SDK — first build takes a few minutes; later builds are cached.
> **Alternative — registry (build once, deploy to many robots):** on an x86 box
> with buildx + QEMU run
> `SANAD_REGISTRY=<reg>/ SANAD_TAG=1.0.0 PUSH=1 ./scripts/build_and_push.sh base p1`,
> then on each robot set `SANAD_REGISTRY`/`SANAD_TAG` in `.env` and
> `docker compose --profile p1 pull`.
> **Alternative — registry (build once, deploy to many robots):** build the
> `linux/arm64` image on an x86 box with buildx + QEMU, push to a registry, then
> on each robot set `SANAD_IMAGE=<reg>/sanad-p1:<tag>` in `.env` and
> `docker compose pull` instead of building.
## 3. License THIS robot
@ -74,8 +72,8 @@ the host's machine-id (step 4 does this) and compute the fingerprint the same wa
1. **Read the new robot's fingerprint on the host** (matches what the container
will see once `/etc/machine-id` is mounted):
```bash
cd ~/sanad_deploy/Packages
PYTHONPATH=. python3 -c 'from sanad_pkg import license as L; print(L.machine_fingerprint())'
cd ~/sanad_deploy/Sanad_Package_1
PYTHONPATH=vendor python3 -c 'from sanad_pkg import license as L; print(L.machine_fingerprint())'
```
2. **On the workstation**, write `claims.json`:
```json
@ -94,34 +92,34 @@ the host's machine-id (step 4 does this) and compute the fingerprint the same wa
--key licensing/privkey.ed25519 --in claims.json --out sanad.lic
```
(First time only, if no keypair yet: `python licensing/sign_license.py gen-keys --out-dir licensing`
— then rebuild images so the new `pubkey.ed25519` is baked in.)
in the monorepo, then run `./sync_vendor.sh` so the new `pubkey.ed25519` is
vendored into the package, and rebuild the image so it's baked in.)
4. **Copy to the robot:**
```bash
scp sanad.lic unitree@<NEWROBOT>:~/sanad_deploy/Packages/licensing/sanad.lic
scp sanad.lic unitree@<NEWROBOT>:~/sanad_deploy/Sanad_Package_1/license/sanad.lic
```
## 4. Configure `.env` and run
On the robot, `cd ~/sanad_deploy/Packages` and create `.env` from `.env.example`:
On the robot, `cd ~/sanad_deploy/Sanad_Package_1` and create `.env` from `.env.example`:
```ini
SANAD_LICENSE_FILE=./licensing/sanad.lic # the one you signed (3b) — or sanad.lic.example
SANAD_LICENSE_BIND=1 # 1 to enforce the fingerprint; 0 = unbound
SANAD_AUDIO_PROFILE=builtin # chest mic+speaker | plugged for USB/Anker
SANAD_LANGUAGE=ar # optional; else license feature / persona
SANAD_LICENSE_FILE=./license/sanad.lic # the one you signed (3b) — or sanad.lic.example
SANAD_LICENSE_BIND=1 # 1 to enforce the fingerprint; 0 = unbound
SANAD_AUDIO_PROFILE=builtin # chest mic+speaker | plugged for USB/Anker
SANAD_LANGUAGE=ar # optional; else license feature / persona
```
**If using a bound license (3b), add the host machine-id mount** to the `p1`
service in `docker-compose.yml` (under `volumes:`) so the container's fingerprint
matches the host's:
**If using a bound license (3b), uncomment the host machine-id mount** in the `p1`
service of `docker-compose.yml` so the container's fingerprint matches the host's:
```yaml
- "/etc/machine-id:/etc/machine-id:ro"
```
Then:
```bash
docker compose --profile p1 up -d
docker compose --profile p1 logs -f p1 # should print "[P1] entitled — lang=… port=8011"
docker compose up -d
docker compose logs -f # should print "[P1] entitled — lang=… port=8011"
```
Dashboard: **http://&lt;NEWROBOT&gt;:8011**
@ -152,17 +150,15 @@ through the chest (or USB) speaker.
> enabled at boot. Pick **one** of the two options below.
### Option A — Docker-native (simplest, no extra files)
The compose file already sets `restart: unless-stopped`, so you only need the
Docker daemon to start at boot:
```bash
# 1) make the daemon start at boot:
sudo systemctl enable docker
# 2) make P1 survive reboot + crash (overrides the on-failure policy on the
# live container — no compose edit needed):
docker update --restart unless-stopped sanad-p1
docker compose up -d # once; the container then returns on every boot
```
(Equivalent permanent form: change the `p1` service's `restart: on-failure`
`restart: unless-stopped` in `docker-compose.yml`, then `docker compose --profile
p1 up -d`.) After this, `docker compose --profile p1 up -d` once and the container
returns on every boot until you explicitly `docker compose --profile p1 down`.
(If the policy was ever switched back to `on-failure`, restore boot-survival with
`docker update --restart unless-stopped sanad-p1`.) The container keeps coming
back until you explicitly `docker compose down`.
### Option B — systemd unit (clean start/stop/status, mirrors the Sanad unit)
Create `/etc/systemd/system/sanad-p1.service` (adjust `User=` and the path to
@ -177,9 +173,9 @@ Wants=network-online.target
[Service]
Type=simple
User=unitree
WorkingDirectory=/home/unitree/sanad_deploy/Packages
ExecStart=/usr/bin/docker compose --profile p1 up
ExecStop=/usr/bin/docker compose --profile p1 down
WorkingDirectory=/home/unitree/sanad_deploy/Sanad_Package_1
ExecStart=/usr/bin/docker compose up
ExecStop=/usr/bin/docker compose down
Restart=on-failure
RestartSec=5
TimeoutStopSec=30

View File

@ -4,6 +4,34 @@ Hands-free conversation in **one operator-selected language** (Gemini Live),
audio via the **G1 chest** or **any plugged USB mic/speaker (Anker)**. **No**
voice-command motion, vision, recognition, or navigation. Dashboard on **:8011**.
This package is **self-contained** — it vendors the Sanad engine under `vendor/`,
so a clone of this folder builds and runs with no sibling folders or `sanad-base`.
## Quick start (how to start)
On the robot, **from this folder** (`Sanad_Package_1/`) — nothing else needed:
```bash
cp .env.example .env # optional: set language / audio / license path
docker compose up -d --build # build (vendored engine) + run
# Jetson Docker without buildx: DOCKER_BUILDKIT=0 docker compose up -d --build
```
Open **http://&lt;robot-ip&gt;:8011** and (the image ships **keyless**):
1. **Gemini API key** card → paste your key.
2. **Persona** card → set who the robot is + the language/dialect it speaks (saving restarts the live session).
3. **Audio** card → pick chest vs USB/Anker speaker, volume, mute.
4. Press **Start** in the **Conversation** card and talk.
Manage it:
```bash
docker compose logs -f # live logs
docker compose down # stop
./test_p1.sh <robot-ip>:8011 # smoke test (expect 11/11 PASS)
```
- **Auto-start on boot:** `sudo systemctl enable docker` — compose already runs P1
with `restart: unless-stopped`, so it returns after a reboot.
- **A brand-new G1** (build, sign a license, bind to the robot) → **`NEW_ROBOT_SETUP.md`**.
- **No Docker?** dev mode in the `gemini_sdk` conda env → `./p1ctl.sh start` (runs against `./vendor`).
## What it ships
- `app_p1.py` — launcher: bootstraps the `Project.Sanad` namespace, constructs
ONLY the comms subsystems (`brain`, `audio_mgr`, `voice_client`, `local_tts`,
@ -13,38 +41,48 @@ voice-command motion, vision, recognition, or navigation. Dashboard on **:8011**
the logs websocket. Serves the real Sanad SPA with non-P1 tabs hidden.
- `entrypoint.sh` — license gate (`license_check P1`; clean exit if unlicensed),
resolves language/audio/port (env > license feature > `config/p1_config.json`).
- `Dockerfile` / `requirements-p1.txt``FROM sanad-base`, adds PortAudio +
`google-genai`.
- `Dockerfile` / `requirements.txt` — **SELF-CONTAINED**: `FROM python:3.10-slim`,
installs all deps, and bakes the vendored engine — **no `sanad-base`**.
- `vendor/Sanad` + `vendor/sanad_pkg` — the vendored Sanad engine + license/bus
lib. Refresh from a monorepo checkout with `./sync_vendor.sh`.
- `config/p1_config.json` — defaults (language, audio profile, port, tab set).
- `docker-compose.p1.yml` — standalone run; top-level compose wires `--profile p1`.
- `docker-compose.yml` — standalone build + run (`context: .`); the top-level
`Packages/docker-compose.yml` can still run P1 in the fleet via `--profile p1`.
- `license/` — public verification key + an example license for the default mount.
It does **not** fork Sanad — it reuses the canonical source baked into
`sanad-base`.
It does **not** fork Sanad — it **vendors** the canonical source under `vendor/`
(re-synced by `sync_vendor.sh`), so the package builds and runs entirely on its
own with **no sibling folders**.
## Run & stop P1
**A) Docker (the productized way)** — from `Project/Packages` on the robot:
**A) Docker (the productized way)** — **self-contained**; from this package dir
(`Sanad_Package_1/`) on the robot, nothing else required:
```bash
docker compose --profile base build # build sanad-base once
docker compose --profile p1 up -d --build # run -> http://<robot>:8011
docker compose --profile p1 logs -f p1 # view logs
docker compose --profile p1 down # stop
# audio: SANAD_AUDIO_PROFILE=builtin (chest) | plugged (USB/Anker)
# language: license `language` feature, or SANAD_LANGUAGE=en docker compose --profile p1 up -d
docker compose up -d --build # build (vendored engine) + run -> http://<robot>:8011
docker compose logs -f # view logs
docker compose down # stop
# Jetson Docker without buildx: DOCKER_BUILDKIT=0 docker compose up -d --build
# audio: SANAD_AUDIO_PROFILE=builtin (chest) | plugged (USB/Anker)
# language: license `language` feature, or SANAD_LANGUAGE=en docker compose up -d --build
```
No `sanad-base` image and no sibling `Sanad/` checkout are needed — the engine is
vendored under `vendor/`. (The top-level `Packages/docker-compose.yml` can still
run P1 in the multi-package fleet via `--profile p1`.)
**B) Dev mode (no Docker)** — run P1 in the robot's `gemini_sdk` conda env via the
control script (deployed to `~/sanad_deploy/Packages/Sanad_Package_1/p1ctl.sh`):
control script. It runs against the vendored engine in `./vendor`, so only the
package folder is needed:
```bash
cd ~/sanad_deploy/Packages/Sanad_Package_1
./p1ctl.sh start # launch on :8011 (coexists with Sanad on :8000)
cd ~/sanad_deploy/Sanad_Package_1
./p1ctl.sh start # launch on :8011 (runs against ./vendor)
./p1ctl.sh status # process + /api/health
./p1ctl.sh logs 80 # tail the P1 log
./p1ctl.sh restart
./p1ctl.sh stop
```
Deploy/update from the workstation first:
`rsync -az --exclude __pycache__ Project/Packages Project/Sanad unitree@<robot>:~/sanad_deploy/`
`rsync -az --exclude __pycache__ Project/Packages/Sanad_Package_1 unitree@<robot>:~/sanad_deploy/`
**Logs:** the dashboard's **Logs** card streams live (`/ws/logs`) and the **⬇ Download**
button saves the full bundle (`/api/logs/bundle`) as `sanad_p1_logs_<ts>.txt`.
@ -113,13 +151,15 @@ of `SANAD_GEMINI_API_KEY` env and `core_config.json`).
## Plug-and-play status
- **Base:** `python:3.10-slim` (multi-arch) → `google-genai` installs cleanly, no
CUDA needed. Build on the Jetson (or x86) with `docker compose --profile base build`.
CUDA needed. Build on the Jetson (or x86) with `docker compose up -d --build`.
- **Works out of the box** with a plugged USB speaker/mic. The entrypoint runs a
**preflight** (python / google-genai / pyaudio / Unitree-SDK / audio profile)
and prints clear guidance if something's missing.
- **Language** is set via the **Persona** card (put the dialect/language directive
in the system prompt — saving applies it to the live session immediately).
- **Pending for true "pull-and-run":** prebuilt `linux/arm64` image in a registry;
bundling `unitree_sdk2_python` for turnkey chest (`builtin`) audio (today: use
`plugged`, or mount the SDK). In a multi-package deployment, audio output later
routes through the `Sanad_Core` hwbroker audio-lock (P1 standalone speaks directly).
- **Self-contained:** the Sanad engine is vendored under `vendor/` and chest-audio
`unitree_sdk2_python` is built into the image (`WITH_UNITREE_SDK=1`), so a clone
of this repo builds and runs with no sibling folders. For pull-and-run at fleet
scale, publish the `linux/arm64` image to a registry and `docker compose pull`.
In a multi-package deployment, audio output can route through the `Sanad_Core`
hwbroker audio-lock (P1 standalone speaks directly).

0
data/audio/.gitkeep Normal file
View File

1
data/audio_device.json Normal file
View File

@ -0,0 +1 @@
{}

5
data/camera_device.json Normal file
View File

@ -0,0 +1,5 @@
{
"profile_serial_assignments": {
"realsense_primary": ""
}
}

0
data/faces/.gitkeep Normal file
View File

21
data/motions/config.json Normal file
View File

@ -0,0 +1,21 @@
{
"gemini": {
"api_key": "",
"model": "models/gemini-2.5-flash-native-audio-preview-12-2025",
"voice_name": "Charon"
},
"audio": {
"send_sample_rate": 16000,
"receive_sample_rate": 24000,
"chunk_size": 512,
"g1_volume": 100
},
"motion": {
"action_cooldown_sec": 1.0,
"replay_hz": 60.0
},
"dashboard": {
"host": "0.0.0.0",
"port": 8000
}
}

0
data/photos/.gitkeep Normal file
View File

0
data/recordings/.gitkeep Normal file
View File

View File

@ -1,38 +0,0 @@
# Standalone compose for Package 1 (Basic Communication).
# Prereq: build the base image first:
# docker build -f sanad-base/Dockerfile -t sanad-base:latest ..
# Then from Packages/:
# docker compose -f Sanad_Package_1/docker-compose.p1.yml up --build
#
# (The top-level Packages/docker-compose.yml wires this under the `p1` profile.)
services:
p1:
build:
context: .. # = Project/Packages
dockerfile: Sanad_Package_1/Dockerfile
args:
BASE_IMAGE: sanad-base:latest
image: sanad-p1:latest
container_name: sanad-p1
# Host networking is REQUIRED — the G1 DDS link + Gemini cloud + chest audio.
network_mode: host
restart: on-failure
environment:
SANAD_PACKAGE: P1
SANAD_DASHBOARD_PORT: "8011"
SANAD_DASHBOARD_HOST: "0.0.0.0"
SANAD_VOICE_BRAIN: gemini
SANAD_AUDIO_PROFILE: "${SANAD_AUDIO_PROFILE:-builtin}" # builtin (chest) | plugged (USB/Anker)
SANAD_DDS_INTERFACE: "${SANAD_DDS_INTERFACE:-eth0}"
SANAD_LICENSE: /etc/sanad/sanad.lic
SANAD_PUBKEY: /etc/sanad/pubkey.ed25519
SANAD_LICENSE_BIND: "${SANAD_LICENSE_BIND:-0}"
# SANAD_LANGUAGE overrides the license `language` feature if set:
SANAD_LANGUAGE: "${SANAD_LANGUAGE:-}"
devices:
- "/dev/snd:/dev/snd" # USB/plugged audio (Anker) via ALSA/Pulse
volumes:
- "${SANAD_LICENSE_FILE:-./licensing/sanad.lic.example}:/etc/sanad/sanad.lic:ro"
- "../Sanad/data:/app/Sanad/data" # faces/recordings/state persist on host
# Optional chest-audio over DDS — mount the vendored SDK if using 'builtin':
# - "${UNITREE_SDK_DIR:-/home/unitree/unitree_sdk2_python}:/opt/unitree_sdk2_python:ro"

46
docker-compose.yml Normal file
View File

@ -0,0 +1,46 @@
# Sanad Package 1 (Basic Communication) — standalone compose file.
# The Sanad engine is vendored in ./vendor, so there are NO sibling folders and
# NO sanad-base image: this file plus this directory build and run P1.
#
#   docker compose up -d --build   # build + run -> http://<robot>:8011
#   docker compose logs -f         # view logs
#   docker compose down            # stop
#   # Jetson Docker without buildx: DOCKER_BUILDKIT=0 docker compose up -d --build
#
# Audio:    SANAD_AUDIO_PROFILE=builtin (G1 chest, needs the Unitree SDK baked at
#           build) | plugged (USB/Anker via PulseAudio, no SDK needed)
# License:  point SANAD_LICENSE_FILE at your signed sanad.lic (default = example).
# Language: set SANAD_LANGUAGE, or via the license `language` feature, or persona.
services:
  p1:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        BASE_OS_IMAGE: "${BASE_OS_IMAGE:-python:3.10-slim-bookworm}"
        WITH_UNITREE_SDK: "${WITH_UNITREE_SDK:-1}"
    image: "${SANAD_IMAGE:-sanad-p1:latest}"
    container_name: sanad-p1
    # Host networking REQUIRED — the G1 DDS link + Gemini cloud + chest audio.
    network_mode: host
    # `unless-stopped` survives reboot (with `sudo systemctl enable docker`).
    restart: unless-stopped
    environment:
      SANAD_PACKAGE: P1
      SANAD_DASHBOARD_PORT: "8011"
      SANAD_DASHBOARD_HOST: "0.0.0.0"
      SANAD_VOICE_BRAIN: gemini
      # builtin (chest) | plugged (USB/Anker)
      SANAD_AUDIO_PROFILE: "${SANAD_AUDIO_PROFILE:-builtin}"
      SANAD_DDS_INTERFACE: "${SANAD_DDS_INTERFACE:-eth0}"
      SANAD_LICENSE: /etc/sanad/sanad.lic
      SANAD_PUBKEY: /etc/sanad/pubkey.ed25519
      # 1 = enforce machine fingerprint
      SANAD_LICENSE_BIND: "${SANAD_LICENSE_BIND:-0}"
      SANAD_LANGUAGE: "${SANAD_LANGUAGE:-}"
    devices:
      # USB/plugged audio (Anker) via ALSA/Pulse
      - "/dev/snd:/dev/snd"
    volumes:
      - "${SANAD_LICENSE_FILE:-./license/sanad.lic.example}:/etc/sanad/sanad.lic:ro"
      # persist persona/recordings/config on host
      - "./data:/app/Sanad/data"
      # Bound license (SANAD_LICENSE_BIND=1) also needs the host machine-id so the
      # in-container fingerprint matches the host — uncomment:
      # - "/etc/machine-id:/etc/machine-id:ro"

1
license/pubkey.ed25519 Normal file
View File

@ -0,0 +1 @@
ZOFerXRMTVQxkxsawjmGXJz8n5HmXfb8qLMhO/7DIC4=

27
license/sanad.lic.example Normal file
View File

@ -0,0 +1,27 @@
{
"payload": {
"robot_id": "G1-SN-DEMO-0001",
"machine_fingerprint": null,
"packages": {
"P1": true,
"P2": false,
"P3": true,
"P4": false
},
"features": {
"language": "ar",
"multilingual": false,
"voice_command_motion": false,
"lipsync": false,
"mask": false,
"face_rec": true,
"places": true,
"memory": true,
"guide_tour": false,
"navigation": false
},
"issued": "2026-06-01",
"expires": "2030-01-01"
},
"sig": "cww/6qRfRsZhMa7G6D7A3V5MrdqU3Mg/nKTed/q1wHLcBOv7qKkeisPZRMcynj4E6RAcpAV1iiN2GbrlutVCCA=="
}

View File

@ -8,27 +8,30 @@
# ./p1ctl.sh status # process + /api/health
# ./p1ctl.sh logs [N] # tail N lines of the P1 log
#
# Overridable env: SANAD_DEPLOY_ROOT (default ~/sanad_deploy), SANAD_P1_PY,
# SANAD_DASHBOARD_PORT (8011), SANAD_AUDIO_PROFILE (builtin), SANAD_DDS_INTERFACE (eth0).
# Self-contained: runs against the vendored engine in ./vendor — no sibling
# Sanad/ or Packages/ checkout needed. Overridable env: SANAD_P1_PY,
# SANAD_DASHBOARD_PORT (8011), SANAD_AUDIO_PROFILE (builtin), SANAD_DDS_INTERFACE (eth0),
# SANAD_LICENSE / SANAD_PUBKEY.
set -u
ROOT="${SANAD_DEPLOY_ROOT:-$HOME/sanad_deploy}"
PKG_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PY="${SANAD_P1_PY:-$HOME/miniconda3/envs/gemini_sdk/bin/python}"
PORT="${SANAD_DASHBOARD_PORT:-8011}"
APP="$ROOT/Packages/Sanad_Package_1/app_p1.py"
LOG="$ROOT/p1.log"
APP="$PKG_DIR/app_p1.py"
LOG="$PKG_DIR/p1.log"
LIC="${SANAD_LICENSE:-$PKG_DIR/license/sanad.lic}"; [ -f "$LIC" ] || LIC="$PKG_DIR/license/sanad.lic.example"
_start() {
if pgrep -f app_p1.py >/dev/null 2>&1; then
echo "P1 already running on :$PORT"; return 0
fi
[ -f "$APP" ] || { echo "ERROR: $APP not found (deploy first)"; return 1; }
cd "$ROOT"
export SANAD_APP_DIR="$ROOT" \
SANAD_LICENSE="$ROOT/Packages/licensing/sanad.lic.example" \
SANAD_PUBKEY="$ROOT/Packages/sanad_pkg/pubkey.ed25519" \
SANAD_P1_STATIC="$ROOT/Packages/Sanad_Package_1/static" \
PYTHONPATH="$ROOT:$ROOT/Packages" \
cd "$PKG_DIR"
export SANAD_APP_DIR="$PKG_DIR/vendor" \
SANAD_LICENSE="$LIC" \
SANAD_PUBKEY="${SANAD_PUBKEY:-$PKG_DIR/license/pubkey.ed25519}" \
SANAD_P1_STATIC="$PKG_DIR/static" \
PYTHONPATH="$PKG_DIR/vendor" \
SANAD_DASHBOARD_PORT="$PORT" SANAD_DASHBOARD_HOST="0.0.0.0" \
SANAD_VOICE_BRAIN="gemini" \
SANAD_AUDIO_PROFILE="${SANAD_AUDIO_PROFILE:-builtin}" \

View File

@ -1,10 +0,0 @@
# Sanad Package 1 (Basic Communication) extra deps, on top of sanad-base.
# Comms-minimal subset of Sanad/requirements.txt. sanad-base is python:3.10-slim,
# so google-genai installs cleanly (no version gymnastics).
google-genai>=1.0.0
pyaudio
soundfile
requests
# unitree_sdk2py is NOT on PyPI — needed only for the 'builtin' (chest)
# audio profile over DDS. Provide it via the vendored unitree_sdk2_python
# (COPY/volume) or run P1 with SANAD_AUDIO_PROFILE=plugged (PulseAudio).

29
requirements.txt Normal file
View File

@ -0,0 +1,29 @@
# Sanad Package 1 — Basic Communication — self-contained dependency set.
# python:3.10-slim base, so google-genai installs cleanly with no version
# gymnastics. NO torch / transformers / opencv / pyrealsense2 — P1 is comms
# only (no local-TTS model, no vision, no CUDA).
# Web dashboard (FastAPI SPA + websockets)
fastapi
uvicorn[standard]
pydantic
python-multipart
websockets
# IPC bus shim + offline license verification
pyzmq
cryptography
# Audio framing
numpy
# Gemini Live voice + audio I/O
google-genai>=1.0.0
pyaudio
soundfile
requests
# NOTE: unitree_sdk2py is NOT on PyPI. The 'builtin' (G1 chest) audio profile
# over DDS needs it — the Dockerfile builds it from source when
# WITH_UNITREE_SDK=1 (default). With SANAD_AUDIO_PROFILE=plugged (USB/Anker) it
# is not required.

65
sync_vendor.sh Executable file
View File

@ -0,0 +1,65 @@
#!/usr/bin/env bash
# Refresh the vendored Sanad engine + sanad_pkg from a full monorepo checkout.
# P1 ships a SELF-CONTAINED copy of the Sanad source under ./vendor so the repo
# builds standalone. When Sanad/ changes upstream, run this from a checkout that
# has Project/Sanad + Project/Packages, then commit the updated ./vendor.
#
#   ./sync_vendor.sh [/path/to/Project]   # default: ../../ (Packages/.. = Project/)
#
# Excludes runtime data (recordings/audio/faces), Logs, caches, the 53M temp3d
# 3D viewer (P1 hides that tab), tests, and local dev-tool settings (.claude/) —
# keeps code + config + dashboard SPA.
set -euo pipefail

HERE="$(cd "$(dirname "$0")" && pwd)"
PROJECT="${1:-$(cd "$HERE/../.." && pwd)}"   # Packages/Sanad_Package_1 -> ../../ = Project/
SRC_SANAD="$PROJECT/Sanad"
SRC_PKG="$PROJECT/Packages/sanad_pkg"
SRC_LIC="$PROJECT/Packages/licensing"

# Validate EVERY required input before touching ./vendor: the next step wipes
# the vendored tree, so a missing source must abort while the old copy is intact.
[ -d "$SRC_SANAD" ] || { echo "ERROR: no Sanad/ at $SRC_SANAD (pass the Project/ path)" >&2; exit 1; }
[ -d "$SRC_PKG" ]   || { echo "ERROR: no sanad_pkg at $SRC_PKG" >&2; exit 1; }
[ -f "$SRC_SANAD/data/motions/config.json" ] \
  || { echo "ERROR: no data/motions/config.json under $SRC_SANAD" >&2; exit 1; }
[ -f "$SRC_LIC/pubkey.ed25519" ] \
  || { echo "ERROR: no pubkey.ed25519 at $SRC_LIC" >&2; exit 1; }

echo ">> vendoring Sanad engine from $SRC_SANAD"
rm -rf "$HERE/vendor"; mkdir -p "$HERE/vendor"
rsync -a \
  --exclude 'data/' --exclude 'Logs/' --exclude '__pycache__/' --exclude '*.pyc' \
  --exclude '.git/' --exclude '.claude/' \
  --exclude 'dashboard/static/temp3d/' --exclude 'tests/' \
  "$SRC_SANAD/" "$HERE/vendor/Sanad/"

echo ">> seeding minimal data/"
mkdir -p "$HERE/vendor/Sanad/data/motions"
cp "$SRC_SANAD/data/motions/config.json" "$HERE/vendor/Sanad/data/motions/config.json"
# Optional seeds: skip when absent, but let a real copy failure abort (set -e).
for j in audio_device.json camera_device.json; do
  if [ -f "$SRC_SANAD/data/$j" ]; then
    cp "$SRC_SANAD/data/$j" "$HERE/vendor/Sanad/data/$j"
  fi
done
for d in recordings audio faces photos; do
  mkdir -p "$HERE/vendor/Sanad/data/$d"
  touch "$HERE/vendor/Sanad/data/$d/.gitkeep"
done

echo ">> vendoring sanad_pkg + public key"
rm -rf "$HERE/vendor/sanad_pkg"; cp -r "$SRC_PKG" "$HERE/vendor/sanad_pkg"
find "$HERE/vendor/sanad_pkg" -name __pycache__ -type d -prune -exec rm -rf {} + 2>/dev/null || true
mkdir -p "$HERE/license"
cp "$SRC_LIC/pubkey.ed25519" "$HERE/license/pubkey.ed25519"
# The Dockerfile bakes vendor/sanad_pkg/pubkey.ed25519 into the image, while
# p1ctl.sh (dev mode) defaults to license/pubkey.ed25519. Warn when the two
# copies are not the same key (or the vendored one is missing).
if ! cmp -s "$HERE/vendor/sanad_pkg/pubkey.ed25519" "$HERE/license/pubkey.ed25519"; then
  echo "WARN: vendor/sanad_pkg/pubkey.ed25519 (baked into the image) is missing or differs from license/pubkey.ed25519 (dev mode) — a license signed for one key will not verify against the other" >&2
fi

echo ">> ship keyless (blank any baked Gemini key in the seed)"
python3 - "$HERE" <<'PY'
import json, sys

h = sys.argv[1]
targets = (
    (h + "/vendor/Sanad/config/core_config.json", "gemini_defaults"),
    (h + "/vendor/Sanad/data/motions/config.json", "gemini"),
)
for p, sec in targets:
    try:
        with open(p, encoding="utf-8") as fh:
            d = json.load(fh)
    except FileNotFoundError:
        # Nothing to blank if this config is not part of the vendored tree.
        continue
    except (OSError, ValueError) as exc:
        # An unreadable/unparseable config may still contain a key: fail loudly
        # instead of silently shipping it.
        print("ERROR: cannot verify", p, "is keyless:", exc, file=sys.stderr)
        sys.exit(1)
    s = d.get(sec)
    if isinstance(s, dict) and s.get("api_key"):
        s["api_key"] = ""
        with open(p, "w", encoding="utf-8") as fh:
            json.dump(d, fh, ensure_ascii=False, indent=2)
        print(" blanked", sec, "in", p)
PY

echo ">> refresh ./data seed mirror (keep structure, drop runtime media)"
rsync -a --delete \
  --exclude 'recordings/*' --exclude 'audio/*' --exclude 'faces/*' --exclude 'photos/*' \
  "$HERE/vendor/Sanad/data/" "$HERE/data/"
for d in recordings audio faces photos; do
  mkdir -p "$HERE/data/$d"
  touch "$HERE/data/$d/.gitkeep"
done

echo ">> done. vendor: $(du -sh "$HERE/vendor" | cut -f1) — review & commit ./vendor ./data ./license"

7
vendor/Sanad/.claude/settings.json vendored Normal file
View File

@ -0,0 +1,7 @@
{
"permissions": {
"allow": [
"Bash(node -e ' *)"
]
}
}

4
vendor/Sanad/.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
__pycache__/
*.pyc
Logs/
*.log

12
vendor/Sanad/G1_Controller/__init__.py vendored Normal file
View File

@ -0,0 +1,12 @@
"""G1_Controller — manual dashboard locomotion control (N2 Phase 1).
`LocoController` wraps the Unitree `LocoClient` + `MotionSwitcherClient` for
operator-driven walking, postures and a discrete step pad. It reuses the arm
controller's single process-wide DDS init (one `ChannelFactoryInitialize`) and
is gated behind an in-memory "Enable movement" arm flag that defaults OFF every
boot. See dashboard/routes/controller.py for the REST surface.
"""
from Project.Sanad.G1_Controller.loco_controller import LocoController
__all__ = ["LocoController"]

View File

@ -0,0 +1,567 @@
"""LocoController — manual G1 locomotion via the Unitree LocoClient (N2 Phase 1).
Ported from the proven scripts in G1_Lootah/Controller (g1_mode_controller.py,
keyboard_controller.py, hanger_boot_sequence.py). Design notes:
* **One DDS init per process.** The arm controller owns the single
`ChannelFactoryInitialize(0, nic)` (motion/arm_controller.py). This class
NEVER initialises DDS — it lazily builds its `LocoClient` /
`MotionSwitcherClient` only after `arm._initialized` is True.
* **Default DISARMED.** `_armed` starts False every boot and gates every WRITE
method. Reads (status / fsm / joints), E-STOP and disarm are ALWAYS allowed.
* **StopMove watchdog.** Continuous `Move(..., True)` never self-terminates, so a
daemon thread StopMoves if no `move()` refresh arrives within
`watchdog_timeout_sec`. The frontend re-sends setpoints at ~10 Hz, so a tab
close / network drop trips the watchdog within the timeout.
* **Velocity caps.** Symmetric clamp on vx/vy/vyaw — Walk 0.6, Run 1.2.
* **Allow-anytime-warn.** move/step never hard-block on FSM; if not walk-ready
they still execute but return a `warning`.
* **Sim fallback.** When `unitree_sdk2py` is absent (workstation), every write
returns `{"simulated": True}` (never raises) so the whole UI is testable.
SDK facts confirmed from source — do not "fix" them:
* `LocoClient.Move(vx, vy, vyaw, True)` — the continuous-mode kwarg is misspelled
`continous_move` (one n); we pass it POSITIONALLY to avoid a TypeError.
* `LocoClient` has NO StandUp()/Squat() — use SetFsmId(4)/SetFsmId(2).
* FSM id / mode are read via the private RPC `bot._Call(7001/7002, "{}")`.
"""
from __future__ import annotations
import json
import threading
import time
from typing import Any, Optional
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
log = get_logger("loco_controller")
# -- SDK import (optional) -----------------------------------------------------
# The Unitree SDK is an optional dependency. When the import fails, the client
# classes are bound to None and _HAS_SDK is False; code that checks _HAS_SDK
# then runs in simulation mode instead of talking to the robot.
try:
    from unitree_sdk2py.g1.loco.g1_loco_client import LocoClient
    from unitree_sdk2py.comm.motion_switcher.motion_switcher_client import (
        MotionSwitcherClient,
    )
    _HAS_SDK = True
except ImportError:
    # Keep the names defined so later references do not raise NameError.
    LocoClient = None
    MotionSwitcherClient = None
    _HAS_SDK = False
    log.warning("Unitree SDK not available — LocoController in simulation mode")
# RPC api-ids used to read the FSM id / mode back through LocoClient
# (noted upstream as stable across SDK builds).
ROBOT_API_ID_LOCO_GET_FSM_ID = 7001
ROBOT_API_ID_LOCO_GET_FSM_MODE = 7002

# Names of the G1 29-DoF joints at motor indices 12..28, listed in index order
# (0-11 are the legs and carry no name here; 12-14 waist, 15-21 left arm,
# 22-28 right arm). Consumed by the Diagnostics joint read-out.
JOINT_NAMES = dict(enumerate(
    (
        # waist: 12-14
        "WAIST_YAW", "WAIST_ROLL", "WAIST_PITCH",
        # left arm: 15-21
        "L_SHOULDER_PITCH", "L_SHOULDER_ROLL", "L_SHOULDER_YAW",
        "L_ELBOW", "L_WRIST_ROLL", "L_WRIST_PITCH", "L_WRIST_YAW",
        # right arm: 22-28
        "R_SHOULDER_PITCH", "R_SHOULDER_ROLL", "R_SHOULDER_YAW",
        "R_ELBOW", "R_WRIST_ROLL", "R_WRIST_PITCH", "R_WRIST_YAW",
    ),
    start=12,
))

# Discrete step pad: unit sign of (vx, vy, vyaw) for each direction. The actual
# magnitude is step_speed_frac * cap_walk (a gentle single step).
_STEP_DIRS = dict(
    forward=(1.0, 0.0, 0.0),
    backward=(-1.0, 0.0, 0.0),
    slide_left=(0.0, 1.0, 0.0),
    slide_right=(0.0, -1.0, 0.0),
    rotate_left=(0.0, 0.0, 1.0),
    rotate_right=(0.0, 0.0, -1.0),
)

# Posture names accepted by LocoController.posture().
_POSTURES = (
    "zero_torque",
    "damp",
    "stand_up",
    "squat",
    "sit",
    "low_stand",
    "high_stand",
    "lie_to_stand",
)
class LocoController:
    """Thread-safe manual locomotion control with a simulation fallback.

    Wraps a lazily-built ``LocoClient`` / ``MotionSwitcherClient`` pair. When
    the SDK is missing or the shared arm controller has not initialised DDS,
    every write returns a dict with ``"simulated": True`` instead of raising.

    ``estop`` / ``stop_move`` / ``disarm_movement`` bump a stop-generation
    counter; ``move`` / ``step`` / ``prep_mode`` capture it and bail out as
    soon as it changes. The ``_armed`` flag is only stored and reported here;
    no method in this class checks it before commanding motion.
    """

    def __init__(self, arm=None):
        """Create the controller.

        Args:
            arm: the shared arm controller. It is only read here
                (``_initialized``, ``is_busy``, ``cancel()``,
                ``get_current_q()``); may be None.
        """
        self._arm = arm                  # shared ArmController (owns the ONE DDS init)
        self._bot = None                 # LocoClient (lazy)
        self._msc = None                 # MotionSwitcherClient (lazy)
        self._lc_ready = False
        self._lock = threading.RLock()   # serialise all loco client WRITE calls
        self._armed = False              # in-memory MANUAL gate — OFF every boot
        self._cur_v = (0.0, 0.0, 0.0)    # last commanded (vx, vy, vyaw)
        self._teleop_active = False
        self._last_msc_mode: Optional[str] = None
        # watchdog
        self._last_move_ts = 0.0
        self._wd_thread: Optional[threading.Thread] = None
        self._wd_stop = threading.Event()
        self._wd_stop.set()              # not running until armed
        # Monotonic stop-generation counter, bumped under _lock by
        # estop/stop/disarm. move()/step()/prep_mode() capture it at start and
        # bail the instant it changes — so E-STOP preempts an in-flight motion
        # immediately AND can never be silently "un-cancelled" by a concurrent
        # command (a lock-free Event clear() could; an int compare under the
        # lock cannot).
        self._stop_gen = 0
        # Serializes the discrete blocking operations (step/prep_mode) so two
        # can't overlap and interleave Move commands. Continuous teleop move()
        # is intentionally NOT guarded by this.
        self._discrete_busy = False
        cfg = _cfg_section("motion", "loco_controller")
        self._cap_walk = float(cfg.get("cap_walk", 0.6))
        self._cap_run = float(cfg.get("cap_run", 1.2))
        self._lin_step = float(cfg.get("lin_step", 0.05))
        self._ang_step = float(cfg.get("ang_step", 0.2))
        self._wd_timeout = float(cfg.get("watchdog_timeout_sec", 0.5))
        self._block_window = float(cfg.get("arm_block_window_sec", 1.5))
        self._step_dur = float(cfg.get("step_duration_sec", 0.6))
        self._step_frac = float(cfg.get("step_speed_frac", 0.5))
        self._loco_timeout = float(cfg.get("loco_timeout_sec", 10.0))
        self._msc_timeout = float(cfg.get("msc_timeout_sec", 5.0))

    # ── client lifecycle ─────────────────────────────────────────────────────
    def _ensure_client(self) -> bool:
        """Lazily build LocoClient + MotionSwitcherClient. Returns readiness.

        Never initialises DDS — requires the shared arm to have already run the
        single ChannelFactoryInitialize. Returns False when the SDK is absent,
        the arm is not initialised, or client construction fails.
        """
        if not _HAS_SDK:
            return False
        if self._lc_ready:
            return True
        if self._arm is None or not getattr(self._arm, "_initialized", False):
            return False
        with self._lock:
            # Re-check under the lock: another thread may have finished first.
            if self._lc_ready:
                return True
            try:
                bot = LocoClient()
                bot.SetTimeout(self._loco_timeout)
                bot.Init()
                msc = MotionSwitcherClient()
                msc.SetTimeout(self._msc_timeout)
                msc.Init()
                # Publish only after BOTH clients initialised successfully.
                self._bot = bot
                self._msc = msc
                self._lc_ready = True
                log.info("LocoClient + MotionSwitcherClient ready")
            except Exception as exc:
                log.error("LocoClient init failed: %s", exc)
                self._lc_ready = False
        return self._lc_ready

    def _safe_call(self, name: str, fn, *a, **kw):
        """Call ``fn(*a, **kw)`` and return ``(True, result)``.

        Any exception is logged under ``name`` and reported as
        ``(False, None)`` instead of propagating.
        """
        try:
            return True, fn(*a, **kw)
        except Exception as exc:
            log.error("%s failed: %s", name, exc)
            return False, None

    def _rpc_get_int(self, api_id: int):
        """Read the ``"data"`` field of a LocoClient RPC reply, or None.

        Returns None when there is no client, the call reports a non-zero
        code or empty payload, or anything raises (deliberately silent:
        this sits on the status-polling path).
        """
        bot = self._bot
        if bot is None:
            return None
        try:
            code, data = bot._Call(api_id, "{}")
            if code == 0 and data:
                return json.loads(data).get("data")
        except Exception:
            pass
        return None

    @staticmethod
    def _clamp(v: float, cap: float) -> float:
        """Clamp ``v`` symmetrically to ``[-cap, +cap]``.

        NaN is mapped to 0.0. Without that guard ``min(cap, nan)`` evaluates
        to ``cap``, so a NaN setpoint would be commanded as full positive
        speed.
        """
        fv = float(v)
        if fv != fv:  # NaN is the only float that is not equal to itself
            return 0.0
        return max(-cap, min(cap, fv))

    # ── FSM / readiness ──────────────────────────────────────────────────────
    def fsm_id(self):
        """Current FSM id via RPC, or None when unavailable."""
        return self._rpc_get_int(ROBOT_API_ID_LOCO_GET_FSM_ID)

    def fsm_mode(self):
        """Current FSM mode via RPC, or None when unavailable."""
        return self._rpc_get_int(ROBOT_API_ID_LOCO_GET_FSM_MODE)

    def _walk_ready_warning(self) -> Optional[str]:
        """allow-anytime-warn: None when ready, else a human message.

        Also returns None when the client is not up (simulation), so sim
        commands never carry a warning.
        """
        if not self._lc_ready:
            return None
        fid = self.fsm_id()
        fmode = self.fsm_mode()
        if fid == 200 and fmode not in (None, 2):
            return None
        return (f"Robot not in walk-ready FSM (id={fid}, mode={fmode}). "
                f"Command sent anyway.")

    # ── arm flag + watchdog ──────────────────────────────────────────────────
    def is_armed(self) -> bool:
        """Return the in-memory manual-control flag."""
        return self._armed

    def movement_active(self) -> bool:
        """True when the robot may be walking: manual armed, teleop active, OR a
        move/step issued within the block window. Used as the arm's motion-block
        predicate so the arm never replays while the robot is (or just was)
        moving — regardless of whether the MANUAL gate or the GEMINI gate
        (Phase 3 voice dispatch, which calls move/step directly) triggered it."""
        if self._armed or self._teleop_active:
            return True
        return (time.monotonic() - self._last_move_ts) < self._block_window

    def arm_movement(self) -> dict:
        """Unlock manual control. Cancels any in-flight arm motion first so the
        arm and locomotion are never active simultaneously (movement wins)."""
        try:
            if self._arm is not None and getattr(self._arm, "is_busy", False):
                log.info("arming movement — cancelling in-flight arm motion")
                self._arm.cancel()
        except Exception:
            # A failed cancel must not prevent arming.
            log.exception("arm.cancel() on arm_movement failed")
        with self._lock:
            self._armed = True
        self._start_watchdog()
        log.info("movement ARMED")
        return {"ok": True, "armed": True}

    def disarm_movement(self) -> dict:
        """Lock manual control, stop the watchdog and issue a StopMove."""
        with self._lock:
            self._stop_gen += 1      # break any in-flight step/prep/move
            self._armed = False
            self._teleop_active = False
        self._wd_stop.set()
        try:
            self._raw_stop()
        except Exception:
            log.exception("StopMove on disarm failed")
        log.info("movement DISARMED")
        return {"ok": True, "armed": False}

    def _start_watchdog(self):
        """Clear the stop event and start the watchdog thread if none is alive."""
        self._wd_stop.clear()
        if self._wd_thread is None or not self._wd_thread.is_alive():
            self._wd_thread = threading.Thread(
                target=self._watchdog_loop, daemon=True, name="loco-watchdog")
            self._wd_thread.start()

    def _watchdog_loop(self):
        """StopMove when teleop is active but no setpoint arrived in time."""
        period = max(0.02, min(0.1, self._wd_timeout / 2.0))
        while not self._wd_stop.is_set():
            fire = False
            # Read-and-decide under the lock (atomic check-then-act); the actual
            # StopMove runs after release so the critical section stays tiny.
            with self._lock:
                if self._teleop_active and (time.monotonic() - self._last_move_ts) > self._wd_timeout:
                    self._teleop_active = False
                    fire = True
            if fire:
                log.warning("watchdog: teleop setpoint stale (>%.2fs) — StopMove",
                            self._wd_timeout)
                try:
                    self._raw_stop()
                except Exception:
                    log.exception("watchdog StopMove failed")
            self._wd_stop.wait(period)

    def _raw_stop(self) -> bool:
        """Issue StopMove if the client is up; no-op in sim. Lock-light."""
        if not self._lc_ready or self._bot is None:
            return False
        with self._lock:
            ok, _ = self._safe_call("StopMove", self._bot.StopMove)
        return ok

    # ── movement ─────────────────────────────────────────────────────────────
    def move(self, vx: float, vy: float, vyaw: float, run: bool = False) -> dict:
        """Command a continuous velocity setpoint.

        Each component is clamped to the walk cap, or the run cap when ``run``
        is true. Returns a dict with ``ok``, the clamped ``sent`` values,
        ``capped``, an optional ``warning`` and ``simulated``; ``cancelled`` is
        set when a stop landed while this call was in flight.
        """
        cap = self._cap_run if run else self._cap_walk
        cvx, cvy, cvyaw = self._clamp(vx, cap), self._clamp(vy, cap), self._clamp(vyaw, cap)
        capped = (cvx, cvy, cvyaw) != (float(vx), float(vy), float(vyaw))
        warning = self._walk_ready_warning()
        sent = {"vx": cvx, "vy": cvy, "vyaw": cvyaw}
        with self._lock:
            my_gen = self._stop_gen              # capture under lock
        if not self._ensure_client():
            with self._lock:                     # sim: record intent for UI/watchdog
                self._cur_v = (cvx, cvy, cvyaw)
                self._last_move_ts = time.monotonic()
                self._teleop_active = True
            self._start_watchdog()
            return {"ok": True, "sent": sent, "capped": capped,
                    "warning": warning, "simulated": True}
        with self._lock:
            # If an E-STOP / stop / disarm landed since we captured my_gen, do NOT
            # (re)command velocity — and do NOT stamp the motion flags (so a
            # cancelled tick doesn't extend the arm-block window).
            if self._stop_gen != my_gen:
                return {"ok": False, "cancelled": True, "sent": sent,
                        "capped": capped, "warning": warning, "simulated": False}
            self._cur_v = (cvx, cvy, cvyaw)
            self._last_move_ts = time.monotonic()
            self._teleop_active = True
            self._safe_call("SetBalanceMode", self._bot.SetBalanceMode, 1)
            # The continuous-move flag is passed positionally on purpose.
            ok, _ = self._safe_call("Move", self._bot.Move, cvx, cvy, cvyaw, True)
        self._start_watchdog()
        return {"ok": bool(ok), "sent": sent, "capped": capped,
                "warning": warning, "simulated": False}

    def stop_move(self) -> dict:
        """Halt translation/rotation. Allowed even when disarmed."""
        with self._lock:
            self._stop_gen += 1
            self._teleop_active = False
        if not self._ensure_client():
            return {"ok": True, "simulated": True}
        ok = self._raw_stop()
        return {"ok": bool(ok), "simulated": False}

    def estop(self) -> dict:
        """Emergency stop = StopMove only (no Damp / FSM change → keeps posture).

        ALWAYS allowed, even disarmed and in sim. Bumps the stop generation so any
        in-flight move()/step()/prep_mode() bails immediately (no lock wait)."""
        with self._lock:
            self._stop_gen += 1
            self._teleop_active = False
            self._cur_v = (0.0, 0.0, 0.0)
        if not self._ensure_client():
            log.warning("E-STOP (sim)")
            return {"ok": True, "simulated": True}
        ok = self._raw_stop()
        log.warning("E-STOP — StopMove issued")
        return {"ok": bool(ok), "simulated": False}

    def step(self, direction: str) -> dict:
        """Discrete one-step pad: Move for step_duration then StopMove.

        Blocking (~step_duration); call via asyncio.to_thread from the route.
        The sleep loop does NOT hold self._lock, so E-STOP / StopMove (which take
        the lock briefly) preempt it immediately; the loop also bails the moment
        the stop generation changes.

        Returns a dict with ``ok``, ``dir``, ``warning`` and ``simulated``.
        ``ok`` is False when the direction is unknown, another discrete
        operation is running (``reason: "busy"``), the Move call failed, or
        the step was cancelled by a stop (``cancelled: True``).
        """
        if direction not in _STEP_DIRS:
            return {"ok": False, "reason": f"unknown direction: {direction}"}
        sx, sy, syaw = _STEP_DIRS[direction]
        k = self._cap_walk * self._step_frac
        vx, vy, vyaw = sx * k, sy * k, syaw * k
        warning = self._walk_ready_warning()
        with self._lock:
            if self._discrete_busy:
                return {"ok": False, "dir": direction, "reason": "busy",
                        "warning": warning, "simulated": not self._lc_ready}
            self._discrete_busy = True
            my_gen = self._stop_gen
            self._last_move_ts = time.monotonic()
            self._teleop_active = True
        self._start_watchdog()
        if not self._ensure_client():
            with self._lock:
                self._teleop_active = False
                self._discrete_busy = False
            return {"ok": True, "dir": direction, "warning": warning, "simulated": True}
        move_ok = False
        cancelled = False
        try:
            with self._lock:
                if self._stop_gen != my_gen:         # stopped before we began
                    return {"ok": False, "dir": direction, "cancelled": True,
                            "warning": warning, "simulated": False}
                self._safe_call("SetBalanceMode", self._bot.SetBalanceMode, 1)
                move_ok, _ = self._safe_call("Move", self._bot.Move, vx, vy, vyaw, True)
            t_end = time.monotonic() + self._step_dur
            while time.monotonic() < t_end:
                if self._stop_gen != my_gen:
                    cancelled = True                 # a stop landed mid-step
                    break
                with self._lock:
                    self._last_move_ts = time.monotonic()   # keep watchdog fed
                time.sleep(0.05)
        finally:
            # Always end the step with StopMove and release the busy flag.
            with self._lock:
                self._safe_call("StopMove", self._bot.StopMove)
                self._teleop_active = False
                self._discrete_busy = False
        # Report the real outcome instead of an unconditional success.
        result = {"ok": bool(move_ok) and not cancelled, "dir": direction,
                  "warning": warning, "simulated": False}
        if cancelled:
            result["cancelled"] = True
        return result

    # ── postures / modes ─────────────────────────────────────────────────────
    def prep_mode(self) -> dict:
        """PREP — StopMove → Damp → StandUp(FSM4) → height ramp → BalanceStand(0).

        Exact order from g1_mode_controller.prep_mode, minus the blocking input().
        Blocking (~1s); call via asyncio.to_thread. Returns ``ok: False`` with
        ``reason: "busy"`` when another discrete operation is running, or with
        ``cancelled: True`` when a stop lands during the height ramp."""
        if not self._ensure_client():
            return {"ok": True, "mode": "prep", "simulated": True}
        with self._lock:
            if self._discrete_busy:
                return {"ok": False, "mode": "prep", "reason": "busy", "simulated": False}
            self._discrete_busy = True
            my_gen = self._stop_gen
            self._safe_call("StopMove", self._bot.StopMove)
            self._safe_call("Damp", self._bot.Damp)
            self._safe_call("SetFsmId(4)", self._bot.SetFsmId, 4)
        try:
            # Height ramp OUTSIDE the lock so E-STOP can preempt at any time.
            h = 0.02
            while h <= 0.5 + 1e-9:   # epsilon absorbs float accumulation error
                if self._stop_gen != my_gen:
                    log.warning("PREP cancelled (E-STOP)")
                    return {"ok": False, "mode": "prep", "cancelled": True, "simulated": False}
                with self._lock:
                    self._safe_call("SetStandHeight", self._bot.SetStandHeight, round(h, 3))
                time.sleep(0.03)
                h += 0.02
            with self._lock:
                self._safe_call("BalanceStand", self._bot.BalanceStand, 0)
                self._safe_call("SetStandHeight", self._bot.SetStandHeight, 0.22)
        finally:
            with self._lock:
                self._discrete_busy = False
        log.info("PREP complete")
        return {"ok": True, "mode": "prep", "simulated": False}

    def ready_start_mode(self) -> dict:
        """READY = PREP then Start (FSM 200 / balance engaged).

        If PREP did not complete (cancelled by a stop, or rejected as busy),
        Start is NOT issued and the PREP result is returned with ``mode``
        rewritten to ``"ready"`` — a cancelled PREP must not be followed by
        engaging the walking FSM.
        """
        prep = self.prep_mode()
        if not prep.get("ok"):
            blocked = dict(prep)
            blocked["mode"] = "ready"
            return blocked
        if not self._ensure_client():
            return {"ok": True, "mode": "ready", "simulated": True}
        with self._lock:
            # Fall back to the raw FSM id when the client has no Start().
            if hasattr(self._bot, "Start"):
                ok, _ = self._safe_call("Start", self._bot.Start)
            else:
                ok, _ = self._safe_call("SetFsmId(200)", self._bot.SetFsmId, 200)
        log.info("READY/START complete")
        return {"ok": bool(ok), "mode": "ready", "simulated": False}

    def posture(self, name: str) -> dict:
        """Switch to one of the named postures in ``_POSTURES``.

        Returns ``ok: False`` with a ``reason`` for an unknown name; otherwise
        a dict with ``ok``, ``posture`` and ``simulated``.
        """
        if name not in _POSTURES:
            return {"ok": False, "reason": f"unknown posture: {name}"}
        if not self._ensure_client():
            return {"ok": True, "posture": name, "simulated": True}
        bot = self._bot
        with self._lock:
            if name == "zero_torque":
                ok, _ = self._safe_call("ZeroTorque", bot.ZeroTorque)
            elif name == "damp":
                ok, _ = self._safe_call("Damp", bot.Damp)
            elif name == "stand_up":
                ok, _ = self._safe_call("SetFsmId(4)", bot.SetFsmId, 4)
            elif name == "squat":
                ok, _ = self._safe_call("SetFsmId(2)", bot.SetFsmId, 2)
            elif name == "sit":
                ok, _ = self._safe_call("Sit", bot.Sit)
            elif name == "low_stand":
                ok, _ = self._safe_call("LowStand", bot.LowStand)
            elif name == "high_stand":
                ok, _ = self._safe_call("HighStand", bot.HighStand)
            elif name == "lie_to_stand":
                # Fall back to the raw FSM id when the helper is missing.
                if hasattr(bot, "Lie2StandUp"):
                    ok, _ = self._safe_call("Lie2StandUp", bot.Lie2StandUp)
                else:
                    ok, _ = self._safe_call("SetFsmId(702)", bot.SetFsmId, 702)
            else:                                    # unreachable (guarded above)
                ok = False
        return {"ok": bool(ok), "posture": name, "simulated": False}

    def set_balance_mode(self, mode: int) -> dict:
        """Forward ``int(mode)`` to SetBalanceMode (simulated when no client)."""
        if not self._ensure_client():
            return {"ok": True, "balance_mode": int(mode), "simulated": True}
        with self._lock:
            ok, _ = self._safe_call("SetBalanceMode", self._bot.SetBalanceMode, int(mode))
        return {"ok": bool(ok), "balance_mode": int(mode), "simulated": False}

    def set_stand_height(self, h: float) -> dict:
        """Forward ``float(h)`` to SetStandHeight (simulated when no client)."""
        if not self._ensure_client():
            return {"ok": True, "height": float(h), "simulated": True}
        with self._lock:
            ok, _ = self._safe_call("SetStandHeight", self._bot.SetStandHeight, float(h))
        return {"ok": bool(ok), "height": float(h), "simulated": False}

    # ── MotionSwitcher ───────────────────────────────────────────────────────
    def msc_check(self) -> dict:
        """Query the MotionSwitcher mode name and cache it for status().

        ``mode_name`` is None when no client is available or the call fails.
        """
        if not self._ensure_client() or self._msc is None:
            return {"mode_name": None, "simulated": not self._lc_ready}
        try:
            ret = self._msc.CheckMode()
            name = None
            # Accept either a (code, dict) tuple or a bare dict reply.
            if isinstance(ret, tuple) and len(ret) >= 2 and isinstance(ret[1], dict):
                name = ret[1].get("name")
            elif isinstance(ret, dict):
                name = ret.get("name")
            self._last_msc_mode = name
            return {"mode_name": name}
        except Exception as exc:
            log.error("msc_check failed: %s", exc)
            return {"mode_name": None}

    def msc_select_ai(self) -> dict:
        """Ask the MotionSwitcher to select the "ai" mode."""
        if not self._ensure_client() or self._msc is None:
            return {"ok": True, "simulated": True}
        with self._lock:
            ok, _ = self._safe_call("SelectMode(ai)", self._msc.SelectMode, "ai")
        return {"ok": bool(ok), "simulated": False}

    def msc_release(self) -> dict:
        """Ask the MotionSwitcher to release the current mode."""
        if not self._ensure_client() or self._msc is None:
            return {"ok": True, "simulated": True}
        with self._lock:
            ok, _ = self._safe_call("ReleaseMode", self._msc.ReleaseMode)
        return {"ok": bool(ok), "simulated": False}

    def reconnect(self) -> dict:
        """Drop and rebuild Loco + MSC clients (does NOT re-init the DDS factory)."""
        with self._lock:
            self._bot = None
            self._msc = None
            self._lc_ready = False
        ok = self._ensure_client()
        return {"ok": bool(ok), "lc_ready": self._lc_ready}

    # ── reads ────────────────────────────────────────────────────────────────
    def joints(self) -> dict:
        """Return name and position for joints 12..28 from the arm controller.

        A missing arm, a failed read, or a short position list yields 0.0 for
        the affected joints.
        """
        q: list = []
        try:
            if self._arm is not None:
                q = self._arm.get_current_q()
        except Exception:
            q = []
        out = []
        for idx in range(12, 29):
            val = q[idx] if idx < len(q) else 0.0
            out.append({"idx": idx, "name": JOINT_NAMES.get(idx, f"motor_{idx}"),
                        "q": float(val)})
        return {"joints": out}

    def status(self) -> dict:
        """Snapshot of controller state for the dashboard."""
        # Polling /status lazily brings up the client once arm DDS is ready.
        self._ensure_client()
        fid = self.fsm_id() if self._lc_ready else None
        fmode = self.fsm_mode() if self._lc_ready else None
        walk_ready = bool(self._lc_ready and fid == 200 and fmode not in (None, 2))
        return {
            "sdk_available": _HAS_SDK,
            "lc_ready": self._lc_ready,
            "armed": self._armed,
            "fsm_id": fid,
            "fsm_mode": fmode,
            "walk_ready": walk_ready,
            "msc_mode": self._last_msc_mode,
            "teleop_active": self._teleop_active,
            "last_velocity": {"vx": self._cur_v[0], "vy": self._cur_v[1], "vyaw": self._cur_v[2]},
            "caps": {"walk": self._cap_walk, "run": self._cap_run},
            "arm_initialized": bool(self._arm is not None and getattr(self._arm, "_initialized", False)),
        }

    # ── shutdown helper ──────────────────────────────────────────────────────
    def shutdown(self):
        """Best-effort StopMove + disarm for process shutdown."""
        try:
            self.estop()
        finally:
            # Disarm even if the E-STOP itself raised.
            self.disarm_movement()

412
vendor/Sanad/README.md vendored Normal file
View File

@ -0,0 +1,412 @@
# Sanad
Voice + motion assistant for the Unitree G1 humanoid. **Gemini Live** (or a
fully-offline pipeline) handles bilingual Arabic/English conversation; an arm
controller plays built-in SDK poses and recorded JSONL macros; a locomotion
controller walks/turns the robot; an optional camera feeds **Gemini-side face &
place recognition**; everything is orchestrated through a fault-isolated
**FastAPI dashboard** on `http://<robot>:8000`.
```
┌──────────────────────────────────────────────────────────────────────┐
│ Dashboard (FastAPI) ── http://<robot>:8000 │
│ ├─ Operations Quick-fire arm actions + gestural-speaking │
│ ├─ Voice & Audio Live Gemini, Typed Replay, Wake Phrases, Audio │
│ ├─ Motion & Replay SDK actions, JSONL replays, macros, teaching │
│ ├─ Controller Locomotion teleop, postures, FSM modes, E-STOP │
│ ├─ Recognition Camera vision + face gallery + zones/places │
│ ├─ Recordings Skill registry, saved Gemini turns │
│ ├─ Temperature Live 3D motor-temperature heatmap (three.js) │
│ ├─ Terminal In-browser shell (PTY) to the robot │
│ └─ Settings & Logs System info, tail/stream live logs │
└──────────────────────────────────────────────────────────────────────┘
├─ voice/sanad_voice.py (subprocess — model-agnostic voice loop)
│ ├─ gemini/script.py (Gemini Live brain — audio+video+state)
│ └─ local/script.py (offline brain — VAD→STT→LLM→TTS)
├─ gemini/client.py (short-session client for Typed Replay)
├─ gemini/subprocess.py (spawns+supervises sanad_voice.py;
│ pushes camera frames + motion state
│ to the child over its stdin)
├─ voice/movement_dispatch.py(Gemini spoken phrase → locomotion)
├─ vision/camera.py (RealSense/USB capture daemon)
├─ vision/face_gallery.py (data/faces/ CRUD for the primer turn)
├─ vision/zone_gallery.py (data/zones/ places + "go here" targets)
├─ motion/arm_controller.py (G1 arm DDS publisher — owns DDS init)
├─ G1_Controller/loco_controller.py (G1 locomotion via LocoClient)
├─ voice/audio_io.py (mic + speaker abstraction — 3 profiles)
└─ core/brain.py (skill dispatcher, event bus)
```
### Camera + face/place recognition data flow
```
CameraDaemon (parent, in-memory JPEG+b64 cache)
├─→ dashboard /api/recognition/frame.jpg ── snapshot_jpeg()
└─→ GeminiSubprocess._frame_forwarder ── get_frame_b64()
│ "frame:<b64>\n" over stdin
ArmController ─emit→ event bus ─→ main.py ─→ live_sub.send_state()
│ "state:<json>\n" over stdin
gemini/script.py _stdin_watcher thread
├─ frame: → _LATEST_FRAME → _send_frame_loop →
│ session.send_realtime_input(video=Blob)
└─ state: → _STATE_PENDING → _send_state_loop →
session.send_realtime_input(text=…)
Recognition toggles (vision / face-rec / zone-rec / movement) are written by the
dashboard to data/.recognition_state.json and POLLED by the Gemini child at 1 Hz
— so flipping a toggle takes effect mid-session with NO restart.
```
## Quick start (on the robot)
```bash
conda activate gemini_sdk
cd ~/Sanad
python3 main.py
```
Then open `http://<robot-ip>:8000` in a browser. (The dashboard binds to the
`wlan0` IP by default — see *Runtime selection* to override.)
Fully-offline brain (no cloud): `SANAD_VOICE_BRAIN=local python3 main.py`
(requires `ollama serve` + the local model env — see *Voice brains*).
> **Gemini API key — required, none ships with the repo.** The `api_key`
> fields in `config/core_config.json` (`gemini_defaults`) and
> `data/motions/config.json` (`gemini`) are intentionally empty (`""`).
> The voice loop cannot connect until you supply one, by any of:
> - **Dashboard** — *Voice & Audio → Gemini API Key* — paste + save, hot-swaps live (no restart). Persists to `data/motions/config.json`.
> - **Env var** — `export SANAD_GEMINI_API_KEY=AIza...` before `python3 main.py`.
> - **Config file** — set `gemini_defaults.api_key` in `config/core_config.json`.
>
> Precedence (highest first): `data/motions/config.json` → `SANAD_GEMINI_API_KEY` → `config/core_config.json`. Get a key at <https://aistudio.google.com/apikey>.
## Dashboard features
### Operations
Quick-fire SDK + JSONL arm actions (chip buttons), gestural-speaking toggle.
### Voice & Audio
- **Live Voice Commands** — fire arm gestures from the *user's* transcript
(wake-phrase → arm action). Master gate + Deferred-trigger toggle.
- **Live Gemini Process** — start/stop the voice conversation subprocess, tail
its log. Choose the Gemini cloud brain or the offline brain via
`SANAD_VOICE_BRAIN`.
- **Typed Replay** — Gemini reads typed text aloud (wrapped with a
"repeat verbatim" prompt); optionally records the clip.
- **Gemini API Key** — hot-swap the key without restart.
- **Wake Phrase Manager** — add/remove phrase → action bindings.
- **Audio Controls** — mic/speaker mute, G1 chest-speaker volume (DDS), device
profile selection, PulseAudio soft-reset and Anker USB hard-reset.
### Motion & Replay
- **Motion Control** — list SDK (built-in) + JSONL (recorded) actions, select +
play. Cancel smoothly returns to `arm_home.jsonl`.
- **Replay Manager** — upload `.jsonl` files, test-play with speed, Teaching
Mode (kinesthetic record — limp the arm and hand-guide it).
- **Macro Recorder** — record a new audio+motion pair, OR pick any WAV + any
motion (SDK or JSONL) and play them in parallel.
### Controller *(locomotion)*
Manual teleoperation of the G1's **legs** via the Unitree `LocoClient`.
**Disarmed every boot**; all motion writes require Arm first.
- **Move / Step** — continuous teleop (vx/vy/vyaw) or discrete one-shot steps.
- **Postures & FSM modes** — zero-torque, damp, squat, sit, stand, balance,
stand-height; prep/ready sequences; MotionSwitcher select-AI/release.
- **Gemini Movement** — toggle voice-driven walking: the `MovementDispatcher`
parses Gemini's *own spoken confirmation phrases* ("Turning right." /
"أستدير يميناً.") and drives the legs (gated on this toggle + an E-STOP latch).
- **E-STOP** — always available; `StopMove` + disarm + latch the dispatcher.
> **Safety:** the arm and locomotion are **mutually exclusive** —
> `arm.set_motion_block(loco.movement_active)` makes every arm
> replay/gesture refuse while the robot is (or just was, within ~1.5 s) walking.
### Recognition
Camera vision + Gemini-side **face** and **zone/place** recognition. All are
**off by default**; each is a **hot toggle** (≈1 s to take effect, no restart).
- **Camera Vision** — `CameraDaemon` captures from a RealSense (preferred) or
USB camera; the supervisor streams JPEG frames to Gemini Live so it can answer
"what do you see?". Live preview panel. Auto-reconnects on USB unplug/stall
and warns if a RealSense negotiated USB 2.0 (Marcus-ported resilience).
- **Face Recognition** — manage `data/faces/face_{id}/` galleries: enroll from
the live camera or upload photos, rename, describe, download (per-photo or
ZIP), delete. On session start (and on any gallery change) the child sends a
**primer turn** carrying every enrolled face + a Khaleeji greeting
instruction — **Gemini matches in-context, so there is no local
face-recognition model**. Recognition needs vision on.
- **Zones & Places** — `data/zones/zone_{zid}/place_{pid}/` two-level gallery:
reference photos per place, optional linked face_ids, and a **"go here"** nav
target (`nav_target_zone/place_id` in the recognition-state file) for
place-aware navigation.
- **Sync Gallery** — force-resend the face/zone primer to the live session.
### Recordings
Skill Registry (predefined audio+motion+callback skills from `skills.json`) +
Saved Records (captured Gemini turn recordings; play/pause/stop/rename/delete).
### Temperature
Live **3D motor-temperature heatmap** — a standalone three.js viewer
(`dashboard/static/temp3d/`) loads the G1 29-DoF URDF + STL meshes and colors
each joint blue→red from the arm controller's throttled `rt/lowstate` snapshot,
streamed over `/ws/motor-temps` at ~8 fps. No second DDS subscriber.
### Terminal
In-browser **PTY shell** to the robot (`/ws/terminal`, xterm.js) — a `bash -i`
as the dashboard's user, with resize + backpressure, bounded to 4 sessions.
(See *Security* — this is full shell access to whoever reaches the URL.)
### Settings & Logs
System info (host, network interfaces, DDS interface, bound dashboard host/port,
per-subsystem status, audio devices), live log stream (`/ws/logs`), per-file
tail, snapshot, and a one-blob "Copy All Logs" bundle.
## Directory layout
| Path | Contents |
|---|---|
| `main.py` | Entry point — fault-isolated boot of all subsystems + the dashboard. Doubles as the service container (route handlers `import` its module globals). |
| `config.py` | Runtime constants + layout-agnostic path resolution; layers `data/motions/config.json` over the JSON config at import. |
| `config/` | Per-subsystem JSON: `core`, `voice`, `gemini`, `local`, `motion`, `dashboard`. |
| `core/` | `brain.py` (skill dispatcher), `event_bus.py`, `skill_registry.py`, `config_loader.py`, `logger.py` (rotating + WS push), `asyncio_compat.py` (3.8 `to_thread` shim). |
| `gemini/` | Gemini Live — `client.py` (one-shot), `script.py` (live brain: audio + video + motion-state), `subprocess.py` (supervisor + stdin frame/state push). |
| `local/` | Fully-offline brain — `vad.py` (Silero), `stt.py` (faster-whisper), `llm.py` (Qwen via Ollama/llama.cpp), `tts.py` (CosyVoice2), `script.py` (the brain), `subprocess.py` (supervisor). Opt-in via `SANAD_VOICE_BRAIN=local`. |
| `voice/` | `sanad_voice.py` (subprocess entry, model-agnostic), `audio_io.py` / `audio_manager.py` / `audio_devices.py` (mic/speaker), `local_tts.py` (SpeechT5 Arabic TTS), `live_voice_loop.py` (user-transcript → arm gesture), `movement_dispatch.py` (Gemini-phrase → locomotion), `typed_replay.py`, `wake_phrase_manager.py`, `text_utils.py` (Arabic normalization + phrase matching), `model_script.py` / `model_subprocess.py` (brain templates). |
| `motion/` | `arm_controller.py` (production 5-phase JSONL replay engine, owns the single DDS init), `macro_player.py`, `macro_recorder.py`, `teaching.py`. (`sanad_arm_controller.py` is a legacy alternate — not wired by `main.py`.) |
| `G1_Controller/` | `loco_controller.py` — locomotion via Unitree `LocoClient` (move/step/postures/FSM/E-STOP); reuses the arm's DDS participant. |
| `vision/` | `camera.py` (RealSense/USB daemon, auto-reconnect), `face_gallery.py`, `zone_gallery.py`, `recognition_state.py` (atomic-JSON toggle IPC). |
| `dashboard/` | `app.py` (FastAPI factory + fault-isolated router registration), `routes/*.py` (20 REST routers), `websockets/*.py` (logs, motor-temps, terminal), `static/index.html` (single-page UI), `static/temp3d/` (3D viewer). |
| `scripts/` | Persona files — `sanad_script.txt` (voice persona "Bousandah"), `sanad_rule.txt`, `sanad_arm.txt` (voice→arm phrases). |
| `data/` | Runtime state — `motions/*.jsonl` (arm trajectories) + `instruction.json` (locomotion phrase map) + `skills.json` + `config.json` (dashboard-editable), `recordings/` (captured turns + macros), `faces/face_{id}/` + `zones/zone_{zid}/place_{pid}/` (galleries), `audio/` (typed-replay WAVs + records index), `.recognition_state.json` (toggle IPC). |
| `model/` | Local SpeechT5 / Whisper / CosyVoice2 weights when using the offline pipeline. |
| `logs/` | Per-module rotating logs. |
## Voice brains
The child `voice/sanad_voice.py` is model-agnostic and selects a brain via
`SANAD_VOICE_BRAIN`. Every brain implements the same contract
(`__init__(audio_io, recorder, voice, system_prompt)`, `async run()`, `stop()`)
and ships a sibling supervisor that spawns the child and parses its
`USER:` / `BOT:` / state log markers.
| Value | Brain | Pipeline |
|---|---|---|
| `gemini` *(default)* | `gemini/script.py` | Gemini Live native-audio (full-duplex speech-to-speech, server-side VAD, vision frames, face/zone primers, voice→movement). Cloud. |
| `local` | `local/script.py` | Silero VAD → faster-whisper (large-v3-turbo, CUDA int8) → Qwen2.5 (Ollama/llama.cpp) → CosyVoice2 streaming TTS. Fully on-device. |
| `model` | `voice/model_script.py` | Template/stub for adding a new provider (OpenAI Realtime, Claude Voice, …). |
To add a brain: drop a file in `voice/` or a new `<brand>/` folder and add a
branch to `voice/sanad_voice.py:_build_brain()`; ship a supervisor modeled on
`voice/model_subprocess.py`.
## Runtime selection (env vars)
| Var | Values | Default | Effect |
|---|---|---|---|
| `SANAD_VOICE_BRAIN` | `gemini`, `local`, `model` | `gemini` | Which brain the subprocess loads (see `voice/sanad_voice.py:_build_brain`). |
| `SANAD_AUDIO_PROFILE` | `builtin`, `anker`, `hollyland_builtin` | `builtin` | Mic + speaker pair. `builtin` = G1 UDP mic + G1 chest speaker via DDS. |
| `SANAD_DDS_INTERFACE` | network iface | `eth0` | DDS network for G1 low-level comms (arm + locomotion + speaker). |
| `SANAD_DASHBOARD_HOST` / `_INTERFACE` | IP / iface | `wlan0` IP | Dashboard bind address. |
| `SANAD_GEMINI_API_KEY` | string | `""` (empty) | Gemini API key. No key ships in the repo — set this, paste one in the dashboard (**Voice & Audio → Gemini API Key**), or fill `gemini_defaults.api_key` in `config/core_config.json`. See [Quick start](#quick-start-on-the-robot). |
| `SANAD_GEMINI_MODEL` / `_VOICE` | string | reads config | Override the Gemini model id / prebuilt voice. |
| `SANAD_G1_VOLUME` | `0`–`100` | `100` | G1 chest-speaker volume; also scales the barge-in threshold. |
| `SANAD_LIVE_SCRIPT` | path | auto | Override the subprocess entry script path. |
| `SANAD_RECORD` | `0` or `1` | `1` | Record every Gemini turn to `data/recordings/`. |
| `SANAD_AEC_ENABLE` | `0` or `1` | `1` | Enable WebRTC AEC3 (if the Python binding is installed). |
| `SANAD_VISION_ENABLE` | `0` or `1` | `0` | Boot default for camera vision. **Runtime truth is the Recognition-tab toggle** → `data/.recognition_state.json`, hot-applied without a restart. |
| `SANAD_FACE_RECOGNITION_ENABLE` | `0` or `1` | `0` | Boot default for Gemini-side face recognition. Also a hot toggle. |
| `SANAD_VISION_SEND_HZ` | float | `2` | Frames/sec the Gemini child relays to Live. |
| `SANAD_CAMERA_WIDTH` / `_HEIGHT` / `_FPS` | int | `424` / `240` / `15` | Capture profile. Also settable per-deploy in `config/core_config.json > camera`. |
| `SANAD_CAMERA_USB_INDEX` | int | auto | Pin a `/dev/videoN` node (avoids picking a RealSense IR stream). |
| `SANAD_FACES_MAX_SAMPLES` | int | `3` | Max photos per person fed into the gallery primer turn (token budget). |
| `SANAD_PROJECT_ROOT` | path | auto | Override the project root (see *Dynamic paths*). |
> All `SANAD_VISION_*` / `SANAD_CAMERA_*` / `SANAD_FACE_*` vars are **boot
> defaults** forwarded to the Gemini child via `LIVE_TUNE`. Once running, the
> Recognition tab's toggles (vision / face-rec / zone-rec / movement) are the
> live source of truth in `data/.recognition_state.json`, polled at 1 Hz.
CLI flags: `python3 main.py --host <ip> --port 8000 --network <dds_iface>`;
`--check-env` prints a subsystem/environment diagnostic and exits.
## API surface
All routes are registered defensively — a router whose import fails is recorded
(`GET /api/_dashboard_status`) and the server still boots without it.
**REST** (prefix → controls): `/api` health · `/api/system` info ·
`/api/voice` Gemini/local generate+connect+key · `/api/motion` arm actions ·
`/api/skills` skill registry · `/api/macros` record/play · `/api/replay` JSONL
CRUD + teaching · `/api/audio` mute/volume/devices/reset · `/api/scripts`
persona files · `/api/records` saved WAVs · `/api/prompt` system prompt ·
`/api/wake-phrases` bindings · `/api/live-voice` arm-phrase dispatcher ·
`/api/live-subprocess` Gemini child · `/api/typed-replay` TTS · `/api/recognition`
vision + face gallery · `/api/zones` zones/places + nav target · `/api/temp`
motor map + snapshot · `/api/controller` locomotion (move/step/postures/modes/
E-STOP).
**WebSockets**: `/ws/logs` (live log stream + 500-line replay) ·
`/ws/motor-temps` (3D heatmap data, ~8 fps) · `/ws/terminal` (PTY shell).
## Architecture notes
- **Subprocess isolation**: `voice/sanad_voice.py` runs as a child of `main.py`
via the supervisor. If the voice loop crashes, the dashboard + arm + legs stay
up.
- **Single DDS init**: `motion/arm_controller.py` owns the one
`ChannelFactoryInitialize`; `LocoController` and the audio routes reuse that
participant rather than re-initializing.
- **Brain contract**: see `voice/model_script.py` — any new model implements
`__init__(audio_io, recorder, voice, system_prompt)`, `async run()`, `stop()`.
- **Supervisor contract**: each brain ships a sibling supervisor (e.g.
`gemini/subprocess.py`) that spawns `sanad_voice.py` with its
`SANAD_VOICE_BRAIN` and parses the brain's log markers. Template:
`voice/model_subprocess.py`.
- **Locomotion safety**: `LocoController` is disarmed every boot, has velocity
caps + a `StopMove` watchdog, and is mutually exclusive with the arm.
Voice-driven movement is **off by default** and gated by the Controller
toggle. Distances/degrees in `data/motions/instruction.json` are
**approximate and must be calibrated on the real robot** — there is no
obstacle/abort stack.
- **Audio routing**: the G1's platform-sound PulseAudio sink is NOT wired to a
physical speaker. All dashboard-triggered playback (`play_wav`, typed-replay
audio, record playback) routes through DDS `AudioClient.PlayStream` via
`audio_manager._play_pcm_via_g1`. The PyAudio path is a desktop/dev fallback.
- **Arm replay**: `motion/arm_controller.py:_replay_file_inner()` is a port of
`G1_Lootah/Manual_Recorder/g1_replay_v4_stable.py:Run()` — ramp-in → settle
hold → playback → smooth return → disable SDK. Body motors (0–14) lock to a
live snapshot while arm motors (15–28) follow the file at 60 Hz. `_return_home()`
runs unconditionally after a cancel for a jerk-free return.
- **Camera frame transport (stdin push)**: the `CameraDaemon` lives in the
parent and caches frames in memory. `GeminiSubprocess` base64-encodes the
latest frame to the child's stdin (~2 fps); the child's `_stdin_watcher`
relays it to Gemini Live with a staleness guard. Chosen over a file drop so
the parent owns the camera once and the dashboard preview reads the same cache.
- **Motion-state channel**: `arm_controller._execute()` emits
`motion.action_started` / `_done` / `_error` on the event bus. `main.py`
forwards each to the child as `state:<json>\n`, injected to Gemini Live as
silent `[STATE-START] wave_hand` / `[STATE-DONE] wave_hand (2.3s)` text so it
can honestly answer "what are you doing?".
- **Recognition is Gemini-side**: no dlib/insightface/onnxruntime. Galleries are
pure file IO; `gemini/script.py:_send_gallery_primer()` builds one multimodal
`send_client_content` turn — every enrolled face/place's photos + a greeting
instruction — and Gemini matches incoming frames against it in-context.
## Camera vision on Jetson
The Recognition tab needs `pyrealsense2` to talk to the Intel RealSense.
**Do not `pip install pyrealsense2` on JetPack 5** — the PyPI wheel is built
against glibc 2.32+ (Ubuntu 22.04) and fails to load on JetPack 5's glibc
2.31 with `ImportError: ... version 'GLIBC_2.32' not found`.
The native runtime is already there (`apt`-installed `librealsense2`). Build
just the Python binding from source against it, into the `gemini_sdk` env:
```bash
rs-enumerate-devices # confirm the D435I shows up at OS level first
source ~/miniconda3/etc/profile.d/conda.sh && conda activate gemini_sdk
pip uninstall -y pyrealsense2 # remove the broken wheel if present
sudo apt install -y cmake build-essential git python3-dev libusb-1.0-0-dev pkg-config libssl-dev
cd /tmp && rm -rf librealsense
git clone --depth=1 --branch v2.56.5 https://github.com/IntelRealSense/librealsense.git
cd librealsense && mkdir -p build && cd build
cmake .. -DBUILD_PYTHON_BINDINGS=ON -DPYTHON_EXECUTABLE=$(which python3) \
-DBUILD_EXAMPLES=OFF -DBUILD_GRAPHICAL_EXAMPLES=OFF \
-DBUILD_UNIT_TESTS=OFF -DCHECK_FOR_UPDATES=OFF -DCMAKE_BUILD_TYPE=Release
make -j$(nproc) pyrealsense2
SITE=$(python3 -c "import sysconfig; print(sysconfig.get_paths()['purelib'])")
mkdir -p "$SITE/pyrealsense2"
cp wrappers/python/pyrealsense2*.so "$SITE/pyrealsense2/"
cp ../wrappers/python/pyrealsense2/__init__.py "$SITE/pyrealsense2/" 2>/dev/null || true
python3 -c 'import pyrealsense2 as rs; print([d.get_info(rs.camera_info.name) for d in rs.context().query_devices()])'
```
Match the `--branch` tag to the installed runtime (`dpkg -l | grep librealsense2`).
If the build isn't worth it, `CameraDaemon` falls back to `cv2.VideoCapture(0)`
automatically — fine for a plain USB webcam, but note a RealSense exposes its
*depth* stream at `/dev/video0`, not RGB, so a real USB cam is the cleaner
fallback (or pin `SANAD_CAMERA_USB_INDEX`). On x86_64 / Ubuntu 22.04+ desktops,
`pip install pyrealsense2` just works.
## Dynamic paths
Every path is derived at runtime — no hard-coded `/home/...` anywhere.
Resolution order for `BASE_DIR` in `config.py`:
1. `SANAD_PROJECT_ROOT` env var (if set).
2. `PROJECT_BASE + PROJECT_NAME` from a `.env` file in `Sanad/` or its parent.
3. `Path(__file__).resolve().parent` — auto-detected.
The project runs unchanged from either layout:
- dev: `<anywhere>/Project/Sanad/`
- deployed: `/home/unitree/Sanad/`
## Deployment (workstation → robot)
```bash
rsync -av --delete \
--exclude=__pycache__ --exclude=logs --exclude=model --exclude=.git \
/path/to/Sanad/ \
unitree@192.168.123.164:/home/unitree/Sanad/
```
Then on the robot: `Ctrl+C` the running `main.py` and re-run.
## Security
The dashboard has **no authentication**. Anyone who can reach
`http://<robot>:8000` gets full robot control — locomotion, arm, audio, file
upload/delete — and, via the **Terminal tab**, an interactive shell as the
dashboard's user. Bind it to a **trusted LAN only**; add auth before any wider
exposure.
## Troubleshooting
| Symptom | Fix |
|---|---|
| `No LowState received in 2s — refusing to replay` | `main.py` was re-executed as both `__main__` and `Project.Sanad.main`, creating two arm instances. Fix lives in the `sys.modules` alias near the top of `main.py`. Restart. |
| `G1ArmActionClient not available — skipping` for SDK actions | Same duplicate-init issue as above. |
| `No module named 'Project'` in subprocess | Bootstrap preamble in `voice/sanad_voice.py:~30` synthesises the `Project.Sanad` namespace when run as `__main__`. |
| Controller moves rejected (409) | The Controller is **disarmed by default** — hit Arm first. Reads + E-STOP are always allowed. |
| Arm action refused while "movement armed" | Arm ↔ locomotion are mutually exclusive. Disarm/stop locomotion, then trigger the arm. |
| Voice-driven walking does nothing | "Gemini Movement" toggle off, or E-STOP latched. Toggle on; clear E-STOP. Distances are uncalibrated. |
| Arm jumps at start of JSONL replay | `SETTLE_HOLD_SEC` (in `config/motion_config.json > arm_controller`) too low — try `0.7` or `1.0`. |
| Record playback silent | `audio_mgr.play_wav` only routes to G1 DDS if the Unitree SDK is importable; on desktop it falls back to the PulseAudio sink. |
| Live Voice Commands transcript stuck | Deferred trigger was queued but `trigger_enabled` toggle was off. Toggle on — or the pending-trigger poll fires it automatically once enabled. |
| Gemini "no audio" on Typed Replay | Non-deterministic; the retry chain in `voice/typed_replay.py:generate_audio` tries three prompt variants. For reliable TTS, use the offline `local_tts` SpeechT5 path. |
| Local brain exits immediately | `ollama serve` not running / model not pulled, or weights missing under `model/`. Check `logs/local_subprocess.log`. The Gemini brain is the safe default. |
| Recognition tab: "Camera could not start (no backend)" | No camera backend acquired. Check `rs-enumerate-devices` (RealSense at OS level) and `python3 -c 'import pyrealsense2'` in the `gemini_sdk` env. The glibc `ImportError` means the pip wheel is incompatible — see "Camera vision on Jetson" above. |
| Camera badge stuck on "reconnecting…" | `CameraDaemon` lost the device and is retrying with exponential backoff. Re-seat the USB 3 cable; check `logs/camera.log` for the USB-2.0 warning. |
| Gemini doesn't greet an enrolled face | Face Recognition toggle on? Vision on? (Face rec needs frames.) Check `logs/gemini_brain.log` for `face gallery primed: N person(s)`. Hit "Sync Gallery" to force a re-prime. |
| Gemini unaware of motion state | The `motion.action_*` → `send_state` chain only runs when Live Gemini is up. Check `logs/gemini_subprocess.log` and `logs/gemini_brain.log` for `STATE injected:` lines. |
## License / attribution
Internal project for YS Lootah Technology. Reuses/ports patterns from:
- `G1_Lootah/Manual_Recorder/g1_replay_v4_stable.py` (arm replay math)
- `SanadVoice/gemini_interact` (arm-phrase dispatch, skill registry)
- `SanadVoice/gemini_voice_v2` (local SpeechT5 TTS)
- `Project/Marcus` — camera→Gemini stdin-push transport, motion-state
injection, camera daemon resilience (auto-reconnect, USB-2.0 warning), the
`API/camera_api.py` cache shape (`get_frame_b64` / `get_fresh_frame`), and the
confirmation-phrase → locomotion pattern (`movement_dispatch`).
- Unitree `unitree_sdk2py` (G1 low-level SDK, `LocoClient`, `G1ArmActionClient`,
`AudioClient.PlayStream`).

0
vendor/Sanad/__init__.py vendored Normal file
View File

465
vendor/Sanad/config.py vendored Normal file
View File

@ -0,0 +1,465 @@
"""Centralized configuration for the Sanad robot assistant.
Resolution order for BASE_DIR (highest priority first):
1. SANAD_PROJECT_ROOT environment variable
2. PROJECT_BASE + PROJECT_NAME from .env file (or env vars)
3. Path(__file__).resolve().parent (auto-detected from this file's location)
Every other directory is derived from BASE_DIR — never hardcode an absolute path.
"""
from __future__ import annotations
import json
import os
from pathlib import Path
from typing import Any
def _read_env_file(env_path: Path) -> dict[str, str]:
"""Minimal .env reader (no python-dotenv dependency)."""
out: dict[str, str] = {}
if not env_path.exists():
return out
try:
for raw in env_path.read_text(encoding="utf-8").splitlines():
line = raw.strip()
if not line or line.startswith("#") or "=" not in line:
continue
k, v = line.split("=", 1)
out[k.strip()] = v.strip().strip('"').strip("'")
except OSError:
pass
return out
def _resolve_base_dir() -> Path:
"""Resolve the Sanad project root with override support."""
# 1. Direct env override
override = os.environ.get("SANAD_PROJECT_ROOT", "").strip()
if override:
p = Path(override).expanduser().resolve()
if p.exists():
return p
# 2. PROJECT_BASE + PROJECT_NAME pattern
_here = Path(__file__).resolve().parent # Sanad/
env_files = [
_here / ".env", # Sanad/.env
_here.parent / ".env", # Project/.env
]
for env_path in env_files:
env = _read_env_file(env_path)
base = env.get("PROJECT_BASE") or os.environ.get("PROJECT_BASE", "")
name = env.get("PROJECT_NAME") or os.environ.get("PROJECT_NAME", "")
if base and name:
candidate = Path(base).expanduser().resolve() / name
if candidate.exists():
return candidate
# 3. Auto-detect — this file lives at Sanad/config.py, so parent = Sanad/
return _here
# Project root; every directory below is derived from it.
BASE_DIR = _resolve_base_dir()
DATA_DIR = BASE_DIR.joinpath("data")
LOGS_DIR = BASE_DIR.joinpath("logs")
SCRIPTS_DIR = BASE_DIR.joinpath("scripts")
MODEL_DIR = BASE_DIR.joinpath("model")
# data/audio — audio recordings (typed-replay, etc.)
AUDIO_RECORDINGS_DIR = DATA_DIR.joinpath("audio")
# data/recordings/motion — motion macro recordings (paired with audio)
MOTION_RECORDINGS_DIR = DATA_DIR.joinpath("recordings", "motion")
# data/motions — motion JSONL macros (auto-discovered as actions)
MOTIONS_DIR = DATA_DIR.joinpath("motions")
SKILLS_FILE = MOTIONS_DIR.joinpath("skills.json")
CONFIG_FILE = MOTIONS_DIR.joinpath("config.json")
# ─── Load baseline defaults from config/core_config.json ───
# Single source of truth. Runtime overrides via:
# 1. env vars (SANAD_GEMINI_API_KEY, SANAD_GEMINI_MODEL, ...)
# 2. data/motions/config.json (dashboard-editable — see load_config())
# 3. config/core_config.json (this file)
def _load_core_config() -> dict[str, Any]:
cfg_path = BASE_DIR / "config" / "core_config.json"
if not cfg_path.exists():
return {}
try:
raw = json.loads(cfg_path.read_text(encoding="utf-8"))
except (OSError, json.JSONDecodeError):
return {}
# Strip _comment / _description noise
return {k: v for k, v in raw.items() if not k.startswith("_")}
_CORE_CFG = _load_core_config()
# `or {}` also covers a section that is present but null in the JSON.
_GEMINI = _CORE_CFG.get("gemini_defaults") or {}
_AUDIO = _CORE_CFG.get("audio_defaults") or {}
# -- Gemini defaults (override via data/motions/config.json or env) --
# An env var that is set but empty (e.g. `SANAD_GEMINI_API_KEY=` passed through
# by a compose/.env file) is treated as unset, so it cannot blank out the value
# from config/core_config.json.
GEMINI_API_KEY = (
    os.environ.get("SANAD_GEMINI_API_KEY")
    or _GEMINI.get("api_key", ""))
# The env override is used verbatim; only the config value gets the "models/" prefix.
GEMINI_MODEL = (
    os.environ.get("SANAD_GEMINI_MODEL")
    or "models/" + _GEMINI.get("model_live", "gemini-2.5-flash-native-audio-preview-12-2025"))
GEMINI_VOICE = (
    os.environ.get("SANAD_GEMINI_VOICE")
    or _GEMINI.get("voice_name", "Charon"))
GEMINI_WS_URI = _GEMINI.get(
    "model_ws_uri",
    "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent")
GEMINI_WS_TIMEOUT = _GEMINI.get("ws_timeout_sec", 30)
# -- Audio defaults --
SEND_SAMPLE_RATE = _AUDIO.get("send_sample_rate", 16000)
RECEIVE_SAMPLE_RATE = _AUDIO.get("receive_sample_rate", 24000)
CHUNK_SIZE = _AUDIO.get("chunk_size", 512)
CHANNELS = _AUDIO.get("channels", 1)
# -- PulseAudio hardware IDs --
SINK = _AUDIO.get("sink", "alsa_output.usb-Anker_PowerConf_A3321-DEV-SN1-01.analog-stereo")
SOURCE = _AUDIO.get("source", "alsa_input.usb-Anker_PowerConf_A3321-DEV-SN1-01.mono-fallback")
# Name built by appending ".monitor" to SINK.
MONITOR_SOURCE = f"{SINK}.monitor"
# -- Dashboard --
# Default: bind to wlan0's IP (auto-detected at startup) so the dashboard is
# reachable on the wireless network. Falls back to 0.0.0.0 (all interfaces)
# if wlan0 isn't present.
#
# Resolution order (highest priority first):
# 1. SANAD_DASHBOARD_HOST env var (explicit IP or hostname)
# 2. SANAD_DASHBOARD_INTERFACE env var → that interface's IP
# 3. wlan0 interface IP (default)
# 4. 0.0.0.0 (bind to all)
#
# Override via --host CLI flag too.
# A blank or whitespace-only override falls back to wlan0 rather than asking
# the lookup helpers about an interface named "".
DASHBOARD_INTERFACE = os.environ.get("SANAD_DASHBOARD_INTERFACE", "").strip() or "wlan0"
def _get_interface_ip(iface: str) -> str | None:
"""Return the IPv4 address bound to `iface`, or None if not present.
Tries multiple strategies in order different Linux setups expose
interface info via different mechanisms.
"""
# Strategy 1: fcntl SIOCGIFADDR (fastest, no subprocess)
ip = _get_iface_ip_fcntl(iface)
if ip:
return ip
# Strategy 2: parse `ip -4 -o addr show <iface>` (works on Ubuntu/Jetson)
ip = _get_iface_ip_via_ip_cmd(iface)
if ip:
return ip
# Strategy 3: parse `/proc/net/fib_trie` (last resort)
ip = _get_iface_ip_via_proc(iface)
if ip:
return ip
return None
def _get_iface_ip_fcntl(iface: str) -> str | None:
try:
import fcntl
import socket
import struct
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
try:
ifname = iface[:15].encode("utf-8")
packed = fcntl.ioctl(
s.fileno(),
0x8915, # SIOCGIFADDR
struct.pack("256s", ifname),
)
return socket.inet_ntoa(packed[20:24])
finally:
s.close()
except Exception:
return None
def _get_iface_ip_via_ip_cmd(iface: str) -> str | None:
try:
import subprocess
r = subprocess.run(
["ip", "-4", "-o", "addr", "show", iface],
capture_output=True, text=True, timeout=2.0,
)
if r.returncode != 0:
return None
# Output: "5: wlan0 inet 10.255.254.86/24 brd ..."
for line in r.stdout.splitlines():
parts = line.split()
for i, p in enumerate(parts):
if p == "inet" and i + 1 < len(parts):
return parts[i + 1].split("/")[0]
except Exception:
return None
return None
def _get_iface_ip_via_proc(iface: str) -> str | None:
"""Fallback: scrape /proc/net/fib_trie for an IP advertised on this iface.
Less reliable than fcntl/ip cmd but doesn't need any external tooling.
"""
try:
import subprocess
# Try `hostname -I` as a final fallback (returns space-separated IPs)
r = subprocess.run(["hostname", "-I"], capture_output=True, text=True, timeout=1.0)
if r.returncode == 0:
ips = (r.stdout or "").strip().split()
# Return first non-loopback IPv4
for ip in ips:
if "." in ip and not ip.startswith("127."):
return ip
except Exception:
return None
return None
def list_network_interfaces() -> list[dict]:
    """Describe every network interface reported by `socket.if_nameindex()`.

    Used by the dashboard's system-info panel.

    Returns:
        A list of dicts with the keys `name`, `index`, `ip` and `is_up`.
        `ip` is "" when no address was found. `is_up` is True exactly when
        `_get_interface_ip` returned something other than None - it reflects
        "has an address", not the link state. If enumeration raises, the
        entries collected so far are returned.
    """
    entries: list[dict] = []
    try:
        import socket

        for if_index, if_name in socket.if_nameindex():
            address = _get_interface_ip(if_name)
            entry = {"name": if_name, "index": if_index}
            entry["ip"] = address if address else ""
            entry["is_up"] = address is not None
            entries.append(entry)
    except Exception:
        # Best-effort listing: swallow the error and return what we have.
        pass
    return entries
def _resolve_dashboard_host() -> str:
"""Resolve the host the dashboard should bind to.
Order:
1. SANAD_DASHBOARD_HOST env var (explicit IP/hostname)
2. SANAD_DASHBOARD_INTERFACE that interface's IP
3. wlan0's IP (default)
4. First non-loopback IP from `hostname -I`
5. 0.0.0.0 (bind everywhere)
"""
explicit = os.environ.get("SANAD_DASHBOARD_HOST", "").strip()
if explicit:
return explicit
iface_ip = _get_interface_ip(DASHBOARD_INTERFACE)
if iface_ip:
return iface_ip
# Try `hostname -I` as a final non-loopback fallback
try:
import subprocess
r = subprocess.run(["hostname", "-I"], capture_output=True, text=True, timeout=1.0)
if r.returncode == 0:
for ip in (r.stdout or "").strip().split():
if "." in ip and not ip.startswith("127."):
return ip
except Exception:
pass
return "0.0.0.0"
# Dashboard bind address, resolved once when this module is imported.
# _apply_overrides() (below) may replace DASHBOARD_HOST / DASHBOARD_PORT from
# the "dashboard" section of config.json.
DASHBOARD_HOST = _resolve_dashboard_host()
# NOTE(review): literal here, while core_config.json > dashboard_defaults.port
# also carries 8000 — confirm which one is meant to be authoritative.
DASHBOARD_PORT = 8000
# -- Local TTS --
# Model identifier plus the on-disk locations of its parts under MODEL_DIR.
LOCAL_TTS_MODEL = "MBZUAI/speecht5_tts_clartts_ar"
LOCAL_TTS_MODEL_PATH = str(MODEL_DIR / "speecht5_tts_clartts_ar")
LOCAL_TTS_HIFIGAN_PATH = str(MODEL_DIR / "speecht5_hifigan")
LOCAL_TTS_XVECTOR_PATH = str(MODEL_DIR / "arabic_xvector_embedding.pt")
# -- Motion --
# Hardware constants are read from the "g1_hardware" section of _CORE_CFG
# (presumably core_config.json — verify where _CORE_CFG is loaded); the
# literals are the fallbacks used when a key is absent.
_G1 = _CORE_CFG.get("g1_hardware", {})
REPLAY_HZ = _G1.get("replay_hz", 60.0)
G1_NUM_MOTOR = _G1.get("num_motor", 29)
ENABLE_ARM_SDK_INDEX = _G1.get("enable_arm_sdk_index", 29)
# Gain presets as kp/kd pairs.
# NOTE(review): motion_config.json > sanad_arm_controller.gains holds the same
# six numbers — confirm which copy the motion code actually reads.
KP_HIGH = 300.0
KD_HIGH = 3.0
KP_LOW = 80.0
KD_LOW = 3.0
KP_WRIST = 40.0
KD_WRIST = 1.5
# Motor index sets.
# NOTE(review): duplicated as weak_motors / wrist_motors in motion_config.json;
# keep the two copies in sync.
WEAK_MOTORS = {4, 10, 15, 16, 17, 18, 22, 23, 24, 25}
WRIST_MOTORS = {19, 20, 21, 26, 27, 28}
# -- Live Gemini subprocess tuning --
# Every value is a string. core_config.json's camera comment says the Gemini
# child reads SANAD_VISION_SEND_HZ / SANAD_VISION_STALE_MS from here as env
# vars. NOTE(review): presumably the whole dict is exported into the child's
# environment — confirm against the subprocess launcher.
LIVE_TUNE: dict[str, str] = {
"SANAD_REQUIRED_LOUD_CHUNKS": "5",
"SANAD_PREBUFFER_CHUNKS": "3",
"SANAD_PLAYBACK_TIMEOUT": "0.25",
"SANAD_BARGE_IN_COOLDOWN": "1.0",
"SANAD_AI_SPEAK_GRACE": "0.5",
# ECHO_GUARD_SEC suppresses USER SAID log lines for this many seconds
# after the robot finishes a chunk. Previously 1.2 — caused a visible
# lag where "robot finished talking" was followed by silence in the
# log even though Gemini was transcribing the user's new speech
# immediately. Lowered to 0.3 to match typical room reverb tail; the
# real echo protection is the silence-during-speaking gate, not this.
"SANAD_ECHO_GUARD_SEC": "0.3",
"SANAD_SPEAKING_ENERGY_GATE": "0.90",
"SANAD_CALIBRATION_CHUNKS": "30",
"SANAD_THRESHOLD_MULTIPLIER": "4.0",
# Base barge-in threshold calibrated at the REFERENCE volume (50%).
# At runtime, scaled QUADRATICALLY with actual G1 volume:
# scale = (actual_vol / ref_vol) ** 2
#
# Physical reason: doubling digital speaker volume doubles sample
# amplitude, which means RECEIVED energy at the mic quadruples
# (energy ~ amplitude²). Linear scaling left the threshold below the echo
# level at high volumes, which caused "robot listening to himself" feedback.
#
# Measured on Hollyland + G1 speaker at 100% volume:
# echo peak (no user) up to ~15700
# voice peak (user) 25000-32000+ (often saturates 32767)
# Safe threshold at 100% vol: ~18000, above echo / below voice.
#
# Working back with quadratic scale: base × (100/50)² = 18000
# base × 4 = 18000 → base = 4500 at 50% ref volume.
#
# NOTE(review): no key below is set to the 4500 derived above
# (SANAD_MIN_THRESHOLD is 800, SANAD_PLAYBACK_BARGE_MIN is 2500) — confirm
# which key this derivation applies to, or refresh the numbers.
"SANAD_MIN_THRESHOLD": "800",
"SANAD_PLAYBACK_BARGE_MIN": "2500",
"SANAD_PLAYBACK_BARGE_MULT": "1.5",
# Sustained-chunk requirement for barge-in. Balance:
# higher = fewer false triggers from echo bursts
# lower = quicker response to short commands ("stop", "توقف")
# The tuning note was written for 5 chunks (~160ms sustained voice): real
# speech reliably sustains that long; single-chunk echo spikes don't.
# NOTE(review): the value set below is 2 (~64ms at the same chunk length) —
# confirm the lower setting is intentional.
"SANAD_PLAYBACK_REQUIRED_CHUNKS": "2",
"SANAD_SILENCE_AFTER_SPEECH": "1.2",
"SANAD_SPEECH_THRESHOLD": "300",
"SANAD_DDS_INTERFACE": os.environ.get("SANAD_DDS_INTERFACE", "eth0"),
# G1 built-in mic — UDP multicast 239.168.123.161:5555.
# Requires wake-up conversation mode ON in Unitree app.
"SANAD_USE_G1_MIC": "1",
# ── Recognition (camera vision + face recognition) ──
# All of these are BOOT defaults. The runtime source of truth is the
# state file data/.recognition_state.json — toggled live from the
# Recognition tab and polled by the Gemini child at 1 Hz.
"SANAD_VISION_ENABLE": "0",
"SANAD_VISION_SEND_HZ": "2",
"SANAD_VISION_STALE_MS": "1500",
"SANAD_CAMERA_WIDTH": "424",
"SANAD_CAMERA_HEIGHT": "240",
"SANAD_CAMERA_FPS": "15",
"SANAD_CAMERA_JPEG_QUALITY": "70",
"SANAD_FACE_RECOGNITION_ENABLE": "0",
"SANAD_FACES_DIR": str(DATA_DIR / "faces"),
"SANAD_FACES_MAX_SAMPLES": "3",
"SANAD_FACES_PRIMER_RESIZE": "256",
"SANAD_RECOGNITION_STATE_PATH": str(DATA_DIR / ".recognition_state.json"),
"SANAD_RECOGNITION_POLL_S": "1.0",
}
# -- Camera --
# Camera service endpoint, addressed over loopback on CAMERA_SERVICE_PORT.
CAMERA_SERVICE_PORT = 8091
DIRECT_CAMERA_URL = f"http://127.0.0.1:{CAMERA_SERVICE_PORT}"
# -- DDS / hardware --
# Jetson G1 default is eth0 (the robot's internal network).
# Override with SANAD_DDS_INTERFACE=lo for desktop/sim development.
# The same env var and default are also copied into
# LIVE_TUNE["SANAD_DDS_INTERFACE"]; both are read once, at import time.
DDS_NETWORK_INTERFACE = os.environ.get("SANAD_DDS_INTERFACE", "eth0")
def _ensure_dirs() -> list[str]:
"""Create runtime directories. Failures are collected, not raised.
Returns the list of directories that failed to create caller can decide
whether to log/abort. The module import never crashes due to a single
permission error on a single directory.
"""
failed: list[str] = []
for d in (DATA_DIR, LOGS_DIR, SCRIPTS_DIR, AUDIO_RECORDINGS_DIR,
MOTION_RECORDINGS_DIR, MOTIONS_DIR):
try:
d.mkdir(parents=True, exist_ok=True)
except OSError:
failed.append(str(d))
return failed
# Best-effort: create dirs at import. Ignore failures here — individual
# subsystems will handle missing dirs at usage time and isolation prevents
# cascading import failures.
_DIRS_FAILED = _ensure_dirs()
def load_config() -> dict[str, Any]:
    """Load runtime config overrides from CONFIG_FILE (if present).

    Returns:
        The parsed JSON object, or an empty dict when the file is missing,
        unreadable, not valid UTF-8, not valid JSON, or when its top-level
        value is not a JSON object (callers use `.get()` on the result).
    """
    try:
        with open(CONFIG_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return {}
    return data if isinstance(data, dict) else {}
def save_config(cfg: dict[str, Any]):
    """Write `cfg` to CONFIG_FILE as indented UTF-8 JSON via a scratch file.

    The JSON is first written to a temporary file created next to
    CONFIG_FILE and then moved over it with `os.replace`, so a failed write
    never leaves a half-written CONFIG_FILE behind. The parent directory is
    created when missing.

    Raises:
        Whatever the write or the replace raised; the scratch file is
        removed (best effort) before the exception is re-raised.
    """
    import os, tempfile

    target_dir = CONFIG_FILE.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    handle, scratch_path = tempfile.mkstemp(
        dir=str(target_dir),
        prefix=f".{CONFIG_FILE.name}.",
        suffix=".tmp",
    )
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as out:
            json.dump(cfg, out, ensure_ascii=False, indent=2)
        os.replace(scratch_path, CONFIG_FILE)
    except Exception:
        # Drop the scratch file, then let the caller see the original error.
        try:
            os.unlink(scratch_path)
        except OSError:
            pass
        raise
# Apply config.json overrides on top of module constants (was previously dead code).
def _apply_overrides():
cfg = load_config()
if not cfg:
return
g = globals()
gemini = cfg.get("gemini", {})
if isinstance(gemini, dict):
if "api_key" in gemini and gemini["api_key"]:
g["GEMINI_API_KEY"] = gemini["api_key"]
if "model" in gemini:
g["GEMINI_MODEL"] = gemini["model"]
if "voice" in gemini:
g["GEMINI_VOICE"] = gemini["voice"]
audio = cfg.get("audio", {})
if isinstance(audio, dict):
if "send_sample_rate" in audio:
g["SEND_SAMPLE_RATE"] = int(audio["send_sample_rate"])
if "receive_sample_rate" in audio:
g["RECEIVE_SAMPLE_RATE"] = int(audio["receive_sample_rate"])
if "chunk_size" in audio:
g["CHUNK_SIZE"] = int(audio["chunk_size"])
if "sink" in audio:
g["SINK"] = audio["sink"]
if "source" in audio:
g["SOURCE"] = audio["source"]
dashboard = cfg.get("dashboard", {})
if isinstance(dashboard, dict):
if "host" in dashboard:
g["DASHBOARD_HOST"] = dashboard["host"]
if "port" in dashboard:
g["DASHBOARD_PORT"] = int(dashboard["port"])
try:
_apply_overrides()
except Exception:
# Never let a malformed config.json kill module import.
pass

89
vendor/Sanad/config/core_config.json vendored Normal file
View File

@ -0,0 +1,89 @@
{
"_description": "Tunables for core/* modules. Loaded via core.config_loader.load('core').",
"brain": {
"allowed_callback_prefixes": [
"Project.Sanad.motion.",
"Project.Sanad.voice.",
"motion.",
"voice."
],
"gestural_speaking_default": false
},
"logger": {
"log_level": "INFO",
"format": "%(asctime)s [%(name)s] %(levelname)-7s %(message)s",
"datefmt": "%Y-%m-%d %H:%M:%S",
"file_max_bytes": 10485760,
"file_backup_count": 7
},
"event_bus": {
"emit_timeout_sec": 0.5
},
"paths": {
"_comment": "Path roots — resolved against BASE_DIR in core/config.py",
"data": "data",
"logs": "logs",
"scripts": "scripts",
"model": "model",
"audio_recordings": "data/audio",
"motion_recordings": "data/recordings/motion",
"motions": "data/motions"
},
"gemini_defaults": {
"_comment": "Baseline Gemini API config — SINGLE SOURCE OF TRUTH. All voice modules read from here.",
"api_key": "",
"model_live": "gemini-2.5-flash-native-audio-preview-12-2025",
"model_ws_uri": "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent",
"voice_name": "Charon",
"ws_timeout_sec": 30,
"default_system_prompt": "You are Bousandah, a wise and friendly Emirati assistant. Speak strictly in the UAE dialect (Khaleeji). Be helpful, concise, and use local greetings like 'Marhaba' and 'Ya Khoy'."
},
"g1_hardware": {
"_comment": "G1 humanoid hardware constants — shared by every motion/voice module that talks to the arm.",
"num_motor": 29,
"enable_arm_sdk_index": 29,
"replay_hz": 60.0
},
"script_files": {
"_comment": "Filenames (under scripts/) used across voice + dashboard",
"persona": "sanad_script.txt",
"rules": "sanad_rule.txt",
"arm_phrases": "sanad_arm.txt"
},
"dashboard_defaults": {
"host": null,
"port": 8000,
"interface": "wlan0"
},
"audio_defaults": {
"_comment": "Host PulseAudio fallback only — the G1 deployment uses UDP multicast mic + AudioClient.PlayStream speaker (see SANAD_USE_G1_MIC in config.py LIVE_TUNE). Default here is the Jetson/G1 built-in platform-sound chip.",
"send_sample_rate": 16000,
"receive_sample_rate": 24000,
"chunk_size": 512,
"channels": 1,
"sink": "alsa_output.platform-sound.analog-stereo",
"source": "alsa_input.platform-sound.analog-stereo"
},
"dds": {
"network_interface_default": "eth0"
},
"camera": {
"_comment": "Recognition tab camera daemon (parent process reads this). width/height/fps/jpeg_quality + the reconnect knobs configure CameraDaemon. Frames are cached in memory and pushed to the Gemini child over its stdin (no file drop). send_hz/stale_ms are read by the Gemini child via SANAD_VISION_SEND_HZ / SANAD_VISION_STALE_MS env vars (LIVE_TUNE).",
"width": 424,
"height": 240,
"fps": 15,
"jpeg_quality": 70,
"send_hz": 2,
"stale_ms": 1500,
"stale_threshold_s": 10.0,
"reconnect_min_s": 2.0,
"reconnect_max_s": 10.0,
"capture_timeout_ms": 5000
},
"faces": {
"_comment": "Face gallery for Gemini-side recognition. Folder layout: data/faces/face_{id}/{face_1.jpg, ...} + optional meta.json {\"name\": \"...\"}. Gemini does the matching — no local ML model.",
"dir_rel": "data/faces",
"max_samples_per_face": 3,
"primer_resize_long_side": 256
}
}

View File

@ -0,0 +1,49 @@
{
"_description": "Tunables for dashboard/* modules. Loaded via core.config_loader.load('dashboard').",
"app": {
"_comment": "dashboard/app.py — FastAPI app",
"title": "Sanad Dashboard",
"version": "1.0.0",
"static_subdir": "dashboard/static"
},
"api_input": {
"_comment": "Shared by every route that accepts user text input / uploads. Single source of truth.",
"max_text_len": 2000,
"max_upload_bytes": 8388608
},
"voice_route": {
"_comment": "dashboard/routes/voice.py — reads max_text_len from api_input above",
"api_key_mask_visible": 4
},
"typed_replay_route": {
"_comment": "dashboard/routes/typed_replay.py — reads max_text_len from api_input above"
},
"records_route": {
"_comment": "dashboard/routes/records.py",
"index_filename": "records.json"
},
"prompt_route": {
"_comment": "dashboard/routes/prompt.py — script/rule filenames come from core.script_files; default prompt from core.gemini_defaults.default_system_prompt"
},
"logs_route": {
"_comment": "dashboard/routes/logs.py",
"default_tail_lines": 200,
"max_tail_lines": 5000
},
"scripts_route": {
"_comment": "dashboard/routes/scripts.py — max_script_bytes reads from api_input.max_upload_bytes"
},
"live_subprocess_route": {
"_comment": "dashboard/routes/live_subprocess.py",
"tail_default_lines": 100
}
}

35
vendor/Sanad/config/gemini_config.json vendored Normal file
View File

@ -0,0 +1,35 @@
{
"_description": "Tunables for gemini/* modules. Loaded via core.config_loader.load('gemini'). API credentials (api_key, model, voice_name) still live in core_config.json > gemini_defaults — single source of truth shared with config.py.",
"client": {
"_comment": "gemini/client.py — short-session WebSocket client used by dashboard /generate + typed replay. default_system_prompt comes from core.gemini_defaults.",
"recv_timeout_sec": 30,
"reconnect_max_attempts": 3,
"reconnect_initial_delay_sec": 1.0,
"reconnect_max_delay_sec": 10.0
},
"subprocess": {
"_comment": "gemini/subprocess.py — GeminiSubprocess supervisor. Spawns voice/sanad_voice.py as a child, tails stdout for Gemini-specific log markers, pushes camera frames + motion state to the child over its stdin, exposes transcript + state to the dashboard.",
"log_tail_size": 2000,
"transcript_tail_size": 30,
"log_name": "gemini_subprocess",
"stop_timeout_sec": 3.0,
"terminate_timeout_sec": 2.0,
"frame_forward_interval_sec": 0.5,
"noisy_prefixes": [
"ALSA lib ",
"Expression 'alsa_",
"Cannot connect to server socket",
"jack server is not running"
],
"noisy_fragments": [
"Unknown PCM",
"Evaluate error",
"snd_pcm_open_noupdate",
"PaAlsaStream",
"snd_config_evaluate",
"snd_func_refer"
]
}
}

92
vendor/Sanad/config/local_config.json vendored Normal file
View File

@ -0,0 +1,92 @@
{
"_description": "Tunables for local/* — fully on-device voice pipeline (Silero VAD → Whisper → Qwen via llama.cpp → CosyVoice2). Loaded via core.config_loader.load('local').",
"subprocess": {
"_comment": "local/subprocess.py — LocalSubprocess supervisor. Mirrors gemini/subprocess.py. IMPORTANT: python_bin points at the `local` conda env (Python 3.8 + Jetson CUDA torch) so CosyVoice+Whisper run with GPU, while the dashboard/Gemini stack stays in gemini_sdk (Python 3.10).",
"python_bin": "/home/unitree/miniconda3/envs/local/bin/python",
"log_tail_size": 2000,
"transcript_tail_size": 30,
"log_name": "local_subprocess",
"stop_timeout_sec": 5.0,
"terminate_timeout_sec": 3.0,
"noisy_prefixes": [
"ALSA lib ",
"Expression 'alsa_",
"Cannot connect to server socket",
"jack server is not running"
],
"noisy_fragments": [
"Unknown PCM",
"Evaluate error",
"snd_pcm_open_noupdate",
"PaAlsaStream"
]
},
"vad": {
"_comment": "Silero VAD — CPU. Emits speech_start / speech_end events.",
"sample_rate": 16000,
"frame_ms": 32,
"threshold": 0.55,
"min_silence_ms": 400,
"min_speech_ms": 250,
"pad_start_ms": 200,
"pad_end_ms": 200,
"device": "cpu"
},
"stt": {
"_comment": "faster-whisper Large V3 Turbo, INT8 on GPU.",
"model_name": "large-v3-turbo",
"model_subdir": "faster-whisper-large-v3-turbo",
"device": "cuda",
"compute_type": "int8_float16",
"beam_size": 1,
"language": null,
"vad_filter": false,
"no_speech_threshold": 0.6,
"min_utterance_chars": 2,
"temperature": 0.0
},
"llm": {
"_comment": "Qwen 2.5 Instruct via Ollama (default) OR self-managed llama.cpp. Set backend to pick.",
"backend": "ollama",
"_ollama_comment": "Ollama daemon — assumes `ollama serve` is running; `ollama pull qwen2.5:1.5b` to fetch.",
"ollama_host": "127.0.0.1",
"ollama_port": 11434,
"ollama_model": "qwen2.5:1.5b",
"ollama_keep_alive": "5m",
"_llamacpp_comment": "Self-managed llama-server subprocess. Only used when backend='llama_cpp'.",
"model_subdir": "qwen2.5-1.5b-instruct-q4_k_m.gguf",
"server_binary": "llama-server",
"host": "127.0.0.1",
"port": 8080,
"n_gpu_layers": 99,
"ctx_size": 2048,
"threads": 4,
"startup_timeout_sec": 30,
"_shared_comment": "Generation params — both backends.",
"request_timeout_sec": 30,
"max_tokens": 200,
"temperature": 0.7,
"top_p": 0.9,
"stop": ["<|im_end|>", "\n\n\n"],
"chunk_delimiters": ".,?!؟،",
"chunk_min_chars": 8
},
"tts": {
"_comment": "CosyVoice2 0.5B streaming — GPU. Uses a 3s reference WAV for voice cloning.",
"model_subdir": "CosyVoice2-0.5B",
"reference_wav_subdir": "khaleeji_reference_3s.wav",
"reference_prompt": "",
"stream_chunk_sec": 0.25,
"sample_rate": 16000,
"queue_max": 3,
"device": "cuda"
}
}

70
vendor/Sanad/config/motion_config.json vendored Normal file
View File

@ -0,0 +1,70 @@
{
"_description": "Tunables for motion/* modules. Loaded via core.config_loader.load('motion').",
"arm_controller": {
"_comment": "motion/arm_controller.py — enable_arm_sdk_index + replay_hz come from core.g1_hardware",
"ramp_in_steps": 60,
"ramp_out_steps": 180,
"settle_hold_sec": 0.5,
"watchdog_timeout_sec": 0.25,
"watchdog_disable_after_sec": 1.0,
"arm_indices_start": 15,
"arm_indices_stop": 29,
"jsonl_id_start": 100
},
"loco_controller": {
"_comment": "G1_Controller/loco_controller.py — manual locomotion. NIC is shared from the arm's DDS init (config core.dds / SANAD_DDS_INTERFACE), not set here.",
"cap_walk": 0.6,
"cap_run": 1.2,
"lin_step": 0.05,
"ang_step": 0.2,
"watchdog_timeout_sec": 0.5,
"arm_block_window_sec": 1.5,
"step_duration_sec": 0.6,
"step_speed_frac": 0.5,
"loco_timeout_sec": 10.0,
"msc_timeout_sec": 5.0
},
"macro_player": {
"_comment": "motion/macro_player.py — JSONL playback",
"ramp_in_steps": 60,
"ramp_out_steps": 60,
"watchdog_disable_after_sec": 1.0
},
"macro_recorder": {
"_comment": "motion/macro_recorder.py — record arm trajectories",
"sample_rate_hz": 60.0,
"smoothing_window": 5
},
"teaching": {
"_comment": "motion/teaching.py — teach-by-demo",
"safe_hold_sec": 3.0,
"waist_kp": 60.0,
"waist_kd": 4.0,
"hold_arm_kp": 60.0,
"hold_arm_kd": 4.0,
"teach_arm_kp": 0.0,
"teach_arm_kd": 2.0
},
"sanad_arm_controller": {
"_comment": "motion/sanad_arm_controller.py — g1_num_motor + enable_arm_sdk_index + replay_hz come from core.g1_hardware",
"action_cooldown_sec": 1.0,
"stability_threshold": 0.06,
"gains": {
"kp_high": 300.0,
"kd_high": 3.0,
"kp_low": 80.0,
"kd_low": 3.0,
"kp_wrist": 40.0,
"kd_wrist": 1.5
},
"weak_motors": [4, 10, 15, 16, 17, 18, 22, 23, 24, 25],
"wrist_motors": [19, 20, 21, 26, 27, 28],
"data_subdir": "DataG1"
}
}

75
vendor/Sanad/config/voice_config.json vendored Normal file
View File

@ -0,0 +1,75 @@
{
"_description": "Tunables for voice/* modules. Loaded via core.config_loader.load('voice').",
"sanad_voice": {
"_comment": "voice/sanad_voice.py — main live voice subprocess. Gemini API credentials (api_key, model, voice_name) come from core_config.json's gemini_defaults — single source of truth.",
"mic_gain": 1.0,
"play_chunk_bytes": 96000,
"log_dir": "~/logs",
"log_name": "gemini_live_v2",
"session_timeout_sec": 660,
"max_reconnect_delay_sec": 30,
"max_consecutive_errors": 10,
"no_messages_timeout_sec": 30
},
"mic_udp": {
"_comment": "G1 built-in mic — UDP multicast subscriber",
"group": "239.168.123.161",
"port": 5555,
"buffer_max_bytes": 64000,
"read_timeout_sec": 0.04,
"socket_timeout_sec": 1.0
},
"speaker": {
"_comment": "G1 built-in speaker — AudioClient.PlayStream wrapper",
"app_name": "sanad",
"begin_stream_pause_sec": 0.15,
"wait_finish_margin_sec": 0.3
},
"vad": {
"_comment": "Gemini Live server-side voice-activity-detection config",
"start_sensitivity": "START_SENSITIVITY_HIGH",
"end_sensitivity": "END_SENSITIVITY_LOW",
"prefix_padding_ms": 20,
"silence_duration_ms": 200
},
"barge_in": {
"threshold": 500,
"loud_chunks_needed": 3,
"cooldown_sec": 0.3,
"echo_suppress_below": 500,
"ai_speak_grace_sec": 0.15
},
"recording": {
"enabled": true,
"dir_relative": "data/recordings"
},
"typed_replay": {
"_comment": "voice/typed_replay.py — max_text_len comes from dashboard.api_input",
"monitor_chunk_size": 512,
"monitor_tail_sec": 0.2
},
"live_voice_loop": {
"_comment": "voice/live_voice_loop.py — arm phrase dispatcher. arm_txt filename comes from core.script_files.arm_phrases",
"trigger_log_size": 100,
"poll_interval_sec": 0.1,
"deferred_default": false,
"trigger_enabled_default": false
},
"local_tts": {
"_comment": "voice/local_tts.py — offline Coqui TTS",
"model_subdir": "speecht5_tts_clartts_ar",
"vocoder_subdir": "speecht5_hifigan",
"xvector_filename": "arabic_xvector_embedding.pt",
"sample_rate": 16000,
"channels": 1
}
}

0
vendor/Sanad/core/__init__.py vendored Normal file
View File

33
vendor/Sanad/core/asyncio_compat.py vendored Normal file
View File

@ -0,0 +1,33 @@
"""asyncio compatibility shim for Python 3.8.
`asyncio.to_thread` only exists from Python 3.9. The Jetson runs 3.8, so we
backfill it via run_in_executor on the default thread pool.
Usage:
from Project.Sanad.core.asyncio_compat import to_thread
result = await to_thread(blocking_fn, arg1, arg2, kw=val)
"""
from __future__ import annotations
import asyncio
import functools
import sys
from typing import Any, Callable, TypeVar
_T = TypeVar("_T")

if sys.version_info >= (3, 9):
    # Native implementation
    to_thread = asyncio.to_thread  # type: ignore[attr-defined]
else:
    async def to_thread(func: Callable[..., _T], /, *args: Any, **kwargs: Any) -> _T:
        """Backport of asyncio.to_thread for Python 3.8.

        Runs `func(*args, **kwargs)` on the loop's default executor and
        returns its result. Like the stdlib version, the call runs inside a
        copy of the caller's contextvars context.
        """
        import contextvars

        # get_running_loop() is the right call inside a coroutine: it always
        # returns the loop that is executing this coroutine.
        loop = asyncio.get_running_loop()
        ctx = contextvars.copy_context()
        call = functools.partial(ctx.run, func, *args, **kwargs)
        return await loop.run_in_executor(None, call)

# Also patch the asyncio module so existing `asyncio.to_thread` calls work
# without rewriting every consumer file. Done lazily — only if missing.
if not hasattr(asyncio, "to_thread"):
    asyncio.to_thread = to_thread  # type: ignore[attr-defined]

272
vendor/Sanad/core/brain.py vendored Normal file
View File

@ -0,0 +1,272 @@
"""The Brain — central orchestrator for the Sanad robot assistant.
Responsibilities:
1. Owns the SkillRegistry, resolves callbacks at runtime.
2. Coordinates the voice, motion, and vision pipelines.
3. Executes skills (audio + motion + callback) with configurable sync modes.
4. Exposes a thread-safe API consumed by the FastAPI dashboard.
"""
from __future__ import annotations
import asyncio
import importlib
import time
from pathlib import Path
from typing import Any, Callable
from Project.Sanad.config import (
AUDIO_RECORDINGS_DIR,
MOTIONS_DIR,
MOTION_RECORDINGS_DIR,
)
from Project.Sanad.core.event_bus import bus
from Project.Sanad.core.logger import get_logger
from Project.Sanad.core.skill_registry import Skill, SkillRegistry
log = get_logger("brain")
# Whitelist of module path prefixes allowed for skill callbacks.
# Prevents arbitrary code execution via dashboard-editable skills.json.
from Project.Sanad.core.config_loader import section as _cfg_section
_BRAIN_CFG = _cfg_section("core", "brain")
ALLOWED_CALLBACK_PREFIXES = tuple(_BRAIN_CFG.get("allowed_callback_prefixes", [
"Project.Sanad.motion.",
"Project.Sanad.voice.",
"motion.",
"voice.",
]))
class Brain:
    """Singleton-style manager that bridges all subsystems.

    Owns the skill registry, holds the voice / audio / motion helpers that
    are injected after construction, and runs at most one skill at a time.
    """

    def __init__(self):
        self.registry = SkillRegistry()
        # Guards the check-and-set of `_running_skill`.
        self._lock = asyncio.Lock()

        # Sub-modules are injected after construction so imports stay lazy.
        self._voice = None        # gemini.client.GeminiVoiceClient
        self._audio_mgr = None    # voice.audio_manager.AudioManager
        self._arm = None          # motion.arm_controller.ArmController
        self._macro_rec = None    # motion.macro_recorder.MacroRecorder
        self._macro_play = None   # motion.macro_player.MacroPlayer
        self._live_voice = None   # voice.live_voice_loop.LiveVoiceLoop

        # Toggle: move while Gemini speaks. The boot value comes from the
        # brain config section (gestural_speaking_default), defaulting to off.
        self.gestural_speaking = bool(_BRAIN_CFG.get("gestural_speaking_default", False))
        self._running_skill: str | None = None

    # -- dependency injection --

    def attach_voice(self, client):
        """Store the voice client."""
        self._voice = client
        log.info("Voice client attached")

    def attach_audio_manager(self, mgr):
        """Store the audio manager used for skill audio playback."""
        self._audio_mgr = mgr
        log.info("Audio manager attached")

    def attach_arm(self, arm):
        """Store the arm controller used for motion replay and cancel."""
        self._arm = arm
        log.info("Arm controller attached")

    def attach_macro_recorder(self, rec):
        """Store the macro recorder."""
        self._macro_rec = rec

    def attach_macro_player(self, player):
        """Store the macro player."""
        self._macro_play = player

    def attach_live_voice(self, lv):
        """Store the live voice loop."""
        self._live_voice = lv
        log.info("LiveVoiceLoop attached")

    # -- callback resolution --

    def _resolve_callback(self, callback_str: str) -> Callable | None:
        """Resolve 'module.submodule:function_name' → callable.

        SECURITY: only modules under ALLOWED_CALLBACK_PREFIXES may be imported.
        Skill JSON is dashboard-editable and otherwise an arbitrary-import RCE.

        Examples:
            "Project.Sanad.motion.arm_controller:wave_hand"
            "motion.arm_controller:wave_hand"

        Returns:
            The callable, or None when the string is empty, malformed, not
            whitelisted, cannot be imported/looked up, or names an attribute
            that is not callable. Every rejection except the empty string is
            logged.
        """
        if not callback_str:
            return None
        if ":" not in callback_str:
            log.error("Invalid callback (missing ':'): %s", callback_str)
            return None
        module_path, func_name = callback_str.rsplit(":", 1)
        # A module passes when it sits under a whitelisted prefix or is the
        # prefix package itself (prefix without its trailing dot).
        if not any(module_path.startswith(prefix) or module_path == prefix.rstrip(".")
                   for prefix in ALLOWED_CALLBACK_PREFIXES):
            log.error(
                "Callback %s rejected — module '%s' not in whitelist",
                callback_str, module_path,
            )
            return None
        try:
            mod = importlib.import_module(module_path)
            target = getattr(mod, func_name)
        except Exception:
            log.exception("Cannot resolve callback '%s'", callback_str)
            return None
        # The attribute could be a constant or a module; reject it here rather
        # than letting the later call fail with an opaque TypeError.
        if not callable(target):
            log.error(
                "Callback '%s' resolved to a non-callable %s",
                callback_str, type(target).__name__,
            )
            return None
        return target

    # -- skill execution --

    async def execute_skill(self, skill_id: str) -> dict[str, Any]:
        """Run a skill: play audio + execute motion + fire callback.

        Raises:
            KeyError: no skill with this id is registered.
            RuntimeError: the skill is disabled, or another skill is running.

        Returns:
            A result dict with `skill_id`, `ok`, `elapsed_sec` and, depending
            on what ran, audio/motion/callback details or an `error` string.
            Failures during execution are reported in the dict, not raised.
        """
        skill = self.registry.get(skill_id)
        if skill is None:
            raise KeyError(f"Skill not found: {skill_id}")
        if not skill.enabled:
            raise RuntimeError(f"Skill '{skill_id}' is disabled.")

        async with self._lock:
            if self._running_skill:
                raise RuntimeError(f"Skill '{self._running_skill}' is already running.")
            self._running_skill = skill_id

        t0 = time.monotonic()
        result: dict[str, Any] = {"skill_id": skill_id, "ok": True}
        try:
            await bus.emit("skill.started", skill_id=skill_id)

            # Validate required attachments before partial execution
            if skill.audio_file and self._audio_mgr is None:
                raise RuntimeError("AudioManager not attached but skill requires audio")
            if skill.motion_file and self._arm is None:
                raise RuntimeError("ArmController not attached but skill requires motion")

            if skill.sync_mode == "parallel":
                await self._exec_parallel(skill, result)
            elif skill.sync_mode == "audio_first":
                await self._exec_audio_first(skill, result)
            elif skill.sync_mode == "motion_first":
                await self._exec_motion_first(skill, result)
            else:
                # Unknown sync modes fall back to parallel execution.
                await self._exec_parallel(skill, result)

            # Fire callback — run blocking callbacks in a thread to avoid stalling the loop
            cb = self._resolve_callback(skill.callback)
            if cb is not None:
                if asyncio.iscoroutinefunction(cb):
                    cb_result = await cb()
                else:
                    cb_result = await asyncio.to_thread(cb)
                result["callback_result"] = str(cb_result) if cb_result else "ok"
        except Exception as exc:
            result["ok"] = False
            result["error"] = str(exc)
            log.exception("Skill %s failed", skill_id)
        finally:
            elapsed = time.monotonic() - t0
            result["elapsed_sec"] = round(elapsed, 3)
            async with self._lock:
                self._running_skill = None
            await bus.emit("skill.finished", skill_id=skill_id, result=result)
        return result

    async def cancel_skill(self) -> dict[str, Any]:
        """Cancel any running skill — asks the arm and the audio manager to stop.

        Both stop requests are best-effort: a failure in one is logged and
        does not prevent the other, and nothing is raised to the caller.

        Returns:
            {"cancelled": <id of the skill that was running, or None>}
        """
        cancelled = self._running_skill
        if self._arm is not None and hasattr(self._arm, "cancel"):
            try:
                self._arm.cancel()
            except Exception:
                log.exception("arm.cancel() failed")
        if self._audio_mgr is not None and hasattr(self._audio_mgr, "stop_playback"):
            try:
                self._audio_mgr.stop_playback()
            except Exception:
                # Logged like the arm failure above instead of being dropped.
                log.exception("audio_mgr.stop_playback() failed")
        return {"cancelled": cancelled}

    async def _exec_parallel(self, skill: Skill, result: dict):
        """Start audio and motion together and wait for both."""
        tasks = []
        if skill.audio_file:
            tasks.append(asyncio.create_task(self._play_audio(skill.audio_file, result)))
        if skill.motion_file:
            tasks.append(asyncio.create_task(self._play_motion(skill.motion_file, result)))
        if tasks:
            await asyncio.gather(*tasks)

    async def _exec_audio_first(self, skill: Skill, result: dict):
        """Play the audio to completion, then the motion."""
        if skill.audio_file:
            await self._play_audio(skill.audio_file, result)
        if skill.motion_file:
            await self._play_motion(skill.motion_file, result)

    async def _exec_motion_first(self, skill: Skill, result: dict):
        """Play the motion to completion, then the audio."""
        if skill.motion_file:
            await self._play_motion(skill.motion_file, result)
        if skill.audio_file:
            await self._play_audio(skill.audio_file, result)

    async def _play_audio(self, audio_file: str, result: dict):
        """Play a WAV in a worker thread; record the outcome in `result`.

        Relative paths are resolved against AUDIO_RECORDINGS_DIR.
        """
        path = Path(audio_file)
        if not path.is_absolute():
            path = AUDIO_RECORDINGS_DIR / path
        if not path.exists():
            result["audio_error"] = f"File not found: {path}"
            log.warning("Audio file missing: %s", path)
            return
        if self._audio_mgr is not None:
            await asyncio.to_thread(self._audio_mgr.play_wav, path)
            result["audio_played"] = str(path)
        else:
            result["audio_error"] = "AudioManager not attached"

    async def _play_motion(self, motion_file: str, result: dict):
        """Replay a motion file in a worker thread; record the outcome in `result`.

        Relative paths are resolved against MOTIONS_DIR.
        """
        path = Path(motion_file)
        if not path.is_absolute():
            path = MOTIONS_DIR / path
        if not path.exists():
            result["motion_error"] = f"File not found: {path}"
            log.warning("Motion file missing: %s", path)
            return
        if self._arm is not None:
            await asyncio.to_thread(self._arm.replay_file, str(path))
            result["motion_played"] = str(path)
        else:
            result["motion_error"] = "ArmController not attached"

    # -- macro recording --

    async def start_macro_recording(self, name: str) -> dict[str, Any]:
        """Start recording a macro; raises RuntimeError if no recorder is attached."""
        if self._macro_rec is None:
            raise RuntimeError("MacroRecorder not attached.")
        return await asyncio.to_thread(self._macro_rec.start, name)

    async def stop_macro_recording(self) -> dict[str, Any]:
        """Stop the current recording; raises RuntimeError if no recorder is attached."""
        if self._macro_rec is None:
            raise RuntimeError("MacroRecorder not attached.")
        return await asyncio.to_thread(self._macro_rec.stop)

    async def play_macro(self, name: str) -> dict[str, Any]:
        """Play a recorded macro; raises RuntimeError if no player is attached."""
        if self._macro_play is None:
            raise RuntimeError("MacroPlayer not attached.")
        return await asyncio.to_thread(self._macro_play.play, name)

    # -- gestural speaking toggle --

    def set_gestural_speaking(self, enabled: bool):
        """Flip the gestural-speaking toggle and announce the change on the bus."""
        self.gestural_speaking = enabled
        bus.emit_sync("brain.gestural_speaking_changed", enabled=enabled)
        log.info("Gestural speaking: %s", "ON" if enabled else "OFF")

    # -- status --

    def status(self) -> dict[str, Any]:
        """Snapshot of attachments, toggles, the running skill and skill count."""
        return {
            "voice_attached": self._voice is not None,
            "arm_attached": self._arm is not None,
            "audio_manager_attached": self._audio_mgr is not None,
            "live_voice_attached": self._live_voice is not None,
            "gestural_speaking": self.gestural_speaking,
            "running_skill": self._running_skill,
            "total_skills": len(self.registry.list_skills()),
        }

124
vendor/Sanad/core/config_loader.py vendored Normal file
View File

@ -0,0 +1,124 @@
"""Single-source config loader for all Sanad subsystems.
Each subsystem (core, dashboard, gemini, local, motion, voice) has its own JSON
file at `config/<subsystem>_config.json`. This module loads them on demand,
caches the result, and exposes helpers for pulling nested sections.
Usage:
from Project.Sanad.core.config_loader import load, get
cfg = load("voice") # full voice config dict
threshold = get("voice", "barge_in.threshold", 500)
rates = get("voice", "sanad_voice", {}) # whole section
Why JSON (not TOML/YAML): standard library only, editable in any text
editor, commented via "_comment" keys. No third-party dep.
"""
from __future__ import annotations
import json
import threading
from pathlib import Path
from typing import Any
from Project.Sanad.core.logger import get_logger
log = get_logger("config_loader")
# Resolved at first-load time (avoids circular import with config.py)
_BASE_DIR: Path | None = None
_CONFIG_DIR: Path | None = None
_CACHE: dict[str, dict[str, Any]] = {}
_LOCK = threading.Lock()
def _resolve_dirs() -> tuple[Path, Path]:
"""Find Sanad's root and config/ directory (lazy + cached)."""
global _BASE_DIR, _CONFIG_DIR
if _BASE_DIR is not None and _CONFIG_DIR is not None:
return _BASE_DIR, _CONFIG_DIR
here = Path(__file__).resolve().parent # Sanad/core
base = here.parent # Sanad/
_BASE_DIR = base
_CONFIG_DIR = base / "config"
return _BASE_DIR, _CONFIG_DIR
def _strip_comments(d: Any) -> Any:
"""Remove top-level "_comment"/"_description" keys — noise for callers."""
if isinstance(d, dict):
return {
k: _strip_comments(v) for k, v in d.items()
if not (isinstance(k, str) and k.startswith("_"))
}
if isinstance(d, list):
return [_strip_comments(x) for x in d]
return d
def load(subsystem: str) -> dict[str, Any]:
    """Load and cache ``config/<subsystem>_config.json``.

    Args:
        subsystem: Config name, e.g. ``"voice"`` for ``voice_config.json``.

    Returns:
        The parsed config with all leading-underscore keys stripped. An
        empty dict is returned (and cached) when the file is missing,
        cannot be read or decoded, is not valid JSON, or its top-level
        value is not an object. Callers supply their own defaults via
        ``get(..., default)``.

    The cached dict itself is returned, not a copy. Use ``reload()`` to
    force a re-read from disk.
    """
    with _LOCK:
        if subsystem in _CACHE:
            return _CACHE[subsystem]
        _, cfg_dir = _resolve_dirs()
        path = cfg_dir / f"{subsystem}_config.json"
        if not path.exists():
            log.warning("config file missing: %s — using empty dict", path)
            _CACHE[subsystem] = {}
            return _CACHE[subsystem]
        try:
            raw = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
            # OSError covers permission / I/O failures. This function runs at
            # import time in several modules, so it must never raise.
            log.error("config file %s unreadable: %s", path, exc)
            _CACHE[subsystem] = {}
            return _CACHE[subsystem]
        cleaned = _strip_comments(raw)
        if not isinstance(cleaned, dict):
            # Honour the dict return contract even for e.g. a top-level list.
            log.error("config file %s unreadable: %s", path,
                      "top-level JSON value is not an object")
            cleaned = {}
        _CACHE[subsystem] = cleaned
        return cleaned
def get(subsystem: str, dotted_key: str, default: Any = None) -> Any:
    """Look up a nested config value by dotted path.

    ``get("voice", "barge_in.threshold", 500)`` walks ``barge_in`` then
    ``threshold`` inside the voice config. ``default`` is returned as soon
    as a path segment is missing or the current node is not a dict.
    """
    node: Any = load(subsystem)
    for segment in dotted_key.split("."):
        if isinstance(node, dict) and segment in node:
            node = node[segment]
        else:
            return default
    return node
def section(subsystem: str, name: str) -> dict[str, Any]:
    """Return one config section as a dict, never anything else.

    Example: ``section("voice", "sanad_voice")`` yields that section's
    dict. A missing section, or one whose value is not a dict, yields ``{}``.
    """
    value = get(subsystem, name, {})
    if isinstance(value, dict):
        return value
    return {}
def reload(subsystem: str | None = None) -> None:
    """Forget cached config so the next ``load()`` reads from disk again.

    With a subsystem name only that entry is dropped (unknown names are
    ignored); with ``None`` the whole cache is emptied.
    """
    with _LOCK:
        if subsystem is not None:
            _CACHE.pop(subsystem, None)
            return
        _CACHE.clear()
def config_dir() -> Path:
    """Return the resolved path of the ``config`` directory."""
    return _resolve_dirs()[1]

91
vendor/Sanad/core/event_bus.py vendored Normal file
View File

@ -0,0 +1,91 @@
"""Lightweight in-process event bus for inter-module communication.
Usage:
from core.event_bus import bus
# Subscribe
bus.on("voice.user_said", my_handler) # sync or async callable
bus.on("motion.action_done", other_handler)
# Publish
await bus.emit("voice.user_said", text="hello")
"""
from __future__ import annotations
import asyncio
import threading
from collections import defaultdict
from typing import Any, Callable
from Project.Sanad.core.logger import get_logger
log = get_logger("event_bus", to_console=False)
class EventBus:
    """In-process publish/subscribe hub.

    Listeners are plain callables (sync or async) registered per event
    name. Registration is guarded by a lock; dispatch works on a snapshot
    of the listener list, so handlers may subscribe/unsubscribe while an
    event is being delivered. A failing handler is logged and never stops
    delivery to the remaining handlers.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self._listeners: dict[str, list[Callable]] = defaultdict(list)
        # Strong references to tasks scheduled by emit_sync(). The event loop
        # itself only keeps weak references, so an unreferenced task can be
        # garbage-collected before it finishes.
        self._tasks: set = set()

    @staticmethod
    def _name(callback: Any) -> str:
        """Return a printable name for any callable.

        ``functools.partial`` objects and callable instances have no
        ``__qualname__``; fall back to ``repr`` so logging never raises.
        """
        return getattr(callback, "__qualname__", None) or repr(callback)

    def _spawn(self, loop: Any, coro: Any) -> None:
        """Schedule ``coro`` on ``loop`` and hold a reference until it is done."""
        task = loop.create_task(coro)
        self._tasks.add(task)
        task.add_done_callback(self._tasks.discard)

    def on(self, event: str, callback: Callable):
        """Subscribe ``callback`` (sync or async callable) to ``event``."""
        with self._lock:
            self._listeners[event].append(callback)
        log.debug("Subscribed %s -> %s", event, self._name(callback))

    def off(self, event: str, callback: Callable):
        """Unsubscribe ``callback`` from ``event``; unknown pairs are ignored."""
        with self._lock:
            # .get() avoids creating an empty defaultdict entry for events
            # that were never subscribed to.
            handlers = self._listeners.get(event)
            if not handlers:
                return
            try:
                handlers.remove(callback)
            except ValueError:
                pass

    async def emit(self, event: str, **kwargs: Any):
        """Call every listener of ``event`` in order, awaiting coroutine results.

        Keyword arguments are passed through to each handler. Exceptions
        raised by a handler are logged and swallowed.
        """
        with self._lock:
            handlers = list(self._listeners.get(event, ()))
        for handler in handlers:
            try:
                result = handler(**kwargs)
                if asyncio.iscoroutine(result):
                    await result
            except Exception:
                log.exception("Handler %s for event '%s' failed", self._name(handler), event)

    def emit_sync(self, event: str, **kwargs: Any):
        """Fire-and-forget dispatch from a synchronous context.

        Sync handlers run immediately. Async handlers (and coroutines
        returned by sync handlers) are scheduled on the running event loop
        of the calling thread; when there is none they are dropped and a
        warning is logged.
        """
        with self._lock:
            handlers = list(self._listeners.get(event, ()))
        for handler in handlers:
            try:
                if asyncio.iscoroutinefunction(handler):
                    # Look the loop up first so no coroutine object is created
                    # (and left un-awaited) when there is nowhere to run it.
                    try:
                        loop = asyncio.get_running_loop()
                    except RuntimeError:
                        log.warning(
                            "Async handler %s for '%s' dropped — no running loop",
                            self._name(handler), event,
                        )
                        continue
                    self._spawn(loop, handler(**kwargs))
                    continue
                result = handler(**kwargs)
                if asyncio.iscoroutine(result):
                    # Sync handler returned a coroutine — schedule it.
                    try:
                        loop = asyncio.get_running_loop()
                    except RuntimeError:
                        result.close()
                        log.warning(
                            "Coroutine result from %s for '%s' dropped — no running loop",
                            self._name(handler), event,
                        )
                        continue
                    self._spawn(loop, result)
            except Exception:
                log.exception("Handler %s for event '%s' failed", self._name(handler), event)
bus = EventBus()

67
vendor/Sanad/core/logger.py vendored Normal file
View File

@ -0,0 +1,67 @@
"""Unified logging with RotatingFileHandler for all Sanad modules."""
from __future__ import annotations
import logging
import sys
from logging.handlers import RotatingFileHandler
from pathlib import Path
from Project.Sanad.config import LOGS_DIR
_MAX_BYTES = 10 * 1024 * 1024 # 10 MB
_BACKUP_COUNT = 3
_FMT = "%(asctime)s [%(name)s] %(levelname)s %(message)s"
_formatter = logging.Formatter(_FMT)
# Callback for the WebSocket log stream — set by log_stream.py at import time.
_ws_push_fn = None
def set_ws_push(fn):
    """Register the push function from dashboard.websockets.log_stream.

    Args:
        fn: Callable taking one formatted log line (str), or ``None`` to
            disable forwarding. It is stored in the module-level
            ``_ws_push_fn`` that ``_WSHandler.emit`` reads on every record.
    """
    global _ws_push_fn
    _ws_push_fn = fn
class _WSHandler(logging.Handler):
    """Logging handler that relays formatted records to the WebSocket push hook."""

    def emit(self, record: logging.LogRecord):
        """Format ``record`` and hand it to the registered push function, if any."""
        if _ws_push_fn is None:
            return
        try:
            _ws_push_fn(self.format(record))
        except Exception:
            # A broken log stream must never break logging itself.
            pass
def get_logger(name: str, *, to_console: bool = True) -> logging.Logger:
    """Return the ``sanad.<name>`` logger, configuring it on first use.

    On the first call for a given name the logger receives:
      * a rotating file handler writing DEBUG+ to ``LOGS_DIR/<name>.log``,
      * optionally a stdout handler at INFO+ (``to_console``),
      * a handler forwarding INFO+ records to the WebSocket push hook.

    Later calls return the already-configured logger unchanged, so
    ``to_console`` only takes effect on the first call for that name.

    Args:
        name: Short module name; used for the logger name and the log file.
        to_console: Attach a stdout handler when the logger is first built.
    """
    logger = logging.getLogger(f"sanad.{name}")
    if logger.handlers:
        return logger
    logger.setLevel(logging.DEBUG)
    # Handlers are attached here directly; do not also bubble to the root logger.
    logger.propagate = False
    LOGS_DIR.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8: messages contain non-ASCII text, and the default file
    # encoding otherwise depends on the process locale.
    fh = RotatingFileHandler(
        LOGS_DIR / f"{name}.log", maxBytes=_MAX_BYTES, backupCount=_BACKUP_COUNT,
        encoding="utf-8",
    )
    fh.setFormatter(_formatter)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)
    if to_console:
        sh = logging.StreamHandler(sys.stdout)
        sh.setFormatter(_formatter)
        sh.setLevel(logging.INFO)
        logger.addHandler(sh)
    # WebSocket stream handler
    wsh = _WSHandler()
    wsh.setFormatter(_formatter)
    wsh.setLevel(logging.INFO)
    logger.addHandler(wsh)
    return logger

175
vendor/Sanad/core/skill_registry.py vendored Normal file
View File

@ -0,0 +1,175 @@
"""Skill Registry — maps audio files to motion commands and callback functions.
A "skill" is a named unit that ties together:
- An audio clip (e.g. recordings/audio/intro.wav)
- A motion file (e.g. data/motions/wave.jsonl) optional
- A callback (e.g. "motion.wave_hand") resolved at runtime
The registry is persisted in data/skills.json and can be edited via the
dashboard or programmatically through the Brain.
Skill entry schema:
{
"id": "intro_greeting",
"audio_file": "recordings/audio/intro.wav",
"motion_file": "data/motions/right_hand_up.jsonl",
"callback": "motion.trigger:wave_hand",
"sync_mode": "parallel", # parallel | audio_first | motion_first
"enabled": true,
"description": "Wave hand while playing intro audio"
}
"""
from __future__ import annotations
import json
import os
import tempfile
import threading
import uuid
from dataclasses import dataclass, field, asdict
from pathlib import Path
from typing import Any
from Project.Sanad.config import SKILLS_FILE
from Project.Sanad.core.logger import get_logger
log = get_logger("skill_registry")
@dataclass
class Skill:
    """One registry entry tying an audio clip to an optional motion/callback."""

    id: str                      # unique key inside the registry
    audio_file: str = ""         # path of the audio clip
    motion_file: str = ""        # path of the motion file (optional)
    callback: str = ""           # callback reference, resolved at runtime
    sync_mode: str = "parallel"  # parallel | audio_first | motion_first
    enabled: bool = True         # disabled skills are skipped by find_by_*
    description: str = ""
    meta: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return every field as a plain dict (via ``dataclasses.asdict``)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Skill:
        """Build a Skill from ``data``, silently ignoring unknown keys."""
        accepted = cls.__dataclass_fields__
        return cls(**{name: value for name, value in data.items() if name in accepted})
class SkillRegistry:
    """Thread-safe, JSON-backed registry of skills.

    Public methods take ``self._lock``. ``_load`` and ``_save`` do no
    locking of their own: ``_load`` runs only from ``__init__`` and
    ``_save`` is called by the CRUD methods while they hold the lock.
    """

    _VALID_SYNC_MODES = {"parallel", "audio_first", "motion_first"}

    def __init__(self, path: Path = SKILLS_FILE):
        self._path = path
        self._lock = threading.Lock()
        self._skills: dict[str, Skill] = {}
        self._load()

    # -- persistence --
    def _load(self):
        """Populate ``self._skills`` from the JSON file.

        A missing file yields an empty registry. A file that cannot be
        parsed is renamed to ``<name>.json.corrupt`` (best effort) and the
        registry starts empty.
        """
        if not self._path.exists():
            self._skills = {}
            return
        try:
            with open(self._path, "r", encoding="utf-8") as f:
                payload = json.load(f)
            for entry in payload.get("skills", []):
                skill = Skill.from_dict(entry)
                self._skills[skill.id] = skill
            log.info("Loaded %d skills from %s", len(self._skills), self._path)
        except Exception as exc:
            log.warning("Could not load skills: %s", exc)
            # Backup corrupt file rather than silently nuking
            try:
                self._path.rename(self._path.with_suffix(".json.corrupt"))
                log.warning("Backed up corrupt skills to %s.corrupt", self._path)
            except OSError:
                pass
            # Discard anything loaded before the failure.
            self._skills = {}

    def _save(self):
        """Write all skills to disk atomically (temp file + ``os.replace``)."""
        self._path.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "version": 1,
            "total": len(self._skills),
            "skills": [s.to_dict() for s in self._skills.values()],
        }
        # Atomic write: tempfile + os.replace
        fd, tmp = tempfile.mkstemp(
            prefix=f".{self._path.name}.", suffix=".tmp",
            dir=str(self._path.parent),
        )
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(payload, f, ensure_ascii=False, indent=2)
            os.replace(tmp, self._path)
        except Exception:
            try:
                os.unlink(tmp)
            except OSError:
                pass
            raise

    # -- CRUD --
    def list_skills(self) -> list[dict[str, Any]]:
        """Return every skill as a plain dict."""
        with self._lock:
            return [s.to_dict() for s in self._skills.values()]

    def get(self, skill_id: str) -> Skill | None:
        """Return the stored Skill object for ``skill_id`` or ``None``."""
        with self._lock:
            return self._skills.get(skill_id)

    def add(self, skill: Skill) -> Skill:
        """Register ``skill`` and persist the registry.

        An empty ``skill.id`` is replaced by a generated 12-hex-char id.

        Raises:
            ValueError: invalid ``sync_mode`` or the id already exists.
        """
        if skill.sync_mode not in self._VALID_SYNC_MODES:
            raise ValueError(
                f"Invalid sync_mode '{skill.sync_mode}' (allowed: {sorted(self._VALID_SYNC_MODES)})"
            )
        with self._lock:
            if not skill.id:
                skill.id = uuid.uuid4().hex[:12]
            elif skill.id in self._skills:
                raise ValueError(f"Skill id already exists: {skill.id}")
            self._skills[skill.id] = skill
            self._save()
            log.info("Added skill %s (%s)", skill.id, skill.description)
            return skill

    def update(self, skill_id: str, updates: dict[str, Any]) -> Skill | None:
        """Apply ``updates`` to an existing skill and persist.

        Only declared Skill fields other than ``id`` are written; any other
        key in ``updates`` is ignored.

        Returns:
            The updated Skill, or ``None`` when ``skill_id`` is unknown.

        Raises:
            ValueError: ``updates`` carries an invalid ``sync_mode``.
        """
        with self._lock:
            existing = self._skills.get(skill_id)
            if existing is None:
                return None
            if "sync_mode" in updates and updates["sync_mode"] not in self._VALID_SYNC_MODES:
                raise ValueError(
                    f"Invalid sync_mode '{updates['sync_mode']}'"
                )
            # Whitelist real dataclass fields. A hasattr() check would also
            # accept method names such as "to_dict", letting a payload shadow
            # the method on the instance and break every later _save().
            writable = set(existing.__dataclass_fields__) - {"id"}
            for key, value in updates.items():
                if key in writable:
                    setattr(existing, key, value)
            self._save()
            log.info("Updated skill %s", skill_id)
            return existing

    def delete(self, skill_id: str) -> dict[str, Any] | None:
        """Remove a skill and persist; return its dict form or ``None``."""
        with self._lock:
            skill = self._skills.pop(skill_id, None)
            if skill is None:
                return None
            self._save()
            log.info("Deleted skill %s", skill_id)
            return skill.to_dict()

    def find_by_audio(self, audio_file: str) -> list[Skill]:
        """Find all enabled skills linked to a given audio file."""
        with self._lock:
            return [s for s in self._skills.values() if s.audio_file == audio_file and s.enabled]

    def find_by_callback(self, callback: str) -> list[Skill]:
        """Find all enabled skills whose callback equals ``callback``."""
        with self._lock:
            return [s for s in self._skills.values() if s.callback == callback and s.enabled]

0
vendor/Sanad/dashboard/__init__.py vendored Normal file
View File

134
vendor/Sanad/dashboard/app.py vendored Normal file
View File

@ -0,0 +1,134 @@
"""FastAPI application — Sanad Dashboard.
Each route module is imported INDIVIDUALLY inside try/except so that one
broken router (missing dep, syntax error in a sibling) cannot break the
entire dashboard. Failed routers are logged and the server starts without
them.
"""
from __future__ import annotations
import importlib
import logging
# Backfill asyncio.to_thread on Python 3.8 — must run before any router import.
from Project.Sanad.core import asyncio_compat # noqa: F401
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from Project.Sanad.config import BASE_DIR
from Project.Sanad.core.logger import get_logger
log = get_logger("dashboard.app")
from Project.Sanad.core.config_loader import section as _cfg_section
_APP_CFG = _cfg_section("dashboard", "app")
app = FastAPI(
title=_APP_CFG.get("title", "Sanad Dashboard"),
version=_APP_CFG.get("version", "1.0.0"),
)
# -- isolated route registration --
# Each entry names a module that must expose a `router` attribute; the
# modules are imported one by one by _register_router() below.
_REST_ROUTES: list[tuple[str, str, str]] = [
    # (module_name, prefix, tag)
    ("health", "/api", "health"),
    ("system", "/api/system", "system"),
    ("voice", "/api/voice", "voice"),
    ("motion", "/api/motion", "motion"),
    ("skills", "/api/skills", "skills"),
    ("macros", "/api/macros", "macros"),
    ("logs", "/api/logs", "logs"),
    ("replay", "/api/replay", "replay"),
    ("audio_control", "/api/audio", "audio"),
    ("scripts", "/api/scripts", "scripts"),
    ("records", "/api/records", "records"),
    ("prompt", "/api/prompt", "prompt"),
    ("wake_phrases", "/api/wake-phrases", "wake-phrases"),
    ("live_voice", "/api/live-voice", "live-voice"),
    ("live_subprocess", "/api/live-subprocess", "live-subprocess"),
    ("typed_replay", "/api/typed-replay", "typed-replay"),
    ("recognition", "/api/recognition", "recognition"),
    ("zones", "/api/zones", "zones"),
    ("temp_monitor", "/api/temp", "temperature"),
    ("controller", "/api/controller", "controller"),
]
# WebSocket router modules; registered without a prefix (see the loop below).
_WS_ROUTES: list[str] = ["log_stream", "motor_temps", "terminal"]
# Bookkeeping filled in by _register_router() and exposed by the
# diagnostic endpoints: names that loaded, and name -> error text for failures.
_loaded_routes: list[str] = []
_failed_routes: dict[str, str] = {}
def _register_router(module_name: str, prefix: str | None = None, tag: str | None = None,
                     package: str = "Project.Sanad.dashboard.routes"):
    """Import ``package.module_name`` and attach its ``router`` to the app.

    Any failure (import error, missing ``router`` attribute, include error)
    is logged and recorded in ``_failed_routes``; nothing is raised, so one
    broken module cannot stop the dashboard from starting.
    """
    full_name = f"{package}.{module_name}"
    try:
        module = importlib.import_module(full_name)
        absent = object()
        router = getattr(module, "router", absent)
        if router is absent:
            raise AttributeError(f"{full_name} has no 'router' attribute")
        # Only forward the options that were actually supplied.
        options: dict = {}
        if prefix is not None:
            options["prefix"] = prefix
        if tag is not None:
            options["tags"] = [tag]
        app.include_router(router, **options)
        _loaded_routes.append(module_name)
        log.info("Registered router: %s", module_name)
    except Exception as exc:
        _failed_routes[module_name] = str(exc)
        log.exception("Failed to register router %s — skipping", module_name)
# REST routes — registered at import time; _register_router() never raises,
# so a failing module is skipped and the loop continues.
for mod_name, prefix, tag in _REST_ROUTES:
    _register_router(mod_name, prefix=prefix, tag=tag)
# WebSocket routes — same mechanism, different package, no URL prefix.
for mod_name in _WS_ROUTES:
    _register_router(
        mod_name,
        package="Project.Sanad.dashboard.websockets",
        tag="websocket",
    )
# -- Static files (dashboard UI) — best effort --
# The sub-directory is configurable via the dashboard "app" config section.
STATIC_DIR = BASE_DIR / _APP_CFG.get("static_subdir", "dashboard/static")
try:
    # Create the directory first so the mount does not fail on a fresh install.
    STATIC_DIR.mkdir(parents=True, exist_ok=True)
    app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
    log.info("Static dir mounted: %s", STATIC_DIR)
except Exception:
    log.exception("Could not mount static dir %s — serving without it", STATIC_DIR)
@app.get("/")
async def root():
    """Serve the dashboard SPA, or a JSON diagnostic when index.html is absent."""
    index_file = STATIC_DIR / "index.html"
    if not index_file.exists():
        return {
            "message": "Sanad Dashboard — index.html not found",
            "loaded_routes": _loaded_routes,
            "failed_routes": _failed_routes,
        }
    from fastapi.responses import HTMLResponse
    try:
        return HTMLResponse(index_file.read_text(encoding="utf-8"))
    except OSError as exc:
        return {"error": f"Could not read index.html: {exc}"}
@app.get("/api/_dashboard_status")
async def dashboard_load_status():
    """Diagnostic: report which routers were registered and which failed."""
    return dict(
        loaded=_loaded_routes,
        failed=_failed_routes,
        total_loaded=len(_loaded_routes),
        total_failed=len(_failed_routes),
    )

View File

View File

@ -0,0 +1,81 @@
"""Shared filesystem safety helpers for dashboard routes.
Provides:
- safe_filename: validate + reject traversal/special chars
- safe_path_under: ensure resolved path stays inside a base dir
- atomic_write_bytes: write-to-temp + os.replace
- atomic_write_text
- atomic_write_json
"""
from __future__ import annotations
import json
import os
import tempfile
from pathlib import Path
from typing import Any
from fastapi import HTTPException
from Project.Sanad.core.config_loader import section as _cfg_section
# Maximum upload size in bytes — SINGLE SOURCE in dashboard.api_input
MAX_UPLOAD_BYTES = _cfg_section("dashboard", "api_input").get(
"max_upload_bytes", 8 * 1024 * 1024)
def safe_filename(name: str | None) -> str:
    """Reduce ``name`` to a bare file name and validate it.

    Directory components are dropped and surrounding whitespace trimmed.

    Raises:
        HTTPException(400): empty input, a result of ``""``/``"."``/``".."``,
            or a NUL / newline / carriage-return inside the name.
    """
    if not name:
        raise HTTPException(400, "Filename required.")
    leaf = os.path.basename(name).strip()
    if leaf in ("", ".", ".."):
        raise HTTPException(400, "Invalid filename.")
    for forbidden in ("\x00", "\n", "\r"):
        if forbidden in leaf:
            raise HTTPException(400, "Invalid characters in filename.")
    return leaf
def safe_path_under(base: Path, name: str) -> Path:
    """Return the resolved path of ``base/<sanitised name>``.

    The name is first passed through ``safe_filename``; the resolved
    result must still lie inside the resolved ``base`` (this also catches
    symlinks pointing elsewhere).

    Raises:
        HTTPException(400): invalid name, or the path escapes ``base``.
    """
    leaf = safe_filename(name)
    root = base.resolve()
    target = (base / leaf).resolve()
    if target != root and root not in target.parents:
        raise HTTPException(400, "Path traversal denied.")
    return target
def check_upload_size(content: bytes, max_bytes: int = MAX_UPLOAD_BYTES) -> None:
    """Reject uploads larger than ``max_bytes`` with HTTP 413."""
    size = len(content)
    if size > max_bytes:
        detail = f"Upload too large: {size} bytes (max {max_bytes})."
        raise HTTPException(413, detail)
def atomic_write_bytes(path: Path, data: bytes) -> None:
    """Write ``data`` to ``path`` without ever exposing a partial file.

    The bytes go to a temporary file in the destination directory (created
    if needed), which is then moved over ``path`` with ``os.replace``. On
    failure the temporary file is removed and the error re-raised.
    """
    folder = path.parent
    folder.mkdir(parents=True, exist_ok=True)
    handle, scratch = tempfile.mkstemp(
        dir=str(folder), prefix=f".{path.name}.", suffix=".tmp",
    )
    try:
        with os.fdopen(handle, "wb") as out:
            out.write(data)
        os.replace(scratch, path)
    except Exception:
        # Best-effort cleanup; the original error is what matters.
        try:
            os.unlink(scratch)
        except OSError:
            pass
        raise
def atomic_write_text(path: Path, text: str, encoding: str = "utf-8") -> None:
    """Encode ``text`` with ``encoding`` and write it via ``atomic_write_bytes``."""
    atomic_write_bytes(path, text.encode(encoding))
def atomic_write_json(path: Path, payload: Any, indent: int = 2) -> None:
    """Serialise ``payload`` to JSON and write it via ``atomic_write_text``.

    Non-ASCII characters are written as-is (``ensure_ascii=False``).
    """
    atomic_write_text(path, json.dumps(payload, ensure_ascii=False, indent=indent))

View File

@ -0,0 +1,922 @@
"""Audio control endpoints — mic mute, speaker mute, device profile selection."""
from __future__ import annotations
import asyncio
import os
import subprocess
import threading
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Project.Sanad.core.logger import get_logger
from Project.Sanad.voice import audio_devices as ad
log = get_logger("audio_route")
router = APIRouter()
# ─────────────────────── G1 built-in speaker (DDS) ───────────────────────
#
# pactl set-sink-mute has NO effect on the G1 built-in speaker because
# sanad_voice.py streams PCM to it via the Unitree DDS AudioClient API,
# bypassing PulseAudio entirely. To actually silence the built-in speaker
# mid-playback we must call AudioClient.SetVolume(0) over DDS.
#
# This module keeps a lazily-initialized AudioClient + a cached volume so
# the dashboard can mute/unmute without waiting on DDS init for every click.
_g1_audio_client = None
_g1_audio_lock = threading.Lock()
_g1_current_volume: int = 100 # what's actually on the hardware right now
_g1_user_volume: int = 100 # the user's preferred "unmuted" level
_g1_init_error: str = ""
def _load_persisted_g1_volume() -> int:
"""Read the saved G1 volume from data/motions/config.json.
Keys are `audio.g1_volume` (persistent target level 0-100). Returns
100 if no value is stored matches the default the Unitree SDK sets
on the voice service.
"""
try:
from Project.Sanad.config import load_config
cfg = load_config() or {}
audio = cfg.get("audio") or {}
vol = int(audio.get("g1_volume", 100))
return max(0, min(100, vol))
except Exception:
return 100
def _save_persisted_g1_volume(level: int) -> None:
    """Store ``level`` (clamped to 0..100) under ``audio.g1_volume``.

    Best effort: any failure is logged as a warning and swallowed.
    """
    try:
        from Project.Sanad.config import load_config, save_config
        settings = load_config() or {}
        audio_cfg = settings.get("audio")
        if not isinstance(audio_cfg, dict):
            # Missing or malformed section — start a fresh one.
            audio_cfg = {}
        audio_cfg["g1_volume"] = max(0, min(100, int(level)))
        settings["audio"] = audio_cfg
        save_config(settings)
    except Exception as exc:
        log.warning("could not persist g1_volume: %s", exc)
# Initialize user volume from the persisted value so the dashboard shows
# the correct level on first load even if no one has touched it yet.
_g1_user_volume = _load_persisted_g1_volume()
_g1_current_volume = _g1_user_volume
def _get_g1_audio_client():
    """Return the shared G1 ``AudioClient``, creating it on first use.

    Returns:
        The cached client, or ``None`` when the Unitree SDK is not
        installed or the client cannot be initialised. On failure the
        reason is stored in ``_g1_init_error``; on success that variable
        is reset to ``""`` so status endpoints do not keep reporting an
        error from an earlier failed attempt.
    """
    global _g1_audio_client, _g1_init_error
    if _g1_audio_client is not None:
        return _g1_audio_client
    try:
        from unitree_sdk2py.core.channel import ChannelFactoryInitialize
        from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient
    except ImportError as exc:
        _g1_init_error = f"unitree_sdk2py not installed: {exc}"
        return None
    iface = os.environ.get("SANAD_DDS_INTERFACE", "eth0")
    # ChannelFactoryInitialize can only be called once per process. The
    # arm controller normally calls it first at startup — the second call
    # either no-ops or raises, so wrap it defensively.
    try:
        ChannelFactoryInitialize(0, iface)
    except Exception as exc:
        log.debug("ChannelFactoryInitialize already called or failed: %s", exc)
    try:
        client = AudioClient()
        client.SetTimeout(5.0)
        client.Init()
    except Exception as exc:
        _g1_init_error = f"AudioClient init failed: {exc}"
        log.warning("G1 AudioClient init failed: %s", exc)
        return None
    _g1_audio_client = client
    # Clear any error left over from a previous failed attempt.
    _g1_init_error = ""
    log.info("G1 AudioClient initialized for dashboard mute control (iface=%s)", iface)
    return client
def _pactl(args: list[str]) -> subprocess.CompletedProcess[str]:
    """Run ``pactl <args>`` and return the completed process (text output captured).

    Raises:
        FileNotFoundError: the ``pactl`` executable is not available.
        subprocess.CalledProcessError: pactl exited non-zero (``check=True``).
    """
    return subprocess.run(["pactl", *args], check=True, text=True, capture_output=True)
def _get_muted(kind: str, name: str) -> bool:
if not name:
return False
try:
cmd = "get-source-mute" if kind == "source" else "get-sink-mute"
r = _pactl([cmd, name])
return (r.stdout or "").strip().lower().endswith("yes")
except (FileNotFoundError, subprocess.CalledProcessError):
return False
def _set_muted(kind: str, name: str, muted: bool) -> bool:
if not name:
return False
cmd = "set-source-mute" if kind == "source" else "set-sink-mute"
_pactl([cmd, name, "1" if muted else "0"])
return _get_muted(kind, name)
def _current_sink_source() -> tuple[str, str]:
    """Return ``(sink, source)`` from the current audio-device selection.

    Either element is ``""`` when the selection has no such key.
    """
    cur = ad.current_selection()
    return cur.get("sink", ""), cur.get("source", "")
# ─────────────────────── status / mute ───────────────────────
@router.get("/status")
async def audio_status():
    """Return current devices, mute state and G1 speaker volume.

    `speaker_muted` is the EFFECTIVE state: True if either the PulseAudio
    sink is muted OR the cached G1 speaker volume is 0.
    `pulse_sink_muted` and `g1_speaker_muted` are the per-path states.
    `g1_current_volume` is the last volume this module applied;
    `g1_user_volume` is the level restored on un-mute.
    """
    def _snapshot():
        sink, source = _current_sink_source()
        selection = ad.current_selection()
        pulse_muted = _get_muted("sink", sink)
        # Cached value only — no DDS GetVolume round-trip on every poll.
        g1_muted = _g1_current_volume == 0
        state = {"mic_muted": _get_muted("source", source)}
        # Effective badge state first, then the per-path breakdown.
        state["speaker_muted"] = pulse_muted or g1_muted
        state["pulse_sink_muted"] = pulse_muted
        state["g1_speaker_muted"] = g1_muted
        state["g1_current_volume"] = _g1_current_volume
        state["g1_user_volume"] = _g1_user_volume
        state["g1_available"] = _g1_audio_client is not None or (_g1_init_error == "")
        state["sink"] = sink
        state["source"] = source
        state["current"] = selection
        state["pactl_available"] = ad.pactl_available()
        return state
    return await asyncio.to_thread(_snapshot)
@router.post("/mic/mute")
async def toggle_mic(muted: bool | None = None):
    """Mute/unmute the selected source; with no ``muted`` value, toggle it."""
    def _apply():
        source = _current_sink_source()[1]
        if not source:
            raise HTTPException(503, "No source device selected")
        if muted is None:
            desired = not _get_muted("source", source)
        else:
            desired = muted
        try:
            now_muted = _set_muted("source", source, desired)
        except (FileNotFoundError, subprocess.CalledProcessError) as exc:
            raise HTTPException(500, f"pactl failed: {exc}")
        return {"mic_muted": now_muted, "source": source}
    return await asyncio.to_thread(_apply)
@router.post("/speaker/mute")
async def toggle_speaker(muted: bool | None = None):
    """Mute/unmute the SPEAKER on both playback paths.

    Path 1 is the selected PulseAudio sink (via pactl); path 2 is the G1
    built-in speaker (via AudioClient.SetVolume). Each path is attempted
    independently and reported under ``pulse`` / ``g1`` in the response,
    each with an ``ok`` flag and either the resulting state or an error.

    Args:
        muted: Explicit target state. ``None`` toggles: if either path is
            currently muted the target is "unmuted", otherwise "muted".

    Returns:
        Dict with ``speaker_muted``, ``pulse``, ``g1``, ``sink``,
        ``g1_current_volume`` and ``g1_user_volume``.
    """
    def _do():
        global _g1_current_volume, _g1_user_volume
        sink, _ = _current_sink_source()
        # Decide target state — if muted is None, toggle based on the
        # current effective state (either path muted counts as muted).
        if muted is None:
            pulse_cur = _get_muted("sink", sink) if sink else False
            g1_cur = _g1_current_volume == 0
            # Toggle: if any path is muted, unmute everything; only when
            # both paths are live, mute everything.
            target = not (pulse_cur or g1_cur)
        else:
            target = bool(muted)
        result = {"speaker_muted": target, "pulse": None, "g1": None}
        # ── Path 1: PulseAudio sink (Anker PowerConf, USB, etc.) ──
        if sink:
            try:
                actual_pulse = _set_muted("sink", sink, target)
                result["pulse"] = {"ok": True, "muted": actual_pulse, "sink": sink}
            except (FileNotFoundError, subprocess.CalledProcessError) as exc:
                result["pulse"] = {"ok": False, "error": f"pactl failed: {exc}"}
        else:
            result["pulse"] = {"ok": False, "error": "no sink selected"}
        # ── Path 2: G1 built-in speaker via DDS AudioClient ──
        # Mute = SetVolume(0). Unmute = SetVolume(_g1_user_volume) so the
        # user's chosen level is restored (instead of always jumping back
        # to 100).
        client = _get_g1_audio_client()
        if client is None:
            result["g1"] = {"ok": False, "error": _g1_init_error or "AudioClient unavailable"}
        else:
            volume = 0 if target else _g1_user_volume
            try:
                with _g1_audio_lock:
                    code = client.SetVolume(volume)
                    # Cache is updated only after SetVolume returned without raising.
                    _g1_current_volume = volume
                result["g1"] = {
                    "ok": True, "muted": volume == 0,
                    "volume": volume, "code": code,
                }
                log.info("G1 speaker volume set to %d (rc=%s)", volume, code)
            except Exception as exc:
                result["g1"] = {"ok": False, "error": f"SetVolume failed: {exc}"}
        # Final effective state — either path counts as muted. Failed paths
        # carry no "muted" key and therefore count as not muted here.
        pulse_muted = result["pulse"].get("muted", False) if result["pulse"] else False
        g1_muted = result["g1"].get("muted", False) if result["g1"] else False
        # NOTE(review): when target is False this always reports False, even
        # if an unmute attempt failed and a path is still muted; /status
        # would then report a different value — confirm this is intended.
        result["speaker_muted"] = bool(pulse_muted or g1_muted) if target else False
        result["sink"] = sink
        result["g1_current_volume"] = _g1_current_volume
        result["g1_user_volume"] = _g1_user_volume
        return result
    return await asyncio.to_thread(_do)
@router.post("/g1-speaker/mute")
async def toggle_g1_speaker_only(muted: bool | None = None):
    """Mute/unmute ONLY the G1 built-in speaker via DDS AudioClient.

    Handy for exercising the DDS path on its own; /speaker/mute drives
    both PulseAudio and the G1 speaker. Unmuting restores the user's last
    chosen volume rather than a fixed level. With no ``muted`` value the
    state is toggled based on the cached volume.
    """
    def _apply():
        global _g1_current_volume
        client = _get_g1_audio_client()
        if client is None:
            raise HTTPException(
                503,
                f"G1 AudioClient unavailable: {_g1_init_error or 'unknown'}",
            )
        # No explicit request → mute when currently audible, else unmute.
        want_muted = _g1_current_volume > 0 if muted is None else bool(muted)
        new_volume = _g1_user_volume if not want_muted else 0
        try:
            with _g1_audio_lock:
                rc = client.SetVolume(new_volume)
                _g1_current_volume = new_volume
        except Exception as exc:
            raise HTTPException(500, f"SetVolume failed: {exc}")
        log.info("G1 speaker volume set to %d (rc=%s)", new_volume, rc)
        return {
            "g1_muted": new_volume == 0,
            "volume": new_volume,
            "user_volume": _g1_user_volume,
            "return_code": rc,
        }
    return await asyncio.to_thread(_apply)
# ─────────────────────── G1 speaker volume (0-100) ───────────────────────
# Request body for POST /g1-speaker/volume.
class G1VolumePayload(BaseModel):
    # Target volume; the route itself rejects values outside 0..100 with HTTP 400.
    level: int  # 0..100
@router.get("/g1-speaker/volume")
async def get_g1_volume():
    """Return the current G1 speaker volume state.

    Response keys:
        available       client exists, or no init error has been recorded
        current_volume  last volume applied by this module (cached)
        user_volume     level restored on un-mute
        muted           current_volume == 0
        persisted       value read back from the saved config
        init_error      last AudioClient init error text ("" if none)
    """
    def _snapshot():
        state = {}
        state["available"] = _g1_audio_client is not None or (_g1_init_error == "")
        state["current_volume"] = _g1_current_volume
        state["user_volume"] = _g1_user_volume
        state["muted"] = _g1_current_volume == 0
        state["persisted"] = _load_persisted_g1_volume()
        state["init_error"] = _g1_init_error
        return state
    return await asyncio.to_thread(_snapshot)
@router.post("/g1-speaker/volume")
async def set_g1_volume(payload: G1VolumePayload):
    """Set the G1 built-in speaker volume via DDS AudioClient.

    Body: `{"level": 0..100}`

    Effects:
      - Applies the level through AudioClient.SetVolume(level) while
        holding `_g1_audio_lock`, then updates the cached current volume.
      - If level > 0, also updates `_g1_user_volume` (the level restored
        on un-mute). level == 0 acts as a mute and leaves it untouched.
      - Hands `_g1_user_volume` to `_save_persisted_g1_volume`.

    Raises:
        HTTPException(400): level outside 0..100.
        HTTPException(503): AudioClient unavailable.
        HTTPException(500): SetVolume raised.
    """
    def _do():
        global _g1_current_volume, _g1_user_volume
        level = int(payload.level)
        if not 0 <= level <= 100:
            raise HTTPException(400, "level must be 0..100")
        client = _get_g1_audio_client()
        if client is None:
            raise HTTPException(
                503,
                f"G1 AudioClient unavailable: {_g1_init_error or 'unknown'}",
            )
        try:
            with _g1_audio_lock:
                code = client.SetVolume(level)
                _g1_current_volume = level
                if level > 0:
                    # Only update the "preferred unmuted" level when the
                    # user is setting a non-zero volume. Setting 0 is a
                    # mute, which shouldn't overwrite their preference.
                    _g1_user_volume = level
        except Exception as exc:
            raise HTTPException(500, f"SetVolume failed: {exc}")
        # Persist the user's preferred level (not the current) so a
        # subsequent mute-then-restart restores to the preferred level
        _save_persisted_g1_volume(_g1_user_volume)
        log.info("G1 volume → %d (user_pref=%d, rc=%s)",
                 level, _g1_user_volume, code)
        return {
            "ok": True,
            "current_volume": level,
            "user_volume": _g1_user_volume,
            "muted": level == 0,
            "return_code": code,
            # NOTE(review): hard-coded; _save_persisted_g1_volume swallows
            # its own errors, so this is True even if the save failed.
            "persisted": True,
        }
    return await asyncio.to_thread(_do)
# ─────────────────────── device profiles ───────────────────────
@router.get("/devices")
async def list_devices():
    """Full device + profile listing for the dashboard picker.

    Returns whatever ``ad.status()`` produces; the call runs in a worker
    thread so it does not block the event loop.
    """
    return await asyncio.to_thread(ad.status)
@router.get("/profiles")
async def list_profiles():
    """List the named profiles, each flagged with whether it is plugged in now."""
    def _collect():
        from dataclasses import asdict
        # Without pactl nothing can be detected; treat every profile as absent.
        plugged = ad.detect_plugged_profiles() if ad.pactl_available() else []
        plugged_ids = {entry["profile"]["id"] for entry in plugged}
        listing = []
        for profile in ad.PROFILES:
            row = asdict(profile)
            row["available"] = profile.id in plugged_ids
            listing.append(row)
        return {"profiles": listing, "detected_ids": list(plugged_ids)}
    return await asyncio.to_thread(_collect)
# Request body for POST /select-profile.
class ProfileSelect(BaseModel):
    # Passed unchanged to ad.select_profile().
    profile_id: str
@router.post("/select-profile")
async def select_profile(payload: ProfileSelect):
    """Activate a named device profile; HTTP 409 when it cannot be selected."""
    def _apply():
        outcome = ad.select_profile(payload.profile_id)
        if not outcome.get("ok"):
            raise HTTPException(409, outcome.get("error") or "Could not select profile")
        # Best effort: let the audio manager pick up the new devices.
        try:
            from Project.Sanad.main import audio_mgr
            if audio_mgr is not None:
                if hasattr(audio_mgr, "refresh_devices"):
                    audio_mgr.refresh_devices()
        except Exception:
            pass
        return outcome
    return await asyncio.to_thread(_apply)
# Request body for POST /select-manual. Both fields are required by the
# model; the route rejects the request only when both are empty.
class ManualSelect(BaseModel):
    sink: str
    source: str
@router.post("/select-manual")
async def select_manual(payload: ManualSelect):
    """Select an explicit sink/source pair.

    HTTP 400 when both names are empty, HTTP 500 when the selection fails.
    """
    def _apply():
        if not (payload.sink or payload.source):
            raise HTTPException(400, "At least one of sink/source required")
        outcome = ad.select_manual(payload.sink, payload.source)
        if not outcome.get("ok"):
            raise HTTPException(500, str(outcome.get("errors") or "Selection failed"))
        # Best effort: let the audio manager pick up the new devices.
        try:
            from Project.Sanad.main import audio_mgr
            if audio_mgr is not None:
                if hasattr(audio_mgr, "refresh_devices"):
                    audio_mgr.refresh_devices()
        except Exception:
            pass
        return outcome
    return await asyncio.to_thread(_apply)
@router.post("/refresh")
async def refresh_devices():
    """Re-scan plugged devices and re-resolve current selection.

    Implemented as a call to ``ad.status()`` in a worker thread — the same
    call that backs GET /devices.
    """
    return await asyncio.to_thread(ad.status)
@router.post("/apply")
async def apply_audio():
    """Re-apply the current audio selection and refresh the audio manager.

    Intended for use after plugging/unplugging devices or switching USB
    ports. Returns the result of ``ad.apply_current_selection()``.
    """
    def _apply():
        outcome = ad.apply_current_selection()
        # Best effort: have AudioManager pick up the new sink/source.
        try:
            from Project.Sanad.main import audio_mgr
            if audio_mgr is None:
                return outcome
            audio_mgr.refresh_devices()
        except Exception:
            pass
        return outcome
    return await asyncio.to_thread(_apply)
# ─────────────────────── Reset endpoints (Pulse + USB) ───────────────────────
#
# Two distinct recovery paths for the dashboard's audio panel:
#
# POST /api/audio/reset — SOFT: restart pulseaudio / pipewire-pulse.
# Fixes Pulse-side state (stuck profile, lost default sink, crashed
# module). Cannot recover a kernel-side missing USB capture descriptor
# — snd-usb-audio parses those at probe time and Pulse can't influence
# that. Use for "devices look weird" failures.
#
# POST /api/audio/usb-reset — HARD: unbind+rebind snd-usb-audio scoped
# to the Anker VID:PID. Forces snd-usb-audio to re-parse UAC1
# descriptors → input profile reappears even after the firmware/USB
# handshake dropped it. Use for "Anker mic missing from pactl" — the
# symptom soft-reset cannot fix.
#
# Both gate with module-level locks (no concurrent reset), refuse while Live
# Gemini is running or a record is mid-playback, and return structured
# before/after diagnostics so the dashboard can show meaningful toasts.
# One lock per reset endpoint. Each endpoint takes its lock with a
# non-blocking acquire and answers 429 when the lock is already held.
_RESET_LOCK = threading.Lock()
_USB_RESET_LOCK = threading.Lock()
# Anker PowerConf A3321 — used both for VID:PID matching in sysfs and for
# logging. Change here if you add support for a different USB conference
# device (Hollyland etc).
_USB_RESET_TARGETS = (
    {"vid": "291a", "pid": "3301", "label": "Anker PowerConf"},
)
def _refuse_if_busy() -> None:
    """Raise HTTPException(409) if Live Gemini is active or a record is playing.

    Used by both reset endpoints — a userspace audio restart mid-stream
    leaves the active session in a broken state (PortAudio handle pointing
    at a dead Pulse, in-flight write() raises, etc.). Cheaper to refuse
    than to recover.

    Subsystems that cannot be imported, or whose status call fails, are
    treated as idle.
    """
    # --- Live Gemini subprocess ---
    try:
        from Project.Sanad.main import live_sub
    except Exception:
        live_sub = None
    if live_sub is not None:
        try:
            live_status = live_sub.status() or {}
        except Exception:
            live_status = {}
        state = (live_status.get("state") or "").lower()
        idle_states = ("", "stopped", "error")
        if live_status.get("running") or state not in idle_states:
            raise HTTPException(
                409, f"Stop Live Gemini before resetting audio (state={state or '?'}).",
            )

    # --- Record playback ---
    try:
        from Project.Sanad.main import audio_mgr
    except Exception:
        audio_mgr = None
    if audio_mgr is None or not hasattr(audio_mgr, "playback_status"):
        return
    try:
        is_playing = bool((audio_mgr.playback_status() or {}).get("playing"))
    except Exception:
        # An unreadable playback status counts as "not playing".
        is_playing = False
    if is_playing:
        raise HTTPException(
            409, "Stop the active playback before resetting audio.",
        )
def _detect_pa_flavour() -> str:
"""Return 'pipewire' if pipewire-pulse is the active daemon, else 'pulse'."""
try:
r = subprocess.run(
["pgrep", "-x", "pipewire-pulse"],
check=False, capture_output=True, text=True, timeout=1.0,
)
if r.returncode == 0 and (r.stdout or "").strip():
return "pipewire"
except (FileNotFoundError, subprocess.SubprocessError):
pass
return "pulse"
def _kill_audio_daemon(flavour: str) -> dict:
"""Issue the restart command for the detected daemon. Non-zero exit is a
soft warning (some installs return 1 when there's no daemon to kill)."""
if flavour == "pipewire":
cmd = ["systemctl", "--user", "restart", "pipewire-pulse.service"]
else:
cmd = ["pulseaudio", "-k"]
try:
r = subprocess.run(cmd, check=False, capture_output=True,
text=True, timeout=5.0)
info = {"cmd": " ".join(cmd), "returncode": r.returncode,
"stderr": (r.stderr or "").strip()[:300]}
if r.returncode != 0:
log.warning("audio reset: %s exited %d (%s)",
cmd[0], r.returncode, info["stderr"])
return info
except FileNotFoundError as exc:
return {"cmd": " ".join(cmd), "returncode": -1,
"stderr": f"binary missing: {exc}"}
except subprocess.TimeoutExpired:
return {"cmd": " ".join(cmd), "returncode": -1,
"stderr": "timeout (>5s)"}
def _wait_for_pactl(deadline_s: float = 5.0, interval_s: float = 0.2) -> bool:
    """Poll ``ad.pactl_available()`` until it succeeds or the deadline expires.

    Args:
        deadline_s: Total time budget in seconds, measured on the monotonic clock.
        interval_s: Sleep between two consecutive probes, in seconds.

    Returns:
        True as soon as a probe succeeds, False once the budget is used up.
    """
    import time as _time
    give_up_at = _time.monotonic() + deadline_s
    while True:
        if _time.monotonic() >= give_up_at:
            return False
        if ad.pactl_available():
            return True
        _time.sleep(interval_s)
@router.post("/reset")
async def reset_audio_subsystem():
    """SOFT reset — restart pulseaudio/pipewire-pulse and re-resolve devices.

    Use when devices look stuck, pactl is unavailable, or the wrong sink
    is being selected. **Does NOT recover a kernel-side missing USB capture
    descriptor** — for that symptom use /api/audio/usb-reset.

    Responses:
        403 when running as root, 429 when another reset holds the lock,
        409 (via ``_refuse_if_busy``) while Live Gemini or a playback is
        active, 500 when the daemon does not come back or PyAudio cannot be
        re-created. On success a dict with before/after diagnostics.

    ``pya_closed`` reports whether the old AudioManager handle was closed;
    ``pya_reinitialized`` reports whether a fresh PyAudio instance was
    actually installed afterwards.
    """
    if os.geteuid() == 0:
        raise HTTPException(
            403, "Refusing to reset audio as root — Sanad must run as the "
                 "unitree user so the per-user PulseAudio session is reachable.",
        )
    if not _RESET_LOCK.acquire(blocking=False):
        raise HTTPException(429, "Reset already in progress.")
    try:
        _refuse_if_busy()
        log.info(
            "audio reset requested (uid=%s PULSE_RUNTIME_PATH=%s XDG_RUNTIME_DIR=%s)",
            os.geteuid(),
            os.environ.get("PULSE_RUNTIME_PATH") or "-",
            os.environ.get("XDG_RUNTIME_DIR") or "-",
        )
        try:
            from Project.Sanad.main import audio_mgr
        except Exception:
            audio_mgr = None

        def _do() -> dict:
            before = {"pactl_available": ad.pactl_available(),
                      "selection": ad.current_selection()}

            # Quiesce AudioManager so the next play_wav rebinds cleanly.
            pya_closed = False
            if audio_mgr is not None:
                play_lock = getattr(audio_mgr, "play_lock", None)
                acquired = False
                if play_lock is not None:
                    # Bounded wait: proceed with the close even if the lock
                    # could not be obtained within 2 s.
                    acquired = play_lock.acquire(timeout=2.0)
                try:
                    try:
                        audio_mgr.close()
                        pya_closed = True
                    except Exception as exc:
                        log.warning("audio reset: audio_mgr.close failed: %s", exc)
                finally:
                    if acquired and play_lock is not None:
                        play_lock.release()

            flavour = _detect_pa_flavour()
            kill_info = _kill_audio_daemon(flavour)
            came_back = _wait_for_pactl(deadline_s=5.0)
            if not came_back and flavour == "pulse":
                # autospawn may be disabled — try an explicit start.
                try:
                    subprocess.run(["pulseaudio", "--start"], check=False,
                                   capture_output=True, text=True, timeout=3.0)
                except (FileNotFoundError, subprocess.SubprocessError) as exc:
                    log.warning("audio reset: pulseaudio --start failed: %s", exc)
                came_back = _wait_for_pactl(deadline_s=2.0)
            if not came_back:
                raise HTTPException(500, {
                    "error": "audio daemon did not return within ~7s",
                    "flavour": flavour, "kill": kill_info,
                })

            apply_result: dict = {}
            try:
                apply_result = ad.apply_current_selection() or {}
            except Exception as exc:
                log.warning("audio reset: apply_current_selection failed: %s", exc)
                apply_result = {"error": str(exc)}

            # Track the re-init on its own: a failed close() followed by a
            # successful re-init must still be reported as re-initialised.
            pya_reinitialized = False
            if audio_mgr is not None:
                try:
                    import pyaudio
                    audio_mgr.pya = pyaudio.PyAudio()
                    audio_mgr.refresh_devices()
                    pya_reinitialized = True
                except Exception as exc:
                    log.error("audio reset: PyAudio re-init failed: %s", exc)
                    raise HTTPException(
                        500, f"PortAudio re-init failed after daemon restart: {exc}")

            after_sel = ad.current_selection() or {}
            detected = ad.detect_plugged_profiles() or []
            after = {
                "pactl_available": ad.pactl_available(),
                "selection": after_sel,
                "detected_profiles": [p.get("profile", {}).get("id") for p in detected],
            }
            return {
                "ok": True, "best_effort": True, "flavour": flavour,
                "kill": kill_info,
                "pya_closed": pya_closed,
                "pya_reinitialized": pya_reinitialized,
                "apply_result": apply_result,
                "input_recovered": bool(after_sel.get("source")),
                "output_recovered": bool(after_sel.get("sink")),
                "before": before, "after": after,
                "hint": ("Soft reset only fixes Pulse-side state. If "
                         "input_recovered is False, try POST /api/audio/usb-reset "
                         "or physically replug the dongle."),
            }

        return await asyncio.to_thread(_do)
    finally:
        _RESET_LOCK.release()
def _find_usb_devices_by_vid_pid(vid: str, pid: str) -> list[str]:
"""Return sysfs bus-id strings (e.g. '1-3') for every USB device whose
idVendor/idProduct match. Empty list when nothing matches.
We read /sys/bus/usb/devices/* every USB *device* (not interface) has
idVendor/idProduct files. Interfaces (paths with a colon, e.g. '1-3:1.1')
do not, so they're naturally skipped.
"""
import glob
hits: list[str] = []
for path in glob.glob("/sys/bus/usb/devices/*"):
name = os.path.basename(path)
if ":" in name:
continue
try:
with open(os.path.join(path, "idVendor")) as f:
v = f.read().strip().lower()
with open(os.path.join(path, "idProduct")) as f:
p = f.read().strip().lower()
except OSError:
continue
if v == vid.lower() and p == pid.lower():
hits.append(name)
return hits
def _snd_usb_interfaces_for_device(bus_id: str) -> list[str]:
"""For USB device `bus_id` (e.g. '1-3'), return all interface names that
are currently bound to the snd-usb-audio driver (e.g. ['1-3:1.0']).
Used so we unbind ONLY the audio interfaces and don't touch HID / HUB
interfaces on the same composite device.
"""
import glob
bound: list[str] = []
base = f"/sys/bus/usb/devices/{bus_id}"
for iface in glob.glob(f"{base}/{bus_id}:*"):
driver_link = os.path.join(iface, "driver")
if not os.path.islink(driver_link):
continue
try:
driver = os.path.basename(os.readlink(driver_link))
except OSError:
continue
if driver == "snd-usb-audio":
bound.append(os.path.basename(iface))
return bound
def _write_sysfs(path: str, value: str) -> tuple[bool, str]:
"""Write `value` to a sysfs file. Returns (success, error_message).
Writes to /sys/bus/usb/drivers/snd-usb-audio/{bind,unbind} usually
require root. If permission denied, the caller should fall back to
invoking shell_scripts/reset_anker_usb.sh via sudo (one-time sudoers
setup documented in that script's header).
"""
try:
with open(path, "w") as f:
f.write(value)
return True, ""
except PermissionError as exc:
return False, f"permission denied: {path} ({exc})"
except OSError as exc:
return False, f"write failed: {path} ({exc})"
@router.post("/usb-reset")
async def usb_reset_anker():
    """HARD reset — unbind+rebind snd-usb-audio for the Anker (VID:PID
    291a:3301). Forces the kernel to re-parse the USB Audio Class
    descriptors, which is the only way to recover a missing capture profile
    on this Jetson without a physical replug.

    Tries two paths:
      1. Direct sysfs write (no sudo) — works if a udev rule has set
         `audio` group ownership / world-write on the snd-usb-audio bind
         files, or if Sanad runs as root (it shouldn't).
      2. Fallback to `sudo shell_scripts/reset_anker_usb.sh` — works after
         a one-time sudoers entry; see that script's header for setup.

    Refuses while Live Gemini or a record playback is in flight (same
    guard as the soft reset).

    Responses:
        429 when another USB reset holds the lock, 409 while busy, 404 when
        no device in ``_USB_RESET_TARGETS`` is enumerated. Otherwise a dict
        with one entry per candidate device plus before/after profile ids.
        ``input_recovered`` is derived from the same post-reset scan that
        fills ``after_detected_profiles``, so the two fields always agree.
    """
    if not _USB_RESET_LOCK.acquire(blocking=False):
        raise HTTPException(429, "USB reset already in progress.")
    try:
        _refuse_if_busy()

        # Find candidate Anker USB devices currently enumerated.
        candidates: list[dict] = []
        for tgt in _USB_RESET_TARGETS:
            for bus_id in _find_usb_devices_by_vid_pid(tgt["vid"], tgt["pid"]):
                candidates.append({"bus_id": bus_id, **tgt})
        if not candidates:
            wanted = ", ".join(
                "{}:{}".format(t["vid"], t["pid"]) for t in _USB_RESET_TARGETS
            )
            raise HTTPException(
                404,
                f"No matching USB device found (looked for {wanted}). "
                "Plug the Anker dongle and try again.",
            )
        log.info("usb reset: candidates=%s", candidates)

        def _do() -> dict:
            import time as _time
            from pathlib import Path as _Path

            before_detected = [
                p.get("profile", {}).get("id")
                for p in (ad.detect_plugged_profiles() or [])
            ]
            unbind_path = "/sys/bus/usb/drivers/snd-usb-audio/unbind"
            bind_path = "/sys/bus/usb/drivers/snd-usb-audio/bind"

            results: list[dict] = []
            for cand in candidates:
                bus = cand["bus_id"]
                ifaces = _snd_usb_interfaces_for_device(bus)
                attempt = {"bus_id": bus, "label": cand["label"],
                           "snd_interfaces": ifaces, "method": None,
                           "ok": False, "error": ""}
                if not ifaces:
                    attempt["error"] = ("no snd-usb-audio interfaces bound "
                                        "to this device — already unbound or "
                                        "kernel didn't claim it")
                    results.append(attempt)
                    continue

                # ─── Path 1: direct sysfs write ───
                direct_ok = True
                direct_err = ""
                for iface in ifaces:
                    ok, err = _write_sysfs(unbind_path, iface)
                    if not ok:
                        direct_ok = False
                        direct_err = err
                        break
                if direct_ok:
                    # Give the kernel a moment between unbind and rebind.
                    _time.sleep(0.5)
                    for iface in ifaces:
                        ok, err = _write_sysfs(bind_path, iface)
                        if not ok:
                            direct_ok = False
                            direct_err = err
                            break
                if direct_ok:
                    attempt.update({"method": "direct-sysfs", "ok": True})
                    results.append(attempt)
                    continue

                # ─── Path 2: sudo helper script ───
                helper = (_Path(__file__).resolve().parent.parent.parent
                          / "shell_scripts" / "reset_anker_usb.sh")
                if not helper.exists():
                    attempt.update({"method": "direct-sysfs",
                                    "error": f"{direct_err}; helper not present "
                                             f"at {helper}"})
                    results.append(attempt)
                    continue
                try:
                    # `sudo -n` never prompts: it fails fast when no sudoers
                    # entry allows the helper.
                    r = subprocess.run(
                        ["sudo", "-n", str(helper), bus],
                        check=False, capture_output=True, text=True, timeout=10.0,
                    )
                    attempt["method"] = "sudo-helper"
                    if r.returncode == 0:
                        attempt["ok"] = True
                    else:
                        attempt["error"] = (
                            f"sudo helper exited {r.returncode}: "
                            f"{(r.stderr or r.stdout or '').strip()[:300]}"
                        )
                except subprocess.TimeoutExpired:
                    attempt["error"] = "sudo helper timed out (>10s)"
                except FileNotFoundError as exc:
                    attempt["error"] = f"sudo not available: {exc}"
                results.append(attempt)

            # Settle, then re-detect
            _time.sleep(1.0)
            try:
                ad.apply_current_selection()
            except Exception:
                pass
            try:
                from Project.Sanad.main import audio_mgr
                if audio_mgr is not None and hasattr(audio_mgr, "refresh_devices"):
                    audio_mgr.refresh_devices()
            except Exception:
                pass

            # Single post-reset scan; both response fields below read from it.
            after_detected = [
                p.get("profile", {}).get("id")
                for p in (ad.detect_plugged_profiles() or [])
            ]
            any_ok = any(r.get("ok") for r in results)
            mic_now = any(
                "anker" in (profile_id or "").lower()
                for profile_id in after_detected
            )
            return {
                "ok": any_ok,
                "candidates": results,
                "before_detected_profiles": before_detected,
                "after_detected_profiles": after_detected,
                "input_recovered": mic_now,
                "hint": (
                    "If ok is False, the unbind/rebind path needs sudo. "
                    "Run `bash shell_scripts/reset_anker_usb.sh --setup-sudoers` "
                    "once on the robot to install the sudoers entry, then retry."
                ) if not any_ok else None,
            }

        return await asyncio.to_thread(_do)
    finally:
        _USB_RESET_LOCK.release()

View File

@ -0,0 +1,295 @@
"""Controller tab — manual dashboard locomotion control (N2 Phase 1/2).
Routes live under /api/controller. All WRITE actions (move / step / postures /
modes / MotionSwitcher) require the in-memory "Enable movement" arm flag and
return 409 when disarmed. Reads (/status, /joints, /msc, /status/summary),
E-STOP and the arm toggle are ALWAYS available.
`/status/summary` is the aggregate the dashboard polls for the global subsystem
status strip (Camera / Face / Place / Movement). It is kept under /api/controller
(final path /api/controller/status/summary) so no second router is needed; note
/api/status (no /summary) is already used by the SPA, so the suffix matters.
"""
from __future__ import annotations
import asyncio
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from Project.Sanad.config import BASE_DIR
from Project.Sanad.core.logger import get_logger
from Project.Sanad.vision import recognition_state
log = get_logger("controller_routes")
router = APIRouter()
STATE_PATH = BASE_DIR / "data" / ".recognition_state.json"
# ── lazy subsystem accessors ────────────────────────────────
def _get_loco():
try:
from Project.Sanad.main import loco_controller # type: ignore
return loco_controller
except Exception:
return None
def _get_camera():
try:
from Project.Sanad.main import camera # type: ignore
return camera
except Exception:
return None
def _get_live_sub():
try:
from Project.Sanad.main import live_sub # type: ignore
return live_sub
except Exception:
return None
def _get_dispatch():
try:
from Project.Sanad.main import movement_dispatch # type: ignore
return movement_dispatch
except Exception:
return None
def _require_loco():
    """Return the locomotion controller or raise HTTPException(503)."""
    controller = _get_loco()
    if controller is not None:
        return controller
    raise HTTPException(503, "Locomotion controller subsystem unavailable.")
def _require_armed(lc):
if not lc.is_armed():
raise HTTPException(409, "Movement is disarmed. Enable movement first.")
# ── reads ───────────────────────────────────────────────────
@router.get("/status")
async def get_status():
    """Return ``lc.status()`` from the locomotion controller (503 if absent)."""
    return await asyncio.to_thread(_require_loco().status)
@router.get("/joints")
async def get_joints():
    """Return ``lc.joints()`` from the locomotion controller (503 if absent)."""
    return await asyncio.to_thread(_require_loco().joints)
@router.get("/msc")
async def get_msc():
    """Return ``lc.msc_check()`` from the locomotion controller (503 if absent)."""
    return await asyncio.to_thread(_require_loco().msc_check)
# ── arm flag / E-STOP (always available) ────────────────────
@router.post("/arm")
async def set_arm(on: bool = Query(...)):
    """Arm (``on=true``) or disarm (``on=false``) manual movement."""
    loco = _require_loco()
    if on:
        toggle = loco.arm_movement
    else:
        toggle = loco.disarm_movement
    return await asyncio.to_thread(toggle)
@router.post("/gemini-movement")
async def set_gemini_movement(on: bool = Query(...)):
    """Enable / disable Gemini voice-driven locomotion (N2 Phase 3 gate).

    Writes recognition_state.movement_enabled — SEPARATE from the manual arm
    flag. The Gemini child announces the toggle (spoken), and the parent
    MovementDispatcher starts/stops acting on confirmation phrases. Default OFF.
    """
    new_state = await asyncio.to_thread(
        recognition_state.mutate, STATE_PATH, movement_enabled=bool(on),
    )
    # Enabling Gemini movement also clears any E-STOP latch on the dispatcher.
    dispatcher = _get_dispatch() if on else None
    if dispatcher is not None:
        try:
            dispatcher.clear_estop()
        except Exception:
            log.exception("clear_estop failed")
    log.info("gemini-movement %s", "ON" if on else "OFF")
    return {"ok": True, "movement_enabled": new_state.movement_enabled}
@router.post("/estop")
async def estop():
    """Emergency stop: halt the robot and latch every motion source off.

    The disarm and the voice-dispatcher latch run in a ``finally`` block, so
    they are applied even when the locomotion controller is unavailable (the
    request still answers 503) or when ``lc.estop`` itself raises. Previously
    either failure skipped both latches and voice dispatch stayed enabled.

    Returns:
        ``{"ok": True, **result}`` where ``result`` is what ``lc.estop()``
        returned (a falsy result is treated as an empty dict).
    """
    lc = None
    res = None
    try:
        lc = _require_loco()
        res = await asyncio.to_thread(lc.estop)
    finally:
        # Full stop: drop the manual arm flag AND latch the voice dispatcher
        # off, so no source (teleop, step, or voice dispatch) can keep driving
        # the robot. The dispatcher latch is used instead of flipping
        # movement_enabled so the Gemini child does not deliver a spoken
        # "movement disabled" line during an E-STOP.
        if lc is not None:
            try:
                await asyncio.to_thread(lc.disarm_movement)
            except Exception:
                log.exception("estop disarm failed")
        md = _get_dispatch()
        if md is not None:
            try:
                md.emergency_stop()
            except Exception:
                log.exception("estop dispatcher latch failed")
    return {"ok": True, **(res or {})}
@router.post("/stop")
async def stop():
    """Run ``lc.stop_move()``.

    Deliberately not gated on the arm flag — StopMove is always safe.
    """
    loco = _require_loco()
    return await asyncio.to_thread(loco.stop_move)
# ── movement (armed) ────────────────────────────────────────
class MoveBody(BaseModel):
    """Request body for POST /move; every field is optional."""

    # The four values are passed positionally to lc.move(vx, vy, vyaw, run).
    # NOTE(review): units and axis conventions are defined by the locomotion
    # controller and are not visible here — confirm before documenting them.
    vx: float = 0.0
    vy: float = 0.0
    vyaw: float = 0.0
    run: bool = False
@router.post("/move")
async def move(body: MoveBody):
    """Forward a velocity command to ``lc.move``; 409 when movement is disarmed."""
    loco = _require_loco()
    _require_armed(loco)
    command = (body.vx, body.vy, body.vyaw, body.run)
    return await asyncio.to_thread(loco.move, *command)
@router.post("/step")
async def step(dir: str = Query(...)):
    """Run ``lc.step(dir)``; 409 when disarmed, 400 when the step is rejected."""
    loco = _require_loco()
    _require_armed(loco)
    outcome = await asyncio.to_thread(loco.step, dir)
    if outcome.get("ok"):
        return outcome
    raise HTTPException(400, outcome.get("reason", "step failed"))
# ── modes / postures (armed) ────────────────────────────────
@router.post("/mode/prep")
async def mode_prep():
    """Run ``lc.prep_mode()``; requires the movement arm flag (409 otherwise)."""
    loco = _require_loco()
    _require_armed(loco)
    result = await asyncio.to_thread(loco.prep_mode)
    return result
@router.post("/mode/ready")
async def mode_ready():
    """Run ``lc.ready_start_mode()``; requires the movement arm flag (409 otherwise)."""
    loco = _require_loco()
    _require_armed(loco)
    result = await asyncio.to_thread(loco.ready_start_mode)
    return result
@router.post("/posture/{name}")
async def posture(name: str):
    """Run ``lc.posture(name)``; 409 when disarmed.

    Answers 400 only when the controller reports failure together with a
    ``reason``; a failure without a reason is returned to the caller as-is.
    """
    loco = _require_loco()
    _require_armed(loco)
    outcome = await asyncio.to_thread(loco.posture, name)
    failed_with_reason = not outcome.get("ok") and outcome.get("reason")
    if failed_with_reason:
        raise HTTPException(400, outcome["reason"])
    return outcome
@router.post("/balance")
async def balance(mode: int = Query(...)):
    """Run ``lc.set_balance_mode(mode)``; requires the movement arm flag."""
    loco = _require_loco()
    _require_armed(loco)
    result = await asyncio.to_thread(loco.set_balance_mode, mode)
    return result
@router.post("/height")
async def height(h: float = Query(...)):
    """Run ``lc.set_stand_height(h)``; requires the movement arm flag."""
    loco = _require_loco()
    _require_armed(loco)
    result = await asyncio.to_thread(loco.set_stand_height, h)
    return result
# ── MotionSwitcher / reconnect (armed) ──────────────────────
@router.post("/msc/select-ai")
async def msc_select_ai():
    """Run ``lc.msc_select_ai()``; requires the movement arm flag."""
    loco = _require_loco()
    _require_armed(loco)
    result = await asyncio.to_thread(loco.msc_select_ai)
    return result
@router.post("/msc/release")
async def msc_release():
    """Run ``lc.msc_release()``; requires the movement arm flag."""
    loco = _require_loco()
    _require_armed(loco)
    result = await asyncio.to_thread(loco.msc_release)
    return result
@router.post("/reconnect")
async def reconnect():
    """Run ``lc.reconnect()``; requires the movement arm flag."""
    loco = _require_loco()
    _require_armed(loco)
    result = await asyncio.to_thread(loco.reconnect)
    return result
# ── aggregate subsystem summary (always available) ──────────
@router.get("/status/summary")
async def status_summary():
    """Live on/off state for the header status strip. Never raises.

    Each subsystem probe is isolated: a probe that throws is reported as
    False instead of failing the whole summary.
    """
    def _probe(check) -> bool:
        # Evaluate one subsystem check, mapping any failure to False.
        try:
            return bool(check())
        except Exception:
            return False

    try:
        flags = recognition_state.read(STATE_PATH)
    except Exception:
        flags = recognition_state.RecognitionState()

    cam = _get_camera()
    camera_running = _probe(lambda: cam is not None and cam.is_running())

    loco = _get_loco()
    movement_armed = _probe(lambda: loco is not None and loco.is_armed())

    sub = _get_live_sub()

    def _gemini_alive():
        runner = getattr(sub, "is_running", None)
        return callable(runner) and runner()

    gemini_running = _probe(_gemini_alive)

    # Effective Gemini-movement = the file flag AND not latched off by an E-STOP.
    dispatcher = _get_dispatch()
    estopped = _probe(lambda: dispatcher is not None and dispatcher.is_estopped())

    return {
        "vision_enabled": flags.vision_enabled,
        "camera_running": camera_running,
        "face_rec_enabled": flags.face_rec_enabled,
        "zone_rec_enabled": flags.zone_rec_enabled,
        "movement_armed": movement_armed,
        "gemini_movement_enabled": flags.movement_enabled and not estopped,
        "gemini_running": gemini_running,
    }

51
vendor/Sanad/dashboard/routes/health.py vendored Normal file
View File

@ -0,0 +1,51 @@
"""Health and status endpoints."""
from __future__ import annotations
from fastapi import APIRouter
from Project.Sanad.core.logger import get_logger
log = get_logger("health_route")
router = APIRouter()
def _safe_status(component, name: str) -> dict:
"""Get status without crashing the whole endpoint if one component fails."""
if component is None:
return {"available": False}
try:
if hasattr(component, "status") and callable(component.status):
return component.status()
return {"available": True}
except Exception as exc:
log.warning("status() failed for %s: %s", name, exc)
return {"available": True, "error": str(exc)}
@router.get("/health")
async def health():
    """Liveness probe: a fixed ``status: ok`` plus the brain's status."""
    from Project.Sanad.main import brain
    brain_report = _safe_status(brain, "brain")
    return {"status": "ok", "brain": brain_report}
@router.get("/status")
async def full_status():
    """Collect the status of every major subsystem, one response key each."""
    from Project.Sanad.main import (
        brain, arm, voice_client, macro_rec, macro_play,
        live_voice, live_sub, wake_mgr,
    )
    # (response key, component, label used when logging a status() failure)
    report_rows = (
        ("brain", brain, "brain"),
        ("voice", voice_client, "voice"),
        ("arm", arm, "arm"),
        ("macro_recorder", macro_rec, "macro_rec"),
        ("macro_player", macro_play, "macro_play"),
        ("live_voice", live_voice, "live_voice"),
        ("live_subprocess", live_sub, "live_sub"),
        ("wake_manager", wake_mgr, "wake_mgr"),
    )
    return {
        key: _safe_status(component, label)
        for key, component, label in report_rows
    }

View File

@ -0,0 +1,38 @@
"""Live Gemini Subprocess control endpoints."""
from __future__ import annotations
import asyncio
from fastapi import APIRouter, HTTPException
router = APIRouter()
def _sub_or_503():
    """Return ``main.live_sub`` or raise HTTPException(503) when it is None."""
    from Project.Sanad.main import live_sub
    if live_sub is not None:
        return live_sub
    raise HTTPException(503, "Live subprocess not available")
@router.get("/status")
async def subprocess_status():
    """Return the Live subprocess status, or an "unavailable" stub when absent."""
    from Project.Sanad.main import live_sub
    if live_sub is not None:
        return live_sub.status()
    return {"available": False, "state": "unavailable"}
@router.post("/start")
async def start_subprocess():
    """Start the Live subprocess in a worker thread.

    A RuntimeError raised by ``live_sub.start`` is reported as 404 carrying
    the error text; an absent subsystem is reported as 503.
    """
    runner = _sub_or_503()
    try:
        started = await asyncio.to_thread(runner.start)
    except RuntimeError as exc:
        raise HTTPException(404, str(exc))
    return started
@router.post("/stop")
async def stop_subprocess():
    """Stop the Live subprocess in a worker thread (503 when it is absent)."""
    runner = _sub_or_503()
    return await asyncio.to_thread(runner.stop)

View File

@ -0,0 +1,73 @@
"""Live Voice Commands — voice-to-arm phrase trigger dispatcher.
Listens to GeminiSubprocess user transcripts, matches against
sanad_arm.txt phrases, and fires ARM.trigger_action_by_id.
Endpoints:
POST /start — begin polling transcripts
POST /stop — stop polling
POST /deferred-mode?enabled — toggle instant vs deferred trigger
POST /trigger-enabled?enabled — master gate: allow arm actions or not
GET /status — running, last heard, last action, etc.
GET /triggers — arm trigger history (log)
"""
from __future__ import annotations
from fastapi import APIRouter, HTTPException
router = APIRouter()
def _loop():
    """Return ``main.live_voice`` or raise HTTPException(503) when it is None."""
    from Project.Sanad.main import live_voice
    if live_voice is not None:
        return live_voice
    raise HTTPException(503, "LiveVoiceLoop not initialized.")
@router.get("/status")
async def status():
    """Return the voice loop's status merged with an ``available`` flag."""
    from Project.Sanad.main import live_voice
    if live_voice is not None:
        return {"available": True, **live_voice.status()}
    return {"available": False}
@router.post("/start")
async def start():
    """Start the voice loop and return its status afterwards."""
    voice_loop = _loop()
    await voice_loop.start()
    return {"ok": True, **voice_loop.status()}
@router.post("/stop")
async def stop():
    """Stop the voice loop and return its status afterwards."""
    voice_loop = _loop()
    await voice_loop.stop()
    return {"ok": True, **voice_loop.status()}
@router.post("/deferred-mode")
async def set_deferred(enabled: bool):
    """Toggle instant vs deferred triggering and echo the resulting mode."""
    voice_loop = _loop()
    voice_loop.set_deferred(enabled)
    return {"ok": True, "deferred_mode": voice_loop.deferred_mode}
@router.post("/trigger-enabled")
async def set_trigger_enabled(enabled: bool):
    """Master gate for voice → arm triggering. Default OFF.

    Echoes the gate's value as read back from the loop after the change.
    """
    voice_loop = _loop()
    voice_loop.set_trigger_enabled(enabled)
    return {"ok": True, "trigger_enabled": voice_loop.trigger_enabled}
@router.get("/triggers")
async def triggers():
    """Return the trigger history together with two summary counts."""
    voice_loop = _loop()
    history = list(voice_loop.triggers)
    return {
        "triggers": history,
        "total": len(voice_loop.triggers),
        "dispatch_actions": len(voice_loop.wake_dispatch),
    }

203
vendor/Sanad/dashboard/routes/logs.py vendored Normal file
View File

@ -0,0 +1,203 @@
"""Log viewing and snapshot endpoints."""
from __future__ import annotations
import asyncio
import json
import platform
import shutil
import socket
import sys
from collections import deque
from datetime import datetime
from fastapi import APIRouter, HTTPException
from fastapi.responses import PlainTextResponse
from Project.Sanad.config import BASE_DIR, LOGS_DIR
from Project.Sanad.dashboard.routes._safe_io import safe_path_under
router = APIRouter()
def _list_logs_sync():
    """List every ``*.log*`` file in LOGS_DIR with its size, sorted by path.

    Creates LOGS_DIR first when it does not exist yet.
    """
    LOGS_DIR.mkdir(parents=True, exist_ok=True)
    return [
        {"name": entry.name, "size_bytes": entry.stat().st_size}
        for entry in sorted(LOGS_DIR.glob("*.log*"))
    ]
@router.get("/")
async def list_logs():
    """Return the log directory path and the files found in it."""
    listing = await asyncio.to_thread(_list_logs_sync)
    return {"logs_dir": str(LOGS_DIR), "files": listing}
def _tail_sync(path, lines: int) -> list[str]:
with open(path, "r", encoding="utf-8", errors="replace") as f:
tail = deque(f, maxlen=lines)
return [l.rstrip("\n") for l in tail]
@router.get("/tail/{filename}")
async def tail_log(filename: str, lines: int = 200):
    """Return the last ``lines`` lines of one log file under LOGS_DIR.

    Args:
        filename: Name resolved through ``safe_path_under(LOGS_DIR, ...)``.
        lines: Number of trailing lines to return; must not be negative.

    Raises:
        HTTPException(400): ``lines`` is negative. Without this check the
            value reached ``deque(maxlen=...)`` and surfaced as a 500.
        HTTPException(404): The path is missing or is not a regular file
            (a directory used to pass ``exists()`` and then fail in ``open``).
    """
    if lines < 0:
        raise HTTPException(400, "lines must be >= 0")
    path = safe_path_under(LOGS_DIR, filename)
    if not path.is_file():
        raise HTTPException(404, "File not found")
    lines_out = await asyncio.to_thread(_tail_sync, path, lines)
    return {"filename": path.name, "lines": lines_out}
def _snapshot_sync(ts: str):
    """Copy every ``*.log`` file in LOGS_DIR to ``<stem>_snapshot_<ts>.log``.

    Files that are themselves snapshots are skipped to avoid recursive growth.

    Returns:
        One dict per copy with the source name, snapshot name and snapshot size.
    """
    def _copy_one(source):
        target = LOGS_DIR / f"{source.stem}_snapshot_{ts}.log"
        shutil.copy2(source, target)
        return {"source": source.name, "snapshot": target.name,
                "size_bytes": target.stat().st_size}

    return [
        _copy_one(candidate)
        for candidate in LOGS_DIR.glob("*.log")
        if "_snapshot_" not in candidate.stem
    ]
@router.post("/snapshot")
async def save_log_snapshot():
    """Save timestamped copy of all log files.

    The timestamp (local time, ``YYYYmmdd_HHMMSS``) is shared by every copy
    made in this call and echoed back as ``saved_at``.
    """
    LOGS_DIR.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    copies = await asyncio.to_thread(_snapshot_sync, stamp)
    return {"ok": True, "saved_at": stamp, "snapshots": copies}
# ─────────────────────── full bundle (everything in one text blob) ───────────────────────
def _build_bundle_sync(lines_per_file: int, include_system: bool) -> str:
    """Build the full text bundle — header, subsystem status, all logs.

    Args:
        lines_per_file: How many trailing lines of each log file to include.
        include_system: When true, add the live subsystem and dashboard
            router sections before the log tails.

    Returns a single string safe to copy directly into a bug report.
    """
    # NOTE(review): the indentation of this function was reconstructed. The
    # "DASHBOARD ROUTERS" section is assumed to sit inside the
    # `if include_system:` branch together with "SUBSYSTEMS" — confirm
    # against the original file.
    out: list[str] = []
    ts = datetime.now().isoformat(timespec="seconds")
    out.append("=" * 72)
    out.append(f"SANAD LOG BUNDLE — {ts}")
    out.append("=" * 72)
    out.append(f"Hostname : {socket.gethostname()}")
    out.append(f"Platform : {platform.platform()}")
    out.append(f"Python : {sys.version.split()[0]}")
    out.append(f"Executable: {sys.executable}")
    out.append(f"BASE_DIR : {BASE_DIR}")
    out.append(f"LOGS_DIR : {LOGS_DIR}")
    # Subsystems — pull live status from main.SUBSYSTEMS
    if include_system:
        out.append("")
        out.append("-" * 72)
        out.append("SUBSYSTEMS")
        out.append("-" * 72)
        try:
            from Project.Sanad.main import SUBSYSTEMS
        except Exception as exc:
            # Record the import failure in the bundle and carry on with an
            # empty table so the remaining sections are still produced.
            out.append(f" could not import SUBSYSTEMS: {exc}")
            SUBSYSTEMS = {}
        for name in sorted(SUBSYSTEMS):
            comp = SUBSYSTEMS[name]
            if comp is None:
                out.append(f"{name:15s} unavailable")
                continue
            status: dict = {}
            if hasattr(comp, "status") and callable(comp.status):
                try:
                    s = comp.status()
                    if isinstance(s, dict):
                        status = s
                    else:
                        status = {"raw": str(s)}
                except Exception as exc:
                    status = {"status_error": str(exc)}
            try:
                # default=str stringifies values json cannot encode natively.
                status_str = json.dumps(status, ensure_ascii=False, default=str)
            except Exception:
                status_str = str(status)
            out.append(f"{name:15s} {status_str}")
        # Dashboard router load state
        out.append("")
        out.append("-" * 72)
        out.append("DASHBOARD ROUTERS")
        out.append("-" * 72)
        try:
            from Project.Sanad.dashboard.app import _loaded_routes, _failed_routes
            out.append(f" loaded ({len(_loaded_routes)}): {', '.join(_loaded_routes)}")
            if _failed_routes:
                out.append(f" failed ({len(_failed_routes)}):")
                for name, err in _failed_routes.items():
                    out.append(f" - {name}: {err}")
            else:
                out.append(" failed (0): —")
        except Exception as exc:
            out.append(f" could not read dashboard state: {exc}")
    # All log files — tail N lines each, skip snapshots
    out.append("")
    out.append("-" * 72)
    out.append(f"LOG FILES (last {lines_per_file} lines each)")
    out.append("-" * 72)
    LOGS_DIR.mkdir(parents=True, exist_ok=True)
    log_paths = sorted(LOGS_DIR.glob("*.log*"))
    files_included = 0
    for p in log_paths:
        if "_snapshot_" in p.stem:
            continue  # skip stale snapshots
        try:
            size = p.stat().st_size
        except OSError:
            # Size is informational only; report 0 when it cannot be read.
            size = 0
        out.append("")
        out.append(f"=== {p.name} ({size} bytes) ===")
        try:
            with open(p, "r", encoding="utf-8", errors="replace") as f:
                # Bounded deque keeps only the last `lines_per_file` lines.
                tail = deque(f, maxlen=lines_per_file)
            for raw in tail:
                out.append(raw.rstrip("\n"))
            files_included += 1
        except OSError as exc:
            out.append(f" <could not read: {exc}>")
    out.append("")
    out.append("=" * 72)
    out.append(f"END OF BUNDLE — {files_included} log file(s) included")
    out.append("=" * 72)
    return "\n".join(out)
@router.get("/bundle")
async def logs_bundle(lines: int = 1000, include_system: bool = True):
    """Return a single plain-text dump of everything useful for debugging.

    Includes:
      - Timestamp, hostname, platform, Python, BASE_DIR, LOGS_DIR
      - Live status of every subsystem in main.SUBSYSTEMS
      - Dashboard router load/fail state
      - Tail of every .log file in LOGS_DIR (configurable per-file limit)

    Response is `text/plain` so it's safe to copy straight to clipboard
    or pipe into a file. Intended use: dashboard "Copy All Logs" button
    and manual `curl ... > sanad_bundle.txt` debugging.

    ``lines`` is clamped to the range 10..50000 before use.
    """
    # Clamp lines to keep the payload sane
    per_file = max(10, min(int(lines), 50000))
    bundle_text = await asyncio.to_thread(_build_bundle_sync, per_file, include_system)
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    disposition = f'inline; filename="sanad_bundle_{stamp}.txt"'
    return PlainTextResponse(
        bundle_text,
        headers={"Content-Disposition": disposition},
    )

238
vendor/Sanad/dashboard/routes/macros.py vendored Normal file
View File

@ -0,0 +1,238 @@
"""Macro recording and playback endpoints."""
from __future__ import annotations
import asyncio
from pathlib import Path
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Project.Sanad.config import AUDIO_RECORDINGS_DIR, MOTIONS_DIR
from Project.Sanad.core.logger import get_logger
log = get_logger("macros_route")
router = APIRouter()
def _block_if_movement_armed():
    """Raise HTTPException(409) while locomotion movement is armed.

    Arm motion is mutually exclusive with walking. The arm controller's
    motion-block is the safety net, so when the locomotion controller cannot
    be imported or queried this check simply lets the request through.
    """
    try:
        from Project.Sanad.main import loco_controller  # type: ignore
        armed = loco_controller is not None and loco_controller.is_armed()
    except HTTPException:
        raise
    except Exception:
        return
    if not armed:
        return
    raise HTTPException(
        409, "Arm actions are disabled while movement is enabled. "
             "Disable movement in the Controller tab first.")
class MacroName(BaseModel):
    """Request body carrying a single macro name (record start / play)."""

    # Macro identifier, handed to macro_rec.start() or brain.play_macro().
    name: str
# Request body for /play-combined. Either side (audio or motion) may be left
# at its default to omit it; at least one must be set.
class ComboPlayPayload(BaseModel):
    audio_file: str = "" # filename under data/audio/ (or empty for none)
    motion_file: str = "" # DEPRECATED — use action_id. Still accepted for bare JSONL by filename.
    action_id: int | None = None # arm_controller action id (SDK built-in OR JSONL) — preferred
    speed: float = 1.0  # playback speed passed to the arm controller
@router.get("/")
async def list_macros():
    """Return the macro player's catalogue, or an empty list when no player exists."""
    from Project.Sanad.main import macro_play

    available = [] if macro_play is None else macro_play.list_macros()
    return {"macros": available}
@router.get("/status")
async def macro_status():
    """Report recorder and player status; a missing subsystem yields `{}`."""
    from Project.Sanad.main import macro_rec, macro_play

    recorder_state = macro_rec.status() if macro_rec else {}
    player_state = macro_play.status() if macro_play else {}
    return {"recorder": recorder_state, "player": player_state}
@router.post("/record/start")
async def start_recording(payload: MacroName):
    """Start recording a macro under `payload.name`; 503 if no recorder exists."""
    from Project.Sanad.main import macro_rec

    if macro_rec is not None:
        return macro_rec.start(payload.name)
    raise HTTPException(503, "Macro recorder not available.")
@router.post("/record/stop")
async def stop_recording():
    """Stop the active macro recording; 503 if no recorder exists.

    `macro_rec.stop` is run on a worker thread and its return value is the
    response.
    """
    import asyncio
    from Project.Sanad.main import macro_rec

    if macro_rec is not None:
        outcome = await asyncio.to_thread(macro_rec.stop)
        return outcome
    raise HTTPException(503, "Macro recorder not available.")
@router.post("/play")
async def play_macro(payload: MacroName):
    """Play the named macro through the brain.

    Raises:
        HTTPException 409: locomotion movement is armed (arm motion blocked).
        HTTPException 503: the brain subsystem handle is None.
    """
    from Project.Sanad.main import brain

    _block_if_movement_armed()
    # Other subsystem handles in this router (macro_rec, arm, audio_mgr) are
    # guarded against None; do the same here so a missing brain gives a clear
    # 503 instead of an AttributeError -> 500.
    if brain is None:
        raise HTTPException(503, "Brain not available.")
    return await brain.play_macro(payload.name)
@router.post("/stop")
async def stop_macro():
    """Stop macro playback if a player exists; always answers `{"ok": True}`."""
    from Project.Sanad.main import macro_play as player

    if player:
        player.stop()
    return {"ok": True}
# ─── Ad-hoc audio + motion combined playback ─────────────────────────
# List the two catalogues so the dashboard can populate dropdowns, then
# play the chosen pair in parallel (asyncio.gather) — same scheme the
# Brain uses for `parallel`-mode skills, but ad-hoc instead of predefined.
@router.get("/audio-files")
async def list_audio_files():
    """List the `*.wav` files in AUDIO_RECORDINGS_DIR with their size in KiB.

    The directory is created if missing. Files whose `stat()` fails are
    skipped silently.
    """
    AUDIO_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)
    listing = []
    for wav in sorted(AUDIO_RECORDINGS_DIR.glob("*.wav")):
        try:
            kilobytes = round(wav.stat().st_size / 1024, 1)
        except OSError:
            continue
        listing.append({"name": wav.name, "size_kb": kilobytes})
    return {"files": listing, "dir": str(AUDIO_RECORDINGS_DIR)}
@router.get("/motion-files")
async def list_motion_files():
    """List the `*.jsonl` motion files in MOTIONS_DIR with their size in KiB.

    Thin wrapper so the Macro Recorder dropdown does not need the replay
    route. The directory is created if missing; files whose `stat()` fails
    are skipped silently.
    """
    MOTIONS_DIR.mkdir(parents=True, exist_ok=True)
    listing = []
    for motion in sorted(MOTIONS_DIR.glob("*.jsonl")):
        try:
            kilobytes = round(motion.stat().st_size / 1024, 1)
        except OSError:
            continue
        listing.append({"name": motion.name, "size_kb": kilobytes})
    return {"files": listing, "dir": str(MOTIONS_DIR)}
@router.post("/stop-combined")
async def stop_combined():
    """Stop any in-flight combined playback on both the motion and audio side.

    - `arm.cancel()` is called when an arm controller exists.
    - `audio_mgr.stop_playback()` is called when an audio manager exists.

    Both are attempted unconditionally so Stop works even if only one side
    was playing. A failure on either side is logged and reported in the
    response (`motion_error` / `audio_error`) instead of aborting the request.
    """
    from Project.Sanad.main import audio_mgr, arm

    outcome = {"ok": True, "motion_stopped": False, "audio_stopped": False}

    if arm is not None:
        try:
            arm.cancel()
        except Exception as exc:
            log.warning("stop-combined: arm.cancel failed: %s", exc)
            outcome["motion_error"] = str(exc)
        else:
            outcome["motion_stopped"] = True

    if audio_mgr is not None:
        try:
            audio_mgr.stop_playback()
        except Exception as exc:
            log.warning("stop-combined: audio stop failed: %s", exc)
            outcome["audio_error"] = str(exc)
        else:
            outcome["audio_stopped"] = True

    return outcome
@router.post("/play-combined")
async def play_combined(payload: ComboPlayPayload):
    """Fire a user-picked audio clip and arm action in parallel.

    Motion is dispatched via `arm.trigger_by_id(action_id)` (SDK built-in
    actions such as shake_hand, wave, ... as well as recorded JSONL replays),
    or via `arm.replay_file` for the legacy `motion_file` path. Audio goes
    through `audio_mgr.play_wav`. Either side may be omitted.

    All request validation happens before any coroutine object is created,
    so an early 4xx/5xx never leaves an un-awaited coroutine behind.

    Raises:
        HTTPException 400: nothing selected, or audio_file escapes the audio dir.
        HTTPException 404: audio_file does not name an existing regular file.
        HTTPException 409: a motion was requested while movement is armed.
        HTTPException 503: a required subsystem (audio manager / arm) is None.

    Returns:
        `{"ok": True, ...}` echoing the request plus `audio_played` /
        `motion_played` on success or `audio_error` / `motion_error` on a
        playback failure (playback failures do not raise).
    """
    from Project.Sanad.main import audio_mgr, arm

    has_audio = bool(payload.audio_file)
    has_motion = payload.action_id is not None or bool(payload.motion_file)
    if not has_audio and not has_motion:
        raise HTTPException(400, "pick at least one of audio_file / action_id / motion_file")
    if has_motion:
        _block_if_movement_armed()  # audio-only combos still allowed while armed

    # ── validate everything up front ──
    audio_path = None
    if has_audio:
        if audio_mgr is None:
            raise HTTPException(503, "AudioManager not available")
        audio_path = (AUDIO_RECORDINGS_DIR / payload.audio_file).resolve()
        try:
            audio_path.relative_to(AUDIO_RECORDINGS_DIR.resolve())
        except ValueError:
            raise HTTPException(400, "audio_file path traversal denied")
        # is_file(): a directory (e.g. audio_file=".") must not reach play_wav.
        if not audio_path.is_file():
            raise HTTPException(404, f"audio not found: {payload.audio_file}")
    if has_motion and arm is None:
        raise HTTPException(503, "ArmController not available")

    # Same bounds the /trigger motion route applies before calling the arm.
    speed = max(0.1, min(payload.speed, 5.0))

    result: dict = {
        "audio_file": payload.audio_file,
        "action_id": payload.action_id,
        "motion_file": payload.motion_file,
    }
    if has_motion:
        result["speed"] = speed

    async def _play_audio():
        try:
            await asyncio.to_thread(audio_mgr.play_wav, audio_path)
            result["audio_played"] = audio_path.name
        except Exception as exc:
            log.exception("combined play: audio failed")
            result["audio_error"] = str(exc)

    async def _play_motion():
        try:
            if payload.action_id is not None:
                # SDK built-in OR JSONL — arm.trigger_by_id handles both
                await asyncio.to_thread(arm.trigger_by_id,
                                        int(payload.action_id),
                                        speed)
                result["motion_played"] = f"action_id={payload.action_id}"
            else:
                # Legacy path: bare JSONL filename
                motion_path = (MOTIONS_DIR / payload.motion_file).resolve()
                try:
                    motion_path.relative_to(MOTIONS_DIR.resolve())
                except ValueError:
                    result["motion_error"] = "motion_file path traversal denied"
                    return
                if not motion_path.exists():
                    result["motion_error"] = f"motion not found: {payload.motion_file}"
                    return
                await asyncio.to_thread(arm.replay_file, str(motion_path), speed)
                result["motion_played"] = motion_path.name
        except Exception as exc:
            log.exception("combined play: motion failed")
            result["motion_error"] = str(exc)

    # Coroutines are only instantiated now that nothing can raise before gather.
    tasks = []
    if has_audio:
        tasks.append(_play_audio())
    if has_motion:
        tasks.append(_play_motion())
    await asyncio.gather(*tasks)
    return {"ok": True, **result}

89
vendor/Sanad/dashboard/routes/motion.py vendored Normal file
View File

@ -0,0 +1,89 @@
"""Motion endpoints — arm actions, replay management."""
from __future__ import annotations
import asyncio
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
router = APIRouter()
def _block_if_movement_armed():
    """Raise HTTP 409 when the locomotion controller reports it is armed.

    Arm actions are mutually exclusive with walking. The arm controller's own
    motion-block is the safety net; this check only gives the dashboard a
    clear message instead of a silent no-op. Any failure to import or query
    the locomotion controller is treated as "not armed".
    """
    try:
        from Project.Sanad.main import loco_controller  # type: ignore
        if loco_controller is None:
            return
        movement_on = loco_controller.is_armed()
    except HTTPException:
        raise
    except Exception:
        # Cannot determine the state -> do not block the request.
        return
    if not movement_on:
        return
    raise HTTPException(
        409, "Arm actions are disabled while movement is enabled. "
             "Disable movement in the Controller tab first.")
@router.get("/status")
async def motion_status():
    """Return the arm controller's status, or an error dict when it is absent."""
    from Project.Sanad.main import arm

    if arm:
        return arm.status()
    return {"error": "Arm not attached"}
@router.get("/actions")
async def list_actions():
    """Return the arm controller's action list (empty when no arm is attached)."""
    from Project.Sanad.main import arm

    catalogue = arm.list_actions() if arm else []
    return {"actions": catalogue}
# Request body for /trigger. Exactly one selector is needed; when both are
# given, trigger_action uses action_id.
class TriggerPayload(BaseModel):
    action_id: int | None = None  # numeric action id (takes precedence)
    action_name: str | None = None  # action name, used only if action_id is None
    speed: float = 1.0  # clamped to 0.1..5.0 by trigger_action
@router.post("/trigger")
async def trigger_action(payload: TriggerPayload):
    """Trigger one arm action by id (preferred) or by name.

    Raises:
        HTTPException 503: no arm controller attached.
        HTTPException 409: locomotion movement is armed.
        HTTPException 404: the arm controller raised KeyError for the action.
        HTTPException 400: neither action_id nor action_name was provided.
    """
    from Project.Sanad.main import arm

    if arm is None:
        raise HTTPException(503, "Arm controller not attached.")
    _block_if_movement_armed()
    speed = max(0.1, min(payload.speed, 5.0))

    # NOTE: TOCTOU on arm.is_busy is unavoidable from the route layer.
    # The internal arm controller has its own _lock + _is_busy guard inside
    # _execute() that returns silently if busy. We rely on that.
    if payload.action_id is None and not payload.action_name:
        raise HTTPException(400, "Provide action_id or action_name.")

    if payload.action_id is not None:
        dispatch, target, echo_key = arm.trigger_by_id, payload.action_id, "action_id"
    else:
        dispatch, target, echo_key = arm.trigger_by_name, payload.action_name, "action_name"

    try:
        await asyncio.to_thread(dispatch, target, speed)
    except KeyError as exc:
        raise HTTPException(404, str(exc))
    return {"ok": True, echo_key: target, "speed": speed}
@router.post("/cancel")
async def cancel_motion():
    """Cancel the current arm motion; 503 when no arm controller is attached."""
    from Project.Sanad.main import arm

    if arm is not None:
        arm.cancel()
        return {"ok": True, "cancelled": True}
    raise HTTPException(503, "Arm controller not attached.")
@router.post("/gestural-speaking")
async def toggle_gestural(enabled: bool = True):
    """Enable or disable gestural speaking on the brain.

    Args:
        enabled: query flag forwarded to `brain.set_gestural_speaking`.

    Raises:
        HTTPException 503: the brain subsystem handle is None.

    Returns:
        `{"gestural_speaking": <value read back from the brain>}`.
    """
    from Project.Sanad.main import brain

    # The sibling routes guard `arm` against None; without the same guard a
    # missing brain would surface as AttributeError -> 500.
    if brain is None:
        raise HTTPException(503, "Brain not available.")
    brain.set_gestural_speaking(enabled)
    return {"gestural_speaking": brain.gestural_speaking}

98
vendor/Sanad/dashboard/routes/prompt.py vendored Normal file
View File

@ -0,0 +1,98 @@
"""Prompt management — view, edit, reload system prompts."""
from __future__ import annotations
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Project.Sanad.config import SCRIPTS_DIR
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.dashboard.routes._safe_io import (
atomic_write_text, MAX_UPLOAD_BYTES,
)
router = APIRouter()
# Filenames — SINGLE SOURCE in core.script_files
_SCRIPTS = _cfg_section("core", "script_files")
SCRIPT_PROMPT_PATH = SCRIPTS_DIR / _SCRIPTS.get("persona", "sanad_script.txt")
RULE_PROMPT_PATH = SCRIPTS_DIR / _SCRIPTS.get("rules", "sanad_rule.txt")
MAX_PROMPT_BYTES = MAX_UPLOAD_BYTES
# Default system prompt — SINGLE SOURCE in core.gemini_defaults
DEFAULT_SYSTEM_PROMPT = _cfg_section("core", "gemini_defaults").get(
"default_system_prompt",
"You are Sanad (Bousandah), a wise and friendly Emirati assistant. "
"Speak strictly in the UAE dialect (Khaleeji). "
"Be helpful, concise, and use local greetings like 'Marhaba' and 'Ya Khoy'."
)
def _load_system_prompt() -> str:
    """Read the persona prompt from SCRIPT_PROMPT_PATH.

    Returns DEFAULT_SYSTEM_PROMPT when the file is missing or contains only
    whitespace. The file is decoded as UTF-8 with an optional BOM.
    """
    try:
        persona = SCRIPT_PROMPT_PATH.read_text(encoding="utf-8-sig").strip()
    except FileNotFoundError:
        return DEFAULT_SYSTEM_PROMPT
    return persona or DEFAULT_SYSTEM_PROMPT
def _load_rule_prompts() -> dict[str, str]:
    """Parse RULE_PROMPT_PATH into its system / replay prompt sections.

    The file is split on lines of the form `[NAME]`; the lines following a
    header belong to that section, and lines before the first header are
    ignored. `SYSTEM_PROMPT` and `REPLAY_SYSTEM_PROMPT` are the sections read.

    Returns:
        `{"system_prompt": ..., "replay_prompt": ...}`. A missing file leaves
        both empty; an empty system prompt then falls back to
        `_load_system_prompt()`.
    """
    prompts = {"system_prompt": "", "replay_prompt": ""}
    try:
        raw = RULE_PROMPT_PATH.read_text(encoding="utf-8-sig").strip()
    except FileNotFoundError:
        raw = None

    if raw is not None:
        blocks: dict[str, list[str]] = {}
        active = None
        for raw_line in raw.splitlines():
            token = raw_line.strip()
            if token.startswith("[") and token.endswith("]"):
                # New section header: start (or restart) its line buffer.
                active = token[1:-1].strip()
                blocks[active] = []
            elif active is not None:
                blocks[active].append(raw_line.rstrip())
        prompts["system_prompt"] = "\n".join(blocks.get("SYSTEM_PROMPT", [])).strip()
        prompts["replay_prompt"] = "\n".join(blocks.get("REPLAY_SYSTEM_PROMPT", [])).strip()

    if not prompts["system_prompt"]:
        prompts["system_prompt"] = _load_system_prompt()
    return prompts
@router.get("/")
async def get_prompt():
    """Return both prompt file paths plus the currently loaded prompt text."""
    persona_text = _load_system_prompt()
    rule_sections = _load_rule_prompts()
    return {
        "script_path": str(SCRIPT_PROMPT_PATH),
        "rule_path": str(RULE_PROMPT_PATH),
        "system_prompt": persona_text,
        "rules": rule_sections,
    }
# Request body for /update: the full replacement text of the persona prompt.
class PromptUpdate(BaseModel):
    content: str  # size-checked against MAX_PROMPT_BYTES (UTF-8) by update_prompt
@router.post("/update")
async def update_prompt(payload: PromptUpdate):
    """Overwrite the persona prompt file with `payload.content`.

    Trailing whitespace is stripped and a single newline appended before the
    text is written via `atomic_write_text`.

    Raises:
        HTTPException 413: content exceeds MAX_PROMPT_BYTES once UTF-8 encoded.
        HTTPException 500: the directory or file could not be written.
    """
    encoded_size = len(payload.content.encode("utf-8"))
    if encoded_size > MAX_PROMPT_BYTES:
        raise HTTPException(413, f"Prompt too large (max {MAX_PROMPT_BYTES} bytes).")

    normalized = payload.content.rstrip() + "\n"
    try:
        SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
        atomic_write_text(SCRIPT_PROMPT_PATH, normalized)
    except OSError as exc:
        raise HTTPException(500, f"Could not write prompt: {exc}")

    return {
        "ok": True,
        "path": str(SCRIPT_PROMPT_PATH),
        "length": len(payload.content),
    }
@router.post("/reload")
async def reload_prompts():
    """Re-read the rule prompts from disk and return them with both file paths."""
    reloaded = _load_rule_prompts()
    response = {"ok": True}
    response["system_prompt"] = reloaded["system_prompt"]
    response["replay_prompt"] = reloaded["replay_prompt"]
    response["script_path"] = str(SCRIPT_PROMPT_PATH)
    response["rule_path"] = str(RULE_PROMPT_PATH)
    return response

View File

@ -0,0 +1,457 @@
"""Recognition tab — camera vision + face gallery + hot toggles.
Single router covering:
- Vision / Face Recognition toggles (hot — no Gemini restart needed)
- Live camera preview (latest JPEG drop)
- Face gallery CRUD: enroll, upload, capture, rename, delete, ZIP
- Per-photo download + delete
Toggle changes write data/.recognition_state.json atomically. The Gemini
child polls that file at 1 Hz and applies changes mid-session.
"""
from __future__ import annotations
import io
from typing import Optional
from fastapi import APIRouter, File, HTTPException, Query, UploadFile
from fastapi.responses import FileResponse, Response, StreamingResponse
from pydantic import BaseModel
from Project.Sanad.config import BASE_DIR
from Project.Sanad.core.logger import get_logger
from Project.Sanad.dashboard.routes._safe_io import check_upload_size
from Project.Sanad.vision import recognition_state
log = get_logger("recognition_routes")
router = APIRouter()
# ── paths (resolved from BASE_DIR) ──────────────────────────
STATE_PATH = BASE_DIR / "data" / ".recognition_state.json"
FACES_DIR = BASE_DIR / "data" / "faces"
ALLOWED_IMAGE_EXTS = {".jpg", ".jpeg", ".png"}
# ── helpers ─────────────────────────────────────────────────
def _get_camera():
"""Lazy import to avoid circular import on dashboard load."""
try:
from Project.Sanad.main import camera # type: ignore
return camera
except Exception:
return None
def _get_gallery():
"""Lazy import — same reason."""
try:
from Project.Sanad.main import gallery # type: ignore
return gallery
except Exception:
return None
def _bump_and_write_state(**changes) -> recognition_state.RecognitionState:
    """Persist the given field changes (e.g. vision_enabled, face_rec_enabled).

    Delegates to `recognition_state.mutate` on STATE_PATH and returns its result.
    """
    updated = recognition_state.mutate(STATE_PATH, **changes)
    return updated
def _bump_gallery_version() -> int:
    """Increment the persisted `gallery_version` by one and return the new value.

    NOTE(review): the read and the write are two separate calls; whether
    concurrent bumps can collide depends on `recognition_state` — confirm.
    """
    next_version = recognition_state.read(STATE_PATH).gallery_version + 1
    recognition_state.mutate(STATE_PATH, gallery_version=next_version)
    return next_version
# ── state + toggles ─────────────────────────────────────────
@router.get("/state")
async def get_state():
    """Return the persisted toggles, camera status and gallery counters.

    Gallery counting is best effort: if listing fails, the counters keep
    whatever was computed so far (0 by default). A missing camera subsystem
    is reported as a stub status dict rather than an error.
    """
    state = recognition_state.read(STATE_PATH)
    camera_daemon = _get_camera()
    face_gallery = _get_gallery()

    n_faces = 0
    n_photos = 0
    if face_gallery is not None:
        try:
            listed = face_gallery.list()
            n_faces = len(listed)
            n_photos = sum(len(item.sample_paths) for item in listed)
        except Exception:
            pass

    if camera_daemon is not None:
        camera_info = camera_daemon.status()
    else:
        camera_info = {
            "running": False, "backend": None, "error": "camera subsystem unavailable"
        }

    return {
        "vision_enabled": state.vision_enabled,
        "face_rec_enabled": state.face_rec_enabled,
        "gallery_version": state.gallery_version,
        "camera": camera_info,
        "faces_count": n_faces,
        "photos_count": n_photos,
    }
@router.post("/vision")
async def set_vision(on: bool = Query(...)):
    """Enable / disable camera vision (hot, no Gemini restart).

    Starts or stops the camera as needed, then persists `vision_enabled`.
    If the camera fails to start, `vision_enabled=False` is persisted and a
    503 is raised.

    Raises:
        HTTPException 503: camera subsystem missing, or the camera did not start.
    """
    camera_daemon = _get_camera()
    if camera_daemon is None:
        log.warning("vision toggle requested but camera subsystem unavailable")
        raise HTTPException(503, "Camera subsystem not available.")

    currently_running = camera_daemon.is_running()
    if on and not currently_running:
        started = camera_daemon.start()
        if not started:
            log.warning("vision ON requested but camera.start() failed: %s",
                        camera_daemon.error or "no backend")
            _bump_and_write_state(vision_enabled=False)
            raise HTTPException(
                503,
                f"Camera could not start (no backend). {camera_daemon.error or ''}")
    elif not on and currently_running:
        camera_daemon.stop()

    new_state = _bump_and_write_state(vision_enabled=bool(on))
    toggle_label = "ON" if on else "OFF"
    backend_label = camera_daemon.backend if camera_daemon.is_running() else "none"
    log.info("vision %s (backend=%s)", toggle_label, backend_label)
    return {
        "ok": True,
        "vision_enabled": new_state.vision_enabled,
        "camera": camera_daemon.status(),
    }
@router.post("/face-rec")
async def set_face_rec(on: bool = Query(...)):
    """Enable / disable face recognition (hot, no Gemini restart).

    Only the persisted state is changed here. Per the module design, the
    Gemini child picks the change up within about 1 s: ON re-sends the
    gallery primer, OFF tells Gemini to disregard the gallery. Both apply to
    the live session, with no reconnect needed.
    """
    new_state = _bump_and_write_state(face_rec_enabled=bool(on))
    toggle_label = "ON" if on else "OFF"
    log.info("face recognition %s", toggle_label)
    return {"ok": True, "face_rec_enabled": new_state.face_rec_enabled}
@router.post("/sync")
async def sync_gallery():
    """Bump `gallery_version` so the child re-sends the primer if face-rec is ON."""
    version = _bump_gallery_version()
    log.info("gallery sync requested → v.%d", version)
    return {"ok": True, "gallery_version": version}
# ── live preview ────────────────────────────────────────────
@router.get("/frame.jpg")
async def latest_frame():
    """Serve the camera's most recent JPEG via `cam.snapshot_jpeg()`.

    The response is marked non-cacheable so the preview always refetches.

    Raises:
        HTTPException 503: camera subsystem missing.
        HTTPException 404: no frame is available yet.
    """
    camera_daemon = _get_camera()
    if camera_daemon is None:
        raise HTTPException(503, "Camera subsystem unavailable.")

    frame_bytes = camera_daemon.snapshot_jpeg()
    if frame_bytes:
        no_cache = {"Cache-Control": "no-store, must-revalidate"}
        return Response(content=frame_bytes, media_type="image/jpeg", headers=no_cache)
    raise HTTPException(404, "No frame captured yet.")
# ── camera resolution / quality ─────────────────────────────
# Request body for /camera-config. Every field is optional; bounds are
# enforced by set_camera_config before the values reach the camera.
class CameraConfigPayload(BaseModel):
    width: Optional[int] = None  # accepted range 160..1920
    height: Optional[int] = None  # accepted range 120..1080
    fps: Optional[int] = None  # accepted range 1..60
    jpeg_quality: Optional[int] = None  # accepted range 10..95
@router.post("/camera-config")
async def set_camera_config(payload: CameraConfigPayload):
    """Hot-swap the camera capture profile (resolution / fps / JPEG quality).

    Validated values are handed to `cam.reconfigure()`; fields left as None
    are passed through as None. Bounds are sanity-checked here so a
    fat-fingered value can't wedge the daemon.

    Raises:
        HTTPException 503: camera subsystem missing.
        HTTPException 400: a provided field is outside its accepted range.
    """
    cam = _get_camera()
    if cam is None:
        raise HTTPException(503, "Camera subsystem unavailable.")

    # (field, low, high) checked in declaration order so the first offending
    # field is the one reported. The message states the range with an explicit
    # separator (the previous literals read "1601920", "1201080", ...).
    limits = (
        ("width", 160, 1920),
        ("height", 120, 1080),
        ("fps", 1, 60),
        ("jpeg_quality", 10, 95),
    )
    for field, low, high in limits:
        value = getattr(payload, field)
        if value is not None and not (low <= value <= high):
            raise HTTPException(400, f"{field} out of range ({low}-{high})")

    profile = cam.reconfigure(
        width=payload.width, height=payload.height,
        fps=payload.fps, jpeg_quality=payload.jpeg_quality,
    )
    log.info("camera reconfigured via dashboard → %s", profile)
    return {"ok": True, "profile": profile, "camera": cam.status()}
# ── face gallery routes ─────────────────────────────────────
def _validate_image(content: bytes, filename: str | None = None) -> None:
    """Reject uploads that are oversize, near-empty, or not JPEG/PNG.

    The format is detected from the leading magic bytes only, never from the
    filename; `filename` is used purely for the error message.

    Raises:
        HTTPException 400: fewer than 16 bytes, or neither JPEG nor PNG magic.
        Whatever `check_upload_size` raises for oversize content.
    """
    check_upload_size(content)
    if len(content) < 16:
        raise HTTPException(400, "Image too small / empty.")

    looks_jpeg = content.startswith(b"\xff\xd8\xff")
    looks_png = content.startswith(b"\x89PNG\r\n\x1a\n")
    if looks_jpeg or looks_png:
        return
    raise HTTPException(
        400,
        f"Only JPEG/PNG accepted (got {filename or 'unknown'}).",
    )
def _entry_to_dict(entry) -> dict:
photos = []
for p in entry.sample_paths:
try:
photos.append({"name": p.name, "size_bytes": p.stat().st_size})
except OSError:
continue
return {
"id": entry.id,
"name": entry.name,
"description": entry.description,
"added_at": entry.added_at,
"photos": photos,
}
@router.get("/faces")
async def list_faces():
    """List every gallery entry with its photos; 503 when the gallery is missing."""
    face_gallery = _get_gallery()
    if face_gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    listed = face_gallery.list()
    serialised = [_entry_to_dict(item) for item in listed]
    return {"faces": serialised, "total": len(listed)}
# Request body for /faces/{face_id}/rename; passed to gallery.rename as-is.
class RenamePayload(BaseModel):
    name: Optional[str] = None  # None/empty is logged as "(unnamed)"
# Request body for /faces/{face_id}/describe; passed to gallery.set_description.
class DescribePayload(BaseModel):
    description: Optional[str] = None  # falsy value is logged as "cleared"
@router.post("/faces/enroll")
async def enroll_from_camera(name: Optional[str] = Query(default=None),
                             description: Optional[str] = Query(default=None)):
    """Create a new face from the camera's latest snapshot.

    Args:
        name: optional display name (whitespace-stripped).
        description: optional free-text description (whitespace-stripped).

    Raises:
        HTTPException 503: face gallery subsystem missing.
        HTTPException 409: camera not running, or no fresh frame arrived.
    """
    import asyncio  # local import: this module has no file-level asyncio import

    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    cam = _get_camera()
    if cam is None or not cam.is_running():
        raise HTTPException(409, "Camera is not running. Toggle Vision ON first.")
    # get_fresh_frame waits briefly for a current frame so the enrolled
    # photo is the scene the user is posing for, not a stale buffer.
    # It can wait up to timeout_s, so it runs on a worker thread instead of
    # stalling the event loop (and every other dashboard request) meanwhile.
    jpeg = await asyncio.to_thread(
        cam.get_fresh_frame, max_age_s=0.5, timeout_s=1.5
    )
    if not jpeg:
        raise HTTPException(409, "Camera has no frame yet. Wait a moment and retry.")
    entry = gallery.create_face(
        [jpeg],
        name=name.strip() if name else None,
        description=description.strip() if description else None,
    )
    v = _bump_gallery_version()
    log.info("enrolled face_%d via camera (name=%s, desc=%s, v.%d)",
             entry.id, name or "(unnamed)",
             "yes" if description else "no", v)
    return {"ok": True, "face": _entry_to_dict(entry)}
@router.post("/faces/upload")
async def enroll_from_upload(
    files: list[UploadFile] = File(...),
    name: Optional[str] = Query(default=None),
    description: Optional[str] = Query(default=None),
):
    """Create a new face from one or more uploaded image files.

    Every file is read and validated (size + JPEG/PNG magic) before the face
    is created, so a bad file aborts the request without touching the gallery.

    Raises:
        HTTPException 503: face gallery subsystem missing.
        HTTPException 400: no files, or a file failed validation.
    """
    face_gallery = _get_gallery()
    if face_gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    if not files:
        raise HTTPException(400, "At least one image file required.")

    blobs: list[bytes] = []
    for upload in files:
        blob = await upload.read()
        _validate_image(blob, upload.filename)
        blobs.append(blob)

    clean_name = name.strip() if name else None
    clean_description = description.strip() if description else None
    entry = face_gallery.create_face(
        blobs, name=clean_name, description=clean_description
    )

    version = _bump_gallery_version()
    log.info("enrolled face_%d via upload (%d photos, name=%s, desc=%s, v.%d)",
             entry.id, len(blobs), name or "(unnamed)",
             "yes" if description else "no", version)
    return {"ok": True, "face": _entry_to_dict(entry)}
@router.post("/faces/{face_id}/capture")
async def capture_to_face(face_id: int):
    """Add a new sample (from the camera) to an existing face.

    Raises:
        HTTPException 503: face gallery subsystem missing.
        HTTPException 409: camera not running, or no fresh frame arrived.
        HTTPException 404: `gallery.add_photo` raised FileNotFoundError.
    """
    import asyncio  # local import: this module has no file-level asyncio import

    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    cam = _get_camera()
    if cam is None or not cam.is_running():
        raise HTTPException(409, "Camera is not running. Toggle Vision ON first.")
    # The wait for a fresh frame can take up to timeout_s; run it on a worker
    # thread so the event loop keeps serving other requests.
    jpeg = await asyncio.to_thread(
        cam.get_fresh_frame, max_age_s=0.5, timeout_s=1.5
    )
    if not jpeg:
        raise HTTPException(409, "Camera has no frame yet.")
    try:
        fname = gallery.add_photo(face_id, jpeg)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc))
    v = _bump_gallery_version()
    # Separator added: the previous format ran the face id and file name together.
    log.info("captured new photo for face_%d → %s (v.%d)", face_id, fname, v)
    return {"ok": True, "added": fname, "face": _entry_to_dict(gallery.get(face_id))}
@router.post("/faces/{face_id}/upload")
async def upload_to_face(face_id: int, files: list[UploadFile] = File(...)):
    """Add one or more uploaded samples to an existing face.

    All files are read and validated first; photos are only added once every
    file has passed, so one bad file no longer leaves a partial upload behind.
    If adding fails midway, the gallery version is still bumped when at least
    one photo was stored, so the change on disk is not left unannounced.

    Raises:
        HTTPException 503: face gallery subsystem missing.
        HTTPException 404: unknown face, or `add_photo` raised FileNotFoundError.
        HTTPException 400: a file failed validation (nothing is added).
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    if gallery.get(face_id) is None:
        raise HTTPException(404, f"face_{face_id} not found")

    # Phase 1: validate everything before mutating the gallery.
    contents: list[bytes] = []
    for f in files:
        content = await f.read()
        _validate_image(content, f.filename)
        contents.append(content)

    # Phase 2: store the photos.
    added: list[str] = []
    try:
        for content in contents:
            added.append(gallery.add_photo(face_id, content))
    except FileNotFoundError as exc:
        if added:
            _bump_gallery_version()
        raise HTTPException(404, str(exc))

    v = _bump_gallery_version()
    log.info("uploaded %d photo(s) to face_%d (v.%d)", len(added), face_id, v)
    return {"ok": True, "added": added,
            "face": _entry_to_dict(gallery.get(face_id))}
@router.post("/faces/{face_id}/rename")
async def rename_face(face_id: int, payload: RenamePayload):
    """Rename a face and bump the gallery version.

    Raises:
        HTTPException 503: face gallery subsystem missing.
        HTTPException 404: `gallery.rename` raised FileNotFoundError.
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    try:
        gallery.rename(face_id, payload.name)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc))
    v = _bump_gallery_version()
    # Separator added: the previous format ran the face id and new name together.
    log.info("renamed face_%d → %s (v.%d)", face_id,
             payload.name or "(unnamed)", v)
    return {"ok": True, "face": _entry_to_dict(gallery.get(face_id))}
@router.post("/faces/{face_id}/describe")
async def describe_face(face_id: int, payload: DescribePayload):
    """Set or clear a face's free-text description, then bump the gallery version.

    Per the module design the description is folded into the Gemini primer
    turn so Gemini can reference it.

    Raises:
        HTTPException 503: face gallery subsystem missing.
        HTTPException 404: `gallery.set_description` raised FileNotFoundError.
    """
    face_gallery = _get_gallery()
    if face_gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")

    try:
        face_gallery.set_description(face_id, payload.description)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc))

    version = _bump_gallery_version()
    action = "set" if payload.description else "cleared"
    log.info("described face_%d (%s, v.%d)", face_id, action, version)
    return {"ok": True, "face": _entry_to_dict(face_gallery.get(face_id))}
@router.delete("/faces/{face_id}")
async def delete_face(face_id: int):
    """Delete a face from the gallery and bump the gallery version.

    Raises:
        HTTPException 503: face gallery subsystem missing.
        HTTPException 404: `gallery.delete_face` raised FileNotFoundError.
    """
    face_gallery = _get_gallery()
    if face_gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")

    try:
        face_gallery.delete_face(face_id)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc))

    version = _bump_gallery_version()
    log.info("deleted face_%d (v.%d)", face_id, version)
    return {"ok": True, "deleted": face_id}
@router.delete("/faces/{face_id}/photo/{photo_name}")
async def delete_photo(face_id: int, photo_name: str):
    """Delete one photo of a face and bump the gallery version.

    Raises:
        HTTPException 503: face gallery subsystem missing.
        HTTPException 400: unsafe photo name, or the gallery raised ValueError.
        HTTPException 404: the gallery raised FileNotFoundError.
    """
    face_gallery = _get_gallery()
    if face_gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")

    # Only plain file names are accepted: no separators, parent refs or NULs.
    unsafe_tokens = ("/", "..", "\x00")
    if any(token in photo_name for token in unsafe_tokens):
        raise HTTPException(400, "Invalid photo name.")

    try:
        face_gallery.delete_photo(face_id, photo_name)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc))
    except ValueError as exc:
        raise HTTPException(400, str(exc))

    version = _bump_gallery_version()
    log.info("deleted photo %s from face_%d (v.%d)", photo_name, face_id, version)
    return {"ok": True, "deleted": photo_name}
@router.get("/faces/{face_id}/photo/{photo_name}")
async def get_photo(face_id: int, photo_name: str,
                    download: int = Query(default=0)):
    """Serve a single photo. Add ?download=1 for attachment disposition.

    The media type is `image/png` for a `.png` suffix and `image/jpeg`
    otherwise.

    Raises:
        HTTPException 503: face gallery subsystem missing.
        HTTPException 400: unsafe photo name.
        HTTPException 404: the gallery has no such photo.
    """
    face_gallery = _get_gallery()
    if face_gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")

    # Only plain file names are accepted: no separators, parent refs or NULs.
    unsafe_tokens = ("/", "..", "\x00")
    if any(token in photo_name for token in unsafe_tokens):
        raise HTTPException(400, "Invalid photo name.")

    photo_path = face_gallery.get_photo(face_id, photo_name)
    if photo_path is None:
        raise HTTPException(404, "Photo not found.")

    if photo_path.suffix.lower() == ".png":
        media_type = "image/png"
    else:
        media_type = "image/jpeg"

    extra_headers = {}
    if download:
        extra_headers["Content-Disposition"] = (
            f'attachment; filename="face_{face_id}_{photo_name}"'
        )
    return FileResponse(photo_path, media_type=media_type, headers=extra_headers)
@router.get("/faces/{face_id}/download.zip")
async def download_face_zip(face_id: int):
    """Return the ZIP bytes produced by `gallery.zip_face` as an attachment.

    Raises:
        HTTPException 503: face gallery subsystem missing.
        HTTPException 404: `gallery.zip_face` raised FileNotFoundError.
    """
    face_gallery = _get_gallery()
    if face_gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")

    try:
        archive = face_gallery.zip_face(face_id)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc))

    zip_headers = {
        "Content-Disposition": f'attachment; filename="face_{face_id}.zip"',
        "Content-Length": str(len(archive)),
    }
    return StreamingResponse(
        io.BytesIO(archive),
        media_type="application/zip",
        headers=zip_headers,
    )

230
vendor/Sanad/dashboard/routes/records.py vendored Normal file
View File

@ -0,0 +1,230 @@
"""Saved records management — list, play, pause, resume, stop, rename, delete.
Manages WAV recordings saved via the typed replay engine.
"""
from __future__ import annotations
import json
import threading
from pathlib import Path
from typing import Any
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Project.Sanad.config import AUDIO_RECORDINGS_DIR
from Project.Sanad.dashboard.routes._safe_io import (
safe_filename, safe_path_under, atomic_write_json,
)
router = APIRouter()
RECORDS_INDEX = AUDIO_RECORDINGS_DIR / "records.json"
_INDEX_LOCK = threading.Lock()
def _load_index() -> dict[str, Any]:
    """Load the records index JSON, or return an empty index.

    A missing file yields an empty index. An unreadable or corrupt file is
    renamed to `*.json.corrupt` (best effort) rather than deleted, and an
    empty index is returned.
    """
    blank = {"records": [], "total_records": 0, "last_updated": ""}
    if not RECORDS_INDEX.exists():
        return blank
    try:
        with open(RECORDS_INDEX, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except (json.JSONDecodeError, OSError):
        # Keep the broken file around for inspection instead of nuking it.
        quarantine = RECORDS_INDEX.with_suffix(".json.corrupt")
        try:
            RECORDS_INDEX.rename(quarantine)
        except OSError:
            pass
        return blank
def _save_index(payload: dict[str, Any]):
    """Write the index atomically after refreshing its `total_records` count.

    Creates AUDIO_RECORDINGS_DIR if needed. `payload` is updated in place.
    """
    AUDIO_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)
    record_list = payload.get("records", [])
    payload["total_records"] = len(record_list)
    atomic_write_json(RECORDS_INDEX, payload)
def _resolve_path(path_str: str) -> Path:
    """Turn a stored record path into a Path.

    Legacy records stored absolute paths; new records store basenames.
    - empty value    -> AUDIO_RECORDINGS_DIR itself
    - absolute path  -> returned unchanged (no containment check is done here;
                        callers that need one apply it themselves)
    - anything else  -> joined onto AUDIO_RECORDINGS_DIR
    """
    if not path_str:
        return AUDIO_RECORDINGS_DIR
    candidate = Path(path_str)
    return candidate if candidate.is_absolute() else AUDIO_RECORDINGS_DIR / candidate
def _reconcile(payload: dict[str, Any]) -> tuple[dict[str, Any], int]:
    """Drop index entries whose audio files are gone or whose shape is invalid.

    An entry is kept only if both its `speaker_recording` and
    `gemini_raw_output` paths exist. `payload` is updated in place.

    Returns:
        `(payload, removed_count)`.
    """
    survivors = []
    dropped = 0
    for record in payload.get("records", []):
        try:
            file_map = record["files"]
            speaker = _resolve_path(file_map["speaker_recording"]["path"])
            raw = _resolve_path(file_map["gemini_raw_output"]["path"])
            both_present = speaker.exists() and raw.exists()
        except (KeyError, TypeError):
            # Malformed entry: treat it like a missing recording.
            dropped += 1
            continue
        if both_present:
            survivors.append(record)
        else:
            dropped += 1
    payload["records"] = survivors
    payload["total_records"] = len(survivors)
    return payload, dropped
@router.get("/")
async def list_records():
    """Return the records index after pruning entries whose files are missing.

    The index is rewritten only when something was pruned. The whole
    read-prune-write sequence runs under `_INDEX_LOCK`.
    """
    with _INDEX_LOCK:
        index, pruned = _reconcile(_load_index())
        if pruned:
            _save_index(index)
    return index
# Request body for /play.
class RecordPlay(BaseModel):
    record_name: str  # matched against each index entry's "record_name"
    file_kind: str = "speaker" # speaker | raw
# Strong references to in-flight playback tasks. The asyncio docs advise
# keeping a reference to the result of create_task(), since the event loop
# itself only holds weak references to tasks.
_PLAYBACK_TASKS: set = set()


@router.post("/play")
async def play_record(payload: RecordPlay):
    """Start playing a saved record in the background and return immediately.

    `file_kind == "speaker"` selects the speaker recording; any other value
    selects the raw Gemini output.

    Raises:
        HTTPException 404: unknown record, entry lacks the requested file,
            or the file is missing on disk.
        HTTPException 400: the stored path resolves outside the recordings dir.
        HTTPException 503: the audio manager handle is None.
    """
    import asyncio

    with _INDEX_LOCK:
        index = _load_index()
    entry = next((r for r in index.get("records", []) if r.get("record_name") == payload.record_name), None)
    if entry is None:
        raise HTTPException(404, f"Record not found: {payload.record_name}")

    file_key = "speaker_recording" if payload.file_kind == "speaker" else "gemini_raw_output"
    try:
        stored_path = entry["files"][file_key]["path"]
    except (KeyError, TypeError):
        # A malformed index entry is a client-visible 404, not a 500.
        raise HTTPException(404, f"Record has no {file_key} file: {payload.record_name}")

    raw_path = _resolve_path(stored_path).resolve()
    base = AUDIO_RECORDINGS_DIR.resolve()
    try:
        raw_path.relative_to(base)
    except ValueError:
        raise HTTPException(400, "Record path outside recordings directory.")
    if not raw_path.is_file():
        raise HTTPException(404, f"File not found: {raw_path.name}")

    from Project.Sanad.main import audio_mgr
    if audio_mgr is None:
        raise HTTPException(503, "AudioManager not available")

    # Fire-and-forget — play_wav blocks for the clip duration on the G1
    # DDS path, and the dashboard's pause / resume / stop / status calls
    # need to be served while it's running. Without this, /play wouldn't
    # return until the clip finished and the UI couldn't interact with
    # the in-flight playback.
    task = asyncio.create_task(asyncio.to_thread(
        audio_mgr.play_wav, raw_path, payload.record_name,
    ))
    _PLAYBACK_TASKS.add(task)
    task.add_done_callback(_PLAYBACK_TASKS.discard)

    return {"ok": True, "record_name": payload.record_name,
            "file_kind": payload.file_kind, "path": str(raw_path)}
@router.post("/pause")
async def pause_playback():
    """Pause the current playback; the audio manager's result is the response."""
    from Project.Sanad.main import audio_mgr as manager

    outcome = manager.pause_playback()
    return outcome
@router.post("/resume")
async def resume_playback():
    """Resume paused playback; the audio manager's result is the response."""
    from Project.Sanad.main import audio_mgr as manager

    outcome = manager.resume_playback()
    return outcome
@router.post("/stop")
async def stop_playback():
    """Stop playback; `audio_mgr.stop_playback` runs on a worker thread."""
    import asyncio
    from Project.Sanad.main import audio_mgr as manager

    await asyncio.to_thread(manager.stop_playback)
    return {"ok": True, "stopped": True}
@router.get("/playback-status")
async def playback_status():
    """Return whatever `audio_mgr.playback_status()` reports."""
    from Project.Sanad.main import audio_mgr as manager

    snapshot = manager.playback_status()
    return snapshot
# Request body for /rename.
class RecordRename(BaseModel):
    record_name: str  # current name, matched against the index
    new_name: str  # sanitised by rename_record; a trailing ".wav" is stripped
@router.post("/rename")
async def rename_record(payload: RecordRename):
    """Rename a record's audio files on disk and update the index.

    The rename runs in two phases so it is all-or-nothing with respect to
    name conflicts: every target name is checked first, and only then are
    files moved. Previously a conflict on the second file raised 409 after
    the first file had already been renamed, leaving the index pointing at a
    file that no longer existed.

    Raises:
        HTTPException 400: the new name is empty or starts with "." after
            sanitising.
        HTTPException 404: no record with `payload.record_name`.
        HTTPException 409: a target file already exists.
    """
    new_name = safe_filename(payload.new_name)
    # Strip any extension the user provided — we add our own
    if new_name.lower().endswith(".wav"):
        new_name = new_name[:-4]
    if not new_name or new_name.startswith("."):
        raise HTTPException(400, "Invalid new name.")

    with _INDEX_LOCK:
        index = _load_index()
        entry = next(
            (r for r in index.get("records", []) if r.get("record_name") == payload.record_name),
            None,
        )
        if entry is None:
            raise HTTPException(404, f"Record not found: {payload.record_name}")

        base = AUDIO_RECORDINGS_DIR.resolve()

        # Phase 1: plan the moves and reject conflicts before touching disk.
        planned = []
        for key in ("speaker_recording", "gemini_raw_output"):
            try:
                old_path = _resolve_path(entry["files"][key]["path"]).resolve()
                old_path.relative_to(base)  # ensure inside recordings dir
            except (KeyError, TypeError, ValueError):
                continue
            if not old_path.exists():
                continue
            suffix = "_raw.wav" if key == "gemini_raw_output" else ".wav"
            new_path = safe_path_under(AUDIO_RECORDINGS_DIR, f"{new_name}{suffix}")
            if new_path.exists():
                raise HTTPException(409, f"File already exists: {new_path.name}")
            planned.append((key, old_path, new_path))

        # Phase 2: perform the moves and record the new basenames.
        for key, old_path, new_path in planned:
            old_path.rename(new_path)
            entry["files"][key]["path"] = new_path.name  # basename — portable
            entry["files"][key]["name"] = new_path.name

        entry["record_name"] = new_name
        _save_index(index)

    return {"ok": True, "record": entry}
# Request body for POST /delete.
class RecordDelete(BaseModel):
    record_name: str  # matched against each index entry's "record_name"
@router.post("/delete")
async def delete_record(payload: RecordDelete):
    """Remove a record from the index and delete its files from disk.

    Only the first entry whose ``record_name`` matches is removed. Stored
    paths are resolved with ``_resolve_path`` — the same resolver
    ``rename_record`` uses — because renamed records store a bare file name
    that a plain ``Path(...).resolve()`` would resolve against the working
    directory and then skip as "outside the recordings dir".

    Raises:
        HTTPException 404: no index entry has ``payload.record_name``.
    """
    with _INDEX_LOCK:
        index = _load_index()
        kept = []
        deleted_entry = None
        for r in index.get("records", []):
            if deleted_entry is None and r.get("record_name") == payload.record_name:
                deleted_entry = r
            else:
                kept.append(r)
        if deleted_entry is None:
            raise HTTPException(404, f"Record not found: {payload.record_name}")
        base = AUDIO_RECORDINGS_DIR.resolve()
        deleted_files = []
        for fi in deleted_entry.get("files", {}).values():
            # Tolerate malformed entries (non-dict or missing/empty path).
            stored = fi.get("path") if isinstance(fi, dict) else None
            if not stored:
                continue
            try:
                p = _resolve_path(stored).resolve()
                p.relative_to(base)  # never delete files outside recordings dir
            except (ValueError, OSError):
                continue
            if p.exists():
                p.unlink()
                deleted_files.append(str(p))
        index["records"] = kept
        _save_index(index)
    return {"ok": True, "deleted": payload.record_name, "deleted_files": deleted_files}

184
vendor/Sanad/dashboard/routes/replay.py vendored Normal file
View File

@ -0,0 +1,184 @@
"""Replay management endpoints — JSONL files, teaching, test replay, speed control.
Mirrors the replay management features from AI_Photographer/Server/photo_server.py.
"""
from __future__ import annotations
import asyncio
from fastapi import APIRouter, HTTPException, UploadFile, File
from fastapi.responses import FileResponse
from pydantic import BaseModel
from Project.Sanad.config import MOTIONS_DIR
from Project.Sanad.core.logger import get_logger
from Project.Sanad.dashboard.routes._safe_io import (
safe_path_under, check_upload_size, atomic_write_bytes,
)
log = get_logger("replay_route")
router = APIRouter()
def _block_if_movement_armed():
    """Refuse arm actions while locomotion is armed.

    Raises HTTPException 409 when ``loco_controller.is_armed()`` is truthy.
    If the controller cannot be imported or queried, the check is skipped and
    the function returns without raising.
    """
    try:
        from Project.Sanad.main import loco_controller as loco  # type: ignore
        if loco is None:
            movement_on = False
        else:
            movement_on = loco.is_armed()
    except HTTPException:
        raise
    except Exception:
        # Best effort: an unavailable controller never blocks arm motion.
        return
    if not movement_on:
        return
    raise HTTPException(
        409, "Arm actions are disabled while movement is enabled. "
        "Disable movement in the Controller tab first.")
# -- models --
# Request body for POST /test.
class ReplayRequest(BaseModel):
    name: str  # motion file name, resolved under MOTIONS_DIR by the handler
    speed: float = 1.0  # passed unchanged to arm.replay_file
# Request body for POST /files/rename.
class RenameRequest(BaseModel):
    old_name: str  # existing file name under MOTIONS_DIR
    new_name: str  # target file name under MOTIONS_DIR
# Request body for POST /teach/start.
class TeachRequest(BaseModel):
    name: str  # motion name; the handler checks for "<name>.jsonl" in MOTIONS_DIR
    duration_sec: float = 15.0  # passed unchanged to teacher.start
# -- motion file CRUD --
@router.get("/files")
async def list_motion_files():
    """List motion files as reported by ``arm.list_motion_files()``."""
    from Project.Sanad.main import arm as arm_ctl

    listing = arm_ctl.list_motion_files()
    return {"files": listing}
@router.get("/files/{filename}")
async def download_motion_file(filename: str):
    """Send one motion file from MOTIONS_DIR as a download.

    Raises HTTPException 404 when the resolved path does not exist.
    """
    target = safe_path_under(MOTIONS_DIR, filename)
    if target.exists():
        return FileResponse(target, filename=target.name, media_type="application/json")
    raise HTTPException(404, "File not found.")
@router.post("/files/upload")
async def upload_motion_file(file: UploadFile = File(...)):
    """Store an uploaded ``.jsonl`` motion file in MOTIONS_DIR.

    Raises HTTPException 400 when the upload has no name or the name does not
    end in ``.jsonl`` (case-insensitive). The size check and the write are
    delegated to the ``_safe_io`` helpers.
    """
    upload_name = file.filename
    if not (upload_name and upload_name.lower().endswith(".jsonl")):
        raise HTTPException(400, "Only .jsonl files accepted.")
    MOTIONS_DIR.mkdir(parents=True, exist_ok=True)
    target = safe_path_under(MOTIONS_DIR, upload_name)
    blob = await file.read()
    check_upload_size(blob)
    atomic_write_bytes(target, blob)
    return {"ok": True, "name": target.name, "size_bytes": len(blob)}
@router.post("/files/rename")
async def rename_motion_file(payload: RenameRequest):
    """Rename a motion file inside MOTIONS_DIR.

    Raises:
        HTTPException 404: the source file does not exist.
        HTTPException 409: the target name is already taken.
    """
    source = safe_path_under(MOTIONS_DIR, payload.old_name)
    target = safe_path_under(MOTIONS_DIR, payload.new_name)
    if not source.exists():
        raise HTTPException(404, f"File not found: {payload.old_name}")
    if target.exists():
        raise HTTPException(409, f"File already exists: {payload.new_name}")
    source.rename(target)
    return {"ok": True, "old_name": source.name, "new_name": target.name}
@router.delete("/files/{filename}")
async def delete_motion_file(filename: str):
    """Delete one motion file from MOTIONS_DIR.

    Raises HTTPException 404 when the resolved path does not exist.
    """
    target = safe_path_under(MOTIONS_DIR, filename)
    if target.exists():
        target.unlink()
        return {"ok": True, "deleted": target.name}
    raise HTTPException(404, "File not found.")
# -- test replay --
# Strong references to in-flight replay tasks; each task removes itself when done.
_BG_TASKS: set[asyncio.Task] = set()


@router.post("/test")
async def test_replay(payload: ReplayRequest):
    """Start a test replay of a motion file and return immediately.

    The replay runs in a background task (``arm.replay_file`` on a worker
    thread); failures there are logged, not returned to the caller.

    Raises:
        HTTPException 409: movement is armed, or the arm is busy.
        HTTPException 404: the motion file does not exist.
    """
    from Project.Sanad.main import arm

    _block_if_movement_armed()
    if arm.is_busy:
        raise HTTPException(409, "Arm is busy.")
    motion_path = safe_path_under(MOTIONS_DIR, payload.name)
    if not motion_path.exists():
        raise HTTPException(404, f"Motion file not found: {motion_path.name}")

    async def _replay_in_background():
        try:
            await asyncio.to_thread(arm.replay_file, str(motion_path), payload.speed)
        except Exception:
            log.exception("Test replay failed")

    background = asyncio.create_task(_replay_in_background())
    _BG_TASKS.add(background)
    background.add_done_callback(_BG_TASKS.discard)
    return {"ok": True, "name": motion_path.name, "speed": payload.speed}
@router.post("/cancel")
async def cancel_replay():
    """Ask the arm to cancel the current replay.

    Only ``arm.cancel()`` is called here. Per the original note, the replay
    routine itself performs the smooth return-to-home once it sees the cancel
    flag (mirroring g1_replay_v4_stable.py), so nothing else is scheduled.
    """
    from Project.Sanad.main import arm as arm_ctl

    arm_ctl.cancel()
    return dict(ok=True, message="Cancelled — returning to home pose smoothly.")
@router.get("/status")
async def replay_status():
    """Combine arm status with teaching status.

    ``teaching`` is an empty dict when the teacher object is falsy.
    """
    from Project.Sanad.main import arm, teacher

    arm_state = arm.status()
    if teacher:
        teaching_state = teacher.status()
    else:
        teaching_state = {}
    return {"arm": arm_state, "teaching": teaching_state}
# -- teaching mode --
@router.post("/teach/start")
async def start_teaching(payload: TeachRequest):
    """Begin a teaching (motion recording) session.

    The motion name is validated the same way the other handlers in this
    module validate file names (``safe_path_under``), and must be a single
    path component, before it is handed to ``teacher.start``.

    Raises:
        HTTPException 503: the teaching module is not available.
        HTTPException 409: movement is armed, a session is already active, or
            ``<name>.jsonl`` already exists.
        HTTPException 400: the name is empty or not a plain file name.
    """
    from Project.Sanad.main import teacher
    if teacher is None:
        raise HTTPException(503, "Teaching module not available.")
    _block_if_movement_armed()
    if teacher.is_recording:
        raise HTTPException(409, "Teaching session already active.")
    if not payload.name.strip():
        raise HTTPException(400, "Invalid motion name.")
    file_name = f"{payload.name}.jsonl"
    # NOTE(review): safe_path_under is assumed to reject paths escaping
    # MOTIONS_DIR, as its other uses in this module imply — confirm in _safe_io.
    existing = safe_path_under(MOTIONS_DIR, file_name)
    # A differing basename means the name carried directory components.
    if existing.name != file_name:
        raise HTTPException(400, "Invalid motion name.")
    if existing.exists():
        raise HTTPException(409, f"Motion file already exists: {payload.name}.jsonl")
    return teacher.start(payload.name, payload.duration_sec)
@router.post("/teach/stop")
async def stop_teaching():
    """Stop the teaching session and return ``teacher.stop()``'s result.

    Raises HTTPException 503 when the teaching module is not available.
    """
    from Project.Sanad.main import teacher as teaching

    if teaching is not None:
        return teaching.stop()
    raise HTTPException(503, "Teaching module not available.")
@router.get("/teach/status")
async def teaching_status():
    """Return ``teacher.status()``, or a fixed idle payload when there is no teacher."""
    from Project.Sanad.main import teacher as teaching

    if teaching is not None:
        return teaching.status()
    return {"recording": False, "phase": "idle"}

120
vendor/Sanad/dashboard/routes/scripts.py vendored Normal file
View File

@ -0,0 +1,120 @@
"""Script/prompt file management — CRUD for sanad_script.txt, sanad_rule.txt, etc."""
from __future__ import annotations
from datetime import datetime
from pathlib import Path
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Project.Sanad.config import SCRIPTS_DIR
from Project.Sanad.dashboard.routes._safe_io import (
atomic_write_text, MAX_UPLOAD_BYTES,
)
router = APIRouter()
MAX_SCRIPT_BYTES = MAX_UPLOAD_BYTES
def _safe_path(name: str) -> Path:
    """Resolve a script name to a path that is guaranteed to sit under SCRIPTS_DIR.

    Args:
        name: bare file name; surrounding whitespace is stripped.

    Returns:
        The resolved absolute path.

    Raises:
        HTTPException 400: the name is empty, is "." or "..", contains a path
            separator or NUL byte, or resolves outside SCRIPTS_DIR (for
            example through a symlink).
    """
    cleaned = name.strip()
    if (
        not cleaned
        or "/" in cleaned
        or "\\" in cleaned
        or "\x00" in cleaned  # resolve() raises ValueError on embedded NUL
        or cleaned in {".", ".."}
    ):
        raise HTTPException(400, "Invalid script name.")
    base = SCRIPTS_DIR.resolve()
    path = (base / cleaned).resolve()
    # Component-wise containment check: a plain string prefix test would also
    # accept a sibling directory such as "<SCRIPTS_DIR>_other".
    try:
        path.relative_to(base)
    except ValueError:
        raise HTTPException(400, "Path traversal denied.")
    return path
@router.get("/")
async def list_scripts():
    """List the regular files in SCRIPTS_DIR, sorted by lower-cased name.

    The directory is created if missing. Each item carries the name, size in
    bytes, and modification time as an ISO string with second precision.
    """
    SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)

    def _describe(script: Path) -> dict:
        info = script.stat()
        stamp = datetime.fromtimestamp(info.st_mtime)
        return {
            "name": script.name,
            "size_bytes": info.st_size,
            "modified_at": stamp.isoformat(timespec="seconds"),
        }

    ordered = sorted(SCRIPTS_DIR.iterdir(), key=lambda x: x.name.lower())
    files = [_describe(candidate) for candidate in ordered if candidate.is_file()]
    return {"path": str(SCRIPTS_DIR), "files": files}
# Request body for POST /load.
class ScriptLoad(BaseModel):
    name: str  # script file name, validated by _safe_path
@router.post("/load")
async def load_script(payload: ScriptLoad):
    """Return a script's text and file metadata.

    The file is decoded as ``utf-8-sig``.

    Raises HTTPException 404 when the script does not exist.
    """
    script = _safe_path(payload.name)
    if not script.exists():
        raise HTTPException(404, f"Script not found: {payload.name}")
    body = script.read_text(encoding="utf-8-sig")
    info = script.stat()
    modified = datetime.fromtimestamp(info.st_mtime).isoformat(timespec="seconds")
    return dict(
        name=script.name,
        content=body,
        size_bytes=info.st_size,
        modified_at=modified,
    )
# Request body for POST /save.
class ScriptSave(BaseModel):
    name: str  # script file name, validated by _safe_path
    content: str  # full text to write; UTF-8 size is capped at MAX_SCRIPT_BYTES
@router.post("/save")
async def save_script(payload: ScriptSave):
    """Write (create or overwrite) a script file.

    Raises HTTPException 413 when the UTF-8 encoded content exceeds
    MAX_SCRIPT_BYTES.
    """
    encoded_size = len(payload.content.encode("utf-8"))
    if encoded_size > MAX_SCRIPT_BYTES:
        raise HTTPException(413, f"Content too large (max {MAX_SCRIPT_BYTES} bytes).")
    script = _safe_path(payload.name)
    SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
    atomic_write_text(script, payload.content)
    written = script.stat().st_size
    return {"ok": True, "name": script.name, "size_bytes": written}
# Request body for POST /create.
class ScriptCreate(BaseModel):
    name: str  # script file name, validated by _safe_path
    content: str = ""  # initial text; defaults to an empty file
@router.post("/create")
async def create_script(payload: ScriptCreate):
    """Create a new script file; never overwrites an existing one.

    Raises:
        HTTPException 413: UTF-8 encoded content exceeds MAX_SCRIPT_BYTES.
        HTTPException 409: a file with that name already exists.
    """
    encoded_size = len(payload.content.encode("utf-8"))
    if encoded_size > MAX_SCRIPT_BYTES:
        raise HTTPException(413, f"Content too large (max {MAX_SCRIPT_BYTES} bytes).")
    script = _safe_path(payload.name)
    if script.exists():
        raise HTTPException(409, f"File already exists: {payload.name}")
    SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
    atomic_write_text(script, payload.content)
    return dict(ok=True, name=script.name)
# Request body for POST /rename.
class ScriptRename(BaseModel):
    old_name: str  # existing script name, validated by _safe_path
    new_name: str  # target script name, validated by _safe_path
@router.post("/rename")
async def rename_script(payload: ScriptRename):
    """Rename a script file inside SCRIPTS_DIR.

    Raises:
        HTTPException 404: the source script does not exist.
        HTTPException 409: the target name is already taken.
    """
    source = _safe_path(payload.old_name)
    target = _safe_path(payload.new_name)
    if not source.exists():
        raise HTTPException(404, f"Not found: {payload.old_name}")
    if target.exists():
        raise HTTPException(409, f"Already exists: {payload.new_name}")
    source.rename(target)
    return dict(ok=True, old_name=payload.old_name, new_name=target.name)
# Request body for POST /delete.
class ScriptDelete(BaseModel):
    name: str  # script file name, validated by _safe_path
@router.post("/delete")
async def delete_script(payload: ScriptDelete):
    """Delete a script file.

    Raises HTTPException 404 when the script does not exist.
    """
    script = _safe_path(payload.name)
    if script.exists():
        script.unlink()
        return dict(ok=True, deleted=payload.name)
    raise HTTPException(404, f"Not found: {payload.name}")

101
vendor/Sanad/dashboard/routes/skills.py vendored Normal file
View File

@ -0,0 +1,101 @@
"""Skill registry CRUD endpoints + skill execution."""
from __future__ import annotations
from fastapi import APIRouter, HTTPException, UploadFile, File
from pydantic import BaseModel
from Project.Sanad.config import AUDIO_RECORDINGS_DIR
from Project.Sanad.dashboard.routes._safe_io import (
safe_path_under, check_upload_size, atomic_write_bytes,
)
router = APIRouter()
# Request body for POST / (create skill). All fields are forwarded as keyword
# arguments to the Skill constructor by the handler.
class SkillCreate(BaseModel):
    id: str = ""
    audio_file: str = ""
    motion_file: str = ""
    callback: str = ""
    sync_mode: str = "parallel"
    enabled: bool = True
    description: str = ""
# Request body for PUT /{skill_id}. Fields left as None are dropped by the
# handler, so only the supplied fields reach the registry update.
class SkillUpdate(BaseModel):
    audio_file: str | None = None
    motion_file: str | None = None
    callback: str | None = None
    sync_mode: str | None = None
    enabled: bool | None = None
    description: str | None = None
@router.get("/")
async def list_skills():
    """Return every skill as reported by ``brain.registry.list_skills()``."""
    from Project.Sanad.main import brain

    registry = brain.registry
    return {"skills": registry.list_skills()}
@router.get("/{skill_id}")
async def get_skill(skill_id: str):
    """Return one skill as a dict.

    Raises HTTPException 404 when the registry has no such skill.
    """
    from Project.Sanad.main import brain

    found = brain.registry.get(skill_id)
    if found is not None:
        return found.to_dict()
    raise HTTPException(404, f"Skill not found: {skill_id}")
@router.post("/")
async def create_skill(payload: SkillCreate):
    """Build a Skill from the payload and add it to the registry.

    Raises HTTPException 400 when constructing or adding the skill raises
    ValueError.
    """
    from Project.Sanad.main import brain
    from Project.Sanad.core.skill_registry import Skill

    try:
        fields = payload.model_dump()
        created = brain.registry.add(Skill(**fields))
    except ValueError as exc:
        raise HTTPException(400, str(exc))
    return dict(ok=True, skill=created.to_dict())
@router.put("/{skill_id}")
async def update_skill(skill_id: str, payload: SkillUpdate):
    """Apply the non-None payload fields to an existing skill.

    Raises:
        HTTPException 400: the registry rejects the update with ValueError.
        HTTPException 404: the registry returns None (skill not found).
    """
    from Project.Sanad.main import brain

    changes = {}
    for field, value in payload.model_dump().items():
        if value is not None:
            changes[field] = value
    try:
        result = brain.registry.update(skill_id, changes)
    except ValueError as exc:
        raise HTTPException(400, str(exc))
    if result is None:
        raise HTTPException(404, f"Skill not found: {skill_id}")
    return dict(ok=True, skill=result.to_dict())
@router.delete("/{skill_id}")
async def delete_skill(skill_id: str):
    """Delete a skill from the registry.

    Raises HTTPException 404 when the registry's delete result is falsy.
    """
    from Project.Sanad.main import brain

    removed = brain.registry.delete(skill_id)
    if removed:
        return dict(ok=True, deleted=removed)
    raise HTTPException(404, f"Skill not found: {skill_id}")
@router.post("/{skill_id}/execute")
async def execute_skill(skill_id: str):
    """Run a skill and return ``brain.execute_skill``'s result unchanged."""
    from Project.Sanad.main import brain as brain_ctl

    return await brain_ctl.execute_skill(skill_id)
@router.post("/upload-audio")
async def upload_audio(file: UploadFile = File(...)):
    """Store an uploaded ``.wav`` in AUDIO_RECORDINGS_DIR for skill binding.

    Raises HTTPException 400 when the upload has no name or the name does not
    end in ``.wav`` (case-insensitive).
    """
    upload_name = file.filename
    if not (upload_name and upload_name.lower().endswith(".wav")):
        raise HTTPException(400, "Only .wav files are accepted.")
    AUDIO_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)
    target = safe_path_under(AUDIO_RECORDINGS_DIR, upload_name)
    blob = await file.read()
    check_upload_size(blob)
    atomic_write_bytes(target, blob)
    return {"ok": True, "path": str(target), "size_bytes": len(blob)}

133
vendor/Sanad/dashboard/routes/system.py vendored Normal file
View File

@ -0,0 +1,133 @@
"""System information endpoints — network, subsystems, dashboard binding."""
from __future__ import annotations
import asyncio
import os
import platform
import socket
import sys
from typing import Any
from fastapi import APIRouter
from Project.Sanad.config import (
BASE_DIR,
DASHBOARD_HOST,
DASHBOARD_INTERFACE,
DASHBOARD_PORT,
DDS_NETWORK_INTERFACE,
list_network_interfaces,
)
from Project.Sanad.core.logger import get_logger
log = get_logger("system_route")
router = APIRouter()
def _safe_status(component, name: str) -> dict[str, Any]:
if component is None:
return {"available": False}
try:
if hasattr(component, "status") and callable(component.status):
s = component.status()
if not isinstance(s, dict):
s = {"raw": str(s)}
s.setdefault("available", True)
return s
return {"available": True}
except Exception as exc:
log.warning("status() failed for %s: %s", name, exc)
return {"available": True, "error": str(exc)}
@router.get("/info")
async def system_info():
    """Build the system snapshot shown in the dashboard's system panel.

    All collection runs on a worker thread. Each section (subsystems, audio,
    network) is gathered best-effort: a failure in one section is reported
    inside that section, or replaced by an empty value, rather than failing
    the request.
    """

    def _subsystem_entries():
        # One entry per registered subsystem, in sorted name order.
        try:
            from Project.Sanad.main import SUBSYSTEMS as registry
        except Exception:
            registry = {}
        entries = []
        for key in sorted(registry):
            component = registry[key]
            item = {
                "name": key,
                "connected": component is not None,
            }
            has_status = (
                component is not None
                and hasattr(component, "status")
                and callable(component.status)
            )
            if has_status:
                try:
                    report = component.status()
                    if isinstance(report, dict):
                        item["status"] = report
                except Exception as exc:
                    item["status_error"] = str(exc)
            entries.append(item)
        return entries

    def _audio_snapshot():
        # Audio device current selection (best-effort)
        try:
            from Project.Sanad.voice import audio_devices as ad
            return {
                "pactl_available": ad.pactl_available(),
                "current": ad.current_selection(),
                "detected_profile_ids": [
                    d["profile"]["id"] for d in ad.detect_plugged_profiles()
                ] if ad.pactl_available() else [],
            }
        except Exception as exc:
            return {"error": str(exc)}

    def _network_interfaces():
        try:
            return list_network_interfaces()
        except Exception:
            return []

    def _snapshot():
        subsystem_list = _subsystem_entries()
        connected_count = len([s for s in subsystem_list if s["connected"]])
        audio_info = _audio_snapshot()
        interfaces = _network_interfaces()

        # When bound to all interfaces, show the first up, non-loopback
        # address instead of 0.0.0.0.
        bound_host = DASHBOARD_HOST
        display_host = bound_host
        if bound_host == "0.0.0.0":
            candidates = [
                i["ip"] for i in interfaces
                if i["is_up"] and i["ip"] and not i["ip"].startswith("127.")
            ]
            if candidates:
                display_host = candidates[0]

        host_section = {
            "hostname": socket.gethostname(),
            "platform": platform.platform(),
            "python": sys.version.split()[0],
            "executable": sys.executable,
            "base_dir": str(BASE_DIR),
            "pid": os.getpid(),
        }
        dashboard_section = {
            "interface": DASHBOARD_INTERFACE,
            "bound_host": bound_host,
            "display_host": display_host,
            "port": DASHBOARD_PORT,
            "url": f"http://{display_host}:{DASHBOARD_PORT}",
        }
        subsystems_section = {
            "total": len(subsystem_list),
            "connected": connected_count,
            "disconnected": len(subsystem_list) - connected_count,
            "list": subsystem_list,
        }
        return {
            "host": host_section,
            "dashboard": dashboard_section,
            "dds": {"interface": DDS_NETWORK_INTERFACE},
            "network": {"interfaces": interfaces},
            "subsystems": subsystems_section,
            "audio": audio_info,
        }

    return await asyncio.to_thread(_snapshot)

View File

@ -0,0 +1,67 @@
"""REST endpoints backing the 3D motor-temperature dashboard (N1).
Serves the motor name/mesh mapping + thresholds, and a one-shot temperature
snapshot (the front-end's initial fetch fallback). The live stream is over
/ws/motor-temps (dashboard/websockets/motor_temps.py). The 3D view itself is
the static page at /static/temp3d/index.html.
"""
from __future__ import annotations
import time
from fastapi import APIRouter
from Project.Sanad.dashboard.temp_motor_map import (
MOTOR_NAMES,
MOTOR_TO_MESH,
TEMP_HOT_THRESHOLD,
TEMP_MAX,
TEMP_MIN,
TEMP_WARM_THRESHOLD,
build_payload,
)
router = APIRouter()
def _get_arm():
"""Lazy import — avoids a circular import on dashboard load."""
try:
from Project.Sanad.main import arm # type: ignore
return arm
except Exception:
return None
@router.get("/mapping")
async def motor_mapping():
    """Return the motor name and mesh maps plus the temperature thresholds."""
    thresholds = dict(
        min=TEMP_MIN,
        max=TEMP_MAX,
        warm=TEMP_WARM_THRESHOLD,
        hot=TEMP_HOT_THRESHOLD,
    )
    return dict(
        motor_names=MOTOR_NAMES,
        motor_to_mesh=MOTOR_TO_MESH,
        thresholds=thresholds,
    )
@router.get("/motors")
async def motors_snapshot():
    """Return a one-shot temperature + position payload.

    Each reading falls back to an empty list when the arm is unavailable or
    the read raises; the payload is assembled by ``build_payload`` with the
    current time.
    """
    arm_ctl = _get_arm()

    def _read(method_name: str) -> list:
        # Best effort: an unavailable arm or a failing read yields [].
        if arm_ctl is None:
            return []
        try:
            return getattr(arm_ctl, method_name)()
        except Exception:
            return []

    temps: list = _read("get_motor_temps")
    positions: list = _read("get_current_q")
    return build_payload(temps, positions, time.time())

View File

@ -0,0 +1,146 @@
"""Typed Replay dashboard endpoints.
Full CRUD over the records index:
POST /say generate + play + optionally record
POST /replay-last re-play cached audio
POST /save-last persist cached generation
GET /records list
GET /records/{name} get one
POST /records/{name}/play play saved WAV (speaker or raw)
POST /records/{name}/rename rename
DELETE /records/{name} delete
GET /status engine + session state
"""
from __future__ import annotations
import asyncio
from typing import Literal
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Project.Sanad.core.config_loader import section as _cfg_section
router = APIRouter()
# MAX_TEXT_LEN — SINGLE SOURCE in dashboard.api_input
MAX_TEXT_LEN = _cfg_section("dashboard", "api_input").get("max_text_len", 2000)
# Request body for POST /say.
class SayPayload(BaseModel):
    text: str  # must be non-blank and at most MAX_TEXT_LEN characters
    record: bool = False  # forwarded to the engine's say() as record=
    record_name: str = ""  # forwarded to the engine's say() as record_name=
# Request body for POST /save-last.
class SaveLastPayload(BaseModel):
    record_name: str = ""  # forwarded to the engine's save_last()
# Request body for POST /records/{name}/rename.
class RenamePayload(BaseModel):
    new_name: str  # forwarded to the engine's rename_record()
# Request body for POST /records/{name}/play.
class PlayRecordPayload(BaseModel):
    file_kind: Literal["speaker", "raw"] = "speaker"  # which saved WAV to play
def _engine():
    """Return the typed-replay engine.

    Raises HTTPException 503 when ``typed_replay`` is None.
    """
    from Project.Sanad.main import typed_replay as engine

    if engine is not None:
        return engine
    raise HTTPException(503, "TypedReplayEngine not initialized.")
# ───────────────────── generate / replay ─────────────────────
@router.post("/say")
async def say(payload: SayPayload):
    """Generate and play speech for typed text, optionally recording it.

    Raises:
        HTTPException 400: blank text, or the engine raises ValueError.
        HTTPException 413: text longer than MAX_TEXT_LEN.
        HTTPException 503: engine missing, or the engine raises RuntimeError.
    """
    text = payload.text
    if not (text and text.strip()):
        raise HTTPException(400, "text cannot be empty")
    if len(text) > MAX_TEXT_LEN:
        raise HTTPException(413, f"text too long (max {MAX_TEXT_LEN})")
    engine = _engine()
    try:
        outcome = await engine.say(
            text, record=payload.record, record_name=payload.record_name
        )
    except ValueError as exc:
        raise HTTPException(400, str(exc))
    except RuntimeError as exc:
        raise HTTPException(503, str(exc))
    return outcome
@router.post("/replay-last")
async def replay_last():
    """Replay the last generation on a worker thread.

    Raises HTTPException 400 when the engine raises RuntimeError.
    """
    engine = _engine()
    try:
        outcome = await asyncio.to_thread(engine.replay_last)
    except RuntimeError as exc:
        raise HTTPException(400, str(exc))
    return outcome
@router.post("/save-last")
async def save_last(payload: SaveLastPayload):
    """Persist the last generation under ``payload.record_name``.

    Raises HTTPException 400 when the engine raises RuntimeError.
    """
    engine = _engine()
    try:
        saved = engine.save_last(payload.record_name)
    except RuntimeError as exc:
        raise HTTPException(400, str(exc))
    return dict(ok=True, record=saved)
# ───────────────────── record CRUD ───────────────────────────
@router.get("/records")
async def list_records():
    """Return the engine's record listing unchanged."""
    engine = _engine()
    return engine.list_records()
@router.get("/records/{name}")
async def get_record(name: str):
    """Return one record by name.

    Raises HTTPException 404 when the lookup raises KeyError.
    """
    try:
        record = _engine().find_record(name)
    except KeyError:
        raise HTTPException(404, f"record not found: {name}")
    return record
@router.post("/records/{name}/play")
async def play_record(name: str, payload: PlayRecordPayload):
    """Play a saved record's WAV on a worker thread.

    Raises:
        HTTPException 404: the engine raises KeyError (unknown record).
        HTTPException 410: the engine raises FileNotFoundError.
        HTTPException 503: engine missing, or the engine raises RuntimeError.
    """
    engine = _engine()
    try:
        outcome = await asyncio.to_thread(engine.play_record, name, payload.file_kind)
    except KeyError:
        raise HTTPException(404, f"record not found: {name}")
    except FileNotFoundError as exc:
        raise HTTPException(410, f"file missing on disk: {exc}")
    except RuntimeError as exc:
        raise HTTPException(503, str(exc))
    return outcome
@router.post("/records/{name}/rename")
async def rename_record(name: str, payload: RenamePayload):
    """Rename a saved record.

    Raises:
        HTTPException 404: the engine raises KeyError (unknown record).
        HTTPException 400: the engine raises ValueError.
    """
    engine = _engine()
    try:
        renamed = engine.rename_record(name, payload.new_name)
    except KeyError:
        raise HTTPException(404, f"record not found: {name}")
    except ValueError as exc:
        raise HTTPException(400, str(exc))
    return dict(ok=True, record=renamed)
@router.delete("/records/{name}")
async def delete_record(name: str):
    """Delete a saved record; the engine's result dict is merged into the reply.

    Raises HTTPException 404 when the engine raises KeyError.
    """
    engine = _engine()
    try:
        details = engine.delete_record(name)
    except KeyError:
        raise HTTPException(404, f"record not found: {name}")
    reply = {"ok": True}
    reply.update(details)
    return reply
# ───────────────────── status ────────────────────────────────
@router.get("/status")
async def status():
    """Report engine availability merged with the engine's own status dict.

    Unlike the other endpoints this never raises 503: a missing engine is
    reported as ``{"available": False}``.
    """
    from Project.Sanad.main import typed_replay as engine

    if engine is None:
        return {"available": False}
    reply = {"available": True}
    reply.update(engine.status())
    return reply

237
vendor/Sanad/dashboard/routes/voice.py vendored Normal file
View File

@ -0,0 +1,237 @@
"""Voice endpoints — Gemini interaction, local TTS, prompt management."""
from __future__ import annotations
import asyncio
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
log = get_logger("voice_route")
router = APIRouter()
_VR = _cfg_section("dashboard", "voice_route")
_API = _cfg_section("dashboard", "api_input")
# MAX_TEXT_LEN — SINGLE SOURCE in dashboard.api_input
MAX_TEXT_LEN = _API.get("max_text_len", 2000)
_API_KEY_MASK_VISIBLE = _VR.get("api_key_mask_visible", 4)
def _mask_api_key(key: str) -> str:
"""Mask an API key for display — keeps 4 chars on each end.
Examples:
"" ""
"AIza123" "*******" (8 chars = full mask)
"AIzaSy...kqf8" "AIza***...kqf8" (>8 chars = partial mask)
"""
if not key:
return ""
if len(key) <= 8:
return "*" * len(key)
return f"{key[:4]}{'*' * (len(key) - 8)}{key[-4:]}"
# Request body for POST /generate.
class TextPayload(BaseModel):
    text: str  # must be non-blank and at most MAX_TEXT_LEN characters
    engine: str = "gemini"  # "gemini" | "local"
@router.get("/status")
async def voice_status():
    """Report Gemini and local-TTS status; a falsy component yields ``{}``."""
    from Project.Sanad.main import voice_client, local_tts

    gemini_state = voice_client.status() if voice_client else {}
    local_state = local_tts.status() if local_tts else {}
    return dict(gemini=gemini_state, local_tts=local_state)
@router.post("/generate")
async def generate_speech(payload: TextPayload):
    """Synthesize speech for ``payload.text`` and play it if audio is available.

    ``engine == "local"`` uses the local TTS; any other value takes the
    Gemini path, reconnecting first when the client is not connected.

    Raises:
        HTTPException 400: blank text.
        HTTPException 413: text longer than MAX_TEXT_LEN.
        HTTPException 503: required engine missing, reconnect failed, or
            ``send_text`` raised RuntimeError.
        HTTPException 409: the voice session is owned by someone else.
        HTTPException 502: any other error while talking to Gemini.
    """
    text = payload.text
    if not text.strip():
        raise HTTPException(400, "Text cannot be empty.")
    if len(text) > MAX_TEXT_LEN:
        raise HTTPException(413, f"Text too long (max {MAX_TEXT_LEN} chars).")
    from Project.Sanad.main import voice_client, local_tts, audio_mgr

    # ── local TTS path ──
    if payload.engine == "local":
        if local_tts is None:
            raise HTTPException(503, "Local TTS not available.")
        pcm = await asyncio.to_thread(local_tts.synthesize, text)
        if audio_mgr:
            await asyncio.to_thread(audio_mgr.play_pcm, pcm, 1, 16000, 2)
        seconds = round(len(pcm) / (16000 * 2), 3)
        return {"ok": True, "engine": "local", "duration_sec": seconds}

    # ── Gemini path ──
    if voice_client is None:
        raise HTTPException(503, "Voice client not initialized.")
    if not voice_client.connected:
        try:
            await voice_client.connect()
        except Exception:
            log.exception("Gemini reconnect failed in /generate")
            raise HTTPException(503, "Gemini not connected and reconnect failed.")
    # Check session ownership — TypedReplay or live loop may hold it
    current_owner = voice_client.session_owner
    if current_owner is not None:
        raise HTTPException(
            409,
            f"Voice session busy (owned by {voice_client.session_owner})",
        )
    try:
        audio_bytes, text_parts = await voice_client.send_text(
            text, owner="voice_route"
        )
    except RuntimeError as exc:
        raise HTTPException(503, str(exc))
    except Exception as exc:
        raise HTTPException(502, f"Gemini communication error: {exc}")
    if audio_bytes and audio_mgr:
        await asyncio.to_thread(audio_mgr.play_pcm, audio_bytes, 1, 24000, 2)
    return {
        "ok": True,
        "engine": "gemini",
        "has_audio": bool(audio_bytes),
        "text_response": text_parts,
    }
@router.post("/connect")
async def connect_gemini():
    """Open the Gemini session and report the resulting connection flag.

    Raises:
        HTTPException 503: the voice client is None.
        HTTPException 502: ``connect()`` raised.
    """
    from Project.Sanad.main import voice_client as client

    if client is None:
        raise HTTPException(503, "Voice client not initialized.")
    try:
        await client.connect()
    except Exception as exc:
        raise HTTPException(502, f"Gemini connection failed: {exc}")
    return dict(connected=client.connected)
@router.post("/disconnect")
async def disconnect_gemini():
    """Close the Gemini session if a client exists; always reports disconnected."""
    from Project.Sanad.main import voice_client as client

    if not client:
        return {"connected": False}
    await client.disconnect()
    return {"connected": False}
# ─────────────────────── Gemini API key management ───────────────────────
# Request body for POST /api-key.
class ApiKeyPayload(BaseModel):
    api_key: str  # raw key; the handler strips whitespace and validates it
@router.get("/api-key")
async def get_api_key():
    """Describe the current Gemini API key without revealing it.

    Response keys:
        has_key: whether a non-empty key is set.
        masked:  the key passed through ``_mask_api_key``.
        length:  number of characters in the key.
        source:  "config_file" when the stored config holds a truthy
                 ``gemini.api_key``, otherwise "default" (also used when the
                 config cannot be read).
    """
    import Project.Sanad.config as cfg_mod

    current = getattr(cfg_mod, "GEMINI_API_KEY", "") or ""
    # Detect where the value came from (persisted override vs module default)
    origin = "default"
    try:
        from Project.Sanad.config import load_config
        persisted = load_config().get("gemini", {}) or {}
        if persisted.get("api_key"):
            origin = "config_file"
    except Exception:
        origin = "default"
    return dict(
        has_key=bool(current),
        masked=_mask_api_key(current),
        length=len(current),
        source=origin,
    )
@router.post("/api-key")
async def update_api_key(payload: ApiKeyPayload):
    """Update the Gemini API key — persists to data/motions/config.json and
    hot-swaps the in-memory value so the next Gemini connect uses it.

    Also disconnects any currently-connected Gemini session so that the
    next reconnect picks up the new key cleanly. Returns the NEW masked
    key + a flag telling the dashboard to trigger a reconnect.

    Steps, in order: validate -> persist -> patch module globals -> disconnect.
    A persistence failure aborts with 500 before anything in memory changes;
    failures while patching globals or disconnecting are only logged.

    Raises:
        HTTPException 400: key is empty, shorter than 20 chars, or does not
            start with "AIza".
        HTTPException 500: the config could not be saved.
    """
    key = payload.api_key.strip()
    if not key:
        raise HTTPException(400, "API key cannot be empty.")
    if len(key) < 20:
        raise HTTPException(400, "API key looks too short.")
    if not key.startswith("AIza"):
        raise HTTPException(
            400,
            "Gemini API keys normally start with 'AIza'. "
            "Double-check you're pasting a Google AI Studio key.",
        )
    # Persist to data/motions/config.json (atomic temp-then-replace)
    try:
        from Project.Sanad.config import load_config, save_config
        cfg = load_config() or {}
        # Reuse the existing "gemini" section only if it is a dict; otherwise start fresh.
        gemini_cfg = cfg.get("gemini") if isinstance(cfg.get("gemini"), dict) else {}
        gemini_cfg["api_key"] = key
        cfg["gemini"] = gemini_cfg
        save_config(cfg)
    except Exception as exc:
        log.exception("Failed to persist API key to config.json")
        raise HTTPException(500, f"Could not save config: {exc}")
    # Hot-swap the in-memory module globals.
    # Both Project.Sanad.config AND Project.Sanad.gemini.client
    # have their OWN reference to GEMINI_API_KEY (the latter was created
    # at `from Project.Sanad.config import GEMINI_API_KEY` at import time).
    # Python's `from X import Y` binds a local name — updating config.Y
    # alone does NOT propagate to the importer, so we must patch both.
    try:
        import Project.Sanad.config as _cfg_mod
        _cfg_mod.GEMINI_API_KEY = key
    except Exception:
        log.exception("could not patch config.GEMINI_API_KEY")
    try:
        import Project.Sanad.gemini.client as _gc
        _gc.GEMINI_API_KEY = key
    except Exception:
        log.exception("could not patch gemini.client.GEMINI_API_KEY")
    # Disconnect any live session so reconnect uses the new key.
    from Project.Sanad.main import voice_client
    was_connected = False
    if voice_client is not None:
        was_connected = bool(getattr(voice_client, "connected", False))
        if was_connected:
            try:
                await voice_client.disconnect()
            except Exception:
                # Best effort: the key is already saved, so still report success.
                log.exception("disconnect during api-key swap failed")
    # Only the key length is logged, never the key itself.
    log.info("Gemini API key updated (length=%d) source=config_file", len(key))
    return {
        "ok": True,
        "masked": _mask_api_key(key),
        "length": len(key),
        "source": "config_file",
        "was_connected": was_connected,
        "message": (
            "API key saved. Click 'Connect' to reopen the Gemini session with "
            "the new key. Any running Live Gemini subprocess must be restarted "
            "separately (Stop → Start) to pick up the new key."
        ),
    }

View File

@ -0,0 +1,72 @@
"""Wake-phrase CRUD endpoints.
Lets the dashboard edit the wake-phrase action mapping stored in
data/wake_phrases.json.
"""
from __future__ import annotations
from typing import Optional
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
router = APIRouter()
# Request body for POST / (add a wake phrase).
class WakePhrasePayload(BaseModel):
    phrase: str  # forwarded to the manager's add()
    action_id: str  # forwarded to the manager's add()
# Request body for POST /enable.
class EnablePayload(BaseModel):
    phrase: str  # identifies the mapping together with action_id
    action_id: str
    enabled: bool  # new enabled state, forwarded to the manager's set_enabled()
def _mgr():
    """Return the wake-phrase manager.

    Raises HTTPException 503 when ``wake_mgr`` is None.
    """
    from Project.Sanad.main import wake_mgr as manager

    if manager is not None:
        return manager
    raise HTTPException(503, "WakePhraseManager not initialized.")
@router.get("/")
async def list_phrases():
    """Return the manager's status together with its phrase list."""
    manager = _mgr()
    current_status = manager.status()
    phrases = manager.list()
    return {"status": current_status, "phrases": phrases}
@router.post("/")
async def add_phrase(payload: WakePhrasePayload):
    """Add a phrase -> action mapping.

    Raises HTTPException 400 when the manager raises ValueError.
    """
    manager = _mgr()
    try:
        created = manager.add(payload.phrase, payload.action_id)
    except ValueError as exc:
        raise HTTPException(400, str(exc))
    return dict(ok=True, entry=created)
@router.delete("/")
async def remove_phrase(phrase: str, action_id: Optional[str] = None):
    """Remove a phrase mapping and report the manager's ``remove`` result.

    ``ok`` is always True; whether anything was removed is conveyed only by
    the ``removed`` value.
    """
    manager = _mgr()
    outcome = manager.remove(phrase, action_id)
    return dict(ok=True, removed=outcome)
@router.post("/enable")
async def set_enabled(payload: EnablePayload):
    """Enable or disable one phrase + action mapping.

    Raises HTTPException 404 when the manager's result is falsy.
    """
    manager = _mgr()
    changed = manager.set_enabled(payload.phrase, payload.action_id, payload.enabled)
    if changed:
        return {"ok": True}
    raise HTTPException(404, "phrase+action_id not found")
@router.get("/status")
async def status():
    """Return the wake-phrase manager's status unchanged."""
    manager = _mgr()
    return manager.status()

421
vendor/Sanad/dashboard/routes/zones.py vendored Normal file
View File

@ -0,0 +1,421 @@
"""Zones tab — zone → place → linked-faces management + "go here" destination.
Hierarchy (replaces the old flat places):
    Zone (name + description)
        Place (name + description + optional reference photos + linked face ids)
Routes live under /api/zones. Toggle + CRUD changes write
data/.recognition_state.json (the SAME file faces use); the Gemini child polls
it at 1 Hz and re-primes / announces mid-session. The "go here" endpoints set a
navigation target the robot will head to once N2 locomotion is wired; for now
they just record the target and feed Gemini the place's reference.
"""
from __future__ import annotations
import io
from typing import Optional
from fastapi import APIRouter, File, HTTPException, Query, UploadFile
from fastapi.responses import FileResponse, StreamingResponse
from pydantic import BaseModel
from Project.Sanad.config import BASE_DIR
from Project.Sanad.core.logger import get_logger
from Project.Sanad.dashboard.routes._safe_io import check_upload_size
from Project.Sanad.vision import recognition_state
log = get_logger("zones_routes")
router = APIRouter()
STATE_PATH = BASE_DIR / "data" / ".recognition_state.json"
# ── lazy subsystem accessors ────────────────────────────────
def _get_camera():
try:
from Project.Sanad.main import camera # type: ignore
return camera
except Exception:
return None
def _get_zone_gallery():
try:
from Project.Sanad.main import zone_gallery # type: ignore
return zone_gallery
except Exception:
return None
def _get_face_gallery():
try:
from Project.Sanad.main import gallery # type: ignore
return gallery
except Exception:
return None
def _require_zones():
    """Return the zone gallery.

    Raises HTTPException 503 when the gallery is unavailable.
    """
    zones = _get_zone_gallery()
    if zones is not None:
        return zones
    raise HTTPException(503, "Zone gallery subsystem unavailable.")
def _bump_zones_version() -> int:
    """Increment ``zones_version`` in the recognition state file and return it.

    NOTE(review): the read and the write are two separate calls, so two
    concurrent bumps could produce the same version — confirm whether
    recognition_state serializes access.
    """
    next_version = recognition_state.read(STATE_PATH).zones_version + 1
    recognition_state.mutate(STATE_PATH, zones_version=next_version)
    return next_version
def _validate_image(content: bytes, filename: str | None = None) -> None:
    """Reject uploads that are oversized, tiny, or not JPEG/PNG.

    The format test looks only at the leading magic bytes.

    Raises HTTPException 400 for content shorter than 16 bytes or with an
    unrecognized signature; the size limit is enforced by ``check_upload_size``.
    """
    check_upload_size(content)
    if len(content) < 16:
        raise HTTPException(400, "Image too small / empty.")
    jpeg_magic = b"\xff\xd8\xff"
    png_magic = b"\x89PNG\r\n\x1a\n"
    if content.startswith((jpeg_magic, png_magic)):
        return
    raise HTTPException(400, f"Only JPEG/PNG accepted (got {filename or 'unknown'}).")
def _safe_photo_name(name: str) -> None:
if "/" in name or ".." in name or "\x00" in name:
raise HTTPException(400, "Invalid photo name.")
def _resolve_faces(face_ids: list[int]) -> list[dict]:
    """Map linked face ids to ``{"id", "name"}`` dicts via the face gallery.

    The name is ``None`` when the face gallery is unavailable, the lookup
    returns a falsy entry, or the lookup raises.
    """
    faces = _get_face_gallery()

    def _name_for(face_id):
        # Best-effort lookup: any failure degrades to "name unknown".
        if faces is None:
            return None
        try:
            entry = faces.get(face_id)
            return entry.name if entry else None
        except Exception:
            return None

    return [{"id": face_id, "name": _name_for(face_id)} for face_id in face_ids]
def _place_to_dict(p) -> dict:
    """Serialise a place via ``p.to_dict()`` and add a resolved ``faces`` list."""
    payload = p.to_dict()
    payload["faces"] = _resolve_faces(p.face_ids)
    return payload
def _zone_to_dict(z) -> dict:
return {
"id": z.id, "name": z.name, "description": z.description,
"added_at": z.added_at,
"places": [_place_to_dict(p) for p in z.places],
}
def _nav_target_dict(st, gallery) -> Optional[dict]:
zid, pid = st.nav_target_zone_id, st.nav_target_place_id
if not zid or not pid:
return None
zone_name = place_name = None
if gallery is not None:
try:
z = gallery.get_zone(zid)
zone_name = z.name if z else None
p = gallery.get_place(zid, pid)
place_name = p.name if p else None
except Exception:
pass
return {"zone_id": zid, "place_id": pid,
"zone_name": zone_name, "place_name": place_name}
# ── state + toggle ──────────────────────────────────────────
@router.get("/state")
async def get_state():
    """Report the zone-recognition toggle, version, counts and nav target.

    Counts stay at 0 when the zone gallery is unavailable or listing raises.
    """
    state = recognition_state.read(STATE_PATH)
    gallery = _get_zone_gallery()

    n_zones = 0
    n_places = 0
    if gallery is not None:
        try:
            all_zones = gallery.list_zones()
            n_zones = len(all_zones)
            n_places = sum(len(zone.places) for zone in all_zones)
        except Exception:
            pass

    response = {
        "zone_rec_enabled": state.zone_rec_enabled,
        "zones_version": state.zones_version,
        "zones_count": n_zones,
        "places_count": n_places,
    }
    response["nav_target"] = _nav_target_dict(state, gallery)
    return response
@router.post("/zone-rec")
async def set_zone_rec(on: bool = Query(...)):
    """Persist the zone-recognition on/off flag and echo the stored value."""
    enabled = bool(on)
    state = recognition_state.mutate(STATE_PATH, zone_rec_enabled=enabled)
    log.info("zone recognition %s", "ON" if on else "OFF")
    return {"ok": True, "zone_rec_enabled": state.zone_rec_enabled}
@router.post("/sync")
async def sync_zones():
    """Bump ``zones_version`` on request and return the new value."""
    new_version = _bump_zones_version()
    log.info("zones sync requested → v.%d", new_version)
    return {"ok": True, "zones_version": new_version}
# ── zones CRUD ──────────────────────────────────────────────
class NamePayload(BaseModel):
    """JSON body for the rename endpoints."""
    # New name; may be omitted or null.
    name: Optional[str] = None
class DescribePayload(BaseModel):
    """JSON body for the describe endpoints."""
    # New description; may be omitted or null.
    description: Optional[str] = None
class FacesPayload(BaseModel):
    """JSON body for replacing the faces linked to a place."""
    # Ids of saved faces to link; defaults to an empty list.
    face_ids: list[int] = []
@router.get("")
async def list_zones():
    """Return every zone (with its places) plus the zone count."""
    all_zones = _require_zones().list_zones()
    serialised = [_zone_to_dict(zone) for zone in all_zones]
    return {"zones": serialised, "total": len(all_zones)}
@router.post("/create")
async def create_zone(name: Optional[str] = Query(default=None),
                      description: Optional[str] = Query(default=None)):
    """Create a zone from a name and/or description and bump the version.

    Raises:
        HTTPException: 503 if the zone gallery is unavailable; 400 if both
            ``name`` and ``description`` are blank.
    """
    gallery = _require_zones()
    has_name = bool((name or "").strip())
    has_description = bool((description or "").strip())
    if not (has_name or has_description):
        raise HTTPException(400, "A zone needs at least a name or a description.")
    zone = gallery.create_zone(name=name, description=description)
    _bump_zones_version()
    return {"ok": True, "zone": _zone_to_dict(zone)}
@router.post("/{zone_id}/rename")
async def rename_zone(zone_id: int, payload: NamePayload):
    """Rename a zone, bump the version, and return the updated zone.

    Raises:
        HTTPException: 404 when the gallery raises ``FileNotFoundError``.
    """
    gallery = _require_zones()
    try:
        gallery.rename_zone(zone_id, payload.name)
    except FileNotFoundError as missing:
        raise HTTPException(404, str(missing))
    _bump_zones_version()
    updated = gallery.get_zone(zone_id)
    return {"ok": True, "zone": _zone_to_dict(updated)}
@router.post("/{zone_id}/describe")
async def describe_zone(zone_id: int, payload: DescribePayload):
    """Set a zone's description, bump the version, and return the zone.

    Raises:
        HTTPException: 404 when the gallery raises ``FileNotFoundError``.
    """
    gallery = _require_zones()
    try:
        gallery.describe_zone(zone_id, payload.description)
    except FileNotFoundError as missing:
        raise HTTPException(404, str(missing))
    _bump_zones_version()
    updated = gallery.get_zone(zone_id)
    return {"ok": True, "zone": _zone_to_dict(updated)}
@router.delete("/{zone_id}")
async def delete_zone(zone_id: int):
    """Delete a zone; clear the nav target if it pointed into that zone.

    Raises:
        HTTPException: 404 when the gallery raises ``FileNotFoundError``.
    """
    gallery = _require_zones()
    try:
        gallery.delete_zone(zone_id)
    except FileNotFoundError as missing:
        raise HTTPException(404, str(missing))

    # A destination inside the removed zone no longer exists: reset it.
    targeted_zone = recognition_state.read(STATE_PATH).nav_target_zone_id
    if targeted_zone == zone_id:
        recognition_state.mutate(STATE_PATH, nav_target_zone_id=0, nav_target_place_id=0)

    _bump_zones_version()
    return {"ok": True, "deleted": zone_id}
# ── places CRUD (within a zone) ─────────────────────────────
@router.post("/{zone_id}/places/create")
async def create_place(
    zone_id: int,
    name: Optional[str] = Query(default=None),
    description: Optional[str] = Query(default=None),
    face_ids: list[int] = Query(default=[]),
    files: Optional[list[UploadFile]] = File(default=None),
):
    """Create a place inside a zone, optionally with reference photos.

    Empty uploads are skipped; every non-empty upload must pass
    ``_validate_image``.

    Raises:
        HTTPException: 404 if the zone does not exist; 400 if both ``name``
            and ``description`` are blank or an image is rejected.
    """
    gallery = _require_zones()
    if gallery.get_zone(zone_id) is None:
        raise HTTPException(404, f"zone_{zone_id} not found")

    has_name = bool((name or "").strip())
    has_description = bool((description or "").strip())
    if not (has_name or has_description):
        raise HTTPException(400, "A place needs at least a name or a description.")

    images: list[bytes] = []
    uploads = files or []
    for upload in uploads:
        raw = await upload.read()
        if raw:
            _validate_image(raw, upload.filename)
            images.append(raw)

    place = gallery.create_place(
        zone_id,
        name=name,
        description=description,
        face_ids=face_ids,
        image_bytes_list=images or None,
    )
    _bump_zones_version()
    return {"ok": True, "place": _place_to_dict(place)}
@router.post("/{zone_id}/places/{place_id}/rename")
async def rename_place(zone_id: int, place_id: int, payload: NamePayload):
    """Rename a place, bump the version, and return the updated place.

    Raises:
        HTTPException: 404 when the gallery raises ``FileNotFoundError``.
    """
    gallery = _require_zones()
    try:
        gallery.rename_place(zone_id, place_id, payload.name)
    except FileNotFoundError as missing:
        raise HTTPException(404, str(missing))
    _bump_zones_version()
    updated = gallery.get_place(zone_id, place_id)
    return {"ok": True, "place": _place_to_dict(updated)}
@router.post("/{zone_id}/places/{place_id}/describe")
async def describe_place(zone_id: int, place_id: int, payload: DescribePayload):
    """Set a place's description, bump the version, and return the place.

    Raises:
        HTTPException: 404 when the gallery raises ``FileNotFoundError``.
    """
    gallery = _require_zones()
    try:
        gallery.describe_place(zone_id, place_id, payload.description)
    except FileNotFoundError as missing:
        raise HTTPException(404, str(missing))
    _bump_zones_version()
    updated = gallery.get_place(zone_id, place_id)
    return {"ok": True, "place": _place_to_dict(updated)}
@router.post("/{zone_id}/places/{place_id}/faces")
async def set_place_faces(zone_id: int, place_id: int, payload: FacesPayload):
    """Replace the set of saved faces linked to this place.

    Raises:
        HTTPException: 404 when the gallery raises ``FileNotFoundError``.
    """
    gallery = _require_zones()
    try:
        gallery.set_place_faces(zone_id, place_id, payload.face_ids)
    except FileNotFoundError as missing:
        raise HTTPException(404, str(missing))
    _bump_zones_version()
    updated = gallery.get_place(zone_id, place_id)
    return {"ok": True, "place": _place_to_dict(updated)}
@router.post("/{zone_id}/places/{place_id}/capture")
async def capture_to_place(zone_id: int, place_id: int):
    """Grab a fresh camera frame and store it as a photo of the place.

    Raises:
        HTTPException: 409 if the camera is missing, not running, or yields
            no frame; 404 when the gallery raises ``FileNotFoundError``.
    """
    gallery = _require_zones()
    camera = _get_camera()
    camera_ready = camera is not None and camera.is_running()
    if not camera_ready:
        raise HTTPException(409, "Camera is not running. Toggle Vision ON first.")

    frame = camera.get_fresh_frame(max_age_s=0.5, timeout_s=1.5)
    if not frame:
        raise HTTPException(409, "Camera has no frame yet.")

    try:
        stored_name = gallery.add_photo(zone_id, place_id, frame)
    except FileNotFoundError as missing:
        raise HTTPException(404, str(missing))

    _bump_zones_version()
    place = gallery.get_place(zone_id, place_id)
    return {"ok": True, "added": stored_name, "place": _place_to_dict(place)}
@router.post("/{zone_id}/places/{place_id}/upload")
async def upload_to_place(zone_id: int, place_id: int,
                          files: list[UploadFile] = File(...)):
    """Attach one or more uploaded images to an existing place.

    The batch is all-or-nothing with respect to validation: every file is
    read and checked with ``_validate_image`` before any photo is stored.
    Previously a bad file in the middle of a batch left the earlier files
    stored while returning 400 without bumping ``zones_version``.

    Raises:
        HTTPException: 404 if the place does not exist (or disappears while
            storing); 400 if any file fails validation.
    """
    g = _require_zones()
    if g.get_place(zone_id, place_id) is None:
        raise HTTPException(404, f"zone_{zone_id}/place_{place_id} not found")

    # Phase 1: read + validate everything; nothing is written yet.
    # Note this holds the whole batch in memory at once.
    validated: list[bytes] = []
    for f in files:
        content = await f.read()
        _validate_image(content, f.filename)
        validated.append(content)

    # Phase 2: store. If the place vanishes mid-batch, still bump the version
    # so the photos already written are picked up.
    added: list[str] = []
    try:
        for content in validated:
            added.append(g.add_photo(zone_id, place_id, content))
    except FileNotFoundError as exc:
        if added:
            _bump_zones_version()
        raise HTTPException(404, str(exc))

    _bump_zones_version()
    return {"ok": True, "added": added, "place": _place_to_dict(g.get_place(zone_id, place_id))}
@router.delete("/{zone_id}/places/{place_id}")
async def delete_place(zone_id: int, place_id: int):
    """Delete a place; clear the nav target if it was this exact place.

    Raises:
        HTTPException: 404 when the gallery raises ``FileNotFoundError``.
    """
    gallery = _require_zones()
    try:
        gallery.delete_place(zone_id, place_id)
    except FileNotFoundError as missing:
        raise HTTPException(404, str(missing))

    state = recognition_state.read(STATE_PATH)
    was_target = (state.nav_target_zone_id == zone_id
                  and state.nav_target_place_id == place_id)
    if was_target:
        recognition_state.mutate(STATE_PATH, nav_target_zone_id=0, nav_target_place_id=0)

    _bump_zones_version()
    return {"ok": True, "deleted": place_id}
@router.delete("/{zone_id}/places/{place_id}/photo/{photo_name}")
async def delete_place_photo(zone_id: int, place_id: int, photo_name: str):
    """Remove a single photo from a place and bump the version.

    Raises:
        HTTPException: 400 for an unsafe photo name; 404 when the gallery
            raises ``FileNotFoundError``.
    """
    gallery = _require_zones()
    _safe_photo_name(photo_name)
    try:
        gallery.delete_photo(zone_id, place_id, photo_name)
    except FileNotFoundError as missing:
        raise HTTPException(404, str(missing))
    _bump_zones_version()
    return {"ok": True, "deleted": photo_name}
@router.get("/{zone_id}/places/{place_id}/photo/{photo_name}")
async def get_place_photo(zone_id: int, place_id: int, photo_name: str,
                          download: int = Query(default=0)):
    """Serve one photo of a place, inline or as an attachment.

    The media type is ``image/png`` for a ``.png`` suffix and ``image/jpeg``
    otherwise. A truthy ``download`` adds a ``Content-Disposition`` header.

    Raises:
        HTTPException: 400 for an unsafe photo name; 404 if the gallery
            returns no path.
    """
    gallery = _require_zones()
    _safe_photo_name(photo_name)
    photo_path = gallery.get_photo(zone_id, place_id, photo_name)
    if photo_path is None:
        raise HTTPException(404, "Photo not found.")

    if photo_path.suffix.lower() == ".png":
        media_type = "image/png"
    else:
        media_type = "image/jpeg"

    extra_headers = {}
    if download:
        disposition = f'attachment; filename="zone_{zone_id}_place_{place_id}_{photo_name}"'
        extra_headers["Content-Disposition"] = disposition
    return FileResponse(photo_path, media_type=media_type, headers=extra_headers)
@router.get("/{zone_id}/places/{place_id}/download.zip")
async def download_place_zip(zone_id: int, place_id: int):
    """Return the archive produced by ``zip_place`` as a download.

    Raises:
        HTTPException: 404 when the gallery raises ``FileNotFoundError``.
    """
    gallery = _require_zones()
    try:
        archive = gallery.zip_place(zone_id, place_id)
    except FileNotFoundError as missing:
        raise HTTPException(404, str(missing))

    response_headers = {
        "Content-Disposition": f'attachment; filename="zone_{zone_id}_place_{place_id}.zip"',
        "Content-Length": str(len(archive)),
    }
    return StreamingResponse(
        io.BytesIO(archive),
        media_type="application/zip",
        headers=response_headers,
    )
# ── "go here" navigation target ─────────────────────────────
@router.post("/{zone_id}/places/{place_id}/go")
async def go_to_place(zone_id: int, place_id: int):
    """Set this place as the active destination.

    Only records the target in the recognition state and logs it; this
    handler issues no motion command.

    Raises:
        HTTPException: 404 if the place does not exist.
    """
    gallery = _require_zones()
    place = gallery.get_place(zone_id, place_id)
    if place is None:
        raise HTTPException(404, f"zone_{zone_id}/place_{place_id} not found")

    recognition_state.mutate(
        STATE_PATH,
        nav_target_zone_id=zone_id,
        nav_target_place_id=place_id,
    )
    log.info("nav target set → zone_%d/place_%d (%s)", zone_id, place_id,
             place.name or "(unnamed)")

    target = {"zone_id": zone_id, "place_id": place_id, "place_name": place.name}
    return {"ok": True, "nav_target": target}
@router.post("/nav/clear")
async def clear_nav_target():
    """Reset both nav-target ids to 0 in the recognition state."""
    cleared = {"nav_target_zone_id": 0, "nav_target_place_id": 0}
    recognition_state.mutate(STATE_PATH, **cleared)
    log.info("nav target cleared")
    return {"ok": True, "nav_target": None}

2347
vendor/Sanad/dashboard/static/index.html vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,90 @@
"""G1 29-DoF motor → name / mesh mapping for the 3D temperature dashboard.
Ported verbatim from Marcus/Features/TempMonitor/config_g1.py so the copied
three.js front-end (static/temp3d/index.html) binds temperature colours to the
correct STL meshes. `build_payload()` turns the arm controller's raw lowstate
snapshot into the exact 'motor_update' payload shape that front-end expects.
"""
from __future__ import annotations
from typing import Any, Optional
# Motor ID → human name (29 motors = 29 DOF)
MOTOR_NAMES: dict[int, str] = {
0: "Left Hip Pitch", 1: "Left Hip Roll", 2: "Left Hip Yaw", 3: "Left Knee",
4: "Left Ankle Pitch", 5: "Left Ankle Roll",
6: "Right Hip Pitch", 7: "Right Hip Roll", 8: "Right Hip Yaw", 9: "Right Knee",
10: "Right Ankle Pitch", 11: "Right Ankle Roll",
12: "Waist Yaw", 13: "Waist Roll", 14: "Waist Pitch",
15: "Left Shoulder Pitch", 16: "Left Shoulder Roll", 17: "Left Shoulder Yaw",
18: "Left Elbow", 19: "Left Wrist Roll", 20: "Left Wrist Pitch", 21: "Left Wrist Yaw",
22: "Right Shoulder Pitch", 23: "Right Shoulder Roll", 24: "Right Shoulder Yaw",
25: "Right Elbow", 26: "Right Wrist Roll", 27: "Right Wrist Pitch", 28: "Right Wrist Yaw",
}
# Motor ID → URDF link / STL mesh name
MOTOR_TO_MESH: dict[int, str] = {
0: "left_hip_pitch_link", 1: "left_hip_roll_link", 2: "left_hip_yaw_link",
3: "left_knee_link", 4: "left_ankle_pitch_link", 5: "left_ankle_roll_link",
6: "right_hip_pitch_link", 7: "right_hip_roll_link", 8: "right_hip_yaw_link",
9: "right_knee_link", 10: "right_ankle_pitch_link", 11: "right_ankle_roll_link",
12: "waist_yaw_link", 13: "waist_roll_link", 14: "torso_link",
15: "left_shoulder_pitch_link", 16: "left_shoulder_roll_link", 17: "left_shoulder_yaw_link",
18: "left_elbow_link", 19: "left_wrist_roll_link", 20: "left_wrist_pitch_link",
21: "left_wrist_yaw_link",
22: "right_shoulder_pitch_link", 23: "right_shoulder_roll_link", 24: "right_shoulder_yaw_link",
25: "right_elbow_link", 26: "right_wrist_roll_link", 27: "right_wrist_pitch_link",
28: "right_wrist_yaw_link",
}
# Temperature thresholds (°C) — the three.js gradient maps MIN→MAX (blue→red).
TEMP_MIN = 30
TEMP_MAX = 120
TEMP_WARM_THRESHOLD = 45
TEMP_HOT_THRESHOLD = 60
def _coerce(v: Optional[int]) -> float:
"""Temperatures default to 0 when the firmware didn't report one, so the
front-end's Math.max / .toFixed never sees null/NaN."""
return float(v) if v is not None else 0.0
def build_payload(temps: list[dict[str, Any]],
                  positions: list[float],
                  timestamp: float) -> dict[str, Any]:
    """Build the Marcus-compatible 'motor_update' payload.

    Args:
        temps: Items shaped like ``{motor_id, surface, winding}`` (the value
            of ``arm.get_motor_temps()``). May be empty or ``None``.
        positions: Joint angles indexed by motor id (the value of
            ``arm.get_current_q()``). May be empty or ``None``.
        timestamp: Passed through unchanged.

    Returns:
        ``{"temperatures": [...], "positions": [...], "timestamp": ...}``.
        Each temperature entry carries the motor name, mesh name, the two
        readings (also duplicated as ``temp1`` / ``temp2``) and their
        average. If only one reading is present, ``avg`` is that reading.
        ``position`` is attached only when ``motor_id`` is a valid
        non-negative index into ``positions``.
    """
    n_positions = len(positions) if positions else 0

    temperatures: list[dict[str, Any]] = []
    for t in temps or []:
        i = t.get("motor_id")
        surface = t.get("surface")
        winding = t.get("winding")
        surface_f = _coerce(surface)
        winding_f = _coerce(winding)
        if surface is not None and winding is not None:
            avg = (surface_f + winding_f) / 2.0
        else:
            # Only one (or neither) reading present: use whichever exists.
            avg = surface_f if surface is not None else winding_f
        entry: dict[str, Any] = {
            "motor_id": i,
            "motor_name": MOTOR_NAMES.get(i, f"Motor {i}"),
            "mesh_name": MOTOR_TO_MESH.get(i, ""),
            "surface": surface_f,
            "winding": winding_f,
            "temp1": surface_f,
            "temp2": winding_f,
            "avg": avg,
        }
        # The lower bound matters: a negative id would otherwise index from
        # the END of `positions` and attach another joint's angle.
        if isinstance(i, int) and 0 <= i < n_positions:
            entry["position"] = float(positions[i])
        temperatures.append(entry)

    pos_list: list[dict[str, Any]] = [
        {"motor_id": i, "position": float(q), "link_name": MOTOR_TO_MESH.get(i)}
        for i, q in enumerate(positions or [])
    ]
    return {"temperatures": temperatures, "positions": pos_list,
            "timestamp": timestamp}

View File

View File

@ -0,0 +1,80 @@
"""WebSocket endpoint for real-time log streaming.
Clients connect to /ws/logs and receive live log lines from all modules.
"""
from __future__ import annotations
import asyncio
import threading
from collections import deque
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
from Project.Sanad.core.logger import set_ws_push
router = APIRouter()
MAX_WATCHERS = 50
# Ring buffer of recent log lines (shared across connections).
_recent: deque[str] = deque(maxlen=500)
# Per-connection queue → the event loop that owns it. The loop is needed
# because asyncio.Queue is not thread-safe: producers on other threads must
# hand lines over via loop.call_soon_threadsafe.
_watchers: dict[asyncio.Queue, asyncio.AbstractEventLoop] = {}
# Guards both _recent and _watchers. Never held across an `await`.
_watchers_lock = threading.Lock()


def _offer(queue: asyncio.Queue, line: str) -> None:
    """Enqueue ``line`` on the owning loop's thread; drop it if the queue is full."""
    try:
        queue.put_nowait(line)
    except asyncio.QueueFull:
        # Drop on overflow rather than block — logs are not critical data
        pass


def push_log_line(line: str):
    """Called from the logging system to feed new lines.

    May be called from any thread (logging is multi-threaded). The line is
    appended to the history buffer and the watcher list is snapshotted under
    the lock; delivery to each watcher is then scheduled on that watcher's
    event loop, because touching an ``asyncio.Queue`` from a foreign thread
    is unsafe and may not wake the waiting consumer.
    """
    with _watchers_lock:
        _recent.append(line)
        snapshot = list(_watchers.items())
    for queue, loop in snapshot:
        try:
            loop.call_soon_threadsafe(_offer, queue, line)
        except RuntimeError:
            # The watcher's loop is already closed; it is going away.
            pass


# Register with the logger so all log records are pushed to WS clients.
# Wrap so a logger registration failure doesn't break Dashboard import.
try:
    set_ws_push(push_log_line)
except Exception:
    pass


@router.websocket("/ws/logs")
async def log_ws(ws: WebSocket):
    """Stream log lines to one client: recent history first, then live lines.

    At most ``MAX_WATCHERS`` clients are served; further connections are
    closed with code 1013.
    """
    await ws.accept()
    queue: asyncio.Queue[str] = asyncio.Queue(maxsize=200)
    loop = asyncio.get_running_loop()

    # Decide + register + snapshot history atomically, then release the lock
    # BEFORE awaiting. Awaiting with a threading.Lock held can deadlock the
    # event-loop thread if anything running on it logs (push_log_line takes
    # the same lock).
    history: list[str] = []
    with _watchers_lock:
        admitted = len(_watchers) < MAX_WATCHERS
        if admitted:
            _watchers[queue] = loop
            history = list(_recent)
    if not admitted:
        await ws.close(code=1013, reason="Too many log watchers")
        return

    try:
        # Send recent history
        for line in history:
            await ws.send_text(line)
        while True:
            line = await queue.get()
            await ws.send_text(line)
    except WebSocketDisconnect:
        pass
    except Exception:
        # Any other error closes the connection cleanly
        try:
            await ws.close()
        except Exception:
            pass
    finally:
        with _watchers_lock:
            _watchers.pop(queue, None)

View File

@ -0,0 +1,81 @@
"""WebSocket endpoint streaming G1 motor temperatures to the 3D dashboard (N1).
Polls the arm controller's throttled rt/lowstate snapshot (arm.get_motor_temps
/ arm.get_current_q — NO second DDS subscriber, no second ChannelFactoryInitialize)
and pushes a Marcus-compatible 'motor_update' payload to each connected client.
Front-end: dashboard/static/temp3d/index.html (ported three.js view), which
opens this socket via a tiny shim in place of socket.io.
"""
from __future__ import annotations
import asyncio
import threading
import time
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
from Project.Sanad.core.logger import get_logger
from Project.Sanad.dashboard.temp_motor_map import build_payload
log = get_logger("motor_temps_ws")
router = APIRouter()
MAX_WATCHERS = 20
PUSH_HZ = 8.0 # ~8 fps is plenty for a temperature heatmap
_count = 0
_count_lock = threading.Lock()
def _get_arm():
    """Return the ``arm`` object exported by ``Project.Sanad.main``, or None.

    Imported lazily to avoid a circular import on dashboard load; any
    exception raised by the import is reported as ``None``.
    """
    try:
        from Project.Sanad.main import arm as controller  # type: ignore
    except Exception:
        return None
    return controller
@router.websocket("/ws/motor-temps")
async def motor_temps_ws(ws: WebSocket):
    """Push a 'motor_update' payload to the client at ``PUSH_HZ``.

    Each tick reads temperatures and joint positions from the arm controller
    (falling back to empty lists if the arm is missing or a read raises) and
    sends the result of ``build_payload``. At most ``MAX_WATCHERS`` clients
    are served; further connections are closed with code 1013.
    """
    global _count
    await ws.accept()

    # Reserve a slot under the lock, but do NOT await while holding it:
    # another handler on the same event-loop thread would block on the
    # threading.Lock and freeze the loop, so the close could never finish.
    with _count_lock:
        admitted = _count < MAX_WATCHERS
        if admitted:
            _count += 1
    if not admitted:
        await ws.close(code=1013, reason="Too many temperature watchers")
        return

    period = 1.0 / PUSH_HZ
    try:
        while True:
            arm = _get_arm()
            temps: list = []
            positions: list = []
            if arm is not None:
                try:
                    temps = arm.get_motor_temps()
                except Exception:
                    temps = []
                try:
                    positions = arm.get_current_q()
                except Exception:
                    positions = []
            payload = build_payload(temps, positions, time.time())
            await ws.send_json(payload)
            await asyncio.sleep(period)
    except WebSocketDisconnect:
        pass
    except Exception:
        # Any other error (client gone mid-send, serialise issue) closes cleanly.
        try:
            await ws.close()
        except Exception:
            pass
    finally:
        with _count_lock:
            _count -= 1

View File

@ -0,0 +1,323 @@
"""WebSocket → PTY bridge for the dashboard's Terminal tab.
Spawns a shell (bash by default) inside a pseudo-terminal on the robot and
relays stdin/stdout to a browser xterm.js instance over WebSocket. From the
operator's seat this is functionally identical to an in-browser
`ssh unitree@<robot>` except no SSH handshake is needed because the
dashboard process already runs as unitree on the robot. The Terminal tab
connects to ws://<dashboard>/ws/terminal and you land in unitree's shell
directly.
PROTOCOL — text frames only. Control frames vs. keystrokes are disambiguated
by the leading character:
client → server:
"\\x1f" + json-encoded control object (init / resize)
e.g. "\\x1f{\\"type\\":\\"init\\",\\"cols\\":80,\\"rows\\":24}"
<any other text> → keystrokes written to PTY
server → client:
<text> → PTY stdout/stderr chunks
The \\x1f prefix (ASCII Unit Separator) is the disambiguator. If we just
JSON-sniffed every message, a user pasting `{"type":"resize",...}` into
their shell would silently resize the PTY instead of pasting the text.
SECURITY NOTE: anyone who can reach the dashboard URL gets shell access
as the unitree user. The dashboard already exposes equally-powerful
endpoints (E-STOP, motion replay, audio mute, etc.) so this isn't a new
threat class but it IS a single-bullet kill switch for the robot. Bind
the dashboard to a trusted network only.
"""
from __future__ import annotations
import asyncio
import fcntl
import json
import os
import pty
import select
import shutil
import signal
import struct
import termios
import threading
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
from Project.Sanad.core.logger import get_logger
log = get_logger("terminal_ws")
router = APIRouter()
# Magic prefix that distinguishes control messages from raw keystrokes.
# ASCII 0x1F (Unit Separator) — not produced by normal keyboard input,
# so user-pasted JSON can never spoof a control frame.
_CTRL_PREFIX = "\x1f"
# Concurrent-session cap so a runaway tab can't spawn 50 bashes on the robot.
_MAX_SESSIONS = 4
_active: set[int] = set()
_active_lock = threading.Lock()
# Bounded queue depth between the PTY reader thread and the WS sender.
# A chatty shell command (e.g. `yes`, `cat /dev/urandom`) at gigabytes/sec
# would otherwise pile up unbounded asyncio tasks + string refs. Past the
# cap we drop chunks and surface a single drop notice — ANSI may corrupt
# briefly but the session stays alive.
_SEND_QUEUE_MAX = 64
def _resolve_shell() -> list[str]:
"""Pick a sensible shell. SHELL env first, then /bin/bash, then sh."""
sh = os.environ.get("SHELL", "")
if sh and shutil.which(sh):
return [sh, "-i"]
if shutil.which("/bin/bash"):
return ["/bin/bash", "-i"]
return ["/bin/sh", "-i"]
def _set_pty_size(fd: int, cols: int, rows: int) -> None:
"""Inform the PTY of its new window size so curses-style apps (htop,
less, vim) lay out correctly."""
try:
# TIOCSWINSZ payload: rows, cols, xpixel, ypixel (xpixel/ypixel
# unused, kept 0).
fcntl.ioctl(fd, termios.TIOCSWINSZ,
struct.pack("HHHH", rows, cols, 0, 0))
except Exception as exc:
log.debug("TIOCSWINSZ failed (cols=%s rows=%s): %s", cols, rows, exc)
async def _reap_child(pid: int) -> None:
"""SIGHUP → wait briefly → SIGKILL → wait briefly → giveup.
Earlier version SIGKILLed unconditionally because the WNOHANG check
happened immediately after SIGHUP (which never returns true that fast).
Now we poll for up to ~1.5s after SIGHUP before escalating.
"""
async def _wait_exit(timeout_s: float, interval_s: float = 0.1) -> bool:
end = asyncio.get_running_loop().time() + timeout_s
while asyncio.get_running_loop().time() < end:
try:
done_pid, _ = os.waitpid(pid, os.WNOHANG)
except ChildProcessError:
return True # already reaped
except OSError:
return False
if done_pid:
return True
await asyncio.sleep(interval_s)
return False
# 1. Polite request
try:
os.kill(pid, signal.SIGHUP)
except ProcessLookupError:
return
except OSError as exc:
log.debug("SIGHUP pid=%d: %s", pid, exc)
return
if await _wait_exit(1.5):
return
# 2. Force
try:
os.kill(pid, signal.SIGKILL)
except ProcessLookupError:
return
except OSError as exc:
log.debug("SIGKILL pid=%d: %s", pid, exc)
return
if not await _wait_exit(1.0):
log.warning("terminal child pid=%d failed to exit after SIGKILL", pid)
@router.websocket("/ws/terminal")
async def terminal_ws(ws: WebSocket) -> None:
    """Bridge a browser xterm.js to a shell PTY on the robot.

    Flow: accept the socket, enforce the session cap, fork a shell on a new
    PTY, then relay data both ways until either side goes away. Frames that
    start with ``_CTRL_PREFIX`` are treated as control messages (init /
    resize) and never reach the shell; everything else is written to the PTY
    as keystrokes. PTY output is read in a daemon thread and forwarded via a
    bounded queue; chunks are dropped (and counted) when the client is slow.
    """
    await ws.accept()

    # Concurrent-session guard. Take the decision under the lock but release
    # it BEFORE awaiting: awaiting with a threading.Lock held lets another
    # handler on the same event-loop thread block on the lock and freeze
    # the loop.
    with _active_lock:
        refused = len(_active) >= _MAX_SESSIONS
    if refused:
        await ws.send_text(
            f"\r\n[terminal] Refused — already have {_MAX_SESSIONS} "
            f"open sessions. Close another tab and reconnect.\r\n"
        )
        await ws.close(code=1008)
        return

    # Fork + exec the shell. Parent gets the master fd; child becomes the
    # shell with stdin/stdout/stderr wired to the slave end.
    # (No await between the cap check above and _active.add below, so the
    # check-then-register sequence cannot interleave with another handler.)
    cmd = _resolve_shell()
    try:
        pid, fd = pty.fork()
    except OSError as exc:
        log.error("pty.fork failed: %s", exc)
        await ws.send_text(f"\r\n[terminal] pty.fork failed: {exc}\r\n")
        await ws.close(code=1011)
        return

    if pid == 0:
        # CHILD — set env so the shell is interactive and looks right.
        os.environ.setdefault("TERM", "xterm-256color")
        os.environ.setdefault("LANG", os.environ.get("LANG", "en_US.UTF-8"))
        try:
            os.execvp(cmd[0], cmd)
        except OSError as exc:
            # exec failed — printing to fd 2 reaches the parent via the
            # PTY so the browser sees the error before we _exit.
            os.write(2, f"[terminal] exec failed: {exc}\n".encode())
            os._exit(127)
        return  # unreachable in child

    # PARENT
    with _active_lock:
        _active.add(pid)
    log.info("terminal session started pid=%d cmd=%s", pid, cmd[0])

    loop = asyncio.get_running_loop()
    closed = asyncio.Event()
    # Bounded queue + dedicated sender task = backpressure. If the queue
    # fills up we drop the chunk and bump `dropped` so we can surface a
    # short notice in the stream.
    send_q: asyncio.Queue[str] = asyncio.Queue(maxsize=_SEND_QUEUE_MAX)
    dropped = 0

    def _enqueue(text: str) -> None:
        """Runs on the event loop: queue a chunk, or count it as dropped."""
        nonlocal dropped
        try:
            send_q.put_nowait(text)
        except asyncio.QueueFull:
            dropped += 1

    def _reader_thread() -> None:
        """Drain PTY master fd → queue. Runs in a daemon thread and polls
        the fd with a 0.1 s select timeout so it notices `closed`."""
        try:
            while not closed.is_set():
                try:
                    r, _, _ = select.select([fd], [], [], 0.1)
                except (OSError, ValueError):
                    break
                if not r:
                    continue
                try:
                    data = os.read(fd, 4096)
                except OSError:
                    break
                if not data:  # EOF — child exited / PTY closed
                    break
                try:
                    text = data.decode("utf-8", errors="replace")
                except Exception:
                    continue
                # Hand over to the loop thread; _enqueue drops on overflow.
                try:
                    loop.call_soon_threadsafe(_enqueue, text)
                except RuntimeError:
                    # loop closed — bail
                    break
        finally:
            loop.call_soon_threadsafe(closed.set)

    async def _sender_task() -> None:
        """Drain send_q → WebSocket. A failed send ends the session by
        setting `closed`."""
        nonlocal dropped
        while not closed.is_set():
            try:
                text = await asyncio.wait_for(send_q.get(), timeout=0.5)
            except asyncio.TimeoutError:
                continue
            try:
                await ws.send_text(text)
            except Exception as exc:
                log.info("terminal ws.send failed (likely client gone): %s", exc)
                closed.set()
                return
            # If we dropped chunks since the last successful send, tell
            # the user once so the ANSI corruption isn't mysterious.
            if dropped:
                d = dropped
                dropped = 0
                try:
                    await ws.send_text(
                        f"\r\n\x1b[2m[term: dropped {d} chunk(s) — slow client]"
                        f"\x1b[0m\r\n",
                    )
                except Exception:
                    closed.set()
                    return

    reader = threading.Thread(target=_reader_thread, daemon=True,
                              name=f"terminal-rx-{pid}")
    reader.start()
    sender = asyncio.create_task(_sender_task())

    # Initial sizing — xterm.js will send a {type:"init",...} control
    # frame right after onopen with the actual window size.
    _set_pty_size(fd, 80, 24)

    try:
        while not closed.is_set():
            try:
                msg = await asyncio.wait_for(ws.receive_text(), timeout=0.5)
            except asyncio.TimeoutError:
                continue
            except WebSocketDisconnect:
                break
            if not msg:
                continue

            # Control frame? Must start with the magic prefix.
            if msg[:1] == _CTRL_PREFIX:
                try:
                    ctrl = json.loads(msg[1:])
                except (json.JSONDecodeError, ValueError):
                    ctrl = None
                if isinstance(ctrl, dict) and ctrl.get("type") in ("init", "resize"):
                    # cols/rows come from the client: a non-numeric value
                    # must not tear the whole session down.
                    try:
                        cols = int(ctrl.get("cols") or 80)
                        rows = int(ctrl.get("rows") or 24)
                    except (TypeError, ValueError):
                        log.debug("terminal: ignoring malformed resize frame: %r", ctrl)
                    else:
                        _set_pty_size(fd, cols, rows)
                # Either way, control frames are NEVER forwarded to PTY.
                continue

            # Plain keystrokes — write to PTY master.
            try:
                os.write(fd, msg.encode("utf-8", errors="replace"))
            except OSError as exc:
                log.info("terminal pty write failed (child likely exited): %s", exc)
                break
    finally:
        closed.set()
        try:
            sender.cancel()
        except Exception:
            pass
        try:
            await _reap_child(pid)
        except Exception as exc:
            log.debug("reap_child pid=%d: %s", pid, exc)
        # Let the reader thread leave select()/read() before the fd is
        # closed; otherwise the fd number could be reused while the thread
        # still polls it. The join runs in the executor so the loop stays
        # responsive.
        try:
            await loop.run_in_executor(None, reader.join, 1.0)
        except Exception as exc:
            log.debug("terminal reader join pid=%d: %s", pid, exc)
        try:
            os.close(fd)
        except OSError:
            pass
        with _active_lock:
            _active.discard(pid)
        log.info("terminal session ended pid=%d", pid)
        try:
            await ws.close()
        except Exception:
            pass

0
vendor/Sanad/data/audio/.gitkeep vendored Normal file
View File

1
vendor/Sanad/data/audio_device.json vendored Normal file
View File

@ -0,0 +1 @@
{}

5
vendor/Sanad/data/camera_device.json vendored Normal file
View File

@ -0,0 +1,5 @@
{
"profile_serial_assignments": {
"realsense_primary": ""
}
}

0
vendor/Sanad/data/faces/.gitkeep vendored Normal file
View File

21
vendor/Sanad/data/motions/config.json vendored Normal file
View File

@ -0,0 +1,21 @@
{
"gemini": {
"api_key": "",
"model": "models/gemini-2.5-flash-native-audio-preview-12-2025",
"voice_name": "Charon"
},
"audio": {
"send_sample_rate": 16000,
"receive_sample_rate": 24000,
"chunk_size": 512,
"g1_volume": 100
},
"motion": {
"action_cooldown_sec": 1.0,
"replay_hz": 60.0
},
"dashboard": {
"host": "0.0.0.0",
"port": 8000
}
}

0
vendor/Sanad/data/photos/.gitkeep vendored Normal file
View File

0
vendor/Sanad/data/recordings/.gitkeep vendored Normal file
View File

136
vendor/Sanad/examples/voice_example.py vendored Normal file
View File

@ -0,0 +1,136 @@
#!/usr/bin/env python3
"""voice_example.py — demos for each voice subsystem in isolation.
Each subcommand exercises one component so you can debug pieces without
running the full Sanad stack.
Usage:
python3 voice_example.py gemini "hello" # one-shot Gemini text→audio
python3 voice_example.py local_tts "hello" # local Coqui TTS
python3 voice_example.py typed_replay "hello" # typed replay engine
python3 voice_example.py live # spawn GeminiSubprocess
python3 voice_example.py status # show status of all subsystems
Assumes Project.Sanad is importable (run from repo root or with PYTHONPATH set).
"""
from __future__ import annotations
import argparse
import asyncio
import sys
def _demo_gemini(text: str) -> None:
    """One-shot demo: connect to Gemini, send ``text``, play the audio reply.

    The client is always disconnected, even if sending or playback fails.
    """
    from Project.Sanad.gemini.client import GeminiVoiceClient
    from Project.Sanad.voice.audio_manager import AudioManager

    async def _round_trip():
        gemini = GeminiVoiceClient()
        player = AudioManager()
        await gemini.connect()
        try:
            audio_bytes, text_parts = await gemini.send_text(text, owner="example")
            print(f"[gemini] got {len(audio_bytes)} bytes audio, text={text_parts}")
            if audio_bytes:
                # Playback is run in a worker thread.
                await asyncio.to_thread(player.play_pcm, audio_bytes, 1, 24000, 2)
        finally:
            await gemini.disconnect()

    asyncio.run(_round_trip())
def _demo_local_tts(text: str) -> None:
    """Synthesize ``text`` with the local TTS engine and play the result."""
    from Project.Sanad.voice.local_tts import LocalTTSEngine
    from Project.Sanad.voice.audio_manager import AudioManager

    engine = LocalTTSEngine()
    player = AudioManager()
    samples = engine.synthesize(text)
    print(f"[local_tts] generated {len(samples)} bytes")
    player.play_pcm(samples, 1, 16000, 2)
def _demo_typed_replay(text: str) -> None:
    """Exercise the TypedReplayEngine end-to-end.

    Connects a Gemini client, asks the engine to say ``text``, prints the
    result, and always disconnects the client, including when ``say``
    raises (matching ``_demo_gemini``).
    """
    from Project.Sanad.gemini.client import GeminiVoiceClient
    from Project.Sanad.voice.audio_manager import AudioManager
    from Project.Sanad.voice.typed_replay import TypedReplayEngine

    async def run():
        client = GeminiVoiceClient()
        await client.connect()
        try:
            audio = AudioManager()
            engine = TypedReplayEngine(client, audio)
            result = await engine.say(text)
            print(f"[typed_replay] {result}")
        finally:
            # Without this the connection leaked on any failure above.
            await client.disconnect()

    asyncio.run(run())
def _demo_live() -> None:
    """Start the live voice subprocess and keep it running until Ctrl+C.

    Prints whatever ``start()`` and ``stop()`` return.
    """
    import time
    from Project.Sanad.gemini.subprocess import GeminiSubprocess

    supervisor = GeminiSubprocess()
    start_info = supervisor.start()
    print(f"[live] {start_info}")
    print("Running. Ctrl+C to stop.")
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        print("\n[live] stopping...")
        print(supervisor.stop())
def _demo_status() -> None:
    """Print the status of the Gemini client and the local TTS engine.

    Local TTS is reported as "not installed" if its module cannot be
    imported, and as "unavailable" if constructing or querying it raises.
    """
    from Project.Sanad.gemini.client import GeminiVoiceClient
    try:
        from Project.Sanad.voice.local_tts import LocalTTSEngine
    except Exception:
        LocalTTSEngine = None

    gemini = GeminiVoiceClient()
    print("[gemini]", gemini.status())

    if not LocalTTSEngine:
        print("[local_tts] not installed")
        return
    try:
        engine = LocalTTSEngine()
        print("[local_tts]", engine.status())
    except Exception as exc:
        print(f"[local_tts] unavailable: {exc}")
def main():
    """Parse the subcommand from the command line and run the matching demo."""
    parser = argparse.ArgumentParser(description=__doc__)
    commands = parser.add_subparsers(dest="cmd", required=True)

    # Demos that take the text to speak as a positional argument.
    for demo_name in ("gemini", "local_tts", "typed_replay"):
        demo_parser = commands.add_parser(demo_name, help=f"demo {demo_name}")
        demo_parser.add_argument("text", help="text to speak")
    commands.add_parser("live", help="spawn live voice subprocess")
    commands.add_parser("status", help="print subsystem status")

    args = parser.parse_args()
    chosen = args.cmd
    if chosen == "gemini":
        _demo_gemini(args.text)
        return
    if chosen == "local_tts":
        _demo_local_tts(args.text)
        return
    if chosen == "typed_replay":
        _demo_typed_replay(args.text)
        return
    if chosen == "live":
        _demo_live()
        return
    if chosen == "status":
        _demo_status()
if __name__ == "__main__":
main()

0
vendor/Sanad/gemini/__init__.py vendored Normal file
View File

341
vendor/Sanad/gemini/client.py vendored Normal file
View File

@ -0,0 +1,341 @@
"""Gemini WebSocket client for real-time voice interaction.
Provides:
- Bidirectional audio streaming (mic → Gemini → speaker)
- Text-to-speech via typed input
- Voice-command detection through transcription parsing
- System instruction injection for persona control
"""
from __future__ import annotations
import asyncio
import base64
import inspect
import json
from typing import Any
import websockets
from Project.Sanad.config import (
GEMINI_API_KEY,
GEMINI_MODEL,
GEMINI_VOICE,
GEMINI_WS_TIMEOUT,
GEMINI_WS_URI,
)
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.event_bus import bus
from Project.Sanad.core.logger import get_logger
# Module logger for the Gemini WebSocket client.
log = get_logger("gemini_client")
# Tunables from the "gemini" / "client" config section; each lookup below
# supplies its own fallback default.
_GC = _cfg_section("gemini", "client")
# Default system prompt — SINGLE SOURCE in core.gemini_defaults
# (the literal below is only the fallback when that key is absent).
_DEFAULT_SYSTEM_PROMPT = _cfg_section("core", "gemini_defaults").get(
"default_system_prompt",
"You are Sanad (Bousandah), a wise and friendly Emirati assistant. "
"Speak in UAE dialect (Khaleeji). Be helpful and concise."
)
# NOTE(review): not referenced anywhere in the code visible in this module
# chunk — the receive loop uses GEMINI_WS_TIMEOUT; confirm whether it is
# still needed.
_RECV_TIMEOUT_SEC = _GC.get("recv_timeout_sec", 30)
# Reconnect policy used by GeminiVoiceClient._ensure_connected: number of
# attempts, first back-off delay, and the cap the doubling delay is clamped to.
_RECONNECT_MAX_ATTEMPTS = _GC.get("reconnect_max_attempts", 3)
_RECONNECT_INITIAL_DELAY_SEC = _GC.get("reconnect_initial_delay_sec", 1.0)
_RECONNECT_MAX_DELAY_SEC = _GC.get("reconnect_max_delay_sec", 10.0)
class GeminiVoiceClient:
    """Manages one WebSocket session to the Gemini Bidi audio API.

    Concurrency model:
      - `_send_lock` serializes ALL websocket writes.
      - `_session_lock` ensures only one consumer (live loop OR typed replay)
        owns the receive stream at a time. It is acquired by `send_text` and
        by the `acquire_session()` context manager (which `receive_stream`
        callers must hold).
      - `_connect_lock` serializes reconnect attempts.
      - `_owner` records who currently holds the session lock for diagnostics.
    """

    def __init__(self, system_prompt: str = ""):
        """Create a disconnected client.

        Args:
            system_prompt: Persona/system instruction sent in the setup frame.
                Falls back to the configured default when empty.
        """
        self.system_prompt = system_prompt or _DEFAULT_SYSTEM_PROMPT
        self._ws: Any = None
        self._connected = False
        self._send_lock = asyncio.Lock()
        self._session_lock = asyncio.Lock()
        self._connect_lock = asyncio.Lock()  # serializes reconnect attempts
        self._owner: str | None = None
        self._reconnect_attempts = 0

    @property
    def connected(self) -> bool:
        """True while the client believes the websocket is usable."""
        return self._connected

    @property
    def session_owner(self) -> str | None:
        """Label of the consumer currently holding the session, if any."""
        return self._owner

    def _ws_kwargs(self) -> dict[str, Any]:
        """Build keyword arguments for ``websockets.connect``.

        The header keyword was renamed between websockets releases
        (``extra_headers`` vs ``additional_headers``), so the signature is
        inspected to pick whichever the installed version accepts.
        """
        kwargs: dict[str, Any] = {"max_size": None, "open_timeout": 30}
        try:
            sig = inspect.signature(websockets.connect)
            key = "extra_headers" if "extra_headers" in sig.parameters else "additional_headers"
        except Exception:
            key = "extra_headers"
        kwargs[key] = {"Content-Type": "application/json"}
        return kwargs

    async def connect(self):
        """Open the websocket, send the setup frame and wait for its ACK.

        Raises:
            Exception: whatever the handshake raised (including a timeout
                while waiting for the ACK). The half-open socket is closed
                before re-raising so a failed attempt never leaks a
                connection.
        """
        uri = f"{GEMINI_WS_URI}?key={GEMINI_API_KEY}"
        ws = None
        try:
            ws = await websockets.connect(uri, **self._ws_kwargs())
            setup = {
                "setup": {
                    "model": GEMINI_MODEL,
                    "generationConfig": {
                        "responseModalities": ["AUDIO"],
                        "speechConfig": {
                            "voiceConfig": {
                                "prebuiltVoiceConfig": {"voiceName": GEMINI_VOICE}
                            }
                        },
                    },
                    "systemInstruction": {"parts": [{"text": self.system_prompt}]},
                }
            }
            await ws.send(json.dumps(setup))
            # ACK — bounded so a server that accepts the socket but never
            # answers cannot hang the caller (and the reconnect lock) forever.
            await asyncio.wait_for(ws.recv(), timeout=GEMINI_WS_TIMEOUT)
            self._ws = ws
            self._connected = True
            self._reconnect_attempts = 0
            log.info("Connected to Gemini (%s)", GEMINI_MODEL)
            await bus.emit("voice.connected")
        except Exception:
            self._connected = False
            self._ws = None
            log.exception("Failed to connect to Gemini")
            if ws is not None:
                # The socket was opened but setup failed: close it, otherwise
                # every failed attempt leaves a connection behind.
                try:
                    await ws.close()
                except Exception:
                    log.debug("Closing half-open Gemini socket failed", exc_info=True)
            raise

    async def disconnect(self):
        """Close the websocket (best effort) and reset all session state."""
        try:
            if self._ws is not None:
                await self._ws.close()
        except Exception:
            # Best effort: the socket may already be dead.
            pass
        finally:
            self._ws = None
            self._connected = False
            self._owner = None
            log.info("Disconnected from Gemini")
            await bus.emit("voice.disconnected")

    async def _ensure_connected(self):
        """Reconnect if dropped, with bounded retries.

        Serialized via _connect_lock so concurrent callers don't trigger
        duplicate handshakes.

        Returns:
            True when a usable connection exists, False once all attempts
            have failed (a "voice.error" event is emitted in that case).
        """
        # Fast path — no lock needed
        if self._connected and self._ws is not None:
            return True

        async with self._connect_lock:
            # Re-check inside the lock (another coroutine may have just connected)
            if self._connected and self._ws is not None:
                return True

            max_attempts = _RECONNECT_MAX_ATTEMPTS
            delay = _RECONNECT_INITIAL_DELAY_SEC
            for attempt in range(max_attempts):
                try:
                    log.warning("Reconnecting to Gemini (attempt %d/%d)", attempt + 1, max_attempts)
                    await self.connect()
                    return True
                except Exception:
                    # connect() already logged the failure with its traceback.
                    self._reconnect_attempts += 1
                    # Back off only when another attempt follows; sleeping
                    # after the final failure just delays the error report.
                    if attempt + 1 < max_attempts:
                        await asyncio.sleep(delay)
                        delay = min(delay * 2, _RECONNECT_MAX_DELAY_SEC)

            log.error("Reconnect failed after %d attempts", max_attempts)
            await bus.emit("voice.error", reason="reconnect_failed")
            return False

    async def send_audio_chunk(self, pcm_b64: str) -> bool:
        """Send a base64-encoded PCM audio chunk (mic input).

        Returns False on failure so the caller can react instead of silently
        no-op'ing forever (the original bug).
        """
        if not self._connected or self._ws is None:
            return False
        msg = {
            "realtimeInput": {
                "mediaChunks": [
                    {"mimeType": "audio/pcm;rate=16000", "data": pcm_b64}
                ]
            }
        }
        try:
            async with self._send_lock:
                await self._ws.send(json.dumps(msg))
            return True
        except websockets.exceptions.ConnectionClosed:
            log.warning("send_audio_chunk: connection closed")
            self._connected = False
            await bus.emit("voice.error", reason="connection_closed")
            return False
        except Exception:
            log.exception("send_audio_chunk failed")
            return False

    async def send_text(self, text: str, owner: str = "send_text") -> tuple[bytes, list[str]]:
        """Send text, receive audio response. Returns (audio_bytes, text_parts).

        Acquires the session lock for the entire request/response cycle so
        no other consumer can steal frames from the receive side.
        If the connection drops on send, reconnects once and retries.

        Raises:
            RuntimeError: when no connection is available and reconnecting
                failed (before the request or after a dropped send).
        """
        if not await self._ensure_connected():
            raise RuntimeError("Not connected to Gemini and reconnect failed.")

        async with self._session_lock:
            self._owner = owner
            try:
                # The link may have dropped (or been disconnect()ed) while we
                # were queued behind another session owner; re-check so the
                # inner send never dereferences a missing socket.
                if not await self._ensure_connected():
                    raise RuntimeError("Not connected to Gemini and reconnect failed.")
                try:
                    return await self._send_text_inner(text)
                except websockets.exceptions.ConnectionClosed:
                    log.warning("send_text: connection died on send — reconnecting once")
                    self._connected = False
                    if not await self._ensure_connected():
                        raise RuntimeError("Reconnect after send failure also failed.")
                    return await self._send_text_inner(text)
            finally:
                self._owner = None

    async def _send_text_inner(self, text: str) -> tuple[bytes, list[str]]:
        """Inner send/receive loop — caller must hold _session_lock.

        Sends one complete user turn, then drains server frames until the
        turn is reported complete, an error frame arrives, the socket closes
        or a single ``recv`` exceeds GEMINI_WS_TIMEOUT.
        """
        request = {
            "client_content": {
                "turns": [{"role": "user", "parts": [{"text": text}]}],
                "turn_complete": True,
            }
        }
        async with self._send_lock:
            await self._ws.send(json.dumps(request))

        audio_chunks: list[bytes] = []
        text_parts: list[str] = []

        while True:
            try:
                raw = await asyncio.wait_for(self._ws.recv(), timeout=GEMINI_WS_TIMEOUT)
            except asyncio.TimeoutError:
                log.warning("send_text: recv timed out")
                break
            except websockets.exceptions.ConnectionClosed:
                log.warning("send_text: connection closed mid-stream")
                self._connected = False
                break

            try:
                resp = json.loads(raw)
            except json.JSONDecodeError:
                log.warning("send_text: bad JSON from server")
                continue

            if "error" in resp:
                log.error("Gemini error: %s", resp["error"])
                await bus.emit("voice.error", reason=str(resp["error"]))
                break

            # `or {}` / `or []`: tolerate keys that are present but null.
            sc = resp.get("serverContent") or {}
            mt = sc.get("modelTurn") or {}
            for part in mt.get("parts") or []:
                inline = part.get("inlineData")
                if inline and inline.get("data"):
                    audio_chunks.append(base64.b64decode(inline["data"]))
                tp = part.get("text")
                if isinstance(tp, str) and tp.strip():
                    text_parts.append(tp.strip())

            input_tr = sc.get("inputTranscription") or {}
            if input_tr.get("text"):
                await bus.emit("voice.user_said", text=input_tr["text"])

            if sc.get("turnComplete") or sc.get("generationComplete"):
                break

        audio_bytes = b"".join(audio_chunks)
        if audio_bytes:
            await bus.emit("voice.gemini_spoke", audio_len=len(audio_bytes))
        return audio_bytes, text_parts

    def acquire_session(self, owner: str) -> "_SessionGuard":
        """Return an async context manager for exclusive session ownership.

        Use as `async with client.acquire_session("live_voice"):`.
        While held, no other consumer may call send_text or receive_stream.
        """
        return _SessionGuard(self, owner)

    async def receive_stream(self):
        """Yield each frame's ``serverContent`` dict. Caller MUST hold the session lock.

        Raises:
            RuntimeError: if no owner is recorded, i.e. the caller did not
                enter ``acquire_session()`` first.
        """
        if self._owner is None:
            raise RuntimeError(
                "receive_stream requires session lock — use acquire_session() first"
            )
        if not self._connected or self._ws is None:
            return
        try:
            async for raw in self._ws:
                try:
                    resp = json.loads(raw)
                except json.JSONDecodeError:
                    continue
                yield resp.get("serverContent", {})
        except websockets.exceptions.ConnectionClosed:
            log.warning("receive_stream: connection closed")
            self._connected = False
            await bus.emit("voice.error", reason="connection_closed")

    async def raw_send(self, payload: dict) -> bool:
        """Low-level send for the live loop. Always goes through the send lock.

        Returns:
            True when the payload was written, False when disconnected or
            the write failed.
        """
        if not self._connected or self._ws is None:
            return False
        try:
            async with self._send_lock:
                await self._ws.send(json.dumps(payload))
            return True
        except websockets.exceptions.ConnectionClosed:
            # Same handling as send_audio_chunk: clear the flag so
            # _ensure_connected() stops taking the "already connected" fast
            # path on a dead socket.
            log.warning("raw_send: connection closed")
            self._connected = False
            return False
        except Exception:
            log.exception("raw_send failed")
            return False

    def status(self) -> dict[str, Any]:
        """Return a JSON-friendly snapshot of the client state."""
        return {
            "connected": self._connected,
            "model": GEMINI_MODEL,
            "voice": GEMINI_VOICE,
            "session_owner": self._owner,
            "reconnect_attempts": self._reconnect_attempts,
        }
class _SessionGuard:
"""Async context manager for exclusive session ownership.
Always releases owner + lock on exit, even on exceptions.
"""
def __init__(self, client: GeminiVoiceClient, owner: str):
self._client = client
self._owner = owner
self._held = False
async def __aenter__(self):
await self._client._session_lock.acquire()
self._held = True
self._client._owner = self._owner
return self._client
async def __aexit__(self, exc_type, exc, tb):
try:
self._client._owner = None
finally:
if self._held:
self._client._session_lock.release()
self._held = False
return False # don't suppress exceptions

1290
vendor/Sanad/gemini/script.py vendored Normal file

File diff suppressed because it is too large Load Diff

604
vendor/Sanad/gemini/subprocess.py vendored Normal file
View File

@ -0,0 +1,604 @@
"""Gemini live subprocess supervisor.
Spawns `voice/sanad_voice.py` as a managed child with `SANAD_VOICE_BRAIN=gemini`,
tails the child's stdout, and extracts state transitions + user transcripts
from the Gemini-specific log lines emitted by `gemini/script.py:GeminiBrain`.
When a new model is added, build its own sibling supervisor (see
`voice/model_subprocess.py` for the template) — do not refactor this file.
"""
from __future__ import annotations
import base64
import json
import os
import signal
import subprocess
import sys
import threading
from collections import deque
from datetime import datetime
from typing import Any, Optional, Union
from pathlib import Path
from Project.Sanad.config import BASE_DIR, LOGS_DIR, SCRIPTS_DIR, LIVE_TUNE
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
# Module logger for supervisor (parent-side) messages.
log = get_logger("gemini_subprocess")
# Tunables from the "gemini" / "subprocess" config section; every lookup
# below supplies its own fallback default.
_LS_CFG = _cfg_section("gemini", "subprocess")
# Camera frame forwarding — push the latest JPEG to the child over stdin
# at this interval (seconds). 0.5 s ≈ 2 fps, matching the child's
# SANAD_VISION_SEND_HZ default. The child de-stales + relays to Gemini.
_FRAME_FORWARD_INTERVAL_S = float(_LS_CFG.get("frame_forward_interval_sec", 0.5))
# Audio profile watcher — poll pactl for the Anker USB device at this
# interval, send "profile:<json>" to the child on every state change.
_AUDIO_WATCH_INTERVAL_S = float(_LS_CFG.get("audio_watch_interval_sec", 1.5))
# The Anker profile id, as defined in voice/audio_devices.py. When this
# profile is fully plugged (both sink + source present), we switch the
# child to "anker"; otherwise we hold the boot fallback profile.
_ANKER_PROFILE_ID = "anker_powerconf"
def _resolve_live_script() -> Path:
    """Locate the voice script to run as subprocess.

    Default: voice/sanad_voice.py (the canonical G1 built-in mic +
    AudioClient speaker path). Override with SANAD_LIVE_SCRIPT.

    Returns:
        The override path when it is set and exists; otherwise the first
        existing default candidate; otherwise ``SCRIPTS_DIR/sanad_voice.py``
        even though it does not exist (the caller checks existence before
        launching).
    """
    override = os.environ.get("SANAD_LIVE_SCRIPT", "").strip()
    if override:
        candidate = Path(override).expanduser()
        if candidate.exists():
            return candidate
        # A mistyped override used to be ignored silently, so the default
        # script ran with no hint that the setting had no effect.
        log.warning(
            "SANAD_LIVE_SCRIPT=%r does not exist; falling back to the default voice script",
            override,
        )
    for candidate in (BASE_DIR / "voice" / "sanad_voice.py",
                      SCRIPTS_DIR / "sanad_voice.py"):
        if candidate.exists():
            return candidate
    return SCRIPTS_DIR / "sanad_voice.py"
# Resolved once at import time; GeminiSubprocess.start() raises if this path
# does not exist.
LIVE_SCRIPT = _resolve_live_script()
# Maximum entries kept in the in-memory log tail and transcript deques.
LOG_TAIL_SIZE = _LS_CFG.get("log_tail_size", 2000)
TRANSCRIPT_TAIL_SIZE = _LS_CFG.get("transcript_tail_size", 30)
# Persistent on-disk log for the full subprocess session.
LIVE_LOG_DIR = LOGS_DIR
LIVE_LOG_NAME = _LS_CFG.get("log_name", "gemini_subprocess")
# stop(): seconds to wait after SIGINT, then after each of terminate()/kill().
_STOP_TIMEOUT_SEC = _LS_CFG.get("stop_timeout_sec", 3.0)
_TERMINATE_TIMEOUT_SEC = _LS_CFG.get("terminate_timeout_sec", 2.0)
# Child output lines starting with one of these prefixes, or containing one
# of the fragments below, are kept out of the in-memory log tail (they are
# still written to the on-disk session log).
_NOISY_PREFIXES = tuple(_LS_CFG.get("noisy_prefixes", [
"ALSA lib ", "Expression 'alsa_", "Cannot connect to server socket",
"jack server is not running",
]))
_NOISY_FRAGMENTS = tuple(_LS_CFG.get("noisy_fragments", [
"Unknown PCM", "Evaluate error", "snd_pcm_open_noupdate",
"PaAlsaStream", "snd_config_evaluate", "snd_func_refer",
]))
class GeminiSubprocess:
    """Supervisor for the live Gemini voice child process.

    Owns the child ``Popen`` plus three daemon threads per session:
      - a reader that tails the child's stdout, mirrors it to disk, parses
        state/transcript markers and fires BOT-text callbacks;
      - a frame forwarder that pushes camera frames over the child's stdin;
      - an audio watcher that signals audio-profile changes over stdin.

    ``self._lock`` guards the process handle, state fields and deques;
    ``self._stdin_lock`` serialises writes to the child's stdin.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self.process: subprocess.Popen | None = None
        self.log_tail: deque[str] = deque(maxlen=LOG_TAIL_SIZE)
        self.user_transcript: deque[str] = deque(maxlen=TRANSCRIPT_TAIL_SIZE)
        # Gemini's OWN spoken text (output transcription). The movement
        # dispatcher (N2) polls this the way LiveVoiceLoop polls
        # user_transcript — it reads what Gemini *said* and fires motion on
        # a confirmation-phrase match (the Marcus pattern). Also handy for
        # surfacing the bot side of the conversation on the dashboard.
        self.bot_transcript: deque[str] = deque(maxlen=TRANSCRIPT_TAIL_SIZE)
        # N2 Phase 3 — callbacks fired with each new BOT: line (Gemini's own
        # spoken text). The MovementDispatcher registers here to drive
        # locomotion off Gemini's confirmation phrases. Fired on the reader
        # thread; callbacks must be cheap / non-blocking (the dispatcher just
        # enqueues to its own worker).
        self._bot_callbacks: list = []
        # _track_line (which runs under self._lock) stashes the latest BOT text
        # here; the reader loop fires callbacks AFTER releasing the lock so a
        # slow callback (e.g. movement dispatch reading state) never stalls the
        # reader thread or blocks log parsing.
        self._pending_bot: str | None = None
        self._reader_thread: threading.Thread | None = None
        self._log_file = None  # opened per-session in _reader_loop
        self.state = "stopped"
        self.state_message = "Idle."
        self.last_user_text = ""
        self.last_bot_text = ""
        self.suppressed_noise = 0

        # ── stdin push channel (camera frames + motion state + profile) ──
        # The child (gemini/script.py) reads "frame:<b64>\n",
        # "state:<json>\n", and "profile:<json>\n" lines off its stdin.
        # Writes are serialised because the frame forwarder, motion-state
        # bus handler, and audio watcher all call from different threads.
        self._stdin_lock = threading.Lock()
        self._camera = None  # set via attach_camera()
        self._frame_thread: threading.Thread | None = None
        self._frame_stop = threading.Event()

        # ── audio profile hot-swap ────────────────────────────────
        # _audio_mgr is the parent's AudioManager — needed so we can keep
        # PulseAudio defaults in sync (so /api/records/play etc. follow
        # the same device the live session uses). Set via attach_audio_manager.
        self._audio_mgr = None
        self._audio_thread: threading.Thread | None = None
        self._audio_stop = threading.Event()
        # The boot profile captured at start() — what we revert to when
        # the Anker is unplugged.
        self._boot_profile_id: str = "builtin"
        # Last profile signalled to the child (for edge-only dispatch).
        self._last_profile_id: str | None = None

    # ── attach / register hooks ───────────────────────────────
    def register_bot_callback(self, callback) -> None:
        """Register a fn(text) fired on each new BOT: line (Gemini's spoken
        text). Used by the N2 movement dispatcher. Cheap/non-blocking only."""
        if callback not in self._bot_callbacks:
            self._bot_callbacks.append(callback)

    def attach_camera(self, camera) -> None:
        """Give the supervisor a reference to the CameraDaemon so it can
        forward frames to the child over stdin while a session runs."""
        self._camera = camera

    def attach_audio_manager(self, audio_mgr) -> None:
        """Hand the parent's AudioManager to the supervisor so the audio
        watcher can keep PulseAudio defaults in sync on every swap (so
        typed-replay / record playback follow the same device as the live
        Gemini session)."""
        self._audio_mgr = audio_mgr

    # ── logging / state helpers ───────────────────────────────
    def _open_session_log(self, pid: int):
        """Open (or re-open) the per-day append log file for this session.

        Returns the line-buffered file handle, or None when the file cannot
        be opened (a warning is logged; the session continues without it).
        """
        try:
            LIVE_LOG_DIR.mkdir(parents=True, exist_ok=True)
            fname = f"{LIVE_LOG_NAME}_{datetime.now().strftime('%Y%m%d')}.log"
            fh = open(LIVE_LOG_DIR / fname, "a", encoding="utf-8", buffering=1)
            fh.write(
                f"\n===== live_gemini subprocess start "
                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} pid={pid} =====\n"
            )
            return fh
        except Exception as exc:
            log.warning("Could not open live-gemini log file: %s", exc)
            return None

    def _is_noisy(self, line: str) -> bool:
        """True for ALSA/JACK chatter that should stay out of the log tail."""
        return line.startswith(_NOISY_PREFIXES) or any(f in line for f in _NOISY_FRAGMENTS)

    def _set_state(self, state: str, msg: str):
        """Record the session state and its human-readable message."""
        self.state = state
        self.state_message = msg

    def _track_line(self, line: str):
        """Parse Gemini-specific log markers emitted by `gemini/script.py`.

        Must stay in lock-step with the `log.info(...)` strings in
        `GeminiBrain`. If you add a new state, add the emit in the brain
        AND the matching detector here in one PR.

        Called by the reader loop with ``self._lock`` held.
        """
        if "connecting to Gemini" in line:
            self._set_state("connecting", line)
        elif "connected — speak anytime" in line or "connected - speak anytime" in line:
            self._set_state("listening", "Listening for speech.")
        elif " USER: " in line or line.strip().startswith("USER:"):
            # GeminiBrain emits: log.info("USER: %s", text)
            text = line.split("USER:", 1)[1].strip()
            if text:
                self.last_user_text = text
                self.user_transcript.append(text)
                self._set_state("hearing", f"User: {text}")
        elif " BOT: " in line or line.strip().startswith("BOT:"):
            # GeminiBrain emits: log.info("BOT: %s", text) — Gemini's own
            # spoken text. The movement dispatcher (N2) reads this deque to
            # match confirmation phrases. Deliberately does NOT change the
            # session state (that stays driven by USER / listening markers).
            # NOTE: must precede the generic "listening" catch below, else a
            # bot line that happens to contain "listening" would be misrouted.
            text = line.split("BOT:", 1)[1].strip()
            if text:
                self.last_bot_text = text
                self.bot_transcript.append(text)
                # Defer callback firing to the reader loop, OUTSIDE self._lock.
                self._pending_bot = text
        elif "BARGE-IN" in line or "Gemini interrupted" in line or "interrupt (" in line:
            self._set_state("interrupting", line)
        elif "listening" in line.lower() and "no speech" not in line:
            # Fires on "listening" (post-turn) — keep the state fresh.
            self._set_state("listening", "Listening for speech.")
        elif "session error" in line or "client recreation failed" in line:
            self._set_state("error", line)
        elif "server going away" in line or "session ended" in line or "session dead" in line:
            self._set_state("warning", line)
        elif "keyboard interrupt" in line or "cancelled — stopping" in line:
            self._set_state("stopped", line)

    def _reader_loop(self):
        """Reader thread body: tail the child's stdout until EOF.

        Every non-empty line goes to the on-disk log — including the ALSA
        noise that is filtered out of the in-memory tail — so a field
        post-mortem has the full raw capture. The exit bookkeeping (state,
        log footer, handle close) runs in ``finally`` so it happens even if
        reading fails, e.g. because stop() closed the pipe underneath us.
        """
        proc = self.process
        if proc is None or proc.stdout is None:
            return

        fh = self._open_session_log(proc.pid)
        self._log_file = fh

        try:
            for line in proc.stdout:
                clean = line.rstrip()
                if not clean:
                    continue
                if fh is not None:
                    try:
                        fh.write(clean + "\n")
                    except Exception:
                        # Disk logging is best-effort; never lose the tail for it.
                        pass
                fired_bot = None
                with self._lock:
                    if self._is_noisy(clean):
                        self.suppressed_noise += 1
                        continue
                    self.log_tail.append(clean)
                    self._track_line(clean)
                    fired_bot = self._pending_bot
                    self._pending_bot = None
                # Fire BOT-text callbacks (movement dispatch) OUTSIDE the lock so a
                # slow callback can't stall transcript parsing. Iterate over a
                # snapshot: register_bot_callback may append from another thread.
                if fired_bot is not None:
                    for cb in tuple(self._bot_callbacks):
                        try:
                            cb(fired_bot)
                        except Exception:
                            log.exception("bot-text callback failed")
        except Exception as exc:
            log.warning("Live Gemini reader loop aborted: %s", exc)
        finally:
            with self._lock:
                self.log_tail.append("Live Gemini process exited.")
                self._set_state("stopped", "Process exited.")
            if fh is not None:
                try:
                    fh.write(
                        f"===== live_gemini subprocess exit "
                        f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====\n"
                    )
                    fh.close()
                except Exception:
                    pass
            self._log_file = None

    def _halt_workers(self) -> None:
        """Signal the frame forwarder and audio watcher to stop and reap them.

        Safe to call when neither is running. Joins are bounded so a worker
        blocked on a stuck pipe cannot hang the caller.
        """
        self._frame_stop.set()
        self._audio_stop.set()
        current = threading.current_thread()
        for attr in ("_frame_thread", "_audio_thread"):
            worker = getattr(self, attr)
            if worker is not None and worker is not current and worker.is_alive():
                worker.join(timeout=2.0)
            setattr(self, attr, None)

    def is_running(self) -> bool:
        """True while a child process exists and has not exited."""
        with self._lock:
            return self.process is not None and self.process.poll() is None

    def start(self) -> dict[str, Any]:
        """Spawn the live voice child and its helper threads.

        Returns:
            ``{"started": True, "pid": ...}`` on success, or
            ``{"started": False, "message": "Already running.", "pid": ...}``
            when a child is already alive.

        Raises:
            RuntimeError: the live script does not exist.
            Exception: whatever ``subprocess.Popen`` raised. In both failure
                cases the state is set to "error" rather than left at
                "starting".
        """
        with self._lock:
            if self.process is not None and self.process.poll() is None:
                return {"started": False, "message": "Already running.", "pid": self.process.pid}
            self._set_state("starting", "Starting...")

        script = LIVE_SCRIPT
        if not script.exists():
            with self._lock:
                self._set_state("error", f"Script not found: {script}")
            raise RuntimeError(f"Script not found: {script}")

        env = os.environ.copy()
        env.update({"PYTHONUNBUFFERED": "1", **LIVE_TUNE})

        # Pass the current G1 speaker volume as an env var so the
        # subprocess can compute the correct barge-in threshold at
        # startup. Without this, sanad_voice.py would read the volume
        # from a stale or non-existent config file path and default to
        # 100, scaling the barge-in threshold wrong for any non-100%
        # volume. load_config() reads data/motions/config.json — the
        # file the dashboard writes to when the user moves the slider.
        try:
            from Project.Sanad.config import load_config
            _cfg = load_config() or {}
            _audio_cfg = _cfg.get("audio") if isinstance(_cfg.get("audio"), dict) else {}
            _g1_vol = int(_audio_cfg.get("g1_volume", 100))
            _g1_vol = max(0, min(100, _g1_vol))
            env["SANAD_G1_VOLUME"] = str(_g1_vol)
            log.info("Passing SANAD_G1_VOLUME=%d to subprocess", _g1_vol)
        except Exception as exc:
            log.warning("Could not read g1_volume for subprocess: %s", exc)

        # sanad_voice.py takes the DDS interface as the first positional arg
        dds_iface = env.get("SANAD_DDS_INTERFACE", "eth0")
        cmd = [sys.executable, str(script), dds_iface]

        try:
            proc = subprocess.Popen(
                cmd,
                cwd=str(script.parent),
                stdin=subprocess.PIPE,  # camera frames + motion state push
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                # Explicit codec + lenient decoding: one undecodable byte in
                # the child's output (driver noise) must not raise in the
                # reader thread and leave the pipe undrained.
                encoding="utf-8",
                errors="replace",
                bufsize=1,
                env=env,
            )
        except Exception as exc:
            with self._lock:
                self._set_state("error", f"Failed to start: {exc}")
            raise

        # Reap any stale frame forwarder / audio watcher from a previous
        # session that ended by a child crash rather than a clean stop() —
        # otherwise they'd keep spinning and we'd leak threads per restart.
        self._halt_workers()

        # Capture the boot profile for this session — the audio watcher
        # uses it as the fallback when the Anker is unplugged. Read it from
        # the environment actually handed to the child (os.environ plus
        # LIVE_TUNE) so parent + child agree.
        self._boot_profile_id = env.get(
            "SANAD_AUDIO_PROFILE", "builtin").strip().lower()
        self._last_profile_id = None  # force one initial send_profile

        with self._lock:
            self.process = proc
            self.log_tail.append(f"Started: pid={proc.pid}")
            self._set_state("starting", f"pid={proc.pid}")

        self._reader_thread = threading.Thread(target=self._reader_loop, daemon=True)
        self._reader_thread.start()

        # Frame forwarder — pushes camera JPEGs to the child over stdin.
        self._frame_stop.clear()
        self._frame_thread = threading.Thread(
            target=self._frame_forwarder, daemon=True, name="gemini-frame-fwd",
        )
        self._frame_thread.start()

        # Audio watcher — polls pactl for Anker presence and signals
        # the child to hot-swap mic+speaker when it changes.
        self._audio_stop.clear()
        self._audio_thread = threading.Thread(
            target=self._audio_watcher, daemon=True, name="gemini-audio-watcher",
        )
        self._audio_thread.start()

        log.info("Live Gemini subprocess started: pid=%d", proc.pid)
        return {"started": True, "pid": proc.pid}

    # ── stdin push channel ────────────────────────────────────
    def _send_stdin(self, line: str) -> None:
        """Serialised stdin write — the frame forwarder, motion-state handler
        and audio watcher all call this from different threads. Best-effort:
        a closed pipe or a not-yet-started process is a silent no-op."""
        proc = self.process
        if proc is None or proc.stdin is None:
            return
        try:
            with self._stdin_lock:
                if not proc.stdin.closed:
                    proc.stdin.write(line)
                    proc.stdin.flush()
        except Exception:
            # Pipe broke (child exited) — drop silently; the reader thread
            # will surface the exit via state="stopped".
            pass

    def send_frame(self, jpeg: Union[bytes, str]) -> None:
        """Forward one camera frame to the child as 'frame:<base64>\\n'.

        Accepts raw JPEG bytes (base64-encoded here) or an already-base64
        ASCII string (e.g. CameraDaemon.get_frame_b64() — no re-encode).
        Any other type, or an empty payload, is ignored."""
        if isinstance(jpeg, bytes):
            b64 = base64.b64encode(jpeg).decode("ascii")
        elif isinstance(jpeg, str):
            b64 = jpeg.strip()
        else:
            return
        if b64:
            self._send_stdin("frame:" + b64 + "\n")

    def send_state(self, event: str, cmd: str,
                   elapsed_sec: Optional[float] = None,
                   reason: Optional[str] = None) -> None:
        """Push a motion-state update to the child as 'state:<json>\\n'.

        Events: start | complete | interrupted | error. The child injects
        '[STATE-...] <cmd>' into the live Gemini session as silent text
        context so Gemini can answer "what are you doing?" honestly.
        A missing event or cmd makes this a no-op; ``reason`` is truncated
        to 200 characters."""
        if not event or not cmd:
            return
        payload: dict[str, Any] = {"event": event, "cmd": cmd}
        if elapsed_sec is not None:
            payload["elapsed_sec"] = round(float(elapsed_sec), 2)
        if reason:
            payload["reason"] = str(reason)[:200]
        try:
            line = "state:" + json.dumps(payload, ensure_ascii=False) + "\n"
        except Exception:
            return
        self._send_stdin(line)

    def _frame_forwarder(self) -> None:
        """Background thread — push the camera's latest frame to the child.

        Runs for the lifetime of one subprocess session. Gated on the
        camera actually running; the child does its own vision-enabled +
        staleness checks, so this stays dumb (camera up → push). Exits
        immediately when no camera was attached before start()."""
        cam = self._camera
        if cam is None:
            return
        while not self._frame_stop.is_set():
            if self._frame_stop.wait(_FRAME_FORWARD_INTERVAL_S):
                break
            try:
                if not cam.is_running():
                    continue
                b64 = cam.get_frame_b64()
                if b64:
                    self.send_frame(b64)
            except Exception:
                # Best-effort — never let a frame hiccup kill the thread.
                pass

    # ── audio profile watcher (parent-side detection) ────────────
    def send_profile(self, profile_id: str, reason: str = "") -> None:
        """Push an audio-profile hot-swap command to the child as
        'profile:<json>\\n'. The child's _stdin_watcher parses it and
        _audio_swap_loop performs the actual mic/speaker rebind. No-op
        if the process isn't running or stdin is closed. Unknown profile
        ids are logged and ignored; ``reason`` is truncated to 120
        characters."""
        pid = (profile_id or "").strip().lower()
        if pid not in {"builtin", "anker", "hollyland_builtin"}:
            log.warning("send_profile: ignoring unknown profile %r", profile_id)
            return
        payload: dict[str, Any] = {"id": pid}
        if reason:
            payload["reason"] = reason[:120]
        try:
            line = "profile:" + json.dumps(payload, ensure_ascii=False) + "\n"
        except Exception:
            return
        self._send_stdin(line)

    def _audio_watcher(self) -> None:
        """Background thread — poll pactl for the Anker USB device, signal
        the child on every plug/unplug edge transition.

        Detection reuses voice.audio_devices.detect_plugged_profiles().
        Zero new deps.

        Edge-only dispatch: we only call send_profile() when the target
        flips. Rapid bounce (loose cable) is naturally rate-limited by the
        poll interval. After every send_profile we also refresh the parent
        audio_manager's PulseAudio defaults so non-live playback (typed
        replay, record playback) follows the same device.
        """
        # Lazy import — keeps voice.audio_devices out of this module's
        # top-level imports.
        try:
            from Project.Sanad.voice import audio_devices as _ad
        except Exception as exc:
            log.warning("audio watcher disabled — audio_devices import failed: %s", exc)
            return

        try:
            if not _ad.pactl_available():
                log.warning("audio watcher disabled — pactl not available")
                return
        except Exception:
            # If pactl_available itself isn't exposed, fall through and try
            # detect_plugged_profiles — it'll raise/return empty if pactl
            # is missing and we handle that below.
            pass

        boot_profile = self._boot_profile_id or "builtin"
        log.info("audio watcher started — Anker→anker, no-Anker→%s (poll=%.1fs)",
                 boot_profile, _AUDIO_WATCH_INTERVAL_S)

        while not self._audio_stop.is_set():
            if self._audio_stop.wait(_AUDIO_WATCH_INTERVAL_S):
                break
            try:
                # Recovery script (set_powerconf_audio.sh) is intentionally
                # NOT invoked from the watcher — its old card-discovery /
                # module-alsa-source attempts loaded the wrong hw device
                # on this Jetson and knocked the Anker out of pactl
                # entirely (observed 2026-06-03). The script is now a
                # passive set-default-sink/source helper meant to be run
                # by hand, not from the watcher. The watcher just detects
                # plug edges and dispatches profile changes to the child.
                plugged = _ad.detect_plugged_profiles()
                ids = {p.get("profile", {}).get("id") for p in (plugged or [])}
                anker_present = _ANKER_PROFILE_ID in ids
                target = "anker" if anker_present else boot_profile
                reason = "anker plugged" if anker_present else "anker unplugged"
                # Surface which detection path succeeded (Path A vs pactl)
                if anker_present:
                    for p in plugged:
                        if p.get("profile", {}).get("id") == _ANKER_PROFILE_ID:
                            via = p.get("source_via", "pactl")
                            if via != "pactl":
                                reason += f" via {via}"
                            break

                if target == self._last_profile_id:
                    continue  # edge-only

                prev = self._last_profile_id
                # Separator added: the two "%s" used to be adjacent, which
                # printed the old and new profile ids run together.
                log.info("audio watcher: %s → %s (%s)",
                         prev or "none", target, reason)
                self.send_profile(target, reason=reason)
                self._last_profile_id = target

                # Keep PulseAudio defaults aligned so non-live playback
                # follows the same device the live session uses.
                if self._audio_mgr is not None:
                    try:
                        self._audio_mgr.refresh_devices()
                    except Exception as exc:
                        log.warning("audio watcher: refresh_devices failed: %s", exc)
            except Exception as exc:
                # Never let a transient pactl glitch kill the thread.
                log.warning("audio watcher iteration failed: %s", exc)

    def stop(self) -> dict[str, Any]:
        """Stop the child (SIGINT → terminate → kill) and its helper threads.

        Returns:
            ``{"stopped": True, "returncode": rc}`` after stopping a live
            child, or ``{"stopped": False, "message": "Not running."}`` when
            there was nothing to stop. Helper threads are halted in both
            cases, so a session that ended by a child crash no longer leaves
            the frame forwarder / audio watcher polling in the background.
        """
        with self._lock:
            proc = self.process
            running = proc is not None and proc.poll() is None
            if running:
                self._set_state("stopping", "Stopping...")

        # Halt forwarder + audio watcher before we tear the pipe down.
        self._halt_workers()

        if not running:
            return {"stopped": False, "message": "Not running."}

        try:
            proc.send_signal(signal.SIGINT)
            proc.wait(timeout=_STOP_TIMEOUT_SEC)
        except subprocess.TimeoutExpired:
            proc.terminate()
            try:
                proc.wait(timeout=_TERMINATE_TIMEOUT_SEC)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait(timeout=_TERMINATE_TIMEOUT_SEC)

        rc = proc.returncode

        # Let the reader drain to EOF before its pipe is closed; closing
        # stdout while it is still iterating would abort it mid-line. The
        # join is bounded, and skipped when stop() is invoked from the reader
        # thread itself (e.g. from a BOT-text callback).
        reader = self._reader_thread
        if reader is not None and reader is not threading.current_thread():
            reader.join(timeout=2.0)
        self._reader_thread = None

        # Close stdin/stdout explicitly — without this each start/stop
        # cycle leaks FDs (relied on Popen.__del__ which only runs at GC;
        # a reconnect loop would march the FD count to the OS limit).
        for pipe in (getattr(proc, "stdin", None), getattr(proc, "stdout", None)):
            if pipe is not None:
                try:
                    pipe.close()
                except Exception:
                    pass

        with self._lock:
            self.process = None
            self.log_tail.append("Stopped.")
            self._set_state("stopped", "Stopped.")

        log.info("Live Gemini subprocess stopped (rc=%s)", rc)
        return {"stopped": True, "returncode": rc}

    def status(self) -> dict[str, Any]:
        """Return a JSON-friendly snapshot of the session (taken under the lock)."""
        with self._lock:
            running = self.process is not None and self.process.poll() is None
            return {
                "running": running,
                "pid": self.process.pid if running and self.process else None,
                "state": self.state,
                "state_message": self.state_message,
                "last_user_text": self.last_user_text,
                "last_bot_text": self.last_bot_text,
                "user_transcript": list(self.user_transcript),
                "bot_transcript": list(self.bot_transcript),
                "log_tail": list(self.log_tail),
                "suppressed_noise": self.suppressed_noise,
            }

0
vendor/Sanad/local/__init__.py vendored Normal file
View File

305
vendor/Sanad/local/llm.py vendored Normal file
View File

@ -0,0 +1,305 @@
"""LLM layer — Qwen 2.5 Instruct via Ollama (default) or self-managed llama.cpp.
Phase 3 of the local pipeline. Two backends, selectable via
`config/local_config.json > llm.backend`:
    "ollama"    — talk to a running `ollama serve` daemon (default).
                  No subprocess management, no CUDA build. Just:
                      ollama pull qwen2.5:1.5b
                      # daemon usually auto-starts; if not: `ollama serve &`
    "llama_cpp" — launch our own `llama-server` subprocess. Requires
                  a CUDA build of llama.cpp and a GGUF file at
                  `model/local/<llm.model_subdir>`.
Both backends stream tokens and chunk them on sentence delimiters so
the TTS can start synthesising before the LLM finishes.
"""
from __future__ import annotations
import asyncio
import json
import shutil
import subprocess
import time
from typing import AsyncIterator, Optional
from Project.Sanad.config import MODEL_DIR
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
# Module logger and the `local.llm` config section every constant below is read from.
log = get_logger("local_llm")
_CFG = _cfg_section("local", "llm")
# Backend selector, normalised to lower case; a missing/empty value falls back to "ollama".
BACKEND = (_CFG.get("backend") or "ollama").strip().lower()
# Ollama — daemon address, model tag, and the keep_alive value sent with each request.
OLLAMA_HOST = _CFG.get("ollama_host", "127.0.0.1")
OLLAMA_PORT = int(_CFG.get("ollama_port", 11434))
OLLAMA_MODEL = _CFG.get("ollama_model", "qwen2.5:1.5b")
OLLAMA_KEEP_ALIVE = _CFG.get("ollama_keep_alive", "5m")
# llama.cpp — GGUF file name under model/local, server binary, bind address and launch flags.
MODEL_SUBDIR = _CFG.get("model_subdir", "qwen2.5-1.5b-instruct-q4_k_m.gguf")
SERVER_BIN = _CFG.get("server_binary", "llama-server")
HOST = _CFG.get("host", "127.0.0.1")
PORT = int(_CFG.get("port", 8080))
N_GPU_LAYERS = _CFG.get("n_gpu_layers", 99)
CTX_SIZE = _CFG.get("ctx_size", 2048)
THREADS = _CFG.get("threads", 4)
# Seconds to wait for the llama-server /health endpoint after launch.
STARTUP_TIMEOUT = _CFG.get("startup_timeout_sec", 30)
# Shared generation params (used by both backends' streaming requests).
REQUEST_TIMEOUT = _CFG.get("request_timeout_sec", 30)
MAX_TOKENS = _CFG.get("max_tokens", 200)
TEMPERATURE = _CFG.get("temperature", 0.7)
TOP_P = _CFG.get("top_p", 0.9)
STOP_SEQS = list(_CFG.get("stop", ["<|im_end|>"]))
# A chunk is emitted once the buffer holds at least CHUNK_MIN_CHARS characters
# and its last character is one of CHUNK_DELIMS (Latin and Arabic punctuation).
CHUNK_DELIMS = _CFG.get("chunk_delimiters", ".,?!؟،")
CHUNK_MIN_CHARS = int(_CFG.get("chunk_min_chars", 8))
# Full path of the GGUF file the llama.cpp backend loads.
LOCAL_MODEL_PATH = MODEL_DIR / "local" / MODEL_SUBDIR
class LlamaServer:
"""Thin wrapper — owns subprocess (llama.cpp) or no-op (ollama)."""
def __init__(self) -> None:
self._proc: Optional[subprocess.Popen] = None
# ─── lifecycle ────────────────────────────────────────
def start(self) -> None:
    """Bring up whichever backend `llm.backend` selects.

    * ``llama_cpp`` — launches (or reuses) our own llama-server child.
    * ``ollama``    — only verifies the daemon and model are available.

    Raises:
        RuntimeError: the backend name is unknown, or the selected
            backend's own check/launch failed.
    """
    if BACKEND == "llama_cpp":
        self._start_llama_cpp()
    elif BACKEND == "ollama":
        self._check_ollama()
        log.info("LLM backend=ollama model=%s (@ %s:%d)",
                 OLLAMA_MODEL, OLLAMA_HOST, OLLAMA_PORT)
    else:
        raise RuntimeError(f"unknown llm.backend: {BACKEND!r}")
def stop(self) -> None:
    """Terminate the llama-server child, if one was launched.

    Sends SIGTERM and waits 3 s; escalates to SIGKILL and waits 2 s more.
    Any failure (including a timeout after the kill) is logged, never
    raised. The child's stderr pipe is closed explicitly — it was opened
    with ``stderr=PIPE`` and would otherwise stay open until garbage
    collection, leaking one FD per start/stop cycle. ``self._proc`` is
    always cleared so a later `start()` can relaunch.

    No-op when no child exists (e.g. the Ollama backend).
    """
    proc = self._proc
    if proc is None:
        return
    try:
        proc.terminate()
        try:
            proc.wait(timeout=3)
        except subprocess.TimeoutExpired:
            # Graceful shutdown took too long — force it.
            proc.kill()
            proc.wait(timeout=2)
    except Exception as exc:
        log.warning("llama-server stop error: %s", exc)
    finally:
        stderr_pipe = getattr(proc, "stderr", None)
        if stderr_pipe is not None:
            try:
                stderr_pipe.close()
            except Exception:
                pass  # best-effort: the pipe may already be closed
        self._proc = None
def alive(self) -> bool:
    """Report whether the active backend is currently usable.

    For Ollama this is a live HTTP probe of the daemon; for any other
    backend it is "our child process exists and has not exited".
    """
    if BACKEND != "ollama":
        child = self._proc
        return child is not None and child.poll() is None
    return self._ping_ollama()
# ─── Ollama backend ───────────────────────────────────
def _check_ollama(self) -> None:
    """Verify the Ollama daemon answers and that the model is pulled.

    Queries ``/api/tags`` (3 s timeout) and looks for ``OLLAMA_MODEL`` as
    a substring of any listed model name.

    Raises:
        RuntimeError: the daemon is unreachable / returned unparsable
            data, or no listed model name contains ``OLLAMA_MODEL``.
    """
    import urllib.request

    tags_url = f"http://{OLLAMA_HOST}:{OLLAMA_PORT}/api/tags"
    try:
        with urllib.request.urlopen(tags_url, timeout=3) as r:
            raw_body = r.read()
        body = json.loads(raw_body.decode("utf-8"))
    except Exception as exc:
        raise RuntimeError(
            f"Ollama daemon not reachable at {tags_url} — is `ollama serve` running? ({exc})"
        )

    models = [entry.get("name", "") for entry in body.get("models", [])]
    for listed in models:
        if OLLAMA_MODEL in listed:
            return
    raise RuntimeError(
        f"Ollama model {OLLAMA_MODEL!r} not pulled. "
        f"Run: `ollama pull {OLLAMA_MODEL}`. Available: {models}"
    )
def _ping_ollama(self) -> bool:
    """Cheap liveness probe: True iff ``/api/tags`` answers 200 within 1 s."""
    import urllib.request

    probe_url = f"http://{OLLAMA_HOST}:{OLLAMA_PORT}/api/tags"
    try:
        with urllib.request.urlopen(probe_url, timeout=1) as resp:
            reachable = resp.status == 200
    except Exception:
        # Any failure (refused, timeout, HTTP error) counts as "not alive".
        reachable = False
    return reachable
async def _stream_ollama(self, user_text: str, system_prompt: str,
                         cancel: asyncio.Event) -> AsyncIterator[str]:
    """Stream a reply from Ollama's ``/api/generate`` as sentence-sized chunks.

    Each response line is a JSON object; its ``response`` field is
    appended to a buffer, and the buffer is yielded (stripped) whenever
    it is at least ``CHUNK_MIN_CHARS`` long and ends in one of
    ``CHUNK_DELIMS``. Whatever remains is flushed when the stream ends.

    Args:
        user_text: the user's utterance (sent as ``prompt``).
        system_prompt: sent as ``system``.
        cancel: when set, the stream is abandoned without flushing.

    Yields:
        Text chunks ready for TTS.

    Errors are logged, not raised. A non-200 response is logged with the
    start of its body and yields nothing — previously it was parsed like
    a normal stream and silently produced an empty reply. An ``error``
    object inside the stream is logged and ends the stream.
    """
    import aiohttp

    url = f"http://{OLLAMA_HOST}:{OLLAMA_PORT}/api/generate"
    payload = {
        "model": OLLAMA_MODEL,
        "system": system_prompt,
        "prompt": user_text,
        "stream": True,
        "keep_alive": OLLAMA_KEEP_ALIVE,
        "options": {
            "num_predict": MAX_TOKENS,
            "temperature": TEMPERATURE,
            "top_p": TOP_P,
            "stop": STOP_SEQS,
        },
    }
    buf = ""
    async with aiohttp.ClientSession() as sess:
        try:
            async with sess.post(
                    url, json=payload,
                    timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT)) as resp:
                if resp.status != 200:
                    detail = (await resp.text())[:200]
                    log.warning("Ollama stream error: HTTP %d %s",
                                resp.status, detail)
                    return
                async for raw in resp.content:
                    if cancel.is_set():
                        log.info("LLM stream cancelled (barge-in)")
                        return
                    line = raw.decode("utf-8", errors="ignore").strip()
                    if not line:
                        continue
                    try:
                        obj = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    if obj.get("error"):
                        # Surface the failure; fall through to flush any
                        # text that was already generated.
                        log.warning("Ollama stream error: %s", obj["error"])
                        break
                    token = obj.get("response", "")
                    if token:
                        buf += token
                        if len(buf) >= CHUNK_MIN_CHARS and buf[-1] in CHUNK_DELIMS:
                            yield buf.strip()
                            buf = ""
                    if obj.get("done"):
                        break
        except asyncio.CancelledError:
            return
        except Exception as exc:
            log.warning("Ollama stream error: %s", exc)
            return
    # Flush the tail that never hit a delimiter.
    if buf.strip():
        yield buf.strip()
# ─── llama.cpp backend ────────────────────────────────
def _start_llama_cpp(self) -> None:
    """Launch `llama-server` and block until its health endpoint is up.

    Does nothing if a previously launched child is still running.

    Raises:
        RuntimeError: the GGUF file is missing, the server exited during
            startup, or it did not become healthy in time.

    If the readiness wait fails, the child is stopped before the error
    is re-raised. Previously a server that was slow to come up kept
    running (and kept its stderr pipe open) after the caller had already
    given up on it.
    """
    if self._proc is not None and self._proc.poll() is None:
        return
    if not LOCAL_MODEL_PATH.exists():
        raise RuntimeError(f"LLM model not found at {LOCAL_MODEL_PATH}")
    # Fall back to the bare name so Popen reports a clear error if absent.
    bin_path = shutil.which(SERVER_BIN) or SERVER_BIN
    cmd = [
        bin_path,
        "-m", str(LOCAL_MODEL_PATH),
        "--host", HOST,
        "--port", str(PORT),
        "--n-gpu-layers", str(N_GPU_LAYERS),
        "--ctx-size", str(CTX_SIZE),
        "--threads", str(THREADS),
        "--log-disable",
    ]
    log.info("launching llama-server: %s", " ".join(cmd))
    self._proc = subprocess.Popen(
        cmd,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        text=True,
    )
    try:
        self._wait_llama_cpp_ready()
    except Exception:
        # Do not leave a half-started server behind.
        try:
            self.stop()
        except Exception:
            log.warning("llama-server cleanup after failed start failed",
                        exc_info=True)
        raise
    log.info("llama-server ready (pid=%d)", self._proc.pid)
def _wait_llama_cpp_ready(self) -> None:
    """Poll ``/health`` until llama-server answers 200 or the timeout hits.

    Raises:
        RuntimeError: the child exited while we were waiting (the first
            500 characters of its stderr are included), or it did not
            answer 200 within ``STARTUP_TIMEOUT`` seconds.

    The deadline uses the monotonic clock so a wall-clock adjustment
    during startup cannot shorten or extend the wait. The loop sleeps
    after every unsuccessful probe, including a non-200 response that
    did not raise, so it never spins.
    """
    import urllib.request

    deadline = time.monotonic() + STARTUP_TIMEOUT
    url = f"http://{HOST}:{PORT}/health"
    while time.monotonic() < deadline:
        proc = self._proc
        if proc is not None and proc.poll() is not None:
            stderr = proc.stderr.read() if proc.stderr else ""
            raise RuntimeError(
                f"llama-server exited early (code={proc.returncode}): {stderr[:500]}"
            )
        try:
            with urllib.request.urlopen(url, timeout=1) as r:
                if r.status == 200:
                    return
        except Exception:
            pass  # not listening yet, or still loading — retry below
        time.sleep(0.3)
    raise RuntimeError(f"llama-server did not come up within {STARTUP_TIMEOUT}s")
async def _stream_llama_cpp(self, user_text: str, system_prompt: str,
                            cancel: asyncio.Event) -> AsyncIterator[str]:
    """Stream a reply from llama-server's ``/completion`` as sentence chunks.

    The response is read as ``data:``-prefixed lines, each carrying a
    JSON object whose ``content`` field is the next piece of text. Text
    is buffered and yielded (stripped) once the buffer is at least
    ``CHUNK_MIN_CHARS`` long and ends in one of ``CHUNK_DELIMS``; the
    remainder is flushed at end of stream.

    Args:
        user_text: the user's utterance.
        system_prompt: system message; both are wrapped in ChatML.
        cancel: when set, the stream is abandoned without flushing.

    Yields:
        Text chunks ready for TTS.

    Errors are logged, not raised. A non-200 response is logged with the
    start of its body and yields nothing — previously it was silently
    treated as an empty reply.
    """
    import aiohttp

    prompt = self._format_chatml_prompt(user_text, system_prompt)
    payload = {
        "prompt": prompt,
        "stream": True,
        "n_predict": MAX_TOKENS,
        "temperature": TEMPERATURE,
        "top_p": TOP_P,
        "stop": STOP_SEQS,
        "cache_prompt": True,
    }
    url = f"http://{HOST}:{PORT}/completion"
    buf = ""
    async with aiohttp.ClientSession() as sess:
        try:
            async with sess.post(
                    url, json=payload,
                    timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT)) as resp:
                if resp.status != 200:
                    detail = (await resp.text())[:200]
                    log.warning("llama-server stream error: HTTP %d %s",
                                resp.status, detail)
                    return
                async for raw in resp.content:
                    if cancel.is_set():
                        log.info("LLM stream cancelled (barge-in)")
                        return
                    line = raw.decode("utf-8", errors="ignore").strip()
                    if not line.startswith("data:"):
                        continue
                    line = line[len("data:"):].strip()
                    if not line or line == "[DONE]":
                        continue
                    try:
                        obj = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    token = obj.get("content", "")
                    if not token:
                        # An empty event with `stop` set ends the stream.
                        if obj.get("stop"):
                            break
                        continue
                    buf += token
                    if len(buf) >= CHUNK_MIN_CHARS and buf[-1] in CHUNK_DELIMS:
                        yield buf.strip()
                        buf = ""
        except asyncio.CancelledError:
            return
        except Exception as exc:
            log.warning("llama-server stream error: %s", exc)
            return
    # Flush the tail that never hit a delimiter.
    if buf.strip():
        yield buf.strip()
@staticmethod
def _format_chatml_prompt(user_text: str, system_prompt: str) -> str:
return (
f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
f"<|im_start|>user\n{user_text}<|im_end|>\n"
f"<|im_start|>assistant\n"
)
# ─── public streaming entry point ─────────────────────
async def stream(self, user_text: str, system_prompt: str,
                 cancel: asyncio.Event) -> AsyncIterator[str]:
    """Yield sentence-sized text chunks as the LLM generates.

    A chunk boundary is any character in `CHUNK_DELIMS` once the buffer
    length is at least `CHUNK_MIN_CHARS`. The final buffer is flushed on
    completion even without a delimiter. If `cancel` is set, the request
    is abandoned and the generator returns.

    Raises:
        RuntimeError: `llm.backend` names an unknown backend.
    """
    if BACKEND == "ollama":
        source = self._stream_ollama(user_text, system_prompt, cancel)
    elif BACKEND == "llama_cpp":
        source = self._stream_llama_cpp(user_text, system_prompt, cancel)
    else:
        raise RuntimeError(f"unknown llm.backend: {BACKEND!r}")
    async for piece in source:
        yield piece

259
vendor/Sanad/local/script.py vendored Normal file
View File

@ -0,0 +1,259 @@
"""LocalBrain — fully on-device voice pipeline.
Implements the same contract as `gemini/script.py:GeminiBrain` so
`voice/sanad_voice.py` can swap it in via `SANAD_VOICE_BRAIN=local`.
Wires together four subsystems:
Phase 1 Silero VAD (mic → speech boundaries)
Phase 2 faster-whisper (speech → text)
Phase 3 llama.cpp + Qwen (text → streaming text chunks)
Phase 4 CosyVoice2 streaming (text chunk → cloned-voice audio)
Phase 5 barge-in (user speaks → cancel LLM + stop speaker)
Phase 6 stability — model load fails cleanly, crashes are logged.
Async structure:
run() is the main coroutine. It spawns three tasks:
_mic_task reads mic, VAD, Whisper, pushes user text to _llm_queue
_dialogue_task pops user text, streams LLM tokens into _tts_queue
_tts_task pops text chunks, synthesises, feeds the speaker
Logging contract (matched by local/subprocess.py._track_line):
"connecting to local pipeline"
"listening"
"USER: <text>"
"BOT: <text>"
"BARGE-IN (local)"
"session error: <msg>"
"""
from __future__ import annotations
import asyncio
import time
from typing import Optional
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
from Project.Sanad.local.llm import LlamaServer
from Project.Sanad.local.stt import WhisperSTT
from Project.Sanad.local.tts import CosyVoiceTTS
from Project.Sanad.local.vad import SileroVAD, FRAME_SAMPLES
# Module logger; the supervisor parses marker strings out of the lines logged here.
log = get_logger("local_brain")
# `voice.sanad_voice` config section.
# NOTE(review): not referenced in the visible part of this module — confirm it is still needed.
_CFG_SV = _cfg_section("voice", "sanad_voice")
# Bytes requested from the mic per read: one VAD frame of 2-byte samples.
_CHUNK_BYTES = FRAME_SAMPLES * 2 # int16 mono
class LocalBrain:
"""Fully on-device Gemini replacement."""
def __init__(self, audio_io, recorder, voice_name: Optional[str] = None,
             system_prompt: str = ""):
    """Wire up the pipeline objects; no model is loaded until `run()`.

    Args:
        audio_io: object exposing `.mic` and `.speaker`.
        recorder: session recorder; its methods are called best-effort.
        voice_name: stored as-is (not used in the visible code).
        system_prompt: passed to the LLM on every turn.
    """
    # Collaborators handed in by the caller.
    self._audio = audio_io
    self._mic, self._speaker = audio_io.mic, audio_io.speaker
    self._recorder = recorder
    self._voice = voice_name
    self._system_prompt = system_prompt

    # Subsystems — constructed here, started in run().
    self._vad = SileroVAD()
    self._stt = WhisperSTT()
    self._llm = LlamaServer()
    self._tts = CosyVoiceTTS()

    # Small bounded queues between the three tasks.
    self._llm_queue: asyncio.Queue[str] = asyncio.Queue(maxsize=4)
    self._tts_queue: asyncio.Queue[str] = asyncio.Queue(maxsize=4)

    # Control flags and speaking state.
    self._stop_flag = asyncio.Event()  # full shutdown
    self._interrupt = asyncio.Event()  # per-turn barge-in
    self._speaking = False
    self._speak_start_time = 0.0
# ─── lifecycle ────────────────────────────────────────
def stop(self) -> None:
    """Request shutdown: raise the stop flag, then the interrupt flag.

    The stop flag ends the task loops; the interrupt flag additionally
    cuts short any LLM stream or TTS playback in progress.
    """
    for flag in (self._stop_flag, self._interrupt):
        flag.set()
async def run(self) -> None:
    """Main entry: load the models, run the three tasks, then shut down.

    Startup loads VAD, STT, LLM and TTS in that order, each in a worker
    thread. `loop.run_in_executor` is used instead of
    `asyncio.to_thread` because the latter only exists on Python 3.9+,
    while the supervisor notes that this pipeline runs in a Python 3.8
    environment — there, `to_thread` would fail every startup with an
    AttributeError.

    If any subsystem fails to start, the ones already started are
    stopped before returning. Previously a TTS load failure left the
    llama-server child running. On the normal path every subsystem is
    stopped in `finally`, each guarded so one failing `stop()` cannot
    prevent the others from running.

    Nothing is raised to the caller; failures are logged with the
    "session error" marker the supervisor looks for.
    """
    loop = asyncio.get_running_loop()

    def _stop_all() -> None:
        # Stop every subsystem, logging (not raising) individual failures.
        parts = (
            ("LlamaServer", self._llm),
            ("CosyVoiceTTS", self._tts),
            ("WhisperSTT", self._stt),
            ("SileroVAD", self._vad),
        )
        for label, part in parts:
            try:
                part.stop()
            except Exception:
                log.warning("%s.stop failed", label, exc_info=True)

    log.info("connecting to local pipeline")
    try:
        for part in (self._vad, self._stt, self._llm, self._tts):
            await loop.run_in_executor(None, part.start)
    except Exception as exc:
        log.error("session error: local pipeline startup failed — %s", exc)
        # Deliberately no "local pipeline stopped" log here: it would
        # overwrite the error state the supervisor just recorded.
        _stop_all()
        return
    log.info("listening")
    try:
        await asyncio.gather(
            self._mic_task(),
            self._dialogue_task(),
            self._tts_task(),
        )
    except asyncio.CancelledError:
        log.info("cancelled — stopping")
    except Exception as exc:
        log.error("session error: %s", exc)
    finally:
        _stop_all()
        log.info("local pipeline stopped")
# ─── barge-in ─────────────────────────────────────────
def _begin_barge_in(self) -> None:
    """Cut the bot off because the user started talking over it.

    No-op unless the bot is currently speaking. Otherwise: log the
    marker, raise the interrupt flag, stop the speaker, discard all
    pending LLM/TTS work for this turn and close the recorder's turn.
    """
    if self._speaking:
        log.info("BARGE-IN (local)")
        self._interrupt.set()
        try:
            self._speaker.stop()
        except Exception:
            log.warning("speaker.stop during barge-in failed", exc_info=True)
        # Anything still queued belongs to the interrupted turn.
        for pending in (self._llm_queue, self._tts_queue):
            self._drain_queue(pending)
        self._speaking = False
        try:
            self._recorder.finish_turn()
        except Exception:
            pass  # recording is best-effort
@staticmethod
def _drain_queue(q: asyncio.Queue) -> None:
try:
while True:
q.get_nowait()
q.task_done()
except asyncio.QueueEmpty:
pass
# ─── Task 1: mic → VAD → Whisper → LLM queue ──────────
async def _mic_task(self) -> None:
    """Task 1: mic → VAD → Whisper → LLM queue.

    Reads one VAD-frame-sized chunk at a time in a worker thread and
    feeds it to the VAD. On ``speech_start`` while the bot is speaking a
    barge-in is triggered. On ``speech_end`` the utterance is
    transcribed in a worker thread and the text is queued for the
    dialogue task.

    When the LLM queue is full the oldest pending utterance is dropped
    (latency over throughput). The drop now calls ``task_done()`` as
    well, matching `_drain_queue`, so the queue's unfinished-task count
    stays balanced. The loop is obtained with `get_running_loop()`, the
    supported call inside a coroutine.
    """
    loop = asyncio.get_running_loop()
    while not self._stop_flag.is_set():
        try:
            pcm = await loop.run_in_executor(
                None, self._mic.read_chunk, _CHUNK_BYTES,
            )
        except Exception:
            # Transient mic failure: back off briefly and retry.
            await asyncio.sleep(0.01)
            continue
        event = self._vad.process(pcm)
        if event == "speech_start":
            # User started talking — if the bot is speaking, it's a barge-in.
            if self._speaking:
                self._begin_barge_in()
        elif event == "speech_end":
            utt = self._vad.collected_audio()
            if not utt:
                continue
            try:
                self._recorder.capture_user(utt)
            except Exception:
                pass  # recording is best-effort
            text = await loop.run_in_executor(None, self._stt.transcribe, utt)
            if not text:
                continue
            log.info("USER: %s", text)
            try:
                self._recorder.add_user_text(text)
            except Exception:
                pass  # recording is best-effort
            # Wake the LLM side — drop the oldest pending item if full.
            if self._llm_queue.full():
                try:
                    self._llm_queue.get_nowait()
                    self._llm_queue.task_done()
                except asyncio.QueueEmpty:
                    pass
            await self._llm_queue.put(text)
# ─── Task 2: LLM streaming → TTS queue ────────────────
async def _dialogue_task(self) -> None:
    """Task 2: LLM streaming → TTS queue.

    Waits (200 ms at a time, so the stop flag is re-checked) for user
    text, clears the barge-in flag for the new turn, and forwards each
    LLM chunk to the TTS queue until the stream ends or an interrupt
    arrives. An uninterrupted, non-empty reply is logged with the
    ``BOT:`` marker and handed to the recorder.
    """
    while not self._stop_flag.is_set():
        try:
            user_text = await asyncio.wait_for(
                self._llm_queue.get(), timeout=0.2)
        except asyncio.TimeoutError:
            continue

        self._interrupt.clear()
        pieces = []
        reply_stream = self._llm.stream(
            user_text, self._system_prompt, self._interrupt)
        async for piece in reply_stream:
            if self._interrupt.is_set():
                break
            pieces.append(piece)
            await self._tts_queue.put(piece)
        self._llm_queue.task_done()

        # Interrupted or empty turns are not logged as a bot reply.
        if not pieces or self._interrupt.is_set():
            continue
        bot_text = " ".join(pieces).strip()
        if not bot_text:
            continue
        log.info("BOT: %s", bot_text)
        try:
            self._recorder.add_robot_text(bot_text)
        except Exception:
            pass  # recording is best-effort
# ─── Task 3: TTS → speaker ────────────────────────────
async def _tts_task(self) -> None:
    """Task 3: TTS → speaker.

    Pops text chunks, synthesises them and streams the PCM to the
    speaker. When the queues have been idle for a poll interval while
    speaking, the speaker stream is finished and the turn is closed.

    `synthesize_stream` is a blocking generator. It used to be iterated
    directly on the event-loop thread, so the mic task could not run —
    and no barge-in could be detected — while a chunk was being
    synthesised. Each step of the generator is now pulled in a worker
    thread; a sentinel default is passed to `next` so exhaustion does
    not raise ``StopIteration`` inside the executor future.
    """
    loop = asyncio.get_running_loop()
    end_of_stream = object()
    while not self._stop_flag.is_set():
        try:
            chunk_text = await asyncio.wait_for(
                self._tts_queue.get(), timeout=0.2)
        except asyncio.TimeoutError:
            # Idle — if we've been speaking and both queues drained, close the stream.
            if self._speaking and self._llm_queue.empty() and self._tts_queue.empty():
                await loop.run_in_executor(None, self._speaker.wait_finish)
                self._speaking = False
                log.info("listening")
                try:
                    self._recorder.finish_turn()
                except Exception:
                    pass  # recording is best-effort
            continue
        if self._interrupt.is_set():
            # Chunk belongs to an interrupted turn — discard it.
            self._tts_queue.task_done()
            continue
        if not self._speaking:
            await loop.run_in_executor(None, self._speaker.begin_stream)
            self._speaking = True
            self._speak_start_time = time.time()
        pcm_iter = None
        try:
            pcm_iter = iter(self._tts.synthesize_stream(chunk_text))
            while True:
                # Run the blocking synthesis step off the event loop.
                pcm = await loop.run_in_executor(
                    None, next, pcm_iter, end_of_stream)
                if pcm is end_of_stream or self._interrupt.is_set():
                    break
                try:
                    self._recorder.capture_robot(pcm)
                except Exception:
                    pass  # recording is best-effort
                await loop.run_in_executor(
                    None, self._speaker.send_chunk,
                    pcm, self._tts.output_rate,
                )
        except Exception as exc:
            log.warning("TTS chunk failed: %s", exc)
        finally:
            # Release the generator promptly when we stopped early.
            closer = getattr(pcm_iter, "close", None)
            if closer is not None:
                try:
                    closer()
                except Exception:
                    pass
            self._tts_queue.task_done()

96
vendor/Sanad/local/stt.py vendored Normal file
View File

@ -0,0 +1,96 @@
"""faster-whisper Large V3 Turbo — GPU INT8 transcription.
Phase 2 of the local pipeline. Given an utterance (int16 PCM bytes at
16 kHz), returns transcribed text. Short / empty / no-speech results are
filtered out per config thresholds to avoid firing phantom triggers.
Install (on the robot, in the `local` env):
pip install faster-whisper==1.0.*
# model auto-downloads from HuggingFace on first `WhisperModel(...)` call,
# OR pre-download to model/local/faster-whisper-large-v3-turbo/ and point
# `local.stt.model_subdir` at it.
"""
from __future__ import annotations
from typing import Optional
import numpy as np
from Project.Sanad.config import MODEL_DIR
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
# Module logger and the `local.stt` config section the constants below are read from.
log = get_logger("local_stt")
_CFG = _cfg_section("local", "stt")
# Model identity: MODEL_NAME is used when no local copy exists under MODEL_SUBDIR.
MODEL_NAME = _CFG.get("model_name", "large-v3-turbo")
MODEL_SUBDIR = _CFG.get("model_subdir", "faster-whisper-large-v3-turbo")
# Passed to WhisperModel(...) as device / compute_type.
DEVICE = _CFG.get("device", "cuda")
COMPUTE_TYPE = _CFG.get("compute_type", "int8_float16")
# Decoding options forwarded to model.transcribe(...).
BEAM_SIZE = _CFG.get("beam_size", 1)
LANGUAGE = _CFG.get("language") # None = auto-detect
VAD_FILTER = _CFG.get("vad_filter", False)
NO_SPEECH_THRESHOLD = _CFG.get("no_speech_threshold", 0.6)
# Transcripts shorter than this many characters are dropped.
MIN_CHARS = _CFG.get("min_utterance_chars", 2)
TEMPERATURE = _CFG.get("temperature", 0.0)
# Preferred on-disk location of the model.
LOCAL_MODEL_DIR = MODEL_DIR / "local" / MODEL_SUBDIR
class WhisperSTT:
"""Thin wrapper around faster_whisper.WhisperModel."""
def __init__(self) -> None:
self._model = None
def start(self) -> None:
    """Load the Whisper model.

    Uses the local model directory when it exists, otherwise passes the
    model name to ``WhisperModel``. (The original author notes roughly
    4 s for the first load and ~100 ms afterwards — not verified here.)

    Raises:
        RuntimeError: the `faster-whisper` package is not installed.
    """
    try:
        from faster_whisper import WhisperModel
    except ImportError as exc:
        raise RuntimeError(
            f"WhisperSTT requires 'faster-whisper': {exc}"
        )
    if LOCAL_MODEL_DIR.exists():
        model_src = str(LOCAL_MODEL_DIR)
    else:
        model_src = MODEL_NAME
    log.info("loading Whisper: src=%s device=%s compute=%s",
             model_src, DEVICE, COMPUTE_TYPE)
    self._model = WhisperModel(
        model_src, device=DEVICE, compute_type=COMPUTE_TYPE)
    log.info("WhisperSTT ready")
def transcribe(self, pcm: bytes) -> str:
    """Blocking transcription. Returns the full text or ''.

    Args:
        pcm: int16 PCM bytes (the module docstring specifies 16 kHz).

    Returns:
        The stripped segment texts joined by spaces, or '' when the
        model is not loaded, the input is empty, transcription fails,
        or the text is shorter than ``MIN_CHARS``.

    A buffer with an odd number of bytes used to raise ``ValueError``
    from ``np.frombuffer`` before the ``try`` block, which escaped to
    the caller despite the "returns ''" contract. The dangling byte is
    now dropped and the conversion happens inside the guarded section.
    """
    if self._model is None:
        log.warning("WhisperSTT.transcribe called before start()")
        return ""
    if not pcm:
        return ""
    # Keep only whole 2-byte samples.
    usable = len(pcm) - (len(pcm) % 2)
    if usable == 0:
        return ""
    try:
        audio = np.frombuffer(pcm[:usable], dtype=np.int16).astype(np.float32) / 32768.0
        if audio.size == 0:
            return ""
        segments, info = self._model.transcribe(
            audio,
            beam_size=BEAM_SIZE,
            language=LANGUAGE,
            vad_filter=VAD_FILTER,
            no_speech_threshold=NO_SPEECH_THRESHOLD,
            temperature=TEMPERATURE,
        )
        # Joining is kept inside the try so a failure while consuming
        # the segments is handled like any other transcription error.
        text = " ".join(seg.text.strip() for seg in segments).strip()
    except Exception as exc:
        log.warning("Whisper transcribe failed: %s", exc)
        return ""
    if len(text) < MIN_CHARS:
        log.debug("drop short transcript: %r", text)
        return ""
    return text
def stop(self) -> None:
    """Drop the reference to the loaded model."""
    setattr(self, "_model", None)

261
vendor/Sanad/local/subprocess.py vendored Normal file
View File

@ -0,0 +1,261 @@
"""Local live subprocess supervisor.
Spawns `voice/sanad_voice.py` as a managed child with
`SANAD_VOICE_BRAIN=local`, tails the child's stdout, and extracts state
transitions + user transcripts from the log markers emitted by
`local/script.py:LocalBrain`.
Mirror of `gemini/subprocess.py`. Lives separately so the two supervisors
stay decoupled — adding a new model does not touch this file.
"""
from __future__ import annotations
import os
import signal
import subprocess
import sys
import threading
from collections import deque
from datetime import datetime
from pathlib import Path
from typing import Any
from Project.Sanad.config import BASE_DIR, LOGS_DIR, SCRIPTS_DIR, LIVE_TUNE
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
# Module logger and the `local.subprocess` config section used for the tunables below.
log = get_logger("local_subprocess")
_LS_CFG = _cfg_section("local", "subprocess")
def _resolve_live_script() -> Path:
"""Locate the voice script to run as subprocess (same as Gemini's)."""
override = os.environ.get("SANAD_LIVE_SCRIPT", "").strip()
if override:
p = Path(override).expanduser()
if p.exists():
return p
for c in (BASE_DIR / "voice" / "sanad_voice.py",
SCRIPTS_DIR / "sanad_voice.py"):
if c.exists():
return c
return SCRIPTS_DIR / "sanad_voice.py"
# Script launched as the child; resolved once at import time.
LIVE_SCRIPT = _resolve_live_script()
# Maximum lengths of the in-memory log tail and user-transcript deques.
LOG_TAIL_SIZE = _LS_CFG.get("log_tail_size", 2000)
TRANSCRIPT_TAIL_SIZE = _LS_CFG.get("transcript_tail_size", 30)
# Session log files are written as <LIVE_LOG_DIR>/<LIVE_LOG_NAME>_<YYYYMMDD>.log.
LIVE_LOG_DIR = LOGS_DIR
LIVE_LOG_NAME = _LS_CFG.get("log_name", "local_subprocess")
# Python binary for the child process. The local pipeline runs in a
# separate conda env (Python 3.8 + Jetson CUDA torch + CosyVoice/Whisper);
# the dashboard stays in gemini_sdk (Python 3.10). Override with
# SANAD_LOCAL_PYTHON env var at runtime.
LOCAL_PYTHON_BIN = os.environ.get(
"SANAD_LOCAL_PYTHON",
_LS_CFG.get("python_bin", sys.executable),
)
# Grace periods used by stop(): first after SIGINT, then after terminate()/kill().
_STOP_TIMEOUT_SEC = _LS_CFG.get("stop_timeout_sec", 5.0)
_TERMINATE_TIMEOUT_SEC = _LS_CFG.get("terminate_timeout_sec", 3.0)
# Child output lines starting with one of these prefixes are counted as noise
# and kept out of the log tail.
_NOISY_PREFIXES = tuple(_LS_CFG.get("noisy_prefixes", [
"ALSA lib ", "Expression 'alsa_", "Cannot connect to server socket",
"jack server is not running",
]))
# Same, for lines containing one of these fragments anywhere.
_NOISY_FRAGMENTS = tuple(_LS_CFG.get("noisy_fragments", [
"Unknown PCM", "Evaluate error", "snd_pcm_open_noupdate", "PaAlsaStream",
]))
class LocalSubprocess:
def __init__(self):
    """Initialise an idle supervisor; no child is spawned until `start()`."""
    # Guards the shared state below.
    self._lock = threading.Lock()

    # Child process and the thread/file that follow its output.
    self.process: subprocess.Popen | None = None
    self._reader_thread: threading.Thread | None = None
    self._log_file = None

    # Bounded histories returned by status().
    self.log_tail: deque[str] = deque(maxlen=LOG_TAIL_SIZE)
    self.user_transcript: deque[str] = deque(maxlen=TRANSCRIPT_TAIL_SIZE)

    # Current state as derived from the child's log markers.
    self.state, self.state_message = "stopped", "Idle."
    self.last_user_text = ""
    self.suppressed_noise = 0
# ─── log I/O ──────────────────────────────────────────
def _open_session_log(self, pid: int):
    """Open today's session log in append mode and write a start banner.

    Args:
        pid: child process id, recorded in the banner.

    Returns:
        The open, line-buffered file handle, or ``None`` if the log
        could not be opened or written (a warning is logged).
    """
    try:
        LIVE_LOG_DIR.mkdir(parents=True, exist_ok=True)
        day_stamp = datetime.now().strftime('%Y%m%d')
        log_path = LIVE_LOG_DIR / f"{LIVE_LOG_NAME}_{day_stamp}.log"
        handle = open(log_path, "a", encoding="utf-8", buffering=1)
        started_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        handle.write(
            f"\n===== local subprocess start {started_at} pid={pid} =====\n"
        )
        return handle
    except Exception as exc:
        log.warning("Could not open local subprocess log file: %s", exc)
        return None
def _is_noisy(self, line: str) -> bool:
    """True if `line` starts with a noisy prefix or contains a noisy fragment."""
    if line.startswith(_NOISY_PREFIXES):
        return True
    for fragment in _NOISY_FRAGMENTS:
        if fragment in line:
            return True
    return False
def _set_state(self, state: str, msg: str):
self.state = state
self.state_message = msg
def _track_line(self, line: str):
"""Parse log markers emitted by `local/script.py:LocalBrain`.
Must stay in lock-step with the `log.info(...)` strings there.
"""
if "connecting to local pipeline" in line:
self._set_state("connecting", line)
elif " USER: " in line or line.strip().startswith("USER:"):
text = line.split("USER:", 1)[1].strip()
if text:
self.last_user_text = text
self.user_transcript.append(text)
self._set_state("hearing", f"User: {text}")
elif " BOT: " in line or line.strip().startswith("BOT:"):
self._set_state("speaking", line.split("BOT:", 1)[1].strip()[:80])
elif "BARGE-IN (local)" in line:
self._set_state("interrupting", line)
elif "session error" in line:
self._set_state("error", line)
elif "local pipeline stopped" in line or "cancelled — stopping" in line:
self._set_state("stopped", line)
elif "listening" in line.lower() and "no speech" not in line:
self._set_state("listening", "Listening for speech.")
def _reader_loop(self):
proc = self.process
if proc is None or proc.stdout is None:
return
fh = self._open_session_log(proc.pid)
self._log_file = fh
for line in proc.stdout:
clean = line.rstrip()
if not clean:
continue
if fh is not None:
try:
fh.write(clean + "\n")
except Exception:
pass
with self._lock:
if self._is_noisy(clean):
self.suppressed_noise += 1
continue
self.log_tail.append(clean)
self._track_line(clean)
with self._lock:
self.log_tail.append("Local pipeline process exited.")
self._set_state("stopped", "Process exited.")
if fh is not None:
try:
fh.write(
f"===== local subprocess exit "
f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====\n"
)
fh.close()
except Exception:
pass
self._log_file = None
# ─── lifecycle ────────────────────────────────────────
def is_running(self) -> bool:
    """True while a child exists and has not exited (checked under the lock)."""
    with self._lock:
        child = self.process
        if child is None:
            return False
        return child.poll() is None
def start(self) -> dict[str, Any]:
    """Spawn the voice script with ``SANAD_VOICE_BRAIN=local``.

    Returns:
        ``{"started": True, "pid": ...}`` on success, or
        ``{"started": False, "message": "Already running.", "pid": ...}``
        when a child is already alive.

    Raises:
        RuntimeError: the voice script does not exist.
        OSError: the interpreter could not be executed.

    Two fixes over the previous version:
      * The child's output is decoded with ``errors="replace"``. With
        strict decoding a single undecodable byte raised in the reader
        thread, which then died and stopped draining the pipe.
      * A failed launch sets the state to "error". It used to stay
        "starting" forever.
    """
    with self._lock:
        if self.process is not None and self.process.poll() is None:
            return {"started": False, "message": "Already running.", "pid": self.process.pid}
        self._set_state("starting", "Starting local pipeline (loading models)...")
    script = LIVE_SCRIPT
    if not script.exists():
        with self._lock:
            self._set_state("error", f"Script not found: {script}")
        raise RuntimeError(f"Script not found: {script}")
    env = os.environ.copy()
    env.update({
        "PYTHONUNBUFFERED": "1",
        **LIVE_TUNE,
        "SANAD_VOICE_BRAIN": "local",
    })
    dds_iface = env.get("SANAD_DDS_INTERFACE", "eth0")
    # Use the `local` env's Python so CUDA torch + CosyVoice are available.
    # Fall back to sys.executable only if the configured bin doesn't exist.
    py_bin = LOCAL_PYTHON_BIN
    if not Path(py_bin).exists():
        log.warning("LOCAL_PYTHON_BIN=%s not found, falling back to %s",
                    py_bin, sys.executable)
        py_bin = sys.executable
    cmd = [py_bin, str(script), dds_iface]
    try:
        proc = subprocess.Popen(
            cmd,
            cwd=str(script.parent),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            errors="replace",
            bufsize=1,
            env=env,
        )
    except Exception as exc:
        with self._lock:
            self._set_state("error", f"Failed to launch: {exc}")
        raise
    with self._lock:
        self.process = proc
        self.log_tail.append(f"Started: pid={proc.pid}")
        self._set_state("starting", f"pid={proc.pid}")
    self._reader_thread = threading.Thread(target=self._reader_loop, daemon=True)
    self._reader_thread.start()
    log.info("Local subprocess started: pid=%d", proc.pid)
    return {"started": True, "pid": proc.pid}
def stop(self) -> dict[str, Any]:
    """Stop the child: SIGINT, then terminate(), then kill().

    Returns:
        ``{"stopped": True, "returncode": rc}``, or
        ``{"stopped": False, "message": "Not running."}`` when there is
        no live child.

    Two fixes over the previous version:
      * The child's pipes are closed explicitly once the reader thread
        has finished. They were never closed here, so every start/stop
        cycle leaked file descriptors until garbage collection.
      * A timeout on the final wait after kill() is logged instead of
        escaping and leaving the state stuck at "stopping".
    """
    with self._lock:
        proc = self.process
        if proc is None or proc.poll() is not None:
            return {"stopped": False, "message": "Not running."}
        self._set_state("stopping", "Stopping...")
        reader = self._reader_thread
    try:
        proc.send_signal(signal.SIGINT)
        proc.wait(timeout=_STOP_TIMEOUT_SEC)
    except subprocess.TimeoutExpired:
        proc.terminate()
        try:
            proc.wait(timeout=_TERMINATE_TIMEOUT_SEC)
        except subprocess.TimeoutExpired:
            proc.kill()
            try:
                proc.wait(timeout=_TERMINATE_TIMEOUT_SEC)
            except subprocess.TimeoutExpired:
                log.warning("Local subprocess pid=%s did not exit after kill",
                            proc.pid)
    rc = proc.returncode
    # Let the reader drain to EOF so the final state below is written last.
    if reader is not None and reader is not threading.current_thread():
        reader.join(timeout=1.0)
    # Only close once the reader is done; closing a pipe another thread
    # is still reading from is not safe.
    if reader is None or not reader.is_alive():
        for pipe in (getattr(proc, "stdin", None), getattr(proc, "stdout", None)):
            if pipe is not None:
                try:
                    pipe.close()
                except Exception:
                    pass
    with self._lock:
        self.process = None
        self.log_tail.append("Stopped.")
        self._set_state("stopped", "Stopped.")
    log.info("Local subprocess stopped (rc=%s)", rc)
    return {"stopped": True, "returncode": rc}
def status(self) -> dict[str, Any]:
    """Return a point-in-time snapshot of the supervisor state.

    Read under ``self._lock``; the deques are copied into lists.

    Returns:
        dict with ``running``, ``pid`` (``None`` unless the child is
        alive), ``state``, ``state_message``, ``last_user_text``,
        ``user_transcript``, ``log_tail`` and ``suppressed_noise``.
    """
    with self._lock:
        child = self.process
        alive = child is not None and child.poll() is None
        snapshot = {
            "running": alive,
            "pid": child.pid if alive else None,
            "state": self.state,
            "state_message": self.state_message,
            "last_user_text": self.last_user_text,
            "user_transcript": list(self.user_transcript),
            "log_tail": list(self.log_tail),
            "suppressed_noise": self.suppressed_noise,
        }
    return snapshot

126
vendor/Sanad/local/tts.py vendored Normal file
View File

@ -0,0 +1,126 @@
"""CosyVoice2 0.5B streaming TTS — GPU.
Phase 4 of the local pipeline. Holds a 3-second reference WAV in VRAM
and synthesises streaming Arabic/English audio for every text chunk
arriving from the LLM. Emits int16 PCM at the model's native rate
(CosyVoice2 outputs 22 050 Hz — we resample to `sample_rate` from
config so the downstream `audio_io.speaker` gets a consistent rate).
Install (on the robot):
cd ~/src
git clone --recursive https://github.com/FunAudioLLM/CosyVoice
cd CosyVoice
pip install -r requirements.txt
pip install -e .
# model + reference voice
huggingface-cli download FunAudioLLM/CosyVoice2-0.5B \\
--local-dir ~/sanad/model/local/CosyVoice2-0.5B
# place a 3-s Khaleeji clip at model/local/khaleeji_reference_3s.wav
# (16 kHz mono int16 WAV)
API note:
CosyVoice2 is evolving. We use the published `inference_zero_shot`
with `stream=True` which yields `{"tts_speech": tensor}` chunks.
If the upstream API renames, adapt in one place — `CosyVoiceTTS.synthesize_stream`.
"""
from __future__ import annotations
from pathlib import Path
from typing import AsyncIterator, Iterator, Optional
import numpy as np
from Project.Sanad.config import MODEL_DIR
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
# Module logger and the `local.tts` config section the constants below are read from.
log = get_logger("local_tts")
_CFG = _cfg_section("local", "tts")
# Model directory and reference-voice WAV, both relative to MODEL_DIR/local.
MODEL_SUBDIR = _CFG.get("model_subdir", "CosyVoice2-0.5B")
REFERENCE_WAV_SUBDIR = _CFG.get("reference_wav_subdir", "khaleeji_reference_3s.wav")
# Prompt text passed alongside the reference audio to inference_zero_shot.
REFERENCE_PROMPT = _CFG.get("reference_prompt", "")
# Sample rate of the PCM handed to the speaker; model output is resampled to it.
OUT_RATE = int(_CFG.get("sample_rate", 16000))
# NOTE(review): QUEUE_MAX and DEVICE are not referenced in the visible part of
# this module — confirm whether they are still used.
QUEUE_MAX = int(_CFG.get("queue_max", 3))
DEVICE = _CFG.get("device", "cuda")
LOCAL_MODEL_DIR = MODEL_DIR / "local" / MODEL_SUBDIR
REFERENCE_WAV_PATH = MODEL_DIR / "local" / REFERENCE_WAV_SUBDIR
def _resample_int16(pcm: np.ndarray, src_rate: int, dst_rate: int) -> np.ndarray:
if src_rate == dst_rate or pcm.size == 0:
return pcm.astype(np.int16, copy=False)
target_len = max(1, int(len(pcm) * dst_rate / src_rate))
return np.interp(
np.linspace(0, len(pcm), target_len, endpoint=False),
np.arange(len(pcm)),
pcm.astype(np.float64),
).astype(np.int16)
class CosyVoiceTTS:
"""Thin async wrapper around CosyVoice2 streaming inference."""
def __init__(self) -> None:
    """Create an unloaded wrapper; `start()` loads the model and reference."""
    # Both stay None until start() succeeds.
    self._model = None
    self._ref_speech = None  # preloaded reference audio
    self._ref_prompt = REFERENCE_PROMPT
    # Placeholder rate; start() replaces it with the model's own value.
    self._model_rate: int = 22050
def start(self) -> None:
    """Load CosyVoice2 and the reference voice clip.

    Raises:
        RuntimeError: the CosyVoice package is not importable, or the
            model directory / reference WAV does not exist.
    """
    try:
        from cosyvoice.cli.cosyvoice import CosyVoice2
        from cosyvoice.utils.file_utils import load_wav
    except ImportError as exc:
        raise RuntimeError(
            f"CosyVoiceTTS requires the CosyVoice package from source: {exc}"
        )

    # Check both assets before the model load.
    if not LOCAL_MODEL_DIR.exists():
        raise RuntimeError(f"CosyVoice2 model not found at {LOCAL_MODEL_DIR}")
    if not REFERENCE_WAV_PATH.exists():
        raise RuntimeError(
            f"Reference voice WAV not found at {REFERENCE_WAV_PATH}"
        )

    log.info("loading CosyVoice2: %s", LOCAL_MODEL_DIR)
    engine = CosyVoice2(str(LOCAL_MODEL_DIR), load_jit=True, fp16=True)
    self._model = engine
    # Use the model's own sample rate when it exposes one.
    self._model_rate = getattr(engine, "sample_rate", 22050)
    self._ref_speech = load_wav(str(REFERENCE_WAV_PATH), 16000)
    log.info("CosyVoiceTTS ready (model_rate=%d)", self._model_rate)
def synthesize_stream(self, text: str) -> Iterator[bytes]:
    """Yield int16 PCM bytes at OUT_RATE, one streaming chunk at a time.

    Yields nothing when the model or reference audio is not loaded. Any
    exception during synthesis is logged and ends the stream early.
    """
    engine, reference = self._model, self._ref_speech
    if engine is None or reference is None:
        return
    try:
        steps = engine.inference_zero_shot(
            text, self._ref_prompt, reference, stream=True)
        for step in steps:
            wave = step.get("tts_speech")
            if wave is None:
                continue
            # tensor → numpy, with singleton dimensions removed
            samples = wave.cpu().numpy().squeeze()
            if samples.size == 0:
                continue
            # Scale to the int16 range, clipping anything outside it.
            pcm_i16 = np.clip(samples * 32767.0, -32768, 32767).astype(np.int16)
            if self._model_rate != OUT_RATE:
                pcm_i16 = _resample_int16(pcm_i16, self._model_rate, OUT_RATE)
            yield pcm_i16.tobytes()
    except Exception as exc:
        log.warning("TTS synth failed for chunk %r: %s", text[:40], exc)
def stop(self) -> None:
    """Drop the references to the model and the reference audio."""
    self._model = self._ref_speech = None
@property
def output_rate(self) -> int:
    """Sample rate (Hz) of the PCM produced by `synthesize_stream`."""
    rate = OUT_RATE
    return rate

150
vendor/Sanad/local/vad.py vendored Normal file
View File

@ -0,0 +1,150 @@
"""Silero VAD wrapper — CPU-only speech boundary detection.
Phase 1 of the local pipeline. Consumes 16 kHz mono int16 PCM in short
frames, emits speech_start / speech_end events. All thresholds + frame
sizes come from config/local_config.json > vad.
Install (on the robot):
pip install silero-vad torch==2.2.* torchaudio==2.2.*
Usage:
vad = SileroVAD()
vad.start()
evt = vad.process(pcm_bytes)
if evt == 'speech_start': ...
elif evt == 'speech_end': buf = vad.collected_audio()
"""
from __future__ import annotations
import time
from typing import Optional
import numpy as np
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
# Module logger and the `local.vad` config section the constants below are read from.
log = get_logger("local_vad")
_CFG = _cfg_section("local", "vad")
# Audio format fed to the model: sample rate and frame length.
SAMPLE_RATE = _CFG.get("sample_rate", 16000)
FRAME_MS = _CFG.get("frame_ms", 32)
# A frame counts as speech when the model's probability is >= THRESHOLD.
THRESHOLD = _CFG.get("threshold", 0.55)
# Trailing silence that ends an utterance, and the minimum utterance length kept.
MIN_SILENCE_MS = _CFG.get("min_silence_ms", 400)
MIN_SPEECH_MS = _CFG.get("min_speech_ms", 250)
# Padding before/after speech; converted to frame counts in SileroVAD.__init__.
PAD_START_MS = _CFG.get("pad_start_ms", 200)
PAD_END_MS = _CFG.get("pad_end_ms", 200)
# Samples per frame.
FRAME_SAMPLES = SAMPLE_RATE * FRAME_MS // 1000 # 512 @ 16k/32ms
class SileroVAD:
"""Streaming VAD with buffered utterance capture.
Fed one mic frame at a time via `process()`. Internal state tracks
whether we're inside an utterance; on speech_end, `collected_audio()`
returns the full utterance (with configured padding).
"""
def __init__(self) -> None:
    """Set up empty buffers and idle state; the model is loaded by `start()`."""
    self._model = None

    # Audio buffers.
    self._audio_buf: list[bytes] = []  # utterance being collected
    self._pre_buf: list[bytes] = []    # rolling "pre-speech" ring
    self._last_utterance: Optional[bytes] = None

    # Padding expressed in whole frames (at least one each).
    self._pre_frames = max(1, PAD_START_MS // FRAME_MS)
    self._pad_end_frames = max(1, PAD_END_MS // FRAME_MS)

    # Utterance-tracking state.
    self._in_speech = False
    self._trailing_silence_frames = 0
    self._last_speech_time = 0.0
    self._speech_start_time = 0.0
def start(self) -> None:
    """Load the Silero model. Call once before `process()`.

    Raises:
        RuntimeError: `torch` or `silero-vad` cannot be imported.
    """
    try:
        # torch is imported here only to fail early with a clear message;
        # process() imports it again when it runs inference.
        import torch
        from silero_vad import load_silero_vad
    except ImportError as exc:
        raise RuntimeError(
            f"SileroVAD requires 'silero-vad' + torch: {exc}"
        )
    self._model = load_silero_vad()
    log.info("SileroVAD ready (threshold=%.2f, frame=%dms)",
             THRESHOLD, FRAME_MS)
def process(self, pcm: bytes) -> Optional[str]:
"""Feed one frame (≈ FRAME_MS of audio). Returns an event or None.
Events: 'speech_start' | 'speech_end' | None
"""
if self._model is None:
return None
# keep a rolling pre-buffer so captured utterances include lead-in
self._pre_buf.append(pcm)
if len(self._pre_buf) > self._pre_frames:
self._pre_buf.pop(0)
# VAD expects float32 in [-1, 1]
arr = np.frombuffer(pcm, dtype=np.int16).astype(np.float32) / 32768.0
if arr.size < FRAME_SAMPLES:
# pad if short tail chunk arrived
arr = np.concatenate([arr, np.zeros(FRAME_SAMPLES - arr.size, dtype=np.float32)])
elif arr.size > FRAME_SAMPLES:
arr = arr[:FRAME_SAMPLES]
try:
import torch
with torch.no_grad():
prob = float(self._model(torch.from_numpy(arr), SAMPLE_RATE).item())
except Exception as exc:
log.warning("VAD inference failed: %s", exc)
return None
now = time.time()
is_speech = prob >= THRESHOLD
if is_speech:
self._trailing_silence_frames = 0
self._last_speech_time = now
if not self._in_speech:
# transition → speech
self._in_speech = True
self._speech_start_time = now
self._audio_buf = list(self._pre_buf) # seed with pad
self._audio_buf.append(pcm)
return "speech_start"
self._audio_buf.append(pcm)
return None
# silent frame
if self._in_speech:
self._audio_buf.append(pcm) # collect trailing pad
self._trailing_silence_frames += 1
silence_ms = self._trailing_silence_frames * FRAME_MS
if silence_ms >= MIN_SILENCE_MS:
# speech ended — validate min_speech
speech_dur_ms = (now - self._speech_start_time) * 1000
self._in_speech = False
if speech_dur_ms < MIN_SPEECH_MS:
log.debug("drop short utterance (%.0fms)", speech_dur_ms)
self._audio_buf.clear()
self._last_utterance = None
return None
self._last_utterance = b"".join(self._audio_buf)
self._audio_buf.clear()
return "speech_end"
return None
def collected_audio(self) -> Optional[bytes]:
"""After a speech_end event, return the full utterance bytes."""
return self._last_utterance
def reset(self) -> None:
"""Drop any in-flight utterance (used on barge-in)."""
self._in_speech = False
self._audio_buf.clear()
self._trailing_silence_frames = 0
self._last_utterance = None
def stop(self) -> None:
self._model = None

545
vendor/Sanad/main.py vendored Normal file
View File

@ -0,0 +1,545 @@
#!/usr/bin/env python3
"""Sanad — unified robot assistant entry point.
Starts all subsystems and the FastAPI dashboard.
python main.py # default port 8000
python main.py --port 8080 # custom port
"""
from __future__ import annotations
import argparse
import importlib
import os
import sys
import types
from pathlib import Path
# ─────────────────────────────────────────────────────────────────────────────
# Layout detection — support BOTH:
# 1. Dev layout: <root>/Project/Sanad/main.py (imports use Project.Sanad.*)
# 2. Deployed layout: /home/unitree/Sanad/main.py (no Project/ wrapper)
#
# In the deployed case we synthesize a `Project` namespace package and alias
# `Project.Sanad` → the local `Sanad` package, so every `from Project.Sanad.X
# import Y` keeps working without rewriting any other file.
# ─────────────────────────────────────────────────────────────────────────────
# _THIS_DIR / _PARENT are reused further down (diagnostic output, BASE_DIR log).
_THIS_DIR = Path(__file__).resolve().parent # .../Sanad
_PARENT = _THIS_DIR.parent # .../Project OR /home/unitree
# The layout is decided purely by the parent directory's name.
if _PARENT.name == "Project":
# Dev layout — add the directory containing Project/
_ROOT = _PARENT.parent
if str(_ROOT) not in sys.path:
sys.path.insert(0, str(_ROOT))
else:
# Deployed layout — create a virtual Project package and alias
if str(_PARENT) not in sys.path:
sys.path.insert(0, str(_PARENT))
# Only synthesize `Project` if nothing registered it already.
if "Project" not in sys.modules:
_proj = types.ModuleType("Project")
_proj.__path__ = [] # mark as namespace package
sys.modules["Project"] = _proj
if "Project.Sanad" not in sys.modules:
# Import the local Sanad package as a top-level module first
# (its importable name is this directory's name).
_sanad = importlib.import_module(_THIS_DIR.name)
sys.modules["Project.Sanad"] = _sanad
sys.modules["Project"].Sanad = _sanad # type: ignore[attr-defined]
# When main.py runs as a script (`python3 main.py`), Python loads it as the
# `__main__` module — NOT as `Project.Sanad.main`. Route handlers later do
# `from Project.Sanad.main import arm` etc; without the alias below, Python
# would re-execute this file from scratch under a different module name,
# creating a SECOND set of subsystem instances (uninitialised). Every
# `subsystem not available` / `No LowState` symptom traces back to this.
# The alias ensures both names point at the exact same module object.
if __name__ == "__main__":
sys.modules["Project.Sanad.main"] = sys.modules["__main__"]
# asyncio compat shim — backfills asyncio.to_thread for Python 3.8.
# MUST be imported before any other Sanad module that uses asyncio.to_thread.
from Project.Sanad.core import asyncio_compat # noqa: F401
from Project.Sanad.config import (
DASHBOARD_HOST,
DASHBOARD_PORT,
DASHBOARD_INTERFACE,
DDS_NETWORK_INTERFACE,
)
from Project.Sanad.core.logger import get_logger
log = get_logger("main")
def _safe_import(label: str, importer):
"""Import a module by callable, returning None if it fails."""
try:
return importer()
except Exception:
log.exception("Failed to import %s — that subsystem will be unavailable", label)
return None
def _safe_construct(name: str, factory):
"""Construct a subsystem, log + return None on failure."""
if factory is None:
return None
try:
return factory()
except Exception:
log.exception("Failed to construct %s — that subsystem will be unavailable", name)
return None
# ── isolated imports — one bad module never blocks the others ──
def _attr_importer(module_path: str, attr: str):
    """Return a zero-arg callable that imports `module_path` and returns its `attr`.

    The import only happens when the callable runs, i.e. inside
    `_safe_import`, so a failure is still logged and isolated per subsystem.
    """
    return lambda: getattr(__import__(module_path, fromlist=[attr]), attr)


Brain = _safe_import("Brain", _attr_importer("Project.Sanad.core.brain", "Brain"))
ArmController = _safe_import("ArmController", _attr_importer("Project.Sanad.motion.arm_controller", "ArmController"))
MacroPlayer = _safe_import("MacroPlayer", _attr_importer("Project.Sanad.motion.macro_player", "MacroPlayer"))
MacroRecorder = _safe_import("MacroRecorder", _attr_importer("Project.Sanad.motion.macro_recorder", "MacroRecorder"))
TeachingSession = _safe_import("TeachingSession", _attr_importer("Project.Sanad.motion.teaching", "TeachingSession"))
AudioManager = _safe_import("AudioManager", _attr_importer("Project.Sanad.voice.audio_manager", "AudioManager"))
LocalTTSEngine = _safe_import("LocalTTSEngine", _attr_importer("Project.Sanad.voice.local_tts", "LocalTTSEngine"))
WakePhraseManager = _safe_import("WakePhraseManager", _attr_importer("Project.Sanad.voice.wake_phrase_manager", "WakePhraseManager"))
LiveVoiceLoop = _safe_import("LiveVoiceLoop", _attr_importer("Project.Sanad.voice.live_voice_loop", "LiveVoiceLoop"))
TypedReplayEngine = _safe_import("TypedReplayEngine", _attr_importer("Project.Sanad.voice.typed_replay", "TypedReplayEngine"))
GeminiVoiceClient = _safe_import("GeminiVoiceClient", _attr_importer("Project.Sanad.gemini.client", "GeminiVoiceClient"))
GeminiSubprocess = _safe_import("GeminiSubprocess", _attr_importer("Project.Sanad.gemini.subprocess", "GeminiSubprocess"))
LocalSubprocess = _safe_import("LocalSubprocess", _attr_importer("Project.Sanad.local.subprocess", "LocalSubprocess"))
CameraDaemon = _safe_import("CameraDaemon", _attr_importer("Project.Sanad.vision.camera", "CameraDaemon"))
FaceGallery = _safe_import("FaceGallery", _attr_importer("Project.Sanad.vision.face_gallery", "FaceGallery"))
ZoneGallery = _safe_import("ZoneGallery", _attr_importer("Project.Sanad.vision.zone_gallery", "ZoneGallery"))
LocoController = _safe_import("LocoController", _attr_importer("Project.Sanad.G1_Controller.loco_controller", "LocoController"))
MovementDispatcher = _safe_import("MovementDispatcher", _attr_importer("Project.Sanad.voice.movement_dispatch", "MovementDispatcher"))
# ── global instances (imported by route modules) ──
def _when(ready, factory):
    """Return `factory` when `ready` is truthy, else None (which `_safe_construct` skips)."""
    return factory if ready else None


# Construction order matters: later subsystems receive earlier ones.
brain = None
if Brain:
    brain = _safe_construct("brain", Brain)
arm = _safe_construct("arm", ArmController)
audio_mgr = _safe_construct("audio_mgr", AudioManager)
voice_client = _safe_construct("voice_client", GeminiVoiceClient)
local_tts = _safe_construct("local_tts", LocalTTSEngine)
wake_mgr = _safe_construct("wake_mgr", WakePhraseManager)
macro_rec = _safe_construct(
    "macro_rec",
    _when(MacroRecorder and arm, lambda: MacroRecorder(arm)))
macro_play = _safe_construct(
    "macro_play",
    _when(MacroPlayer and arm, lambda: MacroPlayer(audio_mgr, arm)))
teacher = _safe_construct(
    "teacher",
    _when(TeachingSession and arm, lambda: TeachingSession(arm)))
live_voice = _safe_construct(
    "live_voice",
    _when(LiveVoiceLoop and voice_client and arm and wake_mgr and audio_mgr,
          lambda: LiveVoiceLoop(voice_client, arm, wake_mgr, audio_mgr)))
# Which voice supervisor to mount. SANAD_VOICE_BRAIN chooses the brain
# that runs INSIDE the subprocess (see voice/sanad_voice.py); the same
# env var picks WHICH supervisor here manages that subprocess so its
# log-line parser matches the brain's emit format.
_brain_choice = os.environ.get("SANAD_VOICE_BRAIN", "gemini").strip().lower()
# Anything other than an importable "local" supervisor falls back to Gemini.
_use_local_supervisor = _brain_choice == "local" and LocalSubprocess is not None
live_sub = _safe_construct(
    "live_sub",
    LocalSubprocess if _use_local_supervisor else GeminiSubprocess)
typed_replay = _safe_construct(
    "typed_replay",
    _when(TypedReplayEngine and voice_client and audio_mgr,
          lambda: TypedReplayEngine(voice_client, audio_mgr)))
# ── Locomotion controller (N2) — manual dashboard locomotion ────────────────
# Reuses the arm controller's single ChannelFactoryInitialize (one DDS init per
# process) — it does NOT init DDS itself. Disarmed every boot. See
# G1_Controller/loco_controller.py and dashboard/routes/controller.py.
# Built only when the LocoController class imported AND the arm controller was
# constructed; otherwise the factory is None and loco_controller stays None.
loco_controller = _safe_construct(
"loco_controller",
(lambda: LocoController(arm)) if (LocoController and arm) else None)
# Arm ⇄ locomotion mutual exclusion: the arm must NEVER run a replay / SDK
# action / gesture while the robot may be walking. `movement_active` is True for
# the MANUAL gate (armed/teleop) AND for ~1.5s after any move/step — so it also
# covers Phase-3 Gemini-driven moves (which call loco.move/step directly).
# Checked at every arm playback chokepoint (replay_file / _execute), so it blocks
# voice/Gemini-triggered gestures too, not just the dashboard.
if arm is not None and loco_controller is not None:
try:
# hasattr guard: an arm controller without set_motion_block is tolerated.
if hasattr(arm, "set_motion_block"):
# Passes the movement_active attribute itself (no call is made here).
arm.set_motion_block(loco_controller.movement_active)
log.info("Arm motion-block wired to locomotion movement_active")
except Exception:
# Wiring failure is logged but never aborts startup.
log.exception("Could not wire arm motion-block")
# ── Gemini voice → movement dispatcher (N2 Phase 3) ─────────────────────────
# Reads Gemini's spoken (BOT) transcript via the live supervisor's bot-callback
# and drives loco_controller on a confirmation-phrase match (Marcus pattern).
# Gated on recognition_state.movement_enabled (the "Enable Gemini movement"
# toggle) — SEPARATE from the manual arm flag. Inert until that flag is on.
# Default to None so the SUBSYSTEMS report lists the dispatcher as unavailable
# whenever a prerequisite is missing or wiring fails.
movement_dispatch = None
if MovementDispatcher and loco_controller is not None:
try:
from Project.Sanad.config import BASE_DIR as _BD2, MOTIONS_DIR as _MD
# Constructor arguments: the loco controller, the instruction file under
# MOTIONS_DIR, and the recognition-state file under BASE_DIR/data.
movement_dispatch = _safe_construct(
"movement_dispatch",
lambda: MovementDispatcher(
loco_controller,
_MD / "instruction.json",
_BD2 / "data" / ".recognition_state.json"))
if movement_dispatch is not None:
movement_dispatch.start()
# The callback is only registered when the active supervisor offers the hook.
if live_sub is not None and hasattr(live_sub, "register_bot_callback"):
live_sub.register_bot_callback(movement_dispatch.on_bot_text)
log.info("Movement dispatcher wired to Gemini BOT transcript")
except Exception:
# Logged only; startup continues without voice-driven movement.
log.exception("Could not wire movement dispatcher")
# ── Recognition (camera + face gallery) ─────────────────────────────────────
# Camera is idle until the dashboard toggles vision on; face gallery is pure
# file IO and always available if the import succeeded.
#
# Config precedence (highest first): explicit env var → config/core_config.json
# section → hardcoded default. The parent process normally has no SANAD_CAMERA_*
# env vars (LIVE_TUNE is only forwarded to the Gemini child), so in practice the
# core_config.json `camera` / `faces` sections are the live source here.
def _build_camera():
    """Construct the CameraDaemon from env overrides and the core `camera` config.

    Width, height, fps and JPEG quality each take a non-empty SANAD_CAMERA_*
    env var first, then the config key, then the hardcoded default. The
    remaining timing knobs come from the config section only.
    """
    from Project.Sanad.core.config_loader import section as _cfg_section
    cam_cfg = _cfg_section("core", "camera")

    def _knob(env_key: str, cfg_key: str, default):
        # The default's type doubles as the converter for either source.
        caster = type(default)
        override = os.environ.get(env_key)
        if override:  # unset and empty string both fall through to config
            return caster(override)
        return caster(cam_cfg.get(cfg_key, default))

    # Frames are cached in memory and pushed to the Gemini child over its
    # stdin (see GeminiSubprocess._frame_forwarder) — no file drop.
    settings = {
        "width": _knob("SANAD_CAMERA_WIDTH", "width", 424),
        "height": _knob("SANAD_CAMERA_HEIGHT", "height", 240),
        "fps": _knob("SANAD_CAMERA_FPS", "fps", 15),
        "jpeg_quality": _knob("SANAD_CAMERA_JPEG_QUALITY", "jpeg_quality", 70),
        "stale_threshold_s": float(cam_cfg.get("stale_threshold_s", 10.0)),
        "reconnect_min_s": float(cam_cfg.get("reconnect_min_s", 2.0)),
        "reconnect_max_s": float(cam_cfg.get("reconnect_max_s", 10.0)),
        "capture_timeout_ms": int(cam_cfg.get("capture_timeout_ms", 5000)),
    }
    return CameraDaemon(**settings)
def _build_gallery():
    """Construct the FaceGallery rooted at the configured faces directory."""
    from Project.Sanad.config import BASE_DIR
    from Project.Sanad.core.config_loader import section as _cfg_section
    faces_cfg = _cfg_section("core", "faces")
    # SANAD_FACES_DIR is set absolute by LIVE_TUNE (the Gemini child reads the
    # same var). In the parent it's usually unset → fall back to the JSON's
    # dir_rel, then the hardcoded default. Honour absolute paths as-is.
    configured = os.environ.get("SANAD_FACES_DIR") or faces_cfg.get("dir_rel", "data/faces")
    candidate = Path(configured)
    if candidate.is_absolute():
        return FaceGallery(candidate)
    return FaceGallery(BASE_DIR / configured)
def _build_zone_gallery():
    """Construct the ZoneGallery rooted at the configured zones directory."""
    # N3 — zones gallery (zone → place → linked faces). Honours SANAD_ZONES_DIR
    # (absolute) then the core_config 'zones' section dir_rel, then a default.
    from Project.Sanad.config import BASE_DIR
    from Project.Sanad.core.config_loader import section as _cfg_section
    zones_cfg = _cfg_section("core", "zones")
    configured = os.environ.get("SANAD_ZONES_DIR") or zones_cfg.get("dir_rel", "data/zones")
    candidate = Path(configured)
    if candidate.is_absolute():
        return ZoneGallery(candidate)
    return ZoneGallery(BASE_DIR / configured)
camera = _safe_construct("camera", _build_camera if CameraDaemon else None)
gallery = _safe_construct("gallery", _build_gallery if FaceGallery else None)
zone_gallery = _safe_construct("zone_gallery", _build_zone_gallery if ZoneGallery else None)
# Restore persisted vision_enabled at boot — start camera if the user left
# it on across a reboot. Face-rec state is read by the Gemini child directly.
# Best-effort: any failure while reading or applying the persisted state is
# logged and startup continues with the camera idle.
try:
from Project.Sanad.vision import recognition_state as _recog_state
from Project.Sanad.config import BASE_DIR as _BD
_state = _recog_state.read(_BD / "data" / ".recognition_state.json")
if _state.vision_enabled and camera is not None:
# camera.start() returning a falsy value is treated as "no backend".
if camera.start():
log.info("Camera vision restored from state (backend=%s)", camera.backend)
else:
log.warning("Camera vision was ON but no backend available — leaving OFF")
# Persist vision_enabled=False so the stored state matches reality.
_recog_state.mutate(_BD / "data" / ".recognition_state.json",
vision_enabled=False)
except Exception:
log.exception("Could not restore recognition state")
# Hand the camera to the Gemini supervisor so it can forward frames to the
# child over stdin while a live session runs.
# Both attachments are optional: they need the supervisor, the resource, and a
# supervisor that exposes the matching attach_* hook.
if live_sub is not None and camera is not None:
try:
if hasattr(live_sub, "attach_camera"):
live_sub.attach_camera(camera)
log.info("Camera attached to live subprocess supervisor")
except Exception:
# Logged only; the supervisor keeps running without camera frames.
log.exception("attach_camera failed")
# Hand the AudioManager to the supervisor so the audio watcher can keep
# PulseAudio defaults aligned with the live profile on every Anker
# plug/unplug. Without this, typed-replay / record playback would stay on
# the boot device even after the live session swapped to Anker.
if live_sub is not None and audio_mgr is not None:
try:
if hasattr(live_sub, "attach_audio_manager"):
live_sub.attach_audio_manager(audio_mgr)
log.info("AudioManager attached to live subprocess supervisor")
except Exception:
# Logged only; startup continues.
log.exception("attach_audio_manager failed")
# ── Motion-state → Gemini channel ───────────────────────────────────────────
# The arm controller emits motion.action_started / _done / _error on the bus.
# Forward each to the Gemini child as a 'state:' line so the live session can
# answer "what are you doing?" honestly. Sync handlers, fired via emit_sync
# from the arm's worker thread — send_state just writes to a pipe (cheap).
# Only wired when the active supervisor implements send_state.
if live_sub is not None and hasattr(live_sub, "send_state"):
try:
from Project.Sanad.core.event_bus import bus as _bus
# Each handler accepts **_kw so extra event fields are ignored.
def _on_motion_started(action: str = "", **_kw):
live_sub.send_state("start", action)
def _on_motion_done(action: str = "", elapsed_sec=None,
failed: bool = False, **_kw):
# action_error already covered the failure case with a reason;
# here just emit complete (skip if it failed to avoid a dup).
if not failed:
live_sub.send_state("complete", action, elapsed_sec=elapsed_sec)
def _on_motion_error(action: str = "", reason: str = "", **_kw):
live_sub.send_state("error", action, reason=reason)
# Subscribe the three handlers to their bus events.
_bus.on("motion.action_started", _on_motion_started)
_bus.on("motion.action_done", _on_motion_done)
_bus.on("motion.action_error", _on_motion_error)
log.info("Motion-state → Gemini channel wired")
except Exception:
# Logged only; the live session then gets no motion-state lines.
log.exception("Could not wire motion-state → Gemini channel")
# Wire everything into the Brain (only what was constructed)
def _safe_attach(method_name: str, value):
    """Call `brain.<method_name>(value)` if the brain, the value and the method all exist.

    A failure inside the attach method is logged and swallowed.
    """
    if brain is not None and value is not None:
        hook = getattr(brain, method_name, None)
        if hook is not None:
            try:
                hook(value)
            except Exception:
                log.exception("brain.%s failed", method_name)


# Attach in a fixed order; missing subsystems are skipped by _safe_attach.
for _hook_name, _component in (
    ("attach_voice", voice_client),
    ("attach_audio_manager", audio_mgr),
    ("attach_arm", arm),
    ("attach_macro_recorder", macro_rec),
    ("attach_macro_player", macro_play),
    ("attach_live_voice", live_voice),
):
    _safe_attach(_hook_name, _component)
# ── Runtime sanity report ────────────────────────────────────────────────
# Name → instance (None when the subsystem could not be imported or built).
SUBSYSTEMS = dict(
    brain=brain,
    arm=arm,
    audio_mgr=audio_mgr,
    voice_client=voice_client,
    local_tts=local_tts,
    macro_rec=macro_rec,
    macro_play=macro_play,
    teacher=teacher,
    wake_mgr=wake_mgr,
    live_voice=live_voice,
    live_sub=live_sub,
    typed_replay=typed_replay,
    camera=camera,
    gallery=gallery,
    zone_gallery=zone_gallery,
    loco_controller=loco_controller,
    movement_dispatch=movement_dispatch,
)
# Critical subsystems — if any of these are None, log an error at startup
CRITICAL_SUBSYSTEMS = ("brain",)
for _name in CRITICAL_SUBSYSTEMS:
    if SUBSYSTEMS.get(_name) is None:
        log.error("CRITICAL subsystem '%s' is None — application will be unusable", _name)
# Split the registry into present / absent names in a single pass.
_available = []
_missing = []
for _key, _instance in SUBSYSTEMS.items():
    if _instance is None:
        _missing.append(_key)
    else:
        _available.append(_key)
log.info("Subsystems available (%d): %s", len(_available), ", ".join(_available))
if _missing:
    log.warning("Subsystems unavailable (%d): %s", len(_missing), ", ".join(_missing))
_already_shut_down = False


def _do_shutdown(from_signal: bool = False):
    """Clean shutdown — release hardware, stop background tasks. Idempotent.

    Every step is isolated: a failure is logged and the remaining steps still
    run. `from_signal` is accepted for callers but not consulted here.
    """
    global _already_shut_down
    if _already_shut_down:
        return
    _already_shut_down = True
    log.info("Shutdown requested")

    def _attempt(failure_message, step):
        # Run one shutdown step; log (with traceback) instead of propagating.
        try:
            step()
        except Exception:
            log.exception(failure_message)

    def _invoke_if_present(target, method_name):
        # Call target.<method_name>() only when the attribute exists.
        if hasattr(target, method_name):
            getattr(target, method_name)()

    def _stop_live_sub():
        # A supervisor without a callable is_running counts as not running.
        probe = getattr(live_sub, "is_running", None)
        if callable(probe) and live_sub.is_running():
            live_sub.stop()

    def _stop_camera():
        if camera.is_running():
            camera.stop()

    if arm is not None:
        _attempt("arm.cancel() failed", lambda: _invoke_if_present(arm, "cancel"))
        _attempt("arm.disable() failed", lambda: _invoke_if_present(arm, "disable"))
    if movement_dispatch is not None:
        _attempt("movement_dispatch.stop() failed", lambda: movement_dispatch.stop())
    if loco_controller is not None:
        # StopMove (no FSM change) + disarm
        _attempt("loco_controller.shutdown() failed", lambda: loco_controller.shutdown())
    if live_sub is not None:
        _attempt("live_sub.stop() failed", _stop_live_sub)
    if audio_mgr is not None:
        _attempt("audio_mgr.close() failed", lambda: _invoke_if_present(audio_mgr, "close"))
    if camera is not None:
        _attempt("camera.stop() failed", _stop_camera)
    log.info("Shutdown complete")


import atexit  # noqa: E402
atexit.register(_do_shutdown)
# NOTE: Do NOT install custom SIGINT/SIGTERM handlers here.
# Uvicorn installs its own signal handlers for graceful shutdown.
# If we override them, Ctrl+C never reaches uvicorn and the server
# keeps running forever. Our _do_shutdown runs via atexit instead.
def _print_env_diagnostic():
    """Print everything you'd need to debug a deployment issue.

    Reports interpreter, layout and dashboard settings, then tries to import
    each critical third-party module, then lists every SUBSYSTEMS entry.
    Lines are prefixed with an ASCII marker: `[ok]` (importable / constructed)
    or `[--]` (import failed / subsystem is None).
    """
    ok_mark, missing_mark = "[ok]", "[--]"
    print("=" * 60)
    print("SANAD ENVIRONMENT DIAGNOSTIC")
    print("=" * 60)
    print(f"Python: {sys.version}")
    print(f"Executable: {sys.executable}")
    print(f"Platform: {sys.platform}")
    print(f"BASE_DIR: {_THIS_DIR}")
    print(f"Parent: {_PARENT}")
    print(f"Layout: {'dev (Project/Sanad)' if _PARENT.name == 'Project' else 'deployed (top-level Sanad)'}")
    print(f"Dashboard: {DASHBOARD_HOST}:{DASHBOARD_PORT} (interface: {DASHBOARD_INTERFACE})")
    print(f"DDS interface: {DDS_NETWORK_INTERFACE}")
    print()
    print("sys.path[0:8]:")
    for p in sys.path[:8]:
        print(f"  {p}")
    print()
    print("Critical imports:")
    for mod_name in ("uvicorn", "fastapi", "pydantic", "starlette",
                     "websockets", "httpx", "pyaudio", "pyrealsense2",
                     "unitree_sdk2py", "ultralytics", "numpy", "cv2"):
        try:
            mod = __import__(mod_name)
        except BaseException as exc:
            # BaseException on purpose: a diagnostic must survive modules
            # that exit or abort while being imported.
            print(f"  {missing_mark} {mod_name:18s} {type(exc).__name__}: {exc}")
            continue
        # str() because __version__ is not guaranteed to be a string, and the
        # `12s` format spec raises for non-str values — which would report an
        # importable module as broken.
        ver = str(getattr(mod, "__version__", "?"))
        path = getattr(mod, "__file__", "?")
        print(f"  {ok_mark} {mod_name:18s} {ver:12s} {path}")
    print()
    print("Subsystems available (after constructing main module globals):")
    for name in sorted(SUBSYSTEMS):
        marker = ok_mark if SUBSYSTEMS[name] is not None else missing_mark
        print(f"  {marker} {name}")
    print("=" * 60)
def main():
    """Parse CLI arguments, initialise the arm hardware and serve the dashboard.

    With `--check-env` only the environment diagnostic is printed. Otherwise
    the function blocks inside `uvicorn.run()` until the server stops.

    Exit behaviour:
      * uvicorn or the dashboard app cannot be imported → exit code 1.
      * `uvicorn.run()` raises an ordinary exception → logged, exit code 1.
      * Ctrl+C reaching this frame → treated as a normal stop (no traceback).
      * uvicorn calling `sys.exit()` → its own exit code is preserved.
    """
    parser = argparse.ArgumentParser(description="Sanad Robot Assistant")
    parser.add_argument("--host", default=DASHBOARD_HOST,
                        help=f"Dashboard bind address. Default is wlan0's IP "
                             f"({DASHBOARD_HOST!r}). Override with SANAD_DASHBOARD_HOST "
                             f"or SANAD_DASHBOARD_INTERFACE.")
    parser.add_argument("--port", type=int, default=DASHBOARD_PORT)
    parser.add_argument("--network", default=DDS_NETWORK_INTERFACE,
                        help="DDS network interface (e.g. eth0, lo). "
                             "Override with SANAD_DDS_INTERFACE env var.")
    parser.add_argument("--check-env", action="store_true",
                        help="Print environment diagnostic and exit "
                             "(no server start, no hardware init)")
    args = parser.parse_args()
    if args.check_env:
        _print_env_diagnostic()
        return

    log.info("Sanad starting — Python %s @ %s", sys.version.split()[0], sys.executable)
    log.info("BASE_DIR: %s", _THIS_DIR)
    log.info("Dashboard interface: %s → bound to %s", DASHBOARD_INTERFACE, args.host)
    log.info("Starting Sanad — host=%s port=%d network=%s", args.host, args.port, args.network)
    if brain is not None:
        try:
            log.info("Brain status: %s", brain.status())
        except Exception:
            log.exception("brain.status() failed")

    # Initialize hardware (graceful if unavailable)
    if arm is not None:
        try:
            arm.init(network_interface=args.network)
        except Exception:
            log.exception("arm.init() failed — continuing without hardware")

    # ── import uvicorn ──────────────────────────────────────────────────
    # Catch ANY exception (not just ImportError) so the real failure reason
    # is surfaced. The previous narrow catch hid issues like uvicorn pulling
    # in a broken transitive dep, or being installed under a different
    # site-packages than the active interpreter.
    uvicorn = None
    try:
        import uvicorn  # type: ignore
        log.info("uvicorn %s loaded from %s",
                 getattr(uvicorn, "__version__", "?"),
                 getattr(uvicorn, "__file__", "?"))
    except BaseException as exc:
        log.error("Could not import uvicorn: %s: %s", type(exc).__name__, exc)
        log.error("Python: %s", sys.executable)
        log.error("sys.path[0:5]: %s", sys.path[:5])
        log.error("Try: %s -m pip install --user 'uvicorn[standard]' fastapi", sys.executable)
        sys.exit(1)

    # ── import the FastAPI app ──────────────────────────────────────────
    # Pass the app object directly so uvicorn doesn't have to re-resolve the
    # import path (which differs between dev and deployed layouts).
    try:
        from Project.Sanad.dashboard.app import app as _app
    except BaseException:
        log.exception("Could not import Dashboard.app — aborting")
        sys.exit(1)

    # ── start the server ────────────────────────────────────────────────
    try:
        uvicorn.run(
            _app,
            host=args.host,
            port=args.port,
            log_level="info",
        )
    except KeyboardInterrupt:
        # An operator stopping the server is not a crash; atexit still runs
        # _do_shutdown on the way out.
        log.info("Interrupted — shutting down")
    except SystemExit as exc:
        # uvicorn exits via sys.exit() on startup failure; keep its code
        # instead of flattening everything to 1.
        if exc.code not in (0, None):
            log.error("uvicorn exited with code %s", exc.code)
        raise
    except Exception:
        log.exception("uvicorn.run() failed")
        sys.exit(1)
if __name__ == "__main__":
main()

0
vendor/Sanad/motion/__init__.py vendored Normal file
View File

832
vendor/Sanad/motion/arm_controller.py vendored Normal file
View File

@ -0,0 +1,832 @@
"""Robot arm controller — real DDS motor commands and JSONL motion replay.
Production-grade replay engine ported from G1_Lootah/Controller/g1_replay_trigger_r2x.py.
Features: body-lock, ramp-in/out interpolation, watchdog, speed control, CRC.
Falls back gracefully to simulation when the Unitree SDK is unavailable.
"""
from __future__ import annotations
import json
import threading
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from Project.Sanad.config import (
G1_NUM_MOTOR,
KD_HIGH,
KD_LOW,
KD_WRIST,
KP_HIGH,
KP_LOW,
KP_WRIST,
MOTIONS_DIR,
REPLAY_HZ,
WEAK_MOTORS,
WRIST_MOTORS,
)
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.event_bus import bus
from Project.Sanad.core.logger import get_logger
log = get_logger("arm_controller")
_AC = _cfg_section("motion", "arm_controller")
# G1 hardware constants — single source in config/core_config.json
from Project.Sanad.config import ENABLE_ARM_SDK_INDEX
RAMP_IN_STEPS = _AC.get("ramp_in_steps", 60) # ~1.0s smooth move to start pose
RAMP_OUT_STEPS = _AC.get("ramp_out_steps", 180) # ~3.0s smooth return to home
SETTLE_HOLD_SEC = _AC.get("settle_hold_sec", 0.5) # hold start pose before replay begins
WATCHDOG_TIMEOUT = _AC.get("watchdog_timeout_sec", 0.25) # hold last pose if state stale
WATCHDOG_DISABLE_AFTER = _AC.get("watchdog_disable_after_sec", 1.0) # abort if state stale this long
ARM_INDICES = range(
_AC.get("arm_indices_start", 15),
_AC.get("arm_indices_stop", 29),
)
# N1 — motor temperature sampling. rt/lowstate arrives ~500 Hz; building the
# per-motor temperature snapshot that often is wasteful, so we refresh it at
# most this often. The 3D dashboard polls the snapshot over a WebSocket at a
# similar rate.
_TEMP_SAMPLE_INTERVAL_S = float(_AC.get("temp_sample_interval_sec", 0.1))
# -- SDK import (optional) --
try:
from unitree_sdk2py.core.channel import (
ChannelFactoryInitialize,
ChannelPublisher,
ChannelSubscriber,
)
from unitree_sdk2py.idl.unitree_hg.msg.dds_ import LowCmd_, LowState_
from unitree_sdk2py.utils.crc import CRC
# IDL factory — LowCmd_() with no args fails because the dataclass
# has 5 required positional fields. The SDK ships a default factory
# named `unitree_hg_msg_dds__LowCmd_` that constructs a fully-zeroed
# instance with the right number of motor_cmd entries.
try:
from unitree_sdk2py.idl.default import unitree_hg_msg_dds__LowCmd_
_make_low_cmd = unitree_hg_msg_dds__LowCmd_
except ImportError:
_make_low_cmd = None
_HAS_SDK = True
except ImportError:
_HAS_SDK = False
_make_low_cmd = None
log.warning("Unitree SDK not available — running in simulation mode")
# G1 arm-action client for built-in arm moves (wave, shake_hand, hug, …).
# NOTE: do NOT use LocoClient here — LocoClient is the locomotion/body-move
# client and its ExecuteAction() doesn't recognise arm-action IDs, so arm
# commands become silent no-ops. The correct client is the arm-specific
# G1ArmActionClient with the SDK's action_map (name → opcode lookup).
try:
from unitree_sdk2py.g1.arm.g1_arm_action_client import (
G1ArmActionClient,
action_map as _ARM_ACTION_MAP,
)
_HAS_ARM_CLIENT = True
except ImportError:
G1ArmActionClient = None
_ARM_ACTION_MAP = {}
_HAS_ARM_CLIENT = False
@dataclass
class Action:
"""One entry of the arm action registry (SDK built-in or JSONL replay)."""
# Registry key used for name lookups (ACTION_BY_NAME).
name: str
# Registry key used for id lookups (ACTION_BY_ID); JSONL actions are numbered
# from _JSONL_ID_START upwards by _scan_jsonl_actions().
id: int
file: str = "" # JSONL filename (empty = SDK built-in)
category: str = "sdk" # "sdk" | "jsonl"
# -- SDK actions (fixed — built into Unitree firmware) --
SDK_ACTIONS: list[Action] = [
Action("release_arm", 0, category="sdk"),
Action("shake_hand", 1, category="sdk"),
Action("high_five", 2, category="sdk"),
Action("hug", 3, category="sdk"),
Action("high_wave", 4, category="sdk"),
Action("clap", 5, category="sdk"),
Action("face_wave", 6, category="sdk"),
Action("left_kiss", 7, category="sdk"),
Action("heart", 8, category="sdk"),
Action("right_heart", 9, category="sdk"),
Action("hands_up", 10, category="sdk"),
Action("x_ray", 11, category="sdk"),
Action("right_hand_up", 12, category="sdk"),
Action("reject", 13, category="sdk"),
Action("right_kiss", 14, category="sdk"),
Action("two_hand_kiss", 15, category="sdk"),
]
# Next auto-ID for JSONL actions starts after SDK range.
_JSONL_ID_START = _AC.get("jsonl_id_start", 100)
def _scan_jsonl_actions() -> list[Action]:
    """Build one `Action` per `*.jsonl` file found in MOTIONS_DIR.

    The directory is created if missing. Files are taken in sorted order and
    numbered consecutively from `_JSONL_ID_START`; the action name is the
    file stem with hyphens and spaces turned into underscores.
    """
    MOTIONS_DIR.mkdir(parents=True, exist_ok=True)
    motion_files = sorted(MOTIONS_DIR.glob("*.jsonl"))
    return [
        Action(
            name=motion.stem.replace("-", "_").replace(" ", "_"),
            id=_JSONL_ID_START + offset,
            file=motion.name,
            category="jsonl",
        )
        for offset, motion in enumerate(motion_files)
    ]
def rebuild_action_registry() -> tuple[list[Action], dict[int, Action], dict[str, Action]]:
    """Return `(all_actions, by_id, by_name)` for SDK actions plus a fresh disk scan.

    When two actions share an id or a name, the later one in the combined
    list (SDK first, then JSONL) wins the lookup entry.
    """
    scanned = _scan_jsonl_actions()
    combined = [*SDK_ACTIONS, *scanned]
    id_index = {}
    name_index = {}
    for action in combined:
        id_index[action.id] = action
        name_index[action.name] = action
    log.info("Action registry: %d SDK + %d JSONL = %d total",
             len(SDK_ACTIONS), len(scanned), len(combined))
    return combined, id_index, name_index


# Initial build
ACTIONS, ACTION_BY_ID, ACTION_BY_NAME = rebuild_action_registry()
def _lerp_q(q_start: list[float], q_end: list[float], t: float) -> list[float]:
"""Linear interpolation between two joint-position vectors, t in [0,1]."""
return [a + (b - a) * t for a, b in zip(q_start, q_end)]
def _load_frames(path: Path) -> list[dict[str, Any]]:
"""Read JSONL file, return list of frames with 't' and 'q' keys."""
frames = []
with open(path, "r") as f:
for line in f:
line = line.strip()
if not line:
continue
data = json.loads(line)
if "q" in data:
frames.append(data)
return frames
def _load_home_q(home_file: str = "arm_home.jsonl") -> list[float] | None:
    """Return the "q" vector of the first frame in `MOTIONS_DIR/home_file`.

    Returns None when the file does not exist or holds no frames.
    """
    home_path = MOTIONS_DIR / home_file
    if home_path.exists():
        recorded = _load_frames(home_path)
        if recorded:
            return recorded[0]["q"]
    return None
class ArmController:
"""Thread-safe arm controller with real DDS replay and simulation fallback."""
def __init__(self):
"""Set up locks and default state only; no DDS or SDK call happens until init()."""
self._lock = threading.Lock()
self._cancel = threading.Event()
self._is_busy = False
self._last_action_time = 0.0
self.cooldown_sec = 1.0
self._initialized = False
# N2 — arm ⇄ locomotion mutual exclusion. While locomotion movement is
# active, the arm must NOT run any replay / SDK action / gesture. main.py
# passes loco_controller.movement_active to set_motion_block(); checked at
# every playback chokepoint (replay_file / _execute), so it also blocks
# voice/Gemini-triggered gestures, not just the dashboard.
self._motion_block = None
# DDS handles (set in init())
self._arm_pub = None
self._state_sub = None
self._low_cmd = None
self._crc = None
self._arm_client = None
# Live state from LowState_ subscriber
self._current_q: list[float] = [0.0] * G1_NUM_MOTOR
self._last_state_time = 0.0
self._state_lock = threading.Lock()
self._first_state_event = threading.Event()
# N1 — latest per-motor temperatures (throttled snapshot for the 3D
# temperature dashboard). Each entry: {motor_id, surface, winding}.
self._current_temps: list[dict[str, Any]] = []
self._last_temp_time = 0.0
# Cached motion file metadata
self._motion_files_cache: dict[str, dict[str, Any]] = {}
# -- initialization --
def init(self, network_interface: str = "lo") -> bool:
    """Bring up the DDS publisher/subscriber and the optional arm action client.

    Args:
        network_interface: Interface name handed to ChannelFactoryInitialize.

    Returns:
        True if the controller is (or already was) initialised; False in
        simulation mode (SDK not importable) or when any setup step raised.
        Setup errors are logged, not propagated.
    """
    if self._initialized:
        return True
    if not _HAS_SDK:
        log.info("Simulation mode — DDS init skipped")
        return False
    try:
        ChannelFactoryInitialize(0, network_interface)
        self._arm_pub = ChannelPublisher("rt/arm_sdk", LowCmd_)
        self._arm_pub.Init()
        self._state_sub = ChannelSubscriber("rt/lowstate", LowState_)
        self._state_sub.Init(self._on_low_state, 10)
        # IDL types need the SDK's default factory — bare LowCmd_() fails
        # because the dataclass has 5 required positional fields.
        if _make_low_cmd is not None:
            self._low_cmd = _make_low_cmd()
        else:
            # Last-resort: try a few constructor signatures
            try:
                self._low_cmd = LowCmd_()
            except TypeError:
                # Build with explicit zeroed fields
                from unitree_sdk2py.idl.unitree_hg.msg.dds_ import MotorCmd_
                try:
                    from unitree_sdk2py.idl.default import (
                        unitree_hg_msg_dds__MotorCmd_ as _make_motor_cmd,
                    )
                except ImportError:
                    _make_motor_cmd = lambda: MotorCmd_(
                        mode=0, q=0.0, dq=0.0, tau=0.0, kp=0.0, kd=0.0, reserve=0,
                    )
                self._low_cmd = LowCmd_(
                    mode_pr=0,
                    mode_machine=0,
                    motor_cmd=[_make_motor_cmd() for _ in range(35)],
                    reserve=[0, 0, 0, 0],
                    crc=0,
                )
        self._crc = CRC()
        # Arm-specific action client for built-in moves
        if _HAS_ARM_CLIENT:
            try:
                self._arm_client = G1ArmActionClient()
                self._arm_client.SetTimeout(10.0)
                self._arm_client.Init()
                log.info("G1ArmActionClient initialized (%d actions) — built-in moves available",
                         len(_ARM_ACTION_MAP))
            except Exception as exc:
                # The action client is optional: JSONL replay still works without it.
                log.warning("G1ArmActionClient init failed: %s — built-in actions disabled", exc)
                self._arm_client = None
        self._initialized = True
        log.info("DDS initialized on %s", network_interface)
    except Exception as exc:
        log.error("DDS init failed: %s", exc)
    return self._initialized
def _on_low_state(self, msg):
    """DDS LowState_ callback: cache joint positions and, throttled, temperatures.

    Joint positions are refreshed on every call (the replay watchdog depends
    on that rate). The per-motor temperature snapshot (N1) is rebuilt at most
    once per _TEMP_SAMPLE_INTERVAL_S so the temperature dashboard gets live
    data without a second DDS subscriber.
    """
    stamp = time.monotonic()
    with self._state_lock:
        self._current_q = [float(msg.motor_state[idx].q) for idx in range(G1_NUM_MOTOR)]
        self._last_state_time = stamp
    if not self._first_state_event.is_set():
        self._first_state_event.set()

    # Temperatures are sampled on a slower cadence than positions.
    if (stamp - self._last_temp_time) < _TEMP_SAMPLE_INTERVAL_S:
        return

    snapshot = []
    for motor_id in range(G1_NUM_MOTOR):
        state = msg.motor_state[motor_id]
        surface = winding = None
        raw = getattr(state, "temperature", None)
        try:
            # The field may be [surface, winding], a one-element sequence or a
            # plain scalar depending on the firmware build (matches
            # Marcus/Features/TempMonitor low_state_callback).
            if raw is None:
                pass
            elif hasattr(raw, "__len__"):
                if len(raw) >= 2:
                    surface, winding = int(raw[0]), int(raw[1])
                elif len(raw) == 1:
                    surface = winding = int(raw[0])
            else:
                surface = winding = int(raw)
        except (TypeError, ValueError):
            surface = winding = None
        snapshot.append({"motor_id": motor_id, "surface": surface, "winding": winding})

    with self._state_lock:
        self._current_temps = snapshot
        self._last_temp_time = stamp
def wait_for_state(self, timeout: float = 2.0) -> bool:
"""Block until first LowState_ callback fires (or timeout). Returns True if state received."""
return self._first_state_event.wait(timeout=timeout)
# -- internal API exposed for teaching/macro_player (encapsulation boundary) --
def get_current_q(self) -> list[float]:
"""Public read of current joint positions."""
return self._get_current_q()
def get_motor_temps(self) -> list[dict[str, Any]]:
"""Public read of the latest per-motor temperature snapshot (N1).
Returns a list of {motor_id, surface, winding} (values may be None if
the firmware didn't report a temperature). Empty until the first
LowState_ callback fires.
"""
with self._state_lock:
return list(self._current_temps)
def send_frame(self, arm_target_q: list[float], body_lock_q: list[float]):
    """Public single-frame send; delegates to _send_frame().

    Use only inside a controlled playback loop.

    Args:
        arm_target_q: Target joint vector for the frame.
        body_lock_q: Joint vector used to hold the body in place.
    """
    self._send_frame(arm_target_q, body_lock_q)
def disable(self):
    """Public disable — releases arm SDK control by delegating to _disable_sdk()."""
    self._disable_sdk()
def state_age(self) -> float:
    """Seconds since the last LowState_ callback (public wrapper for _state_age())."""
    return self._state_age()
def _get_current_q(self) -> list[float]:
with self._state_lock:
return list(self._current_q)
def _state_age(self) -> float:
with self._state_lock:
return time.monotonic() - self._last_state_time if self._last_state_time else 999.0
# -- frame sending (real DDS with CRC) --
def _send_frame(self, arm_target_q: list[float], body_lock_q: list[float]):
    """Publish one motor frame: joints 15+ follow the target, the rest stay locked.

    Does nothing when the controller is not initialised or no command message
    exists. Gains are chosen per motor (weak / wrist / default), then the CRC
    is computed and the message is written to the arm publisher.
    """
    if self._low_cmd is None or not self._initialized:
        return

    # Raise the arm-SDK enable slot before filling in the joints.
    self._low_cmd.motor_cmd[ENABLE_ARM_SDK_INDEX].q = 1.0

    for idx in range(G1_NUM_MOTOR):
        cmd = self._low_cmd.motor_cmd[idx]
        cmd.mode = 1
        cmd.dq = 0.0
        cmd.tau = 0.0
        # Arms get replay data, body stays locked.
        cmd.q = arm_target_q[idx] if idx >= 15 else body_lock_q[idx]

        if idx in WEAK_MOTORS:
            gains = (KP_LOW, KD_LOW)
        elif idx in WRIST_MOTORS:
            gains = (KP_WRIST, KD_WRIST)
        else:
            gains = (KP_HIGH, KD_HIGH)
        cmd.kp = gains[0]
        cmd.kd = gains[1]

    self._low_cmd.crc = self._crc.Crc(self._low_cmd)
    self._arm_pub.Write(self._low_cmd)
def _disable_sdk(self):
    """Release arm SDK control: publish 10 disable frames, 0.02 s apart (50 Hz).

    Direct port of g1_replay_v4_stable.py:DisableSDK (lines 141-147). Does
    nothing when the controller is not initialised.
    """
    if self._low_cmd is None or not self._initialized:
        return

    frame = self._low_cmd
    frame.motor_cmd[ENABLE_ARM_SDK_INDEX].q = 0.0
    frame.crc = self._crc.Crc(frame)

    remaining = 10
    while remaining > 0:
        self._arm_pub.Write(frame)
        time.sleep(0.02)
        remaining -= 1
# -- replay engine --
def replay_file(self, filepath: str, speed: float = 1.0):
    """Play a JSONL motion file with the full production replay sequence.

    The call is refused (with a warning) while the motion-block predicate
    reports locomotion as armed, and skipped when another playback already
    owns the arm.

    Args:
        filepath: Path to .jsonl motion file.
        speed: Playback speed multiplier (1.0 = normal).
    """
    if self._blocked():
        log.warning("replay_file refused — locomotion movement is armed")
        return

    with self._lock:
        already_running = self._is_busy
        if not already_running:
            self._is_busy = True
            self._cancel.clear()
    if already_running:
        log.warning("replay_file: arm busy, skipping")
        return

    try:
        self._replay_file_inner(filepath, speed)
    finally:
        # Always hand the arm back, even if the replay raised.
        with self._lock:
            self._is_busy = False
            self._last_action_time = time.monotonic()
def _replay_file_inner(self, filepath: str, speed: float = 1.0):
    """Run one complete replay of a JSONL motion file.

    Port of g1_replay_v4_stable.py:ReplayWithHome.Run(). The caller is
    expected to own the busy flag. Phases:
      1.  Wait up to 2 s for the first LowState_ message (never body-lock
          from zeros).
      2.  Load data: home_q, a live-pose snapshot used as the body lock,
          and the replay frames.
      3.  MOVE TO START — RAMP_IN_STEPS frames, alpha = k / RAMP_IN_STEPS
          (starts at the exact current pose, ends just shy of the target).
      3b. SETTLE HOLD — holds the start pose for SETTLE_HOLD_SEC so the
          motors reach it before playback (replaces the reference's
          "Press Enter" pause).
      4.  PLAY — each tick sends the first frame whose timestamp has not
          yet elapsed, then sleeps 1 / REPLAY_HZ.
      5.  RETURN HOME and DisableSDK via _return_home().

    A cancel during phase 3 or 3b returns home from the last pose actually
    commanded, so the arm does not jump. A stale-LowState watchdog abort
    during phase 4 disables the SDK immediately without a return ramp.

    Args:
        filepath: Motion file; relative paths resolve against MOTIONS_DIR.
        speed: Playback speed multiplier. Values that are not > 0 would
            stall the PLAY loop, so on hardware they fall back to 1.0.

    Raises:
        FileNotFoundError: if the motion file does not exist.
    """
    path = Path(filepath)
    if not path.is_absolute():
        path = MOTIONS_DIR / path
    if not path.exists():
        raise FileNotFoundError(f"Motion file not found: {path}")
    frames = _load_frames(path)
    if not frames:
        log.warning("Empty motion file: %s", path)
        return
    if not _HAS_SDK:
        duration = len(frames) / REPLAY_HZ / speed if speed else len(frames) / REPLAY_HZ
        log.info("[SIM] Replay %s (%.1fs, %d frames, speed=%.1f)",
                 path.name, duration, len(frames), speed)
        self._sim_replay(frames, speed)
        return
    # With speed <= 0 (or NaN) play_elapsed never advances and the PLAY loop
    # would resend the same frame until someone cancels.
    if not speed > 0:
        log.warning("Replay %s: invalid speed %r — using 1.0", path.name, speed)
        speed = 1.0
    log.info("Replay %s (%d frames @ %.0f Hz, speed=%.1f)",
             path.name, len(frames), REPLAY_HZ, speed)
    # ─── 1. Wait for first LowState ─────────────────────────
    if not self._first_state_event.is_set():
        log.warning("Waiting for first LowState message...")
        if not self._first_state_event.wait(timeout=2.0):
            log.error("No LowState received in 2s — refusing to replay (would lock body to zeros)")
            return
    # ─── 2. Load data (ref lines 154-166) ───────────────────
    home_q = _load_home_q() or [0.0] * G1_NUM_MOTOR
    full_body_lock_q = self._get_current_q()  # snapshot live state
    interval = 1.0 / REPLAY_HZ
    file_start_q = frames[0]["q"]
    # ─── 3. MOVE TO START (ref lines 171-181) ───────────────
    log.debug("Moving to start (%d steps)", RAMP_IN_STEPS)
    # Last pose actually commanded; the cancel path ramps home from here.
    last_cmd_q = full_body_lock_q
    for k in range(RAMP_IN_STEPS):
        if self._cancel.is_set():
            self._return_home(last_cmd_q, full_body_lock_q, home_q)
            return
        alpha = k / RAMP_IN_STEPS  # 0 .. (RAMP_IN_STEPS-1)/RAMP_IN_STEPS
        interp_q = list(full_body_lock_q)
        for j in range(15, G1_NUM_MOTOR):
            interp_q[j] = (1 - alpha) * full_body_lock_q[j] + alpha * file_start_q[j]
        self._send_frame(interp_q, full_body_lock_q)
        last_cmd_q = interp_q
        time.sleep(interval)
    # ─── 3b. SETTLE HOLD — replaces reference's Enter pause ─
    settle_frames = max(0, int(SETTLE_HOLD_SEC * REPLAY_HZ))
    if settle_frames > 0:
        log.debug("Settle hold (%d frames / %.2fs)", settle_frames, SETTLE_HOLD_SEC)
        for _ in range(settle_frames):
            if self._cancel.is_set():
                self._return_home(last_cmd_q, full_body_lock_q, home_q)
                return
            self._send_frame(file_start_q, full_body_lock_q)
            last_cmd_q = file_start_q
            time.sleep(interval)
    # ─── 4. PLAY (ref lines 183-234) ────────────────────────
    log.debug("Playing %d frames", len(frames))
    last_played_q = file_start_q
    play_elapsed = 0.0
    last_real = time.monotonic()
    t0 = frames[0].get("t", 0.0)
    while True:
        if self._cancel.is_set():
            break
        # Watchdog — abort if LowState goes stale
        age = self._state_age()
        if age > WATCHDOG_DISABLE_AFTER:
            log.error("Watchdog abort — LowState stale %.2fs", age)
            self._disable_sdk()
            return
        now_real = time.monotonic()
        dt_real = now_real - last_real
        last_real = now_real
        play_elapsed += dt_real * speed
        # Pick the next frame whose timestamp has elapsed (reference pattern)
        target_frame = None
        for f in frames:
            if f.get("t", 0.0) - t0 >= play_elapsed:
                target_frame = f
                break
        if target_frame is None:
            break
        self._send_frame(target_frame["q"], full_body_lock_q)
        last_played_q = target_frame["q"]
        time.sleep(interval)
    # ─── 5. RETURN HOME (ref lines 239-256) + DisableSDK ────
    self._return_home(last_played_q, full_body_lock_q, home_q)
def _return_home(self, from_q: list[float], body_lock_q: list[float], home_q: list[float]):
    """Ramp the arm joints linearly from ``from_q`` to ``home_q``, then disable the SDK.

    Direct port of g1_replay_v4_stable.py:239-256. RAMP_OUT_STEPS frames are
    sent 1 / REPLAY_HZ apart; only joints 15 .. G1_NUM_MOTOR-1 are
    interpolated, while ``body_lock_q`` is passed through as the body lock.

    IMPORTANT: the ramp is unconditional. `_cancel` is cleared on entry so a
    user cancel (which broke the play loop) cannot also abort the return
    ramp — otherwise the arm would "snap" home.
    """
    self._cancel.clear()
    log.info("Returning home (%d steps / %.1fs)", RAMP_OUT_STEPS, RAMP_OUT_STEPS / REPLAY_HZ)

    interval = 1.0 / REPLAY_HZ
    arm_joints = range(15, G1_NUM_MOTOR)
    for step in range(RAMP_OUT_STEPS):
        # alpha runs 0 .. (RAMP_OUT_STEPS-1)/RAMP_OUT_STEPS
        alpha = step / RAMP_OUT_STEPS
        frame_q = list(from_q)
        for joint in arm_joints:
            frame_q[joint] = (1 - alpha) * from_q[joint] + alpha * home_q[joint]
        self._send_frame(frame_q, body_lock_q)
        time.sleep(interval)

    self._disable_sdk()
    log.info("Home reached, SDK disabled")
def _sim_replay(self, frames: list[dict], speed: float):
    """Simulated replay: send nothing, just sleep one tick per frame.

    Each tick lasts (1 / REPLAY_HZ) / max(speed, 0.1). The loop stops early
    when the cancel flag is set.
    """
    total = len(frames)
    tick = 1.0 / REPLAY_HZ
    for index in range(total):
        if self._cancel.is_set():
            log.info("[SIM] Replay cancelled at frame %d/%d", index, total)
            return
        time.sleep(tick / max(speed, 0.1))
    log.info("[SIM] Replay complete")
# -- public API --
@property
def is_busy(self) -> bool:
    """True while a playback owns the arm (read of the flag without taking the lock)."""
    return self._is_busy
def set_motion_block(self, predicate):
    """Install a no-args predicate; while it returns True, the playback
    entry points that consult _blocked() (replay_file and _execute) refuse
    to run. Used for arm ⇄ locomotion mutual exclusion.

    Args:
        predicate: Zero-argument callable returning a truthy value while arm
            motion must be refused, or None to remove the block.
    """
    self._motion_block = predicate
def _blocked(self) -> bool:
pred = self._motion_block
if pred is None:
return False
try:
return bool(pred())
except Exception:
log.exception("motion-block predicate raised — treating as NOT blocked")
return False
def cancel(self):
    """Cancel the currently running replay.

    Only sets the cancel flag. The play loop in `_replay_file_inner` checks
    the flag and breaks out; `_return_home` then runs as the final phase of
    the same replay (matches the reference's end-of-Run behaviour in
    g1_replay_v4_stable.py).
    """
    self._cancel.set()
def _unused_return_to_home(self, duration_sec: float = 3.0,
                           home_file: str = "arm_home.jsonl") -> None:
    """Deprecated — replay's own `_return_home` is called automatically
    when cancel breaks the play loop. Kept here only to preserve any
    external caller; no new code should use this.

    Ramps the arm joints from the current pose to the last valid 'q' frame
    of ``home_file`` over about ``duration_sec`` seconds. Unlike
    `_return_home`, this ramp can be cancelled mid-way and does not call
    `_disable_sdk` afterwards.

    Args:
        duration_sec: Ramp length in seconds (at least 30 steps are used).
        home_file: File name inside MOTIONS_DIR holding the home pose.
    """
    if not self._initialized or self._low_cmd is None:
        log.warning("return_to_home: arm controller not initialised")
        return
    if not self._first_state_event.wait(timeout=2.0):
        log.error("return_to_home: no LowState received in 2s — aborting")
        return
    home_path = MOTIONS_DIR / home_file
    if not home_path.exists():
        log.warning("return_to_home: %s missing — skipping", home_path.name)
        return
    # Use the LAST valid 'q' in the file as the settle pose
    home_q: list[float] | None = None
    try:
        for frame in _load_frames(home_path):
            q = frame.get("q")
            if q and len(q) == G1_NUM_MOTOR:
                home_q = q
    except Exception as exc:
        log.warning("return_to_home: reading %s failed: %s",
                    home_path.name, exc)
        return
    if home_q is None:
        log.warning("return_to_home: %s has no valid 'q' frames",
                    home_path.name)
        return
    with self._state_lock:
        start_q = list(self._current_q)
    body_lock_q = list(start_q)
    # Let the ramp publish frames even though we just cancelled
    self._cancel.clear()
    with self._lock:
        if self._is_busy:
            # A pending replay is still winding down — wait a beat
            # NOTE(review): only the debug line runs here; there is no actual
            # wait, and the busy flag is claimed below regardless.
            log.debug("return_to_home: arm busy, waiting briefly")
        self._is_busy = True
    try:
        steps = max(30, int(duration_sec * REPLAY_HZ))  # ≥ 0.5s ramp
        dt = 1.0 / REPLAY_HZ
        log.info("return_to_home: ramp %d steps (%.1fs) → %s",
                 steps, duration_sec, home_file)
        for k in range(steps):
            if self._cancel.is_set():
                log.info("return_to_home: cancelled mid-ramp")
                break
            # alpha runs 1/steps .. 1, so the final frame is exactly home_q.
            alpha = (k + 1) / steps
            interp_q = list(body_lock_q)
            for j in range(15, G1_NUM_MOTOR):
                interp_q[j] = (1 - alpha) * start_q[j] + alpha * home_q[j]
            self._send_frame(interp_q, body_lock_q)
            time.sleep(dt)
        log.info("return_to_home: done")
    finally:
        with self._lock:
            self._is_busy = False
def refresh_actions(self):
    """Re-scan data/motions/ and rebuild the action registry.

    Rebinds the module-level ACTIONS, ACTION_BY_ID and ACTION_BY_NAME to the
    values returned by rebuild_action_registry().
    """
    global ACTIONS, ACTION_BY_ID, ACTION_BY_NAME
    ACTIONS, ACTION_BY_ID, ACTION_BY_NAME = rebuild_action_registry()
def list_actions(self) -> list[dict[str, Any]]:
    """Return every registered action as a plain dict (id, name, file, category)."""
    listing: list[dict[str, Any]] = []
    for action in ACTIONS:
        listing.append({
            "id": action.id,
            "name": action.name,
            "file": action.file,
            "category": action.category,
        })
    return listing
def list_motion_files(self) -> list[dict[str, Any]]:
    """List all JSONL files in data/motions/ with metadata.

    Frame counts are cached by (path, mtime) to avoid re-parsing megabytes of
    JSONL on every dashboard refresh. A file that cannot be stat'ed, read or
    parsed (deleted mid-scan, corrupt content) is logged and left out of the
    listing rather than failing the whole call.

    Returns:
        One dict per readable file with keys name, path, frames,
        duration_sec and size_kb, sorted by path.
    """
    MOTIONS_DIR.mkdir(parents=True, exist_ok=True)
    result = []
    for p in sorted(MOTIONS_DIR.glob("*.jsonl")):
        try:
            stat = p.stat()
            cache_key = f"{p}:{stat.st_mtime_ns}"
            cached = self._motion_files_cache.get(cache_key)
            if cached is None:
                frames = _load_frames(p)
                duration = len(frames) / REPLAY_HZ if frames else 0
                cached = {
                    "name": p.name,
                    "path": str(p),
                    "frames": len(frames),
                    "duration_sec": round(duration, 2),
                    "size_kb": round(stat.st_size / 1024, 1),
                }
                # Drop stale entries for this path before adding new one
                stale = [k for k in self._motion_files_cache if k.startswith(f"{p}:")]
                for k in stale:
                    self._motion_files_cache.pop(k, None)
                self._motion_files_cache[cache_key] = cached
        except (OSError, ValueError, TypeError) as exc:
            # ValueError covers JSON and text-decoding errors; TypeError covers
            # lines whose JSON value is not an object.
            log.warning("list_motion_files: skipping %s: %s", p.name, exc)
            continue
        result.append(cached)
    return result
def trigger_by_id(self, action_id: int, speed: float = 1.0):
    """Run the registered action with the given numeric id.

    Raises:
        KeyError: if no action has that id.
    """
    found = ACTION_BY_ID.get(action_id)
    if found is not None:
        self._execute(found, speed)
        return
    raise KeyError(f"Unknown action id: {action_id}")
def trigger_by_name(self, name: str, speed: float = 1.0):
    """Run the registered action with the given name.

    Raises:
        KeyError: if no action has that name.
    """
    found = ACTION_BY_NAME.get(name)
    if found is not None:
        self._execute(found, speed)
        return
    raise KeyError(f"Unknown action: {name}")
def _execute(self, action: Action, speed: float = 1.0):
    """Run one action (JSONL replay or built-in SDK move) and emit bus events.

    Emits "motion.action_error" and returns when locomotion is armed. Skips
    silently (warning only) when the arm is busy. Otherwise emits
    "motion.action_started", runs the action, emits "motion.action_error" if
    it raised, and always finishes with "motion.action_done". Exceptions from
    the action are logged, not propagated.

    Args:
        action: Registry entry; a non-empty ``file`` selects JSONL replay.
        speed: Playback speed multiplier passed to the replay.
    """
    if self._blocked():
        log.warning("arm action %s refused — locomotion movement is armed", action.name)
        bus.emit_sync("motion.action_error", action=action.name,
                      reason="movement armed")
        return
    with self._lock:
        if self._is_busy:
            log.warning("Arm busy, skipping %s", action.name)
            return
        self._is_busy = True
        self._cancel.clear()
    _start = time.monotonic()
    _failed = False
    try:
        bus.emit_sync("motion.action_started", action=action.name)
        if action.file:
            self._replay_file_inner(action.file, speed=speed)
        else:
            self._run_sdk_action(action)
    except Exception as exc:
        _failed = True
        log.error("Action %s failed: %s", action.name, exc)
        bus.emit_sync("motion.action_error", action=action.name,
                      reason=str(exc))
    finally:
        # Release the arm before announcing completion.
        with self._lock:
            self._is_busy = False
            self._last_action_time = time.monotonic()
        # action_done always fires (back-compat for existing listeners);
        # action_error above is the extra signal for the Gemini
        # motion-state channel. elapsed_sec lets Gemini say "...took 2.3s".
        bus.emit_sync("motion.action_done", action=action.name,
                      elapsed_sec=round(time.monotonic() - _start, 2),
                      failed=_failed)
def _run_sdk_action(self, action: Action):
    """Trigger a built-in arm move through G1ArmActionClient.

    In simulation mode the call only logs and sleeps 2 s. On hardware the
    registry name is mapped to a key of _ARM_ACTION_MAP by trying several
    spellings; unknown names and a missing client are logged and skipped.
    Errors from ExecuteAction are logged, not raised.
    """
    if not _HAS_SDK:
        log.info("[SIM] SDK action: %s (id=%d)", action.name, action.id)
        time.sleep(2.0)
        return
    if self._arm_client is None:
        log.warning(
            "SDK action %s requested but G1ArmActionClient not available — skipping",
            action.name,
        )
        return

    # Sanad's registry uses underscored names ("shake_hand", "x_ray"); the
    # SDK's action_map is keyed by human-readable forms that mix spaces and
    # hyphens ("shake hand", "x-ray", "two-hand kiss").
    name = action.name
    spaced = name.replace("_", " ")        # shake_hand → shake hand
    hyphenated = name.replace("_", "-")    # x_ray → x-ray
    # First separator as a hyphen, the rest as spaces ("two-hand kiss").
    mixed = name.replace("_", "-", 1).replace("_", " ")
    candidates = [name, spaced, hyphenated, mixed]

    sdk_name = None
    for candidate in candidates:
        if candidate in _ARM_ACTION_MAP:
            sdk_name = candidate
            break
    if sdk_name is None:
        log.warning(
            "SDK action %s not in G1ArmActionClient action_map — tried %s. keys=%s",
            action.name, candidates, sorted(_ARM_ACTION_MAP.keys())[:12],
        )
        return

    opcode = _ARM_ACTION_MAP[sdk_name]
    log.info("SDK action: %s (opcode=%s)", action.name, opcode)
    try:
        self._arm_client.ExecuteAction(opcode)
        # Built-in arm actions block on the robot side for ~3s; the SDK
        # call returns immediately. Sleep so we don't hammer it back-to-back.
        time.sleep(3.0)
    except Exception as exc:
        log.error("SDK action %s failed: %s", action.name, exc)
def status(self) -> dict[str, Any]:
    """Return a snapshot of controller state and action/file counts for the dashboard."""
    state_age = round(self._state_age(), 3)
    jsonl_count = len([a for a in ACTIONS if a.category == "jsonl"])
    if MOTIONS_DIR.exists():
        file_count = len(list(MOTIONS_DIR.glob("*.jsonl")))
    else:
        file_count = 0

    report: dict[str, Any] = {}
    report["initialized"] = self._initialized
    report["sdk_available"] = _HAS_SDK
    report["busy"] = self._is_busy
    report["state_age_sec"] = state_age
    report["sdk_actions"] = len(SDK_ACTIONS)
    report["jsonl_actions"] = jsonl_count
    report["total_actions"] = len(ACTIONS)
    report["total_motion_files"] = file_count
    return report

275
vendor/Sanad/motion/macro_player.py vendored Normal file
View File

@ -0,0 +1,275 @@
"""Macro Player — synchronized playback of audio + motion recordings.
Reads paired files:
recordings/audio/<name>.wav
recordings/motion/<name>.jsonl
Plays audio and streams joint commands simultaneously so the robot's
physical movements perfectly match the recorded speech timing.
"""
from __future__ import annotations
import json
import threading
import time
import wave
from pathlib import Path
from typing import Any
from Project.Sanad.config import AUDIO_RECORDINGS_DIR, MOTION_RECORDINGS_DIR, REPLAY_HZ
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.logger import get_logger
log = get_logger("macro_player")
# Macro-player tunables, read from the "motion" / "macro_player" config
# section; the second argument to .get() is the fallback used when a key is absent.
_MP = _cfg_section("motion", "macro_player")
# Number of interpolation frames for the ramp-in / ramp-out phases.
RAMP_IN_STEPS = _MP.get("ramp_in_steps", 60)
RAMP_OUT_STEPS = _MP.get("ramp_out_steps", 60)
# Streaming aborts once the arm state is older than this many seconds.
WATCHDOG_DISABLE_AFTER = _MP.get("watchdog_disable_after_sec", 1.0)
def _lerp_q(a: list[float], b: list[float], t: float) -> list[float]:
return [x + (y - x) * t for x, y in zip(a, b)]
class _ArmAdapter:
"""Uniform interface over either the public arm API or its private members.
Hides the hasattr branching that previously polluted _play_motion. If the
arm controller exposes the new public methods (get_current_q, send_frame,
disable, state_age, wait_for_state) we use those; otherwise we fall back
to the private versions for backward compatibility.
"""
def __init__(self, arm):
self._arm = arm
def wait_for_state(self, timeout: float = 2.0) -> bool:
fn = getattr(self._arm, "wait_for_state", None)
if callable(fn):
return fn(timeout=timeout)
# No state-wait API: assume ready
return True
def get_current_q(self) -> list[float]:
fn = getattr(self._arm, "get_current_q", None)
if callable(fn):
return fn()
return self._arm._get_current_q()
def send_frame(self, target_q: list[float], body_lock_q: list[float]):
fn = getattr(self._arm, "send_frame", None)
if callable(fn):
return fn(target_q, body_lock_q)
return self._arm._send_frame(target_q, body_lock_q)
def disable(self):
fn = getattr(self._arm, "disable", None)
if callable(fn):
return fn()
return self._arm._disable_sdk()
def state_age(self) -> float:
fn = getattr(self._arm, "state_age", None)
if callable(fn):
return fn()
# No watchdog support: pretend state is fresh
return 0.0
class MacroPlayer:
    """Plays a paired audio (.wav) + motion (.jsonl) recording in sync.

    Audio and motion each run on their own thread; play() blocks until both
    have finished. stop() asks both to end early.
    """

    def __init__(self, audio_manager=None, arm_controller=None):
        self._audio_mgr = audio_manager
        self._arm = arm_controller
        self._lock = threading.Lock()
        self._playing = False
        self._stop_event = threading.Event()

    @property
    def is_playing(self) -> bool:
        return self._playing

    def play(self, name: str) -> dict[str, Any]:
        """Play macro ``name`` and block until audio and motion have both ended.

        Returns:
            {"name": name, "duration_sec": <elapsed seconds, 2 decimals>}.

        Raises:
            FileNotFoundError: if the audio or the motion file is missing.
            RuntimeError: if another macro is already playing.
        """
        audio_path = AUDIO_RECORDINGS_DIR / f"{name}.wav"
        motion_path = MOTION_RECORDINGS_DIR / f"{name}.jsonl"
        if not audio_path.exists():
            raise FileNotFoundError(f"Audio not found: {audio_path}")
        if not motion_path.exists():
            raise FileNotFoundError(f"Motion not found: {motion_path}")
        with self._lock:
            if self._playing:
                raise RuntimeError("Macro playback already in progress.")
            self._playing = True
            self._stop_event.clear()
        t0 = time.monotonic()
        try:
            audio_thread = threading.Thread(target=self._play_audio, args=(audio_path,), daemon=True)
            motion_thread = threading.Thread(target=self._play_motion, args=(motion_path,), daemon=True)
            log.info("Macro playback starting: %s", name)
            audio_thread.start()
            motion_thread.start()
            audio_thread.join()
            motion_thread.join()
            elapsed = time.monotonic() - t0
        finally:
            # Always clear the flag: otherwise a failed start/join would make
            # every later play() raise "already in progress".
            with self._lock:
                self._playing = False
        log.info("Macro playback complete: %s (%.1fs)", name, elapsed)
        return {"name": name, "duration_sec": round(elapsed, 2)}

    def stop(self):
        """Request early termination of the running macro (audio and motion)."""
        self._stop_event.set()
        # Best-effort: stop audio playback if the manager exposes a stop method
        if self._audio_mgr is not None and hasattr(self._audio_mgr, "stop_playback"):
            try:
                self._audio_mgr.stop_playback()
            except Exception as exc:
                log.warning("audio stop failed: %s", exc)

    def _play_audio(self, path: Path):
        """Audio thread body: play the WAV through the audio manager, if any."""
        if self._audio_mgr is None:
            log.warning("No audio manager — skipping audio playback")
            return
        try:
            self._audio_mgr.play_wav(path)
        except Exception as exc:
            log.error("Audio playback failed: %s", exc)

    def _play_motion(self, path: Path):
        """Motion thread body: ramp-in, stream recorded frames, ramp-out, disable.

        Once frames may have been sent, the ramp-out and the SDK disable
        always run, including after stop() or a failed ramp-in, so the arm is
        neither left under SDK control nor released abruptly.
        """
        frames = self._load_frames(path)
        if not frames:
            return
        if self._arm is None:
            # Simulated playback — wait through it, but wake up on stop().
            duration = frames[-1].get("t", 0)
            log.info("[SIM] MacroPlayer would play %d frames over %.1fs", len(frames), duration)
            self._stop_event.wait(timeout=min(duration, 30.0))
            return
        adapter = _ArmAdapter(self._arm)
        interval = 1.0 / REPLAY_HZ
        # CRITICAL: wait for first LowState before reading current pose
        if not adapter.wait_for_state(timeout=2.0):
            log.error("MacroPlayer aborting — no LowState received in 2s")
            return
        try:
            current_q = adapter.get_current_q()
        except Exception:
            log.exception("Failed to read current pose")
            return
        body_lock_q = list(current_q)
        first_frame_q = frames[0]["q"]
        try:
            # Phase 1: Ramp-in (cancellable)
            completed, last_q = self._ramp_tracked(
                adapter, current_q, first_frame_q, body_lock_q, RAMP_IN_STEPS, "ramp-in")
            if completed:
                # Phase 2: Stream recorded frames with timing + watchdog
                last_q = self._stream_frames(adapter, frames, body_lock_q, interval) or first_frame_q
            # Phase 3: Ramp-out back to the starting pose. Not cancellable: a
            # stop request ends phase 1/2 but must not skip the smooth return.
            self._ramp(adapter, last_q, body_lock_q, body_lock_q, RAMP_OUT_STEPS,
                       "ramp-out", cancellable=False)
        finally:
            # Phase 4: Disable arm SDK
            try:
                adapter.disable()
            except Exception:
                log.exception("disable() failed")

    def _load_frames(self, path: Path) -> list[dict]:
        """Read JSONL motion file. Returns list of frames or [] on failure."""
        frames: list[dict] = []
        try:
            with open(path, "r") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError as exc:
                        log.warning("Skipping bad line in %s: %s", path.name, exc)
                        continue
                    # `"q" in data` raises TypeError for JSON scalars.
                    if isinstance(data, dict) and "q" in data:
                        frames.append(data)
        except OSError:
            log.exception("Failed to read motion file %s", path)
        if not frames:
            log.warning("No usable frames in %s", path.name)
        return frames

    def _ramp(self, adapter: "_ArmAdapter", from_q: list[float], to_q: list[float],
              body_lock_q: list[float], steps: int, label: str,
              cancellable: bool = True) -> bool:
        """Smoothly interpolate from `from_q` to `to_q` over `steps` frames.

        Returns True on success, False if cancelled or a send failed. With
        ``cancellable=False`` the stop event is ignored.
        """
        completed, _ = self._ramp_tracked(adapter, from_q, to_q, body_lock_q,
                                          steps, label, cancellable)
        return completed

    def _ramp_tracked(self, adapter: "_ArmAdapter", from_q: list[float], to_q: list[float],
                      body_lock_q: list[float], steps: int, label: str,
                      cancellable: bool = True) -> tuple[bool, list[float]]:
        """Run a ramp and also report the last pose that was actually sent.

        Returns (completed, last_sent_q); last_sent_q is ``from_q`` when no
        frame went out.
        """
        log.info("MacroPlayer %s (%d steps)", label, steps)
        interval = 1.0 / REPLAY_HZ
        last_sent = from_q
        for step in range(steps):
            if cancellable and self._stop_event.is_set():
                return False, last_sent
            t = (step + 1) / steps
            interp = _lerp_q(from_q, to_q, t)
            try:
                adapter.send_frame(interp, body_lock_q)
            except Exception:
                log.exception("%s send_frame failed", label)
                return False, last_sent
            last_sent = interp
            time.sleep(interval)
        return True, last_sent

    def _stream_frames(self, adapter: "_ArmAdapter", frames: list[dict],
                       body_lock_q: list[float], interval: float) -> list[float] | None:
        """Stream the recorded frames with watchdog. Returns the last successfully sent q."""
        t0 = time.monotonic()
        last_q: list[float] | None = None
        for frame in frames:
            if self._stop_event.is_set():
                break
            age = adapter.state_age()
            if age > WATCHDOG_DISABLE_AFTER:
                log.error("MacroPlayer watchdog abort — state stale %.2fs", age)
                break
            # Sleep until this frame's recorded timestamp is due.
            target_t = frame.get("t", 0)
            elapsed = time.monotonic() - t0
            sleep_time = target_t - elapsed
            if sleep_time > 0:
                time.sleep(sleep_time)
            try:
                adapter.send_frame(frame["q"], body_lock_q)
                last_q = frame["q"]
            except Exception:
                log.exception("send_frame failed mid-replay")
        return last_q

    def list_macros(self) -> list[dict[str, Any]]:
        """List available macro recordings (paired audio + motion)."""
        macros = []
        for audio_path in sorted(AUDIO_RECORDINGS_DIR.glob("*.wav")):
            name = audio_path.stem
            motion_path = MOTION_RECORDINGS_DIR / f"{name}.jsonl"
            if motion_path.exists():
                macros.append({
                    "name": name,
                    "audio_path": str(audio_path),
                    "motion_path": str(motion_path),
                    "audio_size_kb": round(audio_path.stat().st_size / 1024, 1),
                    "motion_size_kb": round(motion_path.stat().st_size / 1024, 1),
                })
        return macros

    def status(self) -> dict[str, Any]:
        return {"playing": self._playing}

163
vendor/Sanad/motion/macro_recorder.py vendored Normal file
View File

@ -0,0 +1,163 @@
"""Macro Recorder — simultaneously captures audio + robot joint positions.
Produces a paired set of files:
recordings/audio/<name>.wav microphone or Gemini output audio
recordings/motion/<name>.jsonl timestamped joint positions
These can be replayed in sync via MacroPlayer.
"""
from __future__ import annotations
import json
import threading
import time
import wave
from pathlib import Path
from typing import Any
from Project.Sanad.config import (
AUDIO_RECORDINGS_DIR,
CHANNELS,
CHUNK_SIZE,
MOTION_RECORDINGS_DIR,
RECEIVE_SAMPLE_RATE,
REPLAY_HZ,
)
from Project.Sanad.core.logger import get_logger
log = get_logger("macro_recorder")
class MacroRecorder:
    """Records audio + joint positions simultaneously.

    start() launches one capture thread for the microphone and one for the
    joint positions; stop() ends both and writes the paired .wav / .jsonl
    files.
    """

    # Length of the joint vector written when no initialised arm is attached.
    _MOTOR_COUNT = 29

    def __init__(self, arm_controller=None):
        self._arm = arm_controller
        self._lock = threading.Lock()
        self._recording = False
        self._audio_thread: threading.Thread | None = None
        self._motion_thread: threading.Thread | None = None
        self._stop_event = threading.Event()
        self._name = ""
        self._audio_frames: list[bytes] = []
        self._motion_frames: list[dict[str, Any]] = []
        self._started_at = 0.0

    @property
    def is_recording(self) -> bool:
        return self._recording

    def start(self, name: str) -> dict[str, Any]:
        """Begin recording macro ``name``.

        Raises:
            RuntimeError: if a recording is already in progress.
        """
        with self._lock:
            if self._recording:
                raise RuntimeError("Already recording a macro.")
            self._recording = True
            self._name = name
            self._stop_event.clear()
            self._audio_frames = []
            self._motion_frames = []
            self._started_at = time.monotonic()
        try:
            AUDIO_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)
            MOTION_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)
            self._audio_thread = threading.Thread(target=self._record_audio, daemon=True)
            self._motion_thread = threading.Thread(target=self._record_motion, daemon=True)
            self._audio_thread.start()
            self._motion_thread.start()
        except Exception:
            # Do not stay "recording" when the capture threads never started.
            self._stop_event.set()
            with self._lock:
                self._recording = False
            raise
        log.info("Macro recording started: %s", name)
        return {"recording": True, "name": name}

    def stop(self) -> dict[str, Any]:
        """Stop recording and write the audio and motion files.

        The recording flag is cleared even when saving fails, so a later
        start() is never blocked by a failed stop().

        Raises:
            RuntimeError: if no recording is in progress.
        """
        with self._lock:
            if not self._recording:
                raise RuntimeError("No macro recording in progress.")
        self._stop_event.set()
        try:
            if self._audio_thread:
                self._audio_thread.join(timeout=3.0)
            if self._motion_thread:
                self._motion_thread.join(timeout=3.0)
            # Save audio
            audio_path = AUDIO_RECORDINGS_DIR / f"{self._name}.wav"
            pcm = b"".join(self._audio_frames)
            with wave.open(str(audio_path), "wb") as wf:
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(2)  # int16
                wf.setframerate(RECEIVE_SAMPLE_RATE)
                wf.writeframes(pcm)
            # Save motion
            motion_path = MOTION_RECORDINGS_DIR / f"{self._name}.jsonl"
            with open(motion_path, "w") as f:
                f.write(json.dumps({"meta": {"hz": REPLAY_HZ, "motors": 29}}) + "\n")
                for frame in self._motion_frames:
                    f.write(json.dumps(frame) + "\n")
            duration = time.monotonic() - self._started_at
        finally:
            with self._lock:
                self._recording = False
        log.info("Macro saved: audio=%s motion=%s (%.1fs)", audio_path, motion_path, duration)
        return {
            "recording": False,
            "name": self._name,
            "audio_path": str(audio_path),
            "motion_path": str(motion_path),
            "duration_sec": round(duration, 2),
            "audio_frames": len(self._audio_frames),
            "motion_frames": len(self._motion_frames),
        }

    def _record_audio(self):
        """Capture mic audio in background thread."""
        pya = None
        stream = None
        try:
            import pyaudio
            pya = pyaudio.PyAudio()
            stream = pya.open(
                format=pyaudio.paInt16,
                channels=CHANNELS,
                rate=RECEIVE_SAMPLE_RATE,
                input=True,
                frames_per_buffer=CHUNK_SIZE,
            )
            while not self._stop_event.is_set():
                data = stream.read(CHUNK_SIZE, exception_on_overflow=False)
                self._audio_frames.append(data)
        except Exception as exc:
            log.error("Audio recording failed: %s", exc)
        finally:
            # Release the device even when open/read failed part-way through.
            if stream is not None:
                try:
                    stream.stop_stream()
                    stream.close()
                except Exception as exc:
                    log.warning("Audio stream cleanup failed: %s", exc)
            if pya is not None:
                try:
                    pya.terminate()
                except Exception as exc:
                    log.warning("PyAudio terminate failed: %s", exc)

    def _record_motion(self):
        """Capture joint positions at REPLAY_HZ."""
        interval = 1.0 / REPLAY_HZ
        t0 = time.monotonic()
        while not self._stop_event.is_set():
            t = round(time.monotonic() - t0, 4)
            # Read current joint positions from arm controller
            q = self._read_joint_positions()
            self._motion_frames.append({"t": t, "q": q})
            # Waiting on the event (not sleeping) ends the loop promptly on stop().
            self._stop_event.wait(interval)

    def _read_joint_positions(self) -> list[float]:
        """Read current joint positions. Returns zeros if SDK unavailable."""
        if self._arm is not None and self._arm._initialized:
            return self._arm._get_current_q()
        return [0.0] * self._MOTOR_COUNT

    def status(self) -> dict[str, Any]:
        """Return the recording flag, macro name, elapsed seconds and frame counts."""
        elapsed = time.monotonic() - self._started_at if self._recording else 0
        return {
            "recording": self._recording,
            "name": self._name,
            "elapsed_sec": round(elapsed, 1),
            "audio_frames": len(self._audio_frames),
            "motion_frames": len(self._motion_frames),
        }

View File

@ -0,0 +1,285 @@
import os
import time
import json
import threading
import traceback
import numpy as np
from pathlib import Path
from dataclasses import dataclass
# ==================================================
# ⚙️ Config (from config/motion_config.json)
# ==================================================
BASE_DIR = Path(__file__).resolve().parent

# Best effort: if the project config loader is missing or fails for any
# reason, start from an empty dict and rely entirely on the defaults below.
try:
    from Project.Sanad.core.config_loader import section as _cfg_section
    _MCFG = _cfg_section("motion", "sanad_arm_controller")
except Exception:
    _MCFG = {}

# Fill in every top-level key the config did not provide.
for _key, _fallback in (
    ("action_cooldown_sec", 1.0),
    ("stability_threshold", 0.06),
    ("gains", {}),
    ("weak_motors", [4, 10, 15, 16, 17, 18, 22, 23, 24, 25]),
    ("wrist_motors", [19, 20, 21, 26, 27, 28]),
    ("data_subdir", "DataG1"),
):
    _MCFG.setdefault(_key, _fallback)

# Same for the nested gain table (kp/kd per motor class).
for _key, _fallback in (
    ("kp_high", 300.0),
    ("kd_high", 3.0),
    ("kp_low", 80.0),
    ("kd_low", 3.0),
    ("kp_wrist", 40.0),
    ("kd_wrist", 1.5),
):
    _MCFG["gains"].setdefault(_key, _fallback)
del _key, _fallback
# ==================================================
# ✅ Option List
# ==================================================
@dataclass(frozen=True)
class TestOption:
    """One triggerable arm action.

    ``file`` is the JSONL recording to replay; an empty string means the
    action has no recording attached.
    """

    name: str
    id: int
    file: str = ""


# (name, id, recording file) for every action, in presentation order.
OPTION_LIST = [
    TestOption(name=label, id=number, file=recording)
    for label, number, recording in (
        ("release arm", 0, ""),
        ("shake hand", 1, ""),
        ("high five", 2, ""),
        ("hug", 3, ""),
        ("high wave", 4, ""),
        ("clap", 5, ""),
        ("face wave", 6, ""),
        ("left kiss", 7, ""),
        ("heart", 8, ""),
        ("right heart", 9, ""),
        ("hands up", 10, ""),
        ("x-ray", 11, ""),
        ("right hand up", 12, ""),
        ("reject", 13, ""),
        ("right kiss", 14, ""),
        ("two-hand kiss", 15, ""),
        ("release arm recorded", 30, "arm_home.jsonl"),
        ("laugh", 23, "laugh.jsonl"),
        ("bird", 24, "bird.jsonl"),
        ("change battery", 25, "change_battery.jsonl"),
        ("move hands up", 26, "hands_up.jsonl"),
        ("move right hand up", 27, "right_hand_up.jsonl"),
        ("move left hand up", 28, "left_hand_up.jsonl"),
    )
]

# Lookup tables: by numeric id and by lower-cased name.
OPTION_BY_ID = {}
OPTION_BY_NAME = {}
for option in OPTION_LIST:
    OPTION_BY_ID[option.id] = option
    OPTION_BY_NAME[option.name.lower()] = option
del option
# ==================================================
# 🦾 Unitree SDK Configuration
# ==================================================
# The SDK is optional. Every name imported here is always bound after this
# block: to the real object when the import succeeds, or to a falsy
# placeholder when it fails, so later code can test for availability without
# risking a NameError. `except Exception` is deliberate: any failure while
# importing the SDK is treated the same as the SDK being absent.
try:
    from unitree_sdk2py.core.channel import ChannelFactoryInitialize, ChannelPublisher, ChannelSubscriber
    from unitree_sdk2py.g1.arm.g1_arm_action_client import G1ArmActionClient, action_map
    from unitree_sdk2py.idl.default import unitree_hg_msg_dds__LowCmd_
    from unitree_sdk2py.idl.unitree_hg.msg.dds_ import LowCmd_, LowState_
    from unitree_sdk2py.utils.crc import CRC
    _ROBOT_SDK_AVAILABLE = True
    _UNITREE_IMPORT_ERR = ""  # no error; defined so it can always be read
except Exception:
    ChannelFactoryInitialize = None
    ChannelPublisher = ChannelSubscriber = None
    G1ArmActionClient = None
    action_map = {}
    LowCmd_ = LowState_ = None
    unitree_hg_msg_dds__LowCmd_ = None
    CRC = None
    _ROBOT_SDK_AVAILABLE = False
    # Keep the full traceback so the reason can be reported later.
    _UNITREE_IMPORT_ERR = traceback.format_exc()
# G1 hardware constants — single source in config/core_config.json
from Project.Sanad.config import G1_NUM_MOTOR, ENABLE_ARM_SDK_INDEX, REPLAY_HZ
# Values below are read once from the merged motion config (_MCFG).
DATA_DIR = BASE_DIR / _MCFG["data_subdir"]  # directory holding the *.jsonl recordings
ACTION_COOLDOWN_SEC = _MCFG["action_cooldown_sec"]
STABILITY_THRESHOLD = _MCFG["stability_threshold"]

# Per-motor-class gains.
_G = _MCFG["gains"]
KP_HIGH = _G["kp_high"]
KD_HIGH = _G["kd_high"]
KP_LOW = _G["kp_low"]
KD_LOW = _G["kd_low"]
KP_WRIST = _G["kp_wrist"]
KD_WRIST = _G["kd_wrist"]

# Motor index lists that select the low / wrist gain sets in _send_frame.
WEAK_MOTORS = _MCFG["weak_motors"]
WRIST_MOTORS = _MCFG["wrist_motors"]
class ArmController:
    """Run one arm gesture at a time: a built-in SDK action or a JSONL replay.

    Actions execute on daemon threads. New triggers are ignored while an
    action is running (``_is_busy``) or before ``cooldown_sec`` has elapsed
    since the previous action finished. When the Unitree SDK could not be
    imported (``ChannelFactoryInitialize is None``), ``init()`` returns False
    and every trigger is a no-op.
    """

    def __init__(self, cooldown_sec: float = ACTION_COOLDOWN_SEC):
        """Create an idle controller; DDS objects are created later by ``init()``.

        Args:
            cooldown_sec: Minimum seconds between the end of one action and
                acceptance of the next trigger.
        """
        self._ready = False
        self.cooldown_sec = float(cooldown_sec)
        self._last_action_time = 0.0
        self.low_state = None  # latest LowState_ message, set by the subscriber callback
        self.crc = CRC() if CRC else None
        self._pub = None
        self._sub = None
        self._client = None
        self._is_busy = False
        self._init_lock = threading.Lock()

    def init(self) -> bool:
        """Create the DDS publisher/subscriber and action client once.

        Returns:
            True when the controller is (or already was) ready, False when
            the SDK is unavailable or initialization raised. The failure
            reason is printed rather than silently dropped.
        """
        with self._init_lock:
            if self._ready:
                return True
            if ChannelFactoryInitialize is None:
                return False
            try:
                ChannelFactoryInitialize(0)
                self._pub = ChannelPublisher("rt/arm_sdk", LowCmd_)
                self._pub.Init()
                self._sub = ChannelSubscriber("rt/lowstate", LowState_)
                self._sub.Init(self._low_state_handler, 10)
                if G1ArmActionClient:
                    self._client = G1ArmActionClient()
                    self._client.SetTimeout(10.0)
                    self._client.Init()
                self._ready = True
                return True
            except Exception:
                # Still reported as "not ready", but the cause is now visible.
                print("❌ ArmController init failed:")
                traceback.print_exc()
                return False

    def _low_state_handler(self, msg: LowState_):
        """Subscriber callback: remember the most recent low-level state."""
        self.low_state = msg

    def _cooldown_ok(self) -> bool:
        """Return True once ``cooldown_sec`` has passed since the last action ended."""
        now = time.time()
        return (now - self._last_action_time) >= self.cooldown_sec

    @staticmethod
    def _read_jsonl(path):
        """Return the decoded objects of a JSONL file, skipping blank lines.

        Blank or whitespace-only lines (e.g. a trailing newline added by an
        editor) are ignored instead of making ``json.loads`` raise.
        """
        records = []
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                records.append(json.loads(line))
        return records

    def _load_home_pose(self):
        """Return the last ``q`` vector recorded in ``arm_home.jsonl``.

        Falls back to ``G1_NUM_MOTOR`` zeros when the file is missing,
        unreadable, malformed, or contains no ``q`` entry.
        """
        path = DATA_DIR / "arm_home.jsonl"
        last_q = [0.0] * G1_NUM_MOTOR
        try:
            for record in self._read_jsonl(path):
                if "q" in record:
                    last_q = record["q"]
        except Exception:
            return [0.0] * G1_NUM_MOTOR
        return last_q

    def _is_pose_stable(self, target_q):
        """Return whether motors 15..28 are within STABILITY_THRESHOLD of ``target_q``.

        Returns False when no low-level state has been received yet.
        """
        if not self.low_state:
            return False
        current_q = np.array([self.low_state.motor_state[i].q for i in range(15, 29)])
        target_arm_q = np.array(target_q[15:29])
        diff = np.abs(current_q - target_arm_q)
        return np.max(diff) < STABILITY_THRESHOLD

    def _send_frame(self, arm_q, body_lock_q):
        """Publish one command frame.

        Motors with index >= 15 take their target from ``arm_q``; lower
        indices are held at ``body_lock_q``. Gains are chosen per motor from
        WEAK_MOTORS / WRIST_MOTORS, defaulting to the high gains. Does
        nothing when the publisher has not been created.
        """
        if not self._pub:
            return
        cmd = unitree_hg_msg_dds__LowCmd_()
        # NOTE(review): presumably this slot switches arm-SDK control on — confirm against SDK docs.
        cmd.motor_cmd[ENABLE_ARM_SDK_INDEX].q = 1.0
        for i in range(G1_NUM_MOTOR):
            cmd.motor_cmd[i].mode = 1
            cmd.motor_cmd[i].q = arm_q[i] if i >= 15 else body_lock_q[i]
            if i in WEAK_MOTORS:
                cmd.motor_cmd[i].kp, cmd.motor_cmd[i].kd = KP_LOW, KD_LOW
            elif i in WRIST_MOTORS:
                cmd.motor_cmd[i].kp, cmd.motor_cmd[i].kd = KP_WRIST, KD_WRIST
            else:
                cmd.motor_cmd[i].kp, cmd.motor_cmd[i].kd = KP_HIGH, KD_HIGH
        cmd.crc = self.crc.Crc(cmd)
        self._pub.Write(cmd)

    def _managed_replay(self, filename: str):
        """Replay a recorded JSONL motion, then return to the home pose.

        Runs on a worker thread. Sequence: ramp from the current pose to the
        first frame, play every frame at REPLAY_HZ, interpolate to the home
        pose, wait up to 2 s for the sensors to confirm it, and finally (in
        all cases, including errors) publish the disable command and clear
        the busy flag.

        Args:
            filename: Name of the recording inside DATA_DIR.
        """
        try:
            path = DATA_DIR / filename
            # Lines without a "q" key (e.g. a metadata header) are not frames.
            frames = [d for d in self._read_jsonl(path) if "q" in d]
            if not frames or not self.low_state:
                return
            body_lock_q = [self.low_state.motor_state[i].q for i in range(G1_NUM_MOTOR)]
            home_q = self._load_home_pose()

            # 1. Smooth match to start pose (90 frames ≈ 1.5s — prevents jerk)
            start_q = frames[0]["q"]
            ramp_in = 90
            for k in range(ramp_in):
                alpha = k / ramp_in
                interp_q = list(body_lock_q)
                for j in range(15, 29):
                    interp_q[j] = (1 - alpha) * body_lock_q[j] + alpha * start_q[j]
                self._send_frame(interp_q, body_lock_q)
                time.sleep(1.0 / REPLAY_HZ)

            # 2. Play frames
            last_played_q = start_q
            for f in frames:
                self._send_frame(f["q"], body_lock_q)
                last_played_q = f["q"]
                time.sleep(1.0 / REPLAY_HZ)

            # 3. Smooth return to home. alpha runs 1/80 .. 1.0 so the final
            #    commanded frame is exactly the home pose (k/80 stopped short).
            ramp_out = 80
            for k in range(ramp_out):
                alpha = (k + 1) / ramp_out
                interp_home_q = list(body_lock_q)
                for j in range(15, 29):
                    interp_home_q[j] = (1 - alpha) * last_played_q[j] + alpha * home_q[j]
                self._send_frame(interp_home_q, body_lock_q)
                time.sleep(1.0 / REPLAY_HZ)

            # Sensor confirmation
            confirm_start = time.time()
            while time.time() - confirm_start < 2.0:
                if self._is_pose_stable(home_q):
                    break
                time.sleep(0.05)
        finally:
            if self._pub:
                # Publish the enable slot at 0.0 several times so the release is not missed.
                cmd = unitree_hg_msg_dds__LowCmd_()
                cmd.motor_cmd[ENABLE_ARM_SDK_INDEX].q = 0.0
                cmd.crc = self.crc.Crc(cmd)
                for _ in range(5):
                    self._pub.Write(cmd)
                    time.sleep(0.01)
            self._is_busy = False
            self._last_action_time = time.time()
            print("🔓 Ready.")

    def _managed_sdk_action(self, action_name: str):
        """Execute a built-in SDK action, then clear the busy flag.

        Runs on a worker thread. After issuing the action it sleeps 3.5 s
        before the controller is marked ready again.
        """
        try:
            if self._client and action_name in action_map:
                print(f"🤖 SDK START: {action_name}")
                self._client.ExecuteAction(action_map.get(action_name))
                time.sleep(3.5)
        finally:
            self._is_busy = False
            self._last_action_time = time.time()
            print("🔓 Ready.")

    def trigger_action_by_id(self, action_id: int):
        """Start the action with the given id, if the controller can accept it.

        Silently does nothing when initialization fails, an action is
        already running, the cooldown has not elapsed, or the id is unknown.
        Options with a recording are replayed; other options run as SDK
        actions when the client knows the name.
        """
        if not self.init():
            return
        if self._is_busy:
            return
        if not self._cooldown_ok():
            return
        opt = OPTION_BY_ID.get(int(action_id))
        if opt:
            self._is_busy = True
            if opt.file:
                threading.Thread(target=self._managed_replay, args=(opt.file,), daemon=True).start()
            elif self._client and opt.name in action_map:
                threading.Thread(target=self._managed_sdk_action, args=(opt.name,), daemon=True).start()
            else:
                # Nothing can run this option; release the busy flag right away.
                self._is_busy = False

    def trigger_action_by_name(self, action_name: str):
        """Start the action whose name matches ``action_name`` (case-insensitive)."""
        opt = OPTION_BY_NAME.get(action_name.lower())
        if opt:
            self.trigger_action_by_id(opt.id)


# Module-level singleton used by importers of this module.
ARM = ArmController()

275
vendor/Sanad/motion/teaching.py vendored Normal file
View File

@ -0,0 +1,275 @@
"""Teaching mode — safe hold → limp arms → record joint positions.
Ported from G1_Lootah/Manual_Recorder/g1_teach_v4_stable.py.
Sequence:
1. Safe hold (3s): Arms rigid at current pose, waist locked.
2. Teach phase: Arms go limp (KP=0), user physically moves them.
Joint positions are recorded at 60 Hz.
3. Return home: Smooth interpolation back to arm_home.jsonl.
4. Save: Writes JSONL to data/motions/<name>.jsonl.
"""
from __future__ import annotations
import json
import os
import tempfile
import threading
import time
from pathlib import Path
from typing import Any
from Project.Sanad.config import G1_NUM_MOTOR, MOTIONS_DIR, REPLAY_HZ
from Project.Sanad.core.config_loader import section as _cfg_section
from Project.Sanad.core.event_bus import bus
from Project.Sanad.core.logger import get_logger
log = get_logger("teaching")
# Teaching parameters from the "motion" / "teaching" config section, each
# with a built-in fallback.
_T = _cfg_section("motion", "teaching")
SAFE_HOLD_SEC = _T.get("safe_hold_sec", 3.0)
# Gains applied to motors below index 15 in both the hold and teach phases.
WAIST_KP, WAIST_KD = _T.get("waist_kp", 60.0), _T.get("waist_kd", 4.0)
# Arm gains during the safe-hold phase.
HOLD_ARM_KP, HOLD_ARM_KD = _T.get("hold_arm_kp", 60.0), _T.get("hold_arm_kd", 4.0)
# Arm gains during the teach phase: zero stiffness (limp), damping only.
TEACH_ARM_KP, TEACH_ARM_KD = _T.get("teach_arm_kp", 0.0), _T.get("teach_arm_kd", 2.0)
# Optional Unitree SDK. `except Exception` matches the guard used by the arm
# controller module for the same import: any failure while importing the SDK
# is treated as "SDK unavailable", not only a plain ImportError. The imported
# names are always bound so later references cannot raise NameError.
try:
    from unitree_sdk2py.idl.unitree_hg.msg.dds_ import LowCmd_
    from unitree_sdk2py.utils.crc import CRC
    _HAS_SDK = True
except Exception:
    LowCmd_ = None
    CRC = None
    _HAS_SDK = False
class TeachingSession:
    """Records a teaching session (one at a time).

    A session runs on a daemon worker thread through the phases
    ``holding -> teaching -> returning -> done -> idle``. The worker always
    finalizes (saves the frames and clears the recording flag); ``stop()``
    only requests an early end and returns the finalized result.
    """

    def __init__(self, arm_controller):
        """Create an idle session bound to ``arm_controller``.

        Args:
            arm_controller: Object providing the private hardware hooks used
                by ``_run_hardware``. May be None, in which case sessions
                run in simulation.
        """
        self._arm = arm_controller
        self._lock = threading.Lock()
        self._recording = False
        self._stop_event = threading.Event()
        self._thread: threading.Thread | None = None
        self._name = ""
        self._frames: list[dict[str, Any]] = []
        self._phase = "idle"  # idle | holding | teaching | returning | done
        self._started_at = 0.0
        self._finalized = False
        self._finalize_lock = threading.Lock()
        self._final_result: dict[str, Any] | None = None

    @property
    def is_recording(self) -> bool:
        """True while a session is active (from start() until finalization)."""
        return self._recording

    def start(self, name: str, duration_sec: float = 15.0) -> dict[str, Any]:
        """Begin a new teaching session on a worker thread.

        Args:
            name: Motion name; also the stem of the saved ``.jsonl`` file.
            duration_sec: Maximum length of the teach (recording) phase.

        Returns:
            ``{"recording": True, "name": ..., "duration_sec": ...}``.

        Raises:
            RuntimeError: If a session is already active.
        """
        with self._lock:
            if self._recording:
                raise RuntimeError("Teaching session already active.")
            self._recording = True
            self._finalized = False
            self._final_result = None
            self._name = name
            self._frames = []
            self._stop_event.clear()
            self._phase = "holding"
            self._started_at = time.monotonic()
            self._thread = threading.Thread(
                target=self._run, args=(name, duration_sec), daemon=True
            )
            self._thread.start()
        log.info("Teaching started: %s (%.0fs)", name, duration_sec)
        bus.emit_sync("motion.teaching_started", name=name, duration_sec=duration_sec)
        return {"recording": True, "name": name, "duration_sec": duration_sec}

    def stop(self) -> dict[str, Any]:
        """Request an early stop, wait for the worker, and return the result.

        Raises:
            RuntimeError: If no session is active.
        """
        with self._lock:
            if not self._recording:
                raise RuntimeError("No teaching session active.")
        self._stop_event.set()
        if self._thread:
            self._thread.join(timeout=10.0)
        # Finalize is now ALWAYS done by the worker thread (_run).
        # If for some reason the worker died without finalizing, do it here.
        result = self._finalize()
        return result

    def _run(self, name: str, duration_sec: float):
        """Worker entry point: run on hardware or in simulation, then finalize."""
        interval = 1.0 / REPLAY_HZ
        arm = self._arm
        try:
            # A missing controller, or one without an `_initialized` flag, is
            # treated as "no hardware" instead of crashing the session.
            hardware_ready = (
                _HAS_SDK and arm is not None and getattr(arm, "_initialized", False)
            )
            if hardware_ready:
                self._run_hardware(name, duration_sec, interval)
            else:
                self._run_simulation(name, duration_sec, interval)
        except Exception:
            log.exception("Teaching session crashed")
        finally:
            # Always finalize from the worker thread — stop() will see _finalized=True.
            self._finalize()

    def _run_hardware(self, name: str, duration_sec: float, interval: float):
        """Real hardware teaching: hold → limp → record → home.

        ``arm._disable_sdk()`` is called on every exit path — normal
        completion, a stop requested during the hold phase, or an exception —
        so the arm is never left under this session's control.
        """
        arm = self._arm
        low_cmd = arm._low_cmd
        crc = arm._crc
        initial_q = arm._get_current_q()
        waist_lock = list(initial_q)
        try:
            # Phase 1: Safe hold
            self._phase = "holding"
            hold_end = time.monotonic() + SAFE_HOLD_SEC
            log.info("Safe hold (%.1fs) — arms rigid", SAFE_HOLD_SEC)
            while time.monotonic() < hold_end and not self._stop_event.is_set():
                for i in range(G1_NUM_MOTOR):
                    low_cmd.motor_cmd[i].mode = 1
                    low_cmd.motor_cmd[i].q = initial_q[i]
                    low_cmd.motor_cmd[i].dq = 0.0
                    low_cmd.motor_cmd[i].tau = 0.0
                    if i < 15:  # body/waist
                        low_cmd.motor_cmd[i].kp = WAIST_KP
                        low_cmd.motor_cmd[i].kd = WAIST_KD
                    else:  # arms
                        low_cmd.motor_cmd[i].kp = HOLD_ARM_KP
                        low_cmd.motor_cmd[i].kd = HOLD_ARM_KD
                # NOTE(review): slot 29 is presumably the arm-SDK enable flag — confirm.
                low_cmd.motor_cmd[29].q = 1.0
                low_cmd.crc = crc.Crc(low_cmd)
                arm._arm_pub.Write(low_cmd)
                time.sleep(interval)
            if self._stop_event.is_set():
                # Stopped before teaching began: nothing recorded, no return-home.
                return

            # Phase 2: Teaching — arms go limp, record
            self._phase = "teaching"
            log.info("Arms released — move them now! Recording at %d Hz", int(REPLAY_HZ))
            t0 = time.monotonic()
            while not self._stop_event.is_set():
                elapsed = time.monotonic() - t0
                if elapsed >= duration_sec:
                    break
                # Limp arms, locked waist. The arm target tracks the measured
                # position so the zero-stiffness arm is never pulled anywhere.
                current_q = arm._get_current_q()
                for i in range(G1_NUM_MOTOR):
                    low_cmd.motor_cmd[i].mode = 1
                    low_cmd.motor_cmd[i].dq = 0.0
                    low_cmd.motor_cmd[i].tau = 0.0
                    if i < 15:
                        low_cmd.motor_cmd[i].q = waist_lock[i]
                        low_cmd.motor_cmd[i].kp = WAIST_KP
                        low_cmd.motor_cmd[i].kd = WAIST_KD
                    else:
                        low_cmd.motor_cmd[i].q = current_q[i]
                        low_cmd.motor_cmd[i].kp = TEACH_ARM_KP
                        low_cmd.motor_cmd[i].kd = TEACH_ARM_KD
                low_cmd.motor_cmd[29].q = 1.0
                low_cmd.crc = crc.Crc(low_cmd)
                arm._arm_pub.Write(low_cmd)
                self._frames.append({"t": round(elapsed, 4), "q": current_q})
                time.sleep(interval)

            # Phase 3: Return home
            self._phase = "returning"
            from Project.Sanad.motion.arm_controller import _load_home_q, _lerp_q
            home_q = _load_home_q() or initial_q
            last_q = self._frames[-1]["q"] if self._frames else initial_q
            for step in range(180):
                t = (step + 1) / 180  # reaches exactly 1.0 on the last step
                interp = _lerp_q(last_q, home_q, t)
                arm._send_frame(interp, waist_lock)
                time.sleep(1.0 / REPLAY_HZ)
        finally:
            arm._disable_sdk()

    def _run_simulation(self, name: str, duration_sec: float, interval: float):
        """Simulation: just record zero-pose frames for the given duration."""
        self._phase = "holding"
        time.sleep(min(SAFE_HOLD_SEC, 1.0))  # shortened in sim
        self._phase = "teaching"
        t0 = time.monotonic()
        log.info("[SIM] Teaching — recording for %.0fs", duration_sec)
        while not self._stop_event.is_set():
            elapsed = time.monotonic() - t0
            if elapsed >= duration_sec:
                break
            self._frames.append({"t": round(elapsed, 4), "q": [0.0] * G1_NUM_MOTOR})
            time.sleep(interval)
        self._phase = "returning"
        time.sleep(0.5)

    def _save_frames(self) -> dict[str, Any]:
        """Write the recorded frames to ``MOTIONS_DIR/<name>.jsonl``.

        The file starts with one metadata line followed by one JSON object
        per frame. It is written to a temp file in the same directory and
        moved into place with ``os.replace``, so a partially written file is
        never left at the final path.

        Returns:
            The result dict (name, frames, path, duration_sec and, when a
            file was written, size_kb). With no frames nothing is written
            and ``path`` is empty.

        Raises:
            Exception: Whatever the write raised; the temp file is removed
                before re-raising.
        """
        result: dict[str, Any] = {"name": self._name, "frames": len(self._frames)}
        if not self._frames:
            result["path"] = ""
            result["duration_sec"] = 0
            return result

        MOTIONS_DIR.mkdir(parents=True, exist_ok=True)
        # NOTE(review): the name is used verbatim in the path — confirm callers validate it.
        out_path = MOTIONS_DIR / f"{self._name}.jsonl"
        # Atomic write: tempfile + os.replace
        content_lines = [
            json.dumps({"meta": {"hz": REPLAY_HZ, "motors": G1_NUM_MOTOR}}),
        ]
        content_lines.extend(json.dumps(frame) for frame in self._frames)
        content = ("\n".join(content_lines) + "\n").encode("utf-8")
        fd, tmp = tempfile.mkstemp(
            prefix=f".{out_path.name}.", suffix=".tmp",
            dir=str(out_path.parent),
        )
        try:
            with os.fdopen(fd, "wb") as f:
                f.write(content)
            os.replace(tmp, out_path)
        except Exception:
            try:
                os.unlink(tmp)
            except OSError:
                pass
            raise
        duration = self._frames[-1]["t"]
        result["path"] = str(out_path)
        result["duration_sec"] = round(duration, 2)
        result["size_kb"] = round(out_path.stat().st_size / 1024, 1)
        log.info("Teaching saved: %s (%d frames, %.1fs)", out_path.name, len(self._frames), duration)
        return result

    def _finalize(self) -> dict[str, Any]:
        """Save frames to JSONL and return result. Idempotent — safe to call twice.

        If saving fails the exception is re-raised, but the session is still
        released (``_recording`` cleared, phase back to ``idle``) so that a
        new session can be started. ``_finalized`` stays False in that case,
        so a later call retries the save.
        """
        with self._finalize_lock:
            if self._finalized:
                return self._final_result or {
                    "name": self._name, "frames": len(self._frames),
                    "path": "", "duration_sec": 0,
                }
            self._phase = "done"
            try:
                result = self._save_frames()
            except Exception:
                # Do not leave the session stuck in "recording" forever.
                with self._lock:
                    self._recording = False
                    self._phase = "idle"
                raise
            self._finalized = True
            self._final_result = result
            with self._lock:
                self._recording = False
                self._phase = "idle"
            bus.emit_sync("motion.teaching_finished", name=result.get("name"), frames=result.get("frames"))
            return result

    def status(self) -> dict[str, Any]:
        """Return the live session state: flag, phase, name, elapsed time, frame count."""
        elapsed = time.monotonic() - self._started_at if self._recording else 0
        return {
            "recording": self._recording,
            "phase": self._phase,
            "name": self._name,
            "elapsed_sec": round(elapsed, 1),
            "frames_recorded": len(self._frames),
        }

12
vendor/Sanad/note.txt vendored Normal file
View File

@ -0,0 +1,12 @@
N2 — Gemini-phrase movement via direct LocoClient · effort L (highest risk)
This is the Marcus phrase-confirmation pattern, but driven by direct LocoClient on the robot (no Holosoma/ZMQ, no laptop). The full chain:
Gemini speaks a confirmation phrase → parent reads it → matches → LocoClient.Move().
Where LocoClient lives → the parent process, next to the arm. main.py:425 already calls ChannelFactoryInitialize once for the arm; the new LocoController reuses that same DDS participant (no second init). Port init_loco/move_cmd/stop_only from G1_Lootah/Controller/g1_mode_controller.py, plus a non-interactive FSM-200 bring-up adapted from hanger_boot_sequence.py (strip its own ChannelFactoryInitialize and the blocking input()).
The dispatch loop (voice/movement_loop.py) mirrors voice/live_voice_loop.py but polls the new bot_transcript, gated on a movement_enabled flag (default OFF). The matcher (voice/bot_dispatch.py) ports Marcus's _dispatch_gemini_bot: strip [STATE-]/quoted/question clauses → normalize numbers + Arabic→English → parametric-regex scan → longest-needle bot-phrase scan → dedup/cooldown. Vocabulary comes from a ported instruction.json.
Canonical → velocity (Marcus's MOVE_MAP ports 1:1 since LocoClient.Move uses the same m/s, rad/s units): forward→Move(0.3,0,0), backward→Move(-0.2,0,0), turn-left→Move(0,0,0.3), turn-right→Move(0,0,-0.3), stop→StopMove().
Toggle in the Voice & Audio tab (#tab-voice): add movement_enabled to recognition_state.py, a dashboard/routes/movement.py route (clone the face-rec toggle), and a switch in the UI.
Announce on toggle: _announce_movement_state in script.py (clone _announce_facerec_state) — robot says "movement enabled" / "movement disabled."
Persona rules: append Marcus-style rules to Sanad's prompt (wake-word + action required; clarification questions must not contain motion verbs) so Gemini reliably emits the confirmation phrases.
🛑 Safety is mandatory, not optional: Move(continous_move=True) walks until StopMove. Needs a FIFO worker with stop-priority, a per-motion watchdog that forces StopMove after a max duration, velocity caps + deadband, and StopMove on both disable-toggle and shutdown. Sanad has no obstacle/abort stack today.

43
vendor/Sanad/requirements.txt vendored Normal file
View File

@ -0,0 +1,43 @@
# Sanad — Python dependencies
# Install: pip install -r requirements.txt
# Dashboard
fastapi>=0.110.0
uvicorn[standard]>=0.29.0
python-multipart>=0.0.9
# Gemini voice
# google-genai: the Gemini Live SDK — used by gemini/script.py (live brain)
# and gemini/client.py. Needs Python 3.10+, which is why the voice loop
# runs in the gemini_sdk conda env. send_realtime_input(video=)/(text=)
# and send_client_content() require a reasonably recent (>=1.x) release.
google-genai>=1.0.0
websockets>=12.0
pyaudio>=0.2.13
# Recognition (camera vision + face gallery for Gemini-side face recognition)
# opencv-python-headless: JPEG encode + USB-camera fallback. Headless wheel —
# the dashboard renders frames; we never need a GUI window.
# Pillow: resize face samples before the Gemini primer turn.
opencv-python-headless>=4.8.0
Pillow>=10.0.0
#
# pyrealsense2 — DO NOT `pip install` on Jetson / JetPack 5.
# The PyPI wheel is built against glibc 2.32+ (Ubuntu 22.04); JetPack 5 ships
# glibc 2.31, so the wheel fails to load with:
# ImportError: ... version `GLIBC_2.32' not found
# On Jetson, build the Python binding from source against the apt-installed
# librealsense2 runtime (see README → "Camera vision on Jetson").
# On x86_64 / Ubuntu 22.04+ desktops, `pip install pyrealsense2` works fine.
# If pyrealsense2 is absent, CameraDaemon falls back to cv2.VideoCapture(0).
# pyrealsense2>=2.50.0 # intentionally commented — see note above
# Local TTS (optional — only needed for MBZUAI model)
transformers>=4.40.0
sentencepiece>=0.2.0
torch>=2.2.0
datasets>=2.19.0
soundfile>=0.12.0
# General
numpy>=1.24.0

724
vendor/Sanad/scripts/sanad_arm.txt vendored Normal file
View File

@ -0,0 +1,724 @@
# ==================================================
# 📄 sanad_arm.txt
# قاموس سند الشامل (لهجات عربية + تعرف أعمق + أخطاء شائعة)
# ملاحظة:
# - خففنا الكلمات "العامة جدًا" قدر الإمكان لتقليل التفعيل بالغلط.
# - بدون تشكيل لزيادة دقة المطابقة البرمجية.
# - IDs محفوظة كما هي في OPTION_LIST (0-28).
# ==================================================
# =====================
# Release arm (id=0) — stop/neutral / lower hands
# =====================
WAKE_PHRASES_release_arm = {
# أوامر إيقاف عامة
"وقف",
"وقف بس",
"وقف الحين",
"وقف الحركة",
"وقف هالحركة",
"وقف يا سند",
"وقف يا بوسنده",
"وقف يا بوسنيده",
"وقف كذا",
"وقف خلاص",
# خلاص
"خلاص",
"خلاص بس",
"خلاص الحين",
"تمام خلاص",
"تمام بس",
"بس خلاص",
"خلاص يا سند",
"خلاص يا بوسنده",
# راحة / استرخاء
"ارتح",
"ارتاح",
"استريح",
"ريح",
"ريح يدك",
"ريح ايدك",
"ريح يدينك",
"ريح ايدينك",
"ارتح يا سند",
"استريح يا سند",
# تحرير
"حرر اليد",
"حرر الذراع",
"حرر يدك",
"حرر ايدك",
"حرر يدينك",
"حرر ايدينك",
"فك يدك",
"فك ايدك",
"فك يدينك",
"فك ايدينك",
"فك الذراع",
"فك ايديك",
# رجوع للوضع الطبيعي
"ارجع طبيعي",
"ارجع وضع طبيعي",
"رجع طبيعي",
"وضع طبيعي",
"نيوترال",
"محايد",
"هوم",
"ارجع هوم",
"ارجع للوضع",
"رجع للوضع",
# إنزال اليد/الأيدي (لهجات + شدّة)
"نزل",
"نزل يدك",
"نزل ايدك",
"نزل يدينك",
"نزل ايدينك",
"نزل يدك تحت",
"نزل ايدك تحت",
"نزل يدينك تحت",
"نزل ايدينك تحت",
"نزل ايدك لتحت",
"نزل يدك لتحت",
"نزّل",
"نزّل يدك",
"نزّل ايدك",
"نزّل يدينك",
"نزّل ايدينك",
"انزل",
"انزل يدك",
"انزل ايدك",
"انزل يدينك",
"انزل ايدينك",
# English (added)
"stop",
"stop now",
"stop moving",
"enough",
"relax",
"rest",
"neutral",
"home",
"go home",
"hands down",
"lower your hands",
"lower your arms",
"arms down",
"release",
"stand down",
}
# =====================
# Shake hand (id=1) — formal greeting (SDK)
# =====================
WAKE_PHRASES_shake_hand = {
# تحية دينية
"السلام عليكم",
"سلام عليكم",
"السلام عليكم ورحمة الله",
"السلام عليكم ورحمة الله وبركاته",
# مع الاسم
"السلام عليكم يا سند",
"السلام عليكم سند",
"السلام عليكم يا بوسنده",
"السلام عليكم بوسنده",
"السلام عليكم يا بوسنيده",
"السلام عليكم بوسنيده",
# طلب مصافحة
"صافحني",
"صافحني يا سند",
"صافحني يا بوسنده",
"صافحني يا بوسنيده",
"صافح",
"صافحنا",
"مد يدك",
"مد ايدك",
"مد ايدك اليمين",
"هات يدك",
"هات ايدك",
"عطني يدك",
"عطني ايدك",
"اعطيني يدك",
"اعطيني ايدك",
"سلم بيدك",
"سلم بايدك",
"سلم يا بطل",
"شيك هاند",
"شيك هاند يا سند",
# English (added)
"assalamu alaykum",
"as-salamu alaykum",
"peace be upon you",
"handshake",
"shake hands",
"shake my hand",
"let's shake hands",
"nice to meet you",
"greetings",
}
# =====================
# High five (id=2) — palm greeting (SDK)
# =====================
WAKE_PHRASES_high_five = {
"هاي فايف",
"هاي فايف يا سند",
"هاي فايڤ",
"هاي فايڤ يا سند",
"عالي خمسة",
"عالي خمسه",
"اعطني خمسة",
"اعطني خمسه",
"هات خمسة",
"هات خمسه",
"دق كف",
"طق كف",
"دقلي كف",
"هات كفك",
"كف عالي",
"يلا كف",
"فرجينا الكف",
"خمسة بيدك",
"خمسه بايدك",
# English (added)
"high five",
"high-five",
"give me five",
"gimme five",
"up top",
"slap my hand",
}
# =====================
# Hug (id=3) — hug (SDK)
# =====================
WAKE_PHRASES_hug = {
"حضن",
"حضني",
"حضنني",
"عناق",
"عانقني",
"ضمني",
"عطني حضن",
"عطني ضمه",
"ابي حضن",
"ابغى حضن",
"ممكن حضن",
"تعال حضن",
"تعال حضني",
"احتاج حضن",
"حضن كبير",
"حضنه قوية",
"ضمه",
"ضمة",
# English (added)
"hug",
"give me a hug",
"big hug",
"come hug",
"embrace",
"cuddle",
}
# =====================
# High wave (id=4) — goodbye / big wave (SDK)
# =====================
WAKE_PHRASES_high_wave = {
"مع السلامه",
"مع السلامة",
"باي",
"باي باي",
"وداعا",
"وداعًا",
"ودع",
"ودعنا",
"اشوفك على خير",
"نشوفك على خير",
"في امان الله",
"الله وياك",
"تصبح على خير",
"تصبحون على خير",
# تلويح
"لوح",
"لوح لي",
"لوح بيدك",
"لوح بايدك",
"لوح بيدك فوق",
"سلم عليهم",
"سلم عليهم يا سند",
"سلم عليهم كلهم",
"ودعهم",
"ودع الضيوف",
# English (added)
"bye",
"bye bye",
"goodbye",
"see you",
"see you later",
"take care",
"wave",
"wave goodbye",
}
# =====================
# Clap (id=5) — clap (SDK)
# =====================
WAKE_PHRASES_clap = {
"صفق",
"صقف",
"تصفيق",
"يلا صفق",
"صفق لهم",
"يلا صفق لهم",
"سوي تصفيق",
"ابدأ تصفيق",
"وريني كيف تصفق",
"صفق بيدك",
# (قد تكون عامة - احذفها إذا تسبب تفعيل بالغلط)
"برافو",
"حيوهم",
# English (added)
"clap",
"applause",
"clapping",
"round of applause",
"bravo",
"clap for them",
}
# =====================
# Face wave (id=6) — friendly greeting / calling Sanad (SDK)
# =====================
WAKE_PHRASES_face_wave = {
# الاسم
"سند",
"يا سند",
"بوسنده",
"يا بوسنده",
"بوسنيده",
"يا بوسنيده",
"بو سنده",
"بو سنيده",
"يا بو سنده",
"يا بو سنيده",
# تحيات
"هلا",
"يا هلا",
"ياهلا",
"هلا والله",
"ياهلا والله",
"هلا وغلا",
"مرحبا",
"مرحبتين",
"حي الله",
"حي الله من جانا",
# تحية + الاسم
"هلا سند",
"هلا يا سند",
"مرحبا سند",
"مرحبا يا سند",
"يا هلا سند",
"يا هلا يا سند",
"هلا بوسنده",
"مرحبا بوسنده",
"هلا بوسنيده",
"مرحبا بوسنيده",
# نداءات (تجنبنا الكلمات العامة وحدها)
"سند تعال",
"سند تعال هنا",
"سند اسمعني",
"سند اسمع",
"سند وينك",
"سند موجود",
"سند حاضر",
"سند شوفني",
"سند ركز",
"سند ركز معي",
"سند انت وين",
"بوسنده تعال",
"بوسنده اسمعني",
"بوسنده وينك",
"بوسنيده تعال",
"بوسنيده اسمعني",
"بوسنيده وينك",
# English (added)
"sanad",
"hey sanad",
"hello sanad",
"hi sanad",
"bosanda",
"bosandah",
"are you there",
"listen to me",
"come here",
"look at me",
"pay attention",
}
# =====================
# Left kiss (id=7) — left cheek kiss (SDK)
# =====================
WAKE_PHRASES_left_kiss = {
"بوسه يسار",
"بوسة يسار",
"بوسه على الخد اليسار",
"بوسة على الخد اليسار",
"بوسه على اليسار",
"بوسة يسار يا سند",
"بوسه يسار يا سند",
"قبلة يسار",
"قبله يسار",
# English (added)
"left kiss",
"kiss left",
"kiss on the left cheek",
"left cheek kiss",
}
# =====================
# Heart (id=8) — heart (SDK)
# =====================
WAKE_PHRASES_heart = {
"قلب",
"سوي قلب",
"سوي قلب كبير",
"اعطني قلب",
"عطني قلب",
"ابي قلب",
"ابغى قلب",
"هارت",
"سوي هارت",
"وريني قلب",
"شكل قلب",
"قلب قلب",
"لوف",
"اعطيني لوف",
"سوي لوف",
# English (added)
"heart",
"make a heart",
"do a heart",
"love",
"i love you",
}
# =====================
# Right heart (id=9) — right-side heart (SDK)
# =====================
WAKE_PHRASES_right_heart = {
"قلب يمين",
"سوي قلب يمين",
"سوي هارت يمين",
"هارت يمين",
"قلب على اليمين",
"قلب يمين يا سند",
# English (added)
"right heart",
"make a right heart",
"heart right",
"heart on the right",
}
# =====================
# Hands up (id=10) — built-in display only (SDK)
# =====================
WAKE_PHRASES_hands_up = {
"وريني يدينك",
"وريني ايدينك",
"فرجينا يدينك",
"فرجينا ايدينك",
"هات يدينك نشوف",
"ايديك الثنتين",
"يديك الثنتين",
"طلع يدينك",
"طلع ايدينك",
"ارفع يدينك شوي",
"ارفع ايدينك شوي",
# English (added)
"hands up",
"put your hands up",
"raise your hands",
"raise both hands",
"arms up",
"lift your hands",
}
# =====================
# X-ray (id=11) — x-ray pose (SDK)
# =====================
WAKE_PHRASES_x_ray = {
"اكس راي",
"xray",
"اشعه",
"اشعة",
"اشعة اكس",
"اشعة سينية",
"سكان",
"سكانر",
"فحص",
"فحص اشعة",
"سوي اكس راي",
# English (added)
"x-ray",
"scan",
"scanner",
"xray pose",
"medical scan",
}
# =====================
# Right hand up (id=12) — built-in display only (SDK)
# =====================
WAKE_PHRASES_right_hand_up = {
"وريني يدك اليمين",
"وريني ايدك اليمين",
"فرجينا يدك اليمين",
"هات اليمين نشوف",
"طلع يدك اليمين",
"طلع ايدك اليمين",
"ارفع يدك اليمين",
"ارفع ايدك اليمين",
# English (added)
"right hand up",
"raise your right hand",
"lift your right hand",
"right arm up",
"put your right hand up",
}
# =====================
# Reject (id=13) — reject/decline (SDK)
# =====================
WAKE_PHRASES_reject = {
"ارفض",
"رفض",
"مرفوض",
"مو موافق",
"مش موافق",
"لا ابي",
"لا ابغا",
"ما ابي",
"ما ابغا",
"لا شكرا",
"لا شكرًا",
"كنسل",
"الغيه",
"الغيها",
"لا تسويها",
"لا تعملها",
"لا تساويها",
# English (added)
"reject",
"decline",
}
# =====================
# Right kiss (id=14) — right cheek kiss (SDK)
# =====================
WAKE_PHRASES_right_kiss = {
"بوسه يمين",
"بوسة يمين",
"بوسه على الخد اليمين",
"بوسة على الخد اليمين",
"بوسه على اليمين",
"بوسة يمين يا سند",
"قبلة يمين",
"قبله يمين",
# English (added)
"right kiss",
"kiss right",
"kiss on the right cheek",
"right cheek kiss",
}
# =====================
# Two-hand kiss (id=15) — two-hand kiss (SDK)
# =====================
WAKE_PHRASES_two_hand_kiss = {
"بوسات",
"بوسات كثير",
"بوسه كبيرة",
"بوسة كبيرة",
"بوسه بكل اليدين",
"بوسة بكل اليدين",
"بوسه بيدينك",
"بوسة بيدينك",
"بوسه قوية",
"بوسة قوية",
"قبلة كبيرة",
"قبلات",
# English (added)
"two hand kiss",
"two-hand kiss",
"big kiss",
"many kisses",
"lots of kisses",
"blow a big kiss",
}
# ==================================================
# Recorded Actions (IDs 23+ for JSONL Replay)
# ==================================================
# =====================
# Laugh (id=23) — recorded
# =====================
WAKE_PHRASES_laugh = {
"اضحك",
"ضحكه",
"ضحكة",
"يضحك",
"ضحك",
"هههه",
"ههههه",
"نكته",
"نكتة",
"مضحك",
"كركر",
"اضحك يا سند",
"فرجينا ضحكتك",
"ضحكنا",
# English (added)
"laugh",
"haha",
"hahaha",
"lol",
"that's funny",
"make me laugh",
"giggle",
}
# =====================
# Bird (id=24) — recorded
# =====================
WAKE_PHRASES_bird = {
"طير",
"سوي طير",
"عصفور",
"جناح",
"رفرف",
"رفرف بيدك",
"رفرف بايدك",
"حرك يدينك مثل الطير",
"وريني كيف يطير العصفور",
"سوي جناحات",
# English (added)
"bird",
"fly",
"flap",
"flap your wings",
"wings",
}
# =====================
# Change Battery (id=25) — recorded
# =====================
WAKE_PHRASES_change_battery = {
"غير البطاريه",
"غير البطارية",
"بدل البطاريه",
"بدل البطارية",
"تغيير بطاريه",
"تغيير بطارية",
"البطاريه خلصت",
"البطارية خلصت",
"شحن البطاريه",
"شحن البطارية",
"تشينج باتري",
"change battery",
"battery low",
# English (added)
"replace battery",
"swap battery",
"need charging",
"charge battery",
}
# ==================================================
# Active Movement (Recorded) — IDs 26–28
# ==================================================
# =====================
# Move hands up (id=26) — active movement (dressing/exercise)
# =====================
WAKE_PHRASES_move_hands_up = {
"ارفع يدينك",
"ارفع ايدينك",
"يدينك فوق",
"ايدينك فوق",
"ارفعهم فوق",
"ارفع يدينك الاثنين",
"ارفع ايديك الثنتين",
"هاندز اب",
# dressing/exercise context
"نلبسك",
"بلبسك",
"البس",
"غير ملابسك",
"ساعدنا نلبسك",
"يالله نلبس",
"تمرين ارفع يدينك",
"رفع يدين للتلبيس",
# English (added)
"hands up",
"raise your hands",
"raise both hands",
"lift your arms",
"arms up",
"raise both arms",
}
# =====================
# Move right hand up (id=27) — active movement
# =====================
WAKE_PHRASES_move_right_hand_up = {
"ارفع اليمين فوق",
"يدك اليمين فوق",
"ايدك اليمين فوق",
"ارفع يدك اليمين",
"ارفع ايدك اليمين",
"يمينك فوق",
"يمين فوق للاعلى",
"وريني يدك اليمين فوق",
"ارفع يمينك",
# English (added)
"right hand up",
"raise your right hand",
"lift your right hand",
"right arm up",
}
# =====================
# Move left hand up (id=28) — active movement
# =====================
WAKE_PHRASES_move_left_hand_up = {
"ارفع اليسار فوق",
"يدك اليسار فوق",
"ايدك اليسار فوق",
"ارفع يدك اليسار",
"ارفع ايدك اليسار",
"يسارك فوق",
"يسار فوق للاعلى",
"وريني يدك اليسار فوق",
"ارفع يسارك",
# English (added)
"left hand up",
"raise your left hand",
"lift your left hand",
"left arm up",
}

19
vendor/Sanad/scripts/sanad_rule.txt vendored Normal file
View File

@ -0,0 +1,19 @@
[SYSTEM_PROMPT]
You are Sanad (Bousandah), a wise and friendly Emirati assistant.
Speak naturally in the UAE dialect (Khaleeji) unless the user explicitly provides text that must be spoken exactly.
Keep the tone warm, respectful, and clear.
Do not be robotic.
Do not over-explain.
Prefer concise speech that sounds natural when spoken aloud, with a playful, cheerful tone.
[REPLAY_SYSTEM_PROMPT]
You are Sanad (Bousandah), using the same Emirati voice and personality.
For replay mode, the user will provide text that you must speak exactly as written.
You may sound warm and lively, but you must preserve the exact text.
Do not translate it.
Do not summarize it.
Do not answer it.
Do not rephrase it into another dialect or style.
Do not add greetings, punctuation changes, comments, or extra words.
Keep the same word order and language as the provided text.
Your only task is to speak the exact user text verbatim.

Some files were not shown because too many files have changed in this diff Show More