Update 2026-05-15 09:39:52
This commit is contained in:
parent
edddb7e0c3
commit
811a391932
113
README.md
113
README.md
@ -7,21 +7,44 @@ JSONL macros; everything is orchestrated by a FastAPI dashboard.
|
|||||||
```
|
```
|
||||||
┌────────────────────────────────────────────────────────────────────┐
|
┌────────────────────────────────────────────────────────────────────┐
|
||||||
│ Dashboard (FastAPI) ── http://<robot>:8000 │
|
│ Dashboard (FastAPI) ── http://<robot>:8000 │
|
||||||
|
│ ├─ Operations Quick-fire arm actions │
|
||||||
│ ├─ Voice & Audio Live Gemini, Typed Replay, Wake Phrases │
|
│ ├─ Voice & Audio Live Gemini, Typed Replay, Wake Phrases │
|
||||||
│ ├─ Motion & Replay SDK actions, JSONL replays, teaching mode │
|
│ ├─ Motion & Replay SDK actions, JSONL replays, teaching mode │
|
||||||
│ ├─ Camera & Vision (deprecated, UI kept for compat) │
|
│ ├─ Recognition Camera vision + face gallery (Gemini-side) │
|
||||||
│ ├─ Recordings Skills registry, saved Gemini turns │
|
│ ├─ Recordings Skills registry, saved Gemini turns │
|
||||||
│ └─ Settings & Logs System info, tail live log │
|
│ └─ Settings & Logs System info, tail live log │
|
||||||
└────────────────────────────────────────────────────────────────────┘
|
└────────────────────────────────────────────────────────────────────┘
|
||||||
│
|
│
|
||||||
├─ voice/sanad_voice.py (subprocess — Gemini Live audio loop)
|
├─ voice/sanad_voice.py (subprocess — Gemini Live audio loop)
|
||||||
|
├─ gemini/script.py (Gemini Live brain — audio + video + state)
|
||||||
├─ gemini/client.py (short-session client for Typed Replay)
|
├─ gemini/client.py (short-session client for Typed Replay)
|
||||||
├─ gemini/subprocess.py (spawns+supervises sanad_voice.py)
|
├─ gemini/subprocess.py (spawns+supervises sanad_voice.py;
|
||||||
|
│ pushes camera frames + motion state
|
||||||
|
│ to the child over its stdin)
|
||||||
|
├─ vision/camera.py (RealSense/USB capture daemon)
|
||||||
|
├─ vision/face_gallery.py (data/faces/ CRUD for the primer turn)
|
||||||
├─ motion/arm_controller.py (G1 arm DDS publisher)
|
├─ motion/arm_controller.py (G1 arm DDS publisher)
|
||||||
├─ voice/audio_io.py (mic + speaker abstraction — 3 profiles)
|
├─ voice/audio_io.py (mic + speaker abstraction — 3 profiles)
|
||||||
└─ core/brain.py (skill dispatcher, event bus)
|
└─ core/brain.py (skill dispatcher, event bus)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Camera + face recognition data flow
|
||||||
|
|
||||||
|
```
|
||||||
|
CameraDaemon (parent, in-memory JPEG+b64 cache)
|
||||||
|
├─→ dashboard /api/recognition/frame.jpg ── snapshot_jpeg()
|
||||||
|
└─→ GeminiSubprocess._frame_forwarder ── get_frame_b64()
|
||||||
|
│ "frame:<b64>\n" over stdin
|
||||||
|
ArmController ─emit→ event bus ─→ main.py ─→ live_sub.send_state()
|
||||||
|
│ "state:<json>\n" over stdin
|
||||||
|
▼
|
||||||
|
gemini/script.py _stdin_watcher thread
|
||||||
|
├─ frame: → _LATEST_FRAME → _send_frame_loop →
|
||||||
|
│ session.send_realtime_input(video=Blob)
|
||||||
|
└─ state: → _STATE_PENDING → _send_state_loop →
|
||||||
|
session.send_realtime_input(text=…)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
## Quick start (on the robot)
|
## Quick start (on the robot)
|
||||||
|
|
||||||
@ -42,13 +65,14 @@ Then open `http://<robot-ip>:8000` in a browser.
|
|||||||
| `config.py` | Runtime constants derived from `config/*_config.json`. |
|
| `config.py` | Runtime constants derived from `config/*_config.json`. |
|
||||||
| `config/` | Per-subsystem JSON config: `core`, `voice`, `gemini`, `motion`, `dashboard`, `local`. |
|
| `config/` | Per-subsystem JSON config: `core`, `voice`, `gemini`, `motion`, `dashboard`, `local`. |
|
||||||
| `core/` | Brain, skill registry, event bus, config loader, logger. |
|
| `core/` | Brain, skill registry, event bus, config loader, logger. |
|
||||||
| `gemini/` | Gemini Live — `client.py` (one-shot), `script.py` (live brain), `subprocess.py` (supervisor). |
|
| `gemini/` | Gemini Live — `client.py` (one-shot), `script.py` (live brain: audio + video + motion-state), `subprocess.py` (supervisor + stdin frame/state push). |
|
||||||
| `voice/` | `sanad_voice.py` (subprocess entry), `audio_io.py` (mic/speaker), `audio_manager.py`, `local_tts.py`, `live_voice_loop.py`, `typed_replay.py`, `wake_phrase_manager.py`, `text_utils.py`, `model_script.py` (brain template). |
|
| `voice/` | `sanad_voice.py` (subprocess entry), `audio_io.py` (mic/speaker), `audio_manager.py`, `local_tts.py`, `live_voice_loop.py`, `typed_replay.py`, `wake_phrase_manager.py`, `text_utils.py`, `model_script.py` (brain template). |
|
||||||
|
| `vision/` | `camera.py` (RealSense/USB capture daemon, auto-reconnect), `face_gallery.py` (`data/faces/` CRUD), `recognition_state.py` (toggle state file I/O). |
|
||||||
| `local/` | Offline pipeline skeleton — Silero VAD, Whisper, Qwen (via Ollama), CosyVoice2. Opt-in via `SANAD_VOICE_BRAIN=local`. |
|
| `local/` | Offline pipeline skeleton — Silero VAD, Whisper, Qwen (via Ollama), CosyVoice2. Opt-in via `SANAD_VOICE_BRAIN=local`. |
|
||||||
| `motion/` | `arm_controller.py` (main), `sanad_arm_controller.py`, `macro_player.py`, `macro_recorder.py`, `teaching.py`. |
|
| `motion/` | `arm_controller.py` (main), `sanad_arm_controller.py`, `macro_player.py`, `macro_recorder.py`, `teaching.py`. |
|
||||||
| `dashboard/` | FastAPI routes (`dashboard/routes/*.py`) + static UI (`dashboard/static/index.html`). |
|
| `dashboard/` | FastAPI routes (`dashboard/routes/*.py`) + static UI (`dashboard/static/index.html`). |
|
||||||
| `scripts/` | Persona files — `sanad_script.txt` (voice persona), `sanad_rule.txt`, `sanad_arm.txt` (voice→arm phrases). |
|
| `scripts/` | Persona files — `sanad_v2` (voice persona), `sanad_rule.txt`, `sanad_arm.txt` (voice→arm phrases). |
|
||||||
| `data/` | Runtime state — `audio/` (typed-replay WAVs), `motions/` (arm JSONL files), `recordings/` (live-captured turns), `motions/config.json` (dashboard-editable settings). |
|
| `data/` | Runtime state — `audio/` (typed-replay WAVs), `motions/` (arm JSONL files), `recordings/` (live-captured turns), `faces/face_{id}/` (enrolled face galleries), `.recognition_state.json` (vision/face-rec toggle state), `motions/config.json` (dashboard-editable settings). |
|
||||||
| `model/` | Place for local SpeechT5 / CosyVoice2 weights when using offline pipeline. |
|
| `model/` | Place for local SpeechT5 / CosyVoice2 weights when using offline pipeline. |
|
||||||
| `logs/` | Per-module rotating logs. |
|
| `logs/` | Per-module rotating logs. |
|
||||||
|
|
||||||
@ -64,6 +88,16 @@ Then open `http://<robot-ip>:8000` in a browser.
|
|||||||
| `SANAD_LIVE_SCRIPT` | path | auto | Override the subprocess entry script path. |
|
| `SANAD_LIVE_SCRIPT` | path | auto | Override the subprocess entry script path. |
|
||||||
| `SANAD_RECORD` | `0` or `1` | `1` | Record every Gemini turn to `data/recordings/`. |
|
| `SANAD_RECORD` | `0` or `1` | `1` | Record every Gemini turn to `data/recordings/`. |
|
||||||
| `SANAD_AEC_ENABLE` | `0` or `1` | `1` | Enable WebRTC AEC3 (if the Python binding is installed). |
|
| `SANAD_AEC_ENABLE` | `0` or `1` | `1` | Enable WebRTC AEC3 (if the Python binding is installed). |
|
||||||
|
| `SANAD_VISION_ENABLE` | `0` or `1` | `0` | Boot default for camera vision. **Runtime truth is the Recognition-tab toggle** → `data/.recognition_state.json`, hot-applied without a restart. |
|
||||||
|
| `SANAD_FACE_RECOGNITION_ENABLE` | `0` or `1` | `0` | Boot default for Gemini-side face recognition. Also a hot toggle. |
|
||||||
|
| `SANAD_VISION_SEND_HZ` | float | `2` | Frames/sec the Gemini child relays to Live. |
|
||||||
|
| `SANAD_CAMERA_WIDTH` / `_HEIGHT` / `_FPS` | int | `424` / `240` / `15` | Capture profile. Also settable per-deploy in `config/core_config.json > camera`. |
|
||||||
|
| `SANAD_FACES_MAX_SAMPLES` | int | `3` | Max photos per person fed into the gallery primer turn (token budget). |
|
||||||
|
|
||||||
|
> All `SANAD_VISION_*` / `SANAD_CAMERA_*` / `SANAD_FACE_*` vars are **boot
|
||||||
|
> defaults** forwarded to the Gemini child via `LIVE_TUNE`. Once running,
|
||||||
|
> the Recognition tab's toggles are the live source of truth — they write
|
||||||
|
> `data/.recognition_state.json`, which the child polls at 1 Hz.
|
||||||
|
|
||||||
|
|
||||||
## Dashboard features
|
## Dashboard features
|
||||||
@ -83,6 +117,25 @@ Quick-fire SDK + JSONL arm actions (chip buttons), gestural speaking toggle.
|
|||||||
- **Replay Manager** — upload `.jsonl` files, test-play with speed, Teaching Mode (kinesthetic record).
|
- **Replay Manager** — upload `.jsonl` files, test-play with speed, Teaching Mode (kinesthetic record).
|
||||||
- **Macro Recorder** — Record new audio+motion pair, OR pick any WAV + any motion (SDK or JSONL) and Play them in parallel.
|
- **Macro Recorder** — Record new audio+motion pair, OR pick any WAV + any motion (SDK or JSONL) and Play them in parallel.
|
||||||
|
|
||||||
|
### Recognition
|
||||||
|
Camera vision + Gemini-side face recognition. Both are **off by default**;
|
||||||
|
each is a **hot toggle** — flipping it takes effect on the running Gemini
|
||||||
|
session within ~1 s, no restart.
|
||||||
|
|
||||||
|
- **Camera Vision** — when on, the `CameraDaemon` captures from a RealSense
|
||||||
|
(preferred) or USB camera and the supervisor streams JPEG frames to
|
||||||
|
Gemini Live so it can answer "what do you see?". Live preview panel.
|
||||||
|
- **Face Recognition** — manage `data/faces/face_{id}/` galleries: enroll
|
||||||
|
from the live camera or upload photos, rename, download (per-photo or
|
||||||
|
ZIP), delete. On a session start (and on any gallery change) the child
|
||||||
|
sends a **primer turn** carrying every enrolled face + a Khaleeji
|
||||||
|
greeting instruction — Gemini itself does the matching in-context, so
|
||||||
|
there's **no local face-recognition model**. Recognition needs vision on.
|
||||||
|
- **Sync Gallery** — force-resend the primer to the live session.
|
||||||
|
|
||||||
|
The camera daemon auto-reconnects on USB unplug / stalled frames and warns
|
||||||
|
if a RealSense negotiated USB 2.0 (Marcus-ported resilience).
|
||||||
|
|
||||||
### Recordings
|
### Recordings
|
||||||
Skill Registry (predefined audio+motion skills from `skills.json`) + Saved Records (Gemini turn recordings).
|
Skill Registry (predefined audio+motion skills from `skills.json`) + Saved Records (Gemini turn recordings).
|
||||||
|
|
||||||
@ -94,6 +147,48 @@ Skill Registry (predefined audio+motion skills from `skills.json`) + Saved Recor
|
|||||||
- **Supervisor contract**: each brain ships a sibling supervisor (e.g., `gemini/subprocess.py`) that spawns `sanad_voice.py` with its `SANAD_VOICE_BRAIN` env var and parses the brain's log markers. Template: `voice/model_subprocess.py`.
|
- **Supervisor contract**: each brain ships a sibling supervisor (e.g., `gemini/subprocess.py`) that spawns `sanad_voice.py` with its `SANAD_VOICE_BRAIN` env var and parses the brain's log markers. Template: `voice/model_subprocess.py`.
|
||||||
- **Audio routing**: the G1's platform-sound PulseAudio sink is NOT wired to a physical speaker. All dashboard-triggered playback (`play_wav`, typed-replay audio, record playback) routes through DDS `AudioClient.PlayStream` via `audio_manager._play_pcm_via_g1`. The PyAudio path is kept as a desktop/dev fallback only.
|
- **Audio routing**: the G1's platform-sound PulseAudio sink is NOT wired to a physical speaker. All dashboard-triggered playback (`play_wav`, typed-replay audio, record playback) routes through DDS `AudioClient.PlayStream` via `audio_manager._play_pcm_via_g1`. The PyAudio path is kept as a desktop/dev fallback only.
|
||||||
- **Arm replay**: `motion/arm_controller.py:_replay_file_inner()` is a verbatim port of `G1_Lootah/Manual_Recorder/g1_replay_v4_stable.py:Run()` — ramp-in → settle hold → playback → smooth return → disable SDK. Cancel breaks the play loop; `_return_home()` runs unconditionally afterwards for a jerk-free return.
|
- **Arm replay**: `motion/arm_controller.py:_replay_file_inner()` is a verbatim port of `G1_Lootah/Manual_Recorder/g1_replay_v4_stable.py:Run()` — ramp-in → settle hold → playback → smooth return → disable SDK. Cancel breaks the play loop; `_return_home()` runs unconditionally afterwards for a jerk-free return.
|
||||||
|
- **Camera frame transport (stdin push)**: the `CameraDaemon` lives in the parent and caches frames in memory. `GeminiSubprocess` runs a `_frame_forwarder` thread that base64-encodes the latest frame and writes `frame:<b64>\n` to the child's stdin (~2 fps). The child's `_stdin_watcher` thread decodes into `_LATEST_FRAME`; `_send_frame_loop` relays it to Gemini Live with a staleness guard. This is the Marcus pattern — chosen over a file drop so the parent owns the camera once and the dashboard preview reads the same in-memory cache.
|
||||||
|
- **Motion-state channel**: `arm_controller._execute()` emits `motion.action_started` / `_done` / `_error` on the event bus. `main.py` forwards each to `live_sub.send_state()`, which writes `state:<json>\n` to the child's stdin. The child injects `[STATE-START] wave_hand`, `[STATE-DONE] wave_hand (2.3s)`, etc. into Gemini Live as silent text context (`send_realtime_input(text=…)`) so it can honestly answer "what are you doing?".
|
||||||
|
- **Face recognition is Gemini-side**: no dlib/insightface/onnxruntime. `vision/face_gallery.py` is pure file IO over `data/faces/face_{id}/` (`face_N.jpg|png` samples + optional `meta.json` with a `name`). At session start (and on any gallery change) `gemini/script.py:_send_gallery_primer()` builds one multimodal `send_client_content` turn — every enrolled face's photos + a greeting instruction — and Gemini matches incoming frames against it in-context.
|
||||||
|
|
||||||
|
|
||||||
|
## Camera vision on Jetson
|
||||||
|
|
||||||
|
The Recognition tab needs `pyrealsense2` to talk to the Intel RealSense.
|
||||||
|
**Do not `pip install pyrealsense2` on JetPack 5** — the PyPI wheel is built
|
||||||
|
against glibc 2.32+ (Ubuntu 22.04) and fails to load on JetPack 5's glibc
|
||||||
|
2.31 with `ImportError: ... version 'GLIBC_2.32' not found`.
|
||||||
|
|
||||||
|
The native runtime is already there (`apt`-installed `librealsense2`). Build
|
||||||
|
just the Python binding from source against it, into the `gemini_sdk` env:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
rs-enumerate-devices # confirm the D435I shows up at OS level first
|
||||||
|
|
||||||
|
source ~/miniconda3/etc/profile.d/conda.sh && conda activate gemini_sdk
|
||||||
|
pip uninstall -y pyrealsense2 # remove the broken wheel if present
|
||||||
|
sudo apt install -y cmake build-essential git python3-dev libusb-1.0-0-dev pkg-config libssl-dev
|
||||||
|
|
||||||
|
cd /tmp && rm -rf librealsense
|
||||||
|
git clone --depth=1 --branch v2.56.5 https://github.com/IntelRealSense/librealsense.git
|
||||||
|
cd librealsense && mkdir -p build && cd build
|
||||||
|
cmake .. -DBUILD_PYTHON_BINDINGS=ON -DPYTHON_EXECUTABLE=$(which python3) \
|
||||||
|
-DBUILD_EXAMPLES=OFF -DBUILD_GRAPHICAL_EXAMPLES=OFF \
|
||||||
|
-DBUILD_UNIT_TESTS=OFF -DCHECK_FOR_UPDATES=OFF -DCMAKE_BUILD_TYPE=Release
|
||||||
|
make -j$(nproc) pyrealsense2
|
||||||
|
SITE=$(python3 -c "import sysconfig; print(sysconfig.get_paths()['purelib'])")
|
||||||
|
mkdir -p "$SITE/pyrealsense2"
|
||||||
|
cp wrappers/python/pyrealsense2*.so "$SITE/pyrealsense2/"
|
||||||
|
cp ../wrappers/python/pyrealsense2/__init__.py "$SITE/pyrealsense2/" 2>/dev/null || true
|
||||||
|
|
||||||
|
python3 -c 'import pyrealsense2 as rs; print([d.get_info(rs.camera_info.name) for d in rs.context().query_devices()])'
|
||||||
|
```
|
||||||
|
|
||||||
|
Match the `--branch` tag to the installed runtime (`dpkg -l | grep librealsense2`).
|
||||||
|
If the build isn't worth it, `CameraDaemon` falls back to `cv2.VideoCapture(0)`
|
||||||
|
automatically — fine for a plain USB webcam, but note a RealSense exposes its
|
||||||
|
*depth* stream at `/dev/video0`, not RGB, so a real USB cam is the cleaner
|
||||||
|
fallback. On x86_64 / Ubuntu 22.04+ desktops, `pip install pyrealsense2` just works.
|
||||||
|
|
||||||
|
|
||||||
## Dynamic paths
|
## Dynamic paths
|
||||||
@ -133,7 +228,10 @@ Then on the robot: `Ctrl+C` the running `main.py` and re-run.
|
|||||||
| Record playback silent | `audio_mgr.play_wav` only routes to G1 DDS if the Unitree SDK is importable; on desktop it falls back to the PulseAudio sink. |
|
| Record playback silent | `audio_mgr.play_wav` only routes to G1 DDS if the Unitree SDK is importable; on desktop it falls back to the PulseAudio sink. |
|
||||||
| Live Voice Commands transcript stuck | Deferred trigger was queued but `trigger_enabled` toggle was off. Toggle on — or the pending-trigger poll now fires it automatically once enabled. |
|
| Live Voice Commands transcript stuck | Deferred trigger was queued but `trigger_enabled` toggle was off. Toggle on — or the pending-trigger poll now fires it automatically once enabled. |
|
||||||
| Gemini "no audio" on Typed Replay | Non-deterministic; the retry chain in `voice/typed_replay.py:generate_audio` tries three prompt variants. For reliable TTS, use the offline `local_tts` SpeechT5 path. |
|
| Gemini "no audio" on Typed Replay | Non-deterministic; the retry chain in `voice/typed_replay.py:generate_audio` tries three prompt variants. For reliable TTS, use the offline `local_tts` SpeechT5 path. |
|
||||||
| Dashboard `Not Found` 404s for `/api/vision/*` | Vision module was deleted; HTML still has stale fetches for a few endpoints. Cosmetic — `dashboard/static/index.html` init block already skips most. |
|
| Recognition tab: "Camera could not start (no backend)" | No camera backend acquired. Check `rs-enumerate-devices` (RealSense at OS level) and `python3 -c 'import pyrealsense2'` in the `gemini_sdk` env. The glibc `ImportError` means the pip wheel is incompatible — see "Camera vision on Jetson" above. |
|
||||||
|
| Camera badge stuck on "reconnecting…" | `CameraDaemon` lost the device and is retrying with exponential backoff. Re-seat the USB 3 cable; check `logs/camera.log` for the USB-2.0 warning. |
|
||||||
|
| Gemini doesn't greet an enrolled face | Face Recognition toggle on? Vision on? (Face rec needs frames.) Check `logs/gemini_brain.log` for `face gallery primed: N person(s)`. Hit "Sync Gallery" to force a re-prime. |
|
||||||
|
| Gemini unaware of motion state | The `motion.action_*` → `send_state` chain only runs when Live Gemini is up. Check `logs/gemini_subprocess.log` and `logs/gemini_brain.log` for `STATE injected:` lines. |
|
||||||
|
|
||||||
|
|
||||||
## License / attribution
|
## License / attribution
|
||||||
@ -142,4 +240,7 @@ Internal project for YS Lootah Technology. Reuses/ports patterns from:
|
|||||||
- `G1_Lootah/Manual_Recorder/g1_replay_v4_stable.py` (arm replay math)
|
- `G1_Lootah/Manual_Recorder/g1_replay_v4_stable.py` (arm replay math)
|
||||||
- `SanadVoice/gemini_interact` (arm-phrase dispatch, skill registry)
|
- `SanadVoice/gemini_interact` (arm-phrase dispatch, skill registry)
|
||||||
- `SanadVoice/gemini_voice_v2` (local SpeechT5 TTS)
|
- `SanadVoice/gemini_voice_v2` (local SpeechT5 TTS)
|
||||||
|
- `Project/Marcus` — camera→Gemini stdin-push transport, motion-state
|
||||||
|
injection, camera daemon resilience (auto-reconnect, USB-2.0 warning),
|
||||||
|
and the `API/camera_api.py` cache shape (`get_frame_b64` / `get_fresh_frame`).
|
||||||
- Unitree `unitree_sdk2py` (G1 low-level SDK, LocoClient, G1ArmActionClient)
|
- Unitree `unitree_sdk2py` (G1 low-level SDK, LocoClient, G1ArmActionClient)
|
||||||
|
|||||||
18
config.py
18
config.py
@ -341,6 +341,24 @@ LIVE_TUNE: dict[str, str] = {
|
|||||||
# G1 built-in mic — UDP multicast 239.168.123.161:5555.
|
# G1 built-in mic — UDP multicast 239.168.123.161:5555.
|
||||||
# Requires wake-up conversation mode ON in Unitree app.
|
# Requires wake-up conversation mode ON in Unitree app.
|
||||||
"SANAD_USE_G1_MIC": "1",
|
"SANAD_USE_G1_MIC": "1",
|
||||||
|
|
||||||
|
# ── Recognition (camera vision + face recognition) ──
|
||||||
|
# All of these are BOOT defaults. The runtime source of truth is the
|
||||||
|
# state file data/.recognition_state.json — toggled live from the
|
||||||
|
# Recognition tab and polled by the Gemini child at 1 Hz.
|
||||||
|
"SANAD_VISION_ENABLE": "0",
|
||||||
|
"SANAD_VISION_SEND_HZ": "2",
|
||||||
|
"SANAD_VISION_STALE_MS": "1500",
|
||||||
|
"SANAD_CAMERA_WIDTH": "424",
|
||||||
|
"SANAD_CAMERA_HEIGHT": "240",
|
||||||
|
"SANAD_CAMERA_FPS": "15",
|
||||||
|
"SANAD_CAMERA_JPEG_QUALITY": "70",
|
||||||
|
"SANAD_FACE_RECOGNITION_ENABLE": "0",
|
||||||
|
"SANAD_FACES_DIR": str(DATA_DIR / "faces"),
|
||||||
|
"SANAD_FACES_MAX_SAMPLES": "3",
|
||||||
|
"SANAD_FACES_PRIMER_RESIZE": "256",
|
||||||
|
"SANAD_RECOGNITION_STATE_PATH": str(DATA_DIR / ".recognition_state.json"),
|
||||||
|
"SANAD_RECOGNITION_POLL_S": "1.0",
|
||||||
}
|
}
|
||||||
|
|
||||||
# -- Camera --
|
# -- Camera --
|
||||||
|
|||||||
@ -76,5 +76,26 @@
|
|||||||
|
|
||||||
"dds": {
|
"dds": {
|
||||||
"network_interface_default": "eth0"
|
"network_interface_default": "eth0"
|
||||||
|
},
|
||||||
|
|
||||||
|
"camera": {
|
||||||
|
"_comment": "Recognition tab camera daemon (parent process reads this). width/height/fps/jpeg_quality + the reconnect knobs configure CameraDaemon. Frames are cached in memory and pushed to the Gemini child over its stdin (no file drop). send_hz/stale_ms are read by the Gemini child via SANAD_VISION_SEND_HZ / SANAD_VISION_STALE_MS env vars (LIVE_TUNE).",
|
||||||
|
"width": 424,
|
||||||
|
"height": 240,
|
||||||
|
"fps": 15,
|
||||||
|
"jpeg_quality": 70,
|
||||||
|
"send_hz": 2,
|
||||||
|
"stale_ms": 1500,
|
||||||
|
"stale_threshold_s": 10.0,
|
||||||
|
"reconnect_min_s": 2.0,
|
||||||
|
"reconnect_max_s": 10.0,
|
||||||
|
"capture_timeout_ms": 5000
|
||||||
|
},
|
||||||
|
|
||||||
|
"faces": {
|
||||||
|
"_comment": "Face gallery for Gemini-side recognition. Folder layout: data/faces/face_{id}/{face_1.jpg, ...} + optional meta.json {\"name\": \"...\"}. Gemini does the matching — no local ML model.",
|
||||||
|
"dir_rel": "data/faces",
|
||||||
|
"max_samples_per_face": 3,
|
||||||
|
"primer_resize_long_side": 256
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -10,12 +10,13 @@
|
|||||||
},
|
},
|
||||||
|
|
||||||
"subprocess": {
|
"subprocess": {
|
||||||
"_comment": "gemini/subprocess.py — GeminiSubprocess supervisor. Spawns voice/sanad_voice.py as a child, tails stdout for Gemini-specific log markers, exposes transcript + state to the dashboard.",
|
"_comment": "gemini/subprocess.py — GeminiSubprocess supervisor. Spawns voice/sanad_voice.py as a child, tails stdout for Gemini-specific log markers, pushes camera frames + motion state to the child over its stdin, exposes transcript + state to the dashboard.",
|
||||||
"log_tail_size": 2000,
|
"log_tail_size": 2000,
|
||||||
"transcript_tail_size": 30,
|
"transcript_tail_size": 30,
|
||||||
"log_name": "gemini_subprocess",
|
"log_name": "gemini_subprocess",
|
||||||
"stop_timeout_sec": 3.0,
|
"stop_timeout_sec": 3.0,
|
||||||
"terminate_timeout_sec": 2.0,
|
"terminate_timeout_sec": 2.0,
|
||||||
|
"frame_forward_interval_sec": 0.5,
|
||||||
"noisy_prefixes": [
|
"noisy_prefixes": [
|
||||||
"ALSA lib ",
|
"ALSA lib ",
|
||||||
"Expression 'alsa_",
|
"Expression 'alsa_",
|
||||||
|
|||||||
@ -50,6 +50,7 @@ _REST_ROUTES: list[tuple[str, str, str]] = [
|
|||||||
("live_voice", "/api/live-voice", "live-voice"),
|
("live_voice", "/api/live-voice", "live-voice"),
|
||||||
("live_subprocess", "/api/live-subprocess", "live-subprocess"),
|
("live_subprocess", "/api/live-subprocess", "live-subprocess"),
|
||||||
("typed_replay", "/api/typed-replay", "typed-replay"),
|
("typed_replay", "/api/typed-replay", "typed-replay"),
|
||||||
|
("recognition", "/api/recognition", "recognition"),
|
||||||
]
|
]
|
||||||
|
|
||||||
_WS_ROUTES: list[str] = ["log_stream"]
|
_WS_ROUTES: list[str] = ["log_stream"]
|
||||||
|
|||||||
457
dashboard/routes/recognition.py
Normal file
457
dashboard/routes/recognition.py
Normal file
@ -0,0 +1,457 @@
|
|||||||
|
"""Recognition tab — camera vision + face gallery + hot toggles.
|
||||||
|
|
||||||
|
Single router covering:
|
||||||
|
- Vision / Face Recognition toggles (hot — no Gemini restart needed)
|
||||||
|
- Live camera preview (latest JPEG drop)
|
||||||
|
- Face gallery CRUD: enroll, upload, capture, rename, delete, ZIP
|
||||||
|
- Per-photo download + delete
|
||||||
|
|
||||||
|
Toggle changes write data/.recognition_state.json atomically. The Gemini
|
||||||
|
child polls that file at 1 Hz and applies changes mid-session.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import io
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from fastapi import APIRouter, File, HTTPException, Query, UploadFile
|
||||||
|
from fastapi.responses import FileResponse, Response, StreamingResponse
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from Project.Sanad.config import BASE_DIR
|
||||||
|
from Project.Sanad.core.logger import get_logger
|
||||||
|
from Project.Sanad.dashboard.routes._safe_io import check_upload_size
|
||||||
|
from Project.Sanad.vision import recognition_state
|
||||||
|
|
||||||
|
log = get_logger("recognition_routes")
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
# ── paths (resolved from BASE_DIR) ──────────────────────────
|
||||||
|
|
||||||
|
STATE_PATH = BASE_DIR / "data" / ".recognition_state.json"
|
||||||
|
FACES_DIR = BASE_DIR / "data" / "faces"
|
||||||
|
|
||||||
|
ALLOWED_IMAGE_EXTS = {".jpg", ".jpeg", ".png"}
|
||||||
|
|
||||||
|
|
||||||
|
# ── helpers ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _get_camera():
|
||||||
|
"""Lazy import to avoid circular import on dashboard load."""
|
||||||
|
try:
|
||||||
|
from Project.Sanad.main import camera # type: ignore
|
||||||
|
return camera
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_gallery():
|
||||||
|
"""Lazy import — same reason."""
|
||||||
|
try:
|
||||||
|
from Project.Sanad.main import gallery # type: ignore
|
||||||
|
return gallery
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _bump_and_write_state(**changes) -> recognition_state.RecognitionState:
|
||||||
|
"""Apply changes (vision_enabled, face_rec_enabled) and persist."""
|
||||||
|
return recognition_state.mutate(STATE_PATH, **changes)
|
||||||
|
|
||||||
|
|
||||||
|
def _bump_gallery_version() -> int:
|
||||||
|
cur = recognition_state.read(STATE_PATH)
|
||||||
|
new_version = cur.gallery_version + 1
|
||||||
|
recognition_state.mutate(STATE_PATH, gallery_version=new_version)
|
||||||
|
return new_version
|
||||||
|
|
||||||
|
|
||||||
|
# ── state + toggles ─────────────────────────────────────────
|
||||||
|
|
||||||
|
@router.get("/state")
|
||||||
|
async def get_state():
|
||||||
|
"""Return the current toggle/camera/gallery state."""
|
||||||
|
st = recognition_state.read(STATE_PATH)
|
||||||
|
cam = _get_camera()
|
||||||
|
gallery = _get_gallery()
|
||||||
|
faces_count = 0
|
||||||
|
photos_count = 0
|
||||||
|
if gallery is not None:
|
||||||
|
try:
|
||||||
|
entries = gallery.list()
|
||||||
|
faces_count = len(entries)
|
||||||
|
photos_count = sum(len(e.sample_paths) for e in entries)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return {
|
||||||
|
"vision_enabled": st.vision_enabled,
|
||||||
|
"face_rec_enabled": st.face_rec_enabled,
|
||||||
|
"gallery_version": st.gallery_version,
|
||||||
|
"camera": cam.status() if cam is not None else {
|
||||||
|
"running": False, "backend": None, "error": "camera subsystem unavailable"
|
||||||
|
},
|
||||||
|
"faces_count": faces_count,
|
||||||
|
"photos_count": photos_count,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/vision")
|
||||||
|
async def set_vision(on: bool = Query(...)):
|
||||||
|
"""Enable / disable camera vision (hot — no Gemini restart)."""
|
||||||
|
cam = _get_camera()
|
||||||
|
if cam is None:
|
||||||
|
log.warning("vision toggle requested but camera subsystem unavailable")
|
||||||
|
raise HTTPException(503, "Camera subsystem not available.")
|
||||||
|
|
||||||
|
if on and not cam.is_running():
|
||||||
|
ok = cam.start()
|
||||||
|
if not ok:
|
||||||
|
log.warning("vision ON requested but camera.start() failed: %s",
|
||||||
|
cam.error or "no backend")
|
||||||
|
_bump_and_write_state(vision_enabled=False)
|
||||||
|
raise HTTPException(503,
|
||||||
|
f"Camera could not start (no backend). {cam.error or ''}")
|
||||||
|
elif (not on) and cam.is_running():
|
||||||
|
cam.stop()
|
||||||
|
|
||||||
|
st = _bump_and_write_state(vision_enabled=bool(on))
|
||||||
|
log.info("vision %s (backend=%s)", "ON" if on else "OFF",
|
||||||
|
cam.backend if cam.is_running() else "none")
|
||||||
|
return {"ok": True, "vision_enabled": st.vision_enabled,
|
||||||
|
"camera": cam.status()}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/face-rec")
|
||||||
|
async def set_face_rec(on: bool = Query(...)):
|
||||||
|
"""Enable / disable face recognition (hot — no Gemini restart).
|
||||||
|
|
||||||
|
The Gemini child picks the change up within ~1 s: ON re-sends the
|
||||||
|
gallery primer and tells Gemini it can recognise people; OFF tells
|
||||||
|
Gemini to disregard the gallery and stop identifying anyone. Both
|
||||||
|
take effect on the live session — no reconnect needed.
|
||||||
|
"""
|
||||||
|
st = _bump_and_write_state(face_rec_enabled=bool(on))
|
||||||
|
log.info("face recognition %s", "ON" if on else "OFF")
|
||||||
|
return {"ok": True, "face_rec_enabled": st.face_rec_enabled}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/sync")
|
||||||
|
async def sync_gallery():
|
||||||
|
"""Bump gallery_version so the child re-sends the primer if face-rec is ON."""
|
||||||
|
v = _bump_gallery_version()
|
||||||
|
log.info("gallery sync requested → v.%d", v)
|
||||||
|
return {"ok": True, "gallery_version": v}
|
||||||
|
|
||||||
|
|
||||||
|
# ── live preview ────────────────────────────────────────────
|
||||||
|
|
||||||
|
@router.get("/frame.jpg")
|
||||||
|
async def latest_frame():
|
||||||
|
"""Serve the most recent camera frame straight from the daemon's
|
||||||
|
in-memory cache (no file drop — frames are also pushed to the Gemini
|
||||||
|
child over its stdin)."""
|
||||||
|
cam = _get_camera()
|
||||||
|
if cam is None:
|
||||||
|
raise HTTPException(503, "Camera subsystem unavailable.")
|
||||||
|
jpeg = cam.snapshot_jpeg()
|
||||||
|
if not jpeg:
|
||||||
|
raise HTTPException(404, "No frame captured yet.")
|
||||||
|
return Response(
|
||||||
|
content=jpeg,
|
||||||
|
media_type="image/jpeg",
|
||||||
|
headers={"Cache-Control": "no-store, must-revalidate"},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ── camera resolution / quality ─────────────────────────────
|
||||||
|
|
||||||
|
class CameraConfigPayload(BaseModel):
|
||||||
|
width: Optional[int] = None
|
||||||
|
height: Optional[int] = None
|
||||||
|
fps: Optional[int] = None
|
||||||
|
jpeg_quality: Optional[int] = None
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/camera-config")
|
||||||
|
async def set_camera_config(payload: CameraConfigPayload):
|
||||||
|
"""Hot-swap the camera capture profile (resolution / fps / JPEG quality).
|
||||||
|
|
||||||
|
If the camera is running, CameraDaemon.reconfigure() rebuilds the
|
||||||
|
pipeline at the new profile (~0.5 s gap). If idle, the values just
|
||||||
|
take effect on the next start. Bounds are sanity-checked here so a
|
||||||
|
fat-fingered value can't wedge the daemon."""
|
||||||
|
cam = _get_camera()
|
||||||
|
if cam is None:
|
||||||
|
raise HTTPException(503, "Camera subsystem unavailable.")
|
||||||
|
if payload.width is not None and not (160 <= payload.width <= 1920):
|
||||||
|
raise HTTPException(400, "width out of range (160–1920)")
|
||||||
|
if payload.height is not None and not (120 <= payload.height <= 1080):
|
||||||
|
raise HTTPException(400, "height out of range (120–1080)")
|
||||||
|
if payload.fps is not None and not (1 <= payload.fps <= 60):
|
||||||
|
raise HTTPException(400, "fps out of range (1–60)")
|
||||||
|
if payload.jpeg_quality is not None and not (10 <= payload.jpeg_quality <= 95):
|
||||||
|
raise HTTPException(400, "jpeg_quality out of range (10–95)")
|
||||||
|
profile = cam.reconfigure(
|
||||||
|
width=payload.width, height=payload.height,
|
||||||
|
fps=payload.fps, jpeg_quality=payload.jpeg_quality,
|
||||||
|
)
|
||||||
|
log.info("camera reconfigured via dashboard → %s", profile)
|
||||||
|
return {"ok": True, "profile": profile, "camera": cam.status()}
|
||||||
|
|
||||||
|
|
||||||
|
# ── face gallery routes ─────────────────────────────────────
|
||||||
|
|
||||||
|
def _validate_image(content: bytes, filename: str | None = None) -> None:
|
||||||
|
"""Reject non-JPEG/PNG content + oversize uploads."""
|
||||||
|
check_upload_size(content)
|
||||||
|
if len(content) < 16:
|
||||||
|
raise HTTPException(400, "Image too small / empty.")
|
||||||
|
is_jpeg = content[:3] == b"\xff\xd8\xff"
|
||||||
|
is_png = content[:8] == b"\x89PNG\r\n\x1a\n"
|
||||||
|
if not (is_jpeg or is_png):
|
||||||
|
raise HTTPException(
|
||||||
|
400,
|
||||||
|
f"Only JPEG/PNG accepted (got {filename or 'unknown'}).",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _entry_to_dict(entry) -> dict:
|
||||||
|
photos = []
|
||||||
|
for p in entry.sample_paths:
|
||||||
|
try:
|
||||||
|
photos.append({"name": p.name, "size_bytes": p.stat().st_size})
|
||||||
|
except OSError:
|
||||||
|
continue
|
||||||
|
return {
|
||||||
|
"id": entry.id,
|
||||||
|
"name": entry.name,
|
||||||
|
"description": entry.description,
|
||||||
|
"added_at": entry.added_at,
|
||||||
|
"photos": photos,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/faces")
|
||||||
|
async def list_faces():
|
||||||
|
gallery = _get_gallery()
|
||||||
|
if gallery is None:
|
||||||
|
raise HTTPException(503, "Face gallery subsystem unavailable.")
|
||||||
|
entries = gallery.list()
|
||||||
|
return {"faces": [_entry_to_dict(e) for e in entries],
|
||||||
|
"total": len(entries)}
|
||||||
|
|
||||||
|
|
||||||
|
class RenamePayload(BaseModel):
|
||||||
|
name: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class DescribePayload(BaseModel):
|
||||||
|
description: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/faces/enroll")
|
||||||
|
async def enroll_from_camera(name: Optional[str] = Query(default=None),
|
||||||
|
description: Optional[str] = Query(default=None)):
|
||||||
|
"""Create a new face from the camera's latest snapshot."""
|
||||||
|
gallery = _get_gallery()
|
||||||
|
if gallery is None:
|
||||||
|
raise HTTPException(503, "Face gallery subsystem unavailable.")
|
||||||
|
cam = _get_camera()
|
||||||
|
if cam is None or not cam.is_running():
|
||||||
|
raise HTTPException(409, "Camera is not running. Toggle Vision ON first.")
|
||||||
|
# get_fresh_frame waits briefly for a current frame so the enrolled
|
||||||
|
# photo is the scene the user is posing for, not a stale buffer.
|
||||||
|
jpeg = cam.get_fresh_frame(max_age_s=0.5, timeout_s=1.5)
|
||||||
|
if not jpeg:
|
||||||
|
raise HTTPException(409, "Camera has no frame yet. Wait a moment and retry.")
|
||||||
|
entry = gallery.create_face(
|
||||||
|
[jpeg],
|
||||||
|
name=name.strip() if name else None,
|
||||||
|
description=description.strip() if description else None,
|
||||||
|
)
|
||||||
|
v = _bump_gallery_version()
|
||||||
|
log.info("enrolled face_%d via camera (name=%s, desc=%s, v.%d)",
|
||||||
|
entry.id, name or "(unnamed)",
|
||||||
|
"yes" if description else "no", v)
|
||||||
|
return {"ok": True, "face": _entry_to_dict(entry)}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/faces/upload")
|
||||||
|
async def enroll_from_upload(
|
||||||
|
files: list[UploadFile] = File(...),
|
||||||
|
name: Optional[str] = Query(default=None),
|
||||||
|
description: Optional[str] = Query(default=None),
|
||||||
|
):
|
||||||
|
"""Create a new face from uploaded image file(s)."""
|
||||||
|
gallery = _get_gallery()
|
||||||
|
if gallery is None:
|
||||||
|
raise HTTPException(503, "Face gallery subsystem unavailable.")
|
||||||
|
if not files:
|
||||||
|
raise HTTPException(400, "At least one image file required.")
|
||||||
|
image_bytes: list[bytes] = []
|
||||||
|
for f in files:
|
||||||
|
content = await f.read()
|
||||||
|
_validate_image(content, f.filename)
|
||||||
|
image_bytes.append(content)
|
||||||
|
entry = gallery.create_face(
|
||||||
|
image_bytes,
|
||||||
|
name=name.strip() if name else None,
|
||||||
|
description=description.strip() if description else None,
|
||||||
|
)
|
||||||
|
v = _bump_gallery_version()
|
||||||
|
log.info("enrolled face_%d via upload (%d photos, name=%s, desc=%s, v.%d)",
|
||||||
|
entry.id, len(image_bytes), name or "(unnamed)",
|
||||||
|
"yes" if description else "no", v)
|
||||||
|
return {"ok": True, "face": _entry_to_dict(entry)}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/faces/{face_id}/capture")
|
||||||
|
async def capture_to_face(face_id: int):
|
||||||
|
"""Add a new sample (from the camera) to an existing face."""
|
||||||
|
gallery = _get_gallery()
|
||||||
|
if gallery is None:
|
||||||
|
raise HTTPException(503, "Face gallery subsystem unavailable.")
|
||||||
|
cam = _get_camera()
|
||||||
|
if cam is None or not cam.is_running():
|
||||||
|
raise HTTPException(409, "Camera is not running. Toggle Vision ON first.")
|
||||||
|
jpeg = cam.get_fresh_frame(max_age_s=0.5, timeout_s=1.5)
|
||||||
|
if not jpeg:
|
||||||
|
raise HTTPException(409, "Camera has no frame yet.")
|
||||||
|
try:
|
||||||
|
fname = gallery.add_photo(face_id, jpeg)
|
||||||
|
except FileNotFoundError as exc:
|
||||||
|
raise HTTPException(404, str(exc))
|
||||||
|
v = _bump_gallery_version()
|
||||||
|
log.info("captured new photo for face_%d → %s (v.%d)", face_id, fname, v)
|
||||||
|
return {"ok": True, "added": fname, "face": _entry_to_dict(gallery.get(face_id))}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/faces/{face_id}/upload")
|
||||||
|
async def upload_to_face(face_id: int, files: list[UploadFile] = File(...)):
|
||||||
|
"""Add one or more uploaded samples to an existing face."""
|
||||||
|
gallery = _get_gallery()
|
||||||
|
if gallery is None:
|
||||||
|
raise HTTPException(503, "Face gallery subsystem unavailable.")
|
||||||
|
if gallery.get(face_id) is None:
|
||||||
|
raise HTTPException(404, f"face_{face_id} not found")
|
||||||
|
added: list[str] = []
|
||||||
|
for f in files:
|
||||||
|
content = await f.read()
|
||||||
|
_validate_image(content, f.filename)
|
||||||
|
try:
|
||||||
|
fname = gallery.add_photo(face_id, content)
|
||||||
|
added.append(fname)
|
||||||
|
except FileNotFoundError as exc:
|
||||||
|
raise HTTPException(404, str(exc))
|
||||||
|
v = _bump_gallery_version()
|
||||||
|
log.info("uploaded %d photo(s) to face_%d (v.%d)", len(added), face_id, v)
|
||||||
|
return {"ok": True, "added": added,
|
||||||
|
"face": _entry_to_dict(gallery.get(face_id))}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/faces/{face_id}/rename")
|
||||||
|
async def rename_face(face_id: int, payload: RenamePayload):
|
||||||
|
gallery = _get_gallery()
|
||||||
|
if gallery is None:
|
||||||
|
raise HTTPException(503, "Face gallery subsystem unavailable.")
|
||||||
|
try:
|
||||||
|
gallery.rename(face_id, payload.name)
|
||||||
|
except FileNotFoundError as exc:
|
||||||
|
raise HTTPException(404, str(exc))
|
||||||
|
v = _bump_gallery_version()
|
||||||
|
log.info("renamed face_%d → %s (v.%d)", face_id,
|
||||||
|
payload.name or "(unnamed)", v)
|
||||||
|
return {"ok": True, "face": _entry_to_dict(gallery.get(face_id))}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/faces/{face_id}/describe")
|
||||||
|
async def describe_face(face_id: int, payload: DescribePayload):
|
||||||
|
"""Set / clear a face's free-text description. The description is
|
||||||
|
folded into the Gemini primer turn so Gemini can reference it."""
|
||||||
|
gallery = _get_gallery()
|
||||||
|
if gallery is None:
|
||||||
|
raise HTTPException(503, "Face gallery subsystem unavailable.")
|
||||||
|
try:
|
||||||
|
gallery.set_description(face_id, payload.description)
|
||||||
|
except FileNotFoundError as exc:
|
||||||
|
raise HTTPException(404, str(exc))
|
||||||
|
v = _bump_gallery_version()
|
||||||
|
log.info("described face_%d (%s, v.%d)", face_id,
|
||||||
|
"set" if payload.description else "cleared", v)
|
||||||
|
return {"ok": True, "face": _entry_to_dict(gallery.get(face_id))}
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete("/faces/{face_id}")
|
||||||
|
async def delete_face(face_id: int):
|
||||||
|
gallery = _get_gallery()
|
||||||
|
if gallery is None:
|
||||||
|
raise HTTPException(503, "Face gallery subsystem unavailable.")
|
||||||
|
try:
|
||||||
|
gallery.delete_face(face_id)
|
||||||
|
except FileNotFoundError as exc:
|
||||||
|
raise HTTPException(404, str(exc))
|
||||||
|
v = _bump_gallery_version()
|
||||||
|
log.info("deleted face_%d (v.%d)", face_id, v)
|
||||||
|
return {"ok": True, "deleted": face_id}
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete("/faces/{face_id}/photo/{photo_name}")
|
||||||
|
async def delete_photo(face_id: int, photo_name: str):
|
||||||
|
gallery = _get_gallery()
|
||||||
|
if gallery is None:
|
||||||
|
raise HTTPException(503, "Face gallery subsystem unavailable.")
|
||||||
|
# safe filename — only allow simple file names, no traversal
|
||||||
|
if "/" in photo_name or ".." in photo_name or "\x00" in photo_name:
|
||||||
|
raise HTTPException(400, "Invalid photo name.")
|
||||||
|
try:
|
||||||
|
gallery.delete_photo(face_id, photo_name)
|
||||||
|
except FileNotFoundError as exc:
|
||||||
|
raise HTTPException(404, str(exc))
|
||||||
|
except ValueError as exc:
|
||||||
|
raise HTTPException(400, str(exc))
|
||||||
|
v = _bump_gallery_version()
|
||||||
|
log.info("deleted photo %s from face_%d (v.%d)", photo_name, face_id, v)
|
||||||
|
return {"ok": True, "deleted": photo_name}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/faces/{face_id}/photo/{photo_name}")
|
||||||
|
async def get_photo(face_id: int, photo_name: str,
|
||||||
|
download: int = Query(default=0)):
|
||||||
|
"""Serve a single photo. Add ?download=1 for attachment disposition."""
|
||||||
|
gallery = _get_gallery()
|
||||||
|
if gallery is None:
|
||||||
|
raise HTTPException(503, "Face gallery subsystem unavailable.")
|
||||||
|
if "/" in photo_name or ".." in photo_name or "\x00" in photo_name:
|
||||||
|
raise HTTPException(400, "Invalid photo name.")
|
||||||
|
path = gallery.get_photo(face_id, photo_name)
|
||||||
|
if path is None:
|
||||||
|
raise HTTPException(404, "Photo not found.")
|
||||||
|
media = "image/png" if path.suffix.lower() == ".png" else "image/jpeg"
|
||||||
|
headers = {}
|
||||||
|
if download:
|
||||||
|
headers["Content-Disposition"] = (
|
||||||
|
f'attachment; filename="face_{face_id}_{photo_name}"'
|
||||||
|
)
|
||||||
|
return FileResponse(path, media_type=media, headers=headers)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/faces/{face_id}/download.zip")
|
||||||
|
async def download_face_zip(face_id: int):
|
||||||
|
gallery = _get_gallery()
|
||||||
|
if gallery is None:
|
||||||
|
raise HTTPException(503, "Face gallery subsystem unavailable.")
|
||||||
|
try:
|
||||||
|
data = gallery.zip_face(face_id)
|
||||||
|
except FileNotFoundError as exc:
|
||||||
|
raise HTTPException(404, str(exc))
|
||||||
|
return StreamingResponse(
|
||||||
|
io.BytesIO(data),
|
||||||
|
media_type="application/zip",
|
||||||
|
headers={
|
||||||
|
"Content-Disposition": f'attachment; filename="face_{face_id}.zip"',
|
||||||
|
"Content-Length": str(len(data)),
|
||||||
|
},
|
||||||
|
)
|
||||||
@ -123,7 +123,7 @@
|
|||||||
<div class="tab active" onclick="switchTab('operations')">Operations</div>
|
<div class="tab active" onclick="switchTab('operations')">Operations</div>
|
||||||
<div class="tab" onclick="switchTab('voice')">Voice & Audio</div>
|
<div class="tab" onclick="switchTab('voice')">Voice & Audio</div>
|
||||||
<div class="tab" onclick="switchTab('motion')">Motion & Replay</div>
|
<div class="tab" onclick="switchTab('motion')">Motion & Replay</div>
|
||||||
<div class="tab" onclick="switchTab('vision')">Camera & Vision</div>
|
<div class="tab" onclick="switchTab('recognition')">Recognition</div>
|
||||||
<div class="tab" onclick="switchTab('recordings')">Recordings</div>
|
<div class="tab" onclick="switchTab('recordings')">Recordings</div>
|
||||||
<div class="tab" onclick="switchTab('settings')">Settings & Logs</div>
|
<div class="tab" onclick="switchTab('settings')">Settings & Logs</div>
|
||||||
</div>
|
</div>
|
||||||
@ -211,23 +211,6 @@
|
|||||||
<div style="margin-top:.5rem;font-size:.72rem;color:var(--dim)" id="audio-status-text"></div>
|
<div style="margin-top:.5rem;font-size:.72rem;color:var(--dim)" id="audio-status-text"></div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Live Camera Feed -->
|
|
||||||
<div class="card">
|
|
||||||
<h3><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M23 7l-7 5 7 5V7z"/><rect x="1" y="5" width="15" height="14" rx="2"/></svg>Live Camera</h3>
|
|
||||||
<canvas id="camera-feed" width="640" height="480" style="width:100%;max-height:260px;border-radius:8px;background:#000"></canvas>
|
|
||||||
<div class="row" style="margin-top:.4rem">
|
|
||||||
<button class="btn btn-success btn-sm" onclick="startLocalCam(this)">Start</button>
|
|
||||||
<button class="btn btn-danger btn-sm" onclick="stopLocalCam(this)">Stop</button>
|
|
||||||
<button class="btn btn-ghost btn-sm" onclick="reconnectCamera()">Reconnect</button>
|
|
||||||
<span id="ops-cam-state" class="badge" style="margin-left:.3rem"></span>
|
|
||||||
</div>
|
|
||||||
<div class="row" style="margin-top:.3rem">
|
|
||||||
<button class="btn btn-primary" onclick="capturePhoto(this)">Capture</button>
|
|
||||||
<select id="capture-gesture" style="width:130px"><option value="">No gesture</option></select>
|
|
||||||
<button class="btn btn-success" onclick="captureWithMotion(this)">Capture + Gesture</button>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<!-- Motion Quick Panel -->
|
<!-- Motion Quick Panel -->
|
||||||
<div class="card">
|
<div class="card">
|
||||||
<h3><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M18 8A6 6 0 0 0 6 8c0 7-3 9-3 9h18s-3-2-3-9"/><path d="M13.73 21a2 2 0 0 1-3.46 0"/></svg>Quick Actions</h3>
|
<h3><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M18 8A6 6 0 0 0 6 8c0 7-3 9-3 9h18s-3-2-3-9"/><path d="M13.73 21a2 2 0 0 1-3.46 0"/></svg>Quick Actions</h3>
|
||||||
@ -288,6 +271,7 @@
|
|||||||
<div class="row">
|
<div class="row">
|
||||||
<button class="btn btn-success" onclick="startLiveSub(this)">Start</button>
|
<button class="btn btn-success" onclick="startLiveSub(this)">Start</button>
|
||||||
<button class="btn btn-danger" onclick="stopLiveSub(this)">Stop</button>
|
<button class="btn btn-danger" onclick="stopLiveSub(this)">Stop</button>
|
||||||
|
<button id="ls-cam-btn" class="btn btn-sm btn-ghost" onclick="toggleGeminiCamera(this)" title="Stream camera frames to Gemini Live — same toggle as the Recognition tab">Camera: --</button>
|
||||||
<span id="ls-state" class="badge"></span>
|
<span id="ls-state" class="badge"></span>
|
||||||
<button class="btn btn-sm mic-mute-shortcut btn-success" onclick="toggleMic()" style="margin-left:auto">Mic: LIVE</button>
|
<button class="btn btn-sm mic-mute-shortcut btn-success" onclick="toggleMic()" style="margin-left:auto">Mic: LIVE</button>
|
||||||
<button class="btn btn-sm spk-mute-shortcut btn-success" onclick="toggleSpeaker()">Speaker: LIVE</button>
|
<button class="btn btn-sm spk-mute-shortcut btn-success" onclick="toggleSpeaker()">Speaker: LIVE</button>
|
||||||
@ -470,78 +454,102 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- ==================== TAB: Camera & Vision ==================== -->
|
<!-- ==================== TAB: Recognition ==================== -->
|
||||||
<div class="tab-content" id="tab-vision">
|
<div class="tab-content" id="tab-recognition">
|
||||||
<div class="grid">
|
<div class="grid">
|
||||||
|
|
||||||
<!-- Camera Devices -->
|
<!-- Status & Toggles -->
|
||||||
<div class="card">
|
|
||||||
<h3>Camera Device</h3>
|
|
||||||
<div class="row">
|
|
||||||
<label>Profile</label>
|
|
||||||
<select id="camdev-profile" style="flex:1" onchange="selectCamProfile(this.value)">
|
|
||||||
<option value="">Loading...</option>
|
|
||||||
</select>
|
|
||||||
<button class="btn btn-ghost btn-sm" onclick="scanCameras(this)" title="Re-scan plugged cameras">Scan</button>
|
|
||||||
</div>
|
|
||||||
<div id="camdev-detected" style="margin-top:.3rem;font-size:.65rem;color:var(--dim)"></div>
|
|
||||||
<details style="margin-top:.5rem">
|
|
||||||
<summary style="cursor:pointer;font-size:.72rem;color:var(--dim)">All plugged cameras</summary>
|
|
||||||
<div id="camdev-list" style="margin-top:.3rem;font-size:.7rem"></div>
|
|
||||||
</details>
|
|
||||||
<details style="margin-top:.4rem">
|
|
||||||
<summary style="cursor:pointer;font-size:.72rem;color:var(--dim)">Pin RealSense serial to slot</summary>
|
|
||||||
<div class="row" style="margin-top:.3rem">
|
|
||||||
<select id="camdev-pin-profile" style="flex:1"></select>
|
|
||||||
<input id="camdev-pin-serial" placeholder="Serial..." style="flex:1">
|
|
||||||
<button class="btn btn-primary btn-sm" onclick="pinCamSerial(this)">Pin</button>
|
|
||||||
</div>
|
|
||||||
<div style="font-size:.65rem;color:var(--dim);margin-top:.2rem">Use this when you have two RealSense units and want to lock which one is "primary".</div>
|
|
||||||
</details>
|
|
||||||
<div class="row" style="margin-top:.5rem">
|
|
||||||
<button class="btn btn-success btn-sm" onclick="startLocalCam(this)">Start Capture</button>
|
|
||||||
<button class="btn btn-danger btn-sm" onclick="stopLocalCam(this)">Stop Capture</button>
|
|
||||||
<span id="local-cam-state" class="badge"></span>
|
|
||||||
</div>
|
|
||||||
<div style="margin-top:.5rem;font-size:.72rem;color:var(--dim)" id="camdev-status"></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<!-- Camera Config (legacy) -->
|
|
||||||
<div class="card">
|
|
||||||
<h3>Camera Configuration</h3>
|
|
||||||
<div class="row"><label>Source</label><select id="cam-source" style="flex:1" onchange="setCamSource(this.value)"></select></div>
|
|
||||||
<div class="row">
|
|
||||||
<label>Resolution</label>
|
|
||||||
<input id="cam-w" type="number" value="640" style="width:60px"> <span style="color:var(--dim)">x</span>
|
|
||||||
<input id="cam-h" type="number" value="480" style="width:60px"> <span style="color:var(--dim)">@</span>
|
|
||||||
<input id="cam-fps" type="number" value="30" style="width:45px"> <span style="color:var(--dim)">fps</span>
|
|
||||||
<button class="btn btn-primary btn-sm" onclick="setCamRes()">Set</button>
|
|
||||||
</div>
|
|
||||||
<div class="row"><label>RealSense</label><input id="cam-serial" placeholder="Serial number" style="flex:1"><button class="btn btn-ghost btn-sm" onclick="setPreferredCam()">Save</button></div>
|
|
||||||
<div style="margin-top:.4rem"><a href="/api/vision/preview.mjpg" target="_blank" style="font-size:.72rem;color:var(--accent);text-decoration:none">Open MJPEG Stream in new tab</a></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<!-- YOLO Detector -->
|
|
||||||
<div class="card">
|
|
||||||
<h3>YOLO Vision Detector</h3>
|
|
||||||
<div class="row">
|
|
||||||
<button class="btn btn-primary" onclick="loadDetector(this)">Load Model</button>
|
|
||||||
<button class="btn btn-success" onclick="runDetection(this)">Detect Now</button>
|
|
||||||
<span id="yolo-status" class="badge"></span>
|
|
||||||
</div>
|
|
||||||
<div id="yolo-result" style="margin-top:.4rem;font-size:.72rem;color:var(--muted)"></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<!-- Photo Gallery -->
|
|
||||||
<div class="card card-full">
|
<div class="card card-full">
|
||||||
<h3>Photo Gallery</h3>
|
<h3>Camera Vision & Face Recognition</h3>
|
||||||
<div class="row">
|
<div class="row" style="gap:1rem;flex-wrap:wrap">
|
||||||
<button class="btn btn-ghost btn-sm" onclick="refreshPhotos()">Refresh</button>
|
<div class="row" style="gap:.4rem">
|
||||||
<button class="btn btn-primary btn-sm" onclick="downloadAllPhotos()">Download ZIP</button>
|
<label style="min-width:7rem">Camera Vision</label>
|
||||||
<button class="btn btn-danger btn-sm" onclick="clearPhotos()">Clear All</button>
|
<label class="switch">
|
||||||
<span id="photo-count" style="margin-left:auto;font-size:.72rem;color:var(--muted)"></span>
|
<input type="checkbox" id="rec-vision-toggle" onchange="setVisionEnabled(this.checked)">
|
||||||
|
<span class="slider"></span>
|
||||||
|
</label>
|
||||||
|
<span id="rec-camera-status" class="badge" style="margin-left:.5rem">--</span>
|
||||||
|
</div>
|
||||||
|
<div class="row" style="gap:.4rem">
|
||||||
|
<label style="min-width:7rem">Face Recognition</label>
|
||||||
|
<label class="switch">
|
||||||
|
<input type="checkbox" id="rec-facerec-toggle" onchange="setFaceRecEnabled(this.checked)">
|
||||||
|
<span class="slider"></span>
|
||||||
|
</label>
|
||||||
|
<span id="rec-facerec-status" class="badge" style="margin-left:.5rem">--</span>
|
||||||
|
</div>
|
||||||
|
<button class="btn btn-ghost btn-sm" onclick="syncGallery(this)" title="Re-send gallery to live Gemini session">↻ Sync Gallery</button>
|
||||||
</div>
|
</div>
|
||||||
<div class="gallery-grid" id="photo-gallery"><div class="empty">No photos yet</div></div>
|
<div style="margin-top:.4rem;font-size:.7rem;color:var(--dim)" id="rec-status-line">
|
||||||
|
Toggles take effect within ~1 second on the running Gemini session — no restart required.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Live Preview -->
|
||||||
|
<div class="card">
|
||||||
|
<h3>Live Preview</h3>
|
||||||
|
<div id="rec-preview-wrap" style="background:#000;border-radius:.4rem;overflow:hidden;text-align:center;min-height:180px;display:flex;align-items:center;justify-content:center">
|
||||||
|
<img id="rec-preview-img" src="" alt="" style="max-width:100%;display:none">
|
||||||
|
<div id="rec-preview-empty" style="color:var(--dim);font-size:.75rem;padding:1rem">Camera off — toggle Vision ON to see the live feed.</div>
|
||||||
|
</div>
|
||||||
|
<div style="margin-top:.3rem;font-size:.65rem;color:var(--dim)" id="rec-preview-meta">--</div>
|
||||||
|
<div style="margin-top:.45rem">
|
||||||
|
<div style="font-size:.7rem;color:var(--dim);margin-bottom:.2rem">Resolution / FPS</div>
|
||||||
|
<div class="row" style="gap:.25rem;flex-wrap:wrap" id="rec-res-buttons">
|
||||||
|
<button class="btn btn-ghost btn-sm" data-w="424" data-h="240" data-fps="15" onclick="setCameraMode(this)">424×240 · 15</button>
|
||||||
|
<button class="btn btn-ghost btn-sm" data-w="424" data-h="240" data-fps="30" onclick="setCameraMode(this)">424×240 · 30</button>
|
||||||
|
<button class="btn btn-ghost btn-sm" data-w="640" data-h="480" data-fps="15" onclick="setCameraMode(this)">640×480 · 15</button>
|
||||||
|
<button class="btn btn-ghost btn-sm" data-w="640" data-h="480" data-fps="30" onclick="setCameraMode(this)">640×480 · 30</button>
|
||||||
|
<button class="btn btn-ghost btn-sm" data-w="1280" data-h="720" data-fps="15" onclick="setCameraMode(this)">1280×720 · 15</button>
|
||||||
|
<button class="btn btn-ghost btn-sm" data-w="1920" data-h="1080" data-fps="8" onclick="setCameraMode(this)">1920×1080 · 8</button>
|
||||||
|
</div>
|
||||||
|
<div style="font-size:.7rem;color:var(--dim);margin:.35rem 0 .2rem">JPEG Quality</div>
|
||||||
|
<div class="row" style="gap:.25rem" id="rec-quality-buttons">
|
||||||
|
<button class="btn btn-ghost btn-sm" data-q="50" onclick="setCameraQuality(this)">Low</button>
|
||||||
|
<button class="btn btn-ghost btn-sm" data-q="70" onclick="setCameraQuality(this)">Med</button>
|
||||||
|
<button class="btn btn-ghost btn-sm" data-q="85" onclick="setCameraQuality(this)">High</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div style="margin-top:.3rem;font-size:.6rem;color:var(--dim)">
|
||||||
|
Each button rebuilds the capture pipeline (~0.5 s). Modes match the
|
||||||
|
RealSense D435I colour sensor — on USB 2.x, stick to 424×240 or 640×480.
|
||||||
|
If the feed is grayscale/IR, pin the colour node with <code>SANAD_CAMERA_USB_INDEX</code>.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Add New Face -->
|
||||||
|
<div class="card">
|
||||||
|
<h3>Add New Face</h3>
|
||||||
|
<div class="row">
|
||||||
|
<label>Name</label>
|
||||||
|
<input id="rec-newface-name" placeholder="(optional)" style="flex:1">
|
||||||
|
</div>
|
||||||
|
<div style="margin-top:.4rem">
|
||||||
|
<label style="font-size:.72rem;color:var(--dim)">Description — who is this person? (Gemini reads it)</label>
|
||||||
|
<textarea id="rec-newface-desc" rows="2" placeholder="e.g. Qassam, lead engineer on the robotics team — likes coffee" style="width:100%;margin-top:.2rem;font-size:.78rem;resize:vertical"></textarea>
|
||||||
|
</div>
|
||||||
|
<div class="row" style="margin-top:.4rem">
|
||||||
|
<button class="btn btn-success btn-sm" onclick="enrollFromCamera(this)" title="Snap current frame">📷 Capture</button>
|
||||||
|
<label class="btn btn-primary btn-sm" style="cursor:pointer;margin:0">
|
||||||
|
📁 Upload images
|
||||||
|
<input type="file" id="rec-upload-input" multiple accept="image/jpeg,image/png" style="display:none" onchange="enrollFromUpload(this)">
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
<div style="margin-top:.4rem;font-size:.65rem;color:var(--dim)">
|
||||||
|
Tip: add 2–3 photos / different angles per person for best recognition.
|
||||||
|
The description is sent to Gemini with the photos — it can then greet
|
||||||
|
and talk about the person using what you wrote.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Enrolled Faces -->
|
||||||
|
<div class="card card-full">
|
||||||
|
<h3>Enrolled Faces <span id="rec-faces-count" style="font-weight:normal;color:var(--dim);font-size:.75rem"></span></h3>
|
||||||
|
<div class="row">
|
||||||
|
<button class="btn btn-ghost btn-sm" onclick="refreshFaces()">↻ Refresh</button>
|
||||||
|
<span style="margin-left:auto;font-size:.65rem;color:var(--dim)" id="rec-gallery-version"></span>
|
||||||
|
</div>
|
||||||
|
<div id="rec-faces-list" style="margin-top:.6rem"><div class="empty">Loading…</div></div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
@ -621,7 +629,14 @@ function btnDone(b){if(b&&b.classList)b.classList.remove('loading');}
|
|||||||
async function api(m,p,b){const o={method:m,headers:{'Content-Type':'application/json'}};if(b)o.body=JSON.stringify(b);const r=await fetch(API+p,o);const j=await r.json();if(!r.ok){toast(j.detail||j.error||'Error '+r.status,'err');throw new Error(j.detail||j.error);}return j;}
|
async function api(m,p,b){const o={method:m,headers:{'Content-Type':'application/json'}};if(b)o.body=JSON.stringify(b);const r=await fetch(API+p,o);const j=await r.json();if(!r.ok){toast(j.detail||j.error||'Error '+r.status,'err');throw new Error(j.detail||j.error);}return j;}
|
||||||
|
|
||||||
// Tabs
|
// Tabs
|
||||||
function switchTab(name){document.querySelectorAll('.tab').forEach(t=>t.classList.toggle('active',t.textContent.toLowerCase().includes(name.slice(0,4))));document.querySelectorAll('.tab-content').forEach(c=>c.classList.toggle('active',c.id==='tab-'+name));}
|
function switchTab(name){
|
||||||
|
// Match the nav tab by its exact onclick target — NOT a substring of the
|
||||||
|
// label. "recognition" and "recordings" both start with "reco", so the old
|
||||||
|
// textContent.includes(name.slice(0,4)) lit up both tabs at once.
|
||||||
|
const want="switchTab('"+name+"')";
|
||||||
|
document.querySelectorAll('.tab').forEach(t=>t.classList.toggle('active',(t.getAttribute('onclick')||'').includes(want)));
|
||||||
|
document.querySelectorAll('.tab-content').forEach(c=>c.classList.toggle('active',c.id==='tab-'+name));
|
||||||
|
}
|
||||||
|
|
||||||
// Emergency Stop
|
// Emergency Stop
|
||||||
async function emergencyStop(){try{await api('POST','/api/replay/cancel');await api('POST','/api/live-voice/stop');toast('EMERGENCY STOP sent','err');}catch(e){}}
|
async function emergencyStop(){try{await api('POST','/api/replay/cancel');await api('POST','/api/live-voice/stop');toast('EMERGENCY STOP sent','err');}catch(e){}}
|
||||||
@ -894,155 +909,6 @@ async function applyManualAudio(b){
|
|||||||
refreshAudioDevices();
|
refreshAudioDevices();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Camera
|
|
||||||
async function capturePhoto(b){btnLoad(b);try{await api('POST','/api/vision/capture');toast('Photo captured','ok');refreshPhotos();}catch(e){}btnDone(b);}
|
|
||||||
async function captureWithMotion(b){btnLoad(b);const g=document.getElementById('capture-gesture').value;const url=g?'/api/vision/capture?motion_file='+encodeURIComponent(g):'/api/vision/capture';try{await api('POST',url);toast('Captured'+(g?' + gesture':''),'ok');refreshPhotos();}catch(e){}btnDone(b);}
|
|
||||||
async function refreshCamSources(){try{const r=await api('GET','/api/vision/cameras');const sel=document.getElementById('cam-source');sel.innerHTML='<option value="">Auto</option>'+(r||[]).map(c=>`<option value="${esc(c.source||c.serial||c.name)}">${esc(c.name||c.source||c.serial)}</option>`).join('');}catch(e){}}
|
|
||||||
async function populateGestureSelect(){try{const r=await api('GET','/api/replay/files');const sel=document.getElementById('capture-gesture');sel.innerHTML='<option value="">No gesture</option>'+(r.files||[]).map(f=>`<option value="${esc(f.name)}">${esc(f.name)}</option>`).join('');}catch(e){}}
|
|
||||||
async function setCamSource(v){if(v)try{await api('POST','/api/vision/set-source',{source:v});toast('Camera source set','ok');}catch(e){}}
|
|
||||||
async function setCamRes(){const w=+document.getElementById('cam-w').value,h=+document.getElementById('cam-h').value,f=+document.getElementById('cam-fps').value;try{await api('POST','/api/vision/set-resolution',{width:w,height:h,fps:f});toast(`${w}x${h}@${f}fps`,'ok');}catch(e){}}
|
|
||||||
async function setPreferredCam(){const s=document.getElementById('cam-serial').value;if(s)try{await api('POST','/api/vision/set-preferred-camera',{serial:s});toast('Saved','ok');}catch(e){}}
|
|
||||||
|
|
||||||
// Photos
|
|
||||||
async function refreshPhotos(){try{const r=await api('GET','/api/vision/photos');const el=document.getElementById('photo-gallery');document.getElementById('photo-count').textContent=`${r.total} photos`;if(!r.photos?.length){el.innerHTML='<div class="empty">No photos yet</div>';return;}el.innerHTML=r.photos.map(p=>{const n=esc(p.name);return`<img src="/api/vision/photos/${encodeURIComponent(p.name)}" title="${n}\n${p.size_kb}KB\n${p.created_at}" onclick="if(confirm('Delete ${n}?'))deletePhoto('${n}')">`;}).join('');}catch(e){}}
|
|
||||||
async function deletePhoto(n){try{await api('DELETE','/api/vision/photos/'+encodeURIComponent(n));toast('Deleted','ok');refreshPhotos();}catch(e){}}
|
|
||||||
function downloadAllPhotos(){window.open('/api/vision/photos/download-zip');}
|
|
||||||
async function clearPhotos(){if(confirm('Delete ALL photos?'))try{const r=await api('POST','/api/vision/photos/clear');toast(`Cleared ${r.deleted_count}`,'ok');refreshPhotos();}catch(e){}}
|
|
||||||
|
|
||||||
// Camera devices
|
|
||||||
async function refreshCamDevices(){
|
|
||||||
try{
|
|
||||||
const r=await api('GET','/api/vision/devices');
|
|
||||||
const cur=r.current||{};
|
|
||||||
const curDev=cur.device||{};
|
|
||||||
const curId=cur.profile?cur.profile.id:'';
|
|
||||||
const detIds=r.detected_ids||[];
|
|
||||||
// Profile dropdown
|
|
||||||
const profSel=document.getElementById('camdev-profile');
|
|
||||||
profSel.innerHTML=(r.profiles||[]).map(p=>{
|
|
||||||
const avail=detIds.indexOf(p.id)>=0;
|
|
||||||
const sel=p.id===curId?' selected':'';
|
|
||||||
const tag=avail?'':' (no device)';
|
|
||||||
return `<option value="${esc(p.id)}"${sel}${avail?'':' disabled'}>${esc(p.label)}${tag}</option>`;
|
|
||||||
}).join('');
|
|
||||||
// Pin profile dropdown
|
|
||||||
const pinSel=document.getElementById('camdev-pin-profile');
|
|
||||||
pinSel.innerHTML=(r.profiles||[]).filter(p=>p.backend==='realsense').map(p=>
|
|
||||||
`<option value="${esc(p.id)}">${esc(p.label)}</option>`).join('');
|
|
||||||
// Detected summary
|
|
||||||
const det=document.getElementById('camdev-detected');
|
|
||||||
const counts=r.counts||{};
|
|
||||||
det.innerHTML=`<strong>Detected:</strong> ${counts.realsense||0} RealSense, ${counts.v4l2||0} V4L2 (total ${counts.total||0})`;
|
|
||||||
// All devices list
|
|
||||||
const list=document.getElementById('camdev-list');
|
|
||||||
if(!(r.all_devices||[]).length){
|
|
||||||
list.innerHTML='<div class="empty">No cameras plugged</div>';
|
|
||||||
}else{
|
|
||||||
list.innerHTML='<table><tr><th>Backend</th><th>Name</th><th>Serial / Path</th><th></th></tr>'+
|
|
||||||
r.all_devices.map(d=>{
|
|
||||||
const idVal=d.serial||d.device_path;
|
|
||||||
const action=d.serial
|
|
||||||
?`<button class="btn btn-primary btn-sm" onclick="selectCamSerial('${esc(d.serial)}')">Use</button>`
|
|
||||||
:`<button class="btn btn-primary btn-sm" onclick="selectCamPath('${esc(d.device_path)}')">Use</button>`;
|
|
||||||
return `<tr><td>${esc(d.backend)}</td><td>${esc(d.name||'-')}</td><td><code style="font-size:.65rem">${esc(idVal||'-')}</code></td><td>${action}</td></tr>`;
|
|
||||||
}).join('')+'</table>';
|
|
||||||
}
|
|
||||||
// Status text
|
|
||||||
const st=document.getElementById('camdev-status');
|
|
||||||
if(curDev.name){
|
|
||||||
st.innerHTML=`<strong>Active:</strong> ${esc(curDev.name)}<br>`+
|
|
||||||
(curDev.serial?`Serial: <code>${esc(curDev.serial)}</code><br>`:'')+
|
|
||||||
(curDev.device_path?`Path: <code>${esc(curDev.device_path)}</code><br>`:'')+
|
|
||||||
`<span style="color:var(--muted)">via ${esc(cur.source_kind||'?')}</span>`;
|
|
||||||
}else{
|
|
||||||
st.innerHTML='<span style="color:#f55">No camera selected</span>';
|
|
||||||
}
|
|
||||||
}catch(e){}
|
|
||||||
}
|
|
||||||
async function scanCameras(b){
|
|
||||||
if(b)btnLoad(b);
|
|
||||||
try{await api('POST','/api/vision/devices/scan');toast('Re-scanned cameras','ok');}catch(e){}
|
|
||||||
if(b)btnDone(b);
|
|
||||||
refreshCamDevices();
|
|
||||||
}
|
|
||||||
async function selectCamProfile(profileId){
|
|
||||||
if(!profileId)return;
|
|
||||||
try{
|
|
||||||
await api('POST','/api/vision/devices/select-profile',{profile_id:profileId});
|
|
||||||
toast('Camera profile switched','ok');
|
|
||||||
}catch(e){}
|
|
||||||
refreshCamDevices();
|
|
||||||
}
|
|
||||||
async function selectCamSerial(serial){
|
|
||||||
if(!serial)return;
|
|
||||||
try{
|
|
||||||
await api('POST','/api/vision/devices/select-serial',{serial});
|
|
||||||
toast('Camera selected by serial','ok');
|
|
||||||
}catch(e){}
|
|
||||||
refreshCamDevices();
|
|
||||||
}
|
|
||||||
async function selectCamPath(path){
|
|
||||||
if(!path)return;
|
|
||||||
try{
|
|
||||||
await api('POST','/api/vision/devices/select-path',{device_path:path});
|
|
||||||
toast('Camera selected by path','ok');
|
|
||||||
}catch(e){}
|
|
||||||
refreshCamDevices();
|
|
||||||
}
|
|
||||||
async function startLocalCam(b){
|
|
||||||
btnLoad(b);
|
|
||||||
try{
|
|
||||||
const r=await api('POST','/api/vision/local/start',{});
|
|
||||||
if(r.ok){
|
|
||||||
toast('Camera started: '+(r.backend||'?'),'ok');
|
|
||||||
}else{
|
|
||||||
toast('Camera start failed: '+(r.error||'unknown'),'err');
|
|
||||||
}
|
|
||||||
}catch(e){}
|
|
||||||
btnDone(b);
|
|
||||||
setTimeout(refreshLocalCam, 500);
|
|
||||||
}
|
|
||||||
async function stopLocalCam(b){
|
|
||||||
btnLoad(b);
|
|
||||||
try{await api('POST','/api/vision/local/stop');toast('Camera stopped','info');}catch(e){}
|
|
||||||
btnDone(b);
|
|
||||||
refreshLocalCam();
|
|
||||||
}
|
|
||||||
async function refreshLocalCam(){
|
|
||||||
try{
|
|
||||||
const r=await api('GET','/api/vision/local/status');
|
|
||||||
const els=[document.getElementById('local-cam-state'),document.getElementById('ops-cam-state')];
|
|
||||||
els.forEach(el=>{
|
|
||||||
if(!el)return;
|
|
||||||
if(r.running){
|
|
||||||
el.textContent=(r.backend||'on')+(r.serial?(' '+r.serial.slice(-6)):'')+' '+r.width+'x'+r.height+'@'+r.fps;
|
|
||||||
el.className='badge badge-ok';
|
|
||||||
}else if(r.last_error){
|
|
||||||
el.textContent='error';
|
|
||||||
el.className='badge badge-err';
|
|
||||||
el.title=r.last_error;
|
|
||||||
}else{
|
|
||||||
el.textContent='stopped';
|
|
||||||
el.className='badge badge-warn';
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}catch(e){}
|
|
||||||
}
|
|
||||||
|
|
||||||
async function pinCamSerial(b){
|
|
||||||
const pid=document.getElementById('camdev-pin-profile').value;
|
|
||||||
const serial=document.getElementById('camdev-pin-serial').value.trim();
|
|
||||||
if(!pid||!serial){toast('Pick a profile and enter a serial','err');return;}
|
|
||||||
btnLoad(b);
|
|
||||||
try{
|
|
||||||
await api('POST','/api/vision/devices/assign-serial',{profile_id:pid,serial:serial});
|
|
||||||
toast(`Pinned ${serial} → ${pid}`,'ok');
|
|
||||||
document.getElementById('camdev-pin-serial').value='';
|
|
||||||
}catch(e){}
|
|
||||||
btnDone(b);
|
|
||||||
refreshCamDevices();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Motion
|
// Motion
|
||||||
async function toggleGestural(v){try{await api('POST','/api/motion/gestural-speaking?enabled='+v);}catch(e){}}
|
async function toggleGestural(v){try{await api('POST','/api/motion/gestural-speaking?enabled='+v);}catch(e){}}
|
||||||
let _armBusy=false,_runId=null;
|
let _armBusy=false,_runId=null;
|
||||||
@ -1212,10 +1078,10 @@ async function stopCombo(b){
|
|||||||
async function refreshReplayFiles(){try{const r=await api('GET','/api/replay/files');const el=document.getElementById('replay-files');if(!(r.files||[]).length){el.innerHTML='<div class="empty">No motion files</div>';return;}el.innerHTML='<table><tr><th>File</th><th>Frames</th><th>Duration</th><th>Size</th><th></th></tr>'+(r.files||[]).map(f=>`<tr><td>${esc(f.name)}</td><td>${f.frames}</td><td>${f.duration_sec}s</td><td>${f.size_kb}KB</td><td><button class="btn btn-primary btn-sm" onclick="document.getElementById('replay-name').value='${esc(f.name)}';testReplay()">Play</button> <button class="btn btn-danger btn-sm" onclick="deleteMotionFile('${esc(f.name)}')">Del</button></td></tr>`).join('')+'</table>';}catch(e){}}
|
async function refreshReplayFiles(){try{const r=await api('GET','/api/replay/files');const el=document.getElementById('replay-files');if(!(r.files||[]).length){el.innerHTML='<div class="empty">No motion files</div>';return;}el.innerHTML='<table><tr><th>File</th><th>Frames</th><th>Duration</th><th>Size</th><th></th></tr>'+(r.files||[]).map(f=>`<tr><td>${esc(f.name)}</td><td>${f.frames}</td><td>${f.duration_sec}s</td><td>${f.size_kb}KB</td><td><button class="btn btn-primary btn-sm" onclick="document.getElementById('replay-name').value='${esc(f.name)}';testReplay()">Play</button> <button class="btn btn-danger btn-sm" onclick="deleteMotionFile('${esc(f.name)}')">Del</button></td></tr>`).join('')+'</table>';}catch(e){}}
|
||||||
async function testReplay(b){const n=document.getElementById('replay-name').value,s=parseFloat(document.getElementById('replay-speed').value);if(!n)return;btnLoad(b);try{await api('POST','/api/replay/test',{name:n,speed:s});toast('Replay: '+n,'ok');pollArmBusy();}catch(e){}btnDone(b);}
|
async function testReplay(b){const n=document.getElementById('replay-name').value,s=parseFloat(document.getElementById('replay-speed').value);if(!n)return;btnLoad(b);try{await api('POST','/api/replay/test',{name:n,speed:s});toast('Replay: '+n,'ok');pollArmBusy();}catch(e){}btnDone(b);}
|
||||||
async function cancelReplay(){try{const r=await api('POST','/api/replay/cancel');toast(r&&r.message?r.message:'Cancelled','info');}catch(e){}}
|
async function cancelReplay(){try{const r=await api('POST','/api/replay/cancel');toast(r&&r.message?r.message:'Cancelled','info');}catch(e){}}
|
||||||
async function deleteMotionFile(n){if(confirm('Delete '+n+'?'))try{await api('DELETE','/api/replay/files/'+encodeURIComponent(n));toast('Deleted','ok');refreshReplayFiles();populateGestureSelect();}catch(e){}}
|
async function deleteMotionFile(n){if(confirm('Delete '+n+'?'))try{await api('DELETE','/api/replay/files/'+encodeURIComponent(n));toast('Deleted','ok');refreshReplayFiles();}catch(e){}}
|
||||||
async function uploadMotionFile(input){if(!input.files[0])return;const fd=new FormData();fd.append('file',input.files[0]);try{const r=await fetch('/api/replay/files/upload',{method:'POST',body:fd});if(!r.ok){const j=await r.json();toast(j.detail||'Upload failed','err');}else{toast('Uploaded','ok');refreshReplayFiles();populateGestureSelect();}}catch(e){toast('Upload error','err');}input.value='';}
|
async function uploadMotionFile(input){if(!input.files[0])return;const fd=new FormData();fd.append('file',input.files[0]);try{const r=await fetch('/api/replay/files/upload',{method:'POST',body:fd});if(!r.ok){const j=await r.json();toast(j.detail||'Upload failed','err');}else{toast('Uploaded','ok');refreshReplayFiles();}}catch(e){toast('Upload error','err');}input.value='';}
|
||||||
async function startTeaching(b){const n=document.getElementById('teach-name').value,d=parseFloat(document.getElementById('teach-duration').value);if(!n)return toast('Enter name','err');btnLoad(b);try{await api('POST','/api/replay/teach/start',{name:n,duration_sec:d});toast('Teaching: '+n,'ok');pollTeachStatus();}catch(e){}btnDone(b);}
|
async function startTeaching(b){const n=document.getElementById('teach-name').value,d=parseFloat(document.getElementById('teach-duration').value);if(!n)return toast('Enter name','err');btnLoad(b);try{await api('POST','/api/replay/teach/start',{name:n,duration_sec:d});toast('Teaching: '+n,'ok');pollTeachStatus();}catch(e){}btnDone(b);}
|
||||||
async function stopTeaching(b){btnLoad(b);try{const r=await api('POST','/api/replay/teach/stop');toast(`Saved: ${r.name} (${r.frames} frames)`,'ok');document.getElementById('teach-status').textContent=`Done: ${r.frames} frames`;refreshReplayFiles();populateGestureSelect();}catch(e){}btnDone(b);}
|
async function stopTeaching(b){btnLoad(b);try{const r=await api('POST','/api/replay/teach/stop');toast(`Saved: ${r.name} (${r.frames} frames)`,'ok');document.getElementById('teach-status').textContent=`Done: ${r.frames} frames`;refreshReplayFiles();}catch(e){}btnDone(b);}
|
||||||
let _teachPoll;function pollTeachStatus(){clearInterval(_teachPoll);_teachPoll=setInterval(async()=>{try{const r=await api('GET','/api/replay/teach/status');document.getElementById('teach-status').textContent=`${r.phase} | ${r.elapsed_sec}s | ${r.frames_recorded} frames`;if(!r.recording){clearInterval(_teachPoll);refreshReplayFiles();}}catch(e){clearInterval(_teachPoll);}},500);}
|
let _teachPoll;function pollTeachStatus(){clearInterval(_teachPoll);_teachPoll=setInterval(async()=>{try{const r=await api('GET','/api/replay/teach/status');document.getElementById('teach-status').textContent=`${r.phase} | ${r.elapsed_sec}s | ${r.frames_recorded} frames`;if(!r.recording){clearInterval(_teachPoll);refreshReplayFiles();}}catch(e){clearInterval(_teachPoll);}},500);}
|
||||||
|
|
||||||
// Scripts
|
// Scripts
|
||||||
@ -1288,11 +1154,6 @@ async function trReplayLast(b){btnLoad(b);try{await api('POST','/api/typed-repla
|
|||||||
async function trSaveLast(b){btnLoad(b);try{await api('POST','/api/typed-replay/save-last',{record_name:document.getElementById('tr-name').value});toast('Saved','ok');refreshTR();refreshRecords();}catch(e){}btnDone(b);}
|
async function trSaveLast(b){btnLoad(b);try{await api('POST','/api/typed-replay/save-last',{record_name:document.getElementById('tr-name').value});toast('Saved','ok');refreshTR();refreshRecords();}catch(e){}btnDone(b);}
|
||||||
async function refreshTR(){try{const r=await api('GET','/api/typed-replay/status');const s=r.session||{};document.getElementById('tr-session').innerHTML=`<strong>Text:</strong> ${esc(s.text||'--')}<br><strong>Audio:</strong> ${s.has_audio?'Yes':'No'} | <strong>Capture:</strong> ${s.has_capture?'Yes':'No'}<br><strong>Replays:</strong> ${s.replay_count||0}<br><strong>Generated:</strong> ${s.generated_at||'--'}<br><strong>Saved:</strong> ${esc(s.saved_as||'--')}`;}catch(e){}}
|
async function refreshTR(){try{const r=await api('GET','/api/typed-replay/status');const s=r.session||{};document.getElementById('tr-session').innerHTML=`<strong>Text:</strong> ${esc(s.text||'--')}<br><strong>Audio:</strong> ${s.has_audio?'Yes':'No'} | <strong>Capture:</strong> ${s.has_capture?'Yes':'No'}<br><strong>Replays:</strong> ${s.replay_count||0}<br><strong>Generated:</strong> ${s.generated_at||'--'}<br><strong>Saved:</strong> ${esc(s.saved_as||'--')}`;}catch(e){}}
|
||||||
|
|
||||||
// YOLO
|
|
||||||
async function loadDetector(b){btnLoad(b);try{const r=await api('POST','/api/detector/load');const el=document.getElementById('yolo-status');el.textContent=r.ok?'Loaded':'Failed';el.className='badge '+(r.ok?'badge-ok':'badge-err');toast(r.ok?'Model loaded':'Failed',r.ok?'ok':'err');}catch(e){}btnDone(b);}
|
|
||||||
async function runDetection(b){btnLoad(b);try{const r=await api('POST','/api/detector/detect');document.getElementById('yolo-result').innerHTML=`<strong>Persons:</strong> ${r.person_count} | <strong>Faces:</strong> ${r.face_count} | <strong>Group:</strong> ${r.group_detected?'Yes ('+r.group_size+')':'No'} | <strong>Intent:</strong> ${r.intent_detected?'Yes':'No'} | ${r.detection_ms}ms`;}catch(e){document.getElementById('yolo-result').textContent='Detection failed';}btnDone(b);}
|
|
||||||
async function refreshDetector(){try{const r=await api('GET','/api/detector/status');const el=document.getElementById('yolo-status');el.textContent=r.loaded?'Loaded':'Not loaded';el.className='badge '+(r.loaded?'badge-ok':'badge-warn');}catch(e){}}
|
|
||||||
|
|
||||||
// Wake Phrases
|
// Wake Phrases
|
||||||
async function refreshWakeActions(){try{const r=await api('GET','/api/wake-phrases/');const sel=document.getElementById('wp-action');sel.innerHTML='<option value="">-- select action --</option>'+(r.actions||[]).map(a=>`<option value="${esc(a.action)}">${esc(a.action)} (${a.phrase_count})</option>`).join('');}catch(e){}}
|
async function refreshWakeActions(){try{const r=await api('GET','/api/wake-phrases/');const sel=document.getElementById('wp-action');sel.innerHTML='<option value="">-- select action --</option>'+(r.actions||[]).map(a=>`<option value="${esc(a.action)}">${esc(a.action)} (${a.phrase_count})</option>`).join('');}catch(e){}}
|
||||||
async function loadWakePhrases(action){if(!action)return;try{const r=await api('GET',`/api/wake-phrases/${encodeURIComponent(action)}`);const el=document.getElementById('wp-phrases');if(!(r.phrases||[]).length){el.innerHTML='<div class="empty">No phrases</div>';return;}el.innerHTML=(r.phrases||[]).map(p=>`<div class="row"><span style="flex:1;font-size:.78rem">${esc(p)}</span><button class="btn btn-danger btn-sm" onclick="removeWakePhrase(document.getElementById('wp-action').value,this.dataset.p)" data-p="${esc(p)}">X</button></div>`).join('');}catch(e){}}
|
async function loadWakePhrases(action){if(!action)return;try{const r=await api('GET',`/api/wake-phrases/${encodeURIComponent(action)}`);const el=document.getElementById('wp-phrases');if(!(r.phrases||[]).length){el.innerHTML='<div class="empty">No phrases</div>';return;}el.innerHTML=(r.phrases||[]).map(p=>`<div class="row"><span style="flex:1;font-size:.78rem">${esc(p)}</span><button class="btn btn-danger btn-sm" onclick="removeWakePhrase(document.getElementById('wp-action').value,this.dataset.p)" data-p="${esc(p)}">X</button></div>`).join('');}catch(e){}}
|
||||||
@ -1386,10 +1247,6 @@ async function refreshStatus(){try{const s=await api('GET','/api/status');docume
|
|||||||
// WebSocket logs
|
// WebSocket logs
|
||||||
let logWs;function connectLogs(){const p=location.protocol==='https:'?'wss':'ws';logWs=new WebSocket(`${p}://${location.host}/ws/logs`);const box=document.getElementById('log-box');logWs.onmessage=e=>{box.textContent+=e.data+'\n';if(box.childNodes.length>1000)box.textContent=box.textContent.split('\n').slice(-500).join('\n');box.scrollTop=box.scrollHeight;};logWs.onclose=()=>setTimeout(connectLogs,3000);}
|
let logWs;function connectLogs(){const p=location.protocol==='https:'?'wss':'ws';logWs=new WebSocket(`${p}://${location.host}/ws/logs`);const box=document.getElementById('log-box');logWs.onmessage=e=>{box.textContent+=e.data+'\n';if(box.childNodes.length>1000)box.textContent=box.textContent.split('\n').slice(-500).join('\n');box.scrollTop=box.scrollHeight;};logWs.onclose=()=>setTimeout(connectLogs,3000);}
|
||||||
|
|
||||||
// WebSocket camera
|
|
||||||
let camWs;function connectCamera(){if(camWs&&camWs.readyState<=1)try{camWs.close();}catch(e){}const p=location.protocol==='https:'?'wss':'ws';camWs=new WebSocket(`${p}://${location.host}/ws/camera`);camWs.binaryType='arraybuffer';const canvas=document.getElementById('camera-feed'),ctx=canvas.getContext('2d');camWs.onmessage=e=>{const url=URL.createObjectURL(new Blob([e.data],{type:'image/jpeg'})),img=new Image();img.onload=()=>{ctx.drawImage(img,0,0,canvas.width,canvas.height);URL.revokeObjectURL(url);};img.onerror=()=>URL.revokeObjectURL(url);img.src=url;};camWs.onclose=()=>setTimeout(connectCamera,3000);}
|
|
||||||
function reconnectCamera(){if(camWs)try{camWs.close();}catch(e){}setTimeout(connectCamera,300);toast('Camera reconnecting...','info');}
|
|
||||||
|
|
||||||
// Auto-connect Gemini and auto-start Live Subprocess on page load
|
// Auto-connect Gemini and auto-start Live Subprocess on page load
|
||||||
async function autoConnectGemini(){
|
async function autoConnectGemini(){
|
||||||
try{
|
try{
|
||||||
@ -1413,10 +1270,331 @@ async function autoStartLiveSub(){
|
|||||||
}catch(e){}
|
}catch(e){}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Recognition tab (camera vision + face recognition) ──
|
||||||
|
// Mirror of /api/recognition/state.vision_enabled — kept fresh by
|
||||||
|
// refreshRecognition() so the Live-Gemini-panel Camera button can flip
|
||||||
|
// it without a round-trip GET.
|
||||||
|
let _recVisionEnabled=false;
|
||||||
|
async function refreshRecognition(){
|
||||||
|
try{
|
||||||
|
const r=await api('GET','/api/recognition/state');
|
||||||
|
_recVisionEnabled=!!r.vision_enabled;
|
||||||
|
const v=document.getElementById('rec-vision-toggle');
|
||||||
|
const f=document.getElementById('rec-facerec-toggle');
|
||||||
|
if(v) v.checked=!!r.vision_enabled;
|
||||||
|
if(f) f.checked=!!r.face_rec_enabled;
|
||||||
|
const cs=document.getElementById('rec-camera-status');
|
||||||
|
if(cs){
|
||||||
|
const c=r.camera||{};
|
||||||
|
cs.title=c.error||'';
|
||||||
|
if(c.running&&c.backend){
|
||||||
|
cs.textContent=c.backend+' '+(c.width||'')+'x'+(c.height||'')
|
||||||
|
+(c.reconnect_count?(' ↻'+c.reconnect_count):'');
|
||||||
|
cs.className='badge badge-ok';
|
||||||
|
}else if(c.running&&!c.backend){
|
||||||
|
// thread alive but between reconnect attempts (camera unplugged)
|
||||||
|
cs.textContent='reconnecting…';cs.className='badge badge-warn';
|
||||||
|
}else if(c.error){
|
||||||
|
cs.textContent='error';cs.className='badge badge-warn';
|
||||||
|
}else{
|
||||||
|
cs.textContent='off';cs.className='badge';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Camera button in the Live Gemini Process panel (Voice & Audio tab) —
|
||||||
|
// same toggle as the Recognition tab, surfaced where it's handy.
|
||||||
|
const cb=document.getElementById('ls-cam-btn');
|
||||||
|
if(cb){
|
||||||
|
const c=r.camera||{};
|
||||||
|
if(c.running&&c.backend){
|
||||||
|
cb.textContent='Camera: ON';
|
||||||
|
cb.className='btn btn-sm btn-success';
|
||||||
|
cb.title='Streaming '+(c.backend||'')+' '+(c.width||'')+'x'+(c.height||'')+' to Gemini — click to turn off';
|
||||||
|
}else if(c.running&&!c.backend){
|
||||||
|
cb.textContent='Camera: …';
|
||||||
|
cb.className='btn btn-sm btn-ghost';
|
||||||
|
cb.title='Camera reconnecting…';
|
||||||
|
}else if(r.vision_enabled&&c.error){
|
||||||
|
cb.textContent='Camera: N/A';
|
||||||
|
cb.className='btn btn-sm btn-danger';
|
||||||
|
cb.title='Vision on but no camera backend: '+(c.error||'');
|
||||||
|
}else{
|
||||||
|
cb.textContent='Camera: OFF';
|
||||||
|
cb.className='btn btn-sm btn-ghost';
|
||||||
|
cb.title='Click to stream camera frames to Gemini Live';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const fs=document.getElementById('rec-facerec-status');
|
||||||
|
if(fs){fs.textContent=r.face_rec_enabled?'on':'off';fs.className='badge '+(r.face_rec_enabled?'badge-ok':'');}
|
||||||
|
const fc=document.getElementById('rec-faces-count');
|
||||||
|
if(fc) fc.textContent=`(${r.faces_count} faces, ${r.photos_count} photos)`;
|
||||||
|
const gv=document.getElementById('rec-gallery-version');
|
||||||
|
if(gv) gv.textContent='v.'+r.gallery_version;
|
||||||
|
// toggle preview visibility — only when actively capturing (has a backend)
|
||||||
|
const img=document.getElementById('rec-preview-img');
|
||||||
|
const empty=document.getElementById('rec-preview-empty');
|
||||||
|
const meta=document.getElementById('rec-preview-meta');
|
||||||
|
const c2=r.camera||{};
|
||||||
|
if(c2.running&&c2.backend){
|
||||||
|
img.style.display='inline-block';empty.style.display='none';
|
||||||
|
if(meta) meta.textContent=`${c2.width}x${c2.height} @ ${c2.fps}fps · seq=${c2.frame_seq}`;
|
||||||
|
}else{
|
||||||
|
img.style.display='none';empty.style.display='block';
|
||||||
|
if(empty) empty.textContent=(c2.running&&!c2.backend)
|
||||||
|
? 'Camera reconnecting…'
|
||||||
|
: 'Camera off — toggle Vision ON to see the live feed.';
|
||||||
|
if(meta) meta.textContent='--';
|
||||||
|
}
|
||||||
|
// Highlight the active resolution / quality buttons to match the live
|
||||||
|
// capture profile (works whether the camera is running or idle).
|
||||||
|
document.querySelectorAll('#rec-res-buttons button').forEach(btn=>{
|
||||||
|
const on = parseInt(btn.dataset.w)===c2.width
|
||||||
|
&& parseInt(btn.dataset.h)===c2.height
|
||||||
|
&& parseInt(btn.dataset.fps)===c2.fps;
|
||||||
|
btn.className='btn btn-sm '+(on?'btn-primary':'btn-ghost');
|
||||||
|
});
|
||||||
|
document.querySelectorAll('#rec-quality-buttons button').forEach(btn=>{
|
||||||
|
const on = parseInt(btn.dataset.q)===c2.jpeg_quality;
|
||||||
|
btn.className='btn btn-sm '+(on?'btn-primary':'btn-ghost');
|
||||||
|
});
|
||||||
|
}catch(e){}
|
||||||
|
}
|
||||||
|
// Resolution / FPS button menu — each click POSTs one mode and the
|
||||||
|
// CameraDaemon rebuilds the pipeline at it. refreshRecognition() then
|
||||||
|
// highlights whichever button matches the live profile.
|
||||||
|
async function setCameraMode(btn){
|
||||||
|
btnLoad(btn);
|
||||||
|
try{
|
||||||
|
const body={
|
||||||
|
width: parseInt(btn.dataset.w),
|
||||||
|
height: parseInt(btn.dataset.h),
|
||||||
|
fps: parseInt(btn.dataset.fps),
|
||||||
|
};
|
||||||
|
const r=await api('POST','/api/recognition/camera-config',body);
|
||||||
|
const p=r.profile||body;
|
||||||
|
toast(`Camera → ${p.width}×${p.height} @ ${p.fps}fps`,'ok');
|
||||||
|
refreshRecognition();
|
||||||
|
}catch(e){toast('Resolution change failed: '+(e.message||e),'err');}
|
||||||
|
btnDone(btn);
|
||||||
|
}
|
||||||
|
async function setCameraQuality(btn){
|
||||||
|
btnLoad(btn);
|
||||||
|
try{
|
||||||
|
const q=parseInt(btn.dataset.q);
|
||||||
|
await api('POST','/api/recognition/camera-config',{jpeg_quality:q});
|
||||||
|
toast('JPEG quality → '+q,'ok');
|
||||||
|
refreshRecognition();
|
||||||
|
}catch(e){toast('Quality change failed: '+(e.message||e),'err');}
|
||||||
|
btnDone(btn);
|
||||||
|
}
|
||||||
|
// Camera button in the Live Gemini Process panel — flips the same
|
||||||
|
// vision toggle the Recognition tab owns. _recVisionEnabled is the
|
||||||
|
// last-known state (refreshed every 5 s by refreshRecognition).
|
||||||
|
async function toggleGeminiCamera(b){
|
||||||
|
if(b) btnLoad(b);
|
||||||
|
const next=!_recVisionEnabled;
|
||||||
|
try{
|
||||||
|
const r=await api('POST','/api/recognition/vision?on='+(next?'1':'0'));
|
||||||
|
_recVisionEnabled=!!(r&&r.vision_enabled);
|
||||||
|
toast(next?'Camera ON for Gemini':'Camera OFF for Gemini','ok');
|
||||||
|
}catch(e){
|
||||||
|
toast('Camera toggle failed: '+(e.message||e),'err');
|
||||||
|
}
|
||||||
|
if(b) btnDone(b);
|
||||||
|
refreshRecognition(); // refresh both the panel button + the Recognition tab
|
||||||
|
}
|
||||||
|
async function setVisionEnabled(on){
|
||||||
|
try{
|
||||||
|
const r=await api('POST','/api/recognition/vision?on='+(on?'1':'0'));
|
||||||
|
toast(on?'Vision ON':'Vision OFF','ok');
|
||||||
|
refreshRecognition();
|
||||||
|
}catch(e){
|
||||||
|
toast('Vision toggle failed: '+(e.message||e),'err');
|
||||||
|
refreshRecognition();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
async function setFaceRecEnabled(on){
|
||||||
|
try{
|
||||||
|
const r=await api('POST','/api/recognition/face-rec?on='+(on?'1':'0'));
|
||||||
|
toast(on?'Face Recognition ON':'Face Recognition OFF','ok');
|
||||||
|
if(r&&r.warning) toast(r.warning,'info');
|
||||||
|
refreshRecognition();
|
||||||
|
}catch(e){
|
||||||
|
toast('Face Rec toggle failed: '+(e.message||e),'err');
|
||||||
|
refreshRecognition();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
async function syncGallery(b){
|
||||||
|
if(b) btnLoad(b);
|
||||||
|
try{await api('POST','/api/recognition/sync');toast('Gallery sync requested','ok');refreshRecognition();}
|
||||||
|
catch(e){toast('Sync failed','err');}
|
||||||
|
if(b) btnDone(b);
|
||||||
|
}
|
||||||
|
// Preview poller — bumps the img src each tick to defeat caching.
|
||||||
|
let _recPreviewTimer=null;
|
||||||
|
function startRecPreview(){
|
||||||
|
if(_recPreviewTimer) return;
|
||||||
|
const tick=()=>{
|
||||||
|
const img=document.getElementById('rec-preview-img');
|
||||||
|
if(img && img.style.display!=='none'){
|
||||||
|
img.src='/api/recognition/frame.jpg?t='+Date.now();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
tick();
|
||||||
|
_recPreviewTimer=setInterval(tick,500);
|
||||||
|
}
|
||||||
|
function stopRecPreview(){if(_recPreviewTimer){clearInterval(_recPreviewTimer);_recPreviewTimer=null;}}
|
||||||
|
// Hook into tab switch — start/stop preview when recognition tab is active.
|
||||||
|
(function(){
|
||||||
|
const origSwitchTab=window.switchTab;
|
||||||
|
window.switchTab=function(name){
|
||||||
|
origSwitchTab(name);
|
||||||
|
if(name==='recognition'){refreshRecognition();refreshFaces();startRecPreview();}
|
||||||
|
else{stopRecPreview();}
|
||||||
|
};
|
||||||
|
})();
|
||||||
|
// Face CRUD stubs — filled in milestone 5
|
||||||
|
async function refreshFaces(){
|
||||||
|
const el=document.getElementById('rec-faces-list');
|
||||||
|
if(!el) return;
|
||||||
|
try{
|
||||||
|
const r=await api('GET','/api/recognition/faces');
|
||||||
|
if(!r.faces||!r.faces.length){el.innerHTML='<div class="empty">No faces enrolled yet</div>';return;}
|
||||||
|
el.innerHTML=r.faces.map(f=>renderFaceCard(f)).join('');
|
||||||
|
}catch(e){
|
||||||
|
el.innerHTML='<div class="empty">(face gallery not yet wired)</div>';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
function renderFaceCard(f){
|
||||||
|
const name=f.name||`(face_${f.id})`;
|
||||||
|
const photos=(f.photos||[]).map(p=>{
|
||||||
|
const url=`/api/recognition/faces/${f.id}/photo/${encodeURIComponent(p.name)}`;
|
||||||
|
return `<div style="display:inline-block;margin:.2rem;text-align:center">
|
||||||
|
<img src="${url}?t=${Date.now()}" alt="${esc(p.name)}" style="width:72px;height:72px;object-fit:cover;border-radius:.3rem;background:#222"/>
|
||||||
|
<div style="font-size:.6rem;color:var(--dim);margin-top:.1rem">
|
||||||
|
<a href="${url}?download=1" download style="color:var(--accent);text-decoration:none">⬇</a>
|
||||||
|
<a href="#" onclick="deletePhoto(${f.id},'${esc(p.name)}');return false" style="color:var(--err);text-decoration:none;margin-left:.3rem">🗑</a>
|
||||||
|
</div>
|
||||||
|
</div>`;
|
||||||
|
}).join('');
|
||||||
|
return `<div class="card" style="margin-top:.5rem">
|
||||||
|
<div class="row" style="align-items:center">
|
||||||
|
<strong>face_${f.id}</strong>
|
||||||
|
<span style="color:var(--dim)">—</span>
|
||||||
|
<span id="rec-name-${f.id}" style="flex:1">${esc(name)}</span>
|
||||||
|
<button class="btn btn-ghost btn-sm" onclick="renameFace(${f.id})" title="Rename">✏</button>
|
||||||
|
<span style="color:var(--dim);font-size:.7rem">${(f.photos||[]).length} photo(s)</span>
|
||||||
|
</div>
|
||||||
|
<div style="margin-top:.25rem;font-size:.72rem">
|
||||||
|
<span style="color:var(--dim)">Description:</span>
|
||||||
|
<span id="rec-desc-${f.id}" style="color:var(--muted)">${f.description?esc(f.description):''}</span>${f.description?'':'<span style="color:var(--dim)">(none — no extra context for Gemini)</span>'}
|
||||||
|
<button class="btn btn-ghost btn-sm" onclick="describeFace(${f.id})" title="Edit description Gemini sees">✏</button>
|
||||||
|
</div>
|
||||||
|
<div style="margin-top:.3rem">${photos}</div>
|
||||||
|
<div class="row" style="margin-top:.4rem">
|
||||||
|
<button class="btn btn-success btn-sm" onclick="captureToFace(${f.id},this)">📷 Capture</button>
|
||||||
|
<label class="btn btn-primary btn-sm" style="cursor:pointer;margin:0">
|
||||||
|
📁 Upload
|
||||||
|
<input type="file" multiple accept="image/jpeg,image/png" style="display:none" onchange="uploadToFace(${f.id},this)">
|
||||||
|
</label>
|
||||||
|
<a class="btn btn-ghost btn-sm" href="/api/recognition/faces/${f.id}/download.zip" download>⬇ ZIP</a>
|
||||||
|
<button class="btn btn-danger btn-sm" style="margin-left:auto" onclick="deleteFace(${f.id})">🗑 Delete face</button>
|
||||||
|
</div>
|
||||||
|
</div>`;
|
||||||
|
}
|
||||||
|
// Build the ?name=&description= query string from the Add-New-Face inputs.
|
||||||
|
function _newFaceQuery(){
|
||||||
|
const name=document.getElementById('rec-newface-name').value.trim();
|
||||||
|
const desc=document.getElementById('rec-newface-desc').value.trim();
|
||||||
|
const qs=[];
|
||||||
|
if(name) qs.push('name='+encodeURIComponent(name));
|
||||||
|
if(desc) qs.push('description='+encodeURIComponent(desc));
|
||||||
|
return qs.length?('?'+qs.join('&')):'';
|
||||||
|
}
|
||||||
|
function _clearNewFaceInputs(){
|
||||||
|
document.getElementById('rec-newface-name').value='';
|
||||||
|
document.getElementById('rec-newface-desc').value='';
|
||||||
|
}
|
||||||
|
async function enrollFromCamera(b){
|
||||||
|
btnLoad(b);
|
||||||
|
try{
|
||||||
|
const r=await api('POST','/api/recognition/faces/enroll'+_newFaceQuery());
|
||||||
|
toast('Enrolled face_'+r.face.id+(r.face.description?' (with description)':''),'ok');
|
||||||
|
_clearNewFaceInputs();
|
||||||
|
refreshFaces();refreshRecognition();
|
||||||
|
}catch(e){toast('Enroll failed: '+(e.message||e),'err');}
|
||||||
|
btnDone(b);
|
||||||
|
}
|
||||||
|
async function enrollFromUpload(input){
|
||||||
|
const files=input.files;if(!files||!files.length)return;
|
||||||
|
const fd=new FormData();for(const f of files) fd.append('files',f);
|
||||||
|
try{
|
||||||
|
const resp=await fetch('/api/recognition/faces/upload'+_newFaceQuery(),{method:'POST',body:fd});
|
||||||
|
if(!resp.ok)throw new Error(await resp.text());
|
||||||
|
const r=await resp.json();
|
||||||
|
toast('Uploaded face_'+r.face.id+' ('+files.length+' photos'+(r.face.description?', with description':'')+')','ok');
|
||||||
|
_clearNewFaceInputs();
|
||||||
|
input.value='';
|
||||||
|
refreshFaces();refreshRecognition();
|
||||||
|
}catch(e){toast('Upload failed: '+(e.message||e),'err');}
|
||||||
|
}
|
||||||
|
async function captureToFace(id,b){
|
||||||
|
btnLoad(b);
|
||||||
|
try{await api('POST','/api/recognition/faces/'+id+'/capture');toast('Added photo','ok');refreshFaces();}
|
||||||
|
catch(e){toast('Capture failed','err');}
|
||||||
|
btnDone(b);
|
||||||
|
}
|
||||||
|
async function uploadToFace(id,input){
|
||||||
|
const files=input.files;if(!files||!files.length)return;
|
||||||
|
const fd=new FormData();for(const f of files) fd.append('files',f);
|
||||||
|
try{
|
||||||
|
const resp=await fetch('/api/recognition/faces/'+id+'/upload',{method:'POST',body:fd});
|
||||||
|
if(!resp.ok)throw new Error(await resp.text());
|
||||||
|
toast('Uploaded '+files.length+' photo(s)','ok');
|
||||||
|
input.value='';
|
||||||
|
refreshFaces();
|
||||||
|
}catch(e){toast('Upload failed: '+(e.message||e),'err');}
|
||||||
|
}
|
||||||
|
async function renameFace(id){
|
||||||
|
const el=document.getElementById('rec-name-'+id);if(!el)return;
|
||||||
|
const cur=el.textContent.replace(/^\((.*)\)$/,'$1');
|
||||||
|
const next=prompt('New name (blank to clear):',cur==='face_'+id?'':cur);
|
||||||
|
if(next===null) return;
|
||||||
|
try{
|
||||||
|
await api('POST','/api/recognition/faces/'+id+'/rename',{name:next});
|
||||||
|
toast('Renamed','ok');refreshFaces();
|
||||||
|
}catch(e){toast('Rename failed','err');}
|
||||||
|
}
|
||||||
|
async function describeFace(id){
|
||||||
|
const el=document.getElementById('rec-desc-'+id);
|
||||||
|
const cur=el?el.textContent.trim():'';
|
||||||
|
const next=prompt('Description for Gemini — who is this person? '+
|
||||||
|
'(blank to clear)',cur);
|
||||||
|
if(next===null) return;
|
||||||
|
try{
|
||||||
|
await api('POST','/api/recognition/faces/'+id+'/describe',{description:next});
|
||||||
|
toast(next.trim()?'Description saved':'Description cleared','ok');
|
||||||
|
refreshFaces();
|
||||||
|
}catch(e){toast('Save failed: '+(e.message||e),'err');}
|
||||||
|
}
|
||||||
|
async function deletePhoto(id,name){
|
||||||
|
if(!confirm('Delete photo '+name+'?'))return;
|
||||||
|
try{
|
||||||
|
await api('DELETE','/api/recognition/faces/'+id+'/photo/'+encodeURIComponent(name));
|
||||||
|
toast('Photo deleted','ok');refreshFaces();
|
||||||
|
}catch(e){toast('Delete failed: '+(e.message||e),'err');}
|
||||||
|
}
|
||||||
|
async function deleteFace(id){
|
||||||
|
if(!confirm('Delete face_'+id+' and all photos?'))return;
|
||||||
|
try{
|
||||||
|
await api('DELETE','/api/recognition/faces/'+id);
|
||||||
|
toast('Face deleted','ok');refreshFaces();refreshRecognition();
|
||||||
|
}catch(e){toast('Delete failed','err');}
|
||||||
|
}
|
||||||
|
|
||||||
// Init — vision/camera/detector fetches removed; those endpoints were deleted.
|
// Init — vision/camera/detector fetches removed; those endpoints were deleted.
|
||||||
refreshStatus();refreshSystem();refreshAudio();refreshAudioDevices();refreshSkills();refreshReplayFiles();refreshScripts();refreshPrompt();refreshRecords();populateGestureSelect();refreshLiveVoice();refreshLiveSub();refreshTR();refreshWakeActions();refreshApiKey();refreshCombo();connectLogs();
|
refreshStatus();refreshSystem();refreshAudio();refreshAudioDevices();refreshSkills();refreshReplayFiles();refreshScripts();refreshPrompt();refreshRecords();refreshLiveVoice();refreshLiveSub();refreshTR();refreshWakeActions();refreshApiKey();refreshCombo();refreshRecognition();connectLogs();
|
||||||
setTimeout(autoConnectGemini,2000);setTimeout(autoStartLiveSub,3000);
|
setTimeout(autoConnectGemini,2000);setTimeout(autoStartLiveSub,3000);
|
||||||
setInterval(refreshStatus,5000);setInterval(refreshSystem,30000);setInterval(refreshLiveVoice,5000);setInterval(refreshLiveSub,5000);
|
setInterval(refreshStatus,5000);setInterval(refreshSystem,30000);setInterval(refreshLiveVoice,5000);setInterval(refreshLiveSub,5000);setInterval(refreshRecognition,5000);
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|||||||
421
gemini/script.py
421
gemini/script.py
@ -19,8 +19,13 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import array
|
import array
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
import time
|
import time
|
||||||
|
from pathlib import Path
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -29,6 +34,7 @@ from google import genai
|
|||||||
from google.genai import types
|
from google.genai import types
|
||||||
|
|
||||||
from Project.Sanad.config import (
|
from Project.Sanad.config import (
|
||||||
|
BASE_DIR,
|
||||||
CHUNK_SIZE,
|
CHUNK_SIZE,
|
||||||
GEMINI_API_KEY,
|
GEMINI_API_KEY,
|
||||||
GEMINI_VOICE,
|
GEMINI_VOICE,
|
||||||
@ -37,6 +43,7 @@ from Project.Sanad.config import (
|
|||||||
)
|
)
|
||||||
from Project.Sanad.core.config_loader import section as _cfg_section
|
from Project.Sanad.core.config_loader import section as _cfg_section
|
||||||
from Project.Sanad.core.logger import get_logger
|
from Project.Sanad.core.logger import get_logger
|
||||||
|
from Project.Sanad.vision import recognition_state as _recog_state
|
||||||
|
|
||||||
log = get_logger("gemini_brain")
|
log = get_logger("gemini_brain")
|
||||||
|
|
||||||
@ -57,6 +64,93 @@ _NO_MESSAGES_TIMEOUT = _SV.get("no_messages_timeout_sec", 30)
|
|||||||
_CHUNK_BYTES = CHUNK_SIZE * 2
|
_CHUNK_BYTES = CHUNK_SIZE * 2
|
||||||
_SILENCE_PCM = b"\x00" * _CHUNK_BYTES
|
_SILENCE_PCM = b"\x00" * _CHUNK_BYTES
|
||||||
|
|
||||||
|
# ── Recognition (camera + face gallery) tunables ──
|
||||||
|
_RECOG_STATE_PATH = Path(os.environ.get(
|
||||||
|
"SANAD_RECOGNITION_STATE_PATH",
|
||||||
|
str(BASE_DIR / "data" / ".recognition_state.json"),
|
||||||
|
))
|
||||||
|
_VISION_SEND_HZ = float(os.environ.get("SANAD_VISION_SEND_HZ", "2"))
|
||||||
|
_VISION_STALE_MS = int(os.environ.get("SANAD_VISION_STALE_MS", "1500"))
|
||||||
|
_RECOG_POLL_S = float(os.environ.get("SANAD_RECOGNITION_POLL_S", "1.0"))
|
||||||
|
_FACES_DIR = Path(os.environ.get(
|
||||||
|
"SANAD_FACES_DIR",
|
||||||
|
str(BASE_DIR / "data" / "faces"),
|
||||||
|
))
|
||||||
|
_FACES_MAX_SAMPLES = int(os.environ.get("SANAD_FACES_MAX_SAMPLES", "3"))
|
||||||
|
_FACES_PRIMER_RESIZE = int(os.environ.get("SANAD_FACES_PRIMER_RESIZE", "256"))
|
||||||
|
|
||||||
|
|
||||||
|
# ── stdin push channel (Marcus pattern) ──────────────────────
|
||||||
|
# The GeminiSubprocess supervisor writes two line types to this process's
|
||||||
|
# stdin:
|
||||||
|
# "frame:<base64-jpeg>\n" — a camera frame to relay to Gemini Live
|
||||||
|
# "state:<json>\n" — a motion-state update to inject as text
|
||||||
|
# A daemon thread parses them into the caches below; the asyncio tasks
|
||||||
|
# _send_frame_loop / _send_state_loop drain those caches.
|
||||||
|
|
||||||
|
_LATEST_FRAME_LOCK = threading.Lock()
|
||||||
|
_LATEST_FRAME: dict = {"bytes": None, "ts": 0.0}
|
||||||
|
|
||||||
|
_STATE_LOCK = threading.Lock()
|
||||||
|
_STATE_PENDING: list[str] = []
|
||||||
|
|
||||||
|
_STATE_TAGS = {
|
||||||
|
"start": "[STATE-START]",
|
||||||
|
"complete": "[STATE-DONE]",
|
||||||
|
"interrupted": "[STATE-INTERRUPTED]",
|
||||||
|
"error": "[STATE-ERROR]",
|
||||||
|
"paused": "[STATE-PAUSED]",
|
||||||
|
"resumed": "[STATE-RESUMED]",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _stdin_watcher() -> None:
|
||||||
|
"""Daemon thread — parse 'frame:' / 'state:' lines off stdin.
|
||||||
|
|
||||||
|
Best-effort: any malformed line is skipped. Exits when the parent
|
||||||
|
closes our stdin (subprocess teardown)."""
|
||||||
|
try:
|
||||||
|
for line in sys.stdin:
|
||||||
|
line = line.rstrip("\n")
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
if line.startswith("frame:"):
|
||||||
|
b64 = line[len("frame:"):]
|
||||||
|
try:
|
||||||
|
data = base64.b64decode(b64)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
if data:
|
||||||
|
with _LATEST_FRAME_LOCK:
|
||||||
|
_LATEST_FRAME["bytes"] = data
|
||||||
|
_LATEST_FRAME["ts"] = time.time()
|
||||||
|
elif line.startswith("state:"):
|
||||||
|
try:
|
||||||
|
payload = json.loads(line[len("state:"):])
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
event = (payload.get("event") or "").strip().lower()
|
||||||
|
cmd = (payload.get("cmd") or "").strip()
|
||||||
|
tag = _STATE_TAGS.get(event)
|
||||||
|
if not tag or not cmd:
|
||||||
|
continue
|
||||||
|
msg = f"{tag} {cmd}"
|
||||||
|
elapsed = payload.get("elapsed_sec")
|
||||||
|
if isinstance(elapsed, (int, float)):
|
||||||
|
msg += f" ({float(elapsed):.1f}s)"
|
||||||
|
reason = payload.get("reason")
|
||||||
|
if reason and event == "error":
|
||||||
|
msg += f" — {reason}"
|
||||||
|
with _STATE_LOCK:
|
||||||
|
_STATE_PENDING.append(msg)
|
||||||
|
except Exception:
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
# Start the watcher at import time — it blocks harmlessly on sys.stdin
|
||||||
|
# until the supervisor sends something. Daemon so it never blocks exit.
|
||||||
|
threading.Thread(target=_stdin_watcher, daemon=True, name="stdin-watcher").start()
|
||||||
|
|
||||||
|
|
||||||
def _audio_energy(pcm: bytes) -> int:
|
def _audio_energy(pcm: bytes) -> int:
|
||||||
try:
|
try:
|
||||||
@ -86,6 +180,19 @@ class GeminiBrain:
|
|||||||
self._ai_speak_start = 0.0
|
self._ai_speak_start = 0.0
|
||||||
self._last_ai_audio = 0.0
|
self._last_ai_audio = 0.0
|
||||||
self._done: Optional[asyncio.Event] = None
|
self._done: Optional[asyncio.Event] = None
|
||||||
|
# ── Recognition flags — kept in sync with the state file by
|
||||||
|
# _recognition_state_watcher. Boot defaults come from the file (or
|
||||||
|
# the SANAD_* env vars if the file is missing).
|
||||||
|
_initial = _recog_state.read(_RECOG_STATE_PATH)
|
||||||
|
self._vision_enabled = bool(
|
||||||
|
_initial.vision_enabled
|
||||||
|
or os.environ.get("SANAD_VISION_ENABLE", "0") == "1"
|
||||||
|
)
|
||||||
|
self._face_rec_enabled = bool(
|
||||||
|
_initial.face_rec_enabled
|
||||||
|
or os.environ.get("SANAD_FACE_RECOGNITION_ENABLE", "0") == "1"
|
||||||
|
)
|
||||||
|
self._gallery_version_primed = -1 # bumped after first successful primer
|
||||||
|
|
||||||
def stop(self) -> None:
|
def stop(self) -> None:
|
||||||
"""Signal the run loop to exit at the next opportunity."""
|
"""Signal the run loop to exit at the next opportunity."""
|
||||||
@ -116,12 +223,19 @@ class GeminiBrain:
|
|||||||
consecutive_errors = 0
|
consecutive_errors = 0
|
||||||
self._mic.flush()
|
self._mic.flush()
|
||||||
self._done = asyncio.Event()
|
self._done = asyncio.Event()
|
||||||
|
# Reset per-session primer state so re-priming on reconnect
|
||||||
|
# actually happens. The state watcher will re-prime as soon
|
||||||
|
# as it sees vision+face-rec enabled.
|
||||||
|
self._gallery_version_primed = -1
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await asyncio.wait_for(
|
await asyncio.wait_for(
|
||||||
asyncio.gather(
|
asyncio.gather(
|
||||||
self._send_mic_loop(session),
|
self._send_mic_loop(session),
|
||||||
self._receive_loop(session),
|
self._receive_loop(session),
|
||||||
|
self._send_frame_loop(session),
|
||||||
|
self._send_state_loop(session),
|
||||||
|
self._recognition_state_watcher(session),
|
||||||
),
|
),
|
||||||
timeout=_SESSION_TIMEOUT,
|
timeout=_SESSION_TIMEOUT,
|
||||||
)
|
)
|
||||||
@ -368,3 +482,310 @@ class GeminiBrain:
|
|||||||
log.warning("receive ended: %s", exc)
|
log.warning("receive ended: %s", exc)
|
||||||
finally:
|
finally:
|
||||||
self._done.set()
|
self._done.set()
|
||||||
|
|
||||||
|
# ─── vision-state announcer ───────────────────────────
|
||||||
|
# Injects the camera state into the live session as text context.
|
||||||
|
# On a live toggle Gemini is told to say so out loud ("I can see you
|
||||||
|
# now" / "I can't see you anymore"); at session start it's silent
|
||||||
|
# standing context so "can you see me?" is answered honestly.
|
||||||
|
|
||||||
|
async def _announce_vision_state(self, session: Any, enabled: bool,
|
||||||
|
is_toggle: bool) -> None:
|
||||||
|
if is_toggle and enabled:
|
||||||
|
text = (
|
||||||
|
"[VISION ON] Your camera was just enabled — you can now see "
|
||||||
|
"the user through it. Briefly tell them you can see them now, "
|
||||||
|
"in your normal Khaleeji style (for example: "
|
||||||
|
"'هلا، الحين أشوفك زين')."
|
||||||
|
)
|
||||||
|
elif is_toggle and not enabled:
|
||||||
|
text = (
|
||||||
|
"[VISION OFF] Your camera was just disabled — you can no "
|
||||||
|
"longer see anything. Briefly tell the user you can't see "
|
||||||
|
"them anymore. If they later ask whether you can see them, "
|
||||||
|
"tell them to enable the camera from the dashboard."
|
||||||
|
)
|
||||||
|
elif enabled: # session start, camera already on
|
||||||
|
text = (
|
||||||
|
"[VISION STATUS] Your camera is ON — you can see the user "
|
||||||
|
"through it. Do not announce this unprompted; just answer "
|
||||||
|
"naturally if they ask what you see."
|
||||||
|
)
|
||||||
|
else: # session start, camera off
|
||||||
|
text = (
|
||||||
|
"[VISION STATUS] Your camera is OFF — you cannot see anything "
|
||||||
|
"right now. If the user asks whether you can see them, tell "
|
||||||
|
"them to enable the camera from the dashboard. Do not announce "
|
||||||
|
"this unprompted."
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
await session.send_realtime_input(text=text)
|
||||||
|
log.info("vision-state injected (enabled=%s, toggle=%s)",
|
||||||
|
enabled, is_toggle)
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
raise
|
||||||
|
except Exception as exc:
|
||||||
|
log.warning("vision-state inject failed: %s", exc)
|
||||||
|
|
||||||
|
# ─── face-recognition-state announcer ─────────────────
|
||||||
|
# Same idea as _announce_vision_state, for the face-recognition toggle.
|
||||||
|
# On a live OFF toggle it also tells Gemini to disregard the gallery —
|
||||||
|
# so OFF takes effect immediately instead of lingering until reconnect.
|
||||||
|
|
||||||
|
async def _announce_facerec_state(self, session: Any, enabled: bool,
|
||||||
|
is_toggle: bool) -> None:
|
||||||
|
if is_toggle and enabled:
|
||||||
|
text = (
|
||||||
|
"[FACE RECOGNITION ON] Face recognition was just enabled — "
|
||||||
|
"you'll be shown the people you know in a moment. Briefly "
|
||||||
|
"tell the user you can now recognise the people you know, in "
|
||||||
|
"your normal Khaleeji style."
|
||||||
|
)
|
||||||
|
elif is_toggle and not enabled:
|
||||||
|
text = (
|
||||||
|
"[FACE RECOGNITION OFF] Face recognition was just disabled. "
|
||||||
|
"Disregard the face gallery you were given earlier — stop "
|
||||||
|
"greeting people by name and do not identify anyone. Briefly "
|
||||||
|
"tell the user you'll no longer recognise faces."
|
||||||
|
)
|
||||||
|
elif enabled: # session start, face rec already on
|
||||||
|
text = (
|
||||||
|
"[FACE RECOGNITION STATUS] Face recognition is ON — when you "
|
||||||
|
"see someone you've been shown in the gallery, greet them by "
|
||||||
|
"name. Do not announce this unprompted."
|
||||||
|
)
|
||||||
|
else: # session start, face rec off
|
||||||
|
text = (
|
||||||
|
"[FACE RECOGNITION STATUS] Face recognition is OFF — you "
|
||||||
|
"cannot identify people. If the user asks who someone is or "
|
||||||
|
"whether you recognise them, tell them to enable face "
|
||||||
|
"recognition from the dashboard. Do not announce this "
|
||||||
|
"unprompted."
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
await session.send_realtime_input(text=text)
|
||||||
|
log.info("face-rec-state injected (enabled=%s, toggle=%s)",
|
||||||
|
enabled, is_toggle)
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
raise
|
||||||
|
except Exception as exc:
|
||||||
|
log.warning("face-rec-state inject failed: %s", exc)
|
||||||
|
|
||||||
|
# ─── recognition state watcher ────────────────────────
|
||||||
|
# Polls data/.recognition_state.json at SANAD_RECOGNITION_POLL_S Hz and
|
||||||
|
# mirrors vision_enabled / face_rec_enabled into in-memory flags so the
|
||||||
|
# rest of the session can react WITHOUT a Gemini reconnect.
|
||||||
|
|
||||||
|
async def _recognition_state_watcher(self, session: Any) -> None:
|
||||||
|
last_mtime = 0.0
|
||||||
|
last_state = _recog_state.RecognitionState(
|
||||||
|
vision_enabled=self._vision_enabled,
|
||||||
|
face_rec_enabled=self._face_rec_enabled,
|
||||||
|
gallery_version=self._gallery_version_primed,
|
||||||
|
)
|
||||||
|
# Best-effort initial primer if face_rec is already on at session start.
|
||||||
|
if self._face_rec_enabled and self._vision_enabled:
|
||||||
|
try:
|
||||||
|
cur = _recog_state.read(_RECOG_STATE_PATH)
|
||||||
|
await self._send_gallery_primer(session, cur.gallery_version)
|
||||||
|
except Exception as exc:
|
||||||
|
log.warning("initial gallery primer failed: %s", exc)
|
||||||
|
|
||||||
|
# Tell Gemini the current camera + face-recognition state at session
|
||||||
|
# start — silent standing context so "can you see me?" / "do you know
|
||||||
|
# who I am?" are answered honestly even if the user never toggles.
|
||||||
|
await self._announce_vision_state(
|
||||||
|
session, self._vision_enabled, is_toggle=False,
|
||||||
|
)
|
||||||
|
await self._announce_facerec_state(
|
||||||
|
session, self._face_rec_enabled, is_toggle=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
while not self._done.is_set() and not self._stop_flag.is_set():
|
||||||
|
await asyncio.sleep(_RECOG_POLL_S)
|
||||||
|
try:
|
||||||
|
st = _RECOG_STATE_PATH.stat()
|
||||||
|
except FileNotFoundError:
|
||||||
|
continue
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
if st.st_mtime == last_mtime:
|
||||||
|
continue
|
||||||
|
last_mtime = st.st_mtime
|
||||||
|
new_state = _recog_state.read(_RECOG_STATE_PATH)
|
||||||
|
|
||||||
|
# Vision toggle — instant. Announce it out loud so Gemini reacts
|
||||||
|
# ("I can see you now" / "I can't see you anymore").
|
||||||
|
if new_state.vision_enabled != last_state.vision_enabled:
|
||||||
|
self._vision_enabled = new_state.vision_enabled
|
||||||
|
log.info("vision toggled → %s", self._vision_enabled)
|
||||||
|
await self._announce_vision_state(
|
||||||
|
session, self._vision_enabled, is_toggle=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Face-rec toggle — announce it out loud. The OFF announcement
|
||||||
|
# also tells Gemini to disregard the gallery, so OFF takes effect
|
||||||
|
# immediately instead of lingering until the next reconnect.
|
||||||
|
if new_state.face_rec_enabled != last_state.face_rec_enabled:
|
||||||
|
self._face_rec_enabled = new_state.face_rec_enabled
|
||||||
|
if self._face_rec_enabled:
|
||||||
|
log.info("face rec enabled — announcing + sending primer")
|
||||||
|
else:
|
||||||
|
log.info("face rec disabled — telling Gemini to "
|
||||||
|
"disregard the gallery")
|
||||||
|
await self._announce_facerec_state(
|
||||||
|
session, self._face_rec_enabled, is_toggle=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Conditions for re-priming:
|
||||||
|
# - face_rec just turned ON (no_face_rec_before)
|
||||||
|
# - gallery version bumped since the last primer
|
||||||
|
face_rec_just_on = (
|
||||||
|
new_state.face_rec_enabled and not last_state.face_rec_enabled
|
||||||
|
)
|
||||||
|
gallery_changed = (
|
||||||
|
new_state.gallery_version != self._gallery_version_primed
|
||||||
|
)
|
||||||
|
if (self._face_rec_enabled
|
||||||
|
and (face_rec_just_on or gallery_changed)
|
||||||
|
and self._vision_enabled):
|
||||||
|
try:
|
||||||
|
await self._send_gallery_primer(
|
||||||
|
session, new_state.gallery_version,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
log.warning("gallery primer failed: %s", exc)
|
||||||
|
|
||||||
|
last_state = new_state
|
||||||
|
|
||||||
|
# ─── camera frame send loop ───────────────────────────
|
||||||
|
# Reads the latest JPEG from the _LATEST_FRAME cache (fed by the
|
||||||
|
# _stdin_watcher thread, which the GeminiSubprocess supervisor pushes
|
||||||
|
# 'frame:<b64>' lines into) and relays it to Gemini Live at
|
||||||
|
# _VISION_SEND_HZ. Only active when self._vision_enabled. Skips frames
|
||||||
|
# older than _VISION_STALE_MS so a stopped/unplugged camera doesn't
|
||||||
|
# waste tokens on a frozen scene.
|
||||||
|
|
||||||
|
async def _send_frame_loop(self, session: Any) -> None:
|
||||||
|
period = 1.0 / max(0.5, _VISION_SEND_HZ)
|
||||||
|
stale_s = _VISION_STALE_MS / 1000.0
|
||||||
|
backoff = 0.0
|
||||||
|
last_sent_ts = 0.0
|
||||||
|
|
||||||
|
while not self._done.is_set() and not self._stop_flag.is_set():
|
||||||
|
await asyncio.sleep(max(period, backoff))
|
||||||
|
if not self._vision_enabled:
|
||||||
|
continue
|
||||||
|
with _LATEST_FRAME_LOCK:
|
||||||
|
data = _LATEST_FRAME.get("bytes")
|
||||||
|
ts = _LATEST_FRAME.get("ts", 0.0)
|
||||||
|
if not data:
|
||||||
|
continue
|
||||||
|
# Stale — supervisor stopped pushing (camera off / unplugged).
|
||||||
|
if (time.time() - ts) > stale_s:
|
||||||
|
continue
|
||||||
|
# De-dup — don't re-send a frame we already relayed.
|
||||||
|
if ts == last_sent_ts:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
await session.send_realtime_input(
|
||||||
|
video=types.Blob(data=data, mime_type="image/jpeg"),
|
||||||
|
)
|
||||||
|
last_sent_ts = ts
|
||||||
|
backoff = 0.0
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
return
|
||||||
|
except Exception as exc:
|
||||||
|
log.warning("frame send failed: %s", exc)
|
||||||
|
backoff = min(backoff * 2 + 0.5, 5.0)
|
||||||
|
|
||||||
|
# ─── motion-state inject loop ─────────────────────────
|
||||||
|
# Drains _STATE_PENDING (fed by the _stdin_watcher from 'state:' lines
|
||||||
|
# the supervisor pushes when the arm starts/finishes/errors a motion)
|
||||||
|
# and injects each as silent text context into the live session, so
|
||||||
|
# Gemini can answer "what are you doing?" honestly. Per persona, Gemini
|
||||||
|
# reads these for context but does not narrate them unprompted.
|
||||||
|
|
||||||
|
async def _send_state_loop(self, session: Any) -> None:
|
||||||
|
while not self._done.is_set() and not self._stop_flag.is_set():
|
||||||
|
await asyncio.sleep(0.1)
|
||||||
|
with _STATE_LOCK:
|
||||||
|
if not _STATE_PENDING:
|
||||||
|
continue
|
||||||
|
pending = list(_STATE_PENDING)
|
||||||
|
_STATE_PENDING.clear()
|
||||||
|
for msg in pending:
|
||||||
|
try:
|
||||||
|
await session.send_realtime_input(text=msg)
|
||||||
|
log.info("STATE injected: %s", msg)
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
return
|
||||||
|
except Exception as exc:
|
||||||
|
# Some SDK versions may not accept text on
|
||||||
|
# send_realtime_input — log once-ish and keep going;
|
||||||
|
# motion still works, only this context channel is lost.
|
||||||
|
log.warning("state inject failed: %s", exc)
|
||||||
|
|
||||||
|
# ─── face gallery primer ──────────────────────────────
|
||||||
|
# Builds one multimodal turn carrying the entire face gallery + a Khaleeji
|
||||||
|
# greeting instruction, and sends it via send_client_content. Gemini keeps
|
||||||
|
# this in session context until reconnect. Re-sent on gallery_version bumps.
|
||||||
|
|
||||||
|
async def _send_gallery_primer(self, session: Any, version: int) -> None:
|
||||||
|
try:
|
||||||
|
from Project.Sanad.vision.face_gallery import FaceGallery
|
||||||
|
except Exception as exc:
|
||||||
|
log.info("face gallery module unavailable: %s", exc)
|
||||||
|
return
|
||||||
|
|
||||||
|
gallery = FaceGallery(_FACES_DIR)
|
||||||
|
try:
|
||||||
|
entries = gallery.load_for_primer(
|
||||||
|
max_samples_per_face=_FACES_MAX_SAMPLES,
|
||||||
|
resize_long_side=_FACES_PRIMER_RESIZE,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
log.warning("face gallery load failed: %s", exc)
|
||||||
|
return
|
||||||
|
|
||||||
|
if not entries:
|
||||||
|
log.info("face gallery empty — primer skipped (v.%d)", version)
|
||||||
|
self._gallery_version_primed = version
|
||||||
|
return
|
||||||
|
|
||||||
|
parts: list[dict[str, Any]] = [{
|
||||||
|
"text": (
|
||||||
|
"GALLERY PRIMER (do not reply to this turn). "
|
||||||
|
"Below are people you know. When the live camera shows one of "
|
||||||
|
"them, greet them warmly by name in UAE Khaleeji dialect "
|
||||||
|
"(for example: 'هلا والله يا كسام، شحالك؟'), and you may use "
|
||||||
|
"the notes about them to make the conversation personal. "
|
||||||
|
"For faces NOT in this gallery, welcome them as a guest "
|
||||||
|
"without inventing a name. Greet each person only once per "
|
||||||
|
"minute to avoid repetition."
|
||||||
|
),
|
||||||
|
}]
|
||||||
|
for entry, jpegs in entries:
|
||||||
|
label = (
|
||||||
|
f"This person is named {entry.name}."
|
||||||
|
if entry.name
|
||||||
|
else "This person's name is unknown — greet as guest."
|
||||||
|
)
|
||||||
|
if entry.description:
|
||||||
|
label += f" Notes about them: {entry.description}"
|
||||||
|
parts.append({"text": f"\n— {label}"})
|
||||||
|
for jpeg in jpegs:
|
||||||
|
parts.append({
|
||||||
|
"inline_data": {"mime_type": "image/jpeg", "data": jpeg},
|
||||||
|
})
|
||||||
|
|
||||||
|
try:
|
||||||
|
await session.send_client_content(
|
||||||
|
turns=[{"role": "user", "parts": parts}],
|
||||||
|
turn_complete=True,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
log.warning("primer send failed: %s", exc)
|
||||||
|
return
|
||||||
|
self._gallery_version_primed = version
|
||||||
|
log.info("face gallery primed: %d person(s), v.%d", len(entries), version)
|
||||||
|
|||||||
@ -10,15 +10,16 @@ When a new model is added, build its own sibling supervisor (see
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import signal
|
import signal
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import threading
|
import threading
|
||||||
import time
|
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Any
|
from typing import Any, Optional, Union
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@ -30,6 +31,11 @@ log = get_logger("gemini_subprocess")
|
|||||||
|
|
||||||
_LS_CFG = _cfg_section("gemini", "subprocess")
|
_LS_CFG = _cfg_section("gemini", "subprocess")
|
||||||
|
|
||||||
|
# Camera frame forwarding — push the latest JPEG to the child over stdin
|
||||||
|
# at this interval (seconds). 0.5 s ≈ 2 fps, matching the child's
|
||||||
|
# SANAD_VISION_SEND_HZ default. The child de-stales + relays to Gemini.
|
||||||
|
_FRAME_FORWARD_INTERVAL_S = float(_LS_CFG.get("frame_forward_interval_sec", 0.5))
|
||||||
|
|
||||||
|
|
||||||
def _resolve_live_script() -> Path:
|
def _resolve_live_script() -> Path:
|
||||||
"""Locate the voice script to run as subprocess.
|
"""Locate the voice script to run as subprocess.
|
||||||
@ -82,6 +88,22 @@ class GeminiSubprocess:
|
|||||||
self.state_message = "Idle."
|
self.state_message = "Idle."
|
||||||
self.last_user_text = ""
|
self.last_user_text = ""
|
||||||
self.suppressed_noise = 0
|
self.suppressed_noise = 0
|
||||||
|
# ── stdin push channel (camera frames + motion state) ──
|
||||||
|
# The child (gemini/script.py) reads "frame:<b64>\n" and
|
||||||
|
# "state:<json>\n" lines off its stdin. Writes are serialised
|
||||||
|
# because the frame forwarder and the motion-state bus handler
|
||||||
|
# both call from different threads.
|
||||||
|
self._stdin_lock = threading.Lock()
|
||||||
|
self._camera = None # set via attach_camera()
|
||||||
|
self._frame_thread: threading.Thread | None = None
|
||||||
|
self._frame_stop = threading.Event()
|
||||||
|
|
||||||
|
# ── camera attach (called once from main.py) ──────────────
|
||||||
|
|
||||||
|
def attach_camera(self, camera) -> None:
|
||||||
|
"""Give the supervisor a reference to the CameraDaemon so it can
|
||||||
|
forward frames to the child over stdin while a session runs."""
|
||||||
|
self._camera = camera
|
||||||
|
|
||||||
def _open_session_log(self, pid: int):
|
def _open_session_log(self, pid: int):
|
||||||
"""Open (or re-open) the per-day append log file for this session."""
|
"""Open (or re-open) the per-day append log file for this session."""
|
||||||
@ -214,6 +236,7 @@ class GeminiSubprocess:
|
|||||||
proc = subprocess.Popen(
|
proc = subprocess.Popen(
|
||||||
cmd,
|
cmd,
|
||||||
cwd=str(script.parent),
|
cwd=str(script.parent),
|
||||||
|
stdin=subprocess.PIPE, # camera frames + motion state push
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.STDOUT,
|
stderr=subprocess.STDOUT,
|
||||||
text=True,
|
text=True,
|
||||||
@ -221,16 +244,106 @@ class GeminiSubprocess:
|
|||||||
env=env,
|
env=env,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Reap any stale frame forwarder from a previous session that ended
|
||||||
|
# by a child crash rather than a clean stop() — otherwise it keeps
|
||||||
|
# spinning on a dead pipe and we'd leak a thread per restart.
|
||||||
|
stale_ft = self._frame_thread
|
||||||
|
if stale_ft is not None and stale_ft.is_alive():
|
||||||
|
self._frame_stop.set()
|
||||||
|
stale_ft.join(timeout=2.0)
|
||||||
|
|
||||||
with self._lock:
|
with self._lock:
|
||||||
self.process = proc
|
self.process = proc
|
||||||
self.log_tail.append(f"Started: pid={proc.pid}")
|
self.log_tail.append(f"Started: pid={proc.pid}")
|
||||||
self._set_state("starting", f"pid={proc.pid}")
|
self._set_state("starting", f"pid={proc.pid}")
|
||||||
self._reader_thread = threading.Thread(target=self._reader_loop, daemon=True)
|
self._reader_thread = threading.Thread(target=self._reader_loop, daemon=True)
|
||||||
self._reader_thread.start()
|
self._reader_thread.start()
|
||||||
|
# Frame forwarder — pushes camera JPEGs to the child over stdin.
|
||||||
|
self._frame_stop.clear()
|
||||||
|
self._frame_thread = threading.Thread(
|
||||||
|
target=self._frame_forwarder, daemon=True, name="gemini-frame-fwd",
|
||||||
|
)
|
||||||
|
self._frame_thread.start()
|
||||||
|
|
||||||
log.info("Live Gemini subprocess started: pid=%d", proc.pid)
|
log.info("Live Gemini subprocess started: pid=%d", proc.pid)
|
||||||
return {"started": True, "pid": proc.pid}
|
return {"started": True, "pid": proc.pid}
|
||||||
|
|
||||||
|
# ── stdin push channel ────────────────────────────────────
|
||||||
|
|
||||||
|
def _send_stdin(self, line: str) -> None:
|
||||||
|
"""Serialised stdin write — frame forwarder + motion-state handler
|
||||||
|
both call this from different threads. Best-effort: a closed pipe
|
||||||
|
or a not-yet-started process is a silent no-op."""
|
||||||
|
proc = self.process
|
||||||
|
if proc is None or proc.stdin is None:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
with self._stdin_lock:
|
||||||
|
if not proc.stdin.closed:
|
||||||
|
proc.stdin.write(line)
|
||||||
|
proc.stdin.flush()
|
||||||
|
except Exception:
|
||||||
|
# Pipe broke (child exited) — drop silently; the reader thread
|
||||||
|
# will surface the exit via state="stopped".
|
||||||
|
pass
|
||||||
|
|
||||||
|
def send_frame(self, jpeg: Union[bytes, str]) -> None:
|
||||||
|
"""Forward one camera frame to the child as 'frame:<base64>\\n'.
|
||||||
|
|
||||||
|
Accepts raw JPEG bytes (base64-encoded here) or an already-base64
|
||||||
|
ASCII string (e.g. CameraDaemon.get_frame_b64() — no re-encode)."""
|
||||||
|
if isinstance(jpeg, bytes):
|
||||||
|
b64 = base64.b64encode(jpeg).decode("ascii")
|
||||||
|
elif isinstance(jpeg, str):
|
||||||
|
b64 = jpeg.strip()
|
||||||
|
else:
|
||||||
|
return
|
||||||
|
if b64:
|
||||||
|
self._send_stdin("frame:" + b64 + "\n")
|
||||||
|
|
||||||
|
def send_state(self, event: str, cmd: str,
|
||||||
|
elapsed_sec: Optional[float] = None,
|
||||||
|
reason: Optional[str] = None) -> None:
|
||||||
|
"""Push a motion-state update to the child as 'state:<json>\\n'.
|
||||||
|
|
||||||
|
Events: start | complete | interrupted | error. The child injects
|
||||||
|
'[STATE-...] <cmd>' into the live Gemini session as silent text
|
||||||
|
context so Gemini can answer "what are you doing?" honestly."""
|
||||||
|
if not event or not cmd:
|
||||||
|
return
|
||||||
|
payload: dict[str, Any] = {"event": event, "cmd": cmd}
|
||||||
|
if elapsed_sec is not None:
|
||||||
|
payload["elapsed_sec"] = round(float(elapsed_sec), 2)
|
||||||
|
if reason:
|
||||||
|
payload["reason"] = str(reason)[:200]
|
||||||
|
try:
|
||||||
|
line = "state:" + json.dumps(payload, ensure_ascii=False) + "\n"
|
||||||
|
except Exception:
|
||||||
|
return
|
||||||
|
self._send_stdin(line)
|
||||||
|
|
||||||
|
def _frame_forwarder(self) -> None:
|
||||||
|
"""Background thread — push the camera's latest frame to the child.
|
||||||
|
|
||||||
|
Runs for the lifetime of one subprocess session. Gated on the
|
||||||
|
camera actually running; the child does its own vision-enabled +
|
||||||
|
staleness checks, so this stays dumb (camera up → push)."""
|
||||||
|
cam = self._camera
|
||||||
|
if cam is None:
|
||||||
|
return
|
||||||
|
while not self._frame_stop.is_set():
|
||||||
|
if self._frame_stop.wait(_FRAME_FORWARD_INTERVAL_S):
|
||||||
|
break
|
||||||
|
try:
|
||||||
|
if not cam.is_running():
|
||||||
|
continue
|
||||||
|
b64 = cam.get_frame_b64()
|
||||||
|
if b64:
|
||||||
|
self.send_frame(b64)
|
||||||
|
except Exception:
|
||||||
|
# Best-effort — never let a frame hiccup kill the thread.
|
||||||
|
pass
|
||||||
|
|
||||||
def stop(self) -> dict[str, Any]:
|
def stop(self) -> dict[str, Any]:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
proc = self.process
|
proc = self.process
|
||||||
@ -238,6 +351,13 @@ class GeminiSubprocess:
|
|||||||
return {"stopped": False, "message": "Not running."}
|
return {"stopped": False, "message": "Not running."}
|
||||||
self._set_state("stopping", "Stopping...")
|
self._set_state("stopping", "Stopping...")
|
||||||
|
|
||||||
|
# Halt the frame forwarder before we tear the pipe down.
|
||||||
|
self._frame_stop.set()
|
||||||
|
ft = self._frame_thread
|
||||||
|
if ft is not None:
|
||||||
|
ft.join(timeout=2.0)
|
||||||
|
self._frame_thread = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
proc.send_signal(signal.SIGINT)
|
proc.send_signal(signal.SIGINT)
|
||||||
proc.wait(timeout=_STOP_TIMEOUT_SEC)
|
proc.wait(timeout=_STOP_TIMEOUT_SEC)
|
||||||
@ -251,6 +371,16 @@ class GeminiSubprocess:
|
|||||||
|
|
||||||
rc = proc.returncode
|
rc = proc.returncode
|
||||||
|
|
||||||
|
# Close stdin/stdout explicitly — without this each start/stop
|
||||||
|
# cycle leaks FDs (relied on Popen.__del__ which only runs at GC;
|
||||||
|
# a reconnect loop would march the FD count to the OS limit).
|
||||||
|
for pipe in (getattr(proc, "stdin", None), getattr(proc, "stdout", None)):
|
||||||
|
if pipe is not None:
|
||||||
|
try:
|
||||||
|
pipe.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
with self._lock:
|
with self._lock:
|
||||||
self.process = None
|
self.process = None
|
||||||
self.log_tail.append("Stopped.")
|
self.log_tail.append("Stopped.")
|
||||||
|
|||||||
112
main.py
112
main.py
@ -106,6 +106,8 @@ TypedReplayEngine = _safe_import("TypedReplayEngine", lambda: __import__("
|
|||||||
GeminiVoiceClient = _safe_import("GeminiVoiceClient", lambda: __import__("Project.Sanad.gemini.client", fromlist=["GeminiVoiceClient"]).GeminiVoiceClient)
|
GeminiVoiceClient = _safe_import("GeminiVoiceClient", lambda: __import__("Project.Sanad.gemini.client", fromlist=["GeminiVoiceClient"]).GeminiVoiceClient)
|
||||||
GeminiSubprocess = _safe_import("GeminiSubprocess", lambda: __import__("Project.Sanad.gemini.subprocess", fromlist=["GeminiSubprocess"]).GeminiSubprocess)
|
GeminiSubprocess = _safe_import("GeminiSubprocess", lambda: __import__("Project.Sanad.gemini.subprocess", fromlist=["GeminiSubprocess"]).GeminiSubprocess)
|
||||||
LocalSubprocess = _safe_import("LocalSubprocess", lambda: __import__("Project.Sanad.local.subprocess", fromlist=["LocalSubprocess"]).LocalSubprocess)
|
LocalSubprocess = _safe_import("LocalSubprocess", lambda: __import__("Project.Sanad.local.subprocess", fromlist=["LocalSubprocess"]).LocalSubprocess)
|
||||||
|
CameraDaemon = _safe_import("CameraDaemon", lambda: __import__("Project.Sanad.vision.camera", fromlist=["CameraDaemon"]).CameraDaemon)
|
||||||
|
FaceGallery = _safe_import("FaceGallery", lambda: __import__("Project.Sanad.vision.face_gallery", fromlist=["FaceGallery"]).FaceGallery)
|
||||||
|
|
||||||
|
|
||||||
# ── global instances (imported by route modules) ──
|
# ── global instances (imported by route modules) ──
|
||||||
@ -131,6 +133,107 @@ else:
|
|||||||
live_sub = _safe_construct("live_sub", GeminiSubprocess)
|
live_sub = _safe_construct("live_sub", GeminiSubprocess)
|
||||||
typed_replay = _safe_construct("typed_replay", (lambda: TypedReplayEngine(voice_client, audio_mgr)) if (TypedReplayEngine and voice_client and audio_mgr) else None)
|
typed_replay = _safe_construct("typed_replay", (lambda: TypedReplayEngine(voice_client, audio_mgr)) if (TypedReplayEngine and voice_client and audio_mgr) else None)
|
||||||
|
|
||||||
|
# ── Recognition (camera + face gallery) ─────────────────────────────────────
|
||||||
|
# Camera is idle until the dashboard toggles vision on; face gallery is pure
|
||||||
|
# file IO and always available if the import succeeded.
|
||||||
|
#
|
||||||
|
# Config precedence (highest first): explicit env var → config/core_config.json
|
||||||
|
# section → hardcoded default. The parent process normally has no SANAD_CAMERA_*
|
||||||
|
# env vars (LIVE_TUNE is only forwarded to the Gemini child), so in practice the
|
||||||
|
# core_config.json `camera` / `faces` sections are the live source here.
|
||||||
|
def _build_camera():
|
||||||
|
from Project.Sanad.core.config_loader import section as _cfg_section
|
||||||
|
cam_cfg = _cfg_section("core", "camera")
|
||||||
|
|
||||||
|
def _knob(env_key: str, cfg_key: str, default):
|
||||||
|
env_val = os.environ.get(env_key)
|
||||||
|
if env_val is not None and env_val != "":
|
||||||
|
return type(default)(env_val)
|
||||||
|
return type(default)(cam_cfg.get(cfg_key, default))
|
||||||
|
|
||||||
|
# Frames are cached in memory and pushed to the Gemini child over its
|
||||||
|
# stdin (see GeminiSubprocess._frame_forwarder) — no file drop.
|
||||||
|
return CameraDaemon(
|
||||||
|
width=_knob("SANAD_CAMERA_WIDTH", "width", 424),
|
||||||
|
height=_knob("SANAD_CAMERA_HEIGHT", "height", 240),
|
||||||
|
fps=_knob("SANAD_CAMERA_FPS", "fps", 15),
|
||||||
|
jpeg_quality=_knob("SANAD_CAMERA_JPEG_QUALITY", "jpeg_quality", 70),
|
||||||
|
stale_threshold_s=float(cam_cfg.get("stale_threshold_s", 10.0)),
|
||||||
|
reconnect_min_s=float(cam_cfg.get("reconnect_min_s", 2.0)),
|
||||||
|
reconnect_max_s=float(cam_cfg.get("reconnect_max_s", 10.0)),
|
||||||
|
capture_timeout_ms=int(cam_cfg.get("capture_timeout_ms", 5000)),
|
||||||
|
)
|
||||||
|
|
||||||
|
def _build_gallery():
|
||||||
|
from Project.Sanad.config import BASE_DIR
|
||||||
|
from Project.Sanad.core.config_loader import section as _cfg_section
|
||||||
|
faces_cfg = _cfg_section("core", "faces")
|
||||||
|
# SANAD_FACES_DIR is set absolute by LIVE_TUNE (the Gemini child reads the
|
||||||
|
# same var). In the parent it's usually unset → fall back to the JSON's
|
||||||
|
# dir_rel, then the hardcoded default. Honour absolute paths as-is.
|
||||||
|
raw = os.environ.get("SANAD_FACES_DIR") or faces_cfg.get("dir_rel", "data/faces")
|
||||||
|
p = Path(raw)
|
||||||
|
root = p if p.is_absolute() else (BASE_DIR / raw)
|
||||||
|
return FaceGallery(root)
|
||||||
|
|
||||||
|
camera = _safe_construct("camera", _build_camera if CameraDaemon else None)
|
||||||
|
gallery = _safe_construct("gallery", _build_gallery if FaceGallery else None)
|
||||||
|
|
||||||
|
# Restore persisted vision_enabled at boot — start camera if the user left
|
||||||
|
# it on across a reboot. Face-rec state is read by the Gemini child directly.
|
||||||
|
try:
|
||||||
|
from Project.Sanad.vision import recognition_state as _recog_state
|
||||||
|
from Project.Sanad.config import BASE_DIR as _BD
|
||||||
|
_state = _recog_state.read(_BD / "data" / ".recognition_state.json")
|
||||||
|
if _state.vision_enabled and camera is not None:
|
||||||
|
if camera.start():
|
||||||
|
log.info("Camera vision restored from state (backend=%s)", camera.backend)
|
||||||
|
else:
|
||||||
|
log.warning("Camera vision was ON but no backend available — leaving OFF")
|
||||||
|
_recog_state.mutate(_BD / "data" / ".recognition_state.json",
|
||||||
|
vision_enabled=False)
|
||||||
|
except Exception:
|
||||||
|
log.exception("Could not restore recognition state")
|
||||||
|
|
||||||
|
# Hand the camera to the Gemini supervisor so it can forward frames to the
|
||||||
|
# child over stdin while a live session runs.
|
||||||
|
if live_sub is not None and camera is not None:
|
||||||
|
try:
|
||||||
|
if hasattr(live_sub, "attach_camera"):
|
||||||
|
live_sub.attach_camera(camera)
|
||||||
|
log.info("Camera attached to live subprocess supervisor")
|
||||||
|
except Exception:
|
||||||
|
log.exception("attach_camera failed")
|
||||||
|
|
||||||
|
# ── Motion-state → Gemini channel ───────────────────────────────────────────
|
||||||
|
# The arm controller emits motion.action_started / _done / _error on the bus.
|
||||||
|
# Forward each to the Gemini child as a 'state:' line so the live session can
|
||||||
|
# answer "what are you doing?" honestly. Sync handlers, fired via emit_sync
|
||||||
|
# from the arm's worker thread — send_state just writes to a pipe (cheap).
|
||||||
|
if live_sub is not None and hasattr(live_sub, "send_state"):
|
||||||
|
try:
|
||||||
|
from Project.Sanad.core.event_bus import bus as _bus
|
||||||
|
|
||||||
|
def _on_motion_started(action: str = "", **_kw):
|
||||||
|
live_sub.send_state("start", action)
|
||||||
|
|
||||||
|
def _on_motion_done(action: str = "", elapsed_sec=None,
|
||||||
|
failed: bool = False, **_kw):
|
||||||
|
# action_error already covered the failure case with a reason;
|
||||||
|
# here just emit complete (skip if it failed to avoid a dup).
|
||||||
|
if not failed:
|
||||||
|
live_sub.send_state("complete", action, elapsed_sec=elapsed_sec)
|
||||||
|
|
||||||
|
def _on_motion_error(action: str = "", reason: str = "", **_kw):
|
||||||
|
live_sub.send_state("error", action, reason=reason)
|
||||||
|
|
||||||
|
_bus.on("motion.action_started", _on_motion_started)
|
||||||
|
_bus.on("motion.action_done", _on_motion_done)
|
||||||
|
_bus.on("motion.action_error", _on_motion_error)
|
||||||
|
log.info("Motion-state → Gemini channel wired")
|
||||||
|
except Exception:
|
||||||
|
log.exception("Could not wire motion-state → Gemini channel")
|
||||||
|
|
||||||
# Wire everything into the Brain (only what was constructed)
|
# Wire everything into the Brain (only what was constructed)
|
||||||
def _safe_attach(method_name: str, value):
|
def _safe_attach(method_name: str, value):
|
||||||
if brain is None or value is None:
|
if brain is None or value is None:
|
||||||
@ -166,6 +269,8 @@ SUBSYSTEMS = {
|
|||||||
"live_voice": live_voice,
|
"live_voice": live_voice,
|
||||||
"live_sub": live_sub,
|
"live_sub": live_sub,
|
||||||
"typed_replay": typed_replay,
|
"typed_replay": typed_replay,
|
||||||
|
"camera": camera,
|
||||||
|
"gallery": gallery,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Critical subsystems — if any of these are None, log a warning at startup
|
# Critical subsystems — if any of these are None, log a warning at startup
|
||||||
@ -220,6 +325,13 @@ def _do_shutdown(from_signal: bool = False):
|
|||||||
except Exception:
|
except Exception:
|
||||||
log.exception("audio_mgr.close() failed")
|
log.exception("audio_mgr.close() failed")
|
||||||
|
|
||||||
|
if camera is not None:
|
||||||
|
try:
|
||||||
|
if camera.is_running():
|
||||||
|
camera.stop()
|
||||||
|
except Exception:
|
||||||
|
log.exception("camera.stop() failed")
|
||||||
|
|
||||||
log.info("Shutdown complete")
|
log.info("Shutdown complete")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -673,6 +673,8 @@ class ArmController:
|
|||||||
self._is_busy = True
|
self._is_busy = True
|
||||||
self._cancel.clear()
|
self._cancel.clear()
|
||||||
|
|
||||||
|
_start = time.monotonic()
|
||||||
|
_failed = False
|
||||||
try:
|
try:
|
||||||
bus.emit_sync("motion.action_started", action=action.name)
|
bus.emit_sync("motion.action_started", action=action.name)
|
||||||
if action.file:
|
if action.file:
|
||||||
@ -680,12 +682,20 @@ class ArmController:
|
|||||||
else:
|
else:
|
||||||
self._run_sdk_action(action)
|
self._run_sdk_action(action)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
|
_failed = True
|
||||||
log.error("Action %s failed: %s", action.name, exc)
|
log.error("Action %s failed: %s", action.name, exc)
|
||||||
|
bus.emit_sync("motion.action_error", action=action.name,
|
||||||
|
reason=str(exc))
|
||||||
finally:
|
finally:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
self._is_busy = False
|
self._is_busy = False
|
||||||
self._last_action_time = time.monotonic()
|
self._last_action_time = time.monotonic()
|
||||||
bus.emit_sync("motion.action_done", action=action.name)
|
# action_done always fires (back-compat for existing listeners);
|
||||||
|
# action_error above is the extra signal for the Gemini
|
||||||
|
# motion-state channel. elapsed_sec lets Gemini say "...took 2.3s".
|
||||||
|
bus.emit_sync("motion.action_done", action=action.name,
|
||||||
|
elapsed_sec=round(time.monotonic() - _start, 2),
|
||||||
|
failed=_failed)
|
||||||
|
|
||||||
def _run_sdk_action(self, action: Action):
|
def _run_sdk_action(self, action: Action):
|
||||||
if not _HAS_SDK:
|
if not _HAS_SDK:
|
||||||
|
|||||||
@ -7,11 +7,30 @@ uvicorn[standard]>=0.29.0
|
|||||||
python-multipart>=0.0.9
|
python-multipart>=0.0.9
|
||||||
|
|
||||||
# Gemini voice
|
# Gemini voice
|
||||||
|
# google-genai: the Gemini Live SDK — used by gemini/script.py (live brain)
|
||||||
|
# and gemini/client.py. Needs Python 3.10+, which is why the voice loop
|
||||||
|
# runs in the gemini_sdk conda env. send_realtime_input(video=)/(text=)
|
||||||
|
# and send_client_content() require a reasonably recent (>=1.x) release.
|
||||||
|
google-genai>=1.0.0
|
||||||
websockets>=12.0
|
websockets>=12.0
|
||||||
pyaudio>=0.2.13
|
pyaudio>=0.2.13
|
||||||
|
|
||||||
# Camera proxy
|
# Recognition (camera vision + face gallery for Gemini-side face recognition)
|
||||||
httpx>=0.27.0
|
# opencv-python-headless: JPEG encode + USB-camera fallback. Headless wheel —
|
||||||
|
# the dashboard renders frames; we never need a GUI window.
|
||||||
|
# Pillow: resize face samples before the Gemini primer turn.
|
||||||
|
opencv-python-headless>=4.8.0
|
||||||
|
Pillow>=10.0.0
|
||||||
|
#
|
||||||
|
# pyrealsense2 — DO NOT `pip install` on Jetson / JetPack 5.
|
||||||
|
# The PyPI wheel is built against glibc 2.32+ (Ubuntu 22.04); JetPack 5 ships
|
||||||
|
# glibc 2.31, so the wheel fails to load with:
|
||||||
|
# ImportError: ... version `GLIBC_2.32' not found
|
||||||
|
# On Jetson, build the Python binding from source against the apt-installed
|
||||||
|
# librealsense2 runtime (see README → "Camera vision on Jetson").
|
||||||
|
# On x86_64 / Ubuntu 22.04+ desktops, `pip install pyrealsense2` works fine.
|
||||||
|
# If pyrealsense2 is absent, CameraDaemon falls back to cv2.VideoCapture(0).
|
||||||
|
# pyrealsense2>=2.50.0 # intentionally commented — see note above
|
||||||
|
|
||||||
# Local TTS (optional — only needed for MBZUAI model)
|
# Local TTS (optional — only needed for MBZUAI model)
|
||||||
transformers>=4.40.0
|
transformers>=4.40.0
|
||||||
|
|||||||
1
vision/__init__.py
Normal file
1
vision/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
"""Vision package — camera daemon + face gallery for Gemini-side recognition."""
|
||||||
560
vision/camera.py
Normal file
560
vision/camera.py
Normal file
@ -0,0 +1,560 @@
|
|||||||
|
"""Camera daemon — single producer, in-memory frame cache.
|
||||||
|
|
||||||
|
Captures frames at fixed FPS from a RealSense (preferred) or any USB
|
||||||
|
camera (fallback), JPEG-encodes them, and caches the latest frame in
|
||||||
|
memory in two views (matches Marcus's API/camera_api.py):
|
||||||
|
|
||||||
|
- `_latest_jpeg` raw JPEG bytes — dashboard preview + frame forwarder
|
||||||
|
- `_latest_b64` base64 ASCII — frame forwarder → Gemini child stdin
|
||||||
|
|
||||||
|
Consumers:
|
||||||
|
- dashboard preview → `snapshot_jpeg()` (served as an HTTP Response)
|
||||||
|
- face enrollment → `get_fresh_frame()` for a guaranteed-current capture
|
||||||
|
- GeminiSubprocess → `get_frame_b64()`, pushed over the child's stdin
|
||||||
|
|
||||||
|
Lifecycle is driven by the Recognition tab toggle. The daemon is idle
|
||||||
|
until `start()` is called; failures in start() are non-fatal and
|
||||||
|
reported via `is_running()` / `backend`. Once running it auto-reconnects
|
||||||
|
on USB unplug / stalled frames (Marcus-style resilience), and supports
|
||||||
|
hot `reconfigure()` of resolution/FPS without a full restart.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import os
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from Project.Sanad.core.logger import get_logger
|
||||||
|
|
||||||
|
log = get_logger("camera")
|
||||||
|
|
||||||
|
# How many /dev/video* indices to scan for a USB-style color camera when
|
||||||
|
# RealSense isn't available. A RealSense exposes ~6 V4L2 nodes (depth, IR,
|
||||||
|
# color, metadata…) — the color one is rarely index 0, so we probe each
|
||||||
|
# and accept the first that yields a real 3-channel BGR frame.
|
||||||
|
_USB_SCAN_RANGE = 10
|
||||||
|
|
||||||
|
|
||||||
|
class CameraDaemon:
|
||||||
|
"""RealSense → USB fallback camera capture with in-memory frame cache."""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
width: int = 424,
|
||||||
|
height: int = 240,
|
||||||
|
fps: int = 15,
|
||||||
|
jpeg_quality: int = 70,
|
||||||
|
stale_threshold_s: float = 10.0,
|
||||||
|
reconnect_min_s: float = 2.0,
|
||||||
|
reconnect_max_s: float = 10.0,
|
||||||
|
capture_timeout_ms: int = 5000,
|
||||||
|
) -> None:
|
||||||
|
# Active profile — guarded by _reconfig_lock so reconfigure() can
|
||||||
|
# hot-swap it from another thread between capture sessions.
|
||||||
|
self._reconfig_lock = threading.Lock()
|
||||||
|
self._w = int(width)
|
||||||
|
self._h = int(height)
|
||||||
|
self._fps = int(fps)
|
||||||
|
self._q = max(10, min(95, int(jpeg_quality)))
|
||||||
|
self._reconfig_pending = False
|
||||||
|
|
||||||
|
# Resilience knobs (Marcus-style)
|
||||||
|
self._stale_s = float(stale_threshold_s)
|
||||||
|
self._reconnect_min_s = float(reconnect_min_s)
|
||||||
|
self._reconnect_max_s = float(reconnect_max_s)
|
||||||
|
self._capture_timeout_ms = int(capture_timeout_ms)
|
||||||
|
|
||||||
|
self._thread: Optional[threading.Thread] = None
|
||||||
|
self._stop = threading.Event()
|
||||||
|
self._backend: Optional[str] = None
|
||||||
|
self._lock = threading.Lock()
|
||||||
|
self._latest_jpeg: Optional[bytes] = None
|
||||||
|
self._latest_b64: Optional[str] = None
|
||||||
|
self._latest_ts: float = 0.0
|
||||||
|
self._frame_seq: int = 0
|
||||||
|
self._error: Optional[str] = None
|
||||||
|
self._reconnect_count: int = 0
|
||||||
|
|
||||||
|
# ── public API ──────────────────────────────────────────
|
||||||
|
|
||||||
|
@property
|
||||||
|
def backend(self) -> Optional[str]:
|
||||||
|
return self._backend
|
||||||
|
|
||||||
|
@property
|
||||||
|
def error(self) -> Optional[str]:
|
||||||
|
return self._error
|
||||||
|
|
||||||
|
@property
|
||||||
|
def frame_seq(self) -> int:
|
||||||
|
return self._frame_seq
|
||||||
|
|
||||||
|
def is_running(self) -> bool:
|
||||||
|
return self._thread is not None and self._thread.is_alive()
|
||||||
|
|
||||||
|
def start(self) -> bool:
|
||||||
|
"""Start capture thread. Returns True if a backend was acquired.
|
||||||
|
|
||||||
|
Initial probe is synchronous; if it fails the thread isn't spawned.
|
||||||
|
Once running, the inner loop auto-reconnects on USB unplug or
|
||||||
|
stalled frames using exponential backoff (`reconnect_min_s` ..
|
||||||
|
`reconnect_max_s`).
|
||||||
|
"""
|
||||||
|
if self.is_running():
|
||||||
|
return True
|
||||||
|
self._stop.clear()
|
||||||
|
self._error = None
|
||||||
|
self._reconnect_count = 0
|
||||||
|
|
||||||
|
# One-shot USB-2.0 negotiation diagnostic (warns operator if D435I
|
||||||
|
# came up on USB 2.0 — frame drops would be likely otherwise).
|
||||||
|
self._check_usb_version()
|
||||||
|
|
||||||
|
backend = self._probe_any()
|
||||||
|
if backend is None:
|
||||||
|
log.warning("Camera: no backend available (RealSense + USB both failed)")
|
||||||
|
self._backend = None
|
||||||
|
return False
|
||||||
|
|
||||||
|
self._backend = backend["name"]
|
||||||
|
self._thread = threading.Thread(
|
||||||
|
target=self._reconnect_loop, args=(backend,),
|
||||||
|
daemon=True, name="camera-daemon",
|
||||||
|
)
|
||||||
|
self._thread.start()
|
||||||
|
with self._reconfig_lock:
|
||||||
|
w, h, f = self._w, self._h, self._fps
|
||||||
|
log.info("Camera started (backend=%s, %dx%d @ %dfps)",
|
||||||
|
self._backend, w, h, f)
|
||||||
|
return True
|
||||||
|
|
||||||
|
def stop(self) -> None:
|
||||||
|
"""Stop the capture thread and release the hardware."""
|
||||||
|
if not self.is_running():
|
||||||
|
self._backend = None
|
||||||
|
return
|
||||||
|
self._stop.set()
|
||||||
|
t = self._thread
|
||||||
|
if t is not None:
|
||||||
|
t.join(timeout=2.0)
|
||||||
|
self._thread = None
|
||||||
|
self._backend = None
|
||||||
|
log.info("Camera stopped")
|
||||||
|
|
||||||
|
def reconfigure(self, width: Optional[int] = None, height: Optional[int] = None,
|
||||||
|
fps: Optional[int] = None, jpeg_quality: Optional[int] = None) -> dict:
|
||||||
|
"""Hot-swap the capture profile without a full stop/start.
|
||||||
|
|
||||||
|
Sets a pending flag — the capture loop notices it, tears the
|
||||||
|
pipeline down, and rebuilds at the new resolution (~0.5 s gap).
|
||||||
|
If the daemon isn't running the new values just take effect on
|
||||||
|
the next `start()`. Returns the resulting active profile.
|
||||||
|
"""
|
||||||
|
with self._reconfig_lock:
|
||||||
|
if width is not None:
|
||||||
|
self._w = int(width)
|
||||||
|
if height is not None:
|
||||||
|
self._h = int(height)
|
||||||
|
if fps is not None:
|
||||||
|
self._fps = int(fps)
|
||||||
|
if jpeg_quality is not None:
|
||||||
|
self._q = max(10, min(95, int(jpeg_quality)))
|
||||||
|
if self.is_running():
|
||||||
|
self._reconfig_pending = True
|
||||||
|
profile = {"width": self._w, "height": self._h,
|
||||||
|
"fps": self._fps, "jpeg_quality": self._q}
|
||||||
|
log.info("Camera reconfigure → %s", profile)
|
||||||
|
return profile
|
||||||
|
|
||||||
|
def snapshot_jpeg(self) -> Optional[bytes]:
|
||||||
|
"""Return the latest JPEG bytes, or None if no frame yet."""
|
||||||
|
with self._lock:
|
||||||
|
return self._latest_jpeg
|
||||||
|
|
||||||
|
def get_frame_b64(self) -> Optional[str]:
|
||||||
|
"""Return the latest frame as a base64 ASCII string (or None).
|
||||||
|
|
||||||
|
Used by the frame forwarder to push frames over the Gemini child's
|
||||||
|
stdin without re-encoding — base64 is cached alongside the JPEG.
|
||||||
|
"""
|
||||||
|
with self._lock:
|
||||||
|
return self._latest_b64
|
||||||
|
|
||||||
|
def get_fresh_frame(self, max_age_s: float = 0.5,
|
||||||
|
timeout_s: float = 1.5) -> Optional[bytes]:
|
||||||
|
"""Return a JPEG frame newer than `max_age_s`, waiting up to `timeout_s`.
|
||||||
|
|
||||||
|
Used by face enrollment so the captured frame is guaranteed to be
|
||||||
|
the *current* scene, not a stale buffer from before the user got
|
||||||
|
into position. Falls back to whatever's cached on timeout.
|
||||||
|
"""
|
||||||
|
deadline = time.time() + timeout_s
|
||||||
|
while time.time() < deadline:
|
||||||
|
with self._lock:
|
||||||
|
if (self._latest_jpeg is not None
|
||||||
|
and self._latest_ts > 0
|
||||||
|
and (time.time() - self._latest_ts) <= max_age_s):
|
||||||
|
return self._latest_jpeg
|
||||||
|
time.sleep(0.03)
|
||||||
|
with self._lock:
|
||||||
|
return self._latest_jpeg
|
||||||
|
|
||||||
|
def latest_age_s(self) -> float:
|
||||||
|
"""Seconds since last successful frame; +inf if none."""
|
||||||
|
with self._lock:
|
||||||
|
if self._latest_ts <= 0:
|
||||||
|
return float("inf")
|
||||||
|
return time.time() - self._latest_ts
|
||||||
|
|
||||||
|
def status(self) -> dict:
|
||||||
|
with self._reconfig_lock:
|
||||||
|
w, h, f, q = self._w, self._h, self._fps, self._q
|
||||||
|
# latest_age_s() is +inf until the first frame lands. inf is NOT
|
||||||
|
# JSON-serialisable by Starlette's JSONResponse (allow_nan=False) —
|
||||||
|
# leaving it as inf would 500 the /api/recognition/* routes. Map
|
||||||
|
# "running but no frame yet" and "not running" both to None.
|
||||||
|
age = self.latest_age_s()
|
||||||
|
age_s = round(age, 2) if (self.is_running() and age != float("inf")) else None
|
||||||
|
return {
|
||||||
|
"running": self.is_running(),
|
||||||
|
"backend": self._backend,
|
||||||
|
"width": w,
|
||||||
|
"height": h,
|
||||||
|
"fps": f,
|
||||||
|
"jpeg_quality": q,
|
||||||
|
"frame_seq": self._frame_seq,
|
||||||
|
"age_s": age_s,
|
||||||
|
"error": self._error,
|
||||||
|
"reconnect_count": self._reconnect_count,
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── helpers ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _probe_any(self) -> Optional[dict]:
|
||||||
|
"""Try RealSense first, then USB. Returns backend dict or None."""
|
||||||
|
b = self._probe_realsense()
|
||||||
|
if b is None:
|
||||||
|
b = self._probe_usb()
|
||||||
|
return b
|
||||||
|
|
||||||
|
def _check_usb_version(self) -> None:
|
||||||
|
"""Warn if a connected RealSense negotiated USB 2.0 (needs 3.x).
|
||||||
|
|
||||||
|
Marcus has this same check — D435I on USB 2.0 can't deliver
|
||||||
|
color+depth+IMU and the pipeline silently stalls. Catching it at
|
||||||
|
startup lets the operator fix the cable/port instead of chasing a
|
||||||
|
"no frames" loop. Diagnostic only; never blocks startup.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
import pyrealsense2 as rs # type: ignore
|
||||||
|
ctx = rs.context()
|
||||||
|
for dev in ctx.query_devices():
|
||||||
|
try:
|
||||||
|
usb_type = dev.get_info(rs.camera_info.usb_type_descriptor)
|
||||||
|
name = dev.get_info(rs.camera_info.name)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
if str(usb_type).startswith("2."):
|
||||||
|
log.warning(
|
||||||
|
"RealSense %s negotiated USB %s — expected 3.x. "
|
||||||
|
"Frame drops likely. Try a USB 3 port / shorter cable / "
|
||||||
|
"powered hub.", name, usb_type,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
log.info("RealSense %s on USB %s", name, usb_type)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# ── backend probing ─────────────────────────────────────
|
||||||
|
|
||||||
|
def _probe_realsense(self) -> Optional[dict]:
|
||||||
|
with self._reconfig_lock:
|
||||||
|
w, h, f = self._w, self._h, self._fps
|
||||||
|
try:
|
||||||
|
import pyrealsense2 as rs # type: ignore
|
||||||
|
pipeline = rs.pipeline()
|
||||||
|
cfg = rs.config()
|
||||||
|
cfg.enable_stream(rs.stream.color, w, h, rs.format.bgr8, f)
|
||||||
|
profile = pipeline.start(cfg)
|
||||||
|
return {"name": "realsense", "pipeline": pipeline, "rs": rs,
|
||||||
|
"profile": profile}
|
||||||
|
except Exception as exc:
|
||||||
|
log.info("RealSense unavailable: %s", exc)
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _open_usb_index(self, idx: int, w: int, h: int, f: int,
|
||||||
|
cv2) -> Optional[dict]:
|
||||||
|
"""Open one /dev/video<idx>, validate it yields a 3-channel frame,
|
||||||
|
and classify it as colour vs grayscale/IR.
|
||||||
|
|
||||||
|
A RealSense IR node delivers Y8 — cv2 replicates that single plane
|
||||||
|
across 3 channels, so the planes come back *bit-identical*. A real
|
||||||
|
colour sensor never produces bit-identical channels (per-channel
|
||||||
|
sensor noise differs even on a flat gray scene). That's the test.
|
||||||
|
Returns a backend dict with `is_color`, or None if the node is
|
||||||
|
unusable.
|
||||||
|
"""
|
||||||
|
cap = None
|
||||||
|
try:
|
||||||
|
cap = cv2.VideoCapture(idx)
|
||||||
|
if not cap.isOpened():
|
||||||
|
cap.release()
|
||||||
|
return None
|
||||||
|
cap.set(cv2.CAP_PROP_FRAME_WIDTH, w)
|
||||||
|
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, h)
|
||||||
|
cap.set(cv2.CAP_PROP_FPS, f)
|
||||||
|
good = None
|
||||||
|
for _ in range(5):
|
||||||
|
ok, frame = cap.read()
|
||||||
|
if (ok and frame is not None and frame.ndim == 3
|
||||||
|
and frame.shape[2] == 3):
|
||||||
|
good = frame
|
||||||
|
break
|
||||||
|
if good is None:
|
||||||
|
cap.release()
|
||||||
|
return None
|
||||||
|
is_color = not (
|
||||||
|
np.array_equal(good[:, :, 0], good[:, :, 1])
|
||||||
|
and np.array_equal(good[:, :, 1], good[:, :, 2])
|
||||||
|
)
|
||||||
|
return {"name": "usb", "cap": cap, "cv2": cv2, "index": idx,
|
||||||
|
"is_color": is_color,
|
||||||
|
"frame_wh": (good.shape[1], good.shape[0])}
|
||||||
|
except Exception as exc:
|
||||||
|
log.info("USB camera index %d: %s", idx, exc)
|
||||||
|
if cap is not None:
|
||||||
|
try:
|
||||||
|
cap.release()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _probe_usb(self) -> Optional[dict]:
|
||||||
|
"""Scan /dev/video* for a colour camera node, falling back to a
|
||||||
|
grayscale/IR node only if no colour node exists.
|
||||||
|
|
||||||
|
On a RealSense, /dev/video0 is the *depth* stream (Z16, cv2 can't
|
||||||
|
open it as a webcam); the IR nodes deliver Y8 (grayscale); the
|
||||||
|
*colour* node delivers YUYV/BGR. We can't know the index up front,
|
||||||
|
so we probe each and prefer the first genuine colour node — that's
|
||||||
|
why the dashboard preview used to come up grayscale. Pin a node
|
||||||
|
with SANAD_CAMERA_USB_INDEX=<n> to skip the scan entirely.
|
||||||
|
"""
|
||||||
|
with self._reconfig_lock:
|
||||||
|
w, h, f = self._w, self._h, self._fps
|
||||||
|
try:
|
||||||
|
import cv2 # type: ignore
|
||||||
|
except Exception as exc:
|
||||||
|
log.info("USB camera unavailable: %s", exc)
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Pinned index — accept whatever it is (colour or not).
|
||||||
|
explicit = os.environ.get("SANAD_CAMERA_USB_INDEX", "").strip()
|
||||||
|
if explicit.isdigit():
|
||||||
|
backend = self._open_usb_index(int(explicit), w, h, f, cv2)
|
||||||
|
if backend is not None:
|
||||||
|
fw, fh = backend["frame_wh"]
|
||||||
|
log.info("USB camera: pinned /dev/video%d (%dx%d, %s)",
|
||||||
|
backend["index"], fw, fh,
|
||||||
|
"colour" if backend["is_color"] else "grayscale/IR")
|
||||||
|
return backend
|
||||||
|
log.warning("USB camera: pinned index %s unusable", explicit)
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Scan — prefer a real colour node; keep the first grayscale node
|
||||||
|
# as a last resort so the camera still works if that's all there is.
|
||||||
|
gray_fallback: Optional[dict] = None
|
||||||
|
for idx in range(_USB_SCAN_RANGE):
|
||||||
|
backend = self._open_usb_index(idx, w, h, f, cv2)
|
||||||
|
if backend is None:
|
||||||
|
continue
|
||||||
|
fw, fh = backend["frame_wh"]
|
||||||
|
if backend["is_color"]:
|
||||||
|
log.info("USB camera: using /dev/video%d (colour, %dx%d)",
|
||||||
|
idx, fw, fh)
|
||||||
|
if gray_fallback is not None:
|
||||||
|
try:
|
||||||
|
gray_fallback["cap"].release()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return backend
|
||||||
|
# grayscale/IR — remember the first, release any extras
|
||||||
|
if gray_fallback is None:
|
||||||
|
gray_fallback = backend
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
backend["cap"].release()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if gray_fallback is not None:
|
||||||
|
fw, fh = gray_fallback["frame_wh"]
|
||||||
|
log.warning("USB camera: no colour node found — falling back to "
|
||||||
|
"/dev/video%d (grayscale/IR, %dx%d). For a RealSense, "
|
||||||
|
"build pyrealsense2 or pin the colour node with "
|
||||||
|
"SANAD_CAMERA_USB_INDEX.", gray_fallback["index"], fw, fh)
|
||||||
|
return gray_fallback
|
||||||
|
|
||||||
|
log.info("USB camera unavailable: no working /dev/video* node found "
|
||||||
|
"(scanned %d indices)", _USB_SCAN_RANGE)
|
||||||
|
return None
|
||||||
|
|
||||||
|
# ── main capture loop ───────────────────────────────────
|
||||||
|
|
||||||
|
def _reconnect_loop(self, initial_backend: dict) -> None:
|
||||||
|
"""Outer loop — owns reconnect with exponential backoff.
|
||||||
|
|
||||||
|
Inner `_capture_session` runs until the camera goes stale, the
|
||||||
|
stop flag is set, or a reconfigure is requested. On stall we
|
||||||
|
sleep + re-probe; on reconfigure we re-probe immediately at the
|
||||||
|
new resolution. Backoff resets after a successful session.
|
||||||
|
"""
|
||||||
|
backend = initial_backend
|
||||||
|
backoff = self._reconnect_min_s
|
||||||
|
|
||||||
|
while not self._stop.is_set():
|
||||||
|
reconfigured = False
|
||||||
|
try:
|
||||||
|
reconfigured = self._capture_session(backend)
|
||||||
|
except Exception as exc:
|
||||||
|
log.exception("Camera capture session crashed: %s", exc)
|
||||||
|
self._error = str(exc)
|
||||||
|
finally:
|
||||||
|
self._teardown(backend)
|
||||||
|
|
||||||
|
if self._stop.is_set():
|
||||||
|
break
|
||||||
|
|
||||||
|
if reconfigured:
|
||||||
|
# Fast path — rebuild immediately at the new profile.
|
||||||
|
with self._reconfig_lock:
|
||||||
|
self._reconfig_pending = False
|
||||||
|
new_backend = self._probe_any()
|
||||||
|
if new_backend is None:
|
||||||
|
self._error = "reconnecting"
|
||||||
|
log.warning("Camera reconfigure: re-probe failed — "
|
||||||
|
"retrying in %.1fs", backoff)
|
||||||
|
if self._stop.wait(backoff):
|
||||||
|
break
|
||||||
|
backoff = min(backoff * 2, self._reconnect_max_s)
|
||||||
|
continue
|
||||||
|
self._backend = new_backend["name"]
|
||||||
|
self._error = None
|
||||||
|
backend = new_backend
|
||||||
|
backoff = self._reconnect_min_s
|
||||||
|
log.info("Camera rebuilt after reconfigure (backend=%s)",
|
||||||
|
self._backend)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Capture session ended unexpectedly (stall / crash). Sleep + re-probe.
|
||||||
|
self._error = "reconnecting"
|
||||||
|
log.warning("Camera disconnected — reconnecting in %.1fs", backoff)
|
||||||
|
if self._stop.wait(backoff): # interruptible sleep
|
||||||
|
break
|
||||||
|
backoff = min(backoff * 2, self._reconnect_max_s)
|
||||||
|
|
||||||
|
new_backend = self._probe_any()
|
||||||
|
if new_backend is None:
|
||||||
|
self._backend = None
|
||||||
|
continue # stay in the loop; next iteration retries
|
||||||
|
self._backend = new_backend["name"]
|
||||||
|
self._reconnect_count += 1
|
||||||
|
self._error = None
|
||||||
|
log.info("Camera reconnected (backend=%s, attempt #%d)",
|
||||||
|
self._backend, self._reconnect_count)
|
||||||
|
backend = new_backend
|
||||||
|
backoff = self._reconnect_min_s # reset on success
|
||||||
|
|
||||||
|
def _capture_session(self, backend: dict) -> bool:
|
||||||
|
"""Inner capture loop — runs until stop, stale-frame timeout, or
|
||||||
|
a reconfigure request.
|
||||||
|
|
||||||
|
Returns True if it exited because of a reconfigure (caller rebuilds
|
||||||
|
immediately), False on a stall or clean stop.
|
||||||
|
"""
|
||||||
|
import cv2 # always available — used for JPEG encode
|
||||||
|
|
||||||
|
with self._reconfig_lock:
|
||||||
|
encode_params = [int(cv2.IMWRITE_JPEG_QUALITY), self._q]
|
||||||
|
last_frame_time = time.time()
|
||||||
|
consecutive_failures = 0
|
||||||
|
|
||||||
|
while not self._stop.is_set():
|
||||||
|
if self._reconfig_pending:
|
||||||
|
log.info("Camera reconfigure requested — rebuilding pipeline")
|
||||||
|
return True
|
||||||
|
|
||||||
|
bgr = self._read_frame(backend)
|
||||||
|
if bgr is None:
|
||||||
|
consecutive_failures += 1
|
||||||
|
age = time.time() - last_frame_time
|
||||||
|
if age > self._stale_s:
|
||||||
|
log.warning(
|
||||||
|
"Camera stalled %.1fs (%d consecutive timeouts) — "
|
||||||
|
"rebuilding pipeline", age, consecutive_failures,
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
# Intermediate warnings so degradation is visible early
|
||||||
|
if consecutive_failures in (3, 10, 30):
|
||||||
|
log.warning("Camera slow (%d failures, age %.1fs)",
|
||||||
|
consecutive_failures, age)
|
||||||
|
time.sleep(0.05)
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
ok, buf = cv2.imencode(".jpg", bgr, encode_params)
|
||||||
|
except Exception as exc:
|
||||||
|
log.warning("JPEG encode failed: %s", exc)
|
||||||
|
continue
|
||||||
|
if not ok:
|
||||||
|
continue
|
||||||
|
jpeg = bytes(buf)
|
||||||
|
b64 = base64.b64encode(jpeg).decode("ascii")
|
||||||
|
now = time.time()
|
||||||
|
with self._lock:
|
||||||
|
self._latest_jpeg = jpeg
|
||||||
|
self._latest_b64 = b64
|
||||||
|
self._latest_ts = now
|
||||||
|
self._frame_seq += 1
|
||||||
|
last_frame_time = now
|
||||||
|
consecutive_failures = 0
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _read_frame(self, backend: dict) -> Optional[np.ndarray]:
|
||||||
|
name = backend["name"]
|
||||||
|
if name == "realsense":
|
||||||
|
try:
|
||||||
|
frames = backend["pipeline"].wait_for_frames(
|
||||||
|
timeout_ms=self._capture_timeout_ms,
|
||||||
|
)
|
||||||
|
color = frames.get_color_frame()
|
||||||
|
if not color:
|
||||||
|
return None
|
||||||
|
return np.asanyarray(color.get_data())
|
||||||
|
except Exception:
|
||||||
|
# Soft path — single timeouts handled by _capture_session's
|
||||||
|
# stale-detection logic; don't spam the log per frame.
|
||||||
|
return None
|
||||||
|
elif name == "usb":
|
||||||
|
cap = backend["cap"]
|
||||||
|
ok, frame = cap.read()
|
||||||
|
if not ok or frame is None:
|
||||||
|
return None
|
||||||
|
return frame
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _teardown(self, backend: dict) -> None:
|
||||||
|
name = backend.get("name")
|
||||||
|
try:
|
||||||
|
if name == "realsense":
|
||||||
|
backend["pipeline"].stop()
|
||||||
|
elif name == "usb":
|
||||||
|
backend["cap"].release()
|
||||||
|
except Exception as exc:
|
||||||
|
log.info("Camera teardown: %s", exc)
|
||||||
363
vision/face_gallery.py
Normal file
363
vision/face_gallery.py
Normal file
@ -0,0 +1,363 @@
|
|||||||
|
"""Face gallery — pure file IO over data/faces/face_{id}/.
|
||||||
|
|
||||||
|
Layout per face:
|
||||||
|
face_{id}/
|
||||||
|
face_1.jpg ← samples (≥1 required)
|
||||||
|
face_2.jpg
|
||||||
|
face_3.png
|
||||||
|
meta.json ← optional: {"name": "...", "description": "...", "added_at": "..."}
|
||||||
|
|
||||||
|
`description` is free text the operator writes about the person ("lead
|
||||||
|
engineer, likes coffee") — it's folded into the Gemini primer turn so
|
||||||
|
Gemini can reference it when it recognises that face.
|
||||||
|
|
||||||
|
No ML — Gemini does the recognition in-context using the samples we feed it
|
||||||
|
via the primer turn. This module's only jobs are:
|
||||||
|
- enumerate enrolled faces
|
||||||
|
- serve & accept JPEG/PNG bytes per face
|
||||||
|
- rename / describe / delete / zip / load-for-primer
|
||||||
|
|
||||||
|
Thread-safe via a single internal RLock.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import threading
|
||||||
|
import zipfile
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
from Project.Sanad.core.logger import get_logger
|
||||||
|
|
||||||
|
log = get_logger("face_gallery")
|
||||||
|
|
||||||
|
|
||||||
|
_DIR_RE = re.compile(r"^face_(\d+)$")
|
||||||
|
ALLOWED_EXTS = {".jpg", ".jpeg", ".png"}
|
||||||
|
SAMPLE_NAME_RE = re.compile(r"^face_(\d+)\.(jpg|jpeg|png)$", re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class PhotoInfo:
|
||||||
|
name: str
|
||||||
|
size_bytes: int
|
||||||
|
path: Path
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class FaceEntry:
|
||||||
|
id: int
|
||||||
|
name: str | None
|
||||||
|
added_at: str | None
|
||||||
|
dir: Path
|
||||||
|
description: str | None = None
|
||||||
|
sample_paths: list[Path] = field(default_factory=list)
|
||||||
|
|
||||||
|
def to_dict(self) -> dict:
|
||||||
|
return {
|
||||||
|
"id": self.id,
|
||||||
|
"name": self.name,
|
||||||
|
"description": self.description,
|
||||||
|
"added_at": self.added_at,
|
||||||
|
"dir": str(self.dir),
|
||||||
|
"photos": [
|
||||||
|
{"name": p.name, "size_bytes": p.stat().st_size}
|
||||||
|
for p in self.sample_paths
|
||||||
|
if p.exists()
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class FaceGallery:
|
||||||
|
"""File-system backed gallery rooted at `root` (e.g. data/faces/)."""
|
||||||
|
|
||||||
|
def __init__(self, root: Path | str) -> None:
|
||||||
|
self.root = Path(root)
|
||||||
|
self._lock = threading.RLock()
|
||||||
|
|
||||||
|
# ── read ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _ensure_root(self) -> None:
|
||||||
|
self.root.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
def _iter_face_dirs(self) -> Iterable[tuple[int, Path]]:
|
||||||
|
if not self.root.exists():
|
||||||
|
return
|
||||||
|
for child in sorted(self.root.iterdir()):
|
||||||
|
if not child.is_dir():
|
||||||
|
continue
|
||||||
|
m = _DIR_RE.match(child.name)
|
||||||
|
if not m:
|
||||||
|
continue
|
||||||
|
yield int(m.group(1)), child
|
||||||
|
|
||||||
|
def _samples_in(self, face_dir: Path) -> list[Path]:
|
||||||
|
out: list[Path] = []
|
||||||
|
for p in sorted(face_dir.iterdir()):
|
||||||
|
if p.is_file() and p.suffix.lower() in ALLOWED_EXTS:
|
||||||
|
out.append(p)
|
||||||
|
return out
|
||||||
|
|
||||||
|
def _meta(self, face_dir: Path) -> tuple[str | None, str | None, str | None]:
|
||||||
|
"""Return (name, description, added_at) — any may be None."""
|
||||||
|
meta_path = face_dir / "meta.json"
|
||||||
|
if not meta_path.exists():
|
||||||
|
return None, None, None
|
||||||
|
try:
|
||||||
|
data = json.loads(meta_path.read_text(encoding="utf-8"))
|
||||||
|
except Exception:
|
||||||
|
return None, None, None
|
||||||
|
name = data.get("name")
|
||||||
|
description = data.get("description")
|
||||||
|
added = data.get("added_at")
|
||||||
|
return (name if name else None), (description if description else None), added
|
||||||
|
|
||||||
|
def list(self) -> list[FaceEntry]:
|
||||||
|
with self._lock:
|
||||||
|
entries: list[FaceEntry] = []
|
||||||
|
for face_id, face_dir in self._iter_face_dirs():
|
||||||
|
name, description, added = self._meta(face_dir)
|
||||||
|
entries.append(FaceEntry(
|
||||||
|
id=face_id,
|
||||||
|
name=name,
|
||||||
|
description=description,
|
||||||
|
added_at=added,
|
||||||
|
dir=face_dir,
|
||||||
|
sample_paths=self._samples_in(face_dir),
|
||||||
|
))
|
||||||
|
return entries
|
||||||
|
|
||||||
|
def get(self, face_id: int) -> FaceEntry | None:
|
||||||
|
with self._lock:
|
||||||
|
face_dir = self.root / f"face_{face_id}"
|
||||||
|
if not face_dir.is_dir():
|
||||||
|
return None
|
||||||
|
name, description, added = self._meta(face_dir)
|
||||||
|
return FaceEntry(
|
||||||
|
id=face_id, name=name, description=description, added_at=added,
|
||||||
|
dir=face_dir, sample_paths=self._samples_in(face_dir),
|
||||||
|
)
|
||||||
|
|
||||||
|
def get_photo(self, face_id: int, photo_name: str) -> Path | None:
|
||||||
|
with self._lock:
|
||||||
|
face_dir = self.root / f"face_{face_id}"
|
||||||
|
if not face_dir.is_dir():
|
||||||
|
return None
|
||||||
|
p = face_dir / photo_name
|
||||||
|
try:
|
||||||
|
p.resolve().relative_to(face_dir.resolve())
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
if not p.exists() or p.suffix.lower() not in ALLOWED_EXTS:
|
||||||
|
return None
|
||||||
|
return p
|
||||||
|
|
||||||
|
# ── write ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
def next_id(self) -> int:
|
||||||
|
with self._lock:
|
||||||
|
max_id = 0
|
||||||
|
for face_id, _ in self._iter_face_dirs():
|
||||||
|
if face_id > max_id:
|
||||||
|
max_id = face_id
|
||||||
|
return max_id + 1
|
||||||
|
|
||||||
|
def _next_sample_name(self, face_dir: Path, ext: str) -> str:
|
||||||
|
"""Return next free face_N.<ext> filename inside face_dir."""
|
||||||
|
existing = self._samples_in(face_dir)
|
||||||
|
max_n = 0
|
||||||
|
for p in existing:
|
||||||
|
m = SAMPLE_NAME_RE.match(p.name)
|
||||||
|
if m:
|
||||||
|
n = int(m.group(1))
|
||||||
|
if n > max_n:
|
||||||
|
max_n = n
|
||||||
|
return f"face_{max_n + 1}{ext.lower()}"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _detect_ext(jpeg_or_png: bytes) -> str:
|
||||||
|
"""Sniff PNG vs JPEG from the magic bytes."""
|
||||||
|
if len(jpeg_or_png) >= 8 and jpeg_or_png[:8] == b"\x89PNG\r\n\x1a\n":
|
||||||
|
return ".png"
|
||||||
|
return ".jpg"
|
||||||
|
|
||||||
|
def _write_meta(self, face_dir: Path, name: str | None,
|
||||||
|
description: str | None = None,
|
||||||
|
added_at: str | None = None) -> None:
|
||||||
|
meta: dict[str, str] = {}
|
||||||
|
if name:
|
||||||
|
meta["name"] = name
|
||||||
|
if description:
|
||||||
|
meta["description"] = description
|
||||||
|
meta["added_at"] = added_at or datetime.now().isoformat(timespec="seconds")
|
||||||
|
(face_dir / "meta.json").write_text(
|
||||||
|
json.dumps(meta, ensure_ascii=False, indent=2),
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
|
||||||
|
def create_face(self, image_bytes_list: list[bytes],
|
||||||
|
name: str | None = None,
|
||||||
|
description: str | None = None) -> FaceEntry:
|
||||||
|
"""Create a new face_{next_id}/ with one or more samples."""
|
||||||
|
if not image_bytes_list:
|
||||||
|
raise ValueError("create_face: empty image list")
|
||||||
|
with self._lock:
|
||||||
|
self._ensure_root()
|
||||||
|
face_id = self.next_id()
|
||||||
|
face_dir = self.root / f"face_{face_id}"
|
||||||
|
face_dir.mkdir(parents=True, exist_ok=False)
|
||||||
|
for idx, data in enumerate(image_bytes_list, start=1):
|
||||||
|
ext = self._detect_ext(data)
|
||||||
|
fname = f"face_{idx}{ext}"
|
||||||
|
(face_dir / fname).write_bytes(data)
|
||||||
|
clean_name = (name or "").strip() or None
|
||||||
|
clean_desc = (description or "").strip() or None
|
||||||
|
self._write_meta(face_dir, clean_name, clean_desc)
|
||||||
|
log.info("Created face_%d (samples=%d, name=%s, desc=%s)",
|
||||||
|
face_id, len(image_bytes_list), clean_name or "(unnamed)",
|
||||||
|
"yes" if clean_desc else "no")
|
||||||
|
return self.get(face_id) # type: ignore[return-value]
|
||||||
|
|
||||||
|
def add_photo(self, face_id: int, image_bytes: bytes) -> str:
|
||||||
|
"""Append a new sample to an existing face. Returns the filename."""
|
||||||
|
with self._lock:
|
||||||
|
face_dir = self.root / f"face_{face_id}"
|
||||||
|
if not face_dir.is_dir():
|
||||||
|
raise FileNotFoundError(f"face_{face_id} not found")
|
||||||
|
ext = self._detect_ext(image_bytes)
|
||||||
|
fname = self._next_sample_name(face_dir, ext)
|
||||||
|
(face_dir / fname).write_bytes(image_bytes)
|
||||||
|
log.info("Added sample %s to face_%d", fname, face_id)
|
||||||
|
return fname
|
||||||
|
|
||||||
|
def rename(self, face_id: int, name: str | None) -> None:
|
||||||
|
"""Update meta.json with a new name (or clear it if name is empty).
|
||||||
|
|
||||||
|
Preserves the existing description + added_at.
|
||||||
|
"""
|
||||||
|
with self._lock:
|
||||||
|
face_dir = self.root / f"face_{face_id}"
|
||||||
|
if not face_dir.is_dir():
|
||||||
|
raise FileNotFoundError(f"face_{face_id} not found")
|
||||||
|
_, description, added = self._meta(face_dir)
|
||||||
|
clean = (name or "").strip() or None
|
||||||
|
self._write_meta(face_dir, clean, description, added_at=added)
|
||||||
|
log.info("Renamed face_%d → %s", face_id, clean or "(unnamed)")
|
||||||
|
|
||||||
|
def set_description(self, face_id: int, description: str | None) -> None:
|
||||||
|
"""Update meta.json with a free-text description (or clear it).
|
||||||
|
|
||||||
|
Preserves the existing name + added_at. The description is folded
|
||||||
|
into the Gemini primer turn so Gemini can reference it.
|
||||||
|
"""
|
||||||
|
with self._lock:
|
||||||
|
face_dir = self.root / f"face_{face_id}"
|
||||||
|
if not face_dir.is_dir():
|
||||||
|
raise FileNotFoundError(f"face_{face_id} not found")
|
||||||
|
name, _, added = self._meta(face_dir)
|
||||||
|
clean = (description or "").strip() or None
|
||||||
|
self._write_meta(face_dir, name, clean, added_at=added)
|
||||||
|
log.info("Set description for face_%d (%s)", face_id,
|
||||||
|
"cleared" if not clean else f"{len(clean)} chars")
|
||||||
|
|
||||||
|
def delete_photo(self, face_id: int, photo_name: str) -> None:
|
||||||
|
"""Delete one photo. Refuses if it's the only remaining sample."""
|
||||||
|
with self._lock:
|
||||||
|
face_dir = self.root / f"face_{face_id}"
|
||||||
|
if not face_dir.is_dir():
|
||||||
|
raise FileNotFoundError(f"face_{face_id} not found")
|
||||||
|
samples = self._samples_in(face_dir)
|
||||||
|
if len(samples) <= 1:
|
||||||
|
raise ValueError(
|
||||||
|
"Cannot delete the only photo — delete the face instead."
|
||||||
|
)
|
||||||
|
target = self.get_photo(face_id, photo_name)
|
||||||
|
if target is None:
|
||||||
|
raise FileNotFoundError(f"photo {photo_name} not found")
|
||||||
|
target.unlink()
|
||||||
|
log.info("Deleted %s from face_%d", photo_name, face_id)
|
||||||
|
|
||||||
|
def delete_face(self, face_id: int) -> None:
|
||||||
|
"""Delete the entire face_{id}/ folder (including meta.json)."""
|
||||||
|
import shutil
|
||||||
|
with self._lock:
|
||||||
|
face_dir = self.root / f"face_{face_id}"
|
||||||
|
if not face_dir.is_dir():
|
||||||
|
raise FileNotFoundError(f"face_{face_id} not found")
|
||||||
|
shutil.rmtree(face_dir)
|
||||||
|
log.info("Deleted face_%d", face_id)
|
||||||
|
|
||||||
|
def zip_face(self, face_id: int) -> bytes:
|
||||||
|
"""Return the entire face_{id}/ folder packaged as a ZIP."""
|
||||||
|
with self._lock:
|
||||||
|
face_dir = self.root / f"face_{face_id}"
|
||||||
|
if not face_dir.is_dir():
|
||||||
|
raise FileNotFoundError(f"face_{face_id} not found")
|
||||||
|
buf = io.BytesIO()
|
||||||
|
with zipfile.ZipFile(buf, "w", compression=zipfile.ZIP_DEFLATED) as zf:
|
||||||
|
for p in sorted(face_dir.iterdir()):
|
||||||
|
if p.is_file():
|
||||||
|
zf.write(p, arcname=f"face_{face_id}/{p.name}")
|
||||||
|
return buf.getvalue()
|
||||||
|
|
||||||
|
# ── primer support (used by gemini/script.py) ───────────
|
||||||
|
|
||||||
|
def load_for_primer(
|
||||||
|
self, max_samples_per_face: int = 3, resize_long_side: int = 256,
|
||||||
|
) -> list[tuple[FaceEntry, list[bytes]]]:
|
||||||
|
"""Return [(FaceEntry, [jpeg_bytes,…]), …] for Gemini upload.
|
||||||
|
|
||||||
|
Resizes each sample to longest-side <= resize_long_side, re-encodes
|
||||||
|
as JPEG (q=85) to keep the token cost manageable. Falls back to
|
||||||
|
the raw bytes if PIL isn't available.
|
||||||
|
"""
|
||||||
|
entries = self.list()
|
||||||
|
if not entries:
|
||||||
|
return []
|
||||||
|
out: list[tuple[FaceEntry, list[bytes]]] = []
|
||||||
|
for e in entries:
|
||||||
|
paths = e.sample_paths[:max_samples_per_face]
|
||||||
|
jpegs: list[bytes] = []
|
||||||
|
for p in paths:
|
||||||
|
try:
|
||||||
|
raw = p.read_bytes()
|
||||||
|
except OSError:
|
||||||
|
continue
|
||||||
|
processed = self._resize_for_primer(raw, resize_long_side)
|
||||||
|
jpegs.append(processed or raw)
|
||||||
|
if jpegs:
|
||||||
|
out.append((e, jpegs))
|
||||||
|
return out
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _resize_for_primer(raw: bytes, long_side: int) -> bytes | None:
|
||||||
|
"""Resize image to longest-side ≤ long_side, re-encode JPEG q=85.
|
||||||
|
|
||||||
|
Returns None on any failure (caller falls back to raw bytes).
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from PIL import Image # type: ignore
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
img = Image.open(io.BytesIO(raw))
|
||||||
|
img.load()
|
||||||
|
if img.mode not in ("RGB", "L"):
|
||||||
|
img = img.convert("RGB")
|
||||||
|
w, h = img.size
|
||||||
|
scale = long_side / max(w, h) if max(w, h) > long_side else 1.0
|
||||||
|
if scale < 1.0:
|
||||||
|
img = img.resize(
|
||||||
|
(max(1, int(w * scale)), max(1, int(h * scale))),
|
||||||
|
Image.LANCZOS,
|
||||||
|
)
|
||||||
|
buf = io.BytesIO()
|
||||||
|
img.save(buf, format="JPEG", quality=85, optimize=True)
|
||||||
|
return buf.getvalue()
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
68
vision/recognition_state.py
Normal file
68
vision/recognition_state.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
"""Recognition state file — atomic JSON I/O shared by parent + child.
|
||||||
|
|
||||||
|
The dashboard (parent process) writes this file on every toggle / face
|
||||||
|
gallery change; the Gemini child (`gemini/script.py`) polls it at 1 Hz
|
||||||
|
to flip its in-memory flags without a session restart.
|
||||||
|
|
||||||
|
Format (data/.recognition_state.json):
|
||||||
|
{
|
||||||
|
"vision_enabled": bool,
|
||||||
|
"face_rec_enabled": bool,
|
||||||
|
"gallery_version": int # bumped on every face CRUD
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from dataclasses import asdict, dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class RecognitionState:
|
||||||
|
vision_enabled: bool = False
|
||||||
|
face_rec_enabled: bool = False
|
||||||
|
gallery_version: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
def read(path: Path) -> RecognitionState:
|
||||||
|
"""Return the persisted state, or a default if missing/corrupt."""
|
||||||
|
try:
|
||||||
|
raw = json.loads(Path(path).read_text(encoding="utf-8"))
|
||||||
|
except (FileNotFoundError, json.JSONDecodeError, OSError):
|
||||||
|
return RecognitionState()
|
||||||
|
return RecognitionState(
|
||||||
|
vision_enabled=bool(raw.get("vision_enabled", False)),
|
||||||
|
face_rec_enabled=bool(raw.get("face_rec_enabled", False)),
|
||||||
|
gallery_version=int(raw.get("gallery_version", 0)),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def write(path: Path, state: RecognitionState) -> None:
|
||||||
|
"""Write atomically via tempfile + os.replace."""
|
||||||
|
p = Path(path)
|
||||||
|
p.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
fd, tmp = tempfile.mkstemp(prefix=f".{p.name}.", suffix=".tmp", dir=str(p.parent))
|
||||||
|
try:
|
||||||
|
with os.fdopen(fd, "w", encoding="utf-8") as fh:
|
||||||
|
json.dump(asdict(state), fh, ensure_ascii=False, indent=2)
|
||||||
|
os.replace(tmp, p)
|
||||||
|
except Exception:
|
||||||
|
try:
|
||||||
|
os.unlink(tmp)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def mutate(path: Path, **changes) -> RecognitionState:
|
||||||
|
"""Read-modify-write helper. Returns the new state."""
|
||||||
|
cur = read(path)
|
||||||
|
for k, v in changes.items():
|
||||||
|
if hasattr(cur, k):
|
||||||
|
setattr(cur, k, v)
|
||||||
|
write(path, cur)
|
||||||
|
return cur
|
||||||
Loading…
x
Reference in New Issue
Block a user