Update 2026-05-15 09:39:52

2026-05-15 09:39:53 +04:00 · 2026-05-15 09:39:53 +04:00 · 811a391932
commit 811a391932
parent edddb7e0c3
16 changed files with 2723 additions and 262 deletions
--- a/README.md
+++ b/README.md
@ -7,21 +7,44 @@ JSONL macros; everything is orchestrated by a FastAPI dashboard.
 ```
 ┌────────────────────────────────────────────────────────────────────┐
 │  Dashboard (FastAPI) ── http://<robot>:8000                        │
 │  ├─ Operations         Quick-fire arm actions                      │
 │  ├─ Voice & Audio      Live Gemini, Typed Replay, Wake Phrases     │
 │  ├─ Motion & Replay    SDK actions, JSONL replays, teaching mode   │
-│  ├─ Camera & Vision    (deprecated, UI kept for compat)            │
+│  ├─ Recognition        Camera vision + face gallery (Gemini-side)  │
 │  ├─ Recordings         Skills registry, saved Gemini turns         │
 │  └─ Settings & Logs    System info, tail live log                  │
 └────────────────────────────────────────────────────────────────────┘
        │
        ├─ voice/sanad_voice.py  (subprocess — Gemini Live audio loop)
        ├─ gemini/script.py      (Gemini Live brain — audio + video + state)
        ├─ gemini/client.py      (short-session client for Typed Replay)
-        ├─ gemini/subprocess.py  (spawns+supervises sanad_voice.py)
+        ├─ gemini/subprocess.py  (spawns+supervises sanad_voice.py;
        │                         pushes camera frames + motion state
        │                         to the child over its stdin)
        ├─ vision/camera.py      (RealSense/USB capture daemon)
        ├─ vision/face_gallery.py (data/faces/ CRUD for the primer turn)
        ├─ motion/arm_controller.py  (G1 arm DDS publisher)
        ├─ voice/audio_io.py     (mic + speaker abstraction — 3 profiles)
        └─ core/brain.py         (skill dispatcher, event bus)
 ```
 ### Camera + face recognition data flow
 ```
 CameraDaemon (parent, in-memory JPEG+b64 cache)
  ├─→ dashboard /api/recognition/frame.jpg   ── snapshot_jpeg()
  └─→ GeminiSubprocess._frame_forwarder      ── get_frame_b64()
                                                 │ "frame:<b64>\n" over stdin
 ArmController ─emit→ event bus ─→ main.py ─→ live_sub.send_state()
                                                 │ "state:<json>\n" over stdin
                                                 ▼
                          gemini/script.py  _stdin_watcher thread
                            ├─ frame: → _LATEST_FRAME → _send_frame_loop →
                            │             session.send_realtime_input(video=Blob)
                            └─ state: → _STATE_PENDING → _send_state_loop →
                                          session.send_realtime_input(text=…)
 ```
 ## Quick start (on the robot)
@ -42,13 +65,14 @@ Then open `http://<robot-ip>:8000` in a browser.
 | `config.py` | Runtime constants derived from `config/*_config.json`. |
 | `config/` | Per-subsystem JSON config: `core`, `voice`, `gemini`, `motion`, `dashboard`, `local`. |
 | `core/` | Brain, skill registry, event bus, config loader, logger. |
-| `gemini/` | Gemini Live — `client.py` (one-shot), `script.py` (live brain), `subprocess.py` (supervisor). |
+| `gemini/` | Gemini Live — `client.py` (one-shot), `script.py` (live brain: audio + video + motion-state), `subprocess.py` (supervisor + stdin frame/state push). |
 | `voice/` | `sanad_voice.py` (subprocess entry), `audio_io.py` (mic/speaker), `audio_manager.py`, `local_tts.py`, `live_voice_loop.py`, `typed_replay.py`, `wake_phrase_manager.py`, `text_utils.py`, `model_script.py` (brain template). |
 | `vision/` | `camera.py` (RealSense/USB capture daemon, auto-reconnect), `face_gallery.py` (`data/faces/` CRUD), `recognition_state.py` (toggle state file I/O). |
 | `local/` | Offline pipeline skeleton — Silero VAD, Whisper, Qwen (via Ollama), CosyVoice2. Opt-in via `SANAD_VOICE_BRAIN=local`. |
 | `motion/` | `arm_controller.py` (main), `sanad_arm_controller.py`, `macro_player.py`, `macro_recorder.py`, `teaching.py`. |
 | `dashboard/` | FastAPI routes (`dashboard/routes/*.py`) + static UI (`dashboard/static/index.html`). |
-| `scripts/` | Persona files — `sanad_script.txt` (voice persona), `sanad_rule.txt`, `sanad_arm.txt` (voice→arm phrases). |
+| `scripts/` | Persona files — `sanad_v2` (voice persona), `sanad_rule.txt`, `sanad_arm.txt` (voice→arm phrases). |
-| `data/` | Runtime state — `audio/` (typed-replay WAVs), `motions/` (arm JSONL files), `recordings/` (live-captured turns), `motions/config.json` (dashboard-editable settings). |
+| `data/` | Runtime state — `audio/` (typed-replay WAVs), `motions/` (arm JSONL files), `recordings/` (live-captured turns), `faces/face_{id}/` (enrolled face galleries), `.recognition_state.json` (vision/face-rec toggle state), `motions/config.json` (dashboard-editable settings). |
 | `model/` | Place for local SpeechT5 / CosyVoice2 weights when using offline pipeline. |
 | `logs/` | Per-module rotating logs. |
@ -64,6 +88,16 @@ Then open `http://<robot-ip>:8000` in a browser.
 | `SANAD_LIVE_SCRIPT` | path | auto | Override the subprocess entry script path. |
 | `SANAD_RECORD` | `0` or `1` | `1` | Record every Gemini turn to `data/recordings/`. |
 | `SANAD_AEC_ENABLE` | `0` or `1` | `1` | Enable WebRTC AEC3 (if the Python binding is installed). |
 | `SANAD_VISION_ENABLE` | `0` or `1` | `0` | Boot default for camera vision. **Runtime truth is the Recognition-tab toggle** → `data/.recognition_state.json`, hot-applied without a restart. |
 | `SANAD_FACE_RECOGNITION_ENABLE` | `0` or `1` | `0` | Boot default for Gemini-side face recognition. Also a hot toggle. |
 | `SANAD_VISION_SEND_HZ` | float | `2` | Frames/sec the Gemini child relays to Live. |
 | `SANAD_CAMERA_WIDTH` / `_HEIGHT` / `_FPS` | int | `424` / `240` / `15` | Capture profile. Also settable per-deploy in `config/core_config.json > camera`. |
 | `SANAD_FACES_MAX_SAMPLES` | int | `3` | Max photos per person fed into the gallery primer turn (token budget). |
 > All `SANAD_VISION_*` / `SANAD_CAMERA_*` / `SANAD_FACE_*` vars are **boot
 > defaults** forwarded to the Gemini child via `LIVE_TUNE`. Once running,
 > the Recognition tab's toggles are the live source of truth — they write
 > `data/.recognition_state.json`, which the child polls at 1 Hz.
 ## Dashboard features
@ -83,6 +117,25 @@ Quick-fire SDK + JSONL arm actions (chip buttons), gestural speaking toggle.
 - **Replay Manager** — upload `.jsonl` files, test-play with speed, Teaching Mode (kinesthetic record).
 - **Macro Recorder** — Record new audio+motion pair, OR pick any WAV + any motion (SDK or JSONL) and Play them in parallel.
 ### Recognition
 Camera vision + Gemini-side face recognition. Both are **off by default**;
 each is a **hot toggle** — flipping it takes effect on the running Gemini
 session within ~1 s, no restart.
 - **Camera Vision** — when on, the `CameraDaemon` captures from a RealSense
  (preferred) or USB camera and the supervisor streams JPEG frames to
  Gemini Live so it can answer "what do you see?". Live preview panel.
 - **Face Recognition** — manage `data/faces/face_{id}/` galleries: enroll
  from the live camera or upload photos, rename, download (per-photo or
  ZIP), delete. At session start (and on any gallery change) the child
  sends a **primer turn** carrying every enrolled face + a Khaleeji
  greeting instruction — Gemini itself does the matching in-context, so
  there's **no local face-recognition model**. Recognition needs vision on.
 - **Sync Gallery** — force-resend the primer to the live session.
 The camera daemon auto-reconnects on USB unplug / stalled frames and warns
 if a RealSense has negotiated only a USB 2.0 link (resilience logic ported from Marcus).
 ### Recordings
 Skill Registry (predefined audio+motion skills from `skills.json`) + Saved Records (Gemini turn recordings).
@ -94,6 +147,48 @@ Skill Registry (predefined audio+motion skills from `skills.json`) + Saved Recor
 - **Supervisor contract**: each brain ships a sibling supervisor (e.g., `gemini/subprocess.py`) that spawns `sanad_voice.py` with its `SANAD_VOICE_BRAIN` env var and parses the brain's log markers. Template: `voice/model_subprocess.py`.
 - **Audio routing**: the G1's platform-sound PulseAudio sink is NOT wired to a physical speaker. All dashboard-triggered playback (`play_wav`, typed-replay audio, record playback) routes through DDS `AudioClient.PlayStream` via `audio_manager._play_pcm_via_g1`. The PyAudio path is kept as a desktop/dev fallback only.
 - **Arm replay**: `motion/arm_controller.py:_replay_file_inner()` is a verbatim port of `G1_Lootah/Manual_Recorder/g1_replay_v4_stable.py:Run()` — ramp-in → settle hold → playback → smooth return → disable SDK. Cancel breaks the play loop; `_return_home()` runs unconditionally afterwards for a jerk-free return.
 - **Camera frame transport (stdin push)**: the `CameraDaemon` lives in the parent and caches frames in memory. `GeminiSubprocess` runs a `_frame_forwarder` thread that base64-encodes the latest frame and writes `frame:<b64>\n` to the child's stdin (~2 fps). The child's `_stdin_watcher` thread decodes into `_LATEST_FRAME`; `_send_frame_loop` relays it to Gemini Live with a staleness guard. This is the Marcus pattern — chosen over a file drop so the parent owns the camera once and the dashboard preview reads the same in-memory cache.
 - **Motion-state channel**: `arm_controller._execute()` emits `motion.action_started` / `_done` / `_error` on the event bus. `main.py` forwards each to `live_sub.send_state()`, which writes `state:<json>\n` to the child's stdin. The child injects `[STATE-START] wave_hand`, `[STATE-DONE] wave_hand (2.3s)`, etc. into Gemini Live as silent text context (`send_realtime_input(text=…)`) so it can honestly answer "what are you doing?".
 - **Face recognition is Gemini-side**: no dlib/insightface/onnxruntime. `vision/face_gallery.py` is pure file IO over `data/faces/face_{id}/` (`face_N.jpg|png` samples + optional `meta.json` with a `name`). At session start (and on any gallery change) `gemini/script.py:_send_gallery_primer()` builds one multimodal `send_client_content` turn — every enrolled face's photos + a greeting instruction — and Gemini matches incoming frames against it in-context.
 ## Camera vision on Jetson
 The Recognition tab needs `pyrealsense2` to talk to the Intel RealSense.
 **Do not `pip install pyrealsense2` on JetPack 5** — the PyPI wheel is built
 against glibc 2.32+ (Ubuntu 22.04) and fails to load on JetPack 5's glibc
 2.31 with `ImportError: ... version 'GLIBC_2.32' not found`.
 The native runtime is already there (`apt`-installed `librealsense2`). Build
 just the Python binding from source against it, into the `gemini_sdk` env:
 ```bash
 rs-enumerate-devices            # confirm the D435I shows up at OS level first
 source ~/miniconda3/etc/profile.d/conda.sh && conda activate gemini_sdk
 pip uninstall -y pyrealsense2   # remove the broken wheel if present
 sudo apt install -y cmake build-essential git python3-dev libusb-1.0-0-dev pkg-config libssl-dev
 cd /tmp && rm -rf librealsense
 git clone --depth=1 --branch v2.56.5 https://github.com/IntelRealSense/librealsense.git
 cd librealsense && mkdir -p build && cd build
 cmake .. -DBUILD_PYTHON_BINDINGS=ON -DPYTHON_EXECUTABLE=$(which python3) \
         -DBUILD_EXAMPLES=OFF -DBUILD_GRAPHICAL_EXAMPLES=OFF \
         -DBUILD_UNIT_TESTS=OFF -DCHECK_FOR_UPDATES=OFF -DCMAKE_BUILD_TYPE=Release
 make -j$(nproc) pyrealsense2
 SITE=$(python3 -c "import sysconfig; print(sysconfig.get_paths()['purelib'])")
 mkdir -p "$SITE/pyrealsense2"
 cp wrappers/python/pyrealsense2*.so "$SITE/pyrealsense2/"
 cp ../wrappers/python/pyrealsense2/__init__.py "$SITE/pyrealsense2/" 2>/dev/null || true
 python3 -c 'import pyrealsense2 as rs; print([d.get_info(rs.camera_info.name) for d in rs.context().query_devices()])'
 ```
 Match the `--branch` tag to the installed runtime (`dpkg -l | grep librealsense2`).
 If the build isn't worth it, `CameraDaemon` falls back to `cv2.VideoCapture(0)`
 automatically — fine for a plain USB webcam, but note a RealSense exposes its
 *depth* stream at `/dev/video0`, not RGB, so a real USB cam is the cleaner
 fallback. On x86_64 / Ubuntu 22.04+ desktops, `pip install pyrealsense2` just works.
 ## Dynamic paths
@ -133,7 +228,10 @@ Then on the robot: `Ctrl+C` the running `main.py` and re-run.
 | Record playback silent | `audio_mgr.play_wav` only routes to G1 DDS if the Unitree SDK is importable; on desktop it falls back to the PulseAudio sink. |
 | Live Voice Commands transcript stuck | Deferred trigger was queued but `trigger_enabled` toggle was off. Toggle on — or the pending-trigger poll now fires it automatically once enabled. |
 | Gemini "no audio" on Typed Replay | Non-deterministic; the retry chain in `voice/typed_replay.py:generate_audio` tries three prompt variants. For reliable TTS, use the offline `local_tts` SpeechT5 path. |
-| Dashboard `Not Found` 404s for `/api/vision/*` | Vision module was deleted; HTML still has stale fetches for a few endpoints. Cosmetic — `dashboard/static/index.html` init block already skips most. |
+| Recognition tab: "Camera could not start (no backend)" | No camera backend acquired. Check `rs-enumerate-devices` (RealSense at OS level) and `python3 -c 'import pyrealsense2'` in the `gemini_sdk` env. The glibc `ImportError` means the pip wheel is incompatible — see "Camera vision on Jetson" above. |
 | Camera badge stuck on "reconnecting…" | `CameraDaemon` lost the device and is retrying with exponential backoff. Re-seat the USB 3 cable; check `logs/camera.log` for the USB-2.0 warning. |
 | Gemini doesn't greet an enrolled face | Face Recognition toggle on? Vision on? (Face rec needs frames.) Check `logs/gemini_brain.log` for `face gallery primed: N person(s)`. Hit "Sync Gallery" to force a re-prime. |
 | Gemini unaware of motion state | The `motion.action_*` → `send_state` chain only runs when Live Gemini is up. Check `logs/gemini_subprocess.log` and `logs/gemini_brain.log` for `STATE injected:` lines. |
 ## License / attribution
@ -142,4 +240,7 @@ Internal project for YS Lootah Technology. Reuses/ports patterns from:
 - `G1_Lootah/Manual_Recorder/g1_replay_v4_stable.py` (arm replay math)
 - `SanadVoice/gemini_interact` (arm-phrase dispatch, skill registry)
 - `SanadVoice/gemini_voice_v2` (local SpeechT5 TTS)
 - `Project/Marcus` — camera→Gemini stdin-push transport, motion-state
  injection, camera daemon resilience (auto-reconnect, USB-2.0 warning),
  and the `API/camera_api.py` cache shape (`get_frame_b64` / `get_fresh_frame`).
 - Unitree `unitree_sdk2py` (G1 low-level SDK, LocoClient, G1ArmActionClient)
--- a/config.py
+++ b/config.py
@ -341,6 +341,24 @@ LIVE_TUNE: dict[str, str] = {
    # G1 built-in mic — UDP multicast 239.168.123.161:5555.
    # Requires wake-up conversation mode ON in Unitree app.
    "SANAD_USE_G1_MIC": "1",
    # ── Recognition (camera vision + face recognition) ──
    # All of these are BOOT defaults. The runtime source of truth is the
    # state file data/.recognition_state.json — toggled live from the
    # Recognition tab and polled by the Gemini child at 1 Hz.
    "SANAD_VISION_ENABLE":            "0",
    "SANAD_VISION_SEND_HZ":           "2",
    "SANAD_VISION_STALE_MS":          "1500",
    "SANAD_CAMERA_WIDTH":             "424",
    "SANAD_CAMERA_HEIGHT":            "240",
    "SANAD_CAMERA_FPS":               "15",
    "SANAD_CAMERA_JPEG_QUALITY":      "70",
    "SANAD_FACE_RECOGNITION_ENABLE":  "0",
    "SANAD_FACES_DIR":                str(DATA_DIR / "faces"),
    "SANAD_FACES_MAX_SAMPLES":        "3",
    "SANAD_FACES_PRIMER_RESIZE":      "256",
    "SANAD_RECOGNITION_STATE_PATH":   str(DATA_DIR / ".recognition_state.json"),
    "SANAD_RECOGNITION_POLL_S":       "1.0",
 }
 # -- Camera --
--- a/config/core_config.json
+++ b/config/core_config.json
@ -76,5 +76,26 @@
  "dds": {
    "network_interface_default": "eth0"
  },
  "camera": {
    "_comment": "Recognition tab camera daemon (parent process reads this). width/height/fps/jpeg_quality + the reconnect knobs configure CameraDaemon. Frames are cached in memory and pushed to the Gemini child over its stdin (no file drop). send_hz/stale_ms are read by the Gemini child via SANAD_VISION_SEND_HZ / SANAD_VISION_STALE_MS env vars (LIVE_TUNE).",
    "width": 424,
    "height": 240,
    "fps": 15,
    "jpeg_quality": 70,
    "send_hz": 2,
    "stale_ms": 1500,
    "stale_threshold_s": 10.0,
    "reconnect_min_s": 2.0,
    "reconnect_max_s": 10.0,
    "capture_timeout_ms": 5000
  },
  "faces": {
    "_comment": "Face gallery for Gemini-side recognition. Folder layout: data/faces/face_{id}/{face_1.jpg, ...} + optional meta.json {\"name\": \"...\"}. Gemini does the matching — no local ML model.",
    "dir_rel": "data/faces",
    "max_samples_per_face": 3,
    "primer_resize_long_side": 256
  }
 }
--- a/config/gemini_config.json
+++ b/config/gemini_config.json
@ -10,12 +10,13 @@
  },
  "subprocess": {
-    "_comment": "gemini/subprocess.py — GeminiSubprocess supervisor. Spawns voice/sanad_voice.py as a child, tails stdout for Gemini-specific log markers, exposes transcript + state to the dashboard.",
+    "_comment": "gemini/subprocess.py — GeminiSubprocess supervisor. Spawns voice/sanad_voice.py as a child, tails stdout for Gemini-specific log markers, pushes camera frames + motion state to the child over its stdin, exposes transcript + state to the dashboard.",
    "log_tail_size": 2000,
    "transcript_tail_size": 30,
    "log_name": "gemini_subprocess",
    "stop_timeout_sec": 3.0,
    "terminate_timeout_sec": 2.0,
    "frame_forward_interval_sec": 0.5,
    "noisy_prefixes": [
      "ALSA lib ",
      "Expression 'alsa_",
--- a/dashboard/app.py
+++ b/dashboard/app.py
@ -50,6 +50,7 @@ _REST_ROUTES: list[tuple[str, str, str]] = [
    ("live_voice",      "/api/live-voice",      "live-voice"),
    ("live_subprocess", "/api/live-subprocess", "live-subprocess"),
    ("typed_replay",    "/api/typed-replay",    "typed-replay"),
    ("recognition",     "/api/recognition",     "recognition"),
 ]
 _WS_ROUTES: list[str] = ["log_stream"]
--- a/dashboard/routes/recognition.py
+++ b/dashboard/routes/recognition.py
@ -0,0 +1,457 @@
 """Recognition tab — camera vision + face gallery + hot toggles.
 Single router covering:
  - Vision / Face Recognition toggles (hot — no Gemini restart needed)
  - Live camera preview (latest JPEG from the camera daemon's in-memory cache)
  - Face gallery CRUD: enroll, upload, capture, rename, delete, ZIP
  - Per-photo download + delete
 Toggle changes write data/.recognition_state.json atomically. The Gemini
 child polls that file at 1 Hz and applies changes mid-session.
 """
 from __future__ import annotations
 import io
 from typing import Optional
 from fastapi import APIRouter, File, HTTPException, Query, UploadFile
 from fastapi.responses import FileResponse, Response, StreamingResponse
 from pydantic import BaseModel
 from Project.Sanad.config import BASE_DIR
 from Project.Sanad.core.logger import get_logger
 from Project.Sanad.dashboard.routes._safe_io import check_upload_size
 from Project.Sanad.vision import recognition_state
 log = get_logger("recognition_routes")
 router = APIRouter()
 # ── paths (resolved from BASE_DIR) ──────────────────────────
 STATE_PATH = BASE_DIR / "data" / ".recognition_state.json"
 FACES_DIR = BASE_DIR / "data" / "faces"
 ALLOWED_IMAGE_EXTS = {".jpg", ".jpeg", ".png"}
 # ── helpers ─────────────────────────────────────────────────
 def _get_camera():
     """Return the shared camera object exported by ``Project.Sanad.main``.

     The import happens at call time rather than module load so that loading
     the dashboard does not trigger a circular import. Any exception raised
     while importing is swallowed and reported to the caller as ``None``.
     """
     try:
         from Project.Sanad.main import camera as cam  # type: ignore
     except Exception:
         cam = None
     return cam
 def _get_gallery():
     """Return the shared face gallery exported by ``Project.Sanad.main``.

     Imported lazily (at call time) for the same circular-import reason as
     the camera accessor. Any exception raised while importing is swallowed
     and reported to the caller as ``None``.
     """
     try:
         from Project.Sanad.main import gallery as found  # type: ignore
     except Exception:
         found = None
     return found
 def _bump_and_write_state(**changes) -> recognition_state.RecognitionState:
     """Persist toggle changes to the recognition state file.

     The keyword arguments (e.g. ``vision_enabled``, ``face_rec_enabled``)
     are forwarded unchanged to ``recognition_state.mutate`` together with
     ``STATE_PATH``; whatever ``mutate`` returns is handed back to the caller.
     """
     updated = recognition_state.mutate(STATE_PATH, **changes)
     return updated
 def _bump_gallery_version() -> int:
     """Increment the persisted ``gallery_version`` by one and return it.

     Reads the current state from ``STATE_PATH``, writes back the incremented
     counter through ``recognition_state.mutate`` and returns the new value.
     """
     bumped = recognition_state.read(STATE_PATH).gallery_version + 1
     recognition_state.mutate(STATE_PATH, gallery_version=bumped)
     return bumped
 # ── state + toggles ─────────────────────────────────────────
@router.get("/state")
async def get_state():
    """Return the current toggle/camera/gallery state.

    The response combines the persisted toggles (``vision_enabled``,
    ``face_rec_enabled``, ``gallery_version``) read from ``STATE_PATH`` with
    the camera's ``status()`` dict and the gallery's face/photo counts.

    When the camera subsystem cannot be imported, a placeholder status with
    ``running: False`` is returned instead. Gallery counting is best-effort:
    a failure is logged and the counts keep whatever was computed so far
    (zero by default) so the endpoint still answers.
    """
    st = recognition_state.read(STATE_PATH)
    cam = _get_camera()
    gallery = _get_gallery()
    faces_count = 0
    photos_count = 0
    if gallery is not None:
        try:
            entries = gallery.list()
            faces_count = len(entries)
            photos_count = sum(len(e.sample_paths) for e in entries)
        except Exception as exc:
            # Deliberately non-fatal, but no longer silent: a broken gallery
            # would otherwise be indistinguishable from an empty one.
            log.warning("gallery count failed, reporting partial counts: %s", exc)
    return {
        "vision_enabled": st.vision_enabled,
        "face_rec_enabled": st.face_rec_enabled,
        "gallery_version": st.gallery_version,
        "camera": cam.status() if cam is not None else {
            "running": False, "backend": None, "error": "camera subsystem unavailable"
        },
        "faces_count": faces_count,
        "photos_count": photos_count,
    }
@router.post("/vision")
async def set_vision(on: bool = Query(...)):
    """Switch camera vision on or off without restarting Gemini.

    Starts the camera when turning on (if it is not already running) and
    stops it when turning off (if it is running), then persists the toggle.

    Raises:
        HTTPException 503: the camera subsystem is unavailable, or the
            camera failed to start (``vision_enabled`` is persisted as
            ``False`` in that case before raising).
    """
    cam = _get_camera()
    if cam is None:
        log.warning("vision toggle requested but camera subsystem unavailable")
        raise HTTPException(503, "Camera subsystem not available.")

    running = cam.is_running()
    if on:
        if not running and not cam.start():
            log.warning("vision ON requested but camera.start() failed: %s",
                        cam.error or "no backend")
            _bump_and_write_state(vision_enabled=False)
            detail = f"Camera could not start (no backend). {cam.error or ''}"
            raise HTTPException(503, detail)
    elif running:
        cam.stop()

    st = _bump_and_write_state(vision_enabled=bool(on))
    label = "ON" if on else "OFF"
    backend = cam.backend if cam.is_running() else "none"
    log.info("vision %s (backend=%s)", label, backend)
    return {
        "ok": True,
        "vision_enabled": st.vision_enabled,
        "camera": cam.status(),
    }
@router.post("/face-rec")
async def set_face_rec(on: bool = Query(...)):
    """Switch face recognition on or off without restarting Gemini.

    Only the persisted ``face_rec_enabled`` flag is written here. By this
    module's design the Gemini child notices the change within ~1 s: ON
    re-sends the gallery primer and tells Gemini it may recognise people,
    OFF tells Gemini to disregard the gallery and stop identifying anyone.
    Both apply to the live session, so no reconnect is needed.
    """
    enabled = bool(on)
    st = _bump_and_write_state(face_rec_enabled=enabled)
    label = "ON" if on else "OFF"
    log.info("face recognition %s", label)
    return {"ok": True, "face_rec_enabled": st.face_rec_enabled}
@router.post("/sync")
async def sync_gallery():
    """Force a gallery re-sync by incrementing ``gallery_version``.

    The bumped version is what prompts the child to re-send the primer
    when face recognition is ON.
    """
    version = _bump_gallery_version()
    log.info("gallery sync requested → v.%d", version)
    return {"ok": True, "gallery_version": version}
 # ── live preview ────────────────────────────────────────────
@router.get("/frame.jpg")
async def latest_frame():
    """Return the newest camera frame as an uncached ``image/jpeg`` response.

    The bytes come from the camera object's ``snapshot_jpeg()`` (the
    daemon's in-memory cache; no file drop is involved).

    Raises:
        HTTPException 503: the camera subsystem is unavailable.
        HTTPException 404: no frame has been captured yet.
    """
    cam = _get_camera()
    if cam is None:
        raise HTTPException(503, "Camera subsystem unavailable.")

    frame_bytes = cam.snapshot_jpeg()
    if frame_bytes:
        no_cache = {"Cache-Control": "no-store, must-revalidate"}
        return Response(content=frame_bytes, media_type="image/jpeg",
                        headers=no_cache)
    raise HTTPException(404, "No frame captured yet.")
 # ── camera resolution / quality ─────────────────────────────
 class CameraConfigPayload(BaseModel):
    # Request body for POST /camera-config. Every field is optional; the
    # route range-checks only the fields that are not None and forwards all
    # four to the camera's reconfigure().
    # NOTE(review): presumably None means "leave this setting unchanged" —
    # confirm against CameraDaemon.reconfigure.
    # Capture width; the route accepts 160–1920.
    width: Optional[int] = None
    # Capture height; the route accepts 120–1080.
    height: Optional[int] = None
    # Capture frame rate; the route accepts 1–60.
    fps: Optional[int] = None
    # JPEG encoder quality; the route accepts 10–95.
    jpeg_quality: Optional[int] = None
@router.post("/camera-config")
async def set_camera_config(payload: CameraConfigPayload):
    """Apply a new capture profile (resolution / fps / JPEG quality).

    Each supplied field is range-checked before anything is handed to the
    camera, so an out-of-range value cannot wedge the daemon. Per the
    daemon's contract, a running camera rebuilds its pipeline at the new
    profile (~0.5 s gap); an idle one applies the values on its next start.

    Raises:
        HTTPException 503: the camera subsystem is unavailable.
        HTTPException 400: a supplied field is outside its allowed range.
    """
    cam = _get_camera()
    if cam is None:
        raise HTTPException(503, "Camera subsystem unavailable.")

    # (field label, supplied value, inclusive lower bound, inclusive upper bound)
    limits = (
        ("width", payload.width, 160, 1920),
        ("height", payload.height, 120, 1080),
        ("fps", payload.fps, 1, 60),
        ("jpeg_quality", payload.jpeg_quality, 10, 95),
    )
    for field, value, low, high in limits:
        if value is None:
            continue
        if value < low or value > high:
            raise HTTPException(400, f"{field} out of range ({low}–{high})")

    profile = cam.reconfigure(
        width=payload.width,
        height=payload.height,
        fps=payload.fps,
        jpeg_quality=payload.jpeg_quality,
    )
    log.info("camera reconfigured via dashboard → %s", profile)
    return {"ok": True, "profile": profile, "camera": cam.status()}
 # ── face gallery routes ─────────────────────────────────────
 def _validate_image(content: bytes, filename: str | None = None) -> None:
     """Accept only JPEG or PNG payloads of a plausible size.

     The size limit is enforced by ``check_upload_size``; after that the
     payload must be at least 16 bytes long and begin with the JPEG or PNG
     magic bytes.

     Args:
         content: Raw bytes of the uploaded image.
         filename: Original file name, used only in the error message.

     Raises:
         HTTPException 400: the payload is too short or is not JPEG/PNG.
     """
     check_upload_size(content)
     if len(content) < 16:
         raise HTTPException(400, "Image too small / empty.")

     jpeg_magic = b"\xff\xd8\xff"
     png_magic = b"\x89PNG\r\n\x1a\n"
     if content.startswith((jpeg_magic, png_magic)):
         return

     shown = filename or 'unknown'
     raise HTTPException(
         400,
         f"Only JPEG/PNG accepted (got {shown}).",
     )
 def _entry_to_dict(entry) -> dict:
     """Convert a gallery entry into the JSON-friendly dict the API returns.

     Each path in ``entry.sample_paths`` becomes ``{"name", "size_bytes"}``;
     a sample whose ``stat()`` raises ``OSError`` is left out of the list.
     """
     photos = []
     for sample in entry.sample_paths:
         try:
             size = sample.stat().st_size
         except OSError:
             # The file cannot be stat'ed: omit it rather than fail the call.
             continue
         photos.append({"name": sample.name, "size_bytes": size})

     result = {
         "id": entry.id,
         "name": entry.name,
         "description": entry.description,
         "added_at": entry.added_at,
         "photos": photos,
     }
     return result
@router.get("/faces")
async def list_faces():
    """List every enrolled face with its photos, plus the total count.

    Raises:
        HTTPException 503: the face gallery subsystem is unavailable.
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")

    entries = gallery.list()
    faces = [_entry_to_dict(item) for item in entries]
    return {"faces": faces, "total": len(entries)}
 class RenamePayload(BaseModel):
    # Request body for POST /faces/{face_id}/rename; the value is passed
    # straight to gallery.rename(). A falsy name is logged as "(unnamed)".
    # NOTE(review): presumably None clears the stored name — confirm
    # against the gallery's rename().
    name: Optional[str] = None
 class DescribePayload(BaseModel):
    # Request body for POST /faces/{face_id}/describe; the value is passed
    # straight to gallery.set_description(). A falsy description is logged
    # as "cleared", anything else as "set".
    description: Optional[str] = None
@router.post("/faces/enroll")
async def enroll_from_camera(name: Optional[str] = Query(default=None),
                             description: Optional[str] = Query(default=None)):
    """Create a new face from the camera's latest snapshot.

    Args:
        name: Optional display name. Surrounding whitespace is stripped and
            a blank / whitespace-only value is treated as "no name".
        description: Optional free-text description, normalised the same way.

    Returns:
        ``{"ok": True, "face": <entry dict>}`` for the newly created face.

    Raises:
        HTTPException 503: the face gallery subsystem is unavailable.
        HTTPException 409: the camera is not running or has no fresh frame.
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    cam = _get_camera()
    if cam is None or not cam.is_running():
        raise HTTPException(409, "Camera is not running. Toggle Vision ON first.")
    # get_fresh_frame waits briefly for a current frame so the enrolled
    # photo is the scene the user is posing for, not a stale buffer.
    # NOTE(review): this is a synchronous call inside an async handler, so
    # it may hold the event loop for up to timeout_s — confirm acceptable.
    jpeg = cam.get_fresh_frame(max_age_s=0.5, timeout_s=1.5)
    if not jpeg:
        raise HTTPException(409, "Camera has no frame yet. Wait a moment and retry.")

    # Collapse blank / whitespace-only input to None so an "unnamed" face is
    # never stored with an empty-string name or description.
    clean_name = (name or "").strip() or None
    clean_desc = (description or "").strip() or None

    entry = gallery.create_face(
        [jpeg],
        name=clean_name,
        description=clean_desc,
    )
    v = _bump_gallery_version()
    log.info("enrolled face_%d via camera (name=%s, desc=%s, v.%d)",
             entry.id, clean_name or "(unnamed)",
             "yes" if clean_desc else "no", v)
    return {"ok": True, "face": _entry_to_dict(entry)}
@router.post("/faces/upload")
async def enroll_from_upload(
    files: list[UploadFile] = File(...),
    name: Optional[str] = Query(default=None),
    description: Optional[str] = Query(default=None),
):
    """Create a new face from uploaded image file(s).

    Every file is read and validated (size + JPEG/PNG magic bytes) before
    the face is created, so one invalid upload rejects the whole request
    without touching the gallery.

    Args:
        files: One or more image uploads.
        name: Optional display name. Surrounding whitespace is stripped and
            a blank / whitespace-only value is treated as "no name".
        description: Optional free-text description, normalised the same way.

    Returns:
        ``{"ok": True, "face": <entry dict>}`` for the newly created face.

    Raises:
        HTTPException 503: the face gallery subsystem is unavailable.
        HTTPException 400: no files were supplied, or a file failed validation.
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    if not files:
        raise HTTPException(400, "At least one image file required.")

    image_bytes: list[bytes] = []
    for f in files:
        content = await f.read()
        _validate_image(content, f.filename)
        image_bytes.append(content)

    # Collapse blank / whitespace-only input to None so an "unnamed" face is
    # never stored with an empty-string name or description.
    clean_name = (name or "").strip() or None
    clean_desc = (description or "").strip() or None

    entry = gallery.create_face(
        image_bytes,
        name=clean_name,
        description=clean_desc,
    )
    v = _bump_gallery_version()
    log.info("enrolled face_%d via upload (%d photos, name=%s, desc=%s, v.%d)",
             entry.id, len(image_bytes), clean_name or "(unnamed)",
             "yes" if clean_desc else "no", v)
    return {"ok": True, "face": _entry_to_dict(entry)}
@router.post("/faces/{face_id}/capture")
async def capture_to_face(face_id: int):
    """Append a fresh camera snapshot to an already-enrolled face.

    Raises:
        HTTPException 503: the face gallery subsystem is unavailable.
        HTTPException 409: the camera is not running or has no fresh frame.
        HTTPException 404: the gallery reports the face as missing.
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")

    cam = _get_camera()
    camera_ready = cam is not None and cam.is_running()
    if not camera_ready:
        raise HTTPException(409, "Camera is not running. Toggle Vision ON first.")

    frame = cam.get_fresh_frame(max_age_s=0.5, timeout_s=1.5)
    if not frame:
        raise HTTPException(409, "Camera has no frame yet.")

    try:
        fname = gallery.add_photo(face_id, frame)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc))

    version = _bump_gallery_version()
    log.info("captured new photo for face_%d → %s (v.%d)", face_id, fname, version)
    face = _entry_to_dict(gallery.get(face_id))
    return {"ok": True, "added": fname, "face": face}
@router.post("/faces/{face_id}/upload")
async def upload_to_face(face_id: int, files: list[UploadFile] = File(...)):
    """Add one or more uploaded samples to an existing face.

    All uploads are read and validated *before* any photo is written.
    Previously validation and storage were interleaved, so a bad second
    file returned 400 after the first photo had already been stored — and
    without bumping ``gallery_version``, leaving the running session
    unaware of the change.

    Args:
        face_id: Numeric id of the target face.
        files: One or more image uploads (JPEG/PNG). They are held in memory
            together until stored; each is bounded by the upload size check.

    Returns:
        ``{"ok": True, "added": [<file names>], "face": <entry dict>}``.

    Raises:
        HTTPException 503: the face gallery subsystem is unavailable.
        HTTPException 404: the face does not exist (or vanished mid-request).
        HTTPException 400: a file failed validation; nothing is stored.
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    if gallery.get(face_id) is None:
        raise HTTPException(404, f"face_{face_id} not found")

    # Phase 1: read + validate everything; any failure aborts with no writes.
    payloads: list[bytes] = []
    for f in files:
        content = await f.read()
        _validate_image(content, f.filename)
        payloads.append(content)

    # Phase 2: store the validated photos.
    added: list[str] = []
    try:
        for content in payloads:
            added.append(gallery.add_photo(face_id, content))
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc)) from exc

    v = _bump_gallery_version()
    log.info("uploaded %d photo(s) to face_%d (v.%d)", len(added), face_id, v)
    return {"ok": True, "added": added,
            "face": _entry_to_dict(gallery.get(face_id))}
@router.post("/faces/{face_id}/rename")
async def rename_face(face_id: int, payload: RenamePayload):
    """Rename an enrolled face and bump the gallery version.

    Raises:
        HTTPException 503: the face gallery subsystem is unavailable.
        HTTPException 404: the gallery reports the face as missing.
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")

    new_name = payload.name
    try:
        gallery.rename(face_id, new_name)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc))

    version = _bump_gallery_version()
    log.info("renamed face_%d → %s (v.%d)", face_id,
             new_name or "(unnamed)", version)
    face = _entry_to_dict(gallery.get(face_id))
    return {"ok": True, "face": face}
@router.post("/faces/{face_id}/describe")
async def describe_face(face_id: int, payload: DescribePayload):
    """Set or clear the free-text description attached to a face.

    The description is folded into the Gemini primer turn so Gemini can
    reference it; the gallery version is bumped after the change.

    Raises:
        HTTPException 503: the face gallery subsystem is unavailable.
        HTTPException 404: the gallery reports the face as missing.
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")

    text = payload.description
    try:
        gallery.set_description(face_id, text)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc))

    version = _bump_gallery_version()
    outcome = "set" if text else "cleared"
    log.info("described face_%d (%s, v.%d)", face_id, outcome, version)
    face = _entry_to_dict(gallery.get(face_id))
    return {"ok": True, "face": face}
@router.delete("/faces/{face_id}")
async def delete_face(face_id: int):
    """Delete an enrolled face and report the deleted id.

    Raises:
        HTTPException: 503 when no gallery is available, 404 when the
            gallery raises ``FileNotFoundError`` for ``face_id``.
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    try:
        gallery.delete_face(face_id)
    except FileNotFoundError as exc:
        # Chain the cause so the gallery traceback is kept for debugging.
        raise HTTPException(404, str(exc)) from exc
    v = _bump_gallery_version()
    log.info("deleted face_%d (v.%d)", face_id, v)
    return {"ok": True, "deleted": face_id}
@router.delete("/faces/{face_id}/photo/{photo_name}")
async def delete_photo(face_id: int, photo_name: str):
    """Delete one photo of an enrolled face.

    Raises:
        HTTPException: 503 when no gallery is available; 400 for a photo
            name containing a path separator, ``..`` or NUL, or when the
            gallery raises ``ValueError``; 404 when the gallery raises
            ``FileNotFoundError``.
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    # safe filename — only allow simple file names, no traversal.
    # Backslash is rejected too so the check also holds on Windows paths.
    if any(bad in photo_name for bad in ("/", "\\", "..", "\x00")):
        raise HTTPException(400, "Invalid photo name.")
    try:
        gallery.delete_photo(face_id, photo_name)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc)) from exc
    except ValueError as exc:
        raise HTTPException(400, str(exc)) from exc
    v = _bump_gallery_version()
    log.info("deleted photo %s from face_%d (v.%d)", photo_name, face_id, v)
    return {"ok": True, "deleted": photo_name}
@router.get("/faces/{face_id}/photo/{photo_name}")
async def get_photo(face_id: int, photo_name: str,
                    download: int = Query(default=0)):
    """Serve a single photo. Add ?download=1 for attachment disposition.

    Raises:
        HTTPException: 503 when no gallery is available, 400 for a photo
            name containing a path separator, ``..`` or NUL, 404 when the
            gallery has no such photo.
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    # Backslash is rejected too so the check also holds on Windows paths.
    if any(bad in photo_name for bad in ("/", "\\", "..", "\x00")):
        raise HTTPException(400, "Invalid photo name.")
    path = gallery.get_photo(face_id, photo_name)
    if path is None:
        raise HTTPException(404, "Photo not found.")
    media = "image/png" if path.suffix.lower() == ".png" else "image/jpeg"
    # Let FileResponse build Content-Disposition: it emits the same
    # `attachment; filename="..."` for plain names and falls back to the
    # RFC 5987 `filename*=` form for quotes / non-ASCII, which a
    # hand-formatted header cannot carry safely.
    download_name = f"face_{face_id}_{photo_name}" if download else None
    return FileResponse(path, media_type=media, filename=download_name)
@router.get("/faces/{face_id}/download.zip")
async def download_face_zip(face_id: int):
    """Return the bytes produced by ``gallery.zip_face`` as a zip attachment.

    Raises:
        HTTPException: 503 when no gallery is available, 404 when the
            gallery raises ``FileNotFoundError`` for ``face_id``.
    """
    gallery = _get_gallery()
    if gallery is None:
        raise HTTPException(503, "Face gallery subsystem unavailable.")
    try:
        data = gallery.zip_face(face_id)
    except FileNotFoundError as exc:
        # Chain the cause so the gallery traceback is kept for debugging.
        raise HTTPException(404, str(exc)) from exc
    # The archive is already fully in memory, so its length is known up front.
    return StreamingResponse(
        io.BytesIO(data),
        media_type="application/zip",
        headers={
            "Content-Disposition": f'attachment; filename="face_{face_id}.zip"',
            "Content-Length": str(len(data)),
        },
    )
--- a/dashboard/static/index.html
+++ b/dashboard/static/index.html
@ -123,7 +123,7 @@
  <div class="tab active" onclick="switchTab('operations')">Operations</div>
  <div class="tab" onclick="switchTab('voice')">Voice & Audio</div>
  <div class="tab" onclick="switchTab('motion')">Motion & Replay</div>
-  <div class="tab" onclick="switchTab('vision')">Camera & Vision</div>
+  <div class="tab" onclick="switchTab('recognition')">Recognition</div>
  <div class="tab" onclick="switchTab('recordings')">Recordings</div>
  <div class="tab" onclick="switchTab('settings')">Settings & Logs</div>
 </div>
@ -211,23 +211,6 @@
    <div style="margin-top:.5rem;font-size:.72rem;color:var(--dim)" id="audio-status-text"></div>
  </div>
  <!-- Live Camera Feed -->
  <div class="card">
    <h3><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M23 7l-7 5 7 5V7z"/><rect x="1" y="5" width="15" height="14" rx="2"/></svg>Live Camera</h3>
    <canvas id="camera-feed" width="640" height="480" style="width:100%;max-height:260px;border-radius:8px;background:#000"></canvas>
    <div class="row" style="margin-top:.4rem">
      <button class="btn btn-success btn-sm" onclick="startLocalCam(this)">Start</button>
      <button class="btn btn-danger btn-sm" onclick="stopLocalCam(this)">Stop</button>
      <button class="btn btn-ghost btn-sm" onclick="reconnectCamera()">Reconnect</button>
      <span id="ops-cam-state" class="badge" style="margin-left:.3rem"></span>
    </div>
    <div class="row" style="margin-top:.3rem">
      <button class="btn btn-primary" onclick="capturePhoto(this)">Capture</button>
      <select id="capture-gesture" style="width:130px"><option value="">No gesture</option></select>
      <button class="btn btn-success" onclick="captureWithMotion(this)">Capture + Gesture</button>
    </div>
  </div>
  <!-- Motion Quick Panel -->
  <div class="card">
    <h3><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M18 8A6 6 0 0 0 6 8c0 7-3 9-3 9h18s-3-2-3-9"/><path d="M13.73 21a2 2 0 0 1-3.46 0"/></svg>Quick Actions</h3>
@ -288,6 +271,7 @@
    <div class="row">
      <button class="btn btn-success" onclick="startLiveSub(this)">Start</button>
      <button class="btn btn-danger" onclick="stopLiveSub(this)">Stop</button>
      <button id="ls-cam-btn" class="btn btn-sm btn-ghost" onclick="toggleGeminiCamera(this)" title="Stream camera frames to Gemini Live — same toggle as the Recognition tab">Camera: --</button>
      <span id="ls-state" class="badge"></span>
      <button class="btn btn-sm mic-mute-shortcut btn-success" onclick="toggleMic()" style="margin-left:auto">Mic: LIVE</button>
      <button class="btn btn-sm spk-mute-shortcut btn-success" onclick="toggleSpeaker()">Speaker: LIVE</button>
@ -470,78 +454,102 @@
 </div>
 </div>
-<!-- ==================== TAB: Camera & Vision ==================== -->
+<!-- ==================== TAB: Recognition ==================== -->
-<div class="tab-content" id="tab-vision">
+<div class="tab-content" id="tab-recognition">
 <div class="grid">
-  <!-- Camera Devices -->
+  <!-- Status & Toggles -->
  <div class="card">
    <h3>Camera Device</h3>
    <div class="row">
      <label>Profile</label>
      <select id="camdev-profile" style="flex:1" onchange="selectCamProfile(this.value)">
        <option value="">Loading...</option>
      </select>
      <button class="btn btn-ghost btn-sm" onclick="scanCameras(this)" title="Re-scan plugged cameras">Scan</button>
    </div>
    <div id="camdev-detected" style="margin-top:.3rem;font-size:.65rem;color:var(--dim)"></div>
    <details style="margin-top:.5rem">
      <summary style="cursor:pointer;font-size:.72rem;color:var(--dim)">All plugged cameras</summary>
      <div id="camdev-list" style="margin-top:.3rem;font-size:.7rem"></div>
    </details>
    <details style="margin-top:.4rem">
      <summary style="cursor:pointer;font-size:.72rem;color:var(--dim)">Pin RealSense serial to slot</summary>
      <div class="row" style="margin-top:.3rem">
        <select id="camdev-pin-profile" style="flex:1"></select>
        <input id="camdev-pin-serial" placeholder="Serial..." style="flex:1">
        <button class="btn btn-primary btn-sm" onclick="pinCamSerial(this)">Pin</button>
      </div>
      <div style="font-size:.65rem;color:var(--dim);margin-top:.2rem">Use this when you have two RealSense units and want to lock which one is "primary".</div>
    </details>
    <div class="row" style="margin-top:.5rem">
      <button class="btn btn-success btn-sm" onclick="startLocalCam(this)">Start Capture</button>
      <button class="btn btn-danger btn-sm" onclick="stopLocalCam(this)">Stop Capture</button>
      <span id="local-cam-state" class="badge"></span>
    </div>
    <div style="margin-top:.5rem;font-size:.72rem;color:var(--dim)" id="camdev-status"></div>
  </div>
  <!-- Camera Config (legacy) -->
  <div class="card">
    <h3>Camera Configuration</h3>
    <div class="row"><label>Source</label><select id="cam-source" style="flex:1" onchange="setCamSource(this.value)"></select></div>
    <div class="row">
      <label>Resolution</label>
      <input id="cam-w" type="number" value="640" style="width:60px"> <span style="color:var(--dim)">x</span>
      <input id="cam-h" type="number" value="480" style="width:60px"> <span style="color:var(--dim)">@</span>
      <input id="cam-fps" type="number" value="30" style="width:45px"> <span style="color:var(--dim)">fps</span>
      <button class="btn btn-primary btn-sm" onclick="setCamRes()">Set</button>
    </div>
    <div class="row"><label>RealSense</label><input id="cam-serial" placeholder="Serial number" style="flex:1"><button class="btn btn-ghost btn-sm" onclick="setPreferredCam()">Save</button></div>
    <div style="margin-top:.4rem"><a href="/api/vision/preview.mjpg" target="_blank" style="font-size:.72rem;color:var(--accent);text-decoration:none">Open MJPEG Stream in new tab</a></div>
  </div>
  <!-- YOLO Detector -->
  <div class="card">
    <h3>YOLO Vision Detector</h3>
    <div class="row">
      <button class="btn btn-primary" onclick="loadDetector(this)">Load Model</button>
      <button class="btn btn-success" onclick="runDetection(this)">Detect Now</button>
      <span id="yolo-status" class="badge"></span>
    </div>
    <div id="yolo-result" style="margin-top:.4rem;font-size:.72rem;color:var(--muted)"></div>
  </div>
  <!-- Photo Gallery -->
  <div class="card card-full">
-    <h3>Photo Gallery</h3>
+    <h3>Camera Vision &amp; Face Recognition</h3>
-    <div class="row">
+    <div class="row" style="gap:1rem;flex-wrap:wrap">
-      <button class="btn btn-ghost btn-sm" onclick="refreshPhotos()">Refresh</button>
+      <div class="row" style="gap:.4rem">
-      <button class="btn btn-primary btn-sm" onclick="downloadAllPhotos()">Download ZIP</button>
+        <label style="min-width:7rem">Camera Vision</label>
-      <button class="btn btn-danger btn-sm" onclick="clearPhotos()">Clear All</button>
+        <label class="switch">
-      <span id="photo-count" style="margin-left:auto;font-size:.72rem;color:var(--muted)"></span>
+          <input type="checkbox" id="rec-vision-toggle" onchange="setVisionEnabled(this.checked)">
          <span class="slider"></span>
        </label>
        <span id="rec-camera-status" class="badge" style="margin-left:.5rem">--</span>
      </div>
      <div class="row" style="gap:.4rem">
        <label style="min-width:7rem">Face Recognition</label>
        <label class="switch">
          <input type="checkbox" id="rec-facerec-toggle" onchange="setFaceRecEnabled(this.checked)">
          <span class="slider"></span>
        </label>
        <span id="rec-facerec-status" class="badge" style="margin-left:.5rem">--</span>
      </div>
      <button class="btn btn-ghost btn-sm" onclick="syncGallery(this)" title="Re-send gallery to live Gemini session">↻ Sync Gallery</button>
    </div>
-    <div class="gallery-grid" id="photo-gallery"><div class="empty">No photos yet</div></div>
+    <div style="margin-top:.4rem;font-size:.7rem;color:var(--dim)" id="rec-status-line">
      Toggles take effect within ~1 second on the running Gemini session — no restart required.
    </div>
  </div>
  <!-- Live Preview -->
  <div class="card">
    <h3>Live Preview</h3>
    <div id="rec-preview-wrap" style="background:#000;border-radius:.4rem;overflow:hidden;text-align:center;min-height:180px;display:flex;align-items:center;justify-content:center">
      <img id="rec-preview-img" src="" alt="" style="max-width:100%;display:none">
      <div id="rec-preview-empty" style="color:var(--dim);font-size:.75rem;padding:1rem">Camera off — toggle Vision ON to see the live feed.</div>
    </div>
    <div style="margin-top:.3rem;font-size:.65rem;color:var(--dim)" id="rec-preview-meta">--</div>
    <div style="margin-top:.45rem">
      <div style="font-size:.7rem;color:var(--dim);margin-bottom:.2rem">Resolution / FPS</div>
      <div class="row" style="gap:.25rem;flex-wrap:wrap" id="rec-res-buttons">
        <button class="btn btn-ghost btn-sm" data-w="424" data-h="240" data-fps="15" onclick="setCameraMode(this)">424×240 · 15</button>
        <button class="btn btn-ghost btn-sm" data-w="424" data-h="240" data-fps="30" onclick="setCameraMode(this)">424×240 · 30</button>
        <button class="btn btn-ghost btn-sm" data-w="640" data-h="480" data-fps="15" onclick="setCameraMode(this)">640×480 · 15</button>
        <button class="btn btn-ghost btn-sm" data-w="640" data-h="480" data-fps="30" onclick="setCameraMode(this)">640×480 · 30</button>
        <button class="btn btn-ghost btn-sm" data-w="1280" data-h="720" data-fps="15" onclick="setCameraMode(this)">1280×720 · 15</button>
        <button class="btn btn-ghost btn-sm" data-w="1920" data-h="1080" data-fps="8" onclick="setCameraMode(this)">1920×1080 · 8</button>
      </div>
      <div style="font-size:.7rem;color:var(--dim);margin:.35rem 0 .2rem">JPEG Quality</div>
      <div class="row" style="gap:.25rem" id="rec-quality-buttons">
        <button class="btn btn-ghost btn-sm" data-q="50" onclick="setCameraQuality(this)">Low</button>
        <button class="btn btn-ghost btn-sm" data-q="70" onclick="setCameraQuality(this)">Med</button>
        <button class="btn btn-ghost btn-sm" data-q="85" onclick="setCameraQuality(this)">High</button>
      </div>
    </div>
    <div style="margin-top:.3rem;font-size:.6rem;color:var(--dim)">
      Each button rebuilds the capture pipeline (~0.5 s). Modes match the
      RealSense D435I colour sensor — on USB 2.x, stick to 424×240 or 640×480.
      If the feed is grayscale/IR, pin the colour node with <code>SANAD_CAMERA_USB_INDEX</code>.
    </div>
  </div>
  <!-- Add New Face -->
  <div class="card">
    <h3>Add New Face</h3>
    <div class="row">
      <label>Name</label>
      <input id="rec-newface-name" placeholder="(optional)" style="flex:1">
    </div>
    <div style="margin-top:.4rem">
      <label style="font-size:.72rem;color:var(--dim)">Description — who is this person? (Gemini reads it)</label>
      <textarea id="rec-newface-desc" rows="2" placeholder="e.g. Qassam, lead engineer on the robotics team — likes coffee" style="width:100%;margin-top:.2rem;font-size:.78rem;resize:vertical"></textarea>
    </div>
    <div class="row" style="margin-top:.4rem">
      <button class="btn btn-success btn-sm" onclick="enrollFromCamera(this)" title="Snap current frame">📷 Capture</button>
      <label class="btn btn-primary btn-sm" style="cursor:pointer;margin:0">
        📁 Upload images
        <input type="file" id="rec-upload-input" multiple accept="image/jpeg,image/png" style="display:none" onchange="enrollFromUpload(this)">
      </label>
    </div>
    <div style="margin-top:.4rem;font-size:.65rem;color:var(--dim)">
      Tip: add 2–3 photos / different angles per person for best recognition.
      The description is sent to Gemini with the photos — it can then greet
      and talk about the person using what you wrote.
    </div>
  </div>
  <!-- Enrolled Faces -->
  <div class="card card-full">
    <h3>Enrolled Faces <span id="rec-faces-count" style="font-weight:normal;color:var(--dim);font-size:.75rem"></span></h3>
    <div class="row">
      <button class="btn btn-ghost btn-sm" onclick="refreshFaces()">↻ Refresh</button>
      <span style="margin-left:auto;font-size:.65rem;color:var(--dim)" id="rec-gallery-version"></span>
    </div>
    <div id="rec-faces-list" style="margin-top:.6rem"><div class="empty">Loading…</div></div>
  </div>
 </div>
@ -621,7 +629,14 @@ function btnDone(b){if(b&&b.classList)b.classList.remove('loading');}
 // JSON fetch helper: sends method `m` to API+`p` (body `b` is JSON-encoded
 // when truthy) and resolves to the parsed reply. On a non-OK status it toasts
 // the server's detail/error text and throws, so callers only need try/catch.
 async function api(m,p,b){
  const o={method:m,headers:{'Content-Type':'application/json'}};
  if(b)o.body=JSON.stringify(b);
  const r=await fetch(API+p,o);
  // An error page or empty body is not JSON; don't let the parse failure
  // mask the real HTTP status.
  let j=null,parseErr=null;
  try{j=await r.json();}catch(e){parseErr=e;}
  if(!r.ok){
    const raw=j&&(j.detail||j.error);
    // `detail` may be a structured value (e.g. a validation error list);
    // stringify it so the toast stays readable.
    const msg=raw?(typeof raw==='string'?raw:JSON.stringify(raw)):'Error '+r.status;
    toast(msg,'err');
    throw new Error(msg);
  }
  // A successful reply that is not JSON still fails, as before.
  if(parseErr)throw parseErr;
  return j;
 }
 // Tabs
-function switchTab(name){document.querySelectorAll('.tab').forEach(t=>t.classList.toggle('active',t.textContent.toLowerCase().includes(name.slice(0,4))));document.querySelectorAll('.tab-content').forEach(c=>c.classList.toggle('active',c.id==='tab-'+name));}
+function switchTab(name){
  // Match the nav tab by its exact onclick target — NOT a substring of the
  // label. "recognition" and "recordings" both start with "reco", so the old
  // textContent.includes(name.slice(0,4)) lit up both tabs at once.
  const want="switchTab('"+name+"')";
  document.querySelectorAll('.tab').forEach(t=>t.classList.toggle('active',(t.getAttribute('onclick')||'').includes(want)));
  document.querySelectorAll('.tab-content').forEach(c=>c.classList.toggle('active',c.id==='tab-'+name));
 }
 // Emergency Stop
 // Emergency stop: cancel any replay AND stop live voice. Each request is
 // attempted independently so a failed cancel can never skip the voice stop.
 async function emergencyStop(){
  let allOk=true;
  for(const path of ['/api/replay/cancel','/api/live-voice/stop']){
    // api() toasts HTTP failures itself; here we only note that one occurred.
    try{await api('POST',path);}catch(e){allOk=false;}
  }
  if(allOk)toast('EMERGENCY STOP sent','err');
 }
@ -894,155 +909,6 @@ async function applyManualAudio(b){
  refreshAudioDevices();
 }
 // Camera
 // Capture a photo through the vision API, then reload the gallery.
 // Button `b` is put in its loading state for the duration.
 async function capturePhoto(b){
  btnLoad(b);
  try{
    await api('POST','/api/vision/capture');
    toast('Photo captured','ok');
    refreshPhotos();
  }catch(_err){
    // errors are swallowed here; api() toasts HTTP failures itself
  }finally{
    btnDone(b);
  }
 }
 // Capture a photo, optionally passing the gesture picked in
 // #capture-gesture as the motion_file query parameter.
 async function captureWithMotion(b){
  btnLoad(b);
  const gesture=document.getElementById('capture-gesture').value;
  let endpoint='/api/vision/capture';
  if(gesture){
    endpoint='/api/vision/capture?motion_file='+encodeURIComponent(gesture);
  }
  try{
    await api('POST',endpoint);
    toast('Captured'+(gesture?' + gesture':''),'ok');
    refreshPhotos();
  }catch(_err){
    // errors are swallowed here; api() toasts HTTP failures itself
  }finally{
    btnDone(b);
  }
 }
 // Fill #cam-source with an "Auto" entry plus one option per camera
 // returned by GET /api/vision/cameras.
 async function refreshCamSources(){
  try{
    const cams=await api('GET','/api/vision/cameras');
    const dropdown=document.getElementById('cam-source');
    const options=(cams||[]).map(c=>`<option value="${esc(c.source||c.serial||c.name)}">${esc(c.name||c.source||c.serial)}</option>`);
    dropdown.innerHTML='<option value="">Auto</option>'+options.join('');
  }catch(_err){
    // best-effort refresh
  }
 }
 // Fill #capture-gesture with "No gesture" plus one option per replay file.
 async function populateGestureSelect(){
  try{
    const reply=await api('GET','/api/replay/files');
    const dropdown=document.getElementById('capture-gesture');
    const options=(reply.files||[]).map(f=>`<option value="${esc(f.name)}">${esc(f.name)}</option>`);
    dropdown.innerHTML='<option value="">No gesture</option>'+options.join('');
  }catch(_err){
    // best-effort refresh
  }
 }
 // Tell the backend which camera source to use; an empty value is a no-op.
 async function setCamSource(v){
  if(!v)return;
  try{
    await api('POST','/api/vision/set-source',{source:v});
    toast('Camera source set','ok');
  }catch(_err){
    // errors are swallowed here; api() toasts HTTP failures itself
  }
 }
 // Send the width / height / fps typed into the legacy config card.
 async function setCamRes(){
  const numberFrom=id=>Number(document.getElementById(id).value);
  const w=numberFrom('cam-w');
  const h=numberFrom('cam-h');
  const f=numberFrom('cam-fps');
  try{
    await api('POST','/api/vision/set-resolution',{width:w,height:h,fps:f});
    toast(`${w}x${h}@${f}fps`,'ok');
  }catch(_err){
    // errors are swallowed here; api() toasts HTTP failures itself
  }
 }
 // Save the serial typed into #cam-serial as the preferred camera;
 // an empty field is a no-op.
 async function setPreferredCam(){
  const serial=document.getElementById('cam-serial').value;
  if(!serial)return;
  try{
    await api('POST','/api/vision/set-preferred-camera',{serial:serial});
    toast('Saved','ok');
  }catch(_err){
    // errors are swallowed here; api() toasts HTTP failures itself
  }
 }
 // Photos
 // Reload the photo gallery: update the counter and render one thumbnail per
 // photo; clicking a thumbnail asks for confirmation and deletes it.
 async function refreshPhotos(){
  try{
    const r=await api('GET','/api/vision/photos');
    const el=document.getElementById('photo-gallery');
    document.getElementById('photo-count').textContent=`${r.total} photos`;
    if(!r.photos?.length){el.innerHTML='<div class="empty">No photos yet</div>';return;}
    el.innerHTML=r.photos.map(p=>{
      const n=esc(p.name);
      // The name travels in a data attribute rather than being spliced into
      // the inline handler's JS source: the browser decodes HTML entities in
      // an attribute before running it, so a quote in a file name could
      // otherwise end the string literal.
      return`<img src="/api/vision/photos/${encodeURIComponent(p.name)}" title="${n}\n${p.size_kb}KB\n${p.created_at}" data-n="${n}" onclick="if(confirm('Delete '+this.dataset.n+'?'))deletePhoto(this.dataset.n)">`;
    }).join('');
  }catch(e){
    // best-effort refresh
  }
 }
 // Delete photo `n` on the server, then reload the gallery.
 async function deletePhoto(n){
  const target='/api/vision/photos/'+encodeURIComponent(n);
  try{
    await api('DELETE',target);
    toast('Deleted','ok');
    refreshPhotos();
  }catch(_err){
    // errors are swallowed here; api() toasts HTTP failures itself
  }
 }
 // Open the all-photos ZIP download via window.open.
 function downloadAllPhotos(){
  const zipUrl='/api/vision/photos/download-zip';
  window.open(zipUrl);
 }
 // After confirmation, delete every photo and report how many were removed.
 async function clearPhotos(){
  if(!confirm('Delete ALL photos?'))return;
  try{
    const reply=await api('POST','/api/vision/photos/clear');
    toast(`Cleared ${reply.deleted_count}`,'ok');
    refreshPhotos();
  }catch(_err){
    // errors are swallowed here; api() toasts HTTP failures itself
  }
 }
 // Camera devices
 // Rebuild the Camera Device card from GET /api/vision/devices: the profile
 // dropdown, the pin-serial dropdown, the detected-count summary, the table
 // of plugged cameras and the active-camera status text.
 async function refreshCamDevices(){
  try{
    const r=await api('GET','/api/vision/devices');
    const cur=r.current||{};
    const curDev=cur.device||{};
    const curId=cur.profile?cur.profile.id:'';
    const detIds=r.detected_ids||[];
    // Profile dropdown — profiles with no detected device are shown disabled
    const profSel=document.getElementById('camdev-profile');
    profSel.innerHTML=(r.profiles||[]).map(p=>{
      const avail=detIds.indexOf(p.id)>=0;
      const sel=p.id===curId?' selected':'';
      const tag=avail?'':' (no device)';
      return `<option value="${esc(p.id)}"${sel}${avail?'':' disabled'}>${esc(p.label)}${tag}</option>`;
    }).join('');
    // Pin profile dropdown — only realsense-backed profiles
    const pinSel=document.getElementById('camdev-pin-profile');
    pinSel.innerHTML=(r.profiles||[]).filter(p=>p.backend==='realsense').map(p=>
      `<option value="${esc(p.id)}">${esc(p.label)}</option>`).join('');
    // Detected summary
    const det=document.getElementById('camdev-detected');
    const counts=r.counts||{};
    det.innerHTML=`<strong>Detected:</strong> ${counts.realsense||0} RealSense, ${counts.v4l2||0} V4L2 (total ${counts.total||0})`;
    // All devices list
    const list=document.getElementById('camdev-list');
    if(!(r.all_devices||[]).length){
      list.innerHTML='<div class="empty">No cameras plugged</div>';
    }else{
      list.innerHTML='<table><tr><th>Backend</th><th>Name</th><th>Serial / Path</th><th></th></tr>'+
        r.all_devices.map(d=>{
          const idVal=d.serial||d.device_path;
          // The identifier travels in a data attribute rather than being
          // spliced into the inline handler's JS source: the browser decodes
          // HTML entities in an attribute before running it, so a quote in
          // the value could otherwise end the string literal.
          const action=d.serial
            ?`<button class="btn btn-primary btn-sm" data-v="${esc(d.serial)}" onclick="selectCamSerial(this.dataset.v)">Use</button>`
            :`<button class="btn btn-primary btn-sm" data-v="${esc(d.device_path)}" onclick="selectCamPath(this.dataset.v)">Use</button>`;
          return `<tr><td>${esc(d.backend)}</td><td>${esc(d.name||'-')}</td><td><code style="font-size:.65rem">${esc(idVal||'-')}</code></td><td>${action}</td></tr>`;
        }).join('')+'</table>';
    }
    // Status text
    const st=document.getElementById('camdev-status');
    if(curDev.name){
      st.innerHTML=`<strong>Active:</strong> ${esc(curDev.name)}<br>`+
        (curDev.serial?`Serial: <code>${esc(curDev.serial)}</code><br>`:'')+
        (curDev.device_path?`Path: <code>${esc(curDev.device_path)}</code><br>`:'')+
        `<span style="color:var(--muted)">via ${esc(cur.source_kind||'?')}</span>`;
    }else{
      st.innerHTML='<span style="color:#f55">No camera selected</span>';
    }
  }catch(e){
    // best-effort refresh
  }
 }
 // Ask the backend to re-scan plugged cameras, then redraw the device card.
 // `b` is optional; when given it shows the loading state during the scan.
 async function scanCameras(b){
  const hasButton=Boolean(b);
  if(hasButton)btnLoad(b);
  try{
    await api('POST','/api/vision/devices/scan');
    toast('Re-scanned cameras','ok');
  }catch(_err){
    // errors are swallowed here; api() toasts HTTP failures itself
  }
  if(hasButton)btnDone(b);
  refreshCamDevices();
 }
 // Switch the active camera profile; an empty id is ignored. The device
 // card is redrawn whether or not the request succeeded.
 async function selectCamProfile(profileId){
  if(profileId){
    const body={profile_id:profileId};
    try{
      await api('POST','/api/vision/devices/select-profile',body);
      toast('Camera profile switched','ok');
    }catch(_err){
      // errors are swallowed here; api() toasts HTTP failures itself
    }
    refreshCamDevices();
  }
 }
 // Select a camera by serial number; an empty serial is ignored. The device
 // card is redrawn whether or not the request succeeded.
 async function selectCamSerial(serial){
  if(serial){
    const body={serial:serial};
    try{
      await api('POST','/api/vision/devices/select-serial',body);
      toast('Camera selected by serial','ok');
    }catch(_err){
      // errors are swallowed here; api() toasts HTTP failures itself
    }
    refreshCamDevices();
  }
 }
 // Select a camera by device path; an empty path is ignored. The device
 // card is redrawn whether or not the request succeeded.
 async function selectCamPath(path){
  if(path){
    const body={device_path:path};
    try{
      await api('POST','/api/vision/devices/select-path',body);
      toast('Camera selected by path','ok');
    }catch(_err){
      // errors are swallowed here; api() toasts HTTP failures itself
    }
    refreshCamDevices();
  }
 }
 // Start local capture and toast the outcome reported in the reply's `ok`
 // flag; the status badges are refreshed half a second later.
 async function startLocalCam(b){
  btnLoad(b);
  try{
    const reply=await api('POST','/api/vision/local/start',{});
    const [text,kind]=reply.ok
      ?['Camera started: '+(reply.backend||'?'),'ok']
      :['Camera start failed: '+(reply.error||'unknown'),'err'];
    toast(text,kind);
  }catch(_err){
    // errors are swallowed here; api() toasts HTTP failures itself
  }
  btnDone(b);
  setTimeout(refreshLocalCam, 500);
 }
 // Stop local capture, then refresh the status badges.
 async function stopLocalCam(b){
  btnLoad(b);
  try{
    await api('POST','/api/vision/local/stop');
    toast('Camera stopped','info');
  }catch(_err){
    // errors are swallowed here; api() toasts HTTP failures itself
  }
  btnDone(b);
  refreshLocalCam();
 }
 // Mirror GET /api/vision/local/status into both capture badges
 // (#local-cam-state and #ops-cam-state); a missing badge is skipped.
 async function refreshLocalCam(){
  try{
    const st=await api('GET','/api/vision/local/status');
    for(const id of ['local-cam-state','ops-cam-state']){
      const badge=document.getElementById(id);
      if(!badge)continue;
      if(st.running){
        // backend, last six characters of the serial (if any), then WxH@fps
        const serialTail=st.serial?(' '+st.serial.slice(-6)):'';
        badge.textContent=(st.backend||'on')+serialTail+' '+st.width+'x'+st.height+'@'+st.fps;
        badge.className='badge badge-ok';
        continue;
      }
      if(st.last_error){
        badge.textContent='error';
        badge.className='badge badge-err';
        // the full error text is shown as the tooltip
        badge.title=st.last_error;
        continue;
      }
      badge.textContent='stopped';
      badge.className='badge badge-warn';
    }
  }catch(_err){
    // best-effort refresh
  }
 }
 // Pin the typed RealSense serial to the chosen profile slot. Both fields
 // are required; the serial box is cleared after a successful pin.
 async function pinCamSerial(b){
  const profileId=document.getElementById('camdev-pin-profile').value;
  const serialBox=document.getElementById('camdev-pin-serial');
  const serial=serialBox.value.trim();
  if(!profileId||!serial){
    toast('Pick a profile and enter a serial','err');
    return;
  }
  btnLoad(b);
  try{
    await api('POST','/api/vision/devices/assign-serial',{profile_id:profileId,serial:serial});
    toast(`Pinned ${serial} → ${profileId}`,'ok');
    document.getElementById('camdev-pin-serial').value='';
  }catch(_err){
    // errors are swallowed here; api() toasts HTTP failures itself
  }
  btnDone(b);
  refreshCamDevices();
 }
 // Motion
 // Send the gestural-speaking switch state `v` to the motion API.
 async function toggleGestural(v){
  const endpoint='/api/motion/gestural-speaking?enabled='+v;
  try{
    await api('POST',endpoint);
  }catch(_err){
    // errors are swallowed here; api() toasts HTTP failures itself
  }
 }
 let _armBusy=false,_runId=null;
@ -1212,10 +1078,10 @@ async function stopCombo(b){
 // Render the motion-file table in #replay-files, one row per replay file
 // with Play and Del buttons.
 async function refreshReplayFiles(){
  try{
    const r=await api('GET','/api/replay/files');
    const el=document.getElementById('replay-files');
    if(!(r.files||[]).length){el.innerHTML='<div class="empty">No motion files</div>';return;}
    // File names travel in a data attribute rather than being spliced into
    // the inline handlers' JS source: the browser decodes HTML entities in an
    // attribute before running it, so a quote in a file name could otherwise
    // end the string literal.
    el.innerHTML='<table><tr><th>File</th><th>Frames</th><th>Duration</th><th>Size</th><th></th></tr>'+(r.files||[]).map(f=>`<tr><td>${esc(f.name)}</td><td>${f.frames}</td><td>${f.duration_sec}s</td><td>${f.size_kb}KB</td><td><button class="btn btn-primary btn-sm" data-n="${esc(f.name)}" onclick="document.getElementById('replay-name').value=this.dataset.n;testReplay()">Play</button> <button class="btn btn-danger btn-sm" data-n="${esc(f.name)}" onclick="deleteMotionFile(this.dataset.n)">Del</button></td></tr>`).join('')+'</table>';
  }catch(e){
    // best-effort refresh
  }
 }
 // Play the replay named in #replay-name at the speed in #replay-speed,
 // then poll arm-busy state. Does nothing when no name is entered.
 async function testReplay(b){
  const name=document.getElementById('replay-name').value;
  const speed=parseFloat(document.getElementById('replay-speed').value);
  if(!name)return;
  btnLoad(b);
  try{
    await api('POST','/api/replay/test',{name:name,speed:speed});
    toast('Replay: '+name,'ok');
    pollArmBusy();
  }catch(_err){
    // errors are swallowed here; api() toasts HTTP failures itself
  }
  btnDone(b);
 }
 // Cancel the running replay and toast the server's message when present.
 async function cancelReplay(){
  try{
    const reply=await api('POST','/api/replay/cancel');
    let text='Cancelled';
    if(reply&&reply.message)text=reply.message;
    toast(text,'info');
  }catch(_err){
    // errors are swallowed here; api() toasts HTTP failures itself
  }
 }
-async function deleteMotionFile(n){if(confirm('Delete '+n+'?'))try{await api('DELETE','/api/replay/files/'+encodeURIComponent(n));toast('Deleted','ok');refreshReplayFiles();populateGestureSelect();}catch(e){}}
+async function deleteMotionFile(n){if(confirm('Delete '+n+'?'))try{await api('DELETE','/api/replay/files/'+encodeURIComponent(n));toast('Deleted','ok');refreshReplayFiles();}catch(e){}}
-async function uploadMotionFile(input){if(!input.files[0])return;const fd=new FormData();fd.append('file',input.files[0]);try{const r=await fetch('/api/replay/files/upload',{method:'POST',body:fd});if(!r.ok){const j=await r.json();toast(j.detail||'Upload failed','err');}else{toast('Uploaded','ok');refreshReplayFiles();populateGestureSelect();}}catch(e){toast('Upload error','err');}input.value='';}
+async function uploadMotionFile(input){if(!input.files[0])return;const fd=new FormData();fd.append('file',input.files[0]);try{const r=await fetch('/api/replay/files/upload',{method:'POST',body:fd});if(!r.ok){const j=await r.json();toast(j.detail||'Upload failed','err');}else{toast('Uploaded','ok');refreshReplayFiles();}}catch(e){toast('Upload error','err');}input.value='';}
 // Start a teaching recording with the name and duration from the form,
 // then begin polling its status. A name is required.
 async function startTeaching(b){
  const name=document.getElementById('teach-name').value;
  const seconds=parseFloat(document.getElementById('teach-duration').value);
  if(!name){
    return toast('Enter name','err');
  }
  btnLoad(b);
  try{
    await api('POST','/api/replay/teach/start',{name:name,duration_sec:seconds});
    toast('Teaching: '+name,'ok');
    pollTeachStatus();
  }catch(_err){
    // errors are swallowed here; api() toasts HTTP failures itself
  }
  btnDone(b);
 }
-async function stopTeaching(b){btnLoad(b);try{const r=await api('POST','/api/replay/teach/stop');toast(`Saved: ${r.name} (${r.frames} frames)`,'ok');document.getElementById('teach-status').textContent=`Done: ${r.frames} frames`;refreshReplayFiles();populateGestureSelect();}catch(e){}btnDone(b);}
+async function stopTeaching(b){btnLoad(b);try{const r=await api('POST','/api/replay/teach/stop');toast(`Saved: ${r.name} (${r.frames} frames)`,'ok');document.getElementById('teach-status').textContent=`Done: ${r.frames} frames`;refreshReplayFiles();}catch(e){}btnDone(b);}
 // Handle of the running teach-status poller (at most one at a time).
 let _teachPoll;
 // Poll /api/replay/teach/status every 500 ms into #teach-status. Polling
 // stops when recording ends (the file list is then reloaded) or on error.
 function pollTeachStatus(){
  clearInterval(_teachPoll);
  const tick=async()=>{
    try{
      const st=await api('GET','/api/replay/teach/status');
      document.getElementById('teach-status').textContent=`${st.phase} | ${st.elapsed_sec}s | ${st.frames_recorded} frames`;
      if(!st.recording){
        clearInterval(_teachPoll);
        refreshReplayFiles();
      }
    }catch(_err){
      clearInterval(_teachPoll);
    }
  };
  _teachPoll=setInterval(tick,500);
 }
 // Scripts
@ -1288,11 +1154,6 @@ async function trReplayLast(b){btnLoad(b);try{await api('POST','/api/typed-repla
 // Save the last typed-replay turn under the name in #tr-name, then refresh
 // the session panel and the records list.
 async function trSaveLast(b){
  btnLoad(b);
  try{
    const body={record_name:document.getElementById('tr-name').value};
    await api('POST','/api/typed-replay/save-last',body);
    toast('Saved','ok');
    refreshTR();
    refreshRecords();
  }catch(_err){
    // errors are swallowed here; api() toasts HTTP failures itself
  }
  btnDone(b);
 }
 // Render the typed-replay session summary into #tr-session.
 async function refreshTR(){
  try{
    const reply=await api('GET','/api/typed-replay/status');
    const s=reply.session||{};
    // one entry per displayed line; lines are joined with <br>
    const rows=[
      `<strong>Text:</strong> ${esc(s.text||'--')}`,
      `<strong>Audio:</strong> ${s.has_audio?'Yes':'No'} | <strong>Capture:</strong> ${s.has_capture?'Yes':'No'}`,
      `<strong>Replays:</strong> ${s.replay_count||0}`,
      `<strong>Generated:</strong> ${s.generated_at||'--'}`,
      `<strong>Saved:</strong> ${esc(s.saved_as||'--')}`
    ];
    document.getElementById('tr-session').innerHTML=rows.join('<br>');
  }catch(_err){
    // best-effort refresh
  }
 }
 // YOLO
 // Load the detector model and reflect the reply's `ok` flag in the
 // #yolo-status badge and a toast.
 async function loadDetector(b){
  btnLoad(b);
  try{
    const reply=await api('POST','/api/detector/load');
    const badge=document.getElementById('yolo-status');
    if(reply.ok){
      badge.textContent='Loaded';
      badge.className='badge badge-ok';
      toast('Model loaded','ok');
    }else{
      badge.textContent='Failed';
      badge.className='badge badge-err';
      toast('Failed','err');
    }
  }catch(_err){
    // errors are swallowed here; api() toasts HTTP failures itself
  }
  btnDone(b);
 }
 // Run one detection pass and show the summary in #yolo-result; any
 // failure replaces the summary with "Detection failed".
 async function runDetection(b){
  btnLoad(b);
  try{
    const d=await api('POST','/api/detector/detect');
    // fields are shown on one line, separated by " | "
    const parts=[
      `<strong>Persons:</strong> ${d.person_count}`,
      `<strong>Faces:</strong> ${d.face_count}`,
      `<strong>Group:</strong> ${d.group_detected?'Yes ('+d.group_size+')':'No'}`,
      `<strong>Intent:</strong> ${d.intent_detected?'Yes':'No'}`,
      `${d.detection_ms}ms`
    ];
    document.getElementById('yolo-result').innerHTML=parts.join(' | ');
  }catch(_err){
    document.getElementById('yolo-result').textContent='Detection failed';
  }
  btnDone(b);
 }
 // Show whether the detector model is loaded in the #yolo-status badge.
 async function refreshDetector(){
  try{
    const st=await api('GET','/api/detector/status');
    const badge=document.getElementById('yolo-status');
    if(st.loaded){
      badge.textContent='Loaded';
      badge.className='badge badge-ok';
    }else{
      badge.textContent='Not loaded';
      badge.className='badge badge-warn';
    }
  }catch(_err){
    // best-effort refresh
  }
 }
 // Wake Phrases
 // Fill #wp-action with a placeholder plus one option per wake action,
 // each labelled with its phrase count.
 async function refreshWakeActions(){
  try{
    const reply=await api('GET','/api/wake-phrases/');
    const dropdown=document.getElementById('wp-action');
    const options=(reply.actions||[]).map(a=>`<option value="${esc(a.action)}">${esc(a.action)} (${a.phrase_count})</option>`);
    dropdown.innerHTML='<option value="">-- select action --</option>'+options.join('');
  }catch(_err){
    // best-effort refresh
  }
 }
 // List the phrases of wake action `action` in #wp-phrases, each with a
 // remove button; an empty action is ignored.
 async function loadWakePhrases(action){
  if(!action)return;
  try{
    const reply=await api('GET',`/api/wake-phrases/${encodeURIComponent(action)}`);
    const box=document.getElementById('wp-phrases');
    const phrases=reply.phrases||[];
    if(!phrases.length){
      box.innerHTML='<div class="empty">No phrases</div>';
      return;
    }
    // the phrase is handed to the click handler through data-p
    const rows=phrases.map(p=>`<div class="row"><span style="flex:1;font-size:.78rem">${esc(p)}</span><button class="btn btn-danger btn-sm" onclick="removeWakePhrase(document.getElementById('wp-action').value,this.dataset.p)" data-p="${esc(p)}">X</button></div>`);
    box.innerHTML=rows.join('');
  }catch(_err){
    // best-effort refresh
  }
 }
@ -1386,10 +1247,6 @@ async function refreshStatus(){try{const s=await api('GET','/api/status');docume
 // WebSocket logs
 // Live log socket; reconnects itself 3 s after every close.
 let logWs;
 // Stream /ws/logs into #log-box, keeping only the most recent lines.
 function connectLogs(){
  const p=location.protocol==='https:'?'wss':'ws';
  logWs=new WebSocket(`${p}://${location.host}/ws/logs`);
  const box=document.getElementById('log-box');
  logWs.onmessage=e=>{
    let text=box.textContent+e.data+'\n';
    // Count lines, not child nodes: assigning textContent always leaves a
    // single text node, so a childNodes-based limit never triggers and the
    // log would grow without bound.
    const lines=text.split('\n');
    if(lines.length>1000)text=lines.slice(-500).join('\n');
    box.textContent=text;
    box.scrollTop=box.scrollHeight;
  };
  logWs.onclose=()=>setTimeout(connectLogs,3000);
 }
 // WebSocket camera
 // Camera frame socket; reconnects itself 3 s after an unexpected close.
 let camWs;
 // Open /ws/camera and paint each binary JPEG message onto #camera-feed.
 function connectCamera(){
  if(camWs){
    // Detach the handler before closing on purpose: otherwise the old
    // socket's close schedules another connectCamera, which closes the new
    // socket, and so on every 3 s.
    camWs.onclose=null;
    if(camWs.readyState<=1)try{camWs.close();}catch(e){}
  }
  const p=location.protocol==='https:'?'wss':'ws';
  const ws=new WebSocket(`${p}://${location.host}/ws/camera`);
  camWs=ws;
  ws.binaryType='arraybuffer';
  const canvas=document.getElementById('camera-feed'),ctx=canvas.getContext('2d');
  ws.onmessage=e=>{
    const url=URL.createObjectURL(new Blob([e.data],{type:'image/jpeg'})),img=new Image();
    // release the blob URL whether or not the frame decodes
    img.onload=()=>{ctx.drawImage(img,0,0,canvas.width,canvas.height);URL.revokeObjectURL(url);};
    img.onerror=()=>URL.revokeObjectURL(url);
    img.src=url;
  };
  // Only the current socket may schedule a reconnect.
  ws.onclose=()=>{if(camWs===ws)setTimeout(connectCamera,3000);};
 }
 // Manual reconnect: drop the current camera socket and open a new one
 // shortly after.
 function reconnectCamera(){
  if(camWs){
    // Detach the auto-reconnect handler first; otherwise this deliberate
    // close also schedules a reconnect that later tears down the fresh
    // socket, and the feed keeps reconnecting every 3 s.
    camWs.onclose=null;
    try{camWs.close();}catch(e){}
  }
  setTimeout(connectCamera,300);
  toast('Camera reconnecting...','info');
 }
 // Auto-connect Gemini and auto-start Live Subprocess on page load
 async function autoConnectGemini(){
  try{
@ -1413,10 +1270,331 @@ async function autoStartLiveSub(){
  }catch(e){}
 }
 // ── Recognition tab (camera vision + face recognition) ──
 // Mirror of /api/recognition/state.vision_enabled — kept fresh by
 // refreshRecognition() so the Live-Gemini-panel Camera button can flip
 // it without a round-trip GET.
 let _recVisionEnabled=false;
 // Fetch /api/recognition/state and repaint every widget that mirrors it:
 // the two toggles, the camera status badge, the Camera button in the Live
 // Gemini panel, the face-rec badge + gallery counters, the preview pane and
 // the resolution / quality button highlights. Every element is optional —
 // a missing one is skipped so the remaining widgets still update.
 async function refreshRecognition(){
  try{
    const r=await api('GET','/api/recognition/state');
    _recVisionEnabled=!!r.vision_enabled;
    const cam=r.camera||{};
    const live=!!(cam.running&&cam.backend);          // actively capturing
    const reconnecting=!!(cam.running&&!cam.backend); // thread alive, no backend (camera unplugged)
    const dims=(cam.width||'')+'x'+(cam.height||'');
    const v=document.getElementById('rec-vision-toggle');
    const f=document.getElementById('rec-facerec-toggle');
    if(v) v.checked=!!r.vision_enabled;
    if(f) f.checked=!!r.face_rec_enabled;
    const cs=document.getElementById('rec-camera-status');
    if(cs){
      cs.title=cam.error||'';
      if(live){
        cs.textContent=cam.backend+' '+dims
          +(cam.reconnect_count?(' ↻'+cam.reconnect_count):'');
        cs.className='badge badge-ok';
      }else if(reconnecting){
        cs.textContent='reconnecting…';cs.className='badge badge-warn';
      }else if(cam.error){
        cs.textContent='error';cs.className='badge badge-warn';
      }else{
        cs.textContent='off';cs.className='badge';
      }
    }
    // Camera button in the Live Gemini Process panel (Voice & Audio tab) —
    // same toggle as the Recognition tab, surfaced where it's handy.
    const cb=document.getElementById('ls-cam-btn');
    if(cb){
      if(live){
        cb.textContent='Camera: ON';
        cb.className='btn btn-sm btn-success';
        cb.title='Streaming '+cam.backend+' '+dims+' to Gemini — click to turn off';
      }else if(reconnecting){
        cb.textContent='Camera: …';
        cb.className='btn btn-sm btn-ghost';
        cb.title='Camera reconnecting…';
      }else if(r.vision_enabled&&cam.error){
        cb.textContent='Camera: N/A';
        cb.className='btn btn-sm btn-danger';
        cb.title='Vision on but no camera backend: '+(cam.error||'');
      }else{
        cb.textContent='Camera: OFF';
        cb.className='btn btn-sm btn-ghost';
        cb.title='Click to stream camera frames to Gemini Live';
      }
    }
    const fs=document.getElementById('rec-facerec-status');
    if(fs){fs.textContent=r.face_rec_enabled?'on':'off';fs.className='badge '+(r.face_rec_enabled?'badge-ok':'');}
    const fc=document.getElementById('rec-faces-count');
    if(fc) fc.textContent=`(${r.faces_count} faces, ${r.photos_count} photos)`;
    const gv=document.getElementById('rec-gallery-version');
    if(gv) gv.textContent='v.'+r.gallery_version;
    // Preview pane — the image is shown only while actively capturing.
    const img=document.getElementById('rec-preview-img');
    const empty=document.getElementById('rec-preview-empty');
    const meta=document.getElementById('rec-preview-meta');
    if(live){
      if(img) img.style.display='inline-block';
      if(empty) empty.style.display='none';
      if(meta) meta.textContent=`${cam.width}x${cam.height} @ ${cam.fps}fps · seq=${cam.frame_seq}`;
    }else{
      if(img) img.style.display='none';
      if(empty){
        empty.style.display='block';
        empty.textContent=reconnecting
          ? 'Camera reconnecting…'
          : 'Camera off — toggle Vision ON to see the live feed.';
      }
      if(meta) meta.textContent='--';
    }
    // Highlight the active resolution / quality buttons to match the live
    // capture profile (works whether the camera is running or idle).
    document.querySelectorAll('#rec-res-buttons button').forEach(btn=>{
      const on = parseInt(btn.dataset.w)===cam.width
              && parseInt(btn.dataset.h)===cam.height
              && parseInt(btn.dataset.fps)===cam.fps;
      btn.className='btn btn-sm '+(on?'btn-primary':'btn-ghost');
    });
    document.querySelectorAll('#rec-quality-buttons button').forEach(btn=>{
      const on = parseInt(btn.dataset.q)===cam.jpeg_quality;
      btn.className='btn btn-sm '+(on?'btn-primary':'btn-ghost');
    });
  }catch(e){
    // Polled on a timer: stay quiet in the UI but leave a trace for debugging.
    console.debug('refreshRecognition failed:',e);
  }
 }
 // Resolution / FPS preset buttons. A click POSTs the preset held in the
 // button's data-w / data-h / data-fps attributes to the camera-config
 // endpoint, toasts the profile the server reports back (or the requested
 // one when the reply has none) and lets refreshRecognition() re-highlight
 // whichever button matches.
 async function setCameraMode(btn){
  btnLoad(btn);
  try{
    const {w,h,fps}=btn.dataset;
    const requested={width:parseInt(w),height:parseInt(h),fps:parseInt(fps)};
    const resp=await api('POST','/api/recognition/camera-config',requested);
    const applied=resp.profile||requested;
    toast(`Camera → ${applied.width}×${applied.height} @ ${applied.fps}fps`,'ok');
    refreshRecognition();
  }catch(err){
    toast('Resolution change failed: '+(err.message||err),'err');
  }
  btnDone(btn);
 }
 // JPEG quality preset buttons: POST the value in the button's data-q
 // attribute to the camera-config endpoint, then resync the tab.
 async function setCameraQuality(btn){
  btnLoad(btn);
  try{
    const quality=parseInt(btn.dataset.q);
    const payload={jpeg_quality:quality};
    await api('POST','/api/recognition/camera-config',payload);
    toast('JPEG quality → '+quality,'ok');
    refreshRecognition();
  }catch(err){
    toast('Quality change failed: '+(err.message||err),'err');
  }
  btnDone(btn);
 }
 // Camera button in the Live Gemini Process panel. It flips the same vision
 // flag the Recognition tab owns, starting from _recVisionEnabled — the
 // last state seen by refreshRecognition() — and then stores whatever the
 // server reports back.
 async function toggleGeminiCamera(b){
  if(b) btnLoad(b);
  const wantOn=!_recVisionEnabled;
  const flag=wantOn?'1':'0';
  try{
    const resp=await api('POST','/api/recognition/vision?on='+flag);
    _recVisionEnabled=!!(resp&&resp.vision_enabled);
    const note=wantOn?'Camera ON for Gemini':'Camera OFF for Gemini';
    toast(note,'ok');
  }catch(err){
    toast('Camera toggle failed: '+(err.message||err),'err');
  }
  if(b) btnDone(b);
  // Repaint both the panel button and the Recognition tab.
  refreshRecognition();
 }
 // Recognition-tab Vision switch: POST the requested state, toast the
 // outcome, and always resync the tab afterwards so the checkbox snaps back
 // if the request failed.
 async function setVisionEnabled(on){
  const flag=on?'1':'0';
  try{
    await api('POST','/api/recognition/vision?on='+flag);
    toast(on?'Vision ON':'Vision OFF','ok');
  }catch(err){
    toast('Vision toggle failed: '+(err.message||err),'err');
  }
  refreshRecognition();
 }
 // Recognition-tab Face Recognition switch: POST the requested state, toast
 // the outcome plus any warning the server attaches, and always resync the
 // tab afterwards.
 async function setFaceRecEnabled(on){
  const flag=on?'1':'0';
  try{
    const resp=await api('POST','/api/recognition/face-rec?on='+flag);
    toast(on?'Face Recognition ON':'Face Recognition OFF','ok');
    if(resp&&resp.warning){
      toast(resp.warning,'info');
    }
  }catch(err){
    toast('Face Rec toggle failed: '+(err.message||err),'err');
  }
  refreshRecognition();
 }
 // "Sync gallery" button: POST /api/recognition/sync and, on success,
 // refresh the tab. `b` is the optional button to show a busy state on.
 async function syncGallery(b){
  const hasBtn=!!b;
  if(hasBtn) btnLoad(b);
  try{
    await api('POST','/api/recognition/sync');
    toast('Gallery sync requested','ok');
    refreshRecognition();
  }catch(err){
    toast('Sync failed','err');
  }
  if(hasBtn) btnDone(b);
 }
 // Preview poller. While running it re-points #rec-preview-img at the frame
 // endpoint every 500 ms (and once immediately), adding a timestamp query so
 // the browser never serves a cached frame. Ticks are skipped while the image
 // is hidden. start is idempotent; stop is a no-op when nothing is running.
 let _recPreviewTimer=null;
 function startRecPreview(){
  if(_recPreviewTimer) return;
  function bumpFrame(){
    const preview=document.getElementById('rec-preview-img');
    if(!preview || preview.style.display==='none') return;
    preview.src='/api/recognition/frame.jpg?t='+Date.now();
  }
  bumpFrame();
  _recPreviewTimer=setInterval(bumpFrame,500);
 }
 function stopRecPreview(){
  if(!_recPreviewTimer) return;
  clearInterval(_recPreviewTimer);
  _recPreviewTimer=null;
 }
 // Hook into tab switch — wrap the existing window.switchTab so entering the
 // recognition tab refreshes its state + face list and starts the preview
 // poller, and switching to any other tab stops the poller.
 (function(){
  const origSwitchTab=window.switchTab;
  window.switchTab=function(name){
    // Forward `this` and every argument, and hand back the original's return
    // value, so the wrapper is transparent to callers that pass more than the
    // tab name.
    const result=origSwitchTab.apply(this,arguments);
    if(name==='recognition'){refreshRecognition();refreshFaces();startRecPreview();}
    else{stopRecPreview();}
    return result;
  };
 })();
 // Face gallery list: fetch the enrolled faces and render one card per face
 // into #rec-faces-list. Does nothing when the container is absent; shows a
 // placeholder when the list is empty or the request fails.
 async function refreshFaces(){
  const container=document.getElementById('rec-faces-list');
  if(!container) return;
  let html;
  try{
    const resp=await api('GET','/api/recognition/faces');
    const hasFaces=resp.faces&&resp.faces.length;
    html=hasFaces
      ? resp.faces.map(face=>renderFaceCard(face)).join('')
      : '<div class="empty">No faces enrolled yet</div>';
  }catch(err){
    html='<div class="empty">(face gallery not yet wired)</div>';
  }
  container.innerHTML=html;
 }
 // Build the HTML card for one enrolled face: header (id, name, photo
 // count), the description Gemini sees, a thumbnail strip with per-photo
 // download / delete links, and the capture / upload / ZIP / delete actions.
 // `f` is a face record from GET /api/recognition/faces; this function reads
 // f.id, f.name, f.description and f.photos[].name.
 function renderFaceCard(f){
  const name=f.name||`(face_${f.id})`;
  const photos=(f.photos||[]).map(p=>{
    const url=`/api/recognition/faces/${f.id}/photo/${encodeURIComponent(p.name)}`;
    // The photo name travels in a data attribute and is read back through
    // this.dataset. Splicing it into the inline handler as a quoted JS string
    // is unsafe: the browser decodes HTML entities before parsing the
    // handler, so a quote in the name would terminate the string.
    return `<div style="display:inline-block;margin:.2rem;text-align:center">
      <img src="${url}?t=${Date.now()}" alt="${esc(p.name)}" style="width:72px;height:72px;object-fit:cover;border-radius:.3rem;background:#222"/>
      <div style="font-size:.6rem;color:var(--dim);margin-top:.1rem">
        <a href="${url}?download=1" download style="color:var(--accent);text-decoration:none">⬇</a>
        <a href="#" data-name="${esc(p.name)}" onclick="deletePhoto(${f.id},this.dataset.name);return false" style="color:var(--err);text-decoration:none;margin-left:.3rem">🗑</a>
      </div>
    </div>`;
  }).join('');
  return `<div class="card" style="margin-top:.5rem">
    <div class="row" style="align-items:center">
      <strong>face_${f.id}</strong>
      <span style="color:var(--dim)">—</span>
      <span id="rec-name-${f.id}" style="flex:1">${esc(name)}</span>
      <button class="btn btn-ghost btn-sm" onclick="renameFace(${f.id})" title="Rename">✏</button>
      <span style="color:var(--dim);font-size:.7rem">${(f.photos||[]).length} photo(s)</span>
    </div>
    <div style="margin-top:.25rem;font-size:.72rem">
      <span style="color:var(--dim)">Description:</span>
      <span id="rec-desc-${f.id}" style="color:var(--muted)">${f.description?esc(f.description):''}</span>${f.description?'':'<span style="color:var(--dim)">(none — no extra context for Gemini)</span>'}
      <button class="btn btn-ghost btn-sm" onclick="describeFace(${f.id})" title="Edit description Gemini sees">✏</button>
    </div>
    <div style="margin-top:.3rem">${photos}</div>
    <div class="row" style="margin-top:.4rem">
      <button class="btn btn-success btn-sm" onclick="captureToFace(${f.id},this)">📷 Capture</button>
      <label class="btn btn-primary btn-sm" style="cursor:pointer;margin:0">
        📁 Upload
        <input type="file" multiple accept="image/jpeg,image/png" style="display:none" onchange="uploadToFace(${f.id},this)">
      </label>
      <a class="btn btn-ghost btn-sm" href="/api/recognition/faces/${f.id}/download.zip" download>⬇ ZIP</a>
      <button class="btn btn-danger btn-sm" style="margin-left:auto" onclick="deleteFace(${f.id})">🗑 Delete face</button>
    </div>
  </div>`;
 }
 // Turn the Add-New-Face inputs into a query string. Blank fields are left
 // out; the result is '' when both are blank, otherwise '?name=…',
 // '?description=…' or '?name=…&description=…' (values URI-encoded).
 function _newFaceQuery(){
  const read=id=>document.getElementById(id).value.trim();
  const fields=[
    ['name',read('rec-newface-name')],
    ['description',read('rec-newface-desc')],
  ];
  const pairs=fields
    .filter(([,value])=>value)
    .map(([key,value])=>key+'='+encodeURIComponent(value));
  return pairs.length?('?'+pairs.join('&')):'';
 }
 // Blank both Add-New-Face text inputs (name, then description).
 function _clearNewFaceInputs(){
  for(const id of ['rec-newface-name','rec-newface-desc']){
    document.getElementById(id).value='';
  }
 }
 // "Enroll from camera" button: POST the enroll endpoint with the optional
 // name / description from the Add-New-Face inputs, then clear the inputs
 // and refresh the face list and tab state.
 async function enrollFromCamera(b){
  btnLoad(b);
  try{
    const endpoint='/api/recognition/faces/enroll'+_newFaceQuery();
    const resp=await api('POST',endpoint);
    const suffix=resp.face.description?' (with description)':'';
    toast('Enrolled face_'+resp.face.id+suffix,'ok');
    _clearNewFaceInputs();
    refreshFaces();
    refreshRecognition();
  }catch(err){
    toast('Enroll failed: '+(err.message||err),'err');
  }
  btnDone(b);
 }
 // "Enroll from upload" file picker: send every chosen file as multipart
 // 'files' to the upload endpoint (with the optional name / description from
 // the Add-New-Face inputs), then clear the text inputs and refresh.
 // `input` is the <input type="file"> whose change event fired.
 async function enrollFromUpload(input){
  const files=input.files;if(!files||!files.length)return;
  const fd=new FormData();for(const f of files) fd.append('files',f);
  // Capture the count now — it is still needed after the picker is reset.
  const count=files.length;
  try{
    const resp=await fetch('/api/recognition/faces/upload'+_newFaceQuery(),{method:'POST',body:fd});
    if(!resp.ok)throw new Error(await resp.text());
    const r=await resp.json();
    toast('Uploaded face_'+r.face.id+' ('+count+' photos'+(r.face.description?', with description':'')+')','ok');
    _clearNewFaceInputs();
    refreshFaces();refreshRecognition();
  }catch(e){
    toast('Upload failed: '+(e.message||e),'err');
  }finally{
    // Reset the picker on failure too: with the old selection still in
    // place, re-picking the same files would not fire another change event.
    input.value='';
  }
 }
 // Per-face "Capture" button: ask the server to add a photo to face `id`
 // via the capture endpoint, then reload the face list on success.
 async function captureToFace(id,b){
  btnLoad(b);
  const endpoint='/api/recognition/faces/'+id+'/capture';
  try{
    await api('POST',endpoint);
    toast('Added photo','ok');
    refreshFaces();
  }catch(err){
    toast('Capture failed','err');
  }
  btnDone(b);
 }
 // Per-face "Upload" picker: send every chosen file as multipart 'files' to
 // face `id`, then reload the face list. `input` is the <input type="file">
 // whose change event fired.
 async function uploadToFace(id,input){
  const files=input.files;if(!files||!files.length)return;
  const fd=new FormData();for(const f of files) fd.append('files',f);
  // Capture the count now — it is still needed after the picker is reset.
  const count=files.length;
  try{
    const resp=await fetch('/api/recognition/faces/'+id+'/upload',{method:'POST',body:fd});
    if(!resp.ok)throw new Error(await resp.text());
    toast('Uploaded '+count+' photo(s)','ok');
    refreshFaces();
  }catch(e){
    toast('Upload failed: '+(e.message||e),'err');
  }finally{
    // Reset the picker on failure too: with the old selection still in
    // place, re-picking the same files would not fire another change event.
    input.value='';
  }
 }
 // Rename face `id` through a prompt pre-filled with the current name.
 // The name shown in the card has one pair of wrapping parentheses stripped
 // (unnamed faces render as "(face_N)"), and the bare "face_N" placeholder is
 // offered as an empty field. Cancelling the prompt changes nothing; a blank
 // answer is still submitted.
 async function renameFace(id){
  const label=document.getElementById('rec-name-'+id);
  if(!label) return;
  const shown=label.textContent.replace(/^\((.*)\)$/,'$1');
  const initial=(shown==='face_'+id)?'':shown;
  const answer=prompt('New name (blank to clear):',initial);
  if(answer===null) return;
  try{
    await api('POST','/api/recognition/faces/'+id+'/rename',{name:answer});
    toast('Renamed','ok');
    refreshFaces();
  }catch(err){
    toast('Rename failed','err');
  }
 }
 // Edit the free-text description for face `id` through a prompt pre-filled
 // with the current text (empty when the card has none). Cancelling changes
 // nothing; a blank answer is submitted and reported as "cleared".
 async function describeFace(id){
  const span=document.getElementById('rec-desc-'+id);
  const current=span?span.textContent.trim():'';
  const question='Description for Gemini — who is this person? '+'(blank to clear)';
  const answer=prompt(question,current);
  if(answer===null) return;
  try{
    await api('POST','/api/recognition/faces/'+id+'/describe',{description:answer});
    const outcome=answer.trim()?'Description saved':'Description cleared';
    toast(outcome,'ok');
    refreshFaces();
  }catch(err){
    toast('Save failed: '+(err.message||err),'err');
  }
 }
 // Delete one photo (`name`) of face `id` after a confirm dialog, then
 // reload the face list.
 async function deletePhoto(id,name){
  const confirmed=confirm('Delete photo '+name+'?');
  if(!confirmed) return;
  const endpoint='/api/recognition/faces/'+id+'/photo/'+encodeURIComponent(name);
  try{
    await api('DELETE',endpoint);
    toast('Photo deleted','ok');
    refreshFaces();
  }catch(err){
    toast('Delete failed: '+(err.message||err),'err');
  }
 }
 // Delete face `id` with all of its photos after a confirm dialog, then
 // reload the face list and the tab state (counters change).
 async function deleteFace(id){
  const confirmed=confirm('Delete face_'+id+' and all photos?');
  if(!confirmed) return;
  try{
    await api('DELETE','/api/recognition/faces/'+id);
    toast('Face deleted','ok');
    refreshFaces();
    refreshRecognition();
  }catch(err){
    toast('Delete failed','err');
  }
 }
 // Init — vision/camera/detector fetches removed; those endpoints were deleted.
-refreshStatus();refreshSystem();refreshAudio();refreshAudioDevices();refreshSkills();refreshReplayFiles();refreshScripts();refreshPrompt();refreshRecords();populateGestureSelect();refreshLiveVoice();refreshLiveSub();refreshTR();refreshWakeActions();refreshApiKey();refreshCombo();connectLogs();
+refreshStatus();refreshSystem();refreshAudio();refreshAudioDevices();refreshSkills();refreshReplayFiles();refreshScripts();refreshPrompt();refreshRecords();refreshLiveVoice();refreshLiveSub();refreshTR();refreshWakeActions();refreshApiKey();refreshCombo();refreshRecognition();connectLogs();
 setTimeout(autoConnectGemini,2000);setTimeout(autoStartLiveSub,3000);
-setInterval(refreshStatus,5000);setInterval(refreshSystem,30000);setInterval(refreshLiveVoice,5000);setInterval(refreshLiveSub,5000);
+setInterval(refreshStatus,5000);setInterval(refreshSystem,30000);setInterval(refreshLiveVoice,5000);setInterval(refreshLiveSub,5000);setInterval(refreshRecognition,5000);
 </script>
 </body>
 </html>
--- a/gemini/script.py
+++ b/gemini/script.py
@ -19,8 +19,13 @@ from __future__ import annotations
 import array
 import asyncio
 import base64
 import json
 import os
 import sys
 import threading
 import time
 from pathlib import Path
 from typing import Any, Optional
 import numpy as np
@ -29,6 +34,7 @@ from google import genai
 from google.genai import types
 from Project.Sanad.config import (
    BASE_DIR,
    CHUNK_SIZE,
    GEMINI_API_KEY,
    GEMINI_VOICE,
@ -37,6 +43,7 @@ from Project.Sanad.config import (
 )
 from Project.Sanad.core.config_loader import section as _cfg_section
 from Project.Sanad.core.logger import get_logger
 from Project.Sanad.vision import recognition_state as _recog_state
 log = get_logger("gemini_brain")
@ -57,6 +64,93 @@ _NO_MESSAGES_TIMEOUT = _SV.get("no_messages_timeout_sec", 30)
 _CHUNK_BYTES = CHUNK_SIZE * 2
 _SILENCE_PCM = b"\x00" * _CHUNK_BYTES
 # ── Recognition (camera + face gallery) tunables ──
 # Every tunable below can be overridden through the named SANAD_* environment
 # variable; the second argument is the built-in default.
 # State file polled by _recognition_state_watcher for the vision / face-rec
 # flags and the gallery version.
 _RECOG_STATE_PATH = Path(os.environ.get(
    "SANAD_RECOGNITION_STATE_PATH",
    str(BASE_DIR / "data" / ".recognition_state.json"),
 ))
 # Rate (frames per second) at which _send_frame_loop relays camera frames.
 _VISION_SEND_HZ = float(os.environ.get("SANAD_VISION_SEND_HZ", "2"))
 # A cached frame older than this many milliseconds is not relayed.
 _VISION_STALE_MS = int(os.environ.get("SANAD_VISION_STALE_MS", "1500"))
 # Seconds between two polls of the recognition state file.
 _RECOG_POLL_S = float(os.environ.get("SANAD_RECOGNITION_POLL_S", "1.0"))
 # Directory handed to FaceGallery when the gallery primer is built.
 _FACES_DIR = Path(os.environ.get(
    "SANAD_FACES_DIR",
    str(BASE_DIR / "data" / "faces"),
 ))
 # Passed to FaceGallery.load_for_primer as max_samples_per_face.
 _FACES_MAX_SAMPLES = int(os.environ.get("SANAD_FACES_MAX_SAMPLES", "3"))
 # Passed to FaceGallery.load_for_primer as resize_long_side.
 _FACES_PRIMER_RESIZE = int(os.environ.get("SANAD_FACES_PRIMER_RESIZE", "256"))
 # ── stdin push channel (Marcus pattern) ──────────────────────
 # The GeminiSubprocess supervisor writes two line types to this process's
 # stdin:
 #   "frame:<base64-jpeg>\n"   — a camera frame to relay to Gemini Live
 #   "state:<json>\n"          — a motion-state update to inject as text
 # A daemon thread parses them into the caches below; the asyncio tasks
 # _send_frame_loop / _send_state_loop drain those caches.
 # Latest camera frame pushed by the supervisor: decoded JPEG bytes plus the
 # wall-clock time (time.time()) at which it was received.
 _LATEST_FRAME_LOCK = threading.Lock()
 _LATEST_FRAME: dict = {"bytes": None, "ts": 0.0}
 # Motion-state messages waiting to be injected into the live session.
 _STATE_LOCK = threading.Lock()
 _STATE_PENDING: list[str] = []
 # Maps the "event" field of a state payload to the tag Gemini is shown.
 _STATE_TAGS = {
    "start":       "[STATE-START]",
    "complete":    "[STATE-DONE]",
    "interrupted": "[STATE-INTERRUPTED]",
    "error":       "[STATE-ERROR]",
    "paused":      "[STATE-PAUSED]",
    "resumed":     "[STATE-RESUMED]",
 }
 def _format_state_message(payload: Any) -> Optional[str]:
    """Render one decoded 'state:' payload as a tagged text line.

    Returns None when the payload is not a JSON object, when its "event" or
    "cmd" field is not a string, when the event has no entry in _STATE_TAGS,
    or when the command is blank — i.e. whenever the line should be skipped.
    """
    if not isinstance(payload, dict):
        return None
    event = payload.get("event")
    cmd = payload.get("cmd")
    if not isinstance(event, str) or not isinstance(cmd, str):
        return None
    event = event.strip().lower()
    cmd = cmd.strip()
    tag = _STATE_TAGS.get(event)
    if not tag or not cmd:
        return None
    msg = f"{tag} {cmd}"
    elapsed = payload.get("elapsed_sec")
    if isinstance(elapsed, (int, float)):
        msg += f" ({float(elapsed):.1f}s)"
    reason = payload.get("reason")
    # The reason is only surfaced for error events.
    if reason and event == "error":
        msg += f" — {reason}"
    return msg
 def _stdin_watcher() -> None:
    """Daemon thread — parse 'frame:' / 'state:' lines off stdin.

    'frame:<base64>' replaces the cached latest frame; 'state:<json>' appends
    a tagged message to _STATE_PENDING. Best-effort: a malformed line
    (undecodable base64, invalid JSON, JSON that is not an object, wrongly
    typed fields) is skipped and the watcher keeps reading. Returns when
    stdin is exhausted or reading it fails.
    """
    try:
        for line in sys.stdin:
            line = line.rstrip("\n")
            if not line:
                continue
            if line.startswith("frame:"):
                try:
                    data = base64.b64decode(line[len("frame:"):])
                except ValueError:  # binascii.Error is a ValueError subclass
                    continue
                if data:
                    with _LATEST_FRAME_LOCK:
                        _LATEST_FRAME["bytes"] = data
                        _LATEST_FRAME["ts"] = time.time()
            elif line.startswith("state:"):
                try:
                    payload = json.loads(line[len("state:"):])
                except (ValueError, RecursionError):
                    continue
                # Validation lives in the helper so a wrongly-shaped payload
                # is skipped here instead of raising into the outer handler
                # and ending the thread.
                msg = _format_state_message(payload)
                if msg is None:
                    continue
                with _STATE_LOCK:
                    _STATE_PENDING.append(msg)
    except Exception:
        return
 # Start the watcher at import time — it blocks harmlessly on sys.stdin
 # until the supervisor sends something. Daemon so it never blocks exit.
 threading.Thread(target=_stdin_watcher, daemon=True, name="stdin-watcher").start()
 def _audio_energy(pcm: bytes) -> int:
    try:
@ -86,6 +180,19 @@ class GeminiBrain:
        self._ai_speak_start = 0.0
        self._last_ai_audio = 0.0
        self._done: Optional[asyncio.Event] = None
        # ── Recognition flags — kept in sync with the state file by
        # _recognition_state_watcher. Boot defaults come from the file (or
        # the SANAD_* env vars if the file is missing).
        _initial = _recog_state.read(_RECOG_STATE_PATH)
        self._vision_enabled = bool(
            _initial.vision_enabled
            or os.environ.get("SANAD_VISION_ENABLE", "0") == "1"
        )
        self._face_rec_enabled = bool(
            _initial.face_rec_enabled
            or os.environ.get("SANAD_FACE_RECOGNITION_ENABLE", "0") == "1"
        )
        self._gallery_version_primed = -1   # bumped after first successful primer
    def stop(self) -> None:
        """Signal the run loop to exit at the next opportunity."""
@ -116,12 +223,19 @@ class GeminiBrain:
                    consecutive_errors = 0
                    self._mic.flush()
                    self._done = asyncio.Event()
                    # Reset per-session primer state so re-priming on reconnect
                    # actually happens. The state watcher will re-prime as soon
                    # as it sees vision+face-rec enabled.
                    self._gallery_version_primed = -1
                    try:
                        await asyncio.wait_for(
                            asyncio.gather(
                                self._send_mic_loop(session),
                                self._receive_loop(session),
                                self._send_frame_loop(session),
                                self._send_state_loop(session),
                                self._recognition_state_watcher(session),
                            ),
                            timeout=_SESSION_TIMEOUT,
                        )
@ -368,3 +482,310 @@ class GeminiBrain:
            log.warning("receive ended: %s", exc)
        finally:
            self._done.set()
    # ─── vision-state announcer ───────────────────────────
    # Injects the camera state into the live session as text context.
    # On a live toggle Gemini is told to say so out loud ("I can see you
    # now" / "I can't see you anymore"); at session start it's silent
    # standing context so "can you see me?" is answered honestly.
    async def _announce_vision_state(self, session: Any, enabled: bool,
                                     is_toggle: bool) -> None:
        """Inject the camera on/off state into the live session as text.

        Args:
            session: live session; only ``send_realtime_input`` is used.
            enabled: whether the camera is on.
            is_toggle: True for a live toggle (Gemini is asked to say so out
                loud), False for the silent session-start status.

        A failed send is logged and ignored; cancellation propagates.
        """
        prompts = {
            # (is_toggle, enabled) -> text
            (True, True): (
                "[VISION ON] Your camera was just enabled — you can now see "
                "the user through it. Briefly tell them you can see them now, "
                "in your normal Khaleeji style (for example: "
                "'هلا، الحين أشوفك زين')."
            ),
            (True, False): (
                "[VISION OFF] Your camera was just disabled — you can no "
                "longer see anything. Briefly tell the user you can't see "
                "them anymore. If they later ask whether you can see them, "
                "tell them to enable the camera from the dashboard."
            ),
            (False, True): (
                "[VISION STATUS] Your camera is ON — you can see the user "
                "through it. Do not announce this unprompted; just answer "
                "naturally if they ask what you see."
            ),
            (False, False): (
                "[VISION STATUS] Your camera is OFF — you cannot see anything "
                "right now. If the user asks whether you can see them, tell "
                "them to enable the camera from the dashboard. Do not announce "
                "this unprompted."
            ),
        }
        text = prompts[(bool(is_toggle), bool(enabled))]
        try:
            await session.send_realtime_input(text=text)
        except asyncio.CancelledError:
            raise
        except Exception as exc:
            log.warning("vision-state inject failed: %s", exc)
        else:
            log.info("vision-state injected (enabled=%s, toggle=%s)",
                     enabled, is_toggle)
    # ─── face-recognition-state announcer ─────────────────
    # Same idea as _announce_vision_state, for the face-recognition toggle.
    # On a live OFF toggle it also tells Gemini to disregard the gallery —
    # so OFF takes effect immediately instead of lingering until reconnect.
    async def _announce_facerec_state(self, session: Any, enabled: bool,
                                      is_toggle: bool) -> None:
        """Inject the face-recognition on/off state into the live session.

        Args:
            session: live session; only ``send_realtime_input`` is used.
            enabled: whether face recognition is on.
            is_toggle: True for a live toggle (Gemini is asked to say so out
                loud; the OFF text also tells it to disregard the gallery),
                False for the silent session-start status.

        A failed send is logged and ignored; cancellation propagates.
        """
        prompts = {
            # (is_toggle, enabled) -> text
            (True, True): (
                "[FACE RECOGNITION ON] Face recognition was just enabled — "
                "you'll be shown the people you know in a moment. Briefly "
                "tell the user you can now recognise the people you know, in "
                "your normal Khaleeji style."
            ),
            (True, False): (
                "[FACE RECOGNITION OFF] Face recognition was just disabled. "
                "Disregard the face gallery you were given earlier — stop "
                "greeting people by name and do not identify anyone. Briefly "
                "tell the user you'll no longer recognise faces."
            ),
            (False, True): (
                "[FACE RECOGNITION STATUS] Face recognition is ON — when you "
                "see someone you've been shown in the gallery, greet them by "
                "name. Do not announce this unprompted."
            ),
            (False, False): (
                "[FACE RECOGNITION STATUS] Face recognition is OFF — you "
                "cannot identify people. If the user asks who someone is or "
                "whether you recognise them, tell them to enable face "
                "recognition from the dashboard. Do not announce this "
                "unprompted."
            ),
        }
        text = prompts[(bool(is_toggle), bool(enabled))]
        try:
            await session.send_realtime_input(text=text)
        except asyncio.CancelledError:
            raise
        except Exception as exc:
            log.warning("face-rec-state inject failed: %s", exc)
        else:
            log.info("face-rec-state injected (enabled=%s, toggle=%s)",
                     enabled, is_toggle)
    # ─── recognition state watcher ────────────────────────
    # Polls data/.recognition_state.json at SANAD_RECOGNITION_POLL_S Hz and
    # mirrors vision_enabled / face_rec_enabled into in-memory flags so the
    # rest of the session can react WITHOUT a Gemini reconnect.
    async def _recognition_state_watcher(self, session: Any) -> None:
        """Mirror the recognition state file into this session.

        On entry: sends the gallery primer if both vision and face
        recognition are already enabled, then injects the silent
        session-start status for each feature. Afterwards it polls
        _RECOG_STATE_PATH every _RECOG_POLL_S seconds and, whenever the file's
        mtime changes, re-reads it, announces any vision / face-rec toggle and
        re-sends the primer when needed. Runs until ``self._done`` or
        ``self._stop_flag`` is set.

        Args:
            session: live session handed on to the announcers and the primer.
        """
        # 0.0 never matches a real mtime, so the first successful stat() always
        # triggers one read of the file.
        last_mtime = 0.0
        # Baseline the file is diffed against: the flags as this process
        # currently believes them to be.
        last_state = _recog_state.RecognitionState(
            vision_enabled=self._vision_enabled,
            face_rec_enabled=self._face_rec_enabled,
            gallery_version=self._gallery_version_primed,
        )
        # Best-effort initial primer if face_rec is already on at session start.
        if self._face_rec_enabled and self._vision_enabled:
            try:
                cur = _recog_state.read(_RECOG_STATE_PATH)
                await self._send_gallery_primer(session, cur.gallery_version)
            except Exception as exc:
                log.warning("initial gallery primer failed: %s", exc)
        # Tell Gemini the current camera + face-recognition state at session
        # start — silent standing context so "can you see me?" / "do you know
        # who I am?" are answered honestly even if the user never toggles.
        await self._announce_vision_state(
            session, self._vision_enabled, is_toggle=False,
        )
        await self._announce_facerec_state(
            session, self._face_rec_enabled, is_toggle=False,
        )
        while not self._done.is_set() and not self._stop_flag.is_set():
            await asyncio.sleep(_RECOG_POLL_S)
            # A missing or unreadable state file is not an error — just try
            # again on the next tick.
            try:
                st = _RECOG_STATE_PATH.stat()
            except FileNotFoundError:
                continue
            except Exception:
                continue
            # Unchanged mtime → nothing was written since the last read.
            if st.st_mtime == last_mtime:
                continue
            last_mtime = st.st_mtime
            new_state = _recog_state.read(_RECOG_STATE_PATH)
            # Vision toggle — instant. Announce it out loud so Gemini reacts
            # ("I can see you now" / "I can't see you anymore").
            if new_state.vision_enabled != last_state.vision_enabled:
                self._vision_enabled = new_state.vision_enabled
                log.info("vision toggled → %s", self._vision_enabled)
                await self._announce_vision_state(
                    session, self._vision_enabled, is_toggle=True,
                )
            # Face-rec toggle — announce it out loud. The OFF announcement
            # also tells Gemini to disregard the gallery, so OFF takes effect
            # immediately instead of lingering until the next reconnect.
            if new_state.face_rec_enabled != last_state.face_rec_enabled:
                self._face_rec_enabled = new_state.face_rec_enabled
                if self._face_rec_enabled:
                    log.info("face rec enabled — announcing + sending primer")
                else:
                    log.info("face rec disabled — telling Gemini to "
                             "disregard the gallery")
                await self._announce_facerec_state(
                    session, self._face_rec_enabled, is_toggle=True,
                )
            # Conditions for re-priming:
            #  - face_rec just turned ON (it was off in the previous snapshot)
            #  - gallery version differs from the one last primed
            face_rec_just_on = (
                new_state.face_rec_enabled and not last_state.face_rec_enabled
            )
            gallery_changed = (
                new_state.gallery_version != self._gallery_version_primed
            )
            # The primer is only sent while both face-rec and vision are on.
            if (self._face_rec_enabled
                    and (face_rec_just_on or gallery_changed)
                    and self._vision_enabled):
                try:
                    await self._send_gallery_primer(
                        session, new_state.gallery_version,
                    )
                except Exception as exc:
                    log.warning("gallery primer failed: %s", exc)
            last_state = new_state
    # ─── camera frame send loop ───────────────────────────
    # Reads the latest JPEG from the _LATEST_FRAME cache (fed by the
    # _stdin_watcher thread, which the GeminiSubprocess supervisor pushes
    # 'frame:<b64>' lines into) and relays it to Gemini Live at
    # _VISION_SEND_HZ. Only active when self._vision_enabled. Skips frames
    # older than _VISION_STALE_MS so a stopped/unplugged camera doesn't
    # waste tokens on a frozen scene.
    async def _send_frame_loop(self, session: Any) -> None:
        """Relay the most recent cached camera frame to the live session.

        Wakes every ``1 / _VISION_SEND_HZ`` seconds (the rate is floored at
        0.5 Hz) and sends the frame held in _LATEST_FRAME as an image/jpeg
        blob, unless vision is disabled, no frame has arrived yet, the frame
        is older than _VISION_STALE_MS, or that exact frame was already sent.
        A failed send is logged and followed by a growing pause (capped at
        5 s); a successful send resets the pause. Runs until ``self._done``
        or ``self._stop_flag`` is set.

        Raises:
            asyncio.CancelledError: propagated when the task is cancelled.
        """
        period = 1.0 / max(0.5, _VISION_SEND_HZ)
        stale_s = _VISION_STALE_MS / 1000.0
        backoff = 0.0
        last_sent_ts = 0.0
        while not self._done.is_set() and not self._stop_flag.is_set():
            await asyncio.sleep(max(period, backoff))
            if not self._vision_enabled:
                continue
            # Copy the references out under the lock; the send happens outside.
            with _LATEST_FRAME_LOCK:
                data = _LATEST_FRAME.get("bytes")
                ts = _LATEST_FRAME.get("ts", 0.0)
            if not data:
                continue
            # Stale — supervisor stopped pushing (camera off / unplugged).
            if (time.time() - ts) > stale_s:
                continue
            # De-dup — don't re-send a frame we already relayed.
            if ts == last_sent_ts:
                continue
            try:
                await session.send_realtime_input(
                    video=types.Blob(data=data, mime_type="image/jpeg"),
                )
                last_sent_ts = ts
                backoff = 0.0
            except asyncio.CancelledError:
                # Re-raise rather than return: cancellation that lands in the
                # sleep above already propagates, and swallowing it here would
                # make the task look as if it had finished normally.
                raise
            except Exception as exc:
                log.warning("frame send failed: %s", exc)
                backoff = min(backoff * 2 + 0.5, 5.0)
    # ─── motion-state inject loop ─────────────────────────
    # Drains _STATE_PENDING (fed by the _stdin_watcher from 'state:' lines
    # the supervisor pushes when the arm starts/finishes/errors a motion)
    # and injects each as silent text context into the live session, so
    # Gemini can answer "what are you doing?" honestly. Per persona, Gemini
    # reads these for context but does not narrate them unprompted.
    async def _send_state_loop(self, session: Any) -> None:
        """Inject queued motion-state messages into the live session as text.

        Every 0.1 s the whole of _STATE_PENDING is taken (and cleared) under
        _STATE_LOCK and each message is sent in order. A message whose send
        fails is logged and dropped — this channel is best-effort. Runs until
        ``self._done`` or ``self._stop_flag`` is set.

        Raises:
            asyncio.CancelledError: propagated when the task is cancelled.
        """
        while not self._done.is_set() and not self._stop_flag.is_set():
            await asyncio.sleep(0.1)
            # Take the batch under the lock, send outside it so the stdin
            # watcher thread is never blocked on network I/O.
            with _STATE_LOCK:
                if not _STATE_PENDING:
                    continue
                pending = list(_STATE_PENDING)
                _STATE_PENDING.clear()
            for msg in pending:
                try:
                    await session.send_realtime_input(text=msg)
                    log.info("STATE injected: %s", msg)
                except asyncio.CancelledError:
                    # Re-raise rather than return: cancellation that lands in
                    # the sleep above already propagates, and swallowing it
                    # here would make the task look as if it had finished
                    # normally.
                    raise
                except Exception as exc:
                    # Some SDK versions may not accept text on
                    # send_realtime_input — log and keep going; motion still
                    # works, only this context channel is lost.
                    log.warning("state inject failed: %s", exc)
    # ─── face gallery primer ──────────────────────────────
    # Builds one multimodal turn carrying the entire face gallery + a Khaleeji
    # greeting instruction, and sends it via send_client_content. Gemini keeps
    # this in session context until reconnect. Re-sent on gallery_version bumps.
    async def _send_gallery_primer(self, session: Any, version: int) -> None:
        """Send the whole face gallery to Gemini as one multimodal user turn.

        The turn is an instruction text followed, per person, by a caption
        (name and optional notes) and that person's JPEG samples. On success,
        and also when the gallery is empty, ``self._gallery_version_primed``
        is set to ``version``. If the gallery module cannot be imported, the
        gallery fails to load, or the send fails, the method logs and returns
        without touching it.

        Args:
            session: live session; only ``send_client_content`` is used.
            version: gallery version to record as primed.
        """
        try:
            from Project.Sanad.vision.face_gallery import FaceGallery
        except Exception as exc:
            log.info("face gallery module unavailable: %s", exc)
            return
        gallery = FaceGallery(_FACES_DIR)
        try:
            people = gallery.load_for_primer(
                max_samples_per_face=_FACES_MAX_SAMPLES,
                resize_long_side=_FACES_PRIMER_RESIZE,
            )
        except Exception as exc:
            log.warning("face gallery load failed: %s", exc)
            return
        if not people:
            # Nothing to show — still record the version as handled.
            log.info("face gallery empty — primer skipped (v.%d)", version)
            self._gallery_version_primed = version
            return
        instruction = (
            "GALLERY PRIMER (do not reply to this turn). "
            "Below are people you know. When the live camera shows one of "
            "them, greet them warmly by name in UAE Khaleeji dialect "
            "(for example: 'هلا والله يا كسام، شحالك؟'), and you may use "
            "the notes about them to make the conversation personal. "
            "For faces NOT in this gallery, welcome them as a guest "
            "without inventing a name. Greet each person only once per "
            "minute to avoid repetition."
        )
        parts: list[dict[str, Any]] = [{"text": instruction}]
        for person, samples in people:
            if person.name:
                caption = f"This person is named {person.name}."
            else:
                caption = "This person's name is unknown — greet as guest."
            if person.description:
                caption = f"{caption} Notes about them: {person.description}"
            parts.append({"text": f"\n— {caption}"})
            parts.extend(
                {"inline_data": {"mime_type": "image/jpeg", "data": sample}}
                for sample in samples
            )
        turn = {"role": "user", "parts": parts}
        try:
            await session.send_client_content(turns=[turn], turn_complete=True)
        except Exception as exc:
            log.warning("primer send failed: %s", exc)
            return
        self._gallery_version_primed = version
        log.info("face gallery primed: %d person(s), v.%d", len(people), version)
--- a/gemini/subprocess.py
+++ b/gemini/subprocess.py
@ -10,15 +10,16 @@ When a new model is added, build its own sibling supervisor (see
 from __future__ import annotations
 import base64
 import json
 import os
 import signal
 import subprocess
 import sys
 import threading
 import time
 from collections import deque
 from datetime import datetime
-from typing import Any
+from typing import Any, Optional, Union
 from pathlib import Path
@ -30,6 +31,11 @@ log = get_logger("gemini_subprocess")
 _LS_CFG = _cfg_section("gemini", "subprocess")
 # Camera frame forwarding — push the latest JPEG to the child over stdin
 # at this interval (seconds). 0.5 s ≈ 2 fps, matching the child's
 # SANAD_VISION_SEND_HZ default. The child de-stales + relays to Gemini.
 _FRAME_FORWARD_INTERVAL_S = float(_LS_CFG.get("frame_forward_interval_sec", 0.5))
 def _resolve_live_script() -> Path:
    """Locate the voice script to run as subprocess.
@ -82,6 +88,22 @@ class GeminiSubprocess:
        self.state_message = "Idle."
        self.last_user_text = ""
        self.suppressed_noise = 0
        # ── stdin push channel (camera frames + motion state) ──
        # The child (gemini/script.py) reads "frame:<b64>\n" and
        # "state:<json>\n" lines off its stdin. Writes are serialised
        # because the frame forwarder and the motion-state bus handler
        # both call from different threads.
        self._stdin_lock = threading.Lock()
        self._camera = None                       # set via attach_camera()
        self._frame_thread: threading.Thread | None = None
        self._frame_stop = threading.Event()
    # ── camera attach (called once from main.py) ──────────────
    def attach_camera(self, camera) -> None:
        """Store the camera whose frames are forwarded to the child.

        The frame forwarder thread reads this reference once, when it
        starts, and exits immediately if it is still ``None`` — so the
        camera has to be attached before a session is started for frames
        to flow during that session.

        Args:
            camera: object exposing ``is_running()`` and
                ``get_frame_b64()`` — the two calls the frame forwarder
                makes on it (in practice the CameraDaemon).
        """
        self._camera = camera
    def _open_session_log(self, pid: int):
        """Open (or re-open) the per-day append log file for this session."""
@ -214,6 +236,7 @@ class GeminiSubprocess:
        proc = subprocess.Popen(
            cmd,
            cwd=str(script.parent),
            stdin=subprocess.PIPE,        # camera frames + motion state push
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
@ -221,16 +244,106 @@ class GeminiSubprocess:
            env=env,
        )
        # Reap any stale frame forwarder from a previous session that ended
        # by a child crash rather than a clean stop() — otherwise it keeps
        # spinning on a dead pipe and we'd leak a thread per restart.
        stale_ft = self._frame_thread
        if stale_ft is not None and stale_ft.is_alive():
            self._frame_stop.set()
            stale_ft.join(timeout=2.0)
        with self._lock:
            self.process = proc
            self.log_tail.append(f"Started: pid={proc.pid}")
            self._set_state("starting", f"pid={proc.pid}")
            self._reader_thread = threading.Thread(target=self._reader_loop, daemon=True)
            self._reader_thread.start()
            # Frame forwarder — pushes camera JPEGs to the child over stdin.
            self._frame_stop.clear()
            self._frame_thread = threading.Thread(
                target=self._frame_forwarder, daemon=True, name="gemini-frame-fwd",
            )
            self._frame_thread.start()
        log.info("Live Gemini subprocess started: pid=%d", proc.pid)
        return {"started": True, "pid": proc.pid}
    # ── stdin push channel ────────────────────────────────────
    def _send_stdin(self, line: str) -> None:
        """Write one protocol line to the child's stdin, best-effort.

        The frame forwarder and the motion-state handler both end up here
        from different threads, so the write + flush pair runs under
        ``_stdin_lock`` to keep lines from interleaving. Nothing is ever
        raised: with no process, no stdin pipe, a closed pipe, or a failed
        write, the line is simply dropped.
        """
        child = self.process
        if child is None:
            return
        if child.stdin is None:
            return
        try:
            with self._stdin_lock:
                pipe = child.stdin
                if pipe.closed:
                    return
                pipe.write(line)
                pipe.flush()
        except Exception:
            # A broken pipe means the child exited. Stay quiet here — the
            # reader thread is what surfaces the exit (state="stopped").
            pass
    def send_frame(self, jpeg: Union[bytes, str]) -> None:
        """Forward one camera frame to the child as 'frame:<base64>\\n'.

        Args:
            jpeg: raw JPEG data (``bytes``, ``bytearray`` or ``memoryview``
                — base64-encoded here) or an already-base64 ASCII string
                (e.g. CameraDaemon.get_frame_b64() — no re-encode).

        Any other type, or an empty payload, is silently ignored.

        ALL whitespace is removed from a string payload, not just the
        ends: the child consumes stdin one line per message, so a
        line-wrapped base64 string (what ``base64.encodebytes`` produces)
        would otherwise be split across several lines and corrupt the
        frame.
        """
        if isinstance(jpeg, (bytes, bytearray, memoryview)):
            b64 = base64.b64encode(jpeg).decode("ascii")
        elif isinstance(jpeg, str):
            # str.split() with no argument splits on any whitespace run,
            # so embedded "\n" / "\r\n" / spaces are dropped too.
            b64 = "".join(jpeg.split())
        else:
            return
        if b64:
            self._send_stdin("frame:" + b64 + "\n")
    def send_state(self, event: str, cmd: str,
                   elapsed_sec: Optional[float] = None,
                   reason: Optional[str] = None) -> None:
        """Push a motion-state update to the child as 'state:<json>\\n'.

        Events: start | complete | interrupted | error. The child injects
        '[STATE-...] <cmd>' into the live Gemini session as silent text
        context so Gemini can answer "what are you doing?" honestly.

        Args:
            event: state event name; an empty value makes the call a no-op.
            cmd: name of the motion/command; empty makes the call a no-op.
            elapsed_sec: optional duration, sent rounded to 2 decimals.
                A value that is not a finite number is left out of the
                payload rather than raising or emitting a non-JSON token.
            reason: optional free text, truncated to 200 characters.

        Never raises: this is called from event-bus handlers, where an
        exception would propagate into the emitter's thread.
        """
        if not event or not cmd:
            return
        payload: dict[str, Any] = {"event": event, "cmd": cmd}
        if elapsed_sec is not None:
            try:
                elapsed = float(elapsed_sec)
            except (TypeError, ValueError, OverflowError):
                elapsed = None
            # NaN fails both comparisons and ±inf fails one of them, so
            # only finite values get through. json.dumps would otherwise
            # write NaN/Infinity, which are not valid JSON.
            if elapsed is not None and float("-inf") < elapsed < float("inf"):
                payload["elapsed_sec"] = round(elapsed, 2)
        if reason:
            payload["reason"] = str(reason)[:200]
        try:
            line = "state:" + json.dumps(payload, ensure_ascii=False) + "\n"
        except Exception:
            return
        self._send_stdin(line)
    def _frame_forwarder(self) -> None:
        """Thread body: relay the camera's newest frame to the child.

        One instance runs per subprocess session. It deliberately does no
        filtering beyond "is the camera running" — the child performs its
        own vision-enabled and staleness checks. Returns at once when no
        camera was attached.
        """
        camera = self._camera
        if camera is None:
            return
        stop = self._frame_stop
        while not stop.is_set():
            # wait() is the pacing sleep; a True result means a stop was
            # requested during the interval.
            if stop.wait(_FRAME_FORWARD_INTERVAL_S):
                return
            try:
                frame_b64 = camera.get_frame_b64() if camera.is_running() else None
                if frame_b64:
                    self.send_frame(frame_b64)
            except Exception:
                # Best-effort: a single bad frame must not end the thread.
                pass
    def stop(self) -> dict[str, Any]:
        with self._lock:
            proc = self.process
@ -238,6 +351,13 @@ class GeminiSubprocess:
                return {"stopped": False, "message": "Not running."}
            self._set_state("stopping", "Stopping...")
        # Halt the frame forwarder before we tear the pipe down.
        self._frame_stop.set()
        ft = self._frame_thread
        if ft is not None:
            ft.join(timeout=2.0)
        self._frame_thread = None
        try:
            proc.send_signal(signal.SIGINT)
            proc.wait(timeout=_STOP_TIMEOUT_SEC)
@ -251,6 +371,16 @@ class GeminiSubprocess:
        rc = proc.returncode
        # Close stdin/stdout explicitly — without this each start/stop
        # cycle leaks FDs (relied on Popen.__del__ which only runs at GC;
        # a reconnect loop would march the FD count to the OS limit).
        for pipe in (getattr(proc, "stdin", None), getattr(proc, "stdout", None)):
            if pipe is not None:
                try:
                    pipe.close()
                except Exception:
                    pass
        with self._lock:
            self.process = None
            self.log_tail.append("Stopped.")
--- a/main.py
+++ b/main.py
@ -106,6 +106,8 @@ TypedReplayEngine    = _safe_import("TypedReplayEngine",    lambda: __import__("
 GeminiVoiceClient    = _safe_import("GeminiVoiceClient",    lambda: __import__("Project.Sanad.gemini.client", fromlist=["GeminiVoiceClient"]).GeminiVoiceClient)
 GeminiSubprocess     = _safe_import("GeminiSubprocess",     lambda: __import__("Project.Sanad.gemini.subprocess", fromlist=["GeminiSubprocess"]).GeminiSubprocess)
 LocalSubprocess      = _safe_import("LocalSubprocess",      lambda: __import__("Project.Sanad.local.subprocess",  fromlist=["LocalSubprocess"]).LocalSubprocess)
 CameraDaemon         = _safe_import("CameraDaemon",         lambda: __import__("Project.Sanad.vision.camera", fromlist=["CameraDaemon"]).CameraDaemon)
 FaceGallery          = _safe_import("FaceGallery",          lambda: __import__("Project.Sanad.vision.face_gallery", fromlist=["FaceGallery"]).FaceGallery)
 # ── global instances (imported by route modules) ──
@ -131,6 +133,107 @@ else:
    live_sub  = _safe_construct("live_sub", GeminiSubprocess)
 typed_replay  = _safe_construct("typed_replay", (lambda: TypedReplayEngine(voice_client, audio_mgr)) if (TypedReplayEngine and voice_client and audio_mgr) else None)
 # ── Recognition (camera + face gallery) ─────────────────────────────────────
 # Camera is idle until the dashboard toggles vision on; face gallery is pure
 # file IO and always available if the import succeeded.
 #
 # Config precedence (highest first): explicit env var → config/core_config.json
 # section → hardcoded default. The parent process normally has no SANAD_CAMERA_*
 # env vars (LIVE_TUNE is only forwarded to the Gemini child), so in practice the
 # core_config.json `camera` / `faces` sections are the live source here.
 def _build_camera():
    """Construct the CameraDaemon from env vars and core_config.json.

    Each knob is resolved in precedence order — env var (where one
    exists), then the `camera` config section, then the hardcoded default.
    A value that cannot be converted to the default's type (for example
    SANAD_CAMERA_FPS=fast, or a JSON null) is logged and skipped so the
    next source in the chain is used, rather than raising out of this
    builder over a single bad setting.
    """
    from Project.Sanad.core.config_loader import section as _cfg_section
    cam_cfg = _cfg_section("core", "camera")
    def _knob(env_key, cfg_key: str, default):
        """Resolve one setting, coerced to type(default).

        env_key may be None for knobs that have no env override.
        """
        cast = type(default)
        sources = []
        env_val = os.environ.get(env_key) if env_key else None
        if env_val is not None and env_val != "":
            sources.append((env_key, env_val))
        sources.append((f"camera.{cfg_key}", cam_cfg.get(cfg_key, default)))
        for source, raw in sources:
            try:
                return cast(raw)
            except (TypeError, ValueError):
                log.warning("Camera config: ignoring invalid %s=%r (expected %s)",
                            source, raw, cast.__name__)
        return default
    # Frames are cached in memory and pushed to the Gemini child over its
    # stdin (see GeminiSubprocess._frame_forwarder) — no file drop.
    return CameraDaemon(
        width=_knob("SANAD_CAMERA_WIDTH", "width", 424),
        height=_knob("SANAD_CAMERA_HEIGHT", "height", 240),
        fps=_knob("SANAD_CAMERA_FPS", "fps", 15),
        jpeg_quality=_knob("SANAD_CAMERA_JPEG_QUALITY", "jpeg_quality", 70),
        stale_threshold_s=_knob(None, "stale_threshold_s", 10.0),
        reconnect_min_s=_knob(None, "reconnect_min_s", 2.0),
        reconnect_max_s=_knob(None, "reconnect_max_s", 10.0),
        capture_timeout_ms=_knob(None, "capture_timeout_ms", 5000),
    )
 def _build_gallery():
    """Construct the FaceGallery rooted at the configured faces directory.

    Directory precedence: SANAD_FACES_DIR (non-empty) → the `faces`
    section's `dir_rel` → "data/faces". An absolute path is used as-is; a
    relative one is resolved against BASE_DIR.
    """
    from Project.Sanad.config import BASE_DIR
    from Project.Sanad.core.config_loader import section as _cfg_section
    faces_cfg = _cfg_section("core", "faces")
    # LIVE_TUNE sets SANAD_FACES_DIR (absolute) for the Gemini child; in
    # the parent it is usually unset, so the JSON value normally wins.
    raw = os.environ.get("SANAD_FACES_DIR")
    if not raw:
        raw = faces_cfg.get("dir_rel", "data/faces")
    root = Path(raw)
    if not root.is_absolute():
        root = BASE_DIR / raw
    return FaceGallery(root)
 camera  = _safe_construct("camera",  _build_camera if CameraDaemon else None)
 gallery = _safe_construct("gallery", _build_gallery if FaceGallery else None)
 # Restore persisted vision_enabled at boot — start camera if the user left
 # it on across a reboot. Face-rec state is read by the Gemini child directly.
 try:
    from Project.Sanad.vision import recognition_state as _recog_state
    from Project.Sanad.config import BASE_DIR as _BD
    _state = _recog_state.read(_BD / "data" / ".recognition_state.json")
    if _state.vision_enabled and camera is not None:
        if camera.start():
            log.info("Camera vision restored from state (backend=%s)", camera.backend)
        else:
            log.warning("Camera vision was ON but no backend available — leaving OFF")
            _recog_state.mutate(_BD / "data" / ".recognition_state.json",
                                vision_enabled=False)
 except Exception:
    log.exception("Could not restore recognition state")
 # Hand the camera to the Gemini supervisor so it can forward frames to the
 # child over stdin while a live session runs.
 if live_sub is not None and camera is not None:
    try:
        if hasattr(live_sub, "attach_camera"):
            live_sub.attach_camera(camera)
            log.info("Camera attached to live subprocess supervisor")
    except Exception:
        log.exception("attach_camera failed")
 # ── Motion-state → Gemini channel ───────────────────────────────────────────
 # The arm controller emits motion.action_started / _done / _error on the bus.
 # Forward each to the Gemini child as a 'state:' line so the live session can
 # answer "what are you doing?" honestly. Sync handlers, fired via emit_sync
 # from the arm's worker thread — send_state just writes to a pipe (cheap).
 # hasattr guard: only wire the channel when the active supervisor actually
 # implements send_state (presumably not every live_sub variant does).
 if live_sub is not None and hasattr(live_sub, "send_state"):
    try:
        from Project.Sanad.core.event_bus import bus as _bus
        def _on_motion_started(action: str = "", **_kw):
            """Relay motion.action_started as a 'start' state line."""
            live_sub.send_state("start", action)
        def _on_motion_done(action: str = "", elapsed_sec=None,
                            failed: bool = False, **_kw):
            """Relay motion.action_done as 'complete', unless it failed."""
            # action_error already covered the failure case with a reason;
            # here just emit complete (skip if it failed to avoid a dup).
            if not failed:
                live_sub.send_state("complete", action, elapsed_sec=elapsed_sec)
        def _on_motion_error(action: str = "", reason: str = "", **_kw):
            """Relay motion.action_error as an 'error' state line."""
            live_sub.send_state("error", action, reason=reason)
        # Every handler takes defaults + **_kw so extra or missing payload
        # keys on the bus event cannot make the call itself fail.
        _bus.on("motion.action_started", _on_motion_started)
        _bus.on("motion.action_done", _on_motion_done)
        _bus.on("motion.action_error", _on_motion_error)
        log.info("Motion-state → Gemini channel wired")
    except Exception:
        # Wiring is optional — log and carry on booting without it.
        log.exception("Could not wire motion-state → Gemini channel")
 # Wire everything into the Brain (only what was constructed)
 def _safe_attach(method_name: str, value):
    if brain is None or value is None:
@ -166,6 +269,8 @@ SUBSYSTEMS = {
    "live_voice":    live_voice,
    "live_sub":      live_sub,
    "typed_replay":  typed_replay,
    "camera":        camera,
    "gallery":       gallery,
 }
 # Critical subsystems — if any of these are None, log a warning at startup
@ -220,6 +325,13 @@ def _do_shutdown(from_signal: bool = False):
        except Exception:
            log.exception("audio_mgr.close() failed")
    if camera is not None:
        try:
            if camera.is_running():
                camera.stop()
        except Exception:
            log.exception("camera.stop() failed")
    log.info("Shutdown complete")
--- a/motion/arm_controller.py
+++ b/motion/arm_controller.py
@ -673,6 +673,8 @@ class ArmController:
            self._is_busy = True
            self._cancel.clear()
        _start = time.monotonic()
        _failed = False
        try:
            bus.emit_sync("motion.action_started", action=action.name)
            if action.file:
@ -680,12 +682,20 @@ class ArmController:
            else:
                self._run_sdk_action(action)
        except Exception as exc:
            _failed = True
            log.error("Action %s failed: %s", action.name, exc)
            bus.emit_sync("motion.action_error", action=action.name,
                          reason=str(exc))
        finally:
            with self._lock:
                self._is_busy = False
                self._last_action_time = time.monotonic()
-            bus.emit_sync("motion.action_done", action=action.name)
+            # action_done always fires (back-compat for existing listeners);
            # action_error above is the extra signal for the Gemini
            # motion-state channel. elapsed_sec lets Gemini say "...took 2.3s".
            bus.emit_sync("motion.action_done", action=action.name,
                          elapsed_sec=round(time.monotonic() - _start, 2),
                          failed=_failed)
    def _run_sdk_action(self, action: Action):
        if not _HAS_SDK:
--- a/requirements.txt
+++ b/requirements.txt
@ -7,11 +7,30 @@ uvicorn[standard]>=0.29.0
 python-multipart>=0.0.9
 # Gemini voice
 # google-genai: the Gemini Live SDK — used by gemini/script.py (live brain)
 #   and gemini/client.py. Needs Python 3.10+, which is why the voice loop
 #   runs in the gemini_sdk conda env. send_realtime_input(video=)/(text=)
 #   and send_client_content() require a reasonably recent (>=1.x) release.
 google-genai>=1.0.0
 websockets>=12.0
 pyaudio>=0.2.13
-# Camera proxy
+# Recognition (camera vision + face gallery for Gemini-side face recognition)
-httpx>=0.27.0
+# opencv-python-headless: JPEG encode + USB-camera fallback. Headless wheel —
 #                         the dashboard renders frames; we never need a GUI window.
 # Pillow: resize face samples before the Gemini primer turn.
 opencv-python-headless>=4.8.0
 Pillow>=10.0.0
 #
 # pyrealsense2 — DO NOT `pip install` on Jetson / JetPack 5.
 #   The PyPI wheel is built against glibc 2.32+ (Ubuntu 22.04); JetPack 5 ships
 #   glibc 2.31, so the wheel fails to load with:
 #     ImportError: ... version `GLIBC_2.32' not found
 #   On Jetson, build the Python binding from source against the apt-installed
 #   librealsense2 runtime (see README → "Camera vision on Jetson").
 #   On x86_64 / Ubuntu 22.04+ desktops, `pip install pyrealsense2` works fine.
 #   If pyrealsense2 is absent, CameraDaemon falls back to cv2.VideoCapture(0).
 # pyrealsense2>=2.50.0   # intentionally commented — see note above
 # Local TTS (optional — only needed for MBZUAI model)
 transformers>=4.40.0
--- a/vision/__init__.py
+++ b/vision/__init__.py
@ -0,0 +1 @@
 """Vision package — camera daemon + face gallery for Gemini-side recognition."""
--- a/vision/camera.py
+++ b/vision/camera.py
@ -0,0 +1,560 @@
 """Camera daemon — single producer, in-memory frame cache.
 Captures frames at fixed FPS from a RealSense (preferred) or any USB
 camera (fallback), JPEG-encodes them, and caches the latest frame in
 memory in two views (matches Marcus's API/camera_api.py):
  - `_latest_jpeg`  raw JPEG bytes  — dashboard preview + frame forwarder
  - `_latest_b64`   base64 ASCII    — frame forwarder → Gemini child stdin
 Consumers:
  - dashboard preview       → `snapshot_jpeg()` (served as an HTTP Response)
  - face enrollment         → `get_fresh_frame()` for a guaranteed-current capture
  - GeminiSubprocess        → `get_frame_b64()`, pushed over the child's stdin
 Lifecycle is driven by the Recognition tab toggle. The daemon is idle
 until `start()` is called; failures in start() are non-fatal and
 reported via `is_running()` / `backend`. Once running it auto-reconnects
 on USB unplug / stalled frames (Marcus-style resilience), and supports
 hot `reconfigure()` of resolution/FPS without a full restart.
 """
 from __future__ import annotations
 import base64
 import os
 import threading
 import time
 from typing import Optional
 import numpy as np
 from Project.Sanad.core.logger import get_logger
 log = get_logger("camera")
 # How many /dev/video* indices to scan for a USB-style color camera when
 # RealSense isn't available. A RealSense exposes ~6 V4L2 nodes (depth, IR,
 # color, metadata…) — the color one is rarely index 0, so we probe each
 # and accept the first that yields a real 3-channel BGR frame.
 _USB_SCAN_RANGE = 10
 class CameraDaemon:
    """RealSense → USB fallback camera capture with in-memory frame cache."""
    def __init__(
        self,
        width: int = 424,
        height: int = 240,
        fps: int = 15,
        jpeg_quality: int = 70,
        stale_threshold_s: float = 10.0,
        reconnect_min_s: float = 2.0,
        reconnect_max_s: float = 10.0,
        capture_timeout_ms: int = 5000,
    ) -> None:
        """Record the capture profile and create idle state; no hardware I/O.

        Nothing is opened here — the daemon stays idle until start().

        Args:
            width: requested frame width in pixels.
            height: requested frame height in pixels.
            fps: requested capture rate.
            jpeg_quality: JPEG quality, clamped to the range 10..95.
            stale_threshold_s: stored for the reconnect logic — presumably
                the frame age that counts as "stalled"; confirm in the
                capture loop.
            reconnect_min_s: lower bound of the reconnect backoff.
            reconnect_max_s: upper bound of the reconnect backoff.
            capture_timeout_ms: stored for the capture loop — presumably a
                per-frame wait timeout; confirm where it is consumed.
        """
        # Active profile — guarded by _reconfig_lock so reconfigure() can
        # hot-swap it from another thread between capture sessions.
        self._reconfig_lock = threading.Lock()
        self._w = int(width)
        self._h = int(height)
        self._fps = int(fps)
        self._q = max(10, min(95, int(jpeg_quality)))
        # Set by reconfigure() while running, to request a pipeline rebuild.
        self._reconfig_pending = False
        # Resilience knobs (Marcus-style)
        self._stale_s = float(stale_threshold_s)
        self._reconnect_min_s = float(reconnect_min_s)
        self._reconnect_max_s = float(reconnect_max_s)
        self._capture_timeout_ms = int(capture_timeout_ms)
        # Capture thread handle + its stop signal (see start() / stop()).
        self._thread: Optional[threading.Thread] = None
        self._stop = threading.Event()
        # Name of the acquired backend; None while no backend is held.
        self._backend: Optional[str] = None
        # _lock guards the latest-frame cache below; the accessor methods
        # read these fields only while holding it.
        self._lock = threading.Lock()
        self._latest_jpeg: Optional[bytes] = None
        self._latest_b64: Optional[str] = None
        # Wall-clock (time.time()) stamp of the cached frame; 0.0 = none yet.
        self._latest_ts: float = 0.0
        self._frame_seq: int = 0
        # Diagnostics surfaced through status(); both are reset by start().
        self._error: Optional[str] = None
        self._reconnect_count: int = 0
    # ── public API ──────────────────────────────────────────
    @property
    def backend(self) -> Optional[str]:
        """Name of the acquired capture backend, or None when none is held.

        start() sets it from the probed backend's "name" entry; stop() and
        a failed start() reset it to None.
        """
        return self._backend
    @property
    def error(self) -> Optional[str]:
        """Last recorded error text, or None.

        Cleared at the beginning of every start(). NOTE(review): the code
        that assigns a message is outside this view — presumably the
        capture/reconnect loop; confirm.
        """
        return self._error
    @property
    def frame_seq(self) -> int:
        """Current value of the frame sequence counter (starts at 0).

        NOTE(review): presumably incremented once per cached frame by the
        capture loop, which is outside this view — confirm before relying
        on it for change detection.
        """
        return self._frame_seq
    def is_running(self) -> bool:
        return self._thread is not None and self._thread.is_alive()
    def start(self) -> bool:
        """Start capture thread. Returns True if a backend was acquired.
        Initial probe is synchronous; if it fails the thread isn't spawned.
        Once running, the inner loop auto-reconnects on USB unplug or
        stalled frames using exponential backoff (`reconnect_min_s` ..
        `reconnect_max_s`).
        """
        if self.is_running():
            return True
        self._stop.clear()
        self._error = None
        self._reconnect_count = 0
        # One-shot USB-2.0 negotiation diagnostic (warns operator if D435I
        # came up on USB 2.0 — frame drops would be likely otherwise).
        self._check_usb_version()
        backend = self._probe_any()
        if backend is None:
            log.warning("Camera: no backend available (RealSense + USB both failed)")
            self._backend = None
            return False
        self._backend = backend["name"]
        self._thread = threading.Thread(
            target=self._reconnect_loop, args=(backend,),
            daemon=True, name="camera-daemon",
        )
        self._thread.start()
        with self._reconfig_lock:
            w, h, f = self._w, self._h, self._fps
        log.info("Camera started (backend=%s, %dx%d @ %dfps)",
                 self._backend, w, h, f)
        return True
    def stop(self) -> None:
        """Stop the capture thread and release the hardware."""
        if not self.is_running():
            self._backend = None
            return
        self._stop.set()
        t = self._thread
        if t is not None:
            t.join(timeout=2.0)
        self._thread = None
        self._backend = None
        log.info("Camera stopped")
    def reconfigure(self, width: Optional[int] = None, height: Optional[int] = None,
                    fps: Optional[int] = None, jpeg_quality: Optional[int] = None) -> dict:
        """Change the capture profile in place; None leaves a field as-is.

        While the daemon runs, this only raises a pending flag: the
        capture loop picks it up, tears the pipeline down and rebuilds it
        with the new values (~0.5 s gap). While it is stopped, the values
        simply apply to the next `start()`. JPEG quality is clamped to
        10..95. Returns the profile that is now active.
        """
        with self._reconfig_lock:
            for attr, requested in (("_w", width), ("_h", height), ("_fps", fps)):
                if requested is not None:
                    setattr(self, attr, int(requested))
            if jpeg_quality is not None:
                self._q = min(95, max(10, int(jpeg_quality)))
            if self.is_running():
                self._reconfig_pending = True
            profile = dict(width=self._w, height=self._h,
                           fps=self._fps, jpeg_quality=self._q)
        log.info("Camera reconfigure → %s", profile)
        return profile
    def snapshot_jpeg(self) -> Optional[bytes]:
        """Return the latest JPEG bytes, or None if no frame yet."""
        with self._lock:
            return self._latest_jpeg
    def get_frame_b64(self) -> Optional[str]:
        """Return the latest frame as a base64 ASCII string (or None).
        Used by the frame forwarder to push frames over the Gemini child's
        stdin without re-encoding — base64 is cached alongside the JPEG.
        """
        with self._lock:
            return self._latest_b64
    def get_fresh_frame(self, max_age_s: float = 0.5,
                        timeout_s: float = 1.5) -> Optional[bytes]:
        """Return a JPEG frame newer than `max_age_s`, waiting up to `timeout_s`.
        Used by face enrollment so the captured frame is guaranteed to be
        the *current* scene, not a stale buffer from before the user got
        into position. Falls back to whatever's cached on timeout.
        """
        deadline = time.time() + timeout_s
        while time.time() < deadline:
            with self._lock:
                if (self._latest_jpeg is not None
                        and self._latest_ts > 0
                        and (time.time() - self._latest_ts) <= max_age_s):
                    return self._latest_jpeg
            time.sleep(0.03)
        with self._lock:
            return self._latest_jpeg
    def latest_age_s(self) -> float:
        """Seconds since last successful frame; +inf if none."""
        with self._lock:
            if self._latest_ts <= 0:
                return float("inf")
            return time.time() - self._latest_ts
    def status(self) -> dict:
        with self._reconfig_lock:
            w, h, f, q = self._w, self._h, self._fps, self._q
        # latest_age_s() is +inf until the first frame lands. inf is NOT
        # JSON-serialisable by Starlette's JSONResponse (allow_nan=False) —
        # leaving it as inf would 500 the /api/recognition/* routes. Map
        # "running but no frame yet" and "not running" both to None.
        age = self.latest_age_s()
        age_s = round(age, 2) if (self.is_running() and age != float("inf")) else None
        return {
            "running": self.is_running(),
            "backend": self._backend,
            "width": w,
            "height": h,
            "fps": f,
            "jpeg_quality": q,
            "frame_seq": self._frame_seq,
            "age_s": age_s,
            "error": self._error,
            "reconnect_count": self._reconnect_count,
        }
    # ── helpers ─────────────────────────────────────────────
    def _probe_any(self) -> Optional[dict]:
        """Try RealSense first, then USB. Returns backend dict or None."""
        b = self._probe_realsense()
        if b is None:
            b = self._probe_usb()
        return b
    def _check_usb_version(self) -> None:
        """Warn if a connected RealSense negotiated USB 2.0 (needs 3.x).
        Marcus has this same check — D435I on USB 2.0 can't deliver
        color+depth+IMU and the pipeline silently stalls. Catching it at
        startup lets the operator fix the cable/port instead of chasing a
        "no frames" loop. Diagnostic only; never blocks startup.
        """
        try:
            import pyrealsense2 as rs  # type: ignore
            ctx = rs.context()
            for dev in ctx.query_devices():
                try:
                    usb_type = dev.get_info(rs.camera_info.usb_type_descriptor)
                    name = dev.get_info(rs.camera_info.name)
                except Exception:
                    continue
                if str(usb_type).startswith("2."):
                    log.warning(
                        "RealSense %s negotiated USB %s — expected 3.x. "
                        "Frame drops likely. Try a USB 3 port / shorter cable / "
                        "powered hub.", name, usb_type,
                    )
                else:
                    log.info("RealSense %s on USB %s", name, usb_type)
        except Exception:
            pass
    # ── backend probing ─────────────────────────────────────
    def _probe_realsense(self) -> Optional[dict]:
        """Try to start a RealSense colour stream at the active profile.

        Returns a backend dict (name "realsense", plus the started
        pipeline, the imported module and the stream profile) or None
        when pyrealsense2 is missing or the pipeline fails to start.
        """
        with self._reconfig_lock:
            width, height, fps = self._w, self._h, self._fps
        try:
            import pyrealsense2 as rs  # type: ignore
            pipe = rs.pipeline()
            stream_cfg = rs.config()
            stream_cfg.enable_stream(rs.stream.color, width, height,
                                     rs.format.bgr8, fps)
            started = pipe.start(stream_cfg)
        except Exception as exc:
            log.info("RealSense unavailable: %s", exc)
            return None
        return {"name": "realsense", "pipeline": pipe, "rs": rs,
                "profile": started}
    def _open_usb_index(self, idx: int, w: int, h: int, f: int,
                        cv2) -> Optional[dict]:
        """Open one /dev/video<idx>, validate it yields a 3-channel frame,
        and classify it as colour vs grayscale/IR.
        A RealSense IR node delivers Y8 — cv2 replicates that single plane
        across 3 channels, so the planes come back *bit-identical*. A real
        colour sensor never produces bit-identical channels (per-channel
        sensor noise differs even on a flat gray scene). That's the test.
        Returns a backend dict with `is_color`, or None if the node is
        unusable.
        """
        cap = None
        try:
            cap = cv2.VideoCapture(idx)
            if not cap.isOpened():
                cap.release()
                return None
            cap.set(cv2.CAP_PROP_FRAME_WIDTH, w)
            cap.set(cv2.CAP_PROP_FRAME_HEIGHT, h)
            cap.set(cv2.CAP_PROP_FPS, f)
            good = None
            for _ in range(5):
                ok, frame = cap.read()
                if (ok and frame is not None and frame.ndim == 3
                        and frame.shape[2] == 3):
                    good = frame
                    break
            if good is None:
                cap.release()
                return None
            is_color = not (
                np.array_equal(good[:, :, 0], good[:, :, 1])
                and np.array_equal(good[:, :, 1], good[:, :, 2])
            )
            return {"name": "usb", "cap": cap, "cv2": cv2, "index": idx,
                    "is_color": is_color,
                    "frame_wh": (good.shape[1], good.shape[0])}
        except Exception as exc:
            log.info("USB camera index %d: %s", idx, exc)
            if cap is not None:
                try:
                    cap.release()
                except Exception:
                    pass
            return None
    def _probe_usb(self) -> Optional[dict]:
        """Scan /dev/video* for a colour camera node, falling back to a
        grayscale/IR node only if no colour node exists.

        On a RealSense, /dev/video0 is the *depth* stream (Z16, cv2 can't
        open it as a webcam); the IR nodes deliver Y8 (grayscale); the
        *colour* node delivers YUYV/BGR. We can't know the index up front,
        so we probe each and prefer the first genuine colour node — that's
        why the dashboard preview used to come up grayscale. Pin a node
        with SANAD_CAMERA_USB_INDEX=<n> to skip the scan entirely; a value
        that is not a plain decimal number is reported and ignored.

        Returns:
            The backend dict built by ``_open_usb_index``, or ``None`` when
            cv2 cannot be imported, the pinned node is unusable, or no
            scanned node delivers frames.
        """
        with self._reconfig_lock:
            w, h, f = self._w, self._h, self._fps
        try:
            import cv2  # type: ignore
        except Exception as exc:
            log.info("USB camera unavailable: %s", exc)
            return None

        # Pinned index — accept whatever it is (colour or not).
        explicit = os.environ.get("SANAD_CAMERA_USB_INDEX", "").strip()
        # isdecimal(), not isdigit(): isdigit() also accepts characters such
        # as "²" that int() rejects with ValueError.
        if explicit.isdecimal():
            backend = self._open_usb_index(int(explicit), w, h, f, cv2)
            if backend is not None:
                fw, fh = backend["frame_wh"]
                log.info("USB camera: pinned /dev/video%d (%dx%d, %s)",
                         backend["index"], fw, fh,
                         "colour" if backend["is_color"] else "grayscale/IR")
                return backend
            log.warning("USB camera: pinned index %s unusable", explicit)
            return None
        if explicit:
            # A typo in the env var should be visible, not silently ignored.
            log.warning("USB camera: ignoring invalid SANAD_CAMERA_USB_INDEX=%r "
                        "— scanning instead", explicit)

        # Scan — prefer a real colour node; keep the first grayscale node
        # as a last resort so the camera still works if that's all there is.
        gray_fallback: Optional[dict] = None
        for idx in range(_USB_SCAN_RANGE):
            backend = self._open_usb_index(idx, w, h, f, cv2)
            if backend is None:
                continue
            fw, fh = backend["frame_wh"]
            if backend["is_color"]:
                log.info("USB camera: using /dev/video%d (colour, %dx%d)",
                         idx, fw, fh)
                # The colour node wins — close the grayscale handle we held.
                if gray_fallback is not None:
                    try:
                        gray_fallback["cap"].release()
                    except Exception:
                        pass
                return backend
            # grayscale/IR — remember the first, release any extras
            if gray_fallback is None:
                gray_fallback = backend
            else:
                try:
                    backend["cap"].release()
                except Exception:
                    pass
        if gray_fallback is not None:
            fw, fh = gray_fallback["frame_wh"]
            log.warning("USB camera: no colour node found — falling back to "
                        "/dev/video%d (grayscale/IR, %dx%d). For a RealSense, "
                        "build pyrealsense2 or pin the colour node with "
                        "SANAD_CAMERA_USB_INDEX.", gray_fallback["index"], fw, fh)
            return gray_fallback
        log.info("USB camera unavailable: no working /dev/video* node found "
                 "(scanned %d indices)", _USB_SCAN_RANGE)
        return None
    # ── main capture loop ───────────────────────────────────
    def _reconnect_loop(self, initial_backend: dict) -> None:
        """Outer loop — owns reconnect with exponential backoff.
        Inner `_capture_session` runs until the camera goes stale, the
        stop flag is set, or a reconfigure is requested. On stall we
        sleep + re-probe; on reconfigure we re-probe immediately at the
        new resolution. Backoff resets after a successful session.
        """
        backend = initial_backend
        backoff = self._reconnect_min_s
        while not self._stop.is_set():
            reconfigured = False
            try:
                reconfigured = self._capture_session(backend)
            except Exception as exc:
                log.exception("Camera capture session crashed: %s", exc)
                self._error = str(exc)
            finally:
                self._teardown(backend)
            if self._stop.is_set():
                break
            if reconfigured:
                # Fast path — rebuild immediately at the new profile.
                with self._reconfig_lock:
                    self._reconfig_pending = False
                new_backend = self._probe_any()
                if new_backend is None:
                    self._error = "reconnecting"
                    log.warning("Camera reconfigure: re-probe failed — "
                                "retrying in %.1fs", backoff)
                    if self._stop.wait(backoff):
                        break
                    backoff = min(backoff * 2, self._reconnect_max_s)
                    continue
                self._backend = new_backend["name"]
                self._error = None
                backend = new_backend
                backoff = self._reconnect_min_s
                log.info("Camera rebuilt after reconfigure (backend=%s)",
                         self._backend)
                continue
            # Capture session ended unexpectedly (stall / crash). Sleep + re-probe.
            self._error = "reconnecting"
            log.warning("Camera disconnected — reconnecting in %.1fs", backoff)
            if self._stop.wait(backoff):  # interruptible sleep
                break
            backoff = min(backoff * 2, self._reconnect_max_s)
            new_backend = self._probe_any()
            if new_backend is None:
                self._backend = None
                continue  # stay in the loop; next iteration retries
            self._backend = new_backend["name"]
            self._reconnect_count += 1
            self._error = None
            log.info("Camera reconnected (backend=%s, attempt #%d)",
                     self._backend, self._reconnect_count)
            backend = new_backend
            backoff = self._reconnect_min_s  # reset on success
    def _capture_session(self, backend: dict) -> bool:
        """Pump frames from `backend` into the shared JPEG/base64 cache.

        Runs until the stop flag is set, a reconfigure is pending, or no
        frame has arrived for longer than `self._stale_s`.

        Returns:
            True when the session ended because of a reconfigure request
            (caller rebuilds immediately); False on a stall or clean stop.
        """
        import cv2  # always available — used for JPEG encode
        with self._reconfig_lock:
            jpeg_opts = [int(cv2.IMWRITE_JPEG_QUALITY), self._q]
        last_good = time.time()
        misses = 0
        while True:
            if self._stop.is_set():
                break
            if self._reconfig_pending:
                log.info("Camera reconfigure requested — rebuilding pipeline")
                return True
            frame = self._read_frame(backend)
            if frame is not None:
                try:
                    ok, buf = cv2.imencode(".jpg", frame, jpeg_opts)
                except Exception as exc:
                    log.warning("JPEG encode failed: %s", exc)
                    continue
                if not ok:
                    continue
                jpeg = bytes(buf)
                b64 = base64.b64encode(jpeg).decode("ascii")
                stamp = time.time()
                # Publish all four fields together so readers never see a
                # JPEG paired with another frame's base64 / timestamp.
                with self._lock:
                    self._latest_jpeg = jpeg
                    self._latest_b64 = b64
                    self._latest_ts = stamp
                    self._frame_seq += 1
                last_good = stamp
                misses = 0
                continue
            # No frame this round — account for it and decide whether the
            # camera has been silent long enough to count as stalled.
            misses += 1
            age = time.time() - last_good
            if age > self._stale_s:
                log.warning(
                    "Camera stalled %.1fs (%d consecutive timeouts) — "
                    "rebuilding pipeline", age, misses,
                )
                return False
            # Intermediate warnings so degradation is visible early
            if misses in (3, 10, 30):
                log.warning("Camera slow (%d failures, age %.1fs)",
                            misses, age)
            time.sleep(0.05)
        return False
    def _read_frame(self, backend: dict) -> Optional[np.ndarray]:
        name = backend["name"]
        if name == "realsense":
            try:
                frames = backend["pipeline"].wait_for_frames(
                    timeout_ms=self._capture_timeout_ms,
                )
                color = frames.get_color_frame()
                if not color:
                    return None
                return np.asanyarray(color.get_data())
            except Exception:
                # Soft path — single timeouts handled by _capture_session's
                # stale-detection logic; don't spam the log per frame.
                return None
        elif name == "usb":
            cap = backend["cap"]
            ok, frame = cap.read()
            if not ok or frame is None:
                return None
            return frame
        return None
    def _teardown(self, backend: dict) -> None:
        name = backend.get("name")
        try:
            if name == "realsense":
                backend["pipeline"].stop()
            elif name == "usb":
                backend["cap"].release()
        except Exception as exc:
            log.info("Camera teardown: %s", exc)
--- a/vision/face_gallery.py
+++ b/vision/face_gallery.py
@ -0,0 +1,363 @@
 """Face gallery — pure file IO over data/faces/face_{id}/.
 Layout per face:
    face_{id}/
      face_1.jpg   ← samples (≥1 required)
      face_2.jpg
      face_3.png
      meta.json    ← optional: {"name": "...", "description": "...", "added_at": "..."}
 `description` is free text the operator writes about the person ("lead
 engineer, likes coffee") — it's folded into the Gemini primer turn so
 Gemini can reference it when it recognises that face.
 No ML — Gemini does the recognition in-context using the samples we feed it
 via the primer turn. This module's only jobs are:
  - enumerate enrolled faces
  - serve & accept JPEG/PNG bytes per face
  - rename / describe / delete / zip / load-for-primer
 Thread-safe via a single internal RLock.
 """
 from __future__ import annotations
 import io
 import json
 import re
 import threading
 import zipfile
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from typing import Iterable
 from Project.Sanad.core.logger import get_logger
 log = get_logger("face_gallery")
 _DIR_RE = re.compile(r"^face_(\d+)$")
 ALLOWED_EXTS = {".jpg", ".jpeg", ".png"}
 SAMPLE_NAME_RE = re.compile(r"^face_(\d+)\.(jpg|jpeg|png)$", re.IGNORECASE)
@dataclass
 class PhotoInfo:
    """Plain record describing one photo file: name, size and location."""
    name: str        # file name of the photo
    size_bytes: int  # size of the file in bytes
    path: Path       # filesystem path of the photo
@dataclass
class FaceEntry:
    """One enrolled face: numeric id, optional metadata and sample files."""

    id: int                          # N from the face_{N} directory name
    name: str | None                 # display name from meta.json, if any
    added_at: str | None             # "added_at" value from meta.json, if any
    dir: Path                        # the face_{id}/ directory
    description: str | None = None   # free-text description, if any
    sample_paths: list[Path] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return a JSON-friendly summary of this face.

        Samples that can no longer be stat'ed (e.g. deleted since the
        entry was built) are left out of ``photos`` instead of raising.
        """
        photos = []
        for sample in self.sample_paths:
            try:
                # One stat() call: an exists()+stat() pair can race with a
                # concurrent delete and raise between the two calls.
                size = sample.stat().st_size
            except OSError:
                continue
            photos.append({"name": sample.name, "size_bytes": size})
        return {
            "id": self.id,
            "name": self.name,
            "description": self.description,
            "added_at": self.added_at,
            "dir": str(self.dir),
            "photos": photos,
        }
 class FaceGallery:
    """File-system backed gallery rooted at `root` (e.g. data/faces/)."""
    def __init__(self, root: Path | str) -> None:
        """Remember the gallery root and create the lock; no disk access here."""
        self.root = Path(root)
        # Re-entrant on purpose: locked methods call other locked methods
        # (create_face → next_id / get, delete_photo → get_photo).
        self._lock = threading.RLock()
    # ── read ────────────────────────────────────────────────
    def _ensure_root(self) -> None:
        """Create the gallery root (and any missing parents) if it is absent."""
        self.root.mkdir(parents=True, exist_ok=True)
    def _iter_face_dirs(self) -> Iterable[tuple[int, Path]]:
        if not self.root.exists():
            return
        for child in sorted(self.root.iterdir()):
            if not child.is_dir():
                continue
            m = _DIR_RE.match(child.name)
            if not m:
                continue
            yield int(m.group(1)), child
    def _samples_in(self, face_dir: Path) -> list[Path]:
        out: list[Path] = []
        for p in sorted(face_dir.iterdir()):
            if p.is_file() and p.suffix.lower() in ALLOWED_EXTS:
                out.append(p)
        return out
    def _meta(self, face_dir: Path) -> tuple[str | None, str | None, str | None]:
        """Return (name, description, added_at) — any may be None."""
        meta_path = face_dir / "meta.json"
        if not meta_path.exists():
            return None, None, None
        try:
            data = json.loads(meta_path.read_text(encoding="utf-8"))
        except Exception:
            return None, None, None
        name = data.get("name")
        description = data.get("description")
        added = data.get("added_at")
        return (name if name else None), (description if description else None), added
    def list(self) -> list[FaceEntry]:
        with self._lock:
            entries: list[FaceEntry] = []
            for face_id, face_dir in self._iter_face_dirs():
                name, description, added = self._meta(face_dir)
                entries.append(FaceEntry(
                    id=face_id,
                    name=name,
                    description=description,
                    added_at=added,
                    dir=face_dir,
                    sample_paths=self._samples_in(face_dir),
                ))
            return entries
    def get(self, face_id: int) -> FaceEntry | None:
        with self._lock:
            face_dir = self.root / f"face_{face_id}"
            if not face_dir.is_dir():
                return None
            name, description, added = self._meta(face_dir)
            return FaceEntry(
                id=face_id, name=name, description=description, added_at=added,
                dir=face_dir, sample_paths=self._samples_in(face_dir),
            )
    def get_photo(self, face_id: int, photo_name: str) -> Path | None:
        with self._lock:
            face_dir = self.root / f"face_{face_id}"
            if not face_dir.is_dir():
                return None
            p = face_dir / photo_name
            try:
                p.resolve().relative_to(face_dir.resolve())
            except ValueError:
                return None
            if not p.exists() or p.suffix.lower() not in ALLOWED_EXTS:
                return None
            return p
    # ── write ───────────────────────────────────────────────
    def next_id(self) -> int:
        with self._lock:
            max_id = 0
            for face_id, _ in self._iter_face_dirs():
                if face_id > max_id:
                    max_id = face_id
            return max_id + 1
    def _next_sample_name(self, face_dir: Path, ext: str) -> str:
        """Return next free face_N.<ext> filename inside face_dir."""
        existing = self._samples_in(face_dir)
        max_n = 0
        for p in existing:
            m = SAMPLE_NAME_RE.match(p.name)
            if m:
                n = int(m.group(1))
                if n > max_n:
                    max_n = n
        return f"face_{max_n + 1}{ext.lower()}"
    @staticmethod
    def _detect_ext(jpeg_or_png: bytes) -> str:
        """Sniff PNG vs JPEG from the magic bytes."""
        if len(jpeg_or_png) >= 8 and jpeg_or_png[:8] == b"\x89PNG\r\n\x1a\n":
            return ".png"
        return ".jpg"
    def _write_meta(self, face_dir: Path, name: str | None,
                    description: str | None = None,
                    added_at: str | None = None) -> None:
        meta: dict[str, str] = {}
        if name:
            meta["name"] = name
        if description:
            meta["description"] = description
        meta["added_at"] = added_at or datetime.now().isoformat(timespec="seconds")
        (face_dir / "meta.json").write_text(
            json.dumps(meta, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )
    def create_face(self, image_bytes_list: list[bytes],
                    name: str | None = None,
                    description: str | None = None) -> FaceEntry:
        """Create a new face_{next_id}/ with one or more samples."""
        if not image_bytes_list:
            raise ValueError("create_face: empty image list")
        with self._lock:
            self._ensure_root()
            face_id = self.next_id()
            face_dir = self.root / f"face_{face_id}"
            face_dir.mkdir(parents=True, exist_ok=False)
            for idx, data in enumerate(image_bytes_list, start=1):
                ext = self._detect_ext(data)
                fname = f"face_{idx}{ext}"
                (face_dir / fname).write_bytes(data)
            clean_name = (name or "").strip() or None
            clean_desc = (description or "").strip() or None
            self._write_meta(face_dir, clean_name, clean_desc)
            log.info("Created face_%d (samples=%d, name=%s, desc=%s)",
                     face_id, len(image_bytes_list), clean_name or "(unnamed)",
                     "yes" if clean_desc else "no")
            return self.get(face_id)  # type: ignore[return-value]
    def add_photo(self, face_id: int, image_bytes: bytes) -> str:
        """Append a new sample to an existing face. Returns the filename."""
        with self._lock:
            face_dir = self.root / f"face_{face_id}"
            if not face_dir.is_dir():
                raise FileNotFoundError(f"face_{face_id} not found")
            ext = self._detect_ext(image_bytes)
            fname = self._next_sample_name(face_dir, ext)
            (face_dir / fname).write_bytes(image_bytes)
            log.info("Added sample %s to face_%d", fname, face_id)
            return fname
    def rename(self, face_id: int, name: str | None) -> None:
        """Update meta.json with a new name (or clear it if name is empty).
        Preserves the existing description + added_at.
        """
        with self._lock:
            face_dir = self.root / f"face_{face_id}"
            if not face_dir.is_dir():
                raise FileNotFoundError(f"face_{face_id} not found")
            _, description, added = self._meta(face_dir)
            clean = (name or "").strip() or None
            self._write_meta(face_dir, clean, description, added_at=added)
            log.info("Renamed face_%d → %s", face_id, clean or "(unnamed)")
    def set_description(self, face_id: int, description: str | None) -> None:
        """Update meta.json with a free-text description (or clear it).
        Preserves the existing name + added_at. The description is folded
        into the Gemini primer turn so Gemini can reference it.
        """
        with self._lock:
            face_dir = self.root / f"face_{face_id}"
            if not face_dir.is_dir():
                raise FileNotFoundError(f"face_{face_id} not found")
            name, _, added = self._meta(face_dir)
            clean = (description or "").strip() or None
            self._write_meta(face_dir, name, clean, added_at=added)
            log.info("Set description for face_%d (%s)", face_id,
                     "cleared" if not clean else f"{len(clean)} chars")
    def delete_photo(self, face_id: int, photo_name: str) -> None:
        """Delete one photo. Refuses if it's the only remaining sample."""
        with self._lock:
            face_dir = self.root / f"face_{face_id}"
            if not face_dir.is_dir():
                raise FileNotFoundError(f"face_{face_id} not found")
            samples = self._samples_in(face_dir)
            if len(samples) <= 1:
                raise ValueError(
                    "Cannot delete the only photo — delete the face instead."
                )
            target = self.get_photo(face_id, photo_name)
            if target is None:
                raise FileNotFoundError(f"photo {photo_name} not found")
            target.unlink()
            log.info("Deleted %s from face_%d", photo_name, face_id)
    def delete_face(self, face_id: int) -> None:
        """Delete the entire face_{id}/ folder (including meta.json)."""
        import shutil
        with self._lock:
            face_dir = self.root / f"face_{face_id}"
            if not face_dir.is_dir():
                raise FileNotFoundError(f"face_{face_id} not found")
            shutil.rmtree(face_dir)
            log.info("Deleted face_%d", face_id)
    def zip_face(self, face_id: int) -> bytes:
        """Return the entire face_{id}/ folder packaged as a ZIP."""
        with self._lock:
            face_dir = self.root / f"face_{face_id}"
            if not face_dir.is_dir():
                raise FileNotFoundError(f"face_{face_id} not found")
            buf = io.BytesIO()
            with zipfile.ZipFile(buf, "w", compression=zipfile.ZIP_DEFLATED) as zf:
                for p in sorted(face_dir.iterdir()):
                    if p.is_file():
                        zf.write(p, arcname=f"face_{face_id}/{p.name}")
            return buf.getvalue()
    # ── primer support (used by gemini/script.py) ───────────
    def load_for_primer(
        self, max_samples_per_face: int = 3, resize_long_side: int = 256,
    ) -> list[tuple[FaceEntry, list[bytes]]]:
        """Return [(FaceEntry, [jpeg_bytes,…]), …] for Gemini upload.
        Resizes each sample to longest-side <= resize_long_side, re-encodes
        as JPEG (q=85) to keep the token cost manageable. Falls back to
        the raw bytes if PIL isn't available.
        """
        entries = self.list()
        if not entries:
            return []
        out: list[tuple[FaceEntry, list[bytes]]] = []
        for e in entries:
            paths = e.sample_paths[:max_samples_per_face]
            jpegs: list[bytes] = []
            for p in paths:
                try:
                    raw = p.read_bytes()
                except OSError:
                    continue
                processed = self._resize_for_primer(raw, resize_long_side)
                jpegs.append(processed or raw)
            if jpegs:
                out.append((e, jpegs))
        return out
    @staticmethod
    def _resize_for_primer(raw: bytes, long_side: int) -> bytes | None:
        """Resize image to longest-side ≤ long_side, re-encode JPEG q=85.
        Returns None on any failure (caller falls back to raw bytes).
        """
        try:
            from PIL import Image  # type: ignore
        except Exception:
            return None
        try:
            img = Image.open(io.BytesIO(raw))
            img.load()
            if img.mode not in ("RGB", "L"):
                img = img.convert("RGB")
            w, h = img.size
            scale = long_side / max(w, h) if max(w, h) > long_side else 1.0
            if scale < 1.0:
                img = img.resize(
                    (max(1, int(w * scale)), max(1, int(h * scale))),
                    Image.LANCZOS,
                )
            buf = io.BytesIO()
            img.save(buf, format="JPEG", quality=85, optimize=True)
            return buf.getvalue()
        except Exception:
            return None
--- a/vision/recognition_state.py
+++ b/vision/recognition_state.py
@ -0,0 +1,68 @@
 """Recognition state file — atomic JSON I/O shared by parent + child.
 The dashboard (parent process) writes this file on every toggle / face
 gallery change; the Gemini child (`gemini/script.py`) polls it at 1 Hz
 to flip its in-memory flags without a session restart.
 Format (data/.recognition_state.json):
    {
        "vision_enabled":    bool,
        "face_rec_enabled":  bool,
        "gallery_version":   int   # bumped on every face CRUD
    }
 """
 from __future__ import annotations
 import json
 import os
 import tempfile
 from dataclasses import asdict, dataclass
 from pathlib import Path
@dataclass
 class RecognitionState:
    """The three recognition settings persisted in the state file.

    All fields default to "off" / 0, which is also what `read` returns
    when the file is missing or corrupt.
    """
    vision_enabled: bool = False     # "vision_enabled" flag from the file
    face_rec_enabled: bool = False   # "face_rec_enabled" flag from the file
    gallery_version: int = 0         # counter; bumped on every face CRUD
 def read(path: Path) -> RecognitionState:
    """Return the persisted state, or a default if missing/corrupt.

    "Corrupt" covers an unreadable or undecodable file, invalid JSON, and
    valid JSON that is not an object. A ``gallery_version`` that cannot be
    converted to int falls back to 0 while the two flags are still honoured.
    """
    try:
        raw = json.loads(Path(path).read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return RecognitionState()
    if not isinstance(raw, dict):
        return RecognitionState()
    try:
        version = int(raw.get("gallery_version", 0))
    except (TypeError, ValueError, OverflowError):
        version = 0
    return RecognitionState(
        vision_enabled=bool(raw.get("vision_enabled", False)),
        face_rec_enabled=bool(raw.get("face_rec_enabled", False)),
        gallery_version=version,
    )
 def write(path: Path, state: RecognitionState) -> None:
    """Persist `state` as JSON at `path` without exposing a partial file.

    The JSON goes to a temp file in the destination directory, which is
    then moved over `path` with os.replace. On any failure the temp file
    is removed and the error re-raised. Missing parent directories are
    created first.
    """
    target = Path(path)
    folder = target.parent
    folder.mkdir(parents=True, exist_ok=True)
    handle, tmp_name = tempfile.mkstemp(
        prefix=f".{target.name}.", suffix=".tmp", dir=str(folder),
    )
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as out:
            json.dump(asdict(state), out, ensure_ascii=False, indent=2)
        os.replace(tmp_name, target)
    except Exception:
        # Don't leave the temp file behind next to the real one.
        try:
            os.unlink(tmp_name)
        except OSError:
            pass
        raise
 def mutate(path: Path, **changes) -> RecognitionState:
    """Load the state, apply `changes`, save it and return the result.

    Keyword names that are not attributes of the loaded state are ignored.
    """
    state = read(path)
    applicable = {key: value for key, value in changes.items()
                  if hasattr(state, key)}
    for attr, value in applicable.items():
        setattr(state, attr, value)
    write(path, state)
    return state
		`@ -0,0 +1 @@`
							`"""Vision package — camera daemon + face gallery for Gemini-side recognition."""`