Update 2026-05-15 09:39:52

2026-05-15 09:39:53 +04:00 · 2026-05-15 09:39:53 +04:00 · 811a391932
commit 811a391932
parent edddb7e0c3
16 changed files with 2723 additions and 262 deletions
--- a/README.md
+++ b/README.md
@ -7,21 +7,44 @@ JSONL macros; everything is orchestrated by a FastAPI dashboard.
 ```
 ┌────────────────────────────────────────────────────────────────────┐
 │  Dashboard (FastAPI) ── http://<robot>:8000                        │
+│  ├─ Operations         Quick-fire arm actions                      │
 │  ├─ Voice & Audio      Live Gemini, Typed Replay, Wake Phrases     │
 │  ├─ Motion & Replay    SDK actions, JSONL replays, teaching mode   │
-│  ├─ Camera & Vision    (deprecated, UI kept for compat)            │
+│  ├─ Recognition        Camera vision + face gallery (Gemini-side)  │
 │  ├─ Recordings         Skills registry, saved Gemini turns         │
 │  └─ Settings & Logs    System info, tail live log                  │
 └────────────────────────────────────────────────────────────────────┘
        │
        ├─ voice/sanad_voice.py  (subprocess — Gemini Live audio loop)
+        ├─ gemini/script.py      (Gemini Live brain — audio + video + state)
        ├─ gemini/client.py      (short-session client for Typed Replay)
-        ├─ gemini/subprocess.py  (spawns+supervises sanad_voice.py)
+        ├─ gemini/subprocess.py  (spawns+supervises sanad_voice.py;
+        │                         pushes camera frames + motion state
+        │                         to the child over its stdin)
+        ├─ vision/camera.py      (RealSense/USB capture daemon)
+        ├─ vision/face_gallery.py (data/faces/ CRUD for the primer turn)
        ├─ motion/arm_controller.py  (G1 arm DDS publisher)
        ├─ voice/audio_io.py     (mic + speaker abstraction — 3 profiles)
        └─ core/brain.py         (skill dispatcher, event bus)
 ```

+### Camera + face recognition data flow
+
+```
+CameraDaemon (parent, in-memory JPEG+b64 cache)
+  ├─→ dashboard /api/recognition/frame.jpg   ── snapshot_jpeg()
+  └─→ GeminiSubprocess._frame_forwarder      ── get_frame_b64()
+                                                 │ "frame:<b64>\n" over stdin
+ArmController ─emit→ event bus ─→ main.py ─→ live_sub.send_state()
+                                                 │ "state:<json>\n" over stdin
+                                                 ▼
+                          gemini/script.py  _stdin_watcher thread
+                            ├─ frame: → _LATEST_FRAME → _send_frame_loop →
+                            │             session.send_realtime_input(video=Blob)
+                            └─ state: → _STATE_PENDING → _send_state_loop →
+                                          session.send_realtime_input(text=…)
+```
+

 ## Quick start (on the robot)

@ -42,13 +65,14 @@ Then open `http://<robot-ip>:8000` in a browser.
 | `config.py` | Runtime constants derived from `config/*_config.json`. |
 | `config/` | Per-subsystem JSON config: `core`, `voice`, `gemini`, `motion`, `dashboard`, `local`. |
 | `core/` | Brain, skill registry, event bus, config loader, logger. |
-| `gemini/` | Gemini Live — `client.py` (one-shot), `script.py` (live brain), `subprocess.py` (supervisor). |
+| `gemini/` | Gemini Live — `client.py` (one-shot), `script.py` (live brain: audio + video + motion-state), `subprocess.py` (supervisor + stdin frame/state push). |
 | `voice/` | `sanad_voice.py` (subprocess entry), `audio_io.py` (mic/speaker), `audio_manager.py`, `local_tts.py`, `live_voice_loop.py`, `typed_replay.py`, `wake_phrase_manager.py`, `text_utils.py`, `model_script.py` (brain template). |
+| `vision/` | `camera.py` (RealSense/USB capture daemon, auto-reconnect), `face_gallery.py` (`data/faces/` CRUD), `recognition_state.py` (toggle state file I/O). |
 | `local/` | Offline pipeline skeleton — Silero VAD, Whisper, Qwen (via Ollama), CosyVoice2. Opt-in via `SANAD_VOICE_BRAIN=local`. |
 | `motion/` | `arm_controller.py` (main), `sanad_arm_controller.py`, `macro_player.py`, `macro_recorder.py`, `teaching.py`. |
 | `dashboard/` | FastAPI routes (`dashboard/routes/*.py`) + static UI (`dashboard/static/index.html`). |
-| `scripts/` | Persona files — `sanad_script.txt` (voice persona), `sanad_rule.txt`, `sanad_arm.txt` (voice→arm phrases). |
-| `data/` | Runtime state — `audio/` (typed-replay WAVs), `motions/` (arm JSONL files), `recordings/` (live-captured turns), `motions/config.json` (dashboard-editable settings). |
+| `scripts/` | Persona files — `sanad_v2` (voice persona), `sanad_rule.txt`, `sanad_arm.txt` (voice→arm phrases). |
+| `data/` | Runtime state — `audio/` (typed-replay WAVs), `motions/` (arm JSONL files), `recordings/` (live-captured turns), `faces/face_{id}/` (enrolled face galleries), `.recognition_state.json` (vision/face-rec toggle state), `motions/config.json` (dashboard-editable settings). |
 | `model/` | Place for local SpeechT5 / CosyVoice2 weights when using offline pipeline. |
 | `logs/` | Per-module rotating logs. |

@ -64,6 +88,16 @@ Then open `http://<robot-ip>:8000` in a browser.
 | `SANAD_LIVE_SCRIPT` | path | auto | Override the subprocess entry script path. |
 | `SANAD_RECORD` | `0` or `1` | `1` | Record every Gemini turn to `data/recordings/`. |
 | `SANAD_AEC_ENABLE` | `0` or `1` | `1` | Enable WebRTC AEC3 (if the Python binding is installed). |
+| `SANAD_VISION_ENABLE` | `0` or `1` | `0` | Boot default for camera vision. **Runtime truth is the Recognition-tab toggle** → `data/.recognition_state.json`, hot-applied without a restart. |
+| `SANAD_FACE_RECOGNITION_ENABLE` | `0` or `1` | `0` | Boot default for Gemini-side face recognition. Also a hot toggle. |
+| `SANAD_VISION_SEND_HZ` | float | `2` | Frames/sec the Gemini child relays to Live. |
+| `SANAD_CAMERA_WIDTH` / `_HEIGHT` / `_FPS` | int | `424` / `240` / `15` | Capture profile. Also settable per-deploy in `config/core_config.json > camera`. |
+| `SANAD_FACES_MAX_SAMPLES` | int | `3` | Max photos per person fed into the gallery primer turn (token budget). |
+
+> All `SANAD_VISION_*` / `SANAD_CAMERA_*` / `SANAD_FACE_*` vars are **boot
+> defaults** forwarded to the Gemini child via `LIVE_TUNE`. Once running,
+> the Recognition tab's toggles are the live source of truth — they write
+> `data/.recognition_state.json`, which the child polls at 1 Hz.


 ## Dashboard features
@ -83,6 +117,25 @@ Quick-fire SDK + JSONL arm actions (chip buttons), gestural speaking toggle.
 - **Replay Manager** — upload `.jsonl` files, test-play with speed, Teaching Mode (kinesthetic record).
 - **Macro Recorder** — Record new audio+motion pair, OR pick any WAV + any motion (SDK or JSONL) and Play them in parallel.

+### Recognition
+Camera vision + Gemini-side face recognition. Both are **off by default**;
+each is a **hot toggle** — flipping it takes effect on the running Gemini
+session within ~1 s, no restart.
+
+- **Camera Vision** — when on, the `CameraDaemon` captures from a RealSense
+  (preferred) or USB camera and the supervisor streams JPEG frames to
+  Gemini Live so it can answer "what do you see?". Live preview panel.
+- **Face Recognition** — manage `data/faces/face_{id}/` galleries: enroll
+  from the live camera or upload photos, rename, download (per-photo or
+  ZIP), delete. On a session start (and on any gallery change) the child
+  sends a **primer turn** carrying every enrolled face + a Khaleeji
+  greeting instruction — Gemini itself does the matching in-context, so
+  there's **no local face-recognition model**. Recognition needs vision on.
+- **Sync Gallery** — force-resend the primer to the live session.
+
+The camera daemon auto-reconnects on USB unplug / stalled frames and warns
+if a RealSense negotiated USB 2.0 (Marcus-ported resilience).
+
 ### Recordings
 Skill Registry (predefined audio+motion skills from `skills.json`) + Saved Records (Gemini turn recordings).

@ -94,6 +147,48 @@ Skill Registry (predefined audio+motion skills from `skills.json`) + Saved Recor
 - **Supervisor contract**: each brain ships a sibling supervisor (e.g., `gemini/subprocess.py`) that spawns `sanad_voice.py` with its `SANAD_VOICE_BRAIN` env var and parses the brain's log markers. Template: `voice/model_subprocess.py`.
 - **Audio routing**: the G1's platform-sound PulseAudio sink is NOT wired to a physical speaker. All dashboard-triggered playback (`play_wav`, typed-replay audio, record playback) routes through DDS `AudioClient.PlayStream` via `audio_manager._play_pcm_via_g1`. The PyAudio path is kept as a desktop/dev fallback only.
 - **Arm replay**: `motion/arm_controller.py:_replay_file_inner()` is a verbatim port of `G1_Lootah/Manual_Recorder/g1_replay_v4_stable.py:Run()` — ramp-in → settle hold → playback → smooth return → disable SDK. Cancel breaks the play loop; `_return_home()` runs unconditionally afterwards for a jerk-free return.
+- **Camera frame transport (stdin push)**: the `CameraDaemon` lives in the parent and caches frames in memory. `GeminiSubprocess` runs a `_frame_forwarder` thread that base64-encodes the latest frame and writes `frame:<b64>\n` to the child's stdin (~2 fps). The child's `_stdin_watcher` thread decodes into `_LATEST_FRAME`; `_send_frame_loop` relays it to Gemini Live with a staleness guard. This is the Marcus pattern — chosen over a file drop so the parent owns the camera once and the dashboard preview reads the same in-memory cache.
+- **Motion-state channel**: `arm_controller._execute()` emits `motion.action_started` / `_done` / `_error` on the event bus. `main.py` forwards each to `live_sub.send_state()`, which writes `state:<json>\n` to the child's stdin. The child injects `[STATE-START] wave_hand`, `[STATE-DONE] wave_hand (2.3s)`, etc. into Gemini Live as silent text context (`send_realtime_input(text=…)`) so it can honestly answer "what are you doing?".
+- **Face recognition is Gemini-side**: no dlib/insightface/onnxruntime. `vision/face_gallery.py` is pure file IO over `data/faces/face_{id}/` (`face_N.jpg|png` samples + optional `meta.json` with a `name`). At session start (and on any gallery change) `gemini/script.py:_send_gallery_primer()` builds one multimodal `send_client_content` turn — every enrolled face's photos + a greeting instruction — and Gemini matches incoming frames against it in-context.
+
+
+## Camera vision on Jetson
+
+The Recognition tab needs `pyrealsense2` to talk to the Intel RealSense.
+**Do not `pip install pyrealsense2` on JetPack 5** — the PyPI wheel is built
+against glibc 2.32+ (Ubuntu 22.04) and fails to load on JetPack 5's glibc
+2.31 with `ImportError: ... version 'GLIBC_2.32' not found`.
+
+The native runtime is already there (`apt`-installed `librealsense2`). Build
+just the Python binding from source against it, into the `gemini_sdk` env:
+
+```bash
+rs-enumerate-devices            # confirm the D435I shows up at OS level first
+
+source ~/miniconda3/etc/profile.d/conda.sh && conda activate gemini_sdk
+pip uninstall -y pyrealsense2   # remove the broken wheel if present
+sudo apt install -y cmake build-essential git python3-dev libusb-1.0-0-dev pkg-config libssl-dev
+
+cd /tmp && rm -rf librealsense
+git clone --depth=1 --branch v2.56.5 https://github.com/IntelRealSense/librealsense.git
+cd librealsense && mkdir -p build && cd build
+cmake .. -DBUILD_PYTHON_BINDINGS=ON -DPYTHON_EXECUTABLE=$(which python3) \
+         -DBUILD_EXAMPLES=OFF -DBUILD_GRAPHICAL_EXAMPLES=OFF \
+         -DBUILD_UNIT_TESTS=OFF -DCHECK_FOR_UPDATES=OFF -DCMAKE_BUILD_TYPE=Release
+make -j$(nproc) pyrealsense2
+SITE=$(python3 -c "import sysconfig; print(sysconfig.get_paths()['purelib'])")
+mkdir -p "$SITE/pyrealsense2"
+cp wrappers/python/pyrealsense2*.so "$SITE/pyrealsense2/"
+cp ../wrappers/python/pyrealsense2/__init__.py "$SITE/pyrealsense2/" 2>/dev/null || true
+
+python3 -c 'import pyrealsense2 as rs; print([d.get_info(rs.camera_info.name) for d in rs.context().query_devices()])'
+```
+
+Match the `--branch` tag to the installed runtime (`dpkg -l | grep librealsense2`).
+If the build isn't worth it, `CameraDaemon` falls back to `cv2.VideoCapture(0)`
+automatically — fine for a plain USB webcam, but note a RealSense exposes its
+*depth* stream at `/dev/video0`, not RGB, so a real USB cam is the cleaner
+fallback. On x86_64 / Ubuntu 22.04+ desktops, `pip install pyrealsense2` just works.


 ## Dynamic paths
@ -133,7 +228,10 @@ Then on the robot: `Ctrl+C` the running `main.py` and re-run.
 | Record playback silent | `audio_mgr.play_wav` only routes to G1 DDS if the Unitree SDK is importable; on desktop it falls back to the PulseAudio sink. |
 | Live Voice Commands transcript stuck | Deferred trigger was queued but `trigger_enabled` toggle was off. Toggle on — or the pending-trigger poll now fires it automatically once enabled. |
 | Gemini "no audio" on Typed Replay | Non-deterministic; the retry chain in `voice/typed_replay.py:generate_audio` tries three prompt variants. For reliable TTS, use the offline `local_tts` SpeechT5 path. |
-| Dashboard `Not Found` 404s for `/api/vision/*` | Vision module was deleted; HTML still has stale fetches for a few endpoints. Cosmetic — `dashboard/static/index.html` init block already skips most. |
+| Recognition tab: "Camera could not start (no backend)" | No camera backend acquired. Check `rs-enumerate-devices` (RealSense at OS level) and `python3 -c 'import pyrealsense2'` in the `gemini_sdk` env. The glibc `ImportError` means the pip wheel is incompatible — see "Camera vision on Jetson" above. |
+| Camera badge stuck on "reconnecting…" | `CameraDaemon` lost the device and is retrying with exponential backoff. Re-seat the USB 3 cable; check `logs/camera.log` for the USB-2.0 warning. |
+| Gemini doesn't greet an enrolled face | Face Recognition toggle on? Vision on? (Face rec needs frames.) Check `logs/gemini_brain.log` for `face gallery primed: N person(s)`. Hit "Sync Gallery" to force a re-prime. |
+| Gemini unaware of motion state | The `motion.action_*` → `send_state` chain only runs when Live Gemini is up. Check `logs/gemini_subprocess.log` and `logs/gemini_brain.log` for `STATE injected:` lines. |


 ## License / attribution
@ -142,4 +240,7 @@ Internal project for YS Lootah Technology. Reuses/ports patterns from:
 - `G1_Lootah/Manual_Recorder/g1_replay_v4_stable.py` (arm replay math)
 - `SanadVoice/gemini_interact` (arm-phrase dispatch, skill registry)
 - `SanadVoice/gemini_voice_v2` (local SpeechT5 TTS)
+- `Project/Marcus` — camera→Gemini stdin-push transport, motion-state
+  injection, camera daemon resilience (auto-reconnect, USB-2.0 warning),
+  and the `API/camera_api.py` cache shape (`get_frame_b64` / `get_fresh_frame`).
 - Unitree `unitree_sdk2py` (G1 low-level SDK, LocoClient, G1ArmActionClient)
--- a/config.py
+++ b/config.py
@ -341,6 +341,24 @@ LIVE_TUNE: dict[str, str] = {
    # G1 built-in mic — UDP multicast 239.168.123.161:5555.
    # Requires wake-up conversation mode ON in Unitree app.
    "SANAD_USE_G1_MIC": "1",
+
+    # ── Recognition (camera vision + face recognition) ──
+    # All of these are BOOT defaults. The runtime source of truth is the
+    # state file data/.recognition_state.json — toggled live from the
+    # Recognition tab and polled by the Gemini child at 1 Hz.
+    "SANAD_VISION_ENABLE":            "0",
+    "SANAD_VISION_SEND_HZ":           "2",
+    "SANAD_VISION_STALE_MS":          "1500",
+    "SANAD_CAMERA_WIDTH":             "424",
+    "SANAD_CAMERA_HEIGHT":            "240",
+    "SANAD_CAMERA_FPS":               "15",
+    "SANAD_CAMERA_JPEG_QUALITY":      "70",
+    "SANAD_FACE_RECOGNITION_ENABLE":  "0",
+    "SANAD_FACES_DIR":                str(DATA_DIR / "faces"),
+    "SANAD_FACES_MAX_SAMPLES":        "3",
+    "SANAD_FACES_PRIMER_RESIZE":      "256",
+    "SANAD_RECOGNITION_STATE_PATH":   str(DATA_DIR / ".recognition_state.json"),
+    "SANAD_RECOGNITION_POLL_S":       "1.0",
 }

 # -- Camera --
--- a/config/core_config.json
+++ b/config/core_config.json
@ -76,5 +76,26 @@

  "dds": {
    "network_interface_default": "eth0"
+  },
+
+  "camera": {
+    "_comment": "Recognition tab camera daemon (parent process reads this). width/height/fps/jpeg_quality + the reconnect knobs configure CameraDaemon. Frames are cached in memory and pushed to the Gemini child over its stdin (no file drop). send_hz/stale_ms are read by the Gemini child via SANAD_VISION_SEND_HZ / SANAD_VISION_STALE_MS env vars (LIVE_TUNE).",
+    "width": 424,
+    "height": 240,
+    "fps": 15,
+    "jpeg_quality": 70,
+    "send_hz": 2,
+    "stale_ms": 1500,
+    "stale_threshold_s": 10.0,
+    "reconnect_min_s": 2.0,
+    "reconnect_max_s": 10.0,
+    "capture_timeout_ms": 5000
+  },
+
+  "faces": {
+    "_comment": "Face gallery for Gemini-side recognition. Folder layout: data/faces/face_{id}/{face_1.jpg, ...} + optional meta.json {\"name\": \"...\"}. Gemini does the matching — no local ML model.",
+    "dir_rel": "data/faces",
+    "max_samples_per_face": 3,
+    "primer_resize_long_side": 256
  }
 }
--- a/config/gemini_config.json
+++ b/config/gemini_config.json
@ -10,12 +10,13 @@
  },

  "subprocess": {
-    "_comment": "gemini/subprocess.py — GeminiSubprocess supervisor. Spawns voice/sanad_voice.py as a child, tails stdout for Gemini-specific log markers, exposes transcript + state to the dashboard.",
+    "_comment": "gemini/subprocess.py — GeminiSubprocess supervisor. Spawns voice/sanad_voice.py as a child, tails stdout for Gemini-specific log markers, pushes camera frames + motion state to the child over its stdin, exposes transcript + state to the dashboard.",
    "log_tail_size": 2000,
    "transcript_tail_size": 30,
    "log_name": "gemini_subprocess",
    "stop_timeout_sec": 3.0,
    "terminate_timeout_sec": 2.0,
+    "frame_forward_interval_sec": 0.5,
    "noisy_prefixes": [
      "ALSA lib ",
      "Expression 'alsa_",
--- a/dashboard/app.py
+++ b/dashboard/app.py
@ -50,6 +50,7 @@ _REST_ROUTES: list[tuple[str, str, str]] = [
    ("live_voice",      "/api/live-voice",      "live-voice"),
    ("live_subprocess", "/api/live-subprocess", "live-subprocess"),
    ("typed_replay",    "/api/typed-replay",    "typed-replay"),
+    ("recognition",     "/api/recognition",     "recognition"),
 ]

 _WS_ROUTES: list[str] = ["log_stream"]
--- a/dashboard/routes/recognition.py
+++ b/dashboard/routes/recognition.py
@ -0,0 +1,457 @@
+"""Recognition tab — camera vision + face gallery + hot toggles.
+
+Single router covering:
+  - Vision / Face Recognition toggles (hot — no Gemini restart needed)
+  - Live camera preview (latest JPEG drop)
+  - Face gallery CRUD: enroll, upload, capture, rename, delete, ZIP
+  - Per-photo download + delete
+
+Toggle changes write data/.recognition_state.json atomically. The Gemini
+child polls that file at 1 Hz and applies changes mid-session.
+"""
+
+from __future__ import annotations
+
+import io
+from typing import Optional
+
+from fastapi import APIRouter, File, HTTPException, Query, UploadFile
+from fastapi.responses import FileResponse, Response, StreamingResponse
+from pydantic import BaseModel
+
+from Project.Sanad.config import BASE_DIR
+from Project.Sanad.core.logger import get_logger
+from Project.Sanad.dashboard.routes._safe_io import check_upload_size
+from Project.Sanad.vision import recognition_state
+
+log = get_logger("recognition_routes")
+
+router = APIRouter()
+
+
+# ── paths (resolved from BASE_DIR) ──────────────────────────
+
+STATE_PATH = BASE_DIR / "data" / ".recognition_state.json"
+FACES_DIR = BASE_DIR / "data" / "faces"
+
+ALLOWED_IMAGE_EXTS = {".jpg", ".jpeg", ".png"}
+
+
+# ── helpers ─────────────────────────────────────────────────
+
+def _get_camera():
+    """Lazy import to avoid circular import on dashboard load."""
+    try:
+        from Project.Sanad.main import camera  # type: ignore
+        return camera
+    except Exception:
+        return None
+
+
+def _get_gallery():
+    """Lazy import — same reason."""
+    try:
+        from Project.Sanad.main import gallery  # type: ignore
+        return gallery
+    except Exception:
+        return None
+
+
+def _bump_and_write_state(**changes) -> recognition_state.RecognitionState:
+    """Apply changes (vision_enabled, face_rec_enabled) and persist."""
+    return recognition_state.mutate(STATE_PATH, **changes)
+
+
+def _bump_gallery_version() -> int:
+    cur = recognition_state.read(STATE_PATH)
+    new_version = cur.gallery_version + 1
+    recognition_state.mutate(STATE_PATH, gallery_version=new_version)
+    return new_version
+
+
+# ── state + toggles ─────────────────────────────────────────
+
+@router.get("/state")
+async def get_state():
+    """Return the current toggle/camera/gallery state."""
+    st = recognition_state.read(STATE_PATH)
+    cam = _get_camera()
+    gallery = _get_gallery()
+    faces_count = 0
+    photos_count = 0
+    if gallery is not None:
+        try:
+            entries = gallery.list()
+            faces_count = len(entries)
+            photos_count = sum(len(e.sample_paths) for e in entries)
+        except Exception:
+            pass
+    return {
+        "vision_enabled": st.vision_enabled,
+        "face_rec_enabled": st.face_rec_enabled,
+        "gallery_version": st.gallery_version,
+        "camera": cam.status() if cam is not None else {
+            "running": False, "backend": None, "error": "camera subsystem unavailable"
+        },
+        "faces_count": faces_count,
+        "photos_count": photos_count,
+    }
+
+
+@router.post("/vision")
+async def set_vision(on: bool = Query(...)):
+    """Enable / disable camera vision (hot — no Gemini restart)."""
+    cam = _get_camera()
+    if cam is None:
+        log.warning("vision toggle requested but camera subsystem unavailable")
+        raise HTTPException(503, "Camera subsystem not available.")
+
+    if on and not cam.is_running():
+        ok = cam.start()
+        if not ok:
+            log.warning("vision ON requested but camera.start() failed: %s",
+                        cam.error or "no backend")
+            _bump_and_write_state(vision_enabled=False)
+            raise HTTPException(503,
+                                f"Camera could not start (no backend). {cam.error or ''}")
+    elif (not on) and cam.is_running():
+        cam.stop()
+
+    st = _bump_and_write_state(vision_enabled=bool(on))
+    log.info("vision %s (backend=%s)", "ON" if on else "OFF",
+             cam.backend if cam.is_running() else "none")
+    return {"ok": True, "vision_enabled": st.vision_enabled,
+            "camera": cam.status()}
+
+
+@router.post("/face-rec")
+async def set_face_rec(on: bool = Query(...)):
+    """Enable / disable face recognition (hot — no Gemini restart).
+
+    The Gemini child picks the change up within ~1 s: ON re-sends the
+    gallery primer and tells Gemini it can recognise people; OFF tells
+    Gemini to disregard the gallery and stop identifying anyone. Both
+    take effect on the live session — no reconnect needed.
+    """
+    st = _bump_and_write_state(face_rec_enabled=bool(on))
+    log.info("face recognition %s", "ON" if on else "OFF")
+    return {"ok": True, "face_rec_enabled": st.face_rec_enabled}
+
+
+@router.post("/sync")
+async def sync_gallery():
+    """Bump gallery_version so the child re-sends the primer if face-rec is ON."""
+    v = _bump_gallery_version()
+    log.info("gallery sync requested → v.%d", v)
+    return {"ok": True, "gallery_version": v}
+
+
+# ── live preview ────────────────────────────────────────────
+
+@router.get("/frame.jpg")
+async def latest_frame():
+    """Serve the most recent camera frame straight from the daemon's
+    in-memory cache (no file drop — frames are also pushed to the Gemini
+    child over its stdin)."""
+    cam = _get_camera()
+    if cam is None:
+        raise HTTPException(503, "Camera subsystem unavailable.")
+    jpeg = cam.snapshot_jpeg()
+    if not jpeg:
+        raise HTTPException(404, "No frame captured yet.")
+    return Response(
+        content=jpeg,
+        media_type="image/jpeg",
+        headers={"Cache-Control": "no-store, must-revalidate"},
+    )
+
+
+# ── camera resolution / quality ─────────────────────────────
+
+class CameraConfigPayload(BaseModel):
+    width: Optional[int] = None
+    height: Optional[int] = None
+    fps: Optional[int] = None
+    jpeg_quality: Optional[int] = None
+
+
+@router.post("/camera-config")
+async def set_camera_config(payload: CameraConfigPayload):
+    """Hot-swap the camera capture profile (resolution / fps / JPEG quality).
+
+    If the camera is running, CameraDaemon.reconfigure() rebuilds the
+    pipeline at the new profile (~0.5 s gap). If idle, the values just
+    take effect on the next start. Bounds are sanity-checked here so a
+    fat-fingered value can't wedge the daemon."""
+    cam = _get_camera()
+    if cam is None:
+        raise HTTPException(503, "Camera subsystem unavailable.")
+    if payload.width is not None and not (160 <= payload.width <= 1920):
+        raise HTTPException(400, "width out of range (160–1920)")
+    if payload.height is not None and not (120 <= payload.height <= 1080):
+        raise HTTPException(400, "height out of range (120–1080)")
+    if payload.fps is not None and not (1 <= payload.fps <= 60):
+        raise HTTPException(400, "fps out of range (1–60)")
+    if payload.jpeg_quality is not None and not (10 <= payload.jpeg_quality <= 95):
+        raise HTTPException(400, "jpeg_quality out of range (10–95)")
+    profile = cam.reconfigure(
+        width=payload.width, height=payload.height,
+        fps=payload.fps, jpeg_quality=payload.jpeg_quality,
+    )
+    log.info("camera reconfigured via dashboard → %s", profile)
+    return {"ok": True, "profile": profile, "camera": cam.status()}
+
+
+# ── face gallery routes ─────────────────────────────────────
+
+def _validate_image(content: bytes, filename: str | None = None) -> None:
+    """Reject non-JPEG/PNG content + oversize uploads."""
+    check_upload_size(content)
+    if len(content) < 16:
+        raise HTTPException(400, "Image too small / empty.")
+    is_jpeg = content[:3] == b"\xff\xd8\xff"
+    is_png = content[:8] == b"\x89PNG\r\n\x1a\n"
+    if not (is_jpeg or is_png):
+        raise HTTPException(
+            400,
+            f"Only JPEG/PNG accepted (got {filename or 'unknown'}).",
+        )
+
+
+def _entry_to_dict(entry) -> dict:
+    photos = []
+    for p in entry.sample_paths:
+        try:
+            photos.append({"name": p.name, "size_bytes": p.stat().st_size})
+        except OSError:
+            continue
+    return {
+        "id": entry.id,
+        "name": entry.name,
+        "description": entry.description,
+        "added_at": entry.added_at,
+        "photos": photos,
+    }
+
+
+@router.get("/faces")
+async def list_faces():
+    gallery = _get_gallery()
+    if gallery is None:
+        raise HTTPException(503, "Face gallery subsystem unavailable.")
+    entries = gallery.list()
+    return {"faces": [_entry_to_dict(e) for e in entries],
+            "total": len(entries)}
+
+
+class RenamePayload(BaseModel):
+    name: Optional[str] = None
+
+
+class DescribePayload(BaseModel):
+    description: Optional[str] = None
+
+
+@router.post("/faces/enroll")
+async def enroll_from_camera(name: Optional[str] = Query(default=None),
+                             description: Optional[str] = Query(default=None)):
+    """Create a new face from the camera's latest snapshot."""
+    gallery = _get_gallery()
+    if gallery is None:
+        raise HTTPException(503, "Face gallery subsystem unavailable.")
+    cam = _get_camera()
+    if cam is None or not cam.is_running():
+        raise HTTPException(409, "Camera is not running. Toggle Vision ON first.")
+    # get_fresh_frame waits briefly for a current frame so the enrolled
+    # photo is the scene the user is posing for, not a stale buffer.
+    jpeg = cam.get_fresh_frame(max_age_s=0.5, timeout_s=1.5)
+    if not jpeg:
+        raise HTTPException(409, "Camera has no frame yet. Wait a moment and retry.")
+    entry = gallery.create_face(
+        [jpeg],
+        name=name.strip() if name else None,
+        description=description.strip() if description else None,
+    )
+    v = _bump_gallery_version()
+    log.info("enrolled face_%d via camera (name=%s, desc=%s, v.%d)",
+             entry.id, name or "(unnamed)",
+             "yes" if description else "no", v)
+    return {"ok": True, "face": _entry_to_dict(entry)}
+
+
+@router.post("/faces/upload")
+async def enroll_from_upload(
+    files: list[UploadFile] = File(...),
+    name: Optional[str] = Query(default=None),
+    description: Optional[str] = Query(default=None),
+):
+    """Create a new face from uploaded image file(s)."""
+    gallery = _get_gallery()
+    if gallery is None:
+        raise HTTPException(503, "Face gallery subsystem unavailable.")
+    if not files:
+        raise HTTPException(400, "At least one image file required.")
+    image_bytes: list[bytes] = []
+    for f in files:
+        content = await f.read()
+        _validate_image(content, f.filename)
+        image_bytes.append(content)
+    entry = gallery.create_face(
+        image_bytes,
+        name=name.strip() if name else None,
+        description=description.strip() if description else None,
+    )
+    v = _bump_gallery_version()
+    log.info("enrolled face_%d via upload (%d photos, name=%s, desc=%s, v.%d)",
+             entry.id, len(image_bytes), name or "(unnamed)",
+             "yes" if description else "no", v)
+    return {"ok": True, "face": _entry_to_dict(entry)}
+
+
+@router.post("/faces/{face_id}/capture")
+async def capture_to_face(face_id: int):
+    """Add a new sample (from the camera) to an existing face."""
+    gallery = _get_gallery()
+    if gallery is None:
+        raise HTTPException(503, "Face gallery subsystem unavailable.")
+    cam = _get_camera()
+    if cam is None or not cam.is_running():
+        raise HTTPException(409, "Camera is not running. Toggle Vision ON first.")
+    jpeg = cam.get_fresh_frame(max_age_s=0.5, timeout_s=1.5)
+    if not jpeg:
+        raise HTTPException(409, "Camera has no frame yet.")
+    try:
+        fname = gallery.add_photo(face_id, jpeg)
+    except FileNotFoundError as exc:
+        raise HTTPException(404, str(exc))
+    v = _bump_gallery_version()
+    log.info("captured new photo for face_%d → %s (v.%d)", face_id, fname, v)
+    return {"ok": True, "added": fname, "face": _entry_to_dict(gallery.get(face_id))}
+
+
+@router.post("/faces/{face_id}/upload")
+async def upload_to_face(face_id: int, files: list[UploadFile] = File(...)):
+    """Add one or more uploaded samples to an existing face."""
+    gallery = _get_gallery()
+    if gallery is None:
+        raise HTTPException(503, "Face gallery subsystem unavailable.")
+    if gallery.get(face_id) is None:
+        raise HTTPException(404, f"face_{face_id} not found")
+    added: list[str] = []
+    for f in files:
+        content = await f.read()
+        _validate_image(content, f.filename)
+        try:
+            fname = gallery.add_photo(face_id, content)
+            added.append(fname)
+        except FileNotFoundError as exc:
+            raise HTTPException(404, str(exc))
+    v = _bump_gallery_version()
+    log.info("uploaded %d photo(s) to face_%d (v.%d)", len(added), face_id, v)
+    return {"ok": True, "added": added,
+            "face": _entry_to_dict(gallery.get(face_id))}
+
+
+@router.post("/faces/{face_id}/rename")
+async def rename_face(face_id: int, payload: RenamePayload):
+    gallery = _get_gallery()
+    if gallery is None:
+        raise HTTPException(503, "Face gallery subsystem unavailable.")
+    try:
+        gallery.rename(face_id, payload.name)
+    except FileNotFoundError as exc:
+        raise HTTPException(404, str(exc))
+    v = _bump_gallery_version()
+    log.info("renamed face_%d → %s (v.%d)", face_id,
+             payload.name or "(unnamed)", v)
+    return {"ok": True, "face": _entry_to_dict(gallery.get(face_id))}
+
+
+@router.post("/faces/{face_id}/describe")
+async def describe_face(face_id: int, payload: DescribePayload):
+    """Set / clear a face's free-text description. The description is
+    folded into the Gemini primer turn so Gemini can reference it."""
+    gallery = _get_gallery()
+    if gallery is None:
+        raise HTTPException(503, "Face gallery subsystem unavailable.")
+    try:
+        gallery.set_description(face_id, payload.description)
+    except FileNotFoundError as exc:
+        raise HTTPException(404, str(exc))
+    v = _bump_gallery_version()
+    log.info("described face_%d (%s, v.%d)", face_id,
+             "set" if payload.description else "cleared", v)
+    return {"ok": True, "face": _entry_to_dict(gallery.get(face_id))}
+
+
+@router.delete("/faces/{face_id}")
+async def delete_face(face_id: int):
+    gallery = _get_gallery()
+    if gallery is None:
+        raise HTTPException(503, "Face gallery subsystem unavailable.")
+    try:
+        gallery.delete_face(face_id)
+    except FileNotFoundError as exc:
+        raise HTTPException(404, str(exc))
+    v = _bump_gallery_version()
+    log.info("deleted face_%d (v.%d)", face_id, v)
+    return {"ok": True, "deleted": face_id}
+
+
+@router.delete("/faces/{face_id}/photo/{photo_name}")
+async def delete_photo(face_id: int, photo_name: str):
+    gallery = _get_gallery()
+    if gallery is None:
+        raise HTTPException(503, "Face gallery subsystem unavailable.")
+    # safe filename — only allow simple file names, no traversal
+    if "/" in photo_name or ".." in photo_name or "\x00" in photo_name:
+        raise HTTPException(400, "Invalid photo name.")
+    try:
+        gallery.delete_photo(face_id, photo_name)
+    except FileNotFoundError as exc:
+        raise HTTPException(404, str(exc))
+    except ValueError as exc:
+        raise HTTPException(400, str(exc))
+    v = _bump_gallery_version()
+    log.info("deleted photo %s from face_%d (v.%d)", photo_name, face_id, v)
+    return {"ok": True, "deleted": photo_name}
+
+
+@router.get("/faces/{face_id}/photo/{photo_name}")
+async def get_photo(face_id: int, photo_name: str,
+                    download: int = Query(default=0)):
+    """Serve a single photo. Add ?download=1 for attachment disposition."""
+    gallery = _get_gallery()
+    if gallery is None:
+        raise HTTPException(503, "Face gallery subsystem unavailable.")
+    if "/" in photo_name or ".." in photo_name or "\x00" in photo_name:
+        raise HTTPException(400, "Invalid photo name.")
+    path = gallery.get_photo(face_id, photo_name)
+    if path is None:
+        raise HTTPException(404, "Photo not found.")
+    media = "image/png" if path.suffix.lower() == ".png" else "image/jpeg"
+    headers = {}
+    if download:
+        headers["Content-Disposition"] = (
+            f'attachment; filename="face_{face_id}_{photo_name}"'
+        )
+    return FileResponse(path, media_type=media, headers=headers)
+
+
+@router.get("/faces/{face_id}/download.zip")
+async def download_face_zip(face_id: int):
+    gallery = _get_gallery()
+    if gallery is None:
+        raise HTTPException(503, "Face gallery subsystem unavailable.")
+    try:
+        data = gallery.zip_face(face_id)
+    except FileNotFoundError as exc:
+        raise HTTPException(404, str(exc))
+    return StreamingResponse(
+        io.BytesIO(data),
+        media_type="application/zip",
+        headers={
+            "Content-Disposition": f'attachment; filename="face_{face_id}.zip"',
+            "Content-Length": str(len(data)),
+        },
+    )
--- a/dashboard/static/index.html
+++ b/dashboard/static/index.html
@ -123,7 +123,7 @@
  <div class="tab active" onclick="switchTab('operations')">Operations</div>
  <div class="tab" onclick="switchTab('voice')">Voice & Audio</div>
  <div class="tab" onclick="switchTab('motion')">Motion & Replay</div>
-  <div class="tab" onclick="switchTab('vision')">Camera & Vision</div>
+  <div class="tab" onclick="switchTab('recognition')">Recognition</div>
  <div class="tab" onclick="switchTab('recordings')">Recordings</div>
  <div class="tab" onclick="switchTab('settings')">Settings & Logs</div>
 </div>
@ -211,23 +211,6 @@
    <div style="margin-top:.5rem;font-size:.72rem;color:var(--dim)" id="audio-status-text"></div>
  </div>

-  <!-- Live Camera Feed -->
-  <div class="card">
-    <h3><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M23 7l-7 5 7 5V7z"/><rect x="1" y="5" width="15" height="14" rx="2"/></svg>Live Camera</h3>
-    <canvas id="camera-feed" width="640" height="480" style="width:100%;max-height:260px;border-radius:8px;background:#000"></canvas>
-    <div class="row" style="margin-top:.4rem">
-      <button class="btn btn-success btn-sm" onclick="startLocalCam(this)">Start</button>
-      <button class="btn btn-danger btn-sm" onclick="stopLocalCam(this)">Stop</button>
-      <button class="btn btn-ghost btn-sm" onclick="reconnectCamera()">Reconnect</button>
-      <span id="ops-cam-state" class="badge" style="margin-left:.3rem"></span>
-    </div>
-    <div class="row" style="margin-top:.3rem">
-      <button class="btn btn-primary" onclick="capturePhoto(this)">Capture</button>
-      <select id="capture-gesture" style="width:130px"><option value="">No gesture</option></select>
-      <button class="btn btn-success" onclick="captureWithMotion(this)">Capture + Gesture</button>
-    </div>
-  </div>
-
  <!-- Motion Quick Panel -->
  <div class="card">
    <h3><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M18 8A6 6 0 0 0 6 8c0 7-3 9-3 9h18s-3-2-3-9"/><path d="M13.73 21a2 2 0 0 1-3.46 0"/></svg>Quick Actions</h3>
@ -288,6 +271,7 @@
    <div class="row">
      <button class="btn btn-success" onclick="startLiveSub(this)">Start</button>
      <button class="btn btn-danger" onclick="stopLiveSub(this)">Stop</button>
+      <button id="ls-cam-btn" class="btn btn-sm btn-ghost" onclick="toggleGeminiCamera(this)" title="Stream camera frames to Gemini Live — same toggle as the Recognition tab">Camera: --</button>
      <span id="ls-state" class="badge"></span>
      <button class="btn btn-sm mic-mute-shortcut btn-success" onclick="toggleMic()" style="margin-left:auto">Mic: LIVE</button>
      <button class="btn btn-sm spk-mute-shortcut btn-success" onclick="toggleSpeaker()">Speaker: LIVE</button>
@ -470,78 +454,102 @@
 </div>
 </div>

-<!-- ==================== TAB: Camera & Vision ==================== -->
-<div class="tab-content" id="tab-vision">
+<!-- ==================== TAB: Recognition ==================== -->
+<div class="tab-content" id="tab-recognition">
 <div class="grid">

-  <!-- Camera Devices -->
-  <div class="card">
-    <h3>Camera Device</h3>
-    <div class="row">
-      <label>Profile</label>
-      <select id="camdev-profile" style="flex:1" onchange="selectCamProfile(this.value)">
-        <option value="">Loading...</option>
-      </select>
-      <button class="btn btn-ghost btn-sm" onclick="scanCameras(this)" title="Re-scan plugged cameras">Scan</button>
-    </div>
-    <div id="camdev-detected" style="margin-top:.3rem;font-size:.65rem;color:var(--dim)"></div>
-    <details style="margin-top:.5rem">
-      <summary style="cursor:pointer;font-size:.72rem;color:var(--dim)">All plugged cameras</summary>
-      <div id="camdev-list" style="margin-top:.3rem;font-size:.7rem"></div>
-    </details>
-    <details style="margin-top:.4rem">
-      <summary style="cursor:pointer;font-size:.72rem;color:var(--dim)">Pin RealSense serial to slot</summary>
-      <div class="row" style="margin-top:.3rem">
-        <select id="camdev-pin-profile" style="flex:1"></select>
-        <input id="camdev-pin-serial" placeholder="Serial..." style="flex:1">
-        <button class="btn btn-primary btn-sm" onclick="pinCamSerial(this)">Pin</button>
-      </div>
-      <div style="font-size:.65rem;color:var(--dim);margin-top:.2rem">Use this when you have two RealSense units and want to lock which one is "primary".</div>
-    </details>
-    <div class="row" style="margin-top:.5rem">
-      <button class="btn btn-success btn-sm" onclick="startLocalCam(this)">Start Capture</button>
-      <button class="btn btn-danger btn-sm" onclick="stopLocalCam(this)">Stop Capture</button>
-      <span id="local-cam-state" class="badge"></span>
-    </div>
-    <div style="margin-top:.5rem;font-size:.72rem;color:var(--dim)" id="camdev-status"></div>
-  </div>
-
-  <!-- Camera Config (legacy) -->
-  <div class="card">
-    <h3>Camera Configuration</h3>
-    <div class="row"><label>Source</label><select id="cam-source" style="flex:1" onchange="setCamSource(this.value)"></select></div>
-    <div class="row">
-      <label>Resolution</label>
-      <input id="cam-w" type="number" value="640" style="width:60px"> <span style="color:var(--dim)">x</span>
-      <input id="cam-h" type="number" value="480" style="width:60px"> <span style="color:var(--dim)">@</span>
-      <input id="cam-fps" type="number" value="30" style="width:45px"> <span style="color:var(--dim)">fps</span>
-      <button class="btn btn-primary btn-sm" onclick="setCamRes()">Set</button>
-    </div>
-    <div class="row"><label>RealSense</label><input id="cam-serial" placeholder="Serial number" style="flex:1"><button class="btn btn-ghost btn-sm" onclick="setPreferredCam()">Save</button></div>
-    <div style="margin-top:.4rem"><a href="/api/vision/preview.mjpg" target="_blank" style="font-size:.72rem;color:var(--accent);text-decoration:none">Open MJPEG Stream in new tab</a></div>
-  </div>
-
-  <!-- YOLO Detector -->
-  <div class="card">
-    <h3>YOLO Vision Detector</h3>
-    <div class="row">
-      <button class="btn btn-primary" onclick="loadDetector(this)">Load Model</button>
-      <button class="btn btn-success" onclick="runDetection(this)">Detect Now</button>
-      <span id="yolo-status" class="badge"></span>
-    </div>
-    <div id="yolo-result" style="margin-top:.4rem;font-size:.72rem;color:var(--muted)"></div>
-  </div>
-
-  <!-- Photo Gallery -->
+  <!-- Status & Toggles -->
  <div class="card card-full">
-    <h3>Photo Gallery</h3>
-    <div class="row">
-      <button class="btn btn-ghost btn-sm" onclick="refreshPhotos()">Refresh</button>
-      <button class="btn btn-primary btn-sm" onclick="downloadAllPhotos()">Download ZIP</button>
-      <button class="btn btn-danger btn-sm" onclick="clearPhotos()">Clear All</button>
-      <span id="photo-count" style="margin-left:auto;font-size:.72rem;color:var(--muted)"></span>
+    <h3>Camera Vision &amp; Face Recognition</h3>
+    <div class="row" style="gap:1rem;flex-wrap:wrap">
+      <div class="row" style="gap:.4rem">
+        <label style="min-width:7rem">Camera Vision</label>
+        <label class="switch">
+          <input type="checkbox" id="rec-vision-toggle" onchange="setVisionEnabled(this.checked)">
+          <span class="slider"></span>
+        </label>
+        <span id="rec-camera-status" class="badge" style="margin-left:.5rem">--</span>
+      </div>
+      <div class="row" style="gap:.4rem">
+        <label style="min-width:7rem">Face Recognition</label>
+        <label class="switch">
+          <input type="checkbox" id="rec-facerec-toggle" onchange="setFaceRecEnabled(this.checked)">
+          <span class="slider"></span>
+        </label>
+        <span id="rec-facerec-status" class="badge" style="margin-left:.5rem">--</span>
+      </div>
+      <button class="btn btn-ghost btn-sm" onclick="syncGallery(this)" title="Re-send gallery to live Gemini session">↻ Sync Gallery</button>
    </div>
-    <div class="gallery-grid" id="photo-gallery"><div class="empty">No photos yet</div></div>
+    <div style="margin-top:.4rem;font-size:.7rem;color:var(--dim)" id="rec-status-line">
+      Toggles take effect within ~1 second on the running Gemini session — no restart required.
+    </div>
+  </div>
+
+  <!-- Live Preview -->
+  <div class="card">
+    <h3>Live Preview</h3>
+    <div id="rec-preview-wrap" style="background:#000;border-radius:.4rem;overflow:hidden;text-align:center;min-height:180px;display:flex;align-items:center;justify-content:center">
+      <img id="rec-preview-img" src="" alt="" style="max-width:100%;display:none">
+      <div id="rec-preview-empty" style="color:var(--dim);font-size:.75rem;padding:1rem">Camera off — toggle Vision ON to see the live feed.</div>
+    </div>
+    <div style="margin-top:.3rem;font-size:.65rem;color:var(--dim)" id="rec-preview-meta">--</div>
+    <div style="margin-top:.45rem">
+      <div style="font-size:.7rem;color:var(--dim);margin-bottom:.2rem">Resolution / FPS</div>
+      <div class="row" style="gap:.25rem;flex-wrap:wrap" id="rec-res-buttons">
+        <button class="btn btn-ghost btn-sm" data-w="424" data-h="240" data-fps="15" onclick="setCameraMode(this)">424×240 · 15</button>
+        <button class="btn btn-ghost btn-sm" data-w="424" data-h="240" data-fps="30" onclick="setCameraMode(this)">424×240 · 30</button>
+        <button class="btn btn-ghost btn-sm" data-w="640" data-h="480" data-fps="15" onclick="setCameraMode(this)">640×480 · 15</button>
+        <button class="btn btn-ghost btn-sm" data-w="640" data-h="480" data-fps="30" onclick="setCameraMode(this)">640×480 · 30</button>
+        <button class="btn btn-ghost btn-sm" data-w="1280" data-h="720" data-fps="15" onclick="setCameraMode(this)">1280×720 · 15</button>
+        <button class="btn btn-ghost btn-sm" data-w="1920" data-h="1080" data-fps="8" onclick="setCameraMode(this)">1920×1080 · 8</button>
+      </div>
+      <div style="font-size:.7rem;color:var(--dim);margin:.35rem 0 .2rem">JPEG Quality</div>
+      <div class="row" style="gap:.25rem" id="rec-quality-buttons">
+        <button class="btn btn-ghost btn-sm" data-q="50" onclick="setCameraQuality(this)">Low</button>
+        <button class="btn btn-ghost btn-sm" data-q="70" onclick="setCameraQuality(this)">Med</button>
+        <button class="btn btn-ghost btn-sm" data-q="85" onclick="setCameraQuality(this)">High</button>
+      </div>
+    </div>
+    <div style="margin-top:.3rem;font-size:.6rem;color:var(--dim)">
+      Each button rebuilds the capture pipeline (~0.5 s). Modes match the
+      RealSense D435I colour sensor — on USB 2.x, stick to 424×240 or 640×480.
+      If the feed is grayscale/IR, pin the colour node with <code>SANAD_CAMERA_USB_INDEX</code>.
+    </div>
+  </div>
+
+  <!-- Add New Face -->
+  <div class="card">
+    <h3>Add New Face</h3>
+    <div class="row">
+      <label>Name</label>
+      <input id="rec-newface-name" placeholder="(optional)" style="flex:1">
+    </div>
+    <div style="margin-top:.4rem">
+      <label style="font-size:.72rem;color:var(--dim)">Description — who is this person? (Gemini reads it)</label>
+      <textarea id="rec-newface-desc" rows="2" placeholder="e.g. Qassam, lead engineer on the robotics team — likes coffee" style="width:100%;margin-top:.2rem;font-size:.78rem;resize:vertical"></textarea>
+    </div>
+    <div class="row" style="margin-top:.4rem">
+      <button class="btn btn-success btn-sm" onclick="enrollFromCamera(this)" title="Snap current frame">📷 Capture</button>
+      <label class="btn btn-primary btn-sm" style="cursor:pointer;margin:0">
+        📁 Upload images
+        <input type="file" id="rec-upload-input" multiple accept="image/jpeg,image/png" style="display:none" onchange="enrollFromUpload(this)">
+      </label>
+    </div>
+    <div style="margin-top:.4rem;font-size:.65rem;color:var(--dim)">
+      Tip: add 2–3 photos / different angles per person for best recognition.
+      The description is sent to Gemini with the photos — it can then greet
+      and talk about the person using what you wrote.
+    </div>
+  </div>
+
+  <!-- Enrolled Faces -->
+  <div class="card card-full">
+    <h3>Enrolled Faces <span id="rec-faces-count" style="font-weight:normal;color:var(--dim);font-size:.75rem"></span></h3>
+    <div class="row">
+      <button class="btn btn-ghost btn-sm" onclick="refreshFaces()">↻ Refresh</button>
+      <span style="margin-left:auto;font-size:.65rem;color:var(--dim)" id="rec-gallery-version"></span>
+    </div>
+    <div id="rec-faces-list" style="margin-top:.6rem"><div class="empty">Loading…</div></div>
  </div>

 </div>
@ -621,7 +629,14 @@ function btnDone(b){if(b&&b.classList)b.classList.remove('loading');}
 async function api(m,p,b){const o={method:m,headers:{'Content-Type':'application/json'}};if(b)o.body=JSON.stringify(b);const r=await fetch(API+p,o);const j=await r.json();if(!r.ok){toast(j.detail||j.error||'Error '+r.status,'err');throw new Error(j.detail||j.error);}return j;}

 // Tabs
-function switchTab(name){document.querySelectorAll('.tab').forEach(t=>t.classList.toggle('active',t.textContent.toLowerCase().includes(name.slice(0,4))));document.querySelectorAll('.tab-content').forEach(c=>c.classList.toggle('active',c.id==='tab-'+name));}
+function switchTab(name){
+  // Match the nav tab by its exact onclick target — NOT a substring of the
+  // label. "recognition" and "recordings" both start with "reco", so the old
+  // textContent.includes(name.slice(0,4)) lit up both tabs at once.
+  const want="switchTab('"+name+"')";
+  document.querySelectorAll('.tab').forEach(t=>t.classList.toggle('active',(t.getAttribute('onclick')||'').includes(want)));
+  document.querySelectorAll('.tab-content').forEach(c=>c.classList.toggle('active',c.id==='tab-'+name));
+}

 // Emergency Stop
 async function emergencyStop(){try{await api('POST','/api/replay/cancel');await api('POST','/api/live-voice/stop');toast('EMERGENCY STOP sent','err');}catch(e){}}
@ -894,155 +909,6 @@ async function applyManualAudio(b){
  refreshAudioDevices();
 }

-// Camera
-async function capturePhoto(b){btnLoad(b);try{await api('POST','/api/vision/capture');toast('Photo captured','ok');refreshPhotos();}catch(e){}btnDone(b);}
-async function captureWithMotion(b){btnLoad(b);const g=document.getElementById('capture-gesture').value;const url=g?'/api/vision/capture?motion_file='+encodeURIComponent(g):'/api/vision/capture';try{await api('POST',url);toast('Captured'+(g?' + gesture':''),'ok');refreshPhotos();}catch(e){}btnDone(b);}
-async function refreshCamSources(){try{const r=await api('GET','/api/vision/cameras');const sel=document.getElementById('cam-source');sel.innerHTML='<option value="">Auto</option>'+(r||[]).map(c=>`<option value="${esc(c.source||c.serial||c.name)}">${esc(c.name||c.source||c.serial)}</option>`).join('');}catch(e){}}
-async function populateGestureSelect(){try{const r=await api('GET','/api/replay/files');const sel=document.getElementById('capture-gesture');sel.innerHTML='<option value="">No gesture</option>'+(r.files||[]).map(f=>`<option value="${esc(f.name)}">${esc(f.name)}</option>`).join('');}catch(e){}}
-async function setCamSource(v){if(v)try{await api('POST','/api/vision/set-source',{source:v});toast('Camera source set','ok');}catch(e){}}
-async function setCamRes(){const w=+document.getElementById('cam-w').value,h=+document.getElementById('cam-h').value,f=+document.getElementById('cam-fps').value;try{await api('POST','/api/vision/set-resolution',{width:w,height:h,fps:f});toast(`${w}x${h}@${f}fps`,'ok');}catch(e){}}
-async function setPreferredCam(){const s=document.getElementById('cam-serial').value;if(s)try{await api('POST','/api/vision/set-preferred-camera',{serial:s});toast('Saved','ok');}catch(e){}}
-
-// Photos
-async function refreshPhotos(){try{const r=await api('GET','/api/vision/photos');const el=document.getElementById('photo-gallery');document.getElementById('photo-count').textContent=`${r.total} photos`;if(!r.photos?.length){el.innerHTML='<div class="empty">No photos yet</div>';return;}el.innerHTML=r.photos.map(p=>{const n=esc(p.name);return`<img src="/api/vision/photos/${encodeURIComponent(p.name)}" title="${n}\n${p.size_kb}KB\n${p.created_at}" onclick="if(confirm('Delete ${n}?'))deletePhoto('${n}')">`;}).join('');}catch(e){}}
-async function deletePhoto(n){try{await api('DELETE','/api/vision/photos/'+encodeURIComponent(n));toast('Deleted','ok');refreshPhotos();}catch(e){}}
-function downloadAllPhotos(){window.open('/api/vision/photos/download-zip');}
-async function clearPhotos(){if(confirm('Delete ALL photos?'))try{const r=await api('POST','/api/vision/photos/clear');toast(`Cleared ${r.deleted_count}`,'ok');refreshPhotos();}catch(e){}}
-
-// Camera devices
-async function refreshCamDevices(){
-  try{
-    const r=await api('GET','/api/vision/devices');
-    const cur=r.current||{};
-    const curDev=cur.device||{};
-    const curId=cur.profile?cur.profile.id:'';
-    const detIds=r.detected_ids||[];
-    // Profile dropdown
-    const profSel=document.getElementById('camdev-profile');
-    profSel.innerHTML=(r.profiles||[]).map(p=>{
-      const avail=detIds.indexOf(p.id)>=0;
-      const sel=p.id===curId?' selected':'';
-      const tag=avail?'':' (no device)';
-      return `<option value="${esc(p.id)}"${sel}${avail?'':' disabled'}>${esc(p.label)}${tag}</option>`;
-    }).join('');
-    // Pin profile dropdown
-    const pinSel=document.getElementById('camdev-pin-profile');
-    pinSel.innerHTML=(r.profiles||[]).filter(p=>p.backend==='realsense').map(p=>
-      `<option value="${esc(p.id)}">${esc(p.label)}</option>`).join('');
-    // Detected summary
-    const det=document.getElementById('camdev-detected');
-    const counts=r.counts||{};
-    det.innerHTML=`<strong>Detected:</strong> ${counts.realsense||0} RealSense, ${counts.v4l2||0} V4L2 (total ${counts.total||0})`;
-    // All devices list
-    const list=document.getElementById('camdev-list');
-    if(!(r.all_devices||[]).length){
-      list.innerHTML='<div class="empty">No cameras plugged</div>';
-    }else{
-      list.innerHTML='<table><tr><th>Backend</th><th>Name</th><th>Serial / Path</th><th></th></tr>'+
-        r.all_devices.map(d=>{
-          const idVal=d.serial||d.device_path;
-          const action=d.serial
-            ?`<button class="btn btn-primary btn-sm" onclick="selectCamSerial('${esc(d.serial)}')">Use</button>`
-            :`<button class="btn btn-primary btn-sm" onclick="selectCamPath('${esc(d.device_path)}')">Use</button>`;
-          return `<tr><td>${esc(d.backend)}</td><td>${esc(d.name||'-')}</td><td><code style="font-size:.65rem">${esc(idVal||'-')}</code></td><td>${action}</td></tr>`;
-        }).join('')+'</table>';
-    }
-    // Status text
-    const st=document.getElementById('camdev-status');
-    if(curDev.name){
-      st.innerHTML=`<strong>Active:</strong> ${esc(curDev.name)}<br>`+
-        (curDev.serial?`Serial: <code>${esc(curDev.serial)}</code><br>`:'')+
-        (curDev.device_path?`Path: <code>${esc(curDev.device_path)}</code><br>`:'')+
-        `<span style="color:var(--muted)">via ${esc(cur.source_kind||'?')}</span>`;
-    }else{
-      st.innerHTML='<span style="color:#f55">No camera selected</span>';
-    }
-  }catch(e){}
-}
-async function scanCameras(b){
-  if(b)btnLoad(b);
-  try{await api('POST','/api/vision/devices/scan');toast('Re-scanned cameras','ok');}catch(e){}
-  if(b)btnDone(b);
-  refreshCamDevices();
-}
-async function selectCamProfile(profileId){
-  if(!profileId)return;
-  try{
-    await api('POST','/api/vision/devices/select-profile',{profile_id:profileId});
-    toast('Camera profile switched','ok');
-  }catch(e){}
-  refreshCamDevices();
-}
-async function selectCamSerial(serial){
-  if(!serial)return;
-  try{
-    await api('POST','/api/vision/devices/select-serial',{serial});
-    toast('Camera selected by serial','ok');
-  }catch(e){}
-  refreshCamDevices();
-}
-async function selectCamPath(path){
-  if(!path)return;
-  try{
-    await api('POST','/api/vision/devices/select-path',{device_path:path});
-    toast('Camera selected by path','ok');
-  }catch(e){}
-  refreshCamDevices();
-}
-async function startLocalCam(b){
-  btnLoad(b);
-  try{
-    const r=await api('POST','/api/vision/local/start',{});
-    if(r.ok){
-      toast('Camera started: '+(r.backend||'?'),'ok');
-    }else{
-      toast('Camera start failed: '+(r.error||'unknown'),'err');
-    }
-  }catch(e){}
-  btnDone(b);
-  setTimeout(refreshLocalCam, 500);
-}
-async function stopLocalCam(b){
-  btnLoad(b);
-  try{await api('POST','/api/vision/local/stop');toast('Camera stopped','info');}catch(e){}
-  btnDone(b);
-  refreshLocalCam();
-}
-async function refreshLocalCam(){
-  try{
-    const r=await api('GET','/api/vision/local/status');
-    const els=[document.getElementById('local-cam-state'),document.getElementById('ops-cam-state')];
-    els.forEach(el=>{
-      if(!el)return;
-      if(r.running){
-        el.textContent=(r.backend||'on')+(r.serial?(' '+r.serial.slice(-6)):'')+' '+r.width+'x'+r.height+'@'+r.fps;
-        el.className='badge badge-ok';
-      }else if(r.last_error){
-        el.textContent='error';
-        el.className='badge badge-err';
-        el.title=r.last_error;
-      }else{
-        el.textContent='stopped';
-        el.className='badge badge-warn';
-      }
-    });
-  }catch(e){}
-}
-
-async function pinCamSerial(b){
-  const pid=document.getElementById('camdev-pin-profile').value;
-  const serial=document.getElementById('camdev-pin-serial').value.trim();
-  if(!pid||!serial){toast('Pick a profile and enter a serial','err');return;}
-  btnLoad(b);
-  try{
-    await api('POST','/api/vision/devices/assign-serial',{profile_id:pid,serial:serial});
-    toast(`Pinned ${serial} → ${pid}`,'ok');
-    document.getElementById('camdev-pin-serial').value='';
-  }catch(e){}
-  btnDone(b);
-  refreshCamDevices();
-}
-
 // Motion
 async function toggleGestural(v){try{await api('POST','/api/motion/gestural-speaking?enabled='+v);}catch(e){}}
 let _armBusy=false,_runId=null;
@ -1212,10 +1078,10 @@ async function stopCombo(b){
 async function refreshReplayFiles(){try{const r=await api('GET','/api/replay/files');const el=document.getElementById('replay-files');if(!(r.files||[]).length){el.innerHTML='<div class="empty">No motion files</div>';return;}el.innerHTML='<table><tr><th>File</th><th>Frames</th><th>Duration</th><th>Size</th><th></th></tr>'+(r.files||[]).map(f=>`<tr><td>${esc(f.name)}</td><td>${f.frames}</td><td>${f.duration_sec}s</td><td>${f.size_kb}KB</td><td><button class="btn btn-primary btn-sm" onclick="document.getElementById('replay-name').value='${esc(f.name)}';testReplay()">Play</button> <button class="btn btn-danger btn-sm" onclick="deleteMotionFile('${esc(f.name)}')">Del</button></td></tr>`).join('')+'</table>';}catch(e){}}
 async function testReplay(b){const n=document.getElementById('replay-name').value,s=parseFloat(document.getElementById('replay-speed').value);if(!n)return;btnLoad(b);try{await api('POST','/api/replay/test',{name:n,speed:s});toast('Replay: '+n,'ok');pollArmBusy();}catch(e){}btnDone(b);}
 async function cancelReplay(){try{const r=await api('POST','/api/replay/cancel');toast(r&&r.message?r.message:'Cancelled','info');}catch(e){}}
-async function deleteMotionFile(n){if(confirm('Delete '+n+'?'))try{await api('DELETE','/api/replay/files/'+encodeURIComponent(n));toast('Deleted','ok');refreshReplayFiles();populateGestureSelect();}catch(e){}}
-async function uploadMotionFile(input){if(!input.files[0])return;const fd=new FormData();fd.append('file',input.files[0]);try{const r=await fetch('/api/replay/files/upload',{method:'POST',body:fd});if(!r.ok){const j=await r.json();toast(j.detail||'Upload failed','err');}else{toast('Uploaded','ok');refreshReplayFiles();populateGestureSelect();}}catch(e){toast('Upload error','err');}input.value='';}
+async function deleteMotionFile(n){if(confirm('Delete '+n+'?'))try{await api('DELETE','/api/replay/files/'+encodeURIComponent(n));toast('Deleted','ok');refreshReplayFiles();}catch(e){}}
+async function uploadMotionFile(input){if(!input.files[0])return;const fd=new FormData();fd.append('file',input.files[0]);try{const r=await fetch('/api/replay/files/upload',{method:'POST',body:fd});if(!r.ok){const j=await r.json();toast(j.detail||'Upload failed','err');}else{toast('Uploaded','ok');refreshReplayFiles();}}catch(e){toast('Upload error','err');}input.value='';}
 async function startTeaching(b){const n=document.getElementById('teach-name').value,d=parseFloat(document.getElementById('teach-duration').value);if(!n)return toast('Enter name','err');btnLoad(b);try{await api('POST','/api/replay/teach/start',{name:n,duration_sec:d});toast('Teaching: '+n,'ok');pollTeachStatus();}catch(e){}btnDone(b);}
-async function stopTeaching(b){btnLoad(b);try{const r=await api('POST','/api/replay/teach/stop');toast(`Saved: ${r.name} (${r.frames} frames)`,'ok');document.getElementById('teach-status').textContent=`Done: ${r.frames} frames`;refreshReplayFiles();populateGestureSelect();}catch(e){}btnDone(b);}
+async function stopTeaching(b){btnLoad(b);try{const r=await api('POST','/api/replay/teach/stop');toast(`Saved: ${r.name} (${r.frames} frames)`,'ok');document.getElementById('teach-status').textContent=`Done: ${r.frames} frames`;refreshReplayFiles();}catch(e){}btnDone(b);}
 let _teachPoll;function pollTeachStatus(){clearInterval(_teachPoll);_teachPoll=setInterval(async()=>{try{const r=await api('GET','/api/replay/teach/status');document.getElementById('teach-status').textContent=`${r.phase} | ${r.elapsed_sec}s | ${r.frames_recorded} frames`;if(!r.recording){clearInterval(_teachPoll);refreshReplayFiles();}}catch(e){clearInterval(_teachPoll);}},500);}

 // Scripts
@ -1288,11 +1154,6 @@ async function trReplayLast(b){btnLoad(b);try{await api('POST','/api/typed-repla
 async function trSaveLast(b){btnLoad(b);try{await api('POST','/api/typed-replay/save-last',{record_name:document.getElementById('tr-name').value});toast('Saved','ok');refreshTR();refreshRecords();}catch(e){}btnDone(b);}
 async function refreshTR(){try{const r=await api('GET','/api/typed-replay/status');const s=r.session||{};document.getElementById('tr-session').innerHTML=`<strong>Text:</strong> ${esc(s.text||'--')}<br><strong>Audio:</strong> ${s.has_audio?'Yes':'No'} | <strong>Capture:</strong> ${s.has_capture?'Yes':'No'}<br><strong>Replays:</strong> ${s.replay_count||0}<br><strong>Generated:</strong> ${s.generated_at||'--'}<br><strong>Saved:</strong> ${esc(s.saved_as||'--')}`;}catch(e){}}

-// YOLO
-async function loadDetector(b){btnLoad(b);try{const r=await api('POST','/api/detector/load');const el=document.getElementById('yolo-status');el.textContent=r.ok?'Loaded':'Failed';el.className='badge '+(r.ok?'badge-ok':'badge-err');toast(r.ok?'Model loaded':'Failed',r.ok?'ok':'err');}catch(e){}btnDone(b);}
-async function runDetection(b){btnLoad(b);try{const r=await api('POST','/api/detector/detect');document.getElementById('yolo-result').innerHTML=`<strong>Persons:</strong> ${r.person_count} | <strong>Faces:</strong> ${r.face_count} | <strong>Group:</strong> ${r.group_detected?'Yes ('+r.group_size+')':'No'} | <strong>Intent:</strong> ${r.intent_detected?'Yes':'No'} | ${r.detection_ms}ms`;}catch(e){document.getElementById('yolo-result').textContent='Detection failed';}btnDone(b);}
-async function refreshDetector(){try{const r=await api('GET','/api/detector/status');const el=document.getElementById('yolo-status');el.textContent=r.loaded?'Loaded':'Not loaded';el.className='badge '+(r.loaded?'badge-ok':'badge-warn');}catch(e){}}
-
 // Wake Phrases
 async function refreshWakeActions(){try{const r=await api('GET','/api/wake-phrases/');const sel=document.getElementById('wp-action');sel.innerHTML='<option value="">-- select action --</option>'+(r.actions||[]).map(a=>`<option value="${esc(a.action)}">${esc(a.action)} (${a.phrase_count})</option>`).join('');}catch(e){}}
 async function loadWakePhrases(action){if(!action)return;try{const r=await api('GET',`/api/wake-phrases/${encodeURIComponent(action)}`);const el=document.getElementById('wp-phrases');if(!(r.phrases||[]).length){el.innerHTML='<div class="empty">No phrases</div>';return;}el.innerHTML=(r.phrases||[]).map(p=>`<div class="row"><span style="flex:1;font-size:.78rem">${esc(p)}</span><button class="btn btn-danger btn-sm" onclick="removeWakePhrase(document.getElementById('wp-action').value,this.dataset.p)" data-p="${esc(p)}">X</button></div>`).join('');}catch(e){}}
@ -1386,10 +1247,6 @@ async function refreshStatus(){try{const s=await api('GET','/api/status');docume
 // WebSocket logs
 let logWs;function connectLogs(){const p=location.protocol==='https:'?'wss':'ws';logWs=new WebSocket(`${p}://${location.host}/ws/logs`);const box=document.getElementById('log-box');logWs.onmessage=e=>{box.textContent+=e.data+'\n';if(box.childNodes.length>1000)box.textContent=box.textContent.split('\n').slice(-500).join('\n');box.scrollTop=box.scrollHeight;};logWs.onclose=()=>setTimeout(connectLogs,3000);}

-// WebSocket camera
-let camWs;function connectCamera(){if(camWs&&camWs.readyState<=1)try{camWs.close();}catch(e){}const p=location.protocol==='https:'?'wss':'ws';camWs=new WebSocket(`${p}://${location.host}/ws/camera`);camWs.binaryType='arraybuffer';const canvas=document.getElementById('camera-feed'),ctx=canvas.getContext('2d');camWs.onmessage=e=>{const url=URL.createObjectURL(new Blob([e.data],{type:'image/jpeg'})),img=new Image();img.onload=()=>{ctx.drawImage(img,0,0,canvas.width,canvas.height);URL.revokeObjectURL(url);};img.onerror=()=>URL.revokeObjectURL(url);img.src=url;};camWs.onclose=()=>setTimeout(connectCamera,3000);}
-function reconnectCamera(){if(camWs)try{camWs.close();}catch(e){}setTimeout(connectCamera,300);toast('Camera reconnecting...','info');}
-
 // Auto-connect Gemini and auto-start Live Subprocess on page load
 async function autoConnectGemini(){
  try{
@ -1413,10 +1270,331 @@ async function autoStartLiveSub(){
  }catch(e){}
 }

+// ── Recognition tab (camera vision + face recognition) ──
+// Mirror of /api/recognition/state.vision_enabled — kept fresh by
+// refreshRecognition() so the Live-Gemini-panel Camera button can flip
+// it without a round-trip GET.
+let _recVisionEnabled=false;
+async function refreshRecognition(){
+  try{
+    const r=await api('GET','/api/recognition/state');
+    _recVisionEnabled=!!r.vision_enabled;
+    const v=document.getElementById('rec-vision-toggle');
+    const f=document.getElementById('rec-facerec-toggle');
+    if(v) v.checked=!!r.vision_enabled;
+    if(f) f.checked=!!r.face_rec_enabled;
+    const cs=document.getElementById('rec-camera-status');
+    if(cs){
+      const c=r.camera||{};
+      cs.title=c.error||'';
+      if(c.running&&c.backend){
+        cs.textContent=c.backend+' '+(c.width||'')+'x'+(c.height||'')
+          +(c.reconnect_count?(' ↻'+c.reconnect_count):'');
+        cs.className='badge badge-ok';
+      }else if(c.running&&!c.backend){
+        // thread alive but between reconnect attempts (camera unplugged)
+        cs.textContent='reconnecting…';cs.className='badge badge-warn';
+      }else if(c.error){
+        cs.textContent='error';cs.className='badge badge-warn';
+      }else{
+        cs.textContent='off';cs.className='badge';
+      }
+    }
+    // Camera button in the Live Gemini Process panel (Voice & Audio tab) —
+    // same toggle as the Recognition tab, surfaced where it's handy.
+    const cb=document.getElementById('ls-cam-btn');
+    if(cb){
+      const c=r.camera||{};
+      if(c.running&&c.backend){
+        cb.textContent='Camera: ON';
+        cb.className='btn btn-sm btn-success';
+        cb.title='Streaming '+(c.backend||'')+' '+(c.width||'')+'x'+(c.height||'')+' to Gemini — click to turn off';
+      }else if(c.running&&!c.backend){
+        cb.textContent='Camera: …';
+        cb.className='btn btn-sm btn-ghost';
+        cb.title='Camera reconnecting…';
+      }else if(r.vision_enabled&&c.error){
+        cb.textContent='Camera: N/A';
+        cb.className='btn btn-sm btn-danger';
+        cb.title='Vision on but no camera backend: '+(c.error||'');
+      }else{
+        cb.textContent='Camera: OFF';
+        cb.className='btn btn-sm btn-ghost';
+        cb.title='Click to stream camera frames to Gemini Live';
+      }
+    }
+    const fs=document.getElementById('rec-facerec-status');
+    if(fs){fs.textContent=r.face_rec_enabled?'on':'off';fs.className='badge '+(r.face_rec_enabled?'badge-ok':'');}
+    const fc=document.getElementById('rec-faces-count');
+    if(fc) fc.textContent=`(${r.faces_count} faces, ${r.photos_count} photos)`;
+    const gv=document.getElementById('rec-gallery-version');
+    if(gv) gv.textContent='v.'+r.gallery_version;
+    // toggle preview visibility — only when actively capturing (has a backend)
+    const img=document.getElementById('rec-preview-img');
+    const empty=document.getElementById('rec-preview-empty');
+    const meta=document.getElementById('rec-preview-meta');
+    const c2=r.camera||{};
+    if(c2.running&&c2.backend){
+      img.style.display='inline-block';empty.style.display='none';
+      if(meta) meta.textContent=`${c2.width}x${c2.height} @ ${c2.fps}fps · seq=${c2.frame_seq}`;
+    }else{
+      img.style.display='none';empty.style.display='block';
+      if(empty) empty.textContent=(c2.running&&!c2.backend)
+        ? 'Camera reconnecting…'
+        : 'Camera off — toggle Vision ON to see the live feed.';
+      if(meta) meta.textContent='--';
+    }
+    // Highlight the active resolution / quality buttons to match the live
+    // capture profile (works whether the camera is running or idle).
+    document.querySelectorAll('#rec-res-buttons button').forEach(btn=>{
+      const on = parseInt(btn.dataset.w)===c2.width
+              && parseInt(btn.dataset.h)===c2.height
+              && parseInt(btn.dataset.fps)===c2.fps;
+      btn.className='btn btn-sm '+(on?'btn-primary':'btn-ghost');
+    });
+    document.querySelectorAll('#rec-quality-buttons button').forEach(btn=>{
+      const on = parseInt(btn.dataset.q)===c2.jpeg_quality;
+      btn.className='btn btn-sm '+(on?'btn-primary':'btn-ghost');
+    });
+  }catch(e){}
+}
+// Resolution / FPS button menu — each click POSTs one mode and the
+// CameraDaemon rebuilds the pipeline at it. refreshRecognition() then
+// highlights whichever button matches the live profile.
+async function setCameraMode(btn){
+  btnLoad(btn);
+  try{
+    const body={
+      width:  parseInt(btn.dataset.w),
+      height: parseInt(btn.dataset.h),
+      fps:    parseInt(btn.dataset.fps),
+    };
+    const r=await api('POST','/api/recognition/camera-config',body);
+    const p=r.profile||body;
+    toast(`Camera → ${p.width}×${p.height} @ ${p.fps}fps`,'ok');
+    refreshRecognition();
+  }catch(e){toast('Resolution change failed: '+(e.message||e),'err');}
+  btnDone(btn);
+}
+async function setCameraQuality(btn){
+  btnLoad(btn);
+  try{
+    const q=parseInt(btn.dataset.q);
+    await api('POST','/api/recognition/camera-config',{jpeg_quality:q});
+    toast('JPEG quality → '+q,'ok');
+    refreshRecognition();
+  }catch(e){toast('Quality change failed: '+(e.message||e),'err');}
+  btnDone(btn);
+}
+// Camera button in the Live Gemini Process panel — flips the same
+// vision toggle the Recognition tab owns. _recVisionEnabled is the
+// last-known state (refreshed every 5 s by refreshRecognition).
+async function toggleGeminiCamera(b){
+  if(b) btnLoad(b);
+  const next=!_recVisionEnabled;
+  try{
+    const r=await api('POST','/api/recognition/vision?on='+(next?'1':'0'));
+    _recVisionEnabled=!!(r&&r.vision_enabled);
+    toast(next?'Camera ON for Gemini':'Camera OFF for Gemini','ok');
+  }catch(e){
+    toast('Camera toggle failed: '+(e.message||e),'err');
+  }
+  if(b) btnDone(b);
+  refreshRecognition();   // refresh both the panel button + the Recognition tab
+}
+async function setVisionEnabled(on){
+  try{
+    const r=await api('POST','/api/recognition/vision?on='+(on?'1':'0'));
+    toast(on?'Vision ON':'Vision OFF','ok');
+    refreshRecognition();
+  }catch(e){
+    toast('Vision toggle failed: '+(e.message||e),'err');
+    refreshRecognition();
+  }
+}
+async function setFaceRecEnabled(on){
+  try{
+    const r=await api('POST','/api/recognition/face-rec?on='+(on?'1':'0'));
+    toast(on?'Face Recognition ON':'Face Recognition OFF','ok');
+    if(r&&r.warning) toast(r.warning,'info');
+    refreshRecognition();
+  }catch(e){
+    toast('Face Rec toggle failed: '+(e.message||e),'err');
+    refreshRecognition();
+  }
+}
+async function syncGallery(b){
+  if(b) btnLoad(b);
+  try{await api('POST','/api/recognition/sync');toast('Gallery sync requested','ok');refreshRecognition();}
+  catch(e){toast('Sync failed','err');}
+  if(b) btnDone(b);
+}
+// Preview poller — bumps the img src each tick to defeat caching.
+let _recPreviewTimer=null;
+function startRecPreview(){
+  if(_recPreviewTimer) return;
+  const tick=()=>{
+    const img=document.getElementById('rec-preview-img');
+    if(img && img.style.display!=='none'){
+      img.src='/api/recognition/frame.jpg?t='+Date.now();
+    }
+  };
+  tick();
+  _recPreviewTimer=setInterval(tick,500);
+}
+function stopRecPreview(){if(_recPreviewTimer){clearInterval(_recPreviewTimer);_recPreviewTimer=null;}}
+// Hook into tab switch — start/stop preview when recognition tab is active.
+(function(){
+  const origSwitchTab=window.switchTab;
+  window.switchTab=function(name){
+    origSwitchTab(name);
+    if(name==='recognition'){refreshRecognition();refreshFaces();startRecPreview();}
+    else{stopRecPreview();}
+  };
+})();
+// Face CRUD stubs — filled in milestone 5
+async function refreshFaces(){
+  const el=document.getElementById('rec-faces-list');
+  if(!el) return;
+  try{
+    const r=await api('GET','/api/recognition/faces');
+    if(!r.faces||!r.faces.length){el.innerHTML='<div class="empty">No faces enrolled yet</div>';return;}
+    el.innerHTML=r.faces.map(f=>renderFaceCard(f)).join('');
+  }catch(e){
+    el.innerHTML='<div class="empty">(face gallery not yet wired)</div>';
+  }
+}
+function renderFaceCard(f){
+  const name=f.name||`(face_${f.id})`;
+  const photos=(f.photos||[]).map(p=>{
+    const url=`/api/recognition/faces/${f.id}/photo/${encodeURIComponent(p.name)}`;
+    return `<div style="display:inline-block;margin:.2rem;text-align:center">
+      <img src="${url}?t=${Date.now()}" alt="${esc(p.name)}" style="width:72px;height:72px;object-fit:cover;border-radius:.3rem;background:#222"/>
+      <div style="font-size:.6rem;color:var(--dim);margin-top:.1rem">
+        <a href="${url}?download=1" download style="color:var(--accent);text-decoration:none">⬇</a>
+        <a href="#" onclick="deletePhoto(${f.id},'${esc(p.name)}');return false" style="color:var(--err);text-decoration:none;margin-left:.3rem">🗑</a>
+      </div>
+    </div>`;
+  }).join('');
+  return `<div class="card" style="margin-top:.5rem">
+    <div class="row" style="align-items:center">
+      <strong>face_${f.id}</strong>
+      <span style="color:var(--dim)">—</span>
+      <span id="rec-name-${f.id}" style="flex:1">${esc(name)}</span>
+      <button class="btn btn-ghost btn-sm" onclick="renameFace(${f.id})" title="Rename">✏</button>
+      <span style="color:var(--dim);font-size:.7rem">${(f.photos||[]).length} photo(s)</span>
+    </div>
+    <div style="margin-top:.25rem;font-size:.72rem">
+      <span style="color:var(--dim)">Description:</span>
+      <span id="rec-desc-${f.id}" style="color:var(--muted)">${f.description?esc(f.description):''}</span>${f.description?'':'<span style="color:var(--dim)">(none — no extra context for Gemini)</span>'}
+      <button class="btn btn-ghost btn-sm" onclick="describeFace(${f.id})" title="Edit description Gemini sees">✏</button>
+    </div>
+    <div style="margin-top:.3rem">${photos}</div>
+    <div class="row" style="margin-top:.4rem">
+      <button class="btn btn-success btn-sm" onclick="captureToFace(${f.id},this)">📷 Capture</button>
+      <label class="btn btn-primary btn-sm" style="cursor:pointer;margin:0">
+        📁 Upload
+        <input type="file" multiple accept="image/jpeg,image/png" style="display:none" onchange="uploadToFace(${f.id},this)">
+      </label>
+      <a class="btn btn-ghost btn-sm" href="/api/recognition/faces/${f.id}/download.zip" download>⬇ ZIP</a>
+      <button class="btn btn-danger btn-sm" style="margin-left:auto" onclick="deleteFace(${f.id})">🗑 Delete face</button>
+    </div>
+  </div>`;
+}
+// Build the ?name=&description= query string from the Add-New-Face inputs.
+function _newFaceQuery(){
+  const name=document.getElementById('rec-newface-name').value.trim();
+  const desc=document.getElementById('rec-newface-desc').value.trim();
+  const qs=[];
+  if(name) qs.push('name='+encodeURIComponent(name));
+  if(desc) qs.push('description='+encodeURIComponent(desc));
+  return qs.length?('?'+qs.join('&')):'';
+}
+function _clearNewFaceInputs(){
+  document.getElementById('rec-newface-name').value='';
+  document.getElementById('rec-newface-desc').value='';
+}
+async function enrollFromCamera(b){
+  btnLoad(b);
+  try{
+    const r=await api('POST','/api/recognition/faces/enroll'+_newFaceQuery());
+    toast('Enrolled face_'+r.face.id+(r.face.description?' (with description)':''),'ok');
+    _clearNewFaceInputs();
+    refreshFaces();refreshRecognition();
+  }catch(e){toast('Enroll failed: '+(e.message||e),'err');}
+  btnDone(b);
+}
+async function enrollFromUpload(input){
+  const files=input.files;if(!files||!files.length)return;
+  const fd=new FormData();for(const f of files) fd.append('files',f);
+  try{
+    const resp=await fetch('/api/recognition/faces/upload'+_newFaceQuery(),{method:'POST',body:fd});
+    if(!resp.ok)throw new Error(await resp.text());
+    const r=await resp.json();
+    toast('Uploaded face_'+r.face.id+' ('+files.length+' photos'+(r.face.description?', with description':'')+')','ok');
+    _clearNewFaceInputs();
+    input.value='';
+    refreshFaces();refreshRecognition();
+  }catch(e){toast('Upload failed: '+(e.message||e),'err');}
+}
+async function captureToFace(id,b){
+  btnLoad(b);
+  try{await api('POST','/api/recognition/faces/'+id+'/capture');toast('Added photo','ok');refreshFaces();}
+  catch(e){toast('Capture failed','err');}
+  btnDone(b);
+}
+async function uploadToFace(id,input){
+  const files=input.files;if(!files||!files.length)return;
+  const fd=new FormData();for(const f of files) fd.append('files',f);
+  try{
+    const resp=await fetch('/api/recognition/faces/'+id+'/upload',{method:'POST',body:fd});
+    if(!resp.ok)throw new Error(await resp.text());
+    toast('Uploaded '+files.length+' photo(s)','ok');
+    input.value='';
+    refreshFaces();
+  }catch(e){toast('Upload failed: '+(e.message||e),'err');}
+}
+async function renameFace(id){
+  const el=document.getElementById('rec-name-'+id);if(!el)return;
+  const cur=el.textContent.replace(/^\((.*)\)$/,'$1');
+  const next=prompt('New name (blank to clear):',cur==='face_'+id?'':cur);
+  if(next===null) return;
+  try{
+    await api('POST','/api/recognition/faces/'+id+'/rename',{name:next});
+    toast('Renamed','ok');refreshFaces();
+  }catch(e){toast('Rename failed','err');}
+}
+async function describeFace(id){
+  const el=document.getElementById('rec-desc-'+id);
+  const cur=el?el.textContent.trim():'';
+  const next=prompt('Description for Gemini — who is this person? '+
+                    '(blank to clear)',cur);
+  if(next===null) return;
+  try{
+    await api('POST','/api/recognition/faces/'+id+'/describe',{description:next});
+    toast(next.trim()?'Description saved':'Description cleared','ok');
+    refreshFaces();
+  }catch(e){toast('Save failed: '+(e.message||e),'err');}
+}
+async function deletePhoto(id,name){
+  if(!confirm('Delete photo '+name+'?'))return;
+  try{
+    await api('DELETE','/api/recognition/faces/'+id+'/photo/'+encodeURIComponent(name));
+    toast('Photo deleted','ok');refreshFaces();
+  }catch(e){toast('Delete failed: '+(e.message||e),'err');}
+}
+async function deleteFace(id){
+  if(!confirm('Delete face_'+id+' and all photos?'))return;
+  try{
+    await api('DELETE','/api/recognition/faces/'+id);
+    toast('Face deleted','ok');refreshFaces();refreshRecognition();
+  }catch(e){toast('Delete failed','err');}
+}
+
 // Init — vision/camera/detector fetches removed; those endpoints were deleted.
-refreshStatus();refreshSystem();refreshAudio();refreshAudioDevices();refreshSkills();refreshReplayFiles();refreshScripts();refreshPrompt();refreshRecords();populateGestureSelect();refreshLiveVoice();refreshLiveSub();refreshTR();refreshWakeActions();refreshApiKey();refreshCombo();connectLogs();
+refreshStatus();refreshSystem();refreshAudio();refreshAudioDevices();refreshSkills();refreshReplayFiles();refreshScripts();refreshPrompt();refreshRecords();refreshLiveVoice();refreshLiveSub();refreshTR();refreshWakeActions();refreshApiKey();refreshCombo();refreshRecognition();connectLogs();
 setTimeout(autoConnectGemini,2000);setTimeout(autoStartLiveSub,3000);
-setInterval(refreshStatus,5000);setInterval(refreshSystem,30000);setInterval(refreshLiveVoice,5000);setInterval(refreshLiveSub,5000);
+setInterval(refreshStatus,5000);setInterval(refreshSystem,30000);setInterval(refreshLiveVoice,5000);setInterval(refreshLiveSub,5000);setInterval(refreshRecognition,5000);
 </script>
 </body>
 </html>
--- a/gemini/script.py
+++ b/gemini/script.py
@ -19,8 +19,13 @@ from __future__ import annotations

 import array
 import asyncio
+import base64
+import json
 import os
+import sys
+import threading
 import time
+from pathlib import Path
 from typing import Any, Optional

 import numpy as np
@ -29,6 +34,7 @@ from google import genai
 from google.genai import types

 from Project.Sanad.config import (
+    BASE_DIR,
    CHUNK_SIZE,
    GEMINI_API_KEY,
    GEMINI_VOICE,
@ -37,6 +43,7 @@ from Project.Sanad.config import (
 )
 from Project.Sanad.core.config_loader import section as _cfg_section
 from Project.Sanad.core.logger import get_logger
+from Project.Sanad.vision import recognition_state as _recog_state

 log = get_logger("gemini_brain")

@ -57,6 +64,93 @@ _NO_MESSAGES_TIMEOUT = _SV.get("no_messages_timeout_sec", 30)
 _CHUNK_BYTES = CHUNK_SIZE * 2
 _SILENCE_PCM = b"\x00" * _CHUNK_BYTES

+# ── Recognition (camera + face gallery) tunables ──
+_RECOG_STATE_PATH = Path(os.environ.get(
+    "SANAD_RECOGNITION_STATE_PATH",
+    str(BASE_DIR / "data" / ".recognition_state.json"),
+))
+_VISION_SEND_HZ = float(os.environ.get("SANAD_VISION_SEND_HZ", "2"))
+_VISION_STALE_MS = int(os.environ.get("SANAD_VISION_STALE_MS", "1500"))
+_RECOG_POLL_S = float(os.environ.get("SANAD_RECOGNITION_POLL_S", "1.0"))
+_FACES_DIR = Path(os.environ.get(
+    "SANAD_FACES_DIR",
+    str(BASE_DIR / "data" / "faces"),
+))
+_FACES_MAX_SAMPLES = int(os.environ.get("SANAD_FACES_MAX_SAMPLES", "3"))
+_FACES_PRIMER_RESIZE = int(os.environ.get("SANAD_FACES_PRIMER_RESIZE", "256"))
+
+
+# ── stdin push channel (Marcus pattern) ──────────────────────
+# The GeminiSubprocess supervisor writes two line types to this process's
+# stdin:
+#   "frame:<base64-jpeg>\n"   — a camera frame to relay to Gemini Live
+#   "state:<json>\n"          — a motion-state update to inject as text
+# A daemon thread parses them into the caches below; the asyncio tasks
+# _send_frame_loop / _send_state_loop drain those caches.
+
+_LATEST_FRAME_LOCK = threading.Lock()
+_LATEST_FRAME: dict = {"bytes": None, "ts": 0.0}
+
+_STATE_LOCK = threading.Lock()
+_STATE_PENDING: list[str] = []
+
+_STATE_TAGS = {
+    "start":       "[STATE-START]",
+    "complete":    "[STATE-DONE]",
+    "interrupted": "[STATE-INTERRUPTED]",
+    "error":       "[STATE-ERROR]",
+    "paused":      "[STATE-PAUSED]",
+    "resumed":     "[STATE-RESUMED]",
+}
+
+
+def _stdin_watcher() -> None:
+    """Daemon thread — parse 'frame:' / 'state:' lines off stdin.
+
+    Best-effort: any malformed line is skipped. Exits when the parent
+    closes our stdin (subprocess teardown)."""
+    try:
+        for line in sys.stdin:
+            line = line.rstrip("\n")
+            if not line:
+                continue
+            if line.startswith("frame:"):
+                b64 = line[len("frame:"):]
+                try:
+                    data = base64.b64decode(b64)
+                except Exception:
+                    continue
+                if data:
+                    with _LATEST_FRAME_LOCK:
+                        _LATEST_FRAME["bytes"] = data
+                        _LATEST_FRAME["ts"] = time.time()
+            elif line.startswith("state:"):
+                try:
+                    payload = json.loads(line[len("state:"):])
+                except Exception:
+                    continue
+                event = (payload.get("event") or "").strip().lower()
+                cmd = (payload.get("cmd") or "").strip()
+                tag = _STATE_TAGS.get(event)
+                if not tag or not cmd:
+                    continue
+                msg = f"{tag} {cmd}"
+                elapsed = payload.get("elapsed_sec")
+                if isinstance(elapsed, (int, float)):
+                    msg += f" ({float(elapsed):.1f}s)"
+                reason = payload.get("reason")
+                if reason and event == "error":
+                    msg += f" — {reason}"
+                with _STATE_LOCK:
+                    _STATE_PENDING.append(msg)
+    except Exception:
+        return
+
+
+# Start the watcher at import time — it blocks harmlessly on sys.stdin
+# until the supervisor sends something. Daemon so it never blocks exit.
+threading.Thread(target=_stdin_watcher, daemon=True, name="stdin-watcher").start()
+

 def _audio_energy(pcm: bytes) -> int:
    try:
@ -86,6 +180,19 @@ class GeminiBrain:
        self._ai_speak_start = 0.0
        self._last_ai_audio = 0.0
        self._done: Optional[asyncio.Event] = None
+        # ── Recognition flags — kept in sync with the state file by
+        # _recognition_state_watcher. Boot defaults come from the file (or
+        # the SANAD_* env vars if the file is missing).
+        _initial = _recog_state.read(_RECOG_STATE_PATH)
+        self._vision_enabled = bool(
+            _initial.vision_enabled
+            or os.environ.get("SANAD_VISION_ENABLE", "0") == "1"
+        )
+        self._face_rec_enabled = bool(
+            _initial.face_rec_enabled
+            or os.environ.get("SANAD_FACE_RECOGNITION_ENABLE", "0") == "1"
+        )
+        self._gallery_version_primed = -1   # bumped after first successful primer

    def stop(self) -> None:
        """Signal the run loop to exit at the next opportunity."""
@ -116,12 +223,19 @@ class GeminiBrain:
                    consecutive_errors = 0
                    self._mic.flush()
                    self._done = asyncio.Event()
+                    # Reset per-session primer state so re-priming on reconnect
+                    # actually happens. The state watcher will re-prime as soon
+                    # as it sees vision+face-rec enabled.
+                    self._gallery_version_primed = -1

                    try:
                        await asyncio.wait_for(
                            asyncio.gather(
                                self._send_mic_loop(session),
                                self._receive_loop(session),
+                                self._send_frame_loop(session),
+                                self._send_state_loop(session),
+                                self._recognition_state_watcher(session),
                            ),
                            timeout=_SESSION_TIMEOUT,
                        )
@ -368,3 +482,310 @@ class GeminiBrain:
            log.warning("receive ended: %s", exc)
        finally:
            self._done.set()
+
+    # ─── vision-state announcer ───────────────────────────
+    # Injects the camera state into the live session as text context.
+    # On a live toggle Gemini is told to say so out loud ("I can see you
+    # now" / "I can't see you anymore"); at session start it's silent
+    # standing context so "can you see me?" is answered honestly.
+
+    async def _announce_vision_state(self, session: Any, enabled: bool,
+                                     is_toggle: bool) -> None:
+        if is_toggle and enabled:
+            text = (
+                "[VISION ON] Your camera was just enabled — you can now see "
+                "the user through it. Briefly tell them you can see them now, "
+                "in your normal Khaleeji style (for example: "
+                "'هلا، الحين أشوفك زين')."
+            )
+        elif is_toggle and not enabled:
+            text = (
+                "[VISION OFF] Your camera was just disabled — you can no "
+                "longer see anything. Briefly tell the user you can't see "
+                "them anymore. If they later ask whether you can see them, "
+                "tell them to enable the camera from the dashboard."
+            )
+        elif enabled:  # session start, camera already on
+            text = (
+                "[VISION STATUS] Your camera is ON — you can see the user "
+                "through it. Do not announce this unprompted; just answer "
+                "naturally if they ask what you see."
+            )
+        else:  # session start, camera off
+            text = (
+                "[VISION STATUS] Your camera is OFF — you cannot see anything "
+                "right now. If the user asks whether you can see them, tell "
+                "them to enable the camera from the dashboard. Do not announce "
+                "this unprompted."
+            )
+        try:
+            await session.send_realtime_input(text=text)
+            log.info("vision-state injected (enabled=%s, toggle=%s)",
+                     enabled, is_toggle)
+        except asyncio.CancelledError:
+            raise
+        except Exception as exc:
+            log.warning("vision-state inject failed: %s", exc)
+
+    # ─── face-recognition-state announcer ─────────────────
+    # Same idea as _announce_vision_state, for the face-recognition toggle.
+    # On a live OFF toggle it also tells Gemini to disregard the gallery —
+    # so OFF takes effect immediately instead of lingering until reconnect.
+
+    async def _announce_facerec_state(self, session: Any, enabled: bool,
+                                      is_toggle: bool) -> None:
+        if is_toggle and enabled:
+            text = (
+                "[FACE RECOGNITION ON] Face recognition was just enabled — "
+                "you'll be shown the people you know in a moment. Briefly "
+                "tell the user you can now recognise the people you know, in "
+                "your normal Khaleeji style."
+            )
+        elif is_toggle and not enabled:
+            text = (
+                "[FACE RECOGNITION OFF] Face recognition was just disabled. "
+                "Disregard the face gallery you were given earlier — stop "
+                "greeting people by name and do not identify anyone. Briefly "
+                "tell the user you'll no longer recognise faces."
+            )
+        elif enabled:  # session start, face rec already on
+            text = (
+                "[FACE RECOGNITION STATUS] Face recognition is ON — when you "
+                "see someone you've been shown in the gallery, greet them by "
+                "name. Do not announce this unprompted."
+            )
+        else:  # session start, face rec off
+            text = (
+                "[FACE RECOGNITION STATUS] Face recognition is OFF — you "
+                "cannot identify people. If the user asks who someone is or "
+                "whether you recognise them, tell them to enable face "
+                "recognition from the dashboard. Do not announce this "
+                "unprompted."
+            )
+        try:
+            await session.send_realtime_input(text=text)
+            log.info("face-rec-state injected (enabled=%s, toggle=%s)",
+                     enabled, is_toggle)
+        except asyncio.CancelledError:
+            raise
+        except Exception as exc:
+            log.warning("face-rec-state inject failed: %s", exc)
+
+    # ─── recognition state watcher ────────────────────────
+    # Polls data/.recognition_state.json at SANAD_RECOGNITION_POLL_S Hz and
+    # mirrors vision_enabled / face_rec_enabled into in-memory flags so the
+    # rest of the session can react WITHOUT a Gemini reconnect.
+
+    async def _recognition_state_watcher(self, session: Any) -> None:
+        last_mtime = 0.0
+        last_state = _recog_state.RecognitionState(
+            vision_enabled=self._vision_enabled,
+            face_rec_enabled=self._face_rec_enabled,
+            gallery_version=self._gallery_version_primed,
+        )
+        # Best-effort initial primer if face_rec is already on at session start.
+        if self._face_rec_enabled and self._vision_enabled:
+            try:
+                cur = _recog_state.read(_RECOG_STATE_PATH)
+                await self._send_gallery_primer(session, cur.gallery_version)
+            except Exception as exc:
+                log.warning("initial gallery primer failed: %s", exc)
+
+        # Tell Gemini the current camera + face-recognition state at session
+        # start — silent standing context so "can you see me?" / "do you know
+        # who I am?" are answered honestly even if the user never toggles.
+        await self._announce_vision_state(
+            session, self._vision_enabled, is_toggle=False,
+        )
+        await self._announce_facerec_state(
+            session, self._face_rec_enabled, is_toggle=False,
+        )
+
+        while not self._done.is_set() and not self._stop_flag.is_set():
+            await asyncio.sleep(_RECOG_POLL_S)
+            try:
+                st = _RECOG_STATE_PATH.stat()
+            except FileNotFoundError:
+                continue
+            except Exception:
+                continue
+            if st.st_mtime == last_mtime:
+                continue
+            last_mtime = st.st_mtime
+            new_state = _recog_state.read(_RECOG_STATE_PATH)
+
+            # Vision toggle — instant. Announce it out loud so Gemini reacts
+            # ("I can see you now" / "I can't see you anymore").
+            if new_state.vision_enabled != last_state.vision_enabled:
+                self._vision_enabled = new_state.vision_enabled
+                log.info("vision toggled → %s", self._vision_enabled)
+                await self._announce_vision_state(
+                    session, self._vision_enabled, is_toggle=True,
+                )
+
+            # Face-rec toggle — announce it out loud. The OFF announcement
+            # also tells Gemini to disregard the gallery, so OFF takes effect
+            # immediately instead of lingering until the next reconnect.
+            if new_state.face_rec_enabled != last_state.face_rec_enabled:
+                self._face_rec_enabled = new_state.face_rec_enabled
+                if self._face_rec_enabled:
+                    log.info("face rec enabled — announcing + sending primer")
+                else:
+                    log.info("face rec disabled — telling Gemini to "
+                             "disregard the gallery")
+                await self._announce_facerec_state(
+                    session, self._face_rec_enabled, is_toggle=True,
+                )
+
+            # Conditions for re-priming:
+            #  - face_rec just turned ON (no_face_rec_before)
+            #  - gallery version bumped since the last primer
+            face_rec_just_on = (
+                new_state.face_rec_enabled and not last_state.face_rec_enabled
+            )
+            gallery_changed = (
+                new_state.gallery_version != self._gallery_version_primed
+            )
+            if (self._face_rec_enabled
+                    and (face_rec_just_on or gallery_changed)
+                    and self._vision_enabled):
+                try:
+                    await self._send_gallery_primer(
+                        session, new_state.gallery_version,
+                    )
+                except Exception as exc:
+                    log.warning("gallery primer failed: %s", exc)
+
+            last_state = new_state
+
+    # ─── camera frame send loop ───────────────────────────
+    # Reads the latest JPEG from the _LATEST_FRAME cache (fed by the
+    # _stdin_watcher thread, which the GeminiSubprocess supervisor pushes
+    # 'frame:<b64>' lines into) and relays it to Gemini Live at
+    # _VISION_SEND_HZ. Only active when self._vision_enabled. Skips frames
+    # older than _VISION_STALE_MS so a stopped/unplugged camera doesn't
+    # waste tokens on a frozen scene.
+
+    async def _send_frame_loop(self, session: Any) -> None:
+        period = 1.0 / max(0.5, _VISION_SEND_HZ)
+        stale_s = _VISION_STALE_MS / 1000.0
+        backoff = 0.0
+        last_sent_ts = 0.0
+
+        while not self._done.is_set() and not self._stop_flag.is_set():
+            await asyncio.sleep(max(period, backoff))
+            if not self._vision_enabled:
+                continue
+            with _LATEST_FRAME_LOCK:
+                data = _LATEST_FRAME.get("bytes")
+                ts = _LATEST_FRAME.get("ts", 0.0)
+            if not data:
+                continue
+            # Stale — supervisor stopped pushing (camera off / unplugged).
+            if (time.time() - ts) > stale_s:
+                continue
+            # De-dup — don't re-send a frame we already relayed.
+            if ts == last_sent_ts:
+                continue
+            try:
+                await session.send_realtime_input(
+                    video=types.Blob(data=data, mime_type="image/jpeg"),
+                )
+                last_sent_ts = ts
+                backoff = 0.0
+            except asyncio.CancelledError:
+                return
+            except Exception as exc:
+                log.warning("frame send failed: %s", exc)
+                backoff = min(backoff * 2 + 0.5, 5.0)
+
+    # ─── motion-state inject loop ─────────────────────────
+    # Drains _STATE_PENDING (fed by the _stdin_watcher from 'state:' lines
+    # the supervisor pushes when the arm starts/finishes/errors a motion)
+    # and injects each as silent text context into the live session, so
+    # Gemini can answer "what are you doing?" honestly. Per persona, Gemini
+    # reads these for context but does not narrate them unprompted.
+
+    async def _send_state_loop(self, session: Any) -> None:
+        while not self._done.is_set() and not self._stop_flag.is_set():
+            await asyncio.sleep(0.1)
+            with _STATE_LOCK:
+                if not _STATE_PENDING:
+                    continue
+                pending = list(_STATE_PENDING)
+                _STATE_PENDING.clear()
+            for msg in pending:
+                try:
+                    await session.send_realtime_input(text=msg)
+                    log.info("STATE injected: %s", msg)
+                except asyncio.CancelledError:
+                    return
+                except Exception as exc:
+                    # Some SDK versions may not accept text on
+                    # send_realtime_input — log once-ish and keep going;
+                    # motion still works, only this context channel is lost.
+                    log.warning("state inject failed: %s", exc)
+
+    # ─── face gallery primer ──────────────────────────────
+    # Builds one multimodal turn carrying the entire face gallery + a Khaleeji
+    # greeting instruction, and sends it via send_client_content. Gemini keeps
+    # this in session context until reconnect. Re-sent on gallery_version bumps.
+
+    async def _send_gallery_primer(self, session: Any, version: int) -> None:
+        try:
+            from Project.Sanad.vision.face_gallery import FaceGallery
+        except Exception as exc:
+            log.info("face gallery module unavailable: %s", exc)
+            return
+
+        gallery = FaceGallery(_FACES_DIR)
+        try:
+            entries = gallery.load_for_primer(
+                max_samples_per_face=_FACES_MAX_SAMPLES,
+                resize_long_side=_FACES_PRIMER_RESIZE,
+            )
+        except Exception as exc:
+            log.warning("face gallery load failed: %s", exc)
+            return
+
+        if not entries:
+            log.info("face gallery empty — primer skipped (v.%d)", version)
+            self._gallery_version_primed = version
+            return
+
+        parts: list[dict[str, Any]] = [{
+            "text": (
+                "GALLERY PRIMER (do not reply to this turn). "
+                "Below are people you know. When the live camera shows one of "
+                "them, greet them warmly by name in UAE Khaleeji dialect "
+                "(for example: 'هلا والله يا كسام، شحالك؟'), and you may use "
+                "the notes about them to make the conversation personal. "
+                "For faces NOT in this gallery, welcome them as a guest "
+                "without inventing a name. Greet each person only once per "
+                "minute to avoid repetition."
+            ),
+        }]
+        for entry, jpegs in entries:
+            label = (
+                f"This person is named {entry.name}."
+                if entry.name
+                else "This person's name is unknown — greet as guest."
+            )
+            if entry.description:
+                label += f" Notes about them: {entry.description}"
+            parts.append({"text": f"\n— {label}"})
+            for jpeg in jpegs:
+                parts.append({
+                    "inline_data": {"mime_type": "image/jpeg", "data": jpeg},
+                })
+
+        try:
+            await session.send_client_content(
+                turns=[{"role": "user", "parts": parts}],
+                turn_complete=True,
+            )
+        except Exception as exc:
+            log.warning("primer send failed: %s", exc)
+            return
+        self._gallery_version_primed = version
+        log.info("face gallery primed: %d person(s), v.%d", len(entries), version)
--- a/gemini/subprocess.py
+++ b/gemini/subprocess.py
@ -10,15 +10,16 @@ When a new model is added, build its own sibling supervisor (see

 from __future__ import annotations

+import base64
+import json
 import os
 import signal
 import subprocess
 import sys
 import threading
-import time
 from collections import deque
 from datetime import datetime
-from typing import Any
+from typing import Any, Optional, Union

 from pathlib import Path

@ -30,6 +31,11 @@ log = get_logger("gemini_subprocess")

 _LS_CFG = _cfg_section("gemini", "subprocess")

+# Camera frame forwarding — push the latest JPEG to the child over stdin
+# at this interval (seconds). 0.5 s ≈ 2 fps, matching the child's
+# SANAD_VISION_SEND_HZ default. The child de-stales + relays to Gemini.
+_FRAME_FORWARD_INTERVAL_S = float(_LS_CFG.get("frame_forward_interval_sec", 0.5))
+

 def _resolve_live_script() -> Path:
    """Locate the voice script to run as subprocess.
@ -82,6 +88,22 @@ class GeminiSubprocess:
        self.state_message = "Idle."
        self.last_user_text = ""
        self.suppressed_noise = 0
+        # ── stdin push channel (camera frames + motion state) ──
+        # The child (gemini/script.py) reads "frame:<b64>\n" and
+        # "state:<json>\n" lines off its stdin. Writes are serialised
+        # because the frame forwarder and the motion-state bus handler
+        # both call from different threads.
+        self._stdin_lock = threading.Lock()
+        self._camera = None                       # set via attach_camera()
+        self._frame_thread: threading.Thread | None = None
+        self._frame_stop = threading.Event()
+
+    # ── camera attach (called once from main.py) ──────────────
+
+    def attach_camera(self, camera) -> None:
+        """Give the supervisor a reference to the CameraDaemon so it can
+        forward frames to the child over stdin while a session runs."""
+        self._camera = camera

    def _open_session_log(self, pid: int):
        """Open (or re-open) the per-day append log file for this session."""
@ -214,6 +236,7 @@ class GeminiSubprocess:
        proc = subprocess.Popen(
            cmd,
            cwd=str(script.parent),
+            stdin=subprocess.PIPE,        # camera frames + motion state push
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
@ -221,16 +244,106 @@ class GeminiSubprocess:
            env=env,
        )

+        # Reap any stale frame forwarder from a previous session that ended
+        # by a child crash rather than a clean stop() — otherwise it keeps
+        # spinning on a dead pipe and we'd leak a thread per restart.
+        stale_ft = self._frame_thread
+        if stale_ft is not None and stale_ft.is_alive():
+            self._frame_stop.set()
+            stale_ft.join(timeout=2.0)
+
        with self._lock:
            self.process = proc
            self.log_tail.append(f"Started: pid={proc.pid}")
            self._set_state("starting", f"pid={proc.pid}")
            self._reader_thread = threading.Thread(target=self._reader_loop, daemon=True)
            self._reader_thread.start()
+            # Frame forwarder — pushes camera JPEGs to the child over stdin.
+            self._frame_stop.clear()
+            self._frame_thread = threading.Thread(
+                target=self._frame_forwarder, daemon=True, name="gemini-frame-fwd",
+            )
+            self._frame_thread.start()

        log.info("Live Gemini subprocess started: pid=%d", proc.pid)
        return {"started": True, "pid": proc.pid}

+    # ── stdin push channel ────────────────────────────────────
+
+    def _send_stdin(self, line: str) -> None:
+        """Serialised stdin write — frame forwarder + motion-state handler
+        both call this from different threads. Best-effort: a closed pipe
+        or a not-yet-started process is a silent no-op."""
+        proc = self.process
+        if proc is None or proc.stdin is None:
+            return
+        try:
+            with self._stdin_lock:
+                if not proc.stdin.closed:
+                    proc.stdin.write(line)
+                    proc.stdin.flush()
+        except Exception:
+            # Pipe broke (child exited) — drop silently; the reader thread
+            # will surface the exit via state="stopped".
+            pass
+
+    def send_frame(self, jpeg: Union[bytes, str]) -> None:
+        """Forward one camera frame to the child as 'frame:<base64>\\n'.
+
+        Accepts raw JPEG bytes (base64-encoded here) or an already-base64
+        ASCII string (e.g. CameraDaemon.get_frame_b64() — no re-encode)."""
+        if isinstance(jpeg, bytes):
+            b64 = base64.b64encode(jpeg).decode("ascii")
+        elif isinstance(jpeg, str):
+            b64 = jpeg.strip()
+        else:
+            return
+        if b64:
+            self._send_stdin("frame:" + b64 + "\n")
+
+    def send_state(self, event: str, cmd: str,
+                   elapsed_sec: Optional[float] = None,
+                   reason: Optional[str] = None) -> None:
+        """Push a motion-state update to the child as 'state:<json>\\n'.
+
+        Events: start | complete | interrupted | error. The child injects
+        '[STATE-...] <cmd>' into the live Gemini session as silent text
+        context so Gemini can answer "what are you doing?" honestly."""
+        if not event or not cmd:
+            return
+        payload: dict[str, Any] = {"event": event, "cmd": cmd}
+        if elapsed_sec is not None:
+            payload["elapsed_sec"] = round(float(elapsed_sec), 2)
+        if reason:
+            payload["reason"] = str(reason)[:200]
+        try:
+            line = "state:" + json.dumps(payload, ensure_ascii=False) + "\n"
+        except Exception:
+            return
+        self._send_stdin(line)
+
+    def _frame_forwarder(self) -> None:
+        """Background thread — push the camera's latest frame to the child.
+
+        Runs for the lifetime of one subprocess session. Gated on the
+        camera actually running; the child does its own vision-enabled +
+        staleness checks, so this stays dumb (camera up → push)."""
+        cam = self._camera
+        if cam is None:
+            return
+        while not self._frame_stop.is_set():
+            if self._frame_stop.wait(_FRAME_FORWARD_INTERVAL_S):
+                break
+            try:
+                if not cam.is_running():
+                    continue
+                b64 = cam.get_frame_b64()
+                if b64:
+                    self.send_frame(b64)
+            except Exception:
+                # Best-effort — never let a frame hiccup kill the thread.
+                pass
+
    def stop(self) -> dict[str, Any]:
        with self._lock:
            proc = self.process
@ -238,6 +351,13 @@ class GeminiSubprocess:
                return {"stopped": False, "message": "Not running."}
            self._set_state("stopping", "Stopping...")

+        # Halt the frame forwarder before we tear the pipe down.
+        self._frame_stop.set()
+        ft = self._frame_thread
+        if ft is not None:
+            ft.join(timeout=2.0)
+        self._frame_thread = None
+
        try:
            proc.send_signal(signal.SIGINT)
            proc.wait(timeout=_STOP_TIMEOUT_SEC)
@ -251,6 +371,16 @@ class GeminiSubprocess:

        rc = proc.returncode

+        # Close stdin/stdout explicitly — without this each start/stop
+        # cycle leaks FDs (relied on Popen.__del__ which only runs at GC;
+        # a reconnect loop would march the FD count to the OS limit).
+        for pipe in (getattr(proc, "stdin", None), getattr(proc, "stdout", None)):
+            if pipe is not None:
+                try:
+                    pipe.close()
+                except Exception:
+                    pass
+
        with self._lock:
            self.process = None
            self.log_tail.append("Stopped.")
--- a/main.py
+++ b/main.py
@ -106,6 +106,8 @@ TypedReplayEngine    = _safe_import("TypedReplayEngine",    lambda: __import__("
 GeminiVoiceClient    = _safe_import("GeminiVoiceClient",    lambda: __import__("Project.Sanad.gemini.client", fromlist=["GeminiVoiceClient"]).GeminiVoiceClient)
 GeminiSubprocess     = _safe_import("GeminiSubprocess",     lambda: __import__("Project.Sanad.gemini.subprocess", fromlist=["GeminiSubprocess"]).GeminiSubprocess)
 LocalSubprocess      = _safe_import("LocalSubprocess",      lambda: __import__("Project.Sanad.local.subprocess",  fromlist=["LocalSubprocess"]).LocalSubprocess)
+CameraDaemon         = _safe_import("CameraDaemon",         lambda: __import__("Project.Sanad.vision.camera", fromlist=["CameraDaemon"]).CameraDaemon)
+FaceGallery          = _safe_import("FaceGallery",          lambda: __import__("Project.Sanad.vision.face_gallery", fromlist=["FaceGallery"]).FaceGallery)


 # ── global instances (imported by route modules) ──
@ -131,6 +133,107 @@ else:
    live_sub  = _safe_construct("live_sub", GeminiSubprocess)
 typed_replay  = _safe_construct("typed_replay", (lambda: TypedReplayEngine(voice_client, audio_mgr)) if (TypedReplayEngine and voice_client and audio_mgr) else None)

+# ── Recognition (camera + face gallery) ─────────────────────────────────────
+# Camera is idle until the dashboard toggles vision on; face gallery is pure
+# file IO and always available if the import succeeded.
+#
+# Config precedence (highest first): explicit env var → config/core_config.json
+# section → hardcoded default. The parent process normally has no SANAD_CAMERA_*
+# env vars (LIVE_TUNE is only forwarded to the Gemini child), so in practice the
+# core_config.json `camera` / `faces` sections are the live source here.
+def _build_camera():
+    from Project.Sanad.core.config_loader import section as _cfg_section
+    cam_cfg = _cfg_section("core", "camera")
+
+    def _knob(env_key: str, cfg_key: str, default):
+        env_val = os.environ.get(env_key)
+        if env_val is not None and env_val != "":
+            return type(default)(env_val)
+        return type(default)(cam_cfg.get(cfg_key, default))
+
+    # Frames are cached in memory and pushed to the Gemini child over its
+    # stdin (see GeminiSubprocess._frame_forwarder) — no file drop.
+    return CameraDaemon(
+        width=_knob("SANAD_CAMERA_WIDTH", "width", 424),
+        height=_knob("SANAD_CAMERA_HEIGHT", "height", 240),
+        fps=_knob("SANAD_CAMERA_FPS", "fps", 15),
+        jpeg_quality=_knob("SANAD_CAMERA_JPEG_QUALITY", "jpeg_quality", 70),
+        stale_threshold_s=float(cam_cfg.get("stale_threshold_s", 10.0)),
+        reconnect_min_s=float(cam_cfg.get("reconnect_min_s", 2.0)),
+        reconnect_max_s=float(cam_cfg.get("reconnect_max_s", 10.0)),
+        capture_timeout_ms=int(cam_cfg.get("capture_timeout_ms", 5000)),
+    )
+
+def _build_gallery():
+    from Project.Sanad.config import BASE_DIR
+    from Project.Sanad.core.config_loader import section as _cfg_section
+    faces_cfg = _cfg_section("core", "faces")
+    # SANAD_FACES_DIR is set absolute by LIVE_TUNE (the Gemini child reads the
+    # same var). In the parent it's usually unset → fall back to the JSON's
+    # dir_rel, then the hardcoded default. Honour absolute paths as-is.
+    raw = os.environ.get("SANAD_FACES_DIR") or faces_cfg.get("dir_rel", "data/faces")
+    p = Path(raw)
+    root = p if p.is_absolute() else (BASE_DIR / raw)
+    return FaceGallery(root)
+
+camera  = _safe_construct("camera",  _build_camera if CameraDaemon else None)
+gallery = _safe_construct("gallery", _build_gallery if FaceGallery else None)
+
+# Restore persisted vision_enabled at boot — start camera if the user left
+# it on across a reboot. Face-rec state is read by the Gemini child directly.
+try:
+    from Project.Sanad.vision import recognition_state as _recog_state
+    from Project.Sanad.config import BASE_DIR as _BD
+    _state = _recog_state.read(_BD / "data" / ".recognition_state.json")
+    if _state.vision_enabled and camera is not None:
+        if camera.start():
+            log.info("Camera vision restored from state (backend=%s)", camera.backend)
+        else:
+            log.warning("Camera vision was ON but no backend available — leaving OFF")
+            _recog_state.mutate(_BD / "data" / ".recognition_state.json",
+                                vision_enabled=False)
+except Exception:
+    log.exception("Could not restore recognition state")
+
+# Hand the camera to the Gemini supervisor so it can forward frames to the
+# child over stdin while a live session runs.
+if live_sub is not None and camera is not None:
+    try:
+        if hasattr(live_sub, "attach_camera"):
+            live_sub.attach_camera(camera)
+            log.info("Camera attached to live subprocess supervisor")
+    except Exception:
+        log.exception("attach_camera failed")
+
+# ── Motion-state → Gemini channel ───────────────────────────────────────────
+# The arm controller emits motion.action_started / _done / _error on the bus.
+# Forward each to the Gemini child as a 'state:' line so the live session can
+# answer "what are you doing?" honestly. Sync handlers, fired via emit_sync
+# from the arm's worker thread — send_state just writes to a pipe (cheap).
+if live_sub is not None and hasattr(live_sub, "send_state"):
+    try:
+        from Project.Sanad.core.event_bus import bus as _bus
+
+        def _on_motion_started(action: str = "", **_kw):
+            live_sub.send_state("start", action)
+
+        def _on_motion_done(action: str = "", elapsed_sec=None,
+                            failed: bool = False, **_kw):
+            # action_error already covered the failure case with a reason;
+            # here just emit complete (skip if it failed to avoid a dup).
+            if not failed:
+                live_sub.send_state("complete", action, elapsed_sec=elapsed_sec)
+
+        def _on_motion_error(action: str = "", reason: str = "", **_kw):
+            live_sub.send_state("error", action, reason=reason)
+
+        _bus.on("motion.action_started", _on_motion_started)
+        _bus.on("motion.action_done", _on_motion_done)
+        _bus.on("motion.action_error", _on_motion_error)
+        log.info("Motion-state → Gemini channel wired")
+    except Exception:
+        log.exception("Could not wire motion-state → Gemini channel")
+
 # Wire everything into the Brain (only what was constructed)
 def _safe_attach(method_name: str, value):
    if brain is None or value is None:
@ -166,6 +269,8 @@ SUBSYSTEMS = {
    "live_voice":    live_voice,
    "live_sub":      live_sub,
    "typed_replay":  typed_replay,
+    "camera":        camera,
+    "gallery":       gallery,
 }

 # Critical subsystems — if any of these are None, log a warning at startup
@ -220,6 +325,13 @@ def _do_shutdown(from_signal: bool = False):
        except Exception:
            log.exception("audio_mgr.close() failed")

+    if camera is not None:
+        try:
+            if camera.is_running():
+                camera.stop()
+        except Exception:
+            log.exception("camera.stop() failed")
+
    log.info("Shutdown complete")


--- a/motion/arm_controller.py
+++ b/motion/arm_controller.py
@ -673,6 +673,8 @@ class ArmController:
            self._is_busy = True
            self._cancel.clear()

+        _start = time.monotonic()
+        _failed = False
        try:
            bus.emit_sync("motion.action_started", action=action.name)
            if action.file:
@ -680,12 +682,20 @@ class ArmController:
            else:
                self._run_sdk_action(action)
        except Exception as exc:
+            _failed = True
            log.error("Action %s failed: %s", action.name, exc)
+            bus.emit_sync("motion.action_error", action=action.name,
+                          reason=str(exc))
        finally:
            with self._lock:
                self._is_busy = False
                self._last_action_time = time.monotonic()
-            bus.emit_sync("motion.action_done", action=action.name)
+            # action_done always fires (back-compat for existing listeners);
+            # action_error above is the extra signal for the Gemini
+            # motion-state channel. elapsed_sec lets Gemini say "...took 2.3s".
+            bus.emit_sync("motion.action_done", action=action.name,
+                          elapsed_sec=round(time.monotonic() - _start, 2),
+                          failed=_failed)

    def _run_sdk_action(self, action: Action):
        if not _HAS_SDK:
--- a/requirements.txt
+++ b/requirements.txt
@ -7,11 +7,30 @@ uvicorn[standard]>=0.29.0
 python-multipart>=0.0.9

 # Gemini voice
+# google-genai: the Gemini Live SDK — used by gemini/script.py (live brain)
+#   and gemini/client.py. Needs Python 3.10+, which is why the voice loop
+#   runs in the gemini_sdk conda env. send_realtime_input(video=)/(text=)
+#   and send_client_content() require a reasonably recent (>=1.x) release.
+google-genai>=1.0.0
 websockets>=12.0
 pyaudio>=0.2.13

-# Camera proxy
-httpx>=0.27.0
+# Recognition (camera vision + face gallery for Gemini-side face recognition)
+# opencv-python-headless: JPEG encode + USB-camera fallback. Headless wheel —
+#                         the dashboard renders frames; we never need a GUI window.
+# Pillow: resize face samples before the Gemini primer turn.
+opencv-python-headless>=4.8.0
+Pillow>=10.0.0
+#
+# pyrealsense2 — DO NOT `pip install` on Jetson / JetPack 5.
+#   The PyPI wheel is built against glibc 2.32+ (Ubuntu 22.04); JetPack 5 ships
+#   glibc 2.31, so the wheel fails to load with:
+#     ImportError: ... version `GLIBC_2.32' not found
+#   On Jetson, build the Python binding from source against the apt-installed
+#   librealsense2 runtime (see README → "Camera vision on Jetson").
+#   On x86_64 / Ubuntu 22.04+ desktops, `pip install pyrealsense2` works fine.
+#   If pyrealsense2 is absent, CameraDaemon falls back to cv2.VideoCapture(0).
+# pyrealsense2>=2.50.0   # intentionally commented — see note above

 # Local TTS (optional — only needed for MBZUAI model)
 transformers>=4.40.0
--- a/vision/init.py
+++ b/vision/init.py
@ -0,0 +1 @@
+"""Vision package — camera daemon + face gallery for Gemini-side recognition."""
--- a/vision/camera.py
+++ b/vision/camera.py
@ -0,0 +1,560 @@
+"""Camera daemon — single producer, in-memory frame cache.
+
+Captures frames at fixed FPS from a RealSense (preferred) or any USB
+camera (fallback), JPEG-encodes them, and caches the latest frame in
+memory in two views (matches Marcus's API/camera_api.py):
+
+  - `_latest_jpeg`  raw JPEG bytes  — dashboard preview + frame forwarder
+  - `_latest_b64`   base64 ASCII    — frame forwarder → Gemini child stdin
+
+Consumers:
+  - dashboard preview       → `snapshot_jpeg()` (served as an HTTP Response)
+  - face enrollment         → `get_fresh_frame()` for a guaranteed-current capture
+  - GeminiSubprocess        → `get_frame_b64()`, pushed over the child's stdin
+
+Lifecycle is driven by the Recognition tab toggle. The daemon is idle
+until `start()` is called; failures in start() are non-fatal and
+reported via `is_running()` / `backend`. Once running it auto-reconnects
+on USB unplug / stalled frames (Marcus-style resilience), and supports
+hot `reconfigure()` of resolution/FPS without a full restart.
+"""
+
+from __future__ import annotations
+
+import base64
+import os
+import threading
+import time
+from typing import Optional
+
+import numpy as np
+
+from Project.Sanad.core.logger import get_logger
+
+log = get_logger("camera")
+
+# How many /dev/video* indices to scan for a USB-style color camera when
+# RealSense isn't available. A RealSense exposes ~6 V4L2 nodes (depth, IR,
+# color, metadata…) — the color one is rarely index 0, so we probe each
+# and accept the first that yields a real 3-channel BGR frame.
+_USB_SCAN_RANGE = 10
+
+
+class CameraDaemon:
+    """RealSense → USB fallback camera capture with in-memory frame cache."""
+
+    def __init__(
+        self,
+        width: int = 424,
+        height: int = 240,
+        fps: int = 15,
+        jpeg_quality: int = 70,
+        stale_threshold_s: float = 10.0,
+        reconnect_min_s: float = 2.0,
+        reconnect_max_s: float = 10.0,
+        capture_timeout_ms: int = 5000,
+    ) -> None:
+        # Active profile — guarded by _reconfig_lock so reconfigure() can
+        # hot-swap it from another thread between capture sessions.
+        self._reconfig_lock = threading.Lock()
+        self._w = int(width)
+        self._h = int(height)
+        self._fps = int(fps)
+        self._q = max(10, min(95, int(jpeg_quality)))
+        self._reconfig_pending = False
+
+        # Resilience knobs (Marcus-style)
+        self._stale_s = float(stale_threshold_s)
+        self._reconnect_min_s = float(reconnect_min_s)
+        self._reconnect_max_s = float(reconnect_max_s)
+        self._capture_timeout_ms = int(capture_timeout_ms)
+
+        self._thread: Optional[threading.Thread] = None
+        self._stop = threading.Event()
+        self._backend: Optional[str] = None
+        self._lock = threading.Lock()
+        self._latest_jpeg: Optional[bytes] = None
+        self._latest_b64: Optional[str] = None
+        self._latest_ts: float = 0.0
+        self._frame_seq: int = 0
+        self._error: Optional[str] = None
+        self._reconnect_count: int = 0
+
+    # ── public API ──────────────────────────────────────────
+
+    @property
+    def backend(self) -> Optional[str]:
+        return self._backend
+
+    @property
+    def error(self) -> Optional[str]:
+        return self._error
+
+    @property
+    def frame_seq(self) -> int:
+        return self._frame_seq
+
+    def is_running(self) -> bool:
+        return self._thread is not None and self._thread.is_alive()
+
+    def start(self) -> bool:
+        """Start capture thread. Returns True if a backend was acquired.
+
+        Initial probe is synchronous; if it fails the thread isn't spawned.
+        Once running, the inner loop auto-reconnects on USB unplug or
+        stalled frames using exponential backoff (`reconnect_min_s` ..
+        `reconnect_max_s`).
+        """
+        if self.is_running():
+            return True
+        self._stop.clear()
+        self._error = None
+        self._reconnect_count = 0
+
+        # One-shot USB-2.0 negotiation diagnostic (warns operator if D435I
+        # came up on USB 2.0 — frame drops would be likely otherwise).
+        self._check_usb_version()
+
+        backend = self._probe_any()
+        if backend is None:
+            log.warning("Camera: no backend available (RealSense + USB both failed)")
+            self._backend = None
+            return False
+
+        self._backend = backend["name"]
+        self._thread = threading.Thread(
+            target=self._reconnect_loop, args=(backend,),
+            daemon=True, name="camera-daemon",
+        )
+        self._thread.start()
+        with self._reconfig_lock:
+            w, h, f = self._w, self._h, self._fps
+        log.info("Camera started (backend=%s, %dx%d @ %dfps)",
+                 self._backend, w, h, f)
+        return True
+
+    def stop(self) -> None:
+        """Stop the capture thread and release the hardware."""
+        if not self.is_running():
+            self._backend = None
+            return
+        self._stop.set()
+        t = self._thread
+        if t is not None:
+            t.join(timeout=2.0)
+        self._thread = None
+        self._backend = None
+        log.info("Camera stopped")
+
+    def reconfigure(self, width: Optional[int] = None, height: Optional[int] = None,
+                    fps: Optional[int] = None, jpeg_quality: Optional[int] = None) -> dict:
+        """Hot-swap the capture profile without a full stop/start.
+
+        Sets a pending flag — the capture loop notices it, tears the
+        pipeline down, and rebuilds at the new resolution (~0.5 s gap).
+        If the daemon isn't running the new values just take effect on
+        the next `start()`. Returns the resulting active profile.
+        """
+        with self._reconfig_lock:
+            if width is not None:
+                self._w = int(width)
+            if height is not None:
+                self._h = int(height)
+            if fps is not None:
+                self._fps = int(fps)
+            if jpeg_quality is not None:
+                self._q = max(10, min(95, int(jpeg_quality)))
+            if self.is_running():
+                self._reconfig_pending = True
+            profile = {"width": self._w, "height": self._h,
+                       "fps": self._fps, "jpeg_quality": self._q}
+        log.info("Camera reconfigure → %s", profile)
+        return profile
+
+    def snapshot_jpeg(self) -> Optional[bytes]:
+        """Return the latest JPEG bytes, or None if no frame yet."""
+        with self._lock:
+            return self._latest_jpeg
+
+    def get_frame_b64(self) -> Optional[str]:
+        """Return the latest frame as a base64 ASCII string (or None).
+
+        Used by the frame forwarder to push frames over the Gemini child's
+        stdin without re-encoding — base64 is cached alongside the JPEG.
+        """
+        with self._lock:
+            return self._latest_b64
+
+    def get_fresh_frame(self, max_age_s: float = 0.5,
+                        timeout_s: float = 1.5) -> Optional[bytes]:
+        """Return a JPEG frame newer than `max_age_s`, waiting up to `timeout_s`.
+
+        Used by face enrollment so the captured frame is guaranteed to be
+        the *current* scene, not a stale buffer from before the user got
+        into position. Falls back to whatever's cached on timeout.
+        """
+        deadline = time.time() + timeout_s
+        while time.time() < deadline:
+            with self._lock:
+                if (self._latest_jpeg is not None
+                        and self._latest_ts > 0
+                        and (time.time() - self._latest_ts) <= max_age_s):
+                    return self._latest_jpeg
+            time.sleep(0.03)
+        with self._lock:
+            return self._latest_jpeg
+
+    def latest_age_s(self) -> float:
+        """Seconds since last successful frame; +inf if none."""
+        with self._lock:
+            if self._latest_ts <= 0:
+                return float("inf")
+            return time.time() - self._latest_ts
+
+    def status(self) -> dict:
+        with self._reconfig_lock:
+            w, h, f, q = self._w, self._h, self._fps, self._q
+        # latest_age_s() is +inf until the first frame lands. inf is NOT
+        # JSON-serialisable by Starlette's JSONResponse (allow_nan=False) —
+        # leaving it as inf would 500 the /api/recognition/* routes. Map
+        # "running but no frame yet" and "not running" both to None.
+        age = self.latest_age_s()
+        age_s = round(age, 2) if (self.is_running() and age != float("inf")) else None
+        return {
+            "running": self.is_running(),
+            "backend": self._backend,
+            "width": w,
+            "height": h,
+            "fps": f,
+            "jpeg_quality": q,
+            "frame_seq": self._frame_seq,
+            "age_s": age_s,
+            "error": self._error,
+            "reconnect_count": self._reconnect_count,
+        }
+
+    # ── helpers ─────────────────────────────────────────────
+
+    def _probe_any(self) -> Optional[dict]:
+        """Try RealSense first, then USB. Returns backend dict or None."""
+        b = self._probe_realsense()
+        if b is None:
+            b = self._probe_usb()
+        return b
+
+    def _check_usb_version(self) -> None:
+        """Warn if a connected RealSense negotiated USB 2.0 (needs 3.x).
+
+        Marcus has this same check — D435I on USB 2.0 can't deliver
+        color+depth+IMU and the pipeline silently stalls. Catching it at
+        startup lets the operator fix the cable/port instead of chasing a
+        "no frames" loop. Diagnostic only; never blocks startup.
+        """
+        try:
+            import pyrealsense2 as rs  # type: ignore
+            ctx = rs.context()
+            for dev in ctx.query_devices():
+                try:
+                    usb_type = dev.get_info(rs.camera_info.usb_type_descriptor)
+                    name = dev.get_info(rs.camera_info.name)
+                except Exception:
+                    continue
+                if str(usb_type).startswith("2."):
+                    log.warning(
+                        "RealSense %s negotiated USB %s — expected 3.x. "
+                        "Frame drops likely. Try a USB 3 port / shorter cable / "
+                        "powered hub.", name, usb_type,
+                    )
+                else:
+                    log.info("RealSense %s on USB %s", name, usb_type)
+        except Exception:
+            pass
+
+    # ── backend probing ─────────────────────────────────────
+
+    def _probe_realsense(self) -> Optional[dict]:
+        with self._reconfig_lock:
+            w, h, f = self._w, self._h, self._fps
+        try:
+            import pyrealsense2 as rs  # type: ignore
+            pipeline = rs.pipeline()
+            cfg = rs.config()
+            cfg.enable_stream(rs.stream.color, w, h, rs.format.bgr8, f)
+            profile = pipeline.start(cfg)
+            return {"name": "realsense", "pipeline": pipeline, "rs": rs,
+                    "profile": profile}
+        except Exception as exc:
+            log.info("RealSense unavailable: %s", exc)
+            return None
+
+    def _open_usb_index(self, idx: int, w: int, h: int, f: int,
+                        cv2) -> Optional[dict]:
+        """Open one /dev/video<idx>, validate it yields a 3-channel frame,
+        and classify it as colour vs grayscale/IR.
+
+        A RealSense IR node delivers Y8 — cv2 replicates that single plane
+        across 3 channels, so the planes come back *bit-identical*. A real
+        colour sensor never produces bit-identical channels (per-channel
+        sensor noise differs even on a flat gray scene). That's the test.
+        Returns a backend dict with `is_color`, or None if the node is
+        unusable.
+        """
+        cap = None
+        try:
+            cap = cv2.VideoCapture(idx)
+            if not cap.isOpened():
+                cap.release()
+                return None
+            cap.set(cv2.CAP_PROP_FRAME_WIDTH, w)
+            cap.set(cv2.CAP_PROP_FRAME_HEIGHT, h)
+            cap.set(cv2.CAP_PROP_FPS, f)
+            good = None
+            for _ in range(5):
+                ok, frame = cap.read()
+                if (ok and frame is not None and frame.ndim == 3
+                        and frame.shape[2] == 3):
+                    good = frame
+                    break
+            if good is None:
+                cap.release()
+                return None
+            is_color = not (
+                np.array_equal(good[:, :, 0], good[:, :, 1])
+                and np.array_equal(good[:, :, 1], good[:, :, 2])
+            )
+            return {"name": "usb", "cap": cap, "cv2": cv2, "index": idx,
+                    "is_color": is_color,
+                    "frame_wh": (good.shape[1], good.shape[0])}
+        except Exception as exc:
+            log.info("USB camera index %d: %s", idx, exc)
+            if cap is not None:
+                try:
+                    cap.release()
+                except Exception:
+                    pass
+            return None
+
+    def _probe_usb(self) -> Optional[dict]:
+        """Scan /dev/video* for a colour camera node, falling back to a
+        grayscale/IR node only if no colour node exists.
+
+        On a RealSense, /dev/video0 is the *depth* stream (Z16, cv2 can't
+        open it as a webcam); the IR nodes deliver Y8 (grayscale); the
+        *colour* node delivers YUYV/BGR. We can't know the index up front,
+        so we probe each and prefer the first genuine colour node — that's
+        why the dashboard preview used to come up grayscale. Pin a node
+        with SANAD_CAMERA_USB_INDEX=<n> to skip the scan entirely.
+        """
+        with self._reconfig_lock:
+            w, h, f = self._w, self._h, self._fps
+        try:
+            import cv2  # type: ignore
+        except Exception as exc:
+            log.info("USB camera unavailable: %s", exc)
+            return None
+
+        # Pinned index — accept whatever it is (colour or not).
+        explicit = os.environ.get("SANAD_CAMERA_USB_INDEX", "").strip()
+        if explicit.isdigit():
+            backend = self._open_usb_index(int(explicit), w, h, f, cv2)
+            if backend is not None:
+                fw, fh = backend["frame_wh"]
+                log.info("USB camera: pinned /dev/video%d (%dx%d, %s)",
+                         backend["index"], fw, fh,
+                         "colour" if backend["is_color"] else "grayscale/IR")
+                return backend
+            log.warning("USB camera: pinned index %s unusable", explicit)
+            return None
+
+        # Scan — prefer a real colour node; keep the first grayscale node
+        # as a last resort so the camera still works if that's all there is.
+        gray_fallback: Optional[dict] = None
+        for idx in range(_USB_SCAN_RANGE):
+            backend = self._open_usb_index(idx, w, h, f, cv2)
+            if backend is None:
+                continue
+            fw, fh = backend["frame_wh"]
+            if backend["is_color"]:
+                log.info("USB camera: using /dev/video%d (colour, %dx%d)",
+                         idx, fw, fh)
+                if gray_fallback is not None:
+                    try:
+                        gray_fallback["cap"].release()
+                    except Exception:
+                        pass
+                return backend
+            # grayscale/IR — remember the first, release any extras
+            if gray_fallback is None:
+                gray_fallback = backend
+            else:
+                try:
+                    backend["cap"].release()
+                except Exception:
+                    pass
+
+        if gray_fallback is not None:
+            fw, fh = gray_fallback["frame_wh"]
+            log.warning("USB camera: no colour node found — falling back to "
+                        "/dev/video%d (grayscale/IR, %dx%d). For a RealSense, "
+                        "build pyrealsense2 or pin the colour node with "
+                        "SANAD_CAMERA_USB_INDEX.", gray_fallback["index"], fw, fh)
+            return gray_fallback
+
+        log.info("USB camera unavailable: no working /dev/video* node found "
+                 "(scanned %d indices)", _USB_SCAN_RANGE)
+        return None
+
+    # ── main capture loop ───────────────────────────────────
+
+    def _reconnect_loop(self, initial_backend: dict) -> None:
+        """Outer loop — owns reconnect with exponential backoff.
+
+        Inner `_capture_session` runs until the camera goes stale, the
+        stop flag is set, or a reconfigure is requested. On stall we
+        sleep + re-probe; on reconfigure we re-probe immediately at the
+        new resolution. Backoff resets after a successful session.
+        """
+        backend = initial_backend
+        backoff = self._reconnect_min_s
+
+        while not self._stop.is_set():
+            reconfigured = False
+            try:
+                reconfigured = self._capture_session(backend)
+            except Exception as exc:
+                log.exception("Camera capture session crashed: %s", exc)
+                self._error = str(exc)
+            finally:
+                self._teardown(backend)
+
+            if self._stop.is_set():
+                break
+
+            if reconfigured:
+                # Fast path — rebuild immediately at the new profile.
+                with self._reconfig_lock:
+                    self._reconfig_pending = False
+                new_backend = self._probe_any()
+                if new_backend is None:
+                    self._error = "reconnecting"
+                    log.warning("Camera reconfigure: re-probe failed — "
+                                "retrying in %.1fs", backoff)
+                    if self._stop.wait(backoff):
+                        break
+                    backoff = min(backoff * 2, self._reconnect_max_s)
+                    continue
+                self._backend = new_backend["name"]
+                self._error = None
+                backend = new_backend
+                backoff = self._reconnect_min_s
+                log.info("Camera rebuilt after reconfigure (backend=%s)",
+                         self._backend)
+                continue
+
+            # Capture session ended unexpectedly (stall / crash). Sleep + re-probe.
+            self._error = "reconnecting"
+            log.warning("Camera disconnected — reconnecting in %.1fs", backoff)
+            if self._stop.wait(backoff):  # interruptible sleep
+                break
+            backoff = min(backoff * 2, self._reconnect_max_s)
+
+            new_backend = self._probe_any()
+            if new_backend is None:
+                self._backend = None
+                continue  # stay in the loop; next iteration retries
+            self._backend = new_backend["name"]
+            self._reconnect_count += 1
+            self._error = None
+            log.info("Camera reconnected (backend=%s, attempt #%d)",
+                     self._backend, self._reconnect_count)
+            backend = new_backend
+            backoff = self._reconnect_min_s  # reset on success
+
+    def _capture_session(self, backend: dict) -> bool:
+        """Inner capture loop — runs until stop, stale-frame timeout, or
+        a reconfigure request.
+
+        Returns True if it exited because of a reconfigure (caller rebuilds
+        immediately), False on a stall or clean stop.
+        """
+        import cv2  # always available — used for JPEG encode
+
+        with self._reconfig_lock:
+            encode_params = [int(cv2.IMWRITE_JPEG_QUALITY), self._q]
+        last_frame_time = time.time()
+        consecutive_failures = 0
+
+        while not self._stop.is_set():
+            if self._reconfig_pending:
+                log.info("Camera reconfigure requested — rebuilding pipeline")
+                return True
+
+            bgr = self._read_frame(backend)
+            if bgr is None:
+                consecutive_failures += 1
+                age = time.time() - last_frame_time
+                if age > self._stale_s:
+                    log.warning(
+                        "Camera stalled %.1fs (%d consecutive timeouts) — "
+                        "rebuilding pipeline", age, consecutive_failures,
+                    )
+                    return False
+                # Intermediate warnings so degradation is visible early
+                if consecutive_failures in (3, 10, 30):
+                    log.warning("Camera slow (%d failures, age %.1fs)",
+                                consecutive_failures, age)
+                time.sleep(0.05)
+                continue
+
+            try:
+                ok, buf = cv2.imencode(".jpg", bgr, encode_params)
+            except Exception as exc:
+                log.warning("JPEG encode failed: %s", exc)
+                continue
+            if not ok:
+                continue
+            jpeg = bytes(buf)
+            b64 = base64.b64encode(jpeg).decode("ascii")
+            now = time.time()
+            with self._lock:
+                self._latest_jpeg = jpeg
+                self._latest_b64 = b64
+                self._latest_ts = now
+                self._frame_seq += 1
+            last_frame_time = now
+            consecutive_failures = 0
+
+        return False
+
+    def _read_frame(self, backend: dict) -> Optional[np.ndarray]:
+        name = backend["name"]
+        if name == "realsense":
+            try:
+                frames = backend["pipeline"].wait_for_frames(
+                    timeout_ms=self._capture_timeout_ms,
+                )
+                color = frames.get_color_frame()
+                if not color:
+                    return None
+                return np.asanyarray(color.get_data())
+            except Exception:
+                # Soft path — single timeouts handled by _capture_session's
+                # stale-detection logic; don't spam the log per frame.
+                return None
+        elif name == "usb":
+            cap = backend["cap"]
+            ok, frame = cap.read()
+            if not ok or frame is None:
+                return None
+            return frame
+        return None
+
+    def _teardown(self, backend: dict) -> None:
+        name = backend.get("name")
+        try:
+            if name == "realsense":
+                backend["pipeline"].stop()
+            elif name == "usb":
+                backend["cap"].release()
+        except Exception as exc:
+            log.info("Camera teardown: %s", exc)
--- a/vision/face_gallery.py
+++ b/vision/face_gallery.py
@ -0,0 +1,363 @@
+"""Face gallery — pure file IO over data/faces/face_{id}/.
+
+Layout per face:
+    face_{id}/
+      face_1.jpg   ← samples (≥1 required)
+      face_2.jpg
+      face_3.png
+      meta.json    ← optional: {"name": "...", "description": "...", "added_at": "..."}
+
+`description` is free text the operator writes about the person ("lead
+engineer, likes coffee") — it's folded into the Gemini primer turn so
+Gemini can reference it when it recognises that face.
+
+No ML — Gemini does the recognition in-context using the samples we feed it
+via the primer turn. This module's only jobs are:
+  - enumerate enrolled faces
+  - serve & accept JPEG/PNG bytes per face
+  - rename / describe / delete / zip / load-for-primer
+
+Thread-safe via a single internal RLock.
+"""
+
+from __future__ import annotations
+
+import io
+import json
+import re
+import threading
+import zipfile
+from dataclasses import dataclass, field
+from datetime import datetime
+from pathlib import Path
+from typing import Iterable
+
+from Project.Sanad.core.logger import get_logger
+
+log = get_logger("face_gallery")
+
+
+_DIR_RE = re.compile(r"^face_(\d+)$")
+ALLOWED_EXTS = {".jpg", ".jpeg", ".png"}
+SAMPLE_NAME_RE = re.compile(r"^face_(\d+)\.(jpg|jpeg|png)$", re.IGNORECASE)
+
+
+@dataclass
+class PhotoInfo:
+    name: str
+    size_bytes: int
+    path: Path
+
+
+@dataclass
+class FaceEntry:
+    id: int
+    name: str | None
+    added_at: str | None
+    dir: Path
+    description: str | None = None
+    sample_paths: list[Path] = field(default_factory=list)
+
+    def to_dict(self) -> dict:
+        return {
+            "id": self.id,
+            "name": self.name,
+            "description": self.description,
+            "added_at": self.added_at,
+            "dir": str(self.dir),
+            "photos": [
+                {"name": p.name, "size_bytes": p.stat().st_size}
+                for p in self.sample_paths
+                if p.exists()
+            ],
+        }
+
+
+class FaceGallery:
+    """File-system backed gallery rooted at `root` (e.g. data/faces/)."""
+
+    def __init__(self, root: Path | str) -> None:
+        self.root = Path(root)
+        self._lock = threading.RLock()
+
+    # ── read ────────────────────────────────────────────────
+
+    def _ensure_root(self) -> None:
+        self.root.mkdir(parents=True, exist_ok=True)
+
+    def _iter_face_dirs(self) -> Iterable[tuple[int, Path]]:
+        if not self.root.exists():
+            return
+        for child in sorted(self.root.iterdir()):
+            if not child.is_dir():
+                continue
+            m = _DIR_RE.match(child.name)
+            if not m:
+                continue
+            yield int(m.group(1)), child
+
+    def _samples_in(self, face_dir: Path) -> list[Path]:
+        out: list[Path] = []
+        for p in sorted(face_dir.iterdir()):
+            if p.is_file() and p.suffix.lower() in ALLOWED_EXTS:
+                out.append(p)
+        return out
+
+    def _meta(self, face_dir: Path) -> tuple[str | None, str | None, str | None]:
+        """Return (name, description, added_at) — any may be None."""
+        meta_path = face_dir / "meta.json"
+        if not meta_path.exists():
+            return None, None, None
+        try:
+            data = json.loads(meta_path.read_text(encoding="utf-8"))
+        except Exception:
+            return None, None, None
+        name = data.get("name")
+        description = data.get("description")
+        added = data.get("added_at")
+        return (name if name else None), (description if description else None), added
+
+    def list(self) -> list[FaceEntry]:
+        with self._lock:
+            entries: list[FaceEntry] = []
+            for face_id, face_dir in self._iter_face_dirs():
+                name, description, added = self._meta(face_dir)
+                entries.append(FaceEntry(
+                    id=face_id,
+                    name=name,
+                    description=description,
+                    added_at=added,
+                    dir=face_dir,
+                    sample_paths=self._samples_in(face_dir),
+                ))
+            return entries
+
+    def get(self, face_id: int) -> FaceEntry | None:
+        with self._lock:
+            face_dir = self.root / f"face_{face_id}"
+            if not face_dir.is_dir():
+                return None
+            name, description, added = self._meta(face_dir)
+            return FaceEntry(
+                id=face_id, name=name, description=description, added_at=added,
+                dir=face_dir, sample_paths=self._samples_in(face_dir),
+            )
+
+    def get_photo(self, face_id: int, photo_name: str) -> Path | None:
+        with self._lock:
+            face_dir = self.root / f"face_{face_id}"
+            if not face_dir.is_dir():
+                return None
+            p = face_dir / photo_name
+            try:
+                p.resolve().relative_to(face_dir.resolve())
+            except ValueError:
+                return None
+            if not p.exists() or p.suffix.lower() not in ALLOWED_EXTS:
+                return None
+            return p
+
+    # ── write ───────────────────────────────────────────────
+
+    def next_id(self) -> int:
+        with self._lock:
+            max_id = 0
+            for face_id, _ in self._iter_face_dirs():
+                if face_id > max_id:
+                    max_id = face_id
+            return max_id + 1
+
+    def _next_sample_name(self, face_dir: Path, ext: str) -> str:
+        """Return next free face_N.<ext> filename inside face_dir."""
+        existing = self._samples_in(face_dir)
+        max_n = 0
+        for p in existing:
+            m = SAMPLE_NAME_RE.match(p.name)
+            if m:
+                n = int(m.group(1))
+                if n > max_n:
+                    max_n = n
+        return f"face_{max_n + 1}{ext.lower()}"
+
+    @staticmethod
+    def _detect_ext(jpeg_or_png: bytes) -> str:
+        """Sniff PNG vs JPEG from the magic bytes."""
+        if len(jpeg_or_png) >= 8 and jpeg_or_png[:8] == b"\x89PNG\r\n\x1a\n":
+            return ".png"
+        return ".jpg"
+
+    def _write_meta(self, face_dir: Path, name: str | None,
+                    description: str | None = None,
+                    added_at: str | None = None) -> None:
+        meta: dict[str, str] = {}
+        if name:
+            meta["name"] = name
+        if description:
+            meta["description"] = description
+        meta["added_at"] = added_at or datetime.now().isoformat(timespec="seconds")
+        (face_dir / "meta.json").write_text(
+            json.dumps(meta, ensure_ascii=False, indent=2),
+            encoding="utf-8",
+        )
+
+    def create_face(self, image_bytes_list: list[bytes],
+                    name: str | None = None,
+                    description: str | None = None) -> FaceEntry:
+        """Create a new face_{next_id}/ with one or more samples."""
+        if not image_bytes_list:
+            raise ValueError("create_face: empty image list")
+        with self._lock:
+            self._ensure_root()
+            face_id = self.next_id()
+            face_dir = self.root / f"face_{face_id}"
+            face_dir.mkdir(parents=True, exist_ok=False)
+            for idx, data in enumerate(image_bytes_list, start=1):
+                ext = self._detect_ext(data)
+                fname = f"face_{idx}{ext}"
+                (face_dir / fname).write_bytes(data)
+            clean_name = (name or "").strip() or None
+            clean_desc = (description or "").strip() or None
+            self._write_meta(face_dir, clean_name, clean_desc)
+            log.info("Created face_%d (samples=%d, name=%s, desc=%s)",
+                     face_id, len(image_bytes_list), clean_name or "(unnamed)",
+                     "yes" if clean_desc else "no")
+            return self.get(face_id)  # type: ignore[return-value]
+
+    def add_photo(self, face_id: int, image_bytes: bytes) -> str:
+        """Append a new sample to an existing face. Returns the filename."""
+        with self._lock:
+            face_dir = self.root / f"face_{face_id}"
+            if not face_dir.is_dir():
+                raise FileNotFoundError(f"face_{face_id} not found")
+            ext = self._detect_ext(image_bytes)
+            fname = self._next_sample_name(face_dir, ext)
+            (face_dir / fname).write_bytes(image_bytes)
+            log.info("Added sample %s to face_%d", fname, face_id)
+            return fname
+
+    def rename(self, face_id: int, name: str | None) -> None:
+        """Update meta.json with a new name (or clear it if name is empty).
+
+        Preserves the existing description + added_at.
+        """
+        with self._lock:
+            face_dir = self.root / f"face_{face_id}"
+            if not face_dir.is_dir():
+                raise FileNotFoundError(f"face_{face_id} not found")
+            _, description, added = self._meta(face_dir)
+            clean = (name or "").strip() or None
+            self._write_meta(face_dir, clean, description, added_at=added)
+            log.info("Renamed face_%d → %s", face_id, clean or "(unnamed)")
+
+    def set_description(self, face_id: int, description: str | None) -> None:
+        """Update meta.json with a free-text description (or clear it).
+
+        Preserves the existing name + added_at. The description is folded
+        into the Gemini primer turn so Gemini can reference it.
+        """
+        with self._lock:
+            face_dir = self.root / f"face_{face_id}"
+            if not face_dir.is_dir():
+                raise FileNotFoundError(f"face_{face_id} not found")
+            name, _, added = self._meta(face_dir)
+            clean = (description or "").strip() or None
+            self._write_meta(face_dir, name, clean, added_at=added)
+            log.info("Set description for face_%d (%s)", face_id,
+                     "cleared" if not clean else f"{len(clean)} chars")
+
+    def delete_photo(self, face_id: int, photo_name: str) -> None:
+        """Delete one photo. Refuses if it's the only remaining sample."""
+        with self._lock:
+            face_dir = self.root / f"face_{face_id}"
+            if not face_dir.is_dir():
+                raise FileNotFoundError(f"face_{face_id} not found")
+            samples = self._samples_in(face_dir)
+            if len(samples) <= 1:
+                raise ValueError(
+                    "Cannot delete the only photo — delete the face instead."
+                )
+            target = self.get_photo(face_id, photo_name)
+            if target is None:
+                raise FileNotFoundError(f"photo {photo_name} not found")
+            target.unlink()
+            log.info("Deleted %s from face_%d", photo_name, face_id)
+
+    def delete_face(self, face_id: int) -> None:
+        """Delete the entire face_{id}/ folder (including meta.json)."""
+        import shutil
+        with self._lock:
+            face_dir = self.root / f"face_{face_id}"
+            if not face_dir.is_dir():
+                raise FileNotFoundError(f"face_{face_id} not found")
+            shutil.rmtree(face_dir)
+            log.info("Deleted face_%d", face_id)
+
+    def zip_face(self, face_id: int) -> bytes:
+        """Return the entire face_{id}/ folder packaged as a ZIP."""
+        with self._lock:
+            face_dir = self.root / f"face_{face_id}"
+            if not face_dir.is_dir():
+                raise FileNotFoundError(f"face_{face_id} not found")
+            buf = io.BytesIO()
+            with zipfile.ZipFile(buf, "w", compression=zipfile.ZIP_DEFLATED) as zf:
+                for p in sorted(face_dir.iterdir()):
+                    if p.is_file():
+                        zf.write(p, arcname=f"face_{face_id}/{p.name}")
+            return buf.getvalue()
+
+    # ── primer support (used by gemini/script.py) ───────────
+
+    def load_for_primer(
+        self, max_samples_per_face: int = 3, resize_long_side: int = 256,
+    ) -> list[tuple[FaceEntry, list[bytes]]]:
+        """Return [(FaceEntry, [jpeg_bytes,…]), …] for Gemini upload.
+
+        Resizes each sample to longest-side <= resize_long_side, re-encodes
+        as JPEG (q=85) to keep the token cost manageable. Falls back to
+        the raw bytes if PIL isn't available.
+        """
+        entries = self.list()
+        if not entries:
+            return []
+        out: list[tuple[FaceEntry, list[bytes]]] = []
+        for e in entries:
+            paths = e.sample_paths[:max_samples_per_face]
+            jpegs: list[bytes] = []
+            for p in paths:
+                try:
+                    raw = p.read_bytes()
+                except OSError:
+                    continue
+                processed = self._resize_for_primer(raw, resize_long_side)
+                jpegs.append(processed or raw)
+            if jpegs:
+                out.append((e, jpegs))
+        return out
+
+    @staticmethod
+    def _resize_for_primer(raw: bytes, long_side: int) -> bytes | None:
+        """Resize image to longest-side ≤ long_side, re-encode JPEG q=85.
+
+        Returns None on any failure (caller falls back to raw bytes).
+        """
+        try:
+            from PIL import Image  # type: ignore
+        except Exception:
+            return None
+        try:
+            img = Image.open(io.BytesIO(raw))
+            img.load()
+            if img.mode not in ("RGB", "L"):
+                img = img.convert("RGB")
+            w, h = img.size
+            scale = long_side / max(w, h) if max(w, h) > long_side else 1.0
+            if scale < 1.0:
+                img = img.resize(
+                    (max(1, int(w * scale)), max(1, int(h * scale))),
+                    Image.LANCZOS,
+                )
+            buf = io.BytesIO()
+            img.save(buf, format="JPEG", quality=85, optimize=True)
+            return buf.getvalue()
+        except Exception:
+            return None
--- a/vision/recognition_state.py
+++ b/vision/recognition_state.py
@ -0,0 +1,68 @@
+"""Recognition state file — atomic JSON I/O shared by parent + child.
+
+The dashboard (parent process) writes this file on every toggle / face
+gallery change; the Gemini child (`gemini/script.py`) polls it at 1 Hz
+to flip its in-memory flags without a session restart.
+
+Format (data/.recognition_state.json):
+    {
+        "vision_enabled":    bool,
+        "face_rec_enabled":  bool,
+        "gallery_version":   int   # bumped on every face CRUD
+    }
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import tempfile
+from dataclasses import asdict, dataclass
+from pathlib import Path
+
+
+@dataclass
+class RecognitionState:
+    vision_enabled: bool = False
+    face_rec_enabled: bool = False
+    gallery_version: int = 0
+
+
+def read(path: Path) -> RecognitionState:
+    """Return the persisted state, or a default if missing/corrupt."""
+    try:
+        raw = json.loads(Path(path).read_text(encoding="utf-8"))
+    except (FileNotFoundError, json.JSONDecodeError, OSError):
+        return RecognitionState()
+    return RecognitionState(
+        vision_enabled=bool(raw.get("vision_enabled", False)),
+        face_rec_enabled=bool(raw.get("face_rec_enabled", False)),
+        gallery_version=int(raw.get("gallery_version", 0)),
+    )
+
+
+def write(path: Path, state: RecognitionState) -> None:
+    """Write atomically via tempfile + os.replace."""
+    p = Path(path)
+    p.parent.mkdir(parents=True, exist_ok=True)
+    fd, tmp = tempfile.mkstemp(prefix=f".{p.name}.", suffix=".tmp", dir=str(p.parent))
+    try:
+        with os.fdopen(fd, "w", encoding="utf-8") as fh:
+            json.dump(asdict(state), fh, ensure_ascii=False, indent=2)
+        os.replace(tmp, p)
+    except Exception:
+        try:
+            os.unlink(tmp)
+        except OSError:
+            pass
+        raise
+
+
+def mutate(path: Path, **changes) -> RecognitionState:
+    """Read-modify-write helper. Returns the new state."""
+    cur = read(path)
+    for k, v in changes.items():
+        if hasattr(cur, k):
+            setattr(cur, k, v)
+    write(path, cur)
+    return cur
				`@ -0,0 +1 @@`
				`"""Vision package — camera daemon + face gallery for Gemini-side recognition."""`