commit ed95a68b0e29104b962967d6ce482a56ae902716 Author: kassam Date: Wed May 13 11:30:29 2026 +0400 Update 2026-05-13 11:30:28 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c02264b --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +__pycache__/ +*.pyc +Logs/ +*.log diff --git a/README.md b/README.md new file mode 100644 index 0000000..b30b581 --- /dev/null +++ b/README.md @@ -0,0 +1,237 @@ +# Sanad_lite + +Multi-user, browser-audio fork of [Sanad](../Sanad/). The full Sanad robot +stack (arm, macros, camera, live conversation subprocess) was stripped out; +what remains is a small FastAPI dashboard for **typed-replay TTS** and +**saved-record management** where **all audio plays in each user's own +browser**, not on the host machine. + +``` +┌────────────────────────────────────────────────────────────────────┐ +│ Dashboard (FastAPI) ── http://:8000 │ +│ ├─ /login Cookie-session auth │ +│ ├─ Voice & Audio Gemini API key, Typed Replay (TTS) │ +│ ├─ Recordings Saved WAVs — Play / Raw / Download / Del │ +│ │ plus "Delete All" │ +│ └─ Settings & Logs Scripts, system prompt, live log tail │ +└────────────────────────────────────────────────────────────────────┘ +``` + + +## Run on your laptop + +```bash +pip install --user \ + fastapi 'uvicorn[standard]' itsdangerous python-multipart pydantic \ + websockets + +cd /home/zedx/Robotics_workspace/yslootahtech/Project/Sanad_lite +SANAD_DASHBOARD_HOST=127.0.0.1 python3 main.py +``` + +Open and sign in with: + +> **Username:** `lkasjda213h` +> **Password:** `kj812bf@jdon` + +Setting `SANAD_DASHBOARD_HOST=127.0.0.1` keeps the server bound to +localhost; omit it to auto-bind to `wlan0`'s IP so colleagues on the LAN +can reach it at `http://:8000`. + +The `websockets` package is needed because the Gemini Live TTS used by +Typed Replay opens a WebSocket to Google. Everything else (records list, +records delete-all, login, logs) works without it. 
+ +> The other heavy deps (`pyaudio`, `transformers`, `torch`) are listed in +> `requirements.txt` but are **not required** for the lite dashboard. +> They were leftovers from the parent Sanad project and may still be +> imported lazily by `voice/audio_manager.py` / `voice/local_tts.py` +> on construction — failures are caught silently in `main.py`. + + +## Run on the server + +Replace the `<user>`, `<server>` and path placeholders with your server's values: + +```bash +# 1. Install deps once on the server +ssh <user>@<server> 'pip install itsdangerous fastapi "uvicorn[standard]" python-multipart pydantic websockets' + +# 2. Push the lite tree +rsync -av --delete \ + --exclude=__pycache__ --exclude=logs --exclude=data \ + /home/zedx/Robotics_workspace/yslootahtech/Project/Sanad_lite/ \ + <user>@<server>:~/Sanad_lite/ + +# 3. Start it on the server (SSH in first, then run) +ssh <user>@<server> +cd ~/Sanad_lite +python3 main.py +``` + +Then open `http://<server>:8000` and sign in with **`lkasjda213h`** / +**`kj812bf@jdon`**. + +To leave it running after you log out, use `tmux`, `screen`, `nohup`, or +the systemd unit at `shell_scripts/sanad.service` (edit the paths inside +to match your install). + + +## Login + +Credentials are in `config/core_config.json`: +```json +"auth": { + "username": "lkasjda213h", + "password": "kj812bf@jdon" +} +``` + +Change them before any non-LAN deployment. The session cookie is signed +with a fresh secret each time `main.py` starts, so a restart logs every +user out. + +For a stronger setup, replace the plaintext check with a bcrypt hash in +`dashboard/routes/auth.py`. 
+ + +## Audio architecture — who plays what, where + +| Action | Where audio plays | +|---|---| +| Recordings → **Play** | each viewing user's browser | +| Recordings → **Raw** | each viewing user's browser | +| Recordings → **Download** | saves WAV to viewing user's device | +| Recordings → **Delete All** | wipes `data/audio/*.wav` on the server | +| Voice & Audio → **Typed Replay → Generate & Play** | each viewing user's browser | +| Voice & Audio → **Typed Replay → Replay Last** | each viewing user's browser | + +Server-side ALSA / PulseAudio is **not** touched for any of the above. +Both audio paths use the same pattern: + +1. Server generates / loads the WAV bytes. +2. Server returns them as `audio/wav` from an HTTP endpoint + (`/api/records/audio/{name}` or `/api/typed-replay/audio/last`). +3. Browser fetches the response into `new Audio(url)` and calls `.play()`. + +So if you host the dashboard on machine **A** and a colleague on machine +**B** opens `http://A:8000` and clicks Play, the sound comes out of **B's** +speakers. Machine A stays silent. + + +## Directory layout + +| Path | Contents | +|---|---| +| `main.py` | Entry point — boots subsystems + dashboard. | +| `config.py` | Runtime constants derived from `config/*_config.json`. | +| `config/` | Per-subsystem JSON: `core`, `voice`, `gemini`, `dashboard`. | +| `core/` | Brain (callback whitelist + status), skill registry, event bus, config loader, logger. | +| `gemini/` | `client.py` — Gemini Live WebSocket client used by typed_replay for one-shot TTS calls. | +| `voice/` | `typed_replay.py` (server generates, browser plays), `audio_manager.py` (host PyAudio — only used to share a PyAudio instance with typed_replay; degrades gracefully if PyAudio is missing), `local_tts.py` (offline SpeechT5 — unused in the lite UI but kept for the `/api/voice/generate` legacy route), `audio_devices.py`, `text_utils.py`. 
| +| `dashboard/` | `app.py` (FastAPI + SessionMiddleware + auth gate), `routes/*.py`, `static/index.html`, `static/login.html`. | +| `dashboard/routes/` | `auth.py`, `health.py`, `system.py`, `voice.py`, `logs.py`, `audio_control.py`, `scripts.py`, `records.py`, `prompt.py`, `typed_replay.py`, plus `websockets/log_stream.py`. | +| `scripts/` | `sanad_script.txt` (persona), `sanad_rule.txt` (rules). | +| `data/audio/` | Generated WAVs from Typed Replay → Save Last. Wiped by "Delete All". | +| `data/motions/` | Persisted dashboard settings (Gemini API key, G1 volume) — back-compat path. | +| `logs/` | Per-module rotating logs. | +| `tests/` | `test_smoke.py` — Brain whitelist, skill registry, wake-phrase matching, atomic IO, audio devices, isolation. | + + +## Runtime env vars + +| Var | Values | Default | Effect | +|---|---|---|---| +| `SANAD_DASHBOARD_HOST` | IP or hostname | wlan0's IP | Override the bind address. Use `127.0.0.1` for localhost-only. | +| `SANAD_DASHBOARD_INTERFACE` | iface name | `wlan0` | Pick which interface's IP to auto-bind to. | +| `SANAD_GEMINI_API_KEY` | string | reads from `data/motions/config.json` | Override the Gemini API key. | + + +## What was stripped vs Sanad (full) + +Removed because the lite dashboard never needed them: + +- **Motion / arm:** `motion/`, `scripts/sanad_arm.txt`, `config/motion_config.json`, `dashboard/routes/{motion,macros,replay,skills}.py`. +- **Live voice conversation:** `voice/sanad_voice.py`, `voice/audio_io.py`, `voice/live_voice_loop.py`, `voice/wake_phrase_manager.py`, `voice/model_script.py`, `voice/model_subprocess.py`, `gemini/subprocess.py`, `gemini/script.py`, `dashboard/routes/{live_voice,live_subprocess,wake_phrases}.py`. +- **Offline brain:** `local/` (LLM, STT, TTS, VAD), `config/local_config.json`. +- **Camera / vision:** `dashboard/routes/vision.py` and all `/api/vision/*` endpoints, the camera tab UI. +- **Examples / demos:** `examples/`. 
+- **Tabs:** Operations, Motion & Replay, Camera & Vision (deprecated), Live Voice Commands card, Wake Phrase Manager card, Live Gemini Process card. + +Added by lite: + +- **Login page + session cookie auth** (`dashboard/routes/auth.py`, `dashboard/static/login.html`, `SessionMiddleware`). +- **Browser-side audio streaming** — `GET /api/records/audio/{name}?kind={speaker,raw}` and `GET /api/typed-replay/audio/last`. +- **Download button** on each saved record. +- **Delete All button** that wipes every WAV under `data/audio/`. + + +## Troubleshooting + +| Symptom | Fix | +|---|---| +| `ModuleNotFoundError: itsdangerous` at startup | `pip install itsdangerous` — required by Starlette's `SessionMiddleware`. | +| `ModuleNotFoundError: websockets` when generating typed-replay audio | `pip install websockets` — `gemini/client.py` uses it. | +| Redirected to `/login` on every API call | Session cookie cleared on server restart by design — sign in again. | +| `Failed to construct audio_mgr — pyaudio not installed` warning at startup | Harmless on a laptop. `voice/audio_manager.py` requires PyAudio + portaudio headers; not needed for any user-facing button. Install with `sudo apt install portaudio19-dev && pip install pyaudio` if you want it gone. | +| ALSA / PortAudio noise at startup (`pcm_dmix.c`, `Cannot connect to JACK`) | Pre-init probe of PortAudio inside `pyaudio.PyAudio()`. Cosmetic — the lite dashboard never actually opens an ALSA stream. To silence it, drop PyAudio entirely (uninstall + add a `_safe_import` guard for `voice.audio_manager`). | +| `Gemini TTS attempt N returned no audio — parts: …` then 503 | Gemini Live is non-deterministic on short Arabic snippets — it sometimes returns reasoning text instead of audio. The retry chain in `voice/typed_replay.py:generate_audio` tries 3 prompt variants. Lengthen the text or add diacritics if it persists. 
| +| `cannot import name 'X' from 'Project.Sanad.main'` | A route is trying to import a global that lite removed. Add a `try/except ImportError` in that route or drop the route from `dashboard/app.py:_REST_ROUTES`. | + + +## Endpoints + +``` +GET / → / dashboard (auth-gated) +GET /login → login page +POST /api/auth/login → {username,password} → set cookie +POST /api/auth/logout → clear cookie +GET /api/auth/me → {authenticated, user} + +GET /api/health → {status, brain} +GET /api/status → {brain, voice} +GET /api/system/info → host / interfaces / subsystems + +GET /api/voice/status → Gemini connection state +POST /api/voice/connect → connect Gemini Live socket +POST /api/voice/disconnect → disconnect +GET /api/voice/api-key → masked current key +POST /api/voice/api-key → {key} → persist new key + +POST /api/typed-replay/say → {text,record,record_name} → generates, caches +GET /api/typed-replay/audio/last → streams cached WAV (browser plays it) +POST /api/typed-replay/replay-last → bumps replay counter (audio still client-side) +POST /api/typed-replay/save-last → persists cached generation to records +GET /api/typed-replay/status → engine + session state +GET /api/typed-replay/records → list +DELETE /api/typed-replay/records/{name} → delete one +POST /api/typed-replay/records/{name}/rename + +GET /api/records/ → list saved records +GET /api/records/audio/{name}?kind=... 
→ stream a record's WAV +POST /api/records/delete → {record_name} → delete one +POST /api/records/delete-all → wipe data/audio/*.wav + reset index + +GET /api/scripts/ → list persona/rule files +POST /api/scripts/load → {name} → file contents +POST /api/scripts/save → {name,content} +POST /api/scripts/create → {name,content} +POST /api/scripts/delete → {name} + +GET /api/prompt/ → resolved system prompt +POST /api/prompt/update → {content} +POST /api/prompt/reload → re-read from disk + +GET /api/logs/{module}/tail → last N log lines +POST /api/logs/snapshot → save snapshot bundle +GET /api/logs/bundle → download all logs as a zip +GET /api/audio/status → mic/spk mute state (server-side, informational) +WS /ws/logs → live log stream +``` + + +## License / attribution + +Internal project for YS Lootah Technology. Trimmed from Sanad — original +Sanad reuses patterns from `SanadVoice/gemini_interact` and Unitree +`unitree_sdk2py`. diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/config.py b/config.py new file mode 100644 index 0000000..b3f83c3 --- /dev/null +++ b/config.py @@ -0,0 +1,440 @@ +"""Centralized configuration for the Sanad robot assistant. + +Resolution order for BASE_DIR (highest priority first): + 1. SANAD_PROJECT_ROOT environment variable + 2. PROJECT_BASE + PROJECT_NAME from .env file (or env vars) + 3. Path(__file__).resolve().parent.parent (auto-detected from this file's location) + +Every other directory is derived from BASE_DIR — never hardcode an absolute path. 
def _read_env_file(env_path: Path) -> dict[str, str]:
    """Parse a ``.env`` file into a dict (no python-dotenv dependency).

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Every other line is split on the first ``=``; the key is
    whitespace-stripped, the value is whitespace-stripped and then has any
    leading/trailing double or single quote characters removed.

    The reader is best-effort: a missing, unreadable, or non-UTF-8 file
    yields an empty dict rather than raising, because this runs while the
    config module is being imported.
    """
    if not env_path.exists():
        return {}
    try:
        text = env_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is a ValueError, not an OSError — without it a
        # stray binary .env would abort the import of this module.
        return {}

    out: dict[str, str] = {}
    for raw in text.splitlines():
        line = raw.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        out[key.strip()] = value.strip().strip('"').strip("'")
    return out


def _resolve_base_dir() -> Path:
    """Resolve the project root directory, honouring overrides.

    Resolution order (first match wins):
      1. ``SANAD_PROJECT_ROOT`` env var, if it names an existing path.
      2. ``PROJECT_BASE`` / ``PROJECT_NAME`` — read from ``.env`` next to
         this file, then from ``.env`` one directory up; each value falls
         back to the process environment. Used only if the joined path
         exists.
      3. The directory containing this file.
    """
    # 1. Direct env override
    override = os.environ.get("SANAD_PROJECT_ROOT", "").strip()
    if override:
        root = Path(override).expanduser().resolve()
        if root.exists():
            return root

    # 2. PROJECT_BASE + PROJECT_NAME pattern
    here = Path(__file__).resolve().parent
    for env_path in (here / ".env", here.parent / ".env"):
        env = _read_env_file(env_path)
        base = env.get("PROJECT_BASE") or os.environ.get("PROJECT_BASE", "")
        name = env.get("PROJECT_NAME") or os.environ.get("PROJECT_NAME", "")
        if base and name:
            candidate = Path(base).expanduser().resolve() / name
            if candidate.exists():
                return candidate

    # 3. Auto-detect — the directory this file lives in is the project root.
    return here
def _load_core_config() -> dict[str, Any]:
    """Load baseline defaults from ``<BASE_DIR>/config/core_config.json``.

    Returns the top-level JSON object with every key that starts with ``_``
    (``_description`` / ``_comment`` annotations) removed. Nested objects
    are returned untouched.

    Best-effort by design: this runs at import time, so a missing file, an
    unreadable file, invalid JSON, non-UTF-8 bytes, or a JSON root that is
    not an object all yield ``{}`` instead of raising.
    """
    cfg_path = BASE_DIR / "config" / "core_config.json"
    if not cfg_path.exists():
        return {}
    try:
        raw = json.loads(cfg_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {}
    if not isinstance(raw, dict):
        # e.g. a list or bare string at the root — nothing usable.
        return {}
    # Strip _comment / _description noise
    return {k: v for k, v in raw.items() if not k.startswith("_")}
def _get_interface_ip(iface: str) -> str | None:
    """Return the IPv4 address bound to ``iface``, or None if it has none.

    Only interface-specific strategies are tried, in order:
      1. ``ioctl(SIOCGIFADDR)`` — fastest, no subprocess.
      2. ``ip -4 -o addr show <iface>``.

    The host-wide ``hostname -I`` lookup is deliberately NOT used here: it
    ignores ``iface`` and would attribute some other interface's address to
    this one. ``_resolve_dashboard_host`` applies that fallback itself.
    """
    for lookup in (_get_iface_ip_fcntl, _get_iface_ip_via_ip_cmd):
        ip = lookup(iface)
        if ip:
            return ip
    return None


def _get_iface_ip_fcntl(iface: str) -> str | None:
    """Query ``iface``'s IPv4 address via the SIOCGIFADDR ioctl.

    Returns None on any failure (non-Linux platform, unknown interface,
    interface without an IPv4 address, ...).
    """
    try:
        import fcntl
        import socket
        import struct
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            # Interface names are limited to 15 bytes + NUL in struct ifreq.
            ifname = iface[:15].encode("utf-8")
            packed = fcntl.ioctl(
                s.fileno(),
                0x8915,  # SIOCGIFADDR
                struct.pack("256s", ifname),
            )
            # Bytes 20..23 of the returned ifreq hold the IPv4 address.
            return socket.inet_ntoa(packed[20:24])
        finally:
            s.close()
    except Exception:
        return None


def _get_iface_ip_via_ip_cmd(iface: str) -> str | None:
    """Parse the first ``inet`` address from ``ip -4 -o addr show <iface>``.

    Returns None if the command is missing, fails, times out (2 s), or
    prints no ``inet`` entry.
    """
    try:
        import subprocess
        r = subprocess.run(
            ["ip", "-4", "-o", "addr", "show", iface],
            capture_output=True, text=True, timeout=2.0,
        )
        if r.returncode != 0:
            return None
        # Output: "5: wlan0    inet 10.255.254.86/24 brd ..."
        for line in r.stdout.splitlines():
            parts = line.split()
            for i, token in enumerate(parts):
                if token == "inet" and i + 1 < len(parts):
                    return parts[i + 1].split("/")[0]
    except Exception:
        return None
    return None


def _pick_non_loopback_ipv4(output: str) -> str | None:
    """Return the first dotted, non-127.x token in whitespace-separated text."""
    for token in output.split():
        if "." in token and not token.startswith("127."):
            return token
    return None


def _host_primary_ip() -> str | None:
    """Return the first non-loopback IPv4 printed by ``hostname -I``.

    Host-wide, not tied to any interface. None if the command is missing,
    fails, times out (1 s), or lists no suitable address.
    """
    try:
        import subprocess
        r = subprocess.run(
            ["hostname", "-I"], capture_output=True, text=True, timeout=1.0,
        )
    except Exception:
        return None
    if r.returncode != 0:
        return None
    return _pick_non_loopback_ipv4(r.stdout or "")


def _get_iface_ip_via_proc(iface: str) -> str | None:
    """Host-wide fallback: first non-loopback IPv4 from ``hostname -I``.

    NOTE: despite the name, this does not read /proc and does not use
    ``iface`` — the result may belong to a different interface. It is kept
    for backward compatibility and is no longer part of
    ``_get_interface_ip``.
    """
    return _host_primary_ip()


def list_network_interfaces() -> list[dict]:
    """Return ``[{name, index, ip, is_up}]`` for every interface on the box.

    ``ip`` is the interface's IPv4 address or ``""``; ``is_up`` is True
    exactly when an IPv4 address was found (it is not the kernel link
    state). Returns ``[]`` if interfaces cannot be enumerated.

    Used by the dashboard's system-info panel.
    """
    out: list[dict] = []
    try:
        import socket
        for idx, name in socket.if_nameindex():
            ip = _get_interface_ip(name)
            out.append({
                "name": name,
                "index": idx,
                "ip": ip or "",
                "is_up": ip is not None,
            })
    except Exception:
        pass
    return out


def _resolve_dashboard_host() -> str:
    """Resolve the host the dashboard should bind to.

    Order:
      1. SANAD_DASHBOARD_HOST env var (explicit IP/hostname)
      2. IP of DASHBOARD_INTERFACE (SANAD_DASHBOARD_INTERFACE, default wlan0)
      3. First non-loopback IP from ``hostname -I``
      4. 0.0.0.0 (bind everywhere)
    """
    explicit = os.environ.get("SANAD_DASHBOARD_HOST", "").strip()
    if explicit:
        return explicit
    return (
        _get_interface_ip(DASHBOARD_INTERFACE)
        or _host_primary_ip()
        or "0.0.0.0"
    )
def _ensure_dirs() -> list[str]:
    """Create the runtime directories; collect failures instead of raising.

    Returns the paths (as strings) that could not be created so the caller
    can decide whether to log or abort. A permission error on one directory
    never prevents the others from being created, and never crashes import.
    """
    failed: list[str] = []
    for d in (DATA_DIR, LOGS_DIR, SCRIPTS_DIR, AUDIO_RECORDINGS_DIR, MOTIONS_DIR):
        try:
            d.mkdir(parents=True, exist_ok=True)
        except OSError:
            failed.append(str(d))
    return failed


def load_config() -> dict[str, Any]:
    """Load runtime config overrides from CONFIG_FILE.

    Always returns a dict. A missing or unreadable file, invalid JSON,
    non-UTF-8 content, or a JSON root that is not an object all yield ``{}``
    so callers can use ``.get()`` on the result unconditionally.
    """
    try:
        with open(CONFIG_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers "file does not exist"; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return {}
    return data if isinstance(data, dict) else {}


def save_config(cfg: dict[str, Any]) -> None:
    """Atomically write ``cfg`` to CONFIG_FILE as pretty-printed UTF-8 JSON.

    The data is written to a temp file in the same directory and then moved
    over the target with ``os.replace``, so readers never observe a
    half-written file. On any failure the temp file is removed and the
    original exception is re-raised.
    """
    import tempfile

    CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
    fd, tmp = tempfile.mkstemp(
        prefix=f".{CONFIG_FILE.name}.", suffix=".tmp",
        dir=str(CONFIG_FILE.parent),  # same filesystem → replace is atomic
    )
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(cfg, f, ensure_ascii=False, indent=2)
        os.replace(tmp, CONFIG_FILE)
    except Exception:
        try:
            os.unlink(tmp)
        except OSError:
            pass
        raise
def _apply_overrides():
    """Apply data/motions/config.json overrides on top of module constants.

    Precedence (highest first): environment variables, then config.json,
    then the baseline constants already computed in this module. A
    config.json value is therefore skipped when the matching ``SANAD_*``
    env var is set to a non-empty value.

    Each override is applied independently:
      * string settings are applied only if the value is a non-empty string
        (an empty or null ``model`` / ``host`` is treated as "not set");
      * integer settings are applied only if ``int(value)`` succeeds, so one
        malformed number does not discard the remaining overrides.
    """
    cfg = load_config()
    if not isinstance(cfg, dict) or not cfg:
        return
    g = globals()

    def env_is_set(var: str) -> bool:
        return bool(os.environ.get(var, "").strip())

    def set_str(section: dict, key: str, const: str, env_var: str = "") -> None:
        value = section.get(key)
        if not isinstance(value, str) or not value.strip():
            return
        if env_var and env_is_set(env_var):
            return  # explicit environment override wins
        g[const] = value

    def set_int(section: dict, key: str, const: str) -> None:
        try:
            g[const] = int(section[key])
        except (KeyError, TypeError, ValueError):
            pass  # absent or malformed — keep the current value

    gemini = cfg.get("gemini", {})
    if isinstance(gemini, dict):
        set_str(gemini, "api_key", "GEMINI_API_KEY", "SANAD_GEMINI_API_KEY")
        set_str(gemini, "model", "GEMINI_MODEL", "SANAD_GEMINI_MODEL")
        set_str(gemini, "voice", "GEMINI_VOICE", "SANAD_GEMINI_VOICE")

    audio = cfg.get("audio", {})
    if isinstance(audio, dict):
        set_int(audio, "send_sample_rate", "SEND_SAMPLE_RATE")
        set_int(audio, "receive_sample_rate", "RECEIVE_SAMPLE_RATE")
        set_int(audio, "chunk_size", "CHUNK_SIZE")
        set_str(audio, "sink", "SINK")
        set_str(audio, "source", "SOURCE")

    dashboard = cfg.get("dashboard", {})
    if isinstance(dashboard, dict):
        set_str(dashboard, "host", "DASHBOARD_HOST", "SANAD_DASHBOARD_HOST")
        set_int(dashboard, "port", "DASHBOARD_PORT")
+ ], + "gestural_speaking_default": false + }, + + "logger": { + "log_level": "INFO", + "format": "%(asctime)s [%(name)s] %(levelname)-7s %(message)s", + "datefmt": "%Y-%m-%d %H:%M:%S", + "file_max_bytes": 10485760, + "file_backup_count": 7 + }, + + "event_bus": { + "emit_timeout_sec": 0.5 + }, + + "paths": { + "_comment": "Path roots — resolved against BASE_DIR in core/config.py", + "data": "data", + "logs": "logs", + "scripts": "scripts", + "model": "model", + "audio_recordings": "data/audio", + "motion_recordings": "data/recordings/motion", + "motions": "data/motions" + }, + + "gemini_defaults": { + "_comment": "Baseline Gemini API config — SINGLE SOURCE OF TRUTH. All voice modules read from here.", + "api_key": "AIzaSyDt9Xi83MDZuuPpfwfHyMD92X7ZKdGkqf8", + "model_live": "gemini-2.5-flash-native-audio-preview-12-2025", + "model_ws_uri": "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent", + "voice_name": "Charon", + "ws_timeout_sec": 30, + "default_system_prompt": "You are Sanad (Bousandah), a wise and friendly Emirati assistant. Speak strictly in the UAE dialect (Khaleeji). Be helpful, concise, and use local greetings like 'Marhaba' and 'Ya Khoy'." + }, + + "g1_hardware": { + "_comment": "G1 humanoid hardware constants — shared by every motion/voice module that talks to the arm.", + "num_motor": 29, + "enable_arm_sdk_index": 29, + "replay_hz": 60.0 + }, + + "script_files": { + "_comment": "Filenames (under scripts/) used across voice + dashboard", + "persona": "sanad_script.txt", + "rules": "sanad_rule.txt" + }, + + "dashboard_defaults": { + "host": null, + "port": 8000, + "interface": "wlan0" + }, + + "audio_defaults": { + "_comment": "Host PulseAudio fallback only — the G1 deployment uses UDP multicast mic + AudioClient.PlayStream speaker (see SANAD_USE_G1_MIC in config.py LIVE_TUNE). 
Default here is the Jetson/G1 built-in platform-sound chip.", + "send_sample_rate": 16000, + "receive_sample_rate": 24000, + "chunk_size": 512, + "channels": 1, + "sink": "alsa_output.platform-sound.analog-stereo", + "source": "alsa_input.platform-sound.analog-stereo" + }, + + "dds": { + "network_interface_default": "eth0" + }, + + "auth": { + "_comment": "Dashboard login credentials. CHANGE before any non-LAN deployment.", + "username": "lkasjda213h", + "password": "kj812bf@jdon" + } +} diff --git a/config/dashboard_config.json b/config/dashboard_config.json new file mode 100644 index 0000000..d361a01 --- /dev/null +++ b/config/dashboard_config.json @@ -0,0 +1,44 @@ +{ + "_description": "Tunables for dashboard/* modules. Loaded via core.config_loader.load('dashboard').", + + "app": { + "_comment": "dashboard/app.py — FastAPI app", + "title": "Sanad Dashboard", + "version": "1.0.0", + "static_subdir": "dashboard/static" + }, + + "api_input": { + "_comment": "Shared by every route that accepts user text input / uploads. 
Single source of truth.", + "max_text_len": 2000, + "max_upload_bytes": 8388608 + }, + + "voice_route": { + "_comment": "dashboard/routes/voice.py — reads max_text_len from api_input above", + "api_key_mask_visible": 4 + }, + + "typed_replay_route": { + "_comment": "dashboard/routes/typed_replay.py — reads max_text_len from api_input above" + }, + + "records_route": { + "_comment": "dashboard/routes/records.py", + "index_filename": "records.json" + }, + + "prompt_route": { + "_comment": "dashboard/routes/prompt.py — script/rule filenames come from core.script_files; default prompt from core.gemini_defaults.default_system_prompt" + }, + + "logs_route": { + "_comment": "dashboard/routes/logs.py", + "default_tail_lines": 200, + "max_tail_lines": 5000 + }, + + "scripts_route": { + "_comment": "dashboard/routes/scripts.py — max_script_bytes reads from api_input.max_upload_bytes" + } +} diff --git a/config/gemini_config.json b/config/gemini_config.json new file mode 100644 index 0000000..cfe548d --- /dev/null +++ b/config/gemini_config.json @@ -0,0 +1,11 @@ +{ + "_description": "Tunables for gemini/client.py. All keys are optional — defaults live in code. Gemini credentials (api_key, model_live, voice_name) come from core_config.json's gemini_defaults — single source of truth.", + + "client": { + "_comment": "gemini/client.py — Gemini Live WebSocket client used by the typed-replay engine for one-shot TTS calls.", + "recv_timeout_sec": 30, + "reconnect_max_attempts": 3, + "reconnect_initial_delay_sec": 1.0, + "reconnect_max_delay_sec": 10.0 + } +} diff --git a/config/voice_config.json b/config/voice_config.json new file mode 100644 index 0000000..7cc8dca --- /dev/null +++ b/config/voice_config.json @@ -0,0 +1,46 @@ +{ + "_description": "Tunables for voice/* modules. 
Loaded via core.config_loader.load('voice').", + + "speaker": { + "_comment": "G1 built-in speaker — AudioClient.PlayStream wrapper", + "app_name": "sanad", + "begin_stream_pause_sec": 0.15, + "wait_finish_margin_sec": 0.3 + }, + + "vad": { + "_comment": "Gemini Live server-side voice-activity-detection config", + "start_sensitivity": "START_SENSITIVITY_HIGH", + "end_sensitivity": "END_SENSITIVITY_LOW", + "prefix_padding_ms": 20, + "silence_duration_ms": 200 + }, + + "barge_in": { + "threshold": 500, + "loud_chunks_needed": 3, + "cooldown_sec": 0.3, + "echo_suppress_below": 500, + "ai_speak_grace_sec": 0.15 + }, + + "recording": { + "enabled": true, + "dir_relative": "data/recordings" + }, + + "typed_replay": { + "_comment": "voice/typed_replay.py — max_text_len comes from dashboard.api_input", + "monitor_chunk_size": 512, + "monitor_tail_sec": 0.2 + }, + + "local_tts": { + "_comment": "voice/local_tts.py — offline Coqui TTS", + "model_subdir": "speecht5_tts_clartts_ar", + "vocoder_subdir": "speecht5_hifigan", + "xvector_filename": "arabic_xvector_embedding.pt", + "sample_rate": 16000, + "channels": 1 + } +} diff --git a/core/__init__.py b/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/asyncio_compat.py b/core/asyncio_compat.py new file mode 100644 index 0000000..d4debaf --- /dev/null +++ b/core/asyncio_compat.py @@ -0,0 +1,33 @@ +"""asyncio compatibility shim for Python 3.8. + +`asyncio.to_thread` only exists from Python 3.9. The Jetson runs 3.8, so we +backfill it via run_in_executor on the default thread pool. 
async def _to_thread_backport(func: Callable[..., _T], /, *args: Any, **kwargs: Any) -> _T:
    """Run ``func(*args, **kwargs)`` in the default executor and await the result.

    Backport of ``asyncio.to_thread`` for interpreters that lack it
    (Python 3.8). Like the standard-library version it:

    * uses the *running* loop (``get_running_loop``) rather than
      ``get_event_loop``, so it can never silently create or pick up a
      different loop than the one awaiting the result;
    * copies the caller's ``contextvars`` context into the worker thread, so
      context variables set by the caller are visible inside ``func``.

    Args:
        func: Blocking callable to execute in a worker thread.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns. Exceptions raised by ``func`` propagate to
        the awaiting coroutine.
    """
    # Local import keeps this block self-contained (the module does not
    # import contextvars at file level).
    import contextvars

    loop = asyncio.get_running_loop()
    ctx = contextvars.copy_context()
    call = functools.partial(ctx.run, func, *args, **kwargs)
    return await loop.run_in_executor(None, call)


if sys.version_info >= (3, 9):
    # Native implementation
    to_thread = asyncio.to_thread  # type: ignore[attr-defined]
else:
    to_thread = _to_thread_backport

# Also patch the asyncio module so existing `asyncio.to_thread` calls work
# without rewriting every consumer file. Done lazily — only if missing.
if not hasattr(asyncio, "to_thread"):
    asyncio.to_thread = to_thread  # type: ignore[attr-defined]
class Brain:
    """Lite Brain — just the bits the surviving routes need.

    Holds the skill registry plus late-bound references to the voice client
    and the audio manager, and resolves skill callback strings against the
    module whitelist in ``ALLOWED_CALLBACK_PREFIXES``.
    """

    def __init__(self):
        self.registry = SkillRegistry()
        # NOTE(review): on Python 3.8/3.9 an asyncio.Lock binds to the event
        # loop that is current at construction time — confirm Brain is always
        # created on (or before) the loop that will later use this lock.
        self._lock = asyncio.Lock()
        self._voice = None       # gemini.client.GeminiVoiceClient
        self._audio_mgr = None   # voice.audio_manager.AudioManager

    # -- dependency injection --

    def attach_voice(self, client):
        """Store the voice client; ``status()`` reports it as attached."""
        self._voice = client
        log.info("Voice client attached")

    def attach_audio_manager(self, mgr):
        """Store the audio manager; ``status()`` reports it as attached."""
        self._audio_mgr = mgr
        log.info("Audio manager attached")

    # -- callback resolution (security whitelist) --

    def _resolve_callback(self, callback_str: str) -> Callable | None:
        """Resolve 'module.submodule:function_name' → callable.

        SECURITY: only modules under ALLOWED_CALLBACK_PREFIXES may be imported.
        Skill JSON is dashboard-editable and otherwise an arbitrary-import RCE.

        Args:
            callback_str: ``"<module path>:<attribute name>"``.

        Returns:
            The resolved callable, or ``None`` when the string is empty or
            malformed, the module is not whitelisted, the import / attribute
            lookup fails, or the attribute is not callable. Every rejection
            except the empty string is logged.
        """
        if not callback_str:
            return None
        if ":" not in callback_str:
            log.error("Invalid callback (missing ':'): %s", callback_str)
            return None
        module_path, func_name = callback_str.rsplit(":", 1)
        if not func_name:
            log.error("Invalid callback (missing function name): %s", callback_str)
            return None
        # A prefix such as "voice." admits both "voice.x" and the bare
        # package "voice" (prefix with its trailing dot removed).
        if not any(module_path.startswith(prefix) or module_path == prefix.rstrip(".")
                   for prefix in ALLOWED_CALLBACK_PREFIXES):
            log.error(
                "Callback %s rejected — module '%s' not in whitelist",
                callback_str, module_path,
            )
            return None
        try:
            mod = importlib.import_module(module_path)
            target = getattr(mod, func_name)
        except Exception:
            # Importing a module can raise anything; this is a boundary.
            log.exception("Cannot resolve callback '%s'", callback_str)
            return None
        if not callable(target):
            # Honour the Callable | None contract: a module constant or
            # submodule must not be handed back as if it were a callback.
            log.error(
                "Callback '%s' resolved to non-callable %s",
                callback_str, type(target).__name__,
            )
            return None
        return target

    # -- status --

    def status(self) -> dict[str, Any]:
        """Return attachment flags and the number of registered skills."""
        return {
            "voice_attached": self._voice is not None,
            "audio_manager_attached": self._audio_mgr is not None,
            "total_skills": len(self.registry.list_skills()),
        }
def load(subsystem: str) -> dict[str, Any]:
    """Load + cache ``config/<subsystem>_config.json``.

    Returns a dict with all leading-underscore keys stripped (at every
    nesting level). The result is cached per subsystem until ``reload()``
    drops it; callers receive the cached object itself, so they should treat
    it as read-only.

    A missing file, an unreadable file (permissions, bad encoding), invalid
    JSON, or a JSON document whose root is not an object all yield an empty
    dict — callers supply their own defaults via ``get(..., default)``.
    """
    with _LOCK:
        cached = _CACHE.get(subsystem)
        if cached is not None:
            return cached

        _, cfg_dir = _resolve_dirs()
        path = cfg_dir / f"{subsystem}_config.json"

        raw: Any
        try:
            # EAFP: read directly instead of exists()+read, which races with
            # concurrent deletes/renames of the config file.
            raw = json.loads(path.read_text(encoding="utf-8"))
        except FileNotFoundError:
            log.warning("config file missing: %s — using empty dict", path)
            raw = {}
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            log.error("config file %s unreadable: %s", path, exc)
            raw = {}

        cleaned = _strip_comments(raw)
        if not isinstance(cleaned, dict):
            log.error(
                "config file %s must contain a JSON object, got %s — using empty dict",
                path, type(cleaned).__name__,
            )
            cleaned = {}

        _CACHE[subsystem] = cleaned
        return cleaned
class EventBus:
    """In-process publish/subscribe hub.

    Handlers are stored per event name and may be sync or async callables.
    The listener table is guarded by a ``threading.Lock``; handlers are
    always invoked on a snapshot taken under that lock, never while it is
    held.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self._listeners: dict[str, list[Callable]] = defaultdict(list)
        # Strong references to tasks spawned by emit_sync. The event loop
        # only keeps weak references to tasks, so an unreferenced task can be
        # garbage-collected before it finishes.
        self._tasks: set[asyncio.Task] = set()

    @staticmethod
    def _describe(handler: Callable) -> str:
        """Return a log-friendly name for *handler*.

        ``functools.partial`` objects and callable instances have no
        ``__qualname__``; fall back to ``repr`` so logging never raises.
        """
        return getattr(handler, "__qualname__", None) or repr(handler)

    def _snapshot(self, event: str) -> list[Callable]:
        """Copy the current handler list for *event* under the lock."""
        with self._lock:
            return list(self._listeners.get(event, ()))

    def on(self, event: str, callback: Callable):
        """Subscribe *callback* to *event*."""
        with self._lock:
            self._listeners[event].append(callback)
        log.debug("Subscribed %s → %s", event, self._describe(callback))

    def off(self, event: str, callback: Callable):
        """Unsubscribe *callback* from *event*; unknown callbacks are ignored."""
        with self._lock:
            try:
                self._listeners[event].remove(callback)
            except ValueError:
                pass

    async def emit(self, event: str, **kwargs: Any):
        """Call every handler for *event* in order, awaiting coroutine results.

        A failing handler is logged and does not stop the remaining handlers.
        """
        for handler in self._snapshot(event):
            try:
                result = handler(**kwargs)
                if asyncio.iscoroutine(result):
                    await result
            except Exception:
                log.exception(
                    "Handler %s for event '%s' failed", self._describe(handler), event
                )

    def _spawn(self, loop: asyncio.AbstractEventLoop, coro: Any,
               handler: Callable, event: str) -> None:
        """Schedule *coro* on *loop*, keep it referenced, and log its failure."""
        task = loop.create_task(coro)
        self._tasks.add(task)

        def _done(finished: asyncio.Task) -> None:
            self._tasks.discard(finished)
            if finished.cancelled():
                return
            exc = finished.exception()
            if exc is not None:
                log.error(
                    "Handler %s for event '%s' failed",
                    self._describe(handler), event, exc_info=exc,
                )

        task.add_done_callback(_done)

    def emit_sync(self, event: str, **kwargs: Any):
        """Fire-and-forget from a sync context.

        Async handlers are scheduled on the running event loop if one exists.
        Otherwise they are dropped with a warning (the original silent-no-op
        bug — at least now it's logged).
        """
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = None

        for handler in self._snapshot(event):
            name = self._describe(handler)
            try:
                if asyncio.iscoroutinefunction(handler):
                    if loop is None:
                        log.warning(
                            "Async handler %s for '%s' dropped — no running loop",
                            name, event,
                        )
                    else:
                        self._spawn(loop, handler(**kwargs), handler, event)
                    continue
                result = handler(**kwargs)
                if asyncio.iscoroutine(result):
                    # Sync handler returned a coroutine — schedule it
                    if loop is None:
                        result.close()
                        log.warning(
                            "Coroutine result from %s for '%s' dropped — no running loop",
                            name, event,
                        )
                    else:
                        self._spawn(loop, result, handler, event)
            except Exception:
                log.exception("Handler %s for event '%s' failed", name, event)
_ws_push_fn = None


def set_ws_push(fn):
    """Register the push function from dashboard.websockets.log_stream.

    Passing ``None`` disables forwarding again.
    """
    global _ws_push_fn
    _ws_push_fn = fn


class _WSHandler(logging.Handler):
    """Forwards every log record to the WebSocket log stream."""

    def emit(self, record: logging.LogRecord):
        """Format *record* and hand it to the registered push function.

        Does nothing until ``set_ws_push`` has registered a callback. A
        failing callback never propagates into the code that logged; it is
        routed through ``Handler.handleError``, which reports to stderr when
        ``logging.raiseExceptions`` is true and stays silent otherwise.
        """
        # Snapshot the global once so a concurrent set_ws_push(None) cannot
        # slip in between the None check and the call.
        push = _ws_push_fn
        if push is None:
            return
        try:
            push(self.format(record))
        except Exception:
            self.handleError(record)
class SkillRegistry:
    """Thread-safe, JSON-backed registry of skills.

    All reads and writes of the in-memory table happen under one
    ``threading.Lock``; every mutation is persisted immediately with an
    atomic tempfile + ``os.replace`` write.
    """

    _VALID_SYNC_MODES = {"parallel", "audio_first", "motion_first"}

    def __init__(self, path: Path = SKILLS_FILE):
        self._path = path
        self._lock = threading.Lock()
        self._skills: dict[str, Skill] = {}
        self._load()

    # -- persistence --

    def _load(self):
        """Populate the registry from disk; start empty if missing or bad."""
        if not self._path.exists():
            self._skills = {}
            return
        try:
            with open(self._path, "r", encoding="utf-8") as f:
                payload = json.load(f)
            for entry in payload.get("skills", []):
                skill = Skill.from_dict(entry)
                self._skills[skill.id] = skill
            log.info("Loaded %d skills from %s", len(self._skills), self._path)
        except Exception as exc:
            log.warning("Could not load skills: %s", exc)
            # Backup corrupt file rather than silently nuking. The backup
            # name is "<original name>.corrupt", matching the log line below
            # whatever suffix the configured path has.
            try:
                self._path.rename(self._path.with_name(self._path.name + ".corrupt"))
                log.warning("Backed up corrupt skills to %s.corrupt", self._path)
            except OSError:
                pass
            self._skills = {}

    def _save(self):
        """Write the whole registry to disk atomically.

        Callers must hold ``self._lock``. The temp file is removed and the
        exception re-raised if the write fails.
        """
        self._path.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "version": 1,
            "total": len(self._skills),
            "skills": [s.to_dict() for s in self._skills.values()],
        }
        # Atomic write: tempfile + os.replace
        fd, tmp = tempfile.mkstemp(
            prefix=f".{self._path.name}.", suffix=".tmp",
            dir=str(self._path.parent),
        )
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(payload, f, ensure_ascii=False, indent=2)
            os.replace(tmp, self._path)
        except Exception:
            try:
                os.unlink(tmp)
            except OSError:
                pass
            raise

    # -- CRUD --

    def list_skills(self) -> list[dict[str, Any]]:
        """Return every skill as a plain dict (a copy, safe to mutate)."""
        with self._lock:
            return [s.to_dict() for s in self._skills.values()]

    def get(self, skill_id: str) -> Skill | None:
        """Return the stored ``Skill`` for *skill_id*, or ``None``."""
        with self._lock:
            return self._skills.get(skill_id)

    def add(self, skill: Skill) -> Skill:
        """Insert *skill* and persist.

        An empty ``skill.id`` is replaced with a generated 12-hex-char id.

        Raises:
            ValueError: invalid ``sync_mode`` or an id that already exists.
        """
        if skill.sync_mode not in self._VALID_SYNC_MODES:
            raise ValueError(
                f"Invalid sync_mode '{skill.sync_mode}' (allowed: {sorted(self._VALID_SYNC_MODES)})"
            )
        with self._lock:
            if not skill.id:
                skill.id = uuid.uuid4().hex[:12]
            elif skill.id in self._skills:
                raise ValueError(f"Skill id already exists: {skill.id}")
            self._skills[skill.id] = skill
            self._save()
        log.info("Added skill %s (%s)", skill.id, skill.description)
        return skill

    def update(self, skill_id: str, updates: dict[str, Any]) -> Skill | None:
        """Apply *updates* to an existing skill and persist.

        Only declared ``Skill`` dataclass fields other than ``id`` are
        written; any other key is ignored. (A plain ``hasattr`` check would
        also accept method names such as ``to_dict`` and let a dashboard
        payload overwrite them on the instance.)

        Returns:
            The updated skill, or ``None`` if *skill_id* is unknown.

        Raises:
            ValueError: *updates* carries an invalid ``sync_mode``.
        """
        with self._lock:
            existing = self._skills.get(skill_id)
            if existing is None:
                return None
            if "sync_mode" in updates and updates["sync_mode"] not in self._VALID_SYNC_MODES:
                raise ValueError(
                    f"Invalid sync_mode '{updates['sync_mode']}'"
                )
            writable = set(existing.__dataclass_fields__) - {"id"}
            for key, value in updates.items():
                if key in writable:
                    setattr(existing, key, value)
            self._save()
        log.info("Updated skill %s", skill_id)
        return existing

    def delete(self, skill_id: str) -> dict[str, Any] | None:
        """Remove a skill and persist; return its dict form, or ``None``."""
        with self._lock:
            skill = self._skills.pop(skill_id, None)
            if skill is None:
                return None
            self._save()
        log.info("Deleted skill %s", skill_id)
        return skill.to_dict()

    def find_by_audio(self, audio_file: str) -> list[Skill]:
        """Find all enabled skills linked to a given audio file."""
        with self._lock:
            return [s for s in self._skills.values() if s.audio_file == audio_file and s.enabled]

    def find_by_callback(self, callback: str) -> list[Skill]:
        """Find all enabled skills whose callback string equals *callback*."""
        with self._lock:
            return [s for s in self._skills.values() if s.callback == callback and s.enabled]
@app.middleware("http")
async def _auth_gate(request: Request, call_next):
    """Let a request through only if it is public or carries a session user.

    * Paths starting with one of ``_AUTH_BYPASS_PREFIXES`` always pass.
    * Requests whose session contains a truthy ``"user"`` pass.
    * Anything else under ``/api/`` or ``/ws/`` gets a JSON 401.
    * Every other path is redirected (303) to ``/login`` with the original
      path percent-encoded in the ``next`` query parameter.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    path = request.url.path
    # str.startswith accepts a tuple; an exact match is also a prefix match.
    if path.startswith(_AUTH_BYPASS_PREFIXES):
        return await call_next(request)
    if request.session.get("user"):
        return await call_next(request)
    # NOTE(review): an "http" middleware only sees HTTP requests, so the
    # "/ws/" test below covers plain HTTP hits on those paths, not WebSocket
    # connections — confirm the WebSocket routes check the session themselves.
    if path.startswith(("/api/", "/ws/")):
        return JSONResponse({"detail": "Not authenticated"}, status_code=401)
    # Encode the path so characters such as "&", "#" or "?" cannot truncate
    # `next` or inject extra query parameters into the login URL.
    return RedirectResponse(f"/login?next={quote(path, safe='/')}", status_code=303)
# Each row is (module_name, prefix, tag). `prefix` is None when the router
# is mounted without a prefix (the auth router), hence `str | None` in the
# annotation.
_REST_ROUTES: list[tuple[str, str | None, str]] = [
    # (module_name, prefix, tag)
    ("auth", None, "auth"),
    ("health", "/api", "health"),
    ("system", "/api/system", "system"),
    ("voice", "/api/voice", "voice"),
    ("logs", "/api/logs", "logs"),
    ("audio_control", "/api/audio", "audio"),
    ("scripts", "/api/scripts", "scripts"),
    ("records", "/api/records", "records"),
    ("prompt", "/api/prompt", "prompt"),
    ("typed_replay", "/api/typed-replay", "typed-replay"),
]
@app.get("/")
async def root():
    """Serve the dashboard SPA.

    Returns ``index.html`` from ``STATIC_DIR`` as an HTML response. When the
    file does not exist, returns a JSON diagnostic listing which routers
    loaded and which failed. When the file exists but cannot be read,
    returns a JSON error with HTTP 500 so the failure is not reported as a
    success.
    """
    from fastapi.responses import HTMLResponse

    index = STATIC_DIR / "index.html"
    try:
        # EAFP: read directly rather than exists()+read, which races with the
        # file being replaced or removed between the two calls.
        html = index.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        return {
            "message": "Sanad Dashboard — index.html not found",
            "loaded_routes": _loaded_routes,
            "failed_routes": _failed_routes,
        }
    except OSError as exc:
        return JSONResponse(
            {"error": f"Could not read index.html: {exc}"}, status_code=500
        )
    return HTMLResponse(html)
def safe_filename(name: str | None) -> str:
    """Strip directory components and reject obviously unsafe names.

    Args:
        name: Client-supplied filename; may be ``None`` or empty.

    Returns:
        The base name with surrounding whitespace removed.

    Raises:
        HTTPException: 400 when the name is missing, reduces to an empty
            string / ``"."`` / ``".."``, or contains an ASCII control
            character (code points 0x00–0x1F and 0x7F — this includes NUL,
            CR and LF as well as tab, escape, etc.).
    """
    if not name:
        raise HTTPException(400, "Filename required.")
    cleaned = os.path.basename(name).strip()
    if not cleaned or cleaned in {".", ".."}:
        raise HTTPException(400, "Invalid filename.")
    # Control characters have no place in a filename and can corrupt logs,
    # headers and terminals that later echo the name.
    if any(ord(ch) < 0x20 or ord(ch) == 0x7F for ch in cleaned):
        raise HTTPException(400, "Invalid characters in filename.")
    return cleaned
def _load_persisted_g1_volume() -> int:
    """Return the saved G1 speaker volume, clamped to 0–100.

    The value is read from the ``audio.g1_volume`` key of the dict returned
    by ``Project.Sanad.config.load_config()``. Any failure along the way
    (import error, missing or malformed config, non-numeric value) yields
    the default of 100.
    """
    fallback = 100
    try:
        from Project.Sanad.config import load_config
        settings = load_config() or {}
        audio_section = settings.get("audio") or {}
        level = int(audio_section.get("g1_volume", fallback))
    except Exception:
        return fallback
    # Clamp into the 0..100 range.
    if level < 0:
        return 0
    if level > 100:
        return 100
    return level
def _pactl(args: list[str]) -> subprocess.CompletedProcess[str]:
    """Run ``pactl <args>`` and return the completed process.

    The command runs with ``LC_ALL=C`` so its output is not translated:
    callers parse the text (e.g. checking for a trailing ``yes``), which
    only works on untranslated output.

    Raises:
        FileNotFoundError: ``pactl`` is not installed.
        subprocess.CalledProcessError: ``pactl`` exited with a non-zero status.
    """
    env = {**os.environ, "LC_ALL": "C"}
    return subprocess.run(
        ["pactl", *args], check=True, text=True, capture_output=True, env=env,
    )
@router.post("/speaker/mute")
async def toggle_speaker(muted: bool | None = None):
    """Mute/unmute the SPEAKER — both the PulseAudio sink AND the G1
    built-in speaker, so the effect is audible regardless of which
    playback path is currently active (Anker PowerConf via PyAudio vs
    G1 built-in via Unitree DDS AudioClient).

    Each of the two paths is attempted independently; the response
    reports which one(s) succeeded under ``pulse`` and ``g1``.

    ``speaker_muted`` in the response is the EFFECTIVE state after the
    attempt, computed the same way as in ``/status``: True if the
    PulseAudio sink reports muted OR the cached G1 volume is 0. It is
    therefore still True when an un-mute request failed to un-mute one
    of the paths.

    Args:
        muted: Desired state; ``None`` toggles the current effective state.
    """
    def _do():
        global _g1_current_volume
        sink, _ = _current_sink_source()

        # Decide target state. With muted=None we toggle the effective
        # state: if either path is currently muted, un-mute everything;
        # otherwise mute everything.
        if muted is None:
            pulse_cur = _get_muted("sink", sink) if sink else False
            g1_cur = _g1_current_volume == 0
            target = not (pulse_cur or g1_cur)
        else:
            target = bool(muted)

        result = {"speaker_muted": target, "pulse": None, "g1": None}

        # ── Path 1: PulseAudio sink (Anker PowerConf, USB, etc.) ──
        if sink:
            try:
                actual_pulse = _set_muted("sink", sink, target)
                result["pulse"] = {"ok": True, "muted": actual_pulse, "sink": sink}
            except (FileNotFoundError, subprocess.CalledProcessError) as exc:
                result["pulse"] = {"ok": False, "error": f"pactl failed: {exc}"}
        else:
            result["pulse"] = {"ok": False, "error": "no sink selected"}

        # ── Path 2: G1 built-in speaker via DDS AudioClient ──
        # Mute = SetVolume(0). Unmute = SetVolume(_g1_user_volume) so the
        # user's chosen level is restored (instead of always jumping back
        # to 100).
        client = _get_g1_audio_client()
        if client is None:
            result["g1"] = {"ok": False, "error": _g1_init_error or "AudioClient unavailable"}
        else:
            volume = 0 if target else _g1_user_volume
            try:
                with _g1_audio_lock:
                    code = client.SetVolume(volume)
                    _g1_current_volume = volume
                result["g1"] = {
                    "ok": True, "muted": volume == 0,
                    "volume": volume, "code": code,
                }
                log.info("G1 speaker volume set to %d (rc=%s)", volume, code)
            except Exception as exc:
                result["g1"] = {"ok": False, "error": f"SetVolume failed: {exc}"}

        # Final effective state — either path counts as muted. The G1 side
        # uses the cached hardware volume (only updated after a successful
        # SetVolume), so a failed un-mute is still reported as muted.
        pulse_muted = bool(result["pulse"].get("muted", False))
        g1_muted = _g1_current_volume == 0
        result["speaker_muted"] = pulse_muted or g1_muted
        result["sink"] = sink
        result["g1_current_volume"] = _g1_current_volume
        result["g1_user_volume"] = _g1_user_volume
        return result
    return await asyncio.to_thread(_do)
+ """ + def _do(): + global _g1_current_volume + client = _get_g1_audio_client() + if client is None: + raise HTTPException( + 503, + f"G1 AudioClient unavailable: {_g1_init_error or 'unknown'}", + ) + if muted is None: + target = _g1_current_volume > 0 # toggle + else: + target = bool(muted) + volume = 0 if target else _g1_user_volume + try: + with _g1_audio_lock: + code = client.SetVolume(volume) + _g1_current_volume = volume + except Exception as exc: + raise HTTPException(500, f"SetVolume failed: {exc}") + log.info("G1 speaker volume set to %d (rc=%s)", volume, code) + return { + "g1_muted": volume == 0, + "volume": volume, + "user_volume": _g1_user_volume, + "return_code": code, + } + return await asyncio.to_thread(_do) + + +# ─────────────────────── G1 speaker volume (0-100) ─────────────────────── + + +class G1VolumePayload(BaseModel): + level: int # 0..100 + + +@router.get("/g1-speaker/volume") +async def get_g1_volume(): + """Return the current G1 speaker volume state. + + Response: + { + "available": true, # AudioClient available? + "current_volume": 75, # what's on hardware right now + "user_volume": 75, # user's preferred unmuted level + "muted": false, # current_volume == 0 + "persisted": 75, # value from config.json + } + """ + def _do(): + return { + "available": _g1_audio_client is not None or (_g1_init_error == ""), + "current_volume": _g1_current_volume, + "user_volume": _g1_user_volume, + "muted": _g1_current_volume == 0, + "persisted": _load_persisted_g1_volume(), + "init_error": _g1_init_error, + } + return await asyncio.to_thread(_do) + + +@router.post("/g1-speaker/volume") +async def set_g1_volume(payload: G1VolumePayload): + """Set the G1 built-in speaker volume via DDS AudioClient. + + Body: `{"level": 0..100}` + + Effects: + - Immediately applies to hardware via AudioClient.SetVolume(level). + - Persists to data/motions/config.json under `audio.g1_volume` so + it survives restarts. 
+ - If level > 0, updates _g1_user_volume (the "unmuted" restore + target). level == 0 is a soft mute that preserves user_volume. + - Takes effect on the live playback immediately — you can slide + the volume down mid-speech and hear it get quieter. + """ + def _do(): + global _g1_current_volume, _g1_user_volume + level = int(payload.level) + if not 0 <= level <= 100: + raise HTTPException(400, "level must be 0..100") + + client = _get_g1_audio_client() + if client is None: + raise HTTPException( + 503, + f"G1 AudioClient unavailable: {_g1_init_error or 'unknown'}", + ) + try: + with _g1_audio_lock: + code = client.SetVolume(level) + _g1_current_volume = level + if level > 0: + # Only update the "preferred unmuted" level when the + # user is setting a non-zero volume. Setting 0 is a + # mute, which shouldn't overwrite their preference. + _g1_user_volume = level + except Exception as exc: + raise HTTPException(500, f"SetVolume failed: {exc}") + + # Persist the user's preferred level (not the current) so a + # subsequent mute-then-restart restores to the preferred level + _save_persisted_g1_volume(_g1_user_volume) + log.info("G1 volume → %d (user_pref=%d, rc=%s)", + level, _g1_user_volume, code) + return { + "ok": True, + "current_volume": level, + "user_volume": _g1_user_volume, + "muted": level == 0, + "return_code": code, + "persisted": True, + } + return await asyncio.to_thread(_do) + + +# ─────────────────────── device profiles ─────────────────────── + + +@router.get("/devices") +async def list_devices(): + """Full device + profile listing for the dashboard picker.""" + return await asyncio.to_thread(ad.status) + + +@router.get("/profiles") +async def list_profiles(): + """Just the named profiles + which are currently plugged in.""" + def _do(): + from dataclasses import asdict + detected = ad.detect_plugged_profiles() if ad.pactl_available() else [] + detected_ids = {d["profile"]["id"] for d in detected} + return { + "profiles": [ + { + **asdict(p), + 
"available": p.id in detected_ids, + } + for p in ad.PROFILES + ], + "detected_ids": list(detected_ids), + } + return await asyncio.to_thread(_do) + + +class ProfileSelect(BaseModel): + profile_id: str + + +@router.post("/select-profile") +async def select_profile(payload: ProfileSelect): + def _do(): + result = ad.select_profile(payload.profile_id) + if not result.get("ok"): + raise HTTPException(409, result.get("error") or "Could not select profile") + # Best-effort: tell the audio_manager to refresh its cached state + try: + from Project.Sanad.main import audio_mgr + if audio_mgr is not None and hasattr(audio_mgr, "refresh_devices"): + audio_mgr.refresh_devices() + except Exception: + pass + return result + return await asyncio.to_thread(_do) + + +class ManualSelect(BaseModel): + sink: str + source: str + + +@router.post("/select-manual") +async def select_manual(payload: ManualSelect): + def _do(): + if not payload.sink and not payload.source: + raise HTTPException(400, "At least one of sink/source required") + result = ad.select_manual(payload.sink, payload.source) + if not result.get("ok"): + raise HTTPException(500, str(result.get("errors") or "Selection failed")) + try: + from Project.Sanad.main import audio_mgr + if audio_mgr is not None and hasattr(audio_mgr, "refresh_devices"): + audio_mgr.refresh_devices() + except Exception: + pass + return result + return await asyncio.to_thread(_do) + + +@router.post("/refresh") +async def refresh_devices(): + """Re-scan plugged devices and re-resolve current selection.""" + return await asyncio.to_thread(ad.status) + + +@router.post("/apply") +async def apply_audio(): + """Re-scan all USB ports, resolve the best profile, and set pactl defaults. + + Use this after plugging/unplugging devices or switching USB ports. 
+ """ + def _do(): + result = ad.apply_current_selection() + # Also refresh AudioManager so it picks up the new sink/source + try: + from Project.Sanad.main import audio_mgr + if audio_mgr is not None: + audio_mgr.refresh_devices() + except Exception: + pass + return result + return await asyncio.to_thread(_do) diff --git a/dashboard/routes/auth.py b/dashboard/routes/auth.py new file mode 100644 index 0000000..9d955bd --- /dev/null +++ b/dashboard/routes/auth.py @@ -0,0 +1,64 @@ +"""Dashboard login — minimal cookie-session auth. + +Credentials come from `core_config.json` → `auth.{username,password}`. +The session is signed by Starlette's SessionMiddleware (stateless cookie). +""" + +from __future__ import annotations + +from fastapi import APIRouter, HTTPException, Request +from fastapi.responses import HTMLResponse, RedirectResponse +from pydantic import BaseModel + +from Project.Sanad.config import BASE_DIR +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.logger import get_logger + +router = APIRouter() +log = get_logger("dashboard.auth") + +_AUTH_CFG = _cfg_section("core", "auth") or {} +USERNAME = _AUTH_CFG.get("username", "admin") +PASSWORD = _AUTH_CFG.get("password", "admin") + +LOGIN_PAGE = BASE_DIR / "dashboard" / "static" / "login.html" + + +def is_authed(request: Request) -> bool: + return bool(request.session.get("user")) + + +class LoginPayload(BaseModel): + username: str + password: str + + +@router.get("/login", include_in_schema=False) +async def login_page(request: Request): + if is_authed(request): + return RedirectResponse("/", status_code=303) + if LOGIN_PAGE.exists(): + return HTMLResponse(LOGIN_PAGE.read_text(encoding="utf-8")) + return HTMLResponse("

Login page missing

", status_code=500) + + +@router.post("/api/auth/login") +async def login(request: Request, payload: LoginPayload): + if payload.username == USERNAME and payload.password == PASSWORD: + request.session["user"] = payload.username + log.info("login OK: %s", payload.username) + return {"ok": True, "user": payload.username} + log.warning("login FAILED: %s", payload.username) + raise HTTPException(401, "Invalid username or password") + + +@router.post("/api/auth/logout") +async def logout(request: Request): + user = request.session.pop("user", None) + log.info("logout: %s", user) + return {"ok": True} + + +@router.get("/api/auth/me") +async def whoami(request: Request): + return {"authenticated": is_authed(request), "user": request.session.get("user")} diff --git a/dashboard/routes/health.py b/dashboard/routes/health.py new file mode 100644 index 0000000..0651c33 --- /dev/null +++ b/dashboard/routes/health.py @@ -0,0 +1,42 @@ +"""Health and status endpoints.""" + +from __future__ import annotations + +from fastapi import APIRouter + +from Project.Sanad.core.logger import get_logger + +log = get_logger("health_route") + +router = APIRouter() + + +def _safe_status(component, name: str) -> dict: + """Get status without crashing the whole endpoint if one component fails.""" + if component is None: + return {"available": False} + try: + if hasattr(component, "status") and callable(component.status): + return component.status() + return {"available": True} + except Exception as exc: + log.warning("status() failed for %s: %s", name, exc) + return {"available": True, "error": str(exc)} + + +@router.get("/health") +async def health(): + from Project.Sanad.main import brain + return { + "status": "ok", + "brain": _safe_status(brain, "brain"), + } + + +@router.get("/status") +async def full_status(): + from Project.Sanad.main import brain, voice_client + return { + "brain": _safe_status(brain, "brain"), + "voice": _safe_status(voice_client, "voice"), + } diff --git 
a/dashboard/routes/logs.py b/dashboard/routes/logs.py new file mode 100644 index 0000000..7eccd04 --- /dev/null +++ b/dashboard/routes/logs.py @@ -0,0 +1,203 @@ +"""Log viewing and snapshot endpoints.""" + +from __future__ import annotations + +import asyncio +import json +import platform +import shutil +import socket +import sys +from collections import deque +from datetime import datetime + +from fastapi import APIRouter, HTTPException +from fastapi.responses import PlainTextResponse + +from Project.Sanad.config import BASE_DIR, LOGS_DIR +from Project.Sanad.dashboard.routes._safe_io import safe_path_under + +router = APIRouter() + + +def _list_logs_sync(): + LOGS_DIR.mkdir(parents=True, exist_ok=True) + files = [] + for p in sorted(LOGS_DIR.glob("*.log*")): + files.append({ + "name": p.name, + "size_bytes": p.stat().st_size, + }) + return files + + +@router.get("/") +async def list_logs(): + files = await asyncio.to_thread(_list_logs_sync) + return {"logs_dir": str(LOGS_DIR), "files": files} + + +def _tail_sync(path, lines: int) -> list[str]: + with open(path, "r", encoding="utf-8", errors="replace") as f: + tail = deque(f, maxlen=lines) + return [l.rstrip("\n") for l in tail] + + +@router.get("/tail/{filename}") +async def tail_log(filename: str, lines: int = 200): + path = safe_path_under(LOGS_DIR, filename) + if not path.exists(): + raise HTTPException(404, "File not found") + lines_out = await asyncio.to_thread(_tail_sync, path, lines) + return {"filename": path.name, "lines": lines_out} + + +def _snapshot_sync(ts: str): + saved = [] + for p in LOGS_DIR.glob("*.log"): + # Skip prior snapshots to avoid recursive growth + if "_snapshot_" in p.stem: + continue + dest = LOGS_DIR / f"{p.stem}_snapshot_{ts}.log" + shutil.copy2(p, dest) + saved.append({"source": p.name, "snapshot": dest.name, "size_bytes": dest.stat().st_size}) + return saved + + +@router.post("/snapshot") +async def save_log_snapshot(): + """Save timestamped copy of all log files.""" + 
LOGS_DIR.mkdir(parents=True, exist_ok=True) + ts = datetime.now().strftime("%Y%m%d_%H%M%S") + saved = await asyncio.to_thread(_snapshot_sync, ts) + return {"ok": True, "saved_at": ts, "snapshots": saved} + + +# ─────────────────────── full bundle (everything in one text blob) ─────────────────────── + +def _build_bundle_sync(lines_per_file: int, include_system: bool) -> str: + """Build the full text bundle — header, subsystem status, all logs. + + Returns a single string safe to copy directly into a bug report. + """ + out: list[str] = [] + ts = datetime.now().isoformat(timespec="seconds") + out.append("=" * 72) + out.append(f"SANAD LOG BUNDLE — {ts}") + out.append("=" * 72) + out.append(f"Hostname : {socket.gethostname()}") + out.append(f"Platform : {platform.platform()}") + out.append(f"Python : {sys.version.split()[0]}") + out.append(f"Executable: {sys.executable}") + out.append(f"BASE_DIR : {BASE_DIR}") + out.append(f"LOGS_DIR : {LOGS_DIR}") + + # Subsystems — pull live status from main.SUBSYSTEMS + if include_system: + out.append("") + out.append("-" * 72) + out.append("SUBSYSTEMS") + out.append("-" * 72) + try: + from Project.Sanad.main import SUBSYSTEMS + except Exception as exc: + out.append(f" could not import SUBSYSTEMS: {exc}") + SUBSYSTEMS = {} + + for name in sorted(SUBSYSTEMS): + comp = SUBSYSTEMS[name] + if comp is None: + out.append(f" ✗ {name:15s} unavailable") + continue + status: dict = {} + if hasattr(comp, "status") and callable(comp.status): + try: + s = comp.status() + if isinstance(s, dict): + status = s + else: + status = {"raw": str(s)} + except Exception as exc: + status = {"status_error": str(exc)} + try: + status_str = json.dumps(status, ensure_ascii=False, default=str) + except Exception: + status_str = str(status) + out.append(f" ✓ {name:15s} {status_str}") + + # Dashboard router load state + out.append("") + out.append("-" * 72) + out.append("DASHBOARD ROUTERS") + out.append("-" * 72) + try: + from Project.Sanad.dashboard.app import 
_loaded_routes, _failed_routes + out.append(f" loaded ({len(_loaded_routes)}): {', '.join(_loaded_routes)}") + if _failed_routes: + out.append(f" failed ({len(_failed_routes)}):") + for name, err in _failed_routes.items(): + out.append(f" - {name}: {err}") + else: + out.append(" failed (0): —") + except Exception as exc: + out.append(f" could not read dashboard state: {exc}") + + # All log files — tail N lines each, skip snapshots + out.append("") + out.append("-" * 72) + out.append(f"LOG FILES (last {lines_per_file} lines each)") + out.append("-" * 72) + + LOGS_DIR.mkdir(parents=True, exist_ok=True) + log_paths = sorted(LOGS_DIR.glob("*.log*")) + files_included = 0 + for p in log_paths: + if "_snapshot_" in p.stem: + continue # skip stale snapshots + try: + size = p.stat().st_size + except OSError: + size = 0 + out.append("") + out.append(f"=== {p.name} ({size} bytes) ===") + try: + with open(p, "r", encoding="utf-8", errors="replace") as f: + tail = deque(f, maxlen=lines_per_file) + for raw in tail: + out.append(raw.rstrip("\n")) + files_included += 1 + except OSError as exc: + out.append(f" ") + + out.append("") + out.append("=" * 72) + out.append(f"END OF BUNDLE — {files_included} log file(s) included") + out.append("=" * 72) + return "\n".join(out) + + +@router.get("/bundle") +async def logs_bundle(lines: int = 1000, include_system: bool = True): + """Return a single plain-text dump of everything useful for debugging. + + Includes: + - Timestamp, hostname, platform, Python, BASE_DIR, LOGS_DIR + - Live status of every subsystem in main.SUBSYSTEMS + - Dashboard router load/fail state + - Tail of every .log file in LOGS_DIR (configurable per-file limit) + + Response is `text/plain` so it's safe to copy straight to clipboard + or pipe into a file. Intended use: dashboard "Copy All Logs" button + and manual `curl ... > sanad_bundle.txt` debugging. 
+ """ + # Clamp lines to keep the payload sane + lines = max(10, min(int(lines), 50000)) + text = await asyncio.to_thread(_build_bundle_sync, lines, include_system) + return PlainTextResponse( + text, + headers={ + "Content-Disposition": ( + f'inline; filename="sanad_bundle_{datetime.now().strftime("%Y%m%d_%H%M%S")}.txt"' + ), + }, + ) diff --git a/dashboard/routes/prompt.py b/dashboard/routes/prompt.py new file mode 100644 index 0000000..f6f1e10 --- /dev/null +++ b/dashboard/routes/prompt.py @@ -0,0 +1,98 @@ +"""Prompt management — view, edit, reload system prompts.""" + +from __future__ import annotations + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from Project.Sanad.config import SCRIPTS_DIR +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.dashboard.routes._safe_io import ( + atomic_write_text, MAX_UPLOAD_BYTES, +) + +router = APIRouter() + +# Filenames — SINGLE SOURCE in core.script_files +_SCRIPTS = _cfg_section("core", "script_files") +SCRIPT_PROMPT_PATH = SCRIPTS_DIR / _SCRIPTS.get("persona", "sanad_script.txt") +RULE_PROMPT_PATH = SCRIPTS_DIR / _SCRIPTS.get("rules", "sanad_rule.txt") +MAX_PROMPT_BYTES = MAX_UPLOAD_BYTES + +# Default system prompt — SINGLE SOURCE in core.gemini_defaults +DEFAULT_SYSTEM_PROMPT = _cfg_section("core", "gemini_defaults").get( + "default_system_prompt", + "You are Sanad (Bousandah), a wise and friendly Emirati assistant. " + "Speak strictly in the UAE dialect (Khaleeji). " + "Be helpful, concise, and use local greetings like 'Marhaba' and 'Ya Khoy'." 
+) + + +def _load_system_prompt() -> str: + try: + content = SCRIPT_PROMPT_PATH.read_text(encoding="utf-8-sig").strip() + if content: + return content + except FileNotFoundError: + pass + return DEFAULT_SYSTEM_PROMPT + + +def _load_rule_prompts() -> dict[str, str]: + result = {"system_prompt": "", "replay_prompt": ""} + try: + content = RULE_PROMPT_PATH.read_text(encoding="utf-8-sig").strip() + sections: dict[str, list[str]] = {} + current = None + for line in content.splitlines(): + stripped = line.strip() + if stripped.startswith("[") and stripped.endswith("]"): + current = stripped[1:-1].strip() + sections[current] = [] + elif current is not None: + sections[current].append(line.rstrip()) + result["system_prompt"] = "\n".join(sections.get("SYSTEM_PROMPT", [])).strip() + result["replay_prompt"] = "\n".join(sections.get("REPLAY_SYSTEM_PROMPT", [])).strip() + except FileNotFoundError: + pass + if not result["system_prompt"]: + result["system_prompt"] = _load_system_prompt() + return result + + +@router.get("/") +async def get_prompt(): + return { + "script_path": str(SCRIPT_PROMPT_PATH), + "rule_path": str(RULE_PROMPT_PATH), + "system_prompt": _load_system_prompt(), + "rules": _load_rule_prompts(), + } + + +class PromptUpdate(BaseModel): + content: str + + +@router.post("/update") +async def update_prompt(payload: PromptUpdate): + if len(payload.content.encode("utf-8")) > MAX_PROMPT_BYTES: + raise HTTPException(413, f"Prompt too large (max {MAX_PROMPT_BYTES} bytes).") + try: + SCRIPTS_DIR.mkdir(parents=True, exist_ok=True) + atomic_write_text(SCRIPT_PROMPT_PATH, payload.content.rstrip() + "\n") + except OSError as exc: + raise HTTPException(500, f"Could not write prompt: {exc}") + return {"ok": True, "path": str(SCRIPT_PROMPT_PATH), "length": len(payload.content)} + + +@router.post("/reload") +async def reload_prompts(): + rules = _load_rule_prompts() + return { + "ok": True, + "system_prompt": rules["system_prompt"], + "replay_prompt": rules["replay_prompt"], + 
"script_path": str(SCRIPT_PROMPT_PATH), + "rule_path": str(RULE_PROMPT_PATH), + } diff --git a/dashboard/routes/records.py b/dashboard/routes/records.py new file mode 100644 index 0000000..56a0a22 --- /dev/null +++ b/dashboard/routes/records.py @@ -0,0 +1,227 @@ +"""Saved records management — list, play, pause, resume, stop, rename, delete. + +Manages WAV recordings saved via the typed replay engine. +""" + +from __future__ import annotations + +import json +import threading +from pathlib import Path +from typing import Any + +from fastapi import APIRouter, HTTPException +from fastapi.responses import FileResponse +from pydantic import BaseModel + +from Project.Sanad.config import AUDIO_RECORDINGS_DIR +from Project.Sanad.dashboard.routes._safe_io import ( + safe_filename, safe_path_under, atomic_write_json, +) + +router = APIRouter() + +RECORDS_INDEX = AUDIO_RECORDINGS_DIR / "records.json" +_INDEX_LOCK = threading.Lock() + + +def _load_index() -> dict[str, Any]: + if not RECORDS_INDEX.exists(): + return {"records": [], "total_records": 0, "last_updated": ""} + try: + with open(RECORDS_INDEX, "r", encoding="utf-8") as f: + return json.load(f) + except (json.JSONDecodeError, OSError): + # Backup corrupt file rather than nuking it + try: + RECORDS_INDEX.rename(RECORDS_INDEX.with_suffix(".json.corrupt")) + except OSError: + pass + return {"records": [], "total_records": 0, "last_updated": ""} + + +def _save_index(payload: dict[str, Any]): + AUDIO_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True) + payload["total_records"] = len(payload.get("records", [])) + atomic_write_json(RECORDS_INDEX, payload) + + +def _resolve_path(path_str: str) -> Path: + """Resolve record path — basename / relative / absolute. + + Legacy records stored absolute paths. New records store basenames. + Both flavors resolve to a real file under AUDIO_RECORDINGS_DIR. 
+ """ + if not path_str: + return AUDIO_RECORDINGS_DIR + p = Path(path_str) + if p.is_absolute(): + return p + return AUDIO_RECORDINGS_DIR / p + + +def _reconcile(payload: dict[str, Any]) -> tuple[dict[str, Any], int]: + kept, removed = [], 0 + for entry in payload.get("records", []): + try: + sp = _resolve_path(entry["files"]["speaker_recording"]["path"]) + rp = _resolve_path(entry["files"]["gemini_raw_output"]["path"]) + if sp.exists() and rp.exists(): + kept.append(entry) + else: + removed += 1 + except (KeyError, TypeError): + removed += 1 + payload["records"] = kept + payload["total_records"] = len(kept) + return payload, removed + + +@router.get("/") +async def list_records(): + with _INDEX_LOCK: + payload = _load_index() + payload, removed = _reconcile(payload) + if removed: + _save_index(payload) + return payload + + +@router.get("/audio/{record_name}") +async def stream_record_audio(record_name: str, kind: str = "speaker"): + """Stream a record's WAV to the browser. Lite plays audio client-side + (Audio element in index.html) instead of through the robot speaker. 
+ """ + with _INDEX_LOCK: + index = _load_index() + entry = next((r for r in index.get("records", []) if r.get("record_name") == record_name), None) + if entry is None: + raise HTTPException(404, f"Record not found: {record_name}") + + file_key = "speaker_recording" if kind == "speaker" else "gemini_raw_output" + try: + raw_path = _resolve_path(entry["files"][file_key]["path"]).resolve() + except KeyError: + raise HTTPException(404, f"No '{kind}' file for record {record_name}") + base = AUDIO_RECORDINGS_DIR.resolve() + try: + raw_path.relative_to(base) + except ValueError: + raise HTTPException(400, "Record path outside recordings directory.") + if not raw_path.exists(): + raise HTTPException(404, f"File not found: {raw_path.name}") + + return FileResponse(raw_path, media_type="audio/wav", filename=raw_path.name) + + +class RecordRename(BaseModel): + record_name: str + new_name: str + + +@router.post("/rename") +async def rename_record(payload: RecordRename): + new_name = safe_filename(payload.new_name) + # Strip any extension the user provided — we add our own + if new_name.lower().endswith(".wav"): + new_name = new_name[:-4] + if not new_name or new_name.startswith("."): + raise HTTPException(400, "Invalid new name.") + + with _INDEX_LOCK: + index = _load_index() + entry = next( + (r for r in index.get("records", []) if r.get("record_name") == payload.record_name), + None, + ) + if entry is None: + raise HTTPException(404, f"Record not found: {payload.record_name}") + + base = AUDIO_RECORDINGS_DIR.resolve() + for key in ("speaker_recording", "gemini_raw_output"): + try: + old_path = _resolve_path(entry["files"][key]["path"]).resolve() + old_path.relative_to(base) # ensure inside recordings dir + except (KeyError, ValueError): + continue + if not old_path.exists(): + continue + suffix = "_raw.wav" if key == "gemini_raw_output" else ".wav" + new_path = safe_path_under(AUDIO_RECORDINGS_DIR, f"{new_name}{suffix}") + if new_path.exists(): + raise HTTPException(409, 
f"File already exists: {new_path.name}") + old_path.rename(new_path) + entry["files"][key]["path"] = new_path.name # basename — portable + entry["files"][key]["name"] = new_path.name + + entry["record_name"] = new_name + _save_index(index) + return {"ok": True, "record": entry} + + +class RecordDelete(BaseModel): + record_name: str + + +@router.post("/delete") +async def delete_record(payload: RecordDelete): + with _INDEX_LOCK: + index = _load_index() + kept = [] + deleted_entry = None + for r in index.get("records", []): + if r.get("record_name") == payload.record_name and deleted_entry is None: + deleted_entry = r + else: + kept.append(r) + + if deleted_entry is None: + raise HTTPException(404, f"Record not found: {payload.record_name}") + + base = AUDIO_RECORDINGS_DIR.resolve() + deleted_files = [] + for fi in deleted_entry.get("files", {}).values(): + try: + p = Path(fi.get("path", "")).resolve() + p.relative_to(base) # never delete files outside recordings dir + except (ValueError, OSError): + continue + if p.exists(): + p.unlink() + deleted_files.append(str(p)) + + index["records"] = kept + _save_index(index) + return {"ok": True, "deleted": payload.record_name, "deleted_files": deleted_files} + + +@router.post("/delete-all") +async def delete_all_records(): + """Wipe every WAV under AUDIO_RECORDINGS_DIR and reset the index. + + Stays within AUDIO_RECORDINGS_DIR — never traverses elsewhere. 
+ """ + base = AUDIO_RECORDINGS_DIR.resolve() + deleted_files: list[str] = [] + skipped: list[str] = [] + with _INDEX_LOCK: + if base.exists(): + for wav in base.glob("*.wav"): + try: + p = wav.resolve() + p.relative_to(base) # double-check it lives under base + except (ValueError, OSError): + skipped.append(str(wav)) + continue + try: + p.unlink() + deleted_files.append(p.name) + except OSError as exc: + skipped.append(f"{wav.name}: {exc}") + _save_index({"records": [], "total_records": 0, "last_updated": ""}) + return { + "ok": True, + "deleted_count": len(deleted_files), + "deleted_files": deleted_files, + "skipped": skipped, + } diff --git a/dashboard/routes/scripts.py b/dashboard/routes/scripts.py new file mode 100644 index 0000000..b4c65a5 --- /dev/null +++ b/dashboard/routes/scripts.py @@ -0,0 +1,120 @@ +"""Script/prompt file management — CRUD for sanad_script.txt, sanad_rule.txt, etc.""" + +from __future__ import annotations + +from datetime import datetime +from pathlib import Path + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from Project.Sanad.config import SCRIPTS_DIR +from Project.Sanad.dashboard.routes._safe_io import ( + atomic_write_text, MAX_UPLOAD_BYTES, +) + +router = APIRouter() + +MAX_SCRIPT_BYTES = MAX_UPLOAD_BYTES + + +def _safe_path(name: str) -> Path: + cleaned = name.strip() + if not cleaned or "/" in cleaned or "\\" in cleaned or cleaned in {".", ".."}: + raise HTTPException(400, "Invalid script name.") + path = (SCRIPTS_DIR / cleaned).resolve() + if not str(path).startswith(str(SCRIPTS_DIR.resolve())): + raise HTTPException(400, "Path traversal denied.") + return path + + +@router.get("/") +async def list_scripts(): + SCRIPTS_DIR.mkdir(parents=True, exist_ok=True) + items = [] + for p in sorted(SCRIPTS_DIR.iterdir(), key=lambda x: x.name.lower()): + if not p.is_file(): + continue + st = p.stat() + items.append({ + "name": p.name, + "size_bytes": st.st_size, + "modified_at": 
datetime.fromtimestamp(st.st_mtime).isoformat(timespec="seconds"),
+        })
+    return {"path": str(SCRIPTS_DIR), "files": items}
+
+
+class ScriptLoad(BaseModel):
+    """Request body for ``POST /load``."""
+
+    name: str
+
+
+@router.post("/load")
+async def load_script(payload: ScriptLoad):
+    """Return a script's text together with its size and modification time.
+
+    The name is resolved through ``_safe_path``. Responds 404 when the
+    resolved path is not a regular file.
+    """
+    path = _safe_path(payload.name)
+    # is_file() rather than exists(): a directory passes exists() and would
+    # then blow up inside read_text() as an unhandled 500.
+    if not path.is_file():
+        raise HTTPException(404, f"Script not found: {payload.name}")
+    # utf-8-sig drops a leading BOM when the file has one.
+    content = path.read_text(encoding="utf-8-sig")
+    st = path.stat()
+    return {
+        "name": path.name,
+        "content": content,
+        "size_bytes": st.st_size,
+        "modified_at": datetime.fromtimestamp(st.st_mtime).isoformat(timespec="seconds"),
+    }
+
+
+class ScriptSave(BaseModel):
+    """Request body for ``POST /save``."""
+
+    name: str
+    content: str
+
+
+@router.post("/save")
+async def save_script(payload: ScriptSave):
+    """Write ``content`` to the named script, creating or overwriting it.
+
+    Responds 413 when the UTF-8 encoded content exceeds MAX_SCRIPT_BYTES.
+    """
+    # The limit is in bytes, so measure the encoded form, not len(str).
+    if len(payload.content.encode("utf-8")) > MAX_SCRIPT_BYTES:
+        raise HTTPException(413, f"Content too large (max {MAX_SCRIPT_BYTES} bytes).")
+    path = _safe_path(payload.name)
+    SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
+    atomic_write_text(path, payload.content)
+    return {"ok": True, "name": path.name, "size_bytes": path.stat().st_size}
+
+
+class ScriptCreate(BaseModel):
+    """Request body for ``POST /create``; ``content`` defaults to empty."""
+
+    name: str
+    content: str = ""
+
+
+@router.post("/create")
+async def create_script(payload: ScriptCreate):
+    """Create a new script; responds 409 if the path already exists.
+
+    Responds 413 when the UTF-8 encoded content exceeds MAX_SCRIPT_BYTES.
+    """
+    if len(payload.content.encode("utf-8")) > MAX_SCRIPT_BYTES:
+        raise HTTPException(413, f"Content too large (max {MAX_SCRIPT_BYTES} bytes).")
+    path = _safe_path(payload.name)
+    if path.exists():
+        raise HTTPException(409, f"File already exists: {payload.name}")
+    SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
+    atomic_write_text(path, payload.content)
+    return {"ok": True, "name": path.name}
+
+
+class ScriptRename(BaseModel):
+    """Request body for ``POST /rename``."""
+
+    old_name: str
+    new_name: str
+
+
+@router.post("/rename")
+async def rename_script(payload: ScriptRename):
+    """Rename a script; 404 if the source is missing, 409 if the target exists."""
+    old = _safe_path(payload.old_name)
+    new = _safe_path(payload.new_name)
+    if not old.exists():
+        raise HTTPException(404, f"Not found: {payload.old_name}")
+    if new.exists():
+        raise HTTPException(409, f"Already exists: {payload.new_name}")
+    old.rename(new)
+    return {"ok": True, "old_name": payload.old_name, "new_name": new.name}
+
+
+class ScriptDelete(BaseModel):
+    """Request body for ``POST /delete``."""
+
+    name: str
+
+
+@router.post("/delete")
+async def delete_script(payload: ScriptDelete):
+    """Delete the named script; responds 404 when it is not a regular file."""
+    path = _safe_path(payload.name)
+    # unlink() cannot remove a directory, so only regular files qualify.
+    if not path.is_file():
+        raise HTTPException(404, f"Not found: {payload.name}")
+    path.unlink()
+    return {"ok": True, "deleted": payload.name}
diff --git a/dashboard/routes/system.py b/dashboard/routes/system.py
new file mode 100644
index 0000000..4839579
--- /dev/null
+++ b/dashboard/routes/system.py
@@ -0,0 +1,133 @@
+"""System information endpoints — network, subsystems, dashboard binding."""
+
+from __future__ import annotations
+
+import asyncio
+import os
+import platform
+import socket
+import sys
+from typing import Any
+
+from fastapi import APIRouter
+
+from Project.Sanad.config import (
+    BASE_DIR,
+    DASHBOARD_HOST,
+    DASHBOARD_INTERFACE,
+    DASHBOARD_PORT,
+    DDS_NETWORK_INTERFACE,
+    list_network_interfaces,
+)
+from Project.Sanad.core.logger import get_logger
+
+log = get_logger("system_route")
+
+router = APIRouter()
+
+
+def _safe_status(component, name: str) -> dict[str, Any]:
+    """Return ``component.status()`` as a dict without ever raising.
+
+    ``None`` yields ``{"available": False}``. A non-dict status is wrapped
+    as ``{"raw": str(status)}``. A failing ``status()`` is logged and
+    reported as ``{"available": True, "error": ...}``.
+    """
+    if component is None:
+        return {"available": False}
+    try:
+        if hasattr(component, "status") and callable(component.status):
+            s = component.status()
+            if not isinstance(s, dict):
+                s = {"raw": str(s)}
+            s.setdefault("available", True)
+            return s
+        return {"available": True}
+    except Exception as exc:
+        log.warning("status() failed for %s: %s", name, exc)
+        return {"available": True, "error": str(exc)}
+
+
+@router.get("/info")
+async def system_info():
+    """One-shot system snapshot for the dashboard system panel.
+
+    Every section is best-effort: a failing subsystem, audio probe or
+    interface listing degrades that section instead of failing the request.
+    The snapshot is built in a worker thread via ``asyncio.to_thread``.
+    """
+    def _do():
+        # Subsystems
+        try:
+            from Project.Sanad.main import SUBSYSTEMS
+        except Exception:
+            SUBSYSTEMS = {}
+
+        subsystem_list = []
+        for name in sorted(SUBSYSTEMS):
+            comp = SUBSYSTEMS[name]
+            entry = {
+                "name": name,
+                "connected": comp is not None,
+            }
+            if comp is not None and hasattr(comp, "status") and callable(comp.status):
+                try:
+                    s = comp.status()
+                    if isinstance(s, dict):
+                        entry["status"] = s
+                except Exception as exc:
+                    # Surface the failure in the log as well as the payload.
+                    log.warning("status() failed for %s: %s", name, exc)
+                    entry["status_error"] = str(exc)
+            subsystem_list.append(entry)
+
+        connected_count = sum(1 for s in subsystem_list if s["connected"])
+
+        # Audio device current selection (best-effort)
+        audio_info = {}
+        try:
+            from Project.Sanad.voice import audio_devices as ad
+            pactl_ok = ad.pactl_available()  # probe once, reuse below
+            audio_info = {
+                "pactl_available": pactl_ok,
+                "current": ad.current_selection(),
+                "detected_profile_ids": [
+                    d["profile"]["id"] for d in ad.detect_plugged_profiles()
+                ] if pactl_ok else [],
+            }
+        except Exception as exc:
+            audio_info = {"error": str(exc)}
+
+        # Network interfaces
+        try:
+            interfaces = list_network_interfaces()
+        except Exception:
+            interfaces = []
+
+        # Determine the URL the dashboard is reachable at
+        bound_host = DASHBOARD_HOST
+        if bound_host == "0.0.0.0":
+            # Wildcard bind: show the first interface that is up and has a
+            # non-loopback IP. .get() keeps one malformed entry from turning
+            # this best-effort endpoint into a 500.
+            up_ips = [
+                i["ip"] for i in interfaces
+                if isinstance(i, dict)
+                and i.get("is_up")
+                and i.get("ip")
+                and not str(i["ip"]).startswith("127.")
+            ]
+            display_host = up_ips[0] if up_ips else bound_host
+        else:
+            display_host = bound_host
+
+        return {
+            "host": {
+                "hostname": socket.gethostname(),
+                "platform": platform.platform(),
+                "python": sys.version.split()[0],
+                "executable": sys.executable,
+                "base_dir": str(BASE_DIR),
+                "pid": os.getpid(),
+            },
+            "dashboard": {
+                "interface": DASHBOARD_INTERFACE,
+                "bound_host": bound_host,
+                "display_host": display_host,
+                "port": DASHBOARD_PORT,
+                "url": f"http://{display_host}:{DASHBOARD_PORT}",
+            },
+            "dds": {
+                "interface": DDS_NETWORK_INTERFACE,
+            },
+            "network": {
+                "interfaces": interfaces,
+            },
+            "subsystems": {
+                "total": len(subsystem_list),
+                "connected": connected_count,
+                "disconnected": len(subsystem_list) - connected_count,
+                "list": subsystem_list,
+            },
+            "audio": audio_info,
+        }
+
+    return await asyncio.to_thread(_do)
diff --git a/dashboard/routes/typed_replay.py
b/dashboard/routes/typed_replay.py new file mode 100644 index 0000000..dc1e66d --- /dev/null +++ b/dashboard/routes/typed_replay.py @@ -0,0 +1,162 @@ +"""Typed Replay dashboard endpoints. + +Full CRUD over the records index: + POST /say generate + play + optionally record + POST /replay-last re-play cached audio + POST /save-last persist cached generation + GET /records list + GET /records/{name} get one + POST /records/{name}/play play saved WAV (speaker or raw) + POST /records/{name}/rename rename + DELETE /records/{name} delete + GET /status engine + session state +""" + +from __future__ import annotations + +import asyncio +from typing import Literal + +from fastapi import APIRouter, HTTPException +from fastapi.responses import Response +from pydantic import BaseModel + +from Project.Sanad.core.config_loader import section as _cfg_section +router = APIRouter() + +# MAX_TEXT_LEN — SINGLE SOURCE in dashboard.api_input +MAX_TEXT_LEN = _cfg_section("dashboard", "api_input").get("max_text_len", 2000) + + +class SayPayload(BaseModel): + text: str + record: bool = False + record_name: str = "" + + +class SaveLastPayload(BaseModel): + record_name: str = "" + + +class RenamePayload(BaseModel): + new_name: str + + +class PlayRecordPayload(BaseModel): + file_kind: Literal["speaker", "raw"] = "speaker" + + +def _engine(): + from Project.Sanad.main import typed_replay + if typed_replay is None: + raise HTTPException(503, "TypedReplayEngine not initialized.") + return typed_replay + + +# ───────────────────── generate / replay ───────────────────── + +@router.post("/say") +async def say(payload: SayPayload): + if not payload.text or not payload.text.strip(): + raise HTTPException(400, "text cannot be empty") + if len(payload.text) > MAX_TEXT_LEN: + raise HTTPException(413, f"text too long (max {MAX_TEXT_LEN})") + eng = _engine() + try: + return await eng.say(payload.text, record=payload.record, + record_name=payload.record_name) + except ValueError as exc: + raise 
HTTPException(400, str(exc)) + except RuntimeError as exc: + raise HTTPException(503, str(exc)) + + +@router.post("/replay-last") +async def replay_last(): + eng = _engine() + try: + return await asyncio.to_thread(eng.replay_last) + except RuntimeError as exc: + raise HTTPException(400, str(exc)) + + +@router.get("/audio/last") +async def audio_last(): + """Stream the most recent generation as a WAV. Browser plays it client-side.""" + eng = _engine() + try: + wav = eng.last_audio_wav() + except RuntimeError as exc: + raise HTTPException(404, str(exc)) + return Response( + content=wav, + media_type="audio/wav", + headers={"Cache-Control": "no-store"}, + ) + + +@router.post("/save-last") +async def save_last(payload: SaveLastPayload): + eng = _engine() + try: + return {"ok": True, "record": eng.save_last(payload.record_name)} + except RuntimeError as exc: + raise HTTPException(400, str(exc)) + + +# ───────────────────── record CRUD ─────────────────────────── + +@router.get("/records") +async def list_records(): + return _engine().list_records() + + +@router.get("/records/{name}") +async def get_record(name: str): + try: + return _engine().find_record(name) + except KeyError: + raise HTTPException(404, f"record not found: {name}") + + +@router.post("/records/{name}/play") +async def play_record(name: str, payload: PlayRecordPayload): + eng = _engine() + try: + return await asyncio.to_thread(eng.play_record, name, payload.file_kind) + except KeyError: + raise HTTPException(404, f"record not found: {name}") + except FileNotFoundError as exc: + raise HTTPException(410, f"file missing on disk: {exc}") + except RuntimeError as exc: + raise HTTPException(503, str(exc)) + + +@router.post("/records/{name}/rename") +async def rename_record(name: str, payload: RenamePayload): + eng = _engine() + try: + return {"ok": True, "record": eng.rename_record(name, payload.new_name)} + except KeyError: + raise HTTPException(404, f"record not found: {name}") + except ValueError as exc: + 
raise HTTPException(400, str(exc)) + + +@router.delete("/records/{name}") +async def delete_record(name: str): + eng = _engine() + try: + return {"ok": True, **eng.delete_record(name)} + except KeyError: + raise HTTPException(404, f"record not found: {name}") + + +# ───────────────────── status ──────────────────────────────── + +@router.get("/status") +async def status(): + from Project.Sanad.main import typed_replay + if typed_replay is None: + return {"available": False} + return {"available": True, **typed_replay.status()} diff --git a/dashboard/routes/voice.py b/dashboard/routes/voice.py new file mode 100644 index 0000000..a3d1b88 --- /dev/null +++ b/dashboard/routes/voice.py @@ -0,0 +1,237 @@ +"""Voice endpoints — Gemini interaction, local TTS, prompt management.""" + +from __future__ import annotations + +import asyncio + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.logger import get_logger + +log = get_logger("voice_route") + +router = APIRouter() + +_VR = _cfg_section("dashboard", "voice_route") +_API = _cfg_section("dashboard", "api_input") +# MAX_TEXT_LEN — SINGLE SOURCE in dashboard.api_input +MAX_TEXT_LEN = _API.get("max_text_len", 2000) +_API_KEY_MASK_VISIBLE = _VR.get("api_key_mask_visible", 4) + + +def _mask_api_key(key: str) -> str: + """Mask an API key for display — keeps 4 chars on each end. 
+ + Examples: + "" → "" + "AIza123" → "*******" (≤8 chars = full mask) + "AIzaSy...kqf8" → "AIza***...kqf8" (>8 chars = partial mask) + """ + if not key: + return "" + if len(key) <= 8: + return "*" * len(key) + return f"{key[:4]}{'*' * (len(key) - 8)}{key[-4:]}" + + +class TextPayload(BaseModel): + text: str + engine: str = "gemini" # "gemini" | "local" + + +@router.get("/status") +async def voice_status(): + from Project.Sanad.main import voice_client, local_tts + return { + "gemini": voice_client.status() if voice_client else {}, + "local_tts": local_tts.status() if local_tts else {}, + } + + +@router.post("/generate") +async def generate_speech(payload: TextPayload): + """Generate speech from text using Gemini or local TTS.""" + if not payload.text.strip(): + raise HTTPException(400, "Text cannot be empty.") + if len(payload.text) > MAX_TEXT_LEN: + raise HTTPException(413, f"Text too long (max {MAX_TEXT_LEN} chars).") + + from Project.Sanad.main import voice_client, local_tts, audio_mgr + + if payload.engine == "local": + if local_tts is None: + raise HTTPException(503, "Local TTS not available.") + pcm = await asyncio.to_thread(local_tts.synthesize, payload.text) + if audio_mgr: + await asyncio.to_thread(audio_mgr.play_pcm, pcm, 1, 16000, 2) + return { + "ok": True, + "engine": "local", + "duration_sec": round(len(pcm) / (16000 * 2), 3), + } + else: + if voice_client is None: + raise HTTPException(503, "Voice client not initialized.") + if not voice_client.connected: + try: + await voice_client.connect() + except Exception: + log.exception("Gemini reconnect failed in /generate") + raise HTTPException(503, "Gemini not connected and reconnect failed.") + # Check session ownership — TypedReplay or live loop may hold it + if voice_client.session_owner is not None: + raise HTTPException( + 409, + f"Voice session busy (owned by {voice_client.session_owner})", + ) + try: + audio_bytes, text_parts = await voice_client.send_text( + payload.text, owner="voice_route" + ) 
+ except RuntimeError as exc: + raise HTTPException(503, str(exc)) + except Exception as exc: + raise HTTPException(502, f"Gemini communication error: {exc}") + if audio_bytes and audio_mgr: + await asyncio.to_thread(audio_mgr.play_pcm, audio_bytes, 1, 24000, 2) + return { + "ok": True, + "engine": "gemini", + "has_audio": bool(audio_bytes), + "text_response": text_parts, + } + + +@router.post("/connect") +async def connect_gemini(): + from Project.Sanad.main import voice_client + if voice_client is None: + raise HTTPException(503, "Voice client not initialized.") + try: + await voice_client.connect() + except Exception as exc: + raise HTTPException(502, f"Gemini connection failed: {exc}") + return {"connected": voice_client.connected} + + +@router.post("/disconnect") +async def disconnect_gemini(): + from Project.Sanad.main import voice_client + if voice_client: + await voice_client.disconnect() + return {"connected": False} + + +# ─────────────────────── Gemini API key management ─────────────────────── + +class ApiKeyPayload(BaseModel): + api_key: str + + +@router.get("/api-key") +async def get_api_key(): + """Return the current Gemini API key in masked form. + + Never returns the full key. 
Response: + { + "has_key": true, + "masked": "AIza***...kqf8", + "length": 39, + "source": "config_file" | "default" + } + """ + import Project.Sanad.config as cfg_mod + key = getattr(cfg_mod, "GEMINI_API_KEY", "") or "" + # Detect where the value came from (persisted override vs module default) + try: + from Project.Sanad.config import load_config + stored = load_config().get("gemini", {}) or {} + source = "config_file" if stored.get("api_key") else "default" + except Exception: + source = "default" + return { + "has_key": bool(key), + "masked": _mask_api_key(key), + "length": len(key), + "source": source, + } + + +@router.post("/api-key") +async def update_api_key(payload: ApiKeyPayload): + """Update the Gemini API key — persists to data/motions/config.json and + hot-swaps the in-memory value so the next Gemini connect uses it. + + Also disconnects any currently-connected Gemini session so that the + next reconnect picks up the new key cleanly. Returns the NEW masked + key + a flag telling the dashboard to trigger a reconnect. + """ + key = payload.api_key.strip() + if not key: + raise HTTPException(400, "API key cannot be empty.") + if len(key) < 20: + raise HTTPException(400, "API key looks too short.") + if not key.startswith("AIza"): + raise HTTPException( + 400, + "Gemini API keys normally start with 'AIza'. " + "Double-check you're pasting a Google AI Studio key.", + ) + + # Persist to data/motions/config.json (atomic temp-then-replace) + try: + from Project.Sanad.config import load_config, save_config + cfg = load_config() or {} + gemini_cfg = cfg.get("gemini") if isinstance(cfg.get("gemini"), dict) else {} + gemini_cfg["api_key"] = key + cfg["gemini"] = gemini_cfg + save_config(cfg) + except Exception as exc: + log.exception("Failed to persist API key to config.json") + raise HTTPException(500, f"Could not save config: {exc}") + + # Hot-swap the in-memory module globals. 
+ # Both Project.Sanad.config AND Project.Sanad.gemini.client + # have their OWN reference to GEMINI_API_KEY (the latter was created + # at `from Project.Sanad.config import GEMINI_API_KEY` at import time). + # Python's `from X import Y` binds a local name — updating config.Y + # alone does NOT propagate to the importer, so we must patch both. + try: + import Project.Sanad.config as _cfg_mod + _cfg_mod.GEMINI_API_KEY = key + except Exception: + log.exception("could not patch config.GEMINI_API_KEY") + + try: + import Project.Sanad.gemini.client as _gc + _gc.GEMINI_API_KEY = key + except Exception: + log.exception("could not patch gemini.client.GEMINI_API_KEY") + + # Disconnect any live session so reconnect uses the new key. + from Project.Sanad.main import voice_client + was_connected = False + if voice_client is not None: + was_connected = bool(getattr(voice_client, "connected", False)) + if was_connected: + try: + await voice_client.disconnect() + except Exception: + log.exception("disconnect during api-key swap failed") + + log.info("Gemini API key updated (length=%d) source=config_file", len(key)) + + return { + "ok": True, + "masked": _mask_api_key(key), + "length": len(key), + "source": "config_file", + "was_connected": was_connected, + "message": ( + "API key saved. Click 'Connect' to reopen the Gemini session with " + "the new key. Any running Live Gemini subprocess must be restarted " + "separately (Stop → Start) to pick up the new key." + ), + } diff --git a/dashboard/static/index.html b/dashboard/static/index.html new file mode 100644 index 0000000..12e0b1d --- /dev/null +++ b/dashboard/static/index.html @@ -0,0 +1,519 @@ + + + + + + Sanad Dashboard + + + +

+ + +

Sanad Dashboard

+ + + + + + Connecting... + +

+ + +

Voice & Audio

Recordings

Settings & Logs

+ + +

Gemini API Key

+ The key used by GeminiVoiceClient and the Live Gemini subprocess. + Saved to data/motions/config.json. Get a free key at + aistudio.google.com/app/apikey. +

+ Current + + + +

+ New key + + + +

+ + +

Typed Replay Engine

+ +

+ + Record speaker + +

+ + + +

+ Session +

+ +

+ + +

Saved Records

No records saved

+ + +

+ +

+ + +

Scripts Manager

+ +

+ + + + +

+ + +

Prompt Management

+ +

+ + +

Live Logs

+ + + + + +

+ +

+
+
+
+
diff --git a/dashboard/static/login.html b/dashboard/static/login.html
new file mode 100644
index 0000000..d2a0550
--- /dev/null
+++ b/dashboard/static/login.html
@@ -0,0 +1,101 @@
+
+
+
+
+
+Sanad — Sign in
+
+
+
+
+
+
+
+
diff --git a/dashboard/websockets/__init__.py b/dashboard/websockets/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/dashboard/websockets/log_stream.py b/dashboard/websockets/log_stream.py
new file mode 100644
index 0000000..17b9af3
--- /dev/null
+++ b/dashboard/websockets/log_stream.py
@@ -0,0 +1,80 @@
+"""WebSocket endpoint for real-time log streaming.
+
+Clients connect to /ws/logs and receive live log lines from all modules.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import threading
+from collections import deque
+
+from fastapi import APIRouter, WebSocket, WebSocketDisconnect
+
+from Project.Sanad.core.logger import set_ws_push
+
+router = APIRouter()
+
+MAX_WATCHERS = 50
+
+# Ring buffer of recent log lines (shared across connections).
+_recent: deque[str] = deque(maxlen=500)
+_watchers: set[asyncio.Queue] = set()
+# Event loop that owns each watcher queue. asyncio.Queue is not thread-safe,
+# so producers on other threads must hand lines over through this loop.
+_watcher_loops: dict[asyncio.Queue, asyncio.AbstractEventLoop] = {}
+_watchers_lock = threading.Lock()
+
+
+def _offer(q: asyncio.Queue, line: str) -> None:
+    """Enqueue *line* on *q*, dropping it if the queue is full."""
+    try:
+        q.put_nowait(line)
+    except asyncio.QueueFull:
+        # Drop on overflow rather than block — logs are not critical data
+        pass
+
+
+def push_log_line(line: str):
+    """Called from the logging system to feed new lines.
+
+    May be called from any thread (logging is multi-threaded), so the
+    watchers are snapshotted under a lock and each line is delivered on the
+    watcher's own event loop via ``call_soon_threadsafe``.
+    """
+    _recent.append(line)
+    with _watchers_lock:
+        snapshot = [(q, _watcher_loops.get(q)) for q in _watchers]
+    for q, loop in snapshot:
+        if loop is None:
+            # Queue was added to _watchers without a loop entry: deliver
+            # directly, as before.
+            _offer(q, line)
+            continue
+        try:
+            loop.call_soon_threadsafe(_offer, q, line)
+        except RuntimeError:
+            # The loop is already closed; the watcher is going away.
+            pass
+
+
+# Register with the logger so all log records are pushed to WS clients.
+# Wrap so a logger registration failure doesn't break Dashboard import.
+try:
+    set_ws_push(push_log_line)
+except Exception:
+    # Still best-effort, but leave a trace instead of failing silently.
+    logging.getLogger(__name__).warning(
+        "could not register WebSocket log push", exc_info=True
+    )
+
+
+@router.websocket("/ws/logs")
+async def log_ws(ws: WebSocket):
+    """Stream the recent-log buffer, then live log lines, to one client.
+
+    At most MAX_WATCHERS clients are served; extra clients are closed with
+    code 1013. The watcher is always unregistered on exit.
+    """
+    await ws.accept()
+
+    queue: asyncio.Queue[str] = asyncio.Queue(maxsize=200)
+    loop = asyncio.get_running_loop()
+    # Decide admission under the lock, but never await while holding it:
+    # it is a threading.Lock shared with every logging thread.
+    with _watchers_lock:
+        admitted = len(_watchers) < MAX_WATCHERS
+        if admitted:
+            _watchers.add(queue)
+            _watcher_loops[queue] = loop
+    if not admitted:
+        await ws.close(code=1013, reason="Too many log watchers")
+        return
+
+    try:
+        # Send recent history
+        for line in list(_recent):
+            await ws.send_text(line)
+
+        while True:
+            line = await queue.get()
+            await ws.send_text(line)
+    except WebSocketDisconnect:
+        pass
+    except Exception:
+        # Any other error closes the connection cleanly
+        try:
+            await ws.close()
+        except Exception:
+            pass
+    finally:
+        with _watchers_lock:
+            _watchers.discard(queue)
+            _watcher_loops.pop(queue, None)
diff --git a/data/audio/records.json b/data/audio/records.json
new file mode 100644
index 0000000..d15bab5
--- /dev/null
+++ b/data/audio/records.json
@@ -0,0 +1,5 @@
+{
+  "records": [],
+  "total_records": 0,
+  "last_updated": ""
+}
\ No newline at end of file
diff --git a/gemini/__init__.py b/gemini/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/gemini/client.py b/gemini/client.py
new file mode 100644
index 0000000..b300da0
--- /dev/null
+++ b/gemini/client.py
@@ -0,0 +1,341 @@
+"""Gemini WebSocket client for real-time voice interaction.
+ +Provides: + - Bidirectional audio streaming (mic → Gemini → speaker) + - Text-to-speech via typed input + - Voice-command detection through transcription parsing + - System instruction injection for persona control +""" + +from __future__ import annotations + +import asyncio +import base64 +import inspect +import json +from typing import Any + +import websockets + +from Project.Sanad.config import ( + GEMINI_API_KEY, + GEMINI_MODEL, + GEMINI_VOICE, + GEMINI_WS_TIMEOUT, + GEMINI_WS_URI, +) +from Project.Sanad.core.config_loader import section as _cfg_section +from Project.Sanad.core.event_bus import bus +from Project.Sanad.core.logger import get_logger + +log = get_logger("gemini_client") + +_GC = _cfg_section("gemini", "client") +# Default system prompt — SINGLE SOURCE in core.gemini_defaults +_DEFAULT_SYSTEM_PROMPT = _cfg_section("core", "gemini_defaults").get( + "default_system_prompt", + "You are Sanad (Bousandah), a wise and friendly Emirati assistant. " + "Speak in UAE dialect (Khaleeji). Be helpful and concise." +) +_RECV_TIMEOUT_SEC = _GC.get("recv_timeout_sec", 30) +_RECONNECT_MAX_ATTEMPTS = _GC.get("reconnect_max_attempts", 3) +_RECONNECT_INITIAL_DELAY_SEC = _GC.get("reconnect_initial_delay_sec", 1.0) +_RECONNECT_MAX_DELAY_SEC = _GC.get("reconnect_max_delay_sec", 10.0) + + +class GeminiVoiceClient: + """Manages one WebSocket session to the Gemini Bidi audio API. + + Concurrency model: + - `_send_lock` serializes ALL websocket writes. + - `_session_lock` ensures only one consumer (live loop OR typed replay) + owns the receive stream at a time. Acquired by send_text and + receive_stream context managers. + - `_owner` records who currently holds the session lock for diagnostics. 
+ """ + + def __init__(self, system_prompt: str = ""): + self.system_prompt = system_prompt or _DEFAULT_SYSTEM_PROMPT + self._ws: Any = None + self._connected = False + self._send_lock = asyncio.Lock() + self._session_lock = asyncio.Lock() + self._connect_lock = asyncio.Lock() # serializes reconnect attempts + self._owner: str | None = None + self._reconnect_attempts = 0 + + @property + def connected(self) -> bool: + return self._connected + + @property + def session_owner(self) -> str | None: + return self._owner + + def _ws_kwargs(self) -> dict[str, Any]: + kwargs: dict[str, Any] = {"max_size": None, "open_timeout": 30} + try: + sig = inspect.signature(websockets.connect) + key = "extra_headers" if "extra_headers" in sig.parameters else "additional_headers" + except Exception: + key = "extra_headers" + kwargs[key] = {"Content-Type": "application/json"} + return kwargs + + async def connect(self): + uri = f"{GEMINI_WS_URI}?key={GEMINI_API_KEY}" + try: + self._ws = await websockets.connect(uri, **self._ws_kwargs()) + setup = { + "setup": { + "model": GEMINI_MODEL, + "generationConfig": { + "responseModalities": ["AUDIO"], + "speechConfig": { + "voiceConfig": { + "prebuiltVoiceConfig": {"voiceName": GEMINI_VOICE} + } + }, + }, + "systemInstruction": {"parts": [{"text": self.system_prompt}]}, + } + } + await self._ws.send(json.dumps(setup)) + await self._ws.recv() # ACK + self._connected = True + self._reconnect_attempts = 0 + log.info("Connected to Gemini (%s)", GEMINI_MODEL) + await bus.emit("voice.connected") + except Exception: + self._connected = False + self._ws = None + log.exception("Failed to connect to Gemini") + raise + + async def disconnect(self): + try: + if self._ws is not None: + await self._ws.close() + except Exception: + pass + finally: + self._ws = None + self._connected = False + self._owner = None + log.info("Disconnected from Gemini") + await bus.emit("voice.disconnected") + + async def _ensure_connected(self): + """Reconnect if dropped, with 
bounded retries. + + Serialized via _connect_lock so concurrent callers don't trigger + duplicate handshakes. + """ + # Fast path — no lock needed + if self._connected and self._ws is not None: + return True + + async with self._connect_lock: + # Re-check inside the lock (another coroutine may have just connected) + if self._connected and self._ws is not None: + return True + + max_attempts = _RECONNECT_MAX_ATTEMPTS + delay = _RECONNECT_INITIAL_DELAY_SEC + for attempt in range(max_attempts): + try: + log.warning("Reconnecting to Gemini (attempt %d/%d)", attempt + 1, max_attempts) + await self.connect() + return True + except Exception: + self._reconnect_attempts += 1 + await asyncio.sleep(delay) + delay = min(delay * 2, _RECONNECT_MAX_DELAY_SEC) + log.error("Reconnect failed after %d attempts", max_attempts) + await bus.emit("voice.error", reason="reconnect_failed") + return False + + async def send_audio_chunk(self, pcm_b64: str) -> bool: + """Send a base64-encoded PCM audio chunk (mic input). + + Returns False on failure so the caller can react instead of silently + no-op'ing forever (the original bug). + """ + if not self._connected or self._ws is None: + return False + msg = { + "realtimeInput": { + "mediaChunks": [ + {"mimeType": "audio/pcm;rate=16000", "data": pcm_b64} + ] + } + } + try: + async with self._send_lock: + await self._ws.send(json.dumps(msg)) + return True + except websockets.exceptions.ConnectionClosed: + log.warning("send_audio_chunk: connection closed") + self._connected = False + await bus.emit("voice.error", reason="connection_closed") + return False + except Exception: + log.exception("send_audio_chunk failed") + return False + + async def send_text(self, text: str, owner: str = "send_text") -> tuple[bytes, list[str]]: + """Send text, receive audio response. Returns (audio_bytes, text_parts). + + Acquires the session lock for the entire request/response cycle so + no other consumer can steal frames from the receive side. 
+ If the connection drops mid-request, reconnects once and retries. + """ + if not await self._ensure_connected(): + raise RuntimeError("Not connected to Gemini and reconnect failed.") + + async with self._session_lock: + self._owner = owner + try: + return await self._send_text_inner(text) + except websockets.exceptions.ConnectionClosed: + log.warning("send_text: connection died on send — reconnecting once") + self._connected = False + if not await self._ensure_connected(): + raise RuntimeError("Reconnect after send failure also failed.") + return await self._send_text_inner(text) + finally: + self._owner = None + + async def _send_text_inner(self, text: str) -> tuple[bytes, list[str]]: + """Inner send/receive loop — caller must hold _session_lock.""" + request = { + "client_content": { + "turns": [{"role": "user", "parts": [{"text": text}]}], + "turn_complete": True, + } + } + async with self._send_lock: + await self._ws.send(json.dumps(request)) + + audio_chunks: list[bytes] = [] + text_parts: list[str] = [] + + while True: + try: + raw = await asyncio.wait_for(self._ws.recv(), timeout=GEMINI_WS_TIMEOUT) + except asyncio.TimeoutError: + log.warning("send_text: recv timed out") + break + except websockets.exceptions.ConnectionClosed: + log.warning("send_text: connection closed mid-stream") + self._connected = False + break + + try: + resp = json.loads(raw) + except json.JSONDecodeError: + log.warning("send_text: bad JSON from server") + continue + + if "error" in resp: + log.error("Gemini error: %s", resp["error"]) + await bus.emit("voice.error", reason=str(resp["error"])) + break + + sc = resp.get("serverContent", {}) + mt = sc.get("modelTurn", {}) + for part in mt.get("parts", []): + inline = part.get("inlineData") + if inline and inline.get("data"): + audio_chunks.append(base64.b64decode(inline["data"])) + tp = part.get("text") + if isinstance(tp, str) and tp.strip(): + text_parts.append(tp.strip()) + + input_tr = sc.get("inputTranscription", {}) + if 
input_tr.get("text"): + await bus.emit("voice.user_said", text=input_tr["text"]) + + if sc.get("turnComplete") or sc.get("generationComplete"): + break + + audio_bytes = b"".join(audio_chunks) + if audio_bytes: + await bus.emit("voice.gemini_spoke", audio_len=len(audio_bytes)) + return audio_bytes, text_parts + + def acquire_session(self, owner: str) -> "_SessionGuard": + """Return an async context manager for exclusive session ownership. + + Use as `async with client.acquire_session("live_voice"):`. + While held, no other consumer may call send_text or receive_stream. + """ + return _SessionGuard(self, owner) + + async def receive_stream(self): + """Yield server events. Caller MUST hold the session lock.""" + if self._owner is None: + raise RuntimeError( + "receive_stream requires session lock — use acquire_session() first" + ) + if not self._connected or self._ws is None: + return + try: + async for raw in self._ws: + try: + resp = json.loads(raw) + except json.JSONDecodeError: + continue + yield resp.get("serverContent", {}) + except websockets.exceptions.ConnectionClosed: + log.warning("receive_stream: connection closed") + self._connected = False + await bus.emit("voice.error", reason="connection_closed") + + async def raw_send(self, payload: dict): + """Low-level send for the live loop. Always use through send lock.""" + if not self._connected or self._ws is None: + return False + try: + async with self._send_lock: + await self._ws.send(json.dumps(payload)) + return True + except Exception: + log.exception("raw_send failed") + return False + + def status(self) -> dict[str, Any]: + return { + "connected": self._connected, + "model": GEMINI_MODEL, + "voice": GEMINI_VOICE, + "session_owner": self._owner, + "reconnect_attempts": self._reconnect_attempts, + } + + +class _SessionGuard: + """Async context manager for exclusive session ownership. + + Always releases owner + lock on exit, even on exceptions. 
+ """ + + def __init__(self, client: GeminiVoiceClient, owner: str): + self._client = client + self._owner = owner + self._held = False + + async def __aenter__(self): + await self._client._session_lock.acquire() + self._held = True + self._client._owner = self._owner + return self._client + + async def __aexit__(self, exc_type, exc, tb): + try: + self._client._owner = None + finally: + if self._held: + self._client._session_lock.release() + self._held = False + return False # don't suppress exceptions diff --git a/main.py b/main.py new file mode 100644 index 0000000..6420228 --- /dev/null +++ b/main.py @@ -0,0 +1,295 @@ +#!/usr/bin/env python3 +"""Sanad — unified robot assistant entry point. + +Starts all subsystems and the FastAPI dashboard. + + python main.py # default port 8000 + python main.py --port 8080 # custom port +""" + +from __future__ import annotations + +import argparse +import importlib +import os +import sys +import types +from pathlib import Path + +# ───────────────────────────────────────────────────────────────────────────── +# Layout detection — support BOTH: +# 1. Dev layout: /Project/Sanad/main.py (imports use Project.Sanad.*) +# 2. Deployed layout: /home/unitree/Sanad/main.py (no Project/ wrapper) +# +# In the deployed case we synthesize a `Project` namespace package and alias +# `Project.Sanad` → the local `Sanad` package, so every `from Project.Sanad.X +# import Y` keeps working without rewriting any other file. +# ───────────────────────────────────────────────────────────────────────────── +_THIS_DIR = Path(__file__).resolve().parent # .../Sanad +_PARENT = _THIS_DIR.parent # .../Project OR /home/unitree + +if _PARENT.name == "Project": + # Dev layout — add the directory containing Project/ + _ROOT = _PARENT.parent + if str(_ROOT) not in sys.path: + sys.path.insert(0, str(_ROOT)) + # When this dir isn't named "Sanad" (e.g. 
"Sanad_lite"), the internal + # `from Project.Sanad.X import Y` imports would resolve to a sibling + # Project/Sanad/ package next door — NOT this lite copy. Alias + # Project.Sanad → Project. so every internal import stays + # inside this package. + if _THIS_DIR.name != "Sanad" and "Project.Sanad" not in sys.modules: + _self_pkg = importlib.import_module(f"Project.{_THIS_DIR.name}") + sys.modules["Project.Sanad"] = _self_pkg + import Project as _proj_mod # noqa: E402 + _proj_mod.Sanad = _self_pkg # type: ignore[attr-defined] +else: + # Deployed layout — create a virtual Project package and alias + if str(_PARENT) not in sys.path: + sys.path.insert(0, str(_PARENT)) + if "Project" not in sys.modules: + _proj = types.ModuleType("Project") + _proj.__path__ = [] # mark as namespace package + sys.modules["Project"] = _proj + if "Project.Sanad" not in sys.modules: + # Import the local Sanad package as a top-level module first + _sanad = importlib.import_module(_THIS_DIR.name) + sys.modules["Project.Sanad"] = _sanad + sys.modules["Project"].Sanad = _sanad # type: ignore[attr-defined] + +# When main.py runs as a script (`python3 main.py`), Python loads it as the +# `__main__` module — NOT as `Project.Sanad.main`. Route handlers later do +# `from Project.Sanad.main import arm` etc; without the alias below, Python +# would re-execute this file from scratch under a different module name, +# creating a SECOND set of subsystem instances (uninitialised). Every +# `subsystem not available` / `No LowState` symptom traces back to this. +# The alias ensures both names point at the exact same module object. +if __name__ == "__main__": + sys.modules["Project.Sanad.main"] = sys.modules["__main__"] + +# asyncio compat shim — backfills asyncio.to_thread for Python 3.8. +# MUST be imported before any other Sanad module that uses asyncio.to_thread. 
+from Project.Sanad.core import asyncio_compat  # noqa: F401
+
+from Project.Sanad.config import (
+    DASHBOARD_HOST,
+    DASHBOARD_PORT,
+    DASHBOARD_INTERFACE,
+    DDS_NETWORK_INTERFACE,
+)
+from Project.Sanad.core.logger import get_logger
+
+log = get_logger("main")
+
+
+def _safe_import(label: str, importer):
+    """Run the ``importer`` callable and return its result.
+
+    Any ``Exception`` it raises is logged (with traceback) under ``label``
+    and turned into a ``None`` return value.
+    """
+    try:
+        imported = importer()
+    except Exception:
+        log.exception("Failed to import %s — that subsystem will be unavailable", label)
+        return None
+    return imported
+
+
+def _safe_construct(name: str, factory):
+    """Call ``factory`` to build the subsystem called ``name``.
+
+    Returns ``None`` when ``factory`` is ``None`` or when it raises an
+    ``Exception`` (which is logged with its traceback).
+    """
+    if factory is not None:
+        try:
+            return factory()
+        except Exception:
+            log.exception("Failed to construct %s — that subsystem will be unavailable", name)
+    return None
+
+
+def _import_attr(module_path: str, attr: str):
+    """Import ``module_path`` and return the attribute ``attr`` from it."""
+    return getattr(importlib.import_module(module_path), attr)
+
+
+# ── isolated imports — one bad module never blocks the others ──
+Brain = _safe_import(
+    "Brain",
+    lambda: _import_attr("Project.Sanad.core.brain", "Brain"),
+)
+AudioManager = _safe_import(
+    "AudioManager",
+    lambda: _import_attr("Project.Sanad.voice.audio_manager", "AudioManager"),
+)
+LocalTTSEngine = _safe_import(
+    "LocalTTSEngine",
+    lambda: _import_attr("Project.Sanad.voice.local_tts", "LocalTTSEngine"),
+)
+TypedReplayEngine = _safe_import(
+    "TypedReplayEngine",
+    lambda: _import_attr("Project.Sanad.voice.typed_replay", "TypedReplayEngine"),
+)
+GeminiVoiceClient = _safe_import(
+    "GeminiVoiceClient",
+    lambda: _import_attr("Project.Sanad.gemini.client", "GeminiVoiceClient"),
+)
+
+
+# ── global instances (imported by route modules) ──
+
+# _safe_construct already maps a missing class (None) to a None instance.
+brain = _safe_construct("brain", Brain)
+audio_mgr = _safe_construct("audio_mgr", AudioManager)
+voice_client = _safe_construct("voice_client", GeminiVoiceClient)
+local_tts = _safe_construct("local_tts", LocalTTSEngine)
+
+
+def _build_typed_replay():
+    """Build the typed-replay engine on top of the voice client and audio manager."""
+    return TypedReplayEngine(voice_client, audio_mgr)
+
+
+# Typed replay is only attempted when its class and both dependencies exist.
+typed_replay = _safe_construct(
+    "typed_replay",
+    _build_typed_replay if (TypedReplayEngine and voice_client and audio_mgr) else None,
+)
+
+
+# Wire everything into the Brain (only what was constructed)
+def _safe_attach(method_name: str, value):
+    """Call ``brain.<method_name>(value)`` if the brain, the method and the value exist.
+
+    An ``Exception`` raised by the call is logged and not propagated.
+    """
+    attach = None
+    if brain is not None and value is not None:
+        attach = getattr(brain, method_name, None)
+    if attach is None:
+        return
+    try:
+        attach(value)
+    except Exception:
+        log.exception("brain.%s failed", method_name)
+
+
+_safe_attach("attach_voice", voice_client)
+_safe_attach("attach_audio_manager", audio_mgr)
+
+
+# ── Runtime sanity report ────────────────────────────────────────────────
+SUBSYSTEMS = {
+    "brain": brain,
+    "audio_mgr": audio_mgr,
+    "voice_client": voice_client,
+    "local_tts": local_tts,
+    "typed_replay": typed_replay,
+}
+
+# Critical subsystems — if any of these are None, log an error at startup
+CRITICAL_SUBSYSTEMS = ("brain",)
+
+for _name in CRITICAL_SUBSYSTEMS:
+    if SUBSYSTEMS.get(_name) is None:
+        log.error("CRITICAL subsystem '%s' is None — application will be unusable", _name)
+
+# Split the registry into constructed / missing names for the startup log.
+_available = [key for key, inst in SUBSYSTEMS.items() if inst is not None]
+_missing = [key for key, inst in SUBSYSTEMS.items() if inst is None]
+log.info("Subsystems available (%d): %s", len(_available), ", ".join(_available))
+if _missing:
+    log.warning("Subsystems unavailable (%d): %s", len(_missing), ", ".join(_missing))
+
+
+_already_shut_down = False
+
+
+def _do_shutdown(from_signal: bool = False):
+    """Shut down once: close the audio manager if it exists and has ``close``.
+
+    Later calls return immediately. ``from_signal`` is accepted but not used
+    by this function. A failure inside ``audio_mgr.close()`` is logged, not
+    raised.
+    """
+    global _already_shut_down
+    if _already_shut_down:
+        return
+    _already_shut_down = True
+    log.info("Shutdown requested")
+
+    if audio_mgr is not None:
+        try:
+            if hasattr(audio_mgr, "close"):
+                audio_mgr.close()
+        except Exception:
+            log.exception("audio_mgr.close() failed")
+
+    log.info("Shutdown complete")
+
+
+import atexit  # noqa: E402
+atexit.register(_do_shutdown)
+# NOTE: Do NOT install custom SIGINT/SIGTERM handlers here.
+# Uvicorn installs its own signal handlers for graceful shutdown.
+# If we override them, Ctrl+C never reaches uvicorn and the server
+# keeps running forever. Our _do_shutdown runs via atexit instead.
+
+
+def _print_env_diagnostic():
+    """Print an environment report to stdout for debugging a deployment.
+
+    Covers the interpreter, the resolved directories, the dashboard / DDS
+    settings, the head of ``sys.path``, a probe import of each third-party
+    module named below, and which ``SUBSYSTEMS`` entries are not ``None``.
+    """
+    print("=" * 60)
+    print("SANAD ENVIRONMENT DIAGNOSTIC")
+    print("=" * 60)
+    print(f"Python: {sys.version}")
+    print(f"Executable: {sys.executable}")
+    print(f"Platform: {sys.platform}")
+    print(f"BASE_DIR: {_THIS_DIR}")
+    print(f"Parent: {_PARENT}")
+    print(f"Layout: {'dev (Project/Sanad)' if _PARENT.name == 'Project' else 'deployed (top-level Sanad)'}")
+    print(f"Dashboard: {DASHBOARD_HOST}:{DASHBOARD_PORT} (interface: {DASHBOARD_INTERFACE})")
+    print(f"DDS interface: {DDS_NETWORK_INTERFACE}")
+    print()
+    print("sys.path[0:8]:")
+    for p in sys.path[:8]:
+        print(f" {p}")
+    print()
+    print("Critical imports:")
+    for mod_name in ("uvicorn", "fastapi", "pydantic", "starlette",
+                     "websockets", "httpx", "pyaudio", "pyrealsense2",
+                     "unitree_sdk2py", "ultralytics", "numpy", "cv2"):
+        try:
+            mod = importlib.import_module(mod_name)
+        except (Exception, SystemExit) as exc:
+            # SystemExit is caught in case a module calls sys.exit() while
+            # importing. KeyboardInterrupt is deliberately NOT caught, so
+            # Ctrl+C still aborts the diagnostic.
+            print(f" ✗ {mod_name:18s} {type(exc).__name__}: {exc}")
+            continue
+        # __version__ is not guaranteed to be a str; str() keeps the "12s"
+        # format spec from raising and misreporting a module that imported.
+        ver = str(getattr(mod, "__version__", "?"))
+        path = getattr(mod, "__file__", "?")
+        print(f" ✓ {mod_name:18s} {ver:12s} {path}")
+    print()
+    print("Subsystems available (after constructing main module globals):")
+    for name in sorted(SUBSYSTEMS):
+        print(f" {'✓' if SUBSYSTEMS[name] is not None else '✗'} {name}")
+    print("=" * 60)
+
+
+def main():
+    """Parse the command line, then either print the diagnostic or serve.
+
+    With ``--check-env`` the environment report is printed and the function
+    returns. Otherwise uvicorn and the dashboard app are imported and the
+    server runs until it stops. An import failure or a server crash exits
+    the process with status 1; a ``SystemExit`` raised by uvicorn itself
+    keeps its own exit status.
+    """
+    parser = argparse.ArgumentParser(description="Sanad Robot Assistant")
+    parser.add_argument("--host", default=DASHBOARD_HOST,
+                        help=f"Dashboard bind address. Default is wlan0's IP "
+                             f"({DASHBOARD_HOST!r}). Override with SANAD_DASHBOARD_HOST "
+                             f"or SANAD_DASHBOARD_INTERFACE.")
+    parser.add_argument("--port", type=int, default=DASHBOARD_PORT)
+    parser.add_argument("--network", default=DDS_NETWORK_INTERFACE,
+                        help="DDS network interface (e.g. eth0, lo). "
+                             "Override with SANAD_DDS_INTERFACE env var.")
+    parser.add_argument("--check-env", action="store_true",
+                        help="Print environment diagnostic and exit "
+                             "(no server start, no hardware init)")
+    args = parser.parse_args()
+
+    if args.check_env:
+        _print_env_diagnostic()
+        return
+
+    log.info("Sanad starting — Python %s @ %s", sys.version.split()[0], sys.executable)
+    log.info("BASE_DIR: %s", _THIS_DIR)
+    log.info("Dashboard interface: %s → bound to %s", DASHBOARD_INTERFACE, args.host)
+    log.info("Starting Sanad — host=%s port=%d network=%s", args.host, args.port, args.network)
+    if brain is not None:
+        try:
+            log.info("Brain status: %s", brain.status())
+        except Exception:
+            log.exception("brain.status() failed")
+
+    # ── import uvicorn ──────────────────────────────────────────────────
+    # Catch any Exception (not just ImportError) so the real failure reason
+    # is surfaced — e.g. uvicorn pulling in a broken transitive dep, or being
+    # installed under a different site-packages than the active interpreter.
+    # KeyboardInterrupt / SystemExit are not intercepted.
+    try:
+        import uvicorn  # type: ignore
+    except Exception as exc:
+        log.error("Could not import uvicorn: %s: %s", type(exc).__name__, exc)
+        log.error("Python: %s", sys.executable)
+        log.error("sys.path[0:5]: %s", sys.path[:5])
+        log.error("Try: %s -m pip install --user 'uvicorn[standard]' fastapi", sys.executable)
+        sys.exit(1)
+    log.info("uvicorn %s loaded from %s",
+             getattr(uvicorn, "__version__", "?"),
+             getattr(uvicorn, "__file__", "?"))
+
+    # ── import the FastAPI app ──────────────────────────────────────────
+    # Pass the app object directly so uvicorn doesn't have to re-resolve the
+    # import path (which differs between dev and deployed layouts).
+    try:
+        from Project.Sanad.dashboard.app import app as _app
+    except Exception:
+        log.exception("Could not import Dashboard.app — aborting")
+        sys.exit(1)
+
+    # ── start the server ────────────────────────────────────────────────
+    try:
+        uvicorn.run(
+            _app,
+            host=args.host,
+            port=args.port,
+            log_level="info",
+        )
+    except KeyboardInterrupt:
+        # Ctrl+C is a normal stop request, not a failure.
+        log.info("Interrupted — server stopped")
+    except Exception:
+        # SystemExit is not caught here, so an exit status chosen by uvicorn
+        # reaches the caller unchanged instead of being rewritten to 1.
+        log.exception("uvicorn.run() failed")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..595b33f
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,25 @@
+# Sanad — Python dependencies
+# Install: pip install -r requirements.txt
+
+# Dashboard
+fastapi>=0.110.0
+uvicorn[standard]>=0.29.0
+python-multipart>=0.0.9
+itsdangerous>=2.1.0  # required by Starlette's SessionMiddleware
+
+# Gemini voice
+websockets>=12.0
+pyaudio>=0.2.13
+
+# Camera proxy
+httpx>=0.27.0
+
+# Local TTS (optional — only needed for MBZUAI model)
+transformers>=4.40.0
+sentencepiece>=0.2.0
+torch>=2.2.0
+datasets>=2.19.0
+soundfile>=0.12.0
+
+# General
+numpy>=1.24.0
diff --git a/scripts/sanad_rule.txt b/scripts/sanad_rule.txt
new file mode 100644
index 0000000..07cd86b
--- /dev/null
+++ b/scripts/sanad_rule.txt
@@ -0,0 +1,19 @@
+[SYSTEM_PROMPT]
+You are Sanad (Bousandah), a wise and friendly Emirati assistant.
+Speak naturally in the UAE dialect (Khaleeji) unless the user explicitly provides text that must be spoken exactly.
+Keep the tone warm, respectful, and clear.
+Do not be robotic.
+Do not over-explain.
+Prefer concise speech that sounds natural when spoken aloud funny mode and happy sound.
+
+[REPLAY_SYSTEM_PROMPT]
+You are Sanad (Bousandah), using the same Emirati voice and personality.
+For replay mode, the user will provide text that you must speak exactly as written.
+You may sound warm and lively, but you must preserve the exact text.
+Do not translate it.
+Do not summarize it.
+Do not answer it.
+Do not rephrase it into another dialect or style.
+Do not add greetings, punctuation changes, comments, or extra words. +Keep the same word order and language as the provided text. +Your only task is to speak the exact user text verbatim. diff --git a/scripts/sanad_script.txt b/scripts/sanad_script.txt new file mode 100644 index 0000000..3a2d184 --- /dev/null +++ b/scripts/sanad_script.txt @@ -0,0 +1,68 @@ +أنت "بوسنده" — روبوت إماراتي ذكي تابع لروبوت شركة لوتاه تيك Lootah Tech. + +[أولاً: الروح والمعرفة] + + لديك معرفة واسعة بالقرآن الكريم والأحاديث النبوية الشريفة للإجابة على أي سؤال ديني أو تقديم تذكيرات إيمانية بدقة، مع ذكر المصدر متى لزم. + + إذا سألك المستخدم عن آية، حديث، أو حكم شرعي، جاوب بوقار، ووضوح، وتبسيط، وبلهجتك الإماراتية. + + التزم بالدقة في المسائل الدينية، وإذا كانت المسألة فيها خلاف أو تحتاج تحقق، وضّح ذلك بصراحة بدون جزم زائد. + +[ثانياً: الأسلوب واللغة (التبديل المرن)] + + تكلم باللهجة الإماراتية بشكل طبيعي، راقٍ، ومفهوم، بدون مبالغة أو تصنع. + + قاعدة التبديل الفوري: إذا استخدم المستخدم أي لغة ثانية في أي لحظة، غيّر فوراً ورد بنفس اللغة الجديدة في نفس الرد. + + إذا رجع المستخدم للعربي: أرجع فوراً للعربي باللهجة الإماراتية. + + "آخر لغة كتب فيها المستخدم" هي اللغة اللي ترد فيها. + + ممنوع تخلط لغتين في نفس الرد إلا إذا كان طلب المستخدم ترجمة أو مقارنة صريحة. + +[ثالثاً: التفاعل والبدايات] + + خلك محترم، ودود، مباشر، وركّز على الزبدة والحل العملي. + + نوّع البدايات بأسلوب طبيعي، مثل: + (مرحبابك، أبشر بعزك، فالك طيب، يا مرحبا، حياك الله، زين، تم، حاضر) + ولا تكرر نفس البداية مرتين متتاليتين. + + حافظ على أسلوب احترافي ومرن يناسب السؤال: ديني، تقني، عملي، أو يومي. + +[رابعاً: إنجاز المهام وقوة الذاكرة العمومية] + + اعتبر كل كلمة، اسم، مسار، ملاحظة، أو تفضيل يذكره المستخدم بمثابة "أمر حفظ" وأولوية قصوى داخل المحادثة. + + تعامل مع معلومات المستخدم، تفضيلاته، وتصحيحاته كأنها ثوابت محفوظة يجب الرجوع لها لاحقاً. + + عند تصحيح المستخدم لمعلومة، قل: + "زين نبهتني يا الشيخ، انحفرت في الذاكرة" + + ابنِ على السياق السابق مباشرة بدون ما ترجع تسأل عن أمور سبق ذكرها. 
+
+[خامساً: الأمان والخصوصية]
+
+ إذا كتب المستخدم API key أو Password أو Token أو أي بيانات حساسة: نبهه فوراً يمسحها ويبدلها.
+
+ لا تطلب بيانات حساسة إلا عند الضرورة القصوى، وبأسلوب محترم وواضح.
+
+ حافظ على الخصوصية، ولا تعيد عرض المعلومات الحساسة بدون داعٍ.
+
+ ممنوع أي نكت أو محتوى غير لائق في الدين أو السياسة أو الأمور الحساسة.
+
+[سادساً: السرعة والتكرار]
+
+ جاوب بسرعة، بوضوح، وباختصار، وغالباً ضمن 2 إلى 6 سطور، إلا إذا طلب المستخدم التفصيل.
+
+ إذا طلب المستخدم "كرر" أو "repeat": أعد نفس الكلام بنفس اللغة الحالية، وحرفياً إذا طلب ذلك.
+
+ إذا طلب اختصار: اختصر مباشرة بدون فقدان المعنى الأساسي.
+
+[سابعاً: جودة الإجابة]
+
+ قدّم الجواب بشكل واضح، مرتب، وسهل الفهم، وابدأ بالأهم ثم التفصيل إذا احتاج المستخدم.
+
+ إذا كان طلب المستخدم عملي أو تقني، ركّز على الخطوات والحل المباشر.
+
+ إذا كان السؤال يحتمل أكثر من معنى، خذ التفسير الأقرب من السياق وقدّم أفضل إجابة مفيدة بدون إطالة.
\ No newline at end of file
diff --git a/shell_scripts/check_audio_caps.sh b/shell_scripts/check_audio_caps.sh
new file mode 100755
index 0000000..b5346d2
--- /dev/null
+++ b/shell_scripts/check_audio_caps.sh
@@ -0,0 +1,98 @@
+#!/usr/bin/env bash
+
+set -e
+
+echo "=============================="
+echo "🔊 POWERCONF AUDIO CHECK"
+echo "=============================="
+echo
+
+# ----------------------------------
+# Detect PowerConf card + device (numbers parsed from `arecord -l`)
+# ----------------------------------
+POWERCONF_LINE=$(arecord -l | grep -i "PowerConf" | head -1)
+
+if [ -z "$POWERCONF_LINE" ]; then
+    echo "❌ PowerConf device not found"
+    exit 1
+fi
+
+CARD_INDEX=$(echo "$POWERCONF_LINE" | sed -n 's/.*card \([0-9]\+\):.*/\1/p')
+DEVICE_INDEX=$(echo "$POWERCONF_LINE" | sed -n 's/.*device \([0-9]\+\):.*/\1/p')
+
+echo "✅ PowerConf detected:"
+echo " Card = $CARD_INDEX"
+echo " Device = $DEVICE_INDEX"
+echo " ALSA = hw:${CARD_INDEX},${DEVICE_INDEX}"
+echo
+
+# ----------------------------------
+# Capture devices (PowerConf only)
+# ----------------------------------
+echo "🎤 CAPTURE DEVICE (PowerConf only)"
+arecord -l | grep -i -A4 "PowerConf" +echo + +# ---------------------------------- +# Playback devices (PowerConf only) +# ---------------------------------- +echo "🔈 PLAYBACK DEVICE (PowerConf only)" +aplay -l | grep -i -A4 "PowerConf" +echo + +# ---------------------------------- +# Capture HW params (forced safe) +# ---------------------------------- +echo "🎤 MICROPHONE HARDWARE PARAMETERS (PowerConf)" + +MIC_HW=$(arecord -D hw:${CARD_INDEX},${DEVICE_INDEX} \ + -f S16_LE -r 48000 -c 1 \ + -d 0.1 --dump-hw-params 2>/dev/null) + +echo "$MIC_HW" | grep -E "FORMAT:|CHANNELS:|RATE:" +echo + +# ---------------------------------- +# Playback HW params (forced safe) +# ---------------------------------- +echo "🔈 SPEAKER HARDWARE PARAMETERS (PowerConf)" + +SPK_HW=$(aplay -D hw:${CARD_INDEX},${DEVICE_INDEX} \ + -f S16_LE -r 48000 -c 1 \ + -d 0.1 --dump-hw-params /dev/zero 2>/dev/null) + +echo "$SPK_HW" | grep -E "FORMAT:|CHANNELS:|RATE:" +echo + +# ---------------------------------- +# Extract clean values +# ---------------------------------- +FORMAT=$(echo "$MIC_HW" | grep -m1 "FORMAT" | awk '{print $2}') +RATE=$(echo "$MIC_HW" | grep -m1 "RATE" | awk '{print $2}') +CHANNELS_IN=$(echo "$MIC_HW" | grep -m1 "CHANNELS" | awk '{print $2}') +CHANNELS_OUT=$(echo "$SPK_HW" | grep -m1 "CHANNELS" | awk '{print $2}') + +FORMAT=${FORMAT:-S16_LE} +RATE=${RATE:-48000} +CHANNELS_IN=${CHANNELS_IN:-1} +CHANNELS_OUT=${CHANNELS_OUT:-1} +CHUNK=960 + +# ---------------------------------- +# Final verified Python config +# ---------------------------------- +echo "==============================" +echo "✅ VERIFIED POWERCONF PYTHON CONFIG" +echo "==============================" +echo "FORMAT = pyaudio.paInt16 # ${FORMAT}" +echo "RATE = ${RATE}" +echo "CHANNELS_IN = ${CHANNELS_IN}" +echo "CHANNELS_OUT = ${CHANNELS_OUT}" +echo "CHUNK = ${CHUNK}" +echo "INPUT_DEVICE = ${CARD_INDEX}" +echo "OUTPUT_DEVICE = ${CARD_INDEX}" +echo +echo "✔ PowerConf USB Audio" +echo "✔ Mono mic + Mono speaker" +echo 
"✔ hw:${CARD_INDEX},${DEVICE_INDEX}" +echo "==============================" diff --git a/shell_scripts/check_pulse_devices.sh b/shell_scripts/check_pulse_devices.sh new file mode 100755 index 0000000..533db6d --- /dev/null +++ b/shell_scripts/check_pulse_devices.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +echo "==============================" +echo "🔊 PulseAudio Devices Checker" +echo "==============================" +echo + +# تحقق أن PulseAudio شغال +if ! pactl info >/dev/null 2>&1; then + echo "❌ PulseAudio is NOT running" + echo "ℹ️ Use ALSA (hw:X,Y) instead" + exit 1 +fi + +echo "✅ PulseAudio is running" +echo + +# ------------------------------- +# List Sinks (Speakers) +# ------------------------------- +echo "🔈 AVAILABLE SINKS (Speakers)" +echo "------------------------------" +pactl list short sinks | awk '{printf "• Name: %-70s | Index: %s\n", $2, $1}' +echo + +# ------------------------------- +# List Sources (Microphones) +# ------------------------------- +echo "🎤 AVAILABLE SOURCES (Microphones)" +echo "----------------------------------" +pactl list short sources | awk '{printf "• Name: %-70s | Index: %s\n", $2, $1}' +echo + +# ------------------------------- +# Highlight PowerConf if exists +# ------------------------------- +echo "🔍 PowerConf Devices Found" +echo "--------------------------" + +FOUND=0 + +pactl list short sinks | grep -i powerconf && FOUND=1 +pactl list short sources | grep -i powerconf && FOUND=1 + +if [ "$FOUND" -eq 0 ]; then + echo "⚠️ PowerConf NOT found in PulseAudio" +else + echo "✅ PowerConf detected above" +fi + +echo +echo "==============================" diff --git a/shell_scripts/sanad.service b/shell_scripts/sanad.service new file mode 100644 index 0000000..b5c561a --- /dev/null +++ b/shell_scripts/sanad.service @@ -0,0 +1,31 @@ +# systemd user-level unit for Sanad. 
Install with: +# +# mkdir -p ~/.config/systemd/user +# cp ~/Sanad/shell_scripts/sanad.service ~/.config/systemd/user/sanad.service +# systemctl --user daemon-reload +# systemctl --user enable --now sanad.service +# sudo loginctl enable-linger unitree # run at boot even when not logged in +# +# Watch logs: +# journalctl --user -u sanad.service -f +# +# Restart after a code/config change: +# systemctl --user restart sanad.service + +[Unit] +Description=Sanad robot assistant (FastAPI dashboard + voice/motion subsystems) +After=network-online.target +Wants=network-online.target + +[Service] +Type=exec +WorkingDirectory=%h/Sanad +ExecStart=/usr/bin/env bash %h/Sanad/shell_scripts/start_sanad.sh +Restart=on-failure +RestartSec=5 +TimeoutStopSec=15 +KillSignal=SIGINT +PassEnvironment=PULSE_RUNTIME_PATH XDG_RUNTIME_DIR + +[Install] +WantedBy=default.target diff --git a/shell_scripts/set_powerconf_audio.sh b/shell_scripts/set_powerconf_audio.sh new file mode 100755 index 0000000..a1e2923 --- /dev/null +++ b/shell_scripts/set_powerconf_audio.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +set -e + +SINK="alsa_output.usb-Anker_PowerConf_A3321-DEV-SN1-01.analog-stereo" +SOURCE="alsa_input.usb-Anker_PowerConf_A3321-DEV-SN1-01.mono-fallback" + +echo "🔊 Checking PulseAudio..." +if ! 
pactl info >/dev/null 2>&1; then + echo "❌ PulseAudio is not running" + exit 1 +fi + +echo "✅ PulseAudio is running" + +echo "🎧 Setting default speaker → PowerConf" +pactl set-default-sink "$SINK" + +echo "🎤 Setting default microphone → PowerConf" +pactl set-default-source "$SOURCE" + +echo +echo "📋 Current PulseAudio defaults:" +pactl info | grep -E "Default Sink|Default Source" + +echo +echo "✅ PowerConf audio routing applied successfully" diff --git a/shell_scripts/start_sanad.sh b/shell_scripts/start_sanad.sh new file mode 100755 index 0000000..056a22c --- /dev/null +++ b/shell_scripts/start_sanad.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +# start_sanad.sh — boot Sanad's main.py inside the gemini_sdk conda env. +# +# Used both manually (./start_sanad.sh) and from the systemd unit +# (sanad.service) for boot-time auto-start. +# +# Override knobs (env vars; all optional): +# SANAD_HOME project root (default ~/Sanad) +# SANAD_CONDA_ENV conda env name (default gemini_sdk) +# SANAD_CONDA_BASE conda install dir (default $HOME/miniconda3) +# SANAD_DDS_INTERFACE DDS network iface (default eth0) +# SANAD_VOICE_BRAIN gemini | local | model (default gemini) +# SANAD_AUDIO_PROFILE builtin | anker | hollyland_builtin (default builtin) +# PORT dashboard port (default 8000) + +set -u + +SANAD_HOME="${SANAD_HOME:-$HOME/Sanad}" +SANAD_CONDA_ENV="${SANAD_CONDA_ENV:-gemini_sdk}" +SANAD_CONDA_BASE="${SANAD_CONDA_BASE:-$HOME/miniconda3}" + +export SANAD_DDS_INTERFACE="${SANAD_DDS_INTERFACE:-eth0}" +export SANAD_VOICE_BRAIN="${SANAD_VOICE_BRAIN:-gemini}" +export SANAD_AUDIO_PROFILE="${SANAD_AUDIO_PROFILE:-builtin}" +export PORT="${PORT:-8000}" + +# Mandatory environment fixes for Jetson + conda + Unitree SDK +export LD_PRELOAD="${LD_PRELOAD:-/usr/lib/aarch64-linux-gnu/libgomp.so.1}" +export PYTHONUNBUFFERED=1 +# Optional: needed if/when the local pipeline imports CosyVoice +if [ -d "$HOME/CosyVoice" ]; then + export 
PYTHONPATH="$HOME/CosyVoice:$HOME/CosyVoice/third_party/Matcha-TTS:${PYTHONPATH:-}" +fi + +cd "$SANAD_HOME" || { echo "Sanad dir not found: $SANAD_HOME" >&2; exit 1; } + +# Activate conda +if [ ! -f "$SANAD_CONDA_BASE/etc/profile.d/conda.sh" ]; then + echo "Conda not found at $SANAD_CONDA_BASE — set SANAD_CONDA_BASE" >&2 + exit 1 +fi +# shellcheck disable=SC1091 +source "$SANAD_CONDA_BASE/etc/profile.d/conda.sh" +conda activate "$SANAD_CONDA_ENV" || { + echo "Conda env '$SANAD_CONDA_ENV' activation failed" >&2 + exit 1 +} + +# Wait for the DDS interface to come up — robot may still be booting +for i in $(seq 1 20); do + if ip link show "$SANAD_DDS_INTERFACE" 2>/dev/null | grep -q "state UP"; then + break + fi + sleep 1 +done + +echo "[start_sanad] $(date) — launching main.py" +echo "[start_sanad] SANAD_HOME=$SANAD_HOME" +echo "[start_sanad] conda env=$SANAD_CONDA_ENV" +echo "[start_sanad] DDS iface=$SANAD_DDS_INTERFACE" +echo "[start_sanad] voice brain=$SANAD_VOICE_BRAIN audio=$SANAD_AUDIO_PROFILE" +echo "[start_sanad] port=$PORT" + +exec python3 main.py --port "$PORT" diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_smoke.py b/tests/test_smoke.py new file mode 100644 index 0000000..6db57ca --- /dev/null +++ b/tests/test_smoke.py @@ -0,0 +1,482 @@ +"""Smoke tests — verify imports, paths, basic instantiation, and isolation. + +Run with: + PYTHONPATH=/path/to/yslootahtech python3 -m unittest \ + Project.Sanad.tests.test_smoke -v + +These tests do NOT require any third-party dependency. They prove that +the project loads cleanly and that subsystem failures stay isolated. 
+""" + +from __future__ import annotations + +import os +import sys +import unittest +from pathlib import Path + + +class TestConfig(unittest.TestCase): + """Path resolution and config loading.""" + + def test_base_dir_auto_detect(self): + from Project.Sanad.config import BASE_DIR + self.assertTrue(BASE_DIR.exists(), f"BASE_DIR missing: {BASE_DIR}") + self.assertIn(BASE_DIR.name, ("Sanad", "Sanad_lite")) + + def test_data_dirs_exist(self): + from Project.Sanad.config import ( + DATA_DIR, MOTIONS_DIR, AUDIO_RECORDINGS_DIR, + MOTION_RECORDINGS_DIR, LOGS_DIR, SCRIPTS_DIR, + ) + for d in (DATA_DIR, MOTIONS_DIR, AUDIO_RECORDINGS_DIR, + MOTION_RECORDINGS_DIR, LOGS_DIR, SCRIPTS_DIR): + self.assertTrue(d.exists(), f"Missing: {d}") + + def test_skills_file_resolves(self): + from Project.Sanad.config import SKILLS_FILE, MOTIONS_DIR + self.assertEqual(SKILLS_FILE.parent, MOTIONS_DIR) + + def test_dds_interface_default(self): + from Project.Sanad.config import DDS_NETWORK_INTERFACE + self.assertIsInstance(DDS_NETWORK_INTERFACE, str) + + def test_env_override_dds_interface(self): + os.environ["SANAD_DDS_INTERFACE"] = "test_iface" + try: + import importlib + import Project.Sanad.config as cfg + importlib.reload(cfg) + self.assertEqual(cfg.DDS_NETWORK_INTERFACE, "test_iface") + finally: + del os.environ["SANAD_DDS_INTERFACE"] + import importlib + import Project.Sanad.config as cfg + importlib.reload(cfg) + + def test_load_config_handles_missing(self): + from Project.Sanad.config import load_config + result = load_config() + self.assertIsInstance(result, dict) + + def test_local_tts_paths(self): + from Project.Sanad.config import ( + LOCAL_TTS_MODEL_PATH, LOCAL_TTS_HIFIGAN_PATH, LOCAL_TTS_XVECTOR_PATH, + ) + self.assertIn("speecht5_tts_clartts_ar", LOCAL_TTS_MODEL_PATH) + self.assertIn("speecht5_hifigan", LOCAL_TTS_HIFIGAN_PATH) + + +class TestSkillRegistry(unittest.TestCase): + """SkillRegistry CRUD + atomic writes + validation.""" + + def setUp(self): + from 
Project.Sanad.core.skill_registry import SkillRegistry + self.sr = SkillRegistry() + + def test_load(self): + skills = self.sr.list_skills() + self.assertIsInstance(skills, list) + + def test_invalid_sync_mode_rejected(self): + from Project.Sanad.core.skill_registry import Skill + bad = Skill(id="test_invalid", sync_mode="garbage") + with self.assertRaises(ValueError): + self.sr.add(bad) + + def test_update_missing_returns_none(self): + result = self.sr.update("nonexistent_id_12345", {"description": "x"}) + self.assertIsNone(result) + + def test_delete_missing_returns_none(self): + result = self.sr.delete("nonexistent_id_12345") + self.assertIsNone(result) + + +class TestEventBus(unittest.TestCase): + """EventBus emit_sync handles missing event loop and async handlers.""" + + def test_emit_sync_no_handlers(self): + from Project.Sanad.core.event_bus import EventBus + bus = EventBus() + # Should not raise + bus.emit_sync("nonexistent.event", value=1) + + def test_emit_sync_sync_handler(self): + from Project.Sanad.core.event_bus import EventBus + bus = EventBus() + captured = [] + bus.on("test.event", lambda **kw: captured.append(kw)) + bus.emit_sync("test.event", value=42) + self.assertEqual(captured, [{"value": 42}]) + + def test_emit_sync_async_handler_no_loop(self): + from Project.Sanad.core.event_bus import EventBus + bus = EventBus() + async def handler(**kw): + pass + bus.on("test.async", handler) + # No running loop — should warn but not crash + bus.emit_sync("test.async", value=1) + + def test_handler_exception_isolated(self): + from Project.Sanad.core.event_bus import EventBus + bus = EventBus() + results = [] + def good(**kw): + results.append("ok") + def bad(**kw): + raise RuntimeError("intentional") + bus.on("test.iso", bad) + bus.on("test.iso", good) + bus.emit_sync("test.iso", x=1) + # Good handler still ran + self.assertEqual(results, ["ok"]) + + +class TestBrainCallbackWhitelist(unittest.TestCase): + """Brain._resolve_callback rejects non-whitelisted 
modules (RCE block).""" + + def setUp(self): + from Project.Sanad.core.brain import Brain + self.brain = Brain() + + def test_rce_blocked_os(self): + cb = self.brain._resolve_callback("os:system") + self.assertIsNone(cb, "os:system must be rejected") + + def test_rce_blocked_subprocess(self): + cb = self.brain._resolve_callback("subprocess:run") + self.assertIsNone(cb, "subprocess:run must be rejected") + + def test_rce_blocked_eval(self): + cb = self.brain._resolve_callback("builtins:eval") + self.assertIsNone(cb) + + def test_empty_callback_returns_none(self): + self.assertIsNone(self.brain._resolve_callback("")) + self.assertIsNone(self.brain._resolve_callback(None)) + + def test_invalid_format_returns_none(self): + self.assertIsNone(self.brain._resolve_callback("no_colon")) + + def test_whitelisted_prefix_attempted(self): + # Module doesn't exist but the prefix is allowed — must NOT be rejected + # by the whitelist (it'll fail at import_module instead) + cb = self.brain._resolve_callback("Project.Sanad.voice.nonexistent_module:fn") + self.assertIsNone(cb) # fails at import, not at whitelist + + +class TestWakePhraseMatching(unittest.TestCase): + """Token-based phrase matching — no false positives on substrings.""" + + def test_no_false_positive_substring(self): + from Project.Sanad.voice.text_utils import match_phrase + # 'this' contains 'hi' as substring — must NOT match + result = match_phrase("this is a test", {"hi_action": {"hi"}}) + self.assertIsNone(result) + + def test_exact_word_match(self): + from Project.Sanad.voice.text_utils import match_phrase + result = match_phrase("hi there friend", {"greet": {"hi"}}) + self.assertEqual(result, "greet") + + def test_multi_word_phrase_all_required(self): + from Project.Sanad.voice.text_utils import match_phrase + # All words must appear + result = match_phrase("please shake hands", {"act": {"shake hands"}}) + self.assertEqual(result, "act") + result = match_phrase("just shake", {"act": {"shake hands"}}) + 
self.assertIsNone(result) + + def test_longest_phrase_wins(self): + from Project.Sanad.voice.text_utils import match_phrase + sets = { + "short": {"hi"}, + "long": {"hi five"}, + } + # When both match, prefer the more-specific (longer) phrase + result = match_phrase("hi five there", sets) + self.assertEqual(result, "long") + + def test_normalize_arabic(self): + from Project.Sanad.voice.text_utils import normalize_arabic + out = normalize_arabic("مَرْحَبًا") + self.assertNotIn("\u064b", out) # tashkeel removed + + +class TestSafeIO(unittest.TestCase): + """Path traversal protection + atomic writes.""" + + def test_safe_filename_strips_traversal(self): + from Project.Sanad.dashboard.routes._safe_io import safe_filename + with self.assertRaises(Exception): + safe_filename("..") + with self.assertRaises(Exception): + safe_filename("") + with self.assertRaises(Exception): + safe_filename(None) + # Embedded path components should be stripped to basename + self.assertEqual(safe_filename("../../etc/passwd"), "passwd") + self.assertEqual(safe_filename("foo.wav"), "foo.wav") + + def test_safe_path_under_blocks_escape(self): + from Project.Sanad.dashboard.routes._safe_io import safe_path_under + from Project.Sanad.config import MOTIONS_DIR + # Normal name → ok + p = safe_path_under(MOTIONS_DIR, "foo.jsonl") + self.assertTrue(str(p).startswith(str(MOTIONS_DIR.resolve()))) + # Traversal attempt → rejected (basename strip means it's just "passwd" + # under MOTIONS_DIR, which is safe) + p2 = safe_path_under(MOTIONS_DIR, "../../etc/passwd") + self.assertTrue(str(p2).startswith(str(MOTIONS_DIR.resolve()))) + + def test_atomic_write_text(self): + import tempfile + from Project.Sanad.dashboard.routes._safe_io import atomic_write_text + with tempfile.TemporaryDirectory() as td: + target = Path(td) / "test.txt" + atomic_write_text(target, "hello\nworld") + self.assertEqual(target.read_text(), "hello\nworld") + + def test_atomic_write_json(self): + import tempfile + import json + from 
Project.Sanad.dashboard.routes._safe_io import atomic_write_json + with tempfile.TemporaryDirectory() as td: + target = Path(td) / "test.json" + atomic_write_json(target, {"a": 1, "b": [1, 2, 3]}) + self.assertEqual(json.loads(target.read_text()), {"a": 1, "b": [1, 2, 3]}) + + +class TestGeminiClientStructure(unittest.TestCase): + """GeminiVoiceClient structural tests — no actual websocket.""" + + def setUp(self): + try: + from Project.Sanad.gemini.client import GeminiVoiceClient + self.client = GeminiVoiceClient() + except ImportError: + self.skipTest("websockets not installed") + + def test_initial_state(self): + self.assertFalse(self.client.connected) + self.assertIsNone(self.client.session_owner) + + def test_send_audio_chunk_when_disconnected(self): + """Should return False, not raise — the no-op-forever bug is fixed.""" + import asyncio + result = asyncio.run(self.client.send_audio_chunk("dGVzdA==")) + self.assertFalse(result) + + def test_acquire_session_returns_guard(self): + """acquire_session should be sync and return a context manager.""" + guard = self.client.acquire_session("test") + # Has __aenter__/__aexit__ + self.assertTrue(hasattr(guard, "__aenter__")) + self.assertTrue(hasattr(guard, "__aexit__")) + + def test_session_lock_exclusive(self): + """Two consecutive acquires should serialize, not deadlock.""" + import asyncio + + events = [] + + async def consumer(name): + async with self.client.acquire_session(name): + events.append(f"{name}:enter") + await asyncio.sleep(0.05) + events.append(f"{name}:exit") + + async def runner(): + await asyncio.gather(consumer("A"), consumer("B")) + + asyncio.run(runner()) + # Either A fully runs then B, or B fully runs then A — never interleaved + self.assertIn("A:enter", events) + self.assertIn("B:enter", events) + # The exit of one comes before the enter of the other + a_exit = events.index("A:exit") + b_enter = events.index("B:enter") + a_enter = events.index("A:enter") + b_exit = events.index("B:exit") + ok1 = 
(a_enter < a_exit < b_enter < b_exit) + ok2 = (b_enter < b_exit < a_enter < a_exit) + self.assertTrue(ok1 or ok2, f"Lock not exclusive: {events}") + + +class TestAudioDevices(unittest.TestCase): + """audio_devices module — pure helpers tested without pactl.""" + + def test_profiles_defined(self): + from Project.Sanad.voice import audio_devices as ad + self.assertGreater(len(ad.PROFILES), 0) + ids = {p.id for p in ad.PROFILES} + self.assertIn("anker_powerconf", ids) + self.assertIn("hollyland_builtin", ids) + self.assertIn("builtin", ids) + + def test_pactl_available_no_crash(self): + from Project.Sanad.voice import audio_devices as ad + # Should not raise, just return False if pactl missing + result = ad.pactl_available() + self.assertIsInstance(result, bool) + + def test_find_first_match(self): + from Project.Sanad.voice import audio_devices as ad + items = [ + {"name": "alsa_output.usb-Anker_PowerConf_A3321-DEV-SN1-01.analog-stereo", + "description": "", "index": "0"}, + {"name": "alsa_output.platform-sound.analog-stereo", + "description": "", "index": "1"}, + {"name": "alsa_input.usb-Shenzhen_Hollyland_Technology_Co._Ltd_Wireless_microphone_C63X223T6MX-01.analog-stereo", + "description": "", "index": "5"}, + ] + # Anker pattern matches (multi-pattern: "powerconf,anker") + m = ad.find_first_match(items, "powerconf,anker") + self.assertIsNotNone(m) + self.assertIn("Anker_PowerConf", m["name"]) + # Built-in pattern matches + m2 = ad.find_first_match(items, "platform-sound") + self.assertIsNotNone(m2) + # Hollyland — matches by "hollyland" OR "wireless_microphone" + m3 = ad.find_first_match(items, "hollyland,wireless_microphone") + self.assertIsNotNone(m3) + self.assertIn("Hollyland", m3["name"]) + # Case-insensitive + m4 = ad.find_first_match(items, "HOLLYLAND") + self.assertIsNotNone(m4) + # No match returns None + m5 = ad.find_first_match(items, "nonexistent") + self.assertIsNone(m5) + # Empty pattern returns None + m6 = ad.find_first_match(items, "") + 
self.assertIsNone(m6) + # Different USB port — Anker on SN2-03 instead of SN1-01 + items_port2 = [ + {"name": "alsa_output.usb-Anker_PowerConf_A3321-DEV-SN2-03.analog-stereo", + "description": "", "index": "0"}, + ] + m7 = ad.find_first_match(items_port2, "powerconf,anker") + self.assertIsNotNone(m7, "Must match Anker regardless of USB port suffix") + + def test_profile_detection_with_jetson_devices(self): + """Simulate the exact PulseAudio names from the G1 Jetson and verify + all three profiles match correctly.""" + from Project.Sanad.voice import audio_devices as ad + + fake_sinks = [ + {"name": "alsa_output.platform-sound.analog-stereo", "description": "", "index": "0"}, + {"name": "alsa_output.usb-Anker_PowerConf_A3321-DEV-SN1-01.analog-stereo", "description": "", "index": "1"}, + ] + fake_sources = [ + {"name": "alsa_output.platform-sound.analog-stereo.monitor", "description": "", "index": "0"}, + {"name": "alsa_input.platform-sound.analog-stereo", "description": "", "index": "1"}, + {"name": "alsa_output.usb-Anker_PowerConf_A3321-DEV-SN1-01.analog-stereo.monitor", "description": "", "index": "3"}, + {"name": "alsa_input.usb-Anker_PowerConf_A3321-DEV-SN1-01.mono-fallback", "description": "", "index": "4"}, + {"name": "alsa_input.usb-Shenzhen_Hollyland_Technology_Co._Ltd_Wireless_microphone_C63X223T6MX-01.analog-stereo", "description": "", "index": "5"}, + ] + + # Patch list_sinks/list_sources + orig_sinks = ad.list_sinks + orig_sources = ad.list_sources + ad.list_sinks = lambda: fake_sinks + ad.list_sources = lambda: fake_sources + try: + detected = ad.detect_plugged_profiles() + detected_ids = [d["profile"]["id"] for d in detected] + self.assertIn("hollyland_builtin", detected_ids, + "Hollyland + built-in must be detected") + self.assertIn("anker_powerconf", detected_ids, + "Anker PowerConf must be detected") + self.assertIn("builtin", detected_ids, + "Built-in must be detected") + + # Verify Hollyland gets the correct sink and source + holly = next(d for d 
in detected if d["profile"]["id"] == "hollyland_builtin") + self.assertIn("platform-sound", holly["sink"]["name"]) + self.assertIn("Hollyland", holly["source"]["name"]) + + # Verify Anker gets Anker sink AND Anker source (not built-in) + anker = next(d for d in detected if d["profile"]["id"] == "anker_powerconf") + self.assertIn("PowerConf", anker["sink"]["name"]) + self.assertIn("PowerConf", anker["source"]["name"]) + finally: + ad.list_sinks = orig_sinks + ad.list_sources = orig_sources + + def test_profile_detection_different_usb_port(self): + """Verify that Anker is detected even when plugged into a different port + (serial suffix changes from SN1-01 to SN2-03).""" + from Project.Sanad.voice import audio_devices as ad + + fake_sinks = [ + {"name": "alsa_output.usb-Anker_PowerConf_A3321-DEV-SN2-03.analog-stereo", "description": "", "index": "1"}, + ] + fake_sources = [ + {"name": "alsa_input.usb-Anker_PowerConf_A3321-DEV-SN2-03.mono-fallback", "description": "", "index": "4"}, + ] + + orig_sinks = ad.list_sinks + orig_sources = ad.list_sources + ad.list_sinks = lambda: fake_sinks + ad.list_sources = lambda: fake_sources + try: + detected = ad.detect_plugged_profiles() + detected_ids = [d["profile"]["id"] for d in detected] + self.assertIn("anker_powerconf", detected_ids, + "Anker must be detected regardless of USB port") + finally: + ad.list_sinks = orig_sinks + ad.list_sources = orig_sources + + def test_status_no_crash_without_pactl(self): + from Project.Sanad.voice import audio_devices as ad + # Should return a dict even without pactl + s = ad.status() + self.assertIsInstance(s, dict) + self.assertIn("pactl_available", s) + self.assertIn("current", s) + self.assertIn("profiles", s) + # current always has these keys + cur = s["current"] + self.assertIn("sink", cur) + self.assertIn("source", cur) + self.assertIn("source_kind", cur) + + def test_load_save_state_atomic(self): + from Project.Sanad.voice import audio_devices as ad + # Round-trip + original = 
ad.load_state() + try: + ad.save_state({"profile_id": "_test_unit", "sink": "x", "source": "y"}) + self.assertEqual(ad.load_state()["profile_id"], "_test_unit") + finally: + ad.save_state(original) + + def test_select_unknown_profile_rejected(self): + from Project.Sanad.voice import audio_devices as ad + result = ad.select_profile("definitely_not_a_real_profile") + self.assertFalse(result["ok"]) + + +class TestIsolation(unittest.TestCase): + """Failure isolation: one missing dep doesn't take down others.""" + + def test_main_module_imports_with_missing_deps(self): + """main.py must import even when third-party deps are missing.""" + import importlib + if "Project.Sanad.main" in sys.modules: + del sys.modules["Project.Sanad.main"] + m = importlib.import_module("Project.Sanad.main") + # Critical subsystems must be present even with missing deps + self.assertIsNotNone(m.brain, "brain must always be available") + + def test_subsystem_status_reported(self): + import Project.Sanad.main as m + self.assertTrue(hasattr(m, "SUBSYSTEMS")) + self.assertIn("brain", m.SUBSYSTEMS) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/voice/__init__.py b/voice/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/voice/audio_devices.py b/voice/audio_devices.py new file mode 100644 index 0000000..acc39be --- /dev/null +++ b/voice/audio_devices.py @@ -0,0 +1,495 @@ +"""Audio device profiles + pactl detection + selection persistence. + +Manages multiple audio device profiles (generic built-in, Hollyland wireless +mic + built-in speaker, Anker PowerConf) and lets the dashboard switch +between them at runtime. Selection is persisted to data/audio_device.json +so the choice survives restart. + +Resolution policy: + 1. User-selected profile (from data/audio_device.json) — if its sink/source + is currently plugged in, use it. + 2. Auto-detected profile based on what is currently plugged in. + 3. Built-in fallback. 
@dataclass
class AudioProfile:
    """One selectable pairing of an output sink and an input source.

    Fields:
        id: short identifier for the profile.
        label: human-readable name.
        sink_pattern: substring used to find a sink.
        source_pattern: substring used to find a source.
        description: optional free-text description (empty by default).
        sink_sample_rate: sink rate; 0 means "use the device default".
        source_sample_rate: source rate; defaults to 0 like the sink rate.
    """

    id: str
    label: str
    sink_pattern: str
    source_pattern: str
    description: str = ""
    sink_sample_rate: int = 0
    source_sample_rate: int = 0
+# +# Example PulseAudio names: +# alsa_output.platform-sound.analog-stereo (built-in speaker) +# alsa_input.platform-sound.analog-stereo (built-in mic) +# alsa_output.usb-Anker_PowerConf_A3321-DEV-SN1-01.analog-stereo (Anker speaker — SN1-01 is port-dependent) +# alsa_input.usb-Anker_PowerConf_A3321-DEV-SN1-01.mono-fallback (Anker mic) +# alsa_input.usb-Shenzhen_Hollyland_Technology_Co._Ltd_Wireless_microphone_C63X223T6MX-01.analog-stereo +# (Hollyland mic — C63X... is serial-dependent) + +PROFILES: list[AudioProfile] = [ + AudioProfile( + id="builtin", + label="Built-in mic + speaker", + sink_pattern="platform-sound", + source_pattern="alsa_input.platform-sound", + description="Jetson / G1 built-in audio chip. (Default)", + ), + AudioProfile( + id="hollyland_builtin", + label="Hollyland mic + built-in speaker", + sink_pattern="platform-sound", + source_pattern="hollyland,wireless_microphone", + description="Hollyland wireless lavalier microphone with the Jetson built-in speaker.", + ), + AudioProfile( + id="anker_powerconf", + label="Anker PowerConf (mic + speaker)", + sink_pattern="powerconf,anker", + source_pattern="powerconf,anker", + description="Anker PowerConf USB conference unit — mic + speaker on the same device.", + ), +] + +# The profile that should be used when no saved state and no auto-detect succeeds. 
def _parse_descriptions(listing: str) -> dict[str, str]:
    """Map each device ``Name`` to its ``Description`` in a long pactl listing.

    *listing* is the text printed by ``pactl list sinks`` / ``pactl list
    sources``: one block per device, each introduced by a ``Sink #N`` or
    ``Source #N`` header at column 0. Within a block the first ``Name:`` and
    the first ``Description:`` field are paired. Blocks missing either field
    are ignored, and the first block wins if a name repeats.
    """
    descriptions: dict[str, str] = {}
    name: str | None = None
    description: str | None = None

    for line in listing.splitlines():
        if line.startswith(("Sink #", "Source #")):
            # A new device starts: commit the block that just ended.
            if name is not None and description is not None:
                descriptions.setdefault(name, description)
            name = description = None
            continue
        field = line.strip()
        if name is None and field.startswith("Name:"):
            name = field.split(":", 1)[1].strip()
        elif description is None and field.startswith("Description:"):
            description = field.split(":", 1)[1].strip()

    # Commit the final block, which has no header after it.
    if name is not None and description is not None:
        descriptions.setdefault(name, description)
    return descriptions


def _list_kind(kind: str) -> list[dict[str, str]]:
    """Return ``[{index, name, description}]`` for every pactl sink or source.

    *kind* is the plural pactl object name (``"sinks"`` or ``"sources"``).
    The short listing supplies index and name; descriptions come from a
    single ``pactl list <kind>`` call that is parsed once, instead of
    spawning one subprocess per device. Names are matched exactly, so a
    device whose name is a suffix of another's cannot pick up the wrong
    description.

    Returns an empty list when pactl is missing, fails, or times out.
    Descriptions fall back to ``""`` when the long listing is unavailable.
    """
    try:
        short = _run_pactl(["list", "short", kind])
    except (FileNotFoundError, subprocess.SubprocessError) as exc:
        log.warning("pactl list %s failed: %s", kind, exc)
        return []
    if short.returncode != 0:
        return []

    rows: list[tuple[str, str]] = []
    for raw in (short.stdout or "").splitlines():
        parts = raw.split("\t")
        if len(parts) < 2:
            # Some pactl builds separate columns with spaces instead of tabs.
            parts = raw.split()
        if len(parts) < 2:
            continue
        rows.append((parts[0], parts[1]))
    if not rows:
        return []

    descriptions: dict[str, str] = {}
    try:
        full = _run_pactl(["list", kind])
    except (FileNotFoundError, subprocess.SubprocessError):
        full = None  # best effort: descriptions are optional decoration
    if full is not None and full.returncode == 0:
        descriptions = _parse_descriptions(full.stdout or "")

    return [
        {"index": idx, "name": name, "description": descriptions.get(name, "")}
        for idx, name in rows
    ]
def find_first_match(items: list[dict[str, str]], pattern: str,
                     exclude_monitors: bool = False) -> dict[str, str] | None:
    """Return the first item whose name contains any comma-separated needle.

    *pattern* is split on commas; each piece is trimmed and lower-cased, and
    empty pieces are dropped. An item matches when its lower-cased ``"name"``
    contains at least one piece, so ``"powerconf,anker"`` matches a name
    containing either word.

    With ``exclude_monitors=True``, items whose name ends in ``".monitor"``
    are never returned, so a loopback monitor is not picked as a microphone.

    Returns ``None`` for an empty pattern, a pattern with no usable pieces,
    or when nothing matches.
    """
    if not pattern:
        return None

    wanted = [
        token
        for token in (piece.strip().lower() for piece in pattern.split(","))
        if token
    ]
    if not wanted:
        return None

    for candidate in items:
        lowered = candidate["name"].lower()
        is_monitor = exclude_monitors and lowered.endswith(".monitor")
        if not is_monitor and any(token in lowered for token in wanted):
            return candidate
    return None
Saved profile selection (if its sink/source still plugged) + 2. Saved explicit sink/source pair + 3. DEFAULT profile (builtin) if detected + 4. First detected profile (in declaration order) + 5. pactl defaults + 6. Empty + """ + state = load_state() + + # Detected profiles snapshot + detected = detect_plugged_profiles() if pactl_available() else [] + detected_by_id = {d["profile"]["id"]: d for d in detected} + + # 1. Saved profile preference + saved_profile = state.get("profile_id") + if saved_profile and saved_profile in detected_by_id: + d = detected_by_id[saved_profile] + return { + "source_kind": "profile", + "profile": d["profile"], + "sink": d["sink"]["name"], + "source": d["source"]["name"], + "sink_description": d["sink"]["description"], + "source_description": d["source"]["description"], + } + + # 2. Saved explicit sink/source + if state.get("sink") and state.get("source"): + return { + "source_kind": "manual", + "profile": None, + "sink": state["sink"], + "source": state["source"], + "sink_description": "", + "source_description": "", + } + + # 3. Default profile if it is plugged in + if DEFAULT_PROFILE_ID in detected_by_id: + d = detected_by_id[DEFAULT_PROFILE_ID] + return { + "source_kind": "default", + "profile": d["profile"], + "sink": d["sink"]["name"], + "source": d["source"]["name"], + "sink_description": d["sink"]["description"], + "source_description": d["source"]["description"], + } + + # 4. First detected profile (in declaration order) + if detected: + d = detected[0] + return { + "source_kind": "auto", + "profile": d["profile"], + "sink": d["sink"]["name"], + "source": d["source"]["name"], + "sink_description": d["sink"]["description"], + "source_description": d["source"]["description"], + } + + # 5. 
def apply_selection(sink: str, source: str) -> dict[str, Any]:
    """Make *sink* / *source* the pactl defaults and unmute them.

    An empty name skips that side. A failed ``set-default-*`` call is
    recorded in ``errors``. The follow-up unmute is best-effort, and its
    failures are deliberately ignored.

    Returns ``{"ok": bool, "errors": [str, ...]}``, where ``ok`` is True only
    when no error was recorded.
    """

    def _unmute(command: str, device: str) -> None:
        # Unmuting is a courtesy; a missing or failing pactl must not
        # turn a successful default switch into an error.
        try:
            _run_pactl([command, device, "0"])
        except (FileNotFoundError, subprocess.SubprocessError):
            pass

    problems: list[str] = []

    if sink and set_default_sink(sink):
        _unmute("set-sink-mute", sink)
    elif sink:
        problems.append(f"set-default-sink failed: {sink}")

    if source and set_default_source(source):
        _unmute("set-source-mute", source)
    elif source:
        problems.append(f"set-default-source failed: {source}")

    return {"ok": not problems, "errors": problems}
def select_profile(profile_id: str) -> dict[str, Any]:
    """Switch to the named profile and persist the choice.

    Fails without saving anything when the id is unknown, when the profile's
    devices are not currently detected, or when applying the pactl defaults
    reports errors. On success the profile id and the resolved sink and
    source names are written via ``save_state``.

    Returns a dict with ``ok`` plus either the selected profile, sink and
    source, or the failure details (``error`` / ``available`` / ``errors``).
    """
    if profile_id not in PROFILES_BY_ID:
        return {"ok": False, "error": f"Unknown profile: {profile_id}"}

    plugged = detect_plugged_profiles()
    by_id = {entry["profile"]["id"]: entry for entry in plugged}
    chosen = by_id.get(profile_id)
    if chosen is None:
        return {
            "ok": False,
            "error": f"Profile '{profile_id}' is not currently plugged in",
            "available": [entry["profile"]["id"] for entry in plugged],
        }

    sink_name = chosen["sink"]["name"]
    source_name = chosen["source"]["name"]

    outcome = apply_selection(sink_name, source_name)
    if not outcome["ok"]:
        return {"ok": False, "errors": outcome["errors"]}

    # Persist only after pactl accepted the devices.
    save_state({
        "profile_id": profile_id,
        "sink": sink_name,
        "source": source_name,
    })
    log.info("Selected audio profile: %s (sink=%s, source=%s)", profile_id, sink_name, source_name)
    return {
        "ok": True,
        "profile": chosen["profile"],
        "sink": sink_name,
        "source": source_name,
    }
def status() -> dict[str, Any]:
    """Collect a one-shot audio status snapshot for the dashboard.

    Every pactl-backed entry degrades to an empty list or string when pactl
    is unavailable. ``current`` and ``saved_state`` are always computed.
    """
    pa = pactl_available()
    detected = detect_plugged_profiles() if pa else []
    snapshot: dict[str, Any] = {"pactl_available": pa}
    # Values are gathered in the same order as the keys appear in the result.
    snapshot["current"] = current_selection()
    snapshot["saved_state"] = load_state()
    snapshot["profiles"] = [asdict(profile) for profile in PROFILES]
    snapshot["detected"] = detected
    snapshot["detected_ids"] = [entry["profile"]["id"] for entry in detected]
    snapshot["all_sinks"] = list_sinks() if pa else []
    snapshot["all_sources"] = list_sources() if pa else []
    snapshot["default_sink"] = get_default_sink() if pa else ""
    snapshot["default_source"] = get_default_source() if pa else ""
    return snapshot
def ensure_audio_defaults():
    """Apply the current device selection and refresh the cached names.

    Delegates to ``audio_devices.apply_current_selection()`` and stores the
    resulting sink and source in the module-level cache under
    ``_DEVICE_LOCK``. A missing or empty name falls back to the configured
    default. Any exception is logged as a warning and swallowed, so callers
    never fail because of audio routing.
    """
    global _current_sink, _current_source
    try:
        selection = ad.apply_current_selection().get("selection", {})
        resolved_sink = selection.get("sink", "") or DEFAULT_SINK
        resolved_source = selection.get("source", "") or DEFAULT_SOURCE
        with _DEVICE_LOCK:
            _current_sink = resolved_sink
            _current_source = resolved_source
    except Exception as exc:
        log.warning("Audio defaults not applied: %s", exc)
+ """ + if not _HAS_G1_AUDIO: + return None + if self._g1_audio_client is not None: + return self._g1_audio_client + try: + c = AudioClient() + c.SetTimeout(5.0) + c.Init() + try: + c.SetVolume(100) + except Exception: + pass + self._g1_audio_client = c + log.info("G1 AudioClient initialized (for chest-speaker playback)") + except Exception as exc: + log.warning("G1 AudioClient init failed: %s", exc) + self._g1_audio_client = None + return self._g1_audio_client + + def refresh_devices(self) -> dict[str, str]: + """Re-read selected sink/source from audio_devices module.""" + sink, source = _resolve_devices() + with _DEVICE_LOCK: + global _current_sink, _current_source + _current_sink, _current_source = sink, source + log.info("AudioManager devices refreshed: sink=%s source=%s", sink, source) + return {"sink": sink, "source": source} + + @property + def current_sink(self) -> str: + with _DEVICE_LOCK: + return _current_sink + + @property + def current_source(self) -> str: + with _DEVICE_LOCK: + return _current_source + + def close(self): + self.pya.terminate() + + def sample_width(self) -> int: + return self.pya.get_sample_size(FORMAT) + + # -- playback -- + + def play_pcm(self, pcm_bytes: bytes, channels: int, sample_rate: int, sample_width: int): + with self.play_lock: + ensure_audio_defaults() + stream = self.pya.open( + format=self.pya.get_format_from_width(sample_width), + channels=channels, + rate=sample_rate, + output=True, + frames_per_buffer=CHUNK_SIZE, + ) + try: + frame_bytes = CHUNK_SIZE * channels * sample_width + for offset in range(0, len(pcm_bytes), frame_bytes): + stream.write(pcm_bytes[offset : offset + frame_bytes]) + finally: + stream.stop_stream() + stream.close() + + def play_wav(self, path: Path) -> dict[str, Any]: + """Play a WAV file through the G1 chest speaker via DDS when + available, falling back to the host PulseAudio sink otherwise. 
+ + The G1's built-in audio (Jetson `platform-sound`) isn't wired to + any audible speaker — the robot's loudspeaker is only reachable + over DDS `AudioClient.PlayStream` (same pipe Gemini uses). + """ + with wave.open(str(path), "rb") as wf: + channels = wf.getnchannels() + sw = wf.getsampwidth() + rate = wf.getframerate() + data = wf.readframes(wf.getnframes()) + + # Prefer G1 chest speaker when the Unitree SDK is present + client = self._get_g1_audio_client() + if client is not None and _HAS_NUMPY and sw == 2: + self._play_pcm_via_g1(data, channels, rate) + else: + if client is None and _HAS_G1_AUDIO: + log.warning("G1 AudioClient unavailable, using host PulseAudio sink") + self.play_pcm(data, channels, rate, sw) + + duration = len(data) / (rate * channels * sw) if rate else 0 + return {"path": str(path), "duration_seconds": round(duration, 3)} + + # -- G1 DDS-routed playback -- + + _G1_STREAM_APP = "sanad_playback" + _G1_HW_RATE = 16_000 + + def stop_playback(self) -> None: + """Stop any in-flight G1 DDS audio stream. + + Used by the dashboard's Stop button to halt `play_wav` / + `_play_pcm_via_g1` mid-stream. Safe to call even when nothing + is playing — the DDS call is idempotent. + """ + client = self._get_g1_audio_client() + if client is None: + return + try: + client._Call( + ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": self._G1_STREAM_APP}), + ) + log.info("G1 audio stream stopped (app=%s)", self._G1_STREAM_APP) + except Exception as exc: + log.warning("stop_playback failed: %s", exc) + + def _play_pcm_via_g1(self, pcm_bytes: bytes, channels: int, source_rate: int) -> None: + """Stream int16 PCM to the G1 chest speaker via AudioClient.PlayStream. + + Converts stereo → mono and resamples to 16 kHz (the rate + AudioClient expects). Uses a fresh stream_id per call so + back-to-back plays don't interfere. 
+ """ + client = self._get_g1_audio_client() + if client is None: + raise RuntimeError("G1 AudioClient not available") + + arr = np.frombuffer(pcm_bytes, dtype=np.int16) + # Stereo → mono average + if channels == 2 and arr.size % 2 == 0: + arr = arr.reshape(-1, 2).mean(axis=1).astype(np.int16) + # Resample to 16 kHz + if source_rate != self._G1_HW_RATE and arr.size: + target_len = max(1, int(len(arr) * self._G1_HW_RATE / source_rate)) + arr = np.interp( + np.linspace(0, len(arr), target_len, endpoint=False), + np.arange(len(arr)), + arr.astype(np.float64), + ).astype(np.int16) + + stream_id = f"wav_{int(time.time() * 1000)}" + # Clear any lingering stream from a previous call + try: + client._Call(ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": self._G1_STREAM_APP})) + except Exception: + pass + time.sleep(0.15) + + # Push the whole clip in one PlayStream — G1 handles buffering + with self.play_lock: + play_start = time.time() + client.PlayStream(self._G1_STREAM_APP, stream_id, arr.tobytes()) + total_sec = len(arr) / self._G1_HW_RATE + # Block until audio has drained (plus a small safety margin) + elapsed = time.time() - play_start + remaining = total_sec - elapsed + 0.3 + if remaining > 0: + time.sleep(remaining) + try: + client._Call(ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": self._G1_STREAM_APP})) + except Exception: + pass + + # -- recording -- + + def record_mic(self, duration_sec: float) -> bytes: + """Record from default mic for *duration_sec* seconds, return raw PCM.""" + ensure_audio_defaults() + stream = self.pya.open( + format=FORMAT, + channels=CHANNELS, + rate=RECEIVE_SAMPLE_RATE, + input=True, + frames_per_buffer=CHUNK_SIZE, + ) + frames: list[bytes] = [] + total_chunks = int(RECEIVE_SAMPLE_RATE / CHUNK_SIZE * duration_sec) + try: + for _ in range(total_chunks): + frames.append(stream.read(CHUNK_SIZE, exception_on_overflow=False)) + finally: + stream.stop_stream() + stream.close() + return b"".join(frames) + + def save_wav(self, 
pcm_bytes: bytes, path: Path, channels: int, sample_rate: int): + path.parent.mkdir(parents=True, exist_ok=True) + with wave.open(str(path), "wb") as wf: + wf.setnchannels(channels) + wf.setsampwidth(self.sample_width()) + wf.setframerate(sample_rate) + wf.writeframes(pcm_bytes) diff --git a/voice/local_tts.py b/voice/local_tts.py new file mode 100644 index 0000000..115c21a --- /dev/null +++ b/voice/local_tts.py @@ -0,0 +1,128 @@ +"""Local Arabic TTS using MBZUAI/speecht5_tts_clartts_ar (SpeechT5 fine-tuned on CLArTTS). + +Loads model/vocoder/speaker-embedding from the local Model/ directory. +Lazy-loads on first call so the webserver starts quickly. + +Output: 16 kHz mono int16 PCM bytes (matching WAV conventions). +""" + +from __future__ import annotations + +import re +import threading +from pathlib import Path +from typing import Any + +# ── Local paths (all pre-downloaded under model/) — sourced from config ── +try: + from Project.Sanad.core.config_loader import section as _cfg_section + _TTS = _cfg_section("voice", "local_tts") +except Exception: + _TTS = {} + +_PROJECT_DIR = Path(__file__).resolve().parent.parent # Sanad/ +_MODEL_ROOT = _PROJECT_DIR / "model" +MODEL_DIR = _MODEL_ROOT / _TTS.get("model_subdir", "speecht5_tts_clartts_ar") +VOCODER_DIR = _MODEL_ROOT / _TTS.get("vocoder_subdir", "speecht5_hifigan") +XVECTOR_PATH = _MODEL_ROOT / _TTS.get("xvector_filename", "arabic_xvector_embedding.pt") + +MODEL_ID = str(MODEL_DIR) +VOCODER_ID = str(VOCODER_DIR) +SAMPLE_RATE = _TTS.get("sample_rate", 16000) +CHANNELS = _TTS.get("channels", 1) + +# Arabic diacritics (tashkeel) Unicode range – model was trained without them. 
+_DIACRITICS_RE = re.compile(r"[\u0617-\u061A\u064B-\u0652\u0670\u06D6-\u06ED]") + + +def strip_diacritics(text: str) -> str: + return _DIACRITICS_RE.sub("", text) + + +class LocalTTSEngine: + def __init__(self): + self._lock = threading.Lock() + self._loaded = False + self._processor = None + self._model = None + self._vocoder = None + self._speaker_embedding = None + + def _ensure_loaded(self): + if self._loaded: + return + with self._lock: + if self._loaded: + return + + for label, p in [("Model", MODEL_DIR), ("Vocoder", VOCODER_DIR), ("XVector", XVECTOR_PATH)]: + if not p.exists(): + raise RuntimeError(f"{label} not found at {p}") + + import torch + from transformers import ( + SpeechT5ForTextToSpeech, + SpeechT5HifiGan, + SpeechT5Processor, + ) + + self._processor = SpeechT5Processor.from_pretrained(MODEL_ID) + self._model = SpeechT5ForTextToSpeech.from_pretrained(MODEL_ID) + self._vocoder = SpeechT5HifiGan.from_pretrained(VOCODER_ID) + self._speaker_embedding = torch.load(str(XVECTOR_PATH), map_location="cpu") + + self._loaded = True + + @property + def ready(self) -> bool: + return self._loaded + + def status(self) -> dict[str, Any]: + return { + "loaded": self._loaded, + "model_dir": str(MODEL_DIR), + "vocoder_dir": str(VOCODER_DIR), + "xvector_path": str(XVECTOR_PATH), + "model_exists": MODEL_DIR.exists(), + "vocoder_exists": VOCODER_DIR.exists(), + "xvector_exists": XVECTOR_PATH.exists(), + "sample_rate": SAMPLE_RATE, + } + + def synthesize(self, text: str) -> bytes: + """Convert Arabic text to 16 kHz mono int16 PCM bytes.""" + self._ensure_loaded() + import torch + + clean_text = strip_diacritics(text.strip()) + if not clean_text: + raise RuntimeError("Text is empty after stripping diacritics.") + + inputs = self._processor(text=clean_text, return_tensors="pt") + + with torch.no_grad(): + speech = self._model.generate_speech( + inputs["input_ids"], + self._speaker_embedding, + vocoder=self._vocoder, + ) + + # speech is a 1-D float32 tensor in [-1, 1] at 
16 kHz + pcm_float = speech.numpy() + # Convert float32 → int16 PCM bytes + pcm_int16 = (pcm_float * 32767).clip(-32768, 32767).astype("int16") + return pcm_int16.tobytes() + + def synthesize_wav(self, text: str) -> bytes: + """Return a complete WAV file (bytes) for the given text.""" + import io + import wave + + pcm = self.synthesize(text) + buf = io.BytesIO() + with wave.open(buf, "wb") as wf: + wf.setnchannels(CHANNELS) + wf.setsampwidth(2) # int16 + wf.setframerate(SAMPLE_RATE) + wf.writeframes(pcm) + return buf.getvalue() diff --git a/voice/text_utils.py b/voice/text_utils.py new file mode 100644 index 0000000..a73ce81 --- /dev/null +++ b/voice/text_utils.py @@ -0,0 +1,122 @@ +"""Arabic text normalization and voice-command phrase matching. + +Ported from gemini_interact/sanad_text_utils.py — unified for Sanad. +""" + +from __future__ import annotations + +import re +from pathlib import Path +from typing import Any + +# Arabic diacritics (tashkeel) — stripped for matching. +_DIACRITICS_RE = re.compile(r"[\u0617-\u061A\u064B-\u0652\u0670\u06D6-\u06ED]") +_AR_PUNCT = re.compile(r"[؟،؛]") +_NON_WORD = re.compile(r"[^\w\u0600-\u06FF\s]", re.UNICODE) +_MULTI_WS = re.compile(r"\s+") + + +def normalize_arabic(text: str) -> str: + """Normalize Arabic + English text for matching.""" + s = text.strip().lower() + s = _AR_PUNCT.sub(" ", s) + s = _NON_WORD.sub(" ", s) + s = _MULTI_WS.sub(" ", s) + # Hamza variants → bare alif + s = s.replace("\u0623", "\u0627") # أ → ا + s = s.replace("\u0625", "\u0627") # إ → ا + s = s.replace("\u0622", "\u0627") # آ → ا + # Ta marbuta / alif maqsoora + s = s.replace("\u0629", "\u0647") # ة → ه + s = s.replace("\u0649", "\u064A") # ى → ي + # Tatweel + s = s.replace("\u0640", "") + # Strip diacritics last + s = _DIACRITICS_RE.sub("", s) + return s.strip() + + +def strip_diacritics(text: str) -> str: + return _DIACRITICS_RE.sub("", text) + + +def load_phrase_map(filepath: str | Path) -> dict[str, set[str]]: + """Load a phrase file mapping 
command names to trigger phrases. + + Format (per command): + WAKE_PHRASES_shake_hand = { + "مصافحه", "handshake", "shake hands", + } + + Returns: {"shake_hand": {"مصافحه", "handshake", ...}, ...} + """ + path = Path(filepath) + if not path.exists(): + return {} + + content = path.read_text(encoding="utf-8") + result: dict[str, set[str]] = {} + current_name: str | None = None + current_phrases: set[str] = set() + + for raw_line in content.splitlines(): + line = raw_line.strip() + if not line or line.startswith("#"): + continue + + # Header: WAKE_PHRASES_shake_hand = { + header_match = re.match(r"WAKE_PHRASES_(\w+)\s*=\s*\{", line) + if header_match: + if current_name and current_phrases: + result[current_name] = current_phrases + current_name = header_match.group(1) + current_phrases = set() + continue + + # Closing brace + if line == "}": + if current_name and current_phrases: + result[current_name] = current_phrases + current_name = None + current_phrases = set() + continue + + # Phrase line: "some phrase", + phrase_match = re.match(r'"([^"]+)"', line) + if phrase_match and current_name is not None: + phrase = normalize_arabic(phrase_match.group(1)) + if phrase: + current_phrases.add(phrase) + + if current_name and current_phrases: + result[current_name] = current_phrases + + return result + + +def match_phrase(text: str, phrase_sets: dict[str, set[str]]) -> str | None: + """Return the command name if normalized *text* matches any phrase set. + + Token-set matching: every word of the phrase must appear as a whole + word in *text*. Prevents short phrases (e.g. 'hi') from matching + longer words (e.g. 'this'). 
+ """ + norm = normalize_arabic(text) + if not norm: + return None + text_tokens = set(norm.split()) + if not text_tokens: + return None + best_command: str | None = None + best_len = 0 + for command_name, phrases in phrase_sets.items(): + for phrase in phrases: + phrase_tokens = phrase.split() + if not phrase_tokens: + continue + if all(t in text_tokens for t in phrase_tokens): + if len(phrase) > best_len: + best_command = command_name + best_len = len(phrase) + return best_command + diff --git a/voice/typed_replay.py b/voice/typed_replay.py new file mode 100644 index 0000000..6bea261 --- /dev/null +++ b/voice/typed_replay.py @@ -0,0 +1,802 @@ +"""Typed Replay Engine — send text to Gemini, play audio, capture + persist. + +Full-featured port of gemini_voice_v2/sanad_webserver.py's SanadReplayEngine: + - Generate audio via GeminiVoiceClient (reuses existing WebSocket client) + - Play via PulseAudio + optionally capture speaker output (what was actually + heard) via parec or PyAudio monitor-source + - Save two WAVs per record: speaker capture + Gemini raw output + - JSON record index with rename/delete/replay + - In-memory "last session" for quick replay without re-hitting Gemini +""" + +from __future__ import annotations + +import asyncio +import json +import os +import re +import shutil +import subprocess +import tempfile +import threading +import time +import wave +from dataclasses import asdict, dataclass, field +from datetime import datetime +from pathlib import Path +from typing import Any, Optional + +from Project.Sanad.config import ( + AUDIO_RECORDINGS_DIR, + CHANNELS, + CHUNK_SIZE, + RECEIVE_SAMPLE_RATE, + SINK as DEFAULT_SINK, + SOURCE as DEFAULT_SOURCE, + MONITOR_SOURCE as DEFAULT_MONITOR_SOURCE, +) +from Project.Sanad.core.logger import get_logger + +try: + import pyaudio +except ImportError: + pyaudio = None # degraded mode — can still generate, but not capture/play + +log = get_logger("typed_replay") + + +# ─── constants (from 
def format_timestamp(dt: Optional[datetime] = None) -> str:
    """Format *dt* (default: now, local time) as ``YYYY-MM-DD HH:MM:SS``."""
    return (dt or datetime.now()).strftime("%Y-%m-%d %H:%M:%S")


def sanitize_record_name(name: str) -> str:
    """Turn a user-supplied record name into a filesystem-friendly one.

    Empty/blank input becomes ``record_<timestamp>``. Characters other
    than word characters, ``-``, ``.``, Arabic letters and whitespace are
    replaced by ``_``; whitespace runs become a single ``_``; the result
    is truncated to 80 characters.
    """
    name = (name or "").strip() or f"record_{datetime.now():%Y%m%d_%H%M%S}"
    name = re.sub(r"[^\w\-\u0600-\u06FF\s\.]", "_", name, flags=re.UNICODE)
    name = re.sub(r"\s+", "_", name)
    return name[:80]


def build_default_name(text: str) -> str:
    """Derive ``<first 40 chars of text>_<timestamp>`` as a record name."""
    stub = re.sub(r"\s+", "_", (text or "").strip())
    stub = re.sub(r"[^\w\u0600-\u06FF]", "", stub, flags=re.UNICODE)
    stub = stub[:40] or "record"
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    return f"{stub}_{stamp}"


def audio_duration_seconds(pcm: bytes, sample_rate: int, channels: int,
                           sample_width: int) -> float:
    """Return the duration of raw PCM in seconds (0.0 for empty/invalid input)."""
    if not pcm or sample_rate <= 0 or channels <= 0 or sample_width <= 0:
        return 0.0
    return len(pcm) / (sample_rate * channels * sample_width)


def ensure_unique_record_stem(base_name: str, out_dir: Path) -> Path:
    """Return ``out_dir/<stem>`` such that neither ``<stem>.wav`` nor
    ``<stem>_raw.wav`` exists yet, appending ``_1``, ``_2``, … as needed.

    Dots in the sanitized name are replaced by ``_``. Callers derive the
    file names with ``Path.with_suffix(".wav")``, which *replaces*
    everything after the last dot: with dots kept, ``v1.3`` and every
    ``v1.3_<n>`` retry all map to ``v1.wav``, so an existing ``v1.wav``
    made this loop spin forever. A dot-free stem makes ``with_suffix`` a
    plain append.

    The name is sanitized once, so a timestamp-based default name stays
    stable across retries. Creates *out_dir* if missing.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    stem = sanitize_record_name(base_name).replace(".", "_")
    candidate = out_dir / stem
    counter = 0
    while True:
        speaker = candidate.with_name(f"{candidate.name}.wav")
        raw = candidate.with_name(f"{candidate.name}_raw.wav")
        if not speaker.exists() and not raw.exists():
            return candidate
        counter += 1
        candidate = out_dir / f"{stem}_{counter}"
class ParecMonitorRecorder:
    """Capture speaker output via `parec` (PulseAudio CLI).

    `device_config` must provide ``name`` (PulseAudio source), ``rate``,
    ``channels`` and ``chunk_size`` (frames per read). Audio is read as
    signed 16-bit little-endian PCM on a background thread between
    `start()` and `stop()`.
    """

    def __init__(self, device_config: dict[str, Any]):
        self.device_config = device_config
        self.frames: list[bytes] = []
        self._stop_event = threading.Event()
        self._thread: Optional[threading.Thread] = None
        self._proc: Optional[subprocess.Popen[bytes]] = None
        self._error: Optional[BaseException] = None

    def start(self):
        """Spawn `parec` and begin collecting its stdout."""
        # Reset per-run state (as MonitorRecorder.start does) so a recorder
        # that is started again does not exit immediately or keep old audio.
        self._stop_event.clear()
        self.frames = []
        self._error = None
        cmd = [
            "parec",
            f"--device={self.device_config['name']}",
            "--format=s16le",
            f"--rate={self.device_config['rate']}",
            f"--channels={self.device_config['channels']}",
        ]
        self._proc = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        self._thread = threading.Thread(target=self._loop, daemon=True)
        self._thread.start()
        time.sleep(0.05)  # give parec a moment to attach before playback

    def _loop(self):
        """Reader thread: append chunks until stopped or parec exits."""
        if self._proc is None or self._proc.stdout is None:
            self._error = RuntimeError("parec did not start")
            return
        # chunk_size is in frames; 2 bytes per s16le sample.
        size = self.device_config["chunk_size"] * self.device_config["channels"] * 2
        while not self._stop_event.is_set():
            try:
                data = self._proc.stdout.read(size)
                if data:
                    self.frames.append(data)
                    continue
                if self._proc.poll() is not None:
                    break
            except Exception as exc:
                # Errors caused by our own shutdown are expected; ignore them.
                if not self._stop_event.is_set():
                    self._error = exc
                break

    def stop(self) -> bytes:
        """Stop capturing and return everything recorded.

        Waits MONITOR_TAIL_SEC first so the tail of the playback is
        captured, then terminates (or kills) `parec`, joins the reader
        thread and closes the pipe.

        Raises:
            RuntimeError: if the reader thread hit an error while capturing.
        """
        time.sleep(MONITOR_TAIL_SEC)
        self._stop_event.set()
        proc = self._proc
        if proc is not None and proc.poll() is None:
            proc.terminate()
            try:
                proc.wait(timeout=1.0)
            except subprocess.TimeoutExpired:
                proc.kill()
                # Reap the killed child so it does not linger as a zombie.
                try:
                    proc.wait(timeout=1.0)
                except subprocess.TimeoutExpired:
                    pass
        if self._thread is not None:
            self._thread.join(timeout=1.0)
        # Release the pipe once the reader is done with it; skip if the
        # reader is somehow still blocked inside read().
        reader_done = self._thread is None or not self._thread.is_alive()
        if proc is not None and proc.stdout is not None and reader_done:
            try:
                proc.stdout.close()
            except OSError:
                pass
        if self._error is not None:
            raise RuntimeError(f"parec capture failed: {self._error}")
        return b"".join(self.frames)
def _load_index() -> dict[str, Any]:
    """Read the JSON record index at RECORD_INDEX_PATH.

    Returns an empty index when the file is missing, unparsable, or not
    shaped as a dict with a ``records`` list (the latter two are logged).
    A missing ``total_records`` key is filled in from the list length.
    """
    empty_index: dict[str, Any] = {"total_records": 0, "records": []}
    if not RECORD_INDEX_PATH.exists():
        return empty_index
    try:
        index = json.loads(RECORD_INDEX_PATH.read_text(encoding="utf-8"))
        well_formed = isinstance(index, dict) and isinstance(index.get("records"), list)
        if not well_formed:
            raise ValueError("bad index structure")
        if "total_records" not in index:
            index["total_records"] = len(index["records"])
    except Exception as exc:
        log.warning("record index unreadable, resetting: %s", exc)
        return empty_index
    return index
def _reconcile_index(payload: dict[str, Any]) -> dict[str, Any]:
    """Keep only records whose speaker WAV is still on disk.

    Entries lacking the ``files → speaker_recording → path`` structure are
    dropped too. *payload* is updated in place (``records`` and
    ``total_records``) and returned.
    """
    kept: list[dict[str, Any]] = []
    for record in payload.get("records", []):
        try:
            wav_path = _resolve_record_path(
                record["files"]["speaker_recording"]["path"])
        except (KeyError, TypeError):
            continue  # malformed entry
        if wav_path.exists():
            kept.append(record)
    payload["records"] = kept
    payload["total_records"] = len(kept)
    return payload
+ """ + return { + "name": path.name, + "path": path.name, # basename only — resolved via _resolve_record_path + "size_bytes": len(pcm), + "sample_rate": rate, + "channels": channels, + "sample_width_bytes": sample_width, + "duration_seconds": round( + audio_duration_seconds(pcm, rate, channels, sample_width), 3), + } + + +# ─── engine ───────────────────────────────────────────────────────── + +class TypedReplayEngine: + """Full-featured typed replay — generate, play, capture, save, replay.""" + + def __init__(self, voice_client, audio_mgr): + """voice_client: GeminiVoiceClient audio_mgr: AudioManager""" + self.voice_client = voice_client + self.audio_mgr = audio_mgr + self.session = ReplaySessionState() + self._gen_lock = threading.Lock() + self._play_lock = threading.Lock() + self._monitor_config = self._resolve_monitor_config() + AUDIO_RECORDINGS_DIR.mkdir(parents=True, exist_ok=True) + + # ── monitor config ─────────────────────────────────────────── + def _resolve_monitor_config(self) -> Optional[dict[str, Any]]: + """Pick the backend for capturing speaker output. + + Priority: + 1. parec (cleanest — just listens to the speaker monitor source) + 2. PyAudio input device matching 'pulse' or 'default' + 3. 
None → capture disabled (generation still works) + """ + if shutil.which("parec"): + log.info("speaker capture: parec monitor=%s", DEFAULT_MONITOR_SOURCE) + return { + "backend": "parec", + "name": DEFAULT_MONITOR_SOURCE, + "rate": RECEIVE_SAMPLE_RATE, + "channels": CHANNELS, + "chunk_size": MONITOR_CHUNK_SIZE, + } + if pyaudio is None: + log.warning("speaker capture disabled — no parec and no pyaudio") + return None + try: + pya = self.audio_mgr.pya if self.audio_mgr else pyaudio.PyAudio() + except Exception: + return None + for i in range(pya.get_device_count()): + info = pya.get_device_info_by_index(i) + name = str(info.get("name", "")).lower() + if ("pulse" in name or "default" in name) and int(info.get("maxInputChannels", 0)) > 0: + log.info("speaker capture: pyaudio device=%s", info.get("name")) + return { + "backend": "pyaudio", + "index": i, + "name": str(info.get("name")), + "rate": int(info.get("defaultSampleRate", RECEIVE_SAMPLE_RATE)), + "channels": max(1, min(2, int(info.get("maxInputChannels", 1)))), + "chunk_size": MONITOR_CHUNK_SIZE, + } + log.warning("speaker capture disabled — no pulse/default pyaudio device") + return None + + def sample_width(self) -> int: + if pyaudio is None or self.audio_mgr is None or self.audio_mgr.pya is None: + return 2 # int16 + return self.audio_mgr.pya.get_sample_size(pyaudio.paInt16) + + # ── generation ─────────────────────────────────────────────── + + async def generate_audio(self, text: str) -> tuple[bytes, list[str]]: + """Route typed text through Gemini Live as the voice, first-try reliable. + + The session's system-prompt sets a persona ("You are Sanad…"), + so the prompt that most reliably gets audio out is a direct + address to the persona with the quoted text. A transparent + retry chain covers the edge cases where the model still + replies with text only. 
+ """ + stripped = text.strip() + if not stripped: + raise ValueError("text cannot be empty") + + if self.voice_client is None: + raise RuntimeError("voice_client unavailable") + if not self.voice_client.connected: + await self.voice_client.connect() + + # Ordered by empirical reliability — first variant wins ~95% of turns. + # The quoted-phrase form is the most consistent trigger for an + # audio-only response with the current Sanad persona prompt. + attempts = [ + f'قل هذا بالضبط وبدون إضافات: "{stripped}"', # Arabic: "Say this exactly, no additions" + f'Say this exactly, nothing else: "{stripped}"', + f'"{stripped}"', + ] + last_parts: list[str] = [] + for idx, wrapped in enumerate(attempts, start=1): + try: + audio_bytes, text_parts = await self.voice_client.send_text( + wrapped, owner="typed_replay") + except Exception as exc: + log.warning("Gemini TTS attempt %d failed: %s", idx, exc) + continue + if audio_bytes: + if idx > 1: + log.info("Gemini TTS succeeded on attempt %d", idx) + return audio_bytes, text_parts + last_parts = text_parts + log.warning("Gemini TTS attempt %d returned no audio — parts: %s", + idx, " | ".join(text_parts or [])[:120]) + return b"", last_parts + + # ── playback + capture ─────────────────────────────────────── + def play_audio(self, audio_bytes: bytes, capture_speaker: bool) -> bytes: + """Play Gemini PCM through the G1 chest speaker (via DDS) when + available; fall back to the host PulseAudio sink otherwise. + + The DDS path is audible on the robot; the PulseAudio path goes + to the Jetson's built-in audio codec, which isn't wired to any + audible output on the G1. `capture_speaker` is only supported + on the PulseAudio path (DDS has no monitor sink). 
+ """ + if not audio_bytes: + return b"" + if self.audio_mgr is None: + raise RuntimeError("audio_mgr unavailable — cannot play") + + # Try the G1 chest speaker first + g1_client = None + try: + g1_client = self.audio_mgr._get_g1_audio_client() + except Exception: + g1_client = None + + if g1_client is not None: + if capture_speaker: + log.info("capture_speaker requested, but G1 DDS path has no " + "PulseAudio monitor — capture skipped") + with self._play_lock: + try: + self.audio_mgr._play_pcm_via_g1( + audio_bytes, CHANNELS, RECEIVE_SAMPLE_RATE, + ) + return b"" + except Exception as exc: + log.warning("G1 speaker playback failed, falling back " + "to host PulseAudio sink: %s", exc) + # Fall through to the PyAudio path below + + if self.audio_mgr.pya is None: + raise RuntimeError("audio_mgr has no PyAudio backend for fallback") + + # Fallback: host PulseAudio sink (inaudible on G1, but keeps the + # capture-speaker feature working on desktop/dev setups). + with self._play_lock: + recorder = None + restore_source = False + if capture_speaker and self._monitor_config is not None: + if self._monitor_config["backend"] == "parec": + recorder = ParecMonitorRecorder(self._monitor_config) + else: + recorder = MonitorRecorder(self.audio_mgr.pya, self._monitor_config) + try: + run_pactl(["set-default-source", self._monitor_config["name"]]) + restore_source = True + except Exception as exc: + log.warning("couldn't switch default source to monitor: %s", exc) + + stream = None + try: + stream = self.audio_mgr.pya.open( + format=pyaudio.paInt16, + channels=CHANNELS, + rate=RECEIVE_SAMPLE_RATE, + output=True, + frames_per_buffer=CHUNK_SIZE, + ) + if recorder is not None: + recorder.start() + frame_bytes = CHUNK_SIZE * 2 + for offset in range(0, len(audio_bytes), frame_bytes): + stream.write(audio_bytes[offset:offset + frame_bytes]) + finally: + if stream is not None: + try: + stream.stop_stream() + finally: + stream.close() + + captured = b"" + try: + if recorder is not None: + 
captured = recorder.stop() + finally: + if restore_source: + try: + run_pactl(["set-default-source", DEFAULT_SOURCE]) + except Exception as exc: + log.warning("couldn't restore default source: %s", exc) + + return captured + + def save_audio(self, pcm: bytes, path: Path, channels: int, rate: int) -> None: + with wave.open(str(path), "wb") as wf: + wf.setnchannels(channels) + wf.setsampwidth(self.sample_width()) + wf.setframerate(rate) + wf.writeframes(pcm) + + def last_audio_wav(self) -> bytes: + """Wrap the cached PCM bytes as an in-memory WAV (for browser playback). + + Used by /api/typed-replay/audio/last to stream the most recent + generation to the client — keeps audio on the user's device instead + of the server speaker. + """ + if not self.session.audio_bytes: + raise RuntimeError("no cached generation — call say() first") + import io + buf = io.BytesIO() + with wave.open(buf, "wb") as wf: + wf.setnchannels(CHANNELS) + wf.setsampwidth(self.sample_width()) + wf.setframerate(RECEIVE_SAMPLE_RATE) + wf.writeframes(self.session.audio_bytes) + return buf.getvalue() + + # ── high-level API ─────────────────────────────────────────── + async def say(self, text: str, record: bool = False, + record_name: str = "") -> dict[str, Any]: + """Generate, play, capture, return metadata. Optionally persist.""" + if not text or not text.strip(): + raise ValueError("text cannot be empty") + if not self._gen_lock.acquire(blocking=False): + raise RuntimeError("another typed-replay generation is in progress") + try: + audio_bytes, text_parts = await self.generate_audio(text) + if not audio_bytes: + raise RuntimeError("Gemini returned no audio — parts: " + + " | ".join(text_parts or [])) + + generated_at = format_timestamp() + # Lite: audio is NOT played on the server. The client fetches + # /api/typed-replay/audio/last and plays it in the browser. + # speaker_capture stays empty — _persist_session falls back to + # writing raw bytes into both speaker.wav and raw.wav. 
+ captured = b"" + playback_finished_at = generated_at + + # Update session state + self.session.text = text + self.session.audio_bytes = audio_bytes + self.session.speaker_capture = captured + self.session.generated_at = generated_at + self.session.last_playback_at = playback_finished_at + self.session.replay_count = 1 + self.session.saved_as = "" + + result = { + "ok": True, + "text": text, + "gemini_text": text_parts, + "generated_at": generated_at, + "playback_finished_at": playback_finished_at, + "raw_duration_sec": round( + audio_duration_seconds(audio_bytes, RECEIVE_SAMPLE_RATE, + CHANNELS, self.sample_width()), 3), + "captured_speaker_bytes": 0, + "audio_url": "/api/typed-replay/audio/last", + "recorded": False, + } + + if record: + entry = self._persist_session(record_name or build_default_name(text)) + self.session.saved_as = entry["record_name"] + result["record"] = entry + result["recorded"] = True + + return result + finally: + self._gen_lock.release() + + def replay_last(self) -> dict[str, Any]: + """Bump the replay counter — audio is played client-side via + /api/typed-replay/audio/last. Server no longer touches speakers. 
+ """ + if not self.session.audio_bytes: + raise RuntimeError("no cached generation — call say() first") + self.session.replay_count += 1 + self.session.last_playback_at = format_timestamp() + return { + "ok": True, + "replay_count": self.session.replay_count, + "text": self.session.text, + "played_at": self.session.last_playback_at, + "audio_url": "/api/typed-replay/audio/last", + } + + def save_last(self, record_name: str = "") -> dict[str, Any]: + """Persist the last generation to the records index.""" + if not self.session.audio_bytes: + raise RuntimeError("no cached generation — call say() first") + entry = self._persist_session(record_name or build_default_name(self.session.text)) + self.session.saved_as = entry["record_name"] + return entry + + def _persist_session(self, record_name: str) -> dict[str, Any]: + base = ensure_unique_record_stem(record_name, AUDIO_RECORDINGS_DIR) + speaker_path = base.with_suffix(".wav") + raw_path = base.with_name(f"{base.name}_raw.wav") + + capture = self.session.speaker_capture + audio = self.session.audio_bytes + sw = self.sample_width() + + if capture: + cap_rate = (self._monitor_config or {}).get("rate", RECEIVE_SAMPLE_RATE) + cap_channels = (self._monitor_config or {}).get("channels", CHANNELS) + self.save_audio(capture, speaker_path, cap_channels, cap_rate) + else: + # No capture available → save raw as speaker too so every record + # has a .wav file for reconciliation checks. 
+ self.save_audio(audio, speaker_path, CHANNELS, RECEIVE_SAMPLE_RATE) + cap_rate = RECEIVE_SAMPLE_RATE + cap_channels = CHANNELS + capture = audio + + self.save_audio(audio, raw_path, CHANNELS, RECEIVE_SAMPLE_RATE) + + entry = { + "record_name": base.name, + "text": self.session.text, + "replay_count": self.session.replay_count, + "timeline": { + "audio_generated_at": self.session.generated_at, + "last_playback_finished_at": self.session.last_playback_at, + "saved_at": format_timestamp(), + }, + "audio_capture": { + "backend": (self._monitor_config or {}).get("backend", "none"), + "sink": DEFAULT_SINK, + "monitor_source": DEFAULT_MONITOR_SOURCE, + "restored_microphone_source": DEFAULT_SOURCE, + }, + "files": { + "speaker_recording": _build_file_info( + speaker_path, capture, cap_rate, cap_channels, sw), + "gemini_raw_output": _build_file_info( + raw_path, audio, RECEIVE_SAMPLE_RATE, CHANNELS, sw), + }, + } + payload = _reconcile_index(_load_index()) + payload["records"].append(entry) + payload["total_records"] = len(payload["records"]) + _save_index(payload) + log.info("saved record %s (%.1fs speaker, %.1fs raw)", + base.name, + entry["files"]["speaker_recording"]["duration_seconds"], + entry["files"]["gemini_raw_output"]["duration_seconds"]) + return entry + + # ── records CRUD ───────────────────────────────────────────── + def list_records(self) -> dict[str, Any]: + return _reconcile_index(_load_index()) + + def find_record(self, name: str) -> dict[str, Any]: + for e in _load_index().get("records", []): + if e.get("record_name") == name: + return e + raise KeyError(f"record not found: {name}") + + def rename_record(self, name: str, new_name: str) -> dict[str, Any]: + new_name = sanitize_record_name(new_name) + if not new_name: + raise ValueError("new_name empty after sanitize") + payload = _reconcile_index(_load_index()) + target = None + for e in payload["records"]: + if e.get("record_name") == name: + target = e + break + if target is None: + raise 
KeyError(f"record not found: {name}") + if any(e.get("record_name") == new_name for e in payload["records"]): + raise ValueError(f"a record named {new_name} already exists") + + old_speaker = _resolve_record_path(target["files"]["speaker_recording"]["path"]) + old_raw = _resolve_record_path(target["files"]["gemini_raw_output"]["path"]) + new_base = AUDIO_RECORDINGS_DIR / new_name + new_speaker = new_base.with_suffix(".wav") + new_raw = new_base.with_name(f"{new_base.name}_raw.wav") + + old_speaker.rename(new_speaker) + old_raw.rename(new_raw) + + target["record_name"] = new_name + target["files"]["speaker_recording"]["path"] = new_speaker.name # basename only + target["files"]["speaker_recording"]["name"] = new_speaker.name + target["files"]["gemini_raw_output"]["path"] = new_raw.name + target["files"]["gemini_raw_output"]["name"] = new_raw.name + _save_index(payload) + if self.session.saved_as == name: + self.session.saved_as = new_name + return target + + def delete_record(self, name: str) -> dict[str, Any]: + payload = _reconcile_index(_load_index()) + target = None + for e in payload["records"]: + if e.get("record_name") == name: + target = e + break + if target is None: + raise KeyError(f"record not found: {name}") + for key in ("speaker_recording", "gemini_raw_output"): + path = _resolve_record_path(target["files"][key]["path"]) + try: + path.unlink() + except FileNotFoundError: + pass + except Exception as exc: + log.warning("couldn't delete %s: %s", path, exc) + payload["records"] = [e for e in payload["records"] if e.get("record_name") != name] + payload["total_records"] = len(payload["records"]) + _save_index(payload) + if self.session.saved_as == name: + self.session.saved_as = "" + return {"deleted": name, "total_records": payload["total_records"]} + + def play_record(self, name: str, file_kind: str = "speaker") -> dict[str, Any]: + """Play a saved WAV. 
file_kind = 'speaker' or 'raw'.""" + entry = self.find_record(name) + file_key = "speaker_recording" if file_kind == "speaker" else "gemini_raw_output" + path = _resolve_record_path(entry["files"][file_key]["path"]) + if not path.exists(): + raise FileNotFoundError(str(path)) + with wave.open(str(path), "rb") as wf: + channels = wf.getnchannels() + sample_width = wf.getsampwidth() + sample_rate = wf.getframerate() + frames = wf.readframes(wf.getnframes()) + with self._play_lock: + if self.audio_mgr and self.audio_mgr.pya: + stream = self.audio_mgr.pya.open( + format=self.audio_mgr.pya.get_format_from_width(sample_width), + channels=channels, rate=sample_rate, + output=True, frames_per_buffer=CHUNK_SIZE, + ) + try: + chunk = CHUNK_SIZE * channels * sample_width + for offset in range(0, len(frames), chunk): + stream.write(frames[offset:offset + chunk]) + finally: + stream.stop_stream() + stream.close() + return { + "ok": True, "record_name": name, "file_kind": file_kind, + "duration_sec": round(audio_duration_seconds( + frames, sample_rate, channels, sample_width), 3), + } + + # ── status ─────────────────────────────────────────────────── + def status(self) -> dict[str, Any]: + return { + "voice_client_connected": bool( + self.voice_client and self.voice_client.connected), + "audio_mgr_ready": bool(self.audio_mgr and self.audio_mgr.pya), + "capture_backend": (self._monitor_config or {}).get("backend", "none"), + "records_dir": str(AUDIO_RECORDINGS_DIR), + "session": self.session.as_status(), + "total_records": len(_load_index().get("records", [])), + }