diff --git a/README.md b/README.md index c826906..d52f236 100644 --- a/README.md +++ b/README.md @@ -1,101 +1,98 @@ # Saqr — PPE Safety Detection on Unitree G1 -Real-time PPE compliance (helmet, vest, boots, gloves, goggles) using YOLO11n, -designed to run on a Unitree G1 humanoid with an Intel RealSense D435I. On -UNSAFE the robot speaks a warning and plays the `reject` arm action. +Real-time PPE compliance (helmet, vest, boots, gloves, goggles) using YOLO11n. +Runs on a Unitree G1 humanoid with an Intel RealSense D435I. On UNSAFE the +robot speaks a warning and plays the `reject` arm action. ## Layout ``` -saqr/ # python package - core/ # detection + tracking + events (shared by CLI/GUI/bridge) - apps/ # CLI entry points (saqr, detect, train, manager, view_stream) - gui/ # PySide6 desktop GUI - robot/ # G1 bridge + DDS controller - utils/ # logger -scripts/ # deploy.sh, start_saqr.sh, run_local.sh, run_robot.sh, systemd unit -config/ # logging.json -data/ # dataset/, models/ (gitignored) -runtime/ # captures/, logs/, runs/ (gitignored) -docs/ # DEPLOY.md, start.md, use_case_catalogue.pdf +Saqr/ +├── core/ # detection + tracking + events (shared by CLI/GUI/bridge) +├── apps/ # CLI modules (saqr_cli, detect_cli, train_cli, manager_cli, view_stream) +├── gui/ # PySide6 desktop GUI (dev-machine only) +├── robot/ # G1 bridge + DDS controller +├── utils/ # logger, config loader +├── scripts/ +│ ├── start_saqr.sh # the single entry point +│ ├── saqr-bridge.service # systemd unit (wraps start_saqr.sh) +│ └── deploy.sh # push code dev machine → robot +├── config/ # logging.json, core_config.json, robot_config.json +├── data/ # dataset/, models/ (gitignored) +├── runtime/ # captures/, runs/ (gitignored) +├── logs/ # per-module .log files (gitignored) +├── docs/ # DEPLOY.md, start.md, use_case_catalogue.pdf +├── pyproject.toml +└── README.md ``` -## Quick start +The project root is auto-detected from `core/paths.py::PROJECT_ROOT` — drop +the `Saqr/` folder anywhere on disk and the code finds itself. Override +with `SAQR_ROOT=/custom/path` if needed. + +## Run + +The project only runs through **[scripts/start_saqr.sh](scripts/start_saqr.sh)** +(directly or under the `saqr-bridge` systemd unit): ```bash -# Install the package (editable) -pip install -e . - -# Local dev run (webcam) -saqr --source 0 - -# PySide6 GUI -pip install -e ".[gui]" -saqr-gui - -# On the Unitree G1 (bridge owns the R2+X / R2+Y flow) -saqr-bridge --iface eth0 --source realsense --headless -- --stream 8080 +# On the robot: +sudo systemctl start saqr-bridge # production +# or +~/Saqr/scripts/start_saqr.sh # foreground / debug ``` -Without installing, everything still works via `python -m`: +Then on the wireless remote: +- **R2 + X** → start detection +- **R2 + Y** → stop detection +See [docs/DEPLOY.md](docs/DEPLOY.md) for first-time deploy and +[docs/start.md](docs/start.md) for the systemd workflow. + +## Deploy + +From the dev machine: ```bash -python -m saqr.apps.saqr_cli --source 0 -python -m saqr.robot.bridge --iface eth0 --source realsense --headless +scripts/deploy.sh # rsync + pip install -e . in the robot's conda env +scripts/deploy.sh --ip … # custom robot IP ``` -## Docs +## Configure -- [docs/DEPLOY.md](docs/DEPLOY.md) — full deploy + robot setup. -- [docs/start.md](docs/start.md) — systemd auto-start workflow. -- [docs/use_case_catalogue.pdf](docs/use_case_catalogue.pdf) — PPE use-case spec. 
+All tunable values live in JSON — no code edits needed: + +- [config/core_config.json](config/core_config.json) — detection, tracking, + camera, stream, training. +- [config/robot_config.json](config/robot_config.json) — bridge, TTS, + phrases, arm actions, deploy, start_saqr defaults. +- [config/logging.json](config/logging.json) — log levels per category. + +Precedence: **env var > config JSON > code fallback**. Most defaults can be +overridden via env vars without editing the files (`CONDA_ENV`, +`SAQR_SOURCE`, `STREAM_PORT`, `DDS_IFACE`, `ROBOT_IP`, …). + +After editing any JSON: +```bash +sudo systemctl restart saqr-bridge +``` ## Data & Models -The `data/` and `runtime/` directories are excluded from git (too large). -Download them separately before training or running inference. +`data/` and `runtime/` are gitignored (too large). Download separately: -### `data/` — dataset and pre-trained weights +- **Dataset**: [testcasque/ppe-detection-qlq3d](https://universe.roboflow.com/testcasque/ppe-detection-qlq3d) + → YOLOv11 format → unzip to `data/dataset/`. +- **Base weights**: [Ultralytics releases](https://github.com/ultralytics/assets/releases) + → `yolo11n.pt` into `data/models/`. +- **Saqr fine-tuned weights** (`saqr_best.pt`): produced by training — see + [docs/DEPLOY.md](docs/DEPLOY.md). -Expected contents: +## Training (dev machine, off-path) -``` -data/ - dataset/ - train/{images,labels}/ - valid/{images,labels}/ - test/{images,labels}/ - data.yaml - models/ - saqr_best.pt # Saqr YOLO11n fine-tuned on PPE - saqr_last.pt - yolo11n.pt # base YOLO11n - yolo26n.pt # base YOLO26n -``` - -Download: - -- **Dataset** (PPE, Roboflow): [testcasque/ppe-detection-qlq3d](https://universe.roboflow.com/testcasque/ppe-detection-qlq3d) - Open the Roboflow link → *Download Dataset* → format **YOLOv11** → unzip into `data/dataset/`. -- **Base YOLO weights**: [Ultralytics assets releases](https://github.com/ultralytics/assets/releases) - Grab `yolo11n.pt` (and optionally `yolo26n.pt`) into `data/models/`. -- **Saqr fine-tuned weights** (`saqr_best.pt`, `saqr_last.pt`): - Produced by training — see "Training" below. Or request from the maintainer. - -Place everything under `data/` so the tree matches above. - -### `runtime/` — training output (optional) - -Auto-generated when you run training. Not required for inference. -Contains confusion matrices, PR curves, batch previews, and the raw weights -under `runtime/runs/train/saqr_det/weights/`. - -### Training +Training is done on a workstation, not on the robot. It's a one-off, not +part of the normal run flow: ```bash -# after placing the dataset in data/dataset/ and base weights in data/models/ -python -m saqr.apps.train_cli --data data/dataset/data.yaml --weights data/models/yolo11n.pt +python -m apps.train_cli --epochs 100 --batch 16 +# best weights land at data/models/saqr_best.pt; deploy with scripts/deploy.sh ``` - -Outputs land in `runtime/runs/train/saqr_det/`. Copy the best checkpoint to -`data/models/saqr_best.pt` to use it at inference time. 
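The Configure section above leans on `utils/config.py` for the env var > config JSON > code fallback precedence, and `load_config(...)` calls appear throughout this diff, but the loader module itself is not included in it. The sketch below is a guess at a minimal implementation, assuming the files follow the `config/<name>_config.json` naming visible in this PR; the `env_or_config` helper is purely illustrative and not a real repo function:

```python
# Hypothetical sketch of utils/config.py (the module is referenced but not shown in this diff).
from __future__ import annotations

import json
import os
from functools import lru_cache
from typing import Any, Dict

from core.paths import CONFIG_DIR


@lru_cache(maxsize=None)
def load_config(name: str) -> Dict[str, Any]:
    """Load and cache config/<name>_config.json, e.g. load_config("core")["tracking"]."""
    with open(CONFIG_DIR / f"{name}_config.json", encoding="utf-8") as fh:
        return json.load(fh)


def env_or_config(env_var: str, section: Dict[str, Any], key: str, fallback: Any) -> Any:
    """Illustrates the documented precedence: env var > config JSON > code fallback."""
    return os.environ.get(env_var, section.get(key, fallback))
```

Callers in this diff index straight into the returned dict (`load_config("core")["detection"]`, `load_config("robot")["deploy"]`), so whatever the real loader does, it has to hand back the parsed JSON unchanged.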
diff --git a/saqr/apps/__init__.py b/apps/__init__.py similarity index 100% rename from saqr/apps/__init__.py rename to apps/__init__.py diff --git a/saqr/apps/detect_cli.py b/apps/detect_cli.py similarity index 83% rename from saqr/apps/detect_cli.py rename to apps/detect_cli.py index ffe0340..0bbb824 100644 --- a/saqr/apps/detect_cli.py +++ b/apps/detect_cli.py @@ -8,11 +8,14 @@ from pathlib import Path import cv2 from ultralytics import YOLO -from saqr.core.detection import get_inference_config, set_inference_config -from saqr.core.model import resolve_model_path -from saqr.utils.logger import get_logger +from core.detection import get_inference_config, set_inference_config +from core.model import resolve_model_path +from utils.config import load_config +from utils.logger import get_logger log = get_logger("Inference", "detect") +_DET = load_config("core")["detection"] +_CAM = load_config("core")["camera"] VIOLATION = {"no-helmet", "no-vest", "no-boots", "no-gloves", "no-goggles"} COMPLIANT = {"helmet", "vest", "boots", "gloves", "goggles"} @@ -97,12 +100,12 @@ def run_image(model, path, conf): def main(): parser = argparse.ArgumentParser(description="Saqr simple PPE detection") - parser.add_argument("--source", default="0") - parser.add_argument("--model", default="saqr_best.pt") - parser.add_argument("--conf", type=float, default=0.35) - parser.add_argument("--device", default="0", help="'cpu', '0', 'cuda:0'") - parser.add_argument("--half", action="store_true") - parser.add_argument("--imgsz", type=int, default=320) + parser.add_argument("--source", default=_CAM["default_source"]) + parser.add_argument("--model", default=_DET["default_model"]) + parser.add_argument("--conf", type=float, default=_DET["conf"]) + parser.add_argument("--device", default=_DET["device"], help="'cpu', '0', 'cuda:0'") + parser.add_argument("--half", action="store_true", default=_DET["half"]) + parser.add_argument("--imgsz", type=int, default=_DET["imgsz"]) args = parser.parse_args() set_inference_config(device=args.device, half=args.half, imgsz=args.imgsz) diff --git a/saqr/apps/manager_cli.py b/apps/manager_cli.py similarity index 99% rename from saqr/apps/manager_cli.py rename to apps/manager_cli.py index 170168c..770625e 100644 --- a/saqr/apps/manager_cli.py +++ b/apps/manager_cli.py @@ -10,8 +10,8 @@ from pathlib import Path import cv2 -from saqr.core.paths import CAPTURES_DIR, PROJECT_ROOT -from saqr.utils.logger import get_logger +from core.paths import CAPTURES_DIR, PROJECT_ROOT +from utils.logger import get_logger log = get_logger("Manager", "manager") diff --git a/saqr/apps/saqr_cli.py b/apps/saqr_cli.py similarity index 54% rename from saqr/apps/saqr_cli.py rename to apps/saqr_cli.py index 2b3307d..14321fe 100644 --- a/saqr/apps/saqr_cli.py +++ b/apps/saqr_cli.py @@ -9,23 +9,28 @@ from typing import Dict import cv2 from ultralytics import YOLO -from saqr.core.camera import open_capture -from saqr.core.capture import setup_capture_dirs -from saqr.core.detection import set_inference_config -from saqr.core.drawing import draw_counters -from saqr.core.events import EventLogger, write_result_csv -from saqr.core.model import resolve_model_path -from saqr.core.paths import EVENTS_CSV, RESULT_CSV -from saqr.core.pipeline import process_frame -from saqr.core.streaming import start_stream_server, update_stream_frame -from saqr.core.tracking import PersonTracker -from saqr.utils.logger import get_logger +from core.camera import RealSenseCapture, open_capture +from core.capture import setup_capture_dirs, 
setup_snapshot_dirs +from core.detection import set_inference_config +from core.drawing import draw_counters +from core.events import EventLogger, write_result_csv +from core.model import resolve_model_path +from core.paths import EVENTS_CSV, RESULT_CSV +from core.pipeline import process_frame +from core.streaming import start_stream_server, update_stream_frame +from core.tracking import PersonTracker +from utils.config import load_config +from utils.logger import get_logger log = get_logger("Inference", "saqr") +_CORE = load_config("core") -def run_video(model, source, conf, capture_dirs: Dict[str, Path], show_gui, csv_every_frame, - max_missing, match_distance, status_confirm_frames, stream_port=0): +def run_video(model, source, conf, capture_dirs: Dict[str, Path], show_gui, + csv_interval, max_missing, match_distance, status_confirm_frames, + *, + snapshot_dirs=None, max_distance_m: float = 0.0, + stream_port: int = 0): cap = open_capture(source) if not cap.isOpened(): log.error(f"Cannot open source: {source}") @@ -37,6 +42,9 @@ def run_video(model, source, conf, capture_dirs: Dict[str, Path], show_gui, csv_ cap.release() return + is_realsense = isinstance(cap, RealSenseCapture) and cap.has_depth + depth_scale = cap.depth_scale if is_realsense else 0.001 + event_logger = EventLogger(EVENTS_CSV) tracker = PersonTracker( event_logger=event_logger, @@ -48,7 +56,11 @@ def run_video(model, source, conf, capture_dirs: Dict[str, Path], show_gui, csv_ if stream_port > 0: start_stream_server(stream_port) - log.info(f"Session started | source={source}") + log.info( + f"Session started | source={source} depth={is_realsense} " + f"max_distance_m={max_distance_m if max_distance_m > 0 else 'off'} " + f"csv_interval={csv_interval}" + ) if show_gui: print("Running - press q to quit, s to save frame.") @@ -58,10 +70,16 @@ def run_video(model, source, conf, capture_dirs: Dict[str, Path], show_gui, csv_ while True: frame_idx += 1 + depth_frame = cap.latest_depth if is_realsense else None + write_csv_this_frame = csv_interval > 0 and (frame_idx % csv_interval == 0) + try: annotated, visible = process_frame( frame, model, tracker, frame_idx, conf, - capture_dirs, write_csv=csv_every_frame, + capture_dirs, write_csv=write_csv_this_frame, + snapshot_dirs=snapshot_dirs, + depth_frame=depth_frame, depth_scale=depth_scale, + max_distance_m=max_distance_m, ) except Exception as e: log.exception(f"Frame error #{frame_idx}: {e}") @@ -94,11 +112,12 @@ def run_video(model, source, conf, capture_dirs: Dict[str, Path], show_gui, csv_ if show_gui: cv2.destroyAllWindows() + # Always write final state on exit so the last tracked people are recorded. 
write_result_csv(list(tracker.tracks.values()), RESULT_CSV) log.info(f"Session ended | frames={frame_idx} tracks_created={tracker.next_id - 1}") -def run_image(model, path, conf, capture_dirs: Dict[str, Path], show_gui): +def run_image(model, path, conf, capture_dirs: Dict[str, Path], show_gui, snapshot_dirs=None): frame = cv2.imread(path) if frame is None: log.error(f"Cannot read image: {path}") @@ -107,7 +126,10 @@ def run_image(model, path, conf, capture_dirs: Dict[str, Path], show_gui): event_logger = EventLogger(EVENTS_CSV) tracker = PersonTracker(event_logger=event_logger) - annotated, visible = process_frame(frame, model, tracker, 1, conf, capture_dirs) + annotated, visible = process_frame( + frame, model, tracker, 1, conf, capture_dirs, + snapshot_dirs=snapshot_dirs, + ) draw_counters(annotated, visible, 0.0) out = Path(path).stem + "_saqr.jpg" @@ -122,20 +144,32 @@ def run_image(model, path, conf, capture_dirs: Dict[str, Path], show_gui): def main(): parser = argparse.ArgumentParser(description="Saqr PPE detection with tracking") - parser.add_argument("--source", default="0", + det = _CORE["detection"] + trk = _CORE["tracking"] + cam = _CORE["camera"] + cap_cfg = _CORE["capture"] + + parser.add_argument("--source", default=cam["default_source"], help="0/1=webcam, realsense, realsense:SERIAL, /dev/videoX, or video path") - parser.add_argument("--model", default="saqr_best.pt", + parser.add_argument("--model", default=det["default_model"], help="Trained YOLO weights (resolved under data/models/ by default)") - parser.add_argument("--conf", type=float, default=0.35) - parser.add_argument("--max-missing", type=int, default=90) - parser.add_argument("--match-distance", type=float, default=250.0) - parser.add_argument("--status-confirm-frames", type=int, default=5) - parser.add_argument("--headless", action="store_true") - parser.add_argument("--stream", type=int, default=0, metavar="PORT") - parser.add_argument("--csv-on-exit", action="store_true") - parser.add_argument("--device", default="0") - parser.add_argument("--half", action="store_true") - parser.add_argument("--imgsz", type=int, default=320) + parser.add_argument("--conf", type=float, default=det["conf"]) + parser.add_argument("--max-missing", type=int, default=trk["max_missing"]) + parser.add_argument("--match-distance", type=float, default=trk["match_distance"]) + parser.add_argument("--status-confirm-frames", type=int, default=trk["status_confirm_frames"]) + parser.add_argument("--max-distance-m", type=float, default=det.get("max_distance_m", 0.0), + help="RealSense-only: drop candidates beyond this depth (0 = off)") + parser.add_argument("--headless", action="store_true") + parser.add_argument("--stream", type=int, default=0, metavar="PORT") + parser.add_argument("--csv-interval", type=int, default=trk.get("csv_write_every_n_frames", 30), + help="Write result.csv every N frames (0 = only on exit)") + parser.add_argument("--csv-on-exit", action="store_true", + help="Alias for --csv-interval 0") + parser.add_argument("--no-snapshots", action="store_true", + help="Disable full-frame snapshot on transitions") + parser.add_argument("--device", default=det["device"]) + parser.add_argument("--half", action="store_true", default=det["half"]) + parser.add_argument("--imgsz", type=int, default=det["imgsz"]) args = parser.parse_args() set_inference_config(device=args.device, half=args.half, imgsz=args.imgsz) @@ -154,12 +188,17 @@ def main(): log.warning("PyTorch not found") capture_dirs = setup_capture_dirs() + snapshot_dirs = None 
+ if cap_cfg.get("save_event_snapshot", True) and not args.no_snapshots: + snapshot_dirs = setup_snapshot_dirs() + + csv_interval = 0 if args.csv_on_exit else max(0, args.csv_interval) try: model_path = resolve_model_path(args.model) except FileNotFoundError as e: log.error(str(e)) - log.error("Train first: saqr-train --dataset data/dataset") + log.error("Train first: python -m apps.train_cli --dataset data/dataset") raise SystemExit(1) log.info(f"Loading model: {model_path}") @@ -176,14 +215,17 @@ def main(): run_video( model, source, args.conf, capture_dirs, show_gui=not args.headless, - csv_every_frame=not args.csv_on_exit, + csv_interval=csv_interval, max_missing=args.max_missing, match_distance=args.match_distance, status_confirm_frames=args.status_confirm_frames, + snapshot_dirs=snapshot_dirs, + max_distance_m=args.max_distance_m, stream_port=args.stream, ) elif Path(source).exists(): - run_image(model, source, args.conf, capture_dirs, show_gui=not args.headless) + run_image(model, source, args.conf, capture_dirs, + show_gui=not args.headless, snapshot_dirs=snapshot_dirs) else: log.error(f"Source not found: {source}") raise SystemExit(1) diff --git a/saqr/apps/train_cli.py b/apps/train_cli.py similarity index 84% rename from saqr/apps/train_cli.py rename to apps/train_cli.py index 55fb294..5369610 100644 --- a/saqr/apps/train_cli.py +++ b/apps/train_cli.py @@ -7,10 +7,12 @@ from pathlib import Path import yaml -from saqr.core.paths import DATASET_DIR, MODELS_DIR, PROJECT_ROOT, RUNS_DIR -from saqr.utils.logger import get_logger +from core.paths import DATASET_DIR, MODELS_DIR, PROJECT_ROOT, RUNS_DIR +from utils.config import load_config +from utils.logger import get_logger log = get_logger("Training", "train") +_TRAIN = load_config("core")["training"] EXPECTED_CLASSES = [ "boots", "gloves", "goggles", "helmet", "no-boots", @@ -52,13 +54,13 @@ def main(): parser = argparse.ArgumentParser(description="Train Saqr PPE detector (YOLO11n)") parser.add_argument("--dataset", default=str(DATASET_DIR), help="Root folder containing data.yaml + train/valid/test") - parser.add_argument("--epochs", type=int, default=100) - parser.add_argument("--imgsz", type=int, default=640) - parser.add_argument("--batch", type=int, default=16) - parser.add_argument("--model", default="yolo11n.pt", + parser.add_argument("--epochs", type=int, default=_TRAIN["epochs"]) + parser.add_argument("--imgsz", type=int, default=_TRAIN["imgsz"]) + parser.add_argument("--batch", type=int, default=_TRAIN["batch"]) + parser.add_argument("--model", default=_TRAIN["base_model"], help="Base YOLO model (auto-downloaded if not present)") - parser.add_argument("--name", default="saqr_det") - parser.add_argument("--device", default="0") + parser.add_argument("--name", default=_TRAIN["run_name"]) + parser.add_argument("--device", default=_TRAIN["device"]) args = parser.parse_args() dataset_root = Path(args.dataset) diff --git a/saqr/apps/view_stream.py b/apps/view_stream.py similarity index 69% rename from saqr/apps/view_stream.py rename to apps/view_stream.py index 2a31d35..4d1c89c 100644 --- a/saqr/apps/view_stream.py +++ b/apps/view_stream.py @@ -1,15 +1,20 @@ -"""View the robot's MJPEG stream on a laptop.""" +"""View the robot's MJPEG stream from a laptop.""" from __future__ import annotations import argparse import cv2 +from utils.config import load_config + def main(): + default_ip = load_config("robot")["deploy"]["robot_ip"] + default_port = load_config("core")["stream"]["port"] + parser = 
argparse.ArgumentParser(description="View Saqr PPE stream from robot")
-    parser.add_argument("--ip", default="192.168.123.164", help="Robot IP address")
-    parser.add_argument("--port", default="8080", help="Stream port")
+    parser.add_argument("--ip", default=default_ip, help="Robot IP address")
+    parser.add_argument("--port", default=str(default_port), help="Stream port")
     args = parser.parse_args()
 
     url = f"http://{args.ip}:{args.port}/stream"
diff --git a/assets/audio/README.md b/assets/audio/README.md
new file mode 100644
index 0000000..4d41144
--- /dev/null
+++ b/assets/audio/README.md
@@ -0,0 +1,88 @@
+# Pre-recorded audio library
+
+WAV clips played via `AudioClient.PlayStream` on the G1 speaker. Bypassing
+`TtsMaker` saves ~200–700 ms of firmware synthesis buffering per announcement
+and eliminates the 3104 "device busy" errors.
+
+## Required format
+
+Every file **must** be:
+
+- **16 kHz** sample rate
+- **mono** (1 channel)
+- **16-bit signed PCM** (`int16`)
+- `.wav` container
+
+Any file not matching this is logged as a warning and skipped (the bridge
+falls back to `TtsMaker` for that phrase per `config/robot_config.json.tts.mode`).
+
+## Expected files
+
+Drop WAVs under the right `<category>/<key>.wav` path so `AudioPlayer` finds them:
+
+```
+fixed/
+  ready.wav            "Saqr is running. Press R2 plus X to start."
+  deactivated.wav      "Saqr deactivated."
+  no_camera.wav        "Camera not connected. Please plug in the camera and try again."
+  safe.wav             "Safe to enter. Have a good day."
+  unsafe_generic.wav   "Please stop. Wear your proper safety equipment."
+
+unsafe_missing/
+  helmet.wav           "Please stop. Wear your proper safety equipment. You are missing helmet."
+  vest.wav             "Please stop. Wear your proper safety equipment. You are missing vest."
+  helmet_vest.wav      "Please stop. Wear your proper safety equipment. You are missing helmet and vest."
+```
+
+**Naming rule for `unsafe_missing/`**: the `<key>` is the missing PPE names
+sorted alphabetically and joined with `_`. So if someone misses both helmet
+and vest, the bridge looks up `unsafe_missing/helmet_vest.wav` (not
+`vest_helmet.wav`). If you extend `compliance.required_ppe` later, add clips
+for every subset — for 3 required items that's 7 combinations (2³−1).
+
+## Converting existing recordings
+
+If your source file is at a different sample rate or stereo, convert with
+`ffmpeg`:
+
+```bash
+ffmpeg -y -i input.m4a -ac 1 -ar 16000 -sample_fmt s16 fixed/safe.wav
+```
+
+Validate with:
+
+```bash
+python3 - <<'EOF'
+import wave, sys
+with wave.open("fixed/safe.wav", "rb") as wf:
+    print(wf.getframerate(), "Hz,", wf.getnchannels(), "ch,", wf.getsampwidth()*8, "bit")
+EOF
+# must print: 16000 Hz, 1 ch, 16 bit
+```
+
+## Recording tips
+
+- Quiet room; no echo.
+- Don't clip — keep peaks below 0 dBFS.
+- Leave ~100 ms of silence at the start and end so the clip doesn't pop.
+- Target speaking rate: ~3 syllables/sec. The shortest clip (`deactivated`)
+  should be ~2 s; the longest (`no_camera`) around 5–6 s.
+
+## Runtime behaviour
+
+In `config/robot_config.json`:
+
+```json
+"tts": {
+  "mode": "recorded_or_tts"
+}
+```
+
+Modes:
+- `recorded_or_tts` — play WAV if the clip exists, otherwise fall back to `TtsMaker`.
+- `recorded_only` — play WAV or stay silent. Useful for demos where you want
+  deterministic audio. Will skip any phrase whose clip is missing.
+- `tts_only` — ignore the WAV library entirely (current legacy behaviour).
+
+After adding or replacing WAVs, restart the bridge to reload the library
+(`sudo systemctl restart saqr-bridge`).
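The naming rule above implies one clip per non-empty subset of `compliance.required_ppe`. A throwaway helper, not part of the Saqr code base, that lists the clip names you would need to record (the required list mirrors `config/core_config.json`):

```python
# Enumerate the unsafe_missing/ clip names implied by the required PPE list.
from itertools import combinations

required_ppe = ["helmet", "vest"]  # compliance.required_ppe in config/core_config.json

clips = [
    "_".join(combo) + ".wav"
    for r in range(1, len(required_ppe) + 1)
    for combo in combinations(sorted(required_ppe), r)
]
print(clips)  # ['helmet.wav', 'vest.wav', 'helmet_vest.wav']  (2**2 - 1 = 3 files)
```

Adding a third required item grows this to 7 clips, which matches the 2³−1 count quoted in the README above.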
diff --git a/saqr/core/__init__.py b/assets/audio/fixed/.gitkeep similarity index 100% rename from saqr/core/__init__.py rename to assets/audio/fixed/.gitkeep diff --git a/assets/audio/fixed/deactivated.wav b/assets/audio/fixed/deactivated.wav new file mode 100644 index 0000000..78ed258 Binary files /dev/null and b/assets/audio/fixed/deactivated.wav differ diff --git a/assets/audio/fixed/no_camera.wav b/assets/audio/fixed/no_camera.wav new file mode 100644 index 0000000..9382ad8 Binary files /dev/null and b/assets/audio/fixed/no_camera.wav differ diff --git a/assets/audio/fixed/ready.wav b/assets/audio/fixed/ready.wav new file mode 100644 index 0000000..52be2c0 Binary files /dev/null and b/assets/audio/fixed/ready.wav differ diff --git a/assets/audio/fixed/safe.wav b/assets/audio/fixed/safe.wav new file mode 100644 index 0000000..16b02ee Binary files /dev/null and b/assets/audio/fixed/safe.wav differ diff --git a/assets/audio/fixed/unsafe_generic.wav b/assets/audio/fixed/unsafe_generic.wav new file mode 100644 index 0000000..f4d5548 Binary files /dev/null and b/assets/audio/fixed/unsafe_generic.wav differ diff --git a/saqr/gui/__init__.py b/assets/audio/unsafe_missing/.gitkeep similarity index 100% rename from saqr/gui/__init__.py rename to assets/audio/unsafe_missing/.gitkeep diff --git a/assets/audio/unsafe_missing/helmet.wav b/assets/audio/unsafe_missing/helmet.wav new file mode 100644 index 0000000..ebc439c Binary files /dev/null and b/assets/audio/unsafe_missing/helmet.wav differ diff --git a/assets/audio/unsafe_missing/helmet_vest.wav b/assets/audio/unsafe_missing/helmet_vest.wav new file mode 100644 index 0000000..63e082b Binary files /dev/null and b/assets/audio/unsafe_missing/helmet_vest.wav differ diff --git a/assets/audio/unsafe_missing/vest.wav b/assets/audio/unsafe_missing/vest.wav new file mode 100644 index 0000000..abf4cd9 Binary files /dev/null and b/assets/audio/unsafe_missing/vest.wav differ diff --git a/config/core_config.json b/config/core_config.json new file mode 100644 index 0000000..c049a36 --- /dev/null +++ b/config/core_config.json @@ -0,0 +1,44 @@ +{ + "detection": { + "conf": 0.35, + "device": "0", + "half": false, + "imgsz": 320, + "default_model": "saqr_best.pt", + "max_distance_m": 3.0 + }, + "tracking": { + "max_missing": 90, + "match_distance": 250.0, + "status_confirm_frames": 5, + "csv_write_every_n_frames": 30, + "stationary_check_frames": 15, + "stationary_tolerance_px": 40.0 + }, + "compliance": { + "required_ppe": ["helmet", "vest"] + }, + "capture": { + "save_event_snapshot": true + }, + "camera": { + "width": 640, + "height": 480, + "fps": 30, + "default_source": "0", + "enable_depth": true + }, + "stream": { + "host": "0.0.0.0", + "port": 8080, + "jpeg_quality": 70 + }, + "training": { + "epochs": 100, + "imgsz": 640, + "batch": 16, + "device": "0", + "base_model": "yolo11n.pt", + "run_name": "saqr_det" + } +} diff --git a/config/robot_config.json b/config/robot_config.json new file mode 100644 index 0000000..3ad700e --- /dev/null +++ b/config/robot_config.json @@ -0,0 +1,51 @@ +{ + "bridge": { + "iface": "eth0", + "timeout": 10.0, + "cooldown": 8.0, + "release_after": 0.5, + "audio_lead_s": 0.3, + "quick_fail_window": 8.0, + "trigger_poll_hz": 50.0 + }, + "tts": { + "mode": "recorded_only", + "speaker_id": 2, + "volume": 100, + "seconds_per_char": 0.12, + "min_seconds": 2.5, + "queue_max": 4, + "busy_factor": { + "min": 1.0, + "max": 2.5, + "up": 1.20, + "down": 0.97 + } + }, + "phrases": { + "safe": "Safe to enter. 
Have a good day.", + "unsafe_generic": "Please stop. Wear your proper safety equipment.", + "unsafe_missing": "Please stop. Wear your proper safety equipment. You are missing {items}.", + "deactivated": "Saqr deactivated.", + "ready": "Saqr is running. Press R2 plus X to start.", + "no_camera": "Camera not connected. Please plug in the camera and try again." + }, + "arm": { + "reject_action": "reject", + "release_action": "release arm" + }, + "deploy": { + "robot_ip": "192.168.123.164", + "robot_user": "unitree", + "robot_env": "marcus", + "remote_dir": "/home/unitree/Saqr", + "ssh_opts": "-o StrictHostKeyChecking=no -o ConnectTimeout=10" + }, + "start_saqr": { + "conda_root": "$HOME/miniconda3", + "conda_env": "marcus", + "dds_iface": "eth0", + "saqr_source": "realsense", + "stream_port": 8080 + } +} diff --git a/saqr/robot/__init__.py b/core/__init__.py similarity index 100% rename from saqr/robot/__init__.py rename to core/__init__.py diff --git a/core/camera.py b/core/camera.py new file mode 100644 index 0000000..ebe3b23 --- /dev/null +++ b/core/camera.py @@ -0,0 +1,157 @@ +"""Camera backends: RealSense SDK (with optional aligned depth) and OpenCV V4L2. + +RealSenseCapture exposes the latest depth frame via ``latest_depth`` (a +numpy uint16 array in millimetres) and ``depth_scale`` (meters per raw unit). +Non-RealSense captures leave ``latest_depth = None`` — callers must guard. +""" +from __future__ import annotations + +from typing import Optional + +import cv2 +import numpy as np + +from utils.config import load_config +from utils.logger import get_logger + +log = get_logger("Inference", "camera") +_CFG = load_config("core")["camera"] + +try: + import pyrealsense2 as rs + HAS_REALSENSE = True +except ImportError: + HAS_REALSENSE = False + + +class RealSenseCapture: + """pyrealsense2 pipeline with an OpenCV-like read() interface. + + When ``enable_depth`` is true (default from core_config.camera.enable_depth), + the pipeline also streams depth and aligns each frame to the color view. + The aligned depth frame is stored on ``self.latest_depth`` as a numpy array. 
+ """ + + def __init__(self, width: int = _CFG["width"], height: int = _CFG["height"], + fps: int = _CFG["fps"], serial: Optional[str] = None, + enable_depth: bool = _CFG.get("enable_depth", True)): + if not HAS_REALSENSE: + raise RuntimeError("pyrealsense2 not installed") + self.pipeline = rs.pipeline() + cfg = rs.config() + if serial: + cfg.enable_device(serial) + cfg.enable_stream(rs.stream.color, width, height, rs.format.bgr8, fps) + + self.has_depth = bool(enable_depth) + self._align = None + self.depth_scale = 0.001 # mm per raw unit default; overwritten below + + if self.has_depth: + cfg.enable_stream(rs.stream.depth, width, height, rs.format.z16, fps) + + self.profile = self.pipeline.start(cfg) + self._open = True + + if self.has_depth: + try: + depth_sensor = self.profile.get_device().first_depth_sensor() + self.depth_scale = float(depth_sensor.get_depth_scale()) + self._align = rs.align(rs.stream.color) + except Exception as e: + log.warning(f"Depth init failed ({e}); disabling depth") + self.has_depth = False + self._align = None + + self.latest_depth: Optional[np.ndarray] = None + + dev = self.profile.get_device() + log.info( + f"RealSense opened | {dev.get_info(rs.camera_info.name)} " + f"serial={dev.get_info(rs.camera_info.serial_number)} " + f"{width}x{height}@{fps} depth={self.has_depth}" + ) + + def isOpened(self) -> bool: + return self._open + + def read(self): + if not self._open: + return False, None + try: + frames = self.pipeline.wait_for_frames(timeout_ms=3000) + if self._align is not None: + frames = self._align.process(frames) + color = frames.get_color_frame() + if not color: + return False, None + if self.has_depth: + depth = frames.get_depth_frame() + self.latest_depth = ( + np.asanyarray(depth.get_data()) if depth else None + ) + return True, np.asanyarray(color.get_data()) + except Exception: + return False, None + + def release(self): + if self._open: + self.pipeline.stop() + self._open = False + + +def open_capture(source: str): + if source.lower().startswith("realsense"): + serial = None + if ":" in source: + serial = source.split(":", 1)[1] + return RealSenseCapture( + width=_CFG["width"], height=_CFG["height"], fps=_CFG["fps"], + serial=serial, + ) + + if str(source).isdigit(): + idx = int(source) + cap = cv2.VideoCapture(idx) + if cap.isOpened(): + return cap + cap = cv2.VideoCapture(idx, cv2.CAP_ANY) + if cap.isOpened(): + return cap + cap = cv2.VideoCapture(idx, cv2.CAP_V4L2) + return cap + + if source.startswith("/dev/video"): + cap = cv2.VideoCapture(source, cv2.CAP_V4L2) + if cap.isOpened(): + cap.set(cv2.CAP_PROP_BUFFERSIZE, 1) + cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*"MJPG")) + cap.set(cv2.CAP_PROP_FRAME_WIDTH, _CFG["width"]) + cap.set(cv2.CAP_PROP_FRAME_HEIGHT, _CFG["height"]) + cap.set(cv2.CAP_PROP_FPS, _CFG["fps"]) + return cap + + return cv2.VideoCapture(source) + + +def estimate_person_distance_m(depth_frame, bbox, depth_scale: float) -> Optional[float]: + """Median distance in metres inside ``bbox`` on the depth frame. + + Returns ``None`` if depth is unavailable or the bbox falls outside the + valid area. Ignores zero-depth pixels (RealSense's "no return" marker). 
+ """ + if depth_frame is None: + return None + h, w = depth_frame.shape[:2] + x1, y1, x2, y2 = bbox + x1 = max(0, min(int(x1), w - 1)) + x2 = max(0, min(int(x2), w)) + y1 = max(0, min(int(y1), h - 1)) + y2 = max(0, min(int(y2), h)) + if x2 <= x1 or y2 <= y1: + return None + roi = depth_frame[y1:y2, x1:x2] + valid = roi[roi > 0] + if valid.size == 0: + return None + return float(np.median(valid) * depth_scale) diff --git a/core/capture.py b/core/capture.py new file mode 100644 index 0000000..35559e3 --- /dev/null +++ b/core/capture.py @@ -0,0 +1,67 @@ +"""Per-track image cropping, capture dirs, and full-frame event snapshots.""" +from __future__ import annotations + +from datetime import datetime +from pathlib import Path +from typing import Dict, Optional + +import cv2 + +from core.detection import STATUSES +from core.geometry import clamp_bbox +from core.paths import CAPTURES_DIR, SNAPSHOTS_DIR + + +def setup_capture_dirs() -> Dict[str, Path]: + """runtime/captures/{SAFE,PARTIAL,UNSAFE}/ — latest crop per track.""" + dirs: Dict[str, Path] = {} + for s in STATUSES: + d = CAPTURES_DIR / s + d.mkdir(parents=True, exist_ok=True) + dirs[s] = d + return dirs + + +def setup_snapshot_dirs() -> Dict[str, Path]: + """runtime/snapshots/{SAFE,PARTIAL,UNSAFE}/ — annotated full frame per transition.""" + dirs: Dict[str, Path] = {} + for s in STATUSES: + d = SNAPSHOTS_DIR / s + d.mkdir(parents=True, exist_ok=True) + dirs[s] = d + return dirs + + +def save_track_image(frame, track, capture_dirs: Dict[str, Path]) -> Optional[Path]: + """Save the latest crop for a track. Overwritten each frame.""" + h, w = frame.shape[:2] + x1, y1, x2, y2 = clamp_bbox(track.bbox, w, h) + if x2 <= x1 or y2 <= y1: + return None + crop = frame[y1:y2, x1:x2] + if crop.size == 0: + return None + + target = capture_dirs[track.status] / f"track_{track.track_id:04d}.jpg" + if track.photo_path and track.photo_path != target and track.photo_path.exists(): + try: + track.photo_path.unlink() + except OSError: + pass + + cv2.imwrite(str(target), crop) + track.photo_path = target + return target + + +def save_event_snapshot(annotated_frame, track, snapshot_dirs: Dict[str, Path]) -> Optional[Path]: + """Save the full annotated frame at the moment of a NEW / STATUS_CHANGE event. + + Timestamped filename so a history is preserved across events. + """ + if annotated_frame is None or annotated_frame.size == 0: + return None + ts = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3] + target = snapshot_dirs[track.status] / f"track_{track.track_id:04d}_{ts}.jpg" + cv2.imwrite(str(target), annotated_frame) + return target diff --git a/core/compliance.py b/core/compliance.py new file mode 100644 index 0000000..1784ad3 --- /dev/null +++ b/core/compliance.py @@ -0,0 +1,55 @@ +"""Binary SAFE / UNSAFE classification — every item in REQUIRED_PPE must be worn. + +The required list comes from ``config/core_config.json::compliance.required_ppe`` +so it can be retuned without a code change. +""" +from __future__ import annotations + +from typing import Dict, List, Tuple + +from core.detection import POSITIVE_TO_NEGATIVE, PPE_DISPLAY_ORDER +from utils.config import load_config + +# PPE items required for SAFE status. Other items (gloves/goggles/boots) are +# still detected and logged but don't affect compliance. +REQUIRED_PPE: Tuple[str, ...] 
= tuple( + load_config("core").get("compliance", {}).get("required_ppe", ["helmet", "vest"]) +) + + +def _is_wearing(items: Dict[str, float], ppe: str) -> bool: + pos = items.get(ppe, 0.0) + neg = items.get(POSITIVE_TO_NEGATIVE[ppe], 0.0) + return pos > neg and pos > 0 + + +def status_from_items(items: Dict[str, float]) -> str: + """SAFE only when every required item is confidently worn.""" + if all(_is_wearing(items, p) for p in REQUIRED_PPE): + return "SAFE" + return "UNSAFE" + + +def split_wearing_missing(items: Dict[str, float]) -> Tuple[List[str], List[str], List[str]]: + """Bucket each PPE item into wearing / missing / unknown. + + ``missing`` only contains REQUIRED items that aren't confidently worn — + this is what the TTS announcement keys off of, so it maps cleanly to a + recorded clip. Non-required items (gloves/goggles/boots) are never in + ``missing`` regardless of whether their ``no-X`` class was detected; they + go to ``unknown`` so they're still visible in the event log. + """ + wearing, missing, unknown = [], [], [] + for pos in PPE_DISPLAY_ORDER: + neg = POSITIVE_TO_NEGATIVE[pos] + pos_conf = items.get(pos, 0.0) + neg_conf = items.get(neg, 0.0) + + worn = pos_conf > neg_conf and pos_conf > 0 + if worn: + wearing.append(pos) + elif pos in REQUIRED_PPE: + missing.append(pos) + else: + unknown.append(pos) + return wearing, missing, unknown diff --git a/saqr/core/detection.py b/core/detection.py similarity index 100% rename from saqr/core/detection.py rename to core/detection.py diff --git a/saqr/core/drawing.py b/core/drawing.py similarity index 95% rename from saqr/core/drawing.py rename to core/drawing.py index e09763a..7e124a9 100644 --- a/saqr/core/drawing.py +++ b/core/drawing.py @@ -5,8 +5,8 @@ from typing import List, Tuple import cv2 -from saqr.core.compliance import split_wearing_missing -from saqr.core.detection import STATUSES +from core.compliance import split_wearing_missing +from core.detection import STATUSES GREEN = (0, 200, 0) YELLOW = (0, 200, 255) diff --git a/saqr/core/events.py b/core/events.py similarity index 85% rename from saqr/core/events.py rename to core/events.py index 04d433e..0c9e7ab 100644 --- a/saqr/core/events.py +++ b/core/events.py @@ -1,6 +1,6 @@ """Event-line emission and CSV writers. -The ``emit_event`` output line is a contract with ``saqr.robot.bridge`` — its +The ``emit_event`` output line is a contract with ``robot.bridge`` — its regex parses this exact format. Don't change the field order without updating the bridge. 
""" @@ -11,8 +11,8 @@ from datetime import datetime from pathlib import Path from typing import Dict, List -from saqr.core.compliance import split_wearing_missing -from saqr.core.detection import CLASS_ORDER +from core.compliance import split_wearing_missing +from core.detection import CLASS_ORDER def now_iso() -> str: @@ -21,7 +21,7 @@ def now_iso() -> str: class EventLogger: FIELDS = ["timestamp", "track_id", "event_type", "status", - "wearing", "missing", "unknown", "photo", "path"] + "wearing", "missing", "unknown", "photo", "path", "snapshot"] def __init__(self, path: Path): self.path = path @@ -63,15 +63,16 @@ def write_result_csv(tracks: List, output: Path) -> None: def emit_event(track, event_logger: EventLogger, event_type: str = "STATUS_CHANGE", - force: bool = False) -> None: + force: bool = False, snapshot_path: Path = None) -> None: if track.photo_path is None: return if not force and track.announced_status == track.status: return wearing, missing, unknown = split_wearing_missing(track.items) + ts = datetime.now().strftime("%H:%M:%S.%f")[:-3] msg = ( - f"ID {track.track_id:04d} | {event_type} | {track.status} | " + f"[{ts}] ID {track.track_id:04d} | {event_type} | {track.status} | " f"wearing: {', '.join(wearing) or 'none'} | " f"missing: {', '.join(missing) or 'none'} | " f"unknown: {', '.join(unknown) or 'none'}" @@ -88,6 +89,7 @@ def emit_event(track, event_logger: EventLogger, event_type: str = "STATUS_CHANG "unknown": ", ".join(unknown), "photo": track.photo_path.name if track.photo_path else "", "path": str(track.photo_path) if track.photo_path else "", + "snapshot": str(snapshot_path) if snapshot_path else "", }) track.announced_status = track.status track.event_count += 1 diff --git a/saqr/core/geometry.py b/core/geometry.py similarity index 100% rename from saqr/core/geometry.py rename to core/geometry.py diff --git a/saqr/core/grouping.py b/core/grouping.py similarity index 95% rename from saqr/core/grouping.py rename to core/grouping.py index 9360779..f1842a8 100644 --- a/saqr/core/grouping.py +++ b/core/grouping.py @@ -4,8 +4,8 @@ from __future__ import annotations from dataclasses import dataclass, field from typing import Dict, List, Tuple -from saqr.core.detection import PPEItem -from saqr.core.geometry import box_distance, expand_bbox, merge_boxes +from core.detection import PPEItem +from core.geometry import box_distance, expand_bbox, merge_boxes @dataclass diff --git a/saqr/core/model.py b/core/model.py similarity index 90% rename from saqr/core/model.py rename to core/model.py index b24d3b3..340c58b 100644 --- a/saqr/core/model.py +++ b/core/model.py @@ -3,7 +3,7 @@ from __future__ import annotations from pathlib import Path -from saqr.core.paths import MODELS_DIR, PROJECT_ROOT +from core.paths import MODELS_DIR, PROJECT_ROOT def resolve_model_path(model_arg: str) -> Path: diff --git a/core/paths.py b/core/paths.py new file mode 100644 index 0000000..d81a7ab --- /dev/null +++ b/core/paths.py @@ -0,0 +1,28 @@ +"""Canonical project paths, resolved dynamically at import time. + +By default the project root is the parent of the ``core/`` package — +whatever filesystem location ``Saqr/`` currently lives at. Override by +setting the ``SAQR_ROOT`` environment variable. 
+""" +from __future__ import annotations + +import os +from pathlib import Path + +_DEFAULT_ROOT = Path(__file__).resolve().parents[1] +PROJECT_ROOT = Path(os.environ.get("SAQR_ROOT", _DEFAULT_ROOT)).resolve() + +CONFIG_DIR = PROJECT_ROOT / "config" +DATA_DIR = PROJECT_ROOT / "data" +DATASET_DIR = DATA_DIR / "dataset" +MODELS_DIR = DATA_DIR / "models" + +RUNTIME_DIR = PROJECT_ROOT / "runtime" +CAPTURES_DIR = RUNTIME_DIR / "captures" +SNAPSHOTS_DIR = RUNTIME_DIR / "snapshots" +RUNS_DIR = RUNTIME_DIR / "runs" + +LOGS_DIR = PROJECT_ROOT / "logs" + +RESULT_CSV = CAPTURES_DIR / "result.csv" +EVENTS_CSV = CAPTURES_DIR / "events.csv" diff --git a/core/pipeline.py b/core/pipeline.py new file mode 100644 index 0000000..973fbc0 --- /dev/null +++ b/core/pipeline.py @@ -0,0 +1,79 @@ +"""Per-frame detect + group + depth-gate + track + capture + emit pipeline. + +Announcements are gated on two conditions: + * the track is currently stationary (``is_stationary()``), so walkers-by + never trigger a reject/TTS — only people planted at the checkpoint do; + * the current status differs from the last-announced status for the track, + so we never repeat the same announcement. +""" +from __future__ import annotations + +from pathlib import Path +from typing import Dict, Optional + +from core.camera import estimate_person_distance_m +from core.capture import save_event_snapshot, save_track_image +from core.detection import collect_detections +from core.drawing import draw_track +from core.events import emit_event, write_result_csv +from core.grouping import group_detections_to_people +from core.paths import RESULT_CSV +from core.tracking import PersonTracker + + +def _filter_by_depth(candidates, depth_frame, depth_scale: float, max_distance_m: float): + """Drop candidates whose median depth exceeds ``max_distance_m``. + + Fail-open: candidates with no valid depth pass through. ``max_distance_m`` + <= 0 disables the filter entirely. + """ + if depth_frame is None or max_distance_m <= 0 or not candidates: + return candidates + kept = [] + for cand in candidates: + d = estimate_person_distance_m(depth_frame, cand.bbox, depth_scale) + if d is None or d <= max_distance_m: + cand.distance_m = d + kept.append(cand) + return kept + + +def process_frame(frame, model, tracker: PersonTracker, frame_idx: int, conf: float, + capture_dirs: Dict[str, Path], write_csv: bool = True, + *, + snapshot_dirs: Optional[Dict[str, Path]] = None, + depth_frame=None, depth_scale: float = 0.001, + max_distance_m: float = 0.0): + annotated = frame.copy() + h, w = annotated.shape[:2] + + detections = collect_detections(frame, model, conf) + candidates = group_detections_to_people(detections, w, h) + candidates = _filter_by_depth(candidates, depth_frame, depth_scale, max_distance_m) + + tracker.update(candidates, frame_idx) + visible = tracker.visible_tracks() + + # Pass 1: save crops + draw annotations so snapshots capture the full scene. + for track in visible: + save_track_image(frame, track, capture_dirs) + draw_track(annotated, track) + + # Pass 2: emit events ONLY for stationary tracks whose current status + # hasn't been announced yet. Walkers-by never reach this branch because + # their bbox-center history shows movement above the tolerance. 
+ for track in visible: + if track.announced_status == track.status: + continue + if not track.is_stationary(): + continue + ev_type = "NEW" if track.announced_status is None else "STATUS_CHANGE" + snap_path = None + if snapshot_dirs is not None: + snap_path = save_event_snapshot(annotated, track, snapshot_dirs) + emit_event(track, tracker.event_logger, ev_type, snapshot_path=snap_path) + + if write_csv: + write_result_csv(list(tracker.tracks.values()), RESULT_CSV) + + return annotated, visible diff --git a/saqr/core/streaming.py b/core/streaming.py similarity index 81% rename from saqr/core/streaming.py rename to core/streaming.py index a81ec78..4004d31 100644 --- a/saqr/core/streaming.py +++ b/core/streaming.py @@ -8,9 +8,11 @@ from typing import Optional import cv2 -from saqr.utils.logger import get_logger +from utils.config import load_config +from utils.logger import get_logger log = get_logger("Inference", "streaming") +_CFG = load_config("core")["stream"] _stream_frame: Optional[bytes] = None _stream_lock = threading.Lock() @@ -47,16 +49,18 @@ class MJPEGHandler(BaseHTTPRequestHandler): pass -def start_stream_server(port: int = 8080): - server = HTTPServer(("0.0.0.0", port), MJPEGHandler) +def start_stream_server(port: Optional[int] = None, host: Optional[str] = None): + h = host or _CFG["host"] + p = port if port is not None else _CFG["port"] + server = HTTPServer((h, p), MJPEGHandler) t = threading.Thread(target=server.serve_forever, daemon=True) t.start() - log.info(f"MJPEG stream server started on http://0.0.0.0:{port}") + log.info(f"MJPEG stream server started on http://{h}:{p}") return server def update_stream_frame(frame): global _stream_frame - _, jpeg = cv2.imencode(".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 70]) + _, jpeg = cv2.imencode(".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, _CFG["jpeg_quality"]]) with _stream_lock: _stream_frame = jpeg.tobytes() diff --git a/saqr/core/tracking.py b/core/tracking.py similarity index 69% rename from saqr/core/tracking.py rename to core/tracking.py index 59bb1f1..6565393 100644 --- a/saqr/core/tracking.py +++ b/core/tracking.py @@ -1,14 +1,24 @@ -"""Per-person Track dataclass and the greedy nearest-match PersonTracker.""" +"""Per-person Track dataclass and the greedy nearest-match PersonTracker. + +Tracks keep a rolling history of bbox centers so ``is_stationary()`` can gate +announcements: the bridge only fires TTS + arm actions on people standing +still at the checkpoint, not on anyone walking past. +""" from __future__ import annotations -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path from typing import Dict, List, Optional, Tuple -from saqr.core.compliance import status_from_items -from saqr.core.events import EventLogger, now_iso -from saqr.core.geometry import box_distance -from saqr.core.grouping import PersonCandidate +from core.compliance import status_from_items +from core.events import EventLogger, now_iso +from core.geometry import box_distance +from core.grouping import PersonCandidate +from utils.config import load_config + +_TRK = load_config("core")["tracking"] +STATIONARY_CHECK_FRAMES = int(_TRK.get("stationary_check_frames", 15)) +STATIONARY_TOLERANCE_PX = float(_TRK.get("stationary_tolerance_px", 40.0)) @dataclass @@ -26,6 +36,26 @@ class Track: event_count: int = 0 pending_status: Optional[str] = None pending_count: int = 0 + # Rolling history of bbox centers (most recent N frames) used to decide + # whether the person is standing still at the checkpoint. 
+ recent_centers: List[Tuple[float, float]] = field(default_factory=list) + + def record_center(self) -> None: + cx = (self.bbox[0] + self.bbox[2]) / 2.0 + cy = (self.bbox[1] + self.bbox[3]) / 2.0 + self.recent_centers.append((cx, cy)) + if len(self.recent_centers) > STATIONARY_CHECK_FRAMES: + del self.recent_centers[0] + + def is_stationary(self) -> bool: + """True when the bbox center hasn't moved more than the configured + tolerance across the last ``STATIONARY_CHECK_FRAMES`` frames.""" + if len(self.recent_centers) < STATIONARY_CHECK_FRAMES: + return False + xs = [c[0] for c in self.recent_centers] + ys = [c[1] for c in self.recent_centers] + spread = max(max(xs) - min(xs), max(ys) - min(ys)) + return spread <= STATIONARY_TOLERANCE_PX class PersonTracker: @@ -101,6 +131,7 @@ class PersonTracker: track.pending_status = None track.pending_count = 0 + track.record_center() used.add(track.track_id) stale = [] diff --git a/docs/DEPLOY.md b/docs/DEPLOY.md index 3b17855..ac80bb9 100644 --- a/docs/DEPLOY.md +++ b/docs/DEPLOY.md @@ -21,15 +21,27 @@ ## Repo layout ``` -saqr/ # python package (core/apps/gui/robot/utils) -scripts/ # deploy.sh, start_saqr.sh, run_local.sh, run_robot.sh, saqr-bridge.service -config/ # logging.json -data/ # dataset/, models/ (gitignored) -runtime/ # captures/, logs/, runs/ (gitignored) -docs/ # this file, start.md, use_case_catalogue.pdf -pyproject.toml # installs the `saqr`, `saqr-bridge`, `saqr-gui`, ... scripts +Saqr/ +├── core/ # detection + tracking + events (shared by CLI/GUI/bridge) +├── apps/ # CLI entry points (saqr, detect, train, manager, view_stream) +├── gui/ # PySide6 desktop GUI +├── robot/ # G1 bridge + DDS controller +├── utils/ # logger +├── scripts/ # start_saqr.sh (entry), saqr-bridge.service (systemd), deploy.sh +├── config/ # logging.json +├── data/ # dataset/, models/ (gitignored) +├── runtime/ # captures/, runs/ (gitignored) +├── logs/ # per-module .log files (gitignored) +├── docs/ # this file, start.md, use_case_catalogue.pdf +├── pyproject.toml # installs the package (no console scripts — use start_saqr.sh) +└── README.md ``` +`core/`, `apps/`, `gui/`, `robot/`, and `utils/` are the importable Python +packages. Run them via `python -m apps.saqr_cli`, `python -m robot.bridge`, +etc. The project root is auto-detected from `core/paths.py`; override with +`SAQR_ROOT=/custom/path` if the tree lives elsewhere. + --- ## Step 1: Train the Model (Dev Machine) @@ -38,7 +50,7 @@ pyproject.toml # installs the `saqr`, `saqr-bridge`, `saqr-gui`, ... scripts cd ~/Robotics_workspace/yslootahtech/Project/Saqr conda activate AI_MSI_yolo pip install -e . -saqr-train --dataset data/dataset --epochs 100 --batch 16 +python -m apps.train_cli --dataset data/dataset --epochs 100 --batch 16 ``` Verify model exists: @@ -58,10 +70,9 @@ scripts/deploy.sh --run # ...and start the bridge scripts/deploy.sh --ip 10.0.0.5 # custom robot IP ``` -The script rsyncs `saqr/`, `scripts/`, `config/`, `docs/`, -`pyproject.toml`, `requirements.txt`, and `README.md` to -`~/Saqr` on the robot, then runs `pip install -e .` inside the -target conda env (default `marcus`). +The script rsyncs `core/`, `apps/`, `gui/`, `robot/`, `utils/`, `scripts/`, +`config/`, `docs/`, and `pyproject.toml` to `~/Saqr` on the robot, then runs +`pip install -e .` inside the target conda env (default `marcus`). 
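With the console scripts removed, a quick way to confirm a deploy landed correctly is to import the installed packages on the robot and check the resolved paths. A sketch, assuming the default `remote_dir` of `/home/unitree/Saqr` from `robot_config.json` (setting `SAQR_ROOT` is optional since `core/paths.py` auto-detects the root):

```python
# Post-deploy sanity check; run inside the robot's conda env from any directory.
import os

# Optional override; only needed if the tree was copied somewhere unusual.
os.environ.setdefault("SAQR_ROOT", "/home/unitree/Saqr")

from core.paths import CONFIG_DIR, MODELS_DIR, PROJECT_ROOT

print("project root:", PROJECT_ROOT)
print("fine-tuned weights present:", (MODELS_DIR / "saqr_best.pt").is_file())
print("robot config present:", (CONFIG_DIR / "robot_config.json").is_file())
```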
--- @@ -100,56 +111,49 @@ sudo date -s "2026-04-10 15:00:00" python -c "from ultralytics import YOLO; print('ultralytics OK')" python -c "import torch; print('CUDA:', torch.cuda.is_available())" python -c "import cv2; print('opencv OK')" -python -c "import saqr; print('saqr', saqr.__version__)" +python -c "import core.paths; print('project root:', core.paths.PROJECT_ROOT)" ``` --- ## Step 4: Run Saqr (Robot) -### Production: bridge with R2+X / R2+Y - -The bridge owns the DDS clients and spawns `saqr` on demand. On the robot: +**The project has one entry point: [scripts/start_saqr.sh](../scripts/start_saqr.sh).** +Everything — conda activation, DDS iface, camera source, stream port — is +read from `config/robot_config.json.start_saqr` (env vars override). ```bash cd ~/Saqr -scripts/start_saqr.sh # manual launch -sudo systemctl restart saqr-bridge # systemd-managed (see start.md) +scripts/start_saqr.sh # manual (foreground, Ctrl+C to stop) +sudo systemctl restart saqr-bridge # systemd-owned (see start.md) ``` -Or without the helper script: +The launcher execs `python -m robot.bridge` with the production flags. +The bridge owns the DDS clients and spawns `python -m apps.saqr_cli` as a +subprocess when R2+X is pressed. + +### Overrides (no config edit) ```bash -conda activate marcus -python -m saqr.robot.bridge --iface eth0 --source realsense --headless -- --stream 8080 +SAQR_SOURCE=/dev/video2 scripts/start_saqr.sh # V4L2 fallback +CONDA_ENV=teleimager scripts/start_saqr.sh +DDS_IFACE=wlan0 scripts/start_saqr.sh +STREAM_PORT=9090 scripts/start_saqr.sh ``` -### Plain saqr (no bridge) +### Companion utilities (off the run path) + +Operators occasionally need these on the robot. They're not part of the +normal run flow: ```bash -# With display -scripts/run_robot.sh --stream 8080 - -# Headless -scripts/run_robot.sh --headless --stream 8080 - -# V4L2 fallback if RealSense SDK won't enumerate -scripts/run_robot.sh --source /dev/video2 --headless +python -m apps.manager_cli --export # dump a CSV report +python -m apps.manager_cli # interactive photo CRUD ``` -Equivalent `python -m` forms: - +Training (dev machine only): ```bash -python -m saqr.apps.saqr_cli --source realsense --model saqr_best.pt --headless -python -m saqr.apps.detect_cli --source /dev/video2 --model saqr_best.pt -python -m saqr.apps.manager_cli --export -``` - -### Dev machine GUI - -```bash -pip install -e ".[gui]" -python -m saqr.gui.app --source 0 +python -m apps.train_cli --epochs 100 ``` --- @@ -205,7 +209,7 @@ cat runtime/captures/result.csv # current state per tracked person cat runtime/captures/events.csv # audit log ls runtime/captures/{SAFE,PARTIAL,UNSAFE}/ -saqr-manager --export # quick CSV export +python -m apps.manager_cli --export # quick CSV export # Download to dev machine scp -r unitree@192.168.123.164:~/Saqr/runtime/captures ./captures_from_robot @@ -268,7 +272,7 @@ saqr --source realsense --headless \ | `result.csv` | `runtime/captures/` | Current state of tracked persons | | `events.csv` | `runtime/captures/` | Audit log (NEW / STATUS_CHANGE) | | Person crops | `runtime/captures/{SAFE,PARTIAL,UNSAFE}/*.jpg` | Latest crop per track | -| Logs | `runtime/logs/Inference/*.log` | Module log output | +| Logs | `logs/Inference/*.log` | Module log output | --- @@ -276,17 +280,17 @@ saqr --source realsense --headless \ | Path | Purpose | |------|---------| -| [saqr/apps/saqr_cli.py](../saqr/apps/saqr_cli.py) | Main PPE tracking entry (`saqr`) | -| [saqr/robot/bridge.py](../saqr/robot/bridge.py) | Saqr → G1 bridge 
(R2+X/R2+Y) | -| [saqr/robot/robot_controller.py](../saqr/robot/robot_controller.py) | G1 arm + audio + TTS worker | -| [saqr/robot/controller.py](../saqr/robot/controller.py) | G1 wireless-remote DDS reader | -| [saqr/core/pipeline.py](../saqr/core/pipeline.py) | Per-frame detect + track + emit | -| [saqr/core/tracking.py](../saqr/core/tracking.py) | `PersonTracker`, `Track` | -| [saqr/core/events.py](../saqr/core/events.py) | Event-line format (contract with bridge) | -| [saqr/apps/detect_cli.py](../saqr/apps/detect_cli.py) | Simple detection without tracking | -| [saqr/apps/train_cli.py](../saqr/apps/train_cli.py) | YOLO11n training | -| [saqr/apps/manager_cli.py](../saqr/apps/manager_cli.py) | Capture CRUD + CSV export | -| [saqr/gui/app.py](../saqr/gui/app.py) | PySide6 desktop GUI | +| [apps/saqr_cli.py](../apps/saqr_cli.py) | Main PPE tracking entry (`saqr`) | +| [robot/bridge.py](../robot/bridge.py) | Saqr → G1 bridge (R2+X/R2+Y) | +| [robot/robot_controller.py](../robot/robot_controller.py) | G1 arm + audio + TTS worker | +| [robot/controller.py](../robot/controller.py) | G1 wireless-remote DDS reader | +| [core/pipeline.py](../core/pipeline.py) | Per-frame detect + track + emit | +| [core/tracking.py](../core/tracking.py) | `PersonTracker`, `Track` | +| [core/events.py](../core/events.py) | Event-line format (contract with bridge) | +| [apps/detect_cli.py](../apps/detect_cli.py) | Simple detection without tracking | +| [apps/train_cli.py](../apps/train_cli.py) | YOLO11n training | +| [apps/manager_cli.py](../apps/manager_cli.py) | Capture CRUD + CSV export | +| [gui/app.py](../gui/app.py) | PySide6 desktop GUI | --- diff --git a/docs/start.md b/docs/start.md index d222874..7e31b1a 100644 --- a/docs/start.md +++ b/docs/start.md @@ -1,6 +1,6 @@ # Saqr — Auto-start on boot -How to auto-start `saqr.robot.bridge` on every boot of the Unitree G1 +How to auto-start `robot.bridge` on every boot of the Unitree G1 (Jetson), via `systemd` + `scripts/start_saqr.sh`. --- @@ -9,7 +9,7 @@ How to auto-start `saqr.robot.bridge` on every boot of the Unitree G1 | File | Role | |------|------| -| `~/Saqr/saqr/robot/bridge.py` | The bridge process (DDS + TTS + R2+X/R2+Y trigger loop). Entry point: `python -m saqr.robot.bridge`. | +| `~/Saqr/robot/bridge.py` | The bridge process (DDS + TTS + R2+X/R2+Y trigger loop). Entry point: `python -m robot.bridge`. | | `~/Saqr/scripts/start_saqr.sh` | Bash launcher: sources conda, activates `marcus`, `cd ~/Saqr`, exec the bridge with the production flags. | | `~/Saqr/scripts/saqr-bridge.service` | systemd unit that runs `start_saqr.sh` as user `unitree` on every boot, restarts on failure, logs to journalctl. 
| @@ -164,6 +164,6 @@ sudo systemctl restart saqr-bridge ### Bridge is running twice ```bash -ps -ef | grep "saqr.robot.bridge" +ps -ef | grep "robot.bridge" sudo systemctl restart saqr-bridge ``` diff --git a/saqr/utils/__init__.py b/gui/__init__.py similarity index 100% rename from saqr/utils/__init__.py rename to gui/__init__.py diff --git a/saqr/gui/app.py b/gui/app.py similarity index 91% rename from saqr/gui/app.py rename to gui/app.py index 147eba4..f3b78b2 100644 --- a/saqr/gui/app.py +++ b/gui/app.py @@ -18,18 +18,20 @@ from PySide6.QtWidgets import ( ) from ultralytics import YOLO -from saqr.core.capture import save_track_image, setup_capture_dirs -from saqr.core.compliance import split_wearing_missing -from saqr.core.detection import STATUSES, collect_detections -from saqr.core.drawing import draw_counters, draw_track -from saqr.core.events import EventLogger, emit_event, write_result_csv -from saqr.core.grouping import group_detections_to_people -from saqr.core.model import resolve_model_path -from saqr.core.paths import EVENTS_CSV, MODELS_DIR, PROJECT_ROOT, RESULT_CSV -from saqr.core.tracking import PersonTracker -from saqr.utils.logger import get_logger +from core.capture import save_track_image, setup_capture_dirs +from core.compliance import split_wearing_missing +from core.detection import STATUSES, collect_detections +from core.drawing import draw_counters, draw_track +from core.events import EventLogger, emit_event, write_result_csv +from core.grouping import group_detections_to_people +from core.model import resolve_model_path +from core.paths import EVENTS_CSV, MODELS_DIR, PROJECT_ROOT, RESULT_CSV +from core.tracking import PersonTracker +from utils.config import load_config +from utils.logger import get_logger log = get_logger("Inference", "gui") +_CORE = load_config("core") def list_cameras(max_idx: int = 10) -> List[str]: @@ -47,7 +49,10 @@ def list_cameras(max_idx: int = 10) -> List[str]: return sources if sources else ["0"] -def open_camera(source: str, width: int = 640, height: int = 480, fps: int = 30): +def open_camera(source: str, + width: int = _CORE["camera"]["width"], + height: int = _CORE["camera"]["height"], + fps: int = _CORE["camera"]["fps"]): if source.startswith("/dev/video"): cap = cv2.VideoCapture(source, cv2.CAP_V4L2) elif source.isdigit(): @@ -191,7 +196,9 @@ def cv_to_qpixmap(frame: np.ndarray, max_w: int = 960, max_h: int = 720) -> QPix class SaqrWindow(QMainWindow): - def __init__(self, default_model: str = "saqr_best.pt", default_source: str = "0"): + def __init__(self, + default_model: str = _CORE["detection"]["default_model"], + default_source: str = _CORE["camera"]["default_source"]): super().__init__() self.setWindowTitle("Saqr - PPE Safety Tracking") self.setMinimumSize(1200, 700) @@ -235,26 +242,26 @@ class SaqrWindow(QMainWindow): self.conf_spin = QDoubleSpinBox() self.conf_spin.setRange(0.1, 0.9) self.conf_spin.setSingleStep(0.05) - self.conf_spin.setValue(0.35) + self.conf_spin.setValue(_CORE["detection"]["conf"]) param_lay.addWidget(self.conf_spin, 0, 1) param_lay.addWidget(QLabel("Max Missing:"), 1, 0) self.missing_spin = QSpinBox() self.missing_spin.setRange(10, 300) - self.missing_spin.setValue(90) + self.missing_spin.setValue(_CORE["tracking"]["max_missing"]) param_lay.addWidget(self.missing_spin, 1, 1) param_lay.addWidget(QLabel("Match Dist:"), 2, 0) self.dist_spin = QDoubleSpinBox() self.dist_spin.setRange(50, 500) self.dist_spin.setSingleStep(10) - self.dist_spin.setValue(250) + 
self.dist_spin.setValue(_CORE["tracking"]["match_distance"]) param_lay.addWidget(self.dist_spin, 2, 1) param_lay.addWidget(QLabel("Confirm Frames:"), 3, 0) self.confirm_spin = QSpinBox() self.confirm_spin.setRange(1, 20) - self.confirm_spin.setValue(5) + self.confirm_spin.setValue(_CORE["tracking"]["status_confirm_frames"]) param_lay.addWidget(self.confirm_spin, 3, 1) left.addWidget(param_grp) @@ -437,7 +444,7 @@ class SaqrWindow(QMainWindow): "CSV Files (*.csv)" ) if path: - from saqr.apps.manager_cli import export_csv, load_photos + from apps.manager_cli import export_csv, load_photos export_csv(load_photos(), Path(path)) self._on_event(f"Exported: {path}") @@ -449,8 +456,8 @@ class SaqrWindow(QMainWindow): def main(): import argparse parser = argparse.ArgumentParser(description="Saqr PPE GUI") - parser.add_argument("--model", default="saqr_best.pt") - parser.add_argument("--source", default="0") + parser.add_argument("--model", default=_CORE["detection"]["default_model"]) + parser.add_argument("--source", default=_CORE["camera"]["default_source"]) args = parser.parse_args() app = QApplication(sys.argv) diff --git a/pyproject.toml b/pyproject.toml index 074d70b..c01c09b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,14 +18,9 @@ dependencies = [ gui = ["PySide6>=6.5.0"] realsense = ["pyrealsense2"] -[project.scripts] -saqr = "saqr.apps.saqr_cli:main" -saqr-detect = "saqr.apps.detect_cli:main" -saqr-train = "saqr.apps.train_cli:main" -saqr-manager = "saqr.apps.manager_cli:main" -saqr-view-stream = "saqr.apps.view_stream:main" -saqr-gui = "saqr.gui.app:main" -saqr-bridge = "saqr.robot.bridge:main" +# No console scripts — the project is launched through scripts/start_saqr.sh +# (which calls `python -m robot.bridge`). That bridge then spawns +# `python -m apps.saqr_cli` as a subprocess. Everything else is a module. [tool.setuptools.packages.find] -include = ["saqr*"] +include = ["core*", "apps*", "gui*", "robot*", "utils*"] diff --git a/robot/__init__.py b/robot/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/robot/audio_player.py b/robot/audio_player.py new file mode 100644 index 0000000..6bcd55f --- /dev/null +++ b/robot/audio_player.py @@ -0,0 +1,181 @@ +"""Plays pre-recorded WAV clips via AudioClient.PlayStream — bypasses TtsMaker. + +Expected WAV format (required by the G1 audio channel): + 16 kHz, mono, 16-bit signed PCM. + +Library layout under assets/audio/: + fixed/.wav e.g. safe.wav, unsafe_generic.wav, ready.wav + unsafe_missing/.wav e.g. helmet.wav, vest.wav, helmet_vest.wav + (key is sorted-joined PPE names, "_" separator) + +Callers look up by (category, key). Missing clip → returns False so the caller +can fall back to TtsMaker. +""" +from __future__ import annotations + +import datetime +import json +import time +import wave +from pathlib import Path +from typing import Dict, Optional, Tuple + +from core.paths import PROJECT_ROOT + +AUDIO_ROOT = PROJECT_ROOT / "assets" / "audio" + + +def _ts() -> str: + return datetime.datetime.now().strftime("%H:%M:%S.%f")[:-3] + +EXPECTED_RATE = 16000 +EXPECTED_CHANNELS = 1 +EXPECTED_WIDTH = 2 # bytes per sample (int16) +PLAY_CHUNK_BYTES = 96000 # 3 s per PlayStream call (matches the Unitree example) +PLAY_APP_NAME = "saqr_audio" + +# The G1 arm action and audio stack share a firmware busy state. If we try to +# PlayStream while an arm action is still being processed, chunk 0 is often +# rejected with rc=3104. Later chunks can fail too if the firmware hasn't +# fully processed the previous chunk. 
Retry both cases with back-off. +CHUNK0_RETRIES = 4 +CHUNK0_BACKOFF_S = 1.0 # 1,2,3,4 s — total ≈ 10 s, covers a full arm cycle +CHUNKN_RETRIES = 2 +CHUNKN_BACKOFF_S = 1.0 # 1,2 s — firmware usually clears within 1-2 s +PRE_STREAM_SLEEP = 0.1 # pause after PlayStop before first PlayStream +INTER_CHUNK_MARGIN = 0.1 # extra sleep after each chunk's audio duration + + +def _read_wav_pcm(path: Path) -> Optional[bytes]: + """Return the raw PCM bytes if the WAV matches the expected format, else None.""" + try: + with wave.open(str(path), "rb") as wf: + ch = wf.getnchannels() + sw = wf.getsampwidth() + fr = wf.getframerate() + if ch != EXPECTED_CHANNELS or sw != EXPECTED_WIDTH or fr != EXPECTED_RATE: + print( + f"[audio_player {_ts()}][WARN] {path}: expected " + f"{EXPECTED_RATE} Hz mono 16-bit; got " + f"{fr} Hz {ch}-ch {sw*8}-bit. Skipping.", + flush=True, + ) + return None + return wf.readframes(wf.getnframes()) + except Exception as e: + print(f"[audio_player {_ts()}][WARN] failed to load {path}: {e}", flush=True) + return None + + +class AudioPlayer: + """Loads WAVs under ``assets/audio//.wav`` and plays them on the G1.""" + + def __init__(self, audio_client): + self.audio_client = audio_client + self._clips: Dict[Tuple[str, str], bytes] = {} + self._load_all() + if self._clips: + print(f"[audio_player {_ts()}] loaded {len(self._clips)} clip(s): " + f"{sorted(self._clips.keys())}", flush=True) + else: + print(f"[audio_player {_ts()}] no clips found under {AUDIO_ROOT}", flush=True) + + # ── library ───────────────────────────────────────────────────────────── + def _load_all(self) -> None: + if not AUDIO_ROOT.exists(): + return + for category_dir in sorted(AUDIO_ROOT.iterdir()): + if not category_dir.is_dir(): + continue + for wav_path in sorted(category_dir.glob("*.wav")): + pcm = _read_wav_pcm(wav_path) + if pcm is not None: + self._clips[(category_dir.name, wav_path.stem)] = pcm + + def has(self, category: str, key: str) -> bool: + return (category, key) in self._clips + + # ── playback ──────────────────────────────────────────────────────────── + def play(self, category: str, key: str) -> bool: + """Blocking play. Returns True on success, False if clip missing / failed.""" + pcm = self._clips.get((category, key)) + if pcm is None: + return False + if self.audio_client is None: + return False + + # Re-assert max volume on every play; the firmware sometimes resets + # between sessions or after certain events. Matches the Unitree + # reference script's pattern. + try: + self.audio_client.SetVolume(100) + except Exception: + pass + + # Cancel any in-flight stream from a previous call. + try: + from unitree_sdk2py.g1.audio.g1_audio_api import ROBOT_API_ID_AUDIO_STOP_PLAY + self.audio_client._Call( + ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": PLAY_APP_NAME}), + ) + except Exception: + pass + time.sleep(PRE_STREAM_SLEEP) + + sid = f"saqr_{int(time.time() * 1000)}" + offset = 0 + chunk0_attempts = 0 + chunkn_attempts = 0 + while offset < len(pcm): + chunk = pcm[offset:offset + PLAY_CHUNK_BYTES] + code, _ = self.audio_client.PlayStream(PLAY_APP_NAME, sid, chunk) + + if code != 0: + # chunk 0: likely firmware busy (arm motion colliding with audio). + # retry with linear back-off; new sid so firmware sees a fresh stream. 
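+                # With the defaults above (CHUNK0_RETRIES=4, CHUNK0_BACKOFF_S=1.0 s)
+                # the retry waits are 1 s, 2 s, 3 s, 4 s, which covers roughly one
+                # full reject -> release arm cycle before giving up.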
+ if offset == 0 and chunk0_attempts < CHUNK0_RETRIES: + chunk0_attempts += 1 + delay = CHUNK0_BACKOFF_S * chunk0_attempts + print(f"[audio_player {_ts()}][WARN] PlayStream rc={code} at byte 0; " + f"retry {chunk0_attempts}/{CHUNK0_RETRIES} in {delay:.1f}s", + flush=True) + time.sleep(delay) + sid = f"saqr_{int(time.time() * 1000)}" + continue + + # later chunks: firmware is still processing the previous chunk. + # Retry a couple of times with back-off, keeping the same sid. + if offset > 0 and chunkn_attempts < CHUNKN_RETRIES: + chunkn_attempts += 1 + delay = CHUNKN_BACKOFF_S * chunkn_attempts + print(f"[audio_player {_ts()}][WARN] PlayStream rc={code} at byte {offset}; " + f"mid-stream retry {chunkn_attempts}/{CHUNKN_RETRIES} in {delay:.1f}s", + flush=True) + time.sleep(delay) + continue + + print(f"[audio_player {_ts()}][WARN] PlayStream rc={code} at byte {offset} " + f"(retries exhausted)", flush=True) + return False + + if offset == 0 and chunk0_attempts > 0: + print(f"[audio_player {_ts()}] chunk 0 succeeded after " + f"{chunk0_attempts} retry/retries", flush=True) + elif offset > 0 and chunkn_attempts > 0: + print(f"[audio_player {_ts()}] chunk at byte {offset} succeeded after " + f"{chunkn_attempts} retry/retries", flush=True) + chunkn_attempts = 0 # reset for any subsequent chunk + + offset += len(chunk) + # Wait for the chunk to finish playing before sending the next. + chunk_seconds = len(chunk) / (EXPECTED_RATE * EXPECTED_WIDTH) + time.sleep(chunk_seconds + INTER_CHUNK_MARGIN) + + # Short settle time after the final chunk before PlayStop. + time.sleep(0.2) + try: + self.audio_client.PlayStop(PLAY_APP_NAME) + except Exception: + pass + return True diff --git a/saqr/robot/bridge.py b/robot/bridge.py similarity index 67% rename from saqr/robot/bridge.py rename to robot/bridge.py index 75022d9..000241d 100644 --- a/saqr/robot/bridge.py +++ b/robot/bridge.py @@ -11,7 +11,7 @@ R2+Y stops it. While saqr is running the bridge parses its event stream and: See docs/DEPLOY.md for wireless-remote workflow and systemd deploy notes. -Saqr event line format (from saqr.core.events.emit_event): +Saqr event line format (from core.events.emit_event): ID 0001 | NEW | UNSAFE | wearing: ... | missing: ... | unknown: ... ID 0001 | STATUS_CHANGE | SAFE | wearing: ... | missing: ... | unknown: ... """ @@ -27,32 +27,42 @@ import threading import time from typing import Dict, Optional -from saqr.core.paths import PROJECT_ROOT -from saqr.robot.robot_controller import RobotController +from core.paths import PROJECT_ROOT +from robot.robot_controller import RobotController +from utils.config import load_config + +import datetime + + +def _ts() -> str: + return datetime.datetime.now().strftime("%H:%M:%S.%f")[:-3] + +_ROBOT = load_config("robot") +_BRIDGE = _ROBOT["bridge"] +_PHRASES = _ROBOT["phrases"] DANGER_STATUS = "UNSAFE" SAFE_STATUS = "SAFE" -# speaker_id=2 was confirmed English on current G1 firmware via -# Project/Sanad/voice_example.py mode 6. speaker_id=0 is Chinese. -TTS_SPEAKER_ID = 2 +# speaker_id is locked to a language by G1 firmware: 2=English, 0=Chinese. +# Confirmed via Project/Sanad/voice_example.py mode 6. +TTS_SPEAKER_ID = _ROBOT["tts"]["speaker_id"] -TTS_TEXT_SAFE = "Safe to enter. Have a good day." -TTS_UNSAFE_WITH_MISSING = ( - "Please stop. Wear your proper safety equipment. You are missing {items}." -) -TTS_UNSAFE_GENERIC = "Please stop. Wear your proper safety equipment." -TTS_BRIDGE_DEACTIVATED = "Saqr deactivated." -TTS_BRIDGE_READY = "Saqr is running. Press R2 plus X to start." 
-TTS_BRIDGE_NO_CAMERA = ( - "Camera not connected. Please plug in the camera and try again." -) +TTS_TEXT_SAFE = _PHRASES["safe"] +TTS_UNSAFE_WITH_MISSING = _PHRASES["unsafe_missing"] +TTS_UNSAFE_GENERIC = _PHRASES["unsafe_generic"] +TTS_BRIDGE_DEACTIVATED = _PHRASES["deactivated"] +TTS_BRIDGE_READY = _PHRASES["ready"] +TTS_BRIDGE_NO_CAMERA = _PHRASES["no_camera"] -QUICK_FAIL_WINDOW_S = 8.0 +QUICK_FAIL_WINDOW_S = _BRIDGE["quick_fail_window"] +TRIGGER_POLL_HZ = _BRIDGE["trigger_poll_hz"] -# ID NNNN | EVENT_TYPE | STATUS | wearing: ... | missing: ... | unknown: ... +# [HH:MM:SS.fff] ID NNNN | EVENT_TYPE | STATUS | wearing: ... | missing: ... | unknown: ... +# The leading timestamp is optional for backwards compatibility with old logs. EVENT_RE = re.compile( - r"^ID\s+(?P\d+)\s*\|\s*" + r"^(?:\[[\d:.]+\]\s+)?" + r"ID\s+(?P\d+)\s*\|\s*" r"(?PNEW|STATUS_CHANGE)\s*\|\s*" r"(?PSAFE|PARTIAL|UNSAFE)\s*\|\s*" r"wearing:\s*(?P[^|]*?)\s*\|\s*" @@ -86,7 +96,7 @@ def build_unsafe_tts(missing: list) -> str: def build_saqr_cmd(saqr_extra_args: list) -> list: """Invoke the saqr CLI via ``python -m`` so it picks up the package layout.""" - return [sys.executable, "-u", "-m", "saqr.apps.saqr_cli", *saqr_extra_args] + return [sys.executable, "-u", "-m", "apps.saqr_cli", *saqr_extra_args] def split_argv(argv): @@ -101,10 +111,12 @@ class Bridge: def __init__(self, robot: RobotController, cooldown_s: float, release_after_s: float, saqr_args: list, - env: Dict[str, str], cwd: str): + env: Dict[str, str], cwd: str, + audio_lead_s: float = 0.3): self.robot = robot self.cooldown_s = cooldown_s self.release_after_s = release_after_s + self.audio_lead_s = audio_lead_s self.saqr_args = saqr_args self.env = env self.cwd = cwd @@ -125,11 +137,11 @@ class Bridge: def start_saqr(self): with self._proc_lock: if self.proc is not None and self.proc.poll() is None: - print("[BRIDGE] start ignored — saqr already running", flush=True) + print(f"[BRIDGE {_ts()}] start ignored — saqr already running", flush=True) return cmd = build_saqr_cmd(self.saqr_args) - print(f"[BRIDGE] starting saqr: {' '.join(cmd)}", flush=True) + print(f"[BRIDGE {_ts()}] starting saqr: {' '.join(cmd)}", flush=True) self.proc = subprocess.Popen( cmd, cwd=self.cwd, @@ -154,10 +166,10 @@ class Bridge: with self._proc_lock: proc = self.proc if proc is None or proc.poll() is not None: - print("[BRIDGE] stop ignored — saqr not running", flush=True) + print(f"[BRIDGE {_ts()}] stop ignored — saqr not running", flush=True) self.proc = None return - print("[BRIDGE] stopping saqr (SIGINT)", flush=True) + print(f"[BRIDGE {_ts()}] stopping saqr (SIGINT)", flush=True) try: proc.send_signal(signal.SIGINT) except Exception: @@ -166,12 +178,12 @@ class Bridge: try: proc.wait(timeout=3.0) except subprocess.TimeoutExpired: - print("[BRIDGE] saqr did not exit in 3s, sending SIGTERM", flush=True) + print(f"[BRIDGE {_ts()}] saqr did not exit in 3s, sending SIGTERM", flush=True) try: proc.terminate() proc.wait(timeout=2.0) except subprocess.TimeoutExpired: - print("[BRIDGE] saqr unresponsive, sending SIGKILL", flush=True) + print(f"[BRIDGE {_ts()}] saqr unresponsive, sending SIGKILL", flush=True) proc.kill() proc.wait() @@ -182,7 +194,7 @@ class Bridge: self.proc = None self.reader_thread = None - self.robot.speak(TTS_BRIDGE_DEACTIVATED) + self.robot.speak(TTS_BRIDGE_DEACTIVATED, category="fixed", key="deactivated") def _read_stdout(self, proc: subprocess.Popen): start_t = self._proc_start_t @@ -191,17 +203,17 @@ class Bridge: for line in proc.stdout: self.handle_line(line) 
except Exception as e: - print(f"[BRIDGE][ERR] reader thread: {e}", flush=True) + print(f"[BRIDGE {_ts()}][ERR] reader thread: {e}", flush=True) rc = proc.wait() lifetime = time.time() - start_t if start_t > 0 else 0.0 - print(f"[BRIDGE] saqr exited rc={rc} (lifetime={lifetime:.1f}s)", + print(f"[BRIDGE {_ts()}] saqr exited rc={rc} (lifetime={lifetime:.1f}s)", flush=True) if rc not in (0, -2) and 0 < lifetime < QUICK_FAIL_WINDOW_S: try: - self.robot.speak(TTS_BRIDGE_NO_CAMERA) + self.robot.speak(TTS_BRIDGE_NO_CAMERA, category="fixed", key="no_camera") except Exception as e: - print(f"[BRIDGE][ERR] no-camera tts failed: {e}", flush=True) + print(f"[BRIDGE {_ts()}][ERR] no-camera tts failed: {e}", flush=True) def handle_line(self, line: str): line = line.rstrip() @@ -235,21 +247,33 @@ class Bridge: try: if status == DANGER_STATUS: - self.robot.speak(build_unsafe_tts(missing)) + # Fire audio first, give the worker thread a head start so + # PlayStream reaches the firmware BEFORE the arm command. + # Once audio is actively playing, the arm command queues + # behind it without blocking playback → audible overlap. + if missing: + key = "_".join(sorted(missing)) + self.robot.speak(build_unsafe_tts(missing), + category="unsafe_missing", key=key) + else: + self.robot.speak(TTS_UNSAFE_GENERIC, + category="fixed", key="unsafe_generic") + time.sleep(self.audio_lead_s) self.robot.reject(release_after=self.release_after_s) else: - self.robot.speak(TTS_TEXT_SAFE) + self.robot.speak(TTS_TEXT_SAFE, + category="fixed", key="safe") except Exception as e: - print(f"[BRIDGE][ERR] robot action failed: {e}", flush=True) + print(f"[BRIDGE {_ts()}][ERR] robot action failed: {e}", flush=True) def trigger_loop(bridge: Bridge, hub, stop_event: threading.Event, - poll_hz: float = 50.0): + poll_hz: float = TRIGGER_POLL_HZ): """Watch the wireless remote for R2+X (start) and R2+Y (stop).""" period = 1.0 / max(poll_hz, 1.0) waiting_release_x = False waiting_release_y = False - print("[BRIDGE] trigger loop ready — press R2+X to start, R2+Y to stop.", + print(f"[BRIDGE {_ts()}] trigger loop ready — press R2+X to start, R2+Y to stop.", flush=True) while not stop_event.is_set(): time.sleep(period) @@ -264,22 +288,22 @@ def trigger_loop(bridge: Bridge, hub, stop_event: threading.Event, waiting_release_x = False elif r2x: waiting_release_x = True - print("[BRIDGE] R2+X pressed -> start saqr", flush=True) + print(f"[BRIDGE {_ts()}] R2+X pressed -> start saqr", flush=True) try: bridge.start_saqr() except Exception as e: - print(f"[BRIDGE][ERR] start_saqr failed: {e}", flush=True) + print(f"[BRIDGE {_ts()}][ERR] start_saqr failed: {e}", flush=True) if waiting_release_y: if not r2y: waiting_release_y = False elif r2y: waiting_release_y = True - print("[BRIDGE] R2+Y pressed -> stop saqr", flush=True) + print(f"[BRIDGE {_ts()}] R2+Y pressed -> stop saqr", flush=True) try: bridge.stop_saqr() except Exception as e: - print(f"[BRIDGE][ERR] stop_saqr failed: {e}", flush=True) + print(f"[BRIDGE {_ts()}][ERR] stop_saqr failed: {e}", flush=True) def main(): @@ -288,14 +312,14 @@ def main(): ap = argparse.ArgumentParser( description="Bridge Saqr PPE events to the G1 arm 'reject' action." ) - ap.add_argument("--iface", default=None, + ap.add_argument("--iface", default=_BRIDGE["iface"], help="DDS network interface (e.g. 
eth0).") - ap.add_argument("--timeout", type=float, default=10.0) - ap.add_argument("--cooldown", type=float, default=8.0) - ap.add_argument("--release-after", type=float, default=2.0) - ap.add_argument("--dry-run", action="store_true") - ap.add_argument("--speaker-id", type=int, default=TTS_SPEAKER_ID) - ap.add_argument("--no-trigger", action="store_true") + ap.add_argument("--timeout", type=float, default=_BRIDGE["timeout"]) + ap.add_argument("--cooldown", type=float, default=_BRIDGE["cooldown"]) + ap.add_argument("--release-after", type=float, default=_BRIDGE["release_after"]) + ap.add_argument("--dry-run", action="store_true") + ap.add_argument("--speaker-id", type=int, default=TTS_SPEAKER_ID) + ap.add_argument("--no-trigger", action="store_true") ap.add_argument("--source", default=None) ap.add_argument("--headless", action="store_true") @@ -335,18 +359,19 @@ def main(): robot=robot, cooldown_s=args.cooldown, release_after_s=args.release_after, + audio_lead_s=float(_BRIDGE.get("audio_lead_s", 0.3)), saqr_args=saqr_args, env=env, cwd=str(PROJECT_ROOT), ) - print(f"[BRIDGE] saqr cmd template: {' '.join(build_saqr_cmd(saqr_args))}", + print(f"[BRIDGE {_ts()}] saqr cmd template: {' '.join(build_saqr_cmd(saqr_args))}", flush=True) - print(f"[BRIDGE] cwd: {PROJECT_ROOT}", flush=True) + print(f"[BRIDGE {_ts()}] cwd: {PROJECT_ROOT}", flush=True) stop_event = threading.Event() def _forward_signal(signum, _frame): - print(f"[BRIDGE] signal {signum} -> shutting down", flush=True) + print(f"[BRIDGE {_ts()}] signal {signum} -> shutting down", flush=True) stop_event.set() signal.signal(signal.SIGINT, _forward_signal) @@ -354,16 +379,16 @@ def main(): have_hub = use_trigger and robot.hub is not None if use_trigger and not have_hub: - print("[BRIDGE][WARN] --no-trigger not set, but no LowState hub is " + print(f"[BRIDGE {_ts()}][WARN] --no-trigger not set, but no LowState hub is " "available. Falling back to legacy auto-start mode.", flush=True) trigger_thread: Optional[threading.Thread] = None try: if have_hub: try: - robot.speak(TTS_BRIDGE_READY) + robot.speak(TTS_BRIDGE_READY, category="fixed", key="ready") except Exception as e: - print(f"[BRIDGE][WARN] startup announce failed: {e}", flush=True) + print(f"[BRIDGE {_ts()}][WARN] startup announce failed: {e}", flush=True) trigger_thread = threading.Thread( target=trigger_loop, @@ -388,7 +413,7 @@ def main(): robot.shutdown_tts() except Exception: pass - print("[BRIDGE] bye.", flush=True) + print(f"[BRIDGE {_ts()}] bye.", flush=True) sys.exit(0) diff --git a/saqr/robot/controller.py b/robot/controller.py similarity index 100% rename from saqr/robot/controller.py rename to robot/controller.py diff --git a/saqr/robot/robot_controller.py b/robot/robot_controller.py similarity index 51% rename from saqr/robot/robot_controller.py rename to robot/robot_controller.py index 03014bc..afe19be 100644 --- a/saqr/robot/robot_controller.py +++ b/robot/robot_controller.py @@ -1,8 +1,10 @@ """G1 arm + audio + LowState DDS client owned by the bridge. -A dedicated TTS worker thread paces ``TtsMaker`` calls so overlapping phrases -don't trip the SDK's "device busy" error (3104). The busy multiplier adapts -up on 3104s and decays on clean calls. +Announcements run on a dedicated worker thread. Each queue item is a tuple +``(text, category, key)``. 
The worker picks WAV playback via +``AudioClient.PlayStream`` when the clip exists under ``assets/audio/`` and +``tts.mode`` allows, otherwise falls back to ``TtsMaker`` with the adaptive +busy-factor backoff for 3104 ("device busy") errors. """ from __future__ import annotations @@ -10,20 +12,28 @@ import collections import datetime import threading import time -from typing import Deque, Optional +from typing import Deque, Optional, Tuple -TTS_VOLUME = 100 +from utils.config import load_config -TTS_SECONDS_PER_CHAR = 0.12 -TTS_MIN_SECONDS = 2.5 -TTS_QUEUE_MAX = 4 -TTS_BUSY_FACTOR_MIN = 1.0 -TTS_BUSY_FACTOR_MAX = 2.5 -TTS_BUSY_FACTOR_UP = 1.20 -TTS_BUSY_FACTOR_DOWN = 0.97 +_ROBOT = load_config("robot") +_TTS = _ROBOT["tts"] +_ARM = _ROBOT["arm"] -REJECT_ACTION = "reject" -RELEASE_ACTION = "release arm" +TTS_VOLUME = _TTS["volume"] +TTS_SECONDS_PER_CHAR = _TTS["seconds_per_char"] +TTS_MIN_SECONDS = _TTS["min_seconds"] +TTS_QUEUE_MAX = _TTS["queue_max"] +TTS_BUSY_FACTOR_MIN = _TTS["busy_factor"]["min"] +TTS_BUSY_FACTOR_MAX = _TTS["busy_factor"]["max"] +TTS_BUSY_FACTOR_UP = _TTS["busy_factor"]["up"] +TTS_BUSY_FACTOR_DOWN = _TTS["busy_factor"]["down"] +TTS_MODE = _TTS.get("mode", "tts_only") # tts_only | recorded_or_tts | recorded_only + +REJECT_ACTION = _ARM["reject_action"] +RELEASE_ACTION = _ARM["release_action"] + +QueueItem = Tuple[str, Optional[str], Optional[str]] # (text, category, key) def _ts() -> str: @@ -31,11 +41,7 @@ def _ts() -> str: class RobotController: - """Owns both the G1 arm action client and the G1 audio (TTS) client. - - A single ``ChannelFactoryInitialize`` call is shared by both clients and - the optional ``rt/lowstate`` subscriber used by the wireless-remote loop. - """ + """Owns both the G1 arm action client and the G1 audio (TTS + PlayStream) client.""" def __init__(self, iface: Optional[str], timeout: float, dry_run: bool, tts_speaker_id: int, want_lowstate: bool = True): @@ -46,8 +52,9 @@ class RobotController: self._action_map = None self.hub = None self._lowstate_sub = None + self._player = None # AudioPlayer, lazily initialised - self._tts_queue: Deque[str] = collections.deque(maxlen=TTS_QUEUE_MAX) + self._tts_queue: Deque[QueueItem] = collections.deque(maxlen=TTS_QUEUE_MAX) self._tts_event = threading.Event() self._tts_worker_stop = threading.Event() self._tts_worker_thread: Optional[threading.Thread] = None @@ -57,7 +64,7 @@ class RobotController: self._tts_busy_count: int = 0 if dry_run: - print("[BRIDGE] DRY RUN — G1 SDK will not be loaded.", flush=True) + print(f"[BRIDGE {_ts()}] DRY RUN — G1 SDK will not be loaded.", flush=True) return from unitree_sdk2py.core.channel import ChannelFactoryInitialize @@ -77,7 +84,7 @@ class RobotController: self.arm_client = G1ArmActionClient() self.arm_client.SetTimeout(timeout) self.arm_client.Init() - print(f"[BRIDGE] G1ArmActionClient ready (iface={iface or 'default'})", + print(f"[BRIDGE {_ts()}] G1ArmActionClient ready (iface={iface or 'default'})", flush=True) self.audio_client = AudioClient() @@ -86,9 +93,13 @@ class RobotController: try: self.audio_client.SetVolume(TTS_VOLUME) except Exception as e: - print(f"[BRIDGE][WARN] AudioClient.SetVolume failed: {e}", flush=True) - print(f"[BRIDGE] G1 AudioClient ready (speaker_id={tts_speaker_id})", - flush=True) + print(f"[BRIDGE {_ts()}][WARN] AudioClient.SetVolume failed: {e}", flush=True) + print(f"[BRIDGE {_ts()}] G1 AudioClient ready (speaker_id={tts_speaker_id}, " + f"tts_mode={TTS_MODE})", flush=True) + + # Pre-recorded clip library (WAVs under assets/audio/). 
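+        # Clips are looked up by (category, key), e.g. ("fixed", "safe") or
+        # ("unsafe_missing", "helmet_vest"); when no matching WAV exists the
+        # worker falls back to TtsMaker according to tts.mode.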
+ from robot.audio_player import AudioPlayer + self._player = AudioPlayer(self.audio_client) self._tts_worker_thread = threading.Thread( target=self._tts_worker_loop, name="TtsWorker", daemon=True, @@ -99,35 +110,40 @@ class RobotController: try: from unitree_sdk2py.core.channel import ChannelSubscriber from unitree_sdk2py.idl.unitree_hg.msg.dds_ import LowState_ - from saqr.robot.controller import LowStateHub + from robot.controller import LowStateHub self.hub = LowStateHub(watchdog_timeout=0.25) self._lowstate_sub = ChannelSubscriber("rt/lowstate", LowState_) self._lowstate_sub.Init(self.hub.handler, 10) - print("[BRIDGE] Subscribed to rt/lowstate (wireless remote)", + print(f"[BRIDGE {_ts()}] Subscribed to rt/lowstate (wireless remote)", flush=True) except Exception as e: - print(f"[BRIDGE][WARN] LowState subscribe failed: {e}", flush=True) - print("[BRIDGE][WARN] Trigger keys (R2+X / R2+Y) will not work.", + print(f"[BRIDGE {_ts()}][WARN] LowState subscribe failed: {e}", flush=True) + print(f"[BRIDGE {_ts()}][WARN] Trigger keys (R2+X / R2+Y) will not work.", flush=True) self.hub = None - # ── TTS ───────────────────────────────────────────────────────────────── - def _estimate_tts_seconds(self, text: str) -> float: - base = max(TTS_MIN_SECONDS, len(text) * TTS_SECONDS_PER_CHAR) - return base * self._tts_busy_factor + # ── Public API ────────────────────────────────────────────────────────── + def speak(self, text: str, + category: Optional[str] = None, key: Optional[str] = None): + """Non-blocking — enqueue an announcement for the worker thread. - def speak(self, text: str): - """Non-blocking — enqueue the phrase for the worker thread.""" + If ``category`` + ``key`` are provided and a matching WAV exists under + ``assets/audio//.wav``, the worker plays the WAV via + PlayStream (no firmware TTS latency). Otherwise falls back per + ``tts.mode`` setting. + """ if self.dry_run: - print(f"[BRIDGE] (dry) would TtsMaker({text!r}, " - f"speaker_id={self.tts_speaker_id})", flush=True) + print(f"[BRIDGE {_ts()}] (dry) would speak({text!r}, category={category!r}, " + f"key={key!r})", flush=True) return if self.audio_client is None: return - if self._tts_queue and self._tts_queue[-1] == text: + item: QueueItem = (text, category, key) + # Drop adjacent duplicates (same text + routing). 
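+        # Guards against back-to-back identical announcements, e.g. two
+        # tracked people flagged UNSAFE with the same missing items.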
+ if self._tts_queue and self._tts_queue[-1] == item: return - self._tts_queue.append(text) + self._tts_queue.append(item) self._tts_event.set() def shutdown_tts(self): @@ -136,6 +152,7 @@ class RobotController: if self._tts_worker_thread is not None: self._tts_worker_thread.join(timeout=1.0) + # ── Worker thread ─────────────────────────────────────────────────────── def _tts_worker_loop(self): while not self._tts_worker_stop.is_set(): if not self._tts_queue: @@ -143,10 +160,49 @@ class RobotController: self._tts_event.clear() continue try: - text = self._tts_queue.popleft() + item = self._tts_queue.popleft() except IndexError: continue - self._speak_blocking(text) + self._dispatch(*item) + + def _dispatch(self, text: str, category: Optional[str], key: Optional[str]): + """Route one queue item to PlayStream or TtsMaker per ``tts.mode``.""" + wants_clip = ( + TTS_MODE in ("recorded_or_tts", "recorded_only") + and category is not None and key is not None + and self._player is not None + and self._player.has(category, key) + ) + if wants_clip: + print(f"[BRIDGE {_ts()}] play -> {category}/{key!r} " + f"(text={text!r})", flush=True) + call_t0 = time.monotonic() + ok = self._player.play(category, key) + dt = time.monotonic() - call_t0 + if ok: + print(f"[BRIDGE {_ts()}] play done ({dt*1000:.0f} ms)", flush=True) + return + # Play failed. Decide by mode whether to fall back to TtsMaker. + if TTS_MODE == "recorded_only": + print(f"[BRIDGE {_ts()}][WARN] play failed and tts.mode=recorded_only " + f"— dropping phrase silently", flush=True) + return + print(f"[BRIDGE {_ts()}][WARN] play failed; falling back to TtsMaker", + flush=True) + # fall through to TtsMaker + + if TTS_MODE == "recorded_only": + # No clip exists for this phrase and user opted out of TtsMaker. 
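+            # recorded_only trades a dropped phrase for never hearing the
+            # firmware TTS voice; use recorded_or_tts if a fallback is wanted.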
+ print(f"[BRIDGE {_ts()}] skip (recorded_only, no clip for " + f"{category}/{key!r}): {text!r}", flush=True) + return + + self._speak_blocking(text) + + # ── TtsMaker path (fallback + legacy) ─────────────────────────────────── + def _estimate_tts_seconds(self, text: str) -> float: + base = max(TTS_MIN_SECONDS, len(text) * TTS_SECONDS_PER_CHAR) + return base * self._tts_busy_factor def _speak_blocking(self, text: str): if self.audio_client is None: @@ -199,18 +255,18 @@ class RobotController: # ── Arm ───────────────────────────────────────────────────────────────── def reject(self, release_after: float): if self.dry_run: - print(f"[BRIDGE] (dry) would run '{REJECT_ACTION}' " + print(f"[BRIDGE {_ts()}] (dry) would run '{REJECT_ACTION}' " f"then release after {release_after:.1f}s", flush=True) return if self.arm_client is None or self._action_map is None: return if REJECT_ACTION not in self._action_map: - print(f"[BRIDGE][ERR] '{REJECT_ACTION}' not in SDK action_map", + print(f"[BRIDGE {_ts()}][ERR] '{REJECT_ACTION}' not in SDK action_map", flush=True) return - print(f"[BRIDGE] -> {REJECT_ACTION}", flush=True) + print(f"[BRIDGE {_ts()}] -> {REJECT_ACTION}", flush=True) self.arm_client.ExecuteAction(self._action_map[REJECT_ACTION]) if release_after > 0: time.sleep(release_after) - print(f"[BRIDGE] -> {RELEASE_ACTION}", flush=True) + print(f"[BRIDGE {_ts()}] -> {RELEASE_ACTION}", flush=True) self.arm_client.ExecuteAction(self._action_map[RELEASE_ACTION]) diff --git a/saqr/__init__.py b/saqr/__init__.py deleted file mode 100644 index 200f45d..0000000 --- a/saqr/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -"""Saqr — PPE safety detection and G1 humanoid integration.""" - -__version__ = "0.1.0" diff --git a/saqr/core/camera.py b/saqr/core/camera.py deleted file mode 100644 index 5ed53bd..0000000 --- a/saqr/core/camera.py +++ /dev/null @@ -1,88 +0,0 @@ -"""Camera backends: RealSense SDK and OpenCV V4L2.""" -from __future__ import annotations - -from typing import Optional - -import cv2 -import numpy as np - -from saqr.utils.logger import get_logger - -log = get_logger("Inference", "camera") - -try: - import pyrealsense2 as rs - HAS_REALSENSE = True -except ImportError: - HAS_REALSENSE = False - - -class RealSenseCapture: - """pyrealsense2 pipeline with an OpenCV-like read() interface.""" - - def __init__(self, width: int = 640, height: int = 480, fps: int = 30, - serial: Optional[str] = None): - if not HAS_REALSENSE: - raise RuntimeError("pyrealsense2 not installed") - self.pipeline = rs.pipeline() - cfg = rs.config() - if serial: - cfg.enable_device(serial) - cfg.enable_stream(rs.stream.color, width, height, rs.format.bgr8, fps) - self.profile = self.pipeline.start(cfg) - self._open = True - dev = self.profile.get_device() - log.info(f"RealSense opened | {dev.get_info(rs.camera_info.name)} " - f"serial={dev.get_info(rs.camera_info.serial_number)} " - f"{width}x{height}@{fps}") - - def isOpened(self) -> bool: - return self._open - - def read(self): - if not self._open: - return False, None - try: - frames = self.pipeline.wait_for_frames(timeout_ms=3000) - color = frames.get_color_frame() - if not color: - return False, None - return True, np.asanyarray(color.get_data()) - except Exception: - return False, None - - def release(self): - if self._open: - self.pipeline.stop() - self._open = False - - -def open_capture(source: str): - if source.lower().startswith("realsense"): - serial = None - if ":" in source: - serial = source.split(":", 1)[1] - return RealSenseCapture(width=640, height=480, fps=30, 
serial=serial) - - if str(source).isdigit(): - idx = int(source) - cap = cv2.VideoCapture(idx) - if cap.isOpened(): - return cap - cap = cv2.VideoCapture(idx, cv2.CAP_ANY) - if cap.isOpened(): - return cap - cap = cv2.VideoCapture(idx, cv2.CAP_V4L2) - return cap - - if source.startswith("/dev/video"): - cap = cv2.VideoCapture(source, cv2.CAP_V4L2) - if cap.isOpened(): - cap.set(cv2.CAP_PROP_BUFFERSIZE, 1) - cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*"MJPG")) - cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) - cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480) - cap.set(cv2.CAP_PROP_FPS, 30) - return cap - - return cv2.VideoCapture(source) diff --git a/saqr/core/capture.py b/saqr/core/capture.py deleted file mode 100644 index f3c6a64..0000000 --- a/saqr/core/capture.py +++ /dev/null @@ -1,41 +0,0 @@ -"""Per-track image cropping + capture directory setup.""" -from __future__ import annotations - -from pathlib import Path -from typing import Dict, Optional - -import cv2 - -from saqr.core.detection import STATUSES -from saqr.core.geometry import clamp_bbox -from saqr.core.paths import CAPTURES_DIR - - -def setup_capture_dirs() -> Dict[str, Path]: - dirs: Dict[str, Path] = {} - for s in STATUSES: - d = CAPTURES_DIR / s - d.mkdir(parents=True, exist_ok=True) - dirs[s] = d - return dirs - - -def save_track_image(frame, track, capture_dirs: Dict[str, Path]) -> Optional[Path]: - h, w = frame.shape[:2] - x1, y1, x2, y2 = clamp_bbox(track.bbox, w, h) - if x2 <= x1 or y2 <= y1: - return None - crop = frame[y1:y2, x1:x2] - if crop.size == 0: - return None - - target = capture_dirs[track.status] / f"track_{track.track_id:04d}.jpg" - if track.photo_path and track.photo_path != target and track.photo_path.exists(): - try: - track.photo_path.unlink() - except OSError: - pass - - cv2.imwrite(str(target), crop) - track.photo_path = target - return target diff --git a/saqr/core/compliance.py b/saqr/core/compliance.py deleted file mode 100644 index 6d82560..0000000 --- a/saqr/core/compliance.py +++ /dev/null @@ -1,36 +0,0 @@ -"""SAFE / PARTIAL / UNSAFE classification (helmet + vest focus).""" -from __future__ import annotations - -from typing import Dict, List, Tuple - -from saqr.core.detection import POSITIVE_TO_NEGATIVE, PPE_DISPLAY_ORDER - - -def status_from_items(items: Dict[str, float]) -> str: - has_helmet = items.get("helmet", 0.0) > items.get("no-helmet", 0.0) and items.get("helmet", 0.0) > 0 - has_vest = items.get("vest", 0.0) > items.get("no-vest", 0.0) and items.get("vest", 0.0) > 0 - no_helmet = items.get("no-helmet", 0.0) > 0 - no_vest = items.get("no-vest", 0.0) > 0 - - if no_helmet or no_vest: - return "UNSAFE" - if has_helmet and has_vest: - return "SAFE" - if has_helmet or has_vest: - return "PARTIAL" - return "UNSAFE" - - -def split_wearing_missing(items: Dict[str, float]) -> Tuple[List[str], List[str], List[str]]: - wearing, missing, unknown = [], [], [] - for pos in PPE_DISPLAY_ORDER: - neg = POSITIVE_TO_NEGATIVE[pos] - pos_conf = items.get(pos, 0.0) - neg_conf = items.get(neg, 0.0) - if pos_conf > neg_conf and pos_conf > 0: - wearing.append(pos) - elif neg_conf >= pos_conf and neg_conf > 0: - missing.append(pos) - else: - unknown.append(pos) - return wearing, missing, unknown diff --git a/saqr/core/paths.py b/saqr/core/paths.py deleted file mode 100644 index 7cb3a4f..0000000 --- a/saqr/core/paths.py +++ /dev/null @@ -1,19 +0,0 @@ -"""Canonical project paths, resolved from the saqr package location.""" -from __future__ import annotations - -from pathlib import Path - -PROJECT_ROOT = 
Path(__file__).resolve().parents[2] - -CONFIG_DIR = PROJECT_ROOT / "config" -DATA_DIR = PROJECT_ROOT / "data" -DATASET_DIR = DATA_DIR / "dataset" -MODELS_DIR = DATA_DIR / "models" - -RUNTIME_DIR = PROJECT_ROOT / "runtime" -CAPTURES_DIR = RUNTIME_DIR / "captures" -LOGS_DIR = RUNTIME_DIR / "logs" -RUNS_DIR = RUNTIME_DIR / "runs" - -RESULT_CSV = CAPTURES_DIR / "result.csv" -EVENTS_CSV = CAPTURES_DIR / "events.csv" diff --git a/saqr/core/pipeline.py b/saqr/core/pipeline.py deleted file mode 100644 index bba3e81..0000000 --- a/saqr/core/pipeline.py +++ /dev/null @@ -1,40 +0,0 @@ -"""Per-frame detect + group + track + capture + emit pipeline.""" -from __future__ import annotations - -from pathlib import Path -from typing import Dict - -from saqr.core.capture import save_track_image -from saqr.core.detection import collect_detections -from saqr.core.drawing import draw_track -from saqr.core.events import emit_event, write_result_csv -from saqr.core.grouping import group_detections_to_people -from saqr.core.paths import RESULT_CSV -from saqr.core.tracking import PersonTracker - - -def process_frame(frame, model, tracker: PersonTracker, frame_idx: int, conf: float, - capture_dirs: Dict[str, Path], write_csv: bool = True): - annotated = frame.copy() - h, w = annotated.shape[:2] - - detections = collect_detections(frame, model, conf) - candidates = group_detections_to_people(detections, w, h) - created, changed = tracker.update(candidates, frame_idx) - visible = tracker.visible_tracks() - - created_ids = {t.track_id for t in created} - changed_ids = {t.track_id for t in changed} - event_ids = created_ids | changed_ids - - for track in visible: - save_track_image(frame, track, capture_dirs) - if track.track_id in event_ids: - ev_type = "NEW" if track.track_id in created_ids else "STATUS_CHANGE" - emit_event(track, tracker.event_logger, ev_type) - draw_track(annotated, track) - - if write_csv: - write_result_csv(list(tracker.tracks.values()), RESULT_CSV) - - return annotated, visible diff --git a/scripts/deploy.sh b/scripts/deploy.sh index c6786b4..ed92f9a 100755 --- a/scripts/deploy.sh +++ b/scripts/deploy.sh @@ -15,11 +15,22 @@ set -e HERE="$(cd "$(dirname "$0")" && pwd)" PROJECT_ROOT="$(cd "$HERE/.." && pwd)" -ROBOT_IP="${ROBOT_IP:-192.168.123.164}" -ROBOT_USER="${ROBOT_USER:-unitree}" -ROBOT_ENV="${ROBOT_ENV:-marcus}" -REMOTE_DIR="/home/${ROBOT_USER}/Saqr" -SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=10" +config_get() { + python3 -c " +import json, os, sys +with open('$PROJECT_ROOT/config/robot_config.json') as f: + c = json.load(f) +for k in sys.argv[1].split('.'): + c = c[k] +print(os.path.expandvars(str(c))) +" "$1" +} + +ROBOT_IP="${ROBOT_IP:-$(config_get deploy.robot_ip)}" +ROBOT_USER="${ROBOT_USER:-$(config_get deploy.robot_user)}" +ROBOT_ENV="${ROBOT_ENV:-$(config_get deploy.robot_env)}" +REMOTE_DIR="${REMOTE_DIR:-$(config_get deploy.remote_dir)}" +SSH_OPTS="${SSH_OPTS:-$(config_get deploy.ssh_opts)}" RUN_AFTER=false @@ -50,7 +61,10 @@ ssh ${SSH_OPTS} ${ROBOT_USER}@${ROBOT_IP} "echo 'Connected OK'" || { echo "[2/4] Creating remote directory..." ssh ${SSH_OPTS} ${ROBOT_USER}@${ROBOT_IP} \ - "mkdir -p ${REMOTE_DIR}/runtime/{captures/{SAFE,PARTIAL,UNSAFE},logs,runs} ${REMOTE_DIR}/data/models" + "mkdir -p ${REMOTE_DIR}/runtime/captures/{SAFE,PARTIAL,UNSAFE} \ + ${REMOTE_DIR}/runtime/runs \ + ${REMOTE_DIR}/logs \ + ${REMOTE_DIR}/data/models" echo "[3/4] Rsyncing package..." 
RSYNC_OPTS=( @@ -60,12 +74,16 @@ RSYNC_OPTS=( --exclude="*.egg-info" --exclude="build/" --exclude="dist/" ) rsync "${RSYNC_OPTS[@]}" \ - "${PROJECT_ROOT}/saqr" \ + "${PROJECT_ROOT}/core" \ + "${PROJECT_ROOT}/apps" \ + "${PROJECT_ROOT}/gui" \ + "${PROJECT_ROOT}/robot" \ + "${PROJECT_ROOT}/utils" \ "${PROJECT_ROOT}/scripts" \ "${PROJECT_ROOT}/config" \ "${PROJECT_ROOT}/docs" \ + "${PROJECT_ROOT}/assets" \ "${PROJECT_ROOT}/pyproject.toml" \ - "${PROJECT_ROOT}/requirements.txt" \ "${PROJECT_ROOT}/README.md" \ ${ROBOT_USER}@${ROBOT_IP}:${REMOTE_DIR}/ @@ -84,8 +102,8 @@ ssh ${SSH_OPTS} ${ROBOT_USER}@${ROBOT_IP} bash -s </dev/null || pip install -e . - chmod +x scripts/start_saqr.sh scripts/run_robot.sh scripts/run_local.sh 2>/dev/null || true + pip install -e . + chmod +x scripts/start_saqr.sh echo " Install OK" INSTALL_EOF diff --git a/scripts/generate_phrases.py b/scripts/generate_phrases.py new file mode 100644 index 0000000..07592f1 --- /dev/null +++ b/scripts/generate_phrases.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 +""" +generate_phrases.py — synthesize Saqr's 8 phrases to WAV using piper-tts. + +This produces the audio library locally (no G1 mic, no TtsMaker capture, +no PulseAudio) at exactly the format robot.audio_player expects: +16 kHz mono int16 WAV under assets/audio//.wav. + +Setup (once): + pip install piper-tts + python -m piper.download_voices en_US-amy-medium + +Usage: + python scripts/generate_phrases.py + python scripts/generate_phrases.py --voice en_US-lessac-medium + python scripts/generate_phrases.py --only safe,helmet_vest + python scripts/generate_phrases.py --voice-dir /custom/path + +Common voices (run the download command above, swap the name): + en_US-amy-medium female, neutral (~60 MB) + en_US-lessac-medium female, friendly (~60 MB) + en_US-ryan-high male, clear (~120 MB) + en_GB-alan-medium male, British (~60 MB) +""" +from __future__ import annotations + +import argparse +import io +import sys +import wave +from pathlib import Path + +PROJECT_ROOT = Path(__file__).resolve().parent.parent +ASSETS_ROOT = PROJECT_ROOT / "assets" / "audio" + +PHRASES = [ + ("fixed", "ready", + "Saqr is running. Press R2 plus X to start."), + ("fixed", "deactivated", + "Saqr deactivated."), + ("fixed", "no_camera", + "Camera not connected. Please plug in the camera and try again."), + ("fixed", "safe", + "Safe to enter. Have a good day."), + ("fixed", "unsafe_generic", + "Please stop. Wear your proper safety equipment."), + ("unsafe_missing", "helmet", + "Please stop. Wear your proper safety equipment. You are missing helmet."), + ("unsafe_missing", "vest", + "Please stop. Wear your proper safety equipment. You are missing vest."), + ("unsafe_missing", "helmet_vest", + "Please stop. Wear your proper safety equipment. 
You are missing helmet and vest."), +] + + +def find_voice_files(voice_name: str, override_dir: Path = None): + """Locate .onnx + .onnx.json in piper's standard dirs.""" + search_dirs = [] + if override_dir is not None: + search_dirs.append(Path(override_dir)) + search_dirs += [ + Path.home() / ".local" / "share" / "piper-voices", + Path.home() / ".cache" / "piper" / "voices", + Path.cwd(), + ] + for d in search_dirs: + onnx = d / f"{voice_name}.onnx" + js = d / f"{voice_name}.onnx.json" + if onnx.exists() and js.exists(): + return onnx, js + return None, None + + +def resample_to_16k_mono_int16(pcm: bytes, src_rate: int, src_channels: int) -> bytes: + """Linear-resample int16 PCM to 16 kHz mono.""" + import numpy as np + a = np.frombuffer(pcm, dtype=np.int16) + if src_channels > 1: + a = a.reshape(-1, src_channels).mean(axis=1).astype(np.int16) + if src_rate == 16000: + return a.tobytes() + target_len = int(round(len(a) * 16000 / src_rate)) + idx = np.linspace(0, len(a) - 1, target_len).astype(np.int64) + return a[idx].astype(np.int16).tobytes() + + +def save_wav_16k_mono(pcm: bytes, path: Path) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + with wave.open(str(path), "wb") as wf: + wf.setnchannels(1) + wf.setsampwidth(2) + wf.setframerate(16000) + wf.writeframes(pcm) + + +def main(): + ap = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, + ) + ap.add_argument("--voice", default="en_US-amy-medium", + help="piper voice name (default en_US-amy-medium)") + ap.add_argument("--voice-dir", default=None, + help="override directory to search for the voice .onnx files") + ap.add_argument("--only", default=None, + help="comma-separated keys to generate (e.g. safe,helmet)") + ap.add_argument("--length-scale", type=float, default=None, + help="speaking rate (piper default ~1.0; 0.8 faster, 1.2 slower)") + args = ap.parse_args() + + only = set(args.only.split(",")) if args.only else None + targets = [(c, k, t) for (c, k, t) in PHRASES if only is None or k in only] + if not targets: + print(f"No phrases match --only={args.only!r}.") + sys.exit(1) + + # Import piper + try: + from piper.voice import PiperVoice + except ImportError: + print("ERROR: piper-tts is not installed in this Python environment.") + print() + print("Install it:") + print(" pip install piper-tts") + print(f" python -m piper.download_voices {args.voice}") + sys.exit(1) + + onnx, js = find_voice_files(args.voice, args.voice_dir) + if onnx is None: + print(f"ERROR: voice files for {args.voice!r} not found.") + print() + print("Download:") + print(f" python -m piper.download_voices {args.voice}") + print() + print("Or pass --voice-dir pointing at a directory that contains") + print(f" {args.voice}.onnx + {args.voice}.onnx.json") + sys.exit(1) + + print(f"Loading voice: {onnx}") + voice = PiperVoice.load(str(onnx), config_path=str(js)) + src_rate = int(voice.config.sample_rate) + print(f"Native rate: {src_rate} Hz") + print(f"Target: 16000 Hz mono int16 under {ASSETS_ROOT}") + print() + + synth_kwargs = {} + if args.length_scale is not None: + synth_kwargs["length_scale"] = args.length_scale + + for cat, key, text in targets: + out = ASSETS_ROOT / cat / f"{key}.wav" + print(f"[{cat}/{key}]") + print(f" text : {text}") + + # Synthesize into an in-memory WAV buffer. 
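+        # Assumes the piper-tts Python API where PiperVoice.synthesize(text, wav_file)
+        # writes int16 frames into an already-open wave.Wave_write object.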
+ buf = io.BytesIO() + with wave.open(buf, "wb") as wf: + voice.synthesize(text, wf, **synth_kwargs) + buf.seek(0) + with wave.open(buf, "rb") as wf: + pcm = wf.readframes(wf.getnframes()) + rate_in = wf.getframerate() + channels = wf.getnchannels() + + resampled = resample_to_16k_mono_int16(pcm, rate_in, channels) + save_wav_16k_mono(resampled, out) + + dur = len(resampled) / 2.0 / 16000 + print(f" saved → {out.relative_to(PROJECT_ROOT)} ({dur:.1f}s)") + print() + + print("Done. Deploy + restart:") + print(" scripts/deploy.sh") + print(" ssh unitree@192.168.123.164 'sudo systemctl restart saqr-bridge && " + "journalctl -u saqr-bridge -n 20 | grep audio_player'") + + +if __name__ == "__main__": + main() diff --git a/scripts/import_voices.py b/scripts/import_voices.py new file mode 100644 index 0000000..0262ec5 --- /dev/null +++ b/scripts/import_voices.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python3 +""" +import_voices.py — Convert TTS MP3s to Saqr's audio library format. + +Reads MP3s from a source directory, maps each to its correct +``assets/audio//.wav`` path, and transcodes to +16 kHz mono int16 WAV using ffmpeg. + +Matching rules: + 1. If the filename (without extension, trailing dots stripped) exactly + equals one of the known phrases, that's a direct map. + 2. Any file that doesn't match — typical case is ``ttsmaker-file-*.mp3`` + where the site didn't rename it — is assigned to whichever phrase + key is still missing. If exactly ONE phrase is unmapped and ONE + file is unmatched, they're paired. Otherwise you're prompted. + +Requires ffmpeg on PATH: sudo apt install -y ffmpeg + +Usage: + python scripts/import_voices.py + python scripts/import_voices.py --src "/home/zedx/Downloads/voices saqr" + python scripts/import_voices.py --dry-run # just show mapping +""" +from __future__ import annotations + +import argparse +import shutil +import subprocess +import sys +from pathlib import Path + +PROJECT_ROOT = Path(__file__).resolve().parent.parent +ASSETS_ROOT = PROJECT_ROOT / "assets" / "audio" +DEFAULT_SRC = Path("/home/zedx/Downloads/voices saqr") + +# (category, key, text) — filename match uses the text (with or without +# trailing period). +PHRASES = [ + ("fixed", "ready", + "Saqr is running. Press R2 plus X to start."), + ("fixed", "deactivated", + "Saqr deactivated."), + ("fixed", "no_camera", + "Camera not connected. Please plug in the camera and try again."), + ("fixed", "safe", + "Safe to enter. Have a good day."), + ("fixed", "unsafe_generic", + "Please stop. Wear your proper safety equipment."), + ("unsafe_missing", "helmet", + "Please stop. Wear your proper safety equipment. You are missing helmet."), + ("unsafe_missing", "vest", + "Please stop. Wear your proper safety equipment. You are missing vest."), + ("unsafe_missing", "helmet_vest", + "Please stop. Wear your proper safety equipment. You are missing helmet and vest."), +] + +# Text → (category, key) and normalised-text → (category, key) +TEXT_TO_KEY = {text: (cat, key) for (cat, key, text) in PHRASES} + + +def normalise(stem: str) -> str: + """Strip trailing dots so 'Saqr deactivated.' 
matches 'Saqr deactivated..mp3'.""" + return stem.rstrip(".").strip() + + +def match_by_filename(stem: str): + candidates = {stem, stem + ".", normalise(stem), normalise(stem) + "."} + for c in candidates: + if c in TEXT_TO_KEY: + return TEXT_TO_KEY[c] + return None + + +def convert_to_wav(src: Path, dst: Path, dry_run: bool = False) -> None: + dst.parent.mkdir(parents=True, exist_ok=True) + cmd = [ + "ffmpeg", "-y", "-loglevel", "error", + "-i", str(src), + "-ac", "1", # mono + "-ar", "16000", # 16 kHz + "-sample_fmt", "s16", # 16-bit int + str(dst), + ] + if dry_run: + return + subprocess.run(cmd, check=True) + + +def main(): + ap = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + ap.add_argument("--src", default=str(DEFAULT_SRC), + help=f"source directory of MP3s (default: {DEFAULT_SRC!s})") + ap.add_argument("--dry-run", action="store_true", + help="print the mapping but don't convert") + args = ap.parse_args() + + if shutil.which("ffmpeg") is None and not args.dry_run: + print("ERROR: ffmpeg not on PATH. Install it first:") + print(" sudo apt install -y ffmpeg") + sys.exit(1) + + src_dir = Path(args.src) + if not src_dir.exists(): + print(f"ERROR: source directory not found: {src_dir}") + sys.exit(1) + + mp3s = sorted(src_dir.glob("*.mp3")) + if not mp3s: + print(f"No .mp3 files under {src_dir}") + sys.exit(1) + + print(f"Source: {src_dir}") + print(f"Target: {ASSETS_ROOT}") + print(f"Files: {len(mp3s)} mp3 (expected 8)") + print() + + # Pass 1: match by filename. + assigned: dict[tuple, Path] = {} # (cat,key) → src mp3 + unmatched: list[Path] = [] + for mp3 in mp3s: + key = match_by_filename(mp3.stem) + if key is None: + unmatched.append(mp3) + elif key in assigned: + print(f" [WARN] duplicate match for {key}: keeping {assigned[key].name}," + f" ignoring {mp3.name}") + else: + assigned[key] = mp3 + + # Pass 2: assign unmatched files to still-missing phrases. + missing_keys = [(cat, key) for (cat, key, _) in PHRASES if (cat, key) not in assigned] + if len(unmatched) == 1 and len(missing_keys) == 1: + only_key = missing_keys[0] + only_file = unmatched[0] + print(f" [AUTO] assigning '{only_file.name}' → {only_key[0]}/{only_key[1]} " + f"(the only phrase still unmapped)") + assigned[only_key] = only_file + unmatched = [] + missing_keys = [] + elif unmatched: + # Prompt user + print(f" [WARN] {len(unmatched)} unmatched file(s) and " + f"{len(missing_keys)} unassigned phrase(s). Manual mapping needed:") + for f in unmatched: + print(f" unmatched: {f.name}") + for (cat, key) in missing_keys: + print(f" missing: {cat}/{key}") + print() + print("Resolve manually by renaming the source files to one of:") + for (_, _, text) in PHRASES: + print(f" '{text}.mp3'") + sys.exit(1) + + # Show the plan + print("Mapping:") + for (cat, key, _) in PHRASES: + src = assigned.get((cat, key)) + if src is None: + print(f" [MISSING] no source for {cat}/{key}") + continue + rel = src.name if len(src.name) <= 60 else src.name[:57] + "..." 
+ print(f" {rel:<60s} → assets/audio/{cat}/{key}.wav") + print() + + if args.dry_run: + print("(dry-run — nothing written)") + return + + # Convert + print("Converting…") + converted = 0 + for (cat, key, _) in PHRASES: + src = assigned.get((cat, key)) + if src is None: + continue + dst = ASSETS_ROOT / cat / f"{key}.wav" + try: + convert_to_wav(src, dst) + except subprocess.CalledProcessError as e: + print(f" [FAIL] {src.name} → {dst.name}: ffmpeg exited {e.returncode}") + continue + + # Probe the result + import wave + with wave.open(str(dst), "rb") as wf: + ok = (wf.getframerate() == 16000 + and wf.getnchannels() == 1 + and wf.getsampwidth() == 2) + dur = wf.getnframes() / wf.getframerate() + mark = "✓" if ok else "⚠ wrong format" + print(f" {mark} {dst.relative_to(PROJECT_ROOT)} ({dur:.1f}s)") + if ok: + converted += 1 + + print() + print(f"Done — {converted}/{len(PHRASES)} clip(s) converted.") + print() + print("Deploy + restart:") + print(" scripts/deploy.sh") + print(" ssh unitree@192.168.123.164 " + "'sudo systemctl restart saqr-bridge && journalctl -u saqr-bridge -n 20 | grep audio_player'") + + +if __name__ == "__main__": + main() diff --git a/scripts/record_phrases.py b/scripts/record_phrases.py new file mode 100755 index 0000000..9ed6021 --- /dev/null +++ b/scripts/record_phrases.py @@ -0,0 +1,408 @@ +#!/usr/bin/env python3 +""" +record_phrases.py — Build Saqr's audio library for AudioClient.PlayStream. + +Two modes: + + DEFAULT (TTS capture) — the script calls AudioClient.TtsMaker for each + phrase and simultaneously records the G1 speaker + output via the mic. Zero speaking required. Voice + is the G1's own TTS voice, but at runtime + PlayStream skips the firmware synthesis buffer so + it plays back ~200–700 ms faster. + + --mic (your voice) — the script shows each phrase, counts down, and + records whatever the mic hears. You speak each + line yourself. + +Both modes save 16 kHz mono int16 WAVs under +``assets/audio//.wav`` — exactly what ``robot/audio_player`` +expects. + +Usage (run on the robot or any machine on the G1 subnet): + + python3 scripts/record_phrases.py # TTS capture, all 8 + python3 scripts/record_phrases.py --only safe,helmet # just those two + python3 scripts/record_phrases.py --iface enp3s0 # custom DDS iface + python3 scripts/record_phrases.py --mic # your voice instead + python3 scripts/record_phrases.py --play # verify each clip by replaying + +Requires ``unitree_sdk2py`` in the active conda env (TTS-capture and --play). +""" +from __future__ import annotations + +import argparse +import socket +import struct +import subprocess +import sys +import threading +import time +import wave +from pathlib import Path + +PROJECT_ROOT = Path(__file__).resolve().parent.parent +ASSETS_ROOT = PROJECT_ROOT / "assets" / "audio" + +MCAST_GRP = "239.168.123.161" +MCAST_PORT = 5555 +RATE = 16000 + +# (category, key, text). +PHRASES = [ + ("fixed", "ready", + "Saqr is running. Press R2 plus X to start."), + ("fixed", "deactivated", + "Saqr deactivated."), + ("fixed", "no_camera", + "Camera not connected. Please plug in the camera and try again."), + ("fixed", "safe", + "Safe to enter. Have a good day."), + ("fixed", "unsafe_generic", + "Please stop. Wear your proper safety equipment."), + ("unsafe_missing", "helmet", + "Please stop. Wear your proper safety equipment. You are missing helmet."), + ("unsafe_missing", "vest", + "Please stop. Wear your proper safety equipment. You are missing vest."), + ("unsafe_missing", "helmet_vest", + "Please stop. 
Wear your proper safety equipment. You are missing helmet and vest."), +] + + +# ── mic capture ───────────────────────────────────────────────────────────── +def find_local_ip() -> str: + """Pick the first 192.168.123.x address on this machine.""" + out = subprocess.run( + ["ip", "-4", "-o", "addr"], capture_output=True, text=True + ).stdout + for line in out.splitlines(): + for tok in line.split(): + if tok.startswith("192.168.123."): + return tok.split("/")[0] + raise RuntimeError( + "No 192.168.123.x address on this machine — connect to the G1 network first." + ) + + +def _open_mcast_socket(local_ip: str) -> socket.socket: + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + s.bind(("", MCAST_PORT)) + mreq = struct.pack("4s4s", socket.inet_aton(MCAST_GRP), socket.inet_aton(local_ip)) + s.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP, mreq) + return s + + +def record_multicast_fixed(seconds: float) -> bytes: + """Blocking mic capture for exactly ``seconds`` (used by --mic mode).""" + local_ip = find_local_ip() + sock = _open_mcast_socket(local_ip) + sock.settimeout(2.0) + target_bytes = int(RATE * 2 * seconds) + buf = bytearray() + t0 = time.time() + try: + while len(buf) < target_bytes and time.time() - t0 < seconds + 5: + try: + data, _ = sock.recvfrom(4096) + buf.extend(data) + except socket.timeout: + continue + finally: + sock.close() + return bytes(buf) + + +def record_while_tts_plays(ac, text: str, speaker_id: int, + capture_seconds: float) -> bytes: + """Start mic capture, call TtsMaker, capture for ``capture_seconds``, stop.""" + local_ip = find_local_ip() + buf = bytearray() + stop_flag = threading.Event() + + def _mic(): + sock = _open_mcast_socket(local_ip) + sock.settimeout(0.5) + try: + while not stop_flag.is_set(): + try: + data, _ = sock.recvfrom(4096) + buf.extend(data) + except socket.timeout: + continue + finally: + sock.close() + + t = threading.Thread(target=_mic, daemon=True) + t.start() + time.sleep(0.3) # let the mic socket settle before TTS fires + + try: + code = ac.TtsMaker(text, speaker_id) + except Exception as e: + print(f" [FAIL] TtsMaker raised: {e}") + stop_flag.set() + t.join(timeout=1.0) + return b"" + if code != 0: + print(f" [FAIL] TtsMaker rc={code} — retry in 2s…") + time.sleep(2.0) + try: + code = ac.TtsMaker(text, speaker_id) + except Exception as e: + print(f" [FAIL] TtsMaker retry raised: {e}") + stop_flag.set() + t.join(timeout=1.0) + return b"" + if code != 0: + print(f" [FAIL] TtsMaker retry rc={code}") + stop_flag.set() + t.join(timeout=1.0) + return b"" + + time.sleep(capture_seconds) + stop_flag.set() + t.join(timeout=1.0) + return bytes(buf) + + +# ── processing ────────────────────────────────────────────────────────────── +def rms_and_peak(pcm: bytes): + import numpy as np + a = np.frombuffer(pcm, dtype=np.int16) + if a.size == 0: + return 0.0, 0 + rms = float(np.sqrt(np.mean(a.astype(np.float32) ** 2))) + peak = int(np.abs(a).max()) + return rms, peak + + +def trim_leading_silence(pcm: bytes, threshold: int = 500, + head_ms: int = 80) -> bytes: + """Cut silence before the first sample above threshold; keep ``head_ms`` lead-in.""" + import numpy as np + a = np.frombuffer(pcm, dtype=np.int16) + above = np.where(np.abs(a) > threshold)[0] + if above.size == 0: + return pcm + start = max(0, int(above[0]) - int(head_ms / 1000.0 * RATE)) + return a[start:].tobytes() + + +def trim_trailing_silence(pcm: bytes, threshold: int = 500, + tail_ms: int = 150) -> bytes: + import 
numpy as np + a = np.frombuffer(pcm, dtype=np.int16) + above = np.where(np.abs(a) > threshold)[0] + if above.size == 0: + return pcm + end = int(above[-1]) + int(tail_ms / 1000.0 * RATE) + end = min(end, a.size) + return a[:end].tobytes() + + +def save_wav(pcm: bytes, path: Path) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + with wave.open(str(path), "wb") as wf: + wf.setnchannels(1) + wf.setsampwidth(2) + wf.setframerate(RATE) + wf.writeframes(pcm) + + +def estimate_tts_duration(text: str) -> float: + """Match RobotController's pacing estimate (0.12 s/char, min 2.5 s).""" + return max(2.5, len(text) * 0.12) + + +# ── optional playback verification ────────────────────────────────────────── +def play_on_g1(ac, path: Path) -> None: + with wave.open(str(path), "rb") as wf: + pcm = wf.readframes(wf.getnframes()) + CHUNK = 96000 + sid = f"verify_{int(time.time() * 1000)}" + offset = 0 + while offset < len(pcm): + chunk = pcm[offset:offset + CHUNK] + ac.PlayStream("saqr_verify", sid, chunk) + offset += len(chunk) + time.sleep(len(chunk) / (RATE * 2) / 2) + time.sleep(len(pcm) / (RATE * 2) + 0.3) + try: + ac.PlayStop("saqr_verify") + except Exception: + pass + + +def countdown(seconds: int) -> None: + for i in range(seconds, 0, -1): + print(f" starting in {i}...", end="\r", flush=True) + time.sleep(1) + print(" " + " " * 30, end="\r") + + +# ── main ──────────────────────────────────────────────────────────────────── +def main(): + ap = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, + ) + ap.add_argument("--mic", action="store_true", + help="record YOUR voice via the G1 mic instead of TtsMaker") + ap.add_argument("--duration", type=float, default=None, + help="seconds to record per phrase (default: estimate per phrase)") + ap.add_argument("--pause", type=float, default=1.0, + help="seconds of pause between phrases (default 1)") + ap.add_argument("--only", default=None, + help="comma-separated keys to record (e.g. 'safe,helmet,vest')") + ap.add_argument("--iface", default="eth0", + help="DDS iface for TtsMaker / PlayStream (default eth0)") + ap.add_argument("--speaker-id", type=int, default=2, + help="TtsMaker speaker_id (2 = English on current firmware)") + ap.add_argument("--volume", type=int, default=100, + help="G1 speaker volume 0–100 (default 100)") + ap.add_argument("--play", action="store_true", + help="play each saved clip back on the G1 via PlayStream") + ap.add_argument("--countdown", type=int, default=2, + help="countdown seconds before --mic recordings (default 2)") + ap.add_argument("--no-trim", action="store_true", + help="don't auto-trim leading/trailing silence") + ap.add_argument("--threshold", type=int, default=500, + help="amplitude threshold for silence trim (default 500)") + args = ap.parse_args() + + only = set(args.only.split(",")) if args.only else None + targets = [(c, k, t) for (c, k, t) in PHRASES if only is None or k in only] + if not targets: + print(f"No phrases match --only={args.only!r}. Known keys: " + f"{', '.join(k for _, k, _ in PHRASES)}") + sys.exit(1) + + try: + local_ip = find_local_ip() + except RuntimeError as e: + print(f"[FATAL] {e}") + sys.exit(1) + + mode = "mic" if args.mic else "tts" + + # Init AudioClient for TTS mode or --play. 
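+    # (Needed by both the TTS-capture path and --play; a plain --mic run
+    #  without --play never imports unitree_sdk2py at all and leaves ac=None.)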
+ ac = None + if mode == "tts" or args.play: + try: + from unitree_sdk2py.core.channel import ChannelFactoryInitialize + from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient + except ImportError as e: + print(f"[FATAL] unitree_sdk2py not available in this env: {e}") + sys.exit(1) + print(f"[init] ChannelFactoryInitialize(0, {args.iface!r})") + ChannelFactoryInitialize(0, args.iface) + ac = AudioClient() + ac.SetTimeout(10.0) + ac.Init() + try: + ac.SetVolume(args.volume) + except Exception as e: + print(f"[init][WARN] SetVolume failed: {e}") + print(f"[init] AudioClient ready (speaker_id={args.speaker_id} volume={args.volume})") + print() + + print("=" * 60) + print(f" Saqr phrase recorder — {len(targets)} clip(s)") + print(f" Mode: {mode.upper()} " + f"({'TtsMaker → mic' if mode == 'tts' else 'your voice → mic'})") + print(f" Duration: {('auto (per phrase)' if args.duration is None else f'{args.duration}s')}") + print(f" Pause: {args.pause}s between phrases") + print(f" Trim: {'off' if args.no_trim else f'on (threshold={args.threshold})'}") + print(f" Output: {ASSETS_ROOT}") + print(f" Mic feed: {MCAST_GRP}:{MCAST_PORT} (local IP: {local_ip})") + print("=" * 60) + print() + + if mode == "mic": + print("Stand within ~1 m of the G1. When you see '>>> SPEAK NOW <<<' say") + print("the SAY: line clearly. Target rms 500–5000.") + else: + print("The G1 will speak each phrase via TtsMaker while the script records") + print("through the mic. Stay quiet during capture; any room noise you hear") + print("will end up baked into the clip.") + print() + try: + input("Press Enter to start…") + except EOFError: + return + print() + + saved = [] + for i, (cat, key, text) in enumerate(targets, 1): + out = ASSETS_ROOT / cat / f"{key}.wav" + print(f"── [{i}/{len(targets)} {cat}/{key}] ──") + print(f" SAY: {text}") + + if mode == "mic": + if args.countdown > 0: + countdown(args.countdown) + dur = args.duration if args.duration is not None else 5.0 + print(f" >>> SPEAK NOW — {dur}s <<<", flush=True) + pcm = record_multicast_fixed(dur) + else: + dur = args.duration if args.duration is not None else \ + estimate_tts_duration(text) + 1.5 + print(f" TtsMaker playing… (capturing {dur:.1f}s)", flush=True) + pcm = record_while_tts_plays(ac, text, args.speaker_id, dur) + + if not pcm: + print(f" [FAIL] no audio captured") + print() + continue + + if not args.no_trim: + pcm = trim_leading_silence(pcm, threshold=args.threshold) + pcm = trim_trailing_silence(pcm, threshold=args.threshold) + + rms, peak = rms_and_peak(pcm) + dur_s = len(pcm) / 2.0 / RATE + save_wav(pcm, out) + + if rms < 400: + marker = f" ⚠ TOO QUIET — re-record with --only {key}" + elif peak > 30000: + marker = " ⚠ CLIPPING" + else: + marker = " ✓" + print(f" saved → {out.relative_to(PROJECT_ROOT)} " + f"({dur_s:.1f}s rms={rms:.0f} peak={peak}){marker}") + saved.append((cat, key, out, dur_s, rms, peak)) + + if args.play and ac is not None: + print(f" playing back…") + try: + play_on_g1(ac, out) + except Exception as e: + print(f" [play] failed: {e}") + + if i < len(targets) and args.pause > 0: + time.sleep(args.pause) + print() + + # Summary + print("=" * 60) + print(f" Done — {len(saved)}/{len(targets)} clip(s) saved") + print("=" * 60) + bad = [] + for cat, key, path, dur, rms, peak in saved: + warn = " ⚠ low level" if rms < 400 else (" ⚠ clipping" if peak > 30000 else "") + if warn: + bad.append(key) + print(f" {cat}/{key:20s} {dur:4.1f}s rms={rms:5.0f} peak={peak:5d}{warn}") + print() + if bad: + print(f"Re-record the flagged ones 
with:") + print(f" python3 scripts/record_phrases.py --only {','.join(bad)}") + print() + print("Next:") + print(" 1. (on robot) sudo systemctl restart saqr-bridge") + print(" 2. expect: [audio_player] loaded N clip(s): ...") + + +if __name__ == "__main__": + main() diff --git a/scripts/run_local.sh b/scripts/run_local.sh deleted file mode 100755 index 4393321..0000000 --- a/scripts/run_local.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/bin/bash -# ============================================================================ -# Saqr PPE Detection - Run on Local Laptop -# ============================================================================ -# -# Usage: -# scripts/run_local.sh # webcam 0 -# scripts/run_local.sh --source 1 # webcam 1 -# scripts/run_local.sh --source video.mp4 # video file -# scripts/run_local.sh --gui # PySide6 GUI -# scripts/run_local.sh --detect # simple detection (no tracking) -# -# ============================================================================ - -set -e - -HERE="$(cd "$(dirname "$0")" && pwd)" -PROJECT_ROOT="$(cd "$HERE/.." && pwd)" -cd "$PROJECT_ROOT" - -SOURCE="0" -MODEL="saqr_best.pt" -CONF="0.35" -MODE="saqr" # saqr | gui | detect -HEADLESS=false -MAX_MISSING=90 -MATCH_DIST=250 -CONFIRM=5 - -while [[ $# -gt 0 ]]; do - case $1 in - --source) SOURCE="$2"; shift 2 ;; - --model) MODEL="$2"; shift 2 ;; - --conf) CONF="$2"; shift 2 ;; - --gui) MODE="gui"; shift ;; - --detect) MODE="detect"; shift ;; - --headless) HEADLESS=true; shift ;; - --max-missing) MAX_MISSING="$2"; shift 2 ;; - --match-distance) MATCH_DIST="$2"; shift 2 ;; - --confirm) CONFIRM="$2"; shift 2 ;; - *) echo "Unknown arg: $1"; exit 1 ;; - esac -done - -if command -v conda &>/dev/null; then - source "$(conda info --base)/etc/profile.d/conda.sh" 2>/dev/null || true - conda activate AI_MSI_yolo 2>/dev/null || true -fi - -echo "============================================" -echo " Saqr PPE Detection - Local Laptop" -echo "============================================" -echo " Mode : $MODE" -echo " Source : $SOURCE" -echo " Model : $MODEL" -echo " Conf : $CONF" -echo "============================================" -echo "" - -HEADLESS_FLAG="" -if [ "$HEADLESS" = true ]; then - HEADLESS_FLAG="--headless" -fi - -case $MODE in - saqr) - python -m saqr.apps.saqr_cli \ - --source "$SOURCE" \ - --model "$MODEL" \ - --conf "$CONF" \ - --max-missing "$MAX_MISSING" \ - --match-distance "$MATCH_DIST" \ - --status-confirm-frames "$CONFIRM" \ - $HEADLESS_FLAG - ;; - gui) - python -m saqr.gui.app \ - --source "$SOURCE" \ - --model "$MODEL" - ;; - detect) - python -m saqr.apps.detect_cli \ - --source "$SOURCE" \ - --model "$MODEL" \ - --conf "$CONF" - ;; -esac diff --git a/scripts/run_robot.sh b/scripts/run_robot.sh deleted file mode 100755 index 16083ab..0000000 --- a/scripts/run_robot.sh +++ /dev/null @@ -1,102 +0,0 @@ -#!/bin/bash -# ============================================================================ -# Saqr PPE Detection - Run on Unitree G1 Robot (no bridge, direct saqr run) -# ============================================================================ -# -# Run on the robot's physical terminal (with monitor) or via ssh -X: -# scripts/run_robot.sh -# scripts/run_robot.sh --headless # no display -# scripts/run_robot.sh --source /dev/video2 # V4L2 fallback -# -# For the production R2+X / R2+Y workflow, use scripts/start_saqr.sh instead. -# ============================================================================ - -set -e - -HERE="$(cd "$(dirname "$0")" && pwd)" -PROJECT_ROOT="$(cd "$HERE/.." 
&& pwd)" -cd "$PROJECT_ROOT" - -SOURCE="realsense" -MODEL="saqr_best.pt" -CONF="0.35" -HEADLESS=false -MAX_MISSING=120 -MATCH_DIST=300 -CONFIRM=7 -DEVICE="0" -IMGSZ=320 -HALF=true -STREAM_PORT=0 - -while [[ $# -gt 0 ]]; do - case $1 in - --source) SOURCE="$2"; shift 2 ;; - --model) MODEL="$2"; shift 2 ;; - --conf) CONF="$2"; shift 2 ;; - --headless) HEADLESS=true; shift ;; - --max-missing) MAX_MISSING="$2"; shift 2 ;; - --match-distance) MATCH_DIST="$2"; shift 2 ;; - --confirm) CONFIRM="$2"; shift 2 ;; - --device) DEVICE="$2"; shift 2 ;; - --imgsz) IMGSZ="$2"; shift 2 ;; - --no-half) HALF=false; shift ;; - --stream) STREAM_PORT="$2"; shift 2 ;; - --cpu) DEVICE="cpu"; HALF=false; shift ;; - *) echo "Unknown arg: $1"; exit 1 ;; - esac -done - -source ~/miniconda3/etc/profile.d/conda.sh 2>/dev/null || true -conda activate marcus 2>/dev/null || conda activate teleimager 2>/dev/null || true - -YEAR=$(date +%Y) -if [ "$YEAR" -lt 2025 ]; then - echo "[WARN] System clock is wrong (year=$YEAR). Fixing..." - echo "123" | sudo -S date -s "2026-04-10 16:00:00" 2>/dev/null || true -fi - -if [ "$HEADLESS" = true ]; then - export QT_QPA_PLATFORM=offscreen - HEADLESS_FLAG="--headless" - echo "Mode: HEADLESS (no display, results saved to runtime/captures/)" -else - xhost + >/dev/null 2>&1 || true - export DISPLAY=:0 - HEADLESS_FLAG="" - echo "Mode: DISPLAY (OpenCV window on monitor)" -fi - -HALF_FLAG="" -if [ "$HALF" = true ]; then - HALF_FLAG="--half" -fi - -STREAM_FLAG="" -if [ "$STREAM_PORT" -gt 0 ]; then - STREAM_FLAG="--stream $STREAM_PORT" -fi - -echo "============================================" -echo " Saqr PPE Detection - Unitree G1 Robot" -echo "============================================" -echo " Source : $SOURCE" -echo " Model : $MODEL" -echo " Device : $DEVICE (half=$HALF, imgsz=$IMGSZ)" -echo " Conf : $CONF" -echo " Stream : ${STREAM_PORT:-disabled}" -echo "============================================" -echo "" - -python -m saqr.apps.saqr_cli \ - --source "$SOURCE" \ - --model "$MODEL" \ - --conf "$CONF" \ - --max-missing "$MAX_MISSING" \ - --match-distance "$MATCH_DIST" \ - --status-confirm-frames "$CONFIRM" \ - --device "$DEVICE" \ - --imgsz "$IMGSZ" \ - $HALF_FLAG \ - $STREAM_FLAG \ - $HEADLESS_FLAG diff --git a/scripts/start_saqr.sh b/scripts/start_saqr.sh index 0f65038..37bf460 100755 --- a/scripts/start_saqr.sh +++ b/scripts/start_saqr.sh @@ -6,7 +6,7 @@ # What it does: # 1. Sources miniconda and activates the target env (default: marcus). # 2. cd to the project root (parent of this scripts/ dir). -# 3. Execs `python -m saqr.robot.bridge` with the production flags. +# 3. Execs `python -m robot.bridge` with the production flags. # # The bridge will: # - init the G1 arm + audio + LowState DDS clients @@ -22,19 +22,33 @@ set -u HERE="$(cd "$(dirname "$0")" && pwd)" SAQR_DIR="${SAQR_DIR:-$(cd "$HERE/.." && pwd)}" -CONDA_ROOT="${CONDA_ROOT:-$HOME/miniconda3}" -CONDA_ENV="${CONDA_ENV:-marcus}" -DDS_IFACE="${DDS_IFACE:-eth0}" -SAQR_SOURCE="${SAQR_SOURCE:-realsense}" -STREAM_PORT="${STREAM_PORT:-8080}" +# Read defaults from config/robot_config.json (env vars override). 
+config_get() { + # config_get dotted.key + python3 -c " +import json, os, sys +with open('$SAQR_DIR/config/robot_config.json') as f: + c = json.load(f) +for k in sys.argv[1].split('.'): + c = c[k] +print(os.path.expandvars(str(c))) +" "$1" +} + +CONDA_ROOT="${CONDA_ROOT:-$(config_get start_saqr.conda_root)}" +CONDA_ENV="${CONDA_ENV:-$(config_get start_saqr.conda_env)}" +DDS_IFACE="${DDS_IFACE:-$(config_get start_saqr.dds_iface)}" +SAQR_SOURCE="${SAQR_SOURCE:-$(config_get start_saqr.saqr_source)}" +STREAM_PORT="${STREAM_PORT:-$(config_get start_saqr.stream_port)}" if [ ! -d "$SAQR_DIR" ]; then echo "[start_saqr] FATAL: SAQR_DIR not found: $SAQR_DIR" >&2 exit 1 fi -if [ ! -d "$SAQR_DIR/saqr" ]; then - echo "[start_saqr] FATAL: saqr/ package not found in $SAQR_DIR" >&2 +if [ ! -f "$SAQR_DIR/robot/bridge.py" ]; then + echo "[start_saqr] FATAL: robot/bridge.py not found in $SAQR_DIR" >&2 + echo " (expected $SAQR_DIR to contain core/ apps/ robot/ utils/)" >&2 exit 1 fi @@ -58,7 +72,7 @@ cd "$SAQR_DIR" || { echo "[start_saqr] env=$CONDA_ENV cwd=$PWD iface=$DDS_IFACE source=$SAQR_SOURCE stream=$STREAM_PORT" echo "[start_saqr] launching bridge..." -exec python3 -m saqr.robot.bridge \ +exec python3 -m robot.bridge \ --iface "$DDS_IFACE" \ --source "$SAQR_SOURCE" \ --headless \ diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/config.py b/utils/config.py new file mode 100644 index 0000000..526bde5 --- /dev/null +++ b/utils/config.py @@ -0,0 +1,45 @@ +"""Cached JSON config loader for config/_config.json files. + +Usage: + from utils.config import load_config + cfg = load_config("core") # reads config/core_config.json + conf_threshold = cfg["detection"]["conf"] + +String values pass through ``os.path.expandvars`` on load, so JSON like +``"$HOME/miniconda3"`` resolves at read time. +""" +from __future__ import annotations + +import json +import os +from typing import Any, Dict + +from core.paths import CONFIG_DIR + +_CACHE: Dict[str, Dict[str, Any]] = {} + + +def _expand(value): + if isinstance(value, str): + return os.path.expandvars(value) + if isinstance(value, list): + return [_expand(v) for v in value] + if isinstance(value, dict): + return {k: _expand(v) for k, v in value.items()} + return value + + +def load_config(name: str) -> Dict[str, Any]: + """Return the parsed JSON config for ``config/_config.json`` (cached).""" + if name in _CACHE: + return _CACHE[name] + + path = CONFIG_DIR / f"{name}_config.json" + if not path.exists(): + raise FileNotFoundError(f"Config not found: {path}") + + with open(path, "r") as f: + raw = json.load(f) + cfg = _expand(raw) + _CACHE[name] = cfg + return cfg diff --git a/saqr/utils/logger.py b/utils/logger.py similarity index 91% rename from saqr/utils/logger.py rename to utils/logger.py index 06d6d9e..3910fc5 100644 --- a/saqr/utils/logger.py +++ b/utils/logger.py @@ -4,7 +4,7 @@ import json import logging from typing import Dict -from saqr.core.paths import CONFIG_DIR, LOGS_DIR +from core.paths import CONFIG_DIR, LOGS_DIR _LOGGER_CACHE: Dict[str, logging.Logger] = {} @@ -23,7 +23,7 @@ def _level_from_name(name: str) -> int: def get_logger(category: str, name: str) -> logging.Logger: - """Return a cached logger that writes to runtime/logs//.log.""" + """Return a cached logger that writes to logs//.log.""" key = f"{category}.{name}" if key in _LOGGER_CACHE: return _LOGGER_CACHE[key]