#!/usr/bin/env python3
"""
import_voices.py — Convert TTS MP3s to Saqr's audio library format.

Reads MP3s from a source directory, maps each to its correct
``assets/audio/<category>/<key>.wav`` path, and transcodes to 16 kHz mono
int16 WAV using ffmpeg.

Matching rules:
  1. If the filename (without extension, trailing dots stripped) exactly
     equals one of the known phrases, that's a direct map.
  2. Any file that doesn't match — typical case is ``ttsmaker-file-*.mp3``
     where the site didn't rename it — is assigned to whichever phrase key
     is still missing. If exactly ONE phrase is unmapped and ONE file is
     unmatched, they're paired. Otherwise the unmatched files and the
     missing phrases are listed and the script exits so you can rename
     the files by hand.

Requires ffmpeg on PATH:
    sudo apt install -y ffmpeg

Usage:
    python scripts/import_voices.py
    python scripts/import_voices.py --src "/home/zedx/Downloads/voices saqr"
    python scripts/import_voices.py --dry-run   # just show mapping
"""

from __future__ import annotations

import argparse
import shutil
import subprocess
import sys
import wave
from pathlib import Path

PROJECT_ROOT = Path(__file__).resolve().parent.parent
ASSETS_ROOT = PROJECT_ROOT / "assets" / "audio"
DEFAULT_SRC = Path("/home/zedx/Downloads/voices saqr")

# (category, key, text) — filename match uses the text (with or without
# trailing period).
PHRASES = [
    ("fixed", "ready", "Saqr is running. Press R2 plus X to start."),
    ("fixed", "deactivated", "Saqr deactivated."),
    ("fixed", "no_camera",
     "Camera not connected. Please plug in the camera and try again."),
    ("fixed", "safe", "Safe to enter. Have a good day."),
    ("fixed", "unsafe_generic",
     "Please stop. Wear your proper safety equipment."),
    ("unsafe_missing", "helmet",
     "Please stop. Wear your proper safety equipment. You are missing helmet."),
    ("unsafe_missing", "vest",
     "Please stop. Wear your proper safety equipment. You are missing vest."),
    ("unsafe_missing", "helmet_vest",
     "Please stop. Wear your proper safety equipment. "
     "You are missing helmet and vest."),
]

# Exact phrase text → (category, key). Dot/whitespace variants of a filename
# are generated at lookup time by match_by_filename().
TEXT_TO_KEY = {text: (cat, key) for (cat, key, text) in PHRASES}


def normalise(stem: str) -> str:
    """Return *stem* with trailing dots, then surrounding whitespace, removed.

    Lets a stem such as ``'Saqr deactivated.'`` be compared against the
    phrase table regardless of how many dots the download site kept.
    """
    return stem.rstrip(".").strip()


def match_by_filename(stem: str) -> tuple[str, str] | None:
    """Map a filename stem to its ``(category, key)`` pair.

    The stem is tried as-is, with one dot appended, normalised, and
    normalised with one dot appended. Returns ``None`` when no variant
    equals a known phrase text.
    """
    bare = normalise(stem)
    # Ordered tuple (not a set) so the lookup order is deterministic.
    for candidate in (stem, stem + ".", bare, bare + "."):
        if candidate in TEXT_TO_KEY:
            return TEXT_TO_KEY[candidate]
    return None


def convert_to_wav(src: Path, dst: Path, dry_run: bool = False) -> None:
    """Transcode *src* to a 16 kHz mono 16-bit WAV at *dst* using ffmpeg.

    With ``dry_run=True`` nothing is touched on disk: no directory is
    created and ffmpeg is not invoked.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
    """
    if dry_run:
        return
    # Only create the output directory when we are really going to write.
    dst.parent.mkdir(parents=True, exist_ok=True)
    cmd = [
        "ffmpeg", "-y", "-loglevel", "error",
        "-i", str(src),
        "-ac", "1",             # mono
        "-ar", "16000",         # 16 kHz
        "-sample_fmt", "s16",   # 16-bit int
        str(dst),
    ]
    subprocess.run(cmd, check=True)


def _probe_wav(path: Path) -> tuple[bool, float]:
    """Return ``(format_ok, duration_seconds)`` for the WAV file at *path*.

    ``format_ok`` is true only for 16 kHz, mono, 2-byte samples. Errors from
    opening or parsing the file propagate to the caller.
    """
    with wave.open(str(path), "rb") as wf:
        rate = wf.getframerate()
        ok = rate == 16000 and wf.getnchannels() == 1 and wf.getsampwidth() == 2
        # Guard against a corrupt header reporting a zero frame rate.
        dur = wf.getnframes() / rate if rate else 0.0
    return ok, dur


def main() -> None:
    """Parse arguments, build the file → phrase mapping and convert the clips."""
    ap = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    ap.add_argument("--src", default=str(DEFAULT_SRC),
                    help=f"source directory of MP3s (default: {DEFAULT_SRC!s})")
    ap.add_argument("--dry-run", action="store_true",
                    help="print the mapping but don't convert")
    args = ap.parse_args()

    # ffmpeg is only needed when we actually convert.
    if shutil.which("ffmpeg") is None and not args.dry_run:
        print("ERROR: ffmpeg not on PATH. Install it first:")
        print(" sudo apt install -y ffmpeg")
        sys.exit(1)

    src_dir = Path(args.src)
    # is_dir() (rather than exists()) also rejects a path that is a plain file.
    if not src_dir.is_dir():
        print(f"ERROR: source directory not found: {src_dir}")
        sys.exit(1)

    mp3s = sorted(src_dir.glob("*.mp3"))
    if not mp3s:
        print(f"No .mp3 files under {src_dir}")
        sys.exit(1)

    print(f"Source: {src_dir}")
    print(f"Target: {ASSETS_ROOT}")
    print(f"Files: {len(mp3s)} mp3 (expected {len(PHRASES)})")
    print()

    # Pass 1: match by filename.
    assigned: dict[tuple[str, str], Path] = {}   # (cat, key) → src mp3
    unmatched: list[Path] = []
    for mp3 in mp3s:
        key = match_by_filename(mp3.stem)
        if key is None:
            unmatched.append(mp3)
        elif key in assigned:
            print(f" [WARN] duplicate match for {key}: keeping {assigned[key].name},"
                  f" ignoring {mp3.name}")
        else:
            assigned[key] = mp3

    # Pass 2: assign unmatched files to still-missing phrases.
    missing_keys = [(cat, key) for (cat, key, _) in PHRASES
                    if (cat, key) not in assigned]
    if len(unmatched) == 1 and len(missing_keys) == 1:
        only_key = missing_keys[0]
        only_file = unmatched[0]
        print(f" [AUTO] assigning '{only_file.name}' → {only_key[0]}/{only_key[1]} "
              f"(the only phrase still unmapped)")
        assigned[only_key] = only_file
    elif unmatched:
        # Ambiguous: report what is left over and let the user rename files.
        print(f" [WARN] {len(unmatched)} unmatched file(s) and "
              f"{len(missing_keys)} unassigned phrase(s). Manual mapping needed:")
        for f in unmatched:
            print(f" unmatched: {f.name}")
        for (cat, key) in missing_keys:
            print(f" missing: {cat}/{key}")
        print()
        print("Resolve manually by renaming the source files to one of:")
        for (_, _, text) in PHRASES:
            print(f" '{text}.mp3'")
        sys.exit(1)

    # Show the plan
    print("Mapping:")
    for (cat, key, _) in PHRASES:
        src = assigned.get((cat, key))
        if src is None:
            print(f" [MISSING] no source for {cat}/{key}")
            continue
        # Truncate long source names so the arrow column stays aligned.
        rel = src.name if len(src.name) <= 60 else src.name[:57] + "..."
        print(f" {rel:<60s} → assets/audio/{cat}/{key}.wav")
    print()

    if args.dry_run:
        print("(dry-run — nothing written)")
        return

    # Convert
    print("Converting…")
    converted = 0
    for (cat, key, _) in PHRASES:
        src = assigned.get((cat, key))
        if src is None:
            continue
        dst = ASSETS_ROOT / cat / f"{key}.wav"
        try:
            convert_to_wav(src, dst)
        except subprocess.CalledProcessError as e:
            print(f" [FAIL] {src.name} → {dst.name}: ffmpeg exited {e.returncode}")
            continue

        # Probe the result; an unreadable output must not abort the batch.
        try:
            ok, dur = _probe_wav(dst)
        except (wave.Error, EOFError, OSError) as e:
            print(f" [FAIL] {src.name} → {dst.name}: unreadable WAV ({e})")
            continue
        mark = "✓" if ok else "⚠ wrong format"
        print(f" {mark} {dst.relative_to(PROJECT_ROOT)} ({dur:.1f}s)")
        if ok:
            converted += 1

    print()
    print(f"Done — {converted}/{len(PHRASES)} clip(s) converted.")
    print()
    print("Deploy + restart:")
    print(" scripts/deploy.sh")
    print(" ssh unitree@192.168.123.164 "
          "'sudo systemctl restart saqr-bridge && journalctl -u saqr-bridge -n 20 | grep audio_player'")


if __name__ == "__main__":
    main()