Saqr/scripts/import_voices.py

207 lines
7.1 KiB
Python

#!/usr/bin/env python3
"""
import_voices.py — Convert TTS MP3s to Saqr's audio library format.
Reads MP3s from a source directory, maps each to its correct
``assets/audio/<category>/<key>.wav`` path, and transcodes to
16 kHz mono int16 WAV using ffmpeg.
Matching rules:
1. If the filename (without extension, trailing dots stripped) exactly
equals one of the known phrases, that's a direct map.
2. Any file that doesn't match — typical case is ``ttsmaker-file-*.mp3``
where the site didn't rename it — is assigned to whichever phrase
key is still missing. If exactly ONE phrase is unmapped and ONE
file is unmatched, they're paired. Otherwise you're prompted.
Requires ffmpeg on PATH: sudo apt install -y ffmpeg
Usage:
python scripts/import_voices.py
python scripts/import_voices.py --src "/home/zedx/Downloads/voices saqr"
python scripts/import_voices.py --dry-run # just show mapping
"""
from __future__ import annotations
import argparse
import shutil
import subprocess
import sys
from pathlib import Path
# Repository root: two levels up from this file (the script sits in scripts/).
PROJECT_ROOT = Path(__file__).resolve().parent.parent
# Root of the audio library that converted clips are written into.
ASSETS_ROOT = PROJECT_ROOT.joinpath("assets", "audio")
# Directory searched for source MP3s when --src is not supplied.
DEFAULT_SRC = Path("/home/zedx/Downloads/voices saqr")
# Catalogue of every clip as (category, key, text) tuples.
#   category / key -> output location assets/audio/<category>/<key>.wav
#   text           -> the spoken phrase; source file names are compared
#                     against it (with or without the trailing period).
PHRASES = [
    (
        "fixed",
        "ready",
        "Saqr is running. Press R2 plus X to start.",
    ),
    (
        "fixed",
        "deactivated",
        "Saqr deactivated.",
    ),
    (
        "fixed",
        "no_camera",
        "Camera not connected. Please plug in the camera and try again.",
    ),
    (
        "fixed",
        "safe",
        "Safe to enter. Have a good day.",
    ),
    (
        "fixed",
        "unsafe_generic",
        "Please stop. Wear your proper safety equipment.",
    ),
    (
        "unsafe_missing",
        "helmet",
        "Please stop. Wear your proper safety equipment. You are missing helmet.",
    ),
    (
        "unsafe_missing",
        "vest",
        "Please stop. Wear your proper safety equipment. You are missing vest.",
    ),
    (
        "unsafe_missing",
        "helmet_vest",
        "Please stop. Wear your proper safety equipment. You are missing helmet and vest.",
    ),
]

# Reverse lookup: exact phrase text -> (category, key).
TEXT_TO_KEY = {phrase[2]: phrase[:2] for phrase in PHRASES}
def normalise(stem: str) -> str:
    """Return *stem* without surrounding whitespace or trailing dots.

    This lets the stem of 'Saqr deactivated..mp3' ('Saqr deactivated.') and
    the stem of 'Saqr deactivated.mp3' reduce to the same string.

    Whitespace is stripped *before* the dots, and the stripping repeats until
    no trailing dot is left, so stems such as 'name. ' or 'name . .' are
    reduced to 'name' as well instead of keeping a stray dot.
    """
    cleaned = stem.strip()
    while cleaned.endswith("."):
        # Drop the run of dots, then any whitespace that was hiding more dots.
        cleaned = cleaned.rstrip(".").rstrip()
    return cleaned
def match_by_filename(stem: str) -> tuple[str, str] | None:
    """Look up the phrase whose text equals a file-name stem.

    The stem is tried verbatim, with a period appended, in its normalised
    form, and in its normalised form with a period appended.

    Args:
        stem: file name without its extension.

    Returns:
        The ``(category, key)`` pair from ``TEXT_TO_KEY`` for the first
        candidate that matches, or ``None`` when none does.
    """
    bare = normalise(stem)
    # An ordered tuple rather than a set: the most literal spelling is tried
    # first, so the outcome never depends on hash ordering.
    for candidate in (stem, stem + ".", bare, bare + "."):
        hit = TEXT_TO_KEY.get(candidate)
        if hit is not None:
            return hit
    return None
def convert_to_wav(src: Path, dst: Path, dry_run: bool = False) -> None:
    """Transcode *src* to a 16 kHz mono 16-bit WAV at *dst* using ffmpeg.

    Args:
        src: input audio file handed to ffmpeg.
        dst: output path; missing parent directories are created and an
            existing file is overwritten (ffmpeg ``-y``).
        dry_run: when true, do nothing at all.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
    """
    if dry_run:
        # A dry run must not touch the filesystem, so return before the
        # output directory would be created.
        return

    dst.parent.mkdir(parents=True, exist_ok=True)
    cmd = [
        "ffmpeg", "-y", "-loglevel", "error",
        "-i", str(src),
        "-ac", "1",            # mono
        "-ar", "16000",        # 16 kHz
        "-sample_fmt", "s16",  # 16-bit int
        str(dst),
    ]
    subprocess.run(cmd, check=True)
def main():
    """Command-line entry point: map source MP3s to phrases and convert them.

    Steps:
      1. Parse ``--src`` / ``--dry-run`` and check that ffmpeg (unless this
         is a dry run) and the source directory are available.
      2. Pair each MP3 with a phrase by file name. If exactly one file and
         exactly one phrase are left over, they are paired automatically.
      3. Print the mapping, then (unless ``--dry-run``) transcode every
         mapped file and check the header of the WAV that was written.

    Exits with status 1 when ffmpeg is missing, the source directory is
    absent or contains no ``*.mp3`` files, or leftover files cannot be paired
    unambiguously. Individual conversion failures are reported and skipped.
    """
    # Imported here once instead of on every loop iteration; only this
    # function probes the converted files.
    import wave

    ap = argparse.ArgumentParser(description=__doc__,
                                 formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument("--src", default=str(DEFAULT_SRC),
                    help=f"source directory of MP3s (default: {DEFAULT_SRC!s})")
    ap.add_argument("--dry-run", action="store_true",
                    help="print the mapping but don't convert")
    args = ap.parse_args()

    # ffmpeg is only required when files are actually converted.
    if shutil.which("ffmpeg") is None and not args.dry_run:
        print("ERROR: ffmpeg not on PATH. Install it first:")
        print(" sudo apt install -y ffmpeg")
        sys.exit(1)

    src_dir = Path(args.src)
    # is_dir() rather than exists(): a regular file is not a usable source.
    if not src_dir.is_dir():
        print(f"ERROR: source directory not found: {src_dir}")
        sys.exit(1)

    mp3s = sorted(src_dir.glob("*.mp3"))
    if not mp3s:
        print(f"No .mp3 files under {src_dir}")
        sys.exit(1)

    print(f"Source: {src_dir}")
    print(f"Target: {ASSETS_ROOT}")
    print(f"Files: {len(mp3s)} mp3 (expected {len(PHRASES)})")
    print()

    # Pass 1: match by filename.
    assigned: dict[tuple, Path] = {}  # (cat, key) → src mp3
    unmatched: list[Path] = []
    for mp3 in mp3s:
        key = match_by_filename(mp3.stem)
        if key is None:
            unmatched.append(mp3)
        elif key in assigned:
            print(f" [WARN] duplicate match for {key}: keeping {assigned[key].name},"
                  f" ignoring {mp3.name}")
        else:
            assigned[key] = mp3

    # Pass 2: assign unmatched files to still-missing phrases.
    missing_keys = [(cat, key) for (cat, key, _) in PHRASES if (cat, key) not in assigned]
    if len(unmatched) == 1 and len(missing_keys) == 1:
        # One leftover file and one leftover phrase: the pairing is unambiguous.
        only_key = missing_keys[0]
        only_file = unmatched[0]
        print(f" [AUTO] assigning '{only_file.name}' → {only_key[0]}/{only_key[1]} "
              f"(the only phrase still unmapped)")
        assigned[only_key] = only_file
        unmatched = []
        missing_keys = []
    elif unmatched:
        # Ambiguous: report what is left and let the user rename the files.
        print(f" [WARN] {len(unmatched)} unmatched file(s) and "
              f"{len(missing_keys)} unassigned phrase(s). Manual mapping needed:")
        for f in unmatched:
            print(f" unmatched: {f.name}")
        for (cat, key) in missing_keys:
            print(f" missing: {cat}/{key}")
        print()
        print("Resolve manually by renaming the source files to one of:")
        for (_, _, text) in PHRASES:
            print(f" '{text}.mp3'")
        sys.exit(1)

    # Show the plan
    print("Mapping:")
    for (cat, key, _) in PHRASES:
        src = assigned.get((cat, key))
        if src is None:
            print(f" [MISSING] no source for {cat}/{key}")
            continue
        # Truncate long file names so the table columns stay aligned.
        rel = src.name if len(src.name) <= 60 else src.name[:57] + "..."
        print(f" {rel:<60s} → assets/audio/{cat}/{key}.wav")
    print()

    if args.dry_run:
        print("(dry-run — nothing written)")
        return

    # Convert
    print("Converting…")
    converted = 0
    for (cat, key, _) in PHRASES:
        src = assigned.get((cat, key))
        if src is None:
            continue
        dst = ASSETS_ROOT / cat / f"{key}.wav"
        try:
            convert_to_wav(src, dst)
        except subprocess.CalledProcessError as e:
            print(f" [FAIL] {src.name} → {dst.name}: ffmpeg exited {e.returncode}")
            continue

        # Probe the result: it must be 16 kHz, mono, 2 bytes per sample.
        try:
            with wave.open(str(dst), "rb") as wf:
                rate = wf.getframerate()
                ok = (rate == 16000
                      and wf.getnchannels() == 1
                      and wf.getsampwidth() == 2)
                dur = wf.getnframes() / rate if rate else 0.0
        except (wave.Error, EOFError, OSError) as e:
            # A missing or corrupt output must not abort the remaining clips.
            print(f" [FAIL] {src.name} → {dst.name}: unreadable WAV ({e})")
            continue

        mark = "" if ok else "⚠ wrong format"
        print(f" {mark} {dst.relative_to(PROJECT_ROOT)} ({dur:.1f}s)")
        if ok:
            converted += 1

    print()
    print(f"Done — {converted}/{len(PHRASES)} clip(s) converted.")
    print()
    print("Deploy + restart:")
    print(" scripts/deploy.sh")
    print(" ssh unitree@192.168.123.164 "
          "'sudo systemctl restart saqr-bridge && journalctl -u saqr-bridge -n 20 | grep audio_player'")
# Run the importer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()