# Saqr/scripts/import_voices.py

#!/usr/bin/env python3
"""
import_voices.py — Convert TTS MP3s to Saqr's audio library format.

Reads MP3s from a source directory, maps each to its correct
``assets/audio/<category>/<key>.wav`` path, and transcodes to
16 kHz mono int16 WAV using ffmpeg.

Matching rules:
  1. If the filename (without extension, trailing dots stripped) exactly
     equals one of the known phrases, that's a direct map.
  2. A file that doesn't match — typical case is ``ttsmaker-file-*.mp3``
     where the site didn't rename it — can only be placed automatically
     when exactly ONE phrase is unmapped and ONE file is unmatched; the
     two are then paired. Any other leftover unmatched file aborts the
     run with a list of the filenames to rename the sources to.

Requires ffmpeg on PATH:  sudo apt install -y ffmpeg

Usage:
    python scripts/import_voices.py
    python scripts/import_voices.py --src "/home/zedx/Downloads/voices saqr"
    python scripts/import_voices.py --dry-run       # just show mapping
"""
from __future__ import annotations

import argparse
import shutil
import subprocess
import sys
from pathlib import Path

# Project root: the directory two levels above this script file.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
# Root of the audio library that converted WAV clips are written into.
ASSETS_ROOT = PROJECT_ROOT.joinpath("assets", "audio")
# Source directory used when --src is not given on the command line.
DEFAULT_SRC = Path("/home/zedx/Downloads/voices saqr")

# Catalogue of every clip, one (category, key, text) triple per phrase.
# ``category`` and ``key`` name the output file
# ``assets/audio/<category>/<key>.wav``; ``text`` is the spoken sentence and
# also the filename stem a source MP3 is matched on (trailing period
# optional).
PHRASES = [
    (
        "fixed", "ready",
        "Saqr is running. Press R2 plus X to start.",
    ),
    (
        "fixed", "deactivated",
        "Saqr deactivated.",
    ),
    (
        "fixed", "no_camera",
        "Camera not connected. Please plug in the camera and try again.",
    ),
    (
        "fixed", "safe",
        "Safe to enter. Have a good day.",
    ),
    (
        "fixed", "unsafe_generic",
        "Please stop. Wear your proper safety equipment.",
    ),
    (
        "unsafe_missing", "helmet",
        "Please stop. Wear your proper safety equipment. You are missing helmet.",
    ),
    (
        "unsafe_missing", "vest",
        "Please stop. Wear your proper safety equipment. You are missing vest.",
    ),
    (
        "unsafe_missing", "helmet_vest",
        "Please stop. Wear your proper safety equipment. You are missing helmet and vest.",
    ),
]

# Reverse lookup: exact phrase text → (category, key).  Only the verbatim
# text is stored here; dot/whitespace variants are tried by the caller.
TEXT_TO_KEY = dict((phrase, (group, name)) for group, name, phrase in PHRASES)


def normalise(stem: str) -> str:
    """Return *stem* without surrounding whitespace or trailing dots.

    Lets 'Saqr deactivated.' match a file named 'Saqr deactivated..mp3'.
    Whitespace is trimmed before the dots, and the trim is repeated, so a
    stem such as 'Saqr deactivated. ' (dot followed by a space) also loses
    its trailing dot instead of keeping it.

    Args:
        stem: Filename without its extension.

    Returns:
        The stem with leading/trailing whitespace removed and every trailing
        run of dots (and whitespace between them) dropped.
    """
    cleaned = stem.strip()
    # Each pass removes at least one character, so the loop terminates.
    while cleaned.endswith("."):
        cleaned = cleaned.rstrip(".").rstrip()
    return cleaned


def match_by_filename(stem: str) -> tuple[str, str] | None:
    """Look up the phrase whose text corresponds to a source filename stem.

    The stem is tried verbatim and in its normalised form, each with and
    without one trailing period appended.

    Args:
        stem: Filename without its extension.

    Returns:
        The ``(category, key)`` pair of the matching phrase, or ``None`` when
        no variant of *stem* is a known phrase text.
    """
    trimmed = normalise(stem)
    variants = {stem, f"{stem}.", trimmed, f"{trimmed}."}
    hit = next((variant for variant in variants if variant in TEXT_TO_KEY), None)
    return None if hit is None else TEXT_TO_KEY[hit]


def convert_to_wav(src: Path, dst: Path, dry_run: bool = False) -> None:
    """Transcode *src* into a 16 kHz mono 16-bit WAV at *dst* using ffmpeg.

    Args:
        src: Input audio file handed to ffmpeg via ``-i``.
        dst: Output path. Its parent directory is created if needed and an
            existing file is overwritten (ffmpeg ``-y``).
        dry_run: When true, return immediately without creating any
            directory or invoking ffmpeg.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
        FileNotFoundError: the ``ffmpeg`` executable could not be found.
    """
    # Bail out before any filesystem side effect: a dry run must not even
    # create the destination directory.
    if dry_run:
        return

    dst.parent.mkdir(parents=True, exist_ok=True)
    cmd = [
        "ffmpeg", "-y", "-loglevel", "error",
        "-i", str(src),
        "-ac", "1",            # mono
        "-ar", "16000",        # 16 kHz
        "-sample_fmt", "s16",  # 16-bit int
        str(dst),
    ]
    subprocess.run(cmd, check=True)


def _probe_wav(path: Path) -> tuple[bool, float] | None:
    """Inspect the WAV header at *path*.

    Returns:
        ``(ok, duration_seconds)`` where ``ok`` is true when the file is
        16 kHz, mono, 2 bytes per sample; or ``None`` when the file cannot
        be opened or parsed as a WAV.
    """
    import wave

    try:
        with wave.open(str(path), "rb") as wf:
            rate = wf.getframerate()
            ok = rate == 16000 and wf.getnchannels() == 1 and wf.getsampwidth() == 2
            # Guard the division: a zero frame rate must not raise.
            dur = wf.getnframes() / rate if rate else 0.0
    except (wave.Error, EOFError, OSError):
        return None
    return ok, dur


def main():
    """Command-line entry point: map source MP3s to phrases and transcode them.

    Steps:
      1. Parse ``--src`` / ``--dry-run`` and check that ffmpeg is available
         (skipped for a dry run) and that the source directory holds MP3s.
      2. Match each MP3 to a phrase by filename; a single leftover file is
         paired with a single leftover phrase automatically.
      3. Print the resulting mapping; stop here for ``--dry-run``.
      4. Convert every mapped file and verify the format of each output.

    Exits with status 1 when ffmpeg is missing, the source directory is
    missing or contains no ``*.mp3``, or unmatched files remain that cannot
    be paired automatically.  Individual conversion failures are reported
    and skipped; they do not change the exit status.
    """
    ap = argparse.ArgumentParser(description=__doc__,
                                 formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument("--src",     default=str(DEFAULT_SRC),
                    help=f"source directory of MP3s (default: {DEFAULT_SRC!s})")
    ap.add_argument("--dry-run", action="store_true",
                    help="print the mapping but don't convert")
    args = ap.parse_args()

    # ffmpeg is only needed when something will actually be converted.
    if shutil.which("ffmpeg") is None and not args.dry_run:
        print("ERROR: ffmpeg not on PATH.  Install it first:")
        print("    sudo apt install -y ffmpeg")
        sys.exit(1)

    src_dir = Path(args.src)
    # is_dir() also rejects a path that exists but is a regular file.
    if not src_dir.is_dir():
        print(f"ERROR: source directory not found: {src_dir}")
        sys.exit(1)

    mp3s = sorted(src_dir.glob("*.mp3"))
    if not mp3s:
        print(f"No .mp3 files under {src_dir}")
        sys.exit(1)

    print(f"Source: {src_dir}")
    print(f"Target: {ASSETS_ROOT}")
    # Expected count follows the phrase table instead of a hard-coded number.
    print(f"Files:  {len(mp3s)} mp3  (expected {len(PHRASES)})")
    print()

    # Pass 1: match by filename.
    assigned: dict[tuple, Path] = {}  # (cat,key) → src mp3
    unmatched: list[Path] = []
    for mp3 in mp3s:
        key = match_by_filename(mp3.stem)
        if key is None:
            unmatched.append(mp3)
        elif key in assigned:
            # First file (in sorted order) wins; later duplicates are dropped.
            print(f"  [WARN] duplicate match for {key}: keeping {assigned[key].name},"
                  f" ignoring {mp3.name}")
        else:
            assigned[key] = mp3

    # Pass 2: assign unmatched files to still-missing phrases.
    missing_keys = [(cat, key) for (cat, key, _) in PHRASES if (cat, key) not in assigned]
    if len(unmatched) == 1 and len(missing_keys) == 1:
        only_key = missing_keys[0]
        only_file = unmatched[0]
        print(f"  [AUTO] assigning '{only_file.name}' → {only_key[0]}/{only_key[1]} "
              f"(the only phrase still unmapped)")
        assigned[only_key] = only_file
        unmatched = []
        missing_keys = []
    elif unmatched:
        # Ambiguous leftovers: list them and abort so the user can rename
        # the source files (there is no interactive prompt).
        print(f"  [WARN] {len(unmatched)} unmatched file(s) and "
              f"{len(missing_keys)} unassigned phrase(s). Manual mapping needed:")
        for f in unmatched:
            print(f"    unmatched: {f.name}")
        for (cat, key) in missing_keys:
            print(f"    missing:   {cat}/{key}")
        print()
        print("Resolve manually by renaming the source files to one of:")
        for (_, _, text) in PHRASES:
            print(f"    '{text}.mp3'")
        sys.exit(1)

    # Show the plan
    print("Mapping:")
    for (cat, key, _) in PHRASES:
        src = assigned.get((cat, key))
        if src is None:
            print(f"  [MISSING] no source for {cat}/{key}")
            continue
        # Truncate long names so the arrow column stays aligned.
        rel = src.name if len(src.name) <= 60 else src.name[:57] + "..."
        print(f"  {rel:<60s} → assets/audio/{cat}/{key}.wav")
    print()

    if args.dry_run:
        print("(dry-run — nothing written)")
        return

    # Convert
    print("Converting…")
    converted = 0
    for (cat, key, _) in PHRASES:
        src = assigned.get((cat, key))
        if src is None:
            continue
        dst = ASSETS_ROOT / cat / f"{key}.wav"
        try:
            convert_to_wav(src, dst)
        except subprocess.CalledProcessError as e:
            print(f"  [FAIL] {src.name} → {dst.name}: ffmpeg exited {e.returncode}")
            continue

        # Probe the result; an unreadable output is reported like any other
        # failure so the remaining clips are still processed.
        probe = _probe_wav(dst)
        if probe is None:
            print(f"  [FAIL] {src.name} → {dst.name}: output is not a readable WAV")
            continue
        ok, dur = probe
        mark = "✓" if ok else "⚠ wrong format"
        print(f"  {mark}  {dst.relative_to(PROJECT_ROOT)}   ({dur:.1f}s)")
        if ok:
            converted += 1

    print()
    print(f"Done — {converted}/{len(PHRASES)} clip(s) converted.")
    print()
    print("Deploy + restart:")
    print("  scripts/deploy.sh")
    print("  ssh unitree@192.168.123.164 "
          "'sudo systemctl restart saqr-bridge && journalctl -u saqr-bridge -n 20 | grep audio_player'")


# Run the importer only when executed as a script, not when imported.
if __name__ == "__main__":
    main()