179 lines
6.3 KiB
Python
179 lines
6.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
generate_phrases.py — synthesize Saqr's 8 phrases to WAV using piper-tts.

This produces the audio library locally (no G1 mic, no TtsMaker capture,
no PulseAudio) at exactly the format robot.audio_player expects:
16 kHz mono int16 WAV under assets/audio/<category>/<key>.wav.

Setup (once):
    pip install piper-tts
    python -m piper.download_voices en_US-amy-medium

Usage:
    python scripts/generate_phrases.py
    python scripts/generate_phrases.py --voice en_US-lessac-medium
    python scripts/generate_phrases.py --only safe,helmet_vest
    python scripts/generate_phrases.py --voice-dir /custom/path

Common voices (run the download command above, swap the name):
    en_US-amy-medium      female, neutral    (~60 MB)
    en_US-lessac-medium   female, friendly   (~60 MB)
    en_US-ryan-high       male, clear        (~120 MB)
    en_GB-alan-medium     male, British      (~60 MB)
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import io
|
|
import sys
|
|
import wave
|
|
from pathlib import Path
|
|
|
|
# Directory two levels above this file (the script is run as
# scripts/generate_phrases.py, so this is the directory holding scripts/).
PROJECT_ROOT = Path(__file__).resolve().parent.parent

# Root of the generated audio library: <PROJECT_ROOT>/assets/audio.
ASSETS_ROOT = PROJECT_ROOT.joinpath("assets", "audio")
|
|
|
|
# Phrase catalogue as (category, key, text) triples.
# Each entry is written to ASSETS_ROOT/<category>/<key>.wav, and <key> is
# what the --only filter matches against.
PHRASES = [
    # --- fixed announcements -------------------------------------------
    (
        "fixed",
        "ready",
        "Saqr is running. Press R2 plus X to start.",
    ),
    (
        "fixed",
        "deactivated",
        "Saqr deactivated.",
    ),
    (
        "fixed",
        "no_camera",
        "Camera not connected. Please plug in the camera and try again.",
    ),
    (
        "fixed",
        "safe",
        "Safe to enter. Have a good day.",
    ),
    (
        "fixed",
        "unsafe_generic",
        "Please stop. Wear your proper safety equipment.",
    ),
    # --- warnings naming the missing equipment ---------------------------
    (
        "unsafe_missing",
        "helmet",
        "Please stop. Wear your proper safety equipment. You are missing helmet.",
    ),
    (
        "unsafe_missing",
        "vest",
        "Please stop. Wear your proper safety equipment. You are missing vest.",
    ),
    (
        "unsafe_missing",
        "helmet_vest",
        "Please stop. Wear your proper safety equipment. You are missing helmet and vest.",
    ),
]
|
|
|
|
|
|
def find_voice_files(voice_name: str, override_dir: Path = None):
|
|
"""Locate <voice_name>.onnx + .onnx.json in piper's standard dirs."""
|
|
search_dirs = []
|
|
if override_dir is not None:
|
|
search_dirs.append(Path(override_dir))
|
|
search_dirs += [
|
|
Path.home() / ".local" / "share" / "piper-voices",
|
|
Path.home() / ".cache" / "piper" / "voices",
|
|
Path.cwd(),
|
|
]
|
|
for d in search_dirs:
|
|
onnx = d / f"{voice_name}.onnx"
|
|
js = d / f"{voice_name}.onnx.json"
|
|
if onnx.exists() and js.exists():
|
|
return onnx, js
|
|
return None, None
|
|
|
|
|
|
def resample_to_16k_mono_int16(pcm: bytes, src_rate: int, src_channels: int) -> bytes:
    """Convert interleaved int16 PCM to 16 kHz mono int16 PCM.

    Multi-channel input is mixed down by averaging the channels of each
    frame. If the source rate is not already 16 kHz, the signal is resampled
    by linear interpolation between neighbouring samples.

    Args:
        pcm: Raw little-endian int16 samples, interleaved by channel.
        src_rate: Sample rate of ``pcm`` in Hz; must be positive.
        src_channels: Number of interleaved channels in ``pcm``; must be >= 1.

    Returns:
        Little-endian int16 mono PCM at 16 kHz (empty for empty input).

    Raises:
        ValueError: If ``src_rate`` or ``src_channels`` is not positive, or
            (raised by numpy) if ``pcm`` does not hold whole frames.
    """
    import numpy as np

    if src_rate <= 0:
        raise ValueError(f"src_rate must be positive, got {src_rate}")
    if src_channels < 1:
        raise ValueError(f"src_channels must be >= 1, got {src_channels}")

    target_rate = 16000
    # WAV sample data is little-endian; pin the dtype instead of relying on
    # the host's native byte order.
    int16_le = np.dtype("<i2")

    samples = np.frombuffer(pcm, dtype=int16_le)
    if src_channels > 1:
        # mean() promotes to float64, so the mixdown keeps fractional values
        # until the single rounding step at the end.
        mono = samples.reshape(-1, src_channels).mean(axis=1)
    else:
        mono = samples.astype(np.float64)

    # np.interp rejects an empty sample array, so empty input skips resampling.
    if src_rate != target_rate and len(mono) > 0:
        target_len = int(round(len(mono) * target_rate / src_rate))
        # Fractional source positions spanning first..last sample. Truncating
        # these to integer indices would only pick the nearest earlier sample;
        # interpolating between neighbours is the actual linear resample.
        # NOTE: there is no low-pass filter here, so downsampling can still
        # alias content above 8 kHz.
        positions = np.linspace(0, len(mono) - 1, target_len)
        mono = np.interp(positions, np.arange(len(mono)), mono)

    return np.clip(np.rint(mono), -32768, 32767).astype(int16_le).tobytes()
|
|
|
|
|
|
def save_wav_16k_mono(pcm: bytes, path: Path) -> None:
    """Write raw PCM bytes to ``path`` as a 16 kHz, mono, 16-bit WAV file.

    Missing parent directories are created first. The file is opened in
    ``"wb"`` mode, so an existing file at ``path`` is replaced.

    Args:
        pcm: Sample data written verbatim as the WAV frames.
        path: Destination ``.wav`` path.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    # (nchannels, sampwidth in bytes, framerate, nframes, comptype, compname);
    # nframes starts at 0 and the header is patched once frames are written.
    wav_params = (1, 2, 16000, 0, "NONE", "not compressed")

    with wave.open(str(path), "wb") as writer:
        writer.setparams(wav_params)
        writer.writeframes(pcm)
|
|
|
|
|
|
def _parse_only_keys(raw):
    """Turn a comma-separated ``--only`` value into a set of phrase keys.

    Whitespace around each key is stripped and empty items (e.g. from a
    trailing comma) are dropped, so ``"safe, helmet"`` selects both keys.
    Returns ``None`` when ``raw`` is ``None`` or empty, meaning "no filter".
    """
    if not raw:
        return None
    return {key.strip() for key in raw.split(",") if key.strip()}


def _synthesize_to_wav(voice, text, wav_file, length_scale=None):
    """Synthesize ``text`` into ``wav_file`` using whichever API piper offers.

    Newer piper-tts releases (the ones that ship ``piper.download_voices``)
    write WAV output through ``synthesize_wav()`` and take tuning options via
    a ``SynthesisConfig``; older releases write the WAV from ``synthesize()``
    directly and take ``length_scale`` as a keyword argument.
    NOTE(review): API split is per the piper-tts Python API docs — confirm
    against the installed version.
    """
    if hasattr(voice, "synthesize_wav"):
        syn_config = None
        if length_scale is not None:
            from piper import SynthesisConfig

            syn_config = SynthesisConfig(length_scale=length_scale)
        voice.synthesize_wav(text, wav_file, syn_config=syn_config)
        return

    legacy_kwargs = {}
    if length_scale is not None:
        legacy_kwargs["length_scale"] = length_scale
    voice.synthesize(text, wav_file, **legacy_kwargs)


def main():
    """CLI entry point: synthesize the selected phrases to 16 kHz mono WAVs.

    Parses the command line, selects entries from ``PHRASES`` (optionally
    filtered by ``--only``), loads the requested piper voice, and for each
    phrase synthesizes audio, converts it to 16 kHz mono int16 and writes it
    to ``ASSETS_ROOT/<category>/<key>.wav``.

    Exits with status 1 when no phrase matches ``--only``, when piper-tts
    cannot be imported, or when the voice files cannot be found. Invalid
    ``--length-scale`` values are rejected through argparse (status 2).
    """
    ap = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    ap.add_argument("--voice", default="en_US-amy-medium",
                    help="piper voice name (default en_US-amy-medium)")
    ap.add_argument("--voice-dir", default=None,
                    help="override directory to search for the voice .onnx files")
    ap.add_argument("--only", default=None,
                    help="comma-separated keys to generate (e.g. safe,helmet)")
    ap.add_argument("--length-scale", type=float, default=None,
                    help="speaking rate (piper default ~1.0; 0.8 faster, 1.2 slower)")
    args = ap.parse_args()

    if args.length_scale is not None and args.length_scale <= 0:
        ap.error("--length-scale must be greater than 0")

    only = _parse_only_keys(args.only)
    targets = [(c, k, t) for (c, k, t) in PHRASES if only is None or k in only]
    if not targets:
        print(f"No phrases match --only={args.only!r}.")
        sys.exit(1)
    if only is not None:
        # A typo in one key should not go unnoticed just because others matched.
        unknown = sorted(only - {k for (_, k, _) in targets})
        if unknown:
            print(f"WARNING: ignoring unknown --only keys: {', '.join(unknown)}")

    # Imported lazily so --help and the --only check work without piper installed.
    try:
        from piper.voice import PiperVoice
    except ImportError:
        print("ERROR: piper-tts is not installed in this Python environment.")
        print()
        print("Install it:")
        print(" pip install piper-tts")
        print(f" python -m piper.download_voices {args.voice}")
        sys.exit(1)

    onnx, js = find_voice_files(args.voice, args.voice_dir)
    if onnx is None:
        print(f"ERROR: voice files for {args.voice!r} not found.")
        print()
        print("Download:")
        print(f" python -m piper.download_voices {args.voice}")
        print()
        print("Or pass --voice-dir pointing at a directory that contains")
        print(f" {args.voice}.onnx + {args.voice}.onnx.json")
        sys.exit(1)

    print(f"Loading voice: {onnx}")
    voice = PiperVoice.load(str(onnx), config_path=str(js))
    src_rate = int(voice.config.sample_rate)
    print(f"Native rate: {src_rate} Hz")
    print(f"Target: 16000 Hz mono int16 under {ASSETS_ROOT}")
    print()

    for cat, key, text in targets:
        out = ASSETS_ROOT / cat / f"{key}.wav"
        print(f"[{cat}/{key}]")
        print(f" text : {text}")

        # Synthesize into an in-memory WAV buffer, then read it back so the
        # rate/channel count come from what piper actually wrote.
        buf = io.BytesIO()
        with wave.open(buf, "wb") as wf:
            _synthesize_to_wav(voice, text, wf, args.length_scale)
        buf.seek(0)
        with wave.open(buf, "rb") as wf:
            pcm = wf.readframes(wf.getnframes())
            rate_in = wf.getframerate()
            channels = wf.getnchannels()

        resampled = resample_to_16k_mono_int16(pcm, rate_in, channels)
        save_wav_16k_mono(resampled, out)

        # 2 bytes per int16 sample at 16000 samples per second.
        dur = len(resampled) / 2.0 / 16000
        print(f" saved → {out.relative_to(PROJECT_ROOT)} ({dur:.1f}s)")
        print()

    print("Done. Deploy + restart:")
    print(" scripts/deploy.sh")
    print(" ssh unitree@192.168.123.164 'sudo systemctl restart saqr-bridge && "
          "journalctl -u saqr-bridge -n 20 | grep audio_player'")
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|