""" builtin_tts.py — Unitree G1 built-in TTS (English only) ======================================================== Thin wrapper around AudioClient.TtsMaker(text, speaker_id). The G1's on-board TTS engine synthesizes and plays directly through the body speaker — no internet, no MP3/WAV roundtrip, no audio SDK plumbing on our side. Supported languages (firmware-side): English — works (Marcus uses this) Chinese — works (unused) Arabic — silently falls back to Chinese (unusable — we refuse these) Signature: client.TtsMaker(text: str, speaker_id: int) -> int # 0 = success speaker_id ∈ {0, 1, 2} — different voice timbres Usage: from Voice.builtin_tts import BuiltinTTS tts = BuiltinTTS(audio_client) tts.speak("Hello, I am Sanad", speaker_id=0) """ from __future__ import annotations import logging import time from typing import Optional log = logging.getLogger("builtin_tts") class BuiltinTTS: """Synchronous English-only TTS via the G1's on-board engine.""" # Rough playback duration per character — enough margin that `speak()` # returns after audio has actually finished on the robot. SECONDS_PER_CHAR = 0.08 MIN_SECONDS = 1.5 def __init__(self, audio_client, default_speaker_id: int = 0): """ Args: audio_client : initialized unitree_sdk2py AudioClient default_speaker_id : 0, 1, or 2 (default voice timbre) """ self._client = audio_client self._default_speaker = default_speaker_id def speak( self, text: str, speaker_id: Optional[int] = None, block: bool = True, ) -> int: """ Play `text` on the G1 speaker via TtsMaker. English-only by policy. Non-ASCII (Arabic) input is rejected rather than silently played back as Chinese. Returns the TtsMaker status code (0 = success) or -1 if input was rejected. """ if not text or not text.strip(): return -1 # Reject non-English. TtsMaker "falls back" by playing Arabic text # as Chinese phonemes — intelligible to nobody — so we refuse it # rather than surprise the operator. if any(ord(c) > 127 for c in text): log.warning("builtin_tts refusing non-ASCII text: %r", text[:60]) return -1 sid = self._default_speaker if speaker_id is None else speaker_id log.info("[TtsMaker sid=%d] %s", sid, text[:80]) try: code = self._client.TtsMaker(text, sid) except Exception as e: log.error("TtsMaker call failed: %s", e) return -1 if block: # Estimate how long the G1 is going to take to finish speaking. # TtsMaker is fire-and-forget — we need to wait so the mic loop # knows when to unmute. duration = max(self.MIN_SECONDS, len(text) * self.SECONDS_PER_CHAR) time.sleep(duration) return code