""" builtin_tts.py — Unitree G1 built-in TTS (English) =================================================== Thin wrapper around AudioClient.TtsMaker(text, speaker_id). The G1's on-board TTS engine synthesizes and plays directly through the body speaker — no internet, no MP3/WAV roundtrip, no audio SDK plumbing on our side. Speaker IDs — IMPORTANT: The `speaker_id` argument is *not* a language selector; it's a voice bank. On some G1 firmware revisions, speaker_id 0 produces a Mandarin voice even for Latin-script input. If `Config/config_Voice.json:: tts.builtin_speaker_id` gives you Chinese output, try 1 or 2. Quick probe on the robot: python3 /home/unitree/Marcus/Voice/builtin_tts.py 0 1 2 (plays "Hello, I am Sanad" once per speaker_id so you can pick the one that sounds like English, then set that in config_Voice.json). Usage: from Voice.builtin_tts import BuiltinTTS tts = BuiltinTTS(audio_client, default_speaker_id=1) tts.speak("Hello, I am Sanad") """ from __future__ import annotations import logging import time from typing import Optional log = logging.getLogger("builtin_tts") class BuiltinTTS: """Synchronous English-only TTS via the G1's on-board engine.""" # Rough playback duration per character — enough margin that `speak()` # returns after audio has actually finished on the robot. SECONDS_PER_CHAR = 0.08 MIN_SECONDS = 1.5 def __init__(self, audio_client, default_speaker_id: int = 0): """ Args: audio_client : initialized unitree_sdk2py AudioClient default_speaker_id : 0, 1, or 2 (default voice timbre) """ self._client = audio_client self._default_speaker = default_speaker_id def speak( self, text: str, speaker_id: Optional[int] = None, block: bool = True, ) -> int: """ Play `text` on the G1 speaker via TtsMaker. English-only by policy. Non-ASCII (Arabic) input is rejected rather than silently played back as Chinese. Returns the TtsMaker status code (0 = success) or -1 if input was rejected. """ if not text or not text.strip(): return -1 # Reject non-English. TtsMaker "falls back" by playing Arabic text # as Chinese phonemes — intelligible to nobody — so we refuse it # rather than surprise the operator. if any(ord(c) > 127 for c in text): log.warning("builtin_tts refusing non-ASCII text: %r", text[:60]) return -1 sid = self._default_speaker if speaker_id is None else speaker_id log.info("[TtsMaker sid=%d] %s", sid, text[:80]) try: code = self._client.TtsMaker(text, sid) except Exception as e: log.error("TtsMaker call failed: %s", e) return -1 if block: # Estimate how long the G1 is going to take to finish speaking. # TtsMaker is fire-and-forget — we need to wait so the mic loop # knows when to unmute. duration = max(self.MIN_SECONDS, len(text) * self.SECONDS_PER_CHAR) time.sleep(duration) return code # ─── STANDALONE PROBE ──────────────────────────────────────────────── # Usage on the robot (in the marcus conda env): # python3 Voice/builtin_tts.py # plays all 3 speaker IDs # python3 Voice/builtin_tts.py 1 2 # plays only IDs 1 and 2 # Pick whichever ID sounds like English and set it in # Config/config_Voice.json :: tts.builtin_speaker_id if __name__ == "__main__": import sys from unitree_sdk2py.core.channel import ChannelFactoryInitialize from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient ChannelFactoryInitialize(0, "eth0") ac = AudioClient() ac.SetTimeout(10.0) ac.Init() ac.SetVolume(100) ids = [int(x) for x in sys.argv[1:]] if len(sys.argv) > 1 else [0, 1, 2] phrase = "Hello, I am Sanad." print(f"\nProbing TtsMaker with text: {phrase!r}\n") tts = BuiltinTTS(ac) for sid in ids: print(f" → speaker_id = {sid}") tts.speak(phrase, speaker_id=sid, block=True) time.sleep(0.5) print("\nDone. Pick the speaker_id that sounded like English and put it in") print(" Config/config_Voice.json :: tts.builtin_speaker_id")