Update 2026-04-22 12:08:03

This commit is contained in:
kassam 2026-04-22 12:08:04 +04:00
parent 1c994fa175
commit dcf5f9f39b
5 changed files with 125 additions and 19 deletions

View File

@ -529,8 +529,16 @@ def run_terminal():
return str(v)
W = 58
LEFT_W = 28
RIGHT_W = (W - 2) - LEFT_W # visible chars available in the right column
def _pad(s: str, width: int) -> str:
    """Left-justify *s* to *width* visible characters; ANSI colour escapes count as zero-width."""
    # ljust pads by raw length, so widen the target by however many
    # invisible (escape) characters the string carries.
    invisible = len(s) - len(_strip_ansi(s))
    return s.ljust(width + invisible)
print("\n\n" + "" + "" * (W-2) + "")
print("" + " SANAD — AI BRAIN READY".ljust(W-2) + "")
print("" + _pad(" SANAD — AI BRAIN READY", W-2) + "")
print("" + "" * (W-2) + "")
left = [("model", status["model"]),
("voice", _fmt(status["voice"])),
@ -542,12 +550,10 @@ def run_terminal():
for i in range(max(len(left), len(right))):
l = f" {left[i][0]:<8}: {left[i][1]}" if i < len(left) else ""
r = f" {right[i][0]:<8}: {right[i][1]}" if i < len(right) else ""
# account for ANSI escapes when padding the left column
pad = 28 + (len(l) - len(_strip_ansi(l)))
print("" + l.ljust(pad) + r.ljust(W - 2 - pad) + "")
print("" + _pad(l, LEFT_W) + _pad(r, RIGHT_W) + "")
print("" + "" * (W-2) + "")
print("" + " Type a command, or say \"Sanad, <command>\".".ljust(W-2) + "")
print("" + " help · example · yolo · patrol · auto on/off · q".ljust(W-2) + "")
print("" + _pad(" Type a command, or say \"Sanad, <command>\".", W-2) + "")
print("" + _pad(" help · example · yolo · test_tts · auto on/off · q", W-2) + "")
print("" + "" * (W-2) + "\n")
try:
@ -574,6 +580,22 @@ def run_terminal():
_audio_api._unmute_mic()
print(" Mic unmuted")
continue
if cmd.lower().startswith("test_tts"):
# Probe speaker IDs to find which one speaks English on this
# firmware. Usage: `test_tts` (runs 0, 1, 2) or `test_tts 1`.
if _audio_api is None or _audio_api._tts_engine is None:
print(" Voice is not initialized")
continue
parts = cmd.split()
ids = [int(x) for x in parts[1:]] if len(parts) > 1 else [0, 1, 2]
phrase = "Hello, I am Sanad."
for sid in ids:
print(f" → speaker_id = {sid}")
_audio_api._tts_engine.speak(phrase, speaker_id=sid, block=True)
time.sleep(0.3)
print(' Pick the ID that sounded English and set it in')
print(' Config/config_Voice.json :: tts.builtin_speaker_id')
continue
result = process_command(cmd)
sp = result.get("speak", "") if isinstance(result, dict) else ""
if sp and _audio_api:

View File

@ -6,7 +6,7 @@
},
"stt": {
"wake_model": "tiny",
"command_model": "small",
"command_model": "tiny",
"wake_words_en": ["sanad", "sannad", "sanat", "sunnat"],
"language": "en",
"command_timeout_sec": 10,

View File

@ -1,23 +1,25 @@
"""
builtin_tts.py Unitree G1 built-in TTS (English only)
========================================================
builtin_tts.py Unitree G1 built-in TTS (English)
===================================================
Thin wrapper around AudioClient.TtsMaker(text, speaker_id). The G1's on-board
TTS engine synthesizes and plays directly through the body speaker — no
internet, no MP3/WAV roundtrip, no audio SDK plumbing on our side.
Supported languages (firmware-side):
English — works (Marcus uses this)
Chinese — works (unused)
Arabic — silently falls back to Chinese (unusable — we refuse these)
Speaker IDs IMPORTANT:
The `speaker_id` argument is *not* a language selector; it's a voice
bank. On some G1 firmware revisions, speaker_id 0 produces a Mandarin
voice even for Latin-script input. If `Config/config_Voice.json::
tts.builtin_speaker_id` gives you Chinese output, try 1 or 2.
Signature:
client.TtsMaker(text: str, speaker_id: int) -> int # 0 = success
speaker_id {0, 1, 2} different voice timbres
Quick probe on the robot:
python3 /home/unitree/Marcus/Voice/builtin_tts.py 0 1 2
(plays "Hello, I am Sanad" once per speaker_id so you can pick the
one that sounds like English, then set that in config_Voice.json).
Usage:
from Voice.builtin_tts import BuiltinTTS
tts = BuiltinTTS(audio_client)
tts.speak("Hello, I am Sanad", speaker_id=0)
tts = BuiltinTTS(audio_client, default_speaker_id=1)
tts.speak("Hello, I am Sanad")
"""
from __future__ import annotations
@ -86,3 +88,33 @@ class BuiltinTTS:
time.sleep(duration)
return code
# ─── STANDALONE PROBE ────────────────────────────────────────────────
# Usage on the robot (in the marcus conda env):
#   python3 Voice/builtin_tts.py        # plays all 3 speaker IDs
#   python3 Voice/builtin_tts.py 1 2    # plays only IDs 1 and 2
# Pick whichever ID sounds like English and set it in
#   Config/config_Voice.json :: tts.builtin_speaker_id
if __name__ == "__main__":
    import sys
    from unitree_sdk2py.core.channel import ChannelFactoryInitialize
    from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient
    # The DDS channel factory must be up before any SDK client is built.
    # NOTE(review): hard-codes the robot-side interface name "eth0" — confirm
    # this matches the interface actually wired to the G1.
    ChannelFactoryInitialize(0, "eth0")
    ac = AudioClient()
    ac.SetTimeout(10.0)  # generous RPC timeout; TTS calls can be slow
    ac.Init()
    ac.SetVolume(100)  # full volume so every probed voice is clearly audible
    # Speaker IDs come from the command line; default probes all three banks.
    ids = [int(x) for x in sys.argv[1:]] if len(sys.argv) > 1 else [0, 1, 2]
    phrase = "Hello, I am Sanad."
    print(f"\nProbing TtsMaker with text: {phrase!r}\n")
    tts = BuiltinTTS(ac)
    for sid in ids:
        print(f" → speaker_id = {sid}")
        # block=True waits out the estimated playback so probes don't overlap
        tts.speak(phrase, speaker_id=sid, block=True)
        time.sleep(0.5)  # short pause between voices to keep them distinct
    print("\nDone. Pick the speaker_id that sounded like English and put it in")
    print(" Config/config_Voice.json :: tts.builtin_speaker_id")

View File

@ -3,8 +3,17 @@
run_marcus.py Marcus AI Brain (terminal mode)
Usage: python3 run_marcus.py
"""
import sys
import os
import sys
import warnings
# Silence known-harmless third-party deprecation warnings before ANY heavy
# import fires them. Keeps the terminal dashboard readable.
# - TypedStorage : fires from torch during yolov8m.pt checkpoint load
# - torch.cuda.amp : fires in ultralytics when FP16 is enabled on Jetson torch 2.1
warnings.filterwarnings("ignore", message=".*TypedStorage is deprecated.*")
warnings.filterwarnings("ignore", message=".*torch\\.cuda\\.amp.*")
os.environ.setdefault("PYTHONWARNINGS", "ignore::UserWarning:torch._utils")
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
if PROJECT_ROOT not in sys.path:

43
start_ollama.sh Executable file
View File

@ -0,0 +1,43 @@
#!/usr/bin/env bash
# start_ollama.sh — launch Ollama with Jetson-friendly memory settings
#
# Why: the Jetson Orin NX shares 16 GB of unified memory between CPU and
# GPU. With Marcus + YOLO + Whisper + Holosoma + camera + audio running
# alongside Qwen2.5-VL, the llama runner's compute graph can OOM and
# Linux then kills the biggest process (often Holosoma — a safety problem
# for locomotion). The environment below trims Ollama's footprint:
#
#   OLLAMA_FLASH_ATTENTION=1     ~30 % smaller attention tensors
#   OLLAMA_KV_CACHE_TYPE=q8_0    quantized KV cache (roughly halves it)
#   OLLAMA_KEEP_ALIVE=2m         keep the model warm 2 min, then evict
#                                (raise it if cold-load lag matters more
#                                than idle memory)
#   OLLAMA_MAX_LOADED_MODELS=1   never hold two VL models at once
#
# Usage:
#   ./start_ollama.sh        # background server, logs to /tmp/ollama.log
#   ./start_ollama.sh --fg   # foreground (for debugging)

# Tear down any existing runner/server first, then let it exit cleanly.
pkill -f "ollama (runner|serve)" 2>/dev/null
sleep 1

export OLLAMA_FLASH_ATTENTION=1
export OLLAMA_KV_CACHE_TYPE=q8_0
export OLLAMA_KEEP_ALIVE=2m
export OLLAMA_MAX_LOADED_MODELS=1

case "$1" in
  --fg)
    echo "Running ollama in foreground..."
    ollama serve
    ;;
  *)
    ollama serve > /tmp/ollama.log 2>&1 &
    sleep 3
    # Probe the HTTP API to confirm the server actually came up.
    if curl -sf http://localhost:11434/api/version > /dev/null; then
      echo "✓ Ollama started (pid $(pgrep -f 'ollama serve'))"
      echo " logs: tail -f /tmp/ollama.log"
      echo " stop: pkill -f 'ollama serve'"
    else
      echo "✗ Ollama failed to start — see /tmp/ollama.log"
      exit 1
    fi
    ;;
esac