Update 2026-05-13 14:42:31

This commit is contained in:
kassam 2026-05-13 14:42:34 +04:00
parent 54b1e745ca
commit edddb7e0c3
15 changed files with 501 additions and 50 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,106 +1,148 @@
{
"total_records": 3,
"total_records": 4,
"last_updated": "2026-04-19 20:55:59",
"records": [
{
"record_name": "R156",
"text": "هيه اعرفه طبعا .. ياولد هاد المندوس ياولد هات المندوس.. هذا المندوس مندوسك لايسحبون فلوسك\nالمندوس كله هدايا وفلوس \nتدخل وجيبك فاضي تظهر وجيبك متروووس",
"record_name": "testing_14",
"text": "يا هلا مسهلا معاك بوسينده،\nحاب تعمل معي فيديو مميز؟\nتواصل وياي وانا فريقي جاهزين خلنا نبدع معا ونعمل ترند.",
"replay_count": 1,
"timeline": {
"audio_generated_at": "2026-03-17T01:16:32",
"last_playback_finished_at": "2026-03-17T01:16:47",
"saved_at": "2026-04-19 20:55:59",
"imported_from_disk": true
"audio_generated_at": "2026-05-12 10:40:29",
"last_playback_finished_at": "2026-05-12 10:40:40",
"saved_at": "2026-05-12 10:40:40"
},
"audio_capture": {
"backend": "unknown"
"backend": "parec",
"sink": "alsa_output.platform-sound.analog-stereo",
"monitor_source": "alsa_output.platform-sound.analog-stereo.monitor",
"restored_microphone_source": "alsa_input.platform-sound.analog-stereo"
},
"files": {
"speaker_recording": {
"name": "R156.wav",
"path": "R156.wav",
"size_bytes": 713672,
"name": "testing_14.wav",
"path": "testing_14.wav",
"size_bytes": 468480,
"sample_rate": 24000,
"channels": 1,
"sample_width_bytes": 2,
"duration_seconds": 14.867
"duration_seconds": 9.76
},
"gemini_raw_output": {
"name": "R156_raw.wav",
"path": "R156_raw.wav",
"size_bytes": 706604,
"name": "testing_14_raw.wav",
"path": "testing_14_raw.wav",
"size_bytes": 468480,
"sample_rate": 24000,
"channels": 1,
"sample_width_bytes": 2,
"duration_seconds": 14.72
"duration_seconds": 9.76
}
}
},
{
"record_name": "R2(1)",
"text": "من اجمل البرامج في رمضان",
"record_name": "testing_15",
"text": "يا هلا مسهلا معاك بوسينده،\nحاب تعمل معي فيديو مميز؟\nتواصل وياي وانا فريقي جاهزين خلنا نبدع معا ونعمل ترند.",
"replay_count": 1,
"timeline": {
"audio_generated_at": "2026-03-16T19:37:29",
"last_playback_finished_at": "2026-03-16T19:37:32",
"saved_at": "2026-04-19 20:55:59",
"imported_from_disk": true
"audio_generated_at": "2026-05-12 10:41:16",
"last_playback_finished_at": "2026-05-12 10:41:26",
"saved_at": "2026-05-12 10:41:26"
},
"audio_capture": {
"backend": "unknown"
"backend": "parec",
"sink": "alsa_output.platform-sound.analog-stereo",
"monitor_source": "alsa_output.platform-sound.analog-stereo.monitor",
"restored_microphone_source": "alsa_input.platform-sound.analog-stereo"
},
"files": {
"speaker_recording": {
"name": "R2(1).wav",
"path": "R2(1).wav",
"size_bytes": 131526,
"name": "testing_15.wav",
"path": "testing_15.wav",
"size_bytes": 472320,
"sample_rate": 24000,
"channels": 1,
"sample_width_bytes": 2,
"duration_seconds": 2.739
"duration_seconds": 9.84
},
"gemini_raw_output": {
"name": "R2(1)_raw.wav",
"path": "R2(1)_raw.wav",
"size_bytes": 117164,
"name": "testing_15_raw.wav",
"path": "testing_15_raw.wav",
"size_bytes": 472320,
"sample_rate": 24000,
"channels": 1,
"sample_width_bytes": 2,
"duration_seconds": 2.44
"duration_seconds": 9.84
}
}
},
{
"record_name": "countdown_intro_2",
"text": "Look at the camera, stay ready, hold your pose with me, keep still, keep your smile soft, and in a moment I will count down for the photo.",
"replay_count": 0,
"record_name": "testing_16",
"text": "يا هلا مسهلا معاك بُوسْنَيْدَة،\nحاب تعمل معي فيديو مميز؟\nتواصل وياي وانا فريقي جاهزين خلنا نبدع معا ونعمل ترند.",
"replay_count": 1,
"timeline": {
"audio_generated_at": "",
"last_playback_finished_at": "",
"saved_at": "2026-04-19 20:55:59",
"imported_from_disk": true
"audio_generated_at": "2026-05-12 10:42:31",
"last_playback_finished_at": "2026-05-12 10:42:40",
"saved_at": "2026-05-12 10:42:40"
},
"audio_capture": {
"backend": "unknown"
"backend": "parec",
"sink": "alsa_output.platform-sound.analog-stereo",
"monitor_source": "alsa_output.platform-sound.analog-stereo.monitor",
"restored_microphone_source": "alsa_input.platform-sound.analog-stereo"
},
"files": {
"speaker_recording": {
"name": "countdown_intro_2.wav",
"path": "countdown_intro_2.wav",
"size_bytes": 432148,
"name": "testing_16.wav",
"path": "testing_16.wav",
"size_bytes": 426240,
"sample_rate": 24000,
"channels": 1,
"sample_width_bytes": 2,
"duration_seconds": 9.002
"duration_seconds": 8.88
},
"gemini_raw_output": {
"name": "countdown_intro_2_raw.wav",
"path": "countdown_intro_2_raw.wav",
"size_bytes": 418604,
"name": "testing_16_raw.wav",
"path": "testing_16_raw.wav",
"size_bytes": 426240,
"sample_rate": 24000,
"channels": 1,
"sample_width_bytes": 2,
"duration_seconds": 8.72
"duration_seconds": 8.88
}
}
},
{
"record_name": "testing_17",
"text": "يا هلا مسهلا معاك بُوسْنَيْدَة،\nحاب تعمل معي فيديو مميز؟\nتواصل وياي وانا فريقي جاهزين خلنا نبدع معا ونعمل ترند.",
"replay_count": 1,
"timeline": {
"audio_generated_at": "2026-05-12 10:42:31",
"last_playback_finished_at": "2026-05-12 10:42:40",
"saved_at": "2026-05-12 10:42:42"
},
"audio_capture": {
"backend": "parec",
"sink": "alsa_output.platform-sound.analog-stereo",
"monitor_source": "alsa_output.platform-sound.analog-stereo.monitor",
"restored_microphone_source": "alsa_input.platform-sound.analog-stereo"
},
"files": {
"speaker_recording": {
"name": "testing_17.wav",
"path": "testing_17.wav",
"size_bytes": 426240,
"sample_rate": 24000,
"channels": 1,
"sample_width_bytes": 2,
"duration_seconds": 8.88
},
"gemini_raw_output": {
"name": "testing_17_raw.wav",
"path": "testing_17_raw.wav",
"size_bytes": 426240,
"sample_rate": 24000,
"channels": 1,
"sample_width_bytes": 2,
"duration_seconds": 8.88
}
}
}

156
shell_scripts/clean_sanad.sh Executable file
View File

@ -0,0 +1,156 @@
#!/usr/bin/env bash
# clean_sanad.sh — wipe transient state (logs, recordings, audio, caches).
#
# Safe by default: shows a preview + asks for confirmation. Won't touch
# config files, skills.json, wake_phrases.json, recorded JSONL motions,
# or the model directory.
#
# Usage:
# ./clean_sanad.sh # interactive — preview + y/N prompt
# ./clean_sanad.sh -y # skip the prompt
# ./clean_sanad.sh --dry-run # show what would be deleted; delete nothing
# ./clean_sanad.sh --logs # logs only
# ./clean_sanad.sh --records # recorded turns + typed-replay audio only
# ./clean_sanad.sh --cache # __pycache__ only
# ./clean_sanad.sh --all # everything (default)
#
# Override the project location:
# SANAD_HOME=/some/path ./clean_sanad.sh
set -u
# Locate the project root. Preference order:
#   1. $SANAD_HOME from the environment (or ~/Sanad when unset/empty)
#   2. the parent directory of the directory holding this script
SANAD_HOME="${SANAD_HOME:-$HOME/Sanad}"
if ! [ -d "$SANAD_HOME" ]; then
    # Not found at the default location: assume we were started from a
    # repository checkout and use the directory above shell_scripts/.
    SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    SANAD_HOME="$(dirname "$SCRIPT_DIR")"
fi
# Still nothing usable: refuse to continue rather than clean a guess.
[ -d "$SANAD_HOME" ] || { echo "Sanad dir not found: $SANAD_HOME" >&2; exit 1; }
# ── flag parsing ──────────────────────────────────────────
# TARGET selects which category is collected; when several category
# flags are given the last one wins. DRY_RUN / ASSUME_YES are 0/1 flags.
TARGET="all"
DRY_RUN=0
ASSUME_YES=0
for arg in "$@"; do
    case "$arg" in
        -y|--yes)     ASSUME_YES=1 ;;
        -n|--dry-run) DRY_RUN=1 ;;
        --logs)       TARGET="logs" ;;
        --records)    TARGET="records" ;;
        --cache)      TARGET="cache" ;;
        --all)        TARGET="all" ;;
        -h|--help)
            # Print the leading comment block (everything after the shebang
            # up to the first non-comment line) instead of a fixed line
            # range, so help never leaks code and stays correct when the
            # header grows or shrinks.
            awk 'NR == 1 { next } /^#/ { print; next } { exit }' "$0"
            exit 0 ;;
        *)
            echo "unknown option: $arg (try -h)" >&2; exit 2 ;;
    esac
done
# ── targets — grouped per category so we can summarise per-category ──
declare -a LOGS_PATHS=()
declare -a RECORDS_PATHS=()
declare -a CACHE_PATHS=()
# collect_logs — append every regular file found at most two levels below
# $SANAD_HOME/logs to the global LOGS_PATHS array.
#
# Paths are read NUL-delimited: a file name containing a newline would
# otherwise be split into several bogus entries that are later handed to
# `rm -rf`. A missing logs directory is not an error (find's stderr is
# discarded and nothing is appended).
collect_logs() {
    local path
    while IFS= read -r -d '' path; do
        LOGS_PATHS+=("$path")
    done < <(
        find "$SANAD_HOME/logs" -maxdepth 2 -type f -print0 2>/dev/null
    )
}
# collect_records — append to the global RECORDS_PATHS array:
#   * every regular file anywhere under $SANAD_HOME/data/recordings
#   * *.wav / *.pcm files directly inside $SANAD_HOME/data/audio
#     (sub-directories and other file types there are left alone)
#
# Paths are read NUL-delimited so that a file name containing a newline
# stays one entry instead of being split into fragments that would later
# be passed to `rm -rf`. Missing directories are silently skipped.
collect_records() {
    local path
    while IFS= read -r -d '' path; do
        RECORDS_PATHS+=("$path")
    done < <(
        find "$SANAD_HOME/data/recordings" -type f -print0 2>/dev/null
    )
    while IFS= read -r -d '' path; do
        RECORDS_PATHS+=("$path")
    done < <(
        find "$SANAD_HOME/data/audio" -maxdepth 1 -type f \
            \( -name "*.wav" -o -name "*.pcm" \) -print0 2>/dev/null
    )
}
# collect_cache — append Python byte-code caches under $SANAD_HOME to the
# global CACHE_PATHS array:
#   * each __pycache__ directory (as a single entry, removed recursively)
#   * each *.pyc file that lives OUTSIDE a __pycache__ directory
#
# __pycache__ directories are pruned once matched, so the .pyc files they
# contain are not listed a second time; listing both would double-count
# them in the item and size totals. Output is NUL-delimited so paths
# containing newlines remain intact.
collect_cache() {
    local path
    while IFS= read -r -d '' path; do
        CACHE_PATHS+=("$path")
    done < <(
        find "$SANAD_HOME" \
            \( -type d -name "__pycache__" -prune -print0 \) -o \
            \( -type f -name "*.pyc" -print0 \) 2>/dev/null
    )
}
# Fill only the arrays that belong to the selected target; "all" runs the
# three collectors in the order logs, records, cache.
if [ "$TARGET" = "all" ] || [ "$TARGET" = "logs" ]; then
    collect_logs
fi
if [ "$TARGET" = "all" ] || [ "$TARGET" = "records" ]; then
    collect_records
fi
if [ "$TARGET" = "all" ] || [ "$TARGET" = "cache" ]; then
    collect_cache
fi
# sum_bytes PATH... — print the combined size in bytes of the given paths.
# Each existing path is measured with `du -sb`; arguments that do not
# exist (including the empty string) contribute nothing, and a path whose
# size cannot be read counts as 0.
sum_bytes() {
    local path size acc=0
    for path in "$@"; do
        if [ ! -e "$path" ]; then
            continue
        fi
        size=$(du -sb "$path" 2>/dev/null | awk '{print $1}')
        acc=$((acc + ${size:-0}))
    done
    echo "$acc"
}
# fmt BYTES — print a byte count in human-readable IEC form via numfmt;
# when numfmt fails or is unavailable, print the raw number followed by " B".
fmt() {
    if ! numfmt --to=iec --suffix=B "$1" 2>/dev/null; then
        echo "${1} B"
    fi
}
# Per-category byte totals. The ":-" fallback passes one empty argument for
# an empty array, which sum_bytes ignores.
LOGS_BYTES=$(sum_bytes "${LOGS_PATHS[@]:-}")
RECORDS_BYTES=$(sum_bytes "${RECORDS_PATHS[@]:-}")
CACHE_BYTES=$(sum_bytes "${CACHE_PATHS[@]:-}")
TOTAL_BYTES=$((LOGS_BYTES + RECORDS_BYTES + CACHE_BYTES))
TOTAL_COUNT=$(( ${#LOGS_PATHS[@]} + ${#RECORDS_PATHS[@]} + ${#CACHE_PATHS[@]} ))
# ── preview ───────────────────────────────────────────────
# Nothing collected: report and leave successfully.
[ "$TOTAL_COUNT" -ne 0 ] || { echo "Nothing to delete (target=$TARGET)."; exit 0; }
echo
echo "Sanad clean — target=$TARGET dry_run=$DRY_RUN"
echo "Project: $SANAD_HOME"
echo
# _row LABEL COUNT BYTES — one data line of the summary table.
_row() { printf '│ %-12s │ %7d │ %10s │\n' "$1" "$2" "$(fmt "$3")"; }
printf '┌──────────────┬─────────┬────────────┐\n'
printf '│ %-12s │ %7s │ %10s │\n' "Category" "Items" "Size"
printf '├──────────────┼─────────┼────────────┤\n'
# Categories with no items are left out of the table.
[ "${#LOGS_PATHS[@]}" -gt 0 ] && _row "logs" "${#LOGS_PATHS[@]}" "$LOGS_BYTES"
[ "${#RECORDS_PATHS[@]}" -gt 0 ] && _row "records" "${#RECORDS_PATHS[@]}" "$RECORDS_BYTES"
[ "${#CACHE_PATHS[@]}" -gt 0 ] && _row "cache" "${#CACHE_PATHS[@]}" "$CACHE_BYTES"
printf '├──────────────┼─────────┼────────────┤\n'
_row "TOTAL" "$TOTAL_COUNT" "$TOTAL_BYTES"
printf '└──────────────┴─────────┴────────────┘\n'
# Flatten the per-category lists into the single list used by the delete
# loop. Expanding an empty array with the ":-" fallback yields one empty
# string; the -n test drops those so only real paths are kept.
declare -a PATHS_TO_DELETE=()
for p in "${LOGS_PATHS[@]:-}" "${RECORDS_PATHS[@]:-}" "${CACHE_PATHS[@]:-}"; do
    if [ -n "$p" ]; then
        PATHS_TO_DELETE+=("$p")
    fi
done
# Dry run stops after the preview.
if [ "$DRY_RUN" -eq 1 ]; then
    echo "Dry run — nothing deleted."
    exit 0
fi
# Interactive confirmation unless -y/--yes was given; anything other than
# y / Y / yes / YES aborts without deleting.
if [ "$ASSUME_YES" -ne 1 ]; then
    read -r -p "Proceed with delete? [y/N] " ans
    if [ "$ans" != "y" ] && [ "$ans" != "Y" ] && [ "$ans" != "yes" ] && [ "$ans" != "YES" ]; then
        echo "Aborted."
        exit 0
    fi
fi
# ── delete ────────────────────────────────────────────────
# "--" keeps a path that begins with "-" from being read as an rm option.
removed=0
for p in "${PATHS_TO_DELETE[@]}"; do
    if rm -rf -- "$p"; then
        removed=$((removed + 1))
    fi
done
echo "Removed $removed of ${#PATHS_TO_DELETE[@]} item(s)."
# Recreate empty top-level dirs so next start_sanad.sh run doesn't
# complain about missing paths.
for d in "$SANAD_HOME/logs" "$SANAD_HOME/data/recordings" "$SANAD_HOME/data/audio"; do
    mkdir -p "$d"
done
echo "Done."

View File

@ -0,0 +1,31 @@
# systemd user-level unit for Sanad. Install with:
#
# mkdir -p ~/.config/systemd/user
# cp ~/Sanad/shell_scripts/sanad.service ~/.config/systemd/user/sanad.service
# systemctl --user daemon-reload
# systemctl --user enable --now sanad.service
# sudo loginctl enable-linger unitree # run at boot even when not logged in
#
# Watch logs:
# journalctl --user -u sanad.service -f
#
# Restart after a code/config change:
# systemctl --user restart sanad.service
[Unit]
Description=Sanad robot assistant (FastAPI dashboard + voice/motion subsystems)
# NOTE(review): network-online.target is normally provided by the system
# manager; in a `systemctl --user` unit this ordering/dependency may have
# no effect — confirm. start_sanad.sh does its own wait for the DDS
# network interface before launching.
After=network-online.target
Wants=network-online.target
[Service]
Type=exec
# %h expands to the home directory of the user running the service
# manager, so the unit follows whichever account installs it.
WorkingDirectory=%h/Sanad
ExecStart=/usr/bin/env bash %h/Sanad/shell_scripts/start_sanad.sh
# Restart only on unclean exits, waiting 5 s between attempts.
Restart=on-failure
RestartSec=5
# Stop by sending SIGINT and allow up to 15 s for shutdown.
# NOTE(review): presumably chosen so the Python process sees a
# KeyboardInterrupt-style shutdown — confirm against main.py.
TimeoutStopSec=15
KillSignal=SIGINT
# NOTE(review): PassEnvironment is documented for the system service
# manager; verify these variables actually reach the process in a user unit.
PassEnvironment=PULSE_RUNTIME_PATH XDG_RUNTIME_DIR
[Install]
WantedBy=default.target

64
shell_scripts/start_sanad.sh Executable file
View File

@ -0,0 +1,64 @@
#!/usr/bin/env bash
# start_sanad.sh — boot Sanad's main.py inside the gemini_sdk conda env.
#
# Used both manually (./start_sanad.sh) and from the systemd unit
# (sanad.service) for boot-time auto-start.
#
# Override knobs (env vars; all optional):
# SANAD_HOME project root (default ~/Sanad)
# SANAD_CONDA_ENV conda env name (default gemini_sdk)
# SANAD_CONDA_BASE conda install dir (default $HOME/miniconda3)
# SANAD_DDS_INTERFACE DDS network iface (default eth0)
# SANAD_VOICE_BRAIN gemini | local | model (default gemini)
# SANAD_AUDIO_PROFILE builtin | anker | hollyland_builtin (default builtin)
# PORT dashboard port (default 8000)
set -u
# Script-local settings (not exported): project root and conda location.
SANAD_HOME="${SANAD_HOME:-$HOME/Sanad}"
SANAD_CONDA_ENV="${SANAD_CONDA_ENV:-gemini_sdk}"
SANAD_CONDA_BASE="${SANAD_CONDA_BASE:-$HOME/miniconda3}"
# Settings exported to main.py; each keeps a caller-supplied value and
# otherwise falls back to the default listed in the header.
export SANAD_DDS_INTERFACE="${SANAD_DDS_INTERFACE:-eth0}"
export SANAD_VOICE_BRAIN="${SANAD_VOICE_BRAIN:-gemini}"
export SANAD_AUDIO_PROFILE="${SANAD_AUDIO_PROFILE:-builtin}"
export PORT="${PORT:-8000}"
# Mandatory environment fixes for Jetson + conda + Unitree SDK
# A caller-supplied LD_PRELOAD always wins. The libgomp default is applied
# only when the library is actually present: preloading a missing file
# makes the dynamic loader print an error for every process started.
SANAD_LIBGOMP="/usr/lib/aarch64-linux-gnu/libgomp.so.1"
if [ -n "${LD_PRELOAD:-}" ]; then
    export LD_PRELOAD
elif [ -f "$SANAD_LIBGOMP" ]; then
    export LD_PRELOAD="$SANAD_LIBGOMP"
fi
export PYTHONUNBUFFERED=1
# Optional: needed if/when the local pipeline imports CosyVoice
if [ -d "$HOME/CosyVoice" ]; then
    # Append the previous PYTHONPATH only when it is non-empty, so no
    # trailing ":" (an empty search-path entry) is produced.
    export PYTHONPATH="$HOME/CosyVoice:$HOME/CosyVoice/third_party/Matcha-TTS${PYTHONPATH:+:$PYTHONPATH}"
fi
# Run from the project root so main.py and its relative paths resolve.
cd "$SANAD_HOME" || { echo "Sanad dir not found: $SANAD_HOME" >&2; exit 1; }
# Activate conda
if [ ! -f "$SANAD_CONDA_BASE/etc/profile.d/conda.sh" ]; then
    echo "Conda not found at $SANAD_CONDA_BASE — set SANAD_CONDA_BASE" >&2
    exit 1
fi
# conda's shell hook and per-environment activate scripts are third-party
# code that is not guaranteed to be nounset-clean. In a non-interactive
# shell an unbound-variable reference under `set -u` terminates the whole
# script, so nounset is relaxed for this step only and restored afterwards.
set +u
# shellcheck disable=SC1091
source "$SANAD_CONDA_BASE/etc/profile.d/conda.sh"
if ! conda activate "$SANAD_CONDA_ENV"; then
    echo "Conda env '$SANAD_CONDA_ENV' activation failed" >&2
    exit 1
fi
set -u
# Wait for the DDS interface to come up — robot may still be booting.
# Polls once per second for up to 20 attempts. Startup continues either
# way, but a timeout is reported on stderr so the log shows that the
# link was still down when main.py was launched.
dds_iface_up=0
for ((attempt = 1; attempt <= 20; attempt++)); do
    if ip link show "$SANAD_DDS_INTERFACE" 2>/dev/null | grep -q "state UP"; then
        dds_iface_up=1
        break
    fi
    sleep 1
done
if [ "$dds_iface_up" -ne 1 ]; then
    echo "[start_sanad] WARNING: DDS iface '$SANAD_DDS_INTERFACE' not UP after 20s — continuing anyway" >&2
fi
# Print the effective configuration, then replace this shell with the
# Python process so signals from the caller reach main.py directly.
_banner() { echo "[start_sanad] $*"; }
_banner "$(date) — launching main.py"
_banner "SANAD_HOME=$SANAD_HOME"
_banner "conda env=$SANAD_CONDA_ENV"
_banner "DDS iface=$SANAD_DDS_INTERFACE"
_banner "voice brain=$SANAD_VOICE_BRAIN audio=$SANAD_AUDIO_PROFILE"
_banner "port=$PORT"
exec python3 main.py --port "$PORT"

73
voice/live_voice.py Normal file
View File

@ -0,0 +1,73 @@
"""Live Voice Commands — voice-to-arm phrase trigger dispatcher.
Listens to GeminiSubprocess user transcripts, matches against
sanad_arm.txt phrases, and fires ARM.trigger_action_by_id.
Endpoints:
POST /start begin polling transcripts
POST /stop stop polling
POST /deferred-mode?enabled toggle instant vs deferred trigger
POST /trigger-enabled?enabled master gate allow arm actions or not
GET /status running, last heard, last action, etc.
GET /triggers arm trigger history (log)
"""
from __future__ import annotations
from fastapi import APIRouter, HTTPException
router = APIRouter()
def _loop():
    """Return the ``live_voice`` object exposed by ``Project.Sanad.main``.

    The import happens at call time, so the value is looked up on every
    request rather than once when this module is loaded.

    Raises:
        HTTPException: status 503 when ``live_voice`` is ``None``.
    """
    from Project.Sanad.main import live_voice

    if live_voice is not None:
        return live_voice
    raise HTTPException(503, "LiveVoiceLoop not initialized.")
@router.get("/status")
async def status():
    """Report whether the live-voice loop exists and, if so, its status.

    Unlike the other endpoints this one never raises 503: when
    ``live_voice`` is ``None`` it answers ``{"available": False}``.
    Otherwise the fields of ``live_voice.status()`` are merged on top of
    ``{"available": True}``.
    """
    from Project.Sanad.main import live_voice

    if live_voice is not None:
        payload = {"available": True}
        payload.update(live_voice.status())
        return payload
    return {"available": False}
@router.post("/start")
async def start():
    """Await ``loop.start()`` and return ``ok`` merged with the loop status.

    Raises:
        HTTPException: 503 (via ``_loop``) when the loop is not initialized.
    """
    voice_loop = _loop()
    await voice_loop.start()
    response = {"ok": True}
    response.update(voice_loop.status())
    return response
@router.post("/stop")
async def stop():
    """Await ``loop.stop()`` and return ``ok`` merged with the loop status.

    Raises:
        HTTPException: 503 (via ``_loop``) when the loop is not initialized.
    """
    voice_loop = _loop()
    await voice_loop.stop()
    response = {"ok": True}
    response.update(voice_loop.status())
    return response
@router.post("/deferred-mode")
async def set_deferred(enabled: bool):
    """Pass ``enabled`` to ``loop.set_deferred`` and echo the resulting mode.

    Args:
        enabled: value forwarded unchanged to the loop.

    Returns:
        ``{"ok": True, "deferred_mode": <loop.deferred_mode>}``, where the
        mode is read back from the loop after the call.

    Raises:
        HTTPException: 503 (via ``_loop``) when the loop is not initialized.
    """
    voice_loop = _loop()
    voice_loop.set_deferred(enabled)
    current_mode = voice_loop.deferred_mode
    return {"ok": True, "deferred_mode": current_mode}
@router.post("/trigger-enabled")
async def set_trigger_enabled(enabled: bool):
    """Master gate for voice → arm triggering. Default OFF.

    Args:
        enabled: value forwarded unchanged to ``loop.set_trigger_enabled``.

    Returns:
        ``{"ok": True, "trigger_enabled": <loop.trigger_enabled>}``, where
        the flag is read back from the loop after the call.

    Raises:
        HTTPException: 503 (via ``_loop``) when the loop is not initialized.
    """
    voice_loop = _loop()
    voice_loop.set_trigger_enabled(enabled)
    gate_state = voice_loop.trigger_enabled
    return {"ok": True, "trigger_enabled": gate_state}
@router.get("/triggers")
async def triggers():
    """Return the recorded arm-trigger history and dispatch-table size.

    ``loop.triggers`` is copied into a list once and both ``triggers`` and
    ``total`` are derived from that copy, so the reported count always
    matches the returned list even if the loop appends an entry meanwhile.

    Returns:
        dict with ``triggers`` (list copy of ``loop.triggers``), ``total``
        (length of that list) and ``dispatch_actions``
        (``len(loop.wake_dispatch)``).

    Raises:
        HTTPException: 503 (via ``_loop``) when the loop is not initialized.
    """
    loop = _loop()
    history = list(loop.triggers)
    return {
        "triggers": history,
        "total": len(history),
        "dispatch_actions": len(loop.wake_dispatch),
    }

View File

@ -90,10 +90,20 @@ class LiveVoiceLoop:
from Project.Sanad.motion.sanad_arm_controller import ARM, OPTION_LIST, OPTION_BY_ID
self.sanad_arm = ARM
self.option_by_id = OPTION_BY_ID
# Voice-trigger policy: SDK built-ins only.
# JSONL replays (option.file set) are dashboard-only — voice
# phrase blocks for laugh/bird/change_battery/move_* never
# reach `wake_dispatch`, so a matched phrase for one of those
# silently no-ops in voice mode.
sdk_only_options = [o for o in OPTION_LIST if not getattr(o, "file", "")]
if SANAD_ARM_TXT.exists():
self.wake_dispatch = load_arm_phrase_dispatch(SANAD_ARM_TXT, OPTION_LIST)
log.info("loaded %d arm-action phrase sets from %s",
len(self.wake_dispatch), SANAD_ARM_TXT.name)
self.wake_dispatch = load_arm_phrase_dispatch(SANAD_ARM_TXT, sdk_only_options)
log.info(
"loaded %d arm-action phrase sets from %s "
"(SDK-only filter: %d/%d options)",
len(self.wake_dispatch), SANAD_ARM_TXT.name,
len(sdk_only_options), len(OPTION_LIST),
)
else:
log.warning("sanad_arm.txt missing at %s — arm trigger disabled",
SANAD_ARM_TXT)

75
voice/voice_config.json Normal file
View File

@ -0,0 +1,75 @@
{
"_description": "Tunables for voice/* modules. Loaded via core.config_loader.load('voice').",
"sanad_voice": {
"_comment": "voice/sanad_voice.py — main live voice subprocess. Gemini API credentials (api_key, model, voice_name) come from core_config.json's gemini_defaults — single source of truth.",
"mic_gain": 1.0,
"play_chunk_bytes": 96000,
"log_dir": "~/logs",
"log_name": "gemini_live_v2",
"session_timeout_sec": 660,
"max_reconnect_delay_sec": 30,
"max_consecutive_errors": 10,
"no_messages_timeout_sec": 30
},
"mic_udp": {
"_comment": "G1 built-in mic — UDP multicast subscriber",
"group": "239.168.123.161",
"port": 5555,
"buffer_max_bytes": 64000,
"read_timeout_sec": 0.04,
"socket_timeout_sec": 1.0
},
"speaker": {
"_comment": "G1 built-in speaker — AudioClient.PlayStream wrapper",
"app_name": "sanad",
"begin_stream_pause_sec": 0.15,
"wait_finish_margin_sec": 0.3
},
"vad": {
"_comment": "Gemini Live server-side voice-activity-detection config",
"start_sensitivity": "START_SENSITIVITY_HIGH",
"end_sensitivity": "END_SENSITIVITY_LOW",
"prefix_padding_ms": 20,
"silence_duration_ms": 200
},
"barge_in": {
"threshold": 500,
"loud_chunks_needed": 3,
"cooldown_sec": 0.3,
"echo_suppress_below": 500,
"ai_speak_grace_sec": 0.15
},
"recording": {
"enabled": true,
"dir_relative": "data/recordings"
},
"typed_replay": {
"_comment": "voice/typed_replay.py — max_text_len comes from dashboard.api_input",
"monitor_chunk_size": 512,
"monitor_tail_sec": 0.2
},
"live_voice_loop": {
"_comment": "voice/live_voice_loop.py — arm phrase dispatcher. arm_txt filename comes from core.script_files.arm_phrases",
"trigger_log_size": 100,
"poll_interval_sec": 0.1,
"deferred_default": false,
"trigger_enabled_default": false
},
"local_tts": {
"_comment": "voice/local_tts.py — offline Coqui TTS",
"model_subdir": "speecht5_tts_clartts_ar",
"vocoder_subdir": "speecht5_hifigan",
"xvector_filename": "arabic_xvector_embedding.pt",
"sample_rate": 16000,
"channels": 1
}
}