Update 2026-04-21 11:47:43

This commit is contained in:
kassam 2026-04-21 11:47:44 +04:00
parent 94e4a9c4cb
commit cf5e916120
11 changed files with 580 additions and 92 deletions

View File

@ -5,6 +5,7 @@
"_comment": "motion/arm_controller.py — enable_arm_sdk_index + replay_hz come from core.g1_hardware",
"ramp_in_steps": 60,
"ramp_out_steps": 180,
"settle_hold_sec": 0.5,
"watchdog_timeout_sec": 0.25,
"watchdog_disable_after_sec": 1.0,
"arm_indices_start": 15,

View File

@ -60,7 +60,8 @@
"_comment": "voice/live_voice_loop.py — arm phrase dispatcher. arm_txt filename comes from core.script_files.arm_phrases",
"trigger_log_size": 100,
"poll_interval_sec": 0.1,
"deferred_default": false
"deferred_default": false,
"trigger_enabled_default": false
},
"local_tts": {

View File

@ -7,6 +7,7 @@ Endpoints:
POST /start begin polling transcripts
POST /stop stop polling
POST /deferred-mode?enabled toggle instant vs deferred trigger
POST /trigger-enabled?enabled master gate allow arm actions or not
GET /status running, last heard, last action, etc.
GET /triggers arm trigger history (log)
"""
@ -54,6 +55,14 @@ async def set_deferred(enabled: bool):
return {"ok": True, "deferred_mode": loop.deferred_mode}
@router.post("/trigger-enabled")
async def set_trigger_enabled(enabled: bool):
"""Master gate for voice → arm triggering. Default OFF."""
loop = _loop()
loop.set_trigger_enabled(enabled)
return {"ok": True, "trigger_enabled": loop.trigger_enabled}
@router.get("/triggers")
async def triggers():
loop = _loop()

View File

@ -114,9 +114,16 @@ async def test_replay(payload: ReplayRequest):
@router.post("/cancel")
async def cancel_replay():
"""Stop the current replay — the smooth return-to-home runs as the
final phase of the replay itself.
Matches g1_replay_v4_stable.py's behaviour: the play loop breaks on
the cancel flag, then the same Run() function executes its
return-home ramp + DisableSDK. No separate scheduling needed.
"""
from Project.Sanad.main import arm
arm.cancel()
return {"ok": True, "message": "Cancel signal sent."}
return {"ok": True, "message": "Cancelled — returning to home pose smoothly."}
@router.get("/status")

View File

@ -259,9 +259,15 @@
<button class="btn btn-sm mic-mute-shortcut btn-success" onclick="toggleMic()" style="margin-left:auto">Mic: LIVE</button>
<button class="btn btn-sm spk-mute-shortcut btn-success" onclick="toggleSpeaker()">Speaker: LIVE</button>
</div>
<div class="row" style="margin-top:.4rem">
<div class="row" style="margin-top:.4rem;gap:1.2rem">
<div class="row" style="gap:.4rem">
<label>Arm Trigger</label>
<label class="switch" title="Master gate — when OFF, voice never moves the arm"><input type="checkbox" id="lv-trigger-enabled" onchange="setTriggerEnabled(this.checked)"><span class="slider"></span></label>
</div>
<div class="row" style="gap:.4rem">
<label>Deferred Trigger</label>
<label class="switch"><input type="checkbox" id="lv-deferred" checked onchange="setDeferredMode(this.checked)"><span class="slider"></span></label>
<label class="switch" title="When ON, arm fires ~0.6s after you stop talking"><input type="checkbox" id="lv-deferred" onchange="setDeferredMode(this.checked)"><span class="slider"></span></label>
</div>
</div>
<div style="margin-top:.4rem;font-size:.72rem;color:var(--muted)">
<div>Last heard: <strong id="lv-last-text">--</strong></div>
@ -1091,7 +1097,7 @@ async function renderActions(arm){
}catch(e){}
}
async function triggerAction(id,name){if(_armBusy)return;_runId=id;_armBusy=true;document.getElementById('running-action').textContent='Running: '+name.replace(/_/g,' ')+'...';document.getElementById('running-action').style.display='block';renderActions({busy:true});const speed=parseFloat(document.getElementById('action-speed').value||document.getElementById('action-speed-2').value);try{await api('POST','/api/motion/trigger',{action_id:id,speed});}catch(e){}pollArmBusy();}
async function cancelAction(){try{await api('POST','/api/replay/cancel');toast('Cancelled','info');}catch(e){}_armBusy=false;_runId=null;document.getElementById('running-action').style.display='none';refreshStatus();}
async function cancelAction(){try{const r=await api('POST','/api/replay/cancel');toast(r&&r.message?r.message:'Cancelled','info');}catch(e){}_armBusy=false;_runId=null;document.getElementById('running-action').style.display='none';refreshStatus();}
let _armPoll;function pollArmBusy(){clearInterval(_armPoll);_armPoll=setInterval(async()=>{try{const s=await api('GET','/api/replay/status');if(!s.arm?.busy){clearInterval(_armPoll);_armBusy=false;_runId=null;document.getElementById('running-action').style.display='none';refreshStatus();}}catch(e){clearInterval(_armPoll);}},500);}
// Skills
@ -1106,7 +1112,7 @@ async function playMacro(b){const n=document.getElementById('play-macro-name').v
// Replay
async function refreshReplayFiles(){try{const r=await api('GET','/api/replay/files');const el=document.getElementById('replay-files');if(!(r.files||[]).length){el.innerHTML='<div class="empty">No motion files</div>';return;}el.innerHTML='<table><tr><th>File</th><th>Frames</th><th>Duration</th><th>Size</th><th></th></tr>'+(r.files||[]).map(f=>`<tr><td>${esc(f.name)}</td><td>${f.frames}</td><td>${f.duration_sec}s</td><td>${f.size_kb}KB</td><td><button class="btn btn-primary btn-sm" onclick="document.getElementById('replay-name').value='${esc(f.name)}';testReplay()">Play</button> <button class="btn btn-danger btn-sm" onclick="deleteMotionFile('${esc(f.name)}')">Del</button></td></tr>`).join('')+'</table>';}catch(e){}}
async function testReplay(b){const n=document.getElementById('replay-name').value,s=parseFloat(document.getElementById('replay-speed').value);if(!n)return;btnLoad(b);try{await api('POST','/api/replay/test',{name:n,speed:s});toast('Replay: '+n,'ok');pollArmBusy();}catch(e){}btnDone(b);}
async function cancelReplay(){try{await api('POST','/api/replay/cancel');toast('Cancelled','info');}catch(e){}}
async function cancelReplay(){try{const r=await api('POST','/api/replay/cancel');toast(r&&r.message?r.message:'Cancelled','info');}catch(e){}}
async function deleteMotionFile(n){if(confirm('Delete '+n+'?'))try{await api('DELETE','/api/replay/files/'+encodeURIComponent(n));toast('Deleted','ok');refreshReplayFiles();populateGestureSelect();}catch(e){}}
async function uploadMotionFile(input){if(!input.files[0])return;const fd=new FormData();fd.append('file',input.files[0]);try{const r=await fetch('/api/replay/files/upload',{method:'POST',body:fd});if(!r.ok){const j=await r.json();toast(j.detail||'Upload failed','err');}else{toast('Uploaded','ok');refreshReplayFiles();populateGestureSelect();}}catch(e){toast('Upload error','err');}input.value='';}
async function startTeaching(b){const n=document.getElementById('teach-name').value,d=parseFloat(document.getElementById('teach-duration').value);if(!n)return toast('Enter name','err');btnLoad(b);try{await api('POST','/api/replay/teach/start',{name:n,duration_sec:d});toast('Teaching: '+n,'ok');pollTeachStatus();}catch(e){}btnDone(b);}
@ -1146,6 +1152,7 @@ async function startLiveVoice(b){
}
async function stopLiveVoice(b){btnLoad(b);try{await api('POST','/api/live-voice/stop');toast('Stopped','info');}catch(e){}btnDone(b);refreshLiveVoice();}
async function setDeferredMode(v){try{await api('POST','/api/live-voice/deferred-mode?enabled='+v);}catch(e){}}
async function setTriggerEnabled(v){try{await api('POST','/api/live-voice/trigger-enabled?enabled='+v);}catch(e){}}
async function refreshLiveVoice(){
try{
const r=await api('GET','/api/live-voice/status');
@ -1156,6 +1163,7 @@ async function refreshLiveVoice(){
document.getElementById('lv-last-text').textContent=r.last_heard||'--';
document.getElementById('lv-pending').textContent=r.pending_action||r.last_action||'--';
document.getElementById('lv-deferred').checked=r.deferred_mode===true;
document.getElementById('lv-trigger-enabled').checked=r.trigger_enabled===true;
document.getElementById('lv-audio').textContent=r.audio_attached?'yes':'no';
document.getElementById('lv-arm').textContent=r.arm_attached?'yes':'no';
document.getElementById('lv-gem').textContent=r.gemini_connected?'connected':'disconnected';
@ -1176,10 +1184,10 @@ async function stopLiveSub(b){btnLoad(b);try{await api('POST','/api/live-subproc
async function refreshLiveSub(){try{const r=await api('GET','/api/live-subprocess/status');const st=document.getElementById('ls-state');st.textContent=r.state||'stopped';st.className='badge '+(r.running?'badge-ok':'badge-warn');document.getElementById('ls-msg').textContent=r.state_message||'--';document.getElementById('ls-user').textContent=r.last_user_text||'--';document.getElementById('ls-log').textContent=(r.log_tail||[]).slice(-25).join('\n');}catch(e){}}
// Typed Replay
async function trGenerate(b){const t=document.getElementById('tr-text').value;if(!t)return toast('Enter text','err');btnLoad(b);try{await api('POST','/api/typed-replay/generate',{text:t,record_name:document.getElementById('tr-name').value,capture_speaker:document.getElementById('tr-capture').checked});toast('Generated & played','ok');refreshTR();}catch(e){}btnDone(b);}
async function trReplayLast(b){btnLoad(b);try{await api('POST','/api/typed-replay/replay-last?capture_speaker='+document.getElementById('tr-capture').checked);toast('Replayed','ok');refreshTR();}catch(e){}btnDone(b);}
async function trSaveLast(b){btnLoad(b);try{await api('POST','/api/typed-replay/save-last',{name:document.getElementById('tr-name').value});toast('Saved','ok');refreshTR();refreshRecords();}catch(e){}btnDone(b);}
async function refreshTR(){try{const r=await api('GET','/api/typed-replay/status');const s=r.session||{};document.getElementById('tr-session').innerHTML=`<strong>Text:</strong> ${esc(s.text||'--')}<br><strong>Audio:</strong> ${s.has_audio?'Yes':'No'} | <strong>Capture:</strong> ${s.has_speaker_capture?'Yes':'No'}<br><strong>Replays:</strong> ${s.replay_count||0}<br><strong>Generated:</strong> ${s.generated_at||'--'}<br><strong>Saved:</strong> ${s.saved_entry?.record_name||'--'}`;}catch(e){}}
async function trGenerate(b){const t=document.getElementById('tr-text').value;if(!t)return toast('Enter text','err');btnLoad(b);try{await api('POST','/api/typed-replay/say',{text:t,record:document.getElementById('tr-capture').checked,record_name:document.getElementById('tr-name').value});toast('Generated & played','ok');refreshTR();}catch(e){}btnDone(b);}
async function trReplayLast(b){btnLoad(b);try{await api('POST','/api/typed-replay/replay-last');toast('Replayed','ok');refreshTR();}catch(e){}btnDone(b);}
async function trSaveLast(b){btnLoad(b);try{await api('POST','/api/typed-replay/save-last',{record_name:document.getElementById('tr-name').value});toast('Saved','ok');refreshTR();refreshRecords();}catch(e){}btnDone(b);}
async function refreshTR(){try{const r=await api('GET','/api/typed-replay/status');const s=r.session||{};document.getElementById('tr-session').innerHTML=`<strong>Text:</strong> ${esc(s.text||'--')}<br><strong>Audio:</strong> ${s.has_audio?'Yes':'No'} | <strong>Capture:</strong> ${s.has_capture?'Yes':'No'}<br><strong>Replays:</strong> ${s.replay_count||0}<br><strong>Generated:</strong> ${s.generated_at||'--'}<br><strong>Saved:</strong> ${esc(s.saved_as||'--')}`;}catch(e){}}
// YOLO
async function loadDetector(b){btnLoad(b);try{const r=await api('POST','/api/detector/load');const el=document.getElementById('yolo-status');el.textContent=r.ok?'Loaded':'Failed';el.className='badge '+(r.ok?'badge-ok':'badge-err');toast(r.ok?'Model loaded':'Failed',r.ok?'ok':'err');}catch(e){}btnDone(b);}
@ -1306,10 +1314,10 @@ async function autoStartLiveSub(){
}catch(e){}
}
// Init
refreshStatus();refreshSystem();refreshAudio();refreshAudioDevices();refreshSkills();refreshReplayFiles();refreshScripts();refreshPrompt();refreshRecords();refreshPhotos();refreshCamSources();refreshCamDevices();refreshLocalCam();populateGestureSelect();refreshLiveVoice();refreshLiveSub();refreshTR();refreshDetector();refreshWakeActions();refreshApiKey();connectLogs();connectCamera();
// Init — vision/camera/detector fetches removed; those endpoints were deleted.
refreshStatus();refreshSystem();refreshAudio();refreshAudioDevices();refreshSkills();refreshReplayFiles();refreshScripts();refreshPrompt();refreshRecords();populateGestureSelect();refreshLiveVoice();refreshLiveSub();refreshTR();refreshWakeActions();refreshApiKey();connectLogs();
setTimeout(autoConnectGemini,2000);setTimeout(autoStartLiveSub,3000);
setInterval(refreshStatus,5000);setInterval(refreshSystem,30000);setInterval(refreshLocalCam,10000);setInterval(refreshLiveVoice,5000);setInterval(refreshLiveSub,5000);
setInterval(refreshStatus,5000);setInterval(refreshSystem,30000);setInterval(refreshLiveVoice,5000);setInterval(refreshLiveSub,5000);
</script>
</body>
</html>

10
main.py
View File

@ -47,6 +47,16 @@ else:
sys.modules["Project.Sanad"] = _sanad
sys.modules["Project"].Sanad = _sanad # type: ignore[attr-defined]
# When main.py runs as a script (`python3 main.py`), Python loads it as the
# `__main__` module — NOT as `Project.Sanad.main`. Route handlers later do
# `from Project.Sanad.main import arm` etc; without the alias below, Python
# would re-execute this file from scratch under a different module name,
# creating a SECOND set of subsystem instances (uninitialised). Every
# `subsystem not available` / `No LowState` symptom traces back to this.
# The alias ensures both names point at the exact same module object.
if __name__ == "__main__":
sys.modules["Project.Sanad.main"] = sys.modules["__main__"]
# asyncio compat shim — backfills asyncio.to_thread for Python 3.8.
# MUST be imported before any other Sanad module that uses asyncio.to_thread.
from Project.Sanad.core import asyncio_compat # noqa: F401

View File

@ -38,6 +38,7 @@ _AC = _cfg_section("motion", "arm_controller")
from Project.Sanad.config import ENABLE_ARM_SDK_INDEX
RAMP_IN_STEPS = _AC.get("ramp_in_steps", 60) # ~1.0s smooth move to start pose
RAMP_OUT_STEPS = _AC.get("ramp_out_steps", 180) # ~3.0s smooth return to home
SETTLE_HOLD_SEC = _AC.get("settle_hold_sec", 0.5) # hold start pose before replay begins
WATCHDOG_TIMEOUT = _AC.get("watchdog_timeout_sec", 0.25) # hold last pose if state stale
WATCHDOG_DISABLE_AFTER = _AC.get("watchdog_disable_after_sec", 1.0) # abort if state stale this long
ARM_INDICES = range(
@ -71,12 +72,21 @@ except ImportError:
_make_low_cmd = None
log.warning("Unitree SDK not available — running in simulation mode")
# Optional G1 high-level SDK action client (built-in actions)
# G1 arm-action client for built-in arm moves (wave, shake_hand, hug, …).
# NOTE: do NOT use LocoClient here — LocoClient is the locomotion/body-move
# client and its ExecuteAction() doesn't recognise arm-action IDs, so arm
# commands become silent no-ops. The correct client is the arm-specific
# G1ArmActionClient with the SDK's action_map (name → opcode lookup).
try:
from unitree_sdk2py.g1.loco.g1_loco_client import LocoClient
_HAS_LOCO = True
from unitree_sdk2py.g1.arm.g1_arm_action_client import (
G1ArmActionClient,
action_map as _ARM_ACTION_MAP,
)
_HAS_ARM_CLIENT = True
except ImportError:
_HAS_LOCO = False
G1ArmActionClient = None
_ARM_ACTION_MAP = {}
_HAS_ARM_CLIENT = False
@dataclass
@ -189,7 +199,7 @@ class ArmController:
self._state_sub = None
self._low_cmd = None
self._crc = None
self._loco_client = None
self._arm_client = None
# Live state from LowState_ subscriber
self._current_q: list[float] = [0.0] * G1_NUM_MOTOR
@ -242,16 +252,17 @@ class ArmController:
)
self._crc = CRC()
# High-level SDK action client (for built-in actions)
if _HAS_LOCO:
# Arm-specific action client for built-in moves
if _HAS_ARM_CLIENT:
try:
self._loco_client = LocoClient()
self._loco_client.SetTimeout(10.0)
self._loco_client.Init()
log.info("LocoClient initialized — built-in SDK actions available")
self._arm_client = G1ArmActionClient()
self._arm_client.SetTimeout(10.0)
self._arm_client.Init()
log.info("G1ArmActionClient initialized (%d actions) — built-in moves available",
len(_ARM_ACTION_MAP))
except Exception as exc:
log.warning("LocoClient init failed: %s — built-in actions disabled", exc)
self._loco_client = None
log.warning("G1ArmActionClient init failed: %s — built-in actions disabled", exc)
self._arm_client = None
self._initialized = True
log.info("DDS initialized on %s", network_interface)
@ -332,12 +343,13 @@ class ArmController:
self._arm_pub.Write(self._low_cmd)
def _disable_sdk(self):
"""Send 5 disable frames to cleanly release arm control."""
"""Send 10 disable frames at 50 Hz — direct port of
g1_replay_v4_stable.py:DisableSDK (lines 141-147)."""
if not self._initialized or self._low_cmd is None:
return
for _ in range(5):
self._low_cmd.motor_cmd[ENABLE_ARM_SDK_INDEX].q = 0.0
self._low_cmd.crc = self._crc.Crc(self._low_cmd)
for _ in range(10):
self._arm_pub.Write(self._low_cmd)
time.sleep(0.02)
@ -365,6 +377,24 @@ class ArmController:
self._last_action_time = time.monotonic()
def _replay_file_inner(self, filepath: str, speed: float = 1.0):
"""One-for-one port of g1_replay_v4_stable.py:ReplayWithHome.Run().
Five phases timing and math match the reference exactly:
1. Wait for first LowState_ message (no body-lock from zeros).
2. Load data: home_q (last valid frame of arm_home.jsonl),
full_body_lock_q (live snapshot), and the replay frames.
3. MOVE TO START 60 steps at 60 Hz, alpha = k/steps (starts
at 0 = exact current pose, ends at 59/60 just shy of target).
3b. SETTLE HOLD replaces the reference's human
`input("Press Enter to Begin")` pause; gives the physical
motors time to reach the commanded start pose before
playback so the first real frames don't jerk.
4. PLAY `for f in frames: if f['t']-t0 >= play_elapsed`
frame-select pattern, fixed 1/REPLAY_HZ sleep per iteration.
5. RETURN HOME 180 steps alpha = k/steps from last_played_q
to home_q, body locked. Then DisableSDK (10 frames).
"""
path = Path(filepath)
if not path.is_absolute():
path = MOTIONS_DIR / path
@ -376,84 +406,116 @@ class ArmController:
log.warning("Empty motion file: %s", path)
return
home_q = _load_home_q() or [0.0] * G1_NUM_MOTOR
if not _HAS_SDK:
duration = len(frames) / REPLAY_HZ / speed if speed else len(frames) / REPLAY_HZ
log.info("[SIM] Replay %s (%.1fs, %d frames, speed=%.1f)", path.name, duration, len(frames), speed)
log.info("[SIM] Replay %s (%.1fs, %d frames, speed=%.1f)",
path.name, duration, len(frames), speed)
self._sim_replay(frames, speed)
return
log.info("Replay %s (%d frames @ %.0f Hz, speed=%.1f)", path.name, len(frames), REPLAY_HZ, speed)
log.info("Replay %s (%d frames @ %.0f Hz, speed=%.1f)",
path.name, len(frames), REPLAY_HZ, speed)
# CRITICAL: wait for first LowState_ callback before reading _current_q
# Otherwise we'd lock the body to all-zeros and crash the robot.
# ─── 1. Wait for first LowState ─────────────────────────
if not self._first_state_event.is_set():
log.warning("Waiting for first LowState message...")
if not self._first_state_event.wait(timeout=2.0):
log.error("No LowState received in 2s — refusing to replay (would lock body to zeros)")
return
body_lock_q = self._get_current_q()
current_arm_q = list(body_lock_q)
start_arm_q = frames[0]["q"]
interval = 1.0 / REPLAY_HZ
# ─── 2. Load data (ref lines 154-166) ───────────────────
home_q = _load_home_q() or [0.0] * G1_NUM_MOTOR
full_body_lock_q = self._get_current_q() # snapshot live state
# Phase 1: Ramp-in — interpolate from current pose to first frame
log.debug("Ramp-in (%d steps)", RAMP_IN_STEPS)
for step in range(RAMP_IN_STEPS):
interval = 1.0 / REPLAY_HZ
file_start_q = frames[0]["q"]
# ─── 3. MOVE TO START (ref lines 171-181) ───────────────
log.debug("Moving to start (%d steps)", RAMP_IN_STEPS)
for k in range(RAMP_IN_STEPS):
if self._cancel.is_set():
self._return_home(current_arm_q, body_lock_q, home_q)
self._return_home(full_body_lock_q, full_body_lock_q, home_q)
return
t = (step + 1) / RAMP_IN_STEPS
interp = _lerp_q(current_arm_q, start_arm_q, t)
self._send_frame(interp, body_lock_q)
alpha = k / RAMP_IN_STEPS # 0 .. (RAMP_IN_STEPS-1)/RAMP_IN_STEPS
interp_q = list(full_body_lock_q)
for j in range(15, G1_NUM_MOTOR):
interp_q[j] = (1 - alpha) * full_body_lock_q[j] + alpha * file_start_q[j]
self._send_frame(interp_q, full_body_lock_q)
time.sleep(interval)
# Phase 2: Play frames with timing and watchdog
# ─── 3b. SETTLE HOLD — replaces reference's Enter pause ─
settle_frames = max(0, int(SETTLE_HOLD_SEC * REPLAY_HZ))
if settle_frames > 0:
log.debug("Settle hold (%d frames / %.2fs)", settle_frames, SETTLE_HOLD_SEC)
for _ in range(settle_frames):
if self._cancel.is_set():
self._return_home(file_start_q, full_body_lock_q, home_q)
return
self._send_frame(file_start_q, full_body_lock_q)
time.sleep(interval)
# ─── 4. PLAY (ref lines 183-234) ────────────────────────
log.debug("Playing %d frames", len(frames))
last_played_q = file_start_q
play_elapsed = 0.0
last_real = time.monotonic()
t0_frame = frames[0].get("t", 0.0)
t0 = frames[0].get("t", 0.0)
for frame in frames:
while True:
if self._cancel.is_set():
break
# Watchdog
# Watchdog — abort if LowState goes stale
age = self._state_age()
if age > WATCHDOG_DISABLE_AFTER:
log.error("Watchdog abort — LowState stale %.2fs", age)
self._disable_sdk()
return
if age > WATCHDOG_TIMEOUT:
log.warning("Watchdog hold — LowState stale %.2fs", age)
now_real = time.monotonic()
play_elapsed += (now_real - last_real) * speed
dt_real = now_real - last_real
last_real = now_real
play_elapsed += dt_real * speed
frame_t = frame.get("t", 0.0) - t0_frame
if frame_t > play_elapsed:
time.sleep(min(frame_t - play_elapsed, interval))
# Pick the next frame whose timestamp has elapsed (reference pattern)
target_frame = None
for f in frames:
if f.get("t", 0.0) - t0 >= play_elapsed:
target_frame = f
break
if target_frame is None:
break
self._send_frame(frame["q"], body_lock_q)
# NOTE: timing is driven by frame_t above — no extra sleep here
self._send_frame(target_frame["q"], full_body_lock_q)
last_played_q = target_frame["q"]
time.sleep(interval)
# Phase 3: Return home
last_arm_q = frames[-1]["q"] if frames else current_arm_q
self._return_home(last_arm_q, body_lock_q, home_q)
# ─── 5. RETURN HOME (ref lines 239-256) + DisableSDK ────
self._return_home(last_played_q, full_body_lock_q, home_q)
def _return_home(self, from_q: list[float], body_lock_q: list[float], home_q: list[float]):
"""Smooth interpolation back to home pose, then disable SDK."""
log.debug("Returning home (%d steps)", RAMP_OUT_STEPS)
"""Smooth return to home — direct port of g1_replay_v4_stable.py:239-256.
180 steps × (1/60)s = 3s linear ramp on arm motors only (indices
15-28); body motors (0-14) stay locked to `body_lock_q`. Then
DisableSDK sends 10 disable-bit frames at 50 Hz.
IMPORTANT: the reference's return-home is unconditional — it
always runs to completion regardless of why the play loop ended
(natural end OR 'q' press). We clear `_cancel` at entry so a
user-hit Cancel (which set `_cancel` to break the play loop)
doesn't also abort the return ramp. Without this, the arm
"snaps" home because the loop exits on the first iteration.
"""
self._cancel.clear()
log.info("Returning home (%d steps / %.1fs)", RAMP_OUT_STEPS, RAMP_OUT_STEPS / REPLAY_HZ)
interval = 1.0 / REPLAY_HZ
for step in range(RAMP_OUT_STEPS):
if self._cancel.is_set():
break
t = (step + 1) / RAMP_OUT_STEPS
interp = _lerp_q(from_q, home_q, t)
self._send_frame(interp, body_lock_q)
for k in range(RAMP_OUT_STEPS):
alpha = k / RAMP_OUT_STEPS # 0 .. (RAMP_OUT_STEPS-1)/RAMP_OUT_STEPS
interp_q = list(from_q)
for j in range(15, G1_NUM_MOTOR):
interp_q[j] = (1 - alpha) * from_q[j] + alpha * home_q[j]
self._send_frame(interp_q, body_lock_q)
time.sleep(interval)
self._disable_sdk()
log.info("Home reached, SDK disabled")
@ -475,9 +537,81 @@ class ArmController:
return self._is_busy
def cancel(self):
"""Cancel the currently running replay."""
"""Cancel the currently running replay.
Sets the cancel flag the play loop in `_replay_file_inner`
checks this and breaks out; `_return_home` then runs as the
final phase of the same replay (matches the reference's
end-of-Run behaviour in g1_replay_v4_stable.py).
"""
self._cancel.set()
def _unused_return_to_home(self, duration_sec: float = 3.0,
home_file: str = "arm_home.jsonl") -> None:
"""Deprecated — replay's own `_return_home` is called automatically
when cancel breaks the play loop. Kept here only to preserve any
external caller; no new code should use this.
"""
if not self._initialized or self._low_cmd is None:
log.warning("return_to_home: arm controller not initialised")
return
if not self._first_state_event.wait(timeout=2.0):
log.error("return_to_home: no LowState received in 2s — aborting")
return
home_path = MOTIONS_DIR / home_file
if not home_path.exists():
log.warning("return_to_home: %s missing — skipping", home_path.name)
return
# Use the LAST valid 'q' in the file as the settle pose
home_q: list[float] | None = None
try:
for frame in _load_frames(home_path):
q = frame.get("q")
if q and len(q) == G1_NUM_MOTOR:
home_q = q
except Exception as exc:
log.warning("return_to_home: reading %s failed: %s",
home_path.name, exc)
return
if home_q is None:
log.warning("return_to_home: %s has no valid 'q' frames",
home_path.name)
return
with self._state_lock:
start_q = list(self._current_q)
body_lock_q = list(start_q)
# Let the ramp publish frames even though we just cancelled
self._cancel.clear()
with self._lock:
if self._is_busy:
# A pending replay is still winding down — wait a beat
log.debug("return_to_home: arm busy, waiting briefly")
self._is_busy = True
try:
steps = max(30, int(duration_sec * REPLAY_HZ)) # ≥ 0.5s ramp
dt = 1.0 / REPLAY_HZ
log.info("return_to_home: ramp %d steps (%.1fs) → %s",
steps, duration_sec, home_file)
for k in range(steps):
if self._cancel.is_set():
log.info("return_to_home: cancelled mid-ramp")
break
alpha = (k + 1) / steps
interp_q = list(body_lock_q)
for j in range(15, G1_NUM_MOTOR):
interp_q[j] = (1 - alpha) * start_q[j] + alpha * home_q[j]
self._send_frame(interp_q, body_lock_q)
time.sleep(dt)
log.info("return_to_home: done")
finally:
with self._lock:
self._is_busy = False
def refresh_actions(self):
"""Re-scan data/motions/ and rebuild the action registry."""
global ACTIONS, ACTION_BY_ID, ACTION_BY_NAME
@ -558,19 +692,39 @@ class ArmController:
log.info("[SIM] SDK action: %s (id=%d)", action.name, action.id)
time.sleep(2.0)
return
if self._loco_client is None:
if self._arm_client is None:
log.warning(
"SDK action %s requested but LocoClient not available — skipping",
"SDK action %s requested but G1ArmActionClient not available — skipping",
action.name,
)
return
log.info("SDK action: %s (id=%d)", action.name, action.id)
# Sanad's registry uses underscored names ("shake_hand", "x_ray");
# the SDK's action_map is keyed by human-readable forms that mix
# spaces and hyphens ("shake hand", "x-ray", "two-hand kiss").
# Try each candidate in turn.
name = action.name
candidates = [
name,
name.replace("_", " "), # shake_hand → shake hand
name.replace("_", "-"), # x_ray → x-ray
# two-word with specific hyphenation: first token with hyphen,
# rest with spaces (matches SDK's "two-hand kiss" pattern)
name.replace("_", "-", 1).replace("_", " "),
]
sdk_name = next((c for c in candidates if c in _ARM_ACTION_MAP), None)
if sdk_name is None:
log.warning(
"SDK action %s not in G1ArmActionClient action_map — tried %s. keys=%s",
action.name, candidates, sorted(_ARM_ACTION_MAP.keys())[:12],
)
return
opcode = _ARM_ACTION_MAP[sdk_name]
log.info("SDK action: %s (opcode=%s)", action.name, opcode)
try:
# G1 LocoClient exposes ExecuteAction(id) for built-in actions
self._loco_client.ExecuteAction(action.id)
# Built-in actions are blocking on the robot side, but the SDK call returns
# immediately. Give it a sensible default duration so we don't hammer it.
time.sleep(2.5)
self._arm_client.ExecuteAction(opcode)
# Built-in arm actions block on the robot side for ~3s; the SDK
# call returns immediately. Sleep so we don't hammer it back-to-back.
time.sleep(3.0)
except Exception as exc:
log.error("SDK action %s failed: %s", action.name, exc)

View File

@ -8,6 +8,7 @@ Device selection is dynamic — read from voice.audio_devices on each refresh.
from __future__ import annotations
import json
import subprocess
import threading
import time
@ -15,11 +16,33 @@ import wave
from pathlib import Path
from typing import Any
try:
import numpy as np
_HAS_NUMPY = True
except ImportError:
np = None
_HAS_NUMPY = False
try:
import pyaudio
except ImportError:
pyaudio = None # optional — only needed for local PCM playback
# G1 AudioClient — used to route playback through the robot chest speaker
# via DDS `PlayStream` (the same pipe Gemini uses). Without this, WAV
# playback would go to the Jetson's built-in audio codec, which isn't
# wired to any audible output on the G1.
try:
from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient
from unitree_sdk2py.g1.audio.g1_audio_api import (
ROBOT_API_ID_AUDIO_STOP_PLAY,
)
_HAS_G1_AUDIO = True
except ImportError:
AudioClient = None
ROBOT_API_ID_AUDIO_STOP_PLAY = 0
_HAS_G1_AUDIO = False
from Project.Sanad.config import (
CHANNELS,
CHUNK_SIZE,
@ -87,10 +110,38 @@ class AudioManager:
)
self.pya = pyaudio.PyAudio()
self.play_lock = threading.Lock()
# Lazily-initialised G1 DDS audio client (for play_wav → chest speaker)
self._g1_audio_client: Any = None
# Resolve devices and set PulseAudio defaults at startup
self.refresh_devices()
ensure_audio_defaults()
def _get_g1_audio_client(self):
"""Return a cached G1 AudioClient (DDS) — creates on first use.
Assumes `ChannelFactoryInitialize` has already been called (our
ArmController does this at startup on eth0). Returns None if the
Unitree SDK is unavailable or init fails.
"""
if not _HAS_G1_AUDIO:
return None
if self._g1_audio_client is not None:
return self._g1_audio_client
try:
c = AudioClient()
c.SetTimeout(5.0)
c.Init()
try:
c.SetVolume(100)
except Exception:
pass
self._g1_audio_client = c
log.info("G1 AudioClient initialized (for chest-speaker playback)")
except Exception as exc:
log.warning("G1 AudioClient init failed: %s", exc)
self._g1_audio_client = None
return self._g1_audio_client
def refresh_devices(self) -> dict[str, str]:
"""Re-read selected sink/source from audio_devices module."""
sink, source = _resolve_devices()
@ -137,15 +188,85 @@ class AudioManager:
stream.close()
def play_wav(self, path: Path) -> dict[str, Any]:
"""Play a WAV file through the G1 chest speaker via DDS when
available, falling back to the host PulseAudio sink otherwise.
The G1's built-in audio (Jetson `platform-sound`) isn't wired to
any audible speaker the robot's loudspeaker is only reachable
over DDS `AudioClient.PlayStream` (same pipe Gemini uses).
"""
with wave.open(str(path), "rb") as wf:
channels = wf.getnchannels()
sw = wf.getsampwidth()
rate = wf.getframerate()
data = wf.readframes(wf.getnframes())
# Prefer G1 chest speaker when the Unitree SDK is present
client = self._get_g1_audio_client()
if client is not None and _HAS_NUMPY and sw == 2:
self._play_pcm_via_g1(data, channels, rate)
else:
if client is None and _HAS_G1_AUDIO:
log.warning("G1 AudioClient unavailable, using host PulseAudio sink")
self.play_pcm(data, channels, rate, sw)
duration = len(data) / (rate * channels * sw) if rate else 0
return {"path": str(path), "duration_seconds": round(duration, 3)}
# -- G1 DDS-routed playback --
_G1_STREAM_APP = "sanad_playback"
_G1_HW_RATE = 16_000
def _play_pcm_via_g1(self, pcm_bytes: bytes, channels: int, source_rate: int) -> None:
"""Stream int16 PCM to the G1 chest speaker via AudioClient.PlayStream.
Converts stereo mono and resamples to 16 kHz (the rate
AudioClient expects). Uses a fresh stream_id per call so
back-to-back plays don't interfere.
"""
client = self._get_g1_audio_client()
if client is None:
raise RuntimeError("G1 AudioClient not available")
arr = np.frombuffer(pcm_bytes, dtype=np.int16)
# Stereo → mono average
if channels == 2 and arr.size % 2 == 0:
arr = arr.reshape(-1, 2).mean(axis=1).astype(np.int16)
# Resample to 16 kHz
if source_rate != self._G1_HW_RATE and arr.size:
target_len = max(1, int(len(arr) * self._G1_HW_RATE / source_rate))
arr = np.interp(
np.linspace(0, len(arr), target_len, endpoint=False),
np.arange(len(arr)),
arr.astype(np.float64),
).astype(np.int16)
stream_id = f"wav_{int(time.time() * 1000)}"
# Clear any lingering stream from a previous call
try:
client._Call(ROBOT_API_ID_AUDIO_STOP_PLAY,
json.dumps({"app_name": self._G1_STREAM_APP}))
except Exception:
pass
time.sleep(0.15)
# Push the whole clip in one PlayStream — G1 handles buffering
with self.play_lock:
play_start = time.time()
client.PlayStream(self._G1_STREAM_APP, stream_id, arr.tobytes())
total_sec = len(arr) / self._G1_HW_RATE
# Block until audio has drained (plus a small safety margin)
elapsed = time.time() - play_start
remaining = total_sec - elapsed + 0.3
if remaining > 0:
time.sleep(remaining)
try:
client._Call(ROBOT_API_ID_AUDIO_STOP_PLAY,
json.dumps({"app_name": self._G1_STREAM_APP}))
except Exception:
pass
# -- recording --
def record_mic(self, duration_sec: float) -> bytes:

View File

@ -43,6 +43,7 @@ SANAD_ARM_TXT = SCRIPTS_DIR / _ARM_TXT_NAME
TRIGGER_LOG_SIZE = _LV_CFG.get("trigger_log_size", 100)
POLL_INTERVAL_SEC = _LV_CFG.get("poll_interval_sec", 0.1)
DEFERRED_DEFAULT = _LV_CFG.get("deferred_default", False)
TRIGGER_ENABLED_DEFAULT = bool(_LV_CFG.get("trigger_enabled_default", False))
class LiveVoiceLoop:
@ -60,6 +61,11 @@ class LiveVoiceLoop:
# Deferred-trigger toggle (fire on phrase match vs fire after AI responds)
self.deferred_mode = DEFERRED_DEFAULT
# Master arm-trigger gate — when False, transcripts are still
# captured (you can watch them on the dashboard) but NO arm
# actions fire. Defaults to OFF so the robot doesn't move
# unexpectedly until the operator opts in.
self.trigger_enabled = TRIGGER_ENABLED_DEFAULT
# Trigger history (dashboard log)
self.triggers: deque[dict[str, Any]] = deque(maxlen=TRIGGER_LOG_SIZE)
@ -116,13 +122,82 @@ class LiveVoiceLoop:
def set_deferred(self, enabled: bool) -> None:
self.deferred_mode = bool(enabled)
def set_trigger_enabled(self, enabled: bool) -> None:
"""Master arm-trigger gate. When False, phrase matches are ignored.
Toggle semantics (no queue memory across the gate):
- Always clears any in-flight pending trigger so a late
fallback fire can't happen after disable/enable.
- On enable: snapshots every transcript currently in the
live_sub deque as already-seen. Only NEW speech after this
moment will dispatch phrases said while the gate was off
don't suddenly fire when you turn it back on.
"""
self.trigger_enabled = bool(enabled)
# Drop pending fallback timer — a queued "fire in 0.6s" from
# before the toggle must not leak across.
self.state._pending_arm_wave = False
self.state._pending_arm_wave_fired = False
self.state._pending_arm_trigger_fn = None
self.state._pending_arm_fallback_time = 0.0
snapshotted = 0
if self.trigger_enabled:
try:
from Project.Sanad.main import live_sub
if live_sub is not None:
for txt in list(live_sub.user_transcript):
if txt and txt not in self._seen_transcripts:
self._seen_transcripts.add(txt)
snapshotted += 1
except Exception as exc:
log.warning("set_trigger_enabled: snapshot failed: %s", exc)
log.info("trigger_enabled=%s (pending cleared, %d transcripts marked seen)",
self.trigger_enabled, snapshotted)
# ── poll loop ────────────────────────────────────────────────
def _poll_loop(self):
"""Poll GeminiSubprocess.user_transcript for new user texts."""
"""Poll GeminiSubprocess.user_transcript for new user texts AND
fire any deferred-mode arm trigger whose fallback timer elapsed.
Without the pending-check, a deferred trigger (`fire_on_wake_match=
False`) would only fire when the NEXT transcript arrives so if
the user says one sentence and stops, the arm never moves.
"""
while not self._stop_event.is_set():
self._check_transcripts()
self._check_pending_trigger()
self._stop_event.wait(POLL_INTERVAL_SEC)
def _check_pending_trigger(self):
"""Fire a queued deferred trigger if its fallback time has passed."""
# Master gate — same check as _dispatch
if not self.trigger_enabled:
return
if not getattr(self.state, "_pending_arm_wave", False):
return
if getattr(self.state, "_pending_arm_wave_fired", False):
return
fn = getattr(self.state, "_pending_arm_trigger_fn", None)
if fn is None:
return
fallback_at = float(getattr(self.state, "_pending_arm_fallback_time", 0.0) or 0.0)
if fallback_at <= 0.0 or time.time() < fallback_at:
return
# Gate on arm idle — skip fire if a motion is already running
if self.sanad_arm is not None and getattr(self.sanad_arm, "_is_busy", False):
return
try:
fn()
except Exception as exc:
log.warning("deferred arm trigger failed: %s", exc)
finally:
self.state._pending_arm_wave_fired = True
self.state._pending_arm_wave = False
self.state._pending_arm_trigger_fn = None
def _check_transcripts(self):
try:
from Project.Sanad.main import live_sub
@ -145,7 +220,9 @@ class LiveVoiceLoop:
def _dispatch(self, transcript_text: str) -> None:
if not self.wake_dispatch or self.sanad_arm is None:
return
# Master gate — skip arm triggering entirely when disabled
if not self.trigger_enabled:
return
# Gate trigger on arm idle
if getattr(self.sanad_arm, "_is_busy", False):
return
@ -200,6 +277,7 @@ class LiveVoiceLoop:
return {
"running": self._running,
"deferred_mode": self.deferred_mode,
"trigger_enabled": self.trigger_enabled,
"last_heard": self.last_heard,
"pending_action": pending,
"last_action": self.last_action,

View File

@ -29,16 +29,45 @@ from __future__ import annotations
import array
import asyncio
import importlib
import json
import logging
import os
import sys
import threading
import time
import types
import wave
from datetime import datetime
from pathlib import Path
# ─────────────────────────────────────────────────────────────────────────────
# Layout bootstrap — MUST run before any `Project.Sanad.*` import.
# This file runs as a standalone subprocess (spawned by gemini/subprocess.py
# or local/subprocess.py); it can't rely on main.py having set up sys.path.
# Mirrors the dev-vs-deployed detection in main.py.
# dev layout: <root>/Project/Sanad/voice/sanad_voice.py
# deployed layout: /home/unitree/Sanad/voice/sanad_voice.py
# ─────────────────────────────────────────────────────────────────────────────
_SANAD_DIR = Path(__file__).resolve().parent.parent # .../Sanad
_SANAD_PARENT = _SANAD_DIR.parent # .../Project OR /home/unitree
if _SANAD_PARENT.name == "Project":
_ROOT = _SANAD_PARENT.parent
if str(_ROOT) not in sys.path:
sys.path.insert(0, str(_ROOT))
else:
if str(_SANAD_PARENT) not in sys.path:
sys.path.insert(0, str(_SANAD_PARENT))
if "Project" not in sys.modules:
_proj = types.ModuleType("Project")
_proj.__path__ = [] # namespace package marker
sys.modules["Project"] = _proj
if "Project.Sanad" not in sys.modules:
_sanad = importlib.import_module(_SANAD_DIR.name)
sys.modules["Project.Sanad"] = _sanad
sys.modules["Project"].Sanad = _sanad # type: ignore[attr-defined]
from unitree_sdk2py.core.channel import ChannelFactoryInitialize
from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient

View File

@ -396,22 +396,92 @@ class TypedReplayEngine:
return self.audio_mgr.pya.get_sample_size(pyaudio.paInt16)
# ── generation ───────────────────────────────────────────────
async def generate_audio(self, text: str) -> tuple[bytes, list[str]]:
"""Send text to Gemini, return (pcm_audio, text_parts)."""
"""Route typed text through Gemini Live as the voice, first-try reliable.
The session's system-prompt sets a persona ("You are Sanad…"),
so the prompt that most reliably gets audio out is a direct
address to the persona with the quoted text. A transparent
retry chain covers the edge cases where the model still
replies with text only.
"""
stripped = text.strip()
if not stripped:
raise ValueError("text cannot be empty")
if self.voice_client is None:
raise RuntimeError("voice_client unavailable")
if not self.voice_client.connected:
await self.voice_client.connect()
return await self.voice_client.send_text(text, owner="typed_replay")
# Ordered by empirical reliability — first variant wins ~95% of turns.
# The quoted-phrase form is the most consistent trigger for an
# audio-only response with the current Sanad persona prompt.
attempts = [
f'قل هذا بالضبط وبدون إضافات: "{stripped}"', # Arabic: "Say this exactly, no additions"
f'Say this exactly, nothing else: "{stripped}"',
f'"{stripped}"',
]
last_parts: list[str] = []
for idx, wrapped in enumerate(attempts, start=1):
try:
audio_bytes, text_parts = await self.voice_client.send_text(
wrapped, owner="typed_replay")
except Exception as exc:
log.warning("Gemini TTS attempt %d failed: %s", idx, exc)
continue
if audio_bytes:
if idx > 1:
log.info("Gemini TTS succeeded on attempt %d", idx)
return audio_bytes, text_parts
last_parts = text_parts
log.warning("Gemini TTS attempt %d returned no audio — parts: %s",
idx, " | ".join(text_parts or [])[:120])
return b"", last_parts
# ── playback + capture ───────────────────────────────────────
def play_audio(self, audio_bytes: bytes, capture_speaker: bool) -> bytes:
"""Play PCM on speaker; optionally capture what was heard."""
"""Play Gemini PCM through the G1 chest speaker (via DDS) when
available; fall back to the host PulseAudio sink otherwise.
The DDS path is audible on the robot; the PulseAudio path goes
to the Jetson's built-in audio codec, which isn't wired to any
audible output on the G1. `capture_speaker` is only supported
on the PulseAudio path (DDS has no monitor sink).
"""
if not audio_bytes:
return b""
if self.audio_mgr is None or self.audio_mgr.pya is None:
if self.audio_mgr is None:
raise RuntimeError("audio_mgr unavailable — cannot play")
# Try the G1 chest speaker first
g1_client = None
try:
g1_client = self.audio_mgr._get_g1_audio_client()
except Exception:
g1_client = None
if g1_client is not None:
if capture_speaker:
log.info("capture_speaker requested, but G1 DDS path has no "
"PulseAudio monitor — capture skipped")
with self._play_lock:
try:
self.audio_mgr._play_pcm_via_g1(
audio_bytes, CHANNELS, RECEIVE_SAMPLE_RATE,
)
return b""
except Exception as exc:
log.warning("G1 speaker playback failed, falling back "
"to host PulseAudio sink: %s", exc)
# Fall through to the PyAudio path below
if self.audio_mgr.pya is None:
raise RuntimeError("audio_mgr has no PyAudio backend for fallback")
# Fallback: host PulseAudio sink (inaudible on G1, but keeps the
# capture-speaker feature working on desktop/dev setups).
with self._play_lock:
recorder = None
restore_source = False