From cf5e9161204b10b038915649b4f06ff7dd00ae2f Mon Sep 17 00:00:00 2001 From: kassam Date: Tue, 21 Apr 2026 11:47:44 +0400 Subject: [PATCH] Update 2026-04-21 11:47:43 --- config/motion_config.json | 1 + config/voice_config.json | 3 +- dashboard/routes/live_voice.py | 19 ++- dashboard/routes/replay.py | 9 +- dashboard/static/index.html | 32 ++-- main.py | 10 ++ motion/arm_controller.py | 286 +++++++++++++++++++++++++-------- voice/audio_manager.py | 123 +++++++++++++- voice/live_voice_loop.py | 82 +++++++++- voice/sanad_voice.py | 29 ++++ voice/typed_replay.py | 78 ++++++++- 11 files changed, 580 insertions(+), 92 deletions(-) diff --git a/config/motion_config.json b/config/motion_config.json index 28b0499..bcc3336 100644 --- a/config/motion_config.json +++ b/config/motion_config.json @@ -5,6 +5,7 @@ "_comment": "motion/arm_controller.py — enable_arm_sdk_index + replay_hz come from core.g1_hardware", "ramp_in_steps": 60, "ramp_out_steps": 180, + "settle_hold_sec": 0.5, "watchdog_timeout_sec": 0.25, "watchdog_disable_after_sec": 1.0, "arm_indices_start": 15, diff --git a/config/voice_config.json b/config/voice_config.json index fe671b3..00e151d 100644 --- a/config/voice_config.json +++ b/config/voice_config.json @@ -60,7 +60,8 @@ "_comment": "voice/live_voice_loop.py — arm phrase dispatcher. arm_txt filename comes from core.script_files.arm_phrases", "trigger_log_size": 100, "poll_interval_sec": 0.1, - "deferred_default": false + "deferred_default": false, + "trigger_enabled_default": false }, "local_tts": { diff --git a/dashboard/routes/live_voice.py b/dashboard/routes/live_voice.py index 0fa733e..1075841 100644 --- a/dashboard/routes/live_voice.py +++ b/dashboard/routes/live_voice.py @@ -4,11 +4,12 @@ Listens to GeminiSubprocess user transcripts, matches against sanad_arm.txt phrases, and fires ARM.trigger_action_by_id. Endpoints: - POST /start begin polling transcripts - POST /stop stop polling - POST /deferred-mode?enabled toggle instant vs deferred trigger - GET /status running, last heard, last action, etc. - GET /triggers arm trigger history (log) + POST /start begin polling transcripts + POST /stop stop polling + POST /deferred-mode?enabled toggle instant vs deferred trigger + POST /trigger-enabled?enabled master gate — allow arm actions or not + GET /status running, last heard, last action, etc. + GET /triggers arm trigger history (log) """ from __future__ import annotations @@ -54,6 +55,14 @@ async def set_deferred(enabled: bool): return {"ok": True, "deferred_mode": loop.deferred_mode} +@router.post("/trigger-enabled") +async def set_trigger_enabled(enabled: bool): + """Master gate for voice → arm triggering. Default OFF.""" + loop = _loop() + loop.set_trigger_enabled(enabled) + return {"ok": True, "trigger_enabled": loop.trigger_enabled} + + @router.get("/triggers") async def triggers(): loop = _loop() diff --git a/dashboard/routes/replay.py b/dashboard/routes/replay.py index 3f670bb..7d30c7f 100644 --- a/dashboard/routes/replay.py +++ b/dashboard/routes/replay.py @@ -114,9 +114,16 @@ async def test_replay(payload: ReplayRequest): @router.post("/cancel") async def cancel_replay(): + """Stop the current replay — the smooth return-to-home runs as the + final phase of the replay itself. + + Matches g1_replay_v4_stable.py's behaviour: the play loop breaks on + the cancel flag, then the same Run() function executes its + return-home ramp + DisableSDK. No separate scheduling needed. + """ from Project.Sanad.main import arm arm.cancel() - return {"ok": True, "message": "Cancel signal sent."} + return {"ok": True, "message": "Cancelled — returning to home pose smoothly."} @router.get("/status") diff --git a/dashboard/static/index.html b/dashboard/static/index.html index be12a1d..dbcda9d 100644 --- a/dashboard/static/index.html +++ b/dashboard/static/index.html @@ -259,9 +259,15 @@ -
- - +
+
+ + +
+
+ + +
Last heard: --
@@ -1091,7 +1097,7 @@ async function renderActions(arm){ }catch(e){} } async function triggerAction(id,name){if(_armBusy)return;_runId=id;_armBusy=true;document.getElementById('running-action').textContent='Running: '+name.replace(/_/g,' ')+'...';document.getElementById('running-action').style.display='block';renderActions({busy:true});const speed=parseFloat(document.getElementById('action-speed').value||document.getElementById('action-speed-2').value);try{await api('POST','/api/motion/trigger',{action_id:id,speed});}catch(e){}pollArmBusy();} -async function cancelAction(){try{await api('POST','/api/replay/cancel');toast('Cancelled','info');}catch(e){}_armBusy=false;_runId=null;document.getElementById('running-action').style.display='none';refreshStatus();} +async function cancelAction(){try{const r=await api('POST','/api/replay/cancel');toast(r&&r.message?r.message:'Cancelled','info');}catch(e){}_armBusy=false;_runId=null;document.getElementById('running-action').style.display='none';refreshStatus();} let _armPoll;function pollArmBusy(){clearInterval(_armPoll);_armPoll=setInterval(async()=>{try{const s=await api('GET','/api/replay/status');if(!s.arm?.busy){clearInterval(_armPoll);_armBusy=false;_runId=null;document.getElementById('running-action').style.display='none';refreshStatus();}}catch(e){clearInterval(_armPoll);}},500);} // Skills @@ -1106,7 +1112,7 @@ async function playMacro(b){const n=document.getElementById('play-macro-name').v // Replay async function refreshReplayFiles(){try{const r=await api('GET','/api/replay/files');const el=document.getElementById('replay-files');if(!(r.files||[]).length){el.innerHTML='
No motion files
';return;}el.innerHTML=''+(r.files||[]).map(f=>``).join('')+'
FileFramesDurationSize
${esc(f.name)}${f.frames}${f.duration_sec}s${f.size_kb}KB
';}catch(e){}} async function testReplay(b){const n=document.getElementById('replay-name').value,s=parseFloat(document.getElementById('replay-speed').value);if(!n)return;btnLoad(b);try{await api('POST','/api/replay/test',{name:n,speed:s});toast('Replay: '+n,'ok');pollArmBusy();}catch(e){}btnDone(b);} -async function cancelReplay(){try{await api('POST','/api/replay/cancel');toast('Cancelled','info');}catch(e){}} +async function cancelReplay(){try{const r=await api('POST','/api/replay/cancel');toast(r&&r.message?r.message:'Cancelled','info');}catch(e){}} async function deleteMotionFile(n){if(confirm('Delete '+n+'?'))try{await api('DELETE','/api/replay/files/'+encodeURIComponent(n));toast('Deleted','ok');refreshReplayFiles();populateGestureSelect();}catch(e){}} async function uploadMotionFile(input){if(!input.files[0])return;const fd=new FormData();fd.append('file',input.files[0]);try{const r=await fetch('/api/replay/files/upload',{method:'POST',body:fd});if(!r.ok){const j=await r.json();toast(j.detail||'Upload failed','err');}else{toast('Uploaded','ok');refreshReplayFiles();populateGestureSelect();}}catch(e){toast('Upload error','err');}input.value='';} async function startTeaching(b){const n=document.getElementById('teach-name').value,d=parseFloat(document.getElementById('teach-duration').value);if(!n)return toast('Enter name','err');btnLoad(b);try{await api('POST','/api/replay/teach/start',{name:n,duration_sec:d});toast('Teaching: '+n,'ok');pollTeachStatus();}catch(e){}btnDone(b);} @@ -1146,6 +1152,7 @@ async function startLiveVoice(b){ } async function stopLiveVoice(b){btnLoad(b);try{await api('POST','/api/live-voice/stop');toast('Stopped','info');}catch(e){}btnDone(b);refreshLiveVoice();} async function setDeferredMode(v){try{await api('POST','/api/live-voice/deferred-mode?enabled='+v);}catch(e){}} +async function setTriggerEnabled(v){try{await api('POST','/api/live-voice/trigger-enabled?enabled='+v);}catch(e){}} async function refreshLiveVoice(){ try{ const r=await api('GET','/api/live-voice/status'); @@ -1156,6 +1163,7 @@ async function refreshLiveVoice(){ document.getElementById('lv-last-text').textContent=r.last_heard||'--'; document.getElementById('lv-pending').textContent=r.pending_action||r.last_action||'--'; document.getElementById('lv-deferred').checked=r.deferred_mode===true; + document.getElementById('lv-trigger-enabled').checked=r.trigger_enabled===true; document.getElementById('lv-audio').textContent=r.audio_attached?'yes':'no'; document.getElementById('lv-arm').textContent=r.arm_attached?'yes':'no'; document.getElementById('lv-gem').textContent=r.gemini_connected?'connected':'disconnected'; @@ -1176,10 +1184,10 @@ async function stopLiveSub(b){btnLoad(b);try{await api('POST','/api/live-subproc async function refreshLiveSub(){try{const r=await api('GET','/api/live-subprocess/status');const st=document.getElementById('ls-state');st.textContent=r.state||'stopped';st.className='badge '+(r.running?'badge-ok':'badge-warn');document.getElementById('ls-msg').textContent=r.state_message||'--';document.getElementById('ls-user').textContent=r.last_user_text||'--';document.getElementById('ls-log').textContent=(r.log_tail||[]).slice(-25).join('\n');}catch(e){}} // Typed Replay -async function trGenerate(b){const t=document.getElementById('tr-text').value;if(!t)return toast('Enter text','err');btnLoad(b);try{await api('POST','/api/typed-replay/generate',{text:t,record_name:document.getElementById('tr-name').value,capture_speaker:document.getElementById('tr-capture').checked});toast('Generated & played','ok');refreshTR();}catch(e){}btnDone(b);} -async function trReplayLast(b){btnLoad(b);try{await api('POST','/api/typed-replay/replay-last?capture_speaker='+document.getElementById('tr-capture').checked);toast('Replayed','ok');refreshTR();}catch(e){}btnDone(b);} -async function trSaveLast(b){btnLoad(b);try{await api('POST','/api/typed-replay/save-last',{name:document.getElementById('tr-name').value});toast('Saved','ok');refreshTR();refreshRecords();}catch(e){}btnDone(b);} -async function refreshTR(){try{const r=await api('GET','/api/typed-replay/status');const s=r.session||{};document.getElementById('tr-session').innerHTML=`Text: ${esc(s.text||'--')}
Audio: ${s.has_audio?'Yes':'No'} | Capture: ${s.has_speaker_capture?'Yes':'No'}
Replays: ${s.replay_count||0}
Generated: ${s.generated_at||'--'}
Saved: ${s.saved_entry?.record_name||'--'}`;}catch(e){}} +async function trGenerate(b){const t=document.getElementById('tr-text').value;if(!t)return toast('Enter text','err');btnLoad(b);try{await api('POST','/api/typed-replay/say',{text:t,record:document.getElementById('tr-capture').checked,record_name:document.getElementById('tr-name').value});toast('Generated & played','ok');refreshTR();}catch(e){}btnDone(b);} +async function trReplayLast(b){btnLoad(b);try{await api('POST','/api/typed-replay/replay-last');toast('Replayed','ok');refreshTR();}catch(e){}btnDone(b);} +async function trSaveLast(b){btnLoad(b);try{await api('POST','/api/typed-replay/save-last',{record_name:document.getElementById('tr-name').value});toast('Saved','ok');refreshTR();refreshRecords();}catch(e){}btnDone(b);} +async function refreshTR(){try{const r=await api('GET','/api/typed-replay/status');const s=r.session||{};document.getElementById('tr-session').innerHTML=`Text: ${esc(s.text||'--')}
Audio: ${s.has_audio?'Yes':'No'} | Capture: ${s.has_capture?'Yes':'No'}
Replays: ${s.replay_count||0}
Generated: ${s.generated_at||'--'}
Saved: ${esc(s.saved_as||'--')}`;}catch(e){}} // YOLO async function loadDetector(b){btnLoad(b);try{const r=await api('POST','/api/detector/load');const el=document.getElementById('yolo-status');el.textContent=r.ok?'Loaded':'Failed';el.className='badge '+(r.ok?'badge-ok':'badge-err');toast(r.ok?'Model loaded':'Failed',r.ok?'ok':'err');}catch(e){}btnDone(b);} @@ -1306,10 +1314,10 @@ async function autoStartLiveSub(){ }catch(e){} } -// Init -refreshStatus();refreshSystem();refreshAudio();refreshAudioDevices();refreshSkills();refreshReplayFiles();refreshScripts();refreshPrompt();refreshRecords();refreshPhotos();refreshCamSources();refreshCamDevices();refreshLocalCam();populateGestureSelect();refreshLiveVoice();refreshLiveSub();refreshTR();refreshDetector();refreshWakeActions();refreshApiKey();connectLogs();connectCamera(); +// Init — vision/camera/detector fetches removed; those endpoints were deleted. +refreshStatus();refreshSystem();refreshAudio();refreshAudioDevices();refreshSkills();refreshReplayFiles();refreshScripts();refreshPrompt();refreshRecords();populateGestureSelect();refreshLiveVoice();refreshLiveSub();refreshTR();refreshWakeActions();refreshApiKey();connectLogs(); setTimeout(autoConnectGemini,2000);setTimeout(autoStartLiveSub,3000); -setInterval(refreshStatus,5000);setInterval(refreshSystem,30000);setInterval(refreshLocalCam,10000);setInterval(refreshLiveVoice,5000);setInterval(refreshLiveSub,5000); +setInterval(refreshStatus,5000);setInterval(refreshSystem,30000);setInterval(refreshLiveVoice,5000);setInterval(refreshLiveSub,5000); diff --git a/main.py b/main.py index a61d09d..a3c1884 100644 --- a/main.py +++ b/main.py @@ -47,6 +47,16 @@ else: sys.modules["Project.Sanad"] = _sanad sys.modules["Project"].Sanad = _sanad # type: ignore[attr-defined] +# When main.py runs as a script (`python3 main.py`), Python loads it as the +# `__main__` module — NOT as `Project.Sanad.main`. Route handlers later do +# `from Project.Sanad.main import arm` etc; without the alias below, Python +# would re-execute this file from scratch under a different module name, +# creating a SECOND set of subsystem instances (uninitialised). Every +# `subsystem not available` / `No LowState` symptom traces back to this. +# The alias ensures both names point at the exact same module object. +if __name__ == "__main__": + sys.modules["Project.Sanad.main"] = sys.modules["__main__"] + # asyncio compat shim — backfills asyncio.to_thread for Python 3.8. # MUST be imported before any other Sanad module that uses asyncio.to_thread. from Project.Sanad.core import asyncio_compat # noqa: F401 diff --git a/motion/arm_controller.py b/motion/arm_controller.py index 62b23b8..6237ea7 100644 --- a/motion/arm_controller.py +++ b/motion/arm_controller.py @@ -38,6 +38,7 @@ _AC = _cfg_section("motion", "arm_controller") from Project.Sanad.config import ENABLE_ARM_SDK_INDEX RAMP_IN_STEPS = _AC.get("ramp_in_steps", 60) # ~1.0s smooth move to start pose RAMP_OUT_STEPS = _AC.get("ramp_out_steps", 180) # ~3.0s smooth return to home +SETTLE_HOLD_SEC = _AC.get("settle_hold_sec", 0.5) # hold start pose before replay begins WATCHDOG_TIMEOUT = _AC.get("watchdog_timeout_sec", 0.25) # hold last pose if state stale WATCHDOG_DISABLE_AFTER = _AC.get("watchdog_disable_after_sec", 1.0) # abort if state stale this long ARM_INDICES = range( @@ -71,12 +72,21 @@ except ImportError: _make_low_cmd = None log.warning("Unitree SDK not available — running in simulation mode") -# Optional G1 high-level SDK action client (built-in actions) +# G1 arm-action client for built-in arm moves (wave, shake_hand, hug, …). +# NOTE: do NOT use LocoClient here — LocoClient is the locomotion/body-move +# client and its ExecuteAction() doesn't recognise arm-action IDs, so arm +# commands become silent no-ops. The correct client is the arm-specific +# G1ArmActionClient with the SDK's action_map (name → opcode lookup). try: - from unitree_sdk2py.g1.loco.g1_loco_client import LocoClient - _HAS_LOCO = True + from unitree_sdk2py.g1.arm.g1_arm_action_client import ( + G1ArmActionClient, + action_map as _ARM_ACTION_MAP, + ) + _HAS_ARM_CLIENT = True except ImportError: - _HAS_LOCO = False + G1ArmActionClient = None + _ARM_ACTION_MAP = {} + _HAS_ARM_CLIENT = False @dataclass @@ -189,7 +199,7 @@ class ArmController: self._state_sub = None self._low_cmd = None self._crc = None - self._loco_client = None + self._arm_client = None # Live state from LowState_ subscriber self._current_q: list[float] = [0.0] * G1_NUM_MOTOR @@ -242,16 +252,17 @@ class ArmController: ) self._crc = CRC() - # High-level SDK action client (for built-in actions) - if _HAS_LOCO: + # Arm-specific action client for built-in moves + if _HAS_ARM_CLIENT: try: - self._loco_client = LocoClient() - self._loco_client.SetTimeout(10.0) - self._loco_client.Init() - log.info("LocoClient initialized — built-in SDK actions available") + self._arm_client = G1ArmActionClient() + self._arm_client.SetTimeout(10.0) + self._arm_client.Init() + log.info("G1ArmActionClient initialized (%d actions) — built-in moves available", + len(_ARM_ACTION_MAP)) except Exception as exc: - log.warning("LocoClient init failed: %s — built-in actions disabled", exc) - self._loco_client = None + log.warning("G1ArmActionClient init failed: %s — built-in actions disabled", exc) + self._arm_client = None self._initialized = True log.info("DDS initialized on %s", network_interface) @@ -332,12 +343,13 @@ class ArmController: self._arm_pub.Write(self._low_cmd) def _disable_sdk(self): - """Send 5 disable frames to cleanly release arm control.""" + """Send 10 disable frames at 50 Hz — direct port of + g1_replay_v4_stable.py:DisableSDK (lines 141-147).""" if not self._initialized or self._low_cmd is None: return - for _ in range(5): - self._low_cmd.motor_cmd[ENABLE_ARM_SDK_INDEX].q = 0.0 - self._low_cmd.crc = self._crc.Crc(self._low_cmd) + self._low_cmd.motor_cmd[ENABLE_ARM_SDK_INDEX].q = 0.0 + self._low_cmd.crc = self._crc.Crc(self._low_cmd) + for _ in range(10): self._arm_pub.Write(self._low_cmd) time.sleep(0.02) @@ -365,6 +377,24 @@ class ArmController: self._last_action_time = time.monotonic() def _replay_file_inner(self, filepath: str, speed: float = 1.0): + """One-for-one port of g1_replay_v4_stable.py:ReplayWithHome.Run(). + + Five phases — timing and math match the reference exactly: + + 1. Wait for first LowState_ message (no body-lock from zeros). + 2. Load data: home_q (last valid frame of arm_home.jsonl), + full_body_lock_q (live snapshot), and the replay frames. + 3. MOVE TO START — 60 steps at 60 Hz, alpha = k/steps (starts + at 0 = exact current pose, ends at 59/60 just shy of target). + 3b. SETTLE HOLD — replaces the reference's human + `input("Press Enter to Begin")` pause; gives the physical + motors time to reach the commanded start pose before + playback so the first real frames don't jerk. + 4. PLAY — `for f in frames: if f['t']-t0 >= play_elapsed` + frame-select pattern, fixed 1/REPLAY_HZ sleep per iteration. + 5. RETURN HOME — 180 steps alpha = k/steps from last_played_q + to home_q, body locked. Then DisableSDK (10 frames). + """ path = Path(filepath) if not path.is_absolute(): path = MOTIONS_DIR / path @@ -376,84 +406,116 @@ class ArmController: log.warning("Empty motion file: %s", path) return - home_q = _load_home_q() or [0.0] * G1_NUM_MOTOR - if not _HAS_SDK: duration = len(frames) / REPLAY_HZ / speed if speed else len(frames) / REPLAY_HZ - log.info("[SIM] Replay %s (%.1fs, %d frames, speed=%.1f)", path.name, duration, len(frames), speed) + log.info("[SIM] Replay %s (%.1fs, %d frames, speed=%.1f)", + path.name, duration, len(frames), speed) self._sim_replay(frames, speed) return - log.info("Replay %s (%d frames @ %.0f Hz, speed=%.1f)", path.name, len(frames), REPLAY_HZ, speed) + log.info("Replay %s (%d frames @ %.0f Hz, speed=%.1f)", + path.name, len(frames), REPLAY_HZ, speed) - # CRITICAL: wait for first LowState_ callback before reading _current_q - # Otherwise we'd lock the body to all-zeros and crash the robot. + # ─── 1. Wait for first LowState ───────────────────────── if not self._first_state_event.is_set(): log.warning("Waiting for first LowState message...") if not self._first_state_event.wait(timeout=2.0): log.error("No LowState received in 2s — refusing to replay (would lock body to zeros)") return - body_lock_q = self._get_current_q() - current_arm_q = list(body_lock_q) - start_arm_q = frames[0]["q"] - interval = 1.0 / REPLAY_HZ + # ─── 2. Load data (ref lines 154-166) ─────────────────── + home_q = _load_home_q() or [0.0] * G1_NUM_MOTOR + full_body_lock_q = self._get_current_q() # snapshot live state - # Phase 1: Ramp-in — interpolate from current pose to first frame - log.debug("Ramp-in (%d steps)", RAMP_IN_STEPS) - for step in range(RAMP_IN_STEPS): + interval = 1.0 / REPLAY_HZ + file_start_q = frames[0]["q"] + + # ─── 3. MOVE TO START (ref lines 171-181) ─────────────── + log.debug("Moving to start (%d steps)", RAMP_IN_STEPS) + for k in range(RAMP_IN_STEPS): if self._cancel.is_set(): - self._return_home(current_arm_q, body_lock_q, home_q) + self._return_home(full_body_lock_q, full_body_lock_q, home_q) return - t = (step + 1) / RAMP_IN_STEPS - interp = _lerp_q(current_arm_q, start_arm_q, t) - self._send_frame(interp, body_lock_q) + alpha = k / RAMP_IN_STEPS # 0 .. (RAMP_IN_STEPS-1)/RAMP_IN_STEPS + interp_q = list(full_body_lock_q) + for j in range(15, G1_NUM_MOTOR): + interp_q[j] = (1 - alpha) * full_body_lock_q[j] + alpha * file_start_q[j] + self._send_frame(interp_q, full_body_lock_q) time.sleep(interval) - # Phase 2: Play frames with timing and watchdog + # ─── 3b. SETTLE HOLD — replaces reference's Enter pause ─ + settle_frames = max(0, int(SETTLE_HOLD_SEC * REPLAY_HZ)) + if settle_frames > 0: + log.debug("Settle hold (%d frames / %.2fs)", settle_frames, SETTLE_HOLD_SEC) + for _ in range(settle_frames): + if self._cancel.is_set(): + self._return_home(file_start_q, full_body_lock_q, home_q) + return + self._send_frame(file_start_q, full_body_lock_q) + time.sleep(interval) + + # ─── 4. PLAY (ref lines 183-234) ──────────────────────── log.debug("Playing %d frames", len(frames)) + last_played_q = file_start_q play_elapsed = 0.0 last_real = time.monotonic() - t0_frame = frames[0].get("t", 0.0) + t0 = frames[0].get("t", 0.0) - for frame in frames: + while True: if self._cancel.is_set(): break - # Watchdog + # Watchdog — abort if LowState goes stale age = self._state_age() if age > WATCHDOG_DISABLE_AFTER: log.error("Watchdog abort — LowState stale %.2fs", age) self._disable_sdk() return - if age > WATCHDOG_TIMEOUT: - log.warning("Watchdog hold — LowState stale %.2fs", age) now_real = time.monotonic() - play_elapsed += (now_real - last_real) * speed + dt_real = now_real - last_real last_real = now_real + play_elapsed += dt_real * speed - frame_t = frame.get("t", 0.0) - t0_frame - if frame_t > play_elapsed: - time.sleep(min(frame_t - play_elapsed, interval)) + # Pick the next frame whose timestamp has elapsed (reference pattern) + target_frame = None + for f in frames: + if f.get("t", 0.0) - t0 >= play_elapsed: + target_frame = f + break + if target_frame is None: + break - self._send_frame(frame["q"], body_lock_q) - # NOTE: timing is driven by frame_t above — no extra sleep here + self._send_frame(target_frame["q"], full_body_lock_q) + last_played_q = target_frame["q"] + time.sleep(interval) - # Phase 3: Return home - last_arm_q = frames[-1]["q"] if frames else current_arm_q - self._return_home(last_arm_q, body_lock_q, home_q) + # ─── 5. RETURN HOME (ref lines 239-256) + DisableSDK ──── + self._return_home(last_played_q, full_body_lock_q, home_q) def _return_home(self, from_q: list[float], body_lock_q: list[float], home_q: list[float]): - """Smooth interpolation back to home pose, then disable SDK.""" - log.debug("Returning home (%d steps)", RAMP_OUT_STEPS) + """Smooth return to home — direct port of g1_replay_v4_stable.py:239-256. + + 180 steps × (1/60)s = 3s linear ramp on arm motors only (indices + 15-28); body motors (0-14) stay locked to `body_lock_q`. Then + DisableSDK sends 10 disable-bit frames at 50 Hz. + + IMPORTANT: the reference's return-home is unconditional — it + always runs to completion regardless of why the play loop ended + (natural end OR 'q' press). We clear `_cancel` at entry so a + user-hit Cancel (which set `_cancel` to break the play loop) + doesn't also abort the return ramp. Without this, the arm + "snaps" home because the loop exits on the first iteration. + """ + self._cancel.clear() + log.info("Returning home (%d steps / %.1fs)", RAMP_OUT_STEPS, RAMP_OUT_STEPS / REPLAY_HZ) interval = 1.0 / REPLAY_HZ - for step in range(RAMP_OUT_STEPS): - if self._cancel.is_set(): - break - t = (step + 1) / RAMP_OUT_STEPS - interp = _lerp_q(from_q, home_q, t) - self._send_frame(interp, body_lock_q) + for k in range(RAMP_OUT_STEPS): + alpha = k / RAMP_OUT_STEPS # 0 .. (RAMP_OUT_STEPS-1)/RAMP_OUT_STEPS + interp_q = list(from_q) + for j in range(15, G1_NUM_MOTOR): + interp_q[j] = (1 - alpha) * from_q[j] + alpha * home_q[j] + self._send_frame(interp_q, body_lock_q) time.sleep(interval) self._disable_sdk() log.info("Home reached, SDK disabled") @@ -475,9 +537,81 @@ class ArmController: return self._is_busy def cancel(self): - """Cancel the currently running replay.""" + """Cancel the currently running replay. + + Sets the cancel flag — the play loop in `_replay_file_inner` + checks this and breaks out; `_return_home` then runs as the + final phase of the same replay (matches the reference's + end-of-Run behaviour in g1_replay_v4_stable.py). + """ self._cancel.set() + def _unused_return_to_home(self, duration_sec: float = 3.0, + home_file: str = "arm_home.jsonl") -> None: + """Deprecated — replay's own `_return_home` is called automatically + when cancel breaks the play loop. Kept here only to preserve any + external caller; no new code should use this. + """ + if not self._initialized or self._low_cmd is None: + log.warning("return_to_home: arm controller not initialised") + return + if not self._first_state_event.wait(timeout=2.0): + log.error("return_to_home: no LowState received in 2s — aborting") + return + + home_path = MOTIONS_DIR / home_file + if not home_path.exists(): + log.warning("return_to_home: %s missing — skipping", home_path.name) + return + + # Use the LAST valid 'q' in the file as the settle pose + home_q: list[float] | None = None + try: + for frame in _load_frames(home_path): + q = frame.get("q") + if q and len(q) == G1_NUM_MOTOR: + home_q = q + except Exception as exc: + log.warning("return_to_home: reading %s failed: %s", + home_path.name, exc) + return + if home_q is None: + log.warning("return_to_home: %s has no valid 'q' frames", + home_path.name) + return + + with self._state_lock: + start_q = list(self._current_q) + body_lock_q = list(start_q) + + # Let the ramp publish frames even though we just cancelled + self._cancel.clear() + with self._lock: + if self._is_busy: + # A pending replay is still winding down — wait a beat + log.debug("return_to_home: arm busy, waiting briefly") + self._is_busy = True + + try: + steps = max(30, int(duration_sec * REPLAY_HZ)) # ≥ 0.5s ramp + dt = 1.0 / REPLAY_HZ + log.info("return_to_home: ramp %d steps (%.1fs) → %s", + steps, duration_sec, home_file) + for k in range(steps): + if self._cancel.is_set(): + log.info("return_to_home: cancelled mid-ramp") + break + alpha = (k + 1) / steps + interp_q = list(body_lock_q) + for j in range(15, G1_NUM_MOTOR): + interp_q[j] = (1 - alpha) * start_q[j] + alpha * home_q[j] + self._send_frame(interp_q, body_lock_q) + time.sleep(dt) + log.info("return_to_home: done") + finally: + with self._lock: + self._is_busy = False + def refresh_actions(self): """Re-scan data/motions/ and rebuild the action registry.""" global ACTIONS, ACTION_BY_ID, ACTION_BY_NAME @@ -558,19 +692,39 @@ class ArmController: log.info("[SIM] SDK action: %s (id=%d)", action.name, action.id) time.sleep(2.0) return - if self._loco_client is None: + if self._arm_client is None: log.warning( - "SDK action %s requested but LocoClient not available — skipping", + "SDK action %s requested but G1ArmActionClient not available — skipping", action.name, ) return - log.info("SDK action: %s (id=%d)", action.name, action.id) + # Sanad's registry uses underscored names ("shake_hand", "x_ray"); + # the SDK's action_map is keyed by human-readable forms that mix + # spaces and hyphens ("shake hand", "x-ray", "two-hand kiss"). + # Try each candidate in turn. + name = action.name + candidates = [ + name, + name.replace("_", " "), # shake_hand → shake hand + name.replace("_", "-"), # x_ray → x-ray + # two-word with specific hyphenation: first token with hyphen, + # rest with spaces (matches SDK's "two-hand kiss" pattern) + name.replace("_", "-", 1).replace("_", " "), + ] + sdk_name = next((c for c in candidates if c in _ARM_ACTION_MAP), None) + if sdk_name is None: + log.warning( + "SDK action %s not in G1ArmActionClient action_map — tried %s. keys=%s", + action.name, candidates, sorted(_ARM_ACTION_MAP.keys())[:12], + ) + return + opcode = _ARM_ACTION_MAP[sdk_name] + log.info("SDK action: %s (opcode=%s)", action.name, opcode) try: - # G1 LocoClient exposes ExecuteAction(id) for built-in actions - self._loco_client.ExecuteAction(action.id) - # Built-in actions are blocking on the robot side, but the SDK call returns - # immediately. Give it a sensible default duration so we don't hammer it. - time.sleep(2.5) + self._arm_client.ExecuteAction(opcode) + # Built-in arm actions block on the robot side for ~3s; the SDK + # call returns immediately. Sleep so we don't hammer it back-to-back. + time.sleep(3.0) except Exception as exc: log.error("SDK action %s failed: %s", action.name, exc) diff --git a/voice/audio_manager.py b/voice/audio_manager.py index 064f2d1..363faf9 100644 --- a/voice/audio_manager.py +++ b/voice/audio_manager.py @@ -8,6 +8,7 @@ Device selection is dynamic — read from voice.audio_devices on each refresh. from __future__ import annotations +import json import subprocess import threading import time @@ -15,11 +16,33 @@ import wave from pathlib import Path from typing import Any +try: + import numpy as np + _HAS_NUMPY = True +except ImportError: + np = None + _HAS_NUMPY = False + try: import pyaudio except ImportError: pyaudio = None # optional — only needed for local PCM playback +# G1 AudioClient — used to route playback through the robot chest speaker +# via DDS `PlayStream` (the same pipe Gemini uses). Without this, WAV +# playback would go to the Jetson's built-in audio codec, which isn't +# wired to any audible output on the G1. +try: + from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient + from unitree_sdk2py.g1.audio.g1_audio_api import ( + ROBOT_API_ID_AUDIO_STOP_PLAY, + ) + _HAS_G1_AUDIO = True +except ImportError: + AudioClient = None + ROBOT_API_ID_AUDIO_STOP_PLAY = 0 + _HAS_G1_AUDIO = False + from Project.Sanad.config import ( CHANNELS, CHUNK_SIZE, @@ -87,10 +110,38 @@ class AudioManager: ) self.pya = pyaudio.PyAudio() self.play_lock = threading.Lock() + # Lazily-initialised G1 DDS audio client (for play_wav → chest speaker) + self._g1_audio_client: Any = None # Resolve devices and set PulseAudio defaults at startup self.refresh_devices() ensure_audio_defaults() + def _get_g1_audio_client(self): + """Return a cached G1 AudioClient (DDS) — creates on first use. + + Assumes `ChannelFactoryInitialize` has already been called (our + ArmController does this at startup on eth0). Returns None if the + Unitree SDK is unavailable or init fails. + """ + if not _HAS_G1_AUDIO: + return None + if self._g1_audio_client is not None: + return self._g1_audio_client + try: + c = AudioClient() + c.SetTimeout(5.0) + c.Init() + try: + c.SetVolume(100) + except Exception: + pass + self._g1_audio_client = c + log.info("G1 AudioClient initialized (for chest-speaker playback)") + except Exception as exc: + log.warning("G1 AudioClient init failed: %s", exc) + self._g1_audio_client = None + return self._g1_audio_client + def refresh_devices(self) -> dict[str, str]: """Re-read selected sink/source from audio_devices module.""" sink, source = _resolve_devices() @@ -137,15 +188,85 @@ class AudioManager: stream.close() def play_wav(self, path: Path) -> dict[str, Any]: + """Play a WAV file through the G1 chest speaker via DDS when + available, falling back to the host PulseAudio sink otherwise. + + The G1's built-in audio (Jetson `platform-sound`) isn't wired to + any audible speaker — the robot's loudspeaker is only reachable + over DDS `AudioClient.PlayStream` (same pipe Gemini uses). + """ with wave.open(str(path), "rb") as wf: channels = wf.getnchannels() sw = wf.getsampwidth() rate = wf.getframerate() data = wf.readframes(wf.getnframes()) - self.play_pcm(data, channels, rate, sw) + + # Prefer G1 chest speaker when the Unitree SDK is present + client = self._get_g1_audio_client() + if client is not None and _HAS_NUMPY and sw == 2: + self._play_pcm_via_g1(data, channels, rate) + else: + if client is None and _HAS_G1_AUDIO: + log.warning("G1 AudioClient unavailable, using host PulseAudio sink") + self.play_pcm(data, channels, rate, sw) + duration = len(data) / (rate * channels * sw) if rate else 0 return {"path": str(path), "duration_seconds": round(duration, 3)} + # -- G1 DDS-routed playback -- + + _G1_STREAM_APP = "sanad_playback" + _G1_HW_RATE = 16_000 + + def _play_pcm_via_g1(self, pcm_bytes: bytes, channels: int, source_rate: int) -> None: + """Stream int16 PCM to the G1 chest speaker via AudioClient.PlayStream. + + Converts stereo → mono and resamples to 16 kHz (the rate + AudioClient expects). Uses a fresh stream_id per call so + back-to-back plays don't interfere. + """ + client = self._get_g1_audio_client() + if client is None: + raise RuntimeError("G1 AudioClient not available") + + arr = np.frombuffer(pcm_bytes, dtype=np.int16) + # Stereo → mono average + if channels == 2 and arr.size % 2 == 0: + arr = arr.reshape(-1, 2).mean(axis=1).astype(np.int16) + # Resample to 16 kHz + if source_rate != self._G1_HW_RATE and arr.size: + target_len = max(1, int(len(arr) * self._G1_HW_RATE / source_rate)) + arr = np.interp( + np.linspace(0, len(arr), target_len, endpoint=False), + np.arange(len(arr)), + arr.astype(np.float64), + ).astype(np.int16) + + stream_id = f"wav_{int(time.time() * 1000)}" + # Clear any lingering stream from a previous call + try: + client._Call(ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": self._G1_STREAM_APP})) + except Exception: + pass + time.sleep(0.15) + + # Push the whole clip in one PlayStream — G1 handles buffering + with self.play_lock: + play_start = time.time() + client.PlayStream(self._G1_STREAM_APP, stream_id, arr.tobytes()) + total_sec = len(arr) / self._G1_HW_RATE + # Block until audio has drained (plus a small safety margin) + elapsed = time.time() - play_start + remaining = total_sec - elapsed + 0.3 + if remaining > 0: + time.sleep(remaining) + try: + client._Call(ROBOT_API_ID_AUDIO_STOP_PLAY, + json.dumps({"app_name": self._G1_STREAM_APP})) + except Exception: + pass + # -- recording -- def record_mic(self, duration_sec: float) -> bytes: diff --git a/voice/live_voice_loop.py b/voice/live_voice_loop.py index 7a293e4..da54737 100644 --- a/voice/live_voice_loop.py +++ b/voice/live_voice_loop.py @@ -43,6 +43,7 @@ SANAD_ARM_TXT = SCRIPTS_DIR / _ARM_TXT_NAME TRIGGER_LOG_SIZE = _LV_CFG.get("trigger_log_size", 100) POLL_INTERVAL_SEC = _LV_CFG.get("poll_interval_sec", 0.1) DEFERRED_DEFAULT = _LV_CFG.get("deferred_default", False) +TRIGGER_ENABLED_DEFAULT = bool(_LV_CFG.get("trigger_enabled_default", False)) class LiveVoiceLoop: @@ -60,6 +61,11 @@ class LiveVoiceLoop: # Deferred-trigger toggle (fire on phrase match vs fire after AI responds) self.deferred_mode = DEFERRED_DEFAULT + # Master arm-trigger gate — when False, transcripts are still + # captured (you can watch them on the dashboard) but NO arm + # actions fire. Defaults to OFF so the robot doesn't move + # unexpectedly until the operator opts in. + self.trigger_enabled = TRIGGER_ENABLED_DEFAULT # Trigger history (dashboard log) self.triggers: deque[dict[str, Any]] = deque(maxlen=TRIGGER_LOG_SIZE) @@ -116,13 +122,82 @@ class LiveVoiceLoop: def set_deferred(self, enabled: bool) -> None: self.deferred_mode = bool(enabled) + def set_trigger_enabled(self, enabled: bool) -> None: + """Master arm-trigger gate. When False, phrase matches are ignored. + + Toggle semantics (no queue memory across the gate): + - Always clears any in-flight pending trigger so a late + fallback fire can't happen after disable/enable. + - On enable: snapshots every transcript currently in the + live_sub deque as already-seen. Only NEW speech after this + moment will dispatch — phrases said while the gate was off + don't suddenly fire when you turn it back on. + """ + self.trigger_enabled = bool(enabled) + + # Drop pending fallback timer — a queued "fire in 0.6s" from + # before the toggle must not leak across. + self.state._pending_arm_wave = False + self.state._pending_arm_wave_fired = False + self.state._pending_arm_trigger_fn = None + self.state._pending_arm_fallback_time = 0.0 + + snapshotted = 0 + if self.trigger_enabled: + try: + from Project.Sanad.main import live_sub + if live_sub is not None: + for txt in list(live_sub.user_transcript): + if txt and txt not in self._seen_transcripts: + self._seen_transcripts.add(txt) + snapshotted += 1 + except Exception as exc: + log.warning("set_trigger_enabled: snapshot failed: %s", exc) + + log.info("trigger_enabled=%s (pending cleared, %d transcripts marked seen)", + self.trigger_enabled, snapshotted) + # ── poll loop ──────────────────────────────────────────────── def _poll_loop(self): - """Poll GeminiSubprocess.user_transcript for new user texts.""" + """Poll GeminiSubprocess.user_transcript for new user texts AND + fire any deferred-mode arm trigger whose fallback timer elapsed. + + Without the pending-check, a deferred trigger (`fire_on_wake_match= + False`) would only fire when the NEXT transcript arrives — so if + the user says one sentence and stops, the arm never moves. + """ while not self._stop_event.is_set(): self._check_transcripts() + self._check_pending_trigger() self._stop_event.wait(POLL_INTERVAL_SEC) + def _check_pending_trigger(self): + """Fire a queued deferred trigger if its fallback time has passed.""" + # Master gate — same check as _dispatch + if not self.trigger_enabled: + return + if not getattr(self.state, "_pending_arm_wave", False): + return + if getattr(self.state, "_pending_arm_wave_fired", False): + return + fn = getattr(self.state, "_pending_arm_trigger_fn", None) + if fn is None: + return + fallback_at = float(getattr(self.state, "_pending_arm_fallback_time", 0.0) or 0.0) + if fallback_at <= 0.0 or time.time() < fallback_at: + return + # Gate on arm idle — skip fire if a motion is already running + if self.sanad_arm is not None and getattr(self.sanad_arm, "_is_busy", False): + return + try: + fn() + except Exception as exc: + log.warning("deferred arm trigger failed: %s", exc) + finally: + self.state._pending_arm_wave_fired = True + self.state._pending_arm_wave = False + self.state._pending_arm_trigger_fn = None + def _check_transcripts(self): try: from Project.Sanad.main import live_sub @@ -145,7 +220,9 @@ class LiveVoiceLoop: def _dispatch(self, transcript_text: str) -> None: if not self.wake_dispatch or self.sanad_arm is None: return - + # Master gate — skip arm triggering entirely when disabled + if not self.trigger_enabled: + return # Gate trigger on arm idle if getattr(self.sanad_arm, "_is_busy", False): return @@ -200,6 +277,7 @@ class LiveVoiceLoop: return { "running": self._running, "deferred_mode": self.deferred_mode, + "trigger_enabled": self.trigger_enabled, "last_heard": self.last_heard, "pending_action": pending, "last_action": self.last_action, diff --git a/voice/sanad_voice.py b/voice/sanad_voice.py index 8bb9098..fe1e3b4 100644 --- a/voice/sanad_voice.py +++ b/voice/sanad_voice.py @@ -29,16 +29,45 @@ from __future__ import annotations import array import asyncio +import importlib import json import logging import os import sys import threading import time +import types import wave from datetime import datetime from pathlib import Path +# ───────────────────────────────────────────────────────────────────────────── +# Layout bootstrap — MUST run before any `Project.Sanad.*` import. +# This file runs as a standalone subprocess (spawned by gemini/subprocess.py +# or local/subprocess.py); it can't rely on main.py having set up sys.path. +# Mirrors the dev-vs-deployed detection in main.py. +# dev layout: /Project/Sanad/voice/sanad_voice.py +# deployed layout: /home/unitree/Sanad/voice/sanad_voice.py +# ───────────────────────────────────────────────────────────────────────────── +_SANAD_DIR = Path(__file__).resolve().parent.parent # .../Sanad +_SANAD_PARENT = _SANAD_DIR.parent # .../Project OR /home/unitree + +if _SANAD_PARENT.name == "Project": + _ROOT = _SANAD_PARENT.parent + if str(_ROOT) not in sys.path: + sys.path.insert(0, str(_ROOT)) +else: + if str(_SANAD_PARENT) not in sys.path: + sys.path.insert(0, str(_SANAD_PARENT)) + if "Project" not in sys.modules: + _proj = types.ModuleType("Project") + _proj.__path__ = [] # namespace package marker + sys.modules["Project"] = _proj + if "Project.Sanad" not in sys.modules: + _sanad = importlib.import_module(_SANAD_DIR.name) + sys.modules["Project.Sanad"] = _sanad + sys.modules["Project"].Sanad = _sanad # type: ignore[attr-defined] + from unitree_sdk2py.core.channel import ChannelFactoryInitialize from unitree_sdk2py.g1.audio.g1_audio_client import AudioClient diff --git a/voice/typed_replay.py b/voice/typed_replay.py index 3403d6f..0443dbe 100644 --- a/voice/typed_replay.py +++ b/voice/typed_replay.py @@ -396,22 +396,92 @@ class TypedReplayEngine: return self.audio_mgr.pya.get_sample_size(pyaudio.paInt16) # ── generation ─────────────────────────────────────────────── + async def generate_audio(self, text: str) -> tuple[bytes, list[str]]: - """Send text to Gemini, return (pcm_audio, text_parts).""" + """Route typed text through Gemini Live as the voice, first-try reliable. + + The session's system-prompt sets a persona ("You are Sanad…"), + so the prompt that most reliably gets audio out is a direct + address to the persona with the quoted text. A transparent + retry chain covers the edge cases where the model still + replies with text only. + """ + stripped = text.strip() + if not stripped: + raise ValueError("text cannot be empty") + if self.voice_client is None: raise RuntimeError("voice_client unavailable") if not self.voice_client.connected: await self.voice_client.connect() - return await self.voice_client.send_text(text, owner="typed_replay") + + # Ordered by empirical reliability — first variant wins ~95% of turns. + # The quoted-phrase form is the most consistent trigger for an + # audio-only response with the current Sanad persona prompt. + attempts = [ + f'قل هذا بالضبط وبدون إضافات: "{stripped}"', # Arabic: "Say this exactly, no additions" + f'Say this exactly, nothing else: "{stripped}"', + f'"{stripped}"', + ] + last_parts: list[str] = [] + for idx, wrapped in enumerate(attempts, start=1): + try: + audio_bytes, text_parts = await self.voice_client.send_text( + wrapped, owner="typed_replay") + except Exception as exc: + log.warning("Gemini TTS attempt %d failed: %s", idx, exc) + continue + if audio_bytes: + if idx > 1: + log.info("Gemini TTS succeeded on attempt %d", idx) + return audio_bytes, text_parts + last_parts = text_parts + log.warning("Gemini TTS attempt %d returned no audio — parts: %s", + idx, " | ".join(text_parts or [])[:120]) + return b"", last_parts # ── playback + capture ─────────────────────────────────────── def play_audio(self, audio_bytes: bytes, capture_speaker: bool) -> bytes: - """Play PCM on speaker; optionally capture what was heard.""" + """Play Gemini PCM through the G1 chest speaker (via DDS) when + available; fall back to the host PulseAudio sink otherwise. + + The DDS path is audible on the robot; the PulseAudio path goes + to the Jetson's built-in audio codec, which isn't wired to any + audible output on the G1. `capture_speaker` is only supported + on the PulseAudio path (DDS has no monitor sink). + """ if not audio_bytes: return b"" - if self.audio_mgr is None or self.audio_mgr.pya is None: + if self.audio_mgr is None: raise RuntimeError("audio_mgr unavailable — cannot play") + # Try the G1 chest speaker first + g1_client = None + try: + g1_client = self.audio_mgr._get_g1_audio_client() + except Exception: + g1_client = None + + if g1_client is not None: + if capture_speaker: + log.info("capture_speaker requested, but G1 DDS path has no " + "PulseAudio monitor — capture skipped") + with self._play_lock: + try: + self.audio_mgr._play_pcm_via_g1( + audio_bytes, CHANNELS, RECEIVE_SAMPLE_RATE, + ) + return b"" + except Exception as exc: + log.warning("G1 speaker playback failed, falling back " + "to host PulseAudio sink: %s", exc) + # Fall through to the PyAudio path below + + if self.audio_mgr.pya is None: + raise RuntimeError("audio_mgr has no PyAudio backend for fallback") + + # Fallback: host PulseAudio sink (inaudible on G1, but keeps the + # capture-speaker feature working on desktop/dev setups). with self._play_lock: recorder = None restore_source = False