Update 2026-04-27 11:16:57

This commit is contained in:
kassam 2026-04-27 11:16:57 +04:00
parent 211d4f52ab
commit 2129cadb11
149 changed files with 2185 additions and 415 deletions

View File

@ -198,13 +198,32 @@ def _init_voice():
_audio_api = AudioAPI()
# Heuristic filter for unusable Gemini transcripts. Gemini emits
# `<noise>` literally when audio is non-speech and `.` for empty
# bursts. These shouldn't pollute the terminal or trigger motion.
def _is_garbage_transcript(t: str) -> bool:
stripped = t.strip().strip(".!?,").strip()
if not stripped:
return True
low = stripped.lower()
if low in ("<noise>", "noise", "yeah", "ok", "okay", "uh", "um", "hmm", "mm"):
return True
# Bare single character (often "." → ".") or all punctuation.
if all(not c.isalnum() for c in stripped):
return True
# Short non-ASCII fragments (e.g. Korean / Thai / Arabic
# snippets that come from echo or distortion).
if len(stripped) <= 6 and not all(ord(c) < 128 for c in stripped):
return True
return False
def _on_command(text, lang):
text = (text or "").strip()
if not text:
return
# One clean, distinctive line so the operator can see exactly
# what the STT backend transcribed before the brain reacts. Everything
# else from the voice subsystem is file-only.
if _is_garbage_transcript(text):
# Skip silently — neither show nor dispatch to brain.
return
print(f' [Sanad] heard: "{text}"')
try:
result = process_command(text)
@ -213,19 +232,16 @@ def _init_voice():
return
if isinstance(result, dict):
sp = (result.get("speak") or "").strip()
if sp and _audio_api:
# Drop Gemini's mic buffer so the robot's own voice
# (picked up by the mic during TtsMaker playback)
# doesn't get transcribed and fed back as a new
# "user" utterance.
if _voice_module is not None:
try: _voice_module.flush_mic()
except Exception: pass
_audio_api.speak(sp)
if _voice_module is not None:
try: _voice_module.flush_mic()
except Exception: pass
# Redraw the Command: prompt that our print clobbered
act = (result.get("action") or "").strip()
# In the Gemini S2S architecture Gemini owns the voice; we
# do NOT call audio_api.speak(sp) here (would collide with
# Gemini's own audio reply). Just show the operator what
# the brain decided so they can correlate motion with
# Gemini's spoken acknowledgement.
if act and act not in ("NONE", "TALK"):
print(f" [Sanad] doing: {act}{f'{sp[:80]}' if sp else ''}")
elif sp:
print(f" [Brain] {sp[:120]}")
print("Command: ", end="", flush=True)
_voice_module = VoiceModule(_audio_api, on_command=_on_command)

View File

@ -7,9 +7,9 @@
},
"stt": {
"_comment": "Voice pipeline: Gemini Live STT (text-mode) → Marcus brain → TtsMaker. Gemini transcribes the user's speech with server-side VAD; Marcus's brain (Brain/marcus_brain.py) decides the reply and speaks it via AudioAPI.speak → TtsMaker. No audio comes back from Gemini (response_modalities=['TEXT']). Install on Jetson: `pip install google-genai`. API key: env MARCUS_GEMINI_API_KEY (or SANAD_GEMINI_API_KEY fallback).",
"_comment": "Voice pipeline: Gemini Live SPEECH-TO-SPEECH (Sanad pattern). Gemini hears the mic, sees camera frames streamed over from Marcus, and replies with its own voice through the G1 speaker. Marcus's brain still dispatches motion commands via a side channel — when the transcript matches 'Sanad + action', Marcus's command_parser fires the motion silently while Gemini speaks the verbal acknowledgement. The brain's `speak` reply is logged but NOT spoken (avoids double-audio collision with Gemini). Install on Jetson (gemini_sdk env): `pip install google-genai`. API key: env MARCUS_GEMINI_API_KEY (or SANAD_GEMINI_API_KEY fallback).",
"_gemini_comment": "Gemini Live STT-only settings. The actual Gemini WebSocket runs in a SEPARATE Python 3.10+ subprocess (Voice/gemini_runner.py) because google-genai requires Python ≥3.9 and marcus is pinned to Python 3.8 by the NVIDIA Jetson torch wheel. The marcus parent process spawns `gemini_python_path -u Voice/gemini_runner.py` and parses the JSON-line transcripts on stdout. Env overrides: MARCUS_GEMINI_API_KEY / MARCUS_GEMINI_MODEL / MARCUS_GEMINI_PYTHON.",
"_gemini_comment": "Gemini Live S2S settings. The actual Gemini WebSocket runs in a SEPARATE Python 3.10+ subprocess (Voice/gemini_runner.py) because google-genai requires Python ≥3.9 and marcus is pinned to Python 3.8 by the NVIDIA Jetson torch wheel. The runner ALSO owns the G1 speaker (unitree_sdk2py works in gemini_sdk env) so Gemini's audio plays directly without IPC. The marcus parent process forwards camera frames to the runner via stdin so Gemini can see what the robot sees. Env overrides: MARCUS_GEMINI_API_KEY / MARCUS_GEMINI_MODEL / MARCUS_GEMINI_VOICE / MARCUS_GEMINI_PYTHON.",
"_gemini_python_path_comment": "Path to a Python 3.10+ binary that has `google-genai` installed (typically a separate conda env, e.g. `gemini_sdk` on this Jetson). Leave empty to auto-detect — the manager tries ~/miniconda3/envs/gemini_sdk/bin/python and a few common alternates. Override at runtime via env MARCUS_GEMINI_PYTHON.",
"gemini_python_path": "",
"gemini_api_key": "AIzaSyDt9Xi83MDZuuPpfwfHyMD92X7ZKdGkqf8",
@ -18,11 +18,26 @@
"gemini_audio_profile": "builtin",
"gemini_chunk_size": 512,
"gemini_send_sample_rate": 16000,
"gemini_receive_sample_rate": 24000,
"gemini_record_enabled": true,
"_gemini_system_prompt_comment": "Marcus brain is the authoritative reply path; Gemini is just an ear here. Keep the prompt short — it tells Gemini to transcribe, not to chat. Override by pointing gemini_system_prompt_file at a text file (relative paths resolve from PROJECT_ROOT).",
"_gemini_camera_comment": "Stream camera frames to Gemini Live so vision answers ('what do you see') are correct rather than hallucinated. Marcus parent grabs JPEG frames via API.camera_api.get_frame() at gemini_frame_interval_sec cadence and pipes them to the runner over stdin. Frame_max_age_sec drops stale frames. Set gemini_send_frames=false to disable (saves API tokens but breaks vision questions).",
"gemini_send_frames": true,
"gemini_frame_interval_sec": 0.5,
"gemini_frame_max_age_sec": 1.5,
"_gemini_barge_comment": "Barge-in = user speaking over Gemini. gemini_barge_loud_chunks_needed loud chunks (currently 5) above barge_threshold interrupt Gemini mid-sentence. echo_suppress_below masks mic frames quieter than the threshold during playback so the mic doesn't re-feed Gemini its own voice. On the G1 the on-board speaker is loud enough that ECHO frames hit ~1500-3000 RMS, well above the earlier 500 barge threshold — that's why earlier sessions saw self-interrupt loops. Tuned values: threshold 3500 (only a real shout cuts Gemini off), echo_suppress_below 3500 (mute everything below that during AI playback — anything quieter than the speaker's own echo is treated as silence). ai_speak_grace_sec 0.5 gives Gemini a half-second runway before barge can fire. If you find users genuinely can't interrupt Gemini, drop barge_threshold to ~2500 and accept some self-interrupts.",
"gemini_barge_threshold": 3500,
"gemini_barge_loud_chunks_needed": 5,
"gemini_barge_cooldown_sec": 0.5,
"gemini_echo_suppress_below": 3500,
"gemini_ai_speak_grace_sec": 0.5,
"gemini_begin_stream_pause_sec": 0.15,
"gemini_wait_finish_margin_sec": 0.3,
"_gemini_system_prompt_comment": "Persona for Gemini Live's spoken reply. Gemini owns the voice in this architecture, so make this prompt match the experience you want users to hear. The robot's body is controlled by Marcus's brain via a side channel — Gemini doesn't need to invoke motions itself, just acknowledge them naturally. Override by pointing gemini_system_prompt_file at a text file (relative paths resolve from PROJECT_ROOT).",
"gemini_system_prompt_file": "",
"gemini_system_prompt": "You are Sanad's ear. Your only job is to transcribe what the user says to Sanad, the humanoid robot. Do not respond conversationally. Do not speculate. Do not invent dialogue. If the user addresses Sanad, return exactly what they said. Stay completely silent in your response.",
"gemini_system_prompt": "You are Sanad (سند), a friendly humanoid robot assistant made by YS Lootah Technology in Dubai. Your body is a Unitree G1 humanoid. You can see the user through your camera and talk to them in real time. You speak both English and Arabic naturally — match the user's language in your reply. Reply briefly, usually one or two sentences. When the user asks 'what do you see' / 'ماذا ترى' or describes the scene, look at the camera frames you're receiving and answer accurately based on what's actually there; do not invent details. CRITICAL ACTION RULE — physical motion only happens when the user addresses you by name 'Sanad' (English) or 'سند' (Arabic) AND gives an action. Examples: 'Sanad, turn right' → say 'Turning right.' 'سند، استدر يميناً' → say 'أستدير يميناً.' Plain conversation or vision queries WITHOUT 'Sanad' / 'سند' are fine but DO NOT trigger any motion confirmation — just chat or describe. NEVER say 'Turning' / 'Moving' / 'Sitting' / 'أستدير' / 'أتحرك' unless the user actually said 'Sanad' / 'سند' first. When you do say a motion confirmation, use the same language the user used. Motion verbs supported (English / Arabic): turn left/right (استدر يميناً/يساراً), turn around (استدر للخلف), move forward/back (تحرك للأمام/للخلف), sit down (اجلس), stand up (قف), wave hello (لوّح), raise/lower arm (ارفع/اخفض يدك), come here (تعال), follow me (اتبعني), stay here (ابق هنا), go home (اذهب للبيت), stop (توقف), patrol (طوف), look around (انظر حولك).",
"_gemini_vad_comment": "Gemini server-side VAD tuning. start_sensitivity/end_sensitivity accept 'START_SENSITIVITY_HIGH|LOW' and 'END_SENSITIVITY_HIGH|LOW'. HIGH start = eagerly treats any speech-like sound as turn start, LOW = more conservative. LOW end = longer patience before ending a turn, HIGH = cuts turn sooner. prefix_padding_ms preserves audio from just before speech is detected. silence_duration_ms is how long of quiet ends a turn.",
"gemini_vad_start_sensitivity": "START_SENSITIVITY_HIGH",
@ -43,37 +58,7 @@
"command_cooldown_sec": 1.5,
"min_transcription_length": 3,
"_vocab_comment": "wake_words = variants Gemini may produce for 'Sanad' — word-boundary matched in the user transcript. command_vocab = canonical command phrases. The dispatcher fuzzy-matches the transcript (after wake-word strip) against command_vocab. garbage_patterns lists short noise phrases Gemini sometimes emits — rejected before fuzzy-match unless they happen to equal a vocab entry exactly. Edit these to add new vocabulary — NO code change required.",
"wake_words": [
"sanad", "sannad", "sennad", "sunnad", "sinnad", "sonnad",
"sanat", "sunnat", "sonnat", "sinnat", "sennat",
"sanid", "sanud", "saned", "sanod", "sanaad",
"senad", "sinad", "sonad", "sunad",
"sanah", "sanath", "sanadh", "sonadh",
"samad", "somad", "sumad",
"thanad", "zanad",
"sa nad", "san ad", "san odd", "san add"
],
"command_vocab": [
"what do you see", "what can you see", "look around",
"come to me", "come here", "come back", "come closer",
"approach", "get closer", "come",
"go home", "go back", "go forward", "go backward",
"go left", "go right", "go",
"sit down", "stand up", "sit", "stand",
"raise arm", "lower arm", "wave hello", "wave", "point",
"turn left", "turn right", "turn around",
"move forward", "move backward", "move back",
"move left", "move right",
"walk forward", "walk backward", "walk back",
"step forward", "step back", "step left", "step right",
"forward", "backward", "back", "left", "right",
"patrol", "stop", "halt", "wait", "pause", "freeze", "hold",
"hello", "hi", "hey", "help",
"who are you", "where are you", "where am i", "what is your name",
"remember this", "forget", "do it again", "repeat", "undo",
"follow me", "stay here"
],
"_vocab_comment": "wake_words and command_vocab now live in Config/instruction.json — single source of truth for all bilingual phrase tables (wake variants + per-action user_phrases + per-action bot_phrases, English AND Arabic). garbage_patterns stays here because it's noise filtering, not voice instruction.",
"garbage_patterns": [
"thanks for watching", "thank you for watching",
"thank you", "thanks",
@ -103,9 +88,9 @@
},
"speaker": {
"_comment": "G1 on-board speaker parameters. dds_interface is the robot's DDS NIC; app_name is the stream label used by AudioClient.PlayStream.",
"_comment": "G1 on-board speaker parameters. dds_interface is the robot's DDS NIC; app_name is the stream label used by AudioClient.PlayStream. volume is 0-100; lowered from 100 to 70 because the on-board mic picks up the on-board speaker's echo strongly enough to feed Gemini Live a self-loop at full volume — see the gemini_barge_* and gemini_echo_suppress_below tunings in the stt section.",
"dds_interface": "eth0",
"volume": 100,
"volume": 70,
"app_name": "sanad",
"begin_stream_pause_sec": 0.15,
"wait_finish_margin_sec": 0.3

View File

@ -1,250 +0,0 @@
{
"_description": "Gemini action dispatch — maps spoken phrases to canonical motion commands. Mirrors Sanad's scripts/sanad_arm.txt pattern (Project/Sanad/scripts/sanad_arm.txt) but in JSON with action groups instead of a Python-set file. When stt.backend='gemini', Voice/marcus_voice.py::_dispatch_gemini_command matches the user's transcript (after stripping 'Sanad') against 'phrases' under each action and fires on_command with the action's 'canonical' string. Edit this file to add new spoken variants WITHOUT touching code.",
"_format": "actions.<action_key>.phrases — array of spoken variants (lowercase, punctuation stripped). Match is whole-word, case-insensitive. One phrase hit = fire.\nactions.<action_key>.canonical — the string passed to self._on_command(text, 'en'). Must be a recognised command in Brain/command_parser.py.\nactions.<action_key>.description — human-only; dispatcher ignores it.\nNon-motion conversation ('how are you', 'who are you', 'what do you see') is NOT listed here — Gemini answers those naturally via voice. Only physical actions live in this file.",
"settings": {
"_comment": "Dispatcher behaviour. require_wake_word=true means the transcript must contain 'Sanad' (or a fuzzy variant from stt.wake_words) before any phrase is considered — matches the current Marcus persona rule. fire_on_wake_match=true fires the action instantly on transcript; false defers until Gemini's turn_complete (robot speaks the acknowledgement first, then moves) — mirrors Sanad's fire_on_wake_match flag in voice/text_utils.maybe_trigger_arm.",
"trigger_enabled": true,
"require_wake_word": true,
"fire_on_wake_match": true,
"stream_buffer_sec": 2.0,
"dedup_window_sec": 2.0,
"repeat_suppress_sec": 0.25,
"pending_action_ttl_sec": 6.0
},
"actions": {
"turn_left": {
"canonical": "turn left",
"description": "Rotate in place 90° to the left.",
"phrases": [
"turn left",
"rotate left",
"spin left",
"go left",
"face left"
]
},
"turn_right": {
"canonical": "turn right",
"description": "Rotate in place 90° to the right.",
"phrases": [
"turn right",
"rotate right",
"spin right",
"go right",
"face right"
]
},
"turn_around": {
"canonical": "turn around",
"description": "Rotate 180°.",
"phrases": [
"turn around",
"turn back",
"spin around",
"about face",
"face the other way"
]
},
"move_forward": {
"canonical": "move forward",
"description": "Walk forward one step interval.",
"phrases": [
"move forward",
"go forward",
"walk forward",
"step forward",
"forward",
"keep going",
"walk ahead"
]
},
"move_back": {
"canonical": "move backward",
"description": "Walk backward one step interval.",
"phrases": [
"move back",
"move backward",
"go back",
"go backward",
"walk back",
"walk backward",
"step back",
"backward",
"reverse"
]
},
"step_left": {
"canonical": "move left",
"description": "Sidestep left.",
"phrases": [
"step left",
"move left",
"slide left",
"strafe left"
]
},
"step_right": {
"canonical": "move right",
"description": "Sidestep right.",
"phrases": [
"step right",
"move right",
"slide right",
"strafe right"
]
},
"stop": {
"canonical": "stop",
"description": "Halt current motion immediately.",
"phrases": [
"stop",
"halt",
"wait",
"pause",
"freeze",
"hold",
"stop moving",
"stand still",
"don't move"
]
},
"sit_down": {
"canonical": "sit down",
"description": "Sit down to the ground from standing.",
"phrases": [
"sit down",
"sit",
"take a seat",
"have a seat"
]
},
"stand_up": {
"canonical": "stand up",
"description": "Stand up from sitting.",
"phrases": [
"stand up",
"stand",
"get up",
"rise"
]
},
"wave_hello": {
"canonical": "wave hello",
"description": "Wave with the right arm.",
"phrases": [
"wave hello",
"wave",
"say hi",
"greet",
"wave to me",
"wave at me"
]
},
"raise_arm": {
"canonical": "raise arm",
"description": "Raise the right arm straight up.",
"phrases": [
"raise arm",
"raise your arm",
"lift your arm",
"arm up",
"hand up"
]
},
"lower_arm": {
"canonical": "lower arm",
"description": "Return the arm to the resting position.",
"phrases": [
"lower arm",
"lower your arm",
"drop your arm",
"arm down",
"hand down",
"rest your arm"
]
},
"point": {
"canonical": "point",
"description": "Point with the right arm (used after 'look at ...').",
"phrases": [
"point",
"point at it",
"point to it",
"point there"
]
},
"come_here": {
"canonical": "come here",
"description": "Approach the speaker.",
"phrases": [
"come here",
"come to me",
"come closer",
"approach",
"get closer",
"come over here"
]
},
"follow_me": {
"canonical": "follow me",
"description": "Follow the speaker until told to stop.",
"phrases": [
"follow me",
"come with me",
"walk with me"
]
},
"stay_here": {
"canonical": "stay here",
"description": "Stop following and hold position.",
"phrases": [
"stay here",
"stay",
"wait here",
"hold position",
"don't follow me"
]
},
"go_home": {
"canonical": "go home",
"description": "Return to the home position.",
"phrases": [
"go home",
"return home",
"head home",
"go back home"
]
},
"patrol": {
"canonical": "patrol",
"description": "Start the patrol routine.",
"phrases": [
"patrol",
"start patrol",
"begin patrol",
"patrol the area",
"walk the route"
]
},
"look_around": {
"canonical": "look around",
"description": "Scan the environment (vision sweep).",
"phrases": [
"look around",
"scan the room",
"scan around",
"survey the area",
"have a look around"
]
}
}
}

277
Config/instruction.json Normal file
View File

@ -0,0 +1,277 @@
{
"_description": "Bilingual voice command instructions — single source of truth for the voice dispatch tables. Loaded by Voice/marcus_voice.py at module level. Adding a new motion command, a new accent variant, a new Arabic phrasing, or fixing a misheard wake-word transcription is a JSON-only edit; no Python change required.",
"_format": "wake_words = whole-word substrings the dispatch gate looks for in the user's transcript. Any match (English or Arabic) opens motion for the current turn. actions = per-motion phrase tables. Each action has a `canonical` string (what marcus_brain receives), `user_phrases` (what the user might SAY when asking for the motion — used for fuzzy-match + Arabic-to-English translation after wake-word strip), and `bot_phrases` (what Gemini might SPEAK when acknowledging — used by the bot-side dispatcher to fire motion off Gemini's own confirmation). All matching is substring-based; English entries are matched case-insensitively, Arabic entries match as-is. Keep the canonical string consistent with Brain/command_parser.py vocabulary.",
"wake_words": {
"_comment": "All variants of the robot's name 'Sanad' — the gate that authorises motion. Add new mishearings here when you see them in logs/transcript.log under HEARD lines that should have triggered motion but didn't.",
"english": [
"sanad", "sannad", "sennad", "sunnad", "sinnad", "sonnad",
"sanat", "sunnat", "sonnat", "sinnat", "sennat",
"sanid", "sanud", "saned", "sanod", "sanaad",
"senad", "sinad", "sonad", "sunad",
"sanah", "sanath", "sanadh", "sonadh",
"samad", "somad", "sumad",
"thanad", "zanad",
"sa nad", "san ad", "san odd", "san add"
],
"arabic": [
"سند", "سنّاد", "ساند", "سنود", "سنَد", "سنّد", "سَند",
"يا سند", "يا سنّاد", "يا ساند", "يا سَند"
]
},
"actions": {
"turn_right": {
"canonical": "turn right",
"user_phrases": {
"english": ["turn right", "rotate right", "spin right", "go right", "face right", "right"],
"arabic": ["استدر يميناً", "استدر يمينا", "ادر يميناً", "ادر يمينا", "لف يمين", "لف يميناً", "يمين"]
},
"bot_phrases": {
"english": ["turning right"],
"arabic": ["أستدير يميناً", "أستدير يمينا", "استدير يميناً", "استدير يمينا", "ألف يميناً", "ألف يمينا"]
}
},
"turn_left": {
"canonical": "turn left",
"user_phrases": {
"english": ["turn left", "rotate left", "spin left", "go left", "face left", "left"],
"arabic": ["استدر يساراً", "استدر يسارا", "ادر يساراً", "ادر يسارا", "لف يسار", "لف يساراً", "يسار", "شمال"]
},
"bot_phrases": {
"english": ["turning left"],
"arabic": ["أستدير يساراً", "أستدير يسارا", "استدير يساراً", "استدير يسارا", "ألف يساراً", "ألف يسارا"]
}
},
"turn_around": {
"canonical": "turn around",
"user_phrases": {
"english": ["turn around", "turn back", "spin around", "about face", "face the other way"],
"arabic": ["استدر للخلف", "استدر إلى الوراء", "اتجه للخلف", "ادر للخلف", "ارجع وجهك"]
},
"bot_phrases": {
"english": ["turning around"],
"arabic": ["أستدير للخلف", "أستدير إلى الوراء", "استدير للخلف"]
}
},
"move_forward": {
"canonical": "move forward",
"user_phrases": {
"english": ["move forward", "go forward", "walk forward", "step forward", "forward", "keep going", "walk ahead", "move ahead"],
"arabic": ["تحرك للأمام", "تحرك إلى الأمام", "اذهب للأمام", "امش للأمام", "تقدم", "للأمام", "أمام"]
},
"bot_phrases": {
"english": ["moving forward", "walking forward", "stepping forward", "going forward", "going ahead"],
"arabic": ["أتحرك للأمام", "أتحرك إلى الأمام", "أتقدم", "أمشي للأمام", "أذهب للأمام"]
}
},
"move_backward": {
"canonical": "move backward",
"user_phrases": {
"english": ["move back", "move backward", "go back", "go backward", "walk back", "walk backward", "step back", "backward", "back", "reverse"],
"arabic": ["تحرك للخلف", "تحرك إلى الخلف", "اذهب للخلف", "امش للخلف", "ارجع", "ارجع للخلف", "للخلف", "خلف"]
},
"bot_phrases": {
"english": ["moving backward", "moving back", "walking backward", "walking back", "stepping back", "going back"],
"arabic": ["أتحرك للخلف", "أتحرك إلى الخلف", "أرجع", "أمشي للخلف", "أعود للخلف"]
}
},
"move_right": {
"canonical": "move right",
"user_phrases": {
"english": ["step right", "move right", "slide right", "strafe right", "sidestep right"],
"arabic": ["تحرك يميناً", "تحرك يمينا", "خطوة يمين", "اتجه يميناً"]
},
"bot_phrases": {
"english": ["moving right", "stepping right", "sliding right"],
"arabic": ["أتحرك يميناً", "أتحرك يمينا", "أخطو يميناً"]
}
},
"move_left": {
"canonical": "move left",
"user_phrases": {
"english": ["step left", "move left", "slide left", "strafe left", "sidestep left"],
"arabic": ["تحرك يساراً", "تحرك يسارا", "خطوة يسار", "اتجه يساراً"]
},
"bot_phrases": {
"english": ["moving left", "stepping left", "sliding left"],
"arabic": ["أتحرك يساراً", "أتحرك يسارا", "أخطو يساراً"]
}
},
"stop": {
"canonical": "stop",
"user_phrases": {
"english": ["stop", "halt", "wait", "pause", "freeze", "hold", "stop moving", "stand still", "don't move"],
"arabic": ["توقف", "قف مكانك", "اوقف", "انتظر", "اثبت", "لا تتحرك"]
},
"bot_phrases": {
"english": ["stopping", "halting", "holding"],
"arabic": ["أتوقف", "توقفت", "أنتظر"]
}
},
"sit_down": {
"canonical": "sit down",
"user_phrases": {
"english": ["sit down", "sit", "take a seat", "have a seat"],
"arabic": ["اجلس", "ارتح", "اقعد"]
},
"bot_phrases": {
"english": ["sitting down", "sitting"],
"arabic": ["أجلس", "أقعد", "جلست"]
}
},
"stand_up": {
"canonical": "stand up",
"user_phrases": {
"english": ["stand up", "stand", "get up", "rise"],
"arabic": ["قف", "انهض", "ارفع نفسك"]
},
"bot_phrases": {
"english": ["standing up", "getting up", "rising"],
"arabic": ["أقف", "أنهض", "وقفت"]
}
},
"wave_hello": {
"canonical": "wave hello",
"user_phrases": {
"english": ["wave hello", "wave", "say hi", "greet", "wave to me", "wave at me"],
"arabic": ["لوّح", "لوح", "لوّح بيدك", "حيّ", "سلّم"]
},
"bot_phrases": {
"english": ["waving hello", "waving", "saying hi", "greeting"],
"arabic": ["ألوّح", "ألوح", "ألوّح بيدي", "أحيّ", "أسلّم"]
}
},
"raise_arm": {
"canonical": "raise arm",
"user_phrases": {
"english": ["raise arm", "raise your arm", "lift your arm", "arm up", "hand up"],
"arabic": ["ارفع يدك", "ارفع ذراعك", "اليد للأعلى"]
},
"bot_phrases": {
"english": ["raising arm", "raising my arm", "lifting my arm", "arm up"],
"arabic": ["أرفع يدي", "أرفع ذراعي"]
}
},
"lower_arm": {
"canonical": "lower arm",
"user_phrases": {
"english": ["lower arm", "lower your arm", "drop your arm", "arm down", "hand down", "rest your arm"],
"arabic": ["اخفض يدك", "اخفض ذراعك", "اليد للأسفل", "نزل يدك"]
},
"bot_phrases": {
"english": ["lowering arm", "lowering my arm", "dropping my arm", "arm down"],
"arabic": ["أخفض يدي", "أخفض ذراعي", "أنزل يدي"]
}
},
"point": {
"canonical": "point",
"user_phrases": {
"english": ["point", "point at it", "point to it", "point there"],
"arabic": ["اشر", "أشِر", "اشر إلى", "اشر هناك"]
},
"bot_phrases": {
"english": ["pointing"],
"arabic": ["أشير"]
}
},
"come_here": {
"canonical": "come here",
"user_phrases": {
"english": ["come here", "come to me", "come closer", "approach", "get closer", "come over here", "come"],
"arabic": ["تعال", "تعال هنا", "تعال إليّ", "اقترب", "تقرب"]
},
"bot_phrases": {
"english": ["coming over", "coming to you", "approaching"],
"arabic": ["آتي إليك", "أقترب", "أتقرّب"]
}
},
"follow_me": {
"canonical": "follow me",
"user_phrases": {
"english": ["follow me", "come with me", "walk with me"],
"arabic": ["اتبعني", "تعال معي", "امش معي"]
},
"bot_phrases": {
"english": ["following you", "following", "coming with you"],
"arabic": ["أتبعك", "آتي معك", "أمشي معك"]
}
},
"stay_here": {
"canonical": "stay here",
"user_phrases": {
"english": ["stay here", "stay", "wait here", "hold position", "don't follow me"],
"arabic": ["ابق هنا", "اثبت هنا", "انتظر هنا", "لا تتبعني"]
},
"bot_phrases": {
"english": ["staying here", "staying", "waiting here"],
"arabic": ["أبقى هنا", "أنتظر هنا", "أثبت هنا"]
}
},
"go_home": {
"canonical": "go home",
"user_phrases": {
"english": ["go home", "return home", "head home", "go back home"],
"arabic": ["اذهب للبيت", "اذهب إلى البيت", "ارجع للبيت", "عُد للبيت"]
},
"bot_phrases": {
"english": ["going home", "heading home", "returning home"],
"arabic": ["أعود للبيت", "أذهب للبيت", "أتجه للبيت"]
}
},
"patrol": {
"canonical": "patrol",
"user_phrases": {
"english": ["patrol", "start patrol", "begin patrol", "patrol the area", "walk the route"],
"arabic": ["طوف", "ابدأ الدورية", "ابدأ التطواف", "افحص المكان"]
},
"bot_phrases": {
"english": ["patrolling", "starting patrol", "beginning patrol"],
"arabic": ["أطوف", "أبدأ الدورية", "أبدأ التطواف"]
}
},
"look_around": {
"canonical": "look around",
"user_phrases": {
"english": ["look around", "scan the room", "scan around", "survey the area", "have a look around"],
"arabic": ["انظر حولك", "تفحص المكان", "افحص المكان", "تطلع حولك"]
},
"bot_phrases": {
"english": ["looking around", "scanning around", "surveying the area"],
"arabic": ["أنظر حولي", "أتفحص المكان", "أتطلع حولي"]
}
},
"what_do_you_see": {
"canonical": "what do you see",
"user_phrases": {
"english": ["what do you see", "what can you see", "describe this", "describe what you see", "tell me what you see"],
"arabic": ["ماذا ترى", "ماذا تشاهد", "صف ما تراه", "أخبرني ماذا ترى"]
},
"bot_phrases": {
"english": [],
"arabic": []
}
}
}
}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,128 @@
[
{
"time": "08:59:54",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:00:04",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:00:48",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:02:00",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:02:07",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:02:26",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:03:37",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:03:53",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:04:00",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:04:38",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:04:47",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:05:09",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:05:18",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:05:21",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:05:24",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:05:31",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:05:38",
"cmd": "turn left",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:06:40",
"cmd": "move forward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:07:48",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:07:57",
"cmd": "move backward",
"response": "local command",
"duration_s": 0.0
},
{
"time": "09:09:04",
"cmd": "turn right",
"response": "local command",
"duration_s": 0.0
}
]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
{}

Some files were not shown because too many files have changed in this diff Show More