223 lines
9.1 KiB
JSON
223 lines
9.1 KiB
JSON
{
|
|
"_description": "Language tables for voice motion processing. Single source of truth for ALL vocabulary data — no Arabic/English motion words live in code anymore. Adding a new dialect / variant is a JSON-only edit.",
|
|
"_consumers": [
|
|
"Voice/number_words.py — spelled-out numbers → digits (EN + AR)",
|
|
"Voice/canonical_normalizer.py — Arabic structural translation → English",
|
|
"Voice/marcus_voice.py — reverse-command map for memory operations",
|
|
"Voice/sequences.py — control commands that must NOT be captured into recordings"
|
|
],
|
|
|
|
"english_fractions": {
|
|
"_description": "English fractional words. Two semantic flavours: 'additive' (combines with a preceding integer — '3 and a half steps' → 3.5) and 'leading' (standalone — 'half a meter' → 0.5). Both forms map to the same numeric value; flavours are kept separate so the parser knows the syntactic context to apply.",
|
|
"additive": {
|
|
"half": 0.5,
|
|
"halves": 0.5,
|
|
"quarter": 0.25,
|
|
"quarters": 0.25,
|
|
"third": 0.3333,
|
|
"thirds": 0.3333
|
|
},
|
|
"leading": {
|
|
"half": 0.5,
|
|
"quarter": 0.25,
|
|
"third": 0.3333
|
|
}
|
|
},
|
|
|
|
"arabic_fractions": {
|
|
"_description": "Arabic fractional words. Same additive vs leading split as English. 'Additive' applies after a digit + ون / و conjunction ('3 ونصف' → 3.5). 'Leading' applies before a unit ('نصف متر' → 0.5 meter). Multiple dialect spellings of half (نصف / نص).",
|
|
"additive": {
|
|
"نصف": 0.5,
|
|
"نص": 0.5,
|
|
"ربع": 0.25,
|
|
"ثلث": 0.3333
|
|
},
|
|
"leading": {
|
|
"نصف": 0.5,
|
|
"نص": 0.5,
|
|
"ربع": 0.25,
|
|
"ثلث": 0.3333
|
|
}
|
|
},
|
|
|
|
"english_numbers": {
|
|
"_description": "English number-word parser tables. Used by number_words.py to convert 'ninety degrees' → '90 degrees' before regex matching.",
|
|
"ones": {
|
|
"zero": 0, "one": 1, "two": 2, "three": 3, "four": 4,
|
|
"five": 5, "six": 6, "seven": 7, "eight": 8, "nine": 9,
|
|
"ten": 10, "eleven": 11, "twelve": 12, "thirteen": 13,
|
|
"fourteen": 14, "fifteen": 15, "sixteen": 16,
|
|
"seventeen": 17, "eighteen": 18, "nineteen": 19
|
|
},
|
|
"tens": {
|
|
"twenty": 20, "thirty": 30, "forty": 40, "fifty": 50,
|
|
"sixty": 60, "seventy": 70, "eighty": 80, "ninety": 90
|
|
},
|
|
"scale": {
|
|
"hundred": 100
|
|
},
|
|
"glue": ["and", "-"]
|
|
},
|
|
|
|
"arabic_numbers": {
|
|
"_description": "Arabic spelled-out numbers. Order matters — longer multi-word phrases come first so they're matched before their shorter prefixes.",
|
|
"literals": [
|
|
["ثلاثمائة وستون", 360],
|
|
["ثلاثمائة وستين", 360],
|
|
["ثلاث مائة وستون", 360],
|
|
["ثلاث مائة وستين", 360],
|
|
["مائة وثمانون", 180],
|
|
["مائة وثمانين", 180],
|
|
["مية وثمانين", 180],
|
|
["مائتان وسبعون", 270],
|
|
["مائتان وسبعين", 270],
|
|
["عشرون", 20], ["عشرين", 20],
|
|
["ثلاثون", 30], ["ثلاثين", 30],
|
|
["أربعون", 40], ["أربعين", 40], ["اربعون", 40], ["اربعين", 40],
|
|
["خمسون", 50], ["خمسين", 50],
|
|
["ستون", 60], ["ستين", 60],
|
|
["سبعون", 70], ["سبعين", 70],
|
|
["ثمانون", 80], ["ثمانين", 80],
|
|
["تسعون", 90], ["تسعين", 90],
|
|
["مائتان", 200], ["مائتين", 200], ["ميتين", 200],
|
|
["ثلاثمائة", 300], ["ثلاث مائة", 300],
|
|
["أربعمائة", 400], ["اربعمائة", 400], ["أربع مائة", 400],
|
|
["خمسمائة", 500], ["خمس مائة", 500],
|
|
["مائة", 100], ["مية", 100], ["ميه", 100],
|
|
["أحد عشر", 11], ["احد عشر", 11],
|
|
["اثنا عشر", 12], ["اثني عشر", 12],
|
|
["ثلاثة عشر", 13], ["ثلاث عشرة", 13],
|
|
["أربعة عشر", 14], ["اربعة عشر", 14],
|
|
["خمسة عشر", 15], ["خمس عشرة", 15],
|
|
["ستة عشر", 16], ["ست عشرة", 16],
|
|
["سبعة عشر", 17],
|
|
["ثمانية عشر", 18],
|
|
["تسعة عشر", 19],
|
|
["واحد", 1], ["واحدة", 1],
|
|
["اثنان", 2], ["اثنين", 2], ["اثنتان", 2], ["اثنتين", 2],
|
|
["ثلاثة", 3], ["ثلاث", 3],
|
|
["أربعة", 4], ["أربع", 4], ["اربعة", 4], ["اربع", 4],
|
|
["خمسة", 5], ["خمس", 5],
|
|
["ستة", 6], ["ست", 6],
|
|
["سبعة", 7], ["سبع", 7],
|
|
["ثمانية", 8], ["ثمان", 8],
|
|
["تسعة", 9], ["تسع", 9],
|
|
["عشرة", 10], ["عشر", 10]
|
|
]
|
|
},
|
|
|
|
"arabic_verbs": {
|
|
"_description": "Arabic verb roots → English gerund. Gerund matches Gemini's English bot_phrases (e.g. 'walking forward'). Multiple Arabic dialect roots can map to the same English verb.",
|
|
"walking": [
|
|
"أمشي", "امشي", "امش",
|
|
"أتحرك", "تحرك",
|
|
"اروح", "روح", "اروحي", "روحي",
|
|
"بمشي", "براح", "أراح",
|
|
"بفوت", "أفوت",
|
|
"سير", "اسير",
|
|
"أتجه", "اتجه", "يتجه", "نتجه",
|
|
"أتوجه", "اتوجه", "يتوجه", "توجه",
|
|
"متوجه", "متجه"
|
|
],
|
|
"turning": [
|
|
"أستدير", "استدر", "استدير",
|
|
"لفّ", "لف", "ألف", "بلف",
|
|
"دوّر", "دور", "أدور", "بدور",
|
|
"خش", "أخش", "بخش",
|
|
"حوّد", "أحوّد"
|
|
],
|
|
"walking backward": [
|
|
"أرجع", "ارجع", "برجع",
|
|
"أتراجع", "بتراجع", "تراجع"
|
|
],
|
|
"walking forward": [
|
|
"أتقدم", "بتقدم", "أتقدّم", "تقدم"
|
|
]
|
|
},
|
|
|
|
"arabic_directions": {
|
|
"_description": "Arabic direction words → English directions. Includes definite-article variants (اليمين), preposition-prefixed forms (لليمين), and dialect alternatives (شمال = left in Levantine/Egyptian).",
|
|
"right": [
|
|
"يميناً", "يمينا",
|
|
"اليمين", "لليمين",
|
|
"يمين"
|
|
],
|
|
"left": [
|
|
"يساراً", "يسارا",
|
|
"اليسار", "لليسار",
|
|
"يسار",
|
|
"الشمال", "للشمال", "شمال"
|
|
],
|
|
"forward": [
|
|
"للأمام", "الأمام", "أمام",
|
|
"لقدام", "للقدام", "قدام"
|
|
],
|
|
"backward": [
|
|
"للخلف", "الخلف",
|
|
"للوراء", "الوراء",
|
|
"لورا", "ورا", "للورا"
|
|
]
|
|
},
|
|
|
|
"arabic_units": {
|
|
"_description": "Arabic unit nouns → English unit. Singular-vs-plural form is preserved so the dispatcher's regex layer (which accepts both) sees a natural form. Dual forms (خطوتين / مترين / درجتين) live in arabic_duals.",
|
|
"step": ["خطوة"],
|
|
"steps": ["خطوات"],
|
|
"meter": ["متر"],
|
|
"meters": ["أمتار"],
|
|
"degree": ["درجة"],
|
|
"degrees": ["درجات"]
|
|
},
|
|
|
|
"arabic_duals": {
|
|
"_description": "Arabic dual forms — single words containing both count and unit ('خطوتين' = 'two steps'). Substituted as a single unit BEFORE the verb/dir/unit pass. The English target form must be readable as 'N units' so the regex layer can pick up the count.",
|
|
"2 steps": ["خطوتين"],
|
|
"2 meters": ["مترين", "أمتارين"],
|
|
"2 degrees": ["درجتين"],
|
|
"2 seconds": ["ثانيتين"]
|
|
},
|
|
|
|
"arabic_conjunctions": {
|
|
"_description": "Arabic conjunctions translated to space-padded English glue. Subbed early so subsequent verb/dir passes don't accidentally treat the conjunction as a word.",
|
|
" then ": ["ثم", "وبعدين", "بعدين", "وبعد"]
|
|
},
|
|
|
|
"arabic_connectives": {
|
|
"_description": "Arabic prepositions / determiners / pronouns. Mostly pass-through; 'إلى' / 'نحو' map to 'to' so walk-to-target patterns work. 'ع' / 'على' get folded into nothing meaningful but are translated to 'on' to avoid leaving raw Arabic mid-string.",
|
|
"to": ["إلى", "نحو", "تجاه", "باتجاه"],
|
|
"on": ["على", "ع"],
|
|
"in": ["في"],
|
|
"this": ["هذا", "هذه"],
|
|
"that": ["ذلك", "تلك"]
|
|
},
|
|
|
|
"motion_inverses": {
|
|
"_description": "Pairwise inverse map for reverse_last memory operation. Used by Voice/marcus_voice.py::_reverse_command. Parametric forms (e.g. 'turn left 90 degrees' ↔ 'turn right 90 degrees') are derived in code via regex, NOT listed here — only fixed pairs.",
|
|
"turn right": "turn left",
|
|
"turn left": "turn right",
|
|
"move forward": "move backward",
|
|
"move backward": "move forward",
|
|
"sit down": "stand up",
|
|
"stand up": "sit down",
|
|
"raise arm": "lower arm",
|
|
"lower arm": "raise arm"
|
|
},
|
|
|
|
"sequence_never_record": {
|
|
"_description": "Canonical commands that must NEVER be captured into a recording session. Control commands (start/save/cancel/play recording, pause/resume, stop, repeat/reverse) would create absurd macros if captured. Used by Voice/sequences.py::record_command.",
|
|
"canonicals": [
|
|
"start recording",
|
|
"save sequence",
|
|
"cancel recording",
|
|
"play sequence",
|
|
"list sequences",
|
|
"delete sequence",
|
|
"pause motion",
|
|
"resume motion",
|
|
"stop",
|
|
"repeat last",
|
|
"reverse last"
|
|
]
|
|
}
|
|
}
|