Marcus/Config/language_tables.json

223 lines
9.1 KiB
JSON

{
"_description": "Language tables for voice motion processing. Single source of truth for ALL vocabulary data — no Arabic/English motion words live in code anymore. Adding a new dialect / variant is a JSON-only edit.",
"_consumers": [
"Voice/number_words.py — spelled-out numbers → digits (EN + AR)",
"Voice/canonical_normalizer.py — Arabic structural translation → English",
"Voice/marcus_voice.py — reverse-command map for memory operations",
"Voice/sequences.py — control commands that must NOT be captured into recordings"
],
"english_fractions": {
"_description": "English fractional words. Two semantic flavours: 'additive' (combines with a preceding integer — '3 and a half steps' → 3.5) and 'leading' (standalone — 'half a meter' → 0.5). Both forms map to the same numeric value; flavours are kept separate so the parser knows the syntactic context to apply.",
"additive": {
"half": 0.5,
"halves": 0.5,
"quarter": 0.25,
"quarters": 0.25,
"third": 0.3333,
"thirds": 0.3333
},
"leading": {
"half": 0.5,
"quarter": 0.25,
"third": 0.3333
}
},
"arabic_fractions": {
"_description": "Arabic fractional words. Same additive vs leading split as English. 'Additive' applies after a digit + ون / و conjunction ('3 ونصف' → 3.5). 'Leading' applies before a unit ('نصف متر' → 0.5 meter). Multiple dialect spellings of half (نصف / نص).",
"additive": {
"نصف": 0.5,
"نص": 0.5,
"ربع": 0.25,
"ثلث": 0.3333
},
"leading": {
"نصف": 0.5,
"نص": 0.5,
"ربع": 0.25,
"ثلث": 0.3333
}
},
"english_numbers": {
"_description": "English number-word parser tables. Used by number_words.py to convert 'ninety degrees' → '90 degrees' before regex matching.",
"ones": {
"zero": 0, "one": 1, "two": 2, "three": 3, "four": 4,
"five": 5, "six": 6, "seven": 7, "eight": 8, "nine": 9,
"ten": 10, "eleven": 11, "twelve": 12, "thirteen": 13,
"fourteen": 14, "fifteen": 15, "sixteen": 16,
"seventeen": 17, "eighteen": 18, "nineteen": 19
},
"tens": {
"twenty": 20, "thirty": 30, "forty": 40, "fifty": 50,
"sixty": 60, "seventy": 70, "eighty": 80, "ninety": 90
},
"scale": {
"hundred": 100
},
"glue": ["and", "-"]
},
"arabic_numbers": {
"_description": "Arabic spelled-out numbers. Order matters — longer multi-word phrases come first so they're matched before their shorter prefixes.",
"literals": [
["ثلاثمائة وستون", 360],
["ثلاثمائة وستين", 360],
["ثلاث مائة وستون", 360],
["ثلاث مائة وستين", 360],
["مائة وثمانون", 180],
["مائة وثمانين", 180],
["مية وثمانين", 180],
["مائتان وسبعون", 270],
["مائتان وسبعين", 270],
["عشرون", 20], ["عشرين", 20],
["ثلاثون", 30], ["ثلاثين", 30],
["أربعون", 40], ["أربعين", 40], ["اربعون", 40], ["اربعين", 40],
["خمسون", 50], ["خمسين", 50],
["ستون", 60], ["ستين", 60],
["سبعون", 70], ["سبعين", 70],
["ثمانون", 80], ["ثمانين", 80],
["تسعون", 90], ["تسعين", 90],
["مائتان", 200], ["مائتين", 200], ["ميتين", 200],
["ثلاثمائة", 300], ["ثلاث مائة", 300],
["أربعمائة", 400], ["اربعمائة", 400], ["أربع مائة", 400],
["خمسمائة", 500], ["خمس مائة", 500],
["مائة", 100], ["مية", 100], ["ميه", 100],
["أحد عشر", 11], ["احد عشر", 11],
["اثنا عشر", 12], ["اثني عشر", 12],
["ثلاثة عشر", 13], ["ثلاث عشرة", 13],
["أربعة عشر", 14], ["اربعة عشر", 14],
["خمسة عشر", 15], ["خمس عشرة", 15],
["ستة عشر", 16], ["ست عشرة", 16],
["سبعة عشر", 17],
["ثمانية عشر", 18],
["تسعة عشر", 19],
["واحد", 1], ["واحدة", 1],
["اثنان", 2], ["اثنين", 2], ["اثنتان", 2], ["اثنتين", 2],
["ثلاثة", 3], ["ثلاث", 3],
["أربعة", 4], ["أربع", 4], ["اربعة", 4], ["اربع", 4],
["خمسة", 5], ["خمس", 5],
["ستة", 6], ["ست", 6],
["سبعة", 7], ["سبع", 7],
["ثمانية", 8], ["ثمان", 8],
["تسعة", 9], ["تسع", 9],
["عشرة", 10], ["عشر", 10]
]
},
"arabic_verbs": {
"_description": "Arabic verb roots → English gerund. Gerund matches Gemini's English bot_phrases (e.g. 'walking forward'). Multiple Arabic dialect roots can map to the same English verb.",
"walking": [
"أمشي", "امشي", "امش",
"أتحرك", "تحرك",
"اروح", "روح", "اروحي", "روحي",
"بمشي", "براح", "أراح",
"بفوت", "أفوت",
"سير", "اسير",
"أتجه", "اتجه", "يتجه", "نتجه",
"أتوجه", "اتوجه", "يتوجه", "توجه",
"متوجه", "متجه"
],
"turning": [
"أستدير", "استدر", "استدير",
"لفّ", "لف", "ألف", "بلف",
"دوّر", "دور", "أدور", "بدور",
"خش", "أخش", "بخش",
"حوّد", "أحوّد"
],
"walking backward": [
"أرجع", "ارجع", "برجع",
"أتراجع", "بتراجع", "تراجع"
],
"walking forward": [
"أتقدم", "بتقدم", "أتقدّم", "تقدم"
]
},
"arabic_directions": {
"_description": "Arabic direction words → English directions. Includes definite-article variants (اليمين), preposition-prefixed forms (لليمين), and dialect alternatives (شمال = left in Levantine/Egyptian).",
"right": [
"يميناً", "يمينا",
"اليمين", "لليمين",
"يمين"
],
"left": [
"يساراً", "يسارا",
"اليسار", "لليسار",
"يسار",
"الشمال", "للشمال", "شمال"
],
"forward": [
"للأمام", "الأمام", "أمام",
"لقدام", "للقدام", "قدام"
],
"backward": [
"للخلف", "الخلف",
"للوراء", "الوراء",
"لورا", "ورا", "للورا"
]
},
"arabic_units": {
"_description": "Arabic unit nouns → English unit. Singular-vs-plural form is preserved so the dispatcher's regex layer (which accepts both) sees a natural form. Dual forms (خطوتين / مترين / درجتين) live in arabic_duals.",
"step": ["خطوة"],
"steps": ["خطوات"],
"meter": ["متر"],
"meters": ["أمتار"],
"degree": ["درجة"],
"degrees": ["درجات"]
},
"arabic_duals": {
"_description": "Arabic dual forms — single words containing both count and unit ('خطوتين' = 'two steps'). Substituted as a single unit BEFORE the verb/dir/unit pass. The English target form must be readable as 'N units' so the regex layer can pick up the count.",
"2 steps": ["خطوتين"],
"2 meters": ["مترين", "أمتارين"],
"2 degrees": ["درجتين"],
"2 seconds": ["ثانيتين"]
},
"arabic_conjunctions": {
"_description": "Arabic conjunctions translated to space-padded English glue. Subbed early so subsequent verb/dir passes don't accidentally treat the conjunction as a word.",
" then ": ["ثم", "وبعدين", "بعدين", "وبعد"]
},
"arabic_connectives": {
"_description": "Arabic prepositions / determiners / pronouns. Mostly pass-through; 'إلى' / 'نحو' map to 'to' so walk-to-target patterns work. 'ع' / 'على' get folded into nothing meaningful but are translated to 'on' to avoid leaving raw Arabic mid-string.",
"to": ["إلى", "نحو", "تجاه", "باتجاه"],
"on": ["على", "ع"],
"in": ["في"],
"this": ["هذا", "هذه"],
"that": ["ذلك", "تلك"]
},
"motion_inverses": {
"_description": "Pairwise inverse map for reverse_last memory operation. Used by Voice/marcus_voice.py::_reverse_command. Parametric forms (e.g. 'turn left 90 degrees' ↔ 'turn right 90 degrees') are derived in code via regex, NOT listed here — only fixed pairs.",
"turn right": "turn left",
"turn left": "turn right",
"move forward": "move backward",
"move backward": "move forward",
"sit down": "stand up",
"stand up": "sit down",
"raise arm": "lower arm",
"lower arm": "raise arm"
},
"sequence_never_record": {
"_description": "Canonical commands that must NEVER be captured into a recording session. Control commands (start/save/cancel/play recording, pause/resume, stop, repeat/reverse) would create absurd macros if captured. Used by Voice/sequences.py::record_command.",
"canonicals": [
"start recording",
"save sequence",
"cancel recording",
"play sequence",
"list sequences",
"delete sequence",
"pause motion",
"resume motion",
"stop",
"repeat last",
"reverse last"
]
}
}