Marcus/Config/language_tables.json

{
  "_description": "Language tables for voice motion processing. Single source of truth for ALL vocabulary data — no Arabic/English motion words live in code anymore. Adding a new dialect / variant is a JSON-only edit.",
  "_consumers": [
    "Voice/number_words.py — spelled-out numbers → digits (EN + AR)",
    "Voice/canonical_normalizer.py — Arabic structural translation → English",
    "Voice/marcus_voice.py — reverse-command map for memory operations",
    "Voice/sequences.py — control commands that must NOT be captured into recordings"
  ],

  "english_fractions": {
    "_description": "English fractional words. Two semantic flavours: 'additive' (combines with a preceding integer — '3 and a half steps' → 3.5) and 'leading' (standalone — 'half a meter' → 0.5). Both forms map to the same numeric value; flavours are kept separate so the parser knows the syntactic context to apply.",
    "additive": {
      "half":     0.5,
      "halves":   0.5,
      "quarter":  0.25,
      "quarters": 0.25,
      "third":    0.3333,
      "thirds":   0.3333
    },
    "leading": {
      "half":     0.5,
      "quarter":  0.25,
      "third":    0.3333
    }
  },

  "arabic_fractions": {
    "_description": "Arabic fractional words. Same additive vs leading split as English. 'Additive' applies after a digit + ون / و conjunction ('3 ونصف' → 3.5). 'Leading' applies before a unit ('نصف متر' → 0.5 meter). Multiple dialect spellings of half (نصف / نص).",
    "additive": {
      "نصف":  0.5,
      "نص":   0.5,
      "ربع":  0.25,
      "ثلث":  0.3333
    },
    "leading": {
      "نصف":  0.5,
      "نص":   0.5,
      "ربع":  0.25,
      "ثلث":  0.3333
    }
  },

  "english_numbers": {
    "_description": "English number-word parser tables. Used by number_words.py to convert 'ninety degrees' → '90 degrees' before regex matching.",
    "ones": {
      "zero": 0, "one": 1, "two": 2, "three": 3, "four": 4,
      "five": 5, "six": 6, "seven": 7, "eight": 8, "nine": 9,
      "ten": 10, "eleven": 11, "twelve": 12, "thirteen": 13,
      "fourteen": 14, "fifteen": 15, "sixteen": 16,
      "seventeen": 17, "eighteen": 18, "nineteen": 19
    },
    "tens": {
      "twenty": 20, "thirty": 30, "forty": 40, "fifty": 50,
      "sixty": 60, "seventy": 70, "eighty": 80, "ninety": 90
    },
    "scale": {
      "hundred": 100
    },
    "glue": ["and", "-"]
  },

  "arabic_numbers": {
    "_description": "Arabic spelled-out numbers. Order matters — longer multi-word phrases come first so they're matched before their shorter prefixes.",
    "literals": [
      ["ثلاثمائة وستون", 360],
      ["ثلاثمائة وستين", 360],
      ["ثلاث مائة وستون", 360],
      ["ثلاث مائة وستين", 360],
      ["مائة وثمانون", 180],
      ["مائة وثمانين", 180],
      ["مية وثمانين", 180],
      ["مائتان وسبعون", 270],
      ["مائتان وسبعين", 270],
      ["عشرون", 20], ["عشرين", 20],
      ["ثلاثون", 30], ["ثلاثين", 30],
      ["أربعون", 40], ["أربعين", 40], ["اربعون", 40], ["اربعين", 40],
      ["خمسون", 50], ["خمسين", 50],
      ["ستون", 60], ["ستين", 60],
      ["سبعون", 70], ["سبعين", 70],
      ["ثمانون", 80], ["ثمانين", 80],
      ["تسعون", 90], ["تسعين", 90],
      ["مائتان", 200], ["مائتين", 200], ["ميتين", 200],
      ["ثلاثمائة", 300], ["ثلاث مائة", 300],
      ["أربعمائة", 400], ["اربعمائة", 400], ["أربع مائة", 400],
      ["خمسمائة", 500], ["خمس مائة", 500],
      ["مائة", 100], ["مية", 100], ["ميه", 100],
      ["أحد عشر", 11], ["احد عشر", 11],
      ["اثنا عشر", 12], ["اثني عشر", 12],
      ["ثلاثة عشر", 13], ["ثلاث عشرة", 13],
      ["أربعة عشر", 14], ["اربعة عشر", 14],
      ["خمسة عشر", 15], ["خمس عشرة", 15],
      ["ستة عشر", 16], ["ست عشرة", 16],
      ["سبعة عشر", 17],
      ["ثمانية عشر", 18],
      ["تسعة عشر", 19],
      ["واحد", 1], ["واحدة", 1],
      ["اثنان", 2], ["اثنين", 2], ["اثنتان", 2], ["اثنتين", 2],
      ["ثلاثة", 3], ["ثلاث", 3],
      ["أربعة", 4], ["أربع", 4], ["اربعة", 4], ["اربع", 4],
      ["خمسة", 5], ["خمس", 5],
      ["ستة", 6], ["ست", 6],
      ["سبعة", 7], ["سبع", 7],
      ["ثمانية", 8], ["ثمان", 8],
      ["تسعة", 9], ["تسع", 9],
      ["عشرة", 10], ["عشر", 10]
    ]
  },

  "arabic_verbs": {
    "_description": "Arabic verb roots → English gerund. Gerund matches Gemini's English bot_phrases (e.g. 'walking forward'). Multiple Arabic dialect roots can map to the same English verb.",
    "walking": [
      "أمشي", "امشي", "امش",
      "أتحرك", "تحرك",
      "اروح", "روح", "اروحي", "روحي",
      "بمشي", "براح", "أراح",
      "بفوت", "أفوت",
      "سير", "اسير",
      "أتجه", "اتجه", "يتجه", "نتجه",
      "أتوجه", "اتوجه", "يتوجه", "توجه",
      "متوجه", "متجه"
    ],
    "turning": [
      "أستدير", "استدر", "استدير",
      "لفّ", "لف", "ألف", "بلف",
      "دوّر", "دور", "أدور", "بدور",
      "خش", "أخش", "بخش",
      "حوّد", "أحوّد"
    ],
    "walking backward": [
      "أرجع", "ارجع", "برجع",
      "أتراجع", "بتراجع", "تراجع"
    ],
    "walking forward": [
      "أتقدم", "بتقدم", "أتقدّم", "تقدم"
    ]
  },

  "arabic_directions": {
    "_description": "Arabic direction words → English directions. Includes definite-article variants (اليمين), preposition-prefixed forms (لليمين), and dialect alternatives (شمال = left in Levantine/Egyptian).",
    "right": [
      "يميناً", "يمينا",
      "اليمين", "لليمين",
      "يمين"
    ],
    "left": [
      "يساراً", "يسارا",
      "اليسار", "لليسار",
      "يسار",
      "الشمال", "للشمال", "شمال"
    ],
    "forward": [
      "للأمام", "الأمام", "أمام",
      "لقدام", "للقدام", "قدام"
    ],
    "backward": [
      "للخلف", "الخلف",
      "للوراء", "الوراء",
      "لورا", "ورا", "للورا"
    ]
  },

  "arabic_units": {
    "_description": "Arabic unit nouns → English unit. Singular-vs-plural form is preserved so the dispatcher's regex layer (which accepts both) sees a natural form. Dual forms (خطوتين / مترين / درجتين) live in arabic_duals.",
    "step": ["خطوة"],
    "steps": ["خطوات"],
    "meter": ["متر"],
    "meters": ["أمتار"],
    "degree": ["درجة"],
    "degrees": ["درجات"]
  },

  "arabic_duals": {
    "_description": "Arabic dual forms — single words containing both count and unit ('خطوتين' = 'two steps'). Substituted as a single unit BEFORE the verb/dir/unit pass. The English target form must be readable as 'N units' so the regex layer can pick up the count.",
    "2 steps": ["خطوتين"],
    "2 meters": ["مترين", "أمتارين"],
    "2 degrees": ["درجتين"],
    "2 seconds": ["ثانيتين"]
  },

  "arabic_conjunctions": {
    "_description": "Arabic conjunctions translated to space-padded English glue. Subbed early so subsequent verb/dir passes don't accidentally treat the conjunction as a word.",
    " then ": ["ثم", "وبعدين", "بعدين", "وبعد"]
  },

  "arabic_connectives": {
    "_description": "Arabic prepositions / determiners / pronouns. Mostly pass-through; 'إلى' / 'نحو' map to 'to' so walk-to-target patterns work. 'ع' / 'على' get folded into nothing meaningful but are translated to 'on' to avoid leaving raw Arabic mid-string.",
    "to": ["إلى", "نحو", "تجاه", "باتجاه"],
    "on": ["على", "ع"],
    "in": ["في"],
    "this": ["هذا", "هذه"],
    "that": ["ذلك", "تلك"]
  },

  "motion_inverses": {
    "_description": "Pairwise inverse map for reverse_last memory operation. Used by Voice/marcus_voice.py::_reverse_command. Parametric forms (e.g. 'turn left 90 degrees' ↔ 'turn right 90 degrees') are derived in code via regex, NOT listed here — only fixed pairs.",
    "turn right":  "turn left",
    "turn left":   "turn right",
    "move forward":  "move backward",
    "move backward": "move forward",
    "sit down":  "stand up",
    "stand up":  "sit down",
    "raise arm": "lower arm",
    "lower arm": "raise arm"
  },

  "sequence_never_record": {
    "_description": "Canonical commands that must NEVER be captured into a recording session. Control commands (start/save/cancel/play recording, pause/resume, stop, repeat/reverse) would create absurd macros if captured. Used by Voice/sequences.py::record_command.",
    "canonicals": [
      "start recording",
      "save sequence",
      "cancel recording",
      "play sequence",
      "list sequences",
      "delete sequence",
      "pause motion",
      "resume motion",
      "stop",
      "repeat last",
      "reverse last"
    ]
  }
}