""" _language_tables.py — load Config/language_tables.json once and cache. All Voice/* modules that need vocabulary data import from here instead of hardcoding tables. The JSON file is the single source of truth for Arabic verbs / directions / units / numbers / English number words / motion inverses / sequence-never-record list. Loud failure on missing/malformed file (matches Marcus's 'no silent config degradation' policy from earlier rounds). Public API: LANG = load() ← dict with all top-level sections LANG["english_numbers"]["ones"] → {word: int} LANG["arabic_verbs"]["walking"] → list of Arabic root strings LANG["arabic_duals"]["2 steps"] → list of dual-form strings LANG["motion_inverses"] → flat dict of pairs LANG["sequence_never_record"]["canonicals"] → list of canonical names Convenience flatteners (build the inverse maps consumers usually want): flat_arabic_verbs() → {ar_root: en_gerund} flat_arabic_directions() → {ar_word: en_direction} flat_arabic_units() → {ar_unit: en_unit} flat_arabic_duals() → {ar_dual_word: 'N units'} flat_arabic_conjunctions()→ {ar_conj: ' english_glue '} flat_arabic_connectives() → {ar_word: en_word} """ from __future__ import annotations import json import os import sys from typing import Dict, List _PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) if _PROJECT_DIR not in sys.path: sys.path.insert(0, _PROJECT_DIR) _CONFIG_PATH = os.path.join(_PROJECT_DIR, "Config", "language_tables.json") _CACHE: dict = {} def load() -> dict: """Load and cache the language tables. Raises RuntimeError on missing/malformed config — voice processing is unusable without it, so loud failure surfaces the problem at startup rather than during a user demo.""" if _CACHE: return _CACHE if not os.path.isfile(_CONFIG_PATH): raise RuntimeError( "Config/language_tables.json missing at {} — voice motion " "vocabulary cannot load without it. Restore from git or " "rebuild from the documented schema.".format(_CONFIG_PATH) ) try: with open(_CONFIG_PATH, "r", encoding="utf-8") as f: data = json.load(f) or {} except Exception as e: raise RuntimeError( "Config/language_tables.json malformed: {}".format(e), ) from e # Quick sanity — make sure the top-level keys we care about exist required = ( "english_numbers", "arabic_numbers", "english_fractions", "arabic_fractions", "arabic_verbs", "arabic_directions", "arabic_units", "arabic_duals", "arabic_conjunctions", "arabic_connectives", "motion_inverses", "sequence_never_record", ) missing = [k for k in required if k not in data] if missing: raise RuntimeError( "Config/language_tables.json missing top-level sections: {}" .format(missing) ) _CACHE.update(data) return _CACHE def _invert_grouped(grouped: dict, drop_keys: tuple = ("_description",)) -> Dict[str, str]: """Helper: flip a {english_label: [arabic_word, ...]} dict into {arabic_word: english_label}. Skips keys named in drop_keys (used to skip the '_description' annotation in the JSON).""" out: Dict[str, str] = {} for en, ar_list in grouped.items(): if en in drop_keys: continue if not isinstance(ar_list, list): continue for ar in ar_list: if isinstance(ar, str) and ar.strip(): out[ar] = en return out def flat_arabic_verbs() -> Dict[str, str]: return _invert_grouped(load()["arabic_verbs"]) def flat_arabic_directions() -> Dict[str, str]: return _invert_grouped(load()["arabic_directions"]) def flat_arabic_units() -> Dict[str, str]: return _invert_grouped(load()["arabic_units"]) def flat_arabic_duals() -> Dict[str, str]: return _invert_grouped(load()["arabic_duals"]) def flat_arabic_conjunctions() -> Dict[str, str]: return _invert_grouped(load()["arabic_conjunctions"]) def flat_arabic_connectives() -> Dict[str, str]: return _invert_grouped(load()["arabic_connectives"]) def english_numbers_ones() -> Dict[str, int]: return {k: int(v) for k, v in load()["english_numbers"]["ones"].items() if k != "_description"} def english_numbers_tens() -> Dict[str, int]: return {k: int(v) for k, v in load()["english_numbers"]["tens"].items() if k != "_description"} def english_numbers_scale() -> Dict[str, int]: return {k: int(v) for k, v in load()["english_numbers"]["scale"].items() if k != "_description"} def english_numbers_glue() -> set: return set(load()["english_numbers"]["glue"]) def arabic_number_literals() -> List[tuple]: """List of (arabic_word, integer_value) pairs, in declaration order. Caller is expected to keep ordering (longest-first) when applying.""" out: List[tuple] = [] for entry in load()["arabic_numbers"].get("literals", []): if isinstance(entry, list) and len(entry) == 2: ar, val = entry if isinstance(ar, str) and isinstance(val, int): out.append((ar, val)) return out def motion_inverses() -> Dict[str, str]: return {k: v for k, v in load()["motion_inverses"].items() if k != "_description"} def sequence_never_record() -> set: return set(load()["sequence_never_record"].get("canonicals", [])) def english_fractions_additive() -> Dict[str, float]: """English fraction words that COMBINE with a preceding integer ('3 and a half' → 3 + 0.5).""" return {k: float(v) for k, v in load()["english_fractions"]["additive"].items() if k != "_description"} def english_fractions_leading() -> Dict[str, float]: """English fraction words that STAND ALONE before a unit ('half a meter' → 0.5 meter).""" return {k: float(v) for k, v in load()["english_fractions"]["leading"].items() if k != "_description"} def arabic_fractions_additive() -> Dict[str, float]: """Arabic fraction words that COMBINE with a preceding digit via و conjunction ('3 ونصف' → 3.5).""" return {k: float(v) for k, v in load()["arabic_fractions"]["additive"].items() if k != "_description"} def arabic_fractions_leading() -> Dict[str, float]: """Arabic fraction words STANDING ALONE before a unit ('نصف متر' → 0.5 meter).""" return {k: float(v) for k, v in load()["arabic_fractions"]["leading"].items() if k != "_description"} def arabic_unit_words() -> set: """Set of all Arabic unit words from arabic_units. Used by the fraction parser to detect 'N ونصف' patterns.""" out = set() for vals in load()["arabic_units"].values(): if isinstance(vals, list): for v in vals: if isinstance(v, str) and v.strip(): out.add(v) return out