""" number_words.py — convert spelled-out numbers to digits. Used by Voice/marcus_voice.py to preprocess Gemini's spoken text before the dispatcher scans it for parametric motion phrases. Reason: some Gemini Live voices occasionally speak 'ninety degrees' instead of '90 degrees' even when the persona prompt asks for digits, and the parametric regexes only recognise digits. Scope (intentional): - English compound numbers up to 999 ('three hundred sixty', 'one hundred and eighty', 'forty-five'). - Arabic ones/tens/hundreds with و-conjunction ('تسعون', 'تسعين', 'مائة وثمانون', 'ثلاثمائة وستون', 'خمس', 'خمسة'). - Word-boundary-aware so it does NOT eat the substring 'one' inside 'someone' or 'thirty' inside 'thirtysomething'. Out-of-scope: - Decimals ('two point five'). - Numbers above 999 (motion vocab maxes around 360 degrees / 30 meters / 100 steps; 999 is plenty of headroom). - Ordinals ('first', 'الأول') — those don't appear in motion text. API: >>> normalise_numbers("Walking forward five steps.") 'Walking forward 5 steps.' >>> normalise_numbers("أستدير تسعين درجة") 'أستدير 90 درجة' >>> normalise_numbers("turn three hundred sixty degrees") 'turn 360 degrees' """ from __future__ import annotations import re # ─── English ───────────────────────────────────────────────────── # # Number-word tables loaded from Config/language_tables.json. Adding # a new word (e.g. 'twenty-five' as a single hyphenated token, or a # new dialectal form) is a JSON edit, not a Python change. from Voice._language_tables import ( english_numbers_ones, english_numbers_tens, english_numbers_scale, english_numbers_glue, ) _EN_ONES = english_numbers_ones() _EN_TENS = english_numbers_tens() _EN_SCALE = english_numbers_scale() _EN_GLUE = english_numbers_glue() _EN_NUMBER_TOKEN = ( set(_EN_ONES.keys()) | set(_EN_TENS.keys()) | set(_EN_SCALE.keys()) | _EN_GLUE ) def _en_words_to_int(tokens: list) -> int: """Convert a list of English number-word tokens to an integer. Tokens are lowercase and stripped. Glue ('and', '-') is ignored. Returns the parsed int. Caller guarantees tokens are all in _EN_NUMBER_TOKEN.""" total = 0 current = 0 for t in tokens: if t in _EN_GLUE: continue if t in _EN_ONES: current += _EN_ONES[t] elif t in _EN_TENS: current += _EN_TENS[t] elif t in _EN_SCALE: current = max(current, 1) * _EN_SCALE[t] total += current current = 0 return total + current # Match a maximal run of English number-word tokens. Word-boundary on # each side so we don't eat 'one' inside 'someone' or 'ten' inside # 'often'. Allow hyphens between (twenty-five) and 'and' between scales. _EN_RUN = re.compile( r"(?:(?<=^)|(?<=[\s\.,!?;:\"\'\(\)\[\]\{\}\-]))" r"((?:" + "|".join(re.escape(w) for w in sorted(_EN_NUMBER_TOKEN, key=len, reverse=True)) + r")(?:[\s\-]+(?:" + "|".join(re.escape(w) for w in sorted(_EN_NUMBER_TOKEN, key=len, reverse=True)) + r"))*)" r"(?=$|[\s\.,!?;:\"\'\(\)\[\]\{\}\-])", re.IGNORECASE, ) def _normalise_english(text: str) -> str: def _sub(match): run = match.group(1) # Tokenise the run, splitting on whitespace and hyphens. toks = [t for t in re.split(r"[\s\-]+", run.lower()) if t] # If the run is JUST a single glue word ('and'), don't replace. digit_toks = [t for t in toks if t not in _EN_GLUE] if not digit_toks: return run # If the run is exactly one ones-word and the surrounding context # likely doesn't refer to a count (e.g. 'one' in 'one of the # things'), still replace — it's safer to over-digitize for the # parametric scan than under-digitize. The dispatcher only fires # when the digit appears in a parametric pattern context anyway. try: value = _en_words_to_int(digit_toks) except Exception: return run return str(value) return _EN_RUN.sub(_sub, text) # ─── Arabic ────────────────────────────────────────────────────── # # Arabic spelled-out numbers. The list lives in # Config/language_tables.json under arabic_numbers.literals. Adding # new dialectal variants (Egyptian / Maghrebi specific forms) is a # JSON edit, not a Python change. Order in the JSON is preserved here # (longest-first so multi-token phrases match before their prefixes). from Voice._language_tables import arabic_number_literals _AR_LITERALS = arabic_number_literals() def _normalise_arabic(text: str) -> str: # Apply longest first so multi-word values claim before singles. sorted_lits = sorted(_AR_LITERALS, key=lambda p: len(p[0]), reverse=True) for word, value in sorted_lits: # Substring replacement is safe for Arabic here because our # literals are full Arabic tokens with characters outside # English wordspace; no risk of eating into other words. Still # use word boundaries (whitespace OR start/end OR punctuation) # to avoid eating 'خمس' inside 'خمسة'. pattern = ( r"(? str: """Convert spelled-out numbers to digits in `text`. Idempotent — running it twice produces the same result. Returns the original text unchanged if it contains no recognisable number words. Order: integer-words pass FIRST (so 'three and a half' becomes '3 and a half' before fraction handling), then fractions ('3 and a half' → '3.5'). Without that order the fraction pass wouldn't see digits to attach to.""" if not text: return text out = _normalise_english(text) out = _normalise_arabic(out) out = _apply_fractions(out) return out # ─── Fraction parser (English + Arabic) ────────────────────────── # # Tables loaded from Config/language_tables.json. Two flavours: # - additive : combines with a preceding digit ('3 and a half' # / '3 ونصف' → 3.5). Includes a special-case # handler for Arabic 'N ونصف' where the # fraction trails after the unit noun. # - leading : standalone before a unit ('half a meter' / 'نصف # متر' → 0.5 meter / 0.5 متر). from Voice._language_tables import ( english_fractions_additive, english_fractions_leading, arabic_fractions_additive, arabic_fractions_leading, arabic_unit_words, ) _EN_FRAC_ADD = english_fractions_additive() _EN_FRAC_LEADING = english_fractions_leading() _AR_FRAC_ADD = arabic_fractions_additive() _AR_FRAC_LEADING = arabic_fractions_leading() _AR_UNITS = arabic_unit_words() def _fmt_decimal(v: float) -> str: """Format a float without trailing zeros: 3.0 → '3', 3.5 → '3.5', 1.79 → '1.79'.""" if v == int(v): return str(int(v)) s = "{:.4f}".format(v).rstrip("0").rstrip(".") return s def _apply_fractions(text: str) -> str: """Convert fractional expressions to decimal digits. Patterns handled (all idempotent — leaves digit-only text alone): EN '3 and a half steps' / '3 and half' → '3.5 steps' EN 'half a meter' / 'half meter' → '0.5 meter' EN 'a quarter step' → '0.25 step' AR '3 ونصف خطوات' → '3.5 خطوات' AR '3 خطوات ونصف' (trailing fraction) → '3.5 خطوات' AR 'نصف متر' → '0.5 متر' AR 'متر ونصف' (no preceding count) → '0.5 متر' (interpreted as 1.5 if a leading 1 is implicit; we treat as 0.5 — explicit form '1 ونصف متر' is preferred). """ if not text: return text # ── ENGLISH additive: ' and (a/an)? ' ───────────────── en_add_alt = "|".join(re.escape(k) for k in sorted(_EN_FRAC_ADD.keys(), key=len, reverse=True)) if en_add_alt: def _en_add(m): n = float(m.group(1)) frac_word = m.group(2).lower() return _fmt_decimal(n + _EN_FRAC_ADD[frac_word]) text = re.sub( r"\b(\d+(?:\.\d+)?)\s+and\s+(?:an?\s+)?(" + en_add_alt + r")\b", _en_add, text, flags=re.IGNORECASE, ) # ── ENGLISH leading: '(a/an)? (a/an)? ' ─────────── en_lead_alt = "|".join(re.escape(k) for k in sorted(_EN_FRAC_LEADING.keys(), key=len, reverse=True)) if en_lead_alt: def _en_lead(m): frac_word = m.group(1).lower() return _fmt_decimal(_EN_FRAC_LEADING[frac_word]) + " " text = re.sub( r"\b(?:an?\s+)?(" + en_lead_alt + r")\s+(?:an?\s+)?(?=[A-Za-z])", _en_lead, text, flags=re.IGNORECASE, ) # ── ARABIC trailing-fraction with unit: 'N ونصف' ───────── if _AR_FRAC_ADD and _AR_UNITS: unit_alt = "|".join(re.escape(u) for u in sorted(_AR_UNITS, key=len, reverse=True)) ar_add_alt = "|".join(re.escape(k) for k in sorted(_AR_FRAC_ADD.keys(), key=len, reverse=True)) def _ar_trail(m): n = float(m.group(1)) unit = m.group(2) frac_word = m.group(3) return "{} {}".format(_fmt_decimal(n + _AR_FRAC_ADD[frac_word]), unit) text = re.sub( r"(\d+(?:\.\d+)?)\s+(" + unit_alt + r")\s+و(" + ar_add_alt + r")\b", _ar_trail, text, ) # ── ARABIC additive: 'N ونصف' / 'N و نصف' ────────────────────── if _AR_FRAC_ADD: ar_add_alt = "|".join(re.escape(k) for k in sorted(_AR_FRAC_ADD.keys(), key=len, reverse=True)) def _ar_add(m): n = float(m.group(1)) frac_word = m.group(2) return _fmt_decimal(n + _AR_FRAC_ADD[frac_word]) text = re.sub( r"(\d+(?:\.\d+)?)\s*و\s*(" + ar_add_alt + r")\b", _ar_add, text, ) # ── ARABIC leading: ' ' ──────────────────────────── if _AR_FRAC_LEADING: ar_lead_alt = "|".join(re.escape(k) for k in sorted(_AR_FRAC_LEADING.keys(), key=len, reverse=True)) def _ar_lead(m): frac_word = m.group(1) return _fmt_decimal(_AR_FRAC_LEADING[frac_word]) + " " # Boundary: previous char is non-Arabic-letter, next char is # an Arabic letter (so 'نصف' inside another word is preserved). text = re.sub( r"(? {got!r}") print(f"\n{ok}/{ok+bad} passed")