AI_Photographer/Gemini/sanad_text_utils.py
2026-04-12 18:52:37 +04:00

373 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import ast
import os
import re
import time
import asyncio
from Core import settings as config
# ==================================================
# 🔤 Arabic & English normalization
# ==================================================
def _norm_ar(s: str) -> str:
s = (s or "").strip().lower()
if not s:
return ""
# Arabic punctuation: ؟ ، ؛
s = re.sub(r"[\u061F\u060C\u061B]", " ", s)
# ✅ Keep Arabic, English letters, word chars, and spaces
s = re.sub(r"[^\w\s\u0600-\u06FFa-zA-Z]", " ", s)
# collapse spaces
s = re.sub(r"\s+", " ", s).strip()
# normalize hamza/alif variants
s = s.replace("أ", "ا").replace("إ", "ا").replace("آ", "ا")
# ta marbuta -> ha, alif maqsoora -> ya
s = s.replace("ة", "ه").replace("ى", "ي")
# tatweel
s = s.replace("ـ", "")
# common nickname normalization
s = s.replace("ابو", "بو")
return s.strip()
# ==================================================
# 📂 Load wake phrases from a python file (sanad_arm.txt)
# Modified to search the local Scripts folder if not found next to module
# ==================================================
def load_arm_phrases(filename: str = "sanad_arm.txt", *, var_name: str = "WAKE_PHRASES") -> set[str]:
    """Load and normalize wake phrases from a file found near this module.

    Search order: module dir, module dir / Scripts, parent / Scripts. The file
    may be a legacy Python-style file assigning a literal set/list/tuple to
    ``var_name`` (parsed safely via ast, never exec'd), or a plain-text file
    with one phrase per line ('#' lines are comments).

    Raises:
        FileNotFoundError: if no candidate path exists.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    candidates = [
        os.path.join(base_dir, filename),
        os.path.join(base_dir, "Scripts", filename),
        os.path.join(base_dir, "..", "Scripts", filename),
    ]
    # First existing candidate wins.
    arm_path = next((c for c in candidates if os.path.exists(c)), None)
    if arm_path is None:
        raise FileNotFoundError(f"Arm phrases file not found in candidates: {candidates}")
    print(f"📂 Loading arm phrases from: {arm_path} (var={var_name})")
    with open(arm_path, "r", encoding="utf-8-sig") as handle:
        raw = handle.read()
    # Legacy format: a python file assigning WAKE_PHRASES = {...}.
    # ast.parse + ast.literal_eval keeps this safe (no code execution).
    try:
        for node in ast.walk(ast.parse(raw)):
            if not isinstance(node, ast.Assign):
                continue
            for target in node.targets:
                if isinstance(target, ast.Name) and target.id == var_name:
                    value = ast.literal_eval(node.value)
                    if isinstance(value, (set, list, tuple)):
                        normalized = {_norm_ar(str(item)) for item in value if str(item).strip()}
                        normalized.discard("")
                        print(f"✅ Loaded {len(normalized)} wake phrases from file variable: {var_name}")
                        return normalized
    except Exception:
        # Not valid python / not a literal assignment: treat as plain text.
        pass
    # Fallback: newline-separated phrases, '#' prefix marks a comment line.
    stripped = (line.strip() for line in raw.splitlines())
    normalized = {_norm_ar(line) for line in stripped if line and not line.startswith("#")}
    normalized.discard("")
    print(f"✅ Loaded {len(normalized)} wake phrases from plain text file: {arm_path}")
    return normalized
# ==================================================
# 🧠 Wake phrase matching + scheduling
# ==================================================
def _is_valid_text(s: str) -> bool:
# Allow Arabic OR English letters
has_ar = bool(re.search(r"[\u0600-\u06FF]", s or ""))
has_en = bool(re.search(r"[a-zA-Z]", s or ""))
return has_ar or has_en
def _strip_ya_prefix(s: str) -> str:
s = (s or "").strip()
if not s:
return ""
if s.startswith("يا "):
return s[3:].strip()
if s.startswith("يا"):
return s[2:].strip()
return s
def _remove_al_prefix_words(text: str) -> str:
if not text:
return ""
parts = text.split()
out = []
for w in parts:
if w.startswith("ال") and len(w) > 2:
out.append(w[2:])
else:
out.append(w)
return " ".join(out).strip()
def _maybe_trigger_arm(
    obj,
    transcript_text: str,
    wake_phrases: set[str],
    *,
    fire_on_wake_match: bool = False,
    arm_trigger_fn=None,
) -> bool:
    """Feed one ASR transcript piece into a rolling buffer and report wake-phrase hits.

    All mutable state (buffers, dedup timestamps, pending-arm flags) lives as
    attributes on ``obj`` and is lazily initialized below, so the same holder
    object can be reused across successive ASR callbacks.

    Args:
        obj: State holder; attributes are created on first call if missing.
        transcript_text: Raw transcript piece from the ASR engine.
        wake_phrases: Candidate phrases; each is normalized per call.
        fire_on_wake_match: When True, schedule ``arm_trigger_fn`` immediately
            on a match; otherwise record a "pending arm wave" for later.
        arm_trigger_fn: Optional zero-arg callable invoked on trigger.

    Returns:
        True when a wake phrase matched (trigger fired or queued), else False.
    """
    if not transcript_text or not wake_phrases:
        return False
    # initialization of state variables (copied from original logic)
    if not hasattr(obj, "_asr_buf"):
        obj._asr_buf = ""
    if not hasattr(obj, "_asr_last_time"):
        obj._asr_last_time = 0.0
    if not hasattr(obj, "ASR_WINDOW_SEC"):
        obj.ASR_WINDOW_SEC = 2.0
    if not hasattr(obj, "ASR_SHORT_TOKEN_BONUS_SEC"):
        obj.ASR_SHORT_TOKEN_BONUS_SEC = 1.0
    if not hasattr(obj, "ASR_JOIN_NO_SPACE_MAXLEN"):
        obj.ASR_JOIN_NO_SPACE_MAXLEN = 2
    if not hasattr(obj, "ASR_MAX_CHARS"):
        obj.ASR_MAX_CHARS = 120
    if not hasattr(obj, "_last_trigger_norm"):
        obj._last_trigger_norm = ""
    if not hasattr(obj, "_last_trigger_time"):
        obj._last_trigger_time = 0.0
    if not hasattr(obj, "TRIGGER_DEDUP_WINDOW"):
        obj.TRIGGER_DEDUP_WINDOW = 2.0
    if not hasattr(obj, "_pending_arm_wave"):
        obj._pending_arm_wave = False
    if not hasattr(obj, "_pending_arm_wave_fired"):
        obj._pending_arm_wave_fired = False
    if not hasattr(obj, "_pending_arm_wave_set_time"):
        obj._pending_arm_wave_set_time = 0.0
    if not hasattr(obj, "PENDING_ARM_TTL"):
        obj.PENDING_ARM_TTL = 6.0
    if not hasattr(obj, "_pending_arm_trigger_fn"):
        obj._pending_arm_trigger_fn = None
    if not hasattr(obj, "_pending_arm_fallback_time"):
        obj._pending_arm_fallback_time = 0.0
    if not hasattr(obj, "_last_piece_call_norm"):
        obj._last_piece_call_norm = ""
    if not hasattr(obj, "_last_piece_call_time"):
        obj._last_piece_call_time = 0.0
    if not hasattr(obj, "_asr_stream"):
        obj._asr_stream = ""
    if not hasattr(obj, "ASR_STREAM_MAX_CHARS"):
        obj.ASR_STREAM_MAX_CHARS = 80
    # Tunables read fresh each call so callers can override them on obj.
    dup_call_window = float(getattr(obj, "DUP_CALL_WINDOW_SEC", 0.25))
    dup_asr_repeat_window = float(getattr(obj, "DUP_ASR_REPEAT_WINDOW_SEC", 0.9))
    pending_fallback_sec = float(getattr(obj, "PENDING_ARM_FALLBACK_SEC", 0.65))
    piece_raw = (transcript_text or "").strip()
    if not piece_raw:
        return False
    piece_norm = _norm_ar(piece_raw)
    if not piece_norm:
        return False
    now = time.time()
    # Reject pieces with no Arabic or Latin letters (digits/noise only).
    if not _is_valid_text(piece_norm):
        return False
    # Same normalized piece within a tight window => duplicate callback.
    duplicate_call = (
        (piece_norm == obj._last_piece_call_norm)
        and ((now - obj._last_piece_call_time) < dup_call_window)
    )
    # Wider window: the ASR engine re-emitted the same text.
    repeated_asr = (
        (piece_norm == obj._last_piece_call_norm)
        and ((now - obj._last_piece_call_time) < dup_asr_repeat_window)
    )
    obj._last_piece_call_norm = piece_norm
    obj._last_piece_call_time = now
    if not duplicate_call and not repeated_asr:
        print(f"📝 USER SAID (raw): {piece_raw}")
        print(f"📝 USER SAID (norm): {piece_norm}")
    # Only fresh (non-duplicate) pieces extend the rolling buffers.
    if not duplicate_call and not repeated_asr:
        if obj._asr_last_time:
            gap = now - obj._asr_last_time
            window = obj.ASR_WINDOW_SEC
            # Short tokens get extra time before the buffer is reset.
            if len(piece_norm) <= obj.ASR_JOIN_NO_SPACE_MAXLEN:
                window += obj.ASR_SHORT_TOKEN_BONUS_SEC
            if gap > window:
                # Silence gap too long: start a fresh utterance.
                obj._asr_buf = ""
                obj._asr_stream = ""
        obj._asr_last_time = now
        if obj._asr_buf:
            # Very short tokens are glued without a space (split syllables).
            if len(piece_norm) <= obj.ASR_JOIN_NO_SPACE_MAXLEN:
                obj._asr_buf = (obj._asr_buf + piece_norm).strip()
            else:
                obj._asr_buf = (obj._asr_buf + " " + piece_norm).strip()
        else:
            obj._asr_buf = piece_norm
        # Space-free stream used for fast substring matching, capped in length.
        compact = piece_norm.replace(" ", "")
        obj._asr_stream = (obj._asr_stream + compact)[-obj.ASR_STREAM_MAX_CHARS :]
        # Keep only the tail of an overlong buffer.
        if len(obj._asr_buf) > obj.ASR_MAX_CHARS:
            obj._asr_buf = obj._asr_buf[-obj.ASR_MAX_CHARS :]
    # Precompute matching variants: normalized, space-free, and with the
    # Arabic definite article stripped per word.
    buf_norm = _norm_ar(obj._asr_buf)
    buf_nospace = buf_norm.replace(" ", "")
    buf_noal = _remove_al_prefix_words(buf_norm)
    buf_noal_nospace = buf_noal.replace(" ", "")
    stream = _norm_ar(obj._asr_stream).replace(" ", "")
    stream_noal = _remove_al_prefix_words(stream)
    if not duplicate_call and not repeated_asr:
        print(f"🧩 ASR BUFFER: {buf_norm}")
    # Suppress re-triggering on the exact same buffer within the dedup window.
    if (
        buf_norm == obj._last_trigger_norm
        and (now - obj._last_trigger_time) < obj.TRIGGER_DEDUP_WINDOW
    ):
        return False
    for phrase in wake_phrases:
        # Phrases match with or without the vocative "يا" prefix.
        p_norm = _strip_ya_prefix(_norm_ar(str(phrase)))
        if not p_norm:
            continue
        p_nospace = p_norm.replace(" ", "")
        p_noal = _remove_al_prefix_words(p_norm)
        p_noal_nospace = p_noal.replace(" ", "")
        # Whole-word match in the buffer, or exact space-free equality,
        # or a match after stripping definite articles.
        pattern = r'\b' + re.escape(p_norm) + r'\b'
        hit_buf = bool(re.search(pattern, buf_norm)) or (p_nospace and p_nospace == buf_nospace)
        hit_buf = hit_buf or (p_noal and (p_noal in buf_noal or (p_noal_nospace and p_noal_nospace in buf_noal_nospace)))
        # Fast path: substring match against the space-free recent-token stream.
        hit_stream = False
        if p_nospace and p_nospace in stream:
            hit_stream = True
        elif p_noal_nospace and p_noal_nospace in stream_noal:
            hit_stream = True
        if hit_buf or hit_stream:
            if hit_stream and not hit_buf:
                print(f"⚡ FAST MATCH: '{phrase}' (recent tokens)")
            else:
                print(f"✅ MATCH: '{phrase}' in BUFFER='{obj._asr_buf}'")
            # Record the trigger and reset all rolling state.
            obj._last_trigger_norm = buf_norm
            obj._last_trigger_time = now
            obj._asr_buf = ""
            obj._asr_last_time = 0.0
            obj._asr_stream = ""
            if fire_on_wake_match:
                if arm_trigger_fn:
                    # NOTE(review): create_task requires a running asyncio event
                    # loop — confirm all callers invoke this from loop context.
                    asyncio.create_task(asyncio.to_thread(arm_trigger_fn))
                obj._pending_arm_wave = False
                obj._pending_arm_wave_fired = False
                obj._pending_arm_wave_set_time = 0.0
                obj._pending_arm_trigger_fn = None
                obj._pending_arm_fallback_time = 0.0
            else:
                # Defer: mark a pending arm wave for another component to fire.
                obj._pending_arm_wave = True
                obj._pending_arm_wave_fired = False
                obj._pending_arm_wave_set_time = now
                obj._pending_arm_trigger_fn = arm_trigger_fn
                obj._pending_arm_fallback_time = now + pending_fallback_sec
            return True
    return False
# ==================================================
# 🔁 Phrase map loader (plain-text grouped format)
# Format: groups separated by blank lines. First line = canonical command,
# following lines = aliases.
# Per-path cache so each phrase file is parsed at most once per process.
_phrase_map_cache = {}
def load_phrase_map(filename: str = "photo_command_ai.txt") -> dict:
    """Load a grouped alias->canonical-command mapping from a plain-text file.

    Groups are separated by blank lines; the first line of each group is the
    canonical command and every line in the group (including the first) is
    mapped to it after _norm_ar normalization. '#' lines are comments.
    Returns {} when no candidate file exists.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    candidates = [
        str(config.PHOTO_PHRASES_FILE),
        os.path.join(base_dir, filename),
        os.path.join(base_dir, "Scripts", filename),
        os.path.join(base_dir, "Data", filename),
        os.path.join(base_dir, "..", "Data", "Scripts", filename),
        os.path.join(base_dir, "..", "Scripts", filename),
        os.path.join(base_dir, "..", "Data", filename),
    ]
    path = next((c for c in candidates if os.path.exists(c)), None)
    if path is None:
        return {}
    if path in _phrase_map_cache:
        return _phrase_map_cache[path]
    groups = []
    current = []
    with open(path, 'r', encoding='utf-8-sig') as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                # Blank line closes the current group.
                if current:
                    groups.append(current)
                    current = []
                continue
            if line.startswith("#"):
                continue
            current.append(line)
    if current:
        groups.append(current)
    mapping = {}
    for group in groups:
        canonical = group[0]
        # The canonical line maps to itself alongside its aliases.
        for alias in group:
            mapping[_norm_ar(alias)] = canonical
    _phrase_map_cache[path] = mapping
    print(f"✅ Loaded phrase mapping from {path}: {len(mapping)} aliases")
    return mapping