AI_Photographer/Gemini/sanad_text_utils.py
2026-04-12 18:52:37 +04:00

373 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import ast
import os
import re
import time
import asyncio
from Core import settings as config
# ==================================================
# 🔤 Arabic & English normalization
# ==================================================
def _norm_ar(s: str) -> str:
s = (s or "").strip().lower()
if not s:
return ""
# Arabic punctuation: ؟ ، ؛
s = re.sub(r"[\u061F\u060C\u061B]", " ", s)
# ✅ Keep Arabic, English letters, word chars, and spaces
s = re.sub(r"[^\w\s\u0600-\u06FFa-zA-Z]", " ", s)
# collapse spaces
s = re.sub(r"\s+", " ", s).strip()
# normalize hamza/alif variants
s = s.replace("أ", "ا").replace("إ", "ا").replace("آ", "ا")
# ta marbuta -> ha, alif maqsoora -> ya
s = s.replace("ة", "ه").replace("ى", "ي")
# tatweel
s = s.replace("ـ", "")
# common nickname normalization
s = s.replace("ابو", "بو")
return s.strip()
# ==================================================
# 📂 Load wake phrases from a python file (sanad_arm.txt)
# Modified to search the local Scripts folder if not found next to module
# ==================================================
def load_arm_phrases(filename: str = "sanad_arm.txt", *, var_name: str = "WAKE_PHRASES") -> set[str]:
    """Load and normalize wake phrases from a file found near this module.

    Search order: module dir, module dir / Scripts, parent / Scripts. The file
    may be a legacy Python-style file assigning a literal set/list/tuple to
    ``var_name`` (parsed safely via ast, never exec'd), or a plain-text file
    with one phrase per line ('#' lines are comments).

    Raises:
        FileNotFoundError: if no candidate path exists.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    candidates = [
        os.path.join(base_dir, filename),
        os.path.join(base_dir, "Scripts", filename),
        os.path.join(base_dir, "..", "Scripts", filename),
    ]
    # First existing candidate wins.
    arm_path = next((c for c in candidates if os.path.exists(c)), None)
    if arm_path is None:
        raise FileNotFoundError(f"Arm phrases file not found in candidates: {candidates}")
    print(f"📂 Loading arm phrases from: {arm_path} (var={var_name})")
    with open(arm_path, "r", encoding="utf-8-sig") as handle:
        raw = handle.read()
    # Legacy format: a python file assigning WAKE_PHRASES = {...}.
    # ast.parse + ast.literal_eval keeps this safe (no code execution).
    try:
        for node in ast.walk(ast.parse(raw)):
            if not isinstance(node, ast.Assign):
                continue
            for target in node.targets:
                if isinstance(target, ast.Name) and target.id == var_name:
                    value = ast.literal_eval(node.value)
                    if isinstance(value, (set, list, tuple)):
                        normalized = {_norm_ar(str(item)) for item in value if str(item).strip()}
                        normalized.discard("")
                        print(f"✅ Loaded {len(normalized)} wake phrases from file variable: {var_name}")
                        return normalized
    except Exception:
        # Not valid python / not a literal assignment: treat as plain text.
        pass
    # Fallback: newline-separated phrases, '#' prefix marks a comment line.
    stripped = (line.strip() for line in raw.splitlines())
    normalized = {_norm_ar(line) for line in stripped if line and not line.startswith("#")}
    normalized.discard("")
    print(f"✅ Loaded {len(normalized)} wake phrases from plain text file: {arm_path}")
    return normalized
# ==================================================
# 🧠 Wake phrase matching + scheduling
# ==================================================
def _is_valid_text(s: str) -> bool:
# Allow Arabic OR English letters
has_ar = bool(re.search(r"[\u0600-\u06FF]", s or ""))
has_en = bool(re.search(r"[a-zA-Z]", s or ""))
return has_ar or has_en
def _strip_ya_prefix(s: str) -> str:
s = (s or "").strip()
if not s:
return ""
if s.startswith("يا "):
return s[3:].strip()
if s.startswith("يا"):
return s[2:].strip()
return s
def _remove_al_prefix_words(text: str) -> str:
if not text:
return ""
parts = text.split()
out = []
for w in parts:
if w.startswith("ال") and len(w) > 2:
out.append(w[2:])
else:
out.append(w)
return " ".join(out).strip()
def _maybe_trigger_arm(
    obj,
    transcript_text: str,
    wake_phrases: set[str],
    *,
    fire_on_wake_match: bool = False,
    arm_trigger_fn=None,
) -> bool:
    """Feed one ASR transcript piece into a rolling buffer and report wake-phrase hits.

    All mutable state (buffers, dedup timestamps, pending-arm flags) lives as
    attributes on ``obj`` and is lazily initialized below, so the same holder
    object can be reused across successive ASR callbacks.

    Args:
        obj: State holder; attributes are created on first call if missing.
        transcript_text: Raw transcript piece from the ASR engine.
        wake_phrases: Candidate phrases; each is normalized per call.
        fire_on_wake_match: When True, schedule ``arm_trigger_fn`` immediately
            on a match; otherwise record a "pending arm wave" for later.
        arm_trigger_fn: Optional zero-arg callable invoked on trigger.

    Returns:
        True when a wake phrase matched (trigger fired or queued), else False.
    """
    if not transcript_text or not wake_phrases:
        return False
    # initialization of state variables (copied from original logic)
    if not hasattr(obj, "_asr_buf"):
        obj._asr_buf = ""
    if not hasattr(obj, "_asr_last_time"):
        obj._asr_last_time = 0.0
    if not hasattr(obj, "ASR_WINDOW_SEC"):
        obj.ASR_WINDOW_SEC = 2.0
    if not hasattr(obj, "ASR_SHORT_TOKEN_BONUS_SEC"):
        obj.ASR_SHORT_TOKEN_BONUS_SEC = 1.0
    if not hasattr(obj, "ASR_JOIN_NO_SPACE_MAXLEN"):
        obj.ASR_JOIN_NO_SPACE_MAXLEN = 2
    if not hasattr(obj, "ASR_MAX_CHARS"):
        obj.ASR_MAX_CHARS = 120
    if not hasattr(obj, "_last_trigger_norm"):
        obj._last_trigger_norm = ""
    if not hasattr(obj, "_last_trigger_time"):
        obj._last_trigger_time = 0.0
    if not hasattr(obj, "TRIGGER_DEDUP_WINDOW"):
        obj.TRIGGER_DEDUP_WINDOW = 2.0
    if not hasattr(obj, "_pending_arm_wave"):
        obj._pending_arm_wave = False
    if not hasattr(obj, "_pending_arm_wave_fired"):
        obj._pending_arm_wave_fired = False
    if not hasattr(obj, "_pending_arm_wave_set_time"):
        obj._pending_arm_wave_set_time = 0.0
    if not hasattr(obj, "PENDING_ARM_TTL"):
        obj.PENDING_ARM_TTL = 6.0
    if not hasattr(obj, "_pending_arm_trigger_fn"):
        obj._pending_arm_trigger_fn = None
    if not hasattr(obj, "_pending_arm_fallback_time"):
        obj._pending_arm_fallback_time = 0.0
    if not hasattr(obj, "_last_piece_call_norm"):
        obj._last_piece_call_norm = ""
    if not hasattr(obj, "_last_piece_call_time"):
        obj._last_piece_call_time = 0.0
    if not hasattr(obj, "_asr_stream"):
        obj._asr_stream = ""
    if not hasattr(obj, "ASR_STREAM_MAX_CHARS"):
        obj.ASR_STREAM_MAX_CHARS = 80
    # Tunables read fresh each call so callers can override them on obj.
    dup_call_window = float(getattr(obj, "DUP_CALL_WINDOW_SEC", 0.25))
    dup_asr_repeat_window = float(getattr(obj, "DUP_ASR_REPEAT_WINDOW_SEC", 0.9))
    pending_fallback_sec = float(getattr(obj, "PENDING_ARM_FALLBACK_SEC", 0.65))
    piece_raw = (transcript_text or "").strip()
    if not piece_raw:
        return False
    piece_norm = _norm_ar(piece_raw)
    if not piece_norm:
        return False
    now = time.time()
    # Reject pieces with no Arabic or Latin letters (digits/noise only).
    if not _is_valid_text(piece_norm):
        return False
    # Same normalized piece within a tight window => duplicate callback.
    duplicate_call = (
        (piece_norm == obj._last_piece_call_norm)
        and ((now - obj._last_piece_call_time) < dup_call_window)
    )
    # Wider window: the ASR engine re-emitted the same text.
    repeated_asr = (
        (piece_norm == obj._last_piece_call_norm)
        and ((now - obj._last_piece_call_time) < dup_asr_repeat_window)
    )
    obj._last_piece_call_norm = piece_norm
    obj._last_piece_call_time = now
    if not duplicate_call and not repeated_asr:
        print(f"📝 USER SAID (raw): {piece_raw}")
        print(f"📝 USER SAID (norm): {piece_norm}")
    # Only fresh (non-duplicate) pieces extend the rolling buffers.
    if not duplicate_call and not repeated_asr:
        if obj._asr_last_time:
            gap = now - obj._asr_last_time
            window = obj.ASR_WINDOW_SEC
            # Short tokens get extra time before the buffer is reset.
            if len(piece_norm) <= obj.ASR_JOIN_NO_SPACE_MAXLEN:
                window += obj.ASR_SHORT_TOKEN_BONUS_SEC
            if gap > window:
                # Silence gap too long: start a fresh utterance.
                obj._asr_buf = ""
                obj._asr_stream = ""
        obj._asr_last_time = now
        if obj._asr_buf:
            # Very short tokens are glued without a space (split syllables).
            if len(piece_norm) <= obj.ASR_JOIN_NO_SPACE_MAXLEN:
                obj._asr_buf = (obj._asr_buf + piece_norm).strip()
            else:
                obj._asr_buf = (obj._asr_buf + " " + piece_norm).strip()
        else:
            obj._asr_buf = piece_norm
        # Space-free stream used for fast substring matching, capped in length.
        compact = piece_norm.replace(" ", "")
        obj._asr_stream = (obj._asr_stream + compact)[-obj.ASR_STREAM_MAX_CHARS :]
        # Keep only the tail of an overlong buffer.
        if len(obj._asr_buf) > obj.ASR_MAX_CHARS:
            obj._asr_buf = obj._asr_buf[-obj.ASR_MAX_CHARS :]
    # Precompute matching variants: normalized, space-free, and with the
    # Arabic definite article stripped per word.
    buf_norm = _norm_ar(obj._asr_buf)
    buf_nospace = buf_norm.replace(" ", "")
    buf_noal = _remove_al_prefix_words(buf_norm)
    buf_noal_nospace = buf_noal.replace(" ", "")
    stream = _norm_ar(obj._asr_stream).replace(" ", "")
    stream_noal = _remove_al_prefix_words(stream)
    if not duplicate_call and not repeated_asr:
        print(f"🧩 ASR BUFFER: {buf_norm}")
    # Suppress re-triggering on the exact same buffer within the dedup window.
    if (
        buf_norm == obj._last_trigger_norm
        and (now - obj._last_trigger_time) < obj.TRIGGER_DEDUP_WINDOW
    ):
        return False
    for phrase in wake_phrases:
        # Phrases match with or without the vocative "يا" prefix.
        p_norm = _strip_ya_prefix(_norm_ar(str(phrase)))
        if not p_norm:
            continue
        p_nospace = p_norm.replace(" ", "")
        p_noal = _remove_al_prefix_words(p_norm)
        p_noal_nospace = p_noal.replace(" ", "")
        # Whole-word match in the buffer, or exact space-free equality,
        # or a match after stripping definite articles.
        pattern = r'\b' + re.escape(p_norm) + r'\b'
        hit_buf = bool(re.search(pattern, buf_norm)) or (p_nospace and p_nospace == buf_nospace)
        hit_buf = hit_buf or (p_noal and (p_noal in buf_noal or (p_noal_nospace and p_noal_nospace in buf_noal_nospace)))
        # Fast path: substring match against the space-free recent-token stream.
        hit_stream = False
        if p_nospace and p_nospace in stream:
            hit_stream = True
        elif p_noal_nospace and p_noal_nospace in stream_noal:
            hit_stream = True
        if hit_buf or hit_stream:
            if hit_stream and not hit_buf:
                print(f"⚡ FAST MATCH: '{phrase}' (recent tokens)")
            else:
                print(f"✅ MATCH: '{phrase}' in BUFFER='{obj._asr_buf}'")
            # Record the trigger and reset all rolling state.
            obj._last_trigger_norm = buf_norm
            obj._last_trigger_time = now
            obj._asr_buf = ""
            obj._asr_last_time = 0.0
            obj._asr_stream = ""
            if fire_on_wake_match:
                if arm_trigger_fn:
                    # NOTE(review): create_task requires a running asyncio event
                    # loop — confirm all callers invoke this from loop context.
                    asyncio.create_task(asyncio.to_thread(arm_trigger_fn))
                obj._pending_arm_wave = False
                obj._pending_arm_wave_fired = False
                obj._pending_arm_wave_set_time = 0.0
                obj._pending_arm_trigger_fn = None
                obj._pending_arm_fallback_time = 0.0
            else:
                # Defer: mark a pending arm wave for another component to fire.
                obj._pending_arm_wave = True
                obj._pending_arm_wave_fired = False
                obj._pending_arm_wave_set_time = now
                obj._pending_arm_trigger_fn = arm_trigger_fn
                obj._pending_arm_fallback_time = now + pending_fallback_sec
            return True
    return False
# ==================================================
# 🔁 Phrase map loader (plain-text grouped format)
# Format: groups separated by blank lines. First line = canonical command,
# following lines = aliases.
# Per-path cache so each phrase file is parsed at most once per process.
_phrase_map_cache = {}
def load_phrase_map(filename: str = "photo_command_ai.txt") -> dict:
    """Load a grouped alias->canonical-command mapping from a plain-text file.

    Groups are separated by blank lines; the first line of each group is the
    canonical command and every line in the group (including the first) is
    mapped to it after _norm_ar normalization. '#' lines are comments.
    Returns {} when no candidate file exists.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    candidates = [
        str(config.PHOTO_PHRASES_FILE),
        os.path.join(base_dir, filename),
        os.path.join(base_dir, "Scripts", filename),
        os.path.join(base_dir, "Data", filename),
        os.path.join(base_dir, "..", "Data", "Scripts", filename),
        os.path.join(base_dir, "..", "Scripts", filename),
        os.path.join(base_dir, "..", "Data", filename),
    ]
    path = next((c for c in candidates if os.path.exists(c)), None)
    if path is None:
        return {}
    if path in _phrase_map_cache:
        return _phrase_map_cache[path]
    groups = []
    current = []
    with open(path, 'r', encoding='utf-8-sig') as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                # Blank line closes the current group.
                if current:
                    groups.append(current)
                    current = []
                continue
            if line.startswith("#"):
                continue
            current.append(line)
    if current:
        groups.append(current)
    mapping = {}
    for group in groups:
        canonical = group[0]
        # The canonical line maps to itself alongside its aliases.
        for alias in group:
            mapping[_norm_ar(alias)] = canonical
    _phrase_map_cache[path] = mapping
    print(f"✅ Loaded phrase mapping from {path}: {len(mapping)} aliases")
    return mapping