"""Autonomous photo-session manager.

Drives the flow IDLE -> WAIT_CONFIRM -> FRAMING -> COUNTDOWN -> COMPLETE -> IDLE
on top of the VisionDetector, coordinating voice prompts, flag files, and the
capture/replay pipeline.
"""
import asyncio
|
|
import json
|
|
import os
|
|
import threading
|
|
import time
|
|
from pathlib import Path
|
|
from threading import Thread
|
|
|
|
import cv2
|
|
|
|
from Modes.AI.vision_detector import VisionDetector
|
|
from Core import settings as config
|
|
from Core.error_events import record_error
|
|
from Core.Logger import Logs
|
|
from Core import people_registry
|
|
from Server.capture_service import capture_with_replay_sync, replay_timing_profile
|
|
|
|
# Module-level logger shared by every method in this file; routed into the
# "G1_Logs" engine under the "autonomous_manager" component name.
sanad_logger = Logs()
sanad_logger.LogEngine("G1_Logs", "autonomous_manager")
|
|
|
|
|
|
class AutonomousManager:
|
|
"""
|
|
Autonomous flow:
|
|
IDLE -> WAIT_CONFIRM -> FRAMING -> COUNTDOWN -> COMPLETE -> IDLE
|
|
"""
|
|
|
|
    def __init__(
        self,
        zmq_host: str = "127.0.0.1",
        zmq_port: int = 55555,
        stability_frames: int = 3,
        poll_hz: int = 8,
        video_source: str | None = None,
    ):
        """Build the autonomous session manager and load operator settings.

        Args:
            zmq_host / zmq_port: endpoint forwarded to the VisionDetector.
            stability_frames: consecutive stable frames required by the
                state machine before acting on a detection.
            poll_hz: detector poll rate; also paces the run() loop.
            video_source: optional detector video-input override.

        Most tunables below come from environment variables (with string
        defaults) or from config reader functions; several of them are
        re-read every loop iteration inside run(), so the values captured
        here are only the initial ones.
        """
        self.detector = VisionDetector(
            zmq_host=zmq_host,
            zmq_port=zmq_port,
            poll_hz=poll_hz,
            video_source=video_source,
        )
        self.stability_frames = int(stability_frames)
        self.cooldown_until = 0.0  # epoch seconds before a new session may start
        self.session_id = 0  # incremented once per photo session
        self._running = False

        # Handshake between the background capture worker thread and run().
        self._capture_done = False
        self._capture_result = None
        self._capture_lock = threading.Lock()
        self._capture_cancel_event: threading.Event | None = None

        # Cross-process flag files used to coordinate with voice/UI processes.
        self.interaction_active = False
        self.interaction_flag = Path(config.SCRIPTS_DIR) / "interaction_triggered.flag"
        self.request_photo_flag = Path(config.SCRIPTS_DIR) / "request_photo.flag"
        self.confirm_yes_flag = Path(config.SCRIPTS_DIR) / "confirm_yes.flag"
        self.confirm_no_flag = Path(config.SCRIPTS_DIR) / "confirm_no.flag"
        self.state_file = Path(config.AUTONOMOUS_STATE_FILE)

        # State-machine timeouts (seconds), env-overridable.
        self.confirm_timeout_sec = float(os.environ.get("CONFIRM_TIMEOUT_SEC", "15.0"))
        self.confirm_reminder_sec = float(os.environ.get("CONFIRM_REMINDER_SEC", "5.0"))
        self.confirm_guard_sec = float(os.environ.get("CONFIRM_GUARD_SEC", "1.2"))
        self.session_cooldown_sec = float(os.environ.get("SESSION_COOLDOWN_SEC", "10.0"))
        self.leave_timeout_sec = float(os.environ.get("VISITOR_LEAVE_TIMEOUT_SEC", "2.5"))
        self.countdown_lose_subject_sec = float(os.environ.get("COUNTDOWN_LOSE_SUBJECT_SEC", "1.4"))
        self.capture_finalize_grace_sec = float(os.environ.get("CAPTURE_FINALIZE_GRACE_SEC", "3.0"))

        self.framing_timeout_sec = float(os.environ.get("FRAMING_TIMEOUT_SEC", "20.0"))
        self.framing_feedback_interval_sec = float(os.environ.get("FRAMING_FEEDBACK_INTERVAL_SEC", "2.0"))
        self.framing_good_frames_required = int(os.environ.get("FRAMING_GOOD_FRAMES_REQUIRED", "2"))

        # Framing thresholds
        self.center_tolerance = float(os.environ.get("FRAMING_CENTER_TOLERANCE", "0.18"))
        self.subject_min_area_ratio = float(os.environ.get("FRAMING_MIN_AREA_RATIO", "0.06"))
        self.subject_max_area_ratio = float(os.environ.get("FRAMING_MAX_AREA_RATIO", "0.55"))
        self.min_blur_var = float(os.environ.get("FRAMING_MIN_BLUR_VAR", "80.0"))
        self.min_exposure = float(os.environ.get("FRAMING_MIN_EXPOSURE", "55.0"))
        self.max_exposure = float(os.environ.get("FRAMING_MAX_EXPOSURE", "200.0"))
        self.headroom_min_ratio = float(config.read_vision_framing_headroom_min_ratio())
        self.headroom_max_ratio = float(config.read_vision_framing_headroom_max_ratio())
        self.eye_line_min_ratio = float(config.read_vision_framing_eye_line_min_ratio())
        self.eye_line_max_ratio = float(config.read_vision_framing_eye_line_max_ratio())

        # Retake / target-lock / replay behavior (refreshed again inside run()).
        self.retake_score_threshold = float(config.read_vision_framing_retake_score_threshold())
        self.retake_prompt_enabled = bool(config.read_vision_retake_prompt_enabled())
        self.retake_limit = int(config.read_vision_retake_max_per_session())
        self.hard_target_lock_enabled = bool(config.read_vision_hard_target_lock_enabled())
        self.autonomous_greeting_replay_enabled = bool(config.read_vision_autonomous_greeting_replay_enabled())
        self.autonomous_greeting_replay_file = self._resolve_replay_path(
            config.read_vision_autonomous_greeting_replay_file()
        )
        self.autonomous_capture_replay_enabled = bool(config.read_vision_autonomous_capture_replay_enabled())
        self.retake_confirm_timeout_sec = float(os.environ.get("RETAKE_CONFIRM_TIMEOUT_SEC", "8.0"))

        # Detector strictness and Gemini context streaming.
        self.yolo_strict_required = bool(config.read_vision_yolo_strict_required())
        self.gemini_context_hz = float(config.read_vision_gemini_context_hz())
        self.gemini_context_silent = bool(config.read_vision_gemini_context_silent())
        # Rate is floored at 0.5 Hz so the interval stays finite and <= 2 s.
        self._context_interval_sec = 1.0 / max(0.5, self.gemini_context_hz)
        self._next_context_ts = 0.0

        self.ai_blocked = False
        self.ai_block_reason = ""
        # Dedup/rate-limit state for _maybe_log_vision_snapshot().
        self._last_vision_log_signature = None
        self._last_vision_log_ts = 0.0
        self.face_recognition_enabled = bool(config.read_vision_face_recognition_enabled())
        self.face_recognition_threshold = float(config.read_vision_face_recognition_threshold())
        # Identity bound to the current session (registry result dict) or None.
        self.current_person: dict | None = None
|
|
|
|
async def _say(self, voice, text: str):
|
|
if voice is None:
|
|
return
|
|
try:
|
|
ok = await voice.send_text_prompt_live(text)
|
|
if not ok:
|
|
sanad_logger.print_and_log("Voice prompt skipped: Gemini WS not connected.", "warning")
|
|
except Exception as e:
|
|
sanad_logger.print_and_log(f"Voice prompt failed: {e}", "warning")
|
|
|
|
async def _say_prompt(
|
|
self,
|
|
voice,
|
|
prompt_key: str,
|
|
fallback_text: str,
|
|
*,
|
|
mode_override: str | None = None,
|
|
allow_gemini_fallback: bool | None = None,
|
|
):
|
|
if voice is None:
|
|
return
|
|
try:
|
|
if hasattr(voice, "play_prompt_key"):
|
|
ok = await voice.play_prompt_key(
|
|
prompt_key,
|
|
fallback_text=fallback_text,
|
|
allow_gemini_fallback=allow_gemini_fallback,
|
|
mode_override=mode_override,
|
|
)
|
|
else:
|
|
ok = await voice.send_text_prompt_live(fallback_text)
|
|
if not ok:
|
|
sanad_logger.print_and_log(f"Voice prompt skipped for {prompt_key}: output unavailable.", "warning")
|
|
except Exception as e:
|
|
sanad_logger.print_and_log(f"Voice prompt failed for {prompt_key}: {e}", "warning")
|
|
|
|
async def _say_capture_prompt(self, voice, prompt_key: str, fallback_text: str):
|
|
await self._say_prompt(
|
|
voice,
|
|
prompt_key,
|
|
fallback_text,
|
|
mode_override="audio",
|
|
allow_gemini_fallback=False,
|
|
)
|
|
|
|
def _maybe_log_vision_snapshot(self, snapshot: dict, now_ts: float):
|
|
try:
|
|
person_count = int(snapshot.get("person_count", 0))
|
|
face_count = int(snapshot.get("face_count", 0))
|
|
group_detected = bool(snapshot.get("group_detected", False))
|
|
group_size = int(snapshot.get("group_size", 0))
|
|
subject_visible = bool(snapshot.get("subject_visible", False))
|
|
intent_detected = bool(snapshot.get("intent_detected", False))
|
|
max_area = int(float(snapshot.get("max_area", 0.0) or 0.0))
|
|
depth_m = snapshot.get("depth_m", None)
|
|
if depth_m is not None:
|
|
depth_m = round(float(depth_m), 2)
|
|
|
|
sig = (
|
|
person_count,
|
|
face_count,
|
|
group_detected,
|
|
group_size,
|
|
subject_visible,
|
|
intent_detected,
|
|
max_area,
|
|
depth_m,
|
|
)
|
|
active = bool(person_count or face_count or subject_visible or group_detected or intent_detected)
|
|
if (not active) and self._last_vision_log_signature in (None, sig):
|
|
return
|
|
if sig == self._last_vision_log_signature and (now_ts - self._last_vision_log_ts) < 1.0:
|
|
return
|
|
|
|
self._last_vision_log_signature = sig
|
|
self._last_vision_log_ts = now_ts
|
|
sanad_logger.print_and_log(
|
|
"👁️ Vision: "
|
|
f"people={person_count} faces={face_count} group={group_detected}({group_size}) "
|
|
f"visible={subject_visible} intent={intent_detected} area={max_area} depth={depth_m if depth_m is not None else '-'}",
|
|
"info",
|
|
)
|
|
except Exception as e:
|
|
record_error("autonomous_manager", "vision_snapshot_log", e)
|
|
|
|
    def _set_interaction_active(self, active: bool, voice=None, reason: str = ""):
        """Flip the global "session in progress" state.

        Side effects, in order:
          1. Mirrors the state into ``self.interaction_active``.
          2. Publishes/removes the on-disk interaction flag so other
             processes can see whether a session is running.
          3. Re-derives the voice audio gate and passive-listen state from
             current config (mic toggle, idle-listen toggle, runtime mode).

        Errors in each stage are recorded but deliberately non-fatal so the
        state machine keeps running.
        """
        active = bool(active)
        self.interaction_active = active
        # Stage 2: flag file for cross-process visibility.
        try:
            self.interaction_flag.parent.mkdir(parents=True, exist_ok=True)
            if active:
                # Timestamp + reason lets external readers age out stale flags.
                self.interaction_flag.write_text(f"{time.time():.3f} {reason}".strip(), encoding="utf-8")
            elif self.interaction_flag.exists():
                self.interaction_flag.unlink()
        except Exception as e:
            record_error("autonomous_manager", "set_interaction_active_file", e, {"active": bool(active)})

        # Stage 3: audio gating, only when the voice backend supports it.
        if voice is not None and hasattr(voice, "set_audio_gate"):
            try:
                idle_voice_listen_enabled = bool(config.read_vision_idle_voice_listen_enabled())
                mic_enabled = bool(config.read_gemini_mic_enabled())
                runtime_mode = str(config.read_runtime_mode()).strip().lower()
                if runtime_mode not in ("manual", "ai"):
                    runtime_mode = "manual"
                if active:
                    # Active session: stop passive listening, force gate open.
                    if hasattr(voice, "set_passive_listen"):
                        voice.set_passive_listen(False, reason=reason or "interaction active")
                    voice.set_audio_gate(True, reason=reason)
                else:
                    if runtime_mode != "ai":
                        # Manual mode: gate tracks the mic toggle only.
                        if hasattr(voice, "set_passive_listen"):
                            voice.set_passive_listen(False, reason=reason or "manual mode")
                        voice.set_audio_gate(mic_enabled, reason=reason or "manual mode")
                    else:
                        # AI idle: both gates require mic AND idle-listen enabled.
                        if hasattr(voice, "set_passive_listen"):
                            voice.set_passive_listen(mic_enabled and idle_voice_listen_enabled, reason=reason or "idle")
                        voice.set_audio_gate(mic_enabled and idle_voice_listen_enabled, reason=reason or "idle")
            except Exception as e:
                record_error("autonomous_manager", "set_interaction_active_audio_gate", e, {"active": bool(active)})
|
|
|
|
@staticmethod
|
|
def _clear_flag(path: Path):
|
|
try:
|
|
if path.exists():
|
|
path.unlink()
|
|
except Exception as e:
|
|
record_error("autonomous_manager", "clear_flag", e, {"path": str(path)})
|
|
|
|
@staticmethod
|
|
def _consume_flag(path: Path) -> bool:
|
|
try:
|
|
if path.exists():
|
|
path.unlink()
|
|
return True
|
|
except Exception as e:
|
|
record_error("autonomous_manager", "consume_flag", e, {"path": str(path)})
|
|
return False
|
|
|
|
def _consume_request_photo_flag(self) -> bool:
|
|
return self._consume_flag(self.request_photo_flag) or self._consume_flag(self.confirm_yes_flag)
|
|
|
|
def _consume_direct_request_flag(self) -> bool:
|
|
return self._consume_flag(self.request_photo_flag)
|
|
|
|
def _consume_no_photo_flag(self) -> bool:
|
|
return self._consume_flag(self.confirm_no_flag)
|
|
|
|
def _clear_confirmation_flags(self):
|
|
self._clear_flag(self.request_photo_flag)
|
|
self._clear_flag(self.confirm_yes_flag)
|
|
self._clear_flag(self.confirm_no_flag)
|
|
|
|
def _cancel_capture_pipeline(self, reason: str = ""):
|
|
try:
|
|
ev = self._capture_cancel_event
|
|
if ev is not None:
|
|
ev.set()
|
|
self._capture_cancel_event = None
|
|
if reason:
|
|
sanad_logger.print_and_log(f"Capture pipeline cancelled: {reason}", "warning")
|
|
except Exception as e:
|
|
record_error("autonomous_manager", "cancel_capture_pipeline", e, {"reason": reason})
|
|
|
|
@staticmethod
|
|
def _resolve_replay_path(path_value: str) -> Path:
|
|
return config.resolve_replay_path(path_value)
|
|
|
|
def _start_greeting_replay(self, replay):
|
|
if replay is None or not bool(self.autonomous_greeting_replay_enabled):
|
|
return
|
|
replay_file = Path(self.autonomous_greeting_replay_file).resolve()
|
|
if not replay_file.exists():
|
|
sanad_logger.print_and_log(f"⚠️ Greeting replay missing: {replay_file}", "warning")
|
|
return
|
|
if bool(getattr(replay, "is_playing", False)):
|
|
sanad_logger.print_and_log("⚠️ Greeting replay skipped: replay already busy.", "warning")
|
|
return
|
|
|
|
def _run_greeting():
|
|
try:
|
|
sanad_logger.print_and_log(f"👋 Greeting replay: {replay_file.name}", "info")
|
|
replay.run(replay_file, config.HOME_FILE, 1.0)
|
|
except Exception as e:
|
|
record_error("autonomous_manager", "greeting_replay", e, {"replay_file": str(replay_file)})
|
|
|
|
Thread(target=_run_greeting, daemon=True).start()
|
|
|
|
def _reset_current_person(self):
|
|
self.current_person = None
|
|
|
|
def _session_person_label(self) -> str:
|
|
if not isinstance(self.current_person, dict):
|
|
return ""
|
|
return str(
|
|
self.current_person.get("display_label")
|
|
or self.current_person.get("display_name")
|
|
or self.current_person.get("person_id")
|
|
or ""
|
|
).strip()
|
|
|
|
def _current_person_extras(self) -> dict:
|
|
person = self.current_person if isinstance(self.current_person, dict) else {}
|
|
return {
|
|
"recognized_person_id": str(person.get("person_id") or ""),
|
|
"recognized_person_known": bool(person.get("known_person", False)),
|
|
"recognized_person_new": bool(person.get("new_person", False)),
|
|
"recognized_person_label": str(
|
|
person.get("display_label") or person.get("display_name") or person.get("person_id") or ""
|
|
),
|
|
"recognized_person_match_score": float(person.get("match_score", 0.0) or 0.0),
|
|
"recognized_person_created_date": str(person.get("created_date") or ""),
|
|
}
|
|
|
|
def _select_face_box(self, snapshot: dict) -> dict | None:
|
|
faces = snapshot.get("face_boxes") or []
|
|
if not isinstance(faces, list) or not faces:
|
|
return None
|
|
subject_box = self._find_subject_box(snapshot)
|
|
if not isinstance(subject_box, dict):
|
|
try:
|
|
return max(faces, key=lambda f: float(f.get("w", 0.0)) * float(f.get("h", 0.0)))
|
|
except Exception:
|
|
return faces[0] if faces else None
|
|
|
|
try:
|
|
sx1 = float(subject_box.get("x", 0.0))
|
|
sy1 = float(subject_box.get("y", 0.0))
|
|
sx2 = sx1 + max(1.0, float(subject_box.get("w", 1.0)))
|
|
sy2 = sy1 + max(1.0, float(subject_box.get("h", 1.0)))
|
|
except Exception:
|
|
sx1 = sy1 = 0.0
|
|
sx2 = sy2 = 0.0
|
|
|
|
best = None
|
|
best_overlap = -1.0
|
|
for face in faces:
|
|
try:
|
|
fx1 = float(face.get("x", 0.0))
|
|
fy1 = float(face.get("y", 0.0))
|
|
fx2 = fx1 + max(1.0, float(face.get("w", 1.0)))
|
|
fy2 = fy1 + max(1.0, float(face.get("h", 1.0)))
|
|
ix1 = max(sx1, fx1)
|
|
iy1 = max(sy1, fy1)
|
|
ix2 = min(sx2, fx2)
|
|
iy2 = min(sy2, fy2)
|
|
overlap = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
|
|
if overlap > best_overlap:
|
|
best_overlap = overlap
|
|
best = face
|
|
except Exception:
|
|
continue
|
|
if best is not None:
|
|
return best
|
|
try:
|
|
return max(faces, key=lambda f: float(f.get("w", 0.0)) * float(f.get("h", 0.0)))
|
|
except Exception:
|
|
return faces[0] if faces else None
|
|
|
|
    def _identify_person_for_session(self, snapshot: dict, source: str = "vision") -> dict | None:
        """Recognize (or enroll) the single visible guest for this session.

        Returns the registry result dict on success, otherwise None. Every
        bail-out path also clears ``self.current_person`` so a stale
        identity never leaks into a new session. Recognition is skipped
        when the feature is disabled, no frame is available, or more than
        one face / a group is visible (identity would be ambiguous).
        """
        if not bool(self.face_recognition_enabled):
            self._reset_current_person()
            return None
        frame = snapshot.get("frame")
        if frame is None:
            self._reset_current_person()
            return None
        # Ambiguous scene: never bind an identity to a group/multiple faces.
        if bool(snapshot.get("group_detected", False)) or int(snapshot.get("face_count", 0) or 0) > 1:
            self._reset_current_person()
            return None

        face_box = self._select_face_box(snapshot)
        if face_box is None:
            self._reset_current_person()
            return None
        subject_box = self._find_subject_box(snapshot)
        try:
            # NOTE: may enroll a brand-new person as a side effect.
            result = people_registry.recognize_or_enroll(
                frame,
                face_box,
                subject_box=subject_box,
                threshold=self.face_recognition_threshold,
                source=source,
            )
        except Exception as e:
            record_error("autonomous_manager", "identify_person_for_session", e)
            self._reset_current_person()
            return None
        if not isinstance(result, dict) or not bool(result.get("ok", False)):
            self._reset_current_person()
            return None
        self.current_person = result
        label = self._session_person_label()
        if bool(result.get("known_person", False)):
            sanad_logger.print_and_log(
                f"🧑 Returning guest recognized: {label} (score={float(result.get('match_score', 0.0) or 0.0):.2f})",
                "info",
            )
        else:
            sanad_logger.print_and_log(f"🧑 New guest enrolled: {label}", "info")
        return result
|
|
|
|
def _welcome_prompt_text(self, group_detected: bool) -> str:
|
|
if group_detected:
|
|
return (
|
|
"Hello everyone, welcome. We will take a photo together. "
|
|
"Would your group like a photo? Please say yes photo or no photo."
|
|
)
|
|
label = self._session_person_label()
|
|
if label and bool(self.current_person and self.current_person.get("known_person")):
|
|
return (
|
|
f"Welcome back, {label}. Would you like another photo? "
|
|
"Please say yes photo or no photo."
|
|
)
|
|
return (
|
|
"Hello, welcome. We will take a photo together. "
|
|
"Would you like a photo? Please say yes photo or no photo."
|
|
)
|
|
|
|
def _welcome_prompt_key(self, group_detected: bool) -> str:
|
|
if group_detected:
|
|
return "welcome_group"
|
|
if self._session_person_label() and bool(self.current_person and self.current_person.get("known_person")):
|
|
return "welcome_returning"
|
|
return "welcome_single"
|
|
|
|
def _framing_prompt_text(self, group_detected: bool) -> str:
|
|
if group_detected:
|
|
return "Great. Please stand with me in front of the camera, stay together in the center, and look at the camera."
|
|
return "Great. Please stand with me in front of the camera, stay in the center, and look at the camera."
|
|
|
|
@staticmethod
|
|
def _framing_prompt_key(group_detected: bool) -> str:
|
|
return "frame_group" if group_detected else "frame_single"
|
|
|
|
@staticmethod
|
|
def _find_subject_box(snapshot: dict) -> dict | None:
|
|
subj = snapshot.get("subject_box")
|
|
if isinstance(subj, dict):
|
|
return subj
|
|
boxes = snapshot.get("boxes") or []
|
|
if not boxes:
|
|
return None
|
|
try:
|
|
return max(boxes, key=lambda b: float(b.get("w", 0.0)) * float(b.get("h", 0.0)))
|
|
except Exception:
|
|
return boxes[0] if boxes else None
|
|
|
|
    def _evaluate_framing_quality(self, snapshot: dict) -> tuple[bool, list[str], dict]:
        """Score the current frame for photo readiness.

        Returns ``(good, reasons, metrics)``: ``good`` is True only when
        every check passes; ``reasons`` are short spoken-guidance fragments
        for each failed check (ordered by priority); ``metrics`` carries the
        raw numbers and per-check booleans for later retake scoring.
        """
        frame = snapshot.get("frame")
        box = self._find_subject_box(snapshot)
        if frame is None or box is None:
            return False, ["step into view"], {"reason": "no_frame_or_subject"}

        try:
            h, w = frame.shape[:2]
            x = float(box.get("x", 0.0))
            y = float(box.get("y", 0.0))
            bw = max(1.0, float(box.get("w", 1.0)))
            bh = max(1.0, float(box.get("h", 1.0)))
            # Subject size relative to the frame, and horizontal offset of
            # its center (dx in [-0.5, 0.5], negative = left of center).
            area_ratio = (bw * bh) / max(1.0, float(w * h))
            cx = x + (bw / 2.0)
            dx = (cx - (w / 2.0)) / max(1.0, float(w))
            centered = abs(dx) <= self.center_tolerance
            size_ok = self.subject_min_area_ratio <= area_ratio <= self.subject_max_area_ratio

            # Sharpness via Laplacian variance; exposure via mean luminance.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            blur_var = float(cv2.Laplacian(gray, cv2.CV_64F).var())
            exposure = float(gray.mean())
            blur_ok = blur_var >= self.min_blur_var
            exposure_ok = self.min_exposure <= exposure <= self.max_exposure

            # Choose the face box with the largest overlap with the subject.
            faces = snapshot.get("face_boxes") or []
            face_box = None
            best_overlap = -1.0
            for f in faces:
                try:
                    fx = float(f.get("x", 0.0))
                    fy = float(f.get("y", 0.0))
                    fw = max(1.0, float(f.get("w", 1.0)))
                    fh = max(1.0, float(f.get("h", 1.0)))
                    ix1 = max(x, fx)
                    iy1 = max(y, fy)
                    ix2 = min(x + bw, fx + fw)
                    iy2 = min(y + bh, fy + fh)
                    iw = max(0.0, ix2 - ix1)
                    ih = max(0.0, iy2 - iy1)
                    overlap = iw * ih
                    if overlap > best_overlap:
                        best_overlap = overlap
                        face_box = {"x": fx, "y": fy, "w": fw, "h": fh}
                except Exception:
                    continue

            if face_box is not None:
                # Headroom = space above the face; the eye line is
                # approximated at 38% down the face box.
                headroom_ratio = float(face_box["y"]) / max(1.0, float(h))
                eye_y = float(face_box["y"]) + (0.38 * float(face_box["h"]))
                eye_line_ratio = eye_y / max(1.0, float(h))
            else:
                # No face detected: estimate from the body box instead
                # (eyes assumed ~25% down the subject box).
                headroom_ratio = y / max(1.0, float(h))
                eye_line_ratio = (y + (0.25 * bh)) / max(1.0, float(h))
            headroom_ok = self.headroom_min_ratio <= headroom_ratio <= self.headroom_max_ratio
            eye_line_ok = self.eye_line_min_ratio <= eye_line_ratio <= self.eye_line_max_ratio

            # Left/right exposure split, used to steer the guest toward the
            # brighter side when underexposed.
            mid = int(max(1, w // 2))
            left_mean = float(gray[:, :mid].mean()) if mid > 0 else exposure
            right_mean = float(gray[:, mid:].mean()) if (w - mid) > 0 else exposure
            lr_delta = right_mean - left_mean

            reasons: list[str] = []
            if not centered:
                reasons.append("move a bit to the center")
            if not size_ok:
                reasons.append("come a little closer" if area_ratio < self.subject_min_area_ratio else "step slightly back")
            if not headroom_ok:
                if headroom_ratio < self.headroom_min_ratio:
                    reasons.append("lower your chin a little")
                else:
                    reasons.append("raise your chin a little")
            if not eye_line_ok:
                reasons.append("keep your eyes around the middle of the frame")
            if not blur_ok:
                reasons.append("hold still for a second")
            if not exposure_ok:
                if exposure < self.min_exposure:
                    # Underexposed: suggest turning toward light when one
                    # side of the frame is noticeably brighter.
                    if abs(lr_delta) > 12.0:
                        reasons.append(
                            "turn slightly toward the brighter side"
                        )
                    else:
                        reasons.append("face the light")
                else:
                    reasons.append("avoid strong direct light")

            metrics = {
                "area_ratio": area_ratio,
                "blur_var": blur_var,
                "exposure": exposure,
                "center_dx": dx,
                "centered": centered,
                "size_ok": size_ok,
                "blur_ok": blur_ok,
                "exposure_ok": exposure_ok,
                "headroom_ratio": headroom_ratio,
                "eye_line_ratio": eye_line_ratio,
                "headroom_ok": headroom_ok,
                "eye_line_ok": eye_line_ok,
                "left_exposure": left_mean,
                "right_exposure": right_mean,
                "lr_exposure_delta": lr_delta,
            }
            return len(reasons) == 0, reasons, metrics
        except Exception as e:
            # Any analysis failure degrades to generic, safe guidance.
            return False, ["hold still and face the camera"], {"reason": str(e)}
|
|
|
|
@staticmethod
|
|
def _framing_guidance_text(reasons: list[str]) -> str:
|
|
if not reasons:
|
|
return "Great framing. Hold still."
|
|
uniq = []
|
|
for r in reasons:
|
|
if r not in uniq:
|
|
uniq.append(r)
|
|
joined = ", and ".join(uniq[:2])
|
|
return f"Almost ready. Please {joined}."
|
|
|
|
@staticmethod
|
|
def _quality_score_from_metrics(metrics: dict) -> float:
|
|
checks = [
|
|
bool(metrics.get("centered", False)),
|
|
bool(metrics.get("size_ok", False)),
|
|
bool(metrics.get("blur_ok", False)),
|
|
bool(metrics.get("exposure_ok", False)),
|
|
bool(metrics.get("headroom_ok", False)),
|
|
bool(metrics.get("eye_line_ok", False)),
|
|
]
|
|
if not checks:
|
|
return 0.0
|
|
return float(sum(1 for x in checks if x)) / float(len(checks))
|
|
|
|
def _retake_assessment(self, snapshot: dict, previous_metrics: dict | None = None) -> tuple[bool, str, float, dict]:
|
|
good, reasons, metrics = self._evaluate_framing_quality(snapshot)
|
|
if previous_metrics and isinstance(previous_metrics, dict):
|
|
merged = dict(previous_metrics)
|
|
merged.update(metrics)
|
|
metrics = merged
|
|
score = self._quality_score_from_metrics(metrics)
|
|
if good and score >= self.retake_score_threshold:
|
|
return False, "", score, metrics
|
|
if reasons:
|
|
reason = reasons[0]
|
|
else:
|
|
reason = "framing quality is not optimal"
|
|
return True, str(reason), score, metrics
|
|
|
|
def _lock_target_on_session_start(self, snapshot: dict):
|
|
try:
|
|
if hasattr(self.detector, "set_hard_lock"):
|
|
self.detector.set_hard_lock(bool(self.hard_target_lock_enabled))
|
|
if hasattr(self.detector, "lock_target_from_snapshot"):
|
|
self.detector.lock_target_from_snapshot(
|
|
snapshot,
|
|
lock_group=bool(self.hard_target_lock_enabled and snapshot.get("group_detected", False)),
|
|
)
|
|
elif hasattr(self.detector, "lock_subject_from_snapshot"):
|
|
self.detector.lock_subject_from_snapshot(snapshot)
|
|
except Exception as e:
|
|
record_error("autonomous_manager", "lock_target_on_session_start", e)
|
|
|
|
def _unlock_target(self):
|
|
try:
|
|
if hasattr(self.detector, "unlock_target"):
|
|
self.detector.unlock_target()
|
|
elif hasattr(self.detector, "unlock_subject"):
|
|
self.detector.unlock_subject()
|
|
except Exception as e:
|
|
record_error("autonomous_manager", "unlock_target", e)
|
|
|
|
def _detector_readiness(self) -> dict:
|
|
try:
|
|
return self.detector.readiness(strict_required=self.yolo_strict_required)
|
|
except Exception as e:
|
|
record_error("autonomous_manager", "detector_readiness", e)
|
|
return {
|
|
"ok": False,
|
|
"strict_required": bool(self.yolo_strict_required),
|
|
"configured_backend": "normal",
|
|
"effective_backend": "normal",
|
|
"yolo_loaded": False,
|
|
"person_model_ok": False,
|
|
"face_model_ok": False,
|
|
"person_model_path": "",
|
|
"face_model_path": "",
|
|
"person_model_error": str(e),
|
|
"face_model_error": "",
|
|
"block_reason": f"Detector readiness failed: {e}",
|
|
}
|
|
|
|
async def _push_vision_context(self, voice, state_name: str, snapshot: dict):
|
|
if voice is None:
|
|
return
|
|
if not hasattr(voice, "send_vision_context_live"):
|
|
return
|
|
now = time.time()
|
|
if now < self._next_context_ts:
|
|
return
|
|
self._next_context_ts = now + self._context_interval_sec
|
|
|
|
payload = {
|
|
"person_count": int(snapshot.get("person_count", 0)),
|
|
"group_count": int(snapshot.get("group_count", 0)),
|
|
"group_size": int(snapshot.get("group_size", 0)),
|
|
"subject_visible": bool(snapshot.get("subject_visible", False)),
|
|
"depth_m": snapshot.get("depth_m"),
|
|
"approach_speed_mps": float(snapshot.get("approach_speed_mps", 0.0) or 0.0),
|
|
"state": str(state_name),
|
|
"intent_detected": bool(snapshot.get("intent_detected", False)),
|
|
}
|
|
try:
|
|
await voice.send_vision_context_live(payload, silent=bool(self.gemini_context_silent))
|
|
except Exception as e:
|
|
record_error("autonomous_manager", "push_vision_context", e, {"state": state_name})
|
|
|
|
    def _write_runtime_state(self, state_name: str, snapshot: dict, voice=None, extras: dict | None = None):
        """Publish the full runtime status as JSON to the state file.

        Best-effort: failures reading voice health or writing the file are
        recorded but never raised, so the state machine keeps running.

        Args:
            state_name: current state-machine state name (e.g. "IDLE").
            snapshot: latest detector snapshot dict.
            voice: optional voice backend, queried for gate/WS/mic health.
            extras: extra key/values merged over the payload (may override
                any base key).
        """
        payload = {
            "state": state_name,
            "session_id": self.session_id,
            "interaction_active": bool(self.interaction_active),
            "intent_detected": bool(snapshot.get("intent_detected", False)),
            "detector_backend": str(snapshot.get("detector_backend", "normal")),
            "yolo_runtime": str(snapshot.get("yolo_runtime", "")),
            "ai_blocked": bool(self.ai_blocked),
            "ai_block_reason": str(self.ai_block_reason or ""),
            "person_count": int(snapshot.get("person_count", 0)),
            "face_count": int(snapshot.get("face_count", 0)),
            "group_count": int(snapshot.get("group_count", 0)),
            "group_size": int(snapshot.get("group_size", 0)),
            "group_detected": bool(snapshot.get("group_detected", False)),
            "is_close": bool(snapshot.get("is_close", False)),
            "is_approaching": bool(snapshot.get("is_approaching", False)),
            "max_area": float(snapshot.get("max_area", 0.0)),
            "depth_m": snapshot.get("depth_m"),
            "approach_speed_mps": snapshot.get("approach_speed_mps"),
            "subject_id": snapshot.get("subject_id"),
            "subject_visible": bool(snapshot.get("subject_visible", False)),
            "target_lock_active": bool(snapshot.get("target_lock_active", False)),
            "target_lock_type": str(snapshot.get("target_lock_type", "") or ""),
            "target_lock_id": snapshot.get("target_lock_id"),
            "target_switch_blocked_count": int(snapshot.get("target_switch_blocked_count", 0)),
            "camera_ok": bool(snapshot.get("camera_ok", False)),
            "depth_ok": bool(snapshot.get("depth_ok", False)),
            "camera_restarts": int(snapshot.get("camera_restarts", 0)),
            "depth_restarts": int(snapshot.get("depth_restarts", 0)),
            # Voice fields default from cheap attribute probes; refined below
            # via get_runtime_health() when the backend provides it.
            "audio_gate_open": bool(getattr(voice, "audio_gate_open", False)) if voice is not None else None,
            "ws_connected": bool(getattr(voice, "is_ws_connected", lambda: False)()) if voice is not None else False,
            "mic_state": "",
            "speaker_state": "",
            "cooldown_remaining": max(0.0, self.cooldown_until - time.time()),
            "time": time.time(),
        }
        try:
            if voice is not None and hasattr(voice, "get_runtime_health"):
                vh = voice.get_runtime_health() or {}
                payload["ws_connected"] = bool(vh.get("ws_connected", payload["ws_connected"]))
                payload["mic_state"] = str(vh.get("mic_state", "") or "")
                payload["speaker_state"] = str(vh.get("speaker_state", "") or "")
        except Exception as e:
            record_error("autonomous_manager", "write_runtime_state_voice_health", e)
        if extras:
            # Extras are applied last and may override any base key.
            payload.update(extras)
        try:
            self.state_file.parent.mkdir(parents=True, exist_ok=True)
            self.state_file.write_text(json.dumps(payload, ensure_ascii=False), encoding="utf-8")
        except Exception as e:
            record_error("autonomous_manager", "write_runtime_state", e, {"state": state_name})
|
|
|
|
    def _start_capture_pipeline(self, replay, timing_info: dict | None = None):
        """Arm and launch the photo-capture worker on a daemon thread.

        Cancels any previous pipeline first, then resets the done/result
        handshake fields that run() polls. The worker optionally plays the
        capture motion replay and trips the shutter at ``delay_sec`` into
        it; if the replay runner stays busy for more than ~4 s the worker
        falls back to a plain still photo. The outcome string lands in
        ``self._capture_result`` with ``self._capture_done = True``.

        Args:
            replay: replay runner used for the capture motion (or None).
            timing_info: optional pre-resolved timing profile; when omitted
                it is computed via _resolve_capture_timing().
        """
        self._cancel_capture_pipeline(reason="restart")
        with self._capture_lock:
            self._capture_done = False
            self._capture_result = None

        timing_info = timing_info or self._resolve_capture_timing(use_replay=bool(self.autonomous_capture_replay_enabled))
        # Config fallback: shutter delay clamped to the total photo window.
        default_delay_sec = max(0.0, min(config.PHOTO_DELAY_SEC, config.PHOTO_TOTAL_SEC))
        delay_sec = float(timing_info.get("capture_offset_sec") or default_delay_sec)
        cancel_event = threading.Event()
        self._capture_cancel_event = cancel_event

        def _run_pipeline():
            # Worker body — runs on a daemon thread; never raises outward.
            try:
                base_prefix = os.environ.get("PHOTO_PREFIX", "photo")
                prefix = f"session{self.session_id}_{base_prefix}"
                run_delay_sec = delay_sec
                capture_replay_runner = replay if bool(self.autonomous_capture_replay_enabled) else None
                if capture_replay_runner is not None:
                    # Wait up to 4 s for a previous replay to finish.
                    replay_wait_deadline = time.time() + 4.0
                    while (
                        bool(getattr(capture_replay_runner, "is_playing", False))
                        and time.time() < replay_wait_deadline
                        and not cancel_event.is_set()
                    ):
                        time.sleep(0.05)
                    if bool(getattr(capture_replay_runner, "is_playing", False)):
                        # Still busy: degrade to a plain still photo.
                        sanad_logger.print_and_log(
                            "⚠️ AI capture replay still busy; falling back to still photo for this shot.",
                            "warning",
                        )
                        capture_replay_runner = None
                        run_delay_sec = default_delay_sec
                if capture_replay_runner is None:
                    sanad_logger.print_and_log("📷 AI capture: still photo mode (no replay during photo).", "info")
                else:
                    sanad_logger.print_and_log(
                        f"📷 AI capture: replaying {Path(config.REPLAY_FILE).name} during the shot. "
                        f"duration={float(timing_info.get('duration_sec') or 0.0):.3f}s "
                        f"shot_at={run_delay_sec:.3f}s "
                        f"source={timing_info.get('capture_source', 'config_fallback')}",
                        "info",
                    )
                res = capture_with_replay_sync(
                    replay_runner=capture_replay_runner,
                    replay_file=config.REPLAY_FILE,
                    home_file=config.HOME_FILE,
                    delay_sec=run_delay_sec,
                    prefix=prefix,
                    speed=1.0,
                    cancel_event=cancel_event,
                )
                with self._capture_lock:
                    self._capture_result = str(res)
                    self._capture_done = True
                # Result strings starting with "[ERR]" signal failure.
                if str(res).startswith("[ERR]"):
                    sanad_logger.print_and_log(f"Capture pipeline failed: {res}", "error")
                else:
                    sanad_logger.print_and_log(f"Saved photo (pipeline): {res}", "info")
            except Exception as e:
                record_error("autonomous_manager", "start_capture_pipeline", e, {"session_id": self.session_id})
                with self._capture_lock:
                    self._capture_result = f"[ERR] capture pipeline exception: {e}"
                    self._capture_done = True

        Thread(target=_run_pipeline, daemon=True).start()
|
|
|
|
def _resolve_capture_timing(self, use_replay: bool) -> dict:
|
|
default_delay = max(0.0, min(config.PHOTO_DELAY_SEC, config.PHOTO_TOTAL_SEC))
|
|
timing_info = {
|
|
"capture_offset_sec": default_delay,
|
|
"duration_sec": 0.0,
|
|
"capture_source": "config_fallback",
|
|
}
|
|
if not use_replay:
|
|
return timing_info
|
|
try:
|
|
profile = replay_timing_profile(config.REPLAY_FILE)
|
|
if bool(profile.get("ok")):
|
|
return profile
|
|
if profile.get("capture_offset_sec") is not None:
|
|
timing_info["capture_offset_sec"] = float(profile.get("capture_offset_sec") or default_delay)
|
|
except Exception as e:
|
|
record_error("autonomous_manager", "resolve_capture_timing", e, {"replay_file": str(config.REPLAY_FILE)})
|
|
return timing_info
|
|
|
|
    async def run(self, hub, replay, voice, ws=None):
        """Main autonomous loop: poll the vision detector and drive the
        photo-session state machine.

        States driven here:
            IDLE -> WAIT_CONFIRM -> FRAMING -> COUNTDOWN ->
            (RETAKE_CONFIRM) -> COMPLETE -> IDLE
        plus IDLE_BLOCKED while the strict-YOLO readiness policy blocks AI.

        Args:
            hub: remote/gamepad hub; only its optional ``hard_cancel_combo`` /
                ``combo_r2l1`` callables are consulted (hard-cancel combo).
            replay: motion-replay handle passed to greeting/capture helpers.
            voice: voice interface used for prompts and the wake sequence.
            ws: not referenced in this method body — presumably kept for
                call-site compatibility (NOTE(review): confirm with callers).
        """
        self.detector.start()
        self.hub = hub
        self._running = True
        self._set_interaction_active(False, voice=voice, reason="idle")

        # ---- State-machine bookkeeping --------------------------------
        state = "IDLE"
        state_enter_ts = time.time()  # when current state was entered (watchdog input)
        stable_count = 0  # consecutive polls with intent detected

        # WAIT_CONFIRM timers.
        confirm_deadline = 0.0
        confirm_last_prompt_ts = 0.0
        confirm_ignore_until = 0.0  # guard window: ignore flags right after prompt
        leave_since = 0.0  # first moment the subject disappeared

        # FRAMING timers and last computed quality metrics.
        framing_deadline = 0.0
        framing_last_feedback_ts = 0.0
        framing_good_streak = 0
        framing_metrics: dict = {}

        # COUNTDOWN timers.
        countdown_deadline = 0.0
        countdown_announced: set[int] = set()  # seconds already spoken (3/2/1/0)
        countdown_lost_since = 0.0

        # Retake-flow bookkeeping (reset at each new session).
        retake_count = 0
        retake_deadline = 0.0
        retake_recommended = False
        retake_reason = ""
        retake_score = 1.0

        sanad_logger.print_and_log("🤖 Autonomous mode enabled.", "info")

        try:
            while self._running:
                # Poll at the detector's rate, clamped to at least 1 Hz.
                await asyncio.sleep(1.0 / max(1, self.detector.poll_hz))
                now = time.time()

                # Read runtime mode; any failure falls back to "manual".
                runtime_mode = "manual"
                try:
                    runtime_mode = str(config.read_runtime_mode()).strip().lower()
                except Exception:
                    runtime_mode = "manual"
                if runtime_mode not in ("manual", "ai"):
                    runtime_mode = "manual"

                # Runtime-refresh operator toggles from config.json.
                try:
                    self.hard_target_lock_enabled = bool(config.read_vision_hard_target_lock_enabled())
                    self.retake_prompt_enabled = bool(config.read_vision_retake_prompt_enabled())
                    self.retake_limit = int(config.read_vision_retake_max_per_session())
                    self.retake_score_threshold = float(config.read_vision_framing_retake_score_threshold())
                    self.autonomous_greeting_replay_enabled = bool(config.read_vision_autonomous_greeting_replay_enabled())
                    self.autonomous_greeting_replay_file = self._resolve_replay_path(
                        config.read_vision_autonomous_greeting_replay_file()
                    )
                    self.autonomous_capture_replay_enabled = bool(config.read_vision_autonomous_capture_replay_enabled())
                    self.face_recognition_enabled = bool(config.read_vision_face_recognition_enabled())
                    self.face_recognition_threshold = float(config.read_vision_face_recognition_threshold())
                except Exception as e:
                    record_error("autonomous_manager", "runtime_option_refresh", e)

                # Forward the hard-lock toggle when the detector supports it.
                if hasattr(self.detector, "set_hard_lock"):
                    try:
                        self.detector.set_hard_lock(bool(self.hard_target_lock_enabled))
                    except Exception as e:
                        record_error("autonomous_manager", "detector_set_hard_lock", e)

                # Latest vision snapshot and derived convenience flags.
                snap = self.detector.latest()
                face_count = int(snap.get("face_count", 0))
                # Fall back to "any face seen" when subject_visible is absent.
                subject_visible = bool(snap.get("subject_visible", face_count > 0))
                intent_detected = bool(snap.get("intent_detected", False))
                max_area = float(snap.get("max_area", 0.0))
                group_detected = bool(snap.get("group_detected", False))
                self._maybe_log_vision_snapshot(snap, now)

                # MANUAL mode pauses the whole autonomous flow: tear down any
                # in-flight session and keep publishing a paused IDLE state.
                if runtime_mode != "ai":
                    stable_count = 0
                    if state != "IDLE":
                        sanad_logger.print_and_log("🛑 Autonomous flow paused: runtime mode is MANUAL.", "info")
                        state = "IDLE"
                        state_enter_ts = now
                        self._clear_confirmation_flags()
                        self._set_interaction_active(False, voice=voice, reason="manual mode")
                        self._cancel_capture_pipeline(reason="manual mode")
                        self._unlock_target()
                        self._reset_current_person()
                    self._write_runtime_state(
                        "IDLE",
                        snap,
                        voice=voice,
                        extras={
                            "stable_count": 0,
                            "runtime_mode": runtime_mode,
                            "autonomous_paused": True,
                            "retake_prompt_enabled": bool(self.retake_prompt_enabled),
                            "retake_limit": int(self.retake_limit),
                            "retake_count": int(retake_count),
                            **self._current_person_extras(),
                        },
                    )
                    continue

                # Strict-YOLO policy: block AI entirely while detector models
                # are not ready, and log/reset only on the transition edge.
                readiness = self._detector_readiness()
                if self.yolo_strict_required and (not bool(readiness.get("ok", False))):
                    block_reason = str(readiness.get("block_reason") or "AI blocked by strict YOLO policy.")
                    if (not self.ai_blocked) or (self.ai_block_reason != block_reason):
                        sanad_logger.print_and_log(f"⛔ AI blocked: {block_reason}", "error")
                        self._set_interaction_active(False, voice=voice, reason="strict yolo blocked")
                        self._clear_confirmation_flags()
                        self._cancel_capture_pipeline(reason="strict yolo blocked")
                        self._unlock_target()
                        self._reset_current_person()
                    self.ai_blocked = True
                    self.ai_block_reason = block_reason
                    state = "IDLE_BLOCKED"
                    stable_count = 0
                    self._write_runtime_state(
                        "IDLE_BLOCKED",
                        snap,
                        voice=voice,
                        extras={
                            "yolo_loaded": bool(readiness.get("yolo_loaded", False)),
                            "person_model_ok": bool(readiness.get("person_model_ok", False)),
                            "face_model_ok": bool(readiness.get("face_model_ok", False)),
                            "detector_backend": str(readiness.get("effective_backend", "normal")),
                            "retake_prompt_enabled": bool(self.retake_prompt_enabled),
                            "retake_limit": int(self.retake_limit),
                            "retake_count": int(retake_count),
                            **self._current_person_extras(),
                        },
                    )
                    await self._push_vision_context(voice, "IDLE_BLOCKED", snap)
                    continue

                # Leaving the blocked state once readiness is restored.
                if self.ai_blocked:
                    sanad_logger.print_and_log("✅ AI readiness restored. Leaving blocked state.", "info")
                    self.ai_blocked = False
                    self.ai_block_reason = ""
                    state = "IDLE"
                    state_enter_ts = now
                    stable_count = 0

                await self._push_vision_context(voice, state, snap)

                # Remote hard-cancel combo (R2+L1): abort any session.
                try:
                    hard_cancel = False
                    if hub is not None:
                        if getattr(hub, "hard_cancel_combo", None):
                            hard_cancel = bool(hub.hard_cancel_combo())
                        elif getattr(hub, "combo_r2l1", None):
                            hard_cancel = bool(hub.combo_r2l1())
                    if hard_cancel:
                        sanad_logger.print_and_log("🛑 HARD CANCEL detected (R2+L1).", "warning")
                        state = "IDLE"
                        state_enter_ts = now
                        self.cooldown_until = now + self.session_cooldown_sec
                        self._clear_confirmation_flags()
                        self._set_interaction_active(False, voice=voice, reason="hard cancel")
                        self._cancel_capture_pipeline(reason="hard cancel")
                        self._unlock_target()
                        self._reset_current_person()
                except Exception as e:
                    record_error("autonomous_manager", "remote_cancel_check", e)

                # ---- IDLE: wait for a voice request or stable intent ----
                if state == "IDLE":
                    self._write_runtime_state(
                        "IDLE",
                        snap,
                        voice=voice,
                        extras={
                            "stable_count": stable_count,
                            "retake_prompt_enabled": bool(self.retake_prompt_enabled),
                            "retake_limit": int(self.retake_limit),
                            "retake_count": int(retake_count),
                            **self._current_person_extras(),
                        },
                    )

                    # Allow visitor-initiated photo request from IDLE.
                    # Only explicit request_photo should start a new session from idle.
                    if self._consume_direct_request_flag():
                        self.session_id += 1
                        # Skip WAIT_CONFIRM: the request itself is the confirmation.
                        state = "FRAMING"
                        state_enter_ts = now
                        framing_deadline = now + self.framing_timeout_sec
                        framing_last_feedback_ts = 0.0
                        framing_good_streak = 0
                        framing_metrics = {}
                        retake_count = 0
                        retake_recommended = False
                        retake_reason = ""
                        retake_score = 1.0
                        self._clear_confirmation_flags()
                        self._set_interaction_active(True, voice=voice, reason="voice request from idle")
                        self._lock_target_on_session_start(snap)
                        self._identify_person_for_session(snap, source="voice_request")
                        sanad_logger.print_and_log(
                            f"🗣️ Voice photo request from IDLE -> session {self.session_id}",
                            "info",
                        )
                        await self._say_prompt(
                            voice,
                            self._framing_prompt_key(group_detected),
                            self._framing_prompt_text(group_detected),
                        )
                        continue

                    # Post-session cooldown: ignore intent while it lasts.
                    if now < self.cooldown_until:
                        continue

                    # Debounce intent across consecutive polls.
                    if intent_detected:
                        stable_count += 1
                    else:
                        stable_count = 0

                    if stable_count < self.stability_frames:
                        continue

                    # Intent held long enough: open a new session and greet.
                    stable_count = 0
                    self.session_id += 1
                    state = "WAIT_CONFIRM"
                    state_enter_ts = now
                    confirm_deadline = now + self.confirm_timeout_sec
                    confirm_ignore_until = now + self.confirm_guard_sec
                    confirm_last_prompt_ts = now
                    leave_since = 0.0
                    framing_metrics = {}
                    retake_count = 0
                    retake_recommended = False
                    retake_reason = ""
                    retake_score = 1.0
                    self._clear_confirmation_flags()
                    self._set_interaction_active(True, voice=voice, reason=f"intent max_area={max_area:.0f}")
                    self._lock_target_on_session_start(snap)
                    self._identify_person_for_session(snap, source="intent")

                    sanad_logger.print_and_log(
                        f"🔔 Intent detected (area={max_area:.0f}) -> session {self.session_id}",
                        "info",
                    )
                    self._start_greeting_replay(replay)
                    try:
                        wake_text = self._welcome_prompt_text(group_detected)
                        await voice.trigger_wake_sequence(
                            wake_text=wake_text,
                            prompt_key=self._welcome_prompt_key(group_detected),
                        )
                    except Exception as e:
                        record_error("autonomous_manager", "wake_sequence", e)
                        sanad_logger.print_and_log(f"Wake sequence failed: {e}", "warning")
                    continue

                # ---- WAIT_CONFIRM: visitor must say yes/no photo --------
                if state == "WAIT_CONFIRM":
                    confirm_remaining = max(0.0, confirm_deadline - now)
                    self._write_runtime_state(
                        "WAIT_CONFIRM",
                        snap,
                        voice=voice,
                        extras={
                            "confirm_timeout_remaining": confirm_remaining,
                            "confirm_guard_remaining": max(0.0, confirm_ignore_until - now),
                            "retake_prompt_enabled": bool(self.retake_prompt_enabled),
                            "retake_limit": int(self.retake_limit),
                            "retake_count": int(retake_count),
                            **self._current_person_extras(),
                        },
                    )

                    # Visitor walked away: wait leave_timeout_sec before resetting.
                    if not subject_visible:
                        if leave_since <= 0.0:
                            leave_since = now
                        elif (now - leave_since) >= self.leave_timeout_sec:
                            sanad_logger.print_and_log("🙈 Visitor left before confirmation.", "warning")
                            await self._say_prompt(
                                voice,
                                "visitor_left",
                                "No worries. I will wait here for the next visitor.",
                            )
                            state = "IDLE"
                            state_enter_ts = now
                            self.cooldown_until = now + self.session_cooldown_sec
                            self._clear_confirmation_flags()
                            self._set_interaction_active(False, voice=voice, reason="left before confirm")
                            self._cancel_capture_pipeline(reason="left before confirm")
                            self._unlock_target()
                            self._reset_current_person()
                            continue
                    else:
                        leave_since = 0.0

                    # Guard window: ignore yes/no flags raised too soon after
                    # the prompt (e.g. stale or accidental triggers).
                    if now >= confirm_ignore_until:
                        if self._consume_no_photo_flag():
                            await self._say_prompt(voice, "declined", "No problem. We can do it anytime.")
                            state = "IDLE"
                            state_enter_ts = now
                            self.cooldown_until = now + self.session_cooldown_sec
                            self._clear_confirmation_flags()
                            self._set_interaction_active(False, voice=voice, reason="visitor declined")
                            self._cancel_capture_pipeline(reason="visitor declined")
                            self._unlock_target()
                            self._reset_current_person()
                            continue

                        if self._consume_request_photo_flag():
                            sanad_logger.print_and_log("✅ Confirmation received. Entering framing check.", "info")
                            await self._say_prompt(
                                voice,
                                self._framing_prompt_key(group_detected),
                                self._framing_prompt_text(group_detected),
                            )
                            state = "FRAMING"
                            state_enter_ts = now
                            framing_deadline = now + self.framing_timeout_sec
                            framing_last_feedback_ts = 0.0
                            framing_good_streak = 0
                            continue

                    # Periodic verbal reminder while waiting.
                    if (now - confirm_last_prompt_ts) >= self.confirm_reminder_sec:
                        confirm_last_prompt_ts = now
                        await self._say_prompt(
                            voice,
                            "confirm_reminder",
                            "Say yes photo to continue, or no photo to cancel.",
                        )

                    # No answer in time: reset with cooldown.
                    if now >= confirm_deadline:
                        sanad_logger.print_and_log("⌛ Confirmation timeout.", "warning")
                        await self._say_prompt(
                            voice,
                            "confirm_timeout",
                            "No problem. I will wait here. Come back anytime for a photo.",
                        )
                        state = "IDLE"
                        state_enter_ts = now
                        self.cooldown_until = now + self.session_cooldown_sec
                        self._clear_confirmation_flags()
                        self._set_interaction_active(False, voice=voice, reason="confirm timeout")
                        self._cancel_capture_pipeline(reason="confirm timeout")
                        self._unlock_target()
                        self._reset_current_person()
                        continue

                # ---- FRAMING: coach until the framing quality holds -----
                elif state == "FRAMING":
                    if self._consume_no_photo_flag():
                        await self._say_prompt(voice, "session_cancelled", "Okay. Session cancelled.")
                        state = "IDLE"
                        state_enter_ts = now
                        self.cooldown_until = now + self.session_cooldown_sec
                        self._clear_confirmation_flags()
                        self._set_interaction_active(False, voice=voice, reason="cancelled during framing")
                        self._cancel_capture_pipeline(reason="cancelled during framing")
                        self._unlock_target()
                        self._reset_current_person()
                        continue

                    good, reasons, framing_metrics = self._evaluate_framing_quality(snap)
                    framing_remaining = max(0.0, framing_deadline - now)
                    self._write_runtime_state(
                        "FRAMING",
                        snap,
                        voice=voice,
                        extras={
                            "framing_ok": bool(good),
                            "framing_reasons": reasons,
                            "framing_timeout_remaining": framing_remaining,
                            "framing_good_streak": framing_good_streak,
                            "framing_metrics": framing_metrics,
                            "retake_prompt_enabled": bool(self.retake_prompt_enabled),
                            "retake_limit": int(self.retake_limit),
                            "retake_count": int(retake_count),
                            **self._current_person_extras(),
                        },
                    )

                    if good:
                        framing_good_streak += 1
                    else:
                        framing_good_streak = 0
                        # Speak guidance at a throttled rate while framing is bad.
                        if (now - framing_last_feedback_ts) >= self.framing_feedback_interval_sec:
                            framing_last_feedback_ts = now
                            await self._say(voice, self._framing_guidance_text(reasons))

                    # Enough consecutive good frames: start the capture
                    # pipeline now and move into the verbal countdown.
                    if framing_good_streak >= self.framing_good_frames_required:
                        capture_timing = self._resolve_capture_timing(
                            use_replay=bool(self.autonomous_capture_replay_enabled)
                        )
                        capture_start_ts = time.time()
                        self._start_capture_pipeline(replay, timing_info=capture_timing)
                        await self._say_capture_prompt(
                            voice,
                            "countdown_intro",
                            "Look at the camera, stay ready, hold your pose with me, keep still, keep your smile soft, and in a moment I will count down for the photo.",
                        )
                        state = "COUNTDOWN"
                        state_enter_ts = now
                        # The countdown ends when the pipeline fires the shot.
                        countdown_deadline = capture_start_ts + float(
                            capture_timing.get("capture_offset_sec")
                            or max(0.0, min(config.PHOTO_DELAY_SEC, config.PHOTO_TOTAL_SEC))
                        )
                        countdown_announced = set()
                        countdown_lost_since = 0.0
                        continue

                    # Framing never settled: fall back to confirmation.
                    if now >= framing_deadline:
                        sanad_logger.print_and_log("⌛ Framing timeout.", "warning")
                        await self._say_prompt(
                            voice,
                            "framing_timeout",
                            "I still need a better frame. Please step in front of me and say yes photo when ready.",
                        )
                        state = "WAIT_CONFIRM"
                        state_enter_ts = now
                        confirm_deadline = now + self.confirm_timeout_sec
                        confirm_ignore_until = now + self.confirm_guard_sec
                        confirm_last_prompt_ts = now
                        leave_since = 0.0
                        continue

                # ---- COUNTDOWN: announce 3/2/1, await the capture -------
                elif state == "COUNTDOWN":
                    countdown_remaining = max(0.0, countdown_deadline - now)
                    self._write_runtime_state(
                        "COUNTDOWN",
                        snap,
                        voice=voice,
                        extras={
                            "countdown_remaining": countdown_remaining,
                            "framing_metrics": framing_metrics,
                            "retake_prompt_enabled": bool(self.retake_prompt_enabled),
                            "retake_limit": int(self.retake_limit),
                            "retake_count": int(retake_count),
                            **self._current_person_extras(),
                        },
                    )

                    if self._consume_no_photo_flag():
                        await self._say_prompt(voice, "countdown_cancelled", "Countdown cancelled.")
                        state = "IDLE"
                        state_enter_ts = now
                        self.cooldown_until = now + self.session_cooldown_sec
                        self._clear_confirmation_flags()
                        self._set_interaction_active(False, voice=voice, reason="cancelled during countdown")
                        self._cancel_capture_pipeline(reason="cancelled during countdown")
                        self._unlock_target()
                        self._reset_current_person()
                        continue

                    # Subject lost mid-countdown: give them a grace period,
                    # then restart framing instead of shooting an empty frame.
                    if not subject_visible:
                        if countdown_lost_since <= 0.0:
                            countdown_lost_since = now
                        elif (now - countdown_lost_since) >= self.countdown_lose_subject_sec:
                            await self._say_prompt(
                                voice,
                                "lost_from_frame",
                                "I lost you from frame. Let us try again.",
                            )
                            state = "FRAMING"
                            state_enter_ts = now
                            framing_deadline = now + self.framing_timeout_sec
                            framing_last_feedback_ts = 0.0
                            framing_good_streak = 0
                            continue
                    else:
                        countdown_lost_since = 0.0

                    # Speak each remaining second exactly once; "Smile." at zero.
                    sec_left = int(round(countdown_remaining))
                    if sec_left in (3, 2, 1) and sec_left not in countdown_announced:
                        countdown_announced.add(sec_left)
                        await self._say_capture_prompt(voice, f"count_{sec_left}", f"{sec_left}...")
                    elif sec_left <= 0 and 0 not in countdown_announced:
                        countdown_announced.add(0)
                        await self._say_capture_prompt(voice, "smile", "Smile.")

                    # Snapshot the pipeline result under the lock.
                    with self._capture_lock:
                        capture_done = bool(self._capture_done)
                        capture_result = self._capture_result

                    if capture_done:
                        if isinstance(capture_result, str) and capture_result.startswith("[ERR]"):
                            sanad_logger.print_and_log(f"Capture failed: {capture_result}", "error")
                            state = "IDLE"
                            state_enter_ts = now
                            self.cooldown_until = now + self.session_cooldown_sec
                            self._set_interaction_active(False, voice=voice, reason="capture failed")
                            self._cancel_capture_pipeline(reason="capture failed")
                            self._unlock_target()
                            self._reset_current_person()
                        else:
                            # Best-effort: link the saved photo to the person.
                            try:
                                person_id = str((self.current_person or {}).get("person_id") or "").strip()
                                if person_id:
                                    people_registry.attach_captured_photo(person_id, str(capture_result))
                            except Exception as e:
                                record_error("autonomous_manager", "attach_captured_photo", e)
                            # Decide whether to offer a retake.
                            retake_recommended, retake_reason, retake_score, framing_metrics = self._retake_assessment(
                                snap,
                                previous_metrics=framing_metrics,
                            )
                            if (
                                bool(self.retake_prompt_enabled)
                                and bool(retake_recommended)
                                and int(retake_count) < int(self.retake_limit)
                            ):
                                state = "RETAKE_CONFIRM"
                                state_enter_ts = now
                                retake_deadline = now + self.retake_confirm_timeout_sec
                                reason_txt = retake_reason or "the framing is not optimal"
                                await self._say_prompt(
                                    voice,
                                    "retake_recommended",
                                    f"Photo captured. I recommend a retake because {reason_txt}. "
                                    "Say yes photo to retake, or no photo to keep this one.",
                                )
                            else:
                                state = "COMPLETE"
                                state_enter_ts = now
                        continue

                    # Pipeline never reported back within the grace window.
                    if now >= (countdown_deadline + self.capture_finalize_grace_sec):
                        record_error(
                            "autonomous_manager",
                            "countdown_capture_timeout",
                            context={"session_id": self.session_id, "grace_sec": self.capture_finalize_grace_sec},
                        )
                        sanad_logger.print_and_log("Capture timeout after countdown. Resetting session.", "error")
                        state = "IDLE"
                        state_enter_ts = now
                        self.cooldown_until = now + self.session_cooldown_sec
                        self._set_interaction_active(False, voice=voice, reason="capture timeout")
                        self._cancel_capture_pipeline(reason="capture timeout")
                        self._unlock_target()
                        self._reset_current_person()
                        continue

                # ---- RETAKE_CONFIRM: keep the photo or try again --------
                elif state == "RETAKE_CONFIRM":
                    rem = max(0.0, retake_deadline - now)
                    self._write_runtime_state(
                        "RETAKE_CONFIRM",
                        snap,
                        voice=voice,
                        extras={
                            "retake_prompt_enabled": bool(self.retake_prompt_enabled),
                            "retake_recommended": bool(retake_recommended),
                            "retake_reason": str(retake_reason or ""),
                            "retake_score": float(retake_score),
                            "retake_count": int(retake_count),
                            "retake_limit": int(self.retake_limit),
                            "retake_timeout_remaining": rem,
                            "framing_metrics": framing_metrics,
                            **self._current_person_extras(),
                        },
                    )
                    # "no photo" here means keep the current shot.
                    if self._consume_no_photo_flag():
                        state = "COMPLETE"
                        state_enter_ts = now
                        continue
                    # "yes photo" means retake (subject to the per-session limit).
                    if self._consume_request_photo_flag():
                        if int(retake_count) < int(self.retake_limit):
                            retake_count += 1
                            await self._say_prompt(
                                voice,
                                "retake_yes",
                                "Great, let us retake. Hold your pose.",
                            )
                            state = "FRAMING"
                            state_enter_ts = now
                            framing_deadline = now + self.framing_timeout_sec
                            framing_last_feedback_ts = 0.0
                            framing_good_streak = 0
                        else:
                            await self._say_prompt(
                                voice,
                                "retake_limit",
                                "Retake limit reached. Keeping the current photo.",
                            )
                            state = "COMPLETE"
                            state_enter_ts = now
                        continue
                    # Silence: keep the photo.
                    if now >= retake_deadline:
                        state = "COMPLETE"
                        state_enter_ts = now
                        continue

                # ---- COMPLETE: thank, reset, apply cooldown -------------
                elif state == "COMPLETE":
                    self._write_runtime_state(
                        "COMPLETE",
                        snap,
                        voice=voice,
                        extras={
                            "capture_result": self._capture_result,
                            "retake_prompt_enabled": bool(self.retake_prompt_enabled),
                            "retake_recommended": bool(retake_recommended),
                            "retake_reason": str(retake_reason or ""),
                            "retake_score": float(retake_score),
                            "retake_count": int(retake_count),
                            "retake_limit": int(self.retake_limit),
                            "framing_metrics": framing_metrics,
                            **self._current_person_extras(),
                        },
                    )
                    await self._say_capture_prompt(
                        voice,
                        "photo_saved_thanks",
                        "Thank you. Photo saved. Don't forget to check your photos.",
                    )
                    sanad_logger.print_and_log(f"✅ Session {self.session_id} complete.", "info")
                    await asyncio.sleep(1.0)
                    state = "IDLE"
                    state_enter_ts = now
                    self.cooldown_until = time.time() + self.session_cooldown_sec
                    self._clear_confirmation_flags()
                    self._set_interaction_active(False, voice=voice, reason="session complete")
                    self._cancel_capture_pipeline(reason="session complete")
                    self._unlock_target()
                    self._reset_current_person()
                    continue

                # Watchdog: no state may persist beyond 120 s; force a reset.
                if (time.time() - state_enter_ts) > 120.0:
                    sanad_logger.print_and_log("Autonomous state timed out, forcing IDLE reset.", "warning")
                    state = "IDLE"
                    state_enter_ts = time.time()
                    self.cooldown_until = time.time() + self.session_cooldown_sec
                    self._clear_confirmation_flags()
                    self._set_interaction_active(False, voice=voice, reason="state watchdog reset")
                    self._cancel_capture_pipeline(reason="state watchdog reset")
                    self._unlock_target()
                    self._reset_current_person()

        finally:
            # Always leave the system quiescent, whatever stopped the loop.
            self._set_interaction_active(False, voice=voice, reason="autonomous stop")
            self._cancel_capture_pipeline(reason="autonomous stop")
            self._unlock_target()
            self._reset_current_person()
            self.detector.stop()
|
|
def stop(self):
|
|
self._running = False
|
|
self.detector.stop()
|
|
|
|
|
|
if __name__ == "__main__":
    # Standalone smoke entry point: drive the manager with inert stand-ins
    # for the hub/replay/voice collaborators.
    async def _main():
        manager = AutonomousManager()

        class _Placeholder:
            pass

        await manager.run(_Placeholder(), _Placeholder(), _Placeholder(), None)

    try:
        asyncio.run(_main())
    except KeyboardInterrupt:
        pass
|