# AI_Photographer/Modes/AI/autonomous_manager.py
# Last modified: 2026-04-12 18:52:37 +04:00
# 1458 lines, 69 KiB, Python
import asyncio
import json
import os
import threading
import time
from pathlib import Path
from threading import Thread
import cv2
from Modes.AI.vision_detector import VisionDetector
from Core import settings as config
from Core.error_events import record_error
from Core.Logger import Logs
from Core import people_registry
from Server.capture_service import capture_with_replay_sync, replay_timing_profile
# Module-wide logger; routes this module's output through the "G1_Logs"
# engine under the "autonomous_manager" tag.
sanad_logger = Logs()
sanad_logger.LogEngine("G1_Logs", "autonomous_manager")
class AutonomousManager:
    """Drive the autonomous photo-session state machine.

    Autonomous flow:
        IDLE -> WAIT_CONFIRM -> FRAMING -> COUNTDOWN -> COMPLETE -> IDLE
    """
def __init__(
    self,
    zmq_host: str = "127.0.0.1",
    zmq_port: int = 55555,
    stability_frames: int = 3,
    poll_hz: int = 8,
    video_source: str | None = None,
):
    """Build the manager and its vision detector.

    Args:
        zmq_host: Host of the ZMQ vision stream consumed by VisionDetector.
        zmq_port: Port of the ZMQ vision stream.
        stability_frames: Consecutive detection frames required for stability.
        poll_hz: Detector polling rate; also paces the run() loop.
        video_source: Optional explicit video source override for the detector.
    """
    self.detector = VisionDetector(
        zmq_host=zmq_host,
        zmq_port=zmq_port,
        poll_hz=poll_hz,
        video_source=video_source,
    )
    self.stability_frames = int(stability_frames)
    # Session bookkeeping.
    self.cooldown_until = 0.0  # wall-clock time before which new sessions are suppressed
    self.session_id = 0
    self._running = False
    # Capture-pipeline handshake; worker thread publishes under _capture_lock.
    self._capture_done = False
    self._capture_result = None
    self._capture_lock = threading.Lock()
    self._capture_cancel_event: threading.Event | None = None
    self.interaction_active = False
    # Flag files used as simple cross-process signals.
    self.interaction_flag = Path(config.SCRIPTS_DIR) / "interaction_triggered.flag"
    self.request_photo_flag = Path(config.SCRIPTS_DIR) / "request_photo.flag"
    self.confirm_yes_flag = Path(config.SCRIPTS_DIR) / "confirm_yes.flag"
    self.confirm_no_flag = Path(config.SCRIPTS_DIR) / "confirm_no.flag"
    self.state_file = Path(config.AUTONOMOUS_STATE_FILE)
    # Flow timeouts (seconds), overridable via environment variables.
    self.confirm_timeout_sec = float(os.environ.get("CONFIRM_TIMEOUT_SEC", "15.0"))
    self.confirm_reminder_sec = float(os.environ.get("CONFIRM_REMINDER_SEC", "5.0"))
    self.confirm_guard_sec = float(os.environ.get("CONFIRM_GUARD_SEC", "1.2"))
    self.session_cooldown_sec = float(os.environ.get("SESSION_COOLDOWN_SEC", "10.0"))
    self.leave_timeout_sec = float(os.environ.get("VISITOR_LEAVE_TIMEOUT_SEC", "2.5"))
    self.countdown_lose_subject_sec = float(os.environ.get("COUNTDOWN_LOSE_SUBJECT_SEC", "1.4"))
    self.capture_finalize_grace_sec = float(os.environ.get("CAPTURE_FINALIZE_GRACE_SEC", "3.0"))
    self.framing_timeout_sec = float(os.environ.get("FRAMING_TIMEOUT_SEC", "20.0"))
    self.framing_feedback_interval_sec = float(os.environ.get("FRAMING_FEEDBACK_INTERVAL_SEC", "2.0"))
    self.framing_good_frames_required = int(os.environ.get("FRAMING_GOOD_FRAMES_REQUIRED", "2"))
    # Framing thresholds
    self.center_tolerance = float(os.environ.get("FRAMING_CENTER_TOLERANCE", "0.18"))
    self.subject_min_area_ratio = float(os.environ.get("FRAMING_MIN_AREA_RATIO", "0.06"))
    self.subject_max_area_ratio = float(os.environ.get("FRAMING_MAX_AREA_RATIO", "0.55"))
    self.min_blur_var = float(os.environ.get("FRAMING_MIN_BLUR_VAR", "80.0"))
    self.min_exposure = float(os.environ.get("FRAMING_MIN_EXPOSURE", "55.0"))
    self.max_exposure = float(os.environ.get("FRAMING_MAX_EXPOSURE", "200.0"))
    # Composition ratios and retake policy come from config.json readers.
    self.headroom_min_ratio = float(config.read_vision_framing_headroom_min_ratio())
    self.headroom_max_ratio = float(config.read_vision_framing_headroom_max_ratio())
    self.eye_line_min_ratio = float(config.read_vision_framing_eye_line_min_ratio())
    self.eye_line_max_ratio = float(config.read_vision_framing_eye_line_max_ratio())
    self.retake_score_threshold = float(config.read_vision_framing_retake_score_threshold())
    self.retake_prompt_enabled = bool(config.read_vision_retake_prompt_enabled())
    self.retake_limit = int(config.read_vision_retake_max_per_session())
    self.hard_target_lock_enabled = bool(config.read_vision_hard_target_lock_enabled())
    # Replay (robot motion playback) options for greeting and capture phases.
    self.autonomous_greeting_replay_enabled = bool(config.read_vision_autonomous_greeting_replay_enabled())
    self.autonomous_greeting_replay_file = self._resolve_replay_path(
        config.read_vision_autonomous_greeting_replay_file()
    )
    self.autonomous_capture_replay_enabled = bool(config.read_vision_autonomous_capture_replay_enabled())
    self.retake_confirm_timeout_sec = float(os.environ.get("RETAKE_CONFIRM_TIMEOUT_SEC", "8.0"))
    self.yolo_strict_required = bool(config.read_vision_yolo_strict_required())
    # Gemini vision-context push rate; floor of 0.5 Hz keeps the interval bounded.
    self.gemini_context_hz = float(config.read_vision_gemini_context_hz())
    self.gemini_context_silent = bool(config.read_vision_gemini_context_silent())
    self._context_interval_sec = 1.0 / max(0.5, self.gemini_context_hz)
    self._next_context_ts = 0.0
    self.ai_blocked = False
    self.ai_block_reason = ""
    # Dedup state for the one-line vision log (signature + last emit time).
    self._last_vision_log_signature = None
    self._last_vision_log_ts = 0.0
    self.face_recognition_enabled = bool(config.read_vision_face_recognition_enabled())
    self.face_recognition_threshold = float(config.read_vision_face_recognition_threshold())
    self.current_person: dict | None = None
async def _say(self, voice, text: str):
if voice is None:
return
try:
ok = await voice.send_text_prompt_live(text)
if not ok:
sanad_logger.print_and_log("Voice prompt skipped: Gemini WS not connected.", "warning")
except Exception as e:
sanad_logger.print_and_log(f"Voice prompt failed: {e}", "warning")
async def _say_prompt(
self,
voice,
prompt_key: str,
fallback_text: str,
*,
mode_override: str | None = None,
allow_gemini_fallback: bool | None = None,
):
if voice is None:
return
try:
if hasattr(voice, "play_prompt_key"):
ok = await voice.play_prompt_key(
prompt_key,
fallback_text=fallback_text,
allow_gemini_fallback=allow_gemini_fallback,
mode_override=mode_override,
)
else:
ok = await voice.send_text_prompt_live(fallback_text)
if not ok:
sanad_logger.print_and_log(f"Voice prompt skipped for {prompt_key}: output unavailable.", "warning")
except Exception as e:
sanad_logger.print_and_log(f"Voice prompt failed for {prompt_key}: {e}", "warning")
async def _say_capture_prompt(self, voice, prompt_key: str, fallback_text: str):
await self._say_prompt(
voice,
prompt_key,
fallback_text,
mode_override="audio",
allow_gemini_fallback=False,
)
def _maybe_log_vision_snapshot(self, snapshot: dict, now_ts: float):
try:
person_count = int(snapshot.get("person_count", 0))
face_count = int(snapshot.get("face_count", 0))
group_detected = bool(snapshot.get("group_detected", False))
group_size = int(snapshot.get("group_size", 0))
subject_visible = bool(snapshot.get("subject_visible", False))
intent_detected = bool(snapshot.get("intent_detected", False))
max_area = int(float(snapshot.get("max_area", 0.0) or 0.0))
depth_m = snapshot.get("depth_m", None)
if depth_m is not None:
depth_m = round(float(depth_m), 2)
sig = (
person_count,
face_count,
group_detected,
group_size,
subject_visible,
intent_detected,
max_area,
depth_m,
)
active = bool(person_count or face_count or subject_visible or group_detected or intent_detected)
if (not active) and self._last_vision_log_signature in (None, sig):
return
if sig == self._last_vision_log_signature and (now_ts - self._last_vision_log_ts) < 1.0:
return
self._last_vision_log_signature = sig
self._last_vision_log_ts = now_ts
sanad_logger.print_and_log(
"👁️ Vision: "
f"people={person_count} faces={face_count} group={group_detected}({group_size}) "
f"visible={subject_visible} intent={intent_detected} area={max_area} depth={depth_m if depth_m is not None else '-'}",
"info",
)
except Exception as e:
record_error("autonomous_manager", "vision_snapshot_log", e)
def _set_interaction_active(self, active: bool, voice=None, reason: str = ""):
    """Toggle interaction state and mirror it to the flag file and audio gate.

    When *active*, passive listening is disabled and the audio gate opens
    unconditionally. When idle, gating depends on runtime mode plus the
    mic / idle-voice-listen config toggles.
    """
    active = bool(active)
    self.interaction_active = active
    try:
        self.interaction_flag.parent.mkdir(parents=True, exist_ok=True)
        if active:
            # Timestamped flag lets other processes see when/why interaction began.
            self.interaction_flag.write_text(f"{time.time():.3f} {reason}".strip(), encoding="utf-8")
        elif self.interaction_flag.exists():
            self.interaction_flag.unlink()
    except Exception as e:
        record_error("autonomous_manager", "set_interaction_active_file", e, {"active": bool(active)})
    if voice is not None and hasattr(voice, "set_audio_gate"):
        try:
            idle_voice_listen_enabled = bool(config.read_vision_idle_voice_listen_enabled())
            mic_enabled = bool(config.read_gemini_mic_enabled())
            runtime_mode = str(config.read_runtime_mode()).strip().lower()
            if runtime_mode not in ("manual", "ai"):
                runtime_mode = "manual"
            if active:
                if hasattr(voice, "set_passive_listen"):
                    voice.set_passive_listen(False, reason=reason or "interaction active")
                voice.set_audio_gate(True, reason=reason)
            else:
                if runtime_mode != "ai":
                    # Manual mode: gate simply follows the mic toggle.
                    if hasattr(voice, "set_passive_listen"):
                        voice.set_passive_listen(False, reason=reason or "manual mode")
                    voice.set_audio_gate(mic_enabled, reason=reason or "manual mode")
                else:
                    # AI idle: listen only when both mic and idle-listen are enabled.
                    if hasattr(voice, "set_passive_listen"):
                        voice.set_passive_listen(mic_enabled and idle_voice_listen_enabled, reason=reason or "idle")
                    voice.set_audio_gate(mic_enabled and idle_voice_listen_enabled, reason=reason or "idle")
        except Exception as e:
            record_error("autonomous_manager", "set_interaction_active_audio_gate", e, {"active": bool(active)})
@staticmethod
def _clear_flag(path: Path):
try:
if path.exists():
path.unlink()
except Exception as e:
record_error("autonomous_manager", "clear_flag", e, {"path": str(path)})
@staticmethod
def _consume_flag(path: Path) -> bool:
try:
if path.exists():
path.unlink()
return True
except Exception as e:
record_error("autonomous_manager", "consume_flag", e, {"path": str(path)})
return False
def _consume_request_photo_flag(self) -> bool:
    """True when a photo was requested via the direct flag or a spoken yes.

    Note: short-circuits — when the direct request flag fired, a
    simultaneous yes-flag is left on disk (it is removed later by
    _clear_confirmation_flags).
    """
    return self._consume_flag(self.request_photo_flag) or self._consume_flag(self.confirm_yes_flag)
def _consume_direct_request_flag(self) -> bool:
    """Consume only the explicit request_photo flag (ignores yes/no flags)."""
    return self._consume_flag(self.request_photo_flag)
def _consume_no_photo_flag(self) -> bool:
    """Consume the spoken-no confirmation flag; True when it had fired."""
    return self._consume_flag(self.confirm_no_flag)
def _clear_confirmation_flags(self):
self._clear_flag(self.request_photo_flag)
self._clear_flag(self.confirm_yes_flag)
self._clear_flag(self.confirm_no_flag)
def _cancel_capture_pipeline(self, reason: str = ""):
try:
ev = self._capture_cancel_event
if ev is not None:
ev.set()
self._capture_cancel_event = None
if reason:
sanad_logger.print_and_log(f"Capture pipeline cancelled: {reason}", "warning")
except Exception as e:
record_error("autonomous_manager", "cancel_capture_pipeline", e, {"reason": reason})
@staticmethod
def _resolve_replay_path(path_value: str) -> Path:
    """Resolve a configured replay-file reference to a Path via config."""
    return config.resolve_replay_path(path_value)
def _start_greeting_replay(self, replay):
    """Kick off the greeting motion replay on a background daemon thread.

    No-op when the replay runner is absent, the feature is disabled, the
    replay file is missing, or the runner is already playing.
    """
    if replay is None or not bool(self.autonomous_greeting_replay_enabled):
        return
    replay_file = Path(self.autonomous_greeting_replay_file).resolve()
    if not replay_file.exists():
        sanad_logger.print_and_log(f"⚠️ Greeting replay missing: {replay_file}", "warning")
        return
    if bool(getattr(replay, "is_playing", False)):
        sanad_logger.print_and_log("⚠️ Greeting replay skipped: replay already busy.", "warning")
        return

    def _run_greeting():
        # Daemon thread so greeting motion never blocks the state machine.
        try:
            sanad_logger.print_and_log(f"👋 Greeting replay: {replay_file.name}", "info")
            replay.run(replay_file, config.HOME_FILE, 1.0)
        except Exception as e:
            record_error("autonomous_manager", "greeting_replay", e, {"replay_file": str(replay_file)})

    Thread(target=_run_greeting, daemon=True).start()
def _reset_current_person(self):
    """Forget the recognized person for the current session."""
    self.current_person = None
def _session_person_label(self) -> str:
if not isinstance(self.current_person, dict):
return ""
return str(
self.current_person.get("display_label")
or self.current_person.get("display_name")
or self.current_person.get("person_id")
or ""
).strip()
def _current_person_extras(self) -> dict:
person = self.current_person if isinstance(self.current_person, dict) else {}
return {
"recognized_person_id": str(person.get("person_id") or ""),
"recognized_person_known": bool(person.get("known_person", False)),
"recognized_person_new": bool(person.get("new_person", False)),
"recognized_person_label": str(
person.get("display_label") or person.get("display_name") or person.get("person_id") or ""
),
"recognized_person_match_score": float(person.get("match_score", 0.0) or 0.0),
"recognized_person_created_date": str(person.get("created_date") or ""),
}
def _select_face_box(self, snapshot: dict) -> dict | None:
faces = snapshot.get("face_boxes") or []
if not isinstance(faces, list) or not faces:
return None
subject_box = self._find_subject_box(snapshot)
if not isinstance(subject_box, dict):
try:
return max(faces, key=lambda f: float(f.get("w", 0.0)) * float(f.get("h", 0.0)))
except Exception:
return faces[0] if faces else None
try:
sx1 = float(subject_box.get("x", 0.0))
sy1 = float(subject_box.get("y", 0.0))
sx2 = sx1 + max(1.0, float(subject_box.get("w", 1.0)))
sy2 = sy1 + max(1.0, float(subject_box.get("h", 1.0)))
except Exception:
sx1 = sy1 = 0.0
sx2 = sy2 = 0.0
best = None
best_overlap = -1.0
for face in faces:
try:
fx1 = float(face.get("x", 0.0))
fy1 = float(face.get("y", 0.0))
fx2 = fx1 + max(1.0, float(face.get("w", 1.0)))
fy2 = fy1 + max(1.0, float(face.get("h", 1.0)))
ix1 = max(sx1, fx1)
iy1 = max(sy1, fy1)
ix2 = min(sx2, fx2)
iy2 = min(sy2, fy2)
overlap = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
if overlap > best_overlap:
best_overlap = overlap
best = face
except Exception:
continue
if best is not None:
return best
try:
return max(faces, key=lambda f: float(f.get("w", 0.0)) * float(f.get("h", 0.0)))
except Exception:
return faces[0] if faces else None
def _identify_person_for_session(self, snapshot: dict, source: str = "vision") -> dict | None:
if not bool(self.face_recognition_enabled):
self._reset_current_person()
return None
frame = snapshot.get("frame")
if frame is None:
self._reset_current_person()
return None
if bool(snapshot.get("group_detected", False)) or int(snapshot.get("face_count", 0) or 0) > 1:
self._reset_current_person()
return None
face_box = self._select_face_box(snapshot)
if face_box is None:
self._reset_current_person()
return None
subject_box = self._find_subject_box(snapshot)
try:
result = people_registry.recognize_or_enroll(
frame,
face_box,
subject_box=subject_box,
threshold=self.face_recognition_threshold,
source=source,
)
except Exception as e:
record_error("autonomous_manager", "identify_person_for_session", e)
self._reset_current_person()
return None
if not isinstance(result, dict) or not bool(result.get("ok", False)):
self._reset_current_person()
return None
self.current_person = result
label = self._session_person_label()
if bool(result.get("known_person", False)):
sanad_logger.print_and_log(
f"🧑 Returning guest recognized: {label} (score={float(result.get('match_score', 0.0) or 0.0):.2f})",
"info",
)
else:
sanad_logger.print_and_log(f"🧑 New guest enrolled: {label}", "info")
return result
def _welcome_prompt_text(self, group_detected: bool) -> str:
if group_detected:
return (
"Hello everyone, welcome. We will take a photo together. "
"Would your group like a photo? Please say yes photo or no photo."
)
label = self._session_person_label()
if label and bool(self.current_person and self.current_person.get("known_person")):
return (
f"Welcome back, {label}. Would you like another photo? "
"Please say yes photo or no photo."
)
return (
"Hello, welcome. We will take a photo together. "
"Would you like a photo? Please say yes photo or no photo."
)
def _welcome_prompt_key(self, group_detected: bool) -> str:
if group_detected:
return "welcome_group"
if self._session_person_label() and bool(self.current_person and self.current_person.get("known_person")):
return "welcome_returning"
return "welcome_single"
def _framing_prompt_text(self, group_detected: bool) -> str:
if group_detected:
return "Great. Please stand with me in front of the camera, stay together in the center, and look at the camera."
return "Great. Please stand with me in front of the camera, stay in the center, and look at the camera."
@staticmethod
def _framing_prompt_key(group_detected: bool) -> str:
return "frame_group" if group_detected else "frame_single"
@staticmethod
def _find_subject_box(snapshot: dict) -> dict | None:
subj = snapshot.get("subject_box")
if isinstance(subj, dict):
return subj
boxes = snapshot.get("boxes") or []
if not boxes:
return None
try:
return max(boxes, key=lambda b: float(b.get("w", 0.0)) * float(b.get("h", 0.0)))
except Exception:
return boxes[0] if boxes else None
def _evaluate_framing_quality(self, snapshot: dict) -> tuple[bool, list[str], dict]:
    """Score the current frame for photo-readiness.

    Returns (good, reasons, metrics):
        good: True when every framing check passes.
        reasons: spoken-guidance fragments, one per failed check.
        metrics: raw measurements plus per-check booleans.

    Checks: horizontal centering, subject size, sharpness (variance of
    the Laplacian), exposure (mean gray level), headroom, and eye-line
    height. Any internal error yields a generic "hold still" result.
    """
    frame = snapshot.get("frame")
    box = self._find_subject_box(snapshot)
    if frame is None or box is None:
        return False, ["step into view"], {"reason": "no_frame_or_subject"}
    try:
        h, w = frame.shape[:2]
        x = float(box.get("x", 0.0))
        y = float(box.get("y", 0.0))
        bw = max(1.0, float(box.get("w", 1.0)))
        bh = max(1.0, float(box.get("h", 1.0)))
        area_ratio = (bw * bh) / max(1.0, float(w * h))
        # Horizontal offset of subject center, normalized by frame width.
        cx = x + (bw / 2.0)
        dx = (cx - (w / 2.0)) / max(1.0, float(w))
        centered = abs(dx) <= self.center_tolerance
        size_ok = self.subject_min_area_ratio <= area_ratio <= self.subject_max_area_ratio
        # Sharpness via Laplacian variance; exposure via mean gray level.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        blur_var = float(cv2.Laplacian(gray, cv2.CV_64F).var())
        exposure = float(gray.mean())
        blur_ok = blur_var >= self.min_blur_var
        exposure_ok = self.min_exposure <= exposure <= self.max_exposure
        # Pick the face box overlapping the subject box the most.
        faces = snapshot.get("face_boxes") or []
        face_box = None
        best_overlap = -1.0
        for f in faces:
            try:
                fx = float(f.get("x", 0.0))
                fy = float(f.get("y", 0.0))
                fw = max(1.0, float(f.get("w", 1.0)))
                fh = max(1.0, float(f.get("h", 1.0)))
                ix1 = max(x, fx)
                iy1 = max(y, fy)
                ix2 = min(x + bw, fx + fw)
                iy2 = min(y + bh, fy + fh)
                iw = max(0.0, ix2 - ix1)
                ih = max(0.0, iy2 - iy1)
                overlap = iw * ih
                if overlap > best_overlap:
                    best_overlap = overlap
                    face_box = {"x": fx, "y": fy, "w": fw, "h": fh}
            except Exception:
                continue
        if face_box is not None:
            # Headroom from face top; eye line assumed ~38% into the face box.
            headroom_ratio = float(face_box["y"]) / max(1.0, float(h))
            eye_y = float(face_box["y"]) + (0.38 * float(face_box["h"]))
            eye_line_ratio = eye_y / max(1.0, float(h))
        else:
            # No face available: approximate from the subject box top.
            headroom_ratio = y / max(1.0, float(h))
            eye_line_ratio = (y + (0.25 * bh)) / max(1.0, float(h))
        headroom_ok = self.headroom_min_ratio <= headroom_ratio <= self.headroom_max_ratio
        eye_line_ok = self.eye_line_min_ratio <= eye_line_ratio <= self.eye_line_max_ratio
        # Left/right brightness split to detect strong side lighting.
        mid = int(max(1, w // 2))
        left_mean = float(gray[:, :mid].mean()) if mid > 0 else exposure
        right_mean = float(gray[:, mid:].mean()) if (w - mid) > 0 else exposure
        lr_delta = right_mean - left_mean
        reasons: list[str] = []
        if not centered:
            reasons.append("move a bit to the center")
        if not size_ok:
            reasons.append("come a little closer" if area_ratio < self.subject_min_area_ratio else "step slightly back")
        if not headroom_ok:
            if headroom_ratio < self.headroom_min_ratio:
                reasons.append("lower your chin a little")
            else:
                reasons.append("raise your chin a little")
        if not eye_line_ok:
            reasons.append("keep your eyes around the middle of the frame")
        if not blur_ok:
            reasons.append("hold still for a second")
        if not exposure_ok:
            if exposure < self.min_exposure:
                # Underexposed: guide toward the brighter side if one exists.
                if abs(lr_delta) > 12.0:
                    reasons.append(
                        "turn slightly toward the brighter side"
                    )
                else:
                    reasons.append("face the light")
            else:
                reasons.append("avoid strong direct light")
        metrics = {
            "area_ratio": area_ratio,
            "blur_var": blur_var,
            "exposure": exposure,
            "center_dx": dx,
            "centered": centered,
            "size_ok": size_ok,
            "blur_ok": blur_ok,
            "exposure_ok": exposure_ok,
            "headroom_ratio": headroom_ratio,
            "eye_line_ratio": eye_line_ratio,
            "headroom_ok": headroom_ok,
            "eye_line_ok": eye_line_ok,
            "left_exposure": left_mean,
            "right_exposure": right_mean,
            "lr_exposure_delta": lr_delta,
        }
        return len(reasons) == 0, reasons, metrics
    except Exception as e:
        return False, ["hold still and face the camera"], {"reason": str(e)}
@staticmethod
def _framing_guidance_text(reasons: list[str]) -> str:
if not reasons:
return "Great framing. Hold still."
uniq = []
for r in reasons:
if r not in uniq:
uniq.append(r)
joined = ", and ".join(uniq[:2])
return f"Almost ready. Please {joined}."
@staticmethod
def _quality_score_from_metrics(metrics: dict) -> float:
checks = [
bool(metrics.get("centered", False)),
bool(metrics.get("size_ok", False)),
bool(metrics.get("blur_ok", False)),
bool(metrics.get("exposure_ok", False)),
bool(metrics.get("headroom_ok", False)),
bool(metrics.get("eye_line_ok", False)),
]
if not checks:
return 0.0
return float(sum(1 for x in checks if x)) / float(len(checks))
def _retake_assessment(self, snapshot: dict, previous_metrics: dict | None = None) -> tuple[bool, str, float, dict]:
good, reasons, metrics = self._evaluate_framing_quality(snapshot)
if previous_metrics and isinstance(previous_metrics, dict):
merged = dict(previous_metrics)
merged.update(metrics)
metrics = merged
score = self._quality_score_from_metrics(metrics)
if good and score >= self.retake_score_threshold:
return False, "", score, metrics
if reasons:
reason = reasons[0]
else:
reason = "framing quality is not optimal"
return True, str(reason), score, metrics
def _lock_target_on_session_start(self, snapshot: dict):
try:
if hasattr(self.detector, "set_hard_lock"):
self.detector.set_hard_lock(bool(self.hard_target_lock_enabled))
if hasattr(self.detector, "lock_target_from_snapshot"):
self.detector.lock_target_from_snapshot(
snapshot,
lock_group=bool(self.hard_target_lock_enabled and snapshot.get("group_detected", False)),
)
elif hasattr(self.detector, "lock_subject_from_snapshot"):
self.detector.lock_subject_from_snapshot(snapshot)
except Exception as e:
record_error("autonomous_manager", "lock_target_on_session_start", e)
def _unlock_target(self):
try:
if hasattr(self.detector, "unlock_target"):
self.detector.unlock_target()
elif hasattr(self.detector, "unlock_subject"):
self.detector.unlock_subject()
except Exception as e:
record_error("autonomous_manager", "unlock_target", e)
def _detector_readiness(self) -> dict:
try:
return self.detector.readiness(strict_required=self.yolo_strict_required)
except Exception as e:
record_error("autonomous_manager", "detector_readiness", e)
return {
"ok": False,
"strict_required": bool(self.yolo_strict_required),
"configured_backend": "normal",
"effective_backend": "normal",
"yolo_loaded": False,
"person_model_ok": False,
"face_model_ok": False,
"person_model_path": "",
"face_model_path": "",
"person_model_error": str(e),
"face_model_error": "",
"block_reason": f"Detector readiness failed: {e}",
}
async def _push_vision_context(self, voice, state_name: str, snapshot: dict):
if voice is None:
return
if not hasattr(voice, "send_vision_context_live"):
return
now = time.time()
if now < self._next_context_ts:
return
self._next_context_ts = now + self._context_interval_sec
payload = {
"person_count": int(snapshot.get("person_count", 0)),
"group_count": int(snapshot.get("group_count", 0)),
"group_size": int(snapshot.get("group_size", 0)),
"subject_visible": bool(snapshot.get("subject_visible", False)),
"depth_m": snapshot.get("depth_m"),
"approach_speed_mps": float(snapshot.get("approach_speed_mps", 0.0) or 0.0),
"state": str(state_name),
"intent_detected": bool(snapshot.get("intent_detected", False)),
}
try:
await voice.send_vision_context_live(payload, silent=bool(self.gemini_context_silent))
except Exception as e:
record_error("autonomous_manager", "push_vision_context", e, {"state": state_name})
def _write_runtime_state(self, state_name: str, snapshot: dict, voice=None, extras: dict | None = None):
    """Serialize manager/detector/voice status to the shared state file (JSON).

    Presumably polled by the server/UI processes — TODO confirm against
    readers of AUTONOMOUS_STATE_FILE. *extras* entries override base
    payload keys; write errors are recorded, never raised.
    """
    payload = {
        "state": state_name,
        "session_id": self.session_id,
        "interaction_active": bool(self.interaction_active),
        "intent_detected": bool(snapshot.get("intent_detected", False)),
        "detector_backend": str(snapshot.get("detector_backend", "normal")),
        "yolo_runtime": str(snapshot.get("yolo_runtime", "")),
        "ai_blocked": bool(self.ai_blocked),
        "ai_block_reason": str(self.ai_block_reason or ""),
        "person_count": int(snapshot.get("person_count", 0)),
        "face_count": int(snapshot.get("face_count", 0)),
        "group_count": int(snapshot.get("group_count", 0)),
        "group_size": int(snapshot.get("group_size", 0)),
        "group_detected": bool(snapshot.get("group_detected", False)),
        "is_close": bool(snapshot.get("is_close", False)),
        "is_approaching": bool(snapshot.get("is_approaching", False)),
        "max_area": float(snapshot.get("max_area", 0.0)),
        "depth_m": snapshot.get("depth_m"),
        "approach_speed_mps": snapshot.get("approach_speed_mps"),
        "subject_id": snapshot.get("subject_id"),
        "subject_visible": bool(snapshot.get("subject_visible", False)),
        "target_lock_active": bool(snapshot.get("target_lock_active", False)),
        "target_lock_type": str(snapshot.get("target_lock_type", "") or ""),
        "target_lock_id": snapshot.get("target_lock_id"),
        "target_switch_blocked_count": int(snapshot.get("target_switch_blocked_count", 0)),
        "camera_ok": bool(snapshot.get("camera_ok", False)),
        "depth_ok": bool(snapshot.get("depth_ok", False)),
        "camera_restarts": int(snapshot.get("camera_restarts", 0)),
        "depth_restarts": int(snapshot.get("depth_restarts", 0)),
        # Voice fields default from cheap attribute reads; refined below
        # via get_runtime_health when the backend provides it.
        "audio_gate_open": bool(getattr(voice, "audio_gate_open", False)) if voice is not None else None,
        "ws_connected": bool(getattr(voice, "is_ws_connected", lambda: False)()) if voice is not None else False,
        "mic_state": "",
        "speaker_state": "",
        "cooldown_remaining": max(0.0, self.cooldown_until - time.time()),
        "time": time.time(),
    }
    try:
        if voice is not None and hasattr(voice, "get_runtime_health"):
            vh = voice.get_runtime_health() or {}
            payload["ws_connected"] = bool(vh.get("ws_connected", payload["ws_connected"]))
            payload["mic_state"] = str(vh.get("mic_state", "") or "")
            payload["speaker_state"] = str(vh.get("speaker_state", "") or "")
    except Exception as e:
        record_error("autonomous_manager", "write_runtime_state_voice_health", e)
    if extras:
        payload.update(extras)
    try:
        self.state_file.parent.mkdir(parents=True, exist_ok=True)
        self.state_file.write_text(json.dumps(payload, ensure_ascii=False), encoding="utf-8")
    except Exception as e:
        record_error("autonomous_manager", "write_runtime_state", e, {"state": state_name})
def _start_capture_pipeline(self, replay, timing_info: dict | None = None):
    """Launch the photo capture (optionally with motion replay) on a worker thread.

    Cancels any in-flight pipeline first, resets the done/result
    handshake, then spawns a daemon thread running
    capture_with_replay_sync. The result string is published under
    _capture_lock via _capture_result/_capture_done; results prefixed
    with "[ERR]" indicate failure.
    """
    self._cancel_capture_pipeline(reason="restart")
    with self._capture_lock:
        self._capture_done = False
        self._capture_result = None
    timing_info = timing_info or self._resolve_capture_timing(use_replay=bool(self.autonomous_capture_replay_enabled))
    # Default shot delay: configured delay clamped to the total photo window.
    default_delay_sec = max(0.0, min(config.PHOTO_DELAY_SEC, config.PHOTO_TOTAL_SEC))
    delay_sec = float(timing_info.get("capture_offset_sec") or default_delay_sec)
    cancel_event = threading.Event()
    self._capture_cancel_event = cancel_event

    def _run_pipeline():
        # Worker thread: optionally waits out a busy replay runner, then captures.
        try:
            base_prefix = os.environ.get("PHOTO_PREFIX", "photo")
            prefix = f"session{self.session_id}_{base_prefix}"
            run_delay_sec = delay_sec
            capture_replay_runner = replay if bool(self.autonomous_capture_replay_enabled) else None
            if capture_replay_runner is not None:
                # Give a previous replay up to 4 s to finish before falling back.
                replay_wait_deadline = time.time() + 4.0
                while (
                    bool(getattr(capture_replay_runner, "is_playing", False))
                    and time.time() < replay_wait_deadline
                    and not cancel_event.is_set()
                ):
                    time.sleep(0.05)
                if bool(getattr(capture_replay_runner, "is_playing", False)):
                    sanad_logger.print_and_log(
                        "⚠️ AI capture replay still busy; falling back to still photo for this shot.",
                        "warning",
                    )
                    capture_replay_runner = None
                    run_delay_sec = default_delay_sec
            if capture_replay_runner is None:
                sanad_logger.print_and_log("📷 AI capture: still photo mode (no replay during photo).", "info")
            else:
                sanad_logger.print_and_log(
                    f"📷 AI capture: replaying {Path(config.REPLAY_FILE).name} during the shot. "
                    f"duration={float(timing_info.get('duration_sec') or 0.0):.3f}s "
                    f"shot_at={run_delay_sec:.3f}s "
                    f"source={timing_info.get('capture_source', 'config_fallback')}",
                    "info",
                )
            res = capture_with_replay_sync(
                replay_runner=capture_replay_runner,
                replay_file=config.REPLAY_FILE,
                home_file=config.HOME_FILE,
                delay_sec=run_delay_sec,
                prefix=prefix,
                speed=1.0,
                cancel_event=cancel_event,
            )
            with self._capture_lock:
                self._capture_result = str(res)
                self._capture_done = True
            if str(res).startswith("[ERR]"):
                sanad_logger.print_and_log(f"Capture pipeline failed: {res}", "error")
            else:
                sanad_logger.print_and_log(f"Saved photo (pipeline): {res}", "info")
        except Exception as e:
            record_error("autonomous_manager", "start_capture_pipeline", e, {"session_id": self.session_id})
            with self._capture_lock:
                self._capture_result = f"[ERR] capture pipeline exception: {e}"
                self._capture_done = True

    Thread(target=_run_pipeline, daemon=True).start()
def _resolve_capture_timing(self, use_replay: bool) -> dict:
    """Determine when, within the capture window, the still shot is taken.

    With replay enabled, prefers the timing profile derived from the
    replay file; otherwise (or when the profile is incomplete) falls
    back to the configured PHOTO_DELAY_SEC clamped to PHOTO_TOTAL_SEC.
    """
    default_delay = max(0.0, min(config.PHOTO_DELAY_SEC, config.PHOTO_TOTAL_SEC))
    timing_info = {
        "capture_offset_sec": default_delay,
        "duration_sec": 0.0,
        "capture_source": "config_fallback",
    }
    if not use_replay:
        return timing_info
    try:
        profile = replay_timing_profile(config.REPLAY_FILE)
        if bool(profile.get("ok")):
            return profile
        # Partial profile: keep the fallback dict but honor its shot offset.
        if profile.get("capture_offset_sec") is not None:
            timing_info["capture_offset_sec"] = float(profile.get("capture_offset_sec") or default_delay)
    except Exception as e:
        record_error("autonomous_manager", "resolve_capture_timing", e, {"replay_file": str(config.REPLAY_FILE)})
    return timing_info
async def run(self, hub, replay, voice, ws=None):
    """Run the autonomous photo-session state machine until stopped.

    Polls the vision detector at roughly ``detector.poll_hz`` Hz and
    steps the flow IDLE -> WAIT_CONFIRM -> FRAMING -> COUNTDOWN ->
    (RETAKE_CONFIRM) -> COMPLETE -> IDLE, with an IDLE_BLOCKED side
    state while the strict-YOLO readiness policy is failing. Each tick
    re-reads operator toggles from config, publishes the current state
    via ``_write_runtime_state``, and honours manual-mode and
    hard-cancel overrides.

    Args:
        hub: Controller hub; probed only for an optional hard-cancel
            combo (``hard_cancel_combo`` / ``combo_r2l1``).
        replay: Replay runner forwarded to the greeting and capture
            pipelines.
        voice: Voice interface used for spoken prompts and the wake
            sequence.
        ws: Accepted for call-site compatibility; not used in this loop.
    """
    self.detector.start()
    self.hub = hub
    self._running = True
    self._set_interaction_active(False, voice=voice, reason="idle")
    # --- Per-session loop bookkeeping (reset at session boundaries) ---
    state = "IDLE"
    state_enter_ts = time.time()
    stable_count = 0  # consecutive ticks with intent detected
    confirm_deadline = 0.0
    confirm_last_prompt_ts = 0.0
    confirm_ignore_until = 0.0  # ignore yes/no flags during the guard window
    leave_since = 0.0  # when the subject first left frame in WAIT_CONFIRM
    framing_deadline = 0.0
    framing_last_feedback_ts = 0.0
    framing_good_streak = 0  # consecutive ticks with acceptable framing
    framing_metrics: dict = {}
    countdown_deadline = 0.0
    countdown_announced: set[int] = set()  # countdown seconds already spoken
    countdown_lost_since = 0.0  # when the subject first left frame in COUNTDOWN
    retake_count = 0
    retake_deadline = 0.0
    retake_recommended = False
    retake_reason = ""
    retake_score = 1.0
    sanad_logger.print_and_log("🤖 Autonomous mode enabled.", "info")
    try:
        while self._running:
            await asyncio.sleep(1.0 / max(1, self.detector.poll_hz))
            now = time.time()
            # Resolve operator runtime mode; any read failure or unknown
            # value degrades to "manual" (autonomous flow paused).
            runtime_mode = "manual"
            try:
                runtime_mode = str(config.read_runtime_mode()).strip().lower()
            except Exception:
                runtime_mode = "manual"
            if runtime_mode not in ("manual", "ai"):
                runtime_mode = "manual"
            # Runtime-refresh operator toggles from config.json.
            try:
                self.hard_target_lock_enabled = bool(config.read_vision_hard_target_lock_enabled())
                self.retake_prompt_enabled = bool(config.read_vision_retake_prompt_enabled())
                self.retake_limit = int(config.read_vision_retake_max_per_session())
                self.retake_score_threshold = float(config.read_vision_framing_retake_score_threshold())
                self.autonomous_greeting_replay_enabled = bool(config.read_vision_autonomous_greeting_replay_enabled())
                self.autonomous_greeting_replay_file = self._resolve_replay_path(
                    config.read_vision_autonomous_greeting_replay_file()
                )
                self.autonomous_capture_replay_enabled = bool(config.read_vision_autonomous_capture_replay_enabled())
                self.face_recognition_enabled = bool(config.read_vision_face_recognition_enabled())
                self.face_recognition_threshold = float(config.read_vision_face_recognition_threshold())
            except Exception as e:
                record_error("autonomous_manager", "runtime_option_refresh", e)
            # Propagate the hard-lock toggle to detectors that support it.
            if hasattr(self.detector, "set_hard_lock"):
                try:
                    self.detector.set_hard_lock(bool(self.hard_target_lock_enabled))
                except Exception as e:
                    record_error("autonomous_manager", "detector_set_hard_lock", e)
            # Latest vision snapshot drives every decision this tick.
            snap = self.detector.latest()
            face_count = int(snap.get("face_count", 0))
            subject_visible = bool(snap.get("subject_visible", face_count > 0))
            intent_detected = bool(snap.get("intent_detected", False))
            max_area = float(snap.get("max_area", 0.0))
            group_detected = bool(snap.get("group_detected", False))
            self._maybe_log_vision_snapshot(snap, now)
            # MANUAL mode: tear down any in-flight session, publish a
            # paused IDLE state, and skip the rest of the tick.
            if runtime_mode != "ai":
                stable_count = 0
                if state != "IDLE":
                    sanad_logger.print_and_log("🛑 Autonomous flow paused: runtime mode is MANUAL.", "info")
                    state = "IDLE"
                    state_enter_ts = now
                    self._clear_confirmation_flags()
                    self._set_interaction_active(False, voice=voice, reason="manual mode")
                    self._cancel_capture_pipeline(reason="manual mode")
                    self._unlock_target()
                    self._reset_current_person()
                self._write_runtime_state(
                    "IDLE",
                    snap,
                    voice=voice,
                    extras={
                        "stable_count": 0,
                        "runtime_mode": runtime_mode,
                        "autonomous_paused": True,
                        "retake_prompt_enabled": bool(self.retake_prompt_enabled),
                        "retake_limit": int(self.retake_limit),
                        "retake_count": int(retake_count),
                        **self._current_person_extras(),
                    },
                )
                continue
            # Strict YOLO policy: hold the whole flow in IDLE_BLOCKED
            # until the detector reports readiness again.
            readiness = self._detector_readiness()
            if self.yolo_strict_required and (not bool(readiness.get("ok", False))):
                block_reason = str(readiness.get("block_reason") or "AI blocked by strict YOLO policy.")
                # Log and tear down only on the first blocked tick or when
                # the block reason changes (avoids per-tick log spam).
                if (not self.ai_blocked) or (self.ai_block_reason != block_reason):
                    sanad_logger.print_and_log(f"⛔ AI blocked: {block_reason}", "error")
                    self._set_interaction_active(False, voice=voice, reason="strict yolo blocked")
                    self._clear_confirmation_flags()
                    self._cancel_capture_pipeline(reason="strict yolo blocked")
                    self._unlock_target()
                    self._reset_current_person()
                self.ai_blocked = True
                self.ai_block_reason = block_reason
                state = "IDLE_BLOCKED"
                stable_count = 0
                self._write_runtime_state(
                    "IDLE_BLOCKED",
                    snap,
                    voice=voice,
                    extras={
                        "yolo_loaded": bool(readiness.get("yolo_loaded", False)),
                        "person_model_ok": bool(readiness.get("person_model_ok", False)),
                        "face_model_ok": bool(readiness.get("face_model_ok", False)),
                        "detector_backend": str(readiness.get("effective_backend", "normal")),
                        "retake_prompt_enabled": bool(self.retake_prompt_enabled),
                        "retake_limit": int(self.retake_limit),
                        "retake_count": int(retake_count),
                        **self._current_person_extras(),
                    },
                )
                await self._push_vision_context(voice, "IDLE_BLOCKED", snap)
                continue
            # Readiness came back: leave the blocked state.
            if self.ai_blocked:
                sanad_logger.print_and_log("✅ AI readiness restored. Leaving blocked state.", "info")
                self.ai_blocked = False
                self.ai_block_reason = ""
                state = "IDLE"
                state_enter_ts = now
                stable_count = 0
                await self._push_vision_context(voice, state, snap)
            # Remote hard-cancel (R2+L1 combo on the hub) aborts any
            # session and applies the post-session cooldown.
            try:
                hard_cancel = False
                if hub is not None:
                    if getattr(hub, "hard_cancel_combo", None):
                        hard_cancel = bool(hub.hard_cancel_combo())
                    elif getattr(hub, "combo_r2l1", None):
                        hard_cancel = bool(hub.combo_r2l1())
                if hard_cancel:
                    sanad_logger.print_and_log("🛑 HARD CANCEL detected (R2+L1).", "warning")
                    state = "IDLE"
                    state_enter_ts = now
                    self.cooldown_until = now + self.session_cooldown_sec
                    self._clear_confirmation_flags()
                    self._set_interaction_active(False, voice=voice, reason="hard cancel")
                    self._cancel_capture_pipeline(reason="hard cancel")
                    self._unlock_target()
                    self._reset_current_person()
            except Exception as e:
                record_error("autonomous_manager", "remote_cancel_check", e)
            # ---------------- IDLE ----------------
            if state == "IDLE":
                self._write_runtime_state(
                    "IDLE",
                    snap,
                    voice=voice,
                    extras={
                        "stable_count": stable_count,
                        "retake_prompt_enabled": bool(self.retake_prompt_enabled),
                        "retake_limit": int(self.retake_limit),
                        "retake_count": int(retake_count),
                        **self._current_person_extras(),
                    },
                )
                # Allow visitor-initiated photo request from IDLE.
                # Only explicit request_photo should start a new session from idle.
                if self._consume_direct_request_flag():
                    self.session_id += 1
                    state = "FRAMING"
                    state_enter_ts = now
                    framing_deadline = now + self.framing_timeout_sec
                    framing_last_feedback_ts = 0.0
                    framing_good_streak = 0
                    framing_metrics = {}
                    retake_count = 0
                    retake_recommended = False
                    retake_reason = ""
                    retake_score = 1.0
                    self._clear_confirmation_flags()
                    self._set_interaction_active(True, voice=voice, reason="voice request from idle")
                    self._lock_target_on_session_start(snap)
                    self._identify_person_for_session(snap, source="voice_request")
                    sanad_logger.print_and_log(
                        f"🗣️ Voice photo request from IDLE -> session {self.session_id}",
                        "info",
                    )
                    await self._say_prompt(
                        voice,
                        self._framing_prompt_key(group_detected),
                        self._framing_prompt_text(group_detected),
                    )
                    continue
                # Respect post-session cooldown before re-arming on intent.
                if now < self.cooldown_until:
                    continue
                if intent_detected:
                    stable_count += 1
                else:
                    stable_count = 0
                # Require N consecutive intent ticks before engaging
                # (debounces flickering detections).
                if stable_count < self.stability_frames:
                    continue
                stable_count = 0
                self.session_id += 1
                state = "WAIT_CONFIRM"
                state_enter_ts = now
                confirm_deadline = now + self.confirm_timeout_sec
                confirm_ignore_until = now + self.confirm_guard_sec
                confirm_last_prompt_ts = now
                leave_since = 0.0
                framing_metrics = {}
                retake_count = 0
                retake_recommended = False
                retake_reason = ""
                retake_score = 1.0
                self._clear_confirmation_flags()
                self._set_interaction_active(True, voice=voice, reason=f"intent max_area={max_area:.0f}")
                self._lock_target_on_session_start(snap)
                self._identify_person_for_session(snap, source="intent")
                sanad_logger.print_and_log(
                    f"🔔 Intent detected (area={max_area:.0f}) -> session {self.session_id}",
                    "info",
                )
                self._start_greeting_replay(replay)
                # Wake the voice pipeline with a welcome; failure is
                # logged but does not abort the session.
                try:
                    wake_text = self._welcome_prompt_text(group_detected)
                    await voice.trigger_wake_sequence(
                        wake_text=wake_text,
                        prompt_key=self._welcome_prompt_key(group_detected),
                    )
                except Exception as e:
                    record_error("autonomous_manager", "wake_sequence", e)
                    sanad_logger.print_and_log(f"Wake sequence failed: {e}", "warning")
                continue
            # ---------------- WAIT_CONFIRM ----------------
            if state == "WAIT_CONFIRM":
                confirm_remaining = max(0.0, confirm_deadline - now)
                self._write_runtime_state(
                    "WAIT_CONFIRM",
                    snap,
                    voice=voice,
                    extras={
                        "confirm_timeout_remaining": confirm_remaining,
                        "confirm_guard_remaining": max(0.0, confirm_ignore_until - now),
                        "retake_prompt_enabled": bool(self.retake_prompt_enabled),
                        "retake_limit": int(self.retake_limit),
                        "retake_count": int(retake_count),
                        **self._current_person_extras(),
                    },
                )
                # Abandon the session if the visitor walks away for
                # longer than the leave timeout.
                if not subject_visible:
                    if leave_since <= 0.0:
                        leave_since = now
                    elif (now - leave_since) >= self.leave_timeout_sec:
                        sanad_logger.print_and_log("🙈 Visitor left before confirmation.", "warning")
                        await self._say_prompt(
                            voice,
                            "visitor_left",
                            "No worries. I will wait here for the next visitor.",
                        )
                        state = "IDLE"
                        state_enter_ts = now
                        self.cooldown_until = now + self.session_cooldown_sec
                        self._clear_confirmation_flags()
                        self._set_interaction_active(False, voice=voice, reason="left before confirm")
                        self._cancel_capture_pipeline(reason="left before confirm")
                        self._unlock_target()
                        self._reset_current_person()
                        continue
                else:
                    leave_since = 0.0
                # Yes/no flags are honoured only after the guard window,
                # so stale answers from the prompt itself are ignored.
                if now >= confirm_ignore_until:
                    if self._consume_no_photo_flag():
                        await self._say_prompt(voice, "declined", "No problem. We can do it anytime.")
                        state = "IDLE"
                        state_enter_ts = now
                        self.cooldown_until = now + self.session_cooldown_sec
                        self._clear_confirmation_flags()
                        self._set_interaction_active(False, voice=voice, reason="visitor declined")
                        self._cancel_capture_pipeline(reason="visitor declined")
                        self._unlock_target()
                        self._reset_current_person()
                        continue
                    if self._consume_request_photo_flag():
                        sanad_logger.print_and_log("✅ Confirmation received. Entering framing check.", "info")
                        await self._say_prompt(
                            voice,
                            self._framing_prompt_key(group_detected),
                            self._framing_prompt_text(group_detected),
                        )
                        state = "FRAMING"
                        state_enter_ts = now
                        framing_deadline = now + self.framing_timeout_sec
                        framing_last_feedback_ts = 0.0
                        framing_good_streak = 0
                        continue
                    # Periodic spoken reminder while waiting for an answer.
                    if (now - confirm_last_prompt_ts) >= self.confirm_reminder_sec:
                        confirm_last_prompt_ts = now
                        await self._say_prompt(
                            voice,
                            "confirm_reminder",
                            "Say yes photo to continue, or no photo to cancel.",
                        )
                if now >= confirm_deadline:
                    sanad_logger.print_and_log("⌛ Confirmation timeout.", "warning")
                    await self._say_prompt(
                        voice,
                        "confirm_timeout",
                        "No problem. I will wait here. Come back anytime for a photo.",
                    )
                    state = "IDLE"
                    state_enter_ts = now
                    self.cooldown_until = now + self.session_cooldown_sec
                    self._clear_confirmation_flags()
                    self._set_interaction_active(False, voice=voice, reason="confirm timeout")
                    self._cancel_capture_pipeline(reason="confirm timeout")
                    self._unlock_target()
                    self._reset_current_person()
                    continue
            # ---------------- FRAMING ----------------
            elif state == "FRAMING":
                if self._consume_no_photo_flag():
                    await self._say_prompt(voice, "session_cancelled", "Okay. Session cancelled.")
                    state = "IDLE"
                    state_enter_ts = now
                    self.cooldown_until = now + self.session_cooldown_sec
                    self._clear_confirmation_flags()
                    self._set_interaction_active(False, voice=voice, reason="cancelled during framing")
                    self._cancel_capture_pipeline(reason="cancelled during framing")
                    self._unlock_target()
                    self._reset_current_person()
                    continue
                good, reasons, framing_metrics = self._evaluate_framing_quality(snap)
                framing_remaining = max(0.0, framing_deadline - now)
                self._write_runtime_state(
                    "FRAMING",
                    snap,
                    voice=voice,
                    extras={
                        "framing_ok": bool(good),
                        "framing_reasons": reasons,
                        "framing_timeout_remaining": framing_remaining,
                        "framing_good_streak": framing_good_streak,
                        "framing_metrics": framing_metrics,
                        "retake_prompt_enabled": bool(self.retake_prompt_enabled),
                        "retake_limit": int(self.retake_limit),
                        "retake_count": int(retake_count),
                        **self._current_person_extras(),
                    },
                )
                if good:
                    framing_good_streak += 1
                else:
                    framing_good_streak = 0
                    # Throttled spoken guidance while framing is not good.
                    if (now - framing_last_feedback_ts) >= self.framing_feedback_interval_sec:
                        framing_last_feedback_ts = now
                        await self._say(voice, self._framing_guidance_text(reasons))
                # Enough consecutive good frames: kick off the capture
                # pipeline and switch to the countdown.
                if framing_good_streak >= self.framing_good_frames_required:
                    capture_timing = self._resolve_capture_timing(
                        use_replay=bool(self.autonomous_capture_replay_enabled)
                    )
                    capture_start_ts = time.time()
                    self._start_capture_pipeline(replay, timing_info=capture_timing)
                    await self._say_capture_prompt(
                        voice,
                        "countdown_intro",
                        "Look at the camera, stay ready, hold your pose with me, keep still, keep your smile soft, and in a moment I will count down for the photo.",
                    )
                    state = "COUNTDOWN"
                    state_enter_ts = now
                    # Countdown ends when the pipeline is expected to
                    # actually take the shot (replay offset, else config).
                    countdown_deadline = capture_start_ts + float(
                        capture_timing.get("capture_offset_sec")
                        or max(0.0, min(config.PHOTO_DELAY_SEC, config.PHOTO_TOTAL_SEC))
                    )
                    countdown_announced = set()
                    countdown_lost_since = 0.0
                    continue
                if now >= framing_deadline:
                    sanad_logger.print_and_log("⌛ Framing timeout.", "warning")
                    await self._say_prompt(
                        voice,
                        "framing_timeout",
                        "I still need a better frame. Please step in front of me and say yes photo when ready.",
                    )
                    # Fall back to confirmation rather than aborting.
                    state = "WAIT_CONFIRM"
                    state_enter_ts = now
                    confirm_deadline = now + self.confirm_timeout_sec
                    confirm_ignore_until = now + self.confirm_guard_sec
                    confirm_last_prompt_ts = now
                    leave_since = 0.0
                    continue
            # ---------------- COUNTDOWN ----------------
            elif state == "COUNTDOWN":
                countdown_remaining = max(0.0, countdown_deadline - now)
                self._write_runtime_state(
                    "COUNTDOWN",
                    snap,
                    voice=voice,
                    extras={
                        "countdown_remaining": countdown_remaining,
                        "framing_metrics": framing_metrics,
                        "retake_prompt_enabled": bool(self.retake_prompt_enabled),
                        "retake_limit": int(self.retake_limit),
                        "retake_count": int(retake_count),
                        **self._current_person_extras(),
                    },
                )
                if self._consume_no_photo_flag():
                    await self._say_prompt(voice, "countdown_cancelled", "Countdown cancelled.")
                    state = "IDLE"
                    state_enter_ts = now
                    self.cooldown_until = now + self.session_cooldown_sec
                    self._clear_confirmation_flags()
                    self._set_interaction_active(False, voice=voice, reason="cancelled during countdown")
                    self._cancel_capture_pipeline(reason="cancelled during countdown")
                    self._unlock_target()
                    self._reset_current_person()
                    continue
                # If the subject disappears for too long, return to
                # FRAMING instead of shooting an empty frame.
                if not subject_visible:
                    if countdown_lost_since <= 0.0:
                        countdown_lost_since = now
                    elif (now - countdown_lost_since) >= self.countdown_lose_subject_sec:
                        await self._say_prompt(
                            voice,
                            "lost_from_frame",
                            "I lost you from frame. Let us try again.",
                        )
                        state = "FRAMING"
                        state_enter_ts = now
                        framing_deadline = now + self.framing_timeout_sec
                        framing_last_feedback_ts = 0.0
                        framing_good_streak = 0
                        continue
                else:
                    countdown_lost_since = 0.0
                # Announce each remaining second (3, 2, 1) exactly once,
                # then "Smile." at zero.
                sec_left = int(round(countdown_remaining))
                if sec_left in (3, 2, 1) and sec_left not in countdown_announced:
                    countdown_announced.add(sec_left)
                    await self._say_capture_prompt(voice, f"count_{sec_left}", f"{sec_left}...")
                elif sec_left <= 0 and 0 not in countdown_announced:
                    countdown_announced.add(0)
                    await self._say_capture_prompt(voice, "smile", "Smile.")
                # Snapshot the background pipeline's result under the lock.
                with self._capture_lock:
                    capture_done = bool(self._capture_done)
                    capture_result = self._capture_result
                if capture_done:
                    if isinstance(capture_result, str) and capture_result.startswith("[ERR]"):
                        sanad_logger.print_and_log(f"Capture failed: {capture_result}", "error")
                        state = "IDLE"
                        state_enter_ts = now
                        self.cooldown_until = now + self.session_cooldown_sec
                        self._set_interaction_active(False, voice=voice, reason="capture failed")
                        self._cancel_capture_pipeline(reason="capture failed")
                        self._unlock_target()
                        self._reset_current_person()
                    else:
                        # Best-effort: link the saved photo to the
                        # recognized person, if any.
                        try:
                            person_id = str((self.current_person or {}).get("person_id") or "").strip()
                            if person_id:
                                people_registry.attach_captured_photo(person_id, str(capture_result))
                        except Exception as e:
                            record_error("autonomous_manager", "attach_captured_photo", e)
                        retake_recommended, retake_reason, retake_score, framing_metrics = self._retake_assessment(
                            snap,
                            previous_metrics=framing_metrics,
                        )
                        # Offer a retake only when enabled, recommended,
                        # and under the per-session retake limit.
                        if (
                            bool(self.retake_prompt_enabled)
                            and bool(retake_recommended)
                            and int(retake_count) < int(self.retake_limit)
                        ):
                            state = "RETAKE_CONFIRM"
                            state_enter_ts = now
                            retake_deadline = now + self.retake_confirm_timeout_sec
                            reason_txt = retake_reason or "the framing is not optimal"
                            await self._say_prompt(
                                voice,
                                "retake_recommended",
                                f"Photo captured. I recommend a retake because {reason_txt}. "
                                "Say yes photo to retake, or no photo to keep this one.",
                            )
                        else:
                            state = "COMPLETE"
                            state_enter_ts = now
                    continue
                # Pipeline never finished within the grace period after
                # the countdown: reset the session.
                if now >= (countdown_deadline + self.capture_finalize_grace_sec):
                    record_error(
                        "autonomous_manager",
                        "countdown_capture_timeout",
                        context={"session_id": self.session_id, "grace_sec": self.capture_finalize_grace_sec},
                    )
                    sanad_logger.print_and_log("Capture timeout after countdown. Resetting session.", "error")
                    state = "IDLE"
                    state_enter_ts = now
                    self.cooldown_until = now + self.session_cooldown_sec
                    self._set_interaction_active(False, voice=voice, reason="capture timeout")
                    self._cancel_capture_pipeline(reason="capture timeout")
                    self._unlock_target()
                    self._reset_current_person()
                    continue
            # ---------------- RETAKE_CONFIRM ----------------
            elif state == "RETAKE_CONFIRM":
                rem = max(0.0, retake_deadline - now)
                self._write_runtime_state(
                    "RETAKE_CONFIRM",
                    snap,
                    voice=voice,
                    extras={
                        "retake_prompt_enabled": bool(self.retake_prompt_enabled),
                        "retake_recommended": bool(retake_recommended),
                        "retake_reason": str(retake_reason or ""),
                        "retake_score": float(retake_score),
                        "retake_count": int(retake_count),
                        "retake_limit": int(self.retake_limit),
                        "retake_timeout_remaining": rem,
                        "framing_metrics": framing_metrics,
                        **self._current_person_extras(),
                    },
                )
                # "no photo" keeps the current shot.
                if self._consume_no_photo_flag():
                    state = "COMPLETE"
                    state_enter_ts = now
                    continue
                # "yes photo" retakes, if the limit allows it.
                if self._consume_request_photo_flag():
                    if int(retake_count) < int(self.retake_limit):
                        retake_count += 1
                        await self._say_prompt(
                            voice,
                            "retake_yes",
                            "Great, let us retake. Hold your pose.",
                        )
                        state = "FRAMING"
                        state_enter_ts = now
                        framing_deadline = now + self.framing_timeout_sec
                        framing_last_feedback_ts = 0.0
                        framing_good_streak = 0
                    else:
                        await self._say_prompt(
                            voice,
                            "retake_limit",
                            "Retake limit reached. Keeping the current photo.",
                        )
                        state = "COMPLETE"
                        state_enter_ts = now
                    continue
                # No answer in time: keep the photo.
                if now >= retake_deadline:
                    state = "COMPLETE"
                    state_enter_ts = now
                    continue
            # ---------------- COMPLETE ----------------
            elif state == "COMPLETE":
                self._write_runtime_state(
                    "COMPLETE",
                    snap,
                    voice=voice,
                    extras={
                        "capture_result": self._capture_result,
                        "retake_prompt_enabled": bool(self.retake_prompt_enabled),
                        "retake_recommended": bool(retake_recommended),
                        "retake_reason": str(retake_reason or ""),
                        "retake_score": float(retake_score),
                        "retake_count": int(retake_count),
                        "retake_limit": int(self.retake_limit),
                        "framing_metrics": framing_metrics,
                        **self._current_person_extras(),
                    },
                )
                await self._say_capture_prompt(
                    voice,
                    "photo_saved_thanks",
                    "Thank you. Photo saved. Don't forget to check your photos.",
                )
                sanad_logger.print_and_log(f"✅ Session {self.session_id} complete.", "info")
                await asyncio.sleep(1.0)
                state = "IDLE"
                state_enter_ts = now
                self.cooldown_until = time.time() + self.session_cooldown_sec
                self._clear_confirmation_flags()
                self._set_interaction_active(False, voice=voice, reason="session complete")
                self._cancel_capture_pipeline(reason="session complete")
                self._unlock_target()
                self._reset_current_person()
                continue
            # Watchdog: no state may persist longer than two minutes;
            # force a clean reset to IDLE if it does.
            if (time.time() - state_enter_ts) > 120.0:
                sanad_logger.print_and_log("Autonomous state timed out, forcing IDLE reset.", "warning")
                state = "IDLE"
                state_enter_ts = time.time()
                self.cooldown_until = time.time() + self.session_cooldown_sec
                self._clear_confirmation_flags()
                self._set_interaction_active(False, voice=voice, reason="state watchdog reset")
                self._cancel_capture_pipeline(reason="state watchdog reset")
                self._unlock_target()
                self._reset_current_person()
    finally:
        # Always release interaction state, pending capture, target lock
        # and person context, and stop the detector on loop exit.
        self._set_interaction_active(False, voice=voice, reason="autonomous stop")
        self._cancel_capture_pipeline(reason="autonomous stop")
        self._unlock_target()
        self._reset_current_person()
        self.detector.stop()
def stop(self):
    """Request shutdown of the autonomous loop and halt the detector."""
    # Clearing the flag makes run()'s while-loop exit on its next tick;
    # stopping the detector is done here as well so stop() works even
    # when run() is not currently active.
    self._running = False
    self.detector.stop()
if __name__ == "__main__":
    # Manual smoke run: drive the autonomous loop with inert placeholder
    # objects standing in for hub/replay/voice, until Ctrl+C.
    async def _demo():
        class _Stub:
            pass

        manager = AutonomousManager()
        await manager.run(_Stub(), _Stub(), _Stub(), None)

    try:
        asyncio.run(_demo())
    except KeyboardInterrupt:
        pass