# AI_Photographer/Core/audio_prompts.py

from __future__ import annotations

import json
import re
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List

from Core import settings as config

# Default spoken text for every audio prompt, keyed by prompt key.
# The key set is also the whitelist of valid prompt keys: `_clean_key` rejects
# anything that is not a key of this mapping, and `PROMPT_KEYS` is derived
# from it, so the insertion order here is the order prompts are listed in.
DEFAULT_PROMPT_TEXTS: Dict[str, str] = {
    "welcome_single": "Hello, welcome. We will take a photo together. Would you like a photo?",
    "welcome_group": "Hello everyone, welcome. We will take a photo together. Would your group like a photo?",
    "welcome_returning": "Welcome back. Would you like another photo?",
    "frame_single": "Great. Please stand with me in front of the camera, stay in the center, and look at the camera.",
    "frame_group": "Great. Please stand with me in front of the camera, stay together in the center, and look at the camera.",
    "confirm_reminder": "Please say yes photo to continue, or no photo to cancel.",
    "visitor_left": "No worries. I will wait here for the next visitor.",
    "declined": "No problem. We can do it anytime.",
    "confirm_timeout": "No problem. I will wait here. Come back anytime for a photo.",
    "session_cancelled": "Okay. Session cancelled.",
    "framing_timeout": "I still need a better frame. Please step in front of me and say yes photo when ready.",
    "countdown_intro": "Look at the camera, stay ready, hold your pose with me, keep still, keep your smile soft, and in a moment I will count down for the photo.",
    "count_3": "Three.",
    "count_2": "Two.",
    "count_1": "One.",
    "smile": "Smile.",
    "countdown_cancelled": "Countdown cancelled.",
    "lost_from_frame": "I lost you from the frame. Let us try again.",
    "retake_recommended": "Photo captured. I recommend a retake. Say yes photo to retake, or no photo to keep this one.",
    "retake_yes": "Great. Let us retake. Hold your pose.",
    "retake_limit": "Retake limit reached. Keeping the current photo.",
    "photo_saved_thanks": "Thank you. Photo saved. Do not forget to check your photos.",
}

# All valid prompt keys, in the declaration order of DEFAULT_PROMPT_TEXTS.
PROMPT_KEYS = tuple(DEFAULT_PROMPT_TEXTS.keys())
# JSON record index location; resolved once at import time. This is the only
# path `save_record_index` writes to.
RECORD_INDEX_PATH = config.AUDIO_PROMPT_RECORDS_FILE.resolve()
# Alternate index location inside the prompts directory; `load_record_index`
# only reads it as a fallback when the primary index is missing or unreadable.
LEGACY_RECORD_INDEX_PATH = (config.AUDIO_PROMPTS_DIR / "records.json").resolve()


def _clean_key(key: str) -> str:
    """Normalize *key* and confirm it names a known audio prompt.

    Falsy input is treated as an empty string and surrounding whitespace is
    stripped before the lookup.

    Returns:
        The normalized key.

    Raises:
        KeyError: if the normalized key is not in ``DEFAULT_PROMPT_TEXTS``.
    """
    normalized = str(key or "").strip()
    if normalized in DEFAULT_PROMPT_TEXTS:
        return normalized
    raise KeyError(f"unknown audio prompt key: {normalized}")


def _safe_filename(name: str) -> str:
    cleaned = re.sub(r"[^A-Za-z0-9._-]+", "_", str(name or "").strip()).strip("._")
    if not cleaned:
        raise ValueError("invalid prompt filename")
    if not cleaned.lower().endswith(".wav"):
        raise ValueError("audio prompt files must be .wav")
    return cleaned


def _record_index_template() -> Dict[str, Any]:
    return {
        "created_by": "AI_Photographer.audio_prompts",
        "last_updated": "",
        "total_records": 0,
        "records": [],
    }


def _format_timestamp(dt: datetime) -> str:
    return dt.isoformat(timespec="seconds")


def _audio_duration_seconds(audio_bytes: bytes, sample_rate: int, channels: int, sample_width: int) -> float:
    frame_size = max(1, int(sample_width) * max(1, int(channels)))
    rate = max(1, int(sample_rate))
    return len(audio_bytes) / float(rate * frame_size)


def _build_file_info(path: Path, audio_bytes: bytes, sample_rate: int, channels: int, sample_width: int) -> Dict[str, Any]:
    """Describe one stored audio file for the record index.

    The size comes from the file on disk when it exists, otherwise from the
    length of *audio_bytes*. The duration is always computed from
    *audio_bytes* and the given audio parameters.

    Returns:
        A dict with the stored path, file name, size (bytes and MB), duration
        and the audio parameters.
    """
    if path.exists():
        size_bytes = int(path.stat().st_size)
    else:
        size_bytes = int(len(audio_bytes))

    info: Dict[str, Any] = {}
    info["path"] = _stored_project_path(path)
    info["name"] = path.name
    info["size_bytes"] = size_bytes
    info["size_mb"] = round(size_bytes / (1024 * 1024), 3)
    duration = _audio_duration_seconds(audio_bytes, sample_rate, channels, sample_width)
    info["duration_seconds"] = round(duration, 3)
    info["sample_rate"] = int(sample_rate)
    info["channels"] = int(channels)
    info["sample_width_bytes"] = int(sample_width)
    return info


def _stored_project_path(path: Path) -> str:
    """Return the form of *path* that is written into the record index.

    Paths inside ``config.PROJECT_ROOT`` are stored as
    ``<project root name>/<posix relative path>``; anything else (or any
    failure while relativizing) falls back to the resolved absolute path.
    """
    absolute = Path(path).resolve()
    try:
        relative = absolute.relative_to(config.PROJECT_ROOT)
        stored = f"{config.PROJECT_ROOT.name}/{relative.as_posix()}"
    except Exception:
        stored = str(absolute)
    return stored


def prompt_text(key: str) -> str:
    """Return the default spoken text for *key*.

    Raises:
        KeyError: if *key* is not a known prompt key.
    """
    validated = _clean_key(key)
    return DEFAULT_PROMPT_TEXTS[validated]


def prompt_filename(key: str) -> str:
    """Return the filename configured for *key*'s recording.

    The key is validated first; the lookup itself is delegated to
    ``config.read_audio_prompt_filename``.

    Raises:
        KeyError: if *key* is not a known prompt key.
    """
    validated = _clean_key(key)
    return config.read_audio_prompt_filename(validated)


def prompt_path(key: str) -> Path:
    """Return the resolved path of *key*'s recording inside the prompts directory.

    Raises:
        KeyError: if *key* is not a known prompt key.
    """
    location = config.AUDIO_PROMPTS_DIR / prompt_filename(key)
    return location.resolve()


def raw_prompt_filename(key: str) -> str:
    """Return the filename of the raw companion recording for *key*.

    It is derived from the configured speaker filename: same stem with a
    ``_raw.wav`` suffix.

    Raises:
        KeyError: if *key* is not a known prompt key.
    """
    speaker_name = prompt_filename(_clean_key(key))
    stem = Path(speaker_name).stem
    return f"{stem}_raw.wav"


def raw_prompt_path(key: str) -> Path:
    """Return the resolved path of *key*'s raw companion recording.

    Raises:
        KeyError: if *key* is not a known prompt key.
    """
    location = config.AUDIO_PROMPTS_DIR / raw_prompt_filename(key)
    return location.resolve()


def prompt_exists(key: str) -> bool:
    """Report whether a recording file for *key* is present on disk.

    Raises:
        KeyError: if *key* is not a known prompt key.
    """
    recording = prompt_path(key)
    return recording.exists()


def load_record_index() -> Dict[str, Any]:
    """Load the record index, preferring the primary file over the legacy one.

    Each candidate file is tried in order. A candidate that is missing,
    unreadable, not valid JSON, or not shaped like an index (a dict with a
    ``records`` list) is skipped. Missing header fields are filled in and the
    result is reconciled against the files on disk before it is returned.

    Returns:
        The reconciled index, or an empty template when no candidate is usable.
    """
    candidates = [RECORD_INDEX_PATH, LEGACY_RECORD_INDEX_PATH]
    for index_file in candidates:
        if index_file.exists():
            try:
                data = json.loads(index_file.read_text(encoding="utf-8"))
                well_formed = isinstance(data, dict) and isinstance(data.get("records"), list)
                if not well_formed:
                    raise ValueError("invalid record index structure")
                header_defaults = (
                    ("created_by", "AI_Photographer.audio_prompts"),
                    ("last_updated", ""),
                    ("total_records", len(data.get("records", []))),
                )
                for field, fallback in header_defaults:
                    data.setdefault(field, fallback)
                return reconcile_record_index(data)
            except Exception:
                # Best effort: any failure just moves on to the next candidate.
                pass
    return _record_index_template()


def reconcile_record_index(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Make the record index agree with the prompt files that exist on disk.

    A record is kept only when its ``record_name`` is a known prompt key and
    the speaker recording for that key exists. For kept records the stored
    path/name of the speaker file are refreshed, and the raw-output metadata
    is either refreshed or removed depending on whether the raw file exists.
    The header fields ``records``, ``total_records`` and ``last_updated`` are
    then rewritten (``last_updated`` is the ``saved_at`` of the last kept
    record, or ``""``).

    Malformed input is tolerated rather than raised on: a missing or ``None``
    ``records`` value counts as empty, non-dict entries are dropped, a non-dict
    ``files`` value is replaced by an empty dict, and a non-dict ``timeline``
    yields an empty ``last_updated``.

    Args:
        payload: The index dict. It and its record entries are mutated in place.

    Returns:
        The same *payload* object, after reconciliation.
    """
    records = []
    for entry in payload.get("records") or []:
        # A hand-edited or corrupted index may hold non-dict entries; dropping
        # them keeps one bad entry from invalidating every other record.
        if not isinstance(entry, dict):
            continue
        key = str(entry.get("record_name", "") or "").strip()
        if key not in DEFAULT_PROMPT_TEXTS:
            continue
        speaker_path = prompt_path(key)
        raw_path = raw_prompt_path(key)
        if not speaker_path.exists():
            continue

        files = entry.get("files")
        if not isinstance(files, dict):
            files = {}
            entry["files"] = files

        speaker_info = files.get("speaker_recording")
        if isinstance(speaker_info, dict):
            speaker_info["path"] = _stored_project_path(speaker_path)
            speaker_info["name"] = speaker_path.name

        raw_info = files.get("gemini_raw_output")
        if isinstance(raw_info, dict):
            if raw_path.exists():
                raw_info["path"] = _stored_project_path(raw_path)
                raw_info["name"] = raw_path.name
            else:
                # The raw file is gone, so its metadata would be stale.
                files.pop("gemini_raw_output", None)

        records.append(entry)

    last_updated = ""
    if records:
        timeline = records[-1].get("timeline")
        if isinstance(timeline, dict):
            last_updated = timeline.get("saved_at", "")

    payload["records"] = records
    payload["total_records"] = len(records)
    payload["last_updated"] = last_updated
    return payload


def save_record_index(payload: Dict[str, Any]) -> None:
    """Reconcile *payload* against disk and persist it to ``RECORD_INDEX_PATH``.

    A shallow copy of *payload* is reconciled, so the caller's top-level dict
    keeps its own header fields (record entries are shared and may still be
    updated by the reconciliation).

    The index is serialized first, written to a sibling temporary file and
    then moved over the real file. An interrupted write therefore cannot leave
    a truncated index behind, which the loader would treat as having no
    records at all.

    Raises:
        OSError: if the directories or the index file cannot be written.
        TypeError: if the payload contains values JSON cannot serialize.
    """
    config.AUDIO_PROMPTS_DIR.mkdir(parents=True, exist_ok=True)
    # The index path is configured separately from the prompts directory, so
    # its own parent directory has to be created as well.
    RECORD_INDEX_PATH.parent.mkdir(parents=True, exist_ok=True)

    normalized = reconcile_record_index(dict(payload))
    serialized = json.dumps(normalized, ensure_ascii=False, indent=2)

    staging = RECORD_INDEX_PATH.with_name(RECORD_INDEX_PATH.name + ".tmp")
    try:
        staging.write_text(serialized, encoding="utf-8")
        staging.replace(RECORD_INDEX_PATH)
    except OSError:
        # Do not leave a stray temp file behind; the original error still propagates.
        try:
            staging.unlink()
        except OSError:
            pass
        raise


def _record_entry_map() -> Dict[str, Dict[str, Any]]:
    """Load the record index and return its entries keyed by ``record_name``.

    Entries with an empty name are left out; when a name occurs more than
    once, the later entry wins.
    """
    index = load_record_index()
    named_entries = (
        (str(record.get("record_name", "") or "").strip(), record)
        for record in index.get("records", [])
    )
    return {name: record for name, record in named_entries if name}


def upsert_record_entry(entry: Dict[str, Any]) -> None:
    """Insert *entry* into the stored index, replacing any record with the same name.

    The new entry is appended after the remaining records and the index is
    saved straight away.

    Raises:
        KeyError: if the entry's ``record_name`` is not a known prompt key.
    """
    key = _clean_key(str(entry.get("record_name", "") or ""))
    index = load_record_index()

    kept = []
    for existing in index.get("records", []):
        existing_key = str(existing.get("record_name", "") or "").strip()
        if existing_key != key:
            kept.append(existing)
    kept.append(entry)

    index.update(
        records=kept,
        total_records=len(kept),
        last_updated=entry.get("timeline", {}).get("saved_at", ""),
    )
    save_record_index(index)


def delete_record_entry(key: str) -> None:
    """Remove the record for *key* from the stored index and save the result.

    Raises:
        KeyError: if *key* is not a known prompt key.
    """
    clean_key = _clean_key(key)
    index = load_record_index()

    remaining = []
    for existing in index.get("records", []):
        existing_key = str(existing.get("record_name", "") or "").strip()
        if existing_key != clean_key:
            remaining.append(existing)

    index["records"] = remaining
    index["total_records"] = len(remaining)
    if remaining:
        index["last_updated"] = remaining[-1].get("timeline", {}).get("saved_at", "")
    else:
        index["last_updated"] = ""
    save_record_index(index)


def list_audio_prompts() -> List[dict]:
    """Return one status dict per known prompt key, in ``PROMPT_KEYS`` order.

    Each item combines three sources: the default text and configured
    filename, the state of the files on disk (existence, size and mtime of the
    speaker recording, existence of the raw recording), and the metadata in
    the record index (raw size, durations, replay count, save time, full
    record). The ``fallback_to_gemini`` setting is read from config once and
    copied into every item.
    """
    use_gemini_fallback = bool(config.read_audio_prompts_fallback_to_gemini())
    records_by_key = _record_entry_map()

    listing: List[dict] = []
    for key in PROMPT_KEYS:
        speaker_file = prompt_path(key)
        raw_file = raw_prompt_path(key)
        speaker_present = speaker_file.exists()
        raw_present = raw_file.exists()
        stat_result = speaker_file.stat() if speaker_present else None

        record = records_by_key.get(key, {})
        if isinstance(record, dict):
            speaker_meta = record.get("files", {}).get("speaker_recording", {})
            raw_meta = record.get("files", {}).get("gemini_raw_output", {})
            timeline = record.get("timeline", {})
        else:
            speaker_meta, raw_meta, timeline = {}, {}, {}

        item = {
            "key": key,
            "text": prompt_text(key),
            "filename": prompt_filename(key),
            "exists": bool(speaker_present),
            "size": int(stat_result.st_size) if stat_result else 0,
            "mtime": float(stat_result.st_mtime) if stat_result else 0.0,
            "raw_filename": raw_file.name,
            "raw_exists": bool(raw_present),
            # Raw size comes from the record index, not from the file itself.
            "raw_size": int(raw_meta.get("size_bytes", 0) or 0),
            "replay_count": int(record.get("replay_count", 0) or 0),
            "saved_at": str(timeline.get("saved_at", "") or ""),
            "speaker_duration_seconds": float(speaker_meta.get("duration_seconds", 0.0) or 0.0),
            "raw_duration_seconds": float(raw_meta.get("duration_seconds", 0.0) or 0.0),
            "record": record,
            "fallback_to_gemini": use_gemini_fallback,
        }
        listing.append(item)
    return listing


def _discard_quietly(path: Path) -> None:
    """Delete *path*, ignoring any failure (this cleanup is best effort)."""
    try:
        path.unlink()
    except Exception:
        pass


def save_audio_prompt_bundle(
    key: str,
    speaker_data: bytes,
    filename: str = "",
    *,
    raw_data: bytes | None = None,
    text: str = "",
    model: str = "",
    voice_name: str = "",
    replay_count: int = 1,
    speaker_rate: int = 24000,
    speaker_channels: int = 1,
    raw_rate: int = 24000,
    raw_channels: int = 1,
    sample_width: int = 2,
    capture_device: str = "",
    sink: str = "",
    source: str = "",
    monitor_source: str = "",
) -> dict:
    """Store a prompt recording (and optional raw companion) and index it.

    Steps, in order: the speaker bytes are written under the sanitized
    filename, the filename is recorded in config for *key*, files left over
    under a previously configured name are removed, the raw companion is
    written (or removed when no *raw_data* is given), and a record describing
    the bundle is upserted into the record index.

    Args:
        key: Prompt key; must be a known prompt key.
        speaker_data: Bytes written as the speaker recording.
        filename: Target filename; defaults to the currently configured one.
        raw_data: Optional bytes for the raw companion file. Empty or ``None``
            means no raw file is kept.
        text: Text stored in the record; defaults to the prompt's default text.
        model: Stored in the record; defaults to ``config.GEMINI_MODEL``.
        voice_name: Stored in the record; defaults to ``config.VOICE_NAME``.
        replay_count: Stored in the record as an int.
        speaker_rate: Sample rate used for the speaker file's metadata.
        speaker_channels: Channel count used for the speaker file's metadata.
        raw_rate: Sample rate used for the raw file's metadata.
        raw_channels: Channel count used for the raw file's metadata.
        sample_width: Bytes per sample, used for both files' metadata.
        capture_device: Stored under ``audio_capture.capture_device``.
        sink: Stored under ``audio_capture.sink``.
        source: Stored under ``audio_capture.restored_microphone_source``.
        monitor_source: Stored under ``audio_capture.monitor_source``.

    Returns:
        A dict with ``ok``, ``key``, ``filename``, ``raw_filename`` (empty
        when no raw data was saved), ``path`` and the stored ``record``.

    Raises:
        KeyError: if *key* is not a known prompt key.
        ValueError: if the filename is empty after sanitizing or is not ``.wav``.
    """
    clean_key = _clean_key(key)
    safe_filename = _safe_filename(filename or prompt_filename(clean_key))

    # Capture the current locations before the configured filename changes:
    # the raw path is derived from it.
    previous_speaker = prompt_path(clean_key)
    previous_raw = raw_prompt_path(clean_key)

    target = (config.AUDIO_PROMPTS_DIR / safe_filename).resolve()
    target.parent.mkdir(parents=True, exist_ok=True)

    if previous_speaker.exists() and previous_speaker != target:
        _discard_quietly(previous_speaker)

    target.write_bytes(speaker_data)
    config.write_audio_prompt_filename(clean_key, safe_filename)

    # Re-derive the raw path now that the new filename is configured.
    raw_target = raw_prompt_path(clean_key)
    if previous_raw.exists() and previous_raw != raw_target:
        _discard_quietly(previous_raw)
    if raw_data:
        raw_target.write_bytes(raw_data)
    elif raw_target.exists():
        # No raw data this time, so an older raw file would be misleading.
        _discard_quietly(raw_target)

    saved_at = _format_timestamp(datetime.now())

    entry: Dict[str, Any] = {"record_name": clean_key}
    entry["text"] = str(text or prompt_text(clean_key)).strip()
    entry["model"] = str(model or config.GEMINI_MODEL)
    entry["voice_name"] = str(voice_name or config.VOICE_NAME)
    entry["replay_count"] = int(replay_count)
    entry["audio_capture"] = {
        "sink": str(sink or ""),
        "monitor_source": str(monitor_source or ""),
        "restored_microphone_source": str(source or ""),
        "capture_device": str(capture_device or ""),
    }
    entry["timeline"] = {"saved_at": saved_at}
    entry["files"] = {
        "speaker_recording": _build_file_info(
            target, speaker_data, speaker_rate, speaker_channels, sample_width
        ),
    }
    if raw_data:
        entry["files"]["gemini_raw_output"] = _build_file_info(
            raw_target, raw_data, raw_rate, raw_channels, sample_width
        )

    upsert_record_entry(entry)

    result = {"ok": True, "key": clean_key, "filename": target.name}
    result["raw_filename"] = raw_target.name if raw_data else ""
    result["path"] = str(target)
    result["record"] = entry
    return result


def save_audio_prompt(key: str, data: bytes, filename: str) -> dict:
    """Store *data* as the recording for *key* with no raw companion.

    Thin wrapper over ``save_audio_prompt_bundle`` that records the prompt's
    default text and a replay count of 0.

    Raises:
        KeyError: if *key* is not a known prompt key.
    """
    default_text = prompt_text(key)
    return save_audio_prompt_bundle(
        key,
        data,
        filename=filename,
        text=default_text,
        replay_count=0,
    )


def delete_audio_prompt(key: str) -> dict:
    """Delete the speaker and raw recordings for *key* and drop its index record.

    Files that do not exist are skipped without error.

    Returns:
        A dict with ``ok``, ``key``, the configured ``filename`` and the two
        paths that were targeted (``deleted``, ``deleted_raw``).

    Raises:
        KeyError: if *key* is not a known prompt key.
    """
    clean_key = _clean_key(key)
    speaker_file = prompt_path(clean_key)
    raw_file = raw_prompt_path(clean_key)

    for doomed in (speaker_file, raw_file):
        if doomed.exists():
            doomed.unlink()

    delete_record_entry(clean_key)

    result = {"ok": True, "key": clean_key}
    result["filename"] = prompt_filename(clean_key)
    result["deleted"] = str(speaker_file)
    result["deleted_raw"] = str(raw_file)
    return result


def read_audio_prompt_bytes(key: str) -> tuple[Path, bytes]:
    """Return the path and full contents of the recording stored for *key*.

    Raises:
        KeyError: if *key* is not a known prompt key.
        FileNotFoundError: if no recording file exists for the key.
    """
    clean_key = _clean_key(key)
    target = prompt_path(clean_key)
    if target.exists():
        return target, target.read_bytes()
    raise FileNotFoundError(f"audio prompt not found for key: {clean_key}")