# Marcus/Vision/marcus_yolo.py — file-listing header (604 lines, 21 KiB, Python)
"""
marcus_yolo.py — Marcus Vision Module
=======================================
Project : Marcus | YS Lootah Technology
Purpose : YOLO-based person + object detection
Import this module in marcus_brain.py — runs as background thread
Usage (imported):
from marcus_yolo import start_yolo, yolo_sees, yolo_count, yolo_closest, yolo_summary
Usage (standalone):
conda run -n marcus python3 Vision/marcus_yolo.py
"""
import os
import sys
import time
import threading
import json
import numpy as np
from collections import defaultdict
# ── Configuration ─────────────────────────────────────────────────────────────
# Project root = the parent of this file's directory; inserted into sys.path so
# "Core.*" imports resolve when this module is run standalone.
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)
try:
    from Core.config_loader import load_config
    _cfg = load_config("Vision")
except Exception as _e:
    # Best-effort: a missing/broken config file falls back to the defaults below.
    print(f" [YOLO] config_Vision.json not loaded ({_e}) — using defaults")
    _cfg = {}
# Inference parameters — each overridable via Config/config_Vision.json.
YOLO_MODEL_PATH = os.path.join(_PROJECT_ROOT, _cfg.get("yolo_model_path", "Models/yolov8m.pt"))
YOLO_CONFIDENCE = float(_cfg.get("yolo_confidence", 0.45))  # min box confidence
YOLO_IOU = float(_cfg.get("yolo_iou", 0.45))  # NMS IoU threshold
YOLO_DEVICE = _cfg.get("yolo_device", "cpu") # "cpu" | "cuda" | "0" | "cuda:N"
YOLO_IMG_SIZE = int(_cfg.get("yolo_img_size", 320))  # inference resolution (px)
YOLO_HALF = bool(_cfg.get("yolo_half", True)) # FP16 on GPU (ignored on CPU)
# FPS cap. On CPU, Orin NX manages ~2-3 FPS of YOLOv8m @ 320px. We throttle
# lower so CPU inference doesn't compete with Holosoma for cycles. On CUDA,
# value is irrelevant (GPU is fast enough that the existing 0.02 s sleep
# already caps at ~21 FPS).
YOLO_FPS_CAP = float(_cfg.get("yolo_fps_cap", 2.0))
def _resolve_device(requested: str) -> tuple:
"""
Resolve the YOLO inference device. Both GPU and CPU are supported.
On Jetson Orin NX with Qwen2.5-VL loaded, YOLO on "cuda" takes ~2 GiB of
iGPU memory and forces Ollama into a 30/70 CPU/GPU split that crawls
vision queries. Matching Marcus_v1's working architecture, the default
is now "cpu" — YOLO gets ~1-3 FPS on Orin CPU which is plenty for
"is there a person" queries, and Qwen keeps the whole iGPU. Set
yolo_device="cuda" only if VLM is disabled (subsystems.vlm=false).
Returns (device_str, use_half). Never raises for CPU; raises for a
CUDA request only when CUDA is genuinely unavailable.
"""
req = (requested or "cpu").lower()
if req == "cpu":
# half-precision only makes sense on GPU; force fp32 on CPU
return "cpu", False
try:
import torch
except ImportError as e:
raise RuntimeError(
"[YOLO] PyTorch not installed — cannot run on CUDA. "
"Either install CUDA-enabled torch, or set "
"yolo_device='cpu' in Config/config_Vision.json."
) from e
if not torch.cuda.is_available():
raise RuntimeError(
"[YOLO] yolo_device='cuda' but torch.cuda.is_available()==False. "
"Either fix CUDA (tegrastats, nvcc --version) or set "
"yolo_device='cpu' in Config/config_Vision.json."
)
dev = req if (req.startswith("cuda") or req == "0") else "cuda"
return dev, YOLO_HALF
# COCO classes to track (ignore everything else). Detections outside this set
# are dropped in _inference_loop unless a custom PPE model is loaded.
TRACKED_CLASSES = {
    "person", "chair", "couch", "bed", "dining table",
    "bottle", "cup", "laptop", "keyboard", "mouse",
    "backpack", "handbag", "suitcase",
    "car", "truck", "motorcycle", "bicycle",
    "fire hydrant", "stop sign",
}
# PPE classes — active when custom model loaded. Both "-" and "_" spellings
# are listed because PPE datasets are inconsistent about the separator.
PPE_VIOLATION_CLASSES = {"no-helmet", "no_helmet", "no-vest", "no_vest"}
# ── Shared state ──────────────────────────────────────────────────────────────
# Mutable flags are wrapped in one-element lists so the inference thread and
# the query API share a cell without `global` statements.
_detections_lock = threading.Lock()
_latest_detections = [] # list[Detection] — newest full-frame results
_yolo_running = [False]  # inference loop keeps running while True
# When True, the inference loop skips model forward passes. Used by the VLM
# path: Qwen2.5-VL's vision encode needs ~1.5 GiB of iGPU activations and the
# Jetson's 15 GiB is shared with YOLO + Holosoma, so concurrent inference
# spikes the runner into OOM. Pausing YOLO for the ~1 s the VLM spends on an
# image prevents that peak. Model weights stay resident (fast resume).
_yolo_paused = [False]
_yolo_fps = [0.0]  # measured inference FPS, published once per second
# ── Detection class ───────────────────────────────────────────────────────────
class Detection:
    """One object found by YOLO in a single camera frame.

    Holds the raw bounding box plus derived geometry (center, area) and
    offers qualitative views (position, distance_estimate) for speech-style
    answers.
    """
    def __init__(self, class_name, confidence, x1, y1, x2, y2, frame_w, frame_h):
        self.class_name = class_name
        self.confidence = confidence
        self.x1, self.y1 = x1, y1
        self.x2, self.y2 = x2, y2
        # Derived geometry, computed once up front.
        self.width = x2 - x1
        self.height = y2 - y1
        self.area = (x2 - x1) * (y2 - y1)
        self.cx = (x1 + x2) // 2
        self.cy = (y1 + y2) // 2
        self.frame_w = frame_w
        self.frame_h = frame_h
    @property
    def size_ratio(self) -> float:
        """Fraction of the frame the bbox covers — larger means closer."""
        denom = max(self.frame_w * self.frame_h, 1)
        return self.area / denom
    @property
    def position(self) -> str:
        """Horizontal third of the frame the bbox center falls in."""
        third = self.frame_w // 3
        if self.cx > 2 * third:
            return "right"
        if self.cx < third:
            return "left"
        return "center"
    @property
    def distance_estimate(self) -> str:
        """Coarse distance bucket derived from size_ratio."""
        ratio = self.size_ratio
        for cutoff, label in ((0.30, "very close"), (0.10, "close"), (0.03, "medium")):
            if ratio > cutoff:
                return label
        return "far"
    def to_dict(self) -> dict:
        """Serialize to plain types (e.g. for JSON or the brain's context)."""
        return {
            "class": self.class_name,
            "confidence": round(self.confidence, 2),
            "position": self.position,
            "distance": self.distance_estimate,
            "size_ratio": round(self.size_ratio, 4),
            "bbox": [self.x1, self.y1, self.x2, self.y2],
            "center": [self.cx, self.cy],
        }
    def __repr__(self):
        return f"Detection({self.class_name} {self.confidence:.0%} @ {self.position} {self.distance_estimate})"
# ── Public query API ──────────────────────────────────────────────────────────
def yolo_sees(class_name: str, min_confidence: float = 0.45) -> bool:
    """
    Report whether YOLO currently detects a given class.

    Args:
        class_name     : COCO class e.g. "person", "chair", "bottle"
        min_confidence : minimum confidence threshold (default 0.45)
    Returns:
        True if at least one matching detection exists, False otherwise.
    Example:
        if yolo_sees("person"):
            gradual_stop()
    """
    wanted = class_name.lower()
    with _detections_lock:
        for det in _latest_detections:
            if det.class_name.lower() == wanted and det.confidence >= min_confidence:
                return True
    return False
def yolo_count(class_name: str) -> int:
    """
    Count how many instances of class_name are currently detected.

    Example:
        n = yolo_count("person")
        print(f"Detected {n} people")
    """
    wanted = class_name.lower()
    with _detections_lock:
        return len([d for d in _latest_detections
                    if d.class_name.lower() == wanted])
def yolo_closest(class_name: str = "person"):
    """
    Return the closest detection of class_name (largest bbox wins).

    Returns:
        Detection object, or None when nothing matches.
    Example:
        p = yolo_closest("person")
        if p:
            print(p.position, p.distance_estimate)
    """
    wanted = class_name.lower()
    with _detections_lock:
        best = None
        for det in _latest_detections:
            if det.class_name.lower() != wanted:
                continue
            if best is None or det.size_ratio > best.size_ratio:
                best = det
        return best
def yolo_all_classes() -> set:
    """Return the set of class names present in the current detections."""
    with _detections_lock:
        return set(d.class_name for d in _latest_detections)
def yolo_summary() -> str:
    """
    Return human-readable summary of current detections.

    Returns:
        e.g. "2 persons (left, close) | 1 chair (center, medium)"
        or "nothing detected" when the scene is empty.
    """
    # Snapshot under the lock, then format without holding it.
    with _detections_lock:
        dets = list(_latest_detections)
    if not dets:
        return "nothing detected"
    counts = defaultdict(list)
    for d in dets:
        counts[d.class_name].append(d)
    parts = []
    for cls, items in counts.items():
        n = len(items)
        name = f"{n} {cls}{'s' if n > 1 else ''}"
        # FIX: sort the positions — raw set iteration order is nondeterministic,
        # so the same scene could previously produce differently-ordered strings.
        locs = sorted(set(d.position for d in items))
        # Distance of the first detection stands in for the whole group.
        dist = items[0].distance_estimate
        parts.append(f"{name} ({', '.join(locs)}, {dist})")
    return " | ".join(parts)
def yolo_ppe_violations() -> list:
    """
    List detected PPE violations as human-readable strings.

    Requires the custom PPE model (not the default yolov8m) — with the COCO
    model no detection ever matches and the list is always empty.
    Returns:
        e.g. ["no helmet (left)", "no vest (center)"]
    """
    # Map both separator spellings onto the spoken label.
    labels = {"no-helmet": "no helmet", "no_helmet": "no helmet",
              "no-vest": "no vest", "no_vest": "no vest"}
    with _detections_lock:
        return [
            f"{labels[d.class_name.lower()]} ({d.position})"
            for d in _latest_detections
            if d.class_name.lower() in labels
        ]
def yolo_person_too_close(threshold: float = 0.25) -> bool:
    """
    True when a person fills more than `threshold` of the frame.

    Intended for safety stops — a person this large in frame is
    dangerously close.
    Args:
        threshold : size_ratio above which = too close (default 0.25)
    """
    nearest = yolo_closest("person")
    if nearest is None:
        return False
    return nearest.size_ratio > threshold
def yolo_is_running() -> bool:
    """True while the background inference loop is active."""
    return _yolo_running[0]
def yolo_pause() -> None:
    """
    Suspend YOLO forward passes and hand PyTorch's CUDA cache back to the
    driver, giving Ollama's vision encoder contiguous iGPU memory to
    allocate into. Model weights stay resident, so resuming is instant.
    """
    _yolo_paused[0] = True
    # Best effort: missing torch / no CUDA simply means there is nothing to
    # release, and a failure here must never block the pause itself.
    try:
        import torch
    except Exception:
        return
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    except Exception:
        pass
def yolo_resume() -> None:
    """Clear the pause flag so the inference loop runs again."""
    _yolo_paused[0] = False
def yolo_is_paused() -> bool:
    """True while inference is paused (e.g. during a VLM query)."""
    return _yolo_paused[0]
def yolo_fps() -> float:
    """Most recently measured inference FPS (updated ~once per second)."""
    return _yolo_fps[0]
# ── YOLO inference loop ───────────────────────────────────────────────────────
def _inference_loop(model, is_ppe: bool, raw_frame_ref, frame_lock,
                    device: str, use_half: bool):
    """Background inference loop. Reads frames, updates _latest_detections.

    Args:
        model         : loaded ultralytics YOLO model
        is_ppe        : True when a custom PPE model is loaded (skip class filter)
        raw_frame_ref : one-element list holding the latest BGR frame (or None)
        frame_lock    : threading.Lock protecting raw_frame_ref
        device        : resolved inference device ("cpu", "cuda", ...)
        use_half      : run FP16 inference (GPU only)

    Runs until _yolo_running[0] goes False; idles while _yolo_paused[0].
    """
    frame_count = 0
    t_fps = time.time()
    # Minimum wall-clock interval between inferences, in seconds. On CPU this
    # is the main throttle; on CUDA the model itself limits throughput.
    min_period = 1.0 / max(YOLO_FPS_CAP, 0.1)
    last_infer = 0.0
    while _yolo_running[0]:
        if _yolo_paused[0]:
            # Paused (VLM is using the iGPU) — idle cheaply until resumed.
            time.sleep(0.03)
            continue
        # FPS cap — on CPU especially, we don't want YOLO to hammer the cores
        # that Holosoma's 50 Hz RL policy also needs.
        dt_since = time.time() - last_infer
        if dt_since < min_period:
            time.sleep(min(0.05, min_period - dt_since))
            continue
        # Snapshot the frame under the lock, then release it immediately so
        # the camera thread is never blocked during inference.
        with frame_lock:
            frame = raw_frame_ref[0]
        if frame is None:
            time.sleep(0.05)
            continue
        last_infer = time.time()
        try:
            results = model(
                frame,
                imgsz=YOLO_IMG_SIZE,
                conf=YOLO_CONFIDENCE,
                iou=YOLO_IOU,
                device=device,
                half=use_half,
                verbose=False
            )[0]
        except Exception as e:
            # Best-effort: log, back off briefly, and retry rather than
            # letting the thread die on a transient failure.
            print(f" [YOLO] Inference error: {e}")
            time.sleep(0.2)
            continue
        h, w = frame.shape[:2]
        dets = []
        for box in results.boxes:
            cls_id = int(box.cls[0])
            class_name = model.names[cls_id]
            confidence = float(box.conf[0])
            # Default COCO model: keep only the classes Marcus cares about.
            if not is_ppe and class_name not in TRACKED_CLASSES:
                continue
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            dets.append(Detection(class_name, confidence, x1, y1, x2, y2, w, h))
        # Swap the new results in atomically for the query API.
        with _detections_lock:
            _latest_detections.clear()
            _latest_detections.extend(dets)
        frame_count += 1
        elapsed = time.time() - t_fps
        if elapsed >= 1.0:
            # Publish measured FPS roughly once per second.
            _yolo_fps[0] = round(frame_count / elapsed, 1)
            frame_count = 0
            t_fps = time.time()
        time.sleep(0.02)
# ── Camera loop for standalone mode ──────────────────────────────────────────
def _camera_loop(raw_frame_ref, frame_lock, cam_alive):
    """Capture RealSense frames when running standalone.

    Args:
        raw_frame_ref : one-element list; [0] receives the latest BGR frame
        frame_lock    : threading.Lock protecting raw_frame_ref
        cam_alive     : one-element list flag; set [0]=False to stop the loop

    Reconnects with a 2 s back-off after any camera failure.
    """
    import pyrealsense2 as rs
    while cam_alive[0]:
        pipeline = None
        try:
            pipeline = rs.pipeline()
            cfg = rs.config()
            cfg.enable_stream(rs.stream.color, 424, 240, rs.format.bgr8, 15)
            pipeline.start(cfg)
            print("Camera connected ✅")
            while cam_alive[0]:
                frames = pipeline.wait_for_frames(timeout_ms=3000)
                frame = np.asanyarray(frames.get_color_frame().get_data())
                with frame_lock:
                    raw_frame_ref[0] = frame.copy()
        except Exception as e:
            print(f"Camera: {e} — reconnecting...")
            # FIX: if rs.pipeline() itself failed, `pipeline` is still None and
            # None.stop() raised AttributeError — which escaped the
            # `except RuntimeError` guard below and silently killed this
            # thread. Only call stop() on a pipeline object that exists.
            if pipeline is not None:
                # pipeline may already be stopped or never started; swallow
                # only the expected RealSense "pipeline not started" error,
                # not every possible failure mode.
                try:
                    pipeline.stop()
                except RuntimeError:
                    pass
            time.sleep(2.0)
# ── Start function — called by marcus_brain.py ────────────────────────────────
def start_yolo(raw_frame_ref=None, frame_lock=None):
    """
    Start YOLO inference in background thread.
    Called automatically by marcus_brain.py during startup.
    Shares the camera frame reference from marcus_brain's camera thread.
    Args:
        raw_frame_ref : list[np.ndarray|None] — shared raw BGR frame
        frame_lock : threading.Lock protecting raw_frame_ref
    Returns:
        True when the inference thread was started, False on any setup
        failure (missing ultralytics, model load error).
    Example (in marcus_brain.py):
        from marcus_yolo import start_yolo, yolo_sees, yolo_summary
        start_yolo(raw_frame_ref=_raw_frame, frame_lock=_raw_lock)
    """
    try:
        from ultralytics import YOLO
    except ImportError:
        print(" [YOLO] ultralytics not installed — pip install ultralytics")
        return False
    print(f" [YOLO] Loading model: {YOLO_MODEL_PATH}")
    try:
        model = YOLO(YOLO_MODEL_PATH)
    except Exception as e:
        print(f" [YOLO] Failed to load model: {e}")
        return False
    # A model whose class names overlap the PPE set is treated as the custom
    # PPE model; that disables the TRACKED_CLASSES filter in the loop.
    names = set(model.names.values())
    is_ppe = bool(names & PPE_VIOLATION_CLASSES)
    device, use_half = _resolve_device(YOLO_DEVICE)
    # Move weights onto the target device once so inferences don't pay a
    # CPU→GPU copy every call. Ultralytics handles FP16 casting via the
    # `half=True` predict kwarg — don't call `.half()` on the inner module,
    # it conflicts with ultralytics' own input dtype preprocess.
    try:
        model.to(device)
    except Exception as e:
        print(f" [YOLO] Could not move model to {device} ({e}) — continuing")
    gpu_info = ""
    if device != "cpu":
        # Cosmetic only: best-effort GPU name for the startup banner.
        try:
            import torch
            gpu_info = f" ({torch.cuda.get_device_name(0)})"
        except Exception:
            pass
    print(f" [YOLO] Model loaded ✅ | device: {device}{gpu_info}"
          f"{' | FP16' if use_half else ''} | "
          f"{'PPE model' if is_ppe else f'{len(TRACKED_CLASSES & names)} tracked classes'}")
    _yolo_running[0] = True
    # Daemon thread: dies with the process, no explicit join needed.
    threading.Thread(
        target=_inference_loop,
        args=(model, is_ppe, raw_frame_ref, frame_lock, device, use_half),
        daemon=True
    ).start()
    return True
# ══════════════════════════════════════════════════════════════════════════════
# STANDALONE MODE — run directly for testing
# ══════════════════════════════════════════════════════════════════════════════
if __name__ == "__main__":
    # Fail fast here if the RealSense SDK is missing (the camera thread also
    # imports it, but there the error would only surface asynchronously).
    import pyrealsense2 as rs
    raw_frame_ref = [None]
    frame_lock = threading.Lock()
    cam_alive = [True]
    # Start camera
    threading.Thread(
        target=_camera_loop,
        args=(raw_frame_ref, frame_lock, cam_alive),
        daemon=True
    ).start()
    time.sleep(3.0)
    # Start YOLO
    ok = start_yolo(raw_frame_ref=raw_frame_ref, frame_lock=frame_lock)
    if not ok:
        print("YOLO failed to start. Exiting.")
        # FIX: use sys.exit — the `exit` builtin is a site/REPL helper and is
        # not guaranteed to exist when run under some launchers.
        sys.exit(1)
    time.sleep(2.0)
    print()
    print("╔══════════════════════════════════════════════╗")
    print("║ MARCUS VISION — YOLO ACTIVE ║")
    print("╠══════════════════════════════════════════════╣")
    print(f"║ Model : {YOLO_MODEL_PATH[-36:]:<36}")
    print(f"║ Conf : {YOLO_CONFIDENCE:<36}")
    _dev, _half = _resolve_device(YOLO_DEVICE)
    _dev_label = f"{_dev}{' FP16' if _half else ''}"
    print(f"║ Device: {_dev_label:<36}")
    print("╠══════════════════════════════════════════════╣")
    print("║ what — describe scene ║")
    print("║ person — detect people ║")
    print("║ ppe — check PPE violations ║")
    print("║ count <cls> — count instances ║")
    print("║ closest <cls>— closest instance info ║")
    print("║ all — all detections ║")
    print("║ fps — inference speed ║")
    print("║ q — quit ║")
    print("╚══════════════════════════════════════════════╝")
    print()
    # Simple interactive REPL over the query API.
    while True:
        try:
            cmd = input("Vision: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            break
        if not cmd:
            continue
        if cmd == "q":
            break
        elif cmd == "what":
            print(f" {yolo_summary()}")
        elif cmd == "person":
            n = yolo_count("person")
            if n == 0:
                print(" No person detected")
            else:
                # FIX: snapshot under _detections_lock — the original iterated
                # _latest_detections unsynchronized while the inference thread
                # clears/extends it, risking a torn read of the list.
                with _detections_lock:
                    people = [d for d in _latest_detections
                              if d.class_name == "person"]
                for i, p in enumerate(people, 1):
                    print(f" Person {i}: {p.position}, {p.distance_estimate} "
                          f"({p.confidence:.0%})")
        elif cmd == "ppe":
            v = yolo_ppe_violations()
            print(f" {'No violations' if not v else chr(10).join(v)}")
        elif cmd.startswith("count "):
            cls = cmd[6:].strip()
            print(f" {yolo_count(cls)} {cls}(s) detected")
        elif cmd.startswith("closest "):
            cls = cmd[8:].strip()
            d = yolo_closest(cls)
            if d:
                print(f" Closest {cls}: {d.position}, {d.distance_estimate} "
                      f"({d.confidence:.0%})")
            else:
                print(f" No {cls} detected")
        elif cmd == "all":
            with _detections_lock:
                dets = list(_latest_detections)
            if not dets:
                print(" Nothing detected")
            else:
                for d in dets:
                    print(f" {d}")
        elif cmd == "fps":
            print(f" {yolo_fps():.1f} fps")
        else:
            print(f" Unknown: {cmd}")
    # Signal both background threads to wind down before exiting.
    cam_alive[0] = False
    _yolo_running[0] = False
    print("Marcus Vision stopped.")