# Marcus/Vision/marcus_yolo.py — captured 2026-04-12 18:50:22 +04:00 (544 lines, 18 KiB, Python)
"""
marcus_yolo.py — Marcus Vision Module
=======================================
Project : Marcus | YS Lootah Technology
Purpose : YOLO-based person + object detection
Import this module in marcus_llava.py — runs as background thread
Usage (imported):
from marcus_yolo import start_yolo, yolo_sees, yolo_count, yolo_closest, yolo_summary
Usage (standalone):
/home/unitree/miniconda3/envs/marcus/bin/python3 ~/Models_marcus/marcus_yolo.py
"""
import os
import sys
import time
import threading
import json
import numpy as np
from collections import defaultdict
# ── Configuration ─────────────────────────────────────────────────────────────
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)

try:
    from Core.config_loader import load_config
    _cfg = load_config("Vision")
except Exception as _e:
    # Best-effort: a missing/broken config file falls back to the defaults below.
    print(f" [YOLO] config_Vision.json not loaded ({_e}) — using defaults")
    _cfg = {}

# Tunables — every value overridable via Config/config_Vision.json.
YOLO_MODEL_PATH = os.path.join(_PROJECT_ROOT, _cfg.get("yolo_model_path", "Models/yolov8m.pt"))
YOLO_CONFIDENCE = float(_cfg.get("yolo_confidence", 0.45))  # min detection confidence
YOLO_IOU = float(_cfg.get("yolo_iou", 0.45))                # NMS IoU threshold
YOLO_DEVICE = _cfg.get("yolo_device", "cuda")               # "cuda" | "0" | "cuda:N"
YOLO_IMG_SIZE = int(_cfg.get("yolo_img_size", 320))         # inference resolution
YOLO_HALF = bool(_cfg.get("yolo_half", True))               # FP16 on GPU
def _resolve_device(requested: str) -> tuple:
"""
Resolve the inference device. GPU is required — no CPU fallback.
Returns (device_str, use_half). Raises RuntimeError if CUDA is unavailable
or if the config requests CPU. Marcus must run on the Jetson Orin NX GPU.
"""
req = (requested or "cuda").lower()
if req == "cpu":
raise RuntimeError(
"[YOLO] yolo_device='cpu' in config — Marcus requires GPU. "
"Set yolo_device to 'cuda' in Config/config_Vision.json."
)
try:
import torch
except ImportError as e:
raise RuntimeError(
"[YOLO] PyTorch not installed — cannot run on GPU. "
"Install CUDA-enabled torch on the Jetson."
) from e
if not torch.cuda.is_available():
raise RuntimeError(
"[YOLO] CUDA not available — torch.cuda.is_available() == False. "
"Check nvidia driver / JetPack CUDA runtime on the Jetson "
"(try `nvidia-smi` or `tegrastats`)."
)
dev = req if (req.startswith("cuda") or req == "0") else "cuda"
return dev, YOLO_HALF
# COCO classes to track (ignore everything else the model reports)
TRACKED_CLASSES = {
"person", "chair", "couch", "bed", "dining table",
"bottle", "cup", "laptop", "keyboard", "mouse",
"backpack", "handbag", "suitcase",
"car", "truck", "motorcycle", "bicycle",
"fire hydrant", "stop sign",
}
# PPE classes — active only when a custom PPE model is loaded (see start_yolo)
PPE_VIOLATION_CLASSES = {"no-helmet", "no_helmet", "no-vest", "no_vest"}
# ── Shared state ──────────────────────────────────────────────────────────────
_detections_lock = threading.Lock()  # guards _latest_detections
_latest_detections = []  # list of Detection objects from the most recent frame
_yolo_running = [False]  # one-item list so threads share the flag by reference
_yolo_fps = [0.0]        # one-item list: rolling inference FPS
# ── Detection class ───────────────────────────────────────────────────────────
class Detection:
    """One YOLO bounding-box result plus frame-relative geometry helpers."""

    def __init__(self, class_name, confidence, x1, y1, x2, y2, frame_w, frame_h):
        self.class_name = class_name
        self.confidence = confidence
        self.x1, self.x2 = x1, x2
        self.y1, self.y2 = y1, y2
        self.width = x2 - x1
        self.height = y2 - y1
        self.area = self.width * self.height
        self.cx = (x1 + x2) // 2
        self.cy = (y1 + y2) // 2
        self.frame_w = frame_w
        self.frame_h = frame_h

    @property
    def size_ratio(self) -> float:
        """Fraction of the frame this box covers — larger means closer."""
        frame_area = self.frame_w * self.frame_h
        return self.area / (frame_area if frame_area > 0 else 1)

    @property
    def position(self) -> str:
        """Horizontal zone of the bbox center: left / center / right."""
        third = self.frame_w // 3
        if self.cx < third:
            return "left"
        if self.cx > 2 * third:
            return "right"
        return "center"

    @property
    def distance_estimate(self) -> str:
        """Coarse distance bucket derived from size_ratio."""
        ratio = self.size_ratio
        for cutoff, label in ((0.30, "very close"), (0.10, "close"), (0.03, "medium")):
            if ratio > cutoff:
                return label
        return "far"

    def to_dict(self) -> dict:
        """Return a JSON-serializable view of this detection."""
        return {
            "class": self.class_name,
            "confidence": round(self.confidence, 2),
            "position": self.position,
            "distance": self.distance_estimate,
            "size_ratio": round(self.size_ratio, 4),
            "bbox": [self.x1, self.y1, self.x2, self.y2],
            "center": [self.cx, self.cy],
        }

    def __repr__(self):
        return (f"Detection({self.class_name} {self.confidence:.0%} "
                f"@ {self.position} {self.distance_estimate})")
# ── Public query API ──────────────────────────────────────────────────────────
def yolo_sees(class_name: str, min_confidence: float = 0.45) -> bool:
    """
    Return True if YOLO currently detects `class_name`.

    Args:
        class_name : COCO class e.g. "person", "chair", "bottle"
        min_confidence : minimum confidence threshold (default 0.45)

    Example:
        if yolo_sees("person"):
            gradual_stop()
    """
    wanted = class_name.lower()
    with _detections_lock:
        for det in _latest_detections:
            if det.class_name.lower() == wanted and det.confidence >= min_confidence:
                return True
    return False
def yolo_count(class_name: str) -> int:
    """
    Return how many instances of `class_name` are currently detected.

    Example:
        n = yolo_count("person")
        print(f"Detected {n} people")
    """
    wanted = class_name.lower()
    with _detections_lock:
        matches = [d for d in _latest_detections if d.class_name.lower() == wanted]
    return len(matches)
def yolo_closest(class_name: str = "person"):
    """
    Return the closest detected instance of `class_name` (largest bbox).

    Returns:
        Detection object, or None when no match is present.

    Example:
        p = yolo_closest("person")
        if p:
            print(p.position, p.distance_estimate)
    """
    wanted = class_name.lower()
    with _detections_lock:
        best = None
        for det in _latest_detections:
            if det.class_name.lower() != wanted:
                continue
            if best is None or det.size_ratio > best.size_ratio:
                best = det
        return best
def yolo_all_classes() -> set:
    """Return the set of class names present in the current detections."""
    with _detections_lock:
        return set(d.class_name for d in _latest_detections)
def yolo_summary() -> str:
    """
    Return a human-readable one-line summary of current detections.

    Returns:
        e.g. "2 persons (left, close) | 1 chair (center, medium)",
        or "nothing detected" when the scene is empty.
    """
    with _detections_lock:
        snapshot = list(_latest_detections)
    if not snapshot:
        return "nothing detected"
    by_class = defaultdict(list)
    for det in snapshot:
        by_class[det.class_name].append(det)
    pieces = []
    for cls, group in by_class.items():
        count = len(group)
        label = f"{count} {cls}{'s' if count > 1 else ''}"
        positions = ", ".join(set(d.position for d in group))
        pieces.append(f"{label} ({positions}, {group[0].distance_estimate})")
    return " | ".join(pieces)
def yolo_ppe_violations() -> list:
    """
    Return the list of detected PPE violations.

    Requires the custom PPE model to be loaded (not the default yolov8m).

    Returns:
        List of violation strings e.g. ["no helmet (left)", "no vest (center)"]
    """
    found = []
    with _detections_lock:
        for det in _latest_detections:
            name = det.class_name.lower()
            if name in ("no-helmet", "no_helmet"):
                found.append(f"no helmet ({det.position})")
            elif name in ("no-vest", "no_vest"):
                found.append(f"no vest ({det.position})")
    return found
def yolo_person_too_close(threshold: float = 0.25) -> bool:
    """
    Return True if the closest person fills more than `threshold` of the frame.

    Intended for safety stops — the person is dangerously close.

    Args:
        threshold : size_ratio above which = too close (default 0.25)
    """
    person = yolo_closest("person")
    if person is None:
        return False
    return person.size_ratio > threshold
def yolo_is_running() -> bool:
    """True while the YOLO inference loop is active."""
    return bool(_yolo_running[0])
def yolo_fps() -> float:
    """Current YOLO inference FPS (rolling, refreshed ~once per second)."""
    return float(_yolo_fps[0])
# ── YOLO inference loop ───────────────────────────────────────────────────────
def _inference_loop(model, is_ppe: bool, raw_frame_ref, frame_lock,
                    device: str, use_half: bool):
    """
    Background inference loop: read the shared frame, run YOLO, publish results.

    Args:
        model         : loaded ultralytics YOLO model
        is_ppe        : True when a custom PPE model is loaded (skips COCO filter)
        raw_frame_ref : list[np.ndarray|None] — shared raw BGR frame slot
        frame_lock    : lock protecting raw_frame_ref
        device        : resolved CUDA device string
        use_half      : run inference in FP16

    Runs until _yolo_running[0] goes False; updates _latest_detections and
    _yolo_fps as side effects.
    """
    frame_count = 0
    t_fps = time.time()
    while _yolo_running[0]:
        # Only the reference read happens under the lock: the camera thread
        # swaps in whole new arrays, so holding the reference after release
        # is safe, and we must not sleep or infer while holding the lock.
        with frame_lock:
            frame = raw_frame_ref[0]
        if frame is None:
            time.sleep(0.05)
            continue
        try:
            results = model(
                frame,
                imgsz=YOLO_IMG_SIZE,
                conf=YOLO_CONFIDENCE,
                iou=YOLO_IOU,
                device=device,
                half=use_half,
                verbose=False
            )[0]
        except Exception as e:
            # Best-effort: log and retry rather than killing the thread.
            print(f" [YOLO] Inference error: {e}")
            time.sleep(0.2)
            continue
        h, w = frame.shape[:2]
        dets = []
        for box in results.boxes:
            cls_id = int(box.cls[0])
            class_name = model.names[cls_id]
            confidence = float(box.conf[0])
            # With the stock COCO model, drop classes Marcus doesn't track.
            if not is_ppe and class_name not in TRACKED_CLASSES:
                continue
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            dets.append(Detection(class_name, confidence, x1, y1, x2, y2, w, h))
        # Publish atomically so readers never see a half-built list.
        with _detections_lock:
            _latest_detections.clear()
            _latest_detections.extend(dets)
        # Rolling FPS, refreshed roughly once per second.
        frame_count += 1
        elapsed = time.time() - t_fps
        if elapsed >= 1.0:
            _yolo_fps[0] = round(frame_count / elapsed, 1)
            frame_count = 0
            t_fps = time.time()
        time.sleep(0.02)
# ── Camera loop for standalone mode ──────────────────────────────────────────
def _camera_loop(raw_frame_ref, frame_lock, cam_alive):
    """
    Capture RealSense color frames when running standalone.

    Reconnects forever on camera errors until cam_alive[0] goes False.

    Args:
        raw_frame_ref : list[np.ndarray|None] — shared frame slot (written here)
        frame_lock    : lock protecting raw_frame_ref
        cam_alive     : list[bool] — loop-control flag shared with main thread
    """
    import pyrealsense2 as rs
    while cam_alive[0]:
        pipeline = None
        try:
            pipeline = rs.pipeline()
            cfg = rs.config()
            cfg.enable_stream(rs.stream.color, 424, 240, rs.format.bgr8, 15)
            pipeline.start(cfg)
            print("Camera connected ✅")
            while cam_alive[0]:
                frames = pipeline.wait_for_frames(timeout_ms=3000)
                frame = np.asanyarray(frames.get_color_frame().get_data())
                with frame_lock:
                    raw_frame_ref[0] = frame.copy()
        except Exception as e:
            print(f"Camera: {e} — reconnecting...")
            # Best-effort shutdown; pipeline may be None or already stopped.
            # (Was a bare `except:` calling .stop() on a possibly-None pipeline.)
            if pipeline is not None:
                try:
                    pipeline.stop()
                except Exception:
                    pass
            time.sleep(2.0)
# ── Start function — called by marcus_llava.py ────────────────────────────────
def start_yolo(raw_frame_ref=None, frame_lock=None):
    """
    Start YOLO inference in a background daemon thread.

    Called automatically by marcus_llava.py during startup; shares the camera
    frame reference from marcus_llava's camera thread.

    Args:
        raw_frame_ref : list[np.ndarray|None] — shared raw BGR frame
        frame_lock    : threading.Lock protecting raw_frame_ref

    Returns:
        True when the inference thread was started, False when ultralytics is
        missing or the model failed to load.

    Raises:
        RuntimeError: propagated from _resolve_device when no GPU is available.

    Example (in marcus_llava.py):
        from marcus_yolo import start_yolo, yolo_sees, yolo_summary
        start_yolo(raw_frame_ref=_raw_frame, frame_lock=_raw_lock)
    """
    try:
        from ultralytics import YOLO
    except ImportError:
        print(" [YOLO] ultralytics not installed — pip install ultralytics")
        return False
    print(f" [YOLO] Loading model: {YOLO_MODEL_PATH}")
    try:
        model = YOLO(YOLO_MODEL_PATH)
    except Exception as e:
        print(f" [YOLO] Failed to load model: {e}")
        return False
    names = set(model.names.values())
    # A custom PPE model is recognized by the classes it declares.
    is_ppe = bool(names & PPE_VIOLATION_CLASSES)
    device, use_half = _resolve_device(YOLO_DEVICE)
    # Move weights onto the target device once so inferences don't pay a
    # CPU→GPU copy every call. Ultralytics handles FP16 casting via the
    # `half=True` predict kwarg — don't call `.half()` on the inner module,
    # it conflicts with ultralytics' own input dtype preprocess.
    try:
        model.to(device)
    except Exception as e:
        print(f" [YOLO] Could not move model to {device} ({e}) — continuing")
    gpu_info = ""
    if device != "cpu":  # always true today — _resolve_device forbids CPU
        try:
            import torch
            gpu_info = f" ({torch.cuda.get_device_name(0)})"
        except Exception:
            pass
    print(f" [YOLO] Model loaded ✅ | device: {device}{gpu_info}"
          f"{' | FP16' if use_half else ''} | "
          f"{'PPE model' if is_ppe else f'{len(TRACKED_CLASSES & names)} tracked classes'}")
    _yolo_running[0] = True
    threading.Thread(
        target=_inference_loop,
        args=(model, is_ppe, raw_frame_ref, frame_lock, device, use_half),
        daemon=True
    ).start()
    return True
# ══════════════════════════════════════════════════════════════════════════════
# STANDALONE MODE — run directly for testing
# ══════════════════════════════════════════════════════════════════════════════
if __name__ == "__main__":
    # Standalone test harness: own camera thread + interactive query prompt.
    import pyrealsense2 as rs  # fail fast here if RealSense SDK is missing

    raw_frame_ref = [None]
    frame_lock = threading.Lock()
    cam_alive = [True]

    # Start camera
    threading.Thread(
        target=_camera_loop,
        args=(raw_frame_ref, frame_lock, cam_alive),
        daemon=True
    ).start()
    time.sleep(3.0)  # give the camera time to connect

    # Start YOLO
    ok = start_yolo(raw_frame_ref=raw_frame_ref, frame_lock=frame_lock)
    if not ok:
        print("YOLO failed to start. Exiting.")
        sys.exit(1)  # was exit(1) — sys.exit is the non-REPL-dependent form
    time.sleep(2.0)  # let the first inferences land

    print()
    print("╔══════════════════════════════════════════════╗")
    print("║ MARCUS VISION — YOLO ACTIVE ║")
    print("╠══════════════════════════════════════════════╣")
    print(f"║ Model : {YOLO_MODEL_PATH[-36:]:<36}")
    print(f"║ Conf : {YOLO_CONFIDENCE:<36}")
    _dev, _half = _resolve_device(YOLO_DEVICE)
    _dev_label = f"{_dev}{' FP16' if _half else ''}"
    print(f"║ Device: {_dev_label:<36}")
    print("╠══════════════════════════════════════════════╣")
    print("║ what — describe scene ║")
    print("║ person — detect people ║")
    print("║ ppe — check PPE violations ║")
    print("║ count <cls> — count instances ║")
    print("║ closest <cls>— closest instance info ║")
    print("║ all — all detections ║")
    print("║ fps — inference speed ║")
    print("║ q — quit ║")
    print("╚══════════════════════════════════════════════╝")
    print()

    while True:
        try:
            cmd = input("Vision: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            break
        if not cmd:
            continue
        if cmd == "q":
            break
        elif cmd == "what":
            print(f" {yolo_summary()}")
        elif cmd == "person":
            n = yolo_count("person")
            if n == 0:
                print(" No person detected")
            else:
                # Snapshot under the lock — the inference thread mutates
                # _latest_detections concurrently (was an unlocked read).
                with _detections_lock:
                    people = [d for d in _latest_detections
                              if d.class_name == "person"]
                for i, p in enumerate(people, 1):
                    print(f" Person {i}: {p.position}, {p.distance_estimate} "
                          f"({p.confidence:.0%})")
        elif cmd == "ppe":
            v = yolo_ppe_violations()
            print(f" {'No violations' if not v else chr(10).join(v)}")
        elif cmd.startswith("count "):
            cls = cmd[6:].strip()
            print(f" {yolo_count(cls)} {cls}(s) detected")
        elif cmd.startswith("closest "):
            cls = cmd[8:].strip()
            d = yolo_closest(cls)
            if d:
                print(f" Closest {cls}: {d.position}, {d.distance_estimate} "
                      f"({d.confidence:.0%})")
            else:
                print(f" No {cls} detected")
        elif cmd == "all":
            with _detections_lock:
                dets = list(_latest_detections)
            if not dets:
                print(" Nothing detected")
            else:
                for d in dets:
                    print(f" {d}")
        elif cmd == "fps":
            print(f" {yolo_fps():.1f} fps")
        else:
            print(f" Unknown: {cmd}")

    # Signal both background threads to wind down before exiting.
    cam_alive[0] = False
    _yolo_running[0] = False
    print("Marcus Vision stopped.")