# Marcus/Vision/marcus_yolo.py — captured 2026-04-12 18:50:22 +04:00 (544 lines, 18 KiB, Python)
"""
marcus_yolo.py — Marcus Vision Module
=======================================
Project : Marcus | YS Lootah Technology
Purpose : YOLO-based person + object detection
Import this module in marcus_llava.py — runs as background thread
Usage (imported):
from marcus_yolo import start_yolo, yolo_sees, yolo_count, yolo_closest, yolo_summary
Usage (standalone):
/home/unitree/miniconda3/envs/marcus/bin/python3 ~/Models_marcus/marcus_yolo.py
"""
import os
import sys
import time
import threading
import json
import numpy as np
from collections import defaultdict
# ── Configuration ─────────────────────────────────────────────────────────────
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)

try:
    from Core.config_loader import load_config
    _cfg = load_config("Vision")
except Exception as _e:
    # Best-effort: a missing/broken config file falls back to the defaults below.
    print(f" [YOLO] config_Vision.json not loaded ({_e}) — using defaults")
    _cfg = {}

# Tunables — every value overridable via Config/config_Vision.json.
YOLO_MODEL_PATH = os.path.join(_PROJECT_ROOT, _cfg.get("yolo_model_path", "Models/yolov8m.pt"))
YOLO_CONFIDENCE = float(_cfg.get("yolo_confidence", 0.45))  # min detection confidence
YOLO_IOU = float(_cfg.get("yolo_iou", 0.45))                # NMS IoU threshold
YOLO_DEVICE = _cfg.get("yolo_device", "cuda")               # "cuda" | "0" | "cuda:N"
YOLO_IMG_SIZE = int(_cfg.get("yolo_img_size", 320))         # inference resolution
YOLO_HALF = bool(_cfg.get("yolo_half", True))               # FP16 on GPU
def _resolve_device(requested: str) -> tuple:
"""
Resolve the inference device. GPU is required — no CPU fallback.
Returns (device_str, use_half). Raises RuntimeError if CUDA is unavailable
or if the config requests CPU. Marcus must run on the Jetson Orin NX GPU.
"""
req = (requested or "cuda").lower()
if req == "cpu":
raise RuntimeError(
"[YOLO] yolo_device='cpu' in config — Marcus requires GPU. "
"Set yolo_device to 'cuda' in Config/config_Vision.json."
)
try:
import torch
except ImportError as e:
raise RuntimeError(
"[YOLO] PyTorch not installed — cannot run on GPU. "
"Install CUDA-enabled torch on the Jetson."
) from e
if not torch.cuda.is_available():
raise RuntimeError(
"[YOLO] CUDA not available — torch.cuda.is_available() == False. "
"Check nvidia driver / JetPack CUDA runtime on the Jetson "
"(try `nvidia-smi` or `tegrastats`)."
)
dev = req if (req.startswith("cuda") or req == "0") else "cuda"
return dev, YOLO_HALF
# COCO classes to track (ignore everything else the model reports)
TRACKED_CLASSES = {
"person", "chair", "couch", "bed", "dining table",
"bottle", "cup", "laptop", "keyboard", "mouse",
"backpack", "handbag", "suitcase",
"car", "truck", "motorcycle", "bicycle",
"fire hydrant", "stop sign",
}
# PPE classes — active only when a custom PPE model is loaded (see start_yolo)
PPE_VIOLATION_CLASSES = {"no-helmet", "no_helmet", "no-vest", "no_vest"}
# ── Shared state ──────────────────────────────────────────────────────────────
_detections_lock = threading.Lock()  # guards _latest_detections
_latest_detections = []  # list of Detection objects from the most recent frame
_yolo_running = [False]  # one-item list so threads share the flag by reference
_yolo_fps = [0.0]        # one-item list: rolling inference FPS
# ── Detection class ───────────────────────────────────────────────────────────
class Detection:
    """One YOLO bounding-box result plus frame-relative geometry helpers."""

    def __init__(self, class_name, confidence, x1, y1, x2, y2, frame_w, frame_h):
        self.class_name = class_name
        self.confidence = confidence
        self.x1, self.x2 = x1, x2
        self.y1, self.y2 = y1, y2
        self.width = x2 - x1
        self.height = y2 - y1
        self.area = self.width * self.height
        self.cx = (x1 + x2) // 2
        self.cy = (y1 + y2) // 2
        self.frame_w = frame_w
        self.frame_h = frame_h

    @property
    def size_ratio(self) -> float:
        """Fraction of the frame this box covers — larger means closer."""
        frame_area = self.frame_w * self.frame_h
        return self.area / (frame_area if frame_area > 0 else 1)

    @property
    def position(self) -> str:
        """Horizontal zone of the bbox center: left / center / right."""
        third = self.frame_w // 3
        if self.cx < third:
            return "left"
        if self.cx > 2 * third:
            return "right"
        return "center"

    @property
    def distance_estimate(self) -> str:
        """Coarse distance bucket derived from size_ratio."""
        ratio = self.size_ratio
        for cutoff, label in ((0.30, "very close"), (0.10, "close"), (0.03, "medium")):
            if ratio > cutoff:
                return label
        return "far"

    def to_dict(self) -> dict:
        """Return a JSON-serializable view of this detection."""
        return {
            "class": self.class_name,
            "confidence": round(self.confidence, 2),
            "position": self.position,
            "distance": self.distance_estimate,
            "size_ratio": round(self.size_ratio, 4),
            "bbox": [self.x1, self.y1, self.x2, self.y2],
            "center": [self.cx, self.cy],
        }

    def __repr__(self):
        return (f"Detection({self.class_name} {self.confidence:.0%} "
                f"@ {self.position} {self.distance_estimate})")
# ── Public query API ──────────────────────────────────────────────────────────
def yolo_sees(class_name: str, min_confidence: float = 0.45) -> bool:
    """
    Return True if YOLO currently detects `class_name`.

    Args:
        class_name : COCO class e.g. "person", "chair", "bottle"
        min_confidence : minimum confidence threshold (default 0.45)

    Example:
        if yolo_sees("person"):
            gradual_stop()
    """
    wanted = class_name.lower()
    with _detections_lock:
        for det in _latest_detections:
            if det.class_name.lower() == wanted and det.confidence >= min_confidence:
                return True
    return False
def yolo_count(class_name: str) -> int:
    """
    Return how many instances of `class_name` are currently detected.

    Example:
        n = yolo_count("person")
        print(f"Detected {n} people")
    """
    wanted = class_name.lower()
    with _detections_lock:
        matches = [d for d in _latest_detections if d.class_name.lower() == wanted]
    return len(matches)
def yolo_closest(class_name: str = "person"):
    """
    Return the closest detected instance of `class_name` (largest bbox).

    Returns:
        Detection object, or None when no match is present.

    Example:
        p = yolo_closest("person")
        if p:
            print(p.position, p.distance_estimate)
    """
    wanted = class_name.lower()
    with _detections_lock:
        best = None
        for det in _latest_detections:
            if det.class_name.lower() != wanted:
                continue
            if best is None or det.size_ratio > best.size_ratio:
                best = det
        return best
def yolo_all_classes() -> set:
    """Return the set of class names present in the current detections."""
    with _detections_lock:
        return set(d.class_name for d in _latest_detections)
def yolo_summary() -> str:
    """
    Return a human-readable one-line summary of current detections.

    Returns:
        e.g. "2 persons (left, close) | 1 chair (center, medium)",
        or "nothing detected" when the scene is empty.
    """
    with _detections_lock:
        snapshot = list(_latest_detections)
    if not snapshot:
        return "nothing detected"
    by_class = defaultdict(list)
    for det in snapshot:
        by_class[det.class_name].append(det)
    pieces = []
    for cls, group in by_class.items():
        count = len(group)
        label = f"{count} {cls}{'s' if count > 1 else ''}"
        positions = ", ".join(set(d.position for d in group))
        pieces.append(f"{label} ({positions}, {group[0].distance_estimate})")
    return " | ".join(pieces)
def yolo_ppe_violations() -> list:
    """
    Return the list of detected PPE violations.

    Requires the custom PPE model to be loaded (not the default yolov8m).

    Returns:
        List of violation strings e.g. ["no helmet (left)", "no vest (center)"]
    """
    found = []
    with _detections_lock:
        for det in _latest_detections:
            name = det.class_name.lower()
            if name in ("no-helmet", "no_helmet"):
                found.append(f"no helmet ({det.position})")
            elif name in ("no-vest", "no_vest"):
                found.append(f"no vest ({det.position})")
    return found
def yolo_person_too_close(threshold: float = 0.25) -> bool:
    """
    Return True if the closest person fills more than `threshold` of the frame.

    Intended for safety stops — the person is dangerously close.

    Args:
        threshold : size_ratio above which = too close (default 0.25)
    """
    person = yolo_closest("person")
    if person is None:
        return False
    return person.size_ratio > threshold
def yolo_is_running() -> bool:
    """True while the YOLO inference loop is active."""
    return bool(_yolo_running[0])
def yolo_fps() -> float:
    """Current YOLO inference FPS (rolling, refreshed ~once per second)."""
    return float(_yolo_fps[0])
# ── YOLO inference loop ───────────────────────────────────────────────────────
def _inference_loop(model, is_ppe: bool, raw_frame_ref, frame_lock,
                    device: str, use_half: bool):
    """
    Background inference loop: read the shared frame, run YOLO, publish results.

    Args:
        model         : loaded ultralytics YOLO model
        is_ppe        : True when a custom PPE model is loaded (skips COCO filter)
        raw_frame_ref : list[np.ndarray|None] — shared raw BGR frame slot
        frame_lock    : lock protecting raw_frame_ref
        device        : resolved CUDA device string
        use_half      : run inference in FP16

    Runs until _yolo_running[0] goes False; updates _latest_detections and
    _yolo_fps as side effects.
    """
    frame_count = 0
    t_fps = time.time()
    while _yolo_running[0]:
        # Only the reference read happens under the lock: the camera thread
        # swaps in whole new arrays, so holding the reference after release
        # is safe, and we must not sleep or infer while holding the lock.
        with frame_lock:
            frame = raw_frame_ref[0]
        if frame is None:
            time.sleep(0.05)
            continue
        try:
            results = model(
                frame,
                imgsz=YOLO_IMG_SIZE,
                conf=YOLO_CONFIDENCE,
                iou=YOLO_IOU,
                device=device,
                half=use_half,
                verbose=False
            )[0]
        except Exception as e:
            # Best-effort: log and retry rather than killing the thread.
            print(f" [YOLO] Inference error: {e}")
            time.sleep(0.2)
            continue
        h, w = frame.shape[:2]
        dets = []
        for box in results.boxes:
            cls_id = int(box.cls[0])
            class_name = model.names[cls_id]
            confidence = float(box.conf[0])
            # With the stock COCO model, drop classes Marcus doesn't track.
            if not is_ppe and class_name not in TRACKED_CLASSES:
                continue
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            dets.append(Detection(class_name, confidence, x1, y1, x2, y2, w, h))
        # Publish atomically so readers never see a half-built list.
        with _detections_lock:
            _latest_detections.clear()
            _latest_detections.extend(dets)
        # Rolling FPS, refreshed roughly once per second.
        frame_count += 1
        elapsed = time.time() - t_fps
        if elapsed >= 1.0:
            _yolo_fps[0] = round(frame_count / elapsed, 1)
            frame_count = 0
            t_fps = time.time()
        time.sleep(0.02)
# ── Camera loop for standalone mode ──────────────────────────────────────────
def _camera_loop(raw_frame_ref, frame_lock, cam_alive):
    """
    Capture RealSense color frames when running standalone.

    Reconnects forever on camera errors until cam_alive[0] goes False.

    Args:
        raw_frame_ref : list[np.ndarray|None] — shared frame slot (written here)
        frame_lock    : lock protecting raw_frame_ref
        cam_alive     : list[bool] — loop-control flag shared with main thread
    """
    import pyrealsense2 as rs
    while cam_alive[0]:
        pipeline = None
        try:
            pipeline = rs.pipeline()
            cfg = rs.config()
            cfg.enable_stream(rs.stream.color, 424, 240, rs.format.bgr8, 15)
            pipeline.start(cfg)
            print("Camera connected ✅")
            while cam_alive[0]:
                frames = pipeline.wait_for_frames(timeout_ms=3000)
                frame = np.asanyarray(frames.get_color_frame().get_data())
                with frame_lock:
                    raw_frame_ref[0] = frame.copy()
        except Exception as e:
            print(f"Camera: {e} — reconnecting...")
            # Best-effort shutdown; pipeline may be None or already stopped.
            # (Was a bare `except:` calling .stop() on a possibly-None pipeline.)
            if pipeline is not None:
                try:
                    pipeline.stop()
                except Exception:
                    pass
            time.sleep(2.0)
# ── Start function — called by marcus_llava.py ────────────────────────────────
def start_yolo(raw_frame_ref=None, frame_lock=None):
    """
    Start YOLO inference in a background daemon thread.

    Called automatically by marcus_llava.py during startup; shares the camera
    frame reference from marcus_llava's camera thread.

    Args:
        raw_frame_ref : list[np.ndarray|None] — shared raw BGR frame
        frame_lock    : threading.Lock protecting raw_frame_ref

    Returns:
        True when the inference thread was started, False when ultralytics is
        missing or the model failed to load.

    Raises:
        RuntimeError: propagated from _resolve_device when no GPU is available.

    Example (in marcus_llava.py):
        from marcus_yolo import start_yolo, yolo_sees, yolo_summary
        start_yolo(raw_frame_ref=_raw_frame, frame_lock=_raw_lock)
    """
    try:
        from ultralytics import YOLO
    except ImportError:
        print(" [YOLO] ultralytics not installed — pip install ultralytics")
        return False
    print(f" [YOLO] Loading model: {YOLO_MODEL_PATH}")
    try:
        model = YOLO(YOLO_MODEL_PATH)
    except Exception as e:
        print(f" [YOLO] Failed to load model: {e}")
        return False
    names = set(model.names.values())
    # A custom PPE model is recognized by the classes it declares.
    is_ppe = bool(names & PPE_VIOLATION_CLASSES)
    device, use_half = _resolve_device(YOLO_DEVICE)
    # Move weights onto the target device once so inferences don't pay a
    # CPU→GPU copy every call. Ultralytics handles FP16 casting via the
    # `half=True` predict kwarg — don't call `.half()` on the inner module,
    # it conflicts with ultralytics' own input dtype preprocess.
    try:
        model.to(device)
    except Exception as e:
        print(f" [YOLO] Could not move model to {device} ({e}) — continuing")
    gpu_info = ""
    if device != "cpu":  # always true today — _resolve_device forbids CPU
        try:
            import torch
            gpu_info = f" ({torch.cuda.get_device_name(0)})"
        except Exception:
            pass
    print(f" [YOLO] Model loaded ✅ | device: {device}{gpu_info}"
          f"{' | FP16' if use_half else ''} | "
          f"{'PPE model' if is_ppe else f'{len(TRACKED_CLASSES & names)} tracked classes'}")
    _yolo_running[0] = True
    threading.Thread(
        target=_inference_loop,
        args=(model, is_ppe, raw_frame_ref, frame_lock, device, use_half),
        daemon=True
    ).start()
    return True
# ══════════════════════════════════════════════════════════════════════════════
# STANDALONE MODE — run directly for testing
# ══════════════════════════════════════════════════════════════════════════════
if __name__ == "__main__":
    # Standalone test harness: own camera thread + interactive query prompt.
    import pyrealsense2 as rs  # fail fast here if RealSense SDK is missing

    raw_frame_ref = [None]
    frame_lock = threading.Lock()
    cam_alive = [True]

    # Start camera
    threading.Thread(
        target=_camera_loop,
        args=(raw_frame_ref, frame_lock, cam_alive),
        daemon=True
    ).start()
    time.sleep(3.0)  # give the camera time to connect

    # Start YOLO
    ok = start_yolo(raw_frame_ref=raw_frame_ref, frame_lock=frame_lock)
    if not ok:
        print("YOLO failed to start. Exiting.")
        sys.exit(1)  # was exit(1) — sys.exit is the non-REPL-dependent form
    time.sleep(2.0)  # let the first inferences land

    print()
    print("╔══════════════════════════════════════════════╗")
    print("║ MARCUS VISION — YOLO ACTIVE ║")
    print("╠══════════════════════════════════════════════╣")
    print(f"║ Model : {YOLO_MODEL_PATH[-36:]:<36}")
    print(f"║ Conf : {YOLO_CONFIDENCE:<36}")
    _dev, _half = _resolve_device(YOLO_DEVICE)
    _dev_label = f"{_dev}{' FP16' if _half else ''}"
    print(f"║ Device: {_dev_label:<36}")
    print("╠══════════════════════════════════════════════╣")
    print("║ what — describe scene ║")
    print("║ person — detect people ║")
    print("║ ppe — check PPE violations ║")
    print("║ count <cls> — count instances ║")
    print("║ closest <cls>— closest instance info ║")
    print("║ all — all detections ║")
    print("║ fps — inference speed ║")
    print("║ q — quit ║")
    print("╚══════════════════════════════════════════════╝")
    print()

    while True:
        try:
            cmd = input("Vision: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            break
        if not cmd:
            continue
        if cmd == "q":
            break
        elif cmd == "what":
            print(f" {yolo_summary()}")
        elif cmd == "person":
            n = yolo_count("person")
            if n == 0:
                print(" No person detected")
            else:
                # Snapshot under the lock — the inference thread mutates
                # _latest_detections concurrently (was an unlocked read).
                with _detections_lock:
                    people = [d for d in _latest_detections
                              if d.class_name == "person"]
                for i, p in enumerate(people, 1):
                    print(f" Person {i}: {p.position}, {p.distance_estimate} "
                          f"({p.confidence:.0%})")
        elif cmd == "ppe":
            v = yolo_ppe_violations()
            print(f" {'No violations' if not v else chr(10).join(v)}")
        elif cmd.startswith("count "):
            cls = cmd[6:].strip()
            print(f" {yolo_count(cls)} {cls}(s) detected")
        elif cmd.startswith("closest "):
            cls = cmd[8:].strip()
            d = yolo_closest(cls)
            if d:
                print(f" Closest {cls}: {d.position}, {d.distance_estimate} "
                      f"({d.confidence:.0%})")
            else:
                print(f" No {cls} detected")
        elif cmd == "all":
            with _detections_lock:
                dets = list(_latest_detections)
            if not dets:
                print(" Nothing detected")
            else:
                for d in dets:
                    print(f" {d}")
        elif cmd == "fps":
            print(f" {yolo_fps():.1f} fps")
        else:
            print(f" Unknown: {cmd}")

    # Signal both background threads to wind down before exiting.
    cam_alive[0] = False
    _yolo_running[0] = False
    print("Marcus Vision stopped.")