GoWelcome/gowelcome/types.py

120 lines
3.9 KiB
Python

"""Shared data contracts for GoWelcome.
These types are the *frozen interface* between the perception, control, robot,
and state-machine layers. Every module imports from here; nothing here imports
heavy/optional deps (ultralytics, cv2, the Unitree SDK), so this module is
always importable for off-robot testing.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Optional, Tuple, TYPE_CHECKING
# numpy is needed only for annotations, so it is imported for static type
# checkers alone. At runtime NDArray is a plain placeholder and importing this
# module never pulls numpy in.
if TYPE_CHECKING:  # pragma: no cover - this branch never executes at runtime
    import numpy as np

    NDArray = np.ndarray  # real reference -> static checkers + linters see it used
else:
    NDArray = object  # runtime stand-in; annotations are lazy, so it is never inspected
class State(Enum):
    """The five behaviour states of the GoWelcome machine.

    Each member's value is its own name as a string, so ``State("GREET")``
    round-trips to ``State.GREET``.
    """

    WANDER = "WANDER"  # roam + idle dog-play, looking for a person
    APPROACH = "APPROACH"  # visual-servo toward a detected person
    GREET = "GREET"  # stop, play audio + gesture
    AVOID_DANGER = "AVOID_DANGER"  # road / vehicle detected -> steer to safety
    BOUNDARY = "BOUNDARY"  # near the geofence edge -> home back toward centre
@dataclass
class Detection:
    """A single YOLO bounding box (pixel coords, top-left / bottom-right).

    ``(x1, y1)`` is the top-left corner and ``(x2, y2)`` the bottom-right one.
    The derived properties are plain arithmetic on those corners; nothing is
    validated or clamped, so an inverted box yields a negative ``w`` / ``h``.
    """

    label: str
    conf: float
    x1: int
    y1: int
    x2: int
    y2: int
    track_id: int = -1  # -1 when tracking is disabled / id unavailable

    @property
    def cx(self) -> float:
        """Horizontal centre of the box, in pixels."""
        return (self.x1 + self.x2) / 2.0

    @property
    def cy(self) -> float:
        """Vertical centre of the box, in pixels."""
        return (self.y1 + self.y2) / 2.0

    @property
    def w(self) -> float:
        """Box width (``x2 - x1``) as a float."""
        return float(self.x2 - self.x1)

    @property
    def h(self) -> float:
        """Box height (``y2 - y1``) as a float."""
        return float(self.y2 - self.y1)

    @property
    def area(self) -> float:
        """Box area, ``w * h``."""
        return self.w * self.h

    def height_ratio(self, frame_h: int) -> float:
        """Fraction of the frame height the box fills (distance proxy).

        ``frame_h`` is floored at 1 so a zero or negative frame height cannot
        cause a division by zero.
        """
        return self.h / max(1, frame_h)

    def horizontal_offset(self, frame_w: int) -> float:
        """Normalised horizontal error; ``+`` = right of centre.

        The value lies in ``[-1, 1]`` when the box centre is inside a frame at
        least two pixels wide. The result is not clamped, so a centre outside
        the frame falls outside that range. The half-width divisor is floored
        at 1.0 to avoid dividing by zero for a degenerate ``frame_w``.
        """
        half = max(1.0, frame_w / 2.0)
        return (self.cx - frame_w / 2.0) / half
@dataclass
class RoadInfo:
    """Output of the HSV asphalt/road mask over the bottom crop of the frame."""

    coverage: float  # fraction of crop flagged as road (0..1)
    left: float  # road coverage in the left third (0..1)
    center: float  # road coverage in the centre third (0..1)
    right: float  # road coverage in the right third (0..1)
    mask: Optional[NDArray] = None  # binary uint8 mask of the crop (debug only)

    @property
    def clearer_side(self) -> int:
        """Return +1 if the right third is clearer (less road), -1 if the left is.

        Use to pick a turn direction that steers *away* from the road. When
        both thirds have equal coverage the comparison is ``<=``, so the tie
        resolves to +1 (right).
        """
        return 1 if self.right <= self.left else -1
@dataclass
class PerceptionResult:
    """Snapshot published by the perception thread each frame.

    Intended to be treated as read-only by consumers. The dataclass is not
    frozen and its list fields are ordinary mutable lists, so immutability is
    a convention here, not something the type enforces.
    """

    frame_w: int
    frame_h: int
    # Each instance gets its own fresh list (default_factory), never a shared one.
    detections: List[Detection] = field(default_factory=list)
    persons: List[Detection] = field(default_factory=list)
    dangers: List[Detection] = field(default_factory=list)  # car/truck/bus/...
    road: Optional[RoadInfo] = None
    ts: float = 0.0
    seq: int = 0
    frame: Optional[NDArray] = None  # BGR frame the result was computed from

    def best_person(self) -> Optional[Detection]:
        """Return the highest-confidence person, or ``None`` if there are none.

        On equal confidence the earliest entry in ``persons`` wins.
        """
        return max(self.persons, key=lambda d: d.conf, default=None)

    def biggest_person(self) -> Optional[Detection]:
        """Return the largest (nearest) person by box area, or ``None``.

        On equal area the earliest entry in ``persons`` wins.
        """
        return max(self.persons, key=lambda d: d.area, default=None)
# --- convenience type aliases -------------------------------------------------
# Frame dimensions, ordered (width, height).
FrameSize = Tuple[int, int]
# Velocity command, ordered (vx, vy, vyaw).
Velocity = Tuple[float, float, float]