# Sanadv3/requirements.txt
#
# 44 lines
# 1.6 KiB
# Plaintext

# Sanad — Python dependencies
# Install: pip install -r requirements.txt
# Dashboard
fastapi>=0.110.0
uvicorn[standard]>=0.29.0
python-multipart>=0.0.9
# Gemini voice
# google-genai: the Gemini Live SDK — used by gemini/script.py (live brain)
# and gemini/client.py. Needs Python 3.10+, which is why the voice loop
# runs in the gemini_sdk conda env. send_realtime_input(video=...),
# send_realtime_input(text=...) and send_client_content() require a
# reasonably recent release (1.0 or later), hence the version floor below.
google-genai>=1.0.0
websockets>=12.0
pyaudio>=0.2.13
# Recognition (camera vision + face gallery for Gemini-side face recognition)
# opencv-python-headless: JPEG encode + USB-camera fallback. Headless wheel —
# the dashboard renders frames; we never need a GUI window.
# Pillow: resize face samples before the Gemini primer turn.
opencv-python-headless>=4.8.0
Pillow>=10.0.0
#
# pyrealsense2 — DO NOT `pip install` on Jetson / JetPack 5.
# The PyPI wheel is built against glibc 2.32+ (Ubuntu 22.04); JetPack 5 ships
# glibc 2.31, so the wheel fails to load with:
# ImportError: ... version `GLIBC_2.32' not found
# On Jetson, build the Python binding from source against the apt-installed
# librealsense2 runtime (see README → "Camera vision on Jetson").
# On x86_64 / Ubuntu 22.04+ desktops, `pip install pyrealsense2` works fine.
# If pyrealsense2 is absent, CameraDaemon falls back to cv2.VideoCapture(0).
# pyrealsense2>=2.50.0 # intentionally commented out — see note above
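# Local TTS (optional — only needed for MBZUAI model)
# NOTE: the lines below are active, so `pip install -r requirements.txt`
# installs them regardless. Comment them out if local TTS is not needed.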
transformers>=4.40.0
sentencepiece>=0.2.0
torch>=2.2.0
datasets>=2.19.0
soundfile>=0.12.0
# General
numpy>=1.24.0