#!/usr/bin/env bash
# start_ollama.sh — DEPRECATED, do not use.
#
# This script launches Ollama as the current (unitree) user and completely
# BYPASSES the systemd drop-in at /etc/systemd/system/ollama.service.d/,
# which is where all the safety-critical flags live:
#
#   OOMScoreAdjust=500         (makes Ollama the preferred OOM victim)
#   IOSchedulingClass=idle     (so disk reads don't starve Holosoma)
#   Nice=10                    (lowest normal scheduler priority)
#   OLLAMA_GPU_OVERHEAD=4 GiB  (reserves iGPU for YOLO/Holosoma/camera)
#   OLLAMA_KEEP_ALIVE=24h      (model stays resident all day)
#   OLLAMA_CONTEXT_LENGTH=1024 (shrinks compute graph)
#   OLLAMA_FLASH_ATTENTION=1
#   OLLAMA_KV_CACHE_TYPE=q8_0
#   OLLAMA_MAX_LOADED_MODELS=1
#
# Running this script instead of systemd means NONE of those protections
# are active, and the robot WILL fall the next time a vision query runs.
#
# CORRECT WAY:
#   sudo systemctl start ollama   # uses the drop-in
#   ./warmup_vlm.sh               # then warm up
#   python3 run_marcus.py         # then start Marcus
#
# ═════════════════════════════════════════════════════════════════════
# REFUSING TO RUN. Delete this block only if you know what you're doing.
# ═════════════════════════════════════════════════════════════════════
echo "start_ollama.sh is DEPRECATED. Use: sudo systemctl start ollama" >&2
echo " (see comments at the top of this file for why)" >&2
exit 1

# ─────────────────────────────────────────────────────────────────────
# DEAD CODE below this point — unreachable past the 'exit 1' above.
# Kept only as a reference for what the legacy manual launch did.
# NOTE(review): OLLAMA_KEEP_ALIVE=2m here contradicts the drop-in's 24h;
# one more reason this path must not be revived as-is.
# ─────────────────────────────────────────────────────────────────────

# Kill any stray runner/server processes before relaunching.
# (pkill's non-zero status when nothing matches is deliberately ignored.)
pkill -f "ollama (runner|serve)" 2>/dev/null
sleep 1

export OLLAMA_FLASH_ATTENTION=1
export OLLAMA_KV_CACHE_TYPE=q8_0
export OLLAMA_KEEP_ALIVE=2m
export OLLAMA_MAX_LOADED_MODELS=1

# Reserve 2 GiB of the Jetson's 15 GiB iGPU for the rest of the system
# (YOLO in CUDA FP16, Holosoma, camera, Python heap). Without this, Ollama
# assumes the full 13.8 GiB "available" is its to use and sizes its compute
# graph that way — which works for text, but the vision-encode pass of
# Qwen2.5-VL then pushes total allocation past physical memory and the
# runner dies with status 500.
export OLLAMA_GPU_OVERHEAD=2147483648

# ${1:-} so an absent argument compares as empty instead of erroring
# under 'set -u' (should anyone add strict mode later).
if [[ "${1:-}" == "--fg" ]]; then
  echo "Running ollama in foreground..."
  ollama serve
else
  # Background launch: log to /tmp, then poll the HTTP API once to
  # confirm the server actually came up before reporting success.
  ollama serve > /tmp/ollama.log 2>&1 &
  sleep 3
  if curl -sf http://localhost:11434/api/version > /dev/null; then
    echo "✓ Ollama started (pid $(pgrep -f 'ollama serve'))"
    echo " logs: tail -f /tmp/ollama.log"
    echo " stop: pkill -f 'ollama serve'"
  else
    echo "✗ Ollama failed to start — see /tmp/ollama.log"
    exit 1
  fi
fi