# Marcus/start_ollama.sh
# (file-listing metadata, kept as comments so the script stays valid shell:
#  63 lines
#  2.8 KiB
#  Bash
#  Executable File)

#!/usr/bin/env bash
# start_ollama.sh — DEPRECATED, do not use.
#
# Launching Ollama from here runs it as the current (unitree) user and
# completely BYPASSES the systemd drop-in at
# /etc/systemd/system/ollama.service.d/, which is where all the
# safety-critical flags live:
#
#   OOMScoreAdjust=500          (makes Ollama the preferred OOM victim)
#   IOSchedulingClass=idle      (so disk reads don't starve Holosoma)
#   Nice=10                     (lowest normal scheduler priority)
#   OLLAMA_GPU_OVERHEAD=4 GiB   (reserves iGPU for YOLO/Holosoma/camera)
#   OLLAMA_KEEP_ALIVE=24h       (model stays resident all day)
#   OLLAMA_CONTEXT_LENGTH=1024  (shrinks compute graph)
#   OLLAMA_FLASH_ATTENTION=1
#   OLLAMA_KV_CACHE_TYPE=q8_0
#   OLLAMA_MAX_LOADED_MODELS=1
#
# With none of those protections active, the robot WILL fall the next time
# a vision query runs.
#
# CORRECT WAY:
#   sudo systemctl start ollama   # uses the drop-in
#   ./warmup_vlm.sh               # then warm up
#   python3 run_marcus.py         # then start Marcus
#
# ═════════════════════════════════════════════════════════════════════
# REFUSING TO RUN. Delete this block only if you know what you're doing.
# ═════════════════════════════════════════════════════════════════════
# Same two diagnostic lines as before, emitted to stderr in one printf.
printf '%s\n' \
  "start_ollama.sh is DEPRECATED. Use: sudo systemctl start ollama" \
  " (see comments at the top of this file for why)" >&2
exit 1
# ─── Legacy launch body — UNREACHABLE: the guard above always exits 1. ───
# Kept for reference only. Note these values intentionally differ from the
# systemd drop-in (KEEP_ALIVE=2m here vs 24h there; GPU_OVERHEAD=2 GiB vs
# 4 GiB), and none of the OOM/scheduler protections apply when run this way.

# Best-effort: stop any stray runner/server before starting a fresh one.
pkill -f "ollama (runner|serve)" 2>/dev/null
sleep 1

export OLLAMA_FLASH_ATTENTION=1   # fused attention path
export OLLAMA_KV_CACHE_TYPE=q8_0  # 8-bit quantized KV cache
export OLLAMA_KEEP_ALIVE=2m       # unload model after 2 min idle (drop-in uses 24h)
export OLLAMA_MAX_LOADED_MODELS=1 # never hold two models in VRAM

# Reserve 2 GiB of the Jetson's 15 GiB iGPU for the rest of the system
# (YOLO in CUDA FP16, Holosoma, camera, Python heap). Without this, Ollama
# assumes the full 13.8 GiB "available" is its to use and sizes its compute
# graph that way — which works for text, but the vision-encode pass of
# Qwen2.5-VL then pushes total allocation past physical memory and the
# runner dies with status 500.
export OLLAMA_GPU_OVERHEAD=2147483648

# "${1:-}" (not bare "$1") so the script also works under `set -u`.
if [[ "${1:-}" == "--fg" ]]; then
  echo "Running ollama in foreground..."
  ollama serve
else
  ollama serve > /tmp/ollama.log 2>&1 &
  pid=$!  # remember the exact process we spawned (pgrep could match others)

  # Poll the API instead of a fixed `sleep 3`: a fixed sleep either races
  # on slow boots or wastes time on fast ones. Up to ~10 s total.
  ready=0
  for _ in {1..20}; do
    if curl -sf http://localhost:11434/api/version > /dev/null; then
      ready=1
      break
    fi
    sleep 0.5
  done

  if (( ready )); then
    echo "✓ Ollama started (pid $pid)"
    echo " logs: tail -f /tmp/ollama.log"
    echo " stop: pkill -f 'ollama serve'"
  else
    echo "✗ Ollama failed to start — see /tmp/ollama.log"
    exit 1
  fi
fi