#!/usr/bin/env bash
# start_ollama.sh — DEPRECATED, do not use.
#
# This script launches Ollama as the current (unitree) user and completely
# BYPASSES the systemd drop-in at /etc/systemd/system/ollama.service.d/,
# which is where all the safety-critical flags live:
#
#   OOMScoreAdjust=500           (makes Ollama the preferred OOM victim)
#   IOSchedulingClass=idle       (so disk reads don't starve Holosoma)
#   Nice=10                      (lowest normal scheduler priority)
#   OLLAMA_GPU_OVERHEAD=4 GiB    (reserves iGPU for YOLO/Holosoma/camera)
#   OLLAMA_KEEP_ALIVE=24h        (model stays resident all day)
#   OLLAMA_CONTEXT_LENGTH=1024   (shrinks compute graph)
#   OLLAMA_FLASH_ATTENTION=1
#   OLLAMA_KV_CACHE_TYPE=q8_0
#   OLLAMA_MAX_LOADED_MODELS=1
#
# Running this script instead of systemd means NONE of those protections
# are active, and the robot WILL fall the next time a vision query runs.
#
# CORRECT WAY:
#   sudo systemctl start ollama   # uses the drop-in
#   ./warmup_vlm.sh               # then warm up
#   python3 run_marcus.py         # then start Marcus
#
# ═════════════════════════════════════════════════════════════════════
# REFUSING TO RUN. Delete this block only if you know what you're doing.
# ═════════════════════════════════════════════════════════════════════
# Hard refusal: point the operator at the supported systemd path and bail
# out before any of the (retained-for-reference) launch code below runs.
# Diagnostics go to stderr so they survive stdout redirection.
printf '%s\n' "start_ollama.sh is DEPRECATED. Use: sudo systemctl start ollama" >&2
printf '%s\n' "  (see comments at the top of this file for why)" >&2
exit 1

# Sweep away any stale ollama daemon/runner before launching a fresh one,
# then give the old processes a moment to release the GPU.
pkill -f "ollama (runner|serve)" 2>/dev/null
sleep 1

# Runtime tuning (NOTE: these partially duplicate — with different values —
# what the systemd drop-in provides; see the header for why that matters).
export OLLAMA_KEEP_ALIVE=2m
export OLLAMA_MAX_LOADED_MODELS=1
export OLLAMA_FLASH_ATTENTION=1
export OLLAMA_KV_CACHE_TYPE=q8_0

# Reserve 2 GiB of the Jetson's 15 GiB iGPU for the rest of the system
# (YOLO in CUDA FP16, Holosoma, camera, Python heap). Without this, Ollama
# assumes the full 13.8 GiB "available" is its to use and sizes its compute
# graph that way — which works for text, but the vision-encode pass of
# Qwen2.5-VL then pushes total allocation past physical memory and the
# runner dies with status 500.
export OLLAMA_GPU_OVERHEAD=2147483648   # 2 GiB, in bytes

# Launch ollama either in the foreground (--fg, useful for debugging) or as
# a detached daemon whose health is probed over the local HTTP API.
case "$1" in
  --fg)
    echo "Running ollama in foreground..."
    ollama serve
    ;;
  *)
    ollama serve > /tmp/ollama.log 2>&1 &
    sleep 3
    # Probe the version endpoint rather than trusting the PID: the server
    # can die right after forking while the process table still looks fine.
    if ! curl -sf http://localhost:11434/api/version > /dev/null; then
      echo "✗ Ollama failed to start — see /tmp/ollama.log"
      exit 1
    fi
    echo "✓ Ollama started (pid $(pgrep -f 'ollama serve'))"
    echo "  logs: tail -f /tmp/ollama.log"
    echo "  stop: pkill -f 'ollama serve'"
    ;;
esac