#!/usr/bin/env bash
# start_ollama.sh — DEPRECATED, do not use.
#
# This script launches Ollama as the current (unitree) user and completely
# BYPASSES the systemd drop-in at /etc/systemd/system/ollama.service.d/,
# which is where all the safety-critical flags live:
#
#   OOMScoreAdjust=500           (makes Ollama the preferred OOM victim)
#   IOSchedulingClass=idle       (so disk reads don't starve Holosoma)
#   Nice=10                      (lowest normal scheduler priority)
#   OLLAMA_GPU_OVERHEAD=4 GiB    (reserves iGPU for YOLO/Holosoma/camera)
#   OLLAMA_KEEP_ALIVE=24h        (model stays resident all day)
#   OLLAMA_CONTEXT_LENGTH=1024   (shrinks compute graph)
#   OLLAMA_FLASH_ATTENTION=1
#   OLLAMA_KV_CACHE_TYPE=q8_0
#   OLLAMA_MAX_LOADED_MODELS=1
#
# Running this script instead of systemd means NONE of those protections
# are active, and the robot WILL fall the next time a vision query runs.
#
# CORRECT WAY:
#   sudo systemctl start ollama   # uses the drop-in
#   ./warmup_vlm.sh               # then warm up
#   python3 run_marcus.py         # then start Marcus
#
# ═════════════════════════════════════════════════════════════════════
# REFUSING TO RUN. Delete this block only if you know what you're doing.
# ═════════════════════════════════════════════════════════════════════
# Hard refusal: point the operator at the supported systemd path and bail
# out before any of the (retained-for-reference) launch code below runs.
# Diagnostics go to stderr so they survive stdout redirection.
printf '%s\n' "start_ollama.sh is DEPRECATED. Use: sudo systemctl start ollama" >&2
printf '%s\n' "  (see comments at the top of this file for why)" >&2
exit 1

# Sweep away any stale ollama daemon/runner before launching a fresh one,
# then give the old processes a moment to release the GPU.
pkill -f "ollama (runner|serve)" 2>/dev/null
sleep 1

# Runtime tuning (NOTE: these partially duplicate — with different values —
# what the systemd drop-in provides; see the header for why that matters).
export OLLAMA_KEEP_ALIVE=2m
export OLLAMA_MAX_LOADED_MODELS=1
export OLLAMA_FLASH_ATTENTION=1
export OLLAMA_KV_CACHE_TYPE=q8_0

# Reserve 2 GiB of the Jetson's 15 GiB iGPU for the rest of the system
# (YOLO in CUDA FP16, Holosoma, camera, Python heap). Without this, Ollama
# assumes the full 13.8 GiB "available" is its to use and sizes its compute
# graph that way — which works for text, but the vision-encode pass of
# Qwen2.5-VL then pushes total allocation past physical memory and the
# runner dies with status 500.
export OLLAMA_GPU_OVERHEAD=2147483648   # 2 GiB, in bytes

# Launch ollama either in the foreground (--fg, useful for debugging) or as
# a detached daemon whose health is probed over the local HTTP API.
case "$1" in
  --fg)
    echo "Running ollama in foreground..."
    ollama serve
    ;;
  *)
    ollama serve > /tmp/ollama.log 2>&1 &
    sleep 3
    # Probe the version endpoint rather than trusting the PID: the server
    # can die right after forking while the process table still looks fine.
    if ! curl -sf http://localhost:11434/api/version > /dev/null; then
      echo "✗ Ollama failed to start — see /tmp/ollama.log"
      exit 1
    fi
    echo "✓ Ollama started (pid $(pgrep -f 'ollama serve'))"
    echo "  logs: tail -f /tmp/ollama.log"
    echo "  stop: pkill -f 'ollama serve'"
    ;;
esac