#!/usr/bin/env bash
# start_ollama.sh — launch Ollama with Jetson-friendly memory settings
#
# The Jetson Orin NX has 16 GB unified CPU+GPU memory. When Marcus + YOLO +
# Whisper + Holosoma + the camera + audio all run alongside Qwen2.5-VL,
# the compute-graph OOMs the llama runner and Linux kills the biggest
# process (often Holosoma — which is a safety problem for locomotion).
#
# These env vars cut Ollama's memory footprint:
#   OLLAMA_FLASH_ATTENTION=1    ~30 % less memory for attention tensors
#   OLLAMA_KV_CACHE_TYPE=q8_0   quantize KV cache (halves it)
#   OLLAMA_KEEP_ALIVE=2m        keep the model warm for 2 min then evict
#                               (adjust if cold-load lag matters more
#                               than idle memory)
#   OLLAMA_MAX_LOADED_MODELS=1  never hold two VL models at once
#
# Usage:
#   ./start_ollama.sh        # starts server in background, logs to /tmp/ollama.log
#   ./start_ollama.sh --fg   # runs in foreground (for debugging)
|
# Fail fast on unhandled errors and failed pipeline stages.
# (No -u: the script is tolerant of a missing "$1" by design.)
set -eo pipefail

# Kill any previously running Ollama server/runner so we start from a clean
# slate — stale runners keep GPU memory pinned. pkill exits non-zero when
# nothing matched, which is the normal first-boot case, hence `|| true`.
pkill -f "ollama (runner|serve)" 2>/dev/null || true
sleep 1
# Memory-saving knobs for Ollama on the 16 GB Jetson (see file header).
OLLAMA_FLASH_ATTENTION=1     # ~30 % less memory for attention tensors
OLLAMA_KV_CACHE_TYPE=q8_0    # quantize KV cache — roughly halves it
OLLAMA_KEEP_ALIVE=2m         # keep model warm for 2 min, then evict
OLLAMA_MAX_LOADED_MODELS=1   # never hold two VL models at once

# Reserve 2 GiB of the Jetson's 15 GiB iGPU for the rest of the system
# (YOLO in CUDA FP16, Holosoma, camera, Python heap). Without this, Ollama
# assumes the full 13.8 GiB "available" is its to use and sizes its compute
# graph that way — which works for text, but the vision-encode pass of
# Qwen2.5-VL then pushes total allocation past physical memory and the
# runner dies with status 500.
OLLAMA_GPU_OVERHEAD=2147483648   # 2 GiB in bytes

export OLLAMA_FLASH_ATTENTION OLLAMA_KV_CACHE_TYPE OLLAMA_KEEP_ALIVE \
       OLLAMA_MAX_LOADED_MODELS OLLAMA_GPU_OVERHEAD
# Launch the server: foreground with --fg (debugging), background otherwise.
# ${1:-} instead of $1 so the no-argument case stays safe even under `set -u`.
if [[ "${1:-}" == "--fg" ]]; then
  echo "Running ollama in foreground..."
  ollama serve
else
  ollama serve > /tmp/ollama.log 2>&1 &

  # Poll the version endpoint instead of a fixed `sleep 3`: the server is
  # usually up within a second, but cold starts (model-dir scan, first GPU
  # init) can take longer. Give it up to ~10 s before declaring failure.
  up=0
  for _ in {1..20}; do
    if curl -sf http://localhost:11434/api/version > /dev/null; then
      up=1
      break
    fi
    sleep 0.5
  done

  if (( up )); then
    echo "✓ Ollama started (pid $(pgrep -f 'ollama serve'))"
    echo " logs: tail -f /tmp/ollama.log"
    echo " stop: pkill -f 'ollama serve'"
  else
    echo "✗ Ollama failed to start — see /tmp/ollama.log"
    exit 1
  fi
fi