Sanadv3/shell_scripts/clean_sanad.sh

157 lines
5.8 KiB
Bash
Executable File

#!/usr/bin/env bash
# clean_sanad.sh — wipe transient state (logs, recordings, audio, caches).
#
# Safe by default: shows a preview + asks for confirmation. Won't touch
# config files, skills.json, wake_phrases.json, recorded JSONL motions,
# or the model directory.
#
# Usage:
# ./clean_sanad.sh # interactive — preview + y/N prompt
# ./clean_sanad.sh -y # skip the prompt
# ./clean_sanad.sh --dry-run # show what would be deleted; delete nothing
# ./clean_sanad.sh --logs # logs only
# ./clean_sanad.sh --records # recorded turns + typed-replay audio only
# ./clean_sanad.sh --cache # __pycache__ only
# ./clean_sanad.sh --all # everything (default)
#
# Override the project location:
# SANAD_HOME=/some/path ./clean_sanad.sh
# Treat any reference to an unset variable as a fatal error.
set -u

# Resolve the project root.
#   - $SANAD_HOME from the environment wins when set and non-empty.
#   - Otherwise $HOME/Sanad is tried (skipped when HOME is unset/empty, so
#     `set -u` does not kill the script before the fallback below can run).
#   - If the candidate is not a directory, fall back to the parent of the
#     directory that contains this script (running from a repo checkout).
SANAD_HOME="${SANAD_HOME:-${HOME:+$HOME/Sanad}}"
if [ ! -d "$SANAD_HOME" ]; then
  # Fallback for invocation from the repo (workstation).
  # Stop if the script directory cannot be resolved: an empty SCRIPT_DIR
  # would make dirname return "." and this destructive script would then
  # operate on whatever the current working directory happens to be.
  if ! SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" \
    || [ -z "$SCRIPT_DIR" ]; then
    echo "Cannot resolve the script directory for the repo fallback" >&2
    exit 1
  fi
  SANAD_HOME="$(dirname -- "$SCRIPT_DIR")"
fi
if [ ! -d "$SANAD_HOME" ]; then
  echo "Sanad dir not found: $SANAD_HOME" >&2
  exit 1
fi
# ── flag parsing ──────────────────────────────────────────
TARGET="all"
DRY_RUN=0
ASSUME_YES=0

# Print the usage text, i.e. the block of comment lines at the top of this
# script. Lines before the first comment (including the shebang) are skipped
# and printing stops at the first non-comment line, so the help output follows
# the header automatically instead of depending on a hard-coded line range.
print_usage() {
  awk '
    !seen && /^#!/ { next }
    /^#/           { seen = 1; print; next }
    seen           { exit }
  ' "$0"
}

# Parse command-line flags into the globals TARGET, DRY_RUN and ASSUME_YES.
# Arguments: the script's command-line arguments.
# When several target flags are given, the last one wins.
# Exits 0 after printing usage for -h/--help; exits 2 on an unknown option.
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      -y|--yes) ASSUME_YES=1 ;;
      -n|--dry-run) DRY_RUN=1 ;;
      --logs) TARGET="logs" ;;
      --records) TARGET="records" ;;
      --cache) TARGET="cache" ;;
      --all) TARGET="all" ;;
      -h|--help)
        print_usage
        exit 0
        ;;
      *)
        echo "unknown option: $arg (try -h)" >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
# ── targets ── one path list per category, kept apart so the preview can
# report an item count and a size for each category separately.
declare -a LOGS_PATHS=() RECORDS_PATHS=() CACHE_PATHS=()
# Append every regular file under $SANAD_HOME/logs (at most two levels deep)
# to LOGS_PATHS.
# Globals: SANAD_HOME (read), LOGS_PATHS (appended to)
collect_logs() {
  local path
  # NUL-delimited on purpose: with newline-delimited output a file name that
  # contains a newline is split into several bogus entries, and every entry
  # collected here is later handed to `rm -rf`.
  # find's stderr is discarded so a missing logs/ directory simply yields
  # no entries.
  while IFS= read -r -d '' path; do
    LOGS_PATHS+=("$path")
  done < <(
    find "$SANAD_HOME/logs" -maxdepth 2 -type f -print0 2>/dev/null
  )
}
# Append recorded-turn files and replay audio to RECORDS_PATHS:
#   - every regular file anywhere under $SANAD_HOME/data/recordings
#   - *.wav / *.pcm files directly inside $SANAD_HOME/data/audio
# Globals: SANAD_HOME (read), RECORDS_PATHS (appended to)
collect_records() {
  local path
  # NUL-delimited on purpose: with newline-delimited output a file name that
  # contains a newline is split into several bogus entries, and every entry
  # collected here is later handed to `rm -rf`.
  # find's stderr is discarded so missing directories simply yield no entries.
  while IFS= read -r -d '' path; do
    RECORDS_PATHS+=("$path")
  done < <(
    find "$SANAD_HOME/data/recordings" -type f -print0 2>/dev/null
    find "$SANAD_HOME/data/audio" -maxdepth 1 -type f \
      \( -name "*.wav" -o -name "*.pcm" \) -print0 2>/dev/null
  )
}
# Append Python bytecode caches found anywhere under $SANAD_HOME to
# CACHE_PATHS:
#   - each __pycache__ directory, listed once and not descended into
#   - each *.pyc file that lives outside a __pycache__ directory
# Pruning at __pycache__ keeps a cached file from being listed both on its
# own and as part of its directory, which would double-count it in the
# preview's item and size totals.
# Globals: SANAD_HOME (read), CACHE_PATHS (appended to)
collect_cache() {
  local path
  # NUL-delimited on purpose: with newline-delimited output a path that
  # contains a newline is split into several bogus entries, and every entry
  # collected here is later handed to `rm -rf`.
  # find's stderr is discarded so unreadable subtrees do not clutter output.
  while IFS= read -r -d '' path; do
    CACHE_PATHS+=("$path")
  done < <(
    find "$SANAD_HOME" \
      \( -type d -name "__pycache__" -prune -print0 \) -o \
      \( -type f -name "*.pyc" -print0 \) 2>/dev/null
  )
}
# Run the collectors that the selected target calls for. "all" enables every
# category; any other value enables only the category of the same name.
if [[ "$TARGET" == "all" || "$TARGET" == "logs" ]]; then
  collect_logs
fi
if [[ "$TARGET" == "all" || "$TARGET" == "records" ]]; then
  collect_records
fi
if [[ "$TARGET" == "all" || "$TARGET" == "cache" ]]; then
  collect_cache
fi
# Print the combined size in bytes (as reported by `du -sb`) of the given
# paths.
# Arguments: zero or more paths; empty strings and paths that do not exist
#            are silently skipped.
# Outputs:   the total on stdout ("0" when nothing could be measured).
sum_bytes() {
  local total=0 path size
  for path in "$@"; do
    [ -e "$path" ] || continue
    size=
    # Take only the first field of du's first output line. du echoes the
    # path after the size, so a path containing a newline produces several
    # output lines; reading just the leading field keeps the value a single
    # number. "--" stops du from treating an odd path as an option.
    read -r size _ < <(du -sb -- "$path" 2>/dev/null)
    # Ignore anything that is not a plain non-negative integer (du failed
    # or printed something unexpected) instead of breaking the arithmetic.
    case "$size" in
      '' | *[!0-9]*) continue ;;
    esac
    total=$((total + size))
  done
  echo "$total"
}
# Print a byte count in human-readable IEC form with a "B" suffix.
# Arguments: $1 - number of bytes.
# If numfmt fails (for example because it is unavailable or rejects the
# value), the raw value followed by " B" is printed instead.
fmt() {
  local bytes=$1
  if ! numfmt --to=iec --suffix=B "$bytes" 2>/dev/null; then
    echo "${bytes} B"
  fi
}
# Size of each category in bytes. The ":-" fallback passes one empty argument
# when an array has no elements; sum_bytes skips it.
LOGS_BYTES="$(sum_bytes "${LOGS_PATHS[@]:-}")"
RECORDS_BYTES="$(sum_bytes "${RECORDS_PATHS[@]:-}")"
CACHE_BYTES="$(sum_bytes "${CACHE_PATHS[@]:-}")"
TOTAL_BYTES=$((LOGS_BYTES + RECORDS_BYTES + CACHE_BYTES))
TOTAL_COUNT=$((${#LOGS_PATHS[@]} + ${#RECORDS_PATHS[@]} + ${#CACHE_PATHS[@]}))

# ── preview ───────────────────────────────────────────────
# Nothing was collected for the chosen target: report it and stop here.
if ((TOTAL_COUNT == 0)); then
  echo "Nothing to delete (target=$TARGET)."
  exit 0
fi

# Print one table row: $1 label, $2 item count, $3 size in bytes.
print_table_row() {
  printf '│ %-12s │ %7d │ %10s │\n' "$1" "$2" "$(fmt "$3")"
}

printf '\n%s\n%s\n\n' \
  "Sanad clean — target=$TARGET dry_run=$DRY_RUN" \
  "Project: $SANAD_HOME"

printf '┌──────────────┬─────────┬────────────┐\n'
printf '│ %-12s │ %7s │ %10s │\n' "Category" "Items" "Size"
printf '├──────────────┼─────────┼────────────┤\n'
# Categories with no items are left out of the table.
((${#LOGS_PATHS[@]} > 0)) && print_table_row "logs" "${#LOGS_PATHS[@]}" "$LOGS_BYTES"
((${#RECORDS_PATHS[@]} > 0)) && print_table_row "records" "${#RECORDS_PATHS[@]}" "$RECORDS_BYTES"
((${#CACHE_PATHS[@]} > 0)) && print_table_row "cache" "${#CACHE_PATHS[@]}" "$CACHE_BYTES"
printf '├──────────────┼─────────┼────────────┤\n'
print_table_row "TOTAL" "$TOTAL_COUNT" "$TOTAL_BYTES"
printf '└──────────────┴─────────┴────────────┘\n'
# Flatten the three category lists into the single list the delete loop
# walks, in the order logs, records, cache. The ":-" fallback yields one
# empty string for a category with no elements; those placeholders are
# filtered out here so only real paths remain.
declare -a PATHS_TO_DELETE=()
for candidate in "${LOGS_PATHS[@]:-}" "${RECORDS_PATHS[@]:-}" "${CACHE_PATHS[@]:-}"; do
  if [ -n "$candidate" ]; then
    PATHS_TO_DELETE+=("$candidate")
  fi
done
# Ask for confirmation on the terminal.
# Returns 0 only when the reply is exactly y, Y, yes or YES; any other reply
# (including an empty one) counts as "no".
confirm_delete() {
  local reply
  read -r -p "Proceed with delete? [y/N] " reply
  case "$reply" in
    y|Y|yes|YES) return 0 ;;
  esac
  return 1
}

# A dry run stops after the preview: nothing is removed.
if [ "$DRY_RUN" -eq 1 ]; then
  echo "Dry run — nothing deleted."
  exit 0
fi

# Unless -y/--yes was given, require an explicit yes before deleting.
if [ "$ASSUME_YES" -ne 1 ] && ! confirm_delete; then
  echo "Aborted."
  exit 0
fi
# ── delete ────────────────────────────────────────────────
# Remove each collected path, counting the ones for which rm reports success.
removed=0
for victim in "${PATHS_TO_DELETE[@]}"; do
  if rm -rf -- "$victim"; then
    removed=$((removed + 1))
  fi
done
printf 'Removed %s of %s item(s).\n' "$removed" "${#PATHS_TO_DELETE[@]}"

# Recreate empty top-level dirs so next start_sanad.sh run doesn't
# complain about missing paths.
mkdir -p \
  "$SANAD_HOME/logs" \
  "$SANAD_HOME/data/recordings" \
  "$SANAD_HOME/data/audio"
echo "Done."