Unified Health Check — Implementation Plan
For Claude: REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
Goal: Create a unified bin/lib/health_check.sh library that auto-detects deployment mode and runs mode-appropriate checks, replacing the duplicated check logic in check_system.sh and cli/install_menu.sh.
Architecture: A single library with detect_mode() and grouped check functions (run_core_checks, run_django_checks, run_dev_checks, run_docker_checks, run_systemd_checks). A top-level run_all_checks() orchestrates detection and execution. Both check_system.sh and cli’s check_installation() delegate to this library. JSON output supported via --json flag.
Tech Stack: Bash, BATS, existing bin/lib/ shared libraries
Task 1: Create bin/lib/health_check.sh — scaffolding + result helpers + detect_mode
Files:
- Create:
bin/lib/health_check.sh - Create:
bin/tests/lib/test_health_check.bats
Step 1: Write tests for detect_mode and result helpers
Create bin/tests/lib/test_health_check.bats:
#!/usr/bin/env bats
setup() {
load '../test_helper/common-setup'
_common_setup
source "$LIB_DIR/health_check.sh"
# Reset counters before each test
_hc_passed=0
_hc_warned=0
_hc_failed=0
_hc_json_results=()
_hc_json_mode=false
}
# --- Result helpers ---
@test "hc_pass increments passed counter" {
hc_pass "test check" "all good"
[ "$_hc_passed" -eq 1 ]
}
@test "hc_warn increments warned counter" {
hc_warn "test check" "something off"
[ "$_hc_warned" -eq 1 ]
}
@test "hc_fail increments failed counter" {
hc_fail "test check" "broken"
[ "$_hc_failed" -eq 1 ]
}
@test "hc_pass in JSON mode appends to results array" {
_hc_json_mode=true
hc_pass "uv" "uv is installed"
[ "${#_hc_json_results[@]}" -eq 1 ]
[[ "${_hc_json_results[0]}" == *'"status":"ok"'* ]]
[[ "${_hc_json_results[0]}" == *'"check":"uv"'* ]]
}
@test "hc_fail in JSON mode appends err status" {
_hc_json_mode=true
hc_fail "python" "not found"
[[ "${_hc_json_results[0]}" == *'"status":"err"'* ]]
}
# --- detect_mode ---
@test "detect_mode returns dev as fallback" {
# Override functions to ensure no Docker/systemd detected
docker() { return 1; }
export -f docker
systemctl() { return 1; }
export -f systemctl
run detect_mode
assert_success
assert_output "dev"
}
@test "detect_mode returns dev when .env has DJANGO_ENV=dev" {
local tmpdir
tmpdir="$(mktemp -d)"
echo "DJANGO_ENV=dev" > "$tmpdir/.env"
mkdir -p "$tmpdir/.venv"
PROJECT_DIR="$tmpdir"
docker() { return 1; }
export -f docker
systemctl() { return 1; }
export -f systemctl
run detect_mode
rm -rf "$tmpdir"
assert_output "dev"
}
@test "detect_mode returns prod when .env has DJANGO_ENV=prod and .venv exists" {
local tmpdir
tmpdir="$(mktemp -d)"
echo "DJANGO_ENV=prod" > "$tmpdir/.env"
mkdir -p "$tmpdir/.venv"
PROJECT_DIR="$tmpdir"
docker() { return 1; }
export -f docker
systemctl() { return 1; }
export -f systemctl
run detect_mode
rm -rf "$tmpdir"
assert_output "prod"
}
Step 2: Run tests to verify they fail
Run: ./bin/tests/test_helper/bats-core/bin/bats bin/tests/lib/test_health_check.bats Expected: FAIL — file does not exist
Step 3: Create the library
Create bin/lib/health_check.sh:
#!/usr/bin/env bash
#
# Unified health check library.
# Auto-detects deployment mode and runs appropriate checks.
# Source this file — do not execute directly.
#
[[ -n "${_LIB_HEALTH_CHECK_LOADED:-}" ]] && return 0
_LIB_HEALTH_CHECK_LOADED=1
_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$_LIB_DIR/colors.sh"
source "$_LIB_DIR/paths.sh"
source "$_LIB_DIR/checks.sh"
# --- State ---
_hc_passed=0
_hc_warned=0
_hc_failed=0
_hc_json_mode=false
_hc_json_results=()
# --- Result helpers ---
hc_pass() {
local check="$1" msg="$2"
if [ "$_hc_json_mode" = true ]; then
_hc_json_results+=("{\"check\":\"$check\",\"status\":\"ok\",\"message\":\"$msg\"}")
else
printf " %bOK%b %s\n" "$GREEN" "$NC" "$msg"
fi
((_hc_passed++)) || true
}
hc_warn() {
local check="$1" msg="$2"
if [ "$_hc_json_mode" = true ]; then
_hc_json_results+=("{\"check\":\"$check\",\"status\":\"warn\",\"message\":\"$msg\"}")
else
printf " %bWARN%b %s\n" "$YELLOW" "$NC" "$msg"
fi
((_hc_warned++)) || true
}
hc_fail() {
local check="$1" msg="$2"
if [ "$_hc_json_mode" = true ]; then
_hc_json_results+=("{\"check\":\"$check\",\"status\":\"err\",\"message\":\"$msg\"}")
else
printf " %bERR%b %s\n" "$RED" "$NC" "$msg"
fi
((_hc_failed++)) || true
}
# --- Mode detection ---
detect_mode() {
# 1. Docker — compose containers running for this project
local compose_file="$PROJECT_DIR/deploy/docker/docker-compose.yml"
if command_exists docker && docker compose -f "$compose_file" ps --format json 2>/dev/null | grep -q "running"; then
echo "docker"
return 0
fi
# 2. systemd — server-monitoring.service unit exists
if command -v systemctl &>/dev/null && systemctl list-unit-files server-monitoring.service &>/dev/null 2>&1 && \
systemctl list-unit-files server-monitoring.service 2>/dev/null | grep -q "server-monitoring"; then
echo "systemd"
return 0
fi
# 3. prod — .venv exists + DJANGO_ENV=prod in .env
if [ -d "$PROJECT_DIR/.venv" ] && [ -f "$PROJECT_DIR/.env" ]; then
if grep -qE "^DJANGO_ENV=prod" "$PROJECT_DIR/.env" 2>/dev/null; then
echo "prod"
return 0
fi
fi
# 4. dev — fallback
echo "dev"
}
# --- Check groups ---
run_core_checks() {
printf "\n%b=== Core Checks ===%b\n\n" "$BOLD" "$NC"
# Python 3.10+
local py_version
py_version=$(python3 --version 2>/dev/null | grep -oE '[0-9]+\.[0-9]+' || true)
if [ -n "$py_version" ]; then
local major minor
major=$(echo "$py_version" | cut -d. -f1)
minor=$(echo "$py_version" | cut -d. -f2)
if [ "$major" -ge 3 ] && [ "$minor" -ge 10 ]; then
hc_pass "python" "Python $py_version (>= 3.10)"
else
hc_fail "python" "Python $py_version (need >= 3.10)"
fi
else
hc_fail "python" "Python 3 not found"
fi
# uv installed
if command_exists uv; then
hc_pass "uv" "uv is installed ($(uv --version 2>/dev/null || echo 'unknown'))"
else
hc_fail "uv" "uv is not installed"
fi
# .env exists
if [ -f "$PROJECT_DIR/.env" ]; then
hc_pass "dotenv" ".env file found"
else
hc_warn "dotenv" ".env file not found (copy .env.sample to .env)"
fi
# .venv exists
if [ -d "$PROJECT_DIR/.venv" ]; then
hc_pass "venv" ".venv directory found"
else
hc_warn "venv" ".venv not found (run: uv sync)"
fi
# Project directory writable
if touch "$PROJECT_DIR/.check_system_test" 2>/dev/null; then
rm -f "$PROJECT_DIR/.check_system_test"
hc_pass "writable" "Project directory is writable"
else
hc_warn "writable" "Project directory is not writable"
fi
# Disk space (>1GB free)
if command -v df &>/dev/null; then
local free_kb free_gb
free_kb=$(df -k "$PROJECT_DIR" | tail -1 | awk '{print $4}')
free_gb=$((free_kb / 1024 / 1024))
if [ "$free_gb" -ge 1 ]; then
hc_pass "disk" "Disk space: ${free_gb}GB free"
else
hc_warn "disk" "Low disk space: ${free_gb}GB free (< 1GB)"
fi
fi
}
run_django_checks() {
printf "\n%b=== Django Checks ===%b\n\n" "$BOLD" "$NC"
if [ ! -d "$PROJECT_DIR/.venv" ]; then
hc_warn "django" "Skipping Django checks (.venv not found)"
return 0
fi
# Django system check
if uv run python manage.py check &>/dev/null; then
hc_pass "django_check" "Django system check passed"
else
hc_fail "django_check" "Django system check failed"
fi
# Pending migrations
if uv run python manage.py migrate --check &>/dev/null; then
hc_pass "migrations" "No pending migrations"
else
hc_warn "migrations" "Pending migrations found (run: uv run python manage.py migrate)"
fi
}
run_dev_checks() {
printf "\n%b=== Dev Checks ===%b\n\n" "$BOLD" "$NC"
# Pre-commit hooks
if [ -f "$PROJECT_DIR/.git/hooks/pre-commit" ]; then
hc_pass "precommit" "Pre-commit hooks installed"
else
hc_warn "precommit" "Pre-commit hooks not installed (run: uv run pre-commit install)"
fi
# Shell aliases
if [ -f "$PROJECT_DIR/bin/aliases.sh" ]; then
hc_pass "aliases" "Shell aliases configured"
else
hc_warn "aliases" "Shell aliases not configured (run: bin/setup_aliases.sh)"
fi
}
run_docker_checks() {
printf "\n%b=== Docker Checks ===%b\n\n" "$BOLD" "$NC"
local compose_file="$PROJECT_DIR/deploy/docker/docker-compose.yml"
# Docker daemon
if command_exists docker && docker info &>/dev/null; then
hc_pass "docker_daemon" "Docker daemon is running"
else
hc_fail "docker_daemon" "Docker daemon is not running"
return 0
fi
# docker compose v2
if docker compose version &>/dev/null; then
hc_pass "docker_compose" "docker compose v2 available ($(docker compose version --short 2>/dev/null))"
else
hc_fail "docker_compose" "docker compose v2 not available"
return 0
fi
# Container health — check each service
source "$_LIB_DIR/docker.sh"
local services=("redis" "web" "celery")
for svc in "${services[@]}"; do
local state
state=$(get_service_state "$compose_file" "$svc")
if [ "$state" = "running" ]; then
hc_pass "container_$svc" "$svc container is running"
else
hc_fail "container_$svc" "$svc container is not running (state: ${state:-unknown})"
fi
done
}
run_systemd_checks() {
printf "\n%b=== systemd Checks ===%b\n\n" "$BOLD" "$NC"
# server-monitoring.service
if systemctl is-active --quiet server-monitoring 2>/dev/null; then
hc_pass "systemd_web" "server-monitoring.service is active"
else
hc_fail "systemd_web" "server-monitoring.service is not active"
fi
# server-monitoring-celery.service
if systemctl is-active --quiet server-monitoring-celery 2>/dev/null; then
hc_pass "systemd_celery" "server-monitoring-celery.service is active"
else
hc_fail "systemd_celery" "server-monitoring-celery.service is not active"
fi
# Redis
if systemctl is-active --quiet redis-server 2>/dev/null || systemctl is-active --quiet redis 2>/dev/null; then
hc_pass "redis" "Redis service is active"
else
hc_fail "redis" "Redis service is not active"
fi
# Gunicorn socket
if [ -S /run/server-monitoring/gunicorn.sock ]; then
hc_pass "socket" "Gunicorn socket exists"
else
hc_warn "socket" "Gunicorn socket not found at /run/server-monitoring/gunicorn.sock"
fi
}
# --- Orchestrator ---
run_all_checks() {
local mode
mode=$(detect_mode)
if [ "$_hc_json_mode" = false ]; then
printf "\n%b============================================%b\n" "$BOLD" "$NC"
printf "%b server-maintanence Health Check%b\n" "$BOLD" "$NC"
printf "%b============================================%b\n" "$BOLD" "$NC"
printf "\n Detected mode: %b%s%b\n" "$CYAN" "$mode" "$NC"
fi
case "$mode" in
dev)
run_core_checks
run_django_checks
run_dev_checks
;;
prod)
run_core_checks
run_django_checks
;;
docker)
run_docker_checks
;;
systemd)
run_systemd_checks
;;
esac
if [ "$_hc_json_mode" = true ]; then
# Output JSON array
printf "["
local first=true
for item in "${_hc_json_results[@]}"; do
if [ "$first" = true ]; then
first=false
else
printf ","
fi
printf "%s" "$item"
done
printf "]\n"
else
# Summary line
printf "\n %b%d passed%b, %b%d warning(s)%b, %b%d error(s)%b\n\n" \
"$GREEN" "$_hc_passed" "$NC" \
"$YELLOW" "$_hc_warned" "$NC" \
"$RED" "$_hc_failed" "$NC"
fi
# Exit code: 1 if any errors
[ "$_hc_failed" -eq 0 ]
}
Step 4: Run tests
Run: ./bin/tests/test_helper/bats-core/bin/bats bin/tests/lib/test_health_check.bats Expected: All tests pass
Step 5: Commit
git add bin/lib/health_check.sh bin/tests/lib/test_health_check.bats
git commit -m "feat: add bin/lib/health_check.sh with detect_mode and check groups"
Task 2: Refactor check_system.sh to use health_check library
Files:
- Modify:
bin/check_system.sh
Step 1: Rewrite check_system.sh
Replace the entire file with:
#!/bin/bash
#
# System check script for server-maintanence
# Unified health check — auto-detects deployment mode
#
set -e
# Source shared libraries
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/lib/health_check.sh"
cd "$PROJECT_DIR"
# Parse flags
for arg in "$@"; do
case $arg in
--json) _hc_json_mode=true ;;
--help|-h)
echo "Usage: bin/check_system.sh [OPTIONS]"
echo ""
echo "Run health checks for server-maintanence."
echo "Auto-detects deployment mode (dev/prod/docker/systemd)."
echo ""
echo "Options:"
echo " --json Output as JSON"
echo " --help, -h Show this help"
exit 0
;;
esac
done
run_all_checks
Step 2: Verify syntax
Run: bash -n bin/check_system.sh Expected: No errors
Step 3: Commit
git add bin/check_system.sh
git commit -m "refactor: check_system.sh delegates to health_check library"
Task 3: Refactor cli/install_menu.sh to use health_check library
Files:
- Modify:
bin/cli/install_menu.sh
Step 1: Replace check_installation()
Replace the check_installation() function (lines 48-86) with:
check_installation() {
source "$SCRIPT_DIR/lib/health_check.sh"
run_all_checks
}
Keep install_project() unchanged.
Step 2: Verify syntax
Run: bash -n bin/cli.sh (since install_menu.sh is sourced by cli.sh) Expected: No errors
Step 3: Commit
git add bin/cli/install_menu.sh
git commit -m "refactor: cli check_installation delegates to health_check library"
Task 4: Update smoke tests
Files:
- Modify:
bin/tests/test_check_system.bats
Step 1: Update the test file
Replace the entire file with:
#!/usr/bin/env bats
setup() {
load 'test_helper/common-setup'
_common_setup
}
@test "check_system.sh passes syntax check" {
run bash -n "$BIN_DIR/check_system.sh"
assert_success
}
@test "check_system.sh --help shows usage" {
run "$BIN_DIR/check_system.sh" --help
assert_success
assert_output --partial "Usage"
assert_output --partial "--json"
}
@test "check_system.sh --json outputs valid JSON" {
run "$BIN_DIR/check_system.sh" --json
# May fail if Django not configured, but output should still be JSON
[[ "${output}" == "["* ]]
}
@test "check_system.sh detects a mode" {
run "$BIN_DIR/check_system.sh"
assert_output --partial "Detected mode:"
}
Step 2: Run all tests
Run: ./bin/tests/test_helper/bats-core/bin/bats bin/tests/lib/ bin/tests/ Expected: All pass
Step 3: Commit
git add bin/tests/test_check_system.bats
git commit -m "test: update check_system smoke tests for unified health check"