312 lines
9.2 KiB
Bash
Executable File
312 lines
9.2 KiB
Bash
Executable File
#!/usr/bin/env bash
# Strict mode: stop on command errors (-e), on unset variables (-u) and on a
# failing pipeline stage (pipefail); -E makes ERR traps apply inside functions.
set -Eeuo pipefail

# Repository root: the parent of the directory that contains this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Paths derived from the repository root.
RUNTIME_DIR="${ROOT_DIR}/.runtime"
BACKUP_DIR="${ROOT_DIR}/backups"
COMPOSE_FILE="${ROOT_DIR}/docker-compose.single.yml"

# Generated artefacts kept in the runtime directory.
ENV_FILE="${RUNTIME_DIR}/single-node.env"
CONFIG_FILE="${RUNTIME_DIR}/config.single.yaml"
BINARY_FILE="${RUNTIME_DIR}/ai-ops"

# Settings read from AI_OPS_* environment variables; the value after ":-" is
# used when the variable is unset or empty.
PROJECT_NAME="${AI_OPS_PROJECT:-ai-ops-single}"
APP_PORT="${AI_OPS_APP_PORT:-18080}"
DB_PORT="${AI_OPS_DB_PORT:-15432}"
REDIS_PORT="${AI_OPS_REDIS_PORT:-16379}"
DB_USER="${AI_OPS_DB_USER:-aiops}"
DB_NAME="${AI_OPS_DB_NAME:-ai_ops}"
DB_PASSWORD="${AI_OPS_DB_PASSWORD:-aiops123}"
|
|
|
|
# Write an informational line to stdout, prefixed with the script tag.
# Arguments: the message words (joined with spaces).
log() {
  printf '[ai-ops] %s\n' "$*"
}
|
|
# Write an error line to stderr and terminate the current shell with status 1.
# Arguments: the message words (joined with spaces).
fail() {
  printf '[ai-ops][ERROR] %s\n' "$*" >&2
  exit 1
}
|
|
|
|
# Abort via fail unless the named command can be resolved by the shell.
# Arguments: $1 - command name to look up.
need_cmd() {
  if ! command -v "$1" >/dev/null 2>&1; then
    fail "missing command: $1"
  fi
}
|
|
|
|
# Print the container engine to use: "docker" when the docker CLI exists and
# `docker info` succeeds, otherwise "podman" when that CLI exists.
# Aborts via fail when neither condition holds.
engine() {
  local picked=""

  if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
    picked="docker"
  elif command -v podman >/dev/null 2>&1; then
    picked="podman"
  fi

  if [[ -n "$picked" ]]; then
    echo "$picked"
  else
    fail "docker or podman is required"
  fi
}
|
|
|
|
# Print the compose front-end that matches a container engine.
# Arguments: $1 - engine name; "docker" selects the docker variants, any other
#                 value is treated as podman.
# Outputs:   "docker compose", "docker-compose" or "podman-compose".
# Aborts via fail when no matching compose tool is available.
compose_cmd() {
  local runtime="$1"

  case "$runtime" in
    docker)
      # Prefer the compose plugin, fall back to the standalone binary.
      if docker compose version >/dev/null 2>&1; then
        printf '%s\n' "docker compose"
      elif command -v docker-compose >/dev/null 2>&1; then
        printf '%s\n' "docker-compose"
      else
        fail "docker compose plugin or docker-compose is required"
      fi
      ;;
    *)
      if command -v podman-compose >/dev/null 2>&1; then
        printf '%s\n' "podman-compose"
      else
        fail "podman-compose is required for podman mode"
      fi
      ;;
  esac
}
|
|
|
|
# Print random bytes as lowercase hexadecimal (two characters per byte).
# Arguments: $1 - number of random bytes.
# Uses `openssl rand` when openssl is installed, otherwise reads /dev/urandom
# and formats the bytes with od.
rand_hex() {
  local nbytes="$1"

  if ! command -v openssl >/dev/null 2>&1; then
    head -c "$nbytes" /dev/urandom | od -An -tx1 | tr -d ' \n'
    return
  fi

  openssl rand -hex "$nbytes"
}
|
|
|
|
# Load settings from ENV_FILE while letting the caller's environment win.
#
# Non-empty AI_OPS_* values already present in the environment are remembered,
# ENV_FILE (when it exists) is sourced with auto-export enabled, and the
# remembered values are then re-exported on top. Finally the script-level
# settings (PROJECT_NAME, APP_PORT, ...) are refreshed from the result.
#
# Globals read:    ENV_FILE
# Globals written: AI_OPS_* (exported), PROJECT_NAME, APP_PORT, DB_PORT,
#                  REDIS_PORT, DB_USER, DB_NAME, DB_PASSWORD
load_env() {
  # AI_OPS_BIND_ADDR is part of the list because it is written to the env file
  # as well; without it the file would silently override the caller's value,
  # unlike every other setting.
  local -a keys=(
    AI_OPS_PROJECT AI_OPS_APP_PORT AI_OPS_DB_PORT AI_OPS_REDIS_PORT
    AI_OPS_BIND_ADDR
    AI_OPS_DB_USER AI_OPS_DB_PASSWORD AI_OPS_DB_NAME
    AI_OPS_JWT_SECRET AI_OPS_METRICS_AUTH
    AI_OPS_POSTGRES_IMAGE AI_OPS_REDIS_IMAGE AI_OPS_RUNTIME_IMAGE
  )
  # Parallel indexed arrays instead of an associative array, so the function
  # also works on bash versions that lack `declare -A`.
  local -a kept_names=() kept_values=()
  local key val i

  for key in "${keys[@]}"; do
    val="${!key-}"
    if [[ -n "$val" ]]; then
      kept_names+=("$key")
      kept_values+=("$val")
    fi
  done

  if [[ -f "$ENV_FILE" ]]; then
    # set -a exports every variable the file assigns.
    set -a
    # shellcheck disable=SC1090
    source "$ENV_FILE"
    set +a
  fi

  # Index loop: avoids expanding a possibly empty array under `set -u`.
  for ((i = 0; i < ${#kept_names[@]}; i++)); do
    export "${kept_names[i]}=${kept_values[i]}"
  done

  PROJECT_NAME="${AI_OPS_PROJECT:-$PROJECT_NAME}"
  APP_PORT="${AI_OPS_APP_PORT:-$APP_PORT}"
  DB_PORT="${AI_OPS_DB_PORT:-$DB_PORT}"
  REDIS_PORT="${AI_OPS_REDIS_PORT:-$REDIS_PORT}"
  DB_USER="${AI_OPS_DB_USER:-$DB_USER}"
  DB_NAME="${AI_OPS_DB_NAME:-$DB_NAME}"
  DB_PASSWORD="${AI_OPS_DB_PASSWORD:-$DB_PASSWORD}"
}
|
|
|
|
# Create the runtime and backup directories and, when ENV_FILE does not exist
# yet, write it with the current settings plus freshly generated secrets.
# An existing ENV_FILE is never modified.
#
# Globals read: RUNTIME_DIR, BACKUP_DIR, ENV_FILE, PROJECT_NAME, APP_PORT,
#               DB_PORT, REDIS_PORT, DB_USER, DB_PASSWORD, DB_NAME,
#               AI_OPS_BIND_ADDR (optional, defaults to 127.0.0.1)
write_env_if_missing() {
  mkdir -p "$RUNTIME_DIR" "$BACKUP_DIR"

  if [[ -f "$ENV_FILE" ]]; then
    return 0
  fi

  # Generate the secrets up front: a failing command substitution inside a
  # here-document would go unnoticed and leave an empty secret in the file.
  local jwt_secret metrics_auth
  jwt_secret="$(rand_hex 32)" || fail "could not generate AI_OPS_JWT_SECRET"
  metrics_auth="$(rand_hex 24)" || fail "could not generate AI_OPS_METRICS_AUTH"
  [[ -n "$jwt_secret" && -n "$metrics_auth" ]] || fail "generated secret is empty"

  # The restrictive umask lives in a subshell so that it applies to the env
  # file only and does not leak into files created later by the caller.
  (
    umask 077
    cat >"$ENV_FILE" <<EOF_ENV
AI_OPS_PROJECT=$PROJECT_NAME
AI_OPS_APP_PORT=$APP_PORT
AI_OPS_DB_PORT=$DB_PORT
AI_OPS_REDIS_PORT=$REDIS_PORT
AI_OPS_BIND_ADDR=${AI_OPS_BIND_ADDR:-127.0.0.1}
AI_OPS_DB_USER=$DB_USER
AI_OPS_DB_PASSWORD=$DB_PASSWORD
AI_OPS_DB_NAME=$DB_NAME
AI_OPS_JWT_SECRET=$jwt_secret
AI_OPS_METRICS_AUTH=$metrics_auth
AI_OPS_POSTGRES_IMAGE=docker.io/library/postgres:16-alpine
AI_OPS_REDIS_IMAGE=docker.io/library/redis:8-alpine
AI_OPS_RUNTIME_IMAGE=docker.io/library/alpine:3.19
EOF_ENV
  ) || fail "could not write $ENV_FILE"

  log "created $ENV_FILE with generated secrets"
}
|
|
|
|
# Escape a value so it can be embedded in a YAML double-quoted scalar:
# backslashes and double quotes get a leading backslash.
_yaml_dq() {
  local s="$1"
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  printf '%s' "$s"
}

# Render CONFIG_FILE from the loaded environment.
#
# Calls load_env, requires AI_OPS_JWT_SECRET and AI_OPS_METRICS_AUTH to be
# non-empty (the shell aborts otherwise) and falls back to aiops / aiops123 /
# ai_ops for the database user, password and name.
#
# Globals read: CONFIG_FILE, AI_OPS_JWT_SECRET, AI_OPS_METRICS_AUTH,
#               AI_OPS_DB_USER, AI_OPS_DB_PASSWORD, AI_OPS_DB_NAME
write_config() {
  load_env
  : "${AI_OPS_JWT_SECRET:?missing AI_OPS_JWT_SECRET}"
  : "${AI_OPS_METRICS_AUTH:?missing AI_OPS_METRICS_AUTH}"

  # Escape everything that lands inside "..." so that a quote or backslash in
  # a secret or password cannot produce invalid YAML.
  local jwt metrics db_user db_pass db_name
  jwt="$(_yaml_dq "$AI_OPS_JWT_SECRET")"
  metrics="$(_yaml_dq "$AI_OPS_METRICS_AUTH")"
  db_user="$(_yaml_dq "${AI_OPS_DB_USER:-aiops}")"
  db_pass="$(_yaml_dq "${AI_OPS_DB_PASSWORD:-aiops123}")"
  db_name="$(_yaml_dq "${AI_OPS_DB_NAME:-ai_ops}")"

  # The file holds secrets: when it is newly created it gets owner-only
  # permissions. The umask is confined to the subshell; the mode of an
  # already existing file is left as it is.
  (
    umask 077
    cat >"$CONFIG_FILE" <<EOF_CFG
server:
  port: 8080
  mode: production
  jwt_secret: "${jwt}"
  metrics_auth: "${metrics}"

database:
  host: postgres
  port: 5432
  user: "${db_user}"
  password: "${db_pass}"
  dbname: "${db_name}"
  sslmode: disable
  pool_size: 10

redis:
  host: redis
  port: 6379
  password: ""
  db: 0

metrics:
  prometheus_url: "http://localhost:9090"
  retention_days: 7
EOF_CFG
  )
}
|
|
|
|
# Compile ./cmd/ai-ops from the repository root into BINARY_FILE with cgo
# disabled. Requires the go toolchain.
# Globals read: ROOT_DIR, RUNTIME_DIR, BINARY_FILE
build_binary() {
  need_cmd go
  mkdir -p "$RUNTIME_DIR"
  log "building static binary"

  # Subshell: the directory change and CGO_ENABLED stay local to the build.
  (
    cd "$ROOT_DIR" || exit 1
    export CGO_ENABLED=0
    go build -buildvcs=false -o "$BINARY_FILE" ./cmd/ai-ops
  )
}
|
|
|
|
# Run a compose sub-command against COMPOSE_FILE from the repository root.
# Arguments: passed through unchanged to the compose tool.
#
# The tool is chosen by engine/compose_cmd; settings are loaded and exported
# first so the compose tool sees them in its environment.
# NOTE(review): only the two docker variants receive -p "$PROJECT_NAME"; the
# podman-compose call gets no project name — confirm this is intended.
compose() {
  local runtime tool
  local -a launcher

  runtime="$(engine)"
  tool="$(compose_cmd "$runtime")"
  load_env
  export AI_OPS_PROJECT PROJECT_NAME AI_OPS_APP_PORT AI_OPS_DB_PORT AI_OPS_REDIS_PORT AI_OPS_DB_USER AI_OPS_DB_PASSWORD AI_OPS_DB_NAME AI_OPS_JWT_SECRET AI_OPS_METRICS_AUTH AI_OPS_POSTGRES_IMAGE AI_OPS_REDIS_IMAGE AI_OPS_RUNTIME_IMAGE

  case "$tool" in
    "docker compose") launcher=(docker compose -p "$PROJECT_NAME") ;;
    "docker-compose") launcher=(docker-compose -p "$PROJECT_NAME") ;;
    *) launcher=(podman-compose) ;;
  esac

  (cd "$ROOT_DIR" && "${launcher[@]}" -f "$COMPOSE_FILE" "$@")
}
|
|
|
|
# Run `<engine> exec` with the given arguments, where <engine> is whatever
# the engine function prints.
# Arguments: passed through unchanged (options, container name, command).
container_exec() {
  local runtime
  runtime="$(engine)"

  local -a invocation=("$runtime" exec "$@")
  "${invocation[@]}"
}
|
|
|
|
# GET a path from the application on 127.0.0.1:APP_PORT and print the body.
# Arguments: $1 - request path, starting with "/".
# curl flags: fail on HTTP errors, stay silent except for errors, give up
# after 5 seconds.
curl_json() {
  local endpoint="$1"
  local url="http://127.0.0.1:${APP_PORT}${endpoint}"

  curl -fsS --max-time 5 "$url"
}
|
|
|
|
# Poll the readiness endpoint once per second until it answers successfully.
# Arguments: $1 - optional maximum number of attempts (default: 60).
# Returns:   0 as soon as the endpoint responds.
# When every attempt fails, the last 120 log lines of the ai-ops service are
# printed (best effort) and the script aborts via fail.
wait_ready() {
  local attempts="${1:-60}"
  local attempt

  [[ "$attempts" =~ ^[1-9][0-9]*$ ]] \
    || fail "wait_ready: attempts must be a positive integer, got: $attempts"

  # Arithmetic loop: no dependency on the external seq command.
  for ((attempt = 1; attempt <= attempts; attempt++)); do
    if curl_json /actuator/health/ready >/dev/null 2>&1; then
      log "ready: http://127.0.0.1:${APP_PORT}"
      return 0
    fi
    sleep 1
  done

  # Showing logs is diagnostic only; a failure here must not hide the real error.
  compose logs --tail=120 ai-ops || true
  fail "service did not become ready"
}
|
|
|
|
# "init" command: make sure the env file exists, then render the config file.
cmd_init() {
  # Order matters: write_config loads the env file that the first step creates.
  write_env_if_missing
  write_config

  log "runtime initialized under ${RUNTIME_DIR}"
}
|
|
|
|
# "start" command: initialise the runtime files, build the binary, bring the
# compose stack up in the background, wait for readiness and run the smoke
# checks.
cmd_start() {
  # Preparation.
  cmd_init
  build_binary

  # Launch and verify.
  compose up -d
  wait_ready
  cmd_smoke
}
|
|
|
|
# "stop" command: run `compose down`.
cmd_stop() {
  compose down
}
|
|
# "restart" command: restart the ai-ops service, then wait until it is ready.
cmd_restart() {
  compose restart ai-ops
  wait_ready
}
|
|
# "status" command: list the compose services and print the readiness
# response. A failing readiness request does not fail the command.
cmd_status() {
  compose ps
  curl_json /actuator/health/ready || :
}
|
|
# "logs" command: show the tail of one service's logs.
# Environment: TAIL    - number of lines (default 200)
#              SERVICE - compose service name (default ai-ops)
cmd_logs() {
  local lines="${TAIL:-200}"
  local service="${SERVICE:-ai-ops}"

  compose logs --tail="$lines" "$service"
}
|
|
|
|
# "smoke" command: end-to-end checks against the running application.
#
# Steps: health and readiness endpoints, login, three authenticated API
# endpoints, then the dashboard and the OpenAPI document. Any failing step
# aborts the script.
#
# Environment: AI_OPS_SMOKE_USER / AI_OPS_SMOKE_PASSWORD - login credentials
#              (both default to "admin"), so the check keeps working after
#              the account password has been changed.
# Globals read: APP_PORT (after load_env)
cmd_smoke() {
  load_env

  local base="http://127.0.0.1:${APP_PORT}"
  local user="${AI_OPS_SMOKE_USER:-admin}"
  local pass="${AI_OPS_SMOKE_PASSWORD:-admin}"
  local payload token endpoint

  log "health"
  curl_json /health >/dev/null
  curl_json /actuator/health/ready >/dev/null

  log "login"
  # Escape backslashes and double quotes so the credentials form valid JSON
  # string literals.
  user=${user//\\/\\\\}
  user=${user//\"/\\\"}
  pass=${pass//\\/\\\\}
  pass=${pass//\"/\\\"}
  payload="{\"username\":\"${user}\",\"password\":\"${pass}\"}"

  # The request body goes through stdin (--data-binary @-) so the password
  # does not appear in the process argument list.
  token="$(
    printf '%s' "$payload" \
      | curl -fsS --max-time 5 -X POST "${base}/api/v1/ai-ops/login" \
        -H 'Content-Type: application/json' --data-binary @- \
      | python3 -c 'import sys,json; d=json.load(sys.stdin); print((d.get("data") or d).get("token", ""))'
  )" || fail "login request failed"
  [[ -n "$token" ]] || fail "login did not return token"

  log "authenticated APIs"
  for endpoint in \
    '/api/v1/ai-ops/alerts?page=1&page_size=5' \
    /api/v1/ai-ops/rules \
    /api/v1/ai-ops/channels; do
    curl -fsS --max-time 5 -H "Authorization: Bearer $token" "${base}${endpoint}" >/dev/null \
      || fail "smoke check failed: ${endpoint}"
  done

  # Endpoints that are requested without a token.
  for endpoint in /ops/dashboard /openapi.json; do
    curl -fsS --max-time 5 "${base}${endpoint}" >/dev/null \
      || fail "smoke check failed: ${endpoint}"
  done

  log "SMOKE_OK"
}
|
|
|
|
# "backup" command: dump the database with pg_dump inside the postgres
# container and store it gzip-compressed as
# BACKUP_DIR/ai_ops_<timestamp>.sql.gz.
#
# The dump is first written to "<name>.partial" and only renamed to its final
# name after it has been verified, so a failed or empty dump never leaves a
# file that looks like a usable backup.
#
# Globals read: BACKUP_DIR, PROJECT_NAME, AI_OPS_DB_USER, AI_OPS_DB_NAME
cmd_backup() {
  load_env
  mkdir -p "$BACKUP_DIR"

  local ts out partial
  ts="$(date +%Y%m%d-%H%M%S)"
  out="$BACKUP_DIR/ai_ops_${ts}.sql.gz"
  partial="${out}.partial"

  log "creating database backup: $out"

  # pipefail is enabled inside the subshell so a pg_dump failure is detected
  # even though gzip, the last pipeline stage, succeeds.
  if ! (
    set -o pipefail
    container_exec "${PROJECT_NAME}-postgres" pg_dump -U "${AI_OPS_DB_USER:-aiops}" "${AI_OPS_DB_NAME:-ai_ops}" \
      | gzip >"$partial"
  ); then
    rm -f -- "$partial"
    fail "pg_dump failed, no backup written: $out"
  fi

  # gzip produces a non-empty archive even for empty input, so the file size
  # says nothing; look at the decompressed content instead.
  if [[ -z "$(gzip -dc -- "$partial" 2>/dev/null | head -c 256 | tr -d '[:space:]')" ]]; then
    rm -f -- "$partial"
    fail "empty backup: $out"
  fi

  mv -f -- "$partial" "$out"
  log "BACKUP_OK $out"
}
|
|
|
|
# "restore" command: replace the database content with a gzipped SQL dump.
# Arguments: $1 - path to the .sql.gz backup file.
#
# Sequence: validate the archive, stop the app, drop and recreate the public
# schema, load the dump, start the app, wait for readiness, run the smoke
# checks.
#
# Globals read: PROJECT_NAME, AI_OPS_DB_USER, AI_OPS_DB_NAME
cmd_restore() {
  local file="${1:-}"
  [[ -n "$file" && -f "$file" ]] || fail "usage: $0 restore backups/file.sql.gz"

  # Check the archive BEFORE the destructive DROP SCHEMA below: a truncated or
  # corrupt file must not cost the existing data.
  gzip -t -- "$file" 2>/dev/null || fail "backup is not a valid gzip archive: $file"

  load_env
  log "restoring from $file"

  compose stop ai-ops
  container_exec "${PROJECT_NAME}-postgres" psql -v ON_ERROR_STOP=1 -U "${AI_OPS_DB_USER:-aiops}" "${AI_OPS_DB_NAME:-ai_ops}" -c 'DROP SCHEMA public CASCADE; CREATE SCHEMA public;'

  # gzip -dc instead of zcat: some zcat implementations only accept ".Z" files.
  gzip -dc -- "$file" \
    | container_exec -i "${PROJECT_NAME}-postgres" psql -v ON_ERROR_STOP=1 -U "${AI_OPS_DB_USER:-aiops}" "${AI_OPS_DB_NAME:-ai_ops}"

  compose start ai-ops
  wait_ready
  cmd_smoke
  log "RESTORE_OK"
}
|
|
|
|
# "recover" command: bring the stack back up — postgres and redis first, the
# app afterwards — then wait for readiness and run the smoke checks.
cmd_recover() {
  log "recovering single-node stack"

  # Backing services first, application second.
  compose up -d postgres redis
  compose up -d ai-ops

  wait_ready
  cmd_smoke

  log "RECOVER_OK"
}
|
|
|
|
# "doctor" command: check local prerequisites, show listeners on the
# configured ports and render the runtime files.
#
# Checks: go, curl, python3, a usable container engine and its compose tool.
# The port listing is informational only and never fails the command.
# Globals read: APP_PORT, DB_PORT, REDIS_PORT (after load_env)
cmd_doctor() {
  local runtime

  log "doctor: commands"
  need_cmd go
  command -v curl >/dev/null 2>&1 || fail "missing curl"
  command -v python3 >/dev/null 2>&1 || fail "missing python3"
  runtime="$(engine)"
  compose_cmd "$runtime" >/dev/null

  log "doctor: ports"
  # Load the env file first so the ports inspected are the configured ones,
  # not merely the built-in defaults.
  load_env
  # Best effort: ss may be missing and "no listener" is a normal result.
  ss -ltn 2>/dev/null | grep -E ":(${APP_PORT}|${DB_PORT}|${REDIS_PORT}) " || true

  log "doctor: config"
  cmd_init
  log "DOCTOR_OK"
}
|
|
|
|
# Print the command overview to stdout.
usage() {
  printf '%s\n' \
    'Usage: scripts/aiops-single-node.sh <command>' \
    '' \
    'Commands:' \
    '  init      Generate .runtime/single-node.env and config.single.yaml' \
    '  start     Build binary, start DB/Redis/App, wait ready, run smoke' \
    '  stop      Stop and remove containers, keep volumes' \
    '  restart   Restart app container and wait ready' \
    '  status    Show compose status and readiness JSON' \
    '  logs      Show app logs; override SERVICE=postgres|redis|ai-ops TAIL=300' \
    '  smoke     Run health/login/API/dashboard/openapi smoke checks' \
    '  backup    Create backups/ai_ops_<timestamp>.sql.gz via pg_dump' \
    '  restore   Restore a gzipped SQL backup, restart app, run smoke' \
    '  recover   Recreate stopped containers from existing volumes and smoke test' \
    '  doctor    Check local prerequisites and render runtime config'
}
|
|
|
|
# Dispatch the first command-line argument to its cmd_* handler.
# Arguments: $1 - command name; the remaining arguments are forwarded to
#                 backup and restore only.
# An unknown or missing command prints the usage text and exits with status 2.
main() {
  local action="${1:-}"

  case "$action" in
    init) cmd_init ;;
    start) cmd_start ;;
    stop) cmd_stop ;;
    restart) cmd_restart ;;
    status) cmd_status ;;
    logs) cmd_logs ;;
    smoke) cmd_smoke ;;
    # "${@:2}" is every argument after the command name.
    backup) cmd_backup "${@:2}" ;;
    restore) cmd_restore "${@:2}" ;;
    recover) cmd_recover ;;
    doctor) cmd_doctor ;;
    *)
      usage
      exit 2
      ;;
  esac
}
|
|
# Script entry point: hand every command-line argument to main.
main "$@"
|