#!/usr/bin/env bash
#
# Single-node lifecycle helper for the ai-ops stack: renders runtime config,
# builds the binary, drives the compose project, and offers smoke / backup /
# restore / recover / doctor commands.
#
# Environment overrides (all optional): AI_OPS_PROJECT, AI_OPS_APP_PORT,
# AI_OPS_DB_PORT, AI_OPS_REDIS_PORT, AI_OPS_DB_USER, AI_OPS_DB_NAME,
# AI_OPS_DB_PASSWORD, AI_OPS_JWT_SECRET, AI_OPS_METRICS_AUTH,
# AI_OPS_SMOKE_USER, AI_OPS_SMOKE_PASSWORD, SERVICE, TAIL.
#
# NOTE(review): the copy of this script that was reviewed had been collapsed
# onto a few physical lines and part of it was lost in extraction. The
# surviving residue of the damaged region was:
#   cat >"$ENV_FILE" <"$CONFIG_FILE" </dev/null 2>&1; then
# Everything marked "reconstructed" below (env/config heredoc bodies,
# build_binary, compose, container_exec, curl_json, head of wait_ready) was
# rebuilt from call sites only. Compare against version control before use.
set -Eeuo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
readonly ROOT_DIR
readonly RUNTIME_DIR="$ROOT_DIR/.runtime"
readonly BACKUP_DIR="$ROOT_DIR/backups"
readonly COMPOSE_FILE="$ROOT_DIR/docker-compose.single.yml"
readonly ENV_FILE="$RUNTIME_DIR/single-node.env"
readonly CONFIG_FILE="$RUNTIME_DIR/config.single.yaml"
readonly BINARY_FILE="$RUNTIME_DIR/ai-ops"

# Effective settings; load_env() refreshes them from $ENV_FILE and the caller's
# environment, so they are intentionally not readonly.
PROJECT_NAME="${AI_OPS_PROJECT:-ai-ops-single}"
APP_PORT="${AI_OPS_APP_PORT:-18080}"
DB_PORT="${AI_OPS_DB_PORT:-15432}"
REDIS_PORT="${AI_OPS_REDIS_PORT:-16379}"
DB_USER="${AI_OPS_DB_USER:-aiops}"
DB_NAME="${AI_OPS_DB_NAME:-ai_ops}"
DB_PASSWORD="${AI_OPS_DB_PASSWORD:-aiops123}"

# Print an informational line to stdout.
log() {
  printf '[ai-ops] %s\n' "$*"
}

# Print an error line to stderr and exit with status 1.
fail() {
  printf '[ai-ops][ERROR] %s\n' "$*" >&2
  exit 1
}

# Abort unless the command named by $1 is on PATH.
need_cmd() {
  command -v "$1" >/dev/null 2>&1 || fail "missing command: $1"
}

# Print the container engine to use: docker when its daemon answers
# `docker info`, otherwise podman when installed; abort when neither applies.
engine() {
  if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
    printf 'docker\n'
  elif command -v podman >/dev/null 2>&1; then
    printf 'podman\n'
  else
    fail "docker or podman is required"
  fi
}

# Print the compose command for engine $1. The result may be two words
# ("docker compose"), so callers must split it before executing.
compose_cmd() {
  local eng="$1"
  if [[ "$eng" == docker ]]; then
    if docker compose version >/dev/null 2>&1; then
      printf 'docker compose\n'
    elif command -v docker-compose >/dev/null 2>&1; then
      printf 'docker-compose\n'
    else
      fail "docker compose plugin or docker-compose is required"
    fi
  elif command -v podman-compose >/dev/null 2>&1; then
    printf 'podman-compose\n'
  else
    fail "podman-compose is required for podman mode"
  fi
}

# Print $1 random bytes as lowercase hex, via openssl when available and
# /dev/urandom otherwise.
rand_hex() {
  if command -v openssl >/dev/null 2>&1; then
    openssl rand -hex "$1"
  else
    head -c "$1" /dev/urandom | od -An -tx1 | tr -d ' \n'
  fi
}

# Source $ENV_FILE (exporting everything it defines), then re-apply any
# non-empty AI_OPS_* values the caller already had in the environment so that
# explicit overrides beat the file, and refresh the effective settings.
load_env() {
  local -a keys=(
    AI_OPS_PROJECT AI_OPS_APP_PORT AI_OPS_DB_PORT AI_OPS_REDIS_PORT
    AI_OPS_DB_USER AI_OPS_DB_PASSWORD AI_OPS_DB_NAME AI_OPS_JWT_SECRET
    AI_OPS_METRICS_AUTH AI_OPS_POSTGRES_IMAGE AI_OPS_REDIS_IMAGE
    AI_OPS_RUNTIME_IMAGE
  )
  # Parallel indexed arrays instead of an associative array: this works on
  # bash 3.2 and never expands an empty array under `set -u`.
  local -a kept_names=() kept_values=()
  local key val i

  for key in "${keys[@]}"; do
    val="${!key-}"
    if [[ -n "$val" ]]; then
      kept_names+=("$key")
      kept_values+=("$val")
    fi
  done

  if [[ -f "$ENV_FILE" ]]; then
    set -a
    # shellcheck disable=SC1090
    source "$ENV_FILE"
    set +a
  fi

  for ((i = 0; i < ${#kept_names[@]}; i++)); do
    export "${kept_names[i]}=${kept_values[i]}"
  done

  PROJECT_NAME="${AI_OPS_PROJECT:-$PROJECT_NAME}"
  APP_PORT="${AI_OPS_APP_PORT:-$APP_PORT}"
  DB_PORT="${AI_OPS_DB_PORT:-$DB_PORT}"
  REDIS_PORT="${AI_OPS_REDIS_PORT:-$REDIS_PORT}"
  DB_USER="${AI_OPS_DB_USER:-$DB_USER}"
  DB_NAME="${AI_OPS_DB_NAME:-$DB_NAME}"
  DB_PASSWORD="${AI_OPS_DB_PASSWORD:-$DB_PASSWORD}"
}

# Create the runtime and backup directories and, when $ENV_FILE does not exist
# yet, generate it with owner-only permissions.
# NOTE(review): heredoc body reconstructed from the key list in load_env; the
# secret lengths are assumptions and the *_IMAGE keys are deliberately not
# written because their defaults are unknown.
write_env_if_missing() {
  mkdir -p "$RUNTIME_DIR" "$BACKUP_DIR"
  if [[ -f "$ENV_FILE" ]]; then
    return 0
  fi

  local jwt_secret metrics_auth
  jwt_secret="${AI_OPS_JWT_SECRET:-$(rand_hex 32)}"
  metrics_auth="${AI_OPS_METRICS_AUTH:-$(rand_hex 16)}"

  # Subshell keeps the restrictive umask from leaking into files created
  # later in this process (binary, backups).
  (
    umask 077
    cat >"$ENV_FILE" <<EOF_ENV
AI_OPS_PROJECT=$PROJECT_NAME
AI_OPS_APP_PORT=$APP_PORT
AI_OPS_DB_PORT=$DB_PORT
AI_OPS_REDIS_PORT=$REDIS_PORT
AI_OPS_DB_USER=$DB_USER
AI_OPS_DB_PASSWORD=$DB_PASSWORD
AI_OPS_DB_NAME=$DB_NAME
AI_OPS_JWT_SECRET=$jwt_secret
AI_OPS_METRICS_AUTH=$metrics_auth
EOF_ENV
  )
}

# Render $CONFIG_FILE from the effective settings (always overwrites).
# NOTE(review): reconstructed. The YAML schema, in-container host names and
# container-side ports below are placeholders, not recovered content; replace
# this heredoc with the original from version control.
write_config() {
  load_env
  mkdir -p "$RUNTIME_DIR"
  : "${AI_OPS_JWT_SECRET:?AI_OPS_JWT_SECRET is not set; check $ENV_FILE}"

  (
    umask 077 # the file embeds the DB password and JWT secret
    cat >"$CONFIG_FILE" <<EOF_CONFIG
server:
  port: 8080
database:
  host: postgres
  port: 5432
  user: "$DB_USER"
  password: "$DB_PASSWORD"
  name: "$DB_NAME"
redis:
  addr: "redis:6379"
auth:
  jwt_secret: "$AI_OPS_JWT_SECRET"
metrics:
  auth: "${AI_OPS_METRICS_AUTH:-}"
EOF_CONFIG
  )
}

# Build the application binary into $BINARY_FILE.
# NOTE(review): reconstructed. The package path (".") and the linux/static
# build settings are assumptions — confirm against the original.
build_binary() {
  need_cmd go
  mkdir -p "$RUNTIME_DIR"
  (cd "$ROOT_DIR" && CGO_ENABLED=0 GOOS=linux go build -o "$BINARY_FILE" .)
}

# Run the detected compose tool against this project with the given arguments.
# NOTE(review): reconstructed from call sites; the original flags are unknown.
# load_env exports the env-file values, so compose sees them via environment.
compose() {
  load_env
  local eng tool
  local -a argv=()
  eng="$(engine)"
  tool="$(compose_cmd "$eng")"
  read -r -a argv <<<"$tool" # "docker compose" must become two words
  "${argv[@]}" -p "$PROJECT_NAME" -f "$COMPOSE_FILE" "$@"
}

# Run `<engine> exec` with the given arguments (optional flags, container
# name, command). NOTE(review): reconstructed from call sites.
container_exec() {
  local eng
  eng="$(engine)"
  "$eng" exec "$@"
}

# GET path $1 from the local app and print the response body; non-2xx fails.
# NOTE(review): reconstructed from call sites.
curl_json() {
  curl -fsS --max-time 5 "http://127.0.0.1:${APP_PORT}$1"
}

# Poll the readiness endpoint once per second; on timeout dump recent app
# logs and abort.
# NOTE(review): the loop head was lost; the 60-attempt budget and the probed
# path are assumptions. Everything from the `then` onward is original.
wait_ready() {
  load_env
  local -r max_attempts=60
  local attempt
  for ((attempt = 0; attempt < max_attempts; attempt++)); do
    if curl_json /actuator/health/ready >/dev/null 2>&1; then
      log "ready: http://127.0.0.1:${APP_PORT}"
      return 0
    fi
    sleep 1
  done
  compose logs --tail=120 ai-ops || true
  fail "service did not become ready"
}

# Generate the env file (if missing) and render the config file.
cmd_init() {
  write_env_if_missing
  write_config
  log "runtime initialized under $RUNTIME_DIR"
}

# Initialize, build, bring the stack up, wait for readiness, run smoke checks.
cmd_start() {
  cmd_init
  build_binary
  compose up -d
  wait_ready
  cmd_smoke
}

# Stop and remove the project's containers.
cmd_stop() {
  compose down
}

# Restart only the app container and wait until it is ready again.
cmd_restart() {
  compose restart ai-ops
  wait_ready
}

# Show compose status plus the readiness JSON (best effort).
cmd_status() {
  compose ps
  curl_json /actuator/health/ready || true
}

# Show logs for $SERVICE (default ai-ops), last $TAIL lines (default 200).
cmd_logs() {
  compose logs --tail="${TAIL:-200}" "${SERVICE:-ai-ops}"
}

# Smoke test: health endpoints, login, three authenticated APIs, dashboard and
# OpenAPI document. Login credentials default to admin/admin and can be
# overridden with AI_OPS_SMOKE_USER / AI_OPS_SMOKE_PASSWORD.
cmd_smoke() {
  load_env
  need_cmd curl
  need_cmd python3

  local base="http://127.0.0.1:${APP_PORT}"
  local user="${AI_OPS_SMOKE_USER:-admin}"
  local password="${AI_OPS_SMOKE_PASSWORD:-admin}"
  local payload token path

  log "health"
  for path in /health /actuator/health/ready; do
    curl_json "$path" >/dev/null || fail "health check failed: $path"
  done

  log "login"
  # Build the JSON with python so arbitrary credentials are escaped correctly;
  # pass them via environment/stdin rather than argv.
  payload="$(SMOKE_USER="$user" SMOKE_PASSWORD="$password" python3 -c \
    'import json,os; print(json.dumps({"username": os.environ["SMOKE_USER"], "password": os.environ["SMOKE_PASSWORD"]}))')"
  token="$(printf '%s' "$payload" \
    | curl -fsS --max-time 5 -X POST "$base/api/v1/ai-ops/login" \
      -H 'Content-Type: application/json' --data-binary @- \
    | python3 -c 'import sys,json; d=json.load(sys.stdin); print((d.get("data") or d).get("token", ""))')" \
    || fail "login request failed"
  [[ -n "$token" ]] || fail "login did not return token"

  log "authenticated APIs"
  for path in \
    '/api/v1/ai-ops/alerts?page=1&page_size=5' \
    /api/v1/ai-ops/rules \
    /api/v1/ai-ops/channels; do
    curl -fsS --max-time 5 -H "Authorization: Bearer $token" "$base$path" >/dev/null \
      || fail "authenticated request failed: $path"
  done

  for path in /ops/dashboard /openapi.json; do
    curl_json "$path" >/dev/null || fail "request failed: $path"
  done
  log "SMOKE_OK"
}

# Dump the database from the postgres container into a timestamped .sql.gz
# under $BACKUP_DIR. The dump is written to a .partial file first so a failed
# run never leaves something that looks like a finished backup.
cmd_backup() {
  load_env
  mkdir -p "$BACKUP_DIR"
  local ts out partial
  ts="$(date +%Y%m%d-%H%M%S)"
  out="$BACKUP_DIR/ai_ops_${ts}.sql.gz"
  partial="$out.partial"

  log "creating database backup: $out"
  if ! container_exec "${PROJECT_NAME}-postgres" pg_dump -U "$DB_USER" "$DB_NAME" \
    | gzip >"$partial"; then
    rm -f -- "$partial"
    fail "pg_dump failed; no backup written"
  fi
  # gzip output is never zero bytes, even for empty input, so inspect the
  # decompressed payload instead of the file size.
  if [[ -z "$(gzip -dc -- "$partial" 2>/dev/null | head -c 1)" ]]; then
    rm -f -- "$partial"
    fail "empty backup: $out"
  fi
  mv -- "$partial" "$out"
  log "BACKUP_OK $out"
}

# Restore gzipped SQL backup $1: stop the app, recreate the public schema,
# replay the dump, start the app and smoke-test it. The archive is verified
# before anything destructive happens.
cmd_restore() {
  local file="${1:-}"
  [[ -n "$file" && -f "$file" ]] || fail "usage: $0 restore backups/file.sql.gz"
  gzip -t -- "$file" 2>/dev/null || fail "not a valid gzip archive: $file"
  load_env

  log "restoring from $file"
  compose stop ai-ops
  container_exec "${PROJECT_NAME}-postgres" \
    psql -v ON_ERROR_STOP=1 -U "$DB_USER" "$DB_NAME" \
    -c 'DROP SCHEMA public CASCADE; CREATE SCHEMA public;'
  gzip -dc -- "$file" \
    | container_exec -i "${PROJECT_NAME}-postgres" \
      psql -v ON_ERROR_STOP=1 -U "$DB_USER" "$DB_NAME"
  compose start ai-ops
  wait_ready
  cmd_smoke
  log "RESTORE_OK"
}

# Bring the data services up first, then the app, and smoke-test the result.
cmd_recover() {
  log "recovering single-node stack"
  compose up -d postgres redis
  compose up -d ai-ops
  wait_ready
  cmd_smoke
  log "RECOVER_OK"
}

# Check prerequisites, list listeners on the configured ports, render config.
cmd_doctor() {
  load_env # so the port check uses the ports from the env file, if any
  log "doctor: commands"
  need_cmd go
  need_cmd curl
  need_cmd python3
  local eng
  eng="$(engine)"
  compose_cmd "$eng" >/dev/null

  log "doctor: ports"
  # Informational only: no match (or no `ss`) is not an error.
  ss -ltn 2>/dev/null | grep -E ":(${APP_PORT}|${DB_PORT}|${REDIS_PORT}) " || true

  log "doctor: config"
  cmd_init
  log "DOCTOR_OK"
}

# Print the command overview.
usage() {
  cat <<'EOF_USAGE'
Usage: scripts/aiops-single-node.sh <command> [args]

Commands:
  init            Generate .runtime/single-node.env and config.single.yaml
  start           Build binary, start DB/Redis/App, wait ready, run smoke
  stop            Stop and remove containers, keep volumes
  restart         Restart app container and wait ready
  status          Show compose status and readiness JSON
  logs            Show app logs; override SERVICE=postgres|redis|ai-ops TAIL=300
  smoke           Run health/login/API/dashboard/openapi smoke checks
  backup          Create backups/ai_ops_<timestamp>.sql.gz via pg_dump
  restore <file>  Restore a gzipped SQL backup, restart app, run smoke
  recover         Recreate stopped containers from existing volumes and smoke test
  doctor          Check local prerequisites and render runtime config
EOF_USAGE
}

# Dispatch the first argument to its cmd_* handler. An explicit help request
# exits 0; a missing or unknown command prints usage to stderr and exits 2.
main() {
  local command="${1:-}"
  if (($# > 0)); then
    shift
  fi
  case "$command" in
    init) cmd_init ;;
    start) cmd_start ;;
    stop) cmd_stop ;;
    restart) cmd_restart ;;
    status) cmd_status ;;
    logs) cmd_logs ;;
    smoke) cmd_smoke ;;
    backup) cmd_backup "$@" ;;
    restore) cmd_restore "$@" ;;
    recover) cmd_recover ;;
    doctor) cmd_doctor ;;
    help | -h | --help) usage ;;
    *)
      usage >&2
      exit 2
      ;;
  esac
}

main "$@"