# 部署和运维指南
|
||
|
||
## 概述
|
||
|
||
本文档描述用户管理系统的部署方案和运维规范,包括容器化部署、集群部署、监控告警、日志管理等。
|
||
|
||
---
|
||
|
||
## 1. 部署方案
|
||
|
||
### 1.1 系统架构
|
||
|
||
```
|
||
┌─────────────────────────────────────────────────────────┐
|
||
│ 负载均衡 (Nginx) │
|
||
└────────────────────┬────────────────────────────────────┘
|
||
│
|
||
┌────────────┴────────────┐
|
||
│ │
|
||
┌───────▼────────┐ ┌────────▼────────┐
|
||
│ 应用实例 1 │ │ 应用实例 N │
|
||
│ (Port 8080) │ │ (Port 8080) │
|
||
└───────┬────────┘ └────────┬────────┘
|
||
│ │
|
||
└───────────┬───────────┘
|
||
│
|
||
┌───────────┴───────────┐
|
||
│ │
|
||
┌───────▼────────┐ ┌────────▼────────┐
|
||
│ MySQL │ │ Redis │
|
||
│ (主从复制) │ │ (哨兵模式) │
|
||
└────────────────┘ └─────────────────┘
|
||
```
|
||
|
||
---
|
||
|
||
### 1.2 Docker 部署
|
||
|
||
#### 单机 Docker 部署
|
||
|
||
**docker-compose.yml(单机版)**
|
||
|
||
```yaml
|
||
version: '3.8'
|
||
|
||
services:
|
||
user-management:
|
||
image: user-management-system:1.0.0
|
||
container_name: user-ms
|
||
ports:
|
||
- "8080:8080"
|
||
volumes:
|
||
- ./data:/app/data
|
||
- ./config:/app/config
|
||
- ./logs:/app/logs
|
||
environment:
|
||
- SPRING_PROFILES_ACTIVE=docker
|
||
- DATABASE_TYPE=sqlite
|
||
- DATABASE_PATH=/app/data/user_management.db
|
||
restart: unless-stopped
|
||
healthcheck:
|
||
test: ["CMD", "curl", "-f", "http://localhost:8080/health/ready"]
|
||
interval: 30s
|
||
timeout: 10s
|
||
retries: 3
|
||
start_period: 40s
|
||
```
|
||
|
||
**启动命令**
|
||
|
||
```bash
|
||
# 启动
|
||
docker-compose up -d
|
||
|
||
# 查看日志
|
||
docker-compose logs -f
|
||
|
||
# 停止
|
||
docker-compose down
|
||
|
||
# 停止并删除命名卷/匿名卷(注意:-v 不会删除 ./data、./logs 等绑定挂载目录,如需清空数据请手动删除)
|
||
docker-compose down -v
|
||
```
|
||
|
||
#### 集群 Docker 部署
|
||
|
||
#### 目录结构
|
||
|
||
```
|
||
deployment/
|
||
├── docker/
|
||
│ ├── auth-service/
|
||
│ │ └── Dockerfile
|
||
│ ├── user-service/
|
||
│ │ └── Dockerfile
|
||
│ ├── permission-service/
|
||
│ │ └── Dockerfile
|
||
│ └── gateway/
|
||
│ └── Dockerfile
|
||
├── docker-compose.yml
|
||
├── docker-compose.prod.yml
|
||
└── init/
|
||
└── init.sql
|
||
```
|
||
|
||
#### Dockerfile 示例(Go)
|
||
|
||
```dockerfile
|
||
# 构建阶段
|
||
FROM golang:1.21-alpine AS builder
|
||
|
||
WORKDIR /app
|
||
|
||
# 复制依赖文件
|
||
COPY go.mod go.sum ./
|
||
RUN go mod download
|
||
|
||
# 复制源代码
|
||
COPY . .
|
||
|
||
# 编译
|
||
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \
|
||
-ldflags="-w -s" \
|
||
-o user-service \
|
||
./cmd/user-service
|
||
|
||
# 运行阶段
|
||
FROM alpine:latest
|
||
|
||
RUN apk --no-cache add ca-certificates tzdata
|
||
|
||
WORKDIR /app
|
||
|
||
COPY --from=builder /app/user-service .
|
||
|
||
EXPOSE 8080
|
||
|
||
CMD ["./user-service"]
|
||
```
|
||
|
||
#### docker-compose.yml
|
||
|
||
```yaml
|
||
version: '3.8'
|
||
|
||
services:
|
||
mysql:
|
||
image: mysql:8.0
|
||
container_name: user-ms-mysql
|
||
environment:
|
||
MYSQL_ROOT_PASSWORD: root_password
|
||
MYSQL_DATABASE: user_management
|
||
MYSQL_USER: app_user
|
||
MYSQL_PASSWORD: app_password
|
||
ports:
|
||
- "3306:3306"
|
||
volumes:
|
||
- mysql-data:/var/lib/mysql
|
||
- ./init/init.sql:/docker-entrypoint-initdb.d/init.sql
|
||
networks:
|
||
- user-ms-network
|
||
|
||
redis:
|
||
image: redis:7-alpine
|
||
container_name: user-ms-redis
|
||
ports:
|
||
- "6379:6379"
|
||
volumes:
|
||
- redis-data:/data
|
||
networks:
|
||
- user-ms-network
|
||
|
||
auth-service:
|
||
build:
|
||
context: ./docker/auth-service
|
||
container_name: user-ms-auth-service
|
||
environment:
|
||
- SPRING_PROFILES_ACTIVE=prod
|
||
- DB_HOST=mysql
|
||
- DB_PORT=3306
|
||
- DB_NAME=user_management
|
||
- DB_USER=app_user
|
||
- DB_PASSWORD=app_password
|
||
- REDIS_HOST=redis
|
||
- REDIS_PORT=6379
|
||
ports:
|
||
- "8081:8080"
|
||
depends_on:
|
||
- mysql
|
||
- redis
|
||
networks:
|
||
- user-ms-network
|
||
|
||
user-service:
|
||
build:
|
||
context: ./docker/user-service
|
||
container_name: user-ms-user-service
|
||
environment:
|
||
- SPRING_PROFILES_ACTIVE=prod
|
||
- DB_HOST=mysql
|
||
- DB_PORT=3306
|
||
- DB_NAME=user_management
|
||
- DB_USER=app_user
|
||
- DB_PASSWORD=app_password
|
||
- REDIS_HOST=redis
|
||
- REDIS_PORT=6379
|
||
ports:
|
||
- "8082:8080"
|
||
depends_on:
|
||
- mysql
|
||
- redis
|
||
networks:
|
||
- user-ms-network
|
||
|
||
permission-service:
|
||
build:
|
||
context: ./docker/permission-service
|
||
container_name: user-ms-permission-service
|
||
environment:
|
||
- SPRING_PROFILES_ACTIVE=prod
|
||
- DB_HOST=mysql
|
||
- DB_PORT=3306
|
||
- DB_NAME=user_management
|
||
- DB_USER=app_user
|
||
- DB_PASSWORD=app_password
|
||
- REDIS_HOST=redis
|
||
- REDIS_PORT=6379
|
||
ports:
|
||
- "8083:8080"
|
||
depends_on:
|
||
- mysql
|
||
- redis
|
||
networks:
|
||
- user-ms-network
|
||
|
||
gateway:
|
||
build:
|
||
context: ./docker/gateway
|
||
container_name: user-ms-gateway
|
||
environment:
|
||
- AUTH_SERVICE_URL=http://auth-service:8080
|
||
- USER_SERVICE_URL=http://user-service:8080
|
||
- PERMISSION_SERVICE_URL=http://permission-service:8080
|
||
ports:
|
||
- "8080:8080"
|
||
depends_on:
|
||
- auth-service
|
||
- user-service
|
||
- permission-service
|
||
networks:
|
||
- user-ms-network
|
||
|
||
prometheus:
|
||
image: prom/prometheus:latest
|
||
container_name: user-ms-prometheus
|
||
ports:
|
||
- "9090:9090"
|
||
volumes:
|
||
- ./deployment/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
|
||
- prometheus-data:/prometheus
|
||
networks:
|
||
- user-ms-network
|
||
|
||
grafana:
|
||
image: grafana/grafana:latest
|
||
container_name: user-ms-grafana
|
||
ports:
|
||
- "3000:3000"
|
||
environment:
|
||
- GF_SECURITY_ADMIN_PASSWORD=admin
|
||
volumes:
|
||
- grafana-data:/var/lib/grafana
|
||
- ./deployment/grafana/provisioning:/etc/grafana/provisioning
|
||
networks:
|
||
- user-ms-network
|
||
|
||
volumes:
|
||
mysql-data:
|
||
redis-data:
|
||
prometheus-data:
|
||
grafana-data:
|
||
|
||
networks:
|
||
user-ms-network:
|
||
driver: bridge
|
||
```
|
||
|
||
#### 启动命令
|
||
|
||
```bash
|
||
# 开发环境
|
||
docker-compose up -d
|
||
|
||
# 生产环境
|
||
docker-compose -f docker-compose.prod.yml up -d
|
||
|
||
# 查看日志
|
||
docker-compose logs -f
|
||
|
||
# 停止服务
|
||
docker-compose down
|
||
|
||
# 清理数据
|
||
docker-compose down -v
|
||
```
|
||
|
||
---
|
||
|
||
### 1.3 Kubernetes 部署
|
||
|
||
#### Helm Charts 结构
|
||
|
||
```
|
||
deployment/kubernetes/helm/user-management-system/
|
||
├── Chart.yaml
|
||
├── values.yaml
|
||
├── values-prod.yaml
|
||
└── templates/
|
||
├── _helpers.tpl
|
||
├── deployment.yaml
|
||
├── service.yaml
|
||
├── ingress.yaml
|
||
├── configmap.yaml
|
||
├── secret.yaml
|
||
├── hpa.yaml
|
||
└── pdb.yaml
|
||
```
|
||
|
||
#### values.yaml
|
||
|
||
```yaml
|
||
# 默认配置
|
||
replicaCount: 2
|
||
|
||
image:
|
||
repository: example.com/user-management-system
|
||
pullPolicy: IfNotPresent
|
||
tag: "1.0.0"
|
||
|
||
imagePullSecrets: []
|
||
nameOverride: ""
|
||
fullnameOverride: ""
|
||
|
||
serviceAccount:
|
||
create: true
|
||
annotations: {}
|
||
name: ""
|
||
|
||
podAnnotations: {}
|
||
|
||
podSecurityContext: {}
|
||
# fsGroup: 2000
|
||
|
||
securityContext: {}
|
||
# capabilities:
|
||
# drop:
|
||
# - ALL
|
||
# readOnlyRootFilesystem: true
|
||
# runAsNonRoot: true
|
||
# runAsUser: 1000
|
||
|
||
service:
|
||
type: ClusterIP
|
||
port: 8080
|
||
|
||
ingress:
|
||
enabled: true
|
||
className: "nginx"
|
||
annotations: {}
|
||
# kubernetes.io/ingress.class: nginx
|
||
# cert-manager.io/cluster-issuer: letsencrypt-prod
|
||
hosts:
|
||
- host: api.example.com
|
||
paths:
|
||
- path: /
|
||
pathType: Prefix
|
||
tls: []
|
||
# - secretName: user-ms-tls
|
||
# hosts:
|
||
# - api.example.com
|
||
|
||
resources:
|
||
limits:
|
||
cpu: 1000m
|
||
memory: 512Mi
|
||
requests:
|
||
cpu: 500m
|
||
memory: 256Mi
|
||
|
||
autoscaling:
|
||
enabled: true
|
||
minReplicas: 2
|
||
maxReplicas: 10
|
||
targetCPUUtilizationPercentage: 70
|
||
targetMemoryUtilizationPercentage: 80
|
||
|
||
nodeSelector: {}
|
||
|
||
tolerations: []
|
||
|
||
affinity: {}
|
||
|
||
# 数据库配置
|
||
database:
|
||
host: mysql-service
|
||
port: 3306
|
||
name: user_management
|
||
username: app_user
|
||
password: app_password
|
||
|
||
# Redis 配置
|
||
redis:
|
||
host: redis-service
|
||
port: 6379
|
||
password: ""
|
||
database: 0
|
||
|
||
# 环境变量
|
||
env:
|
||
- name: SPRING_PROFILES_ACTIVE
|
||
value: "prod"
|
||
- name: LOG_LEVEL
|
||
value: "info"
|
||
|
||
# 健康检查
|
||
livenessProbe:
|
||
httpGet:
|
||
path: /health/live
|
||
port: 8080
|
||
initialDelaySeconds: 30
|
||
periodSeconds: 10
|
||
timeoutSeconds: 5
|
||
failureThreshold: 3
|
||
|
||
readinessProbe:
|
||
httpGet:
|
||
path: /health/ready
|
||
port: 8080
|
||
initialDelaySeconds: 10
|
||
periodSeconds: 5
|
||
timeoutSeconds: 3
|
||
failureThreshold: 3
|
||
```
|
||
|
||
#### 部署命令
|
||
|
||
```bash
|
||
# 安装 Helm Chart
|
||
helm install user-ms ./user-management-system \
|
||
-f values-prod.yaml \
|
||
--namespace production \
|
||
--create-namespace
|
||
|
||
# 更新部署
|
||
helm upgrade user-ms ./user-management-system \
|
||
-f values-prod.yaml \
|
||
--namespace production
|
||
|
||
# 回滚
|
||
helm rollback user-ms 1 --namespace production
|
||
|
||
# 卸载
|
||
helm uninstall user-ms --namespace production
|
||
```
|
||
|
||
---
|
||
|
||
### 1.4 传统安装包部署
|
||
|
||
#### 目录结构
|
||
|
||
```
|
||
user-management-system-1.0.0/
|
||
├── bin/
|
||
│ ├── auth-service
|
||
│ ├── user-service
|
||
│ ├── permission-service
|
||
│ └── gateway
|
||
├── config/
|
||
│ ├── application.yml
|
||
│ └── application-prod.yml
|
||
├── lib/
|
||
│ ├── *.jar
|
||
│ └── *.so
|
||
├── scripts/
|
||
│ ├── install.sh
|
||
│ ├── start.sh
|
||
│ ├── stop.sh
|
||
│ └── restart.sh
|
||
└── README.md
|
||
```
|
||
|
||
#### 安装脚本(install.sh)
|
||
|
||
```bash
|
||
#!/bin/bash
|
||
|
||
set -e
|
||
|
||
echo "开始安装用户管理系统..."
|
||
|
||
# 检查 Java 环境
|
||
if ! command -v java &> /dev/null; then
|
||
echo "错误: 未检测到 Java 环境"
|
||
exit 1
|
||
fi
|
||
|
||
# 检查 MySQL
|
||
if ! command -v mysql &> /dev/null; then
|
||
echo "错误: 未检测到 MySQL"
|
||
exit 1
|
||
fi
|
||
|
||
# 创建用户
|
||
if ! id -u userms &> /dev/null; then
|
||
echo "创建系统用户 userms..."
|
||
useradd -r -s /bin/false userms
|
||
fi
|
||
|
||
# 创建目录
|
||
INSTALL_DIR="/opt/user-management-system"
|
||
echo "安装目录: $INSTALL_DIR"
|
||
mkdir -p $INSTALL_DIR/{bin,config,lib,logs}
|
||
|
||
# 复制文件
|
||
echo "复制文件..."
|
||
cp -r bin/* $INSTALL_DIR/bin/
|
||
cp -r config/* $INSTALL_DIR/config/
|
||
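cp -r lib/* $INSTALL_DIR/lib/
# systemd 服务文件引用 $INSTALL_DIR/scripts/ 下的 start.sh / stop.sh,因此脚本也必须一并安装
mkdir -p $INSTALL_DIR/scripts
cp -r scripts/* $INSTALL_DIR/scripts/
chmod +x $INSTALL_DIR/scripts/*.sh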
|
||
|
||
# 设置权限
|
||
chown -R userms:userms $INSTALL_DIR
|
||
chmod +x $INSTALL_DIR/bin/*
|
||
chmod +x scripts/*.sh
|
||
|
||
# 创建服务文件
|
||
cat > /etc/systemd/system/user-ms.service <<EOF
|
||
[Unit]
|
||
Description=User Management System
|
||
After=network.target mysql.service
|
||
|
||
[Service]
|
||
Type=forking
|
||
User=userms
|
||
WorkingDirectory=$INSTALL_DIR
|
||
ExecStart=$INSTALL_DIR/scripts/start.sh
|
||
ExecStop=$INSTALL_DIR/scripts/stop.sh
|
||
Restart=on-failure
|
||
RestartSec=10
|
||
|
||
[Install]
|
||
WantedBy=multi-user.target
|
||
EOF
|
||
|
||
# 重载 systemd
|
||
systemctl daemon-reload
|
||
|
||
echo "安装完成!"
|
||
echo "请修改配置文件 $INSTALL_DIR/config/application-prod.yml"
|
||
echo "启动服务: systemctl start user-ms"
|
||
echo "设置开机启动: systemctl enable user-ms"
|
||
```
|
||
|
||
#### 启动脚本(start.sh)
|
||
|
||
```bash
|
||
#!/bin/bash
|
||
|
||
INSTALL_DIR="/opt/user-management-system"
|
||
LOG_DIR="$INSTALL_DIR/logs"
|
||
|
||
cd $INSTALL_DIR
|
||
|
||
echo "启动用户管理系统..."
|
||
|
||
# 启动认证服务
|
||
nohup $INSTALL_DIR/bin/auth-service \
|
||
--spring.config.location=$INSTALL_DIR/config/application-prod.yml \
|
||
> $LOG_DIR/auth-service.log 2>&1 &
|
||
AUTH_PID=$!
|
||
echo "认证服务启动 (PID: $AUTH_PID)"
|
||
|
||
# 启动用户服务
|
||
nohup $INSTALL_DIR/bin/user-service \
|
||
--spring.config.location=$INSTALL_DIR/config/application-prod.yml \
|
||
> $LOG_DIR/user-service.log 2>&1 &
|
||
USER_PID=$!
|
||
echo "用户服务启动 (PID: $USER_PID)"
|
||
|
||
# 启动权限服务
|
||
nohup $INSTALL_DIR/bin/permission-service \
|
||
--spring.config.location=$INSTALL_DIR/config/application-prod.yml \
|
||
> $LOG_DIR/permission-service.log 2>&1 &
|
||
PERM_PID=$!
|
||
echo "权限服务启动 (PID: $PERM_PID)"
|
||
|
||
# 启动网关
|
||
nohup $INSTALL_DIR/bin/gateway \
|
||
--spring.config.location=$INSTALL_DIR/config/application-prod.yml \
|
||
> $LOG_DIR/gateway.log 2>&1 &
|
||
GATEWAY_PID=$!
|
||
echo "网关启动 (PID: $GATEWAY_PID)"
|
||
|
||
# 保存 PID
|
||
echo $AUTH_PID > $LOG_DIR/auth-service.pid
|
||
echo $USER_PID > $LOG_DIR/user-service.pid
|
||
echo $PERM_PID > $LOG_DIR/permission-service.pid
|
||
echo $GATEWAY_PID > $LOG_DIR/gateway.pid
|
||
|
||
echo "启动完成!"
|
||
```
|
||
|
||
---
|
||
|
||
## 2. 监控与告警
|
||
|
||
### 2.1 Prometheus 配置
|
||
|
||
#### prometheus.yml
|
||
|
||
```yaml
|
||
global:
|
||
scrape_interval: 15s
|
||
evaluation_interval: 15s
|
||
|
||
alerting:
|
||
alertmanagers:
|
||
- static_configs:
|
||
- targets: ['alertmanager:9093']
|
||
|
||
rule_files:
|
||
- "alerts/*.yml"
|
||
|
||
scrape_configs:
|
||
- job_name: 'user-ms-auth'
|
||
static_configs:
|
||
- targets: ['auth-service:8080']
|
||
metrics_path: '/metrics'
|
||
|
||
- job_name: 'user-ms-user'
|
||
static_configs:
|
||
- targets: ['user-service:8080']
|
||
metrics_path: '/metrics'
|
||
|
||
- job_name: 'user-ms-permission'
|
||
static_configs:
|
||
- targets: ['permission-service:8080']
|
||
metrics_path: '/metrics'
|
||
|
||
- job_name: 'mysql'
|
||
static_configs:
|
||
- targets: ['mysql-exporter:9104']
|
||
|
||
- job_name: 'redis'
|
||
static_configs:
|
||
- targets: ['redis-exporter:9121']
|
||
```
|
||
|
||
#### 告警规则(alerts.yml)
|
||
|
||
```yaml
|
||
groups:
|
||
- name: user-ms-alerts
|
||
interval: 30s
|
||
rules:
|
||
# 高错误率告警
|
||
- alert: HighErrorRate
|
||
expr: sum by (instance) (rate(http_requests_total{status=~"5.."}[5m])) / sum by (instance) (rate(http_requests_total[5m])) > 0.05
|
||
for: 5m
|
||
labels:
|
||
severity: critical
|
||
annotations:
|
||
summary: "高错误率告警"
|
||
description: "{{ $labels.instance }} 的错误率超过 5%"
|
||
|
||
# 高响应时间告警
|
||
- alert: HighResponseTime
|
||
expr: histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m])) > 0.5
|
||
for: 5m
|
||
labels:
|
||
severity: warning
|
||
annotations:
|
||
summary: "高响应时间告警"
|
||
description: "{{ $labels.instance }} 的 P99 响应时间超过 500ms"
|
||
|
||
# CPU 使用率告警
|
||
- alert: HighCPUUsage
|
||
expr: rate(process_cpu_seconds_total[5m]) > 0.7
|
||
for: 5m
|
||
labels:
|
||
severity: warning
|
||
annotations:
|
||
summary: "高 CPU 使用率"
|
||
description: "{{ $labels.instance }} 的 CPU 使用率超过 70%"
|
||
|
||
# 内存使用率告警
|
||
- alert: HighMemoryUsage
|
||
expr: (jvm_memory_used_bytes{area="heap"} / jvm_memory_max_bytes{area="heap"}) > 0.8
|
||
for: 5m
|
||
labels:
|
||
severity: warning
|
||
annotations:
|
||
summary: "高内存使用率"
|
||
description: "{{ $labels.instance }} 的堆内存使用率超过 80%"
|
||
|
||
# 数据库连接告警
|
||
- alert: DatabaseConnectionPoolExhausted
|
||
expr: hikaricp_connections_active / hikaricp_connections_max > 0.9
|
||
for: 5m
|
||
labels:
|
||
severity: critical
|
||
annotations:
|
||
summary: "数据库连接池耗尽"
|
||
description: "{{ $labels.instance }} 的数据库连接池使用率超过 90%"
|
||
|
||
# 在线用户数异常
|
||
- alert: LowOnlineUsers
|
||
expr: system_online_users < 10
|
||
for: 10m
|
||
labels:
|
||
severity: warning
|
||
annotations:
|
||
summary: "在线用户数异常"
|
||
description: "在线用户数低于 10,可能存在服务异常"
|
||
```
|
||
|
||
---
|
||
|
||
### 2.2 Grafana 仪表盘
|
||
|
||
#### 核心指标面板
|
||
|
||
| 面板名称 | 指标 | 说明 |
|
||
|----------|------|------|
|
||
| 总用户数 | `system_total_users` | 系统总用户数 |
|
||
| 在线用户数 | `system_online_users` | 当前在线用户数 |
|
||
| 今日注册数 | `increase(user_register_total[1d])` | 最近 24 小时内的注册用户数(滑动窗口,非自然日) |
|
||
| 今日登录数 | `increase(user_login_total[1d])` | 最近 24 小时内的登录次数(滑动窗口,非自然日) |
|
||
| QPS | `rate(http_requests_total[1m])` | 每秒请求数 |
|
||
| 响应时间 (P99) | `histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m]))` | P99 响应时间 |
|
||
| 错误率 | `sum(rate(http_requests_total{status=~"5.."}[5m])) / sum(rate(http_requests_total[5m]))` | 5xx 请求占全部请求的比例 |
|
||
| CPU 使用率 | `rate(process_cpu_seconds_total[5m])` | CPU 使用率 |
|
||
| 内存使用率 | `jvm_memory_used_bytes{area="heap"} / jvm_memory_max_bytes{area="heap"}` | 内存使用率 |
|
||
|
||
---
|
||
|
||
### 2.3 日志管理
|
||
|
||
#### 日志配置(Logback)
|
||
|
||
```xml
|
||
<configuration>
|
||
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
|
||
<encoder>
|
||
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
|
||
</encoder>
|
||
</appender>
|
||
|
||
<appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
|
||
<file>logs/application.log</file>
|
||
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
|
||
<fileNamePattern>logs/application.%d{yyyy-MM-dd}.log</fileNamePattern>
|
||
<maxHistory>30</maxHistory>
|
||
<totalSizeCap>10GB</totalSizeCap>
|
||
</rollingPolicy>
|
||
<encoder>
|
||
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
|
||
</encoder>
|
||
</appender>
|
||
|
||
<root level="INFO">
|
||
<appender-ref ref="CONSOLE" />
|
||
<appender-ref ref="FILE" />
|
||
</root>
|
||
</configuration>
|
||
```
|
||
|
||
#### ELK 集成
|
||
|
||
```yaml
|
||
# Filebeat 配置
|
||
filebeat.inputs:
|
||
- type: log
|
||
enabled: true
|
||
paths:
|
||
- /opt/user-management-system/logs/*.log
|
||
fields:
|
||
app: user-management-system
|
||
env: production
|
||
|
||
output.elasticsearch:
|
||
hosts: ["elasticsearch:9200"]
|
||
indices:
|
||
- index: "user-ms-%{+yyyy.MM.dd}"
|
||
when.contains:
|
||
fields.app: "user-management-system"
|
||
|
||
setup.template.name: "user-ms"
|
||
setup.template.pattern: "user-ms-*"
|
||
```
|
||
|
||
---
|
||
|
||
## 3. 运维操作
|
||
|
||
### 3.1 日常巡检
|
||
|
||
#### 巡检清单
|
||
|
||
| 检查项 | 检查方法 | 正常值 | 异常处理 |
|
||
|--------|----------|--------|----------|
|
||
| 服务状态 | systemctl status | Active | 重启服务 |
|
||
| 磁盘空间 | df -h | 使用率 < 80% | 清理日志 |
|
||
| 内存使用 | free -h | 使用率 < 80% | 扩容或优化 |
|
||
| CPU 使用 | top | 使用率 < 70% | 扩容或优化 |
|
||
| 数据库连接 | SHOW PROCESSLIST | 连接数 < 100 | 优化连接池 |
|
||
| Redis 连接 | redis-cli info clients | 连接数正常 | 扩容 Redis |
|
||
| API 响应时间 | curl -w @curl-format.txt | < 500ms | 优化代码 |
|
||
| 错误日志 | tail -f error.log | 无新错误 | 排查问题 |
|
||
|
||
---
|
||
|
||
### 3.2 备份与恢复
|
||
|
||
#### 数据库备份
|
||
|
||
```bash
|
||
#!/bin/bash
|
||
# backup-mysql.sh
|
||
|
||
BACKUP_DIR="/backup/mysql"
|
||
DATE=$(date +%Y%m%d_%H%M%S)
|
||
DB_NAME="user_management"
|
||
DB_USER="root"
|
||
DB_PASSWORD="your_password"
|
||
|
||
mkdir -p $BACKUP_DIR
|
||
|
||
# 全量备份
|
||
# 注意:必须写成 ${DB_NAME}_${DATE};$DB_NAME_ 会被 Bash 解析为未定义变量 DB_NAME_,导致文件名丢失库名
mysqldump -u"$DB_USER" -p"$DB_PASSWORD" "$DB_NAME" | gzip > "$BACKUP_DIR/${DB_NAME}_${DATE}.sql.gz"

# 删除 7 天前的备份
find $BACKUP_DIR -name "*.sql.gz" -mtime +7 -delete

echo "备份完成: $BACKUP_DIR/${DB_NAME}_${DATE}.sql.gz"
|
||
```
|
||
|
||
#### 数据恢复
|
||
|
||
```bash
|
||
# 解压备份文件
|
||
gunzip user_management_20260310_120000.sql.gz
|
||
|
||
# 恢复数据库
|
||
mysql -u root -p user_management < user_management_20260310_120000.sql
|
||
```
|
||
|
||
---
|
||
|
||
### 3.3 版本升级
|
||
|
||
#### 升级流程
|
||
|
||
```bash
|
||
# 1. 备份数据库
|
||
./scripts/backup-mysql.sh
|
||
|
||
# 2. 停止服务
|
||
./scripts/stop.sh
|
||
|
||
# 3. 备份旧版本
|
||
cp -r /opt/user-management-system /opt/user-management-system.bak
|
||
|
||
# 4. 部署新版本
|
||
unzip user-management-system-1.1.0.zip -d /opt/
|
||
|
||
# 5. 执行数据库迁移
|
||
mysql -u root -p user_management < migration/1.1.0.sql
|
||
|
||
# 6. 启动服务
|
||
./scripts/start.sh
|
||
|
||
# 7. 验证服务
|
||
curl http://localhost:8080/health
|
||
curl http://localhost:8080/health/live
|
||
curl http://localhost:8080/health/ready
|
||
```
|
||
|
||
#### 回滚流程
|
||
|
||
```bash
|
||
# 1. 停止服务
|
||
./scripts/stop.sh
|
||
|
||
# 2. 删除新版本
|
||
rm -rf /opt/user-management-system
|
||
|
||
# 3. 恢复旧版本
|
||
mv /opt/user-management-system.bak /opt/user-management-system
|
||
|
||
# 4. 恢复数据库
|
||
gunzip -c /backup/mysql/user_management_20260310_120000.sql.gz | mysql -u root -p user_management
|
||
|
||
# 5. 启动服务
|
||
./scripts/start.sh
|
||
```
|
||
|
||
---
|
||
|
||
### 3.4 故障排查
|
||
|
||
#### 常见问题
|
||
|
||
| 问题 | 可能原因 | 排查方法 | 解决方案 |
|
||
|------|----------|----------|----------|
|
||
| 服务启动失败 | 端口被占用 | netstat -tunlp | 修改端口或停止占用进程 |
|
||
| 数据库连接失败 | 网络问题 | ping、telnet | 检查网络和防火墙 |
|
||
| 响应慢 | 数据库查询慢 | 慢查询日志 | 优化 SQL、加索引 |
|
||
| 内存溢出 | 内存泄漏 | jmap -heap | 优化代码、扩容 |
|
||
| 登录失败 | 验证码过期 | 检查 Redis | 调整验证码有效期 |
|
||
|
||
---
|
||
|
||
## 4. 性能优化
|
||
|
||
### 4.1 数据库优化
|
||
|
||
#### 索引优化
|
||
|
||
```sql
|
||
-- 查看慢查询
|
||
SHOW VARIABLES LIKE 'slow_query%';
|
||
SHOW VARIABLES LIKE 'long_query_time';
|
||
|
||
-- 分析慢查询
|
||
EXPLAIN SELECT * FROM users WHERE username = 'john_doe';
|
||
|
||
-- 添加索引
|
||
CREATE INDEX idx_username ON users(username);
|
||
CREATE INDEX idx_email ON users(email);
|
||
CREATE INDEX idx_phone ON users(phone);
|
||
```
|
||
|
||
#### 查询优化
|
||
|
||
```sql
|
||
-- 使用覆盖索引(前提:已建立包含 status、username、email 的联合索引,否则仍需回表)
|
||
SELECT id, username, email FROM users WHERE status = 1;
|
||
|
||
-- 避免 SELECT *
|
||
SELECT id, username FROM users WHERE id = ?;
|
||
|
||
-- 使用 LIMIT 分页
|
||
SELECT id, username, email FROM users ORDER BY id LIMIT 20 OFFSET 0;
|
||
```
|
||
|
||
---
|
||
|
||
### 4.2 Redis 优化
|
||
|
||
#### 缓存策略
|
||
|
||
```yaml
|
||
cache:
|
||
# 用户信息缓存
|
||
user_info:
|
||
ttl: 3600 # 1 小时
|
||
max_size: 10000
|
||
|
||
# 权限信息缓存
|
||
user_permissions:
|
||
ttl: 1800 # 30 分钟
|
||
max_size: 5000
|
||
|
||
# Token 黑名单
|
||
token_blacklist:
|
||
ttl: 2592000 # 30 天
|
||
max_size: 50000
|
||
```
|
||
|
||
#### Redis 配置
|
||
|
||
```ini
|
||
# redis.conf
|
||
maxmemory 2gb
|
||
maxmemory-policy allkeys-lru
|
||
save 900 1
|
||
save 300 10
|
||
save 60 10000
|
||
```
|
||
|
||
---
|
||
|
||
### 4.3 应用优化
|
||
|
||
#### JVM 参数优化
|
||
|
||
```bash
|
||
# JVM 参数必须位于 -jar 之前;写在 app.jar 之后会被当作应用程序参数而不生效
java \
  -Xms512m \
  -Xmx2g \
  -XX:+UseG1GC \
  -XX:MaxGCPauseMillis=200 \
  -XX:+HeapDumpOnOutOfMemoryError \
  -XX:HeapDumpPath=/opt/logs/heap_dump.hprof \
  -jar app.jar
|
||
```
|
||
|
||
#### 连接池优化
|
||
|
||
```yaml
|
||
datasource:
|
||
hikari:
|
||
maximum-pool-size: 50
|
||
minimum-idle: 10
|
||
connection-timeout: 30000
|
||
idle-timeout: 600000
|
||
max-lifetime: 1800000
|
||
```
|
||
|
||
---
|
||
|
||
## 5. 安全加固
|
||
|
||
### 5.1 防火墙配置
|
||
|
||
```bash
|
||
# 只开放必要端口
|
||
firewall-cmd --permanent --add-port=80/tcp
|
||
firewall-cmd --permanent --add-port=443/tcp
|
||
firewall-cmd --permanent --add-port=22/tcp
|
||
firewall-cmd --reload
|
||
|
||
# 限制数据库访问
|
||
firewall-cmd --permanent --add-rich-rule='rule family="ipv4" source address="10.0.0.0/8" port port="3306" protocol="tcp" accept'
|
||
firewall-cmd --reload
|
||
```
|
||
|
||
---
|
||
|
||
### 5.2 SSL/TLS 配置
|
||
|
||
```nginx
|
||
server {
|
||
listen 443 ssl http2;
|
||
server_name api.example.com;
|
||
|
||
ssl_certificate /path/to/cert.pem;
|
||
ssl_certificate_key /path/to/key.pem;
|
||
|
||
ssl_protocols TLSv1.2 TLSv1.3;
|
||
ssl_ciphers HIGH:!aNULL:!MD5;
|
||
ssl_prefer_server_ciphers on;
|
||
|
||
ssl_session_cache shared:SSL:10m;
|
||
ssl_session_timeout 10m;
|
||
|
||
add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
|
||
}
|
||
```
|
||
|
||
---
|
||
|
||
## 6. 监控告警联系人
|
||
|
||
| 级别 | 联系人 | 通知方式 |
|
||
|------|--------|----------|
|
||
| Critical | 运维团队 | 电话 + 短信 + 邮件 |
|
||
| Warning | 开发团队 | 邮件 + 钉钉/企业微信 |
|
||
| Info | 项目经理 | 邮件 |
|
||
|
||
---
|
||
|
||
*本文档持续更新中,如有疑问请联系运维团队。*
|