#!/bin/bash
# run_daily.sh - daily data collection and report generation pipeline.
# Sprint 3: full orchestration script
# (collect -> quality check -> generate report -> archive -> notify).
#
# Environment:
#   DATABASE_URL    psql connection string (defaults to the local socket DSN below)
#   FEISHU_WEBHOOK  webhook URL for failure alerts; alerts are skipped when empty
set -euo pipefail

PROJECT_DIR="/home/long/project/llm-intelligence"
DB_URL="${DATABASE_URL:-host=/var/run/postgresql dbname=llm_intelligence user=long sslmode=disable}"
REPORT_DATE=$(date +%Y-%m-%d)
LOG_FILE="/tmp/llm_hub_daily_${REPORT_DATE}.log"
FEISHU_WEBHOOK="${FEISHU_WEBHOOK:-}"

#######################################
# Write a timestamped message to stdout and append it to LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
#######################################
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG_FILE"
}

#######################################
# Log an error, fall back to yesterday's report, send an alert when a
# webhook is configured, then exit with status 1.
# Globals:   FEISHU_WEBHOOK (read)
# Arguments: $1 - error description
#######################################
error_exit() {
  log "❌ 错误: $1"
  # The fallback is best-effort: a failure here must not prevent the alert
  # below from being sent, so it is guarded instead of left to `set -e`.
  fallback_report || log "⚠️ 降级报告生成失败"
  if [[ -n "$FEISHU_WEBHOOK" ]]; then
    send_alert "$1"
  fi
  exit 1
}

#######################################
# Degraded mode: copy yesterday's Markdown report to today's path, rewrite
# yesterday's date to today's and prefix the first line with a
# "[数据延迟]" (data delayed) marker.
# Globals:   PROJECT_DIR, REPORT_DATE (read)
# Returns:   0 when the copy was made or no report exists for yesterday,
#            1 when date/cp/sed failed
#######################################
fallback_report() {
  # Declaration is separate from assignment so the exit status of `date`
  # is not masked by `local`.
  local yesterday yesterday_md today_md
  yesterday=$(date -d "yesterday" +%Y-%m-%d) || return 1
  yesterday_md="${PROJECT_DIR}/reports/daily/daily_report_${yesterday}.md"
  today_md="${PROJECT_DIR}/reports/daily/daily_report_${REPORT_DATE}.md"

  if [[ ! -f "$yesterday_md" ]]; then
    log "⚠️ 无昨日报告可供复制"
    return 0
  fi

  # Explicit checks: this function is invoked on the left of `||`, where
  # `set -e` does not apply.
  cp -- "$yesterday_md" "$today_md" || return 1
  sed -i \
    -e "s/${yesterday}/${REPORT_DATE}/g" \
    -e "1s/^/# [数据延迟] /" \
    -- "$today_md" || return 1
  log "⚠️ 已复制昨日报告并标记[数据延迟]"
}

#######################################
# Post a failure alert as a text message to the Feishu webhook.
# Delivery is best-effort; a failed request is logged, never fatal.
# Globals:   FEISHU_WEBHOOK, REPORT_DATE, LOG_FILE (read)
# Arguments: $1 - error description
#######################################
send_alert() {
  local msg="$1"
  local payload

  # Escape characters that would otherwise break the hand-built JSON string.
  msg=${msg//\\/\\\\}
  msg=${msg//\"/\\\"}
  msg=${msg//$'\n'/\\n}

  payload="{\"msg_type\":\"text\",\"content\":{\"text\":\"🚨 LLM Hub 日报失败\\n日期: ${REPORT_DATE}\\n错误: ${msg}\\n请检查日志: ${LOG_FILE}\"}}"

  # --fail turns HTTP errors into a non-zero exit; --max-time keeps a hung
  # endpoint from blocking the pipeline's exit.
  if curl -fsS --max-time 10 -X POST -H "Content-Type: application/json" \
    -d "$payload" \
    "$FEISHU_WEBHOOK" > /dev/null 2>> "$LOG_FILE"; then
    log "📢 飞书告警已发送"
  else
    log "⚠️ 飞书告警发送失败"
  fi
}

# ---- Main flow ----
log "🚀 开始每日流水线: ${REPORT_DATE}"

cd "$PROJECT_DIR" || error_exit "无法进入项目目录: ${PROJECT_DIR}"

# 1. Data collection
log "1️⃣ 数据采集..."
if ! go run scripts/fetch_openrouter.go >> "$LOG_FILE" 2>&1; then
  error_exit "数据采集失败"
fi
log "✅ 数据采集完成"

# 2. Data quality check
log "2️⃣ 数据质量检查..."
# The query runs inside an `if` so a psql failure goes through error_exit
# (fallback + alert) instead of aborting silently under `set -e`.
if ! MODEL_COUNT=$(psql "$DB_URL" -t -A \
  -c "SELECT COUNT(*) FROM models WHERE deleted_at IS NULL" 2>> "$LOG_FILE"); then
  error_exit "模型数量查询失败"
fi
MODEL_COUNT=${MODEL_COUNT//[[:space:]]/}
# An empty or non-numeric value would make the numeric test below error out,
# and `if` would treat that error as "check passed".
if [[ ! "$MODEL_COUNT" =~ ^[0-9]+$ ]]; then
  error_exit "模型数量无效: '${MODEL_COUNT}'"
fi
if [ "$MODEL_COUNT" -lt 10 ]; then
  error_exit "模型数量不足: ${MODEL_COUNT} < 10"
fi
log "✅ 数据质量检查通过 (模型数: ${MODEL_COUNT})"

# 3. Generate the daily report
log "3️⃣ 生成日报..."
export DATABASE_URL="$DB_URL"
if ! go run scripts/generate_daily_report.go >> "$LOG_FILE" 2>&1; then
  error_exit "日报生成失败"
fi
log "✅ 日报生成完成"

# 4. Archive
log "4️⃣ 归档报告..."
# Archive directory is derived from REPORT_DATE (YYYY-MM-DD) rather than a
# second `date` call, so a run that crosses midnight at a month boundary
# still archives under the month the report belongs to.
ARCHIVE_DIR="reports/daily/${REPORT_DATE:0:4}/${REPORT_DATE:5:2}"
mkdir -p "$ARCHIVE_DIR"

# Archiving stays best-effort, but a failed copy is now recorded in the log
# instead of being discarded.
for report_file in \
  "reports/daily/daily_report_${REPORT_DATE}.md" \
  "reports/daily/html/daily_report_${REPORT_DATE}.html"; do
  if ! cp -- "$report_file" "$ARCHIVE_DIR/" 2>> "$LOG_FILE"; then
    log "⚠️ 归档失败: ${report_file}"
  fi
done
log "✅ 归档完成"

# 5. Upsert the daily_report row
log "5️⃣ 更新日报记录..."
# Values are passed as psql variables and interpolated as quoted SQL literals
# (:'name') instead of being spliced into the SQL text by the shell.
# ON_ERROR_STOP makes psql exit non-zero when the statement fails.
if ! psql "$DB_URL" -v ON_ERROR_STOP=1 \
  -v report_date="$REPORT_DATE" \
  -v model_count="$MODEL_COUNT" \
  -v output_path="reports/daily/daily_report_${REPORT_DATE}.md" \
  >> "$LOG_FILE" 2>&1 <<'SQL'
INSERT INTO daily_report (report_date, status, model_count, output_path, created_at, updated_at)
VALUES (:'report_date', 'generated', :'model_count', :'output_path', NOW(), NOW())
ON CONFLICT (report_date) DO UPDATE SET
  status = 'generated',
  model_count = EXCLUDED.model_count,
  output_path = EXCLUDED.output_path,
  updated_at = NOW();
SQL
then
  # error_exit is deliberately not used here: its fallback would overwrite
  # the report that was just generated with a copy of yesterday's.
  log "❌ 错误: 日报记录更新失败"
  if [[ -n "${FEISHU_WEBHOOK:-}" ]]; then
    send_alert "日报记录更新失败"
  fi
  exit 1
fi
log "✅ 日报记录更新完成"

log "🎉 每日流水线全部完成!"
log "📄 Markdown: reports/daily/daily_report_${REPORT_DATE}.md"
log "🌐 HTML: reports/daily/html/daily_report_${REPORT_DATE}.html"
exit 0