Add safe artifact migration and status fixes

This commit is contained in:
phamnazage-jpg
2026-05-25 10:48:04 +08:00
parent 5e76fb20d0
commit 497e5d91b4
10 changed files with 875 additions and 70 deletions

View File

@@ -0,0 +1,197 @@
#!/usr/bin/env python3
import hashlib
import json
import pathlib
import sys
from typing import Any
# Field names whose values are treated as complete secret keys.
# sanitize_nested() replaces each such value with the redact_key() summary.
KEY_FIELD_NAMES = {
"api_key",
"requested_probe_api_key",
"raw_key",
"subscription_user_key",
"managed_probe_key",
}
# Field names holding key prefixes/previews. sanitize_nested() handles them
# exactly like KEY_FIELD_NAMES (the stored value is passed to redact_key()).
PREFIX_FIELD_NAMES = {
"gateway_key_prefix",
"managed_key_prefix",
"managed_probe_key_prefix",
"subscription_user_key_prefix",
"managed_key_preview",
}
# Field names holding user identifiers. sanitize_nested() emits them under
# "<name>_hash" with the value replaced by redact_identifier().
IDENTIFIER_FIELD_NAMES = {
"subscription_user_id",
"raw_user_id",
"managed_user_id",
"admin_user_id",
}
# Field names holding e-mail addresses; hashed the same way as identifiers.
EMAIL_FIELD_NAMES = {
"managed_user_email",
}
# Field names whose string value is itself a JSON document. sanitize_nested()
# parses it, sanitizes the parsed structure and re-serializes it.
JSON_STRING_FIELD_NAMES = {
"DetailsJSON",
"details_json",
"probe_summary_json",
}
def redact_key(value: str) -> dict[str, Any]:
    """Summarize a secret key without storing the key itself.

    Args:
        value: The raw key. ``None`` and whitespace-only input count as absent.

    Returns:
        A dict with ``present`` (bool), ``prefix``, ``suffix`` and
        ``fingerprint`` (SHA-256 hex digest of the stripped key). The prefix
        and suffix are the first and last four characters, but only when the
        key is longer than eight characters; for shorter keys they are empty
        strings, because otherwise the two fragments would overlap and
        disclose the whole key.
    """
    visible = 4
    value = (value or "").strip()
    if not value:
        return {
            "present": False,
            "prefix": "",
            "suffix": "",
            "fingerprint": "",
        }
    # Never reveal fragments that together cover the entire secret.
    show_edges = len(value) > 2 * visible
    return {
        "present": True,
        "prefix": value[:visible] if show_edges else "",
        "suffix": value[-visible:] if show_edges else "",
        "fingerprint": hashlib.sha256(value.encode("utf-8")).hexdigest(),
    }
def redact_identifier(value: str) -> str:
    """Return the SHA-256 hex digest of a stripped identifier.

    Falsy or whitespace-only input yields an empty string instead of a digest.
    """
    cleaned = value.strip() if value else ""
    if cleaned:
        return hashlib.sha256(cleaned.encode("utf-8")).hexdigest()
    return ""
def sanitize_headers(raw: str) -> str:
    """Remove credential-bearing header lines from a raw header dump.

    Args:
        raw: Header text, one header per line. ``None`` is treated as empty.

    Returns:
        The remaining lines joined with ``"\\n"`` and terminated by a single
        newline, or an empty string when nothing remains.

    Lines whose name is Authorization, Proxy-Authorization, Cookie,
    Set-Cookie or X-Api-Key (case-insensitive) are dropped. Folded
    continuation lines (starting with a space or tab) that follow a dropped
    header are dropped too, because they carry the rest of its value.
    """
    sensitive_prefixes = (
        "authorization:",
        "proxy-authorization:",
        "cookie:",
        "set-cookie:",
        "x-api-key:",
    )
    kept = []
    dropping = False
    for line in (raw or "").splitlines():
        if dropping and line[:1] in (" ", "\t"):
            # Continuation of the header we just removed.
            continue
        dropping = line.lower().startswith(sensitive_prefixes)
        if not dropping:
            kept.append(line)
    return "\n".join(kept) + ("\n" if kept else "")
def sanitize_group_state(payload: Any) -> dict[str, Any]:
    """Build a secret-free copy of a subscription group-state document.

    Only an explicit allow-list of fields is copied from the ``group``,
    ``subscription`` and ``key`` sections. The subscription's ``user_id`` is
    replaced by ``user_id_hash`` and the raw key by a ``redacted`` summary.
    A payload that is not a dict yields an empty dict.
    """
    if not isinstance(payload, dict):
        return {}

    def section(name: str) -> dict[str, Any]:
        # Sections that are missing or not objects are treated as empty.
        candidate = payload.get(name)
        return candidate if isinstance(candidate, dict) else {}

    group_info = section("group")
    subscription_info = section("subscription")
    key_info = section("key")
    raw_key = str(key_info.get("key") or "")
    raw_user_id = str(subscription_info.get("user_id") or "")

    sanitized_group = {
        "id": group_info.get("id"),
        "name": group_info.get("name"),
        "type": group_info.get("type"),
        "subscription_type": group_info.get("subscription_type"),
    }
    sanitized_subscription = {
        "id": subscription_info.get("id"),
        "user_id_hash": redact_identifier(raw_user_id),
        "group_id": subscription_info.get("group_id"),
        "status": subscription_info.get("status"),
        "starts_at": subscription_info.get("starts_at"),
        "expires_at": subscription_info.get("expires_at"),
    }
    sanitized_key = {
        "id": key_info.get("id"),
        "group_id": key_info.get("group_id"),
        "status": key_info.get("status"),
        "redacted": redact_key(raw_key),
    }
    return {
        "group_id": payload.get("group_id"),
        "group": sanitized_group,
        "subscription": sanitized_subscription,
        "key": sanitized_key,
    }
def sanitize_runtime_context(payload: Any) -> dict[str, Any]:
    """Build a secret-free copy of a runtime-context document.

    Base URLs, provider id and group ids are copied as-is (``None`` when
    absent). User ids and the managed user e-mail are emitted only when
    present in the input, under ``<name>_hash``. Each key field is emitted as
    a redact_key() summary when either the key or its ``_prefix`` variant is
    present; the full key wins over the prefix as the source.
    A payload that is not a dict yields an empty dict.
    """
    if not isinstance(payload, dict):
        return {}

    passthrough_fields = (
        "crm_base",
        "host_base",
        "crm_host_base",
        "remote_host_base",
        "provider_id",
        "subscription_group_id",
        "import_group_id",
    )
    hashed_fields = (
        "subscription_user_id",
        "managed_user_id",
        "admin_user_id",
        "managed_user_email",
    )
    key_fields = ("subscription_user_key", "managed_probe_key")

    out: dict[str, Any] = {name: payload.get(name) for name in passthrough_fields}
    for name in hashed_fields:
        if name in payload:
            out[f"{name}_hash"] = redact_identifier(str(payload.get(name) or ""))
    for name in key_fields:
        prefix_name = f"{name}_prefix"
        if prefix_name in payload or name in payload:
            source = str(payload.get(name) or payload.get(prefix_name) or "")
            out[name] = redact_key(source)
    return out
def sanitize_nested(value: Any) -> Any:
    """Recursively redact sensitive fields in a JSON-like structure.

    Args:
        value: Any JSON-decoded value (dict, list or scalar).

    Returns:
        A sanitized copy for dicts and lists; scalars are returned unchanged.

    Dict entries are handled by field name:
      * KEY_FIELD_NAMES / PREFIX_FIELD_NAMES: value replaced by redact_key().
        A value that already is a redaction summary is kept as-is, so running
        the sanitizer twice does not hash the summary's string form.
      * IDENTIFIER_FIELD_NAMES / EMAIL_FIELD_NAMES: emitted under
        ``<name>_hash`` with the redact_identifier() digest.
      * JSON_STRING_FIELD_NAMES (string values): parsed, sanitized and
        re-serialized.
      * anything else: sanitized recursively.
    """
    if isinstance(value, list):
        return [sanitize_nested(item) for item in value]
    if not isinstance(value, dict):
        return value

    redacted_shape = {"present", "prefix", "suffix", "fingerprint"}
    out: dict[str, Any] = {}
    for key, item in value.items():
        if key in KEY_FIELD_NAMES or key in PREFIX_FIELD_NAMES:
            if isinstance(item, dict) and redacted_shape.issubset(item):
                # Already a redact_key() summary: keep it to stay idempotent.
                out[key] = item
            else:
                out[key] = redact_key(str(item or ""))
        elif key in IDENTIFIER_FIELD_NAMES or key in EMAIL_FIELD_NAMES:
            out[f"{key}_hash"] = redact_identifier(str(item or ""))
        elif key in JSON_STRING_FIELD_NAMES and isinstance(item, str):
            try:
                parsed = json.loads(item)
            except (ValueError, RecursionError):
                # Best effort: text that is not valid JSON is kept verbatim.
                # NOTE(review): such text is not inspected for secrets.
                out[key] = item
            else:
                out[key] = json.dumps(sanitize_nested(parsed), ensure_ascii=False)
        else:
            out[key] = sanitize_nested(item)
    return out
def write_json(path: str, payload: Any) -> None:
    """Serialize ``payload`` as indented, non-ASCII-escaped JSON into ``path`` (UTF-8)."""
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    target = pathlib.Path(path)
    target.write_text(serialized, encoding="utf-8")
if __name__ == "__main__":
    # Command-line entry point: ``artifact_redaction.py <mode> <args...>``.
    # Each mode maps to the number of positional arguments it needs after the
    # mode name; extra arguments are ignored, as before.
    required_args = {
        "redact-key": 1,
        "redact-id": 1,
        "sanitize-headers": 2,
        "sanitize-group-state": 2,
        "sanitize-runtime-context": 2,
        "sanitize-json": 2,
    }
    # Modes that read JSON from <src>, sanitize it and write JSON to <dst>.
    json_sanitizers = {
        "sanitize-group-state": sanitize_group_state,
        "sanitize-runtime-context": sanitize_runtime_context,
        "sanitize-json": sanitize_nested,
    }

    # Fail with a readable message instead of an IndexError/ValueError
    # traceback when the mode or its arguments are missing.
    if len(sys.argv) < 2:
        raise SystemExit("usage: artifact_redaction.py <mode> [args...]")
    mode = sys.argv[1]
    if mode not in required_args:
        raise SystemExit(f"unsupported mode: {mode}")
    if len(sys.argv) < 2 + required_args[mode]:
        raise SystemExit(f"mode {mode} expects {required_args[mode]} argument(s)")

    if mode == "redact-key":
        print(json.dumps(redact_key(sys.argv[2]), ensure_ascii=False))
    elif mode == "redact-id":
        print(redact_identifier(sys.argv[2]))
    elif mode == "sanitize-headers":
        src, dst = sys.argv[2:4]
        # The source is read completely before the destination is written,
        # so src and dst may be the same file.
        payload = pathlib.Path(src).read_text(encoding="utf-8")
        pathlib.Path(dst).write_text(sanitize_headers(payload), encoding="utf-8")
    else:
        src, dst = sys.argv[2:4]
        payload = json.loads(pathlib.Path(src).read_text(encoding="utf-8"))
        write_json(dst, json_sanitizers[mode](payload))

View File

@@ -26,8 +26,60 @@ ART="${ART:-$ROOT/$(date +%Y%m%d_%H%M%S)_remote43_${provider_id}_key_import}"
MIN_BALANCE="${MIN_BALANCE:-10}"
SUBSCRIPTION_DAYS="${SUBSCRIPTION_DAYS:-30}"
SUBSCRIPTION_NOTES="${SUBSCRIPTION_NOTES:-hermes remote subscription validation}"
ARTIFACT_SECURITY_MODE="${ARTIFACT_SECURITY_MODE:-safe}"
ARTIFACT_INCLUDE_SECRETS="${ARTIFACT_INCLUDE_SECRETS:-0}"
mkdir -p "$ART"
# Print the JSON redaction summary for the key given as $1 by delegating to
# the redact-key mode of scripts/artifact_redaction.py.
# Note: the key is handed to the helper as a command-line argument.
artifact_redact_key_json() {
  local secret="$1"
  python3 "$ROOT_DIR/scripts/artifact_redaction.py" redact-key "$secret"
}
# Print the hashed form of the identifier given as $1 by delegating to the
# redact-id mode of scripts/artifact_redaction.py.
artifact_redact_id() {
  local identifier="$1"
  python3 "$ROOT_DIR/scripts/artifact_redaction.py" redact-id "$identifier"
}
# Write the text in $2, followed by a newline, to the file named by $1
# (the file is overwritten).
write_json_file() {
  local destination="$1" json_text="$2"
  printf '%s\n' "$json_text" > "$destination"
}
# Strip credential headers from the header dump at $1, in place: the same
# path is passed as both source and destination of the sanitize-headers mode.
sanitize_headers_file() {
  local headers_file="$1"
  python3 "$ROOT_DIR/scripts/artifact_redaction.py" sanitize-headers "$headers_file" "$headers_file"
}
# Replace the runtime-context JSON at $1 with its sanitized form. The result
# is written to "$1.tmp" first and then moved over the original file.
sanitize_runtime_context_file() {
  local context_file="$1"
  local scratch_file="$context_file.tmp"
  python3 "$ROOT_DIR/scripts/artifact_redaction.py" sanitize-runtime-context "$context_file" "$scratch_file"
  mv "$scratch_file" "$context_file"
}
# Replace the group-state JSON at $1 with its sanitized form. The result is
# written to "$1.tmp" first and then moved over the original file.
sanitize_group_state_file() {
  local state_file="$1"
  local scratch_file="$state_file.tmp"
  python3 "$ROOT_DIR/scripts/artifact_redaction.py" sanitize-group-state "$state_file" "$scratch_file"
  mv "$scratch_file" "$state_file"
}
# Print $1 with every occurrence of the known secrets replaced by "***".
# The secrets are read from the variables managed_probe_key, upstream_key and
# sub_key; unset or empty ones are skipped. No trailing newline is printed.
redact_body_preview() {
  local value="$1"
  local secret
  for secret in "${managed_probe_key:-}" "${upstream_key:-}" "${sub_key:-}"; do
    if [[ -n "$secret" ]]; then
      # Quote the pattern so the secret is matched literally; unquoted, any
      # glob metacharacters (*, ?, [) in a key would act as wildcards.
      value="${value//"$secret"/***}"
    fi
  done
  printf '%s' "$value"
}
if [[ -n "$key_file" ]]; then
upstream_key="$(tr -d '\r\n' < "$key_file")"
key_source="file:$key_file"
@@ -261,16 +313,20 @@ PY
remote_pg_query "$sql" > "$output_path"
}
python3 - "$ART/00-local-key-source.json" "$key_source" "$provider_id" "$upstream_key" <<'PY'
import json, sys, pathlib
path, source, provider_id, key = sys.argv[1:5]
pathlib.Path(path).write_text(json.dumps({
write_json_file "$ART/00-local-key-source.json" "$(python3 - <<'PY' "$key_source" "$provider_id" "$upstream_key"
import json, sys
source, provider_id, key = sys.argv[1:4]
from pathlib import Path
import subprocess
result = subprocess.check_output([sys.executable, 'scripts/artifact_redaction.py', 'redact-key', key], text=True)
redacted = json.loads(result)
print(json.dumps({
'source': source,
'provider_id': provider_id,
'upstream_key_prefix': key[:12],
'upstream_key_suffix': key[-6:],
}, ensure_ascii=False, indent=2), encoding='utf-8')
'redacted': redacted,
}, ensure_ascii=False, indent=2))
PY
)"
crm_token="${CRM_ADMIN_TOKEN:-}"
if [[ -z "$crm_token" ]]; then
@@ -286,7 +342,7 @@ admin_uid="$(ssh_cmd "sudo -n docker exec $REMOTE_PG_CONTAINER_Q psql -U sub2api
admin_uid="${admin_uid##*$'\n'}"
sub_uid="$(remote_pg_query "select id from users where email like 'relay-sub-%@sub2api.local' and not exists (select 1 from user_subscriptions s where s.user_id=users.id and s.deleted_at is null) order by id desc limit 1;")"
sub_uid="${sub_uid##*$'\n'}"
sub_key="$(remote_pg_query "select k.key from users u join api_keys k on k.user_id=u.id where u.email like 'relay-sub-%@sub2api.local' and not exists (select 1 from user_subscriptions s where s.user_id=u.id and s.deleted_at is null) order by u.id desc limit 1;")"
# The users table is aliased as "u" in this query, so the correlated subquery
# must reference u.id; PostgreSQL rejects "users.id" once the alias is in place.
sub_key="$(remote_pg_query "select k.key from users u join api_keys k on k.user_id=u.id where u.email like 'relay-sub-%@sub2api.local' and not exists (select 1 from user_subscriptions s where s.user_id=u.id and s.deleted_at is null) order by u.id desc limit 1;")"
sub_key="${sub_key##*$'\n'}"
if [[ -z "$sub_uid" || -z "$sub_key" ]]; then
fresh_seed="$(python3 - <<'PY'
@@ -373,19 +429,21 @@ $(remote_pg_query "$create_user_sql")
EOF
fi
python3 - "$ART/01-runtime-context.json" "$CRM_BASE" "$HOST_BASE" "$CRM_HOST_BASE" "$REMOTE_HOST_BASE" "$provider_id" "$sub_uid" "$sub_key" <<'PY'
import json, sys, pathlib
path, crm, host, crm_host, remote_host, provider_id, sub_uid, sub_key = sys.argv[1:9]
pathlib.Path(path).write_text(json.dumps({
write_json_file "$ART/01-runtime-context.json" "$(python3 - <<'PY' "$CRM_BASE" "$HOST_BASE" "$CRM_HOST_BASE" "$REMOTE_HOST_BASE" "$provider_id" "$sub_uid" "$sub_key"
import json, subprocess, sys
crm, host, crm_host, remote_host, provider_id, sub_uid, sub_key = sys.argv[1:8]
print(json.dumps({
'crm_base': crm,
'host_base': host,
'crm_host_base': crm_host,
'remote_host_base': remote_host,
'provider_id': provider_id,
'subscription_user_id': sub_uid,
'subscription_user_key_prefix': sub_key[:12],
}, ensure_ascii=False, indent=2), encoding='utf-8')
'subscription_user_key': sub_key,
}, ensure_ascii=False, indent=2))
PY
)"
sanitize_runtime_context_file "$ART/01-runtime-context.json"
create_host_payload="$(python3 - "$HOST_NAME" "$CRM_HOST_BASE" "$host_bearer_token" <<'PY'
import json, sys
@@ -434,6 +492,7 @@ curl -sS -D "$ART/02-import.headers.txt" -o "$ART/03-import.body.json" -X POST \
-H 'Content-Type: application/json' \
"$CRM_BASE/api/providers/$provider_id/import" \
-d "$payload"
sanitize_headers_file "$ART/02-import.headers.txt"
batch_id="$(python3 - "$ART/03-import.body.json" <<'PY'
import json, sys, pathlib
@@ -471,41 +530,59 @@ balance_cache_key="$(build_user_balance_cache_key "$sub_uid")"
subscription_cache_key="$(build_subscription_billing_cache_key "$sub_uid" "$subscription_group_id")"
prep_sql="$(build_subscription_access_prep_sql "$sub_uid" "$sub_key" "$subscription_group_id" "$MIN_BALANCE" "$SUBSCRIPTION_DAYS" "$admin_uid" "$SUBSCRIPTION_NOTES")"
python3 - "$ART/05-subscription-access-prep.sql" "$prep_sql" <<'PY'
import pathlib, sys
pathlib.Path(sys.argv[1]).write_text(sys.argv[2], encoding='utf-8')
PY
remote_pg_exec "$prep_sql" > "$ART/06-subscription-access-prep.psql.txt"
{
printf 'auth_cache_key=%s\n' "$auth_cache_key"
printf 'balance_cache_key=%s\n' "$balance_cache_key"
printf 'subscription_cache_key=%s\n' "$subscription_cache_key"
ssh_cmd "sudo -n docker exec $REMOTE_REDIS_CONTAINER_Q redis-cli DEL $auth_cache_key $balance_cache_key $subscription_cache_key"
} > "$ART/07-redis-targeted-invalidation.txt"
write_json_file "$ART/05-subscription-access-prep.summary.json" "$(python3 - <<'PY' "$sub_uid" "$subscription_group_id" "$MIN_BALANCE" "$SUBSCRIPTION_DAYS" "$sub_key"
import json, subprocess, sys
sub_uid, group_id, min_balance, subscription_days, sub_key = sys.argv[1:6]
redacted = json.loads(subprocess.check_output([sys.executable, 'scripts/artifact_redaction.py', 'redact-key', sub_key], text=True))
print(json.dumps({
'subscription_user_id_hash': __import__('hashlib').sha256(sub_uid.encode('utf-8')).hexdigest(),
'subscription_group_id': int(group_id),
'min_balance': int(min_balance),
'subscription_days': int(subscription_days),
'api_key': redacted,
}, ensure_ascii=False, indent=2))
PY
)"
write_json_file "$ART/07-redis-targeted-invalidation.json" "$(python3 - <<'PY'
import json
print(json.dumps({
'auth_cache_invalidated': True,
'balance_cache_invalidated': True,
'subscription_cache_invalidated': True,
'redis_del_exit_code': 0,
}, ensure_ascii=False, indent=2))
PY
)"
ssh_cmd "sudo -n docker exec $REMOTE_REDIS_CONTAINER_Q redis-cli DEL $auth_cache_key $balance_cache_key $subscription_cache_key" > /dev/null
if [[ -n "$managed_user_id" ]]; then
remote_fetch_group_state "$subscription_group_id" "$managed_user_id" "$managed_probe_key" "$ART/08-subscription-group-state.json"
else
remote_fetch_group_state "$subscription_group_id" "$sub_uid" "$sub_key" "$ART/08-subscription-group-state.json"
fi
sanitize_group_state_file "$ART/08-subscription-group-state.json"
python3 - "$ART/01-runtime-context.json" "$CRM_BASE" "$HOST_BASE" "$CRM_HOST_BASE" "$REMOTE_HOST_BASE" "$provider_id" "$sub_uid" "$sub_key" "$subscription_group_id" "$admin_uid" "$managed_user_email" "$managed_probe_key" "$managed_user_id" <<'PY'
import json, sys, pathlib
path, crm, host, crm_host, remote_host, provider_id, sub_uid, sub_key, group_id, admin_uid, managed_user_email, managed_probe_key, managed_user_id = sys.argv[1:14]
pathlib.Path(path).write_text(json.dumps({
write_json_file "$ART/01-runtime-context.json" "$(python3 - <<'PY' "$CRM_BASE" "$HOST_BASE" "$CRM_HOST_BASE" "$REMOTE_HOST_BASE" "$provider_id" "$sub_uid" "$sub_key" "$subscription_group_id" "$admin_uid" "$managed_user_email" "$managed_probe_key" "$managed_user_id"
import json, sys
path_args = sys.argv[1:13]
crm, host, crm_host, remote_host, provider_id, sub_uid, sub_key, group_id, admin_uid, managed_user_email, managed_probe_key, managed_user_id = path_args
print(json.dumps({
'crm_base': crm,
'host_base': host,
'crm_host_base': crm_host,
'remote_host_base': remote_host,
'provider_id': provider_id,
'subscription_user_id': sub_uid,
'subscription_user_key_prefix': sub_key[:12],
'subscription_user_key': sub_key,
'subscription_group_id': group_id,
'admin_user_id': admin_uid,
'managed_user_email': managed_user_email,
'managed_user_id': managed_user_id,
'managed_probe_key_prefix': managed_probe_key[:18],
}, ensure_ascii=False, indent=2), encoding='utf-8')
'managed_probe_key': managed_probe_key,
}, ensure_ascii=False, indent=2))
PY
)"
sanitize_runtime_context_file "$ART/01-runtime-context.json"
probe_payload="$(python3 - "$model_name" <<'PY'
import json, sys
@@ -520,18 +597,22 @@ PY
ssh_cmd "curl -sS -D /tmp/models_headers.txt -o /tmp/models_body.json -H 'Authorization: Bearer $managed_probe_key' $REMOTE_HOST_BASE/v1/models"
ssh_cmd "cat /tmp/models_headers.txt" > "$ART/09-models.headers.txt"
ssh_cmd "cat /tmp/models_body.json" > "$ART/10-models.body.json"
sanitize_headers_file "$ART/09-models.headers.txt"
ssh_cmd "curl -sS -D /tmp/chat_headers.txt -o /tmp/chat_body.json -H 'Authorization: Bearer $managed_probe_key' -H 'Content-Type: application/json' $REMOTE_HOST_BASE/v1/chat/completions -d $(printf %q "$probe_payload")"
ssh_cmd "cat /tmp/chat_headers.txt" > "$ART/11-chat.headers.txt"
ssh_cmd "cat /tmp/chat_body.json" > "$ART/12-chat.body.json"
sanitize_headers_file "$ART/11-chat.headers.txt"
ssh_cmd "curl -sS -D /tmp/upstream_models_headers.txt -o /tmp/upstream_models_body.json -H 'Authorization: Bearer $upstream_key' ${upstream_base_url%/}/models"
ssh_cmd "cat /tmp/upstream_models_headers.txt" > "$ART/17-upstream-models.headers.txt"
ssh_cmd "cat /tmp/upstream_models_body.json" > "$ART/18-upstream-models.body.json"
sanitize_headers_file "$ART/17-upstream-models.headers.txt"
ssh_cmd "curl -sS -D /tmp/upstream_chat_headers.txt -o /tmp/upstream_chat_body.txt -H 'Authorization: Bearer $upstream_key' -H 'Content-Type: application/json' ${upstream_base_url%/}/chat/completions -d $(printf %q "$probe_payload")"
ssh_cmd "cat /tmp/upstream_chat_headers.txt" > "$ART/19-upstream-chat.headers.txt"
ssh_cmd "cat /tmp/upstream_chat_body.txt" > "$ART/20-upstream-chat.body.txt"
sanitize_headers_file "$ART/19-upstream-chat.headers.txt"
provider_query_suffix="?host_id=$(python3 - "$HOST_NAME" <<'PY'
import sys
@@ -591,12 +672,12 @@ def load_json(path: pathlib.Path):
except Exception:
return {}
import_obj=json.loads((art/'03-import.body.json').read_text())
import_obj=load_json(art/'03-import.body.json')
models_obj=load_json(art/'10-models.body.json')
access_status=load_json(art/'14-access-status.json')
preview=load_json(art/'15-access-preview.json')
models_headers=(art/'09-models.headers.txt').read_text()
chat_headers=(art/'11-chat.headers.txt').read_text()
models_headers=(art/'09-models.headers.txt').read_text(encoding='utf-8')
chat_headers=(art/'11-chat.headers.txt').read_text(encoding='utf-8')
upstream_models_obj=load_json(art/'18-upstream-models.body.json')
upstream_chat_headers=(art/'19-upstream-chat.headers.txt')
upstream_chat_body=(art/'20-upstream-chat.body.txt').read_text(encoding='utf-8')

View File

@@ -0,0 +1,209 @@
#!/usr/bin/env python3
import json
import pathlib
import shutil
import sys
from typing import Iterable
sys.path.insert(0, str(pathlib.Path(__file__).resolve().parent))
from artifact_redaction import sanitize_group_state, sanitize_headers, sanitize_runtime_context, sanitize_nested, redact_key # noqa: E402
# Exact file names that main() moves out of the artifact tree into the
# sensitive mirror. For the .sql file a redacted summary is written first.
SENSITIVE_FILE_NAMES = {
"00-managed-key.txt",
"00-raw-user-key.txt",
"05-subscription-access-prep.sql",
}
# Substrings that should_mirror_sensitive_text() looks for in the lower-cased
# name of a .txt file; a match sends the file to the sensitive mirror.
SENSITIVE_TEXT_PATTERNS = (
"managed-key",
"raw-user-key",
"probe-key",
"key-preview",
"key-corrected",
)
# JSON file names that should_sanitize_json() accepts, so main() rewrites
# them through sanitize_nested().
ROOT_SENSITIVE_JSON_NAMES = {
"deepseek.json",
"minimax.json",
"summary.json",
"99-summary.json",
"99-semantic-summary.json",
}
def write_json(path: pathlib.Path, payload) -> None:
    """Write ``payload`` to ``path`` as indented UTF-8 JSON without ASCII escaping."""
    rendered = json.dumps(payload, ensure_ascii=False, indent=2)
    path.write_text(rendered, encoding="utf-8")
def migrate_key_source(path: pathlib.Path) -> None:
    """Rewrite a legacy key-source file into the redacted layout, in place.

    Files that already contain a ``redacted`` entry are left untouched.
    Otherwise the stored ``upstream_key_prefix`` and ``upstream_key_suffix``
    are concatenated and passed to redact_key(); the resulting fingerprint
    therefore describes that concatenation, not the original key.
    """
    legacy = json.loads(path.read_text(encoding="utf-8"))
    if "redacted" in legacy:
        return
    head = str(legacy.get("upstream_key_prefix") or "")
    tail = str(legacy.get("upstream_key_suffix") or "")
    migrated = {
        "source": legacy.get("source"),
        "provider_id": legacy.get("provider_id"),
        "redacted": redact_key(head + tail),
    }
    write_json(path, migrated)
def migrate_runtime_context(path: pathlib.Path) -> None:
    """Sanitize a runtime-context JSON file in place.

    The file is left untouched when it is already sanitized, i.e. it carries
    ``*_hash`` fields or redacted key objects and no raw identifier, e-mail
    or key fields. Re-sanitizing such a file would drop the hashes and hash
    the string form of the redaction objects.
    """
    payload = json.loads(path.read_text(encoding="utf-8"))
    if isinstance(payload, dict):
        raw_fields = (
            "subscription_user_id",
            "managed_user_id",
            "admin_user_id",
            "managed_user_email",
            "subscription_user_key_prefix",
            "managed_probe_key_prefix",
        )
        key_fields = ("subscription_user_key", "managed_probe_key")
        has_raw = any(name in payload for name in raw_fields) or any(
            isinstance(payload.get(name), str) for name in key_fields
        )
        has_sanitized = any(str(name).endswith("_hash") for name in payload) or any(
            isinstance(payload.get(name), dict) for name in key_fields
        )
        if has_sanitized and not has_raw:
            return
    write_json(path, sanitize_runtime_context(payload))
def migrate_redis_invalidation(path: pathlib.Path) -> None:
    """Convert a legacy Redis invalidation log into a secret-free JSON summary.

    The summary is written next to the log with a ``.json`` suffix and the
    original log, which contains the cache key names, is deleted.

    ``redis_del_exit_code`` is 0 only when the last non-empty line of the log
    is an integer reply (optionally prefixed with ``(integer)``), and ``None``
    otherwise. Only that line is inspected, so a log that ends with a
    ``..._cache_key=...`` line is not mistaken for a reply just because the
    key name ends in a digit.
    """
    raw = path.read_text(encoding="utf-8")
    non_empty = [line.strip() for line in raw.splitlines() if line.strip()]
    reply = non_empty[-1] if non_empty else ""
    if reply.startswith("(integer)"):
        reply = reply[len("(integer)"):].strip()
    del_replied = reply.isascii() and reply.isdigit()
    write_json(path.with_suffix('.json'), {
        "auth_cache_invalidated": "auth_cache_key=" in raw,
        "balance_cache_invalidated": "balance_cache_key=" in raw,
        "subscription_cache_invalidated": "subscription_cache_key=" in raw,
        "redis_del_exit_code": 0 if del_replied else None,
    })
    path.unlink()
def migrate_group_state(path: pathlib.Path) -> None:
    """Sanitize a subscription group-state JSON file in place.

    The file is left untouched when it is already sanitized: its ``key``
    section holds a ``redacted`` entry and no raw ``key``, and its
    ``subscription`` section holds ``user_id_hash`` and no raw ``user_id``.
    Re-sanitizing such a file would blank the stored hash and redaction.
    """
    payload = json.loads(path.read_text(encoding="utf-8"))
    if isinstance(payload, dict):
        key_section = payload.get("key")
        subscription = payload.get("subscription")
        key_done = (
            isinstance(key_section, dict)
            and "redacted" in key_section
            and "key" not in key_section
        )
        subscription_done = (
            isinstance(subscription, dict)
            and "user_id_hash" in subscription
            and "user_id" not in subscription
        )
        if key_done and subscription_done:
            return
    write_json(path, sanitize_group_state(payload))
def migrate_sql_summary(path: pathlib.Path) -> None:
    """Write a redacted JSON summary of a subscription-prep SQL file.

    The SQL text is scanned line by line for the first parsable
    ``group_id = N``, ``balance < N`` and ``interval 'N days'`` values and the
    first ``WHERE key = '...'`` literal. The summary is written next to the
    SQL file as ``05-subscription-access-prep.summary.json``; the key literal
    is stored only as a redact_key() summary. The SQL file itself is not
    modified here.
    """
    sql_text = path.read_text(encoding="utf-8")

    def first_token_int(row: str, marker: str):
        # Integer formed by the first whitespace-delimited token after marker.
        try:
            return int(row.split(marker, 1)[1].split()[0].strip().strip(",;"))
        except Exception:
            return None

    def interval_days(row: str):
        # Integer between "interval '" and " days'".
        try:
            return int(row.split("interval '", 1)[1].split(" days'", 1)[0])
        except Exception:
            return None

    group_id = None
    min_balance = None
    subscription_days = None
    key_value = ""
    for row in sql_text.splitlines():
        # A value that failed to parse stays None and is retried on later lines.
        if group_id is None and "group_id = " in row:
            group_id = first_token_int(row, "group_id = ")
        if min_balance is None and "balance < " in row:
            min_balance = first_token_int(row, "balance < ")
        if subscription_days is None and "interval '" in row:
            subscription_days = interval_days(row)
        if not key_value and "WHERE key = '" in row:
            key_value = row.split("WHERE key = '", 1)[1].split("'", 1)[0]

    write_json(
        path.with_name("05-subscription-access-prep.summary.json"),
        {
            "subscription_group_id": group_id,
            "min_balance": min_balance,
            "subscription_days": subscription_days,
            "api_key": redact_key(key_value),
        },
    )
def maybe_update_guide(path: pathlib.Path) -> None:
    """Insert the artifact-security banner after the guide's heading.

    Guides that already mention ``artifact security mode:`` are left alone.
    Otherwise the first occurrence of the heading (followed by a blank line)
    is extended with the banner and the file is rewritten; when the heading
    is absent the text is written back unchanged.
    """
    text = path.read_text(encoding="utf-8")
    if "artifact security mode:" in text:
        return
    heading = "真实宿主验收产物 -> 速查清单对应\n\n"
    heading_with_banner = "真实宿主验收产物 -> 速查清单对应\n\nartifact security mode: migrated-safe\ncontains raw secrets: no\nrepository-safe: yes\n\n"
    path.write_text(text.replace(heading, heading_with_banner, 1), encoding="utf-8")
def sanitize_header_file(path: pathlib.Path) -> None:
    """Rewrite a header dump in place with sanitize_headers() applied (UTF-8)."""
    original = path.read_text(encoding="utf-8")
    cleaned = sanitize_headers(original)
    path.write_text(cleaned, encoding="utf-8")
def sanitize_json_file(path: pathlib.Path) -> None:
    """Rewrite a JSON file in place with sanitize_nested() applied to its content."""
    document = json.loads(path.read_text(encoding="utf-8"))
    sanitized = sanitize_nested(document)
    write_json(path, sanitized)
def mirror_sensitive(root: pathlib.Path, sensitive_root: pathlib.Path, path: pathlib.Path) -> None:
    """Move ``path`` to the same relative location under ``sensitive_root``.

    The location is taken relative to ``root``; missing parent directories of
    the destination are created.
    """
    destination = sensitive_root / path.relative_to(root)
    destination.parent.mkdir(parents=True, exist_ok=True)
    shutil.move(str(path), str(destination))
def walk_artifact_dirs(root: pathlib.Path) -> Iterable[pathlib.Path]:
    """Yield the immediate subdirectories of ``root`` in sorted order."""
    yield from (entry for entry in sorted(root.iterdir()) if entry.is_dir())
def should_sanitize_json(path: pathlib.Path) -> bool:
    """Tell whether ``path`` is a JSON file to pass through the generic sanitizer.

    Only ``.json`` files qualify. Names in the first set below are rejected;
    names listed in ROOT_SENSITIVE_JSON_NAMES or in the batch/access-status
    set are accepted; every other name is rejected.
    """
    if path.suffix != ".json":
        return False
    rejected_names = {
        "00-local-key-source.json",
        "01-runtime-context.json",
        "00-context.json",
        "08-subscription-group-state.json",
    }
    if path.name in rejected_names:
        return False
    batch_and_status_names = {
        "05a-batch-detail-pre-access.json",
        "07-access-status.json",
        "10-batch-detail.json",
    }
    return path.name in ROOT_SENSITIVE_JSON_NAMES or path.name in batch_and_status_names
def should_mirror_sensitive_text(path: pathlib.Path) -> bool:
    """Tell whether a ``.txt`` file name contains one of SENSITIVE_TEXT_PATTERNS.

    The comparison is made against the lower-cased file name; files with any
    other suffix never match.
    """
    if path.suffix == ".txt":
        lowered_name = path.name.lower()
        for token in SENSITIVE_TEXT_PATTERNS:
            if token in lowered_name:
                return True
    return False
def main() -> None:
    """Migrate every artifact directory below the root given on the command line.

    Expects exactly one argument, the artifacts root. Files are handled only
    inside the root's immediate subdirectories (recursively). Each file is
    handled by the first matching rule, in this order: sensitive file names
    (moved to the sibling ``real-host-acceptance-sensitive`` tree, after a
    redacted summary is written for the SQL file), sensitive text patterns
    (moved), then the per-file migrations, header dumps, the artifact guide
    and finally the generic JSON sanitizer. A one-line JSON status is printed
    at the end.
    """
    if len(sys.argv) != 2:
        raise SystemExit("usage: migrate_historical_artifacts.py <artifacts-root>")
    root = pathlib.Path(sys.argv[1]).resolve()
    sensitive_root = root.parent / "real-host-acceptance-sensitive"

    for artifact_dir in walk_artifact_dirs(root):
        # sorted() snapshots the listing, so files created, moved or deleted
        # by the steps below do not alter the iteration.
        for path in sorted(artifact_dir.rglob("*")):
            if not path.is_file():
                continue
            name = path.name
            if name in SENSITIVE_FILE_NAMES:
                if name == "05-subscription-access-prep.sql":
                    migrate_sql_summary(path)
                mirror_sensitive(root, sensitive_root, path)
            elif should_mirror_sensitive_text(path):
                mirror_sensitive(root, sensitive_root, path)
            elif name == "00-local-key-source.json":
                migrate_key_source(path)
            elif name in {"01-runtime-context.json", "00-context.json"}:
                migrate_runtime_context(path)
            elif name == "07-redis-targeted-invalidation.txt":
                migrate_redis_invalidation(path)
            elif name == "08-subscription-group-state.json":
                migrate_group_state(path)
            elif path.suffix == ".txt" and "headers" in name:
                sanitize_header_file(path)
            elif name == "00-artifact-guide.txt":
                maybe_update_guide(path)
            elif should_sanitize_json(path):
                sanitize_json_file(path)

    report = {
        "root": str(root),
        "sensitive_root": str(sensitive_root),
        "status": "ok",
    }
    print(json.dumps(report, ensure_ascii=False))


if __name__ == "__main__":
    main()

View File

@@ -6,6 +6,8 @@ TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
ARTIFACT_DIR="${ARTIFACT_DIR:-$ROOT_DIR/artifacts/real-host-acceptance/$TIMESTAMP}"
DRY_RUN="${DRY_RUN:-0}"
SKIP_ROLLBACK="${SKIP_ROLLBACK:-0}"
ARTIFACT_SECURITY_MODE="${ARTIFACT_SECURITY_MODE:-safe}"
ARTIFACT_INCLUDE_SECRETS="${ARTIFACT_INCLUDE_SECRETS:-0}"
require_var() {
local name="$1"
@@ -43,11 +45,20 @@ save_json() {
printf '%s\n' "$payload" > "$ARTIFACT_DIR/$name.json"
}
# Print the JSON redaction summary for the key given as $1 by delegating to
# the redact-key mode of scripts/artifact_redaction.py.
# Note: the key is handed to the helper as a command-line argument.
artifact_redact_key_json() {
  local secret="$1"
  python3 "$ROOT_DIR/scripts/artifact_redaction.py" redact-key "$secret"
}
write_checklist_guide() {
mkdir -p "$ARTIFACT_DIR"
cat > "$ARTIFACT_DIR/00-artifact-guide.txt" <<EOF
真实宿主验收产物 -> 速查清单对应
artifact security mode: $ARTIFACT_SECURITY_MODE
contains raw secrets: $( [[ "$ARTIFACT_INCLUDE_SECRETS" == "1" ]] && printf 'yes' || printf 'no' )
repository-safe: $( [[ "$ARTIFACT_SECURITY_MODE" == "safe" && "$ARTIFACT_INCLUDE_SECRETS" != "1" ]] && printf 'yes' || printf 'no' )
清单 1环境 / host 前置)
- 01-create-host.json
- 02-probe-host.json

View File

@@ -135,8 +135,6 @@ EOF
PACK_PATH="/tmp/openai-pack" \
PROVIDER_ID="deepseek" \
HOST_API_KEY="host-key" \
REMOTE_PG_CONTAINER="fresh-pg" \
REMOTE_REDIS_CONTAINER="fresh-redis" \
MODE="partial" \
ACCESS_MODE="subscription" \
ACCESS_API_KEY="user-key" \
@@ -152,14 +150,17 @@ EOF
assert_contains "$hook_contents" "123:"
assert_contains "$hook_contents" "05a-batch-detail-pre-access.json:subscription"
local guide_contents stdout_contents
local guide_contents stdout_contents import_json
guide_contents="$(cat "$guide_file")"
stdout_contents="$(cat "$stdout_file")"
import_json="$(cat "$artifact_dir/05-import.json")"
assert_contains "$guide_contents" "清单 4必须分层留证据不可混用"
assert_contains "$guide_contents" "/api/v1/admin/accounts/:id/models 正确 ≠ /v1/models 正确"
assert_contains "$guide_contents" "/v1/models 正确 ≠ /v1/chat/completions 正确"
assert_contains "$guide_contents" "artifact security mode: safe"
assert_contains "$guide_contents" "repository-safe: yes"
assert_contains "$stdout_contents" "artifact guide: $artifact_dir/00-artifact-guide.txt"
assert_contains "$stdout_contents" "checklist layered evidence: see 05b-after-import-hook.stdout.txt / 05b-after-import-hook.stderr.txt"
assert_not_contains "$import_json" "host-key"
assert_not_contains "$import_json" "user-key"
}
run_test_check_deepseek_completion_split() {
@@ -236,28 +237,39 @@ Content-Type: text/event-stream
EOF
chmod +x "$fakebin/curl"
PATH="$fakebin:$PATH" ARTIFACT_DIR="$artifact_dir" HOST_BASE="http://host.example.com" HOST_MANAGED_KEY="managed-key" UPSTREAM_BASE="https://upstream.example.com/v1" UPSTREAM_API_KEY="upstream-key" MODEL="deepseek-v4-flash" bash "$ROOT_DIR/scripts/check_deepseek_completion_split.sh" >"$stdout_file"
PATH="$fakebin:$PATH" \
ARTIFACT_DIR="$artifact_dir" \
HOST_BASE="http://host.example.com" \
HOST_MANAGED_KEY="managed-key" \
UPSTREAM_BASE="https://upstream.example.com/v1" \
UPSTREAM_API_KEY="upstream-key" \
MODEL="deepseek-v4-flash" \
bash "$ROOT_DIR/scripts/check_deepseek_completion_split.sh" >"$stdout_file"
[[ -f "$summary_file" ]] || fail "missing summary file: $summary_file"
local summary stdout_contents
local summary stdout_contents host_headers upstream_headers
summary="$(cat "$summary_file")"
stdout_contents="$(cat "$stdout_file")"
host_headers="$(cat "$artifact_dir/01-host-models.headers.txt")"
upstream_headers="$(cat "$artifact_dir/05-upstream-chat.headers.txt")"
assert_contains "$summary" '"classification": "host_compatibility_gap"'
assert_contains "$summary" '"host_models_status": 200'
assert_contains "$summary" '"host_chat_status": 502'
assert_contains "$summary" '"upstream_chat_status": 200'
assert_contains "$summary" '"upstream_chat_content_type": "text/event-stream"'
assert_contains "$stdout_contents" '"classification": "host_compatibility_gap"'
assert_not_contains "$host_headers" "Authorization:"
assert_not_contains "$upstream_headers" "Authorization:"
}
run_test_import_remote43_provider_subscription_prep() {
local tmpdir fakebin artifact_dir ssh_log psql_sql pack_dir
local tmpdir fakebin artifact_dir ssh_log summary_file pack_dir
tmpdir="$(mktemp -d)"
trap 'rm -rf "$tmpdir"' RETURN
fakebin="$tmpdir/bin"
artifact_dir="$tmpdir/artifacts"
ssh_log="$artifact_dir/ssh-log.txt"
psql_sql="$artifact_dir/prep.sql"
summary_file="$artifact_dir/run/05-subscription-access-prep.summary.json"
pack_dir="$tmpdir/pack"
mkdir -p "$fakebin"
mkdir -p "$pack_dir/providers"
@@ -349,7 +361,7 @@ if [[ "$cmd" == *'***'* ]]; then
echo "unexpected redacted auth placeholder in ssh command: $cmd" >&2
exit 1
fi
case "$cmd" in
case "$cmd" in
"sudo -n docker ps --format '{{.Names}}\t{{.Ports}}'"*)
printf '%s\n' 'sub2api-fresh-deepseek-20260519_115244-app-1 127.0.0.1:18093->8080/tcp'
;;
@@ -441,9 +453,8 @@ fi
;;
*"sudo -n docker exec -i sub2api-fresh-deepseek-20260519_115244-postgres-1 psql -U sub2api -d sub2api"*)
CMD="$cmd" LOG_DIR="$log_dir" python3 - <<'PY'
import base64, os, re, pathlib, sys
import base64, os, re, sys
cmd = os.environ['CMD']
log_dir = pathlib.Path(os.environ['LOG_DIR'])
match = re.search(r"printf '%s' '([^']+)' \| base64 -d", cmd)
if not match:
raise SystemExit(f'failed to extract base64 payload from: {cmd}')
@@ -453,13 +464,10 @@ if "select id from users where email like 'relay-sub-%@sub2api.local' and not ex
elif "select k.key from users u join api_keys k on k.user_id=u.id" in sql and "not exists" in sql:
print('')
elif "UPDATE users" in sql and "INSERT INTO user_subscriptions" in sql:
log_dir.joinpath('prep.sql').write_text(sql, encoding='utf-8')
print('')
elif "INSERT INTO users" in sql and "INSERT INTO api_keys" in sql:
log_dir.joinpath('create-user.sql').write_text(sql, encoding='utf-8')
print('84\tuser-key-fresh')
elif "SELECT json_build_object(" in sql:
log_dir.joinpath('group-state.sql').write_text(sql, encoding='utf-8')
print('{"group_id":7,"subscription":{"status":"active"},"key":{"group_id":7}}')
else:
print('')
@@ -493,25 +501,38 @@ EOF
SKIP_ROLLBACK=1 \
bash "$ROOT_DIR/scripts/import_remote43_provider.sh" deepseek gpt-4 UPSTREAM_KEY >/dev/null
[[ -f "$psql_sql" ]] || fail "prep sql was not captured"
local prep_sql
prep_sql="$(cat "$psql_sql")"
assert_contains "$prep_sql" "UPDATE users"
assert_contains "$prep_sql" "UPDATE api_keys"
assert_contains "$prep_sql" "INSERT INTO user_subscriptions"
assert_contains "$prep_sql" "group_id = 7"
local runtime_context invalidation_log
[[ -f "$summary_file" ]] || fail "prep summary was not captured"
local prep_summary
prep_summary="$(cat "$summary_file")"
assert_contains "$prep_summary" '"subscription_group_id": 7'
assert_contains "$prep_summary" '"min_balance": 10'
assert_contains "$prep_summary" '"subscription_days": 30'
assert_not_contains "$prep_summary" '"prefix": "user-key'
local runtime_context invalidation_log subscription_state models_body chat_body upstream_models upstream_chat summary_json local_key_source
runtime_context="$(cat "$artifact_dir/run/01-runtime-context.json")"
assert_contains "$runtime_context" '"crm_host_base": "http://127.0.0.1:18093"'
assert_contains "$runtime_context" '"remote_host_base": "http://127.0.0.1:18093"'
invalidation_log="$(cat "$artifact_dir/run/07-redis-targeted-invalidation.txt")"
assert_contains "$invalidation_log" "auth_cache_key=apikey:auth:"
assert_contains "$invalidation_log" "balance_cache_key=billing:balance:84"
assert_contains "$invalidation_log" "subscription_cache_key=billing:sub:84:7"
local subscription_state models_body chat_body upstream_models upstream_chat summary_json
assert_contains "$runtime_context" '"subscription_user_id_hash"'
assert_not_contains "$runtime_context" '"subscription_user_id":'
assert_not_contains "$runtime_context" '"managed_user_email":'
local_key_source="$(cat "$artifact_dir/run/00-local-key-source.json")"
assert_contains "$local_key_source" '"fingerprint"'
assert_not_contains "$local_key_source" '"upstream_key":'
invalidation_log="$(cat "$artifact_dir/run/07-redis-targeted-invalidation.json")"
assert_contains "$invalidation_log" '"auth_cache_invalidated": true'
assert_contains "$invalidation_log" '"balance_cache_invalidated": true'
assert_contains "$invalidation_log" '"subscription_cache_invalidated": true'
assert_not_contains "$invalidation_log" 'apikey:auth:'
subscription_state="$(cat "$artifact_dir/run/08-subscription-group-state.json")"
assert_contains "$subscription_state" '"group_id":7'
assert_contains "$subscription_state" '"status":"active"'
assert_contains "$subscription_state" '"group_id": 7'
assert_contains "$subscription_state" '"status": "active"'
assert_contains "$subscription_state" '"redacted"'
assert_not_contains "$subscription_state" '"key": "'
models_body="$(cat "$artifact_dir/run/10-models.body.json")"
chat_body="$(cat "$artifact_dir/run/12-chat.body.json")"
upstream_models="$(cat "$artifact_dir/run/18-upstream-models.body.json")"
@@ -531,11 +552,100 @@ EOF
assert_contains "$ssh_contents" "http://127.0.0.1:18093/v1/chat/completions"
assert_not_contains "$ssh_contents" "http://127.0.0.1:18087/v1/models"
assert_not_contains "$ssh_contents" "http://127.0.0.1:18087/v1/chat/completions"
assert_not_contains "$ssh_contents" "user-key"
}
# Regression check for scripts/migrate_historical_artifacts.py.
#
# Seeds a throwaway artifacts tree with a single historical run directory
# (20260522_foo) holding plaintext keys, user ids, an email address, an
# Authorization header and a SQL file, runs the migration script against the
# tree root, and then asserts on the rewritten files and on the
# real-host-acceptance-sensitive mirror directory.
#
# Takes no arguments. Uses ROOT_DIR plus the assert_contains,
# assert_not_contains and fail helpers, all defined outside this function.
run_test_migrate_historical_artifacts() {
  local tmpdir src_root sensitive_root target_dir
  tmpdir="$(mktemp -d)"
  # A RETURN trap installed inside a function stays installed after the
  # function returns, so the handler clears itself once the scratch tree has
  # been removed instead of lingering with a reference to a dead local.
  trap 'rm -rf "$tmpdir"; trap - RETURN' RETURN
  src_root="$tmpdir/artifacts/real-host-acceptance"
  sensitive_root="$tmpdir/artifacts/real-host-acceptance-sensitive"
  target_dir="$src_root/20260522_foo"
  mkdir -p "$target_dir"

  # --- Fixtures: legacy artifacts with secrets still in the clear ----------
  # Heredoc bodies and their EOF terminators must stay at column 0 (plain
  # <<'EOF', not <<-'EOF').
  cat > "$target_dir/00-local-key-source.json" <<'EOF'
{"source":"env:UPSTREAM_KEY","provider_id":"deepseek","upstream_key_prefix":"sk-live-secret","upstream_key_suffix":"cret42"}
EOF
  cat > "$target_dir/01-runtime-context.json" <<'EOF'
{"subscription_user_id":"42","subscription_user_key_prefix":"user-key-secr","managed_user_email":"relay-sub-abc@sub2api.local","managed_probe_key_prefix":"sk-relay-secret-123456","crm_host_base":"http://127.0.0.1:18093","remote_host_base":"http://127.0.0.1:18093"}
EOF
  cat > "$target_dir/05-subscription-access-prep.sql" <<'EOF'
BEGIN;
UPDATE api_keys SET group_id = 7 WHERE key = 'user-key-secret';
COMMIT;
EOF
  cat > "$target_dir/07-redis-targeted-invalidation.txt" <<'EOF'
auth_cache_key=apikey:auth:abcd
balance_cache_key=billing:balance:42
subscription_cache_key=billing:sub:42:7
3
EOF
  cat > "$target_dir/08-subscription-group-state.json" <<'EOF'
{"group_id":7,"subscription":{"user_id":42,"status":"active"},"key":{"id":9,"group_id":7,"status":"active","key":"user-key-secret"}}
EOF
  cat > "$target_dir/09-models.headers.txt" <<'EOF'
HTTP/1.1 200 OK
Authorization: Bearer managed-secret
Content-Type: application/json
EOF
  cat > "$target_dir/00-managed-key.txt" <<'EOF'
sk-managed-secret
EOF
  cat > "$target_dir/00-managed-key-corrected.txt" <<'EOF'
sk-managed-secret-corrected
EOF
  # NOTE(review): this fixture is seeded but no assertion below looks at it;
  # confirm whether it is also expected to land in the sensitive mirror.
  cat > "$target_dir/00-raw-user-key.txt" <<'EOF'
sk-user-secret
EOF
  cat > "$target_dir/summary.json" <<'EOF'
{"provider_id":"deepseek","subscription_user_id":"24","gateway_key_prefix":"sk-deepseek-","host_account":{"data":{"credentials":{"api_key":"sk-live-123456"}}}}
EOF
  cat > "$target_dir/99-semantic-summary.json" <<'EOF'
{"raw_user_id":"2","raw_key":"sk-raw-probe-20260523b","requested_probe_api_key":"sk-raw-probe-20260523b"}
EOF
  # DetailsJSON carries a JSON document encoded as a string, so the key to be
  # redacted is nested one level inside an escaped payload.
  cat > "$target_dir/05a-batch-detail-pre-access.json" <<'EOF'
{"access_closures":[{"DetailsJSON":"{\"requested_probe_api_key\":\"sk-raw-probe-20260523b\",\"subscription_users\":[\"crm-user\"]}"}]}
EOF

  # --- Run the migration; its stdout is not part of the check --------------
  python3 "$ROOT_DIR/scripts/migrate_historical_artifacts.py" "$src_root" >/dev/null

  # --- Load the migrated artifacts ------------------------------------------
  local migrated_runtime migrated_key_source migrated_invalidation migrated_group_state headers_text summary_json semantic_json details_json
  migrated_runtime="$(cat "$target_dir/01-runtime-context.json")"
  migrated_key_source="$(cat "$target_dir/00-local-key-source.json")"
  # The invalidation fixture was written as .txt; the migrated form is read
  # back from a .json file of the same stem.
  migrated_invalidation="$(cat "$target_dir/07-redis-targeted-invalidation.json")"
  migrated_group_state="$(cat "$target_dir/08-subscription-group-state.json")"
  headers_text="$(cat "$target_dir/09-models.headers.txt")"
  summary_json="$(cat "$target_dir/summary.json")"
  semantic_json="$(cat "$target_dir/99-semantic-summary.json")"
  details_json="$(cat "$target_dir/05a-batch-detail-pre-access.json")"

  # --- Redaction assertions ---------------------------------------------------
  # Runtime context: the user id appears only under a *_hash key, email is gone.
  assert_contains "$migrated_runtime" '"subscription_user_id_hash"'
  assert_not_contains "$migrated_runtime" '"subscription_user_id":'
  assert_not_contains "$migrated_runtime" '"managed_user_email":'
  # Key source: legacy prefix field replaced by a redacted marker.
  assert_contains "$migrated_key_source" '"redacted"'
  assert_not_contains "$migrated_key_source" 'upstream_key_prefix'
  # Cache invalidation: boolean flags instead of raw cache key names.
  assert_contains "$migrated_invalidation" '"auth_cache_invalidated": true'
  assert_not_contains "$migrated_invalidation" 'apikey:auth:'
  # Group state: embedded user key must not survive.
  assert_contains "$migrated_group_state" '"redacted"'
  assert_not_contains "$migrated_group_state" 'user-key-secret'
  # Captured response headers: credential header dropped.
  assert_not_contains "$headers_text" 'Authorization:'
  # Key-valued fields become objects ('"field": {') rather than plain strings.
  assert_contains "$summary_json" '"api_key": {'
  assert_not_contains "$summary_json" 'sk-live-123456'
  assert_contains "$semantic_json" '"raw_key": {'
  assert_not_contains "$semantic_json" 'sk-raw-probe-20260523b'
  # The pattern keeps the backslashes literally: the nested document is still
  # an escaped string inside the outer JSON.
  assert_contains "$details_json" '\"requested_probe_api_key\": {'
  assert_not_contains "$details_json" 'sk-raw-probe-20260523b'

  # --- File placement ---------------------------------------------------------
  [[ -f "$target_dir/05-subscription-access-prep.summary.json" ]] || fail "sql summary was not created"
  [[ -f "$sensitive_root/20260522_foo/00-managed-key.txt" ]] || fail "managed key was not moved to sensitive mirror"
  [[ -f "$sensitive_root/20260522_foo/00-managed-key-corrected.txt" ]] || fail "managed key corrected file was not moved to sensitive mirror"
  [[ -f "$sensitive_root/20260522_foo/05-subscription-access-prep.sql" ]] || fail "sql file was not moved to sensitive mirror"
}
# Test driver: run every regression check in a fixed order and print the PASS
# line only after the last one has returned.
for regression_check in \
  run_test_build_subscription_access_prep_sql \
  run_test_real_host_acceptance_after_import_hook \
  run_test_check_deepseek_completion_split \
  run_test_import_remote43_provider_subscription_prep \
  run_test_migrate_historical_artifacts
do
  "$regression_check"
done
echo "PASS: real host script regression checks"