Files
ai-customer-service/internal/store/postgres/platform_event_store.go
Your Name 34b175b130 feat(outbox): implement concurrent claim mechanism with UPDATE RETURNING + SKIP LOCKED
- Add migration 0004 to introduce 'claiming' status and timeout index
- Add StatusClaiming to platformevent domain and allow it in Validate()
- Rewrite ListDue as transactional UPDATE ... RETURNING with FOR UPDATE SKIP LOCKED
- Add ReleaseStaleClaims to reset expired claiming events back to retrying
- Worker Start() now runs a 30s ticker for stale claim recovery (5m timeout)
- Update stubEventStore in tests to satisfy new EventStore interface

Refs: D-02
2026-05-11 13:16:28 +08:00

235 lines
6.7 KiB
Go

package postgres
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"strings"
"time"
"github.com/bridge/ai-customer-service/internal/domain/platformevent"
)
// PlatformEventStore groups the SQL operations this file performs against the
// cs_platform_event_outbox, cs_platform_event_delivery_attempts and
// cs_platform_event_dead_letters tables.
type PlatformEventStore struct {
// db is the database handle every method runs its statements on; methods
// that use it directly return a "db is nil" error when it is unset.
db *sql.DB
}
// NewPlatformEventStore returns a store whose queries run on db.
// db is stored as given; a nil db is accepted here and reported later by the
// methods that need it.
func NewPlatformEventStore(db *sql.DB) *PlatformEventStore {
	store := new(PlatformEventStore)
	store.db = db
	return store
}
// InsertPending stores a single event by delegating to InsertPendingBatch
// with a one-element batch holding a copy of *event.
// A nil event is rejected with an error before anything else happens.
func (s *PlatformEventStore) InsertPending(ctx context.Context, event *platformevent.Event) error {
	if event == nil {
		return fmt.Errorf("event is nil")
	}
	single := make([]platformevent.Event, 1)
	single[0] = *event
	return s.InsertPendingBatch(ctx, single)
}
// InsertPendingBatch inserts every event into cs_platform_event_outbox inside
// one transaction.
//
// For each event, in slice order, the event is validated, its Payload is
// JSON-encoded, and one INSERT is executed. The first failure of any of those
// steps rolls the transaction back and is returned as-is, so a failed call
// commits none of the events. An empty slice returns nil without opening a
// transaction; a nil db returns an error.
func (s *PlatformEventStore) InsertPendingBatch(ctx context.Context, events []platformevent.Event) error {
	switch {
	case s.db == nil:
		return fmt.Errorf("db is nil")
	case len(events) == 0:
		return nil
	}

	// Empty session/ticket IDs and an empty last_error are stored as NULL via NULLIF.
	const insertSQL = `
INSERT INTO cs_platform_event_outbox(
id, platform, event_type, session_id, ticket_id, source_message_id,
payload, status, attempt_count, next_attempt_at, occurred_at, delivered_at, last_error, created_at, updated_at
) VALUES (
$1, $2, $3, NULLIF($4,'')::uuid, NULLIF($5,'')::uuid, $6,
$7::jsonb, $8, $9, $10, $11, $12, NULLIF($13,''), $14, $15
)
`

	tx, beginErr := s.db.BeginTx(ctx, nil)
	if beginErr != nil {
		return beginErr
	}

	// insertAll stops at the first failing event so the caller can roll back once.
	insertAll := func() error {
		for _, ev := range events {
			if err := ev.Validate(); err != nil {
				return err
			}
			encoded, err := json.Marshal(ev.Payload)
			if err != nil {
				return err
			}
			_, err = tx.ExecContext(ctx, insertSQL,
				ev.ID, ev.Platform, ev.EventType, ev.SessionID, ev.TicketID, ev.SourceMessageID,
				string(encoded), string(ev.Status), ev.AttemptCount, ev.NextAttemptAt, ev.OccurredAt,
				ev.DeliveredAt, ev.LastError, ev.CreatedAt, ev.UpdatedAt)
			if err != nil {
				return err
			}
		}
		return nil
	}

	if err := insertAll(); err != nil {
		_ = tx.Rollback() // best effort; the original failure is what the caller needs
		return err
	}
	return tx.Commit()
}
// ListDue claims up to limit due events for platform and returns them.
//
// Inside one transaction, rows of cs_platform_event_outbox whose status is
// 'pending' or 'retrying' and whose next_attempt_at is at or before dueBefore
// are switched to status 'claiming' with updated_at = NOW(), and the updated
// rows are returned. The inner SELECT uses FOR UPDATE SKIP LOCKED, so rows
// already locked by another transaction are skipped rather than waited on.
//
// platform is whitespace-trimmed before use. A nil db, a non-positive limit or
// an empty platform yields an error. On any error no events are returned and
// the transaction is rolled back.
//
// NOTE(review): the ORDER BY decides which rows the subquery selects; the
// order of the rows produced by RETURNING is presumably unspecified — confirm
// before relying on the order of the returned slice.
func (s *PlatformEventStore) ListDue(ctx context.Context, platform string, dueBefore time.Time, limit int) ([]platformevent.Event, error) {
	if s.db == nil {
		return nil, fmt.Errorf("db is nil")
	}
	if limit <= 0 {
		return nil, fmt.Errorf("limit must be positive")
	}
	platform = strings.TrimSpace(platform)
	if platform == "" {
		return nil, fmt.Errorf("platform is required")
	}

	tx, err := s.db.BeginTx(ctx, nil)
	if err != nil {
		return nil, err
	}
	// Roll back unconditionally. A rollback keyed on the outer err would miss
	// the scan/decode/rows.Err failures below, because those use their own
	// shadowed error variables; the transaction would then be left open.
	// After a successful Commit this call is a no-op that reports
	// sql.ErrTxDone, which is deliberately ignored.
	defer func() { _ = tx.Rollback() }()

	rows, err := tx.QueryContext(ctx, `
UPDATE cs_platform_event_outbox
SET status = 'claiming', updated_at = NOW()
WHERE id IN (
SELECT id FROM cs_platform_event_outbox
WHERE platform = $1 AND status IN ('pending','retrying') AND next_attempt_at <= $2
ORDER BY next_attempt_at ASC, occurred_at ASC, created_at ASC, id ASC
LIMIT $3
FOR UPDATE SKIP LOCKED
)
RETURNING id, platform, event_type, COALESCE(session_id::text, ''), COALESCE(ticket_id::text, ''), COALESCE(source_message_id, ''),
payload, status, attempt_count, next_attempt_at, occurred_at, created_at, updated_at,
delivered_at, COALESCE(last_error, '')
`, platform, dueBefore, limit)
	if err != nil {
		return nil, err
	}
	// Registered after the rollback defer, so the rows are closed first.
	defer rows.Close()

	events := make([]platformevent.Event, 0, limit)
	for rows.Next() {
		var (
			event       platformevent.Event
			payloadJSON []byte
			status      string
		)
		if scanErr := rows.Scan(
			&event.ID,
			&event.Platform,
			&event.EventType,
			&event.SessionID,
			&event.TicketID,
			&event.SourceMessageID,
			&payloadJSON,
			&status,
			&event.AttemptCount,
			&event.NextAttemptAt,
			&event.OccurredAt,
			&event.CreatedAt,
			&event.UpdatedAt,
			&event.DeliveredAt,
			&event.LastError,
		); scanErr != nil {
			return nil, scanErr
		}
		event.Status = platformevent.Status(status)
		// An empty payload column leaves event.Payload at its zero value.
		if len(payloadJSON) > 0 {
			if decodeErr := json.Unmarshal(payloadJSON, &event.Payload); decodeErr != nil {
				return nil, decodeErr
			}
		}
		events = append(events, event)
	}
	if iterErr := rows.Err(); iterErr != nil {
		return nil, iterErr
	}
	// The claim only becomes visible to other transactions once this commits.
	if commitErr := tx.Commit(); commitErr != nil {
		return nil, commitErr
	}
	return events, nil
}
// ReleaseStaleClaims moves events stuck in status 'claiming' back to
// 'retrying' and returns how many rows the UPDATE reported as affected.
//
// A claim counts as stale when its updated_at is older than NOW() minus
// timeout; timeout is sent to the database as a number of seconds and cast to
// an interval there. Released rows also get updated_at = NOW().
//
// timeout must be positive. With a zero or negative value the cutoff would be
// NOW() or later, so every claim — including ones a worker is still
// processing — would be released. Such values are rejected with an error, as
// is a nil db.
func (s *PlatformEventStore) ReleaseStaleClaims(ctx context.Context, timeout time.Duration) (int, error) {
	if s.db == nil {
		return 0, fmt.Errorf("db is nil")
	}
	if timeout <= 0 {
		return 0, fmt.Errorf("timeout must be positive")
	}
	res, err := s.db.ExecContext(ctx, `
UPDATE cs_platform_event_outbox
SET status = 'retrying', updated_at = NOW()
WHERE status = 'claiming' AND updated_at < NOW() - $1::interval
`, timeout.Seconds())
	if err != nil {
		return 0, err
	}
	released, err := res.RowsAffected()
	if err != nil {
		return 0, err
	}
	return int(released), nil
}
// MarkDelivered sets the outbox row with id eventID to status 'delivered' and
// writes deliveredAt into both delivered_at and updated_at.
// The number of affected rows is not inspected, so an unknown eventID is not
// reported as an error. A nil db returns an error.
func (s *PlatformEventStore) MarkDelivered(ctx context.Context, eventID string, deliveredAt time.Time) error {
	const markDeliveredSQL = `
UPDATE cs_platform_event_outbox
SET status = 'delivered', delivered_at = $2, updated_at = $2
WHERE id = $1
`
	if s.db == nil {
		return fmt.Errorf("db is nil")
	}
	if _, execErr := s.db.ExecContext(ctx, markDeliveredSQL, eventID, deliveredAt); execErr != nil {
		return execErr
	}
	return nil
}
// RecordDeliveryAttempt appends one row to cs_platform_event_delivery_attempts
// for eventID and attemptNo.
// A responseStatus of 0, an empty responseBody and an empty errorMessage are
// each stored as NULL (via NULLIF). A nil db returns an error.
func (s *PlatformEventStore) RecordDeliveryAttempt(ctx context.Context, eventID string, attemptNo int, responseStatus int, responseBody string, errorMessage string) error {
	const insertAttemptSQL = `
INSERT INTO cs_platform_event_delivery_attempts(event_id, attempt_no, response_status, response_body, error_message)
VALUES ($1, $2, NULLIF($3, 0), NULLIF($4, ''), NULLIF($5, ''))
`
	if s.db == nil {
		return fmt.Errorf("db is nil")
	}
	if _, execErr := s.db.ExecContext(ctx, insertAttemptSQL,
		eventID, attemptNo, responseStatus, responseBody, errorMessage); execErr != nil {
		return execErr
	}
	return nil
}
// MarkRetry sets the outbox row with id eventID to status 'retrying' and
// stores attemptCount, nextAttemptAt and lastError on it, with
// updated_at = NOW(). An empty lastError is stored as NULL.
// The number of affected rows is not inspected. A nil db returns an error.
func (s *PlatformEventStore) MarkRetry(ctx context.Context, eventID string, attemptCount int, nextAttemptAt time.Time, lastError string) error {
	const markRetrySQL = `
UPDATE cs_platform_event_outbox
SET status = 'retrying', attempt_count = $2, next_attempt_at = $3, last_error = NULLIF($4,''), updated_at = NOW()
WHERE id = $1
`
	if s.db == nil {
		return fmt.Errorf("db is nil")
	}
	if _, execErr := s.db.ExecContext(ctx, markRetrySQL,
		eventID, attemptCount, nextAttemptAt, lastError); execErr != nil {
		return execErr
	}
	return nil
}
// MarkDeadLetter marks the outbox row with id eventID as 'dead_letter' and
// mirrors it into cs_platform_event_dead_letters, both in one transaction.
//
// Step one stores attemptCount and finalError (empty becomes NULL) on the
// outbox row. Step two copies that row's id, platform, event_type, payload,
// attempt_count and last_error into the dead-letter table; if a row for the
// same event_id already exists, its attempt_count, final_error and payload
// are overwritten. A failure in either step rolls the transaction back and is
// returned. A nil db returns an error.
func (s *PlatformEventStore) MarkDeadLetter(ctx context.Context, eventID string, attemptCount int, finalError string) error {
	const (
		flagOutboxSQL = `
UPDATE cs_platform_event_outbox
SET status = 'dead_letter', attempt_count = $2, last_error = NULLIF($3,''), updated_at = NOW()
WHERE id = $1
`
		copyDeadLetterSQL = `
INSERT INTO cs_platform_event_dead_letters(event_id, platform, event_type, payload, attempt_count, final_error)
SELECT id, platform, event_type, payload, attempt_count, last_error
FROM cs_platform_event_outbox
WHERE id = $1
ON CONFLICT (event_id) DO UPDATE
SET attempt_count = EXCLUDED.attempt_count, final_error = EXCLUDED.final_error, payload = EXCLUDED.payload
`
	)

	if s.db == nil {
		return fmt.Errorf("db is nil")
	}
	tx, beginErr := s.db.BeginTx(ctx, nil)
	if beginErr != nil {
		return beginErr
	}

	// The copy reads the values written by the update, so the order matters.
	apply := func() error {
		if _, err := tx.ExecContext(ctx, flagOutboxSQL, eventID, attemptCount, finalError); err != nil {
			return err
		}
		_, err := tx.ExecContext(ctx, copyDeadLetterSQL, eventID)
		return err
	}

	if err := apply(); err != nil {
		_ = tx.Rollback() // best effort; the statement error is what the caller needs
		return err
	}
	return tx.Commit()
}