feat(outbox): implement concurrent claim mechanism with UPDATE RETURNING + SKIP LOCKED

- Add migration 0004 to introduce 'claiming' status and timeout index
- Add StatusClaiming to platformevent domain and allow it in Validate()
- Rewrite ListDue as transactional UPDATE ... RETURNING with FOR UPDATE SKIP LOCKED
- Add ReleaseStaleClaims to reset expired claiming events back to retrying
- Worker Start() now runs a 30s ticker for stale claim recovery (5m timeout)
- Update stubEventStore in tests to satisfy new EventStore interface

Refs: D-02
This commit is contained in:
Your Name
2026-05-11 13:16:28 +08:00
parent 771304eabe
commit 34b175b130
5 changed files with 86 additions and 9 deletions

View File

@@ -20,6 +20,7 @@ type EventStore interface {
RecordDeliveryAttempt(ctx context.Context, eventID string, attemptNo int, responseStatus int, responseBody string, errorMessage string) error
MarkRetry(ctx context.Context, eventID string, attemptCount int, nextAttemptAt time.Time, lastError string) error
MarkDeadLetter(ctx context.Context, eventID string, attemptCount int, finalError string) error
ReleaseStaleClaims(ctx context.Context, timeout time.Duration) (int, error)
}
type Worker struct {
@@ -31,6 +32,7 @@ type Worker struct {
MaxRetries int
BatchSize int
PollInterval time.Duration
ClaimTimeout time.Duration
RetrySchedule []time.Duration
Now func() time.Time
Logger *slog.Logger
@@ -52,6 +54,7 @@ func NewWorker(platform, callbackURL string, store EventStore, client *http.Clie
MaxRetries: maxRetries,
BatchSize: 20,
PollInterval: 5 * time.Second,
ClaimTimeout: 5 * time.Minute,
RetrySchedule: []time.Duration{10 * time.Second, 30 * time.Second, 60 * time.Second, 5 * time.Minute, 15 * time.Minute},
Now: time.Now,
}
@@ -63,6 +66,8 @@ func (w *Worker) Start(ctx context.Context) {
}
ticker := time.NewTicker(w.pollInterval())
defer ticker.Stop()
claimTicker := time.NewTicker(30 * time.Second)
defer claimTicker.Stop()
for {
select {
case <-ctx.Done():
@@ -77,6 +82,16 @@ func (w *Worker) Start(ctx context.Context) {
return
case <-ticker.C:
}
select {
case <-ctx.Done():
return
case <-claimTicker.C:
if w.Store != nil {
if _, err := w.Store.ReleaseStaleClaims(ctx, w.claimTimeout()); err != nil && w.Logger != nil {
w.Logger.Error("release stale claims failed", "platform", w.Platform, "error", err.Error())
}
}
}
}
}
@@ -169,6 +184,13 @@ func (w *Worker) pollInterval() time.Duration {
return w.PollInterval
}
// claimTimeout reports the claim timeout to use for this worker.
// A positive ClaimTimeout is returned as-is; a zero or negative value
// falls back to five minutes.
func (w *Worker) claimTimeout() time.Duration {
	if configured := w.ClaimTimeout; configured > 0 {
		return configured
	}
	return 5 * time.Minute
}
func (w *Worker) now() time.Time {
if w.Now == nil {
return time.Now()