From ecdeedb1038eb5fd1630d6d43b5204fe13ec6de3 Mon Sep 17 00:00:00 2001 From: phamnazage-jpg Date: Fri, 29 May 2026 13:17:56 +0800 Subject: [PATCH] feat(routing): add formal chat route endpoint --- internal/app/http_api.go | 6 + internal/app/route_proxy_api.go | 114 +++++++++++++++ internal/app/route_proxy_api_test.go | 200 +++++++++++++++++++++++++++ internal/app/route_resolve_api.go | 8 +- 4 files changed, 325 insertions(+), 3 deletions(-) diff --git a/internal/app/http_api.go b/internal/app/http_api.go index acbef6cb..06305dfc 100644 --- a/internal/app/http_api.go +++ b/internal/app/http_api.go @@ -52,6 +52,7 @@ type ActionSet struct { AppendRouteStickyAudit func(context.Context, AppendRouteStickyAuditRequest) (RouteStickyAuditInfo, error) ListRouteStickyAudit func(context.Context, ListRouteStickyAuditRequest) ([]RouteStickyAuditInfo, error) ResolveRoute func(context.Context, ResolveRouteRequest) (ResolveRouteInfo, error) + RouteChatCompletions func(context.Context, RouteChatCompletionsRequest) (RouteChatCompletionsResult, error) ProxyRouteChatCompletions func(context.Context, ProxyRouteChatCompletionsRequest) (ProxyRouteChatCompletionsResult, error) SetStickyBinding func(context.Context, SetStickyBindingRequest) (StickyBindingInfo, error) GetStickyBinding func(context.Context, GetStickyBindingRequest) (StickyBindingInfo, error) @@ -403,6 +404,9 @@ func NewAPIHandlerWithAuth(adminAuth AdminAuthConfig, actions ActionSet) http.Ha mux.Handle("POST /api/routing/resolve", requireAdminAccess(adminAuth, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { handleResolveRoute(w, r, actions.ResolveRoute) }))) + mux.Handle("POST /api/routing/chat/completions", requireAdminAccess(adminAuth, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + handleRouteChatCompletions(w, r, actions.RouteChatCompletions) + }))) mux.Handle("POST /api/routing/proxy/chat/completions", requireAdminAccess(adminAuth, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { handleProxyRouteChatCompletions(w, r, actions.ProxyRouteChatCompletions) }))) @@ -1235,6 +1239,7 @@ func NewActionSetWithStickyRuntime(sqliteDSN string, stickyRuntime stickyStoreRu routeLogWriter := newLazyRouteLogWriter(sqliteDSN) resolveRoute := buildResolveRouteAction(sqliteDSN, stickyRuntime, routeLogWriter) proxyRouteChatCompletions := buildProxyRouteChatCompletionsAction(sqliteDSN, resolveRoute, routeLogWriter) + routeChatCompletions := buildRouteChatCompletionsAction(proxyRouteChatCompletions) return ActionSet{ CreateBatchImportRun: buildCreateBatchImportRunAction(sqliteDSN), ListBatchImportRuns: buildListBatchImportRunsAction(sqliteDSN), @@ -1262,6 +1267,7 @@ func NewActionSetWithStickyRuntime(sqliteDSN string, stickyRuntime stickyStoreRu AppendRouteStickyAudit: buildAppendRouteStickyAuditAction(routeLogWriter, sqliteDSN), ListRouteStickyAudit: buildListRouteStickyAuditAction(sqliteDSN), ResolveRoute: resolveRoute, + RouteChatCompletions: routeChatCompletions, ProxyRouteChatCompletions: proxyRouteChatCompletions, SetStickyBinding: buildSetStickyBindingAction(stickyRuntime), GetStickyBinding: buildGetStickyBindingAction(stickyRuntime), diff --git a/internal/app/route_proxy_api.go b/internal/app/route_proxy_api.go index d6fa9c6b..4cbf20d5 100644 --- a/internal/app/route_proxy_api.go +++ b/internal/app/route_proxy_api.go @@ -40,6 +40,22 @@ type ProxyRouteChatCompletionsRequest struct { Sync bool `json:"sync,omitempty"` } +type RouteChatCompletionsRequest struct { + RequestID string `json:"request_id,omitempty"` + LogicalGroupID string `json:"logical_group_id"` + Model string `json:"model"` + Scope string `json:"scope"` + SubjectID string `json:"subject_id"` + UserKey string `json:"user_key,omitempty"` + ConversationKey string `json:"conversation_key,omitempty"` + GatewayAPIKey string `json:"gateway_api_key,omitempty"` + SubscriptionUserID string `json:"subscription_user_id,omitempty"` + Messages []ChatCompletionMessage `json:"messages,omitempty"` + MaxTokens int `json:"max_tokens,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + Sync bool `json:"sync,omitempty"` +} + type ChatCompletionMessage struct { Role string `json:"role"` Content string `json:"content"` @@ -68,6 +84,33 @@ type RouteChatCompletionsForwardInfo struct { Response any `json:"response,omitempty"` } +type RouteChatCompletionsResult struct { + RequestID string `json:"request_id"` + Backend string `json:"backend"` + LogicalGroupID string `json:"logical_group_id"` + Model string `json:"model"` + Scope string `json:"scope"` + SubjectID string `json:"subject_id"` + StickyKey string `json:"sticky_key"` + StickyHit bool `json:"sticky_hit"` + StickyAction string `json:"sticky_action"` + FallbackUsed bool `json:"fallback_used,omitempty"` + SelectedRoute RouteChatCompletionsRouteInfo `json:"selected_route"` + Forward RouteChatCompletionsForwardInfo `json:"forward"` +} + +type RouteChatCompletionsRouteInfo struct { + RouteID string `json:"route_id"` + RouteName string `json:"route_name,omitempty"` + ShadowHostID string `json:"shadow_host_id"` + ShadowGroupID string `json:"shadow_group_id"` + ShadowModel string `json:"shadow_model,omitempty"` + Priority int `json:"priority"` + Weight int `json:"weight"` + BoundAt string `json:"bound_at,omitempty"` + ExpiresAt string `json:"expires_at,omitempty"` +} + func handleProxyRouteChatCompletions(w http.ResponseWriter, r *http.Request, fn func(context.Context, ProxyRouteChatCompletionsRequest) (ProxyRouteChatCompletionsResult, error)) { if fn == nil { writeHTTPError(w, &httpError{StatusCode: http.StatusInternalServerError, Code: "server_misconfigured", Message: "proxy-route-chat-completions action is not configured"}) @@ -86,6 +129,24 @@ func handleProxyRouteChatCompletions(w http.ResponseWriter, r *http.Request, fn writeJSON(w, http.StatusOK, result) } +func handleRouteChatCompletions(w http.ResponseWriter, r *http.Request, fn func(context.Context, RouteChatCompletionsRequest) (RouteChatCompletionsResult, error)) { + if fn == nil { + writeHTTPError(w, &httpError{StatusCode: http.StatusInternalServerError, Code: "server_misconfigured", Message: "route-chat-completions action is not configured"}) + return + } + var req RouteChatCompletionsRequest + if err := decodeJSON(r, &req); err != nil { + writeHTTPError(w, err) + return + } + result, err := fn(r.Context(), req) + if err != nil { + writeHTTPError(w, classifyError(err)) + return + } + writeJSON(w, http.StatusOK, result) +} + func buildProxyRouteChatCompletionsAction( sqliteDSN string, resolveRoute func(context.Context, ResolveRouteRequest) (ResolveRouteInfo, error), @@ -166,6 +227,59 @@ func buildProxyRouteChatCompletionsAction( } } +func buildRouteChatCompletionsAction( + proxyRouteChatCompletions func(context.Context, ProxyRouteChatCompletionsRequest) (ProxyRouteChatCompletionsResult, error), +) func(context.Context, RouteChatCompletionsRequest) (RouteChatCompletionsResult, error) { + return func(ctx context.Context, req RouteChatCompletionsRequest) (RouteChatCompletionsResult, error) { + result, err := proxyRouteChatCompletions(ctx, ProxyRouteChatCompletionsRequest{ + RequestID: req.RequestID, + LogicalGroupID: req.LogicalGroupID, + PublicModel: req.Model, + Scope: req.Scope, + SubjectID: req.SubjectID, + UserKey: req.UserKey, + ConversationKey: req.ConversationKey, + GatewayAPIKey: req.GatewayAPIKey, + SubscriptionUserID: req.SubscriptionUserID, + Messages: req.Messages, + MaxTokens: req.MaxTokens, + Temperature: req.Temperature, + Sync: req.Sync, + }) + if err != nil { + return RouteChatCompletionsResult{}, err + } + return routeChatCompletionsResultFromProxy(result), nil + } +} + +func routeChatCompletionsResultFromProxy(result ProxyRouteChatCompletionsResult) RouteChatCompletionsResult { + return RouteChatCompletionsResult{ + RequestID: result.Resolve.RequestID, + Backend: result.Resolve.Backend, + LogicalGroupID: result.Resolve.LogicalGroupID, + Model: result.Resolve.PublicModel, + Scope: result.Resolve.Scope, + SubjectID: result.Resolve.SubjectID, + StickyKey: result.Resolve.StickyKey, + StickyHit: result.Resolve.StickyHit, + StickyAction: result.Resolve.StickyAction, + FallbackUsed: result.Resolve.FallbackUsed, + SelectedRoute: RouteChatCompletionsRouteInfo{ + RouteID: result.Resolve.RouteID, + RouteName: result.Resolve.RouteName, + ShadowHostID: result.Resolve.ShadowHostID, + ShadowGroupID: result.Resolve.ShadowGroupID, + ShadowModel: result.Resolve.ShadowModel, + Priority: result.Resolve.Priority, + Weight: result.Resolve.Weight, + BoundAt: result.Resolve.BoundAt, + ExpiresAt: result.Resolve.ExpiresAt, + }, + Forward: result.Forward, + } +} + func appendProxyRouteDecisionLog( ctx context.Context, writerSource *lazyRouteLogWriter, diff --git a/internal/app/route_proxy_api_test.go b/internal/app/route_proxy_api_test.go index 27eb313f..11e9b664 100644 --- a/internal/app/route_proxy_api_test.go +++ b/internal/app/route_proxy_api_test.go @@ -70,6 +70,68 @@ func TestAPIProxyRouteChatCompletionsReturnsResolveAndForward(t *testing.T) { assertJSONContains(t, response.Body().Bytes(), "forward.upstream_status", float64(200)) } +func TestAPIRouteChatCompletionsReturnsFormalResult(t *testing.T) { + t.Parallel() + + handler := NewAPIHandler("secret-token", ActionSet{ + RouteChatCompletions: func(_ context.Context, req RouteChatCompletionsRequest) (RouteChatCompletionsResult, error) { + if req.LogicalGroupID != "gpt-shared" { + t.Fatalf("LogicalGroupID = %q, want gpt-shared", req.LogicalGroupID) + } + if req.Model != "gpt-5.4" { + t.Fatalf("Model = %q, want gpt-5.4", req.Model) + } + return RouteChatCompletionsResult{ + RequestID: "req-route-1", + Backend: "memory", + LogicalGroupID: req.LogicalGroupID, + Model: req.Model, + Scope: req.Scope, + SubjectID: req.SubjectID, + StickyKey: "lg:gpt-shared:m:gpt-5.4:conv:conv-1", + StickyHit: false, + StickyAction: "bind", + FallbackUsed: true, + SelectedRoute: RouteChatCompletionsRouteInfo{ + RouteID: "asxs", + RouteName: "ASXS", + ShadowHostID: "remote43", + ShadowGroupID: "9", + ShadowModel: "gpt-5.4", + Priority: 10, + Weight: 100, + }, + Forward: RouteChatCompletionsForwardInfo{ + OK: true, + HostID: "remote43", + HostBaseURL: "https://sub2api.example.com", + ShadowGroupID: "9", + ShadowModel: "gpt-5.4", + UpstreamPath: "/v1/chat/completions", + UpstreamStatus: 200, + LatencyMS: 12, + ContentType: "application/json", + }, + }, nil + }, + }) + + request := httptestRequest(t, http.MethodPost, "/api/routing/chat/completions", map[string]any{ + "logical_group_id": "gpt-shared", + "model": "gpt-5.4", + "scope": "conversation", + "subject_id": "conv-1", + "gateway_api_key": "gateway-key", + "sync": true, + }, "secret-token") + response := httptestRecorder(handler, request) + assertStatusCode(t, response, http.StatusOK) + assertJSONContains(t, response.Body().Bytes(), "selected_route.route_id", "asxs") + assertJSONContains(t, response.Body().Bytes(), "selected_route.shadow_model", "gpt-5.4") + assertJSONContains(t, response.Body().Bytes(), "fallback_used", true) + assertJSONContains(t, response.Body().Bytes(), "forward.upstream_status", float64(200)) +} + func TestNewActionSetProxyRouteChatCompletionsFlow(t *testing.T) { t.Parallel() @@ -225,6 +287,144 @@ func TestNewActionSetProxyRouteChatCompletionsFlow(t *testing.T) { } } +func TestNewActionSetRouteChatCompletionsFlow(t *testing.T) { + t.Parallel() + + var ( + gotAuthHeader string + gotModel string + gotPrompt string + ) + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/v1/chat/completions" { + t.Fatalf("URL.Path = %q, want /v1/chat/completions", r.URL.Path) + } + gotAuthHeader = r.Header.Get("Authorization") + + var payload struct { + Model string `json:"model"` + Messages []struct { + Role string `json:"role"` + Content string `json:"content"` + } `json:"messages"` + } + if err := json.NewDecoder(r.Body).Decode(&payload); err != nil { + t.Fatalf("json.Decode() error = %v", err) + } + gotModel = payload.Model + if len(payload.Messages) > 0 { + gotPrompt = payload.Messages[0].Content + } + + writeJSON(w, http.StatusOK, map[string]any{ + "id": "chatcmpl_route", + "object": "chat.completion", + "choices": []map[string]any{ + { + "index": 0, + "message": map[string]any{ + "role": "assistant", + "content": "pong-route", + }, + }, + }, + }) + })) + defer server.Close() + + dsn := "file:" + filepath.ToSlash(filepath.Join(t.TempDir(), "route-formal.db")) + "?_busy_timeout=5000" + actions := NewActionSet(dsn) + ctx := context.Background() + + store, err := sqlite.Open(ctx, dsn) + if err != nil { + t.Fatalf("sqlite.Open() error = %v", err) + } + defer store.Close() + + if _, err := store.Hosts().Create(ctx, sqlite.Host{ + HostID: "remote43", + BaseURL: server.URL, + HostVersion: "0.1.126", + AuthType: "apikey", + AuthToken: "host-admin-token", + }); err != nil { + t.Fatalf("Hosts().Create() error = %v", err) + } + if _, err := actions.CreateLogicalGroup(ctx, CreateLogicalGroupRequest{ + LogicalGroupID: "gpt-shared", + DisplayName: "GPT Shared", + Status: "active", + RoutePolicy: "priority", + StickyMode: "conversation_preferred", + ConversationTTLSeconds: 1200, + UserModelTTLSeconds: 600, + FailoverThreshold: 2, + CooldownSeconds: 300, + }); err != nil { + t.Fatalf("CreateLogicalGroup() error = %v", err) + } + if _, err := actions.CreateLogicalGroupModel(ctx, CreateLogicalGroupModelRequest{ + LogicalGroupID: "gpt-shared", + PublicModel: "gpt-5.4", + Status: "active", + }); err != nil { + t.Fatalf("CreateLogicalGroupModel() error = %v", err) + } + if _, err := actions.CreateLogicalGroupRoute(ctx, CreateLogicalGroupRouteRequest{ + LogicalGroupID: "gpt-shared", + RouteID: "asxs", + Name: "ASXS", + Status: "active", + Priority: 10, + ShadowGroupID: "gpt-shared__asxs", + ShadowHostID: "remote43", + UpstreamBaseURLHint: "https://api.asxs.top/v1", + }); err != nil { + t.Fatalf("CreateLogicalGroupRoute() error = %v", err) + } + if _, err := actions.CreateLogicalGroupRouteModel(ctx, CreateLogicalGroupRouteModelRequest{ + LogicalGroupID: "gpt-shared", + RouteID: "asxs", + PublicModel: "gpt-5.4", + ShadowModel: "gpt-5.4", + Status: "active", + }); err != nil { + t.Fatalf("CreateLogicalGroupRouteModel() error = %v", err) + } + + result, err := actions.RouteChatCompletions(ctx, RouteChatCompletionsRequest{ + RequestID: "req-route-1", + LogicalGroupID: "gpt-shared", + Model: "gpt-5.4", + Scope: "conversation", + SubjectID: "conv-1", + GatewayAPIKey: "gateway-key", + Sync: true, + }) + if err != nil { + t.Fatalf("RouteChatCompletions() error = %v", err) + } + if gotAuthHeader != "Bearer gateway-key" { + t.Fatalf("Authorization header = %q, want Bearer gateway-key", gotAuthHeader) + } + if gotModel != "gpt-5.4" { + t.Fatalf("forwarded model = %q, want gpt-5.4", gotModel) + } + if gotPrompt != "ping" { + t.Fatalf("forwarded prompt = %q, want ping", gotPrompt) + } + if result.SelectedRoute.RouteID != "asxs" || result.SelectedRoute.ShadowModel != "gpt-5.4" { + t.Fatalf("SelectedRoute = %+v, want asxs route with shadow model gpt-5.4", result.SelectedRoute) + } + if !result.Forward.OK || result.Forward.UpstreamStatus != http.StatusOK { + t.Fatalf("Forward = %+v, want successful 200 forward", result.Forward) + } + if result.Model != "gpt-5.4" || result.RequestID != "req-route-1" { + t.Fatalf("RouteChatCompletions() = %+v, want model gpt-5.4 and request id req-route-1", result) + } +} + func TestNewActionSetProxyRouteChatCompletionsManagedSubscriptionFlow(t *testing.T) { t.Parallel() diff --git a/internal/app/route_resolve_api.go b/internal/app/route_resolve_api.go index 37f8b7a7..9affbbae 100644 --- a/internal/app/route_resolve_api.go +++ b/internal/app/route_resolve_api.go @@ -32,6 +32,7 @@ type ResolveRouteInfo struct { StickyKey string `json:"sticky_key"` StickyHit bool `json:"sticky_hit"` StickyAction string `json:"sticky_action"` + FallbackUsed bool `json:"fallback_used,omitempty"` RouteID string `json:"route_id"` RouteName string `json:"route_name,omitempty"` ShadowGroupID string `json:"shadow_group_id"` @@ -116,7 +117,7 @@ func buildResolveRouteAction(sqliteDSN string, stickyRuntime stickyStoreRuntime, return ResolveRouteInfo{}, err } } else { - info := resolveRouteInfoFromBinding(stickyRuntime.backend, stickyKey, req.Scope, req.SubjectID, candidate, binding, requestID, true, "hit") + info := resolveRouteInfoFromBinding(stickyRuntime.backend, stickyKey, req.Scope, req.SubjectID, candidate, binding, requestID, true, "hit", false) if err := writer.AppendStickyAudit(ctx, routing.RouteStickyAuditEvent{ StickyKey: stickyKey, StickyKeyType: req.Scope, @@ -222,7 +223,7 @@ func buildResolveRouteAction(sqliteDSN string, stickyRuntime stickyStoreRuntime, return ResolveRouteInfo{}, err } } - return resolveRouteInfoFromBinding(stickyRuntime.backend, stickyKey, req.Scope, req.SubjectID, candidate, stored, requestID, false, "bind"), nil + return resolveRouteInfoFromBinding(stickyRuntime.backend, stickyKey, req.Scope, req.SubjectID, candidate, stored, requestID, false, "bind", selection.fallbackUsed), nil } } @@ -399,7 +400,7 @@ func resolveStickyTTL(group sqlite.LogicalGroup, scope string) (time.Duration, e } } -func resolveRouteInfoFromBinding(backend string, stickyKey string, scope string, subjectID string, candidate resolvedRouteCandidate, binding routing.StickyBinding, requestID string, stickyHit bool, stickyAction string) ResolveRouteInfo { +func resolveRouteInfoFromBinding(backend string, stickyKey string, scope string, subjectID string, candidate resolvedRouteCandidate, binding routing.StickyBinding, requestID string, stickyHit bool, stickyAction string, fallbackUsed bool) ResolveRouteInfo { return ResolveRouteInfo{ RequestID: requestID, Backend: backend, @@ -410,6 +411,7 @@ func resolveRouteInfoFromBinding(backend string, stickyKey string, scope string, StickyKey: stickyKey, StickyHit: stickyHit, StickyAction: stickyAction, + FallbackUsed: fallbackUsed, RouteID: candidate.route.RouteID, RouteName: candidate.route.Name, ShadowGroupID: candidate.route.ShadowGroupID,