chore: initial snapshot for gitea/github upload
This commit is contained in:
@@ -0,0 +1,84 @@
|
||||
"""
|
||||
Tracks the number of HTTP requests currently in-flight on this uvicorn worker.
|
||||
|
||||
Used by /health/backlog to expose per-pod queue depth, and emitted as the
|
||||
Prometheus gauge `litellm_in_flight_requests`.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Any, Optional
|
||||
|
||||
from starlette.types import ASGIApp, Receive, Scope, Send
|
||||
|
||||
|
||||
class InFlightRequestsMiddleware:
    """
    ASGI middleware that increments a counter when a request arrives and
    decrements it when the response is sent (or an error occurs).

    The counter is class-level and therefore scoped to a single uvicorn worker
    process — exactly the per-pod granularity we want.

    Also updates the `litellm_in_flight_requests` Prometheus gauge if
    prometheus_client is installed. The gauge is lazily initialised on the
    first request so that PROMETHEUS_MULTIPROC_DIR is already set by the time
    we register the metric. Initialisation is attempted only once — if
    prometheus_client is absent the class remembers and never retries.
    """

    # Current number of HTTP requests being processed by this worker.
    _in_flight: int = 0
    # Lazily-created prometheus_client Gauge, or None when unavailable.
    _gauge: Optional[Any] = None
    # True once gauge creation has been tried; prevents retrying per-request.
    _gauge_init_attempted: bool = False

    def __init__(self, app: ASGIApp) -> None:
        self.app = app

    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
        # Only HTTP requests count as "in flight"; websocket/lifespan events
        # pass straight through.
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        InFlightRequestsMiddleware._in_flight += 1
        gauge = InFlightRequestsMiddleware._get_gauge()
        if gauge is not None:
            gauge.inc()  # type: ignore
        try:
            await self.app(scope, receive, send)
        finally:
            # Always decrement, even when the downstream app raised.
            InFlightRequestsMiddleware._in_flight -= 1
            if gauge is not None:
                gauge.dec()  # type: ignore

    @staticmethod
    def get_count() -> int:
        """Return the number of HTTP requests currently in-flight."""
        return InFlightRequestsMiddleware._in_flight

    @staticmethod
    def _get_gauge() -> Optional[Any]:
        """Return the shared Prometheus gauge, creating it on first call.

        Returns None (and never retries) when prometheus_client is not
        installed or the metric cannot be registered.
        """
        if InFlightRequestsMiddleware._gauge_init_attempted:
            return InFlightRequestsMiddleware._gauge
        InFlightRequestsMiddleware._gauge_init_attempted = True
        try:
            from prometheus_client import Gauge

            kwargs: dict = {}
            if "PROMETHEUS_MULTIPROC_DIR" in os.environ:
                # livesum aggregates across all worker processes in the
                # scrape response.
                kwargs["multiprocess_mode"] = "livesum"
            InFlightRequestsMiddleware._gauge = Gauge(
                "litellm_in_flight_requests",
                "Number of HTTP requests currently in-flight on this uvicorn worker",
                **kwargs,
            )
        except Exception:
            # Deliberate best-effort: missing prometheus_client or a
            # registration conflict disables the gauge without failing
            # request handling.
            InFlightRequestsMiddleware._gauge = None
        return InFlightRequestsMiddleware._gauge
|
||||
|
||||
|
||||
def get_in_flight_requests() -> int:
    """Module-level convenience wrapper used by the /health/backlog endpoint.

    Returns:
        The number of HTTP requests currently in-flight on this uvicorn
        worker process.
    """
    current_count = InFlightRequestsMiddleware.get_count()
    return current_count
|
||||
@@ -0,0 +1,73 @@
|
||||
"""
|
||||
Prometheus Auth Middleware - Pure ASGI implementation
|
||||
"""
|
||||
import json
|
||||
|
||||
from fastapi import Request
|
||||
from starlette.types import ASGIApp, Receive, Scope, Send
|
||||
|
||||
import litellm
|
||||
from litellm.proxy._types import SpecialHeaders
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
|
||||
# Cache the header name at module level to avoid repeated enum attribute access
# on every /metrics request (this middleware runs in the hot path).
_AUTHORIZATION_HEADER = SpecialHeaders.openai_authorization.value  # "Authorization"
|
||||
|
||||
|
||||
class PrometheusAuthMiddleware:
    """
    Middleware to authenticate requests to the metrics endpoint.

    By default, auth is not run on the metrics endpoint.

    Enabled by setting the following in proxy_config.yaml:

    ```yaml
    litellm_settings:
      require_auth_for_metrics_endpoint: true
    ```
    """

    def __init__(self, app: ASGIApp) -> None:
        self.app = app

    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
        # Anything that is not an HTTP request for a /metrics path is
        # forwarded untouched (websocket/lifespan events included).
        is_metrics_request = scope["type"] == "http" and "/metrics" in scope.get(
            "path", ""
        )
        if not is_metrics_request:
            await self.app(scope, receive, send)
            return

        # Auth is opt-in via litellm_settings.require_auth_for_metrics_endpoint.
        if litellm.require_auth_for_metrics_endpoint is True:
            # Build the Request object lazily — only when auth actually runs.
            request = Request(scope, receive)
            api_key = request.headers.get(_AUTHORIZATION_HEADER) or ""
            try:
                await user_api_key_auth(request=request, api_key=api_key)
            except Exception as e:
                await self._send_unauthorized(send, e)
                return

        # Auth passed (or not required) — hand off to the inner application.
        await self.app(scope, receive, send)

    @staticmethod
    async def _send_unauthorized(send: Send, exc: Exception) -> None:
        """Emit a 401 JSON response directly via the raw ASGI send channel."""
        error_message = getattr(exc, "message", str(exc))
        body = json.dumps(
            f"Unauthorized access to metrics endpoint: {error_message}"
        ).encode("utf-8")
        await send(
            {
                "type": "http.response.start",
                "status": 401,
                "headers": [
                    [b"content-type", b"application/json"],
                    [b"content-length", str(len(body)).encode("ascii")],
                ],
            }
        )
        await send(
            {
                "type": "http.response.body",
                "body": body,
            }
        )
|
||||
Reference in New Issue
Block a user