chore: initial snapshot for gitea/github upload
This commit is contained in:
@@ -0,0 +1,84 @@
|
||||
"""
|
||||
Tracks the number of HTTP requests currently in-flight on this uvicorn worker.
|
||||
|
||||
Used by /health/backlog to expose per-pod queue depth, and emitted as the
|
||||
Prometheus gauge `litellm_in_flight_requests`.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Any, Optional
|
||||
|
||||
from starlette.types import ASGIApp, Receive, Scope, Send
|
||||
|
||||
|
||||
class InFlightRequestsMiddleware:
    """
    ASGI middleware that increments a counter when a request arrives and
    decrements it when the response is sent (or an error occurs).

    The counter is class-level and therefore scoped to a single uvicorn worker
    process — exactly the per-pod granularity we want.

    Also updates the `litellm_in_flight_requests` Prometheus gauge if
    prometheus_client is installed. The gauge is lazily initialised on the
    first request so that PROMETHEUS_MULTIPROC_DIR is already set by the time
    we register the metric. Initialisation is attempted only once — if
    prometheus_client is absent the class remembers and never retries.
    """

    # Current number of HTTP requests being processed by this worker.
    _in_flight: int = 0
    # Lazily-created prometheus_client Gauge, or None when unavailable.
    _gauge: Optional[Any] = None
    # True once gauge creation has been tried; prevents retrying per-request.
    _gauge_init_attempted: bool = False

    def __init__(self, app: ASGIApp) -> None:
        self.app = app

    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
        # Only HTTP requests count as "in flight"; websocket/lifespan events
        # pass straight through.
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        InFlightRequestsMiddleware._in_flight += 1
        gauge = InFlightRequestsMiddleware._get_gauge()
        if gauge is not None:
            gauge.inc()  # type: ignore
        try:
            await self.app(scope, receive, send)
        finally:
            # Always decrement, even when the downstream app raised.
            InFlightRequestsMiddleware._in_flight -= 1
            if gauge is not None:
                gauge.dec()  # type: ignore

    @staticmethod
    def get_count() -> int:
        """Return the number of HTTP requests currently in-flight."""
        return InFlightRequestsMiddleware._in_flight

    @staticmethod
    def _get_gauge() -> Optional[Any]:
        """Return the shared Prometheus gauge, creating it on first call.

        Returns None (and never retries) when prometheus_client is not
        installed or the metric cannot be registered.
        """
        if InFlightRequestsMiddleware._gauge_init_attempted:
            return InFlightRequestsMiddleware._gauge
        InFlightRequestsMiddleware._gauge_init_attempted = True
        try:
            from prometheus_client import Gauge

            kwargs: dict = {}
            if "PROMETHEUS_MULTIPROC_DIR" in os.environ:
                # livesum aggregates across all worker processes in the
                # scrape response.
                kwargs["multiprocess_mode"] = "livesum"
            InFlightRequestsMiddleware._gauge = Gauge(
                "litellm_in_flight_requests",
                "Number of HTTP requests currently in-flight on this uvicorn worker",
                **kwargs,
            )
        except Exception:
            # Deliberate best-effort: missing prometheus_client or a
            # registration conflict disables the gauge without failing
            # request handling.
            InFlightRequestsMiddleware._gauge = None
        return InFlightRequestsMiddleware._gauge
|
||||
|
||||
|
||||
def get_in_flight_requests() -> int:
    """Module-level convenience wrapper used by the /health/backlog endpoint.

    Returns:
        The number of HTTP requests currently in-flight on this uvicorn
        worker process.
    """
    current_count = InFlightRequestsMiddleware.get_count()
    return current_count
|
||||
@@ -0,0 +1,73 @@
|
||||
"""
|
||||
Prometheus Auth Middleware - Pure ASGI implementation
|
||||
"""
|
||||
import json
|
||||
|
||||
from fastapi import Request
|
||||
from starlette.types import ASGIApp, Receive, Scope, Send
|
||||
|
||||
import litellm
|
||||
from litellm.proxy._types import SpecialHeaders
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
|
||||
# Cache the header name at module level to avoid repeated enum attribute access
# on every /metrics request (this middleware runs in the hot path).
_AUTHORIZATION_HEADER = SpecialHeaders.openai_authorization.value  # "Authorization"
|
||||
|
||||
|
||||
class PrometheusAuthMiddleware:
    """
    Middleware to authenticate requests to the metrics endpoint.

    By default, auth is not run on the metrics endpoint.

    Enabled by setting the following in proxy_config.yaml:

    ```yaml
    litellm_settings:
      require_auth_for_metrics_endpoint: true
    ```
    """

    def __init__(self, app: ASGIApp) -> None:
        self.app = app

    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
        # Anything that is not an HTTP request for a /metrics path is
        # forwarded untouched (websocket/lifespan events included).
        is_metrics_request = scope["type"] == "http" and "/metrics" in scope.get(
            "path", ""
        )
        if not is_metrics_request:
            await self.app(scope, receive, send)
            return

        # Auth is opt-in via litellm_settings.require_auth_for_metrics_endpoint.
        if litellm.require_auth_for_metrics_endpoint is True:
            # Build the Request object lazily — only when auth actually runs.
            request = Request(scope, receive)
            api_key = request.headers.get(_AUTHORIZATION_HEADER) or ""
            try:
                await user_api_key_auth(request=request, api_key=api_key)
            except Exception as e:
                await self._send_unauthorized(send, e)
                return

        # Auth passed (or not required) — hand off to the inner application.
        await self.app(scope, receive, send)

    @staticmethod
    async def _send_unauthorized(send: Send, exc: Exception) -> None:
        """Emit a 401 JSON response directly via the raw ASGI send channel."""
        error_message = getattr(exc, "message", str(exc))
        body = json.dumps(
            f"Unauthorized access to metrics endpoint: {error_message}"
        ).encode("utf-8")
        await send(
            {
                "type": "http.response.start",
                "status": 401,
                "headers": [
                    [b"content-type", b"application/json"],
                    [b"content-length", str(len(body)).encode("ascii")],
                ],
            }
        )
        await send(
            {
                "type": "http.response.body",
                "body": body,
            }
        )
|
||||
Reference in New Issue
Block a user