chore: initial public snapshot for github upload
@@ -0,0 +1,368 @@
#### Realtime WebRTC Endpoints ####

import json
import time
from typing import Any, Dict, Optional

import httpx
from fastapi import APIRouter, Depends, HTTPException, Request, Response
from fastapi import status as http_status

from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import ProxyException, UserAPIKeyAuth
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.proxy.common_utils.encrypt_decrypt_utils import (
    decrypt_value_helper,
    encrypt_value_helper,
)
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from litellm.types.realtime import (
    RealtimeClientSecretRequest,
    RealtimeClientSecretResponse,
)

router = APIRouter()

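# Version tag embedded in every encrypted token payload; lets
# _decode_realtime_token_payload distinguish structured tokens from
# legacy raw ephemeral keys.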
_REALTIME_TOKEN_VERSION = "realtime_v1"


def _encode_realtime_token_payload(
    ephemeral_key: str,
    model_id: str,
    user_id: Optional[str],
    team_id: Optional[str],
    expires_at: Optional[int],
) -> str:
    """
    Encode metadata with the upstream ephemeral key so /realtime/calls can
    route without requiring model as a query param.
    """
    payload: Dict[str, Any] = {
        "v": _REALTIME_TOKEN_VERSION,
        "ephemeral_key": ephemeral_key,
        "model_id": model_id,
        "user_id": user_id or "",
        "team_id": team_id or "",
        "expires_at": expires_at,
    }
    return json.dumps(payload, separators=(",", ":"))

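# A minimal round-trip sketch (illustrative values; "ek_abc123" is a
# hypothetical upstream ephemeral key, not a real token):
#
#   _encode_realtime_token_payload(
#       ephemeral_key="ek_abc123",
#       model_id="gpt-4o-realtime-preview",
#       user_id="user-1",
#       team_id=None,
#       expires_at=1730000000,
#   )
#   -> '{"v":"realtime_v1","ephemeral_key":"ek_abc123",'
#      '"model_id":"gpt-4o-realtime-preview","user_id":"user-1",'
#      '"team_id":"","expires_at":1730000000}'
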
def _decode_realtime_token_payload(
    decrypted_value: str,
) -> Optional[Dict[str, Any]]:
    """
    Decode realtime token payload; returns None for legacy/raw ephemeral tokens.
    """
    try:
        decoded = json.loads(decrypted_value)
    except Exception:
        return None

    if not isinstance(decoded, dict):
        return None
    if decoded.get("v") != _REALTIME_TOKEN_VERSION:
        return None
    if not isinstance(decoded.get("ephemeral_key"), str):
        return None
    if not isinstance(decoded.get("model_id"), str):
        return None
    return decoded

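# Decoding sketch (hypothetical values): the JSON produced by
# _encode_realtime_token_payload comes back as a dict, while a legacy raw
# ephemeral key (not valid JSON) returns None so callers can fall back to
# using the value directly:
#
#   _decode_realtime_token_payload('{"v":"realtime_v1","ephemeral_key":"ek",'
#                                  '"model_id":"m","user_id":"","team_id":"",'
#                                  '"expires_at":null}')
#   -> {"v": "realtime_v1", "ephemeral_key": "ek", "model_id": "m", ...}
#
#   _decode_realtime_token_payload("ek_legacy_raw_key")  -> None
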
@router.post(
    "/v1/realtime/client_secrets",
    dependencies=[Depends(user_api_key_auth)],
    tags=["realtime"],
)
@router.post(
    "/realtime/client_secrets",
    dependencies=[Depends(user_api_key_auth)],
    tags=["realtime"],
)
@router.post(
    "/openai/v1/realtime/client_secrets",
    dependencies=[Depends(user_api_key_auth)],
    tags=["realtime"],
)
async def create_realtime_client_secret(
    request: Request,
    fastapi_response: Response,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
) -> RealtimeClientSecretResponse:
    from litellm.proxy.proxy_server import (
        add_litellm_data_to_request,
        general_settings,
        llm_router,
        proxy_config,
        proxy_logging_obj,
        route_request,
        user_model,
        version,
    )

    data: dict = {}
    try:
        body = await _read_request_body(request=request)
        req = RealtimeClientSecretRequest(**body)

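        # Model resolution precedence: session.model, then the root-level
        # "model" field, then the default realtime model.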
        model: str = (
            (req.session.model if req.session else None)
            or req.model
            or "gpt-4o-realtime-preview"
        )

        data = {"model": model}

        # If session is provided, use it; otherwise create one from model
        if req.session:
            data["session"] = req.session.model_dump(exclude_none=True)
        elif req.model:
            # User provided model at root level; convert to session format
            data["session"] = {"type": "realtime", "model": model}

        if req.expires_after:
            data["expires_after"] = req.expires_after.model_dump(exclude_none=True)

        data = await add_litellm_data_to_request(
            data=data,
            request=request,
            general_settings=general_settings,
            user_api_key_dict=user_api_key_dict,
            version=version,
            proxy_config=proxy_config,
        )

        data = await proxy_logging_obj.pre_call_hook(
            user_api_key_dict=user_api_key_dict,
            data=data,
            call_type="acreate_realtime_client_secret",
        )

        verbose_proxy_logger.debug(
            "WebRTC: /v1/realtime/client_secrets (model=%s)", model
        )

        llm_call = await route_request(
            data=data,
            route_type="acreate_realtime_client_secret",
            llm_router=llm_router,
            user_model=user_model,
        )
        upstream_resp: httpx.Response = await llm_call  # type: ignore

    except Exception as e:
        await proxy_logging_obj.post_call_failure_hook(
            user_api_key_dict=user_api_key_dict,
            original_exception=e,
            request_data=data,
        )
        verbose_proxy_logger.error(
            "litellm.proxy.realtime_endpoints.webrtc.create_realtime_client_secret(): Exception - %s",
            str(e),
        )
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "message", str(e)),
                type=getattr(e, "type", "None"),
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", http_status.HTTP_400_BAD_REQUEST),
            )
        raise ProxyException(
            message=getattr(e, "message", str(e)),
            type=getattr(e, "type", "None"),
            param=getattr(e, "param", "None"),
            code=getattr(e, "status_code", 500),
        )

    if upstream_resp.status_code != 200:
        verbose_proxy_logger.error(
            "WebRTC client_secrets upstream error %s: %s",
            upstream_resp.status_code,
            upstream_resp.text,
        )
        return Response(  # type: ignore[return-value]
            content=upstream_resp.content,
            status_code=upstream_resp.status_code,
            media_type="application/json",
        )

    upstream_json: dict = upstream_resp.json()

    # Encrypt the upstream ephemeral key with routing metadata so
    # /realtime/calls can recover the model without requiring query params.
    raw_value: str = upstream_json.get("value", "")
    expires_at = upstream_json.get("expires_at")
    token_payload = _encode_realtime_token_payload(
        ephemeral_key=raw_value,
        model_id=model,
        user_id=getattr(user_api_key_dict, "user_id", None),
        team_id=getattr(user_api_key_dict, "team_id", None),
        expires_at=expires_at if isinstance(expires_at, int) else None,
    )
    encrypted_token: str = encrypt_value_helper(token_payload)
    upstream_json["value"] = encrypted_token

    # Mirror the encrypted token into session.client_secret.value so clients
    # that read the nested field also receive the proxy token.
    session_obj: Optional[dict] = upstream_json.get("session")
    if isinstance(session_obj, dict):
        cs = session_obj.get("client_secret")
        if isinstance(cs, dict) and "value" in cs:
            cs["value"] = encrypted_token
        upstream_json["session"] = session_obj

    return RealtimeClientSecretResponse(**upstream_json)

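# A minimal client-side sketch of minting a client secret through the proxy
# (the base URL and "sk-1234" key are illustrative, not part of this module):
#
#   import httpx
#
#   resp = httpx.post(
#       "http://localhost:4000/v1/realtime/client_secrets",
#       headers={"Authorization": "Bearer sk-1234"},
#       json={"session": {"type": "realtime", "model": "gpt-4o-realtime-preview"}},
#   )
#   token = resp.json()["value"]  # encrypted proxy token, not the raw upstream key
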
@router.post(
    "/v1/realtime/calls",
    tags=["realtime"],
)
@router.post(
    "/realtime/calls",
    tags=["realtime"],
)
@router.post(
    "/openai/v1/realtime/calls",
    tags=["realtime"],
)
async def proxy_realtime_calls(
    request: Request,
    fastapi_response: Response,
) -> Response:
    from litellm.proxy.proxy_server import (
        add_litellm_data_to_request,
        general_settings,
        llm_router,
        proxy_config,
        proxy_logging_obj,
        route_request,
        user_model,
        version,
    )

    # Auth: the Bearer token is the encrypted ephemeral key issued by
    # /realtime/client_secrets, not a standard proxy API key.
    auth_header: Optional[str] = request.headers.get("Authorization")
    if not auth_header or not auth_header.startswith("Bearer "):
        return Response(
            content=json.dumps({"error": "Missing or invalid Authorization header"}),
            status_code=http_status.HTTP_401_UNAUTHORIZED,
            media_type="application/json",
        )

    encrypted_token = auth_header.removeprefix("Bearer ").strip()
    decrypted_token_value = decrypt_value_helper(
        value=encrypted_token,
        key="realtime_calls_auth",
    )
    if not decrypted_token_value:
        return Response(
            content=json.dumps({"error": "Invalid or expired token"}),
            status_code=http_status.HTTP_401_UNAUTHORIZED,
            media_type="application/json",
        )

    sdp_body: bytes = await request.body()
    decoded_payload = _decode_realtime_token_payload(decrypted_token_value)
    if decoded_payload is not None:
        # Check token expiry; expires_at is epoch seconds
        expires_at = decoded_payload.get("expires_at")
        if isinstance(expires_at, int):
            if time.time() > expires_at:
                return Response(
                    content=json.dumps({"error": "Token has expired"}),
                    status_code=http_status.HTTP_401_UNAUTHORIZED,
                    media_type="application/json",
                )

        openai_ephemeral_key = decoded_payload.get("ephemeral_key", "")
        model = (
            decoded_payload.get("model_id")
            or request.query_params.get("model")
            or "gpt-4o-realtime-preview"
        )
        user_id = decoded_payload.get("user_id") or None
        team_id = decoded_payload.get("team_id") or None
    else:
        # Backward compatibility: older tokens contained only the encrypted
        # upstream key.
        openai_ephemeral_key = decrypted_token_value
        model = request.query_params.get("model", "gpt-4o-realtime-preview")
        user_id = None
        team_id = None

    # Build a minimal UserAPIKeyAuth with user/team IDs from the token
    # so spend tracking and budget enforcement work correctly.
    minimal_auth = UserAPIKeyAuth(
        user_id=user_id,
        team_id=team_id,
    )

    data: dict = {}
    try:
        # Build session config for the multipart form data
        session_config = {
            "type": "realtime",
            "model": model,
        }

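        # "sdp_body" carries the caller's raw SDP offer read above;
        # route_request hands these fields to the downstream realtime handler.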
        data = {
            "model": model,
            "openai_ephemeral_key": openai_ephemeral_key,
            "sdp_body": sdp_body,
            "session": session_config,
        }

        data = await add_litellm_data_to_request(
            data=data,
            request=request,
            general_settings=general_settings,
            user_api_key_dict=minimal_auth,
            version=version,
            proxy_config=proxy_config,
        )

        data = await proxy_logging_obj.pre_call_hook(
            user_api_key_dict=minimal_auth,
            data=data,
            call_type="arealtime_calls",
        )

        verbose_proxy_logger.debug("WebRTC: /v1/realtime/calls (model=%s)", model)

        llm_call = await route_request(
            data=data,
            route_type="arealtime_calls",
            llm_router=llm_router,
            user_model=user_model,
        )
        upstream_resp: httpx.Response = await llm_call  # type: ignore

    except Exception as e:
        await proxy_logging_obj.post_call_failure_hook(
            user_api_key_dict=minimal_auth,
            original_exception=e,
            request_data=data,
        )
        verbose_proxy_logger.error(
            "litellm.proxy.realtime_endpoints.webrtc.proxy_realtime_calls(): Exception - %s",
            str(e),
        )
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "message", str(e)),
                type=getattr(e, "type", "None"),
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", http_status.HTTP_400_BAD_REQUEST),
            )
        raise ProxyException(
            message=getattr(e, "message", str(e)),
            type=getattr(e, "type", "None"),
            param=getattr(e, "param", "None"),
            code=getattr(e, "status_code", 500),
        )

    # Forward the upstream body unchanged; a successful call returns the SDP
    # answer with an application/sdp content type.
    return Response(
        content=upstream_resp.content,
        status_code=upstream_resp.status_code,
        media_type=upstream_resp.headers.get("content-type", "application/sdp"),
    )

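# End-to-end sketch of the WebRTC handshake through the proxy (paths as
# registered above; header values illustrative):
#
#   1. The client POSTs to /v1/realtime/client_secrets with its proxy API key
#      and receives an encrypted token in "value".
#   2. The client POSTs its SDP offer to /v1/realtime/calls with
#      "Authorization: Bearer <encrypted token>".
#   3. The response body is the upstream answer (application/sdp on success),
#      completing the peer connection.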