chore: initial public snapshot for github upload

Author: Your Name
Date: 2026-03-26 20:06:14 +08:00
Commit: 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions

litellm/proxy/management_endpoints/usage_endpoints/__init__.py

@@ -0,0 +1,9 @@
"""
Usage endpoints package.
Re-exports the router from the endpoints module.
"""
from litellm.proxy.management_endpoints.usage_endpoints.endpoints import ( # noqa: F401
router,
)

litellm/proxy/management_endpoints/usage_endpoints/ai_usage_chat.py

@@ -0,0 +1,578 @@
"""
AI Usage Chat - uses LLM tool calling to answer questions about
usage/spend data by querying the aggregated daily activity endpoints.
"""
import json
from datetime import date
from typing import Any, AsyncIterator, Callable, Dict, List, Literal, Optional, cast
from typing_extensions import TypedDict
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.constants import DEFAULT_COMPETITOR_DISCOVERY_MODEL
from litellm.types.proxy.management_endpoints.common_daily_activity import (
SpendAnalyticsPaginatedResponse,
)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
USAGE_AI_TEMPERATURE = 0.2
TABLE_DAILY_USER_SPEND = "litellm_dailyuserspend"
TABLE_DAILY_TEAM_SPEND = "litellm_dailyteamspend"
TABLE_DAILY_TAG_SPEND = "litellm_dailytagspend"
ENTITY_FIELD_USER = "user_id"
ENTITY_FIELD_TEAM = "team_id"
ENTITY_FIELD_TAG = "tag"
PAGINATED_PAGE_SIZE = 200
MAX_CHAT_MESSAGES = 20
TOP_N_MODELS = 15
TOP_N_PROVIDERS = 10
TOP_N_KEYS = 10
# ---------------------------------------------------------------------------
# Types
# ---------------------------------------------------------------------------
class SSEStatusEvent(TypedDict):
type: Literal["status"]
message: str
class SSEToolCallEvent(TypedDict, total=False):
type: Literal["tool_call"]
tool_name: str
tool_label: str
arguments: Dict[str, str]
status: Literal["running", "complete", "error"]
error: str
class SSEChunkEvent(TypedDict):
type: Literal["chunk"]
content: str
class SSEDoneEvent(TypedDict):
type: Literal["done"]
class SSEErrorEvent(TypedDict):
type: Literal["error"]
message: str
SSEEvent = (
SSEStatusEvent | SSEToolCallEvent | SSEChunkEvent | SSEDoneEvent | SSEErrorEvent
)
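# Illustrative event ordering for a typical request (see stream_usage_ai_chat below):
#   status -> tool_call(running) -> tool_call(complete) -> status -> chunk ... -> done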
class ToolHandler(TypedDict):
fetch: Callable[..., Any]
summarise: Callable[[Dict[str, Any]], str]
label: str
# ---------------------------------------------------------------------------
# Tool definitions (OpenAI function-calling schema)
# ---------------------------------------------------------------------------
_DATE_PARAMS = {
"start_date": {"type": "string", "description": "Start date in YYYY-MM-DD format"},
"end_date": {"type": "string", "description": "End date in YYYY-MM-DD format"},
}
_TOOL_USAGE = {
"type": "function",
"function": {
"name": "get_usage_data",
"description": (
"Fetch aggregated global usage/spend data. Returns daily spend, "
"token counts, request counts, and breakdowns by model, provider, "
"and API key. Use for overall spend, top models, top providers."
),
"parameters": {
"type": "object",
"properties": {
**_DATE_PARAMS,
"user_id": {
"type": "string",
"description": "Optional user ID filter. Omit for global view.",
},
},
"required": ["start_date", "end_date"],
},
},
}
_TOOL_TEAM = {
"type": "function",
"function": {
"name": "get_team_usage_data",
"description": (
"Fetch usage/spend data broken down by team. Use for questions "
"like 'which team spends the most' or 'show me team X usage'."
),
"parameters": {
"type": "object",
"properties": {
**_DATE_PARAMS,
"team_ids": {
"type": "string",
"description": "Optional comma-separated team IDs. Omit for all teams.",
},
},
"required": ["start_date", "end_date"],
},
},
}
_TOOL_TAG = {
"type": "function",
"function": {
"name": "get_tag_usage_data",
"description": (
"Fetch usage/spend data broken down by tag. Tags are labels "
"attached to requests (features, environments, credentials)."
),
"parameters": {
"type": "object",
"properties": {
**_DATE_PARAMS,
"tags": {
"type": "string",
"description": "Optional comma-separated tag names. Omit for all tags.",
},
},
"required": ["start_date", "end_date"],
},
},
}
TOOLS_BASE = [_TOOL_USAGE]
TOOLS_ADMIN = [_TOOL_USAGE, _TOOL_TEAM, _TOOL_TAG]
def get_tools_for_role(is_admin: bool) -> List[Dict[str, Any]]:
"""Return the tool list appropriate for the user's role."""
return TOOLS_ADMIN if is_admin else TOOLS_BASE
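# For example:
#   get_tools_for_role(is_admin=False) -> [_TOOL_USAGE]
#   get_tools_for_role(is_admin=True)  -> [_TOOL_USAGE, _TOOL_TEAM, _TOOL_TAG]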
_SYSTEM_PROMPT_BASE = (
"You are an AI assistant embedded in the LiteLLM Usage dashboard. "
"You help users understand their LLM API spend and usage data.\n\n"
"ALWAYS call the appropriate tool(s) first to fetch data before answering. "
"You may call multiple tools if the question spans different dimensions.\n\n"
"Guidelines:\n"
"- Be concise and specific. Use exact numbers from the data.\n"
"- Format costs as dollar amounts (e.g. $12.34).\n"
"- When comparing entities, show a ranked list.\n"
"- If data is empty or no results found, say so clearly.\n"
"- Do not hallucinate data — only use what the tools return.\n"
"- Today's date will be provided below. Use it to interpret relative dates "
"like 'this week', 'this month', 'last 7 days', etc."
)
_TOOL_DESCRIPTIONS_ADMIN = (
"You have access to these tools:\n"
"- `get_usage_data`: Global/user-level usage (spend, models, providers, API keys)\n"
"- `get_team_usage_data`: Team-level usage breakdown\n"
"- `get_tag_usage_data`: Tag-level usage breakdown\n\n"
)
_TOOL_DESCRIPTIONS_BASE = (
"You have access to this tool:\n"
"- `get_usage_data`: Your usage data (spend, models, providers, API keys)\n\n"
)
def _build_system_prompt(is_admin: bool) -> str:
"""Build role-appropriate system prompt with today's date."""
tool_desc = _TOOL_DESCRIPTIONS_ADMIN if is_admin else _TOOL_DESCRIPTIONS_BASE
return (
f"{_SYSTEM_PROMPT_BASE}\n\n{tool_desc}"
f"Today's date: {date.today().isoformat()}"
)
# keep a public reference for test assertions
SYSTEM_PROMPT = _SYSTEM_PROMPT_BASE
# ---------------------------------------------------------------------------
# Data fetchers
# ---------------------------------------------------------------------------
def _parse_csv_ids(raw: Optional[str]) -> Optional[List[str]]:
if not raw:
return None
return [t.strip() for t in raw.split(",") if t.strip()]
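# For example, per the logic above:
#   _parse_csv_ids("team-a, team-b,") -> ["team-a", "team-b"]
#   _parse_csv_ids(None) -> None          # also for the empty string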
async def _query_activity(
table_name: str,
entity_id_field: str,
entity_id: Optional[Any],
start_date: str,
end_date: str,
*,
use_aggregated: bool = False,
) -> SpendAnalyticsPaginatedResponse:
"""Shared helper that calls the daily activity query layer."""
from litellm.proxy.management_endpoints.common_daily_activity import (
get_daily_activity,
get_daily_activity_aggregated,
)
from litellm.proxy.proxy_server import prisma_client
if use_aggregated:
return await get_daily_activity_aggregated(
prisma_client=prisma_client,
table_name=table_name,
entity_id_field=entity_id_field,
entity_id=entity_id,
entity_metadata_field=None,
start_date=start_date,
end_date=end_date,
model=None,
api_key=None,
)
return await get_daily_activity(
prisma_client=prisma_client,
table_name=table_name,
entity_id_field=entity_id_field,
entity_id=entity_id,
entity_metadata_field=None,
start_date=start_date,
end_date=end_date,
model=None,
api_key=None,
page=1,
page_size=PAGINATED_PAGE_SIZE,
)
async def _fetch_usage_data(
start_date: str, end_date: str, user_id: Optional[str] = None
) -> Dict[str, Any]:
resp = await _query_activity(
TABLE_DAILY_USER_SPEND,
ENTITY_FIELD_USER,
user_id,
start_date,
end_date,
use_aggregated=True,
)
return resp.model_dump(mode="json")
async def _fetch_team_usage_data(
start_date: str, end_date: str, team_ids: Optional[str] = None
) -> Dict[str, Any]:
resp = await _query_activity(
TABLE_DAILY_TEAM_SPEND,
ENTITY_FIELD_TEAM,
_parse_csv_ids(team_ids),
start_date,
end_date,
)
return resp.model_dump(mode="json")
async def _fetch_tag_usage_data(
start_date: str, end_date: str, tags: Optional[str] = None
) -> Dict[str, Any]:
resp = await _query_activity(
TABLE_DAILY_TAG_SPEND,
ENTITY_FIELD_TAG,
_parse_csv_ids(tags),
start_date,
end_date,
)
return resp.model_dump(mode="json")
# ---------------------------------------------------------------------------
# Summarisers — convert raw JSON to concise text the LLM can reason over
# ---------------------------------------------------------------------------
def _accumulate_breakdown(
results: List[Dict[str, Any]], dimension: str, fields: List[str]
) -> Dict[str, Dict[str, float]]:
"""Aggregate a single breakdown dimension across days."""
totals: Dict[str, Dict[str, float]] = {}
for day in results:
for key, entry in day.get("breakdown", {}).get(dimension, {}).items():
if key not in totals:
totals[key] = {f: 0.0 for f in fields}
m = entry.get("metrics", {})
for f in fields:
totals[key][f] += m.get(f, 0)
return totals
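# Illustrative shape of the accumulated totals (values are made up):
#   _accumulate_breakdown(results, "models", ["spend", "api_requests"])
#   -> {"gpt-4o": {"spend": 1.25, "api_requests": 42.0}, ...}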
def _ranked_lines(
totals: Dict[str, Dict[str, float]],
fmt: Callable[[str, Dict[str, float]], str],
limit: int,
) -> List[str]:
"""Sort by spend descending, format each entry, and truncate."""
return [
fmt(name, vals)
for name, vals in sorted(totals.items(), key=lambda x: -x[1].get("spend", 0))[
:limit
]
]
def _summarise_usage_data(data: Dict[str, Any]) -> str:
meta = data.get("metadata", {})
results = data.get("results", [])
header = (
f"Total Spend: ${meta.get('total_spend', 0):.4f}\n"
f"Total Requests: {meta.get('total_api_requests', 0)}\n"
f"Successful: {meta.get('total_successful_requests', 0)} | "
f"Failed: {meta.get('total_failed_requests', 0)}\n"
f"Total Tokens: {meta.get('total_tokens', 0)}"
)
models = _accumulate_breakdown(
results, "models", ["spend", "api_requests", "total_tokens"]
)
providers = _accumulate_breakdown(results, "providers", ["spend", "api_requests"])
model_lines = _ranked_lines(
models,
lambda n, d: f" - {n}: ${d['spend']:.4f} ({int(d['api_requests'])} reqs, {int(d['total_tokens'])} tokens)",
TOP_N_MODELS,
)
provider_lines = _ranked_lines(
providers,
lambda n, d: f" - {n}: ${d['spend']:.4f} ({int(d['api_requests'])} reqs)",
TOP_N_PROVIDERS,
)
sections = [header, ""]
sections += ["Top Models by Spend:"] + (model_lines or [" (no data)"]) + [""]
sections += ["Top Providers by Spend:"] + (provider_lines or [" (no data)"])
return "\n".join(sections)
def _summarise_entity_data(data: Dict[str, Any], entity_label: str) -> str:
"""Summarise team/tag entity usage data."""
results = data.get("results", [])
if not results:
return f"No {entity_label} usage data found for the given date range."
totals: Dict[str, Dict[str, Any]] = {}
for day in results:
for eid, entry in day.get("breakdown", {}).get("entities", {}).items():
if eid not in totals:
alias = entry.get("metadata", {}).get("alias", eid)
totals[eid] = {"alias": alias, "spend": 0.0, "requests": 0, "tokens": 0}
m = entry.get("metrics", {})
totals[eid]["spend"] += m.get("spend", 0)
totals[eid]["requests"] += m.get("api_requests", 0)
totals[eid]["tokens"] += m.get("total_tokens", 0)
lines = [f"{entity_label} Usage ({len(totals)} {entity_label.lower()}s):", ""]
for eid, d in sorted(totals.items(), key=lambda x: -x[1]["spend"]):
label = d["alias"] if d["alias"] != eid else eid
lines.append(
f"- {label} (ID: {eid}): ${d['spend']:.4f} | "
f"{int(d['requests'])} reqs | {int(d['tokens'])} tokens"
)
return "\n".join(lines)
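# Illustrative output for entity_label="Team" (values are made up):
#   Team Usage (2 teams):
#
#   - prod (ID: team-1): $12.3400 | 120 reqs | 45000 tokens
#   - dev (ID: team-2): $1.5000 | 10 reqs | 900 tokens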
# ---------------------------------------------------------------------------
# Tool dispatch registry
# ---------------------------------------------------------------------------
TOOL_HANDLERS: Dict[str, ToolHandler] = {
"get_usage_data": ToolHandler(
fetch=_fetch_usage_data,
summarise=_summarise_usage_data,
label="global usage data",
),
"get_team_usage_data": ToolHandler(
fetch=_fetch_team_usage_data,
summarise=lambda data: _summarise_entity_data(data, "Team"),
label="team usage data",
),
"get_tag_usage_data": ToolHandler(
fetch=_fetch_tag_usage_data,
summarise=lambda data: _summarise_entity_data(data, "Tag"),
label="tag usage data",
),
}
# ---------------------------------------------------------------------------
# SSE streaming
# ---------------------------------------------------------------------------
def _sse(event: SSEEvent) -> str:
return f"data: {json.dumps(event)}\n\n"
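# Example wire format:
#   _sse({"type": "status", "message": "Thinking..."})
#   -> 'data: {"type": "status", "message": "Thinking..."}\n\n'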
def _resolve_fetch_kwargs(
fn_name: str,
fn_args: Dict[str, str],
user_id: Optional[str],
is_admin: bool,
) -> Dict[str, Any]:
"""Build keyword arguments for a tool's fetch function."""
start_date = fn_args.get("start_date", "")
end_date = fn_args.get("end_date", "")
if not start_date or not end_date:
raise ValueError("Missing required start_date or end_date from tool arguments")
kwargs: Dict[str, Any] = {"start_date": start_date, "end_date": end_date}
if fn_name == "get_usage_data":
if not is_admin:
kwargs["user_id"] = user_id
elif fn_args.get("user_id"):
kwargs["user_id"] = fn_args["user_id"]
elif fn_name == "get_team_usage_data" and fn_args.get("team_ids"):
kwargs["team_ids"] = fn_args["team_ids"]
elif fn_name == "get_tag_usage_data" and fn_args.get("tags"):
kwargs["tags"] = fn_args["tags"]
return kwargs
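# Example with hypothetical IDs: a non-admin asking for global usage always gets
# their own user_id forced into the filter:
#   _resolve_fetch_kwargs(
#       "get_usage_data",
#       {"start_date": "2026-03-01", "end_date": "2026-03-26"},
#       user_id="user-123",
#       is_admin=False,
#   )
#   -> {"start_date": "2026-03-01", "end_date": "2026-03-26", "user_id": "user-123"}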
async def _execute_tool_call(
handler: ToolHandler,
fn_name: str,
fn_args: Dict[str, str],
user_id: Optional[str],
is_admin: bool,
) -> str:
"""Run a single tool and return the summarised result text."""
kwargs = _resolve_fetch_kwargs(fn_name, fn_args, user_id, is_admin)
raw_data = await handler["fetch"](**kwargs)
return handler["summarise"](raw_data)
async def _process_tool_call(
tc: Any,
chat_messages: List[Dict[str, Any]],
user_id: Optional[str],
is_admin: bool,
) -> AsyncIterator[str]:
"""Execute a single tool call, yielding SSE events for status."""
fn_name = tc.function.name
fn_args = json.loads(tc.function.arguments)
allowed_names = {t["function"]["name"] for t in get_tools_for_role(is_admin)}
handler = TOOL_HANDLERS.get(fn_name)
if fn_name not in allowed_names or not handler:
chat_messages.append(
{
"role": "tool",
"tool_call_id": tc.id,
"content": f"Tool not available: {fn_name}",
}
)
return
tool_event_base = {
"type": "tool_call",
"tool_name": fn_name,
"tool_label": handler["label"],
"arguments": fn_args,
}
yield _sse(cast(SSEToolCallEvent, {**tool_event_base, "status": "running"}))
try:
tool_result = await _execute_tool_call(
handler, fn_name, fn_args, user_id, is_admin
)
yield _sse(cast(SSEToolCallEvent, {**tool_event_base, "status": "complete"}))
except Exception as e:
verbose_proxy_logger.error("Tool %s failed: %s", fn_name, e)
tool_result = f"Error fetching {handler['label']}. Please try again."
yield _sse(cast(SSEToolCallEvent, {**tool_event_base, "status": "error"}))
chat_messages.append(
{"role": "tool", "tool_call_id": tc.id, "content": tool_result}
)
async def _stream_final_response(
model: str, chat_messages: List[Dict[str, Any]]
) -> AsyncIterator[str]:
"""Stream the final LLM response after tool results are appended."""
yield _sse({"type": "status", "message": "Analyzing results..."})
response = await litellm.acompletion(
model=model,
messages=chat_messages,
stream=True,
temperature=USAGE_AI_TEMPERATURE,
)
async for chunk in response:
delta = chunk.choices[0].delta.content
if delta:
yield _sse({"type": "chunk", "content": delta})
async def stream_usage_ai_chat(
messages: List[Dict[str, str]],
model: Optional[str] = None,
user_id: Optional[str] = None,
is_admin: bool = False,
) -> AsyncIterator[str]:
"""Stream SSE events: status → tool_call → chunk → done."""
resolved_model = (model or "").strip() or DEFAULT_COMPETITOR_DISCOVERY_MODEL
truncated = (
messages[-MAX_CHAT_MESSAGES:] if len(messages) > MAX_CHAT_MESSAGES else messages
)
chat_messages: List[Dict[str, Any]] = [
{"role": "system", "content": _build_system_prompt(is_admin)},
*truncated,
]
try:
yield _sse({"type": "status", "message": "Thinking..."})
tools = get_tools_for_role(is_admin)
response = await litellm.acompletion(
model=resolved_model,
messages=chat_messages,
tools=tools,
temperature=USAGE_AI_TEMPERATURE,
)
choice = response.choices[0] # type: ignore
if not choice.message.tool_calls:
if choice.message.content:
yield _sse({"type": "chunk", "content": choice.message.content})
yield _sse({"type": "done"})
return
chat_messages.append(choice.message.model_dump())
for tc in choice.message.tool_calls:
async for event in _process_tool_call(tc, chat_messages, user_id, is_admin):
yield event
async for event in _stream_final_response(resolved_model, chat_messages):
yield event
yield _sse({"type": "done"})
except Exception as e:
verbose_proxy_logger.error("AI usage chat failed: %s", e)
yield _sse(
{
"type": "error",
"message": "An internal error occurred. Please try again.",
}
)

litellm/proxy/management_endpoints/usage_endpoints/endpoints.py

@@ -0,0 +1,65 @@
"""
USAGE AI CHAT ENDPOINTS
/usage/ai/chat - Stream AI chat responses about usage data
"""
from typing import List, Literal, Optional
from fastapi import APIRouter, Depends, Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
router = APIRouter()
class ChatMessage(BaseModel):
role: Literal["user", "assistant"]
content: str
class UsageAIChatRequest(BaseModel):
messages: List[ChatMessage] = Field(
..., description="Chat messages (user/assistant history)"
)
model: Optional[str] = Field(default=None, description="Model to use for AI chat")
@router.post(
"/usage/ai/chat",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
)
async def usage_ai_chat(
data: UsageAIChatRequest,
request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
AI chat about usage data. Streams SSE events with the AI response.
The AI agent has access to tools that query aggregated daily activity data.
"""
from litellm.proxy.management_endpoints.common_utils import (
_user_has_admin_view,
)
from litellm.proxy.management_endpoints.usage_endpoints.ai_usage_chat import (
stream_usage_ai_chat,
)
is_admin = _user_has_admin_view(user_api_key_dict)
user_id = user_api_key_dict.user_id
messages = [{"role": m.role, "content": m.content} for m in data.messages]
return StreamingResponse(
stream_usage_ai_chat(
messages=messages,
model=data.model,
user_id=user_id,
is_admin=is_admin,
),
media_type="text/event-stream",
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
)
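# Example request (illustrative; host and API key depend on your deployment):
#   curl -N -X POST "http://localhost:4000/usage/ai/chat" \
#     -H "Authorization: Bearer sk-..." \
#     -H "Content-Type: application/json" \
#     -d '{"messages": [{"role": "user", "content": "Which team spent the most this month?"}]}'
# The response is a text/event-stream of `data: {...}` frames
# (status -> tool_call -> chunk -> done) produced by stream_usage_ai_chat.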