import json
import os
import re
from importlib.resources import files
from typing import Any, Dict, List, Optional

import litellm
from fastapi import APIRouter, Depends, HTTPException

from litellm._logging import verbose_logger
from litellm.litellm_core_utils.get_blog_posts import (
    BlogPost,
    BlogPostsResponse,
    GetBlogPosts,
    get_blog_posts,
)
from litellm.proxy._types import CommonProxyErrors
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.types.agents import AgentCard
from litellm.types.mcp import MCPPublicServer
from litellm.types.proxy.management_endpoints.model_management_endpoints import (
    ModelGroupInfoProxy,
)
from litellm.types.proxy.public_endpoints.public_endpoints import (
    AgentCreateInfo,
    ProviderCreateInfo,
    PublicModelHubInfo,
    SupportedEndpointsResponse,
)
from litellm.types.utils import LlmProviders

router = APIRouter()

# ---------------------------------------------------------------------------
# /public/endpoints — helpers
# ---------------------------------------------------------------------------

# Curated display labels + canonical proxy paths for each endpoint key found in
# provider_endpoints_support_backup.json. Keys missing from this map get a
# label/path derived mechanically from the key itself.
_ENDPOINT_METADATA: Dict[str, Dict[str, str]] = {
    "chat_completions": {"label": "Chat Completions", "endpoint": "/chat/completions"},
    "messages": {"label": "Messages", "endpoint": "/messages"},
    "responses": {"label": "Responses", "endpoint": "/responses"},
    "embeddings": {"label": "Embeddings", "endpoint": "/embeddings"},
    "image_generations": {
        "label": "Image Generations",
        "endpoint": "/images/generations",
    },
    "audio_transcriptions": {
        "label": "Audio Transcriptions",
        "endpoint": "/audio/transcriptions",
    },
    "audio_speech": {"label": "Audio Speech", "endpoint": "/audio/speech"},
    "moderations": {"label": "Moderations", "endpoint": "/moderations"},
    "batches": {"label": "Batches", "endpoint": "/batches"},
    "rerank": {"label": "Rerank", "endpoint": "/rerank"},
    "ocr": {"label": "OCR", "endpoint": "/ocr"},
    "search": {"label": "Search", "endpoint": "/search"},
    "skills": {"label": "Skills", "endpoint": "/skills"},
    "interactions": {"label": "Interactions", "endpoint": "/interactions"},
    "a2a": {"label": "A2A (Agent Gateway)", "endpoint": "/a2a/{agent}/message/send"},
    "container": {"label": "Containers", "endpoint": "/containers"},
    "container_files": {
        "label": "Container Files",
        "endpoint": "/containers/{id}/files",
    },
    "compact": {"label": "Compact", "endpoint": "/responses/compact"},
    "files": {"label": "Files", "endpoint": "/files"},
    "image_edits": {"label": "Image Edits", "endpoint": "/images/edits"},
    "vector_stores_create": {
        "label": "Vector Stores (Create)",
        "endpoint": "/vector_stores",
    },
    "vector_stores_search": {
        "label": "Vector Stores (Search)",
        "endpoint": "/vector_stores/{id}/search",
    },
    "vector_store_files": {
        "label": "Vector Store Files",
        "endpoint": "/vector_stores/{id}/files",
    },
    "video_generations": {
        "label": "Video Generations",
        "endpoint": "/videos/generations",
    },
    "assistants": {"label": "Assistants", "endpoint": "/assistants"},
    "fine_tuning": {"label": "Fine Tuning", "endpoint": "/fine_tuning/jobs"},
    "text_completion": {"label": "Text Completion", "endpoint": "/completions"},
    "realtime": {"label": "Realtime", "endpoint": "/realtime"},
    "count_tokens": {"label": "Count Tokens", "endpoint": "/utils/token_counter"},
    "image_variations": {"label": "Image Variations", "endpoint": "/images/variations"},
    "generateContent": {"label": "Generate Content", "endpoint": "/generateContent"},
    "bedrock_invoke": {"label": "Bedrock Invoke", "endpoint": "/bedrock/invoke"},
    "bedrock_converse": {"label": "Bedrock Converse", "endpoint": "/bedrock/converse"},
    "rag_ingest": {"label": "RAG Ingest", "endpoint": "/rag/ingest"},
    "rag_query": {"label": "RAG Query", "endpoint": "/rag/query"},
}

# Matches a trailing "(`slug`)" suffix on provider display names so it can be
# stripped before returning to the UI.
_SLUG_SUFFIX_RE = re.compile(r"\s*\(`[^`]+`\)\s*$")

# Loaded once on first request; never invalidated (local file, no TTL needed).
_cached_endpoints: Optional[SupportedEndpointsResponse] = None


def _clean_display_name(raw: str) -> str:
    """Strip a trailing "(`slug`)" suffix from a provider display name."""
    return _SLUG_SUFFIX_RE.sub("", raw).strip()


def _build_endpoints(raw: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Transform raw provider_endpoints_support_backup.json into the response shape.

    Args:
        raw: Parsed JSON with a top-level "providers" mapping of
            provider slug -> {"display_name": ..., "endpoints": {key: bool}}.

    Returns:
        One dict per endpoint key (in first-seen insertion order across all
        providers), each with "key", "label", "endpoint", and the list of
        supporting "providers" (slug + cleaned display name).
    """
    providers: Dict[str, Any] = raw.get("providers", {})

    # Collect endpoint keys in insertion order (union across all providers).
    seen: set = set()
    all_keys: List[str] = []
    for provider_data in providers.values():
        for key in provider_data.get("endpoints", {}):
            if key not in seen:
                seen.add(key)
                all_keys.append(key)

    result: List[Dict[str, Any]] = []
    for key in all_keys:
        meta = _ENDPOINT_METADATA.get(key)
        # Fall back to a mechanical label/path when there is no curated entry.
        label = meta["label"] if meta else key.replace("_", " ").title()
        path = meta["endpoint"] if meta else "/" + key.replace("_", "/")
        # Providers whose endpoints map has a truthy value for this key.
        supporting: List[Dict[str, str]] = [
            {
                "slug": slug,
                "display_name": _clean_display_name(pd.get("display_name", slug)),
            }
            for slug, pd in providers.items()
            if pd.get("endpoints", {}).get(key)
        ]
        result.append(
            {"key": key, "label": label, "endpoint": path, "providers": supporting}
        )
    return result


def _load_endpoints() -> List[Dict[str, Any]]:
    """Read the bundled backup JSON and build the /public/endpoints payload."""
    raw = json.loads(
        files("litellm")
        .joinpath("provider_endpoints_support_backup.json")
        .read_text(encoding="utf-8")
    )
    return _build_endpoints(raw)


# ---------------------------------------------------------------------------


@router.get(
    "/public/model_hub",
    tags=["public", "model management"],
    dependencies=[Depends(user_api_key_auth)],
    response_model=List[ModelGroupInfoProxy],
)
async def public_model_hub():
    """Return the publicly exposed model groups, enriched with latest health info.

    Raises:
        HTTPException: 400 when no LLM router is configured.
    """
    import litellm
    from litellm.proxy.proxy_server import (
        _get_model_group_info,
        llm_router,
        prisma_client,
    )
    from litellm.proxy.health_endpoints._health_endpoints import (
        _convert_health_check_to_dict,
    )

    if llm_router is None:
        raise HTTPException(
            status_code=400, detail=CommonProxyErrors.no_llm_router.value
        )

    model_groups: List[ModelGroupInfoProxy] = []
    if litellm.public_model_groups is not None:
        model_groups = _get_model_group_info(
            llm_router=llm_router,
            all_models_str=litellm.public_model_groups,
            model_group=None,
        )

    # Fetch health check information if available. This is best-effort: any
    # DB failure must not break the model hub, but it should be visible in
    # debug logs instead of being silently swallowed.
    health_checks_map = {}
    if prisma_client is not None:
        try:
            latest_checks = await prisma_client.get_all_latest_health_checks()
            for check in latest_checks:
                key = check.model_id if check.model_id else check.model_name
                if key:
                    health_check_dict = _convert_health_check_to_dict(check)
                    health_checks_map[key] = health_check_dict
                    # Index by model_name too so lookup by group name works.
                    if check.model_name:
                        health_checks_map[check.model_name] = health_check_dict
        except Exception as e:
            verbose_logger.debug(
                "public_model_hub: failed to fetch health checks: %s", str(e)
            )

    for model_group in model_groups:
        health_info = health_checks_map.get(model_group.model_group)
        if health_info:
            model_group.health_status = health_info.get("status")
            model_group.health_response_time = health_info.get("response_time_ms")
            model_group.health_checked_at = health_info.get("checked_at")

    return model_groups


@router.get(
    "/public/agent_hub",
    tags=["[beta] Agents", "public"],
    dependencies=[Depends(user_api_key_auth)],
    response_model=List[AgentCard],
)
async def get_agents():
    """Return agent cards for agents explicitly listed in `public_agent_groups`."""
    import litellm
    from litellm.proxy.agent_endpoints.agent_registry import global_agent_registry

    # Nothing is public unless explicitly configured — skip the registry
    # lookup entirely in that case.
    if litellm.public_agent_groups is None:
        return []

    agents = global_agent_registry.get_public_agent_list()
    return [
        agent.agent_card_params
        for agent in agents
        if agent.agent_id in litellm.public_agent_groups
    ]


@router.get(
    "/public/mcp_hub",
    tags=["[beta] MCP", "public"],
    dependencies=[Depends(user_api_key_auth)],
    response_model=List[MCPPublicServer],
)
async def get_mcp_servers():
    """Return the MCP servers flagged as public in the server manager."""
    from litellm.proxy._experimental.mcp_server.mcp_server_manager import (
        global_mcp_server_manager,
    )

    public_mcp_servers = global_mcp_server_manager.get_public_mcp_servers()
    return [
        MCPPublicServer(
            **server.model_dump(),
        )
        for server in public_mcp_servers
    ]


@router.get(
    "/public/model_hub/info",
    tags=["public", "model management"],
    response_model=PublicModelHubInfo,
)
async def public_model_hub_info():
    """Return branding/version metadata for the public model hub page."""
    import litellm
    from litellm.proxy.proxy_server import _title, version

    # Enterprise-only custom docs description; absent in OSS installs.
    try:
        from litellm_enterprise.proxy.proxy_server import EnterpriseProxyConfig

        custom_docs_description = EnterpriseProxyConfig.get_custom_docs_description()
    except Exception:
        custom_docs_description = None

    return PublicModelHubInfo(
        docs_title=_title,
        custom_docs_description=custom_docs_description,
        litellm_version=version,
        useful_links=litellm.public_model_groups_links,
    )


@router.get(
    "/public/providers",
    tags=["public", "providers"],
    response_model=List[str],
)
async def get_supported_providers() -> List[str]:
    """
    Return a sorted list of all providers supported by LiteLLM.
    """
    return sorted(provider.value for provider in LlmProviders)


@router.get(
    "/public/providers/fields",
    tags=["public", "providers"],
    response_model=List[ProviderCreateInfo],
)
async def get_provider_fields() -> List[ProviderCreateInfo]:
    """
    Return provider metadata required by the dashboard create-model flow.
    """
    # Resolve the bundled JSON relative to this module's package root.
    provider_create_fields_path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
        "proxy",
        "public_endpoints",
        "provider_create_fields.json",
    )
    with open(provider_create_fields_path, "r") as f:
        provider_create_fields = json.load(f)
    return provider_create_fields


@router.get(
    "/public/litellm_model_cost_map",
    tags=["public", "model management"],
)
async def get_litellm_model_cost_map():
    """
    Public endpoint to get the LiteLLM model cost map.
    Returns pricing information for all supported models.

    Raises:
        HTTPException: 500 when the cost map cannot be loaded.
    """
    import litellm

    try:
        _model_cost_map = litellm.model_cost
        return _model_cost_map
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Internal Server Error ({str(e)})",
        )


@router.get(
    "/public/litellm_blog_posts",
    tags=["public"],
    response_model=BlogPostsResponse,
)
async def get_litellm_blog_posts():
    """
    Public endpoint to get the latest LiteLLM blog posts.
    Fetches from GitHub with a 1-hour in-process cache.
    Falls back to the bundled local backup on any failure.
    """
    try:
        posts_data = get_blog_posts(url=litellm.blog_posts_url)
    except Exception as e:
        verbose_logger.warning(
            "LiteLLM: get_litellm_blog_posts endpoint fallback triggered: %s", str(e)
        )
        posts_data = GetBlogPosts.load_local_blog_posts()

    # Only the five most recent posts are exposed.
    posts = [BlogPost(**p) for p in posts_data[:5]]
    return BlogPostsResponse(posts=posts)


@router.get(
    "/public/endpoints",
    tags=["public"],
    response_model=SupportedEndpointsResponse,
)
async def get_supported_endpoints() -> SupportedEndpointsResponse:
    """
    Return the list of LiteLLM proxy endpoints and which providers support
    each one. Reads from the bundled local backup file. Result is cached
    in-process for the lifetime of the server process.
    """
    global _cached_endpoints
    if _cached_endpoints is None:
        _cached_endpoints = SupportedEndpointsResponse(endpoints=_load_endpoints())  # type: ignore[arg-type]
    return _cached_endpoints


@router.get(
    "/public/agents/fields",
    tags=["public", "[beta] Agents"],
    response_model=List[AgentCreateInfo],
)
async def get_agent_fields() -> List[AgentCreateInfo]:
    """
    Return agent type metadata required by the dashboard create-agent flow.

    If an agent has `inherit_credentials_from_provider`, the provider's
    credential fields are automatically appended to the agent's
    credential_fields.
    """
    base_path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
        "proxy",
        "public_endpoints",
    )
    agent_create_fields_path = os.path.join(base_path, "agent_create_fields.json")
    provider_create_fields_path = os.path.join(base_path, "provider_create_fields.json")

    with open(agent_create_fields_path, "r") as f:
        agent_create_fields = json.load(f)
    with open(provider_create_fields_path, "r") as f:
        provider_create_fields = json.load(f)

    # Build a lookup map for providers by name
    provider_map = {p["provider"]: p for p in provider_create_fields}

    # Merge inherited credential fields
    for agent in agent_create_fields:
        inherit_from = agent.get("inherit_credentials_from_provider")
        if inherit_from and inherit_from in provider_map:
            provider = provider_map[inherit_from]
            # Copy provider fields and mark them for inclusion in litellm_params
            inherited_fields = []
            for field in provider.get("credential_fields", []):
                field_copy = field.copy()
                field_copy["include_in_litellm_params"] = True
                inherited_fields.append(field_copy)
            # Append provider credential fields after agent's own fields
            agent["credential_fields"] = (
                agent.get("credential_fields", []) + inherited_fields
            )
        # Remove the inherit field from response (not needed by frontend)
        agent.pop("inherit_credentials_from_provider", None)

    return agent_create_fields