chore: initial public snapshot for github upload

This commit is contained in:
Your Name
2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions

View File

@@ -0,0 +1,46 @@
# Slack Alerting on LiteLLM Gateway
This folder contains the Slack Alerting integration for LiteLLM Gateway.
## Folder Structure
- `slack_alerting.py`: This is the main file that handles sending different types of alerts
- `batching_handler.py`: Handles batching and sending httpx POST requests to Slack. Slack alerts are sent every 10s, or sooner when more than X events are queued. This batching is done to ensure LiteLLM performs well under high traffic.
- `types.py`: This file contains the AlertType enum which is used to define the different types of alerts that can be sent to Slack.
- `utils.py`: This file contains common utils used specifically for slack alerting
## Budget Alert Types
The `budget_alert_types.py` module provides a flexible framework for handling different types of budget alerts:
- `BaseBudgetAlertType`: An abstract base class with abstract methods that all alert types must implement:
- `get_event_group()`: Returns the Litellm_EntityType for the alert
- `get_event_message()`: Returns the message prefix for the alert
- `get_id(user_info)`: Returns the ID to use for caching/tracking the alert
Concrete implementations include:
- `ProxyBudgetAlert`: Alerting for proxy-level budget concerns
- `SoftBudgetAlert`: Alerting when soft budgets are crossed
- `UserBudgetAlert`: Alerting for user-level budget concerns
- `TeamBudgetAlert`: Alerting for team-level budget concerns
- `TokenBudgetAlert`: Alerting for API key budget concerns
- `ProjectedLimitExceededAlert`: Alerting when projected spend will exceed budget
Use the `get_budget_alert_type()` factory function to get the appropriate alert type class for a given alert type string:
```python
from litellm.integrations.SlackAlerting.budget_alert_types import get_budget_alert_type
# Get the appropriate handler
budget_alert_class = get_budget_alert_type("user_budget")
# Use the handler methods
event_group = budget_alert_class.get_event_group() # Returns Litellm_EntityType.USER
event_message = budget_alert_class.get_event_message() # Returns "User Budget: "
cache_id = budget_alert_class.get_id(user_info) # Returns user_id
```
To add a new budget alert type, simply create a new class that extends `BaseBudgetAlertType` and implements all the required methods, then add it to the dictionary in the `get_budget_alert_type()` function.
## Further Reading
- [Doc setting up Alerting on LiteLLM Proxy (Gateway)](https://docs.litellm.ai/docs/proxy/alerting)

View File

@@ -0,0 +1,81 @@
"""
Handles Batching + sending Httpx Post requests to slack
Slack alerts are sent every 10s or when events are greater than X events
see custom_batch_logger.py for more details / defaults
"""
from typing import TYPE_CHECKING, Any
from litellm._logging import verbose_proxy_logger
if TYPE_CHECKING:
from .slack_alerting import SlackAlerting as _SlackAlerting
SlackAlertingType = _SlackAlerting
else:
SlackAlertingType = Any
def squash_payloads(queue):
    """
    Collapse queued slack alert payloads that share the same (url, alert_type).

    Returns a dict mapping a dedup key to {"item": <first queued item>, "count": n}.
    An empty queue yields {}. A single-item queue is returned under the literal
    key "key" as a fast path — presumably callers only consume the values;
    verify before relying on the key shape.
    """
    if not queue:
        return {}
    if len(queue) == 1:
        return {"key": {"item": queue[0], "count": 1}}

    squashed = {}
    for entry in queue:
        dedup_key = (entry["url"], entry["alert_type"])
        bucket = squashed.get(dedup_key)
        if bucket is None:
            squashed[dedup_key] = {"item": entry, "count": 1}
        else:
            # keep the first payload and just bump the count of merged alerts
            bucket["count"] += 1
    return squashed
def _print_alerting_payload_warning(
    payload: dict, slackAlertingInstance: SlackAlertingType
):
    """
    Emit the alert payload at WARNING level when console logging is enabled
    via `alerting_args.log_to_console`.

    Relevant issue: https://github.com/BerriAI/litellm/issues/7372
    """
    log_to_console = slackAlertingInstance.alerting_args.log_to_console
    if log_to_console is True:
        verbose_proxy_logger.warning(payload)
async def send_to_webhook(slackAlertingInstance: SlackAlertingType, item, count):
    """
    POST a single (possibly batched) slack alert to its webhook url.

    When `count` > 1 the alert text is prefixed with the number of squashed
    alerts. Failures are logged at debug level and never raised; the console
    payload-warning hook always runs, success or failure.
    """
    import json

    payload = item.get("payload", {})
    try:
        if count > 1:
            # batched alerts: surface how many were merged into this message
            payload["text"] = f"[Num Alerts: {count}]\n\n{payload['text']}"
        serialized = json.dumps(payload)
        response = await slackAlertingInstance.async_http_handler.post(
            url=item["url"],
            headers=item["headers"],
            data=serialized,
        )
        if response.status_code != 200:
            verbose_proxy_logger.debug(
                f"Error sending slack alert to url={item['url']}. Error={response.text}"
            )
    except Exception as e:
        verbose_proxy_logger.debug(f"Error sending slack alert: {str(e)}")
    finally:
        _print_alerting_payload_warning(
            payload, slackAlertingInstance=slackAlertingInstance
        )

View File

@@ -0,0 +1,115 @@
from abc import ABC, abstractmethod
from typing import Literal
from litellm.proxy._types import CallInfo
class BaseBudgetAlertType(ABC):
    """Base class for different budget alert types

    Subclasses customize the message prefix and the id used for
    caching/deduplicating a budget alert.

    NOTE(review): the folder README also documents an abstract
    get_event_group() method that is not defined here — confirm which
    is current.
    """
    @abstractmethod
    def get_event_message(self) -> str:
        """Return the event message for this alert type"""
        pass
    @abstractmethod
    def get_id(self, user_info: CallInfo) -> str:
        """Return the ID to use for caching/tracking this alert"""
        pass
class ProxyBudgetAlert(BaseBudgetAlertType):
    """Alert for proxy-wide budget concerns."""

    def get_event_message(self) -> str:
        return "Proxy Budget: "

    def get_id(self, user_info: CallInfo) -> str:
        # the proxy budget is global, so a single fixed id tracks it
        return "default_id"
class SoftBudgetAlert(BaseBudgetAlertType):
    """Alert raised when a soft budget limit is crossed."""

    def get_event_message(self) -> str:
        return "Soft Budget Crossed: "

    def get_id(self, user_info: CallInfo) -> str:
        # tracked per API key token, with a shared fallback id
        return user_info.token or "default_id"
class UserBudgetAlert(BaseBudgetAlertType):
    """Alert for user-level budget concerns."""

    def get_event_message(self) -> str:
        return "User Budget: "

    def get_id(self, user_info: CallInfo) -> str:
        # tracked per user id, with a shared fallback id
        return user_info.user_id or "default_id"
class TeamBudgetAlert(BaseBudgetAlertType):
    """Alert for team-level budget concerns."""

    def get_event_message(self) -> str:
        return "Team Budget: "

    def get_id(self, user_info: CallInfo) -> str:
        # tracked per team id, with a shared fallback id
        return user_info.team_id or "default_id"
class OrganizationBudgetAlert(BaseBudgetAlertType):
    """Alert for organization-level budget concerns."""

    def get_event_message(self) -> str:
        return "Organization Budget: "

    def get_id(self, user_info: CallInfo) -> str:
        # tracked per organization id, with a shared fallback id
        return user_info.organization_id or "default_id"
class TokenBudgetAlert(BaseBudgetAlertType):
    """Alert for API-key (token) budget concerns."""

    def get_event_message(self) -> str:
        return "Key Budget: "

    def get_id(self, user_info: CallInfo) -> str:
        # tracked per API key token, with a shared fallback id
        return user_info.token or "default_id"
class ProjectedLimitExceededAlert(BaseBudgetAlertType):
    """Alert raised when projected spend will exceed a key's budget."""

    def get_event_message(self) -> str:
        return "Key Budget: Projected Limit Exceeded"

    def get_id(self, user_info: CallInfo) -> str:
        # tracked per API key token, with a shared fallback id
        return user_info.token or "default_id"
class ProjectBudgetAlert(BaseBudgetAlertType):
    """Alert for project-level budget concerns."""

    def get_event_message(self) -> str:
        return "Project Budget: "

    def get_id(self, user_info: CallInfo) -> str:
        # tracked per API key token, with a shared fallback id
        return user_info.token or "default_id"
def get_budget_alert_type(
    type: Literal[
        "token_budget",
        "user_budget",
        "soft_budget",
        "max_budget_alert",
        "team_budget",
        "organization_budget",
        "proxy_budget",
        "projected_limit_exceeded",
        "project_budget",
    ],
) -> BaseBudgetAlertType:
    """Factory function to get the appropriate budget alert type class

    Returns a handler instance for the given alert type string; unknown
    values fall back to ProxyBudgetAlert.
    """
    # Map to classes (not instances) so only the selected handler is
    # constructed, instead of instantiating all nine on every call.
    alert_types = {
        "proxy_budget": ProxyBudgetAlert,
        "soft_budget": SoftBudgetAlert,
        "user_budget": UserBudgetAlert,
        "max_budget_alert": TokenBudgetAlert,
        "team_budget": TeamBudgetAlert,
        "organization_budget": OrganizationBudgetAlert,
        "token_budget": TokenBudgetAlert,
        "projected_limit_exceeded": ProjectedLimitExceededAlert,
        "project_budget": ProjectBudgetAlert,
    }
    return alert_types.get(type, ProxyBudgetAlert)()

View File

@@ -0,0 +1,177 @@
"""
Class to check for LLM API hanging requests
Notes:
- Do not create tasks that sleep, that can saturate the event loop
- Do not store large objects (eg. messages in memory) that can increase RAM usage
"""
import asyncio
from typing import TYPE_CHECKING, Any, Optional
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.caching.in_memory_cache import InMemoryCache
from litellm.litellm_core_utils.core_helpers import get_litellm_metadata_from_kwargs
from litellm.types.integrations.slack_alerting import (
HANGING_ALERT_BUFFER_TIME_SECONDS,
MAX_OLDEST_HANGING_REQUESTS_TO_CHECK,
HangingRequestData,
)
if TYPE_CHECKING:
from litellm.integrations.SlackAlerting.slack_alerting import SlackAlerting
else:
SlackAlerting = Any
class AlertingHangingRequestCheck:
    """
    Class to safely handle checking hanging requests alerts

    Only small HangingRequestData records (ids/aliases) are cached — never
    the request messages — per the module notes on keeping RAM usage low.
    """
    def __init__(
        self,
        slack_alerting_object: SlackAlerting,
    ):
        # parent SlackAlerting instance; supplies alerting_threshold and send_alert()
        self.slack_alerting_object = slack_alerting_object
        # TTL = threshold + buffer: entries auto-expire shortly after an alert
        # would have fired, so the cache cannot grow without bound
        self.hanging_request_cache = InMemoryCache(
            default_ttl=int(
                self.slack_alerting_object.alerting_threshold
                + HANGING_ALERT_BUFFER_TIME_SECONDS
            ),
        )
    async def add_request_to_hanging_request_check(
        self,
        request_data: Optional[dict] = None,
    ):
        """
        Add a request to the hanging request cache. This is the list of request_ids that gets periodically checked for hanging requests

        No-op when request_data is None.
        """
        if request_data is None:
            return
        request_metadata = get_litellm_metadata_from_kwargs(kwargs=request_data)
        model = request_data.get("model", "")
        api_base: Optional[str] = None
        # api_base is only resolvable when the router attached a deployment dict
        if request_data.get("deployment", None) is not None and isinstance(
            request_data["deployment"], dict
        ):
            api_base = litellm.get_api_base(
                model=model,
                optional_params=request_data["deployment"].get("litellm_params", {}),
            )
        # store only lightweight identifying fields, keyed by litellm_call_id
        hanging_request_data = HangingRequestData(
            request_id=request_data.get("litellm_call_id", ""),
            model=model,
            api_base=api_base,
            key_alias=request_metadata.get("user_api_key_alias", ""),
            team_alias=request_metadata.get("user_api_key_team_alias", ""),
        )
        await self.hanging_request_cache.async_set_cache(
            key=hanging_request_data.request_id,
            value=hanging_request_data,
            ttl=int(
                self.slack_alerting_object.alerting_threshold
                + HANGING_ALERT_BUFFER_TIME_SECONDS
            ),
        )
        return
    async def send_alerts_for_hanging_requests(self):
        """
        Send alerts for hanging requests

        A request counts as hanging when it is still present in the hanging
        request cache but has no request_status entry in the internal usage
        cache (i.e. it neither succeeded nor failed yet).
        """
        from litellm.proxy.proxy_server import proxy_logging_obj
        #########################################################
        # Find all requests that have been hanging for more than the alerting threshold
        # Get the last 50 oldest items in the cache and check if they have completed
        #########################################################
        # check if request_id is in internal usage cache
        if proxy_logging_obj.internal_usage_cache is None:
            return
        hanging_requests = await self.hanging_request_cache.async_get_oldest_n_keys(
            n=MAX_OLDEST_HANGING_REQUESTS_TO_CHECK,
        )
        for request_id in hanging_requests:
            hanging_request_data: Optional[
                HangingRequestData
            ] = await self.hanging_request_cache.async_get_cache(
                key=request_id,
            )
            # entry may have expired between listing the keys and fetching it
            if hanging_request_data is None:
                continue
            request_status = (
                await proxy_logging_obj.internal_usage_cache.async_get_cache(
                    key="request_status:{}".format(hanging_request_data.request_id),
                    litellm_parent_otel_span=None,
                    local_only=True,
                )
            )
            # this means the request status was either success or fail
            # and is not hanging
            if request_status is not None:
                # clear this request from hanging request cache since the request was either success or failed
                self.hanging_request_cache._remove_key(
                    key=request_id,
                )
                continue
            ################
            # Send the Alert on Slack
            ################
            await self.send_hanging_request_alert(
                hanging_request_data=hanging_request_data
            )
        return
    async def check_for_hanging_requests(
        self,
    ):
        """
        Background task that checks all request ids in self.hanging_request_cache to check if they have completed
        Runs every alerting_threshold/2 seconds to check for hanging requests

        Intended to be scheduled once as a long-lived asyncio task; never returns.
        """
        while True:
            verbose_proxy_logger.debug("Checking for hanging requests....")
            await self.send_alerts_for_hanging_requests()
            await asyncio.sleep(self.slack_alerting_object.alerting_threshold / 2)
    async def send_hanging_request_alert(
        self,
        hanging_request_data: HangingRequestData,
    ):
        """
        Send a hanging request alert

        Formats the model/api_base/key/team details into a Medium-severity
        slack alert of type llm_requests_hanging.
        """
        from litellm.integrations.SlackAlerting.slack_alerting import AlertType
        ################
        # Send the Alert on Slack
        ################
        request_info = f"""Request Model: `{hanging_request_data.model}`
API Base: `{hanging_request_data.api_base}`
Key Alias: `{hanging_request_data.key_alias}`
Team Alias: `{hanging_request_data.team_alias}`"""
        alerting_message = f"`Requests are hanging - {self.slack_alerting_object.alerting_threshold}s+ request time`"
        await self.slack_alerting_object.send_alert(
            message=alerting_message + "\n" + request_info,
            level="Medium",
            alert_type=AlertType.llm_requests_hanging,
            alerting_metadata=hanging_request_data.alerting_metadata or {},
            request_model=hanging_request_data.model,
            api_base=hanging_request_data.api_base,
        )

View File

@@ -0,0 +1,99 @@
"""
Utils used for slack alerting
"""
import asyncio
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
import litellm
from litellm.proxy._types import AlertType
from litellm.secret_managers.main import get_secret
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as _Logging
Logging = _Logging
else:
Logging = Any
def process_slack_alerting_variables(
    alert_to_webhook_url: "Optional[Dict[AlertType, Union[List[str], str]]]"
) -> "Optional[Dict[AlertType, Union[List[str], str]]]":
    """
    Resolve environment-variable references in alert webhook urls, in place.

    Any url of the form ``os.environ/NAME`` is replaced with the value
    returned by ``get_secret``; plain urls are kept unchanged. Handles both
    a single url and a list of urls per alert type.

    Returns the same (mutated) mapping, or None if given None.

    Raises:
        ValueError: if a referenced secret does not resolve to a string.
    """
    if alert_to_webhook_url is None:
        return None
    for alert_type, webhook_urls in alert_to_webhook_url.items():
        if isinstance(webhook_urls, list):
            resolved_urls: List[str] = []
            for webhook_url in webhook_urls:
                if "os.environ/" in webhook_url:
                    _env_value = get_secret(secret_name=webhook_url)
                    if not isinstance(_env_value, str):
                        raise ValueError(
                            f"Invalid webhook url value for: {webhook_url}. Got type={type(_env_value)}"
                        )
                    resolved_urls.append(_env_value)
                else:
                    resolved_urls.append(webhook_url)
            alert_to_webhook_url[alert_type] = resolved_urls
        else:
            # single-url case; only rewrite when it references an env var
            # (previously this branch carried a dead initial assignment and a
            # redundant else-branch reassignment)
            if "os.environ/" in webhook_urls:
                _env_value = get_secret(secret_name=webhook_urls)
                if not isinstance(_env_value, str):
                    raise ValueError(
                        f"Invalid webhook url value for: {webhook_urls}. Got type={type(_env_value)}"
                    )
                alert_to_webhook_url[alert_type] = _env_value
    return alert_to_webhook_url
async def _add_langfuse_trace_id_to_alert(
    request_data: Optional[dict] = None,
) -> Optional[str]:
    """
    Returns langfuse trace url for the request, or None when unavailable.

    Only runs when "langfuse" is registered as a callback. Retries fetching
    the trace id up to 3 times (3s apart), since the trace may not exist yet
    when the alert fires.
    - check:
    -> existing_trace_id
    -> trace_id
    -> litellm_call_id
    """
    if "langfuse" not in litellm.logging_callback_manager._get_all_callbacks():
        return None
    #########################################################
    # Only run if langfuse is added as a callback
    #########################################################
    if request_data is None or request_data.get("litellm_logging_obj", None) is None:
        return None
    litellm_logging_obj: Logging = request_data["litellm_logging_obj"]
    trace_id: Optional[str] = None
    for attempt in range(3):
        trace_id = litellm_logging_obj._get_trace_id(service_name="langfuse")
        if trace_id is not None:
            break
        if attempt < 2:
            # fix: previously also slept after the final failed attempt
            await asyncio.sleep(3)  # wait 3s before retrying for trace id
    if trace_id is None:
        # fix: previously built a bogus ".../trace/None" url here
        return None
    langfuse_object = litellm_logging_obj._get_callback_object(
        service_name="langfuse"
    )
    if langfuse_object is not None:
        base_url = langfuse_object.Langfuse.base_url
        return f"{base_url}/trace/{trace_id}"
    return None