chore: initial public snapshot for github upload

This commit is contained in:
Your Name
2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions

View File

@@ -0,0 +1,46 @@
# Slack Alerting on LiteLLM Gateway
This folder contains the Slack Alerting integration for LiteLLM Gateway.
## Folder Structure
- `slack_alerting.py`: This is the main file that handles sending different types of alerts
- `batching_handler.py`: Handles batching and sending httpx POST requests to Slack. Slack alerts are sent every 10s, or sooner when more than X events are queued. This batching is done to ensure LiteLLM performs well under high traffic.
- `types.py`: This file contains the AlertType enum which is used to define the different types of alerts that can be sent to Slack.
- `utils.py`: This file contains common utils used specifically for slack alerting
## Budget Alert Types
The `budget_alert_types.py` module provides a flexible framework for handling different types of budget alerts:
- `BaseBudgetAlertType`: An abstract base class with abstract methods that all alert types must implement:
- `get_event_group()`: Returns the Litellm_EntityType for the alert
- `get_event_message()`: Returns the message prefix for the alert
- `get_id(user_info)`: Returns the ID to use for caching/tracking the alert
Concrete implementations include:
- `ProxyBudgetAlert`: Alerting for proxy-level budget concerns
- `SoftBudgetAlert`: Alerting when soft budgets are crossed
- `UserBudgetAlert`: Alerting for user-level budget concerns
- `TeamBudgetAlert`: Alerting for team-level budget concerns
- `TokenBudgetAlert`: Alerting for API key budget concerns
- `ProjectedLimitExceededAlert`: Alerting when projected spend will exceed budget
Use the `get_budget_alert_type()` factory function to get the appropriate alert type class for a given alert type string:
```python
from litellm.integrations.SlackAlerting.budget_alert_types import get_budget_alert_type
# Get the appropriate handler
budget_alert_class = get_budget_alert_type("user_budget")
# Use the handler methods
event_group = budget_alert_class.get_event_group() # Returns Litellm_EntityType.USER
event_message = budget_alert_class.get_event_message() # Returns "User Budget: "
cache_id = budget_alert_class.get_id(user_info) # Returns user_id
```
To add a new budget alert type, simply create a new class that extends `BaseBudgetAlertType` and implements all the required methods, then add it to the dictionary in the `get_budget_alert_type()` function.
## Further Reading
- [Doc setting up Alerting on LiteLLM Proxy (Gateway)](https://docs.litellm.ai/docs/proxy/alerting)

View File

@@ -0,0 +1,81 @@
"""
Handles Batching + sending Httpx Post requests to slack
Slack alerts are sent every 10s or when events are greater than X events
see custom_batch_logger.py for more details / defaults
"""
from typing import TYPE_CHECKING, Any
from litellm._logging import verbose_proxy_logger
if TYPE_CHECKING:
from .slack_alerting import SlackAlerting as _SlackAlerting
SlackAlertingType = _SlackAlerting
else:
SlackAlertingType = Any
def squash_payloads(queue):
    """
    Collapse queued slack alert payloads that share the same (url, alert_type).

    Returns a dict mapping a dedup key to {"item": <first queued item>, "count": n}.
    An empty queue yields {}. A single-item queue is returned under the literal
    key "key" as a fast path — presumably callers only consume the values;
    verify before relying on the key shape.
    """
    if not queue:
        return {}
    if len(queue) == 1:
        return {"key": {"item": queue[0], "count": 1}}

    squashed = {}
    for entry in queue:
        dedup_key = (entry["url"], entry["alert_type"])
        bucket = squashed.get(dedup_key)
        if bucket is None:
            squashed[dedup_key] = {"item": entry, "count": 1}
        else:
            # keep the first payload and just bump the count of merged alerts
            bucket["count"] += 1
    return squashed
def _print_alerting_payload_warning(
    payload: dict, slackAlertingInstance: SlackAlertingType
):
    """
    Emit the alert payload at WARNING level when console logging is enabled
    via `alerting_args.log_to_console`.

    Relevant issue: https://github.com/BerriAI/litellm/issues/7372
    """
    log_to_console = slackAlertingInstance.alerting_args.log_to_console
    if log_to_console is True:
        verbose_proxy_logger.warning(payload)
async def send_to_webhook(slackAlertingInstance: SlackAlertingType, item, count):
    """
    POST a single (possibly batched) slack alert to its webhook url.

    When `count` > 1 the alert text is prefixed with the number of squashed
    alerts. Failures are logged at debug level and never raised; the console
    payload-warning hook always runs, success or failure.
    """
    import json

    payload = item.get("payload", {})
    try:
        if count > 1:
            # batched alerts: surface how many were merged into this message
            payload["text"] = f"[Num Alerts: {count}]\n\n{payload['text']}"
        serialized = json.dumps(payload)
        response = await slackAlertingInstance.async_http_handler.post(
            url=item["url"],
            headers=item["headers"],
            data=serialized,
        )
        if response.status_code != 200:
            verbose_proxy_logger.debug(
                f"Error sending slack alert to url={item['url']}. Error={response.text}"
            )
    except Exception as e:
        verbose_proxy_logger.debug(f"Error sending slack alert: {str(e)}")
    finally:
        _print_alerting_payload_warning(
            payload, slackAlertingInstance=slackAlertingInstance
        )

View File

@@ -0,0 +1,115 @@
from abc import ABC, abstractmethod
from typing import Literal
from litellm.proxy._types import CallInfo
class BaseBudgetAlertType(ABC):
    """Base class for different budget alert types

    Subclasses customize the message prefix and the id used for
    caching/deduplicating a budget alert.

    NOTE(review): the folder README also documents an abstract
    get_event_group() method that is not defined here — confirm which
    is current.
    """
    @abstractmethod
    def get_event_message(self) -> str:
        """Return the event message for this alert type"""
        pass
    @abstractmethod
    def get_id(self, user_info: CallInfo) -> str:
        """Return the ID to use for caching/tracking this alert"""
        pass
class ProxyBudgetAlert(BaseBudgetAlertType):
    """Alert for proxy-wide budget concerns."""

    def get_event_message(self) -> str:
        return "Proxy Budget: "

    def get_id(self, user_info: CallInfo) -> str:
        # the proxy budget is global, so a single fixed id tracks it
        return "default_id"
class SoftBudgetAlert(BaseBudgetAlertType):
    """Alert raised when a soft budget limit is crossed."""

    def get_event_message(self) -> str:
        return "Soft Budget Crossed: "

    def get_id(self, user_info: CallInfo) -> str:
        # tracked per API key token, with a shared fallback id
        return user_info.token or "default_id"
class UserBudgetAlert(BaseBudgetAlertType):
    """Alert for user-level budget concerns."""

    def get_event_message(self) -> str:
        return "User Budget: "

    def get_id(self, user_info: CallInfo) -> str:
        # tracked per user id, with a shared fallback id
        return user_info.user_id or "default_id"
class TeamBudgetAlert(BaseBudgetAlertType):
    """Alert for team-level budget concerns."""

    def get_event_message(self) -> str:
        return "Team Budget: "

    def get_id(self, user_info: CallInfo) -> str:
        # tracked per team id, with a shared fallback id
        return user_info.team_id or "default_id"
class OrganizationBudgetAlert(BaseBudgetAlertType):
    """Alert for organization-level budget concerns."""

    def get_event_message(self) -> str:
        return "Organization Budget: "

    def get_id(self, user_info: CallInfo) -> str:
        # tracked per organization id, with a shared fallback id
        return user_info.organization_id or "default_id"
class TokenBudgetAlert(BaseBudgetAlertType):
    """Alert for API-key (token) budget concerns."""

    def get_event_message(self) -> str:
        return "Key Budget: "

    def get_id(self, user_info: CallInfo) -> str:
        # tracked per API key token, with a shared fallback id
        return user_info.token or "default_id"
class ProjectedLimitExceededAlert(BaseBudgetAlertType):
    """Alert raised when projected spend will exceed a key's budget."""

    def get_event_message(self) -> str:
        return "Key Budget: Projected Limit Exceeded"

    def get_id(self, user_info: CallInfo) -> str:
        # tracked per API key token, with a shared fallback id
        return user_info.token or "default_id"
class ProjectBudgetAlert(BaseBudgetAlertType):
    """Alert for project-level budget concerns."""

    def get_event_message(self) -> str:
        return "Project Budget: "

    def get_id(self, user_info: CallInfo) -> str:
        # tracked per API key token, with a shared fallback id
        return user_info.token or "default_id"
def get_budget_alert_type(
    type: Literal[
        "token_budget",
        "user_budget",
        "soft_budget",
        "max_budget_alert",
        "team_budget",
        "organization_budget",
        "proxy_budget",
        "projected_limit_exceeded",
        "project_budget",
    ],
) -> BaseBudgetAlertType:
    """Factory function to get the appropriate budget alert type class

    Returns a handler instance for the given alert type string; unknown
    values fall back to ProxyBudgetAlert.
    """
    # Map to classes (not instances) so only the selected handler is
    # constructed, instead of instantiating all nine on every call.
    alert_types = {
        "proxy_budget": ProxyBudgetAlert,
        "soft_budget": SoftBudgetAlert,
        "user_budget": UserBudgetAlert,
        "max_budget_alert": TokenBudgetAlert,
        "team_budget": TeamBudgetAlert,
        "organization_budget": OrganizationBudgetAlert,
        "token_budget": TokenBudgetAlert,
        "projected_limit_exceeded": ProjectedLimitExceededAlert,
        "project_budget": ProjectBudgetAlert,
    }
    return alert_types.get(type, ProxyBudgetAlert)()

View File

@@ -0,0 +1,177 @@
"""
Class to check for LLM API hanging requests
Notes:
- Do not create tasks that sleep, that can saturate the event loop
- Do not store large objects (eg. messages in memory) that can increase RAM usage
"""
import asyncio
from typing import TYPE_CHECKING, Any, Optional
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.caching.in_memory_cache import InMemoryCache
from litellm.litellm_core_utils.core_helpers import get_litellm_metadata_from_kwargs
from litellm.types.integrations.slack_alerting import (
HANGING_ALERT_BUFFER_TIME_SECONDS,
MAX_OLDEST_HANGING_REQUESTS_TO_CHECK,
HangingRequestData,
)
if TYPE_CHECKING:
from litellm.integrations.SlackAlerting.slack_alerting import SlackAlerting
else:
SlackAlerting = Any
class AlertingHangingRequestCheck:
    """
    Class to safely handle checking hanging requests alerts

    Only small HangingRequestData records (ids/aliases) are cached — never
    the request messages — per the module notes on keeping RAM usage low.
    """
    def __init__(
        self,
        slack_alerting_object: SlackAlerting,
    ):
        # parent SlackAlerting instance; supplies alerting_threshold and send_alert()
        self.slack_alerting_object = slack_alerting_object
        # TTL = threshold + buffer: entries auto-expire shortly after an alert
        # would have fired, so the cache cannot grow without bound
        self.hanging_request_cache = InMemoryCache(
            default_ttl=int(
                self.slack_alerting_object.alerting_threshold
                + HANGING_ALERT_BUFFER_TIME_SECONDS
            ),
        )
    async def add_request_to_hanging_request_check(
        self,
        request_data: Optional[dict] = None,
    ):
        """
        Add a request to the hanging request cache. This is the list of request_ids that gets periodically checked for hanging requests

        No-op when request_data is None.
        """
        if request_data is None:
            return
        request_metadata = get_litellm_metadata_from_kwargs(kwargs=request_data)
        model = request_data.get("model", "")
        api_base: Optional[str] = None
        # api_base is only resolvable when the router attached a deployment dict
        if request_data.get("deployment", None) is not None and isinstance(
            request_data["deployment"], dict
        ):
            api_base = litellm.get_api_base(
                model=model,
                optional_params=request_data["deployment"].get("litellm_params", {}),
            )
        # store only lightweight identifying fields, keyed by litellm_call_id
        hanging_request_data = HangingRequestData(
            request_id=request_data.get("litellm_call_id", ""),
            model=model,
            api_base=api_base,
            key_alias=request_metadata.get("user_api_key_alias", ""),
            team_alias=request_metadata.get("user_api_key_team_alias", ""),
        )
        await self.hanging_request_cache.async_set_cache(
            key=hanging_request_data.request_id,
            value=hanging_request_data,
            ttl=int(
                self.slack_alerting_object.alerting_threshold
                + HANGING_ALERT_BUFFER_TIME_SECONDS
            ),
        )
        return
    async def send_alerts_for_hanging_requests(self):
        """
        Send alerts for hanging requests

        A request counts as hanging when it is still present in the hanging
        request cache but has no request_status entry in the internal usage
        cache (i.e. it neither succeeded nor failed yet).
        """
        from litellm.proxy.proxy_server import proxy_logging_obj
        #########################################################
        # Find all requests that have been hanging for more than the alerting threshold
        # Get the last 50 oldest items in the cache and check if they have completed
        #########################################################
        # check if request_id is in internal usage cache
        if proxy_logging_obj.internal_usage_cache is None:
            return
        hanging_requests = await self.hanging_request_cache.async_get_oldest_n_keys(
            n=MAX_OLDEST_HANGING_REQUESTS_TO_CHECK,
        )
        for request_id in hanging_requests:
            hanging_request_data: Optional[
                HangingRequestData
            ] = await self.hanging_request_cache.async_get_cache(
                key=request_id,
            )
            # entry may have expired between listing the keys and fetching it
            if hanging_request_data is None:
                continue
            request_status = (
                await proxy_logging_obj.internal_usage_cache.async_get_cache(
                    key="request_status:{}".format(hanging_request_data.request_id),
                    litellm_parent_otel_span=None,
                    local_only=True,
                )
            )
            # this means the request status was either success or fail
            # and is not hanging
            if request_status is not None:
                # clear this request from hanging request cache since the request was either success or failed
                self.hanging_request_cache._remove_key(
                    key=request_id,
                )
                continue
            ################
            # Send the Alert on Slack
            ################
            await self.send_hanging_request_alert(
                hanging_request_data=hanging_request_data
            )
        return
    async def check_for_hanging_requests(
        self,
    ):
        """
        Background task that checks all request ids in self.hanging_request_cache to check if they have completed
        Runs every alerting_threshold/2 seconds to check for hanging requests

        Intended to be scheduled once as a long-lived asyncio task; never returns.
        """
        while True:
            verbose_proxy_logger.debug("Checking for hanging requests....")
            await self.send_alerts_for_hanging_requests()
            await asyncio.sleep(self.slack_alerting_object.alerting_threshold / 2)
    async def send_hanging_request_alert(
        self,
        hanging_request_data: HangingRequestData,
    ):
        """
        Send a hanging request alert

        Formats the model/api_base/key/team details into a Medium-severity
        slack alert of type llm_requests_hanging.
        """
        from litellm.integrations.SlackAlerting.slack_alerting import AlertType
        ################
        # Send the Alert on Slack
        ################
        request_info = f"""Request Model: `{hanging_request_data.model}`
API Base: `{hanging_request_data.api_base}`
Key Alias: `{hanging_request_data.key_alias}`
Team Alias: `{hanging_request_data.team_alias}`"""
        alerting_message = f"`Requests are hanging - {self.slack_alerting_object.alerting_threshold}s+ request time`"
        await self.slack_alerting_object.send_alert(
            message=alerting_message + "\n" + request_info,
            level="Medium",
            alert_type=AlertType.llm_requests_hanging,
            alerting_metadata=hanging_request_data.alerting_metadata or {},
            request_model=hanging_request_data.model,
            api_base=hanging_request_data.api_base,
        )

View File

@@ -0,0 +1,99 @@
"""
Utils used for slack alerting
"""
import asyncio
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
import litellm
from litellm.proxy._types import AlertType
from litellm.secret_managers.main import get_secret
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as _Logging
Logging = _Logging
else:
Logging = Any
def process_slack_alerting_variables(
    alert_to_webhook_url: "Optional[Dict[AlertType, Union[List[str], str]]]"
) -> "Optional[Dict[AlertType, Union[List[str], str]]]":
    """
    Resolve environment-variable references in alert webhook urls, in place.

    Any url of the form ``os.environ/NAME`` is replaced with the value
    returned by ``get_secret``; plain urls are kept unchanged. Handles both
    a single url and a list of urls per alert type.

    Returns the same (mutated) mapping, or None if given None.

    Raises:
        ValueError: if a referenced secret does not resolve to a string.
    """
    if alert_to_webhook_url is None:
        return None
    for alert_type, webhook_urls in alert_to_webhook_url.items():
        if isinstance(webhook_urls, list):
            resolved_urls: List[str] = []
            for webhook_url in webhook_urls:
                if "os.environ/" in webhook_url:
                    _env_value = get_secret(secret_name=webhook_url)
                    if not isinstance(_env_value, str):
                        raise ValueError(
                            f"Invalid webhook url value for: {webhook_url}. Got type={type(_env_value)}"
                        )
                    resolved_urls.append(_env_value)
                else:
                    resolved_urls.append(webhook_url)
            alert_to_webhook_url[alert_type] = resolved_urls
        else:
            # single-url case; only rewrite when it references an env var
            # (previously this branch carried a dead initial assignment and a
            # redundant else-branch reassignment)
            if "os.environ/" in webhook_urls:
                _env_value = get_secret(secret_name=webhook_urls)
                if not isinstance(_env_value, str):
                    raise ValueError(
                        f"Invalid webhook url value for: {webhook_urls}. Got type={type(_env_value)}"
                    )
                alert_to_webhook_url[alert_type] = _env_value
    return alert_to_webhook_url
async def _add_langfuse_trace_id_to_alert(
    request_data: Optional[dict] = None,
) -> Optional[str]:
    """
    Returns langfuse trace url for the request, or None when unavailable.

    Only runs when "langfuse" is registered as a callback. Retries fetching
    the trace id up to 3 times (3s apart), since the trace may not exist yet
    when the alert fires.
    - check:
    -> existing_trace_id
    -> trace_id
    -> litellm_call_id
    """
    if "langfuse" not in litellm.logging_callback_manager._get_all_callbacks():
        return None
    #########################################################
    # Only run if langfuse is added as a callback
    #########################################################
    if request_data is None or request_data.get("litellm_logging_obj", None) is None:
        return None
    litellm_logging_obj: Logging = request_data["litellm_logging_obj"]
    trace_id: Optional[str] = None
    for attempt in range(3):
        trace_id = litellm_logging_obj._get_trace_id(service_name="langfuse")
        if trace_id is not None:
            break
        if attempt < 2:
            # fix: previously also slept after the final failed attempt
            await asyncio.sleep(3)  # wait 3s before retrying for trace id
    if trace_id is None:
        # fix: previously built a bogus ".../trace/None" url here
        return None
    langfuse_object = litellm_logging_obj._get_callback_object(
        service_name="langfuse"
    )
    if langfuse_object is not None:
        base_url = langfuse_object.Langfuse.base_url
        return f"{base_url}/trace/{trace_id}"
    return None