chore: initial public snapshot for github upload
This commit is contained in:
@@ -0,0 +1,16 @@
|
||||
"""
|
||||
Azure Anthropic provider - supports Claude models via Azure Foundry
|
||||
"""
|
||||
from .handler import AzureAnthropicChatCompletion
|
||||
from .transformation import AzureAnthropicConfig
|
||||
|
||||
# messages_transformation is an optional module; if it cannot be imported
# (e.g. a partial install), export only the always-available symbols.
try:
    from .messages_transformation import AzureAnthropicMessagesConfig

    __all__ = [
        "AzureAnthropicChatCompletion",
        "AzureAnthropicConfig",
        "AzureAnthropicMessagesConfig",
    ]
except ImportError:
    __all__ = ["AzureAnthropicChatCompletion", "AzureAnthropicConfig"]
|
||||
@@ -0,0 +1,19 @@
|
||||
"""
|
||||
Azure AI Anthropic CountTokens API implementation.
|
||||
"""
|
||||
|
||||
from litellm.llms.azure_ai.anthropic.count_tokens.handler import (
|
||||
AzureAIAnthropicCountTokensHandler,
|
||||
)
|
||||
from litellm.llms.azure_ai.anthropic.count_tokens.token_counter import (
|
||||
AzureAIAnthropicTokenCounter,
|
||||
)
|
||||
from litellm.llms.azure_ai.anthropic.count_tokens.transformation import (
|
||||
AzureAIAnthropicCountTokensConfig,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"AzureAIAnthropicCountTokensHandler",
|
||||
"AzureAIAnthropicCountTokensConfig",
|
||||
"AzureAIAnthropicTokenCounter",
|
||||
]
|
||||
@@ -0,0 +1,133 @@
|
||||
"""
|
||||
Azure AI Anthropic CountTokens API handler.
|
||||
|
||||
Uses httpx for HTTP requests with Azure authentication.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.llms.anthropic.common_utils import AnthropicError
|
||||
from litellm.llms.azure_ai.anthropic.count_tokens.transformation import (
|
||||
AzureAIAnthropicCountTokensConfig,
|
||||
)
|
||||
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
|
||||
|
||||
|
||||
class AzureAIAnthropicCountTokensHandler(AzureAIAnthropicCountTokensConfig):
    """
    Handler for Azure AI Anthropic CountTokens API requests.

    Uses httpx for HTTP requests with Azure authentication.
    """

    async def handle_count_tokens_request(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        api_key: str,
        api_base: str,
        litellm_params: Optional[Dict[str, Any]] = None,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
        system: Optional[Any] = None,
    ) -> Dict[str, Any]:
        """
        Handle a CountTokens request using httpx with Azure authentication.

        Args:
            model: The model identifier (e.g., "claude-3-5-sonnet")
            messages: The messages to count tokens for
            api_key: The Azure AI API key
            api_base: The Azure AI API base URL
            litellm_params: Optional LiteLLM parameters
            timeout: Optional timeout for the request (defaults to litellm.request_timeout)
            tools: Optional tool definitions included in the token count
            system: Optional system prompt included in the token count

        Returns:
            Dictionary containing token count response

        Raises:
            AnthropicError: If the API request fails
        """
        try:
            self.validate_request(model, messages)
            verbose_logger.debug(
                f"Processing Azure AI Anthropic CountTokens request for model: {model}"
            )

            # Build the Anthropic-format payload for the count_tokens endpoint.
            request_body = self.transform_request_to_count_tokens(
                model=model, messages=messages, tools=tools, system=system
            )
            verbose_logger.debug(f"Transformed request: {request_body}")

            endpoint_url = self.get_count_tokens_endpoint(api_base)
            verbose_logger.debug(f"Making request to: {endpoint_url}")

            # Azure auth headers (api-key / Authorization) plus Anthropic headers.
            headers = self.get_required_headers(
                api_key=api_key, litellm_params=litellm_params
            )

            # Shared LiteLLM async httpx client for the azure_ai provider.
            async_client = get_async_httpx_client(
                llm_provider=litellm.LlmProviders.AZURE_AI
            )
            request_timeout = litellm.request_timeout if timeout is None else timeout

            response = await async_client.post(
                endpoint_url,
                headers=headers,
                json=request_body,
                timeout=request_timeout,
            )
            verbose_logger.debug(f"Response status: {response.status_code}")

            if response.status_code != 200:
                error_text = response.text
                verbose_logger.error(f"Azure AI Anthropic API error: {error_text}")
                raise AnthropicError(
                    status_code=response.status_code, message=error_text
                )

            azure_response = response.json()
            verbose_logger.debug(f"Azure AI Anthropic response: {azure_response}")
            # Payload already matches Anthropic's schema; pass it through untouched.
            return azure_response
        except AnthropicError:
            # Already in the right exception type — propagate unchanged.
            raise
        except httpx.HTTPStatusError as e:
            # Preserve the actual HTTP status code from the failed response.
            verbose_logger.error(f"HTTP error in CountTokens handler: {str(e)}")
            raise AnthropicError(
                status_code=e.response.status_code, message=e.response.text
            )
        except Exception as e:
            # Anything else becomes a generic 500-level AnthropicError.
            verbose_logger.error(f"Error in CountTokens handler: {str(e)}")
            raise AnthropicError(
                status_code=500,
                message=f"CountTokens processing error: {str(e)}",
            )
|
||||
@@ -0,0 +1,123 @@
|
||||
"""
|
||||
Azure AI Anthropic Token Counter implementation using the CountTokens API.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.llms.azure_ai.anthropic.count_tokens.handler import (
|
||||
AzureAIAnthropicCountTokensHandler,
|
||||
)
|
||||
from litellm.llms.base_llm.base_utils import BaseTokenCounter
|
||||
from litellm.types.utils import LlmProviders, TokenCountResponse
|
||||
|
||||
# Global handler instance - reuse across all token counting requests
|
||||
azure_ai_anthropic_count_tokens_handler = AzureAIAnthropicCountTokensHandler()
|
||||
|
||||
|
||||
class AzureAIAnthropicTokenCounter(BaseTokenCounter):
    """Token counter implementation for Azure AI Anthropic provider using the CountTokens API."""

    def should_use_token_counting_api(
        self,
        custom_llm_provider: Optional[str] = None,
    ) -> bool:
        # Only requests routed to the azure_ai provider use this counter.
        return custom_llm_provider == LlmProviders.AZURE_AI.value

    async def count_tokens(
        self,
        model_to_use: str,
        messages: Optional[List[Dict[str, Any]]],
        contents: Optional[List[Dict[str, Any]]],
        deployment: Optional[Dict[str, Any]] = None,
        request_model: str = "",
        tools: Optional[List[Dict[str, Any]]] = None,
        system: Optional[Any] = None,
    ) -> Optional[TokenCountResponse]:
        """
        Count tokens using Azure AI Anthropic's CountTokens API.

        Args:
            model_to_use: The model identifier
            messages: The messages to count tokens for
            contents: Alternative content format (not used for Anthropic)
            deployment: Deployment configuration containing litellm_params
            request_model: The original request model name
            tools: Optional tool definitions included in the token count
            system: Optional system prompt included in the token count

        Returns:
            TokenCountResponse with token count, or None if counting fails
        """
        from litellm.llms.anthropic.common_utils import AnthropicError

        if not messages:
            return None

        litellm_params = (deployment or {}).get("litellm_params", {})

        # Resolve credentials: deployment config first, then environment.
        api_key = litellm_params.get("api_key") or os.getenv("AZURE_AI_API_KEY")
        api_base = litellm_params.get("api_base") or os.getenv("AZURE_AI_API_BASE")

        if not api_key:
            verbose_logger.warning("No Azure AI API key found for token counting")
            return None
        if not api_base:
            verbose_logger.warning("No Azure AI API base found for token counting")
            return None

        def _failure(message: str, status_code: int) -> TokenCountResponse:
            # Shared shape for the two error-response branches below.
            return TokenCountResponse(
                total_tokens=0,
                request_model=request_model,
                model_used=model_to_use,
                tokenizer_type="azure_ai_anthropic_api",
                error=True,
                error_message=message,
                status_code=status_code,
            )

        try:
            result = await azure_ai_anthropic_count_tokens_handler.handle_count_tokens_request(
                model=model_to_use,
                messages=messages,
                api_key=api_key,
                api_base=api_base,
                litellm_params=litellm_params,
                tools=tools,
                system=system,
            )
            if result is not None:
                return TokenCountResponse(
                    total_tokens=result.get("input_tokens", 0),
                    request_model=request_model,
                    model_used=model_to_use,
                    tokenizer_type="azure_ai_anthropic_api",
                    original_response=result,
                )
        except AnthropicError as e:
            verbose_logger.warning(
                f"Azure AI Anthropic CountTokens API error: status={e.status_code}, message={e.message}"
            )
            return _failure(e.message, e.status_code)
        except Exception as e:
            verbose_logger.warning(
                f"Error calling Azure AI Anthropic CountTokens API: {e}"
            )
            return _failure(str(e), 500)

        return None
|
||||
@@ -0,0 +1,90 @@
|
||||
"""
|
||||
Azure AI Anthropic CountTokens API transformation logic.
|
||||
|
||||
Extends the base Anthropic CountTokens transformation with Azure authentication.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from litellm.constants import ANTHROPIC_TOKEN_COUNTING_BETA_VERSION
|
||||
from litellm.llms.anthropic.count_tokens.transformation import (
|
||||
AnthropicCountTokensConfig,
|
||||
)
|
||||
from litellm.llms.azure.common_utils import BaseAzureLLM
|
||||
from litellm.types.router import GenericLiteLLMParams
|
||||
|
||||
|
||||
class AzureAIAnthropicCountTokensConfig(AnthropicCountTokensConfig):
    """
    Configuration and transformation logic for Azure AI Anthropic CountTokens API.

    Extends AnthropicCountTokensConfig with Azure authentication.
    Azure AI Anthropic uses the same endpoint format but with Azure auth headers.
    """

    def get_required_headers(
        self,
        api_key: str,
        litellm_params: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, str]:
        """
        Get the required headers for the Azure AI Anthropic CountTokens API.

        Azure AI Anthropic uses Anthropic's native API format, which requires the
        x-api-key header for authentication (in addition to Azure's api-key header).

        Args:
            api_key: The Azure AI API key
            litellm_params: Optional LiteLLM parameters for additional auth config
                (never mutated; a shallow copy is taken internally)

        Returns:
            Dictionary of required headers with both x-api-key and Azure authentication
        """
        # Start with base headers including x-api-key for Anthropic API compatibility
        headers = {
            "Content-Type": "application/json",
            "anthropic-version": "2023-06-01",
            "anthropic-beta": ANTHROPIC_TOKEN_COUNTING_BETA_VERSION,
            "x-api-key": api_key,  # Azure AI Anthropic requires this header
        }

        # BUGFIX: copy before inserting api_key so the caller's dict is never
        # mutated as a side effect of building headers (the previous version
        # wrote "api_key" directly into the dict passed in by the caller).
        litellm_params = dict(litellm_params) if litellm_params else {}
        litellm_params.setdefault("api_key", api_key)

        litellm_params_obj = GenericLiteLLMParams(**litellm_params)

        # Get Azure auth headers (api-key or Authorization)
        azure_headers = BaseAzureLLM._base_validate_azure_environment(
            headers={}, litellm_params=litellm_params_obj
        )

        # Merge Azure auth headers; on key collisions the Azure values win.
        headers.update(azure_headers)

        return headers

    def get_count_tokens_endpoint(self, api_base: str) -> str:
        """
        Get the Azure AI Anthropic CountTokens API endpoint.

        Args:
            api_base: The Azure AI API base URL
                (e.g., https://my-resource.services.ai.azure.com or
                https://my-resource.services.ai.azure.com/anthropic)

        Returns:
            The endpoint URL for the CountTokens API
        """
        # Azure AI Anthropic endpoint format:
        # https://<resource>.services.ai.azure.com/anthropic/v1/messages/count_tokens
        api_base = api_base.rstrip("/")

        # Ensure the URL has an /anthropic path segment. Only append it when
        # /anthropic is absent anywhere in the path, so bases that already
        # embed it deeper are left untouched.
        if not api_base.endswith("/anthropic"):
            if "/anthropic" not in api_base:
                api_base = f"{api_base}/anthropic"

        # Add the count_tokens path
        return f"{api_base}/v1/messages/count_tokens"
|
||||
@@ -0,0 +1,226 @@
|
||||
"""
|
||||
Azure Anthropic handler - reuses AnthropicChatCompletion logic with Azure authentication
|
||||
"""
|
||||
import copy
|
||||
import json
|
||||
from typing import TYPE_CHECKING, Callable, Union
|
||||
|
||||
import httpx
|
||||
|
||||
from litellm.llms.anthropic.chat.handler import AnthropicChatCompletion
|
||||
from litellm.llms.custom_httpx.http_handler import (
|
||||
AsyncHTTPHandler,
|
||||
HTTPHandler,
|
||||
)
|
||||
from litellm.types.utils import ModelResponse
|
||||
from litellm.utils import CustomStreamWrapper
|
||||
|
||||
from .transformation import AzureAnthropicConfig
|
||||
|
||||
if TYPE_CHECKING:
|
||||
pass
|
||||
|
||||
|
||||
class AzureAnthropicChatCompletion(AnthropicChatCompletion):
    """
    Azure Anthropic chat completion handler.

    Reuses all Anthropic logic but with Azure authentication.
    """

    def __init__(self) -> None:
        super().__init__()

    def completion(
        self,
        model: str,
        messages: list,
        api_base: str,
        custom_llm_provider: str,
        custom_prompt_dict: dict,
        model_response: ModelResponse,
        print_verbose: Callable,
        encoding,
        api_key,
        logging_obj,
        optional_params: dict,
        timeout: Union[float, httpx.Timeout],
        litellm_params: dict,
        acompletion=None,
        logger_fn=None,
        headers=None,
        client=None,
    ):
        """
        Completion method that uses Azure authentication instead of Anthropic's x-api-key.
        All other logic is the same as AnthropicChatCompletion.

        Args:
            model: Model identifier for the Azure Anthropic deployment.
            messages: Chat messages (deep-copied before transformation).
            api_base: Full Azure endpoint URL the request is POSTed to.
            headers: Optional extra request headers; defaults to a fresh dict.
            acompletion: When True, returns an awaitable async path instead.
            client: Optional pre-built HTTP(S) client; validated by type per path.

        Returns:
            A ModelResponse, a CustomStreamWrapper (sync streaming), or a
            coroutine (async paths), mirroring AnthropicChatCompletion.

        Raises:
            AnthropicError: On HTTP/transport failures in the sync path.
        """
        # BUGFIX: `headers` previously defaulted to a mutable dict ({}), which
        # is shared across calls and can accumulate auth headers if downstream
        # code mutates it in place. Use None as the sentinel instead.
        if headers is None:
            headers = {}

        # Work on copies so caller-owned dicts/lists are never mutated.
        optional_params = copy.deepcopy(optional_params)
        stream = optional_params.pop("stream", None)
        json_mode: bool = optional_params.pop("json_mode", False)
        is_vertex_request: bool = optional_params.pop("is_vertex_request", False)
        _is_function_call = False
        messages = copy.deepcopy(messages)

        # Use AzureAnthropicConfig for both azure_anthropic and azure_ai Claude models
        config = AzureAnthropicConfig()

        # Azure auth (api-key / Authorization) replaces Anthropic's x-api-key.
        headers = config.validate_environment(
            api_key=api_key,
            headers=headers,
            model=model,
            messages=messages,
            optional_params={**optional_params, "is_vertex_request": is_vertex_request},
            litellm_params=litellm_params,
        )

        data = config.transform_request(
            model=model,
            messages=messages,
            optional_params=optional_params,
            litellm_params=litellm_params,
            headers=headers,
        )

        ## LOGGING
        logging_obj.pre_call(
            input=messages,
            api_key=api_key,
            additional_args={
                "complete_input_dict": data,
                "api_base": api_base,
                "headers": headers,
            },
        )
        print_verbose(f"_is_function_call: {_is_function_call}")
        if acompletion is True:
            if (
                stream is True
            ):  # if function call - fake the streaming (need complete blocks for output parsing in openai format)
                print_verbose("makes async azure anthropic streaming POST request")
                data["stream"] = stream
                return self.acompletion_stream_function(
                    model=model,
                    messages=messages,
                    data=data,
                    api_base=api_base,
                    custom_prompt_dict=custom_prompt_dict,
                    model_response=model_response,
                    print_verbose=print_verbose,
                    encoding=encoding,
                    api_key=api_key,
                    logging_obj=logging_obj,
                    optional_params=optional_params,
                    stream=stream,
                    _is_function_call=_is_function_call,
                    json_mode=json_mode,
                    litellm_params=litellm_params,
                    logger_fn=logger_fn,
                    headers=headers,
                    timeout=timeout,
                    # Only forward the client when it is the right async type.
                    client=(
                        client
                        if client is not None and isinstance(client, AsyncHTTPHandler)
                        else None
                    ),
                )
            else:
                return self.acompletion_function(
                    model=model,
                    messages=messages,
                    data=data,
                    api_base=api_base,
                    custom_prompt_dict=custom_prompt_dict,
                    model_response=model_response,
                    print_verbose=print_verbose,
                    encoding=encoding,
                    api_key=api_key,
                    provider_config=config,
                    logging_obj=logging_obj,
                    optional_params=optional_params,
                    stream=stream,
                    _is_function_call=_is_function_call,
                    litellm_params=litellm_params,
                    logger_fn=logger_fn,
                    headers=headers,
                    client=client,
                    json_mode=json_mode,
                    timeout=timeout,
                )
        else:
            ## COMPLETION CALL
            if (
                stream is True
            ):  # if function call - fake the streaming (need complete blocks for output parsing in openai format)
                data["stream"] = stream
                # Import the make_sync_call from parent
                from litellm.llms.anthropic.chat.handler import make_sync_call

                completion_stream, response_headers = make_sync_call(
                    client=client,
                    api_base=api_base,
                    headers=headers,  # type: ignore
                    data=json.dumps(data),
                    model=model,
                    messages=messages,
                    logging_obj=logging_obj,
                    timeout=timeout,
                    json_mode=json_mode,
                )
                from litellm.llms.anthropic.common_utils import (
                    process_anthropic_headers,
                )

                return CustomStreamWrapper(
                    completion_stream=completion_stream,
                    model=model,
                    custom_llm_provider="azure_ai",
                    logging_obj=logging_obj,
                    _response_headers=process_anthropic_headers(response_headers),
                )

            else:
                # Fall back to a fresh sync client unless a usable one was given.
                if client is None or not isinstance(client, HTTPHandler):
                    from litellm.llms.custom_httpx.http_handler import _get_httpx_client

                    client = _get_httpx_client(params={"timeout": timeout})

                try:
                    response = client.post(
                        api_base,
                        headers=headers,
                        data=json.dumps(data),
                        timeout=timeout,
                    )
                except Exception as e:
                    from litellm.llms.anthropic.common_utils import AnthropicError

                    # Surface transport errors as AnthropicError, preserving as
                    # much of the underlying response detail as is available.
                    status_code = getattr(e, "status_code", 500)
                    error_headers = getattr(e, "headers", None)
                    error_text = getattr(e, "text", str(e))
                    error_response = getattr(e, "response", None)
                    if error_headers is None and error_response:
                        error_headers = getattr(error_response, "headers", None)
                    if error_response and hasattr(error_response, "text"):
                        error_text = getattr(error_response, "text", error_text)
                    raise AnthropicError(
                        message=error_text,
                        status_code=status_code,
                        headers=error_headers,
                    )

                return config.transform_response(
                    model=model,
                    raw_response=response,
                    model_response=model_response,
                    logging_obj=logging_obj,
                    api_key=api_key,
                    request_data=data,
                    messages=messages,
                    optional_params=optional_params,
                    litellm_params=litellm_params,
                    encoding=encoding,
                    json_mode=json_mode,
                )
|
||||
@@ -0,0 +1,166 @@
|
||||
"""
|
||||
Azure Anthropic messages transformation config - extends AnthropicMessagesConfig with Azure authentication
|
||||
"""
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
|
||||
|
||||
from litellm.llms.anthropic.experimental_pass_through.messages.transformation import (
|
||||
AnthropicMessagesConfig,
|
||||
)
|
||||
from litellm.llms.azure.common_utils import BaseAzureLLM
|
||||
from litellm.types.router import GenericLiteLLMParams
|
||||
|
||||
if TYPE_CHECKING:
|
||||
pass
|
||||
|
||||
|
||||
class AzureAnthropicMessagesConfig(AnthropicMessagesConfig):
    """
    Azure Anthropic messages configuration that extends AnthropicMessagesConfig.
    The only difference is authentication - Azure uses x-api-key header (not api-key)
    and Azure endpoint format.
    """

    def validate_anthropic_messages_environment(
        self,
        headers: dict,
        model: str,
        messages: List[Any],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> Tuple[dict, Optional[str]]:
        """
        Validate environment and set up Azure authentication headers for /v1/messages endpoint.
        Azure Anthropic uses x-api-key header (not api-key).

        Returns the (possibly rewritten) headers together with the unmodified
        api_base.
        """
        # Convert dict to GenericLiteLLMParams if needed; inject api_key into a
        # copied dict so the original litellm_params is not mutated.
        if isinstance(litellm_params, dict):
            if api_key and "api_key" not in litellm_params:
                litellm_params = {**litellm_params, "api_key": api_key}
            litellm_params_obj = GenericLiteLLMParams(**litellm_params)
        else:
            litellm_params_obj = litellm_params or GenericLiteLLMParams()
            if api_key and not litellm_params_obj.api_key:
                litellm_params_obj.api_key = api_key

        # Use Azure authentication logic (adds api-key or Authorization header)
        headers = BaseAzureLLM._base_validate_azure_environment(
            headers=headers, litellm_params=litellm_params_obj
        )

        # Azure Anthropic uses x-api-key header (not api-key)
        # Convert api-key to x-api-key if present; existing x-api-key wins.
        if "api-key" in headers and "x-api-key" not in headers:
            headers["x-api-key"] = headers.pop("api-key")

        # Set anthropic-version header (default only; callers may override)
        if "anthropic-version" not in headers:
            headers["anthropic-version"] = "2023-06-01"

        # Set content-type header (default only; callers may override)
        if "content-type" not in headers:
            headers["content-type"] = "application/json"

        # Merge any anthropic-beta feature flags implied by optional_params.
        headers = self._update_headers_with_anthropic_beta(
            headers=headers,
            optional_params=optional_params,
        )

        return headers, api_base

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """
        Get the complete URL for Azure Anthropic /v1/messages endpoint.
        Azure Foundry endpoint format: https://<resource-name>.services.ai.azure.com/anthropic/v1/messages

        Raises:
            ValueError: If no api_base is given and AZURE_API_BASE is unset.
        """
        from litellm.secret_managers.main import get_secret_str

        api_base = api_base or get_secret_str("AZURE_API_BASE")
        if api_base is None:
            raise ValueError(
                "Missing Azure API Base - Please set `api_base` or `AZURE_API_BASE` environment variable. "
                "Expected format: https://<resource-name>.services.ai.azure.com/anthropic"
            )

        # Ensure the URL ends with /v1/messages
        api_base = api_base.rstrip("/")
        if api_base.endswith("/v1/messages"):
            # Already correct
            pass
        elif api_base.endswith("/anthropic/v1/messages"):
            # Already correct (unreachable after the previous check — kept for clarity)
            pass
        else:
            # Check if /anthropic is already in the path
            if "/anthropic" in api_base:
                # /anthropic exists, ensure we end with /anthropic/v1/messages
                # Extract the base URL up to and including /anthropic
                # (anything after the first /anthropic segment is discarded)
                parts = api_base.split("/anthropic", 1)
                api_base = parts[0] + "/anthropic"
            else:
                # /anthropic not in path, add it
                api_base = api_base + "/anthropic"
            # Add /v1/messages
            api_base = api_base + "/v1/messages"

        return api_base

    def _remove_scope_from_cache_control(
        self, anthropic_messages_request: Dict
    ) -> None:
        """
        Remove `scope` field from cache_control for Azure AI Foundry.

        Azure AI Foundry's Anthropic endpoint does not support the `scope` field
        (e.g., "global" for cross-request caching). Only `type` and `ttl` are supported.

        Processes both `system` and `messages` content blocks. Mutates the
        request dict in place; returns nothing.
        """

        def _sanitize(cache_control: Any) -> None:
            # Drop the unsupported key; tolerate non-dict cache_control values.
            if isinstance(cache_control, dict):
                cache_control.pop("scope", None)

        def _process_content_list(content: list) -> None:
            # Sanitize every content block that declares a cache_control.
            for item in content:
                if isinstance(item, dict) and "cache_control" in item:
                    _sanitize(item["cache_control"])

        # `system` may be a plain string or a list of content blocks; only the
        # list form can carry cache_control entries.
        if "system" in anthropic_messages_request:
            system = anthropic_messages_request["system"]
            if isinstance(system, list):
                _process_content_list(system)

        if "messages" in anthropic_messages_request:
            for message in anthropic_messages_request["messages"]:
                if isinstance(message, dict) and "content" in message:
                    content = message["content"]
                    if isinstance(content, list):
                        _process_content_list(content)

    def transform_anthropic_messages_request(
        self,
        model: str,
        messages: List[Dict],
        anthropic_messages_optional_request_params: Dict,
        litellm_params: GenericLiteLLMParams,
        headers: dict,
    ) -> Dict:
        """Build the request via the parent config, then strip cache_control
        `scope` fields that Azure AI Foundry rejects."""
        anthropic_messages_request = super().transform_anthropic_messages_request(
            model=model,
            messages=messages,
            anthropic_messages_optional_request_params=anthropic_messages_optional_request_params,
            litellm_params=litellm_params,
            headers=headers,
        )
        self._remove_scope_from_cache_control(anthropic_messages_request)
        return anthropic_messages_request
|
||||
@@ -0,0 +1,117 @@
|
||||
"""
|
||||
Azure Anthropic transformation config - extends AnthropicConfig with Azure authentication
|
||||
"""
|
||||
from typing import TYPE_CHECKING, Dict, List, Optional, Union
|
||||
from litellm.llms.anthropic.chat.transformation import AnthropicConfig
|
||||
from litellm.llms.azure.common_utils import BaseAzureLLM
|
||||
from litellm.types.llms.openai import AllMessageValues
|
||||
from litellm.types.router import GenericLiteLLMParams
|
||||
|
||||
if TYPE_CHECKING:
|
||||
pass
|
||||
|
||||
|
||||
class AzureAnthropicConfig(AnthropicConfig):
|
||||
"""
|
||||
Azure Anthropic configuration that extends AnthropicConfig.
|
||||
The only difference is authentication - Azure uses api-key header or Azure AD token
|
||||
instead of x-api-key header.
|
||||
"""
|
||||
|
||||
@property
|
||||
def custom_llm_provider(self) -> Optional[str]:
|
||||
return "azure_ai"
|
||||
|
||||
def validate_environment(
|
||||
self,
|
||||
headers: dict,
|
||||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: Union[dict, GenericLiteLLMParams],
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> Dict:
|
||||
"""
|
||||
Validate environment and set up Azure authentication headers.
|
||||
Azure supports:
|
||||
1. API key via 'api-key' header
|
||||
2. Azure AD token via 'Authorization: Bearer <token>' header
|
||||
"""
|
||||
# Convert dict to GenericLiteLLMParams if needed
|
||||
if isinstance(litellm_params, dict):
|
||||
# Ensure api_key is included if provided
|
||||
if api_key and "api_key" not in litellm_params:
|
||||
litellm_params = {**litellm_params, "api_key": api_key}
|
||||
litellm_params_obj = GenericLiteLLMParams(**litellm_params)
|
||||
else:
|
||||
litellm_params_obj = litellm_params or GenericLiteLLMParams()
|
||||
# Set api_key if provided and not already set
|
||||
if api_key and not litellm_params_obj.api_key:
|
||||
litellm_params_obj.api_key = api_key
|
||||
|
||||
# Use Azure authentication logic
|
||||
headers = BaseAzureLLM._base_validate_azure_environment(
|
||||
headers=headers, litellm_params=litellm_params_obj
|
||||
)
|
||||
|
||||
# Get tools and other anthropic-specific setup
|
||||
tools = optional_params.get("tools")
|
||||
prompt_caching_set = self.is_cache_control_set(messages=messages)
|
||||
computer_tool_used = self.is_computer_tool_used(tools=tools)
|
||||
mcp_server_used = self.is_mcp_server_used(
|
||||
mcp_servers=optional_params.get("mcp_servers")
|
||||
)
|
||||
pdf_used = self.is_pdf_used(messages=messages)
|
||||
file_id_used = self.is_file_id_used(messages=messages)
|
||||
user_anthropic_beta_headers = self._get_user_anthropic_beta_headers(
|
||||
anthropic_beta_header=headers.get("anthropic-beta")
|
||||
)
|
||||
|
||||
# Get anthropic headers (but we'll replace x-api-key with Azure auth)
|
||||
anthropic_headers = self.get_anthropic_headers(
|
||||
computer_tool_used=computer_tool_used,
|
||||
prompt_caching_set=prompt_caching_set,
|
||||
pdf_used=pdf_used,
|
||||
api_key=api_key or "", # Azure auth is already in headers
|
||||
file_id_used=file_id_used,
|
||||
is_vertex_request=optional_params.get("is_vertex_request", False),
|
||||
user_anthropic_beta_headers=user_anthropic_beta_headers,
|
||||
mcp_server_used=mcp_server_used,
|
||||
)
|
||||
# Merge headers - Azure auth (api-key or Authorization) takes precedence
|
||||
headers = {**anthropic_headers, **headers}
|
||||
|
||||
# Ensure anthropic-version header is set
|
||||
if "anthropic-version" not in headers:
|
||||
headers["anthropic-version"] = "2023-06-01"
|
||||
|
||||
return headers
|
||||
|
||||
def transform_request(
|
||||
self,
|
||||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
headers: dict,
|
||||
) -> dict:
|
||||
"""
|
||||
Transform request using parent AnthropicConfig, then remove unsupported params.
|
||||
Azure Anthropic doesn't support extra_body, max_retries, or stream_options parameters.
|
||||
"""
|
||||
# Call parent transform_request
|
||||
data = super().transform_request(
|
||||
model=model,
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
# Remove unsupported parameters for Azure AI Anthropic
|
||||
data.pop("extra_body", None)
|
||||
data.pop("max_retries", None)
|
||||
data.pop("stream_options", None)
|
||||
|
||||
return data
|
||||
Reference in New Issue
Block a user