chore: initial public snapshot for github upload
This commit is contained in:
@@ -0,0 +1,16 @@
|
||||
"""
|
||||
Azure Anthropic provider - supports Claude models via Azure Foundry
|
||||
"""
|
||||
from .handler import AzureAnthropicChatCompletion
|
||||
from .transformation import AzureAnthropicConfig
|
||||
|
||||
# messages_transformation is an optional module; if it cannot be imported
# (e.g. a partial install), export only the always-available symbols.
try:
    from .messages_transformation import AzureAnthropicMessagesConfig

    __all__ = [
        "AzureAnthropicChatCompletion",
        "AzureAnthropicConfig",
        "AzureAnthropicMessagesConfig",
    ]
except ImportError:
    __all__ = ["AzureAnthropicChatCompletion", "AzureAnthropicConfig"]
|
||||
@@ -0,0 +1,19 @@
|
||||
"""
|
||||
Azure AI Anthropic CountTokens API implementation.
|
||||
"""
|
||||
|
||||
from litellm.llms.azure_ai.anthropic.count_tokens.handler import (
|
||||
AzureAIAnthropicCountTokensHandler,
|
||||
)
|
||||
from litellm.llms.azure_ai.anthropic.count_tokens.token_counter import (
|
||||
AzureAIAnthropicTokenCounter,
|
||||
)
|
||||
from litellm.llms.azure_ai.anthropic.count_tokens.transformation import (
|
||||
AzureAIAnthropicCountTokensConfig,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"AzureAIAnthropicCountTokensHandler",
|
||||
"AzureAIAnthropicCountTokensConfig",
|
||||
"AzureAIAnthropicTokenCounter",
|
||||
]
|
||||
@@ -0,0 +1,133 @@
|
||||
"""
|
||||
Azure AI Anthropic CountTokens API handler.
|
||||
|
||||
Uses httpx for HTTP requests with Azure authentication.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.llms.anthropic.common_utils import AnthropicError
|
||||
from litellm.llms.azure_ai.anthropic.count_tokens.transformation import (
|
||||
AzureAIAnthropicCountTokensConfig,
|
||||
)
|
||||
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
|
||||
|
||||
|
||||
class AzureAIAnthropicCountTokensHandler(AzureAIAnthropicCountTokensConfig):
    """
    Handler for Azure AI Anthropic CountTokens API requests.

    Uses httpx for HTTP requests with Azure authentication.
    """

    async def handle_count_tokens_request(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        api_key: str,
        api_base: str,
        litellm_params: Optional[Dict[str, Any]] = None,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
        system: Optional[Any] = None,
    ) -> Dict[str, Any]:
        """
        Handle a CountTokens request using httpx with Azure authentication.

        Args:
            model: The model identifier (e.g., "claude-3-5-sonnet")
            messages: The messages to count tokens for
            api_key: The Azure AI API key
            api_base: The Azure AI API base URL
            litellm_params: Optional LiteLLM parameters
            timeout: Optional timeout for the request (defaults to litellm.request_timeout)
            tools: Optional tool definitions included in the token count
            system: Optional system prompt included in the token count

        Returns:
            Dictionary containing token count response

        Raises:
            AnthropicError: If the API request fails
        """
        try:
            self.validate_request(model, messages)
            verbose_logger.debug(
                f"Processing Azure AI Anthropic CountTokens request for model: {model}"
            )

            # Build the Anthropic-format payload for the count_tokens endpoint.
            request_body = self.transform_request_to_count_tokens(
                model=model, messages=messages, tools=tools, system=system
            )
            verbose_logger.debug(f"Transformed request: {request_body}")

            endpoint_url = self.get_count_tokens_endpoint(api_base)
            verbose_logger.debug(f"Making request to: {endpoint_url}")

            # Azure auth headers (api-key / Authorization) plus Anthropic headers.
            headers = self.get_required_headers(
                api_key=api_key, litellm_params=litellm_params
            )

            # Shared LiteLLM async httpx client for the azure_ai provider.
            async_client = get_async_httpx_client(
                llm_provider=litellm.LlmProviders.AZURE_AI
            )
            request_timeout = litellm.request_timeout if timeout is None else timeout

            response = await async_client.post(
                endpoint_url,
                headers=headers,
                json=request_body,
                timeout=request_timeout,
            )
            verbose_logger.debug(f"Response status: {response.status_code}")

            if response.status_code != 200:
                error_text = response.text
                verbose_logger.error(f"Azure AI Anthropic API error: {error_text}")
                raise AnthropicError(
                    status_code=response.status_code, message=error_text
                )

            azure_response = response.json()
            verbose_logger.debug(f"Azure AI Anthropic response: {azure_response}")
            # Payload already matches Anthropic's schema; pass it through untouched.
            return azure_response
        except AnthropicError:
            # Already in the right exception type — propagate unchanged.
            raise
        except httpx.HTTPStatusError as e:
            # Preserve the actual HTTP status code from the failed response.
            verbose_logger.error(f"HTTP error in CountTokens handler: {str(e)}")
            raise AnthropicError(
                status_code=e.response.status_code, message=e.response.text
            )
        except Exception as e:
            # Anything else becomes a generic 500-level AnthropicError.
            verbose_logger.error(f"Error in CountTokens handler: {str(e)}")
            raise AnthropicError(
                status_code=500,
                message=f"CountTokens processing error: {str(e)}",
            )
|
||||
@@ -0,0 +1,123 @@
|
||||
"""
|
||||
Azure AI Anthropic Token Counter implementation using the CountTokens API.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.llms.azure_ai.anthropic.count_tokens.handler import (
|
||||
AzureAIAnthropicCountTokensHandler,
|
||||
)
|
||||
from litellm.llms.base_llm.base_utils import BaseTokenCounter
|
||||
from litellm.types.utils import LlmProviders, TokenCountResponse
|
||||
|
||||
# Global handler instance - reuse across all token counting requests
|
||||
azure_ai_anthropic_count_tokens_handler = AzureAIAnthropicCountTokensHandler()
|
||||
|
||||
|
||||
class AzureAIAnthropicTokenCounter(BaseTokenCounter):
    """Token counter implementation for Azure AI Anthropic provider using the CountTokens API."""

    def should_use_token_counting_api(
        self,
        custom_llm_provider: Optional[str] = None,
    ) -> bool:
        # Only requests routed to the azure_ai provider use this counter.
        return custom_llm_provider == LlmProviders.AZURE_AI.value

    async def count_tokens(
        self,
        model_to_use: str,
        messages: Optional[List[Dict[str, Any]]],
        contents: Optional[List[Dict[str, Any]]],
        deployment: Optional[Dict[str, Any]] = None,
        request_model: str = "",
        tools: Optional[List[Dict[str, Any]]] = None,
        system: Optional[Any] = None,
    ) -> Optional[TokenCountResponse]:
        """
        Count tokens using Azure AI Anthropic's CountTokens API.

        Args:
            model_to_use: The model identifier
            messages: The messages to count tokens for
            contents: Alternative content format (not used for Anthropic)
            deployment: Deployment configuration containing litellm_params
            request_model: The original request model name
            tools: Optional tool definitions included in the token count
            system: Optional system prompt included in the token count

        Returns:
            TokenCountResponse with token count, or None if counting fails
        """
        from litellm.llms.anthropic.common_utils import AnthropicError

        if not messages:
            return None

        litellm_params = (deployment or {}).get("litellm_params", {})

        # Resolve credentials: deployment config first, then environment.
        api_key = litellm_params.get("api_key") or os.getenv("AZURE_AI_API_KEY")
        api_base = litellm_params.get("api_base") or os.getenv("AZURE_AI_API_BASE")

        if not api_key:
            verbose_logger.warning("No Azure AI API key found for token counting")
            return None
        if not api_base:
            verbose_logger.warning("No Azure AI API base found for token counting")
            return None

        def _failure(message: str, status_code: int) -> TokenCountResponse:
            # Shared shape for the two error-response branches below.
            return TokenCountResponse(
                total_tokens=0,
                request_model=request_model,
                model_used=model_to_use,
                tokenizer_type="azure_ai_anthropic_api",
                error=True,
                error_message=message,
                status_code=status_code,
            )

        try:
            result = await azure_ai_anthropic_count_tokens_handler.handle_count_tokens_request(
                model=model_to_use,
                messages=messages,
                api_key=api_key,
                api_base=api_base,
                litellm_params=litellm_params,
                tools=tools,
                system=system,
            )
            if result is not None:
                return TokenCountResponse(
                    total_tokens=result.get("input_tokens", 0),
                    request_model=request_model,
                    model_used=model_to_use,
                    tokenizer_type="azure_ai_anthropic_api",
                    original_response=result,
                )
        except AnthropicError as e:
            verbose_logger.warning(
                f"Azure AI Anthropic CountTokens API error: status={e.status_code}, message={e.message}"
            )
            return _failure(e.message, e.status_code)
        except Exception as e:
            verbose_logger.warning(
                f"Error calling Azure AI Anthropic CountTokens API: {e}"
            )
            return _failure(str(e), 500)

        return None
|
||||
@@ -0,0 +1,90 @@
|
||||
"""
|
||||
Azure AI Anthropic CountTokens API transformation logic.
|
||||
|
||||
Extends the base Anthropic CountTokens transformation with Azure authentication.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from litellm.constants import ANTHROPIC_TOKEN_COUNTING_BETA_VERSION
|
||||
from litellm.llms.anthropic.count_tokens.transformation import (
|
||||
AnthropicCountTokensConfig,
|
||||
)
|
||||
from litellm.llms.azure.common_utils import BaseAzureLLM
|
||||
from litellm.types.router import GenericLiteLLMParams
|
||||
|
||||
|
||||
class AzureAIAnthropicCountTokensConfig(AnthropicCountTokensConfig):
    """
    Configuration and transformation logic for Azure AI Anthropic CountTokens API.

    Extends AnthropicCountTokensConfig with Azure authentication.
    Azure AI Anthropic uses the same endpoint format but with Azure auth headers.
    """

    def get_required_headers(
        self,
        api_key: str,
        litellm_params: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, str]:
        """
        Get the required headers for the Azure AI Anthropic CountTokens API.

        Azure AI Anthropic uses Anthropic's native API format, which requires the
        x-api-key header for authentication (in addition to Azure's api-key header).

        Args:
            api_key: The Azure AI API key
            litellm_params: Optional LiteLLM parameters for additional auth config
                (never mutated; a shallow copy is taken internally)

        Returns:
            Dictionary of required headers with both x-api-key and Azure authentication
        """
        # Start with base headers including x-api-key for Anthropic API compatibility
        headers = {
            "Content-Type": "application/json",
            "anthropic-version": "2023-06-01",
            "anthropic-beta": ANTHROPIC_TOKEN_COUNTING_BETA_VERSION,
            "x-api-key": api_key,  # Azure AI Anthropic requires this header
        }

        # BUGFIX: copy before inserting api_key so the caller's dict is never
        # mutated as a side effect of building headers (the previous version
        # wrote "api_key" directly into the dict passed in by the caller).
        litellm_params = dict(litellm_params) if litellm_params else {}
        litellm_params.setdefault("api_key", api_key)

        litellm_params_obj = GenericLiteLLMParams(**litellm_params)

        # Get Azure auth headers (api-key or Authorization)
        azure_headers = BaseAzureLLM._base_validate_azure_environment(
            headers={}, litellm_params=litellm_params_obj
        )

        # Merge Azure auth headers; on key collisions the Azure values win.
        headers.update(azure_headers)

        return headers

    def get_count_tokens_endpoint(self, api_base: str) -> str:
        """
        Get the Azure AI Anthropic CountTokens API endpoint.

        Args:
            api_base: The Azure AI API base URL
                (e.g., https://my-resource.services.ai.azure.com or
                https://my-resource.services.ai.azure.com/anthropic)

        Returns:
            The endpoint URL for the CountTokens API
        """
        # Azure AI Anthropic endpoint format:
        # https://<resource>.services.ai.azure.com/anthropic/v1/messages/count_tokens
        api_base = api_base.rstrip("/")

        # Ensure the URL has an /anthropic path segment. Only append it when
        # /anthropic is absent anywhere in the path, so bases that already
        # embed it deeper are left untouched.
        if not api_base.endswith("/anthropic"):
            if "/anthropic" not in api_base:
                api_base = f"{api_base}/anthropic"

        # Add the count_tokens path
        return f"{api_base}/v1/messages/count_tokens"
|
||||
@@ -0,0 +1,226 @@
|
||||
"""
|
||||
Azure Anthropic handler - reuses AnthropicChatCompletion logic with Azure authentication
|
||||
"""
|
||||
import copy
|
||||
import json
|
||||
from typing import TYPE_CHECKING, Callable, Union
|
||||
|
||||
import httpx
|
||||
|
||||
from litellm.llms.anthropic.chat.handler import AnthropicChatCompletion
|
||||
from litellm.llms.custom_httpx.http_handler import (
|
||||
AsyncHTTPHandler,
|
||||
HTTPHandler,
|
||||
)
|
||||
from litellm.types.utils import ModelResponse
|
||||
from litellm.utils import CustomStreamWrapper
|
||||
|
||||
from .transformation import AzureAnthropicConfig
|
||||
|
||||
if TYPE_CHECKING:
|
||||
pass
|
||||
|
||||
|
||||
class AzureAnthropicChatCompletion(AnthropicChatCompletion):
    """
    Azure Anthropic chat completion handler.

    Reuses all Anthropic logic but with Azure authentication.
    """

    def __init__(self) -> None:
        super().__init__()

    def completion(
        self,
        model: str,
        messages: list,
        api_base: str,
        custom_llm_provider: str,
        custom_prompt_dict: dict,
        model_response: ModelResponse,
        print_verbose: Callable,
        encoding,
        api_key,
        logging_obj,
        optional_params: dict,
        timeout: Union[float, httpx.Timeout],
        litellm_params: dict,
        acompletion=None,
        logger_fn=None,
        headers=None,
        client=None,
    ):
        """
        Completion method that uses Azure authentication instead of Anthropic's x-api-key.
        All other logic is the same as AnthropicChatCompletion.

        Args:
            model: Model identifier for the Azure Anthropic deployment.
            messages: Chat messages (deep-copied before transformation).
            api_base: Full Azure endpoint URL the request is POSTed to.
            headers: Optional extra request headers; defaults to a fresh dict.
            acompletion: When True, returns an awaitable async path instead.
            client: Optional pre-built HTTP(S) client; validated by type per path.

        Returns:
            A ModelResponse, a CustomStreamWrapper (sync streaming), or a
            coroutine (async paths), mirroring AnthropicChatCompletion.

        Raises:
            AnthropicError: On HTTP/transport failures in the sync path.
        """
        # BUGFIX: `headers` previously defaulted to a mutable dict ({}), which
        # is shared across calls and can accumulate auth headers if downstream
        # code mutates it in place. Use None as the sentinel instead.
        if headers is None:
            headers = {}

        # Work on copies so caller-owned dicts/lists are never mutated.
        optional_params = copy.deepcopy(optional_params)
        stream = optional_params.pop("stream", None)
        json_mode: bool = optional_params.pop("json_mode", False)
        is_vertex_request: bool = optional_params.pop("is_vertex_request", False)
        _is_function_call = False
        messages = copy.deepcopy(messages)

        # Use AzureAnthropicConfig for both azure_anthropic and azure_ai Claude models
        config = AzureAnthropicConfig()

        # Azure auth (api-key / Authorization) replaces Anthropic's x-api-key.
        headers = config.validate_environment(
            api_key=api_key,
            headers=headers,
            model=model,
            messages=messages,
            optional_params={**optional_params, "is_vertex_request": is_vertex_request},
            litellm_params=litellm_params,
        )

        data = config.transform_request(
            model=model,
            messages=messages,
            optional_params=optional_params,
            litellm_params=litellm_params,
            headers=headers,
        )

        ## LOGGING
        logging_obj.pre_call(
            input=messages,
            api_key=api_key,
            additional_args={
                "complete_input_dict": data,
                "api_base": api_base,
                "headers": headers,
            },
        )
        print_verbose(f"_is_function_call: {_is_function_call}")
        if acompletion is True:
            if (
                stream is True
            ):  # if function call - fake the streaming (need complete blocks for output parsing in openai format)
                print_verbose("makes async azure anthropic streaming POST request")
                data["stream"] = stream
                return self.acompletion_stream_function(
                    model=model,
                    messages=messages,
                    data=data,
                    api_base=api_base,
                    custom_prompt_dict=custom_prompt_dict,
                    model_response=model_response,
                    print_verbose=print_verbose,
                    encoding=encoding,
                    api_key=api_key,
                    logging_obj=logging_obj,
                    optional_params=optional_params,
                    stream=stream,
                    _is_function_call=_is_function_call,
                    json_mode=json_mode,
                    litellm_params=litellm_params,
                    logger_fn=logger_fn,
                    headers=headers,
                    timeout=timeout,
                    # Only forward the client when it is the right async type.
                    client=(
                        client
                        if client is not None and isinstance(client, AsyncHTTPHandler)
                        else None
                    ),
                )
            else:
                return self.acompletion_function(
                    model=model,
                    messages=messages,
                    data=data,
                    api_base=api_base,
                    custom_prompt_dict=custom_prompt_dict,
                    model_response=model_response,
                    print_verbose=print_verbose,
                    encoding=encoding,
                    api_key=api_key,
                    provider_config=config,
                    logging_obj=logging_obj,
                    optional_params=optional_params,
                    stream=stream,
                    _is_function_call=_is_function_call,
                    litellm_params=litellm_params,
                    logger_fn=logger_fn,
                    headers=headers,
                    client=client,
                    json_mode=json_mode,
                    timeout=timeout,
                )
        else:
            ## COMPLETION CALL
            if (
                stream is True
            ):  # if function call - fake the streaming (need complete blocks for output parsing in openai format)
                data["stream"] = stream
                # Import the make_sync_call from parent
                from litellm.llms.anthropic.chat.handler import make_sync_call

                completion_stream, response_headers = make_sync_call(
                    client=client,
                    api_base=api_base,
                    headers=headers,  # type: ignore
                    data=json.dumps(data),
                    model=model,
                    messages=messages,
                    logging_obj=logging_obj,
                    timeout=timeout,
                    json_mode=json_mode,
                )
                from litellm.llms.anthropic.common_utils import (
                    process_anthropic_headers,
                )

                return CustomStreamWrapper(
                    completion_stream=completion_stream,
                    model=model,
                    custom_llm_provider="azure_ai",
                    logging_obj=logging_obj,
                    _response_headers=process_anthropic_headers(response_headers),
                )

            else:
                # Fall back to a fresh sync client unless a usable one was given.
                if client is None or not isinstance(client, HTTPHandler):
                    from litellm.llms.custom_httpx.http_handler import _get_httpx_client

                    client = _get_httpx_client(params={"timeout": timeout})

                try:
                    response = client.post(
                        api_base,
                        headers=headers,
                        data=json.dumps(data),
                        timeout=timeout,
                    )
                except Exception as e:
                    from litellm.llms.anthropic.common_utils import AnthropicError

                    # Surface transport errors as AnthropicError, preserving as
                    # much of the underlying response detail as is available.
                    status_code = getattr(e, "status_code", 500)
                    error_headers = getattr(e, "headers", None)
                    error_text = getattr(e, "text", str(e))
                    error_response = getattr(e, "response", None)
                    if error_headers is None and error_response:
                        error_headers = getattr(error_response, "headers", None)
                    if error_response and hasattr(error_response, "text"):
                        error_text = getattr(error_response, "text", error_text)
                    raise AnthropicError(
                        message=error_text,
                        status_code=status_code,
                        headers=error_headers,
                    )

                return config.transform_response(
                    model=model,
                    raw_response=response,
                    model_response=model_response,
                    logging_obj=logging_obj,
                    api_key=api_key,
                    request_data=data,
                    messages=messages,
                    optional_params=optional_params,
                    litellm_params=litellm_params,
                    encoding=encoding,
                    json_mode=json_mode,
                )
|
||||
@@ -0,0 +1,166 @@
|
||||
"""
|
||||
Azure Anthropic messages transformation config - extends AnthropicMessagesConfig with Azure authentication
|
||||
"""
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
|
||||
|
||||
from litellm.llms.anthropic.experimental_pass_through.messages.transformation import (
|
||||
AnthropicMessagesConfig,
|
||||
)
|
||||
from litellm.llms.azure.common_utils import BaseAzureLLM
|
||||
from litellm.types.router import GenericLiteLLMParams
|
||||
|
||||
if TYPE_CHECKING:
|
||||
pass
|
||||
|
||||
|
||||
class AzureAnthropicMessagesConfig(AnthropicMessagesConfig):
    """
    Azure Anthropic messages configuration that extends AnthropicMessagesConfig.
    The only difference is authentication - Azure uses x-api-key header (not api-key)
    and Azure endpoint format.
    """

    def validate_anthropic_messages_environment(
        self,
        headers: dict,
        model: str,
        messages: List[Any],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> Tuple[dict, Optional[str]]:
        """
        Validate environment and set up Azure authentication headers for /v1/messages endpoint.
        Azure Anthropic uses x-api-key header (not api-key).

        Returns the (possibly rewritten) headers together with the unmodified
        api_base.
        """
        # Convert dict to GenericLiteLLMParams if needed; inject api_key into a
        # copied dict so the original litellm_params is not mutated.
        if isinstance(litellm_params, dict):
            if api_key and "api_key" not in litellm_params:
                litellm_params = {**litellm_params, "api_key": api_key}
            litellm_params_obj = GenericLiteLLMParams(**litellm_params)
        else:
            litellm_params_obj = litellm_params or GenericLiteLLMParams()
            if api_key and not litellm_params_obj.api_key:
                litellm_params_obj.api_key = api_key

        # Use Azure authentication logic (adds api-key or Authorization header)
        headers = BaseAzureLLM._base_validate_azure_environment(
            headers=headers, litellm_params=litellm_params_obj
        )

        # Azure Anthropic uses x-api-key header (not api-key)
        # Convert api-key to x-api-key if present; existing x-api-key wins.
        if "api-key" in headers and "x-api-key" not in headers:
            headers["x-api-key"] = headers.pop("api-key")

        # Set anthropic-version header (default only; callers may override)
        if "anthropic-version" not in headers:
            headers["anthropic-version"] = "2023-06-01"

        # Set content-type header (default only; callers may override)
        if "content-type" not in headers:
            headers["content-type"] = "application/json"

        # Merge any anthropic-beta feature flags implied by optional_params.
        headers = self._update_headers_with_anthropic_beta(
            headers=headers,
            optional_params=optional_params,
        )

        return headers, api_base

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """
        Get the complete URL for Azure Anthropic /v1/messages endpoint.
        Azure Foundry endpoint format: https://<resource-name>.services.ai.azure.com/anthropic/v1/messages

        Raises:
            ValueError: If no api_base is given and AZURE_API_BASE is unset.
        """
        from litellm.secret_managers.main import get_secret_str

        api_base = api_base or get_secret_str("AZURE_API_BASE")
        if api_base is None:
            raise ValueError(
                "Missing Azure API Base - Please set `api_base` or `AZURE_API_BASE` environment variable. "
                "Expected format: https://<resource-name>.services.ai.azure.com/anthropic"
            )

        # Ensure the URL ends with /v1/messages
        api_base = api_base.rstrip("/")
        if api_base.endswith("/v1/messages"):
            # Already correct
            pass
        elif api_base.endswith("/anthropic/v1/messages"):
            # Already correct (unreachable after the previous check — kept for clarity)
            pass
        else:
            # Check if /anthropic is already in the path
            if "/anthropic" in api_base:
                # /anthropic exists, ensure we end with /anthropic/v1/messages
                # Extract the base URL up to and including /anthropic
                # (anything after the first /anthropic segment is discarded)
                parts = api_base.split("/anthropic", 1)
                api_base = parts[0] + "/anthropic"
            else:
                # /anthropic not in path, add it
                api_base = api_base + "/anthropic"
            # Add /v1/messages
            api_base = api_base + "/v1/messages"

        return api_base

    def _remove_scope_from_cache_control(
        self, anthropic_messages_request: Dict
    ) -> None:
        """
        Remove `scope` field from cache_control for Azure AI Foundry.

        Azure AI Foundry's Anthropic endpoint does not support the `scope` field
        (e.g., "global" for cross-request caching). Only `type` and `ttl` are supported.

        Processes both `system` and `messages` content blocks. Mutates the
        request dict in place; returns nothing.
        """

        def _sanitize(cache_control: Any) -> None:
            # Drop the unsupported key; tolerate non-dict cache_control values.
            if isinstance(cache_control, dict):
                cache_control.pop("scope", None)

        def _process_content_list(content: list) -> None:
            # Sanitize every content block that declares a cache_control.
            for item in content:
                if isinstance(item, dict) and "cache_control" in item:
                    _sanitize(item["cache_control"])

        # `system` may be a plain string or a list of content blocks; only the
        # list form can carry cache_control entries.
        if "system" in anthropic_messages_request:
            system = anthropic_messages_request["system"]
            if isinstance(system, list):
                _process_content_list(system)

        if "messages" in anthropic_messages_request:
            for message in anthropic_messages_request["messages"]:
                if isinstance(message, dict) and "content" in message:
                    content = message["content"]
                    if isinstance(content, list):
                        _process_content_list(content)

    def transform_anthropic_messages_request(
        self,
        model: str,
        messages: List[Dict],
        anthropic_messages_optional_request_params: Dict,
        litellm_params: GenericLiteLLMParams,
        headers: dict,
    ) -> Dict:
        """Build the request via the parent config, then strip cache_control
        `scope` fields that Azure AI Foundry rejects."""
        anthropic_messages_request = super().transform_anthropic_messages_request(
            model=model,
            messages=messages,
            anthropic_messages_optional_request_params=anthropic_messages_optional_request_params,
            litellm_params=litellm_params,
            headers=headers,
        )
        self._remove_scope_from_cache_control(anthropic_messages_request)
        return anthropic_messages_request
|
||||
@@ -0,0 +1,117 @@
|
||||
"""
|
||||
Azure Anthropic transformation config - extends AnthropicConfig with Azure authentication
|
||||
"""
|
||||
from typing import TYPE_CHECKING, Dict, List, Optional, Union
|
||||
from litellm.llms.anthropic.chat.transformation import AnthropicConfig
|
||||
from litellm.llms.azure.common_utils import BaseAzureLLM
|
||||
from litellm.types.llms.openai import AllMessageValues
|
||||
from litellm.types.router import GenericLiteLLMParams
|
||||
|
||||
if TYPE_CHECKING:
|
||||
pass
|
||||
|
||||
|
||||
class AzureAnthropicConfig(AnthropicConfig):
|
||||
"""
|
||||
Azure Anthropic configuration that extends AnthropicConfig.
|
||||
The only difference is authentication - Azure uses api-key header or Azure AD token
|
||||
instead of x-api-key header.
|
||||
"""
|
||||
|
||||
@property
|
||||
def custom_llm_provider(self) -> Optional[str]:
|
||||
return "azure_ai"
|
||||
|
||||
def validate_environment(
|
||||
self,
|
||||
headers: dict,
|
||||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: Union[dict, GenericLiteLLMParams],
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> Dict:
|
||||
"""
|
||||
Validate environment and set up Azure authentication headers.
|
||||
Azure supports:
|
||||
1. API key via 'api-key' header
|
||||
2. Azure AD token via 'Authorization: Bearer <token>' header
|
||||
"""
|
||||
# Convert dict to GenericLiteLLMParams if needed
|
||||
if isinstance(litellm_params, dict):
|
||||
# Ensure api_key is included if provided
|
||||
if api_key and "api_key" not in litellm_params:
|
||||
litellm_params = {**litellm_params, "api_key": api_key}
|
||||
litellm_params_obj = GenericLiteLLMParams(**litellm_params)
|
||||
else:
|
||||
litellm_params_obj = litellm_params or GenericLiteLLMParams()
|
||||
# Set api_key if provided and not already set
|
||||
if api_key and not litellm_params_obj.api_key:
|
||||
litellm_params_obj.api_key = api_key
|
||||
|
||||
# Use Azure authentication logic
|
||||
headers = BaseAzureLLM._base_validate_azure_environment(
|
||||
headers=headers, litellm_params=litellm_params_obj
|
||||
)
|
||||
|
||||
# Get tools and other anthropic-specific setup
|
||||
tools = optional_params.get("tools")
|
||||
prompt_caching_set = self.is_cache_control_set(messages=messages)
|
||||
computer_tool_used = self.is_computer_tool_used(tools=tools)
|
||||
mcp_server_used = self.is_mcp_server_used(
|
||||
mcp_servers=optional_params.get("mcp_servers")
|
||||
)
|
||||
pdf_used = self.is_pdf_used(messages=messages)
|
||||
file_id_used = self.is_file_id_used(messages=messages)
|
||||
user_anthropic_beta_headers = self._get_user_anthropic_beta_headers(
|
||||
anthropic_beta_header=headers.get("anthropic-beta")
|
||||
)
|
||||
|
||||
# Get anthropic headers (but we'll replace x-api-key with Azure auth)
|
||||
anthropic_headers = self.get_anthropic_headers(
|
||||
computer_tool_used=computer_tool_used,
|
||||
prompt_caching_set=prompt_caching_set,
|
||||
pdf_used=pdf_used,
|
||||
api_key=api_key or "", # Azure auth is already in headers
|
||||
file_id_used=file_id_used,
|
||||
is_vertex_request=optional_params.get("is_vertex_request", False),
|
||||
user_anthropic_beta_headers=user_anthropic_beta_headers,
|
||||
mcp_server_used=mcp_server_used,
|
||||
)
|
||||
# Merge headers - Azure auth (api-key or Authorization) takes precedence
|
||||
headers = {**anthropic_headers, **headers}
|
||||
|
||||
# Ensure anthropic-version header is set
|
||||
if "anthropic-version" not in headers:
|
||||
headers["anthropic-version"] = "2023-06-01"
|
||||
|
||||
return headers
|
||||
|
||||
def transform_request(
|
||||
self,
|
||||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
headers: dict,
|
||||
) -> dict:
|
||||
"""
|
||||
Transform request using parent AnthropicConfig, then remove unsupported params.
|
||||
Azure Anthropic doesn't support extra_body, max_retries, or stream_options parameters.
|
||||
"""
|
||||
# Call parent transform_request
|
||||
data = super().transform_request(
|
||||
model=model,
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
# Remove unsupported parameters for Azure AI Anthropic
|
||||
data.pop("extra_body", None)
|
||||
data.pop("max_retries", None)
|
||||
data.pop("stream_options", None)
|
||||
|
||||
return data
|
||||
Reference in New Issue
Block a user