1304 lines
47 KiB
Python
1304 lines
47 KiB
Python
import asyncio
|
||
import os
|
||
import ssl
|
||
import sys
|
||
import time
|
||
from typing import (
|
||
TYPE_CHECKING,
|
||
Any,
|
||
Callable,
|
||
Dict,
|
||
List,
|
||
Mapping,
|
||
Optional,
|
||
Tuple,
|
||
Union,
|
||
)
|
||
|
||
import certifi
|
||
import httpx
|
||
from aiohttp import ClientSession, TCPConnector
|
||
from httpx import USE_CLIENT_DEFAULT, AsyncHTTPTransport, HTTPTransport
|
||
from httpx._types import RequestFiles
|
||
|
||
import litellm
|
||
from litellm._logging import verbose_logger
|
||
from litellm.constants import (
|
||
_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
|
||
AIOHTTP_CONNECTOR_LIMIT,
|
||
AIOHTTP_CONNECTOR_LIMIT_PER_HOST,
|
||
AIOHTTP_KEEPALIVE_TIMEOUT,
|
||
AIOHTTP_NEEDS_CLEANUP_CLOSED,
|
||
AIOHTTP_TTL_DNS_CACHE,
|
||
DEFAULT_SSL_CIPHERS,
|
||
)
|
||
from litellm.litellm_core_utils.logging_utils import track_llm_api_timing
|
||
from litellm.types.llms.custom_http import *
|
||
|
||
if TYPE_CHECKING:
|
||
from litellm import LlmProviders
|
||
from litellm.litellm_core_utils.litellm_logging import (
|
||
Logging as LiteLLMLoggingObject,
|
||
)
|
||
from litellm.llms.custom_httpx.aiohttp_transport import LiteLLMAiohttpTransport
|
||
else:
|
||
LlmProviders = Any
|
||
LiteLLMLoggingObject = Any
|
||
LiteLLMAiohttpTransport = Any
|
||
|
||
try:
|
||
from litellm._version import version
|
||
except Exception:
|
||
version = "0.0.0"
|
||
|
||
|
||
def get_default_headers() -> dict:
    """
    Build the default header set sent with every HTTP request.

    The only header produced is ``User-Agent``:

    - When the ``LITELLM_USER_AGENT`` environment variable is set (even to an
      empty string) its value is used verbatim.
    - Otherwise the value is ``litellm/{version}``.

    Returns:
        dict: A fresh ``{"User-Agent": <value>}`` mapping.
    """
    override = os.environ.get("LITELLM_USER_AGENT")
    agent = f"litellm/{version}" if override is None else override
    return {"User-Agent": agent}
|
||
|
||
|
||
# Module-level default headers (User-Agent), computed once at import time.
headers = get_default_headers()

# Timeout used when a handler is created without an explicit timeout:
# 5 seconds overall, 5 seconds to establish the connection.
# https://www.python-httpx.org/advanced/timeouts
_DEFAULT_TIMEOUT = httpx.Timeout(timeout=5.0, connect=5.0)
|
||
|
||
|
||
def _prepare_request_data_and_content(
    data: Optional[Union[dict, str, bytes]] = None,
    content: Any = None,
) -> Tuple[Optional[Union[dict, Mapping]], Any]:
    """
    Decide which of httpx's ``data=`` / ``content=`` parameters a payload belongs in.

    httpx emits a DeprecationWarning when raw ``bytes``/``str`` are passed via
    ``data=``; it expects them in ``content=``. This helper reroutes such
    payloads so the warning is never triggered.

    Routing rules:
        - ``data`` is ``None``          -> nothing goes in ``data=``; ``content`` is passed through.
        - ``data`` is ``bytes``/``str`` -> moved to ``content=`` unless ``content`` was
          already supplied, in which case the explicit ``content`` wins and ``data`` is dropped.
        - anything else (dict/Mapping)  -> stays in ``data=``; ``content`` is passed through.

    Args:
        data: Request data (dict, str, or bytes).
        content: Raw request content.

    Returns:
        Tuple of ``(request_data, request_content)`` ready to hand to httpx.
    """
    if data is None:
        return None, content

    if not isinstance(data, (bytes, str)):
        # Form-style payloads are what `data=` is meant for.
        return data, content

    # Raw payload: only use it when the caller did not give explicit content.
    return None, (data if content is None else content)
|
||
|
||
|
||
# Cache of SSL contexts, so that repeated requests for the same configuration
# reuse one context instead of building a duplicate each time.
# Key: tuple of (cafile, ssl_security_level, ssl_ecdh_curve)
# Value: ssl.SSLContext
# Entries are added by get_ssl_configuration() and never removed in this module.
_ssl_context_cache: Dict[
    Tuple[Optional[str], Optional[str], Optional[str]], ssl.SSLContext
] = {}
|
||
|
||
|
||
def _create_ssl_context(
    cafile: Optional[str],
    ssl_security_level: Optional[str],
    ssl_ecdh_curve: Optional[str],
) -> ssl.SSLContext:
    """
    Build a new ``ssl.SSLContext`` for the given configuration.

    Kept separate from ``get_ssl_configuration`` so that the result can be cached
    per ``(cafile, ssl_security_level, ssl_ecdh_curve)``.

    Args:
        cafile: Path to the CA bundle passed to ``ssl.create_default_context``.
        ssl_security_level: Optional OpenSSL cipher string. When it is a non-empty
            string it replaces the default cipher list (``DEFAULT_SSL_CIPHERS``).
        ssl_ecdh_curve: Optional ECDH curve name (e.g. ``X25519``, ``prime256v1``,
            ``secp384r1``, ``secp521r1``). Setting ``X25519`` disables PQC key exchange.

    Returns:
        The configured SSL context. An unsupported or invalid curve only logs a
        warning; the context is still returned with its default curves.
    """
    context = ssl.create_default_context(cafile=cafile)

    # TLS 1.2 is the floor for every context created here.
    context.minimum_version = ssl.TLSVersion.TLSv1_2

    # A user-supplied cipher string (e.g. from SSL_SECURITY_LEVEL) takes precedence
    # over the built-in list that prefers fast, widely supported ciphers.
    has_custom_ciphers = bool(ssl_security_level) and isinstance(
        ssl_security_level, str
    )
    context.set_ciphers(
        ssl_security_level if has_custom_ciphers else DEFAULT_SSL_CIPHERS
    )

    # No curve requested: nothing more to configure.
    if not (ssl_ecdh_curve and isinstance(ssl_ecdh_curve, str)):
        return context

    try:
        context.set_ecdh_curve(ssl_ecdh_curve)
        verbose_logger.debug(f"SSL ECDH curve set to: {ssl_ecdh_curve}")
    except AttributeError:
        # This Python/OpenSSL build does not expose set_ecdh_curve.
        verbose_logger.warning(
            f"SSL ECDH curve configuration not supported. "
            f"Python version: {sys.version.split()[0]}, OpenSSL version: {ssl.OPENSSL_VERSION}. "
            f"Requested curve: {ssl_ecdh_curve}. Continuing with default curves."
        )
    except ValueError as e:
        # The curve name was rejected.
        verbose_logger.warning(
            f"Invalid SSL ECDH curve name: '{ssl_ecdh_curve}'. {e}. "
            f"Common valid curves: X25519, prime256v1, secp384r1, secp521r1. "
            f"Continuing with default curves (including PQC)."
        )

    return context
|
||
|
||
|
||
def get_ssl_verify(
    ssl_verify: Optional[Union[bool, str]] = None,
) -> Union[bool, str]:
    """
    Resolve the effective SSL verification setting.

    Resolution order:
        1. The ``ssl_verify`` argument, when it is not ``None``.
        2. ``os.environ["SSL_VERIFY"]``.
        3. ``litellm.ssl_verify``.
        4. ``os.environ["SSL_CERT_FILE"]`` - only consulted when the value resolved
           so far is exactly ``True`` and the file exists.

    String values are interpreted as a CA bundle path if the path exists,
    otherwise as a boolean string ("True"/"False") when ``str_to_bool`` can parse
    them; strings that are neither are returned unchanged.

    Returns:
        Union[bool, str]: ``True``/``False`` or a path to a CA bundle.
    """
    from litellm.secret_managers.main import str_to_bool

    resolved = ssl_verify
    if resolved is None:
        resolved = os.getenv("SSL_VERIFY", litellm.ssl_verify)

    if isinstance(resolved, str):
        # An existing path is a CA bundle and wins immediately.
        if os.path.exists(resolved):
            return resolved

        # Otherwise try to read it as "True"/"False".
        parsed = str_to_bool(resolved)
        if parsed is not None:
            resolved = parsed

    if resolved is True:
        # Verification is on: let SSL_CERT_FILE supply the CA bundle if present.
        cert_file = os.getenv("SSL_CERT_FILE")
        if cert_file and os.path.exists(cert_file):
            return cert_file

    if resolved is None:
        return True
    return resolved
|
||
|
||
|
||
def get_ssl_configuration(
    ssl_verify: Optional[VerifyTypes] = None,
) -> Union[bool, str, ssl.SSLContext]:
    """
    Single entry point that turns an ``ssl_verify`` setting into what the HTTP
    clients should use for TLS.

    Behaviour:
        1. An ``ssl.SSLContext`` passed as ``ssl_verify`` is returned as-is.
        2. Otherwise the setting is resolved through ``get_ssl_verify``.
        3. If the resolved value is ``False``, ``False`` is returned (verification off).
        4. In every other case an SSL context is returned, built from:
             - the CA bundle: the resolved value if it is an existing path, else
               ``SSL_CERT_FILE`` if set and existing, else certifi's bundle;
             - ``SSL_SECURITY_LEVEL`` / ``litellm.ssl_security_level`` (cipher string);
             - ``SSL_ECDH_CURVE`` / ``litellm.ssl_ecdh_curve``.

    Contexts are cached in ``_ssl_context_cache`` keyed by
    ``(cafile, ssl_security_level, ssl_ecdh_curve)`` so identical configurations
    share one context object.

    Args:
        ssl_verify: ``None`` (use environment/litellm defaults), ``False``,
            ``True``, a CA bundle path, or a ready-made ``ssl.SSLContext``.

    Returns:
        Union[bool, str, ssl.SSLContext]: The SSL configuration to use.
    """
    # A caller-provided context needs no further processing.
    if isinstance(ssl_verify, ssl.SSLContext):
        return ssl_verify

    ssl_verify = get_ssl_verify(ssl_verify=ssl_verify)

    security_level = os.getenv("SSL_SECURITY_LEVEL", litellm.ssl_security_level)
    ecdh_curve = os.getenv("SSL_ECDH_CURVE", litellm.ssl_ecdh_curve)

    # Pick the CA bundle: explicit path > SSL_CERT_FILE > certifi.
    ca_bundle = None
    if isinstance(ssl_verify, str) and os.path.exists(ssl_verify):
        ca_bundle = ssl_verify
    if not ca_bundle:
        env_bundle = os.getenv("SSL_CERT_FILE")
        if env_bundle and os.path.exists(env_bundle):
            ca_bundle = env_bundle
        else:
            ca_bundle = certifi.where()

    # Verification explicitly disabled: no context is built.
    if ssl_verify is False:
        return ssl_verify

    config_key = (ca_bundle, security_level, ecdh_curve)
    try:
        return _ssl_context_cache[config_key]
    except KeyError:
        context = _create_ssl_context(
            cafile=ca_bundle,
            ssl_security_level=security_level,
            ssl_ecdh_curve=ecdh_curve,
        )
        _ssl_context_cache[config_key] = context
        return context
|
||
|
||
|
||
# Lazily-populated SSL configuration shared by realtime websocket clients.
_shared_realtime_ssl_context: Optional[Union[bool, str, ssl.SSLContext]] = None


def get_shared_realtime_ssl_context() -> Union[bool, str, ssl.SSLContext]:
    """
    Return the SSL configuration reused by realtime websocket clients.

    The value is created on first use (rather than at import time, which avoids
    import-order cycles during startup) by calling ``get_ssl_configuration()``
    and is then stored in the module-level ``_shared_realtime_ssl_context`` so
    all callers share a single configuration.
    """
    global _shared_realtime_ssl_context

    if _shared_realtime_ssl_context is not None:
        return _shared_realtime_ssl_context

    _shared_realtime_ssl_context = get_ssl_configuration()
    return _shared_realtime_ssl_context
|
||
|
||
|
||
def mask_sensitive_info(error_message):
    """
    Redact API keys that appear as ``key=<value>`` inside a message or URL.

    Every occurrence of the substring ``key=`` is treated as the start of a
    secret (so ``api_key=...`` is covered too). The value runs up to the next
    ``&`` or, if there is none, to the end of the string, and is replaced with
    ``[REDACTED_API_KEY]``. Previously only the first occurrence was redacted,
    which leaked any further keys present in the same message.

    Args:
        error_message: The text to sanitise. Non-string values are returned
            unchanged.

    Returns:
        The message with all ``key=`` values redacted, or the original object
        when it is not a string.
    """
    if not isinstance(error_message, str):
        return error_message

    marker = "key="
    redacted = "[REDACTED_API_KEY]"

    pieces = []
    cursor = 0
    while True:
        key_index = error_message.find(marker, cursor)
        if key_index == -1:
            # No further keys: keep the remainder untouched.
            pieces.append(error_message[cursor:])
            break

        value_start = key_index + len(marker)
        pieces.append(error_message[cursor:value_start])
        pieces.append(redacted)

        # The secret ends at the next parameter separator, if any.
        value_end = error_message.find("&", value_start)
        if value_end == -1:
            # Value runs to the end of the string; everything after is masked.
            break
        cursor = value_end

    return "".join(pieces)
|
||
|
||
|
||
class MaskedHTTPStatusError(httpx.HTTPStatusError):
    """
    Copy of an ``httpx.HTTPStatusError`` whose request URL has its API key redacted.

    The request and response are rebuilt from the original error (method,
    headers, content, status code) so the new error looks like the original,
    except that the URL is passed through ``mask_sensitive_info``.

    Attributes:
        message: The ``message`` argument (``None`` when not supplied).
        text: The ``text`` argument (``None`` when not supplied).
    """

    def __init__(
        self, original_error, message: Optional[str] = None, text: Optional[str] = None
    ):
        """
        Args:
            original_error: The ``httpx.HTTPStatusError`` to copy.
            message: Value stored on ``self.message``.
            text: Value stored on ``self.text``.
        """
        masked_url = mask_sensitive_info(str(original_error.request.url))

        # `message` is not an attribute of a plain httpx.HTTPStatusError; it only
        # exists when a handler attached it. Fall back to the error's own text,
        # masked because httpx embeds the full request URL in it.
        base_message = getattr(original_error, "message", None)
        if base_message is None:
            base_message = mask_sensitive_info(str(original_error))

        super().__init__(
            message=base_message,
            request=httpx.Request(
                method=original_error.request.method,
                url=masked_url,
                headers=original_error.request.headers,
                content=original_error.request.content,
            ),
            response=httpx.Response(
                status_code=original_error.response.status_code,
                content=original_error.response.content,
                headers=original_error.response.headers,
            ),
        )
        self.message = message
        self.text = text
|
||
|
||
|
||
class AsyncHTTPHandler:
    """
    Async HTTP client built on ``httpx.AsyncClient``.

    The underlying client uses an aiohttp-based transport by default (see
    ``_should_use_aiohttp_transport``) and falls back to httpx's own transport
    when that is disabled. Request helpers (``post``/``put``/``patch``/``delete``)
    retry once on a fresh client when the connection fails with
    ``httpx.RemoteProtocolError`` or ``httpx.ConnectError``.

    Attributes:
        timeout: Timeout given at construction; used for requests that pass none.
        event_hooks: httpx event hooks given at construction.
        client: The ``httpx.AsyncClient`` used for requests.
        client_alias: Optional name for this client in logs.
    """

    def __init__(
        self,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]] = None,
        concurrent_limit=None,  # Kept for backward compatibility, but ignored (no limits)
        client_alias: Optional[str] = None,  # name for client in logs
        ssl_verify: Optional[VerifyTypes] = None,
        shared_session: Optional["ClientSession"] = None,
    ):
        self.timeout = timeout
        self.event_hooks = event_hooks
        # Remembered so that retry clients are created with the same TLS settings.
        self._ssl_verify = ssl_verify
        self.client = self.create_client(
            timeout=timeout,
            event_hooks=event_hooks,
            ssl_verify=ssl_verify,
            shared_session=shared_session,
        )
        self.client_alias = client_alias

    def create_client(
        self,
        timeout: Optional[Union[float, httpx.Timeout]],
        event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]],
        ssl_verify: Optional[VerifyTypes] = None,
        shared_session: Optional["ClientSession"] = None,
    ) -> httpx.AsyncClient:
        """
        Create a new ``httpx.AsyncClient``.

        Args:
            timeout: Client timeout; ``None`` means ``_DEFAULT_TIMEOUT``.
            event_hooks: httpx event hooks.
            ssl_verify: SSL setting resolved through ``get_ssl_configuration``.
            shared_session: Existing aiohttp session to reuse for the transport.

        Returns:
            A client with default headers, redirects enabled and the transport
            chosen by ``_create_async_transport``.
        """
        # Get unified SSL configuration
        ssl_config = get_ssl_configuration(ssl_verify)

        # An SSL certificate used by the requested host to authenticate the client.
        # /path/to/client.pem
        cert = os.getenv("SSL_CERTIFICATE", litellm.ssl_certificate)

        if timeout is None:
            timeout = _DEFAULT_TIMEOUT

        # The transport receives either the SSL context or the boolean flag,
        # whichever form the resolved configuration has.
        transport = AsyncHTTPHandler._create_async_transport(
            ssl_context=ssl_config if isinstance(ssl_config, ssl.SSLContext) else None,
            ssl_verify=ssl_config if isinstance(ssl_config, bool) else None,
            shared_session=shared_session,
        )

        # Get default headers (User-Agent, overridable via LITELLM_USER_AGENT)
        default_headers = get_default_headers()

        return httpx.AsyncClient(
            transport=transport,
            event_hooks=event_hooks,
            timeout=timeout,
            verify=ssl_config,
            cert=cert,
            headers=default_headers,
            follow_redirects=True,
        )

    async def close(self):
        """Close the underlying client when you're done with it."""
        await self.client.aclose()

    async def __aenter__(self):
        """Enter the async context; yields the underlying ``httpx.AsyncClient``."""
        return self.client

    async def __aexit__(self, exc_type=None, exc_value=None, traceback=None):
        """
        Close the client when leaving the async context.

        The exception arguments are required by the context-manager protocol
        (``async with`` always passes them); exceptions are not suppressed.
        """
        await self.client.aclose()

    async def get(
        self,
        url: str,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        follow_redirects: Optional[bool] = None,
    ):
        """
        Send a GET request.

        Query parameters found in ``url`` are merged over ``params`` (URL values
        win on conflict). The caller's ``params`` dict is not modified.

        Args:
            url: Target URL.
            params: Extra query parameters.
            headers: Request headers.
            follow_redirects: Per-request override; ``None`` uses the client default.

        Returns:
            The ``httpx.Response``.
        """
        # Set follow_redirects to UseClientDefault if None
        _follow_redirects = (
            follow_redirects if follow_redirects is not None else USE_CLIENT_DEFAULT
        )

        # Copy before merging so the caller's dict is never mutated.
        merged_params = dict(params) if params else {}
        merged_params.update(HTTPHandler.extract_query_params(url))

        response = await self.client.get(
            url, params=merged_params, headers=headers, follow_redirects=_follow_redirects  # type: ignore
        )
        return response

    @staticmethod
    def _response_headers_from_error(error: Exception) -> Dict[str, Any]:
        """Return the error's response headers, each key prefixed with ``response_headers-``."""
        error_response = getattr(error, "response", None)
        if error_response is None:
            return {}
        return {
            "response_headers-{}".format(key): value
            for key, value in error_response.headers.items()
        }

    async def _retry_with_new_client(
        self,
        method: str,
        url: str,
        timeout: Optional[Union[float, httpx.Timeout]],
        data: Optional[Union[dict, str, bytes]] = None,
        json: Optional[dict] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        stream: bool = False,
        files: Optional[RequestFiles] = None,
        content: Any = None,
    ):
        """
        Re-send a request once on a freshly created client after a connection error.

        The new client uses the handler's event hooks and SSL setting, and is
        closed again before this method returns.
        """
        new_client = self.create_client(
            timeout=timeout,
            event_hooks=self.event_hooks,
            # getattr: tolerate instances that were built without __init__.
            ssl_verify=getattr(self, "_ssl_verify", None),
        )
        try:
            return await self.single_connection_post_request(
                url=url,
                client=new_client,
                data=data,
                json=json,
                params=params,
                headers=headers,
                stream=stream,
                content=content,
                files=files,
                method=method,
            )
        finally:
            await new_client.aclose()

    @track_llm_api_timing()
    async def post(
        self,
        url: str,
        data: Optional[Union[dict, str, bytes]] = None,  # type: ignore
        json: Optional[dict] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        stream: bool = False,
        logging_obj: Optional[LiteLLMLoggingObject] = None,
        files: Optional[RequestFiles] = None,
        content: Any = None,
    ):
        """
        Send a POST request.

        Returns:
            The ``httpx.Response`` (unread when ``stream`` is True).

        Raises:
            litellm.Timeout: On ``httpx.TimeoutException``.
            httpx.HTTPStatusError: On a 4xx/5xx response; ``message``, ``text``
                and ``status_code`` attributes are attached. For non-streaming
                requests the body text is passed through ``mask_sensitive_info``.
        """
        start_time = time.time()
        try:
            if timeout is None:
                timeout = self.timeout

            # Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
            request_data, request_content = _prepare_request_data_and_content(
                data, content
            )

            req = self.client.build_request(
                "POST",
                url,
                data=request_data,
                json=json,
                params=params,
                headers=headers,
                timeout=timeout,
                files=files,
                content=request_content,
            )
            response = await self.client.send(req, stream=stream)
            response.raise_for_status()
            return response
        except (httpx.RemoteProtocolError, httpx.ConnectError):
            # Retry the request with a new session if there is a connection error.
            # Errors raised by the retry itself propagate unchanged.
            return await self._retry_with_new_client(
                "POST",
                url,
                timeout,
                data=data,
                json=json,
                params=params,
                headers=headers,
                stream=stream,
                files=files,
                content=content,
            )
        except httpx.TimeoutException as e:
            time_delta = round(time.time() - start_time, 3)
            raise litellm.Timeout(
                message=f"Connection timed out. Timeout passed={timeout}, time taken={time_delta} seconds",
                model="default-model-name",
                llm_provider="litellm-httpx-handler",
                headers=self._response_headers_from_error(e),
            )
        except httpx.HTTPStatusError as e:
            if stream is True:
                # Streaming responses have not been read yet; read the body now.
                error_body = await e.response.aread()
                setattr(e, "message", error_body)
                setattr(e, "text", error_body)
            else:
                setattr(e, "message", mask_sensitive_info(e.response.text))
                setattr(e, "text", mask_sensitive_info(e.response.text))

            setattr(e, "status_code", e.response.status_code)

            raise e

    async def put(
        self,
        url: str,
        data: Optional[Union[dict, str, bytes]] = None,  # type: ignore
        json: Optional[dict] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        stream: bool = False,
        content: Any = None,
    ):
        """
        Send a PUT request.

        Raises:
            litellm.Timeout: On ``httpx.TimeoutException``.
            httpx.HTTPStatusError: On a 4xx/5xx response; ``status_code`` and
                ``message`` attributes are attached.
        """
        try:
            if timeout is None:
                timeout = self.timeout

            # Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
            request_data, request_content = _prepare_request_data_and_content(
                data, content
            )

            req = self.client.build_request(
                "PUT", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content  # type: ignore
            )
            # NOTE(review): `stream` is not forwarded to send() here (unlike
            # post/delete); kept as-is to avoid changing what callers receive.
            response = await self.client.send(req)
            response.raise_for_status()
            return response
        except (httpx.RemoteProtocolError, httpx.ConnectError):
            # Retry the request (still as PUT) with a new session if there is a connection error
            return await self._retry_with_new_client(
                "PUT",
                url,
                timeout,
                data=data,
                json=json,
                params=params,
                headers=headers,
                stream=stream,
                content=content,
            )
        except httpx.TimeoutException as e:
            raise litellm.Timeout(
                message=f"Connection timed out after {timeout} seconds.",
                model="default-model-name",
                llm_provider="litellm-httpx-handler",
                headers=self._response_headers_from_error(e),
            )
        except httpx.HTTPStatusError as e:
            setattr(e, "status_code", e.response.status_code)
            if stream is True:
                setattr(e, "message", await e.response.aread())
            else:
                setattr(e, "message", e.response.text)
            raise e

    async def patch(
        self,
        url: str,
        data: Optional[Union[dict, str, bytes]] = None,  # type: ignore
        json: Optional[dict] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        stream: bool = False,
        content: Any = None,
    ):
        """
        Send a PATCH request.

        Raises:
            litellm.Timeout: On ``httpx.TimeoutException``.
            httpx.HTTPStatusError: On a 4xx/5xx response; ``status_code`` and
                ``message`` attributes are attached.
        """
        try:
            if timeout is None:
                timeout = self.timeout

            # Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
            request_data, request_content = _prepare_request_data_and_content(
                data, content
            )

            req = self.client.build_request(
                "PATCH", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content  # type: ignore
            )
            # NOTE(review): `stream` is not forwarded to send() here (unlike
            # post/delete); kept as-is to avoid changing what callers receive.
            response = await self.client.send(req)
            response.raise_for_status()
            return response
        except (httpx.RemoteProtocolError, httpx.ConnectError):
            # Retry the request (still as PATCH) with a new session if there is a connection error
            return await self._retry_with_new_client(
                "PATCH",
                url,
                timeout,
                data=data,
                json=json,
                params=params,
                headers=headers,
                stream=stream,
                content=content,
            )
        except httpx.TimeoutException as e:
            raise litellm.Timeout(
                message=f"Connection timed out after {timeout} seconds.",
                model="default-model-name",
                llm_provider="litellm-httpx-handler",
                headers=self._response_headers_from_error(e),
            )
        except httpx.HTTPStatusError as e:
            setattr(e, "status_code", e.response.status_code)
            if stream is True:
                setattr(e, "message", await e.response.aread())
            else:
                setattr(e, "message", e.response.text)
            raise e

    async def delete(
        self,
        url: str,
        data: Optional[Union[dict, str, bytes]] = None,  # type: ignore
        json: Optional[dict] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        stream: bool = False,
        content: Any = None,
    ):
        """
        Send a DELETE request.

        Unlike post/put/patch, ``httpx.TimeoutException`` is not converted to
        ``litellm.Timeout`` here and propagates as raised by httpx.

        Raises:
            httpx.HTTPStatusError: On a 4xx/5xx response; ``status_code`` and
                ``message`` attributes are attached.
        """
        try:
            if timeout is None:
                timeout = self.timeout

            # Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
            request_data, request_content = _prepare_request_data_and_content(
                data, content
            )

            req = self.client.build_request(
                "DELETE", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content  # type: ignore
            )
            response = await self.client.send(req, stream=stream)
            response.raise_for_status()
            return response
        except (httpx.RemoteProtocolError, httpx.ConnectError):
            # Retry the request (still as DELETE) with a new session if there is a connection error
            return await self._retry_with_new_client(
                "DELETE",
                url,
                timeout,
                data=data,
                json=json,
                params=params,
                headers=headers,
                stream=stream,
                content=content,
            )
        except httpx.HTTPStatusError as e:
            setattr(e, "status_code", e.response.status_code)
            if stream is True:
                setattr(e, "message", await e.response.aread())
            else:
                setattr(e, "message", e.response.text)
            raise e

    async def single_connection_post_request(
        self,
        url: str,
        client: httpx.AsyncClient,
        data: Optional[Union[dict, str, bytes]] = None,  # type: ignore
        json: Optional[dict] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        stream: bool = False,
        content: Any = None,
        files: Optional[RequestFiles] = None,
        method: str = "POST",
    ):
        """
        Send one request on the given single-connection client.

        Used for retrying after connection errors. Despite the name, the HTTP
        method can be chosen with ``method`` (default ``"POST"``) so that
        PUT/PATCH/DELETE retries are not re-sent as POST.

        No per-request timeout is set; the timeout configured on ``client`` applies.

        Raises:
            httpx.HTTPStatusError: On a 4xx/5xx response.
        """
        # Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
        request_data, request_content = _prepare_request_data_and_content(data, content)

        req = client.build_request(
            method, url, data=request_data, json=json, params=params, headers=headers, files=files, content=request_content  # type: ignore
        )
        response = await client.send(req, stream=stream)
        response.raise_for_status()
        return response

    def __del__(self) -> None:
        # Best-effort cleanup: schedule close() if an event loop is running.
        # Failures (e.g. no running loop) are deliberately ignored, since
        # raising from __del__ would only produce noise.
        try:
            asyncio.get_running_loop().create_task(self.close())
        except Exception:
            pass

    @staticmethod
    def _create_async_transport(
        ssl_context: Optional[ssl.SSLContext] = None,
        ssl_verify: Optional[bool] = None,
        shared_session: Optional["ClientSession"] = None,
    ) -> Optional[Union[LiteLLMAiohttpTransport, AsyncHTTPTransport]]:
        """
        - Creates a transport for httpx.AsyncClient
            - [Default] It will return AiohttpTransport
            - Users can opt out of AiohttpTransport (see `_should_use_aiohttp_transport`);
              then the httpx transport from `_create_httpx_transport` is used, which is
              an AsyncHTTPTransport with local_address="0.0.0.0" if litellm.force_ipv4
              is True, else None

        Notes on this handler:
        - Why AiohttpTransport?
            - By default, we use AiohttpTransport since it offers much higher throughput and lower latency than httpx.

        - Why force ipv4?
            - Some users have seen httpx ConnectionError when using ipv6 - forcing ipv4 resolves the issue for them
        """
        #########################################################
        # AIOHTTP TRANSPORT is used by default (unless disabled)
        #########################################################
        if AsyncHTTPHandler._should_use_aiohttp_transport():
            return AsyncHTTPHandler._create_aiohttp_transport(
                ssl_context=ssl_context,
                ssl_verify=ssl_verify,
                shared_session=shared_session,
            )

        #########################################################
        # HTTPX TRANSPORT is used when the aiohttp transport is disabled
        #########################################################
        return AsyncHTTPHandler._create_httpx_transport()

    @staticmethod
    def _should_use_aiohttp_transport() -> bool:
        """
        AiohttpTransport is the default transport for litellm.

        Httpx can be used by the following
            - litellm.disable_aiohttp_transport = True
            - os.getenv("DISABLE_AIOHTTP_TRANSPORT") = "True"
        """
        from litellm.secret_managers.main import str_to_bool

        #########################################################
        # Check if user disabled aiohttp transport
        ########################################################
        if (
            litellm.disable_aiohttp_transport is True
            or str_to_bool(os.getenv("DISABLE_AIOHTTP_TRANSPORT", "False")) is True
        ):
            return False

        #########################################################
        # Default: Use AiohttpTransport
        ########################################################
        verbose_logger.debug("Using AiohttpTransport...")
        return True

    @staticmethod
    def _get_ssl_connector_kwargs(
        ssl_verify: Optional[bool] = None,
        ssl_context: Optional[ssl.SSLContext] = None,
    ) -> Dict[str, Any]:
        """
        Helper method to get SSL connector initialization arguments for aiohttp TCPConnector.

        SSL Configuration Priority:
        1. If ssl_context is provided -> use the custom SSL context
        2. If ssl_verify is False -> disable SSL verification (ssl=False)
        Otherwise no ``ssl`` key is set.

        Returns:
            Dict with appropriate SSL configuration for TCPConnector. It always
            contains ``local_addr`` (("0.0.0.0", 0) when litellm.force_ipv4 is set,
            else None).
        """
        connector_kwargs: Dict[str, Any] = {
            "local_addr": ("0.0.0.0", 0) if litellm.force_ipv4 else None,
        }

        if ssl_context is not None:
            # Priority 1: Use the provided custom SSL context
            connector_kwargs["ssl"] = ssl_context
        elif ssl_verify is False:
            # Priority 2: Explicitly disable SSL verification
            connector_kwargs["ssl"] = False

        return connector_kwargs

    @staticmethod
    def _create_aiohttp_transport(
        ssl_verify: Optional[bool] = None,
        ssl_context: Optional[ssl.SSLContext] = None,
        shared_session: Optional["ClientSession"] = None,
    ) -> LiteLLMAiohttpTransport:
        """
        Creates an AiohttpTransport with RequestNotRead error handling

        Note: aiohttp TCPConnector ssl parameter accepts:
        - SSLContext: custom SSL context
        - False: disable SSL verification

        A usable (not closed) ``shared_session`` is reused and not owned by the
        transport; otherwise the transport is given a factory that creates a
        new ``ClientSession``.
        """
        from litellm.llms.custom_httpx.aiohttp_transport import LiteLLMAiohttpTransport
        from litellm.secret_managers.main import str_to_bool

        connector_kwargs = AsyncHTTPHandler._get_ssl_connector_kwargs(
            ssl_verify=ssl_verify, ssl_context=ssl_context
        )
        #########################################################
        # Check if user enabled aiohttp trust env
        # use for HTTP_PROXY, HTTPS_PROXY, etc.
        ########################################################
        trust_env: bool = litellm.aiohttp_trust_env
        if str_to_bool(os.getenv("AIOHTTP_TRUST_ENV", "False")) is True:
            trust_env = True

        #########################################################
        # Determine SSL config to pass to transport for per-request override
        # This ensures ssl_verify works even with shared sessions
        #########################################################
        ssl_for_transport: Optional[Union[bool, ssl.SSLContext]] = None
        if ssl_context is not None:
            ssl_for_transport = ssl_context
        elif ssl_verify is False:
            ssl_for_transport = False

        verbose_logger.debug("Creating AiohttpTransport...")

        # Use shared session if provided and valid
        if shared_session is not None and not shared_session.closed:
            verbose_logger.debug(
                f"SHARED SESSION: Reusing existing ClientSession (ID: {id(shared_session)})"
            )
            return LiteLLMAiohttpTransport(
                client=shared_session,
                ssl_verify=ssl_for_transport,
                owns_session=False,
            )

        # Create new session only if none provided or existing one is invalid
        verbose_logger.debug(
            "NEW SESSION: Creating new ClientSession (no shared session provided)"
        )
        transport_connector_kwargs = {
            "keepalive_timeout": AIOHTTP_KEEPALIVE_TIMEOUT,
            "ttl_dns_cache": AIOHTTP_TTL_DNS_CACHE,
            **connector_kwargs,
        }
        # Optional connector settings are only passed when enabled / positive.
        if AIOHTTP_NEEDS_CLEANUP_CLOSED:
            transport_connector_kwargs["enable_cleanup_closed"] = True
        if AIOHTTP_CONNECTOR_LIMIT > 0:
            transport_connector_kwargs["limit"] = AIOHTTP_CONNECTOR_LIMIT
        if AIOHTTP_CONNECTOR_LIMIT_PER_HOST > 0:
            transport_connector_kwargs[
                "limit_per_host"
            ] = AIOHTTP_CONNECTOR_LIMIT_PER_HOST

        return LiteLLMAiohttpTransport(
            client=lambda: ClientSession(
                connector=TCPConnector(**transport_connector_kwargs),
                trust_env=trust_env,
            ),
            ssl_verify=ssl_for_transport,
        )

    @staticmethod
    def _create_httpx_transport() -> Optional[AsyncHTTPTransport]:
        """
        Creates an AsyncHTTPTransport

        - If force_ipv4 is True, it will create an AsyncHTTPTransport with local_address set to "0.0.0.0"
        - [Default] If force_ipv4 is False, it will return None
        """
        if litellm.force_ipv4:
            return AsyncHTTPTransport(local_address="0.0.0.0")
        else:
            return None
|
||
|
||
|
||
class HTTPHandler:
|
||
def __init__(
    self,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    concurrent_limit=None,  # Kept for backward compatibility, but ignored (no limits)
    client: Optional[httpx.Client] = None,
    ssl_verify: Optional[Union[bool, str]] = None,
    disable_default_headers: Optional[
        bool
    ] = False,  # arize phoenix returns different API responses when user agent header in request
):
    """
    Set up the synchronous HTTP handler.

    Args:
        timeout: Client timeout; ``None`` means ``_DEFAULT_TIMEOUT``.
        concurrent_limit: Accepted for backward compatibility and ignored.
        client: Pre-built ``httpx.Client`` to use as-is. When given, the
            timeout/SSL/header settings below are not applied to it.
        ssl_verify: SSL setting resolved through ``get_ssl_configuration``.
        disable_default_headers: When truthy, no default headers
            (User-Agent) are installed on the created client.
    """
    effective_timeout = _DEFAULT_TIMEOUT if timeout is None else timeout

    # Unified SSL configuration (bool, CA path or SSLContext).
    ssl_config = get_ssl_configuration(ssl_verify)

    # Client-side certificate used to authenticate to the host,
    # e.g. /path/to/client.pem
    cert = os.getenv("SSL_CERTIFICATE", litellm.ssl_certificate)

    # Default headers (User-Agent, overridable via LITELLM_USER_AGENT).
    default_headers = None if disable_default_headers else get_default_headers()

    # A caller-supplied client is adopted unchanged.
    if client is not None:
        self.client = client
        return

    # Otherwise build a client with a connection pool.
    sync_transport = self._create_sync_transport()
    self.client = httpx.Client(
        transport=sync_transport,
        timeout=effective_timeout,
        verify=ssl_config,
        cert=cert,
        headers=default_headers,
        follow_redirects=True,
    )
|
||
|
||
def close(self):
|
||
# Close the client when you're done with it
|
||
self.client.close()
|
||
|
||
def get(
|
||
self,
|
||
url: str,
|
||
params: Optional[dict] = None,
|
||
headers: Optional[dict] = None,
|
||
follow_redirects: Optional[bool] = None,
|
||
):
|
||
# Set follow_redirects to UseClientDefault if None
|
||
_follow_redirects = (
|
||
follow_redirects if follow_redirects is not None else USE_CLIENT_DEFAULT
|
||
)
|
||
params = params or {}
|
||
params.update(self.extract_query_params(url))
|
||
|
||
response = self.client.get(
|
||
url,
|
||
params=params,
|
||
headers=headers,
|
||
)
|
||
|
||
return response
|
||
|
||
@staticmethod
|
||
def extract_query_params(url: str) -> Dict[str, str]:
|
||
"""
|
||
Parse a URL’s query-string into a dict.
|
||
|
||
:param url: full URL, e.g. "https://.../path?foo=1&bar=2"
|
||
:return: {"foo": "1", "bar": "2"}
|
||
"""
|
||
from urllib.parse import parse_qsl, urlsplit
|
||
|
||
parts = urlsplit(url)
|
||
return dict(parse_qsl(parts.query))
|
||
|
||
def post(
|
||
self,
|
||
url: str,
|
||
data: Optional[Union[dict, str, bytes]] = None,
|
||
json: Optional[Union[dict, str, List]] = None,
|
||
params: Optional[dict] = None,
|
||
headers: Optional[dict] = None,
|
||
stream: bool = False,
|
||
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
||
files: Optional[Union[dict, RequestFiles]] = None,
|
||
content: Any = None,
|
||
logging_obj: Optional[LiteLLMLoggingObject] = None,
|
||
):
|
||
try:
|
||
# Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
|
||
request_data, request_content = _prepare_request_data_and_content(
|
||
data, content
|
||
)
|
||
|
||
if timeout is not None:
|
||
req = self.client.build_request(
|
||
"POST",
|
||
url,
|
||
data=request_data, # type: ignore
|
||
json=json,
|
||
params=params,
|
||
headers=headers,
|
||
timeout=timeout,
|
||
files=files,
|
||
content=request_content, # type: ignore
|
||
)
|
||
else:
|
||
req = self.client.build_request(
|
||
"POST", url, data=request_data, json=json, params=params, headers=headers, files=files, content=request_content # type: ignore
|
||
)
|
||
response = self.client.send(req, stream=stream)
|
||
response.raise_for_status()
|
||
return response
|
||
except httpx.TimeoutException:
|
||
raise litellm.Timeout(
|
||
message=f"Connection timed out after {timeout} seconds.",
|
||
model="default-model-name",
|
||
llm_provider="litellm-httpx-handler",
|
||
)
|
||
except httpx.HTTPStatusError as e:
|
||
if stream is True:
|
||
setattr(e, "message", mask_sensitive_info(e.response.read()))
|
||
setattr(e, "text", mask_sensitive_info(e.response.read()))
|
||
else:
|
||
error_text = mask_sensitive_info(e.response.text)
|
||
setattr(e, "message", error_text)
|
||
setattr(e, "text", error_text)
|
||
|
||
setattr(e, "status_code", e.response.status_code)
|
||
raise e
|
||
except Exception as e:
|
||
raise e
|
||
|
||
def patch(
|
||
self,
|
||
url: str,
|
||
data: Optional[Union[dict, str, bytes]] = None,
|
||
json: Optional[Union[dict, str]] = None,
|
||
params: Optional[dict] = None,
|
||
headers: Optional[dict] = None,
|
||
stream: bool = False,
|
||
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
||
content: Any = None,
|
||
):
|
||
try:
|
||
# Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
|
||
request_data, request_content = _prepare_request_data_and_content(
|
||
data, content
|
||
)
|
||
|
||
if timeout is not None:
|
||
req = self.client.build_request(
|
||
"PATCH", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content # type: ignore
|
||
)
|
||
else:
|
||
req = self.client.build_request(
|
||
"PATCH", url, data=request_data, json=json, params=params, headers=headers, content=request_content # type: ignore
|
||
)
|
||
response = self.client.send(req, stream=stream)
|
||
response.raise_for_status()
|
||
return response
|
||
except httpx.TimeoutException:
|
||
raise litellm.Timeout(
|
||
message=f"Connection timed out after {timeout} seconds.",
|
||
model="default-model-name",
|
||
llm_provider="litellm-httpx-handler",
|
||
)
|
||
except httpx.HTTPStatusError as e:
|
||
if stream is True:
|
||
setattr(e, "message", mask_sensitive_info(e.response.read()))
|
||
setattr(e, "text", mask_sensitive_info(e.response.read()))
|
||
else:
|
||
error_text = mask_sensitive_info(e.response.text)
|
||
setattr(e, "message", error_text)
|
||
setattr(e, "text", error_text)
|
||
|
||
setattr(e, "status_code", e.response.status_code)
|
||
|
||
raise e
|
||
except Exception as e:
|
||
raise e
|
||
|
||
def put(
|
||
self,
|
||
url: str,
|
||
data: Optional[Union[dict, str, bytes]] = None,
|
||
json: Optional[Union[dict, str]] = None,
|
||
params: Optional[dict] = None,
|
||
headers: Optional[dict] = None,
|
||
stream: bool = False,
|
||
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
||
content: Any = None,
|
||
):
|
||
try:
|
||
# Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
|
||
request_data, request_content = _prepare_request_data_and_content(
|
||
data, content
|
||
)
|
||
|
||
if timeout is not None:
|
||
req = self.client.build_request(
|
||
"PUT", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content # type: ignore
|
||
)
|
||
else:
|
||
req = self.client.build_request(
|
||
"PUT", url, data=request_data, json=json, params=params, headers=headers, content=request_content # type: ignore
|
||
)
|
||
response = self.client.send(req, stream=stream)
|
||
return response
|
||
except httpx.TimeoutException:
|
||
raise litellm.Timeout(
|
||
message=f"Connection timed out after {timeout} seconds.",
|
||
model="default-model-name",
|
||
llm_provider="litellm-httpx-handler",
|
||
)
|
||
except Exception as e:
|
||
raise e
|
||
|
||
def delete(
|
||
self,
|
||
url: str,
|
||
data: Optional[Union[dict, str, bytes]] = None, # type: ignore
|
||
json: Optional[dict] = None,
|
||
params: Optional[dict] = None,
|
||
headers: Optional[dict] = None,
|
||
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
||
stream: bool = False,
|
||
content: Any = None,
|
||
):
|
||
try:
|
||
# Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
|
||
request_data, request_content = _prepare_request_data_and_content(
|
||
data, content
|
||
)
|
||
|
||
if timeout is not None:
|
||
req = self.client.build_request(
|
||
"DELETE", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content # type: ignore
|
||
)
|
||
else:
|
||
req = self.client.build_request(
|
||
"DELETE", url, data=request_data, json=json, params=params, headers=headers, content=request_content # type: ignore
|
||
)
|
||
response = self.client.send(req, stream=stream)
|
||
response.raise_for_status()
|
||
return response
|
||
except httpx.TimeoutException:
|
||
raise litellm.Timeout(
|
||
message=f"Connection timed out after {timeout} seconds.",
|
||
model="default-model-name",
|
||
llm_provider="litellm-httpx-handler",
|
||
)
|
||
except httpx.HTTPStatusError as e:
|
||
if stream is True:
|
||
setattr(e, "message", mask_sensitive_info(e.response.read()))
|
||
setattr(e, "text", mask_sensitive_info(e.response.read()))
|
||
else:
|
||
error_text = mask_sensitive_info(e.response.text)
|
||
setattr(e, "message", error_text)
|
||
setattr(e, "text", error_text)
|
||
|
||
setattr(e, "status_code", e.response.status_code)
|
||
|
||
raise e
|
||
except Exception as e:
|
||
raise e
|
||
|
||
def __del__(self) -> None:
|
||
try:
|
||
self.close()
|
||
except Exception:
|
||
pass
|
||
|
||
def _create_sync_transport(self) -> Optional[HTTPTransport]:
|
||
"""
|
||
Create an HTTP transport with IPv4 only if litellm.force_ipv4 is True.
|
||
Otherwise, return None.
|
||
|
||
Some users have seen httpx ConnectionError when using ipv6 - forcing ipv4 resolves the issue for them
|
||
"""
|
||
if litellm.force_ipv4:
|
||
return HTTPTransport(local_address="0.0.0.0")
|
||
else:
|
||
return getattr(litellm, "sync_transport", None)
|
||
|
||
|
||
def get_async_httpx_client(
    llm_provider: Union[LlmProviders, httpxSpecialProvider],
    params: Optional[dict] = None,
    shared_session: Optional["ClientSession"] = None,
) -> AsyncHTTPHandler:
    """
    Return a cached ``AsyncHTTPHandler`` for this provider/params pair,
    building and caching a new one on a cache miss.

    The cache key is ``"async_httpx_client"`` followed by every
    ``{key}_{value}`` pair from ``params`` and then ``llm_provider``.
    ``shared_session`` is handed to a newly built handler but is not part of
    the key.
    """
    key_suffix = ""
    if params is not None:
        for name, setting in params.items():
            # A value that cannot be formatted is simply left out of the key.
            try:
                key_suffix = key_suffix + f"{name}_{setting}"
            except Exception:
                pass

    cache_key = "async_httpx_client" + key_suffix + llm_provider

    # Lazily initialize the global in-memory client cache to avoid relying on
    # litellm globals being fully populated during import time.
    client_cache = getattr(litellm, "in_memory_llm_clients_cache", None)
    if client_cache is None:
        from litellm.caching.llm_caching_handler import LLMClientCache

        client_cache = LLMClientCache()
        litellm.in_memory_llm_clients_cache = client_cache

    existing_client = client_cache.get_cache(cache_key)
    if existing_client:
        return existing_client

    if params is None:
        new_client = AsyncHTTPHandler(
            timeout=httpx.Timeout(timeout=600.0, connect=5.0),
            shared_session=shared_session,
        )
    else:
        # "disable_aiohttp_transport" only takes part in the cache key; it is
        # not passed to AsyncHTTPHandler.__init__.
        init_kwargs = {
            name: setting
            for name, setting in params.items()
            if name != "disable_aiohttp_transport"
        }
        init_kwargs["shared_session"] = shared_session
        new_client = AsyncHTTPHandler(**init_kwargs)

    client_cache.set_cache(
        key=cache_key,
        value=new_client,
        ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
    )
    return new_client
|
||
|
||
|
||
def _get_httpx_client(params: Optional[dict] = None) -> HTTPHandler:
    """
    Return a cached synchronous ``HTTPHandler`` for ``params``, building and
    caching a new one on a cache miss.

    The cache key is ``"httpx_client"`` followed by every ``{key}_{value}``
    pair from ``params``.
    """
    key_suffix = ""
    if params is not None:
        for name, setting in params.items():
            # A value that cannot be formatted is simply left out of the key.
            try:
                key_suffix = key_suffix + f"{name}_{setting}"
            except Exception:
                pass

    cache_key = "httpx_client" + key_suffix

    # Lazily initialize the global in-memory client cache to avoid relying on
    # litellm globals being fully populated during import time.
    client_cache = getattr(litellm, "in_memory_llm_clients_cache", None)
    if client_cache is None:
        from litellm.caching.llm_caching_handler import LLMClientCache

        client_cache = LLMClientCache()
        litellm.in_memory_llm_clients_cache = client_cache

    existing_client = client_cache.get_cache(cache_key)
    if existing_client:
        return existing_client

    if params is None:
        new_client = HTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
    else:
        # "disable_aiohttp_transport" only takes part in the cache key; it is
        # not passed to HTTPHandler.__init__.
        init_kwargs = {
            name: setting
            for name, setting in params.items()
            if name != "disable_aiohttp_transport"
        }
        new_client = HTTPHandler(**init_kwargs)

    client_cache.set_cache(
        key=cache_key,
        value=new_client,
        ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
    )
    return new_client
|