chore: initial snapshot for gitea/github upload

This commit is contained in:
Your Name
2026-03-26 16:04:46 +08:00
commit a699a1ac98
3497 changed files with 1586237 additions and 0 deletions

View File

@@ -0,0 +1,19 @@
"""
OpenAI Responses API token counting implementation.
"""
from litellm.llms.openai.responses.count_tokens.handler import (
OpenAICountTokensHandler,
)
from litellm.llms.openai.responses.count_tokens.token_counter import (
OpenAITokenCounter,
)
from litellm.llms.openai.responses.count_tokens.transformation import (
OpenAICountTokensConfig,
)
__all__ = [
"OpenAICountTokensHandler",
"OpenAICountTokensConfig",
"OpenAITokenCounter",
]

View File

@@ -0,0 +1,107 @@
"""
OpenAI Responses API token counting handler.
Uses httpx for HTTP requests to OpenAI's /v1/responses/input_tokens endpoint.
"""
import json
from typing import Any, Dict, List, Optional, Union
import httpx
import litellm
from litellm._logging import verbose_logger
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
from litellm.llms.openai.common_utils import OpenAIError
from litellm.llms.openai.responses.count_tokens.transformation import (
OpenAICountTokensConfig,
)
class OpenAICountTokensHandler(OpenAICountTokensConfig):
    """
    Handler for OpenAI Responses API token counting requests.

    Sends a transformed request to OpenAI's /v1/responses/input_tokens
    endpoint through the shared async httpx client and normalizes every
    failure mode into ``OpenAIError``.
    """

    async def handle_count_tokens_request(
        self,
        model: str,
        input: Union[str, List[Any]],
        api_key: str,
        api_base: Optional[str] = None,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
        instructions: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Handle a token counting request to OpenAI's Responses API.

        Args:
            model: Target model name.
            input: Responses API ``input`` payload (string or item list).
            api_key: OpenAI API key used for the Bearer auth header.
            api_base: Optional base URL override; defaults to api.openai.com.
            timeout: Per-request timeout; falls back to ``litellm.request_timeout``.
            tools: Optional tool definitions to include in the count.
            instructions: Optional system instructions to include in the count.

        Returns:
            Dictionary containing {"input_tokens": <number>}

        Raises:
            OpenAIError: If the API request fails
        """
        try:
            self.validate_request(model, input)
            verbose_logger.debug(
                f"Processing OpenAI CountTokens request for model: {model}"
            )
            payload = self.transform_request_to_count_tokens(
                model=model,
                input=input,
                tools=tools,
                instructions=instructions,
            )
            url = self.get_openai_count_tokens_endpoint(api_base)
            verbose_logger.debug(f"Making request to: {url}")
            auth_headers = self.get_required_headers(api_key)
            client = get_async_httpx_client(
                llm_provider=litellm.LlmProviders.OPENAI
            )
            effective_timeout = (
                litellm.request_timeout if timeout is None else timeout
            )
            response = await client.post(
                url,
                headers=auth_headers,
                json=payload,
                timeout=effective_timeout,
            )
            verbose_logger.debug(f"Response status: {response.status_code}")
            if response.status_code != 200:
                body_text = response.text
                verbose_logger.error(f"OpenAI API error: {body_text}")
                raise OpenAIError(
                    status_code=response.status_code,
                    message=body_text,
                )
            parsed = response.json()
            verbose_logger.debug(f"OpenAI response: {parsed}")
            return parsed
        except OpenAIError:
            # Already normalized above (or raised by a helper) - propagate as-is.
            raise
        except httpx.HTTPStatusError as e:
            verbose_logger.error(f"HTTP error in CountTokens handler: {str(e)}")
            raise OpenAIError(
                status_code=e.response.status_code,
                message=e.response.text,
            )
        except (httpx.RequestError, json.JSONDecodeError, ValueError) as e:
            # Transport errors, bad JSON bodies, and validation failures all
            # surface as a generic 500-style OpenAIError.
            verbose_logger.error(f"Error in CountTokens handler: {str(e)}")
            raise OpenAIError(
                status_code=500,
                message=f"CountTokens processing error: {str(e)}",
            )

View File

@@ -0,0 +1,118 @@
"""
OpenAI Token Counter implementation using the Responses API /input_tokens endpoint.
"""
import os
from typing import Any, Dict, List, Optional
from litellm._logging import verbose_logger
from litellm.llms.base_llm.base_utils import BaseTokenCounter
from litellm.llms.openai.common_utils import OpenAIError
from litellm.llms.openai.responses.count_tokens.handler import (
OpenAICountTokensHandler,
)
from litellm.llms.openai.responses.count_tokens.transformation import (
OpenAICountTokensConfig,
)
from litellm.types.utils import LlmProviders, TokenCountResponse
# Global handler instance - reuse across all token counting requests
# (the handler holds no per-request state, so one shared instance is safe).
openai_count_tokens_handler = OpenAICountTokensHandler()
class OpenAITokenCounter(BaseTokenCounter):
    """Token counter implementation for OpenAI provider using the Responses API."""

    def should_use_token_counting_api(
        self,
        custom_llm_provider: Optional[str] = None,
    ) -> bool:
        """Return True only when the request targets the OpenAI provider."""
        return custom_llm_provider == LlmProviders.OPENAI.value

    async def count_tokens(
        self,
        model_to_use: str,
        messages: Optional[List[Dict[str, Any]]],
        contents: Optional[List[Dict[str, Any]]],
        deployment: Optional[Dict[str, Any]] = None,
        request_model: str = "",
        tools: Optional[List[Dict[str, Any]]] = None,
        system: Optional[Any] = None,
    ) -> Optional[TokenCountResponse]:
        """
        Count tokens using OpenAI's Responses API /input_tokens endpoint.

        Returns ``None`` whenever the API cannot be used (no messages, no API
        key, or no convertible input) so the caller can fall back to local
        counting. API failures are reported as an error-flagged
        ``TokenCountResponse`` rather than raised.
        """
        if not messages:
            return None

        params = (deployment or {}).get("litellm_params", {})

        # Prefer the deployment-configured key, then the environment.
        api_key = params.get("api_key")
        if not api_key:
            api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            verbose_logger.warning("No OpenAI API key found for token counting")
            return None

        api_base = params.get("api_base")

        # Convert chat messages into the Responses API input shape; system /
        # developer messages come back separately as instructions.
        responses_input, extracted_instructions = (
            OpenAICountTokensConfig.messages_to_responses_input(messages)
        )

        # Fall back to the explicit `system` argument when the messages
        # carried no system/developer content.
        if extracted_instructions is None and system is not None:
            extracted_instructions = (
                system if isinstance(system, str) else str(system)
            )

        # Nothing to send (e.g. system-only messages) -> local counting.
        if not responses_input:
            return None

        try:
            api_result = await openai_count_tokens_handler.handle_count_tokens_request(
                model=model_to_use,
                input=responses_input,
                api_key=api_key,
                api_base=api_base,
                tools=tools,
                instructions=extracted_instructions,
            )
            if api_result is None:
                return None
            return TokenCountResponse(
                total_tokens=api_result.get("input_tokens", 0),
                request_model=request_model,
                model_used=model_to_use,
                tokenizer_type="openai_api",
                original_response=api_result,
            )
        except OpenAIError as e:
            verbose_logger.warning(
                f"OpenAI CountTokens API error: status={e.status_code}, message={e.message}"
            )
            return TokenCountResponse(
                total_tokens=0,
                request_model=request_model,
                model_used=model_to_use,
                tokenizer_type="openai_api",
                error=True,
                error_message=e.message,
                status_code=e.status_code,
            )
        except Exception as e:
            verbose_logger.warning(f"Error calling OpenAI CountTokens API: {e}")
            return TokenCountResponse(
                total_tokens=0,
                request_model=request_model,
                model_used=model_to_use,
                tokenizer_type="openai_api",
                error=True,
                error_message=str(e),
                status_code=500,
            )

View File

@@ -0,0 +1,160 @@
"""
OpenAI Responses API token counting transformation logic.
This module handles the transformation of requests to OpenAI's /v1/responses/input_tokens endpoint.
"""
from typing import Any, Dict, List, Optional, Union
class OpenAICountTokensConfig:
    """
    Configuration and transformation logic for OpenAI Responses API token counting.

    OpenAI Responses API Token Counting Specification:
    - Endpoint: POST https://api.openai.com/v1/responses/input_tokens
    - Response: {"input_tokens": <number>}
    """

    def get_openai_count_tokens_endpoint(self, api_base: Optional[str] = None) -> str:
        """Build the /responses/input_tokens URL, defaulting to api.openai.com."""
        root = (api_base or "https://api.openai.com/v1").rstrip("/")
        return root + "/responses/input_tokens"

    def transform_request_to_count_tokens(
        self,
        model: str,
        input: Union[str, List[Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        instructions: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Transform request to OpenAI Responses API token counting format.

        The Responses API uses `input` (not `messages`) and `instructions`
        (not `system`). Optional fields are only included when provided.
        """
        body: Dict[str, Any] = {"model": model, "input": input}
        if instructions is not None:
            body["instructions"] = instructions
        if tools is not None:
            body["tools"] = self._transform_tools_for_responses_api(tools)
        return body

    def get_required_headers(self, api_key: str) -> Dict[str, str]:
        """Headers required by the OpenAI API: JSON content type plus Bearer auth."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }

    def validate_request(self, model: str, input: Union[str, List[Any]]) -> None:
        """Reject requests that are missing a model or an input payload."""
        if not model:
            raise ValueError("model parameter is required")
        if not input:
            raise ValueError("input parameter is required")

    @staticmethod
    def _transform_tools_for_responses_api(
        tools: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """
        Transform OpenAI chat tools format to Responses API tools format.

        Chat format:      {"type": "function", "function": {"name": ..., "parameters": ...}}
        Responses format: {"type": "function", "name": ..., "parameters": ...}

        Non-function tools (e.g. web_search, file_search) pass through unchanged.
        """
        converted: List[Dict[str, Any]] = []
        for tool in tools:
            if tool.get("type") != "function" or "function" not in tool:
                # Pass through non-function tools as-is.
                converted.append(tool)
                continue
            spec = tool["function"]
            flattened: Dict[str, Any] = {
                "type": "function",
                "name": spec.get("name", ""),
                "description": spec.get("description", ""),
                "parameters": spec.get("parameters", {}),
            }
            if "strict" in spec:
                flattened["strict"] = spec["strict"]
            converted.append(flattened)
        return converted

    @staticmethod
    def messages_to_responses_input(
        messages: List[Dict[str, Any]],
    ) -> tuple:
        """
        Convert standard chat messages format to OpenAI Responses API input format.

        Returns:
            (input_items, instructions) tuple where instructions is extracted
            from system/developer messages (newline-joined), or None when no
            such messages exist.
        """

        def _collect_text(blocks: List[Any]) -> str:
            # Join text blocks and raw strings; other block types are dropped.
            pieces = []
            for block in blocks:
                if isinstance(block, dict) and block.get("type") == "text":
                    pieces.append(block.get("text", ""))
                elif isinstance(block, str):
                    pieces.append(block)
            return "\n".join(pieces)

        items: List[Dict[str, Any]] = []
        system_texts: List[str] = []

        for message in messages:
            role = message.get("role", "")
            content = message.get("content") or ""

            if role in ("system", "developer"):
                # System/developer messages become `instructions`, not input items.
                if isinstance(content, str):
                    system_texts.append(content)
                elif isinstance(content, list):
                    system_texts.append(_collect_text(content))
            elif role == "user":
                if isinstance(content, list):
                    # Responses API expects plain text for user input here.
                    content = _collect_text(content)
                items.append({"role": "user", "content": content})
            elif role == "assistant":
                tool_calls = message.get("tool_calls")
                # Emit a message item when there is text, or when the turn is
                # completely empty (no text and no tool calls).
                if content or not tool_calls:
                    items.append({"role": "assistant", "content": content})
                # Each chat tool_call maps to a Responses API function_call item.
                for call in tool_calls or []:
                    fn = call.get("function", {})
                    items.append(
                        {
                            "type": "function_call",
                            "call_id": call.get("id", ""),
                            "name": fn.get("name", ""),
                            "arguments": fn.get("arguments", ""),
                        }
                    )
            elif role == "tool":
                items.append(
                    {
                        "type": "function_call_output",
                        "call_id": message.get("tool_call_id", ""),
                        "output": content if isinstance(content, str) else str(content),
                    }
                )

        instructions = "\n".join(system_texts) if system_texts else None
        return items, instructions