chore: initial snapshot for gitea/github upload

This commit is contained in:
Your Name
2026-03-26 16:04:46 +08:00
commit a699a1ac98
3497 changed files with 1586237 additions and 0 deletions

View File

@@ -0,0 +1,10 @@
"""
Opik payload builder namespace.
Public API:
build_opik_payload - Main function to create Opik trace and span payloads
"""
from .api import build_opik_payload
__all__ = ["build_opik_payload"]

View File

@@ -0,0 +1,121 @@
"""Public API for Opik payload building."""
from datetime import datetime
from typing import Any, Dict, Optional, Tuple
from litellm.integrations.opik import utils
from . import extractors, payload_builders, types
def build_opik_payload(
    kwargs: Dict[str, Any],
    response_obj: Dict[str, Any],
    start_time: datetime,
    end_time: datetime,
    project_name: str,
) -> Tuple[Optional[types.TracePayload], types.SpanPayload]:
    """
    Build Opik trace and span payloads from LiteLLM completion data.

    This is the main public API for creating Opik payloads. It:
    1. Extracts all necessary data from LiteLLM kwargs and response
    2. Decides whether to create a new trace or attach to existing
    3. Builds trace payload (if new trace)
    4. Builds span payload (always)

    Args:
        kwargs: LiteLLM kwargs containing request metadata and logging data
        response_obj: LiteLLM response object containing model response
        start_time: Request start time
        end_time: Request end time
        project_name: Default Opik project name

    Returns:
        Tuple of (optional trace payload, span payload):
        - TracePayload when a new trace is opened, None when attaching to an
          existing trace
        - SpanPayload, always
    """
    logging_obj = kwargs["standard_logging_object"]

    # Gather the metadata dictionaries carried on the request.
    params = kwargs.get("litellm_params", {}) or {}
    request_metadata = params.get("metadata", {}) or {}
    logging_metadata = logging_obj.get("metadata", {}) or {}

    # Merge Opik-specific settings from both sources.
    opik_meta = extractors.extract_opik_metadata(request_metadata, logging_metadata)

    resolved_project = opik_meta.get("project_name", project_name)
    trace_id, parent_span_id = extractors.extract_span_identifiers(
        opik_meta.get("current_span_data")
    )
    tags = extractors.extract_tags(opik_meta, kwargs.get("custom_llm_provider"))
    thread_id = opik_meta.get("thread_id")

    # opik_* proxy headers win over body-level settings.
    proxy_headers = (
        params.get("proxy_server_request", {}) or {}
    ).get("headers", {}) or {}
    resolved_project, tags, thread_id = extractors.apply_proxy_header_overrides(
        resolved_project, tags, thread_id, proxy_headers
    )

    # Metadata shared between the trace and the span.
    metadata = extractors.extract_and_build_metadata(
        opik_metadata=opik_meta,
        standard_logging_metadata=logging_metadata,
        standard_logging_object=logging_obj,
        litellm_kwargs=kwargs,
    )

    input_data = logging_obj.get("messages", {})
    output_data = logging_obj.get("response", {})

    # No trace_id means there is no existing trace to attach to: open one.
    trace_payload: Optional[types.TracePayload] = None
    if trace_id is None:
        trace_id = utils.create_uuid7()
        trace_payload = payload_builders.build_trace_payload(
            project_name=resolved_project,
            trace_id=trace_id,
            response_obj=response_obj,
            start_time=start_time,
            end_time=end_time,
            input_data=input_data,
            output_data=output_data,
            metadata=metadata,
            tags=tags,
            thread_id=thread_id,
        )

    # A span is always emitted, whether or not a new trace was opened.
    span_payload = payload_builders.build_span_payload(
        project_name=resolved_project,
        trace_id=trace_id,
        parent_span_id=parent_span_id,
        response_obj=response_obj,
        start_time=start_time,
        end_time=end_time,
        input_data=input_data,
        output_data=output_data,
        metadata=metadata,
        tags=tags,
        usage=utils.create_usage_object(response_obj["usage"]),
        provider=extractors.normalize_provider_name(
            kwargs.get("custom_llm_provider")
        ),
        cost=kwargs.get("response_cost"),
    )
    return trace_payload, span_payload

View File

@@ -0,0 +1,221 @@
"""Data extraction functions for Opik payload building."""
import json
from typing import Any, Dict, List, Optional, Tuple
from litellm import _logging
def normalize_provider_name(provider: Optional[str]) -> Optional[str]:
    """
    Map a LiteLLM internal provider identifier onto the name Opik uses.

    Args:
        provider: LiteLLM internal provider name, or None

    Returns:
        The Opik-facing provider name; unknown providers pass through
        unchanged, and None stays None.
    """
    if provider is None:
        return None
    # LiteLLM provider id -> Opik provider name.
    opik_names = {
        "openai": "openai",
        "vertex_ai-language-models": "google_vertexai",
        "gemini": "google_ai",
        "anthropic": "anthropic",
        "vertex_ai-anthropic_models": "anthropic_vertexai",
        "bedrock": "bedrock",
        "bedrock_converse": "bedrock",
        "groq": "groq",
    }
    return opik_names.get(provider, provider)
def extract_opik_metadata(
    litellm_metadata: Dict[str, Any],
    standard_logging_metadata: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Merge Opik settings from the request body and the requester metadata.

    Requester-supplied values overwrite request-level ones on key clash.

    Args:
        litellm_metadata: Metadata from litellm_params
        standard_logging_metadata: Metadata from standard_logging_object

    Returns:
        Merged Opik metadata dictionary (a new dict; inputs are untouched)
    """
    merged = dict(litellm_metadata.get("opik", {}))
    requester_opik = (
        standard_logging_metadata.get("requester_metadata", {}) or {}
    ).get("opik", {}) or {}
    merged.update(requester_opik)
    _logging.verbose_logger.debug(
        f"litellm_opik_metadata - {json.dumps(merged, default=str)}"
    )
    return merged
def extract_span_identifiers(
    current_span_data: Any,
) -> Tuple[Optional[str], Optional[str]]:
    """
    Pull (trace_id, parent_span_id) out of ``current_span_data``.

    Accepts None, a dict carrying "trace_id"/"id" keys, or an object
    exposing ``.trace_id`` / ``.id`` attributes (an Opik span object).

    Args:
        current_span_data: Span context in one of the shapes above

    Returns:
        (trace_id, parent_span_id); each may be None
    """
    if current_span_data is None:
        return None, None
    if isinstance(current_span_data, dict):
        return current_span_data.get("trace_id"), current_span_data.get("id")
    # Anything else is treated as an attribute-bearing span object.
    try:
        return current_span_data.trace_id, current_span_data.id
    except AttributeError:
        _logging.verbose_logger.warning(
            f"Unexpected current_span_data format: {type(current_span_data)}"
        )
        return None, None
def extract_tags(
    opik_metadata: Dict[str, Any],
    custom_llm_provider: Optional[str],
) -> List[str]:
    """
    Collect user-supplied tags, appending the provider name when given.

    Args:
        opik_metadata: Opik metadata dictionary
        custom_llm_provider: LLM provider name to append as a tag

    Returns:
        A new list of tags
    """
    collected = [*opik_metadata.get("tags", [])]
    # Falsy providers (None, empty string) are not tagged.
    if custom_llm_provider:
        collected = collected + [custom_llm_provider]
    return collected
def apply_proxy_header_overrides(
    project_name: str,
    tags: List[str],
    thread_id: Optional[str],
    proxy_headers: Dict[str, Any],
) -> Tuple[str, List[str], Optional[str]]:
    """
    Apply overrides from proxy request headers (opik_* prefix).

    Recognized headers:
    - ``opik_project_name``: replaces the project name
    - ``opik_thread_id``: replaces the thread ID
    - ``opik_tags``: JSON-encoded list, appended to the existing tags

    Args:
        project_name: Current project name
        tags: Current tags list (never mutated; a copy is returned)
        thread_id: Current thread ID
        proxy_headers: HTTP headers from proxy request

    Returns:
        Tuple of (project_name, tags, thread_id) with overrides applied
    """
    # Work on a copy so the caller's list is not mutated as a side effect.
    tags = list(tags)
    for key, value in proxy_headers.items():
        # Ignore non-opik headers and empty values.
        if not key.startswith("opik_") or not value:
            continue
        param_key = key.replace("opik_", "", 1)
        if param_key == "project_name":
            project_name = value
        elif param_key == "thread_id":
            thread_id = value
        elif param_key == "tags":
            try:
                parsed_tags = json.loads(value)
                # Only a JSON list is accepted; other JSON types are ignored.
                if isinstance(parsed_tags, list):
                    tags.extend(parsed_tags)
            except (json.JSONDecodeError, TypeError):
                _logging.verbose_logger.warning(
                    f"Failed to parse tags from header: {value}"
                )
    return project_name, tags, thread_id
def extract_and_build_metadata(
    opik_metadata: Dict[str, Any],
    standard_logging_metadata: Dict[str, Any],
    standard_logging_object: Dict[str, Any],
    litellm_kwargs: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Build the complete metadata dictionary from all available sources.

    This combines:
    - Opik-specific metadata (tags, etc.)
    - Standard logging metadata
    - Fields from standard_logging_object (model info, status, etc.)
    - Cost information from litellm_kwargs (calculated after completion)

    Args:
        opik_metadata: Opik-specific metadata from request
        standard_logging_metadata: Standard logging metadata
        standard_logging_object: Full standard logging object with call details
        litellm_kwargs: Original LiteLLM kwargs (includes response_cost)

    Returns:
        Complete metadata dictionary for trace/span
    """
    # current_span_data only carries trace-linking ids; drop it from metadata.
    metadata = {
        key: value
        for key, value in opik_metadata.items()
        if key != "current_span_data"
    }
    metadata["created_from"] = "litellm"
    metadata.update(standard_logging_metadata)

    # Copy selected standard_logging_object fields, renaming where needed
    # ("call_type" becomes "type"); only keys that are present are copied.
    for source_key, dest_key in (
        ("call_type", "type"),
        ("status", "status"),
        ("model", "model"),
        ("model_id", "model_id"),
        ("model_group", "model_group"),
        ("api_base", "api_base"),
        ("cache_hit", "cache_hit"),
        ("saved_cache_cost", "saved_cache_cost"),
        ("error_str", "error_str"),
        ("model_parameters", "model_parameters"),
        ("hidden_params", "hidden_params"),
        ("model_map_information", "model_map_information"),
    ):
        if source_key in standard_logging_object:
            metadata[dest_key] = standard_logging_object[source_key]

    # response_cost is calculated by LiteLLM after completion and added to
    # kwargs (litellm/litellm_core_utils/llm_response_utils/response_metadata.py).
    # NOTE(review): the USD cost is stored under the key "total_tokens",
    # which reads like a token count; preserved as-is — confirm against the
    # Opik cost schema before renaming.
    if "response_cost" in litellm_kwargs:
        metadata["cost"] = {
            "total_tokens": litellm_kwargs["response_cost"],
            "currency": "USD",
        }
    # Keep the debug info around when cost calculation failed.
    if "response_cost_failure_debug_info" in litellm_kwargs:
        metadata["response_cost_failure_debug_info"] = litellm_kwargs[
            "response_cost_failure_debug_info"
        ]
    return metadata

View File

@@ -0,0 +1,89 @@
"""Payload builders for Opik traces and spans."""
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional
from litellm import _logging
from litellm.integrations.opik import utils
from . import types
def build_trace_payload(
    project_name: str,
    trace_id: str,
    response_obj: Dict[str, Any],
    start_time: datetime,
    end_time: datetime,
    input_data: Any,
    output_data: Any,
    metadata: Dict[str, Any],
    tags: List[str],
    thread_id: Optional[str],
) -> types.TracePayload:
    """Assemble the TracePayload for a newly-opened trace.

    The trace name is taken from the response's "object" field; timestamps
    are rendered as ISO-8601 UTC strings with a literal "Z" suffix.
    """

    def _iso_utc(moment: datetime) -> str:
        # isoformat() emits "+00:00" for UTC; Opik expects "Z".
        return moment.astimezone(timezone.utc).isoformat().replace("+00:00", "Z")

    return types.TracePayload(
        project_name=project_name,
        id=trace_id,
        name=response_obj.get("object", "unknown type"),
        start_time=_iso_utc(start_time),
        end_time=_iso_utc(end_time),
        input=input_data,
        output=output_data,
        metadata=metadata,
        tags=tags,
        thread_id=thread_id,
    )
def build_span_payload(
    project_name: str,
    trace_id: str,
    parent_span_id: Optional[str],
    response_obj: Dict[str, Any],
    start_time: datetime,
    end_time: datetime,
    input_data: Any,
    output_data: Any,
    metadata: Dict[str, Any],
    tags: List[str],
    usage: Dict[str, int],
    provider: Optional[str] = None,
    cost: Optional[float] = None,
) -> types.SpanPayload:
    """Assemble the SpanPayload for a single LLM call.

    The span gets a freshly generated id; its name combines the model,
    the response "object" type, and the "created" timestamp.
    """
    span_id = utils.create_uuid7()

    model = response_obj.get("model", "unknown-model")
    obj_type = response_obj.get("object", "unknown-object")
    created = response_obj.get("created", 0)

    _logging.verbose_logger.debug(
        f"OpikLogger creating span with id {span_id} for trace {trace_id}"
    )

    def _iso_utc(moment: datetime) -> str:
        # isoformat() emits "+00:00" for UTC; Opik expects "Z".
        return moment.astimezone(timezone.utc).isoformat().replace("+00:00", "Z")

    return types.SpanPayload(
        id=span_id,
        project_name=project_name,
        trace_id=trace_id,
        parent_span_id=parent_span_id,
        name=f"{model}_{obj_type}_{created}",
        type="llm",
        model=model,
        start_time=_iso_utc(start_time),
        end_time=_iso_utc(end_time),
        input=input_data,
        output=output_data,
        metadata=metadata,
        tags=tags,
        usage=usage,
        provider=provider,
        total_cost=cost,
    )

View File

@@ -0,0 +1,46 @@
"""Type definitions for Opik payload building."""
from dataclasses import dataclass
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
@dataclass
class TracePayload:
    """Opik trace payload structure.

    Mirrors the body sent to Opik for a trace. ``start_time``/``end_time``
    are pre-formatted ISO-8601 UTC strings with a "Z" suffix (see
    build_trace_payload).
    """
    # Destination Opik project name.
    project_name: str
    # Trace identifier (a UUID string when LiteLLM opens a new trace).
    id: str
    # Human-readable trace name; taken from the response's "object" field.
    name: str
    # ISO-8601 UTC timestamp string ("Z" suffix).
    start_time: str
    # ISO-8601 UTC timestamp string ("Z" suffix).
    end_time: str
    # Request messages; shape depends on the LiteLLM call type.
    input: Any
    # Model response payload.
    output: Any
    # Merged metadata (see extract_and_build_metadata).
    metadata: Dict[str, Any]
    # User tags plus the provider name (see extract_tags).
    tags: List[str]
    # Optional conversation/thread grouping id.
    thread_id: Optional[str] = None
@dataclass
class SpanPayload:
    """Opik span payload structure.

    Mirrors the body sent to Opik for a span. ``start_time``/``end_time``
    are pre-formatted ISO-8601 UTC strings with a "Z" suffix (see
    build_span_payload).
    """
    # Freshly generated span identifier (UUID string).
    id: str
    # Destination Opik project name.
    project_name: str
    # Trace this span belongs to.
    trace_id: str
    # "{model}_{object}_{created}" derived from the response.
    name: str
    # Always "llm" for spans built here.
    type: Literal["llm"]
    # Model name from the response (or "unknown-model").
    model: str
    # ISO-8601 UTC timestamp string ("Z" suffix).
    start_time: str
    # ISO-8601 UTC timestamp string ("Z" suffix).
    end_time: str
    # Request messages; shape depends on the LiteLLM call type.
    input: Any
    # Model response payload.
    output: Any
    # Merged metadata (see extract_and_build_metadata).
    metadata: Dict[str, Any]
    # User tags plus the provider name.
    tags: List[str]
    # Token-usage counters built from response_obj["usage"].
    usage: Dict[str, int]
    # Parent span id when attaching under an existing span; else None.
    parent_span_id: Optional[str] = None
    # Normalized provider name (see normalize_provider_name).
    provider: Optional[str] = None
    # Total call cost in USD, when LiteLLM computed one.
    total_cost: Optional[float] = None
# Either payload kind, for code that handles traces and spans uniformly.
PayloadItem = Union[TracePayload, SpanPayload]
# Return shape of build_opik_payload: (new trace or None, span).
TraceSpanPayloadTuple = Tuple[Optional[TracePayload], SpanPayload]