125 lines
4.2 KiB
Python
125 lines
4.2 KiB
Python
"""Core export engine for Focus integrations (heavy dependencies)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Any, Dict, Optional
|
|
|
|
import polars as pl
|
|
|
|
from litellm._logging import verbose_logger
|
|
|
|
from .database import FocusLiteLLMDatabase
|
|
from .destinations import FocusDestinationFactory, FocusTimeWindow
|
|
from .serializers import FocusParquetSerializer, FocusSerializer
|
|
from .transformer import FocusTransformer
|
|
|
|
|
|
class FocusExportEngine:
    """Coordinate a Focus export: load usage rows, transform, serialize, deliver.

    The engine wires together four collaborators created at construction
    time: a destination (from ``FocusDestinationFactory``), a serializer,
    a ``FocusTransformer`` and a ``FocusLiteLLMDatabase``.
    """

    def __init__(
        self,
        *,
        provider: str,
        export_format: str,
        prefix: str,
        destination_config: Optional[dict[str, Any]] = None,
    ) -> None:
        """Store the export settings and build the collaborating components.

        Args:
            provider: Destination provider name handed to the destination factory.
            export_format: Serialization format; only ``"parquet"`` is accepted.
            prefix: Prefix handed to the destination factory.
            destination_config: Optional provider-specific configuration,
                forwarded to the destination factory unchanged.

        Raises:
            NotImplementedError: If ``export_format`` is not ``"parquet"``.
        """
        self.provider = provider
        self.export_format = export_format
        self.prefix = prefix

        # The destination is created first, so a factory failure surfaces
        # before the export format is validated.
        self._destination = FocusDestinationFactory.create(
            provider=self.provider,
            prefix=self.prefix,
            config=destination_config,
        )
        self._serializer = self._init_serializer()
        self._transformer = FocusTransformer()
        self._database = FocusLiteLLMDatabase()

    def _init_serializer(self) -> FocusSerializer:
        """Return the serializer matching ``self.export_format``.

        Raises:
            NotImplementedError: For any format other than ``"parquet"``.
        """
        if self.export_format == "parquet":
            return FocusParquetSerializer()
        raise NotImplementedError("Only parquet export supported currently")

    async def dry_run_export_usage_data(self, limit: Optional[int]) -> Dict[str, Any]:
        """Fetch and transform usage data without delivering anything.

        Args:
            limit: Row limit forwarded to the database query.

        Returns:
            A dict with three keys: ``"usage_data"`` (up to 50 fetched rows
            as dicts), ``"normalized_data"`` (up to 50 transformed rows as
            dicts) and ``"summary"`` (aggregate figures computed from the
            transformed frame).
        """
        preview_cap = 50

        raw_frame = await self._database.get_usage_data(limit=limit)
        focus_frame = self._transformer.transform(raw_frame)

        raw_preview = raw_frame.head(min(preview_cap, len(raw_frame))).to_dicts()
        focus_preview = focus_frame.head(
            min(preview_cap, len(focus_frame))
        ).to_dicts()

        # Every aggregate is taken from the transformed frame, not the raw one.
        return {
            "usage_data": raw_preview,
            "normalized_data": focus_preview,
            "summary": {
                "total_records": len(focus_frame),
                "total_spend": self._sum_column(focus_frame, "spend"),
                "total_tokens": self._sum_column(focus_frame, "total_tokens"),
                "unique_teams": self._count_unique(focus_frame, "team_id"),
                "unique_models": self._count_unique(focus_frame, "model"),
            },
        }

    async def export_window(
        self,
        *,
        window: FocusTimeWindow,
        limit: Optional[int],
    ) -> None:
        """Export the usage data that falls inside ``window``.

        Nothing is delivered when either the fetched frame or the
        transformed frame is empty; a debug message is logged instead.

        Args:
            window: Time window whose ``start_time`` / ``end_time`` bound the query.
            limit: Row limit forwarded to the database query.
        """
        raw_frame = await self._database.get_usage_data(
            limit=limit,
            start_time_utc=window.start_time,
            end_time_utc=window.end_time,
        )
        if raw_frame.is_empty():
            verbose_logger.debug("Focus export: no usage data for window %s", window)
            return

        focus_frame = self._transformer.transform(raw_frame)
        if focus_frame.is_empty():
            verbose_logger.debug(
                "Focus export: normalized data empty for window %s", window
            )
            return

        await self._serialize_and_upload(focus_frame, window)

    async def _serialize_and_upload(
        self, frame: pl.DataFrame, window: FocusTimeWindow
    ) -> None:
        """Serialize ``frame`` and pass the result to the destination.

        A falsy serializer result is logged at debug level and not delivered.
        """
        blob = self._serializer.serialize(frame)
        if blob:
            await self._destination.deliver(
                content=blob,
                time_window=window,
                filename=self._build_filename(),
            )
        else:
            verbose_logger.debug("Focus export: serializer returned empty payload")

    def _build_filename(self) -> str:
        """Return ``usage.<extension>`` using the serializer's extension.

        Raises:
            ValueError: If the serializer's ``extension`` is falsy.
        """
        extension = self._serializer.extension
        if extension:
            return f"usage.{extension}"
        raise ValueError("Serializer must declare a file extension")

    @staticmethod
    def _sum_column(frame: pl.DataFrame, column: str) -> float:
        """Return the sum of ``column`` as a float.

        Yields ``0.0`` when the frame is empty, the column is absent, or the
        computed sum is ``None``.
        """
        if frame.is_empty():
            return 0.0
        if column not in frame.columns:
            return 0.0

        sum_expr = pl.col(column).sum().alias("sum")
        total = frame.select(sum_expr).row(0)[0]
        return 0.0 if total is None else float(total)

    @staticmethod
    def _count_unique(frame: pl.DataFrame, column: str) -> int:
        """Return the ``n_unique`` count of ``column`` as an int.

        Yields ``0`` when the frame is empty, the column is absent, or the
        computed count is ``None``.
        """
        if frame.is_empty():
            return 0
        if column not in frame.columns:
            return 0

        unique_expr = pl.col(column).n_unique().alias("unique")
        distinct = frame.select(unique_expr).row(0)[0]
        return 0 if distinct is None else int(distinct)
|