chore: initial snapshot for gitea/github upload

This commit is contained in:
Your Name
2026-03-26 16:04:46 +08:00
commit a699a1ac98
3497 changed files with 1586237 additions and 0 deletions

View File

@@ -0,0 +1,422 @@
import os
from datetime import datetime
from typing import TYPE_CHECKING, Any, List, Optional, cast
import litellm
from litellm._logging import verbose_logger
from litellm.constants import CLOUDZERO_EXPORT_INTERVAL_MINUTES
from litellm.integrations.custom_logger import CustomLogger
if TYPE_CHECKING:
from apscheduler.schedulers.asyncio import AsyncIOScheduler
else:
AsyncIOScheduler = Any
class CloudZeroLogger(CustomLogger):
    """
    CloudZero Logger for exporting LiteLLM usage data to CloudZero AnyCost API.

    Environment Variables:
        CLOUDZERO_API_KEY: CloudZero API key for authentication
        CLOUDZERO_CONNECTION_ID: CloudZero connection ID for data submission
        CLOUDZERO_TIMEZONE: Timezone for date handling (default: UTC)
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        connection_id: Optional[str] = None,
        timezone: Optional[str] = None,
        **kwargs,
    ):
        """Initialize CloudZero logger with configuration from parameters or environment variables."""
        super().__init__(**kwargs)
        # Get configuration from parameters first, fall back to environment variables
        self.api_key = api_key or os.getenv("CLOUDZERO_API_KEY")
        self.connection_id = connection_id or os.getenv("CLOUDZERO_CONNECTION_ID")
        self.timezone = timezone or os.getenv("CLOUDZERO_TIMEZONE", "UTC")
        verbose_logger.debug(
            f"CloudZero Logger initialized with connection ID: {self.connection_id}, timezone: {self.timezone}"
        )

    async def initialize_cloudzero_export_job(self):
        """
        Handler for initializing CloudZero export job.

        Runs when CloudZero logger starts up.

        - If redis cache is available, we use the pod lock manager to acquire a lock and export the data.
          Ensures only one pod exports the data at a time.
        - If redis cache is not available, we export the data directly.
        """
        from litellm.constants import (
            CLOUDZERO_EXPORT_USAGE_DATA_JOB_NAME,
        )
        from litellm.proxy.proxy_server import proxy_logging_obj

        pod_lock_manager = proxy_logging_obj.db_spend_update_writer.pod_lock_manager

        # if using redis, ensure only one pod exports the data at a time
        if pod_lock_manager and pod_lock_manager.redis_cache:
            if await pod_lock_manager.acquire_lock(
                cronjob_id=CLOUDZERO_EXPORT_USAGE_DATA_JOB_NAME
            ):
                try:
                    await self._hourly_usage_data_export()
                finally:
                    # Always release the lock, even if the export raised.
                    await pod_lock_manager.release_lock(
                        cronjob_id=CLOUDZERO_EXPORT_USAGE_DATA_JOB_NAME
                    )
        else:
            # if not using redis, export the data directly
            await self._hourly_usage_data_export()

    async def _hourly_usage_data_export(self):
        """
        Exports recent usage data to CloudZero.

        The export window spans the last TWO export intervals (not one).
        This mitigates the possibility of missing spend if an interval is
        skipped due to a restart in an ephemeral environment.
        """
        from datetime import timedelta, timezone

        from litellm.constants import CLOUDZERO_MAX_FETCHED_DATA_RECORDS

        current_time_utc = datetime.now(timezone.utc)
        one_hour_ago_utc = current_time_utc - timedelta(
            minutes=CLOUDZERO_EXPORT_INTERVAL_MINUTES * 2
        )
        await self.export_usage_data(
            limit=CLOUDZERO_MAX_FETCHED_DATA_RECORDS,
            operation="replace_hourly",
            start_time_utc=one_hour_ago_utc,
            end_time_utc=current_time_utc,
        )

    async def export_usage_data(
        self,
        limit: Optional[int] = None,
        operation: str = "replace_hourly",
        start_time_utc: Optional[datetime] = None,
        end_time_utc: Optional[datetime] = None,
    ):
        """
        Exports the usage data to CloudZero.

        - Reads data from the DB
        - Transforms the data to the CloudZero format
        - Sends the data to CloudZero

        Args:
            limit: Optional limit on number of records to export
            operation: CloudZero operation type ("replace_hourly" or "sum")
            start_time_utc: Optional lower bound (inclusive) on record update time
            end_time_utc: Optional upper bound (inclusive) on record update time

        Raises:
            ValueError: If the CloudZero API key or connection ID is not configured.
            Exception: Re-raises any database/transform/transmission failure after logging.
        """
        from litellm.integrations.cloudzero.cz_stream_api import CloudZeroStreamer
        from litellm.integrations.cloudzero.database import LiteLLMDatabase
        from litellm.integrations.cloudzero.transform import CBFTransformer

        try:
            verbose_logger.debug("CloudZero Logger: Starting usage data export")

            # Validate required configuration
            if not self.api_key or not self.connection_id:
                raise ValueError(
                    "CloudZero configuration missing. Please set CLOUDZERO_API_KEY and CLOUDZERO_CONNECTION_ID environment variables."
                )

            # Initialize database connection and load data
            database = LiteLLMDatabase()
            verbose_logger.debug("CloudZero Logger: Loading usage data from database")
            data = await database.get_usage_data(
                limit=limit, start_time_utc=start_time_utc, end_time_utc=end_time_utc
            )
            if data.is_empty():
                verbose_logger.debug("CloudZero Logger: No usage data found to export")
                return

            verbose_logger.debug(f"CloudZero Logger: Processing {len(data)} records")

            # Transform data to CloudZero CBF format
            transformer = CBFTransformer()
            cbf_data = transformer.transform(data)
            if cbf_data.is_empty():
                verbose_logger.warning(
                    "CloudZero Logger: No valid data after transformation"
                )
                return

            # Send data to CloudZero
            streamer = CloudZeroStreamer(
                api_key=self.api_key,
                connection_id=self.connection_id,
                user_timezone=self.timezone,
            )
            verbose_logger.debug(
                f"CloudZero Logger: Transmitting {len(cbf_data)} records to CloudZero"
            )
            # NOTE(review): send_batched performs synchronous HTTP I/O inside an
            # async method, which blocks the event loop — confirm acceptable for
            # the export job's cadence.
            streamer.send_batched(cbf_data, operation=operation)
            verbose_logger.debug(
                f"CloudZero Logger: Successfully exported {len(cbf_data)} records to CloudZero"
            )
        except Exception as e:
            verbose_logger.error(
                f"CloudZero Logger: Error exporting usage data: {str(e)}"
            )
            raise

    async def dry_run_export_usage_data(self, limit: Optional[int] = 10000):
        """
        Returns the data that would be exported to CloudZero without actually sending it.

        Args:
            limit: Limit number of records to display (default: 10000)

        Returns:
            dict: Contains usage_data, cbf_data, and summary statistics
        """
        from litellm.integrations.cloudzero.database import LiteLLMDatabase
        from litellm.integrations.cloudzero.transform import CBFTransformer

        try:
            verbose_logger.debug("CloudZero Logger: Starting dry run export")

            # Initialize database connection and load data
            database = LiteLLMDatabase()
            verbose_logger.debug("CloudZero Logger: Loading usage data for dry run")
            data = await database.get_usage_data(limit=limit)
            if data.is_empty():
                verbose_logger.warning("CloudZero Dry Run: No usage data found")
                return {
                    "usage_data": [],
                    "cbf_data": [],
                    "summary": {
                        "total_records": 0,
                        "total_cost": 0,
                        "total_tokens": 0,
                        "unique_accounts": 0,
                        "unique_services": 0,
                    },
                }

            verbose_logger.debug(
                f"CloudZero Dry Run: Processing {len(data)} records..."
            )

            # Convert usage data to dict format for response
            usage_data_sample = data.head(50).to_dicts()  # Return first 50 rows

            # Transform data to CloudZero CBF format
            transformer = CBFTransformer()
            cbf_data = transformer.transform(data)
            if cbf_data.is_empty():
                verbose_logger.warning(
                    "CloudZero Dry Run: No valid data after transformation"
                )
                # Summarize from the raw sample since no CBF rows survived.
                return {
                    "usage_data": usage_data_sample,
                    "cbf_data": [],
                    "summary": {
                        "total_records": len(usage_data_sample),
                        "total_cost": sum(
                            row.get("spend", 0) for row in usage_data_sample
                        ),
                        "total_tokens": sum(
                            row.get("prompt_tokens", 0)
                            + row.get("completion_tokens", 0)
                            for row in usage_data_sample
                        ),
                        "unique_accounts": 0,
                        "unique_services": 0,
                    },
                }

            # Convert CBF data to dict format for response
            cbf_data_dict = cbf_data.to_dicts()

            # Calculate summary statistics
            total_cost = sum(record.get("cost/cost", 0) for record in cbf_data_dict)
            unique_accounts = len(
                set(
                    record.get("resource/account", "")
                    for record in cbf_data_dict
                    if record.get("resource/account")
                )
            )
            unique_services = len(
                set(
                    record.get("resource/service", "")
                    for record in cbf_data_dict
                    if record.get("resource/service")
                )
            )
            total_tokens = sum(
                record.get("usage/amount", 0) for record in cbf_data_dict
            )
            verbose_logger.debug(
                f"CloudZero Logger: Dry run completed for {len(cbf_data)} records"
            )
            return {
                "usage_data": usage_data_sample,
                "cbf_data": cbf_data_dict,
                "summary": {
                    "total_records": len(cbf_data_dict),
                    "total_cost": total_cost,
                    "total_tokens": total_tokens,
                    "unique_accounts": unique_accounts,
                    "unique_services": unique_services,
                },
            }
        except Exception as e:
            # Fix: previously this logged the same error twice; log once and re-raise.
            verbose_logger.error(f"CloudZero Logger: Error in dry run export: {str(e)}")
            raise

    def _display_cbf_data_on_screen(self, cbf_data):
        """Display CBF transformed data in a formatted table on screen."""
        from rich.box import SIMPLE
        from rich.console import Console
        from rich.table import Table

        console = Console()
        if cbf_data.is_empty():
            console.print("[yellow]No CBF data to display[/yellow]")
            return
        console.print(
            f"\n[bold green]💰 CloudZero CBF Transformed Data ({len(cbf_data)} records)[/bold green]"
        )

        # Convert to dicts for easier processing
        records = cbf_data.to_dicts()

        # Create main CBF table
        cbf_table = Table(
            show_header=True, header_style="bold cyan", box=SIMPLE, padding=(0, 1)
        )
        cbf_table.add_column("time/usage_start", style="blue", no_wrap=False)
        cbf_table.add_column("cost/cost", style="green", justify="right", no_wrap=False)
        cbf_table.add_column(
            "entity_type", style="magenta", justify="right", no_wrap=False
        )
        cbf_table.add_column(
            "entity_id", style="magenta", justify="right", no_wrap=False
        )
        cbf_table.add_column("team_id", style="cyan", no_wrap=False)
        cbf_table.add_column("team_alias", style="cyan", no_wrap=False)
        cbf_table.add_column("user_email", style="cyan", no_wrap=False)
        cbf_table.add_column("api_key_alias", style="yellow", no_wrap=False)
        cbf_table.add_column(
            "usage/amount", style="yellow", justify="right", no_wrap=False
        )
        cbf_table.add_column("resource/id", style="magenta", no_wrap=False)
        cbf_table.add_column("resource/service", style="cyan", no_wrap=False)
        cbf_table.add_column("resource/account", style="white", no_wrap=False)
        cbf_table.add_column("resource/region", style="dim", no_wrap=False)

        for record in records:
            # Use proper CBF field names
            time_usage_start = str(record.get("time/usage_start", "N/A"))
            cost_cost = str(record.get("cost/cost", 0))
            usage_amount = str(record.get("usage/amount", 0))
            resource_id = str(record.get("resource/id", "N/A"))
            resource_service = str(record.get("resource/service", "N/A"))
            resource_account = str(record.get("resource/account", "N/A"))
            resource_region = str(record.get("resource/region", "N/A"))
            entity_type = str(record.get("entity_type", "N/A"))
            entity_id = str(record.get("entity_id", "N/A"))
            team_id = str(record.get("resource/tag:team_id", "N/A"))
            team_alias = str(record.get("resource/tag:team_alias", "N/A"))
            user_email = str(record.get("resource/tag:user_email", "N/A"))
            api_key_alias = str(record.get("resource/tag:api_key_alias", "N/A"))
            cbf_table.add_row(
                time_usage_start,
                cost_cost,
                entity_type,
                entity_id,
                team_id,
                team_alias,
                user_email,
                api_key_alias,
                usage_amount,
                resource_id,
                resource_service,
                resource_account,
                resource_region,
            )
        console.print(cbf_table)

        # Show summary statistics
        total_cost = sum(record.get("cost/cost", 0) for record in records)
        unique_accounts = len(
            set(
                record.get("resource/account", "")
                for record in records
                if record.get("resource/account")
            )
        )
        unique_services = len(
            set(
                record.get("resource/service", "")
                for record in records
                if record.get("resource/service")
            )
        )
        # Count total tokens from usage metrics
        total_tokens = sum(record.get("usage/amount", 0) for record in records)
        console.print("\n[bold blue]📊 CBF Summary[/bold blue]")
        console.print(f"  Records: {len(records):,}")
        console.print(f"  Total Cost: ${total_cost:.2f}")
        console.print(f"  Total Tokens: {total_tokens:,}")
        console.print(f"  Unique Accounts: {unique_accounts}")
        console.print(f"  Unique Services: {unique_services}")
        console.print(
            "\n[dim]💡 This is the CloudZero CBF format ready for AnyCost ingestion[/dim]"
        )

    @staticmethod
    async def init_cloudzero_background_job(scheduler: AsyncIOScheduler):
        """
        Initialize the CloudZero background job.

        Starts the background job that exports the usage data to CloudZero
        every CLOUDZERO_EXPORT_INTERVAL_MINUTES minutes.
        """
        from litellm.constants import CLOUDZERO_EXPORT_INTERVAL_MINUTES
        from litellm.integrations.custom_logger import CustomLogger

        # Find the initialized CloudZero logger instance(s) registered with the
        # callback manager so the export job runs with loaded credentials.
        # (Fix: these were previously misnamed "prometheus_loggers" — copy-paste
        # from the Prometheus integration.)
        cloudzero_loggers: List[
            CustomLogger
        ] = litellm.logging_callback_manager.get_custom_loggers_for_type(
            callback_type=CloudZeroLogger
        )
        verbose_logger.debug("found %s cloudzero loggers", len(cloudzero_loggers))
        if len(cloudzero_loggers) > 0:
            cloudzero_logger = cast(CloudZeroLogger, cloudzero_loggers[0])
            # Lazy %-style args so the message is only formatted when debug is on.
            verbose_logger.debug(
                "Scheduling CloudZero usage export job to run every %s minutes",
                CLOUDZERO_EXPORT_INTERVAL_MINUTES,
            )
            scheduler.add_job(
                cloudzero_logger.initialize_cloudzero_export_job,
                "interval",
                minutes=CLOUDZERO_EXPORT_INTERVAL_MINUTES,
            )

View File

@@ -0,0 +1,161 @@
# Copyright 2025 CloudZero
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# CHANGELOG: 2025-01-19 - Initial CZRN module for CloudZero Resource Names (erik.peterson)
"""CloudZero Resource Names (CZRN) generation and validation for LiteLLM resources."""
import re
from enum import Enum
from typing import Any, cast
import litellm
class CZEntityType(str, Enum):
    """Entity types that CloudZero spend can be attributed to (currently only teams)."""

    TEAM = "team"
class CZRNGenerator:
    """Generate CloudZero Resource Names (CZRNs) for LiteLLM resources."""

    # CZRN shape: czrn:service:provider:region:owner-account:resource-type:local-id
    # The final group intentionally accepts any characters (models may contain '/', '|', etc.).
    CZRN_REGEX = re.compile(
        r"^czrn:([a-z0-9-]+):([a-zA-Z0-9-]+):([a-z0-9-]+):([a-z0-9-]+):([a-z0-9-]+):(.+)$"
    )

    def __init__(self):
        """Initialize CZRN generator."""
        pass

    def create_from_litellm_data(self, row: dict[str, Any]) -> str:
        """Create a CZRN from a LiteLLM daily spend row.

        CZRN format: czrn:<service-type>:<provider>:<region>:<owner-account-id>:<resource-type>:<cloud-local-id>

        LiteLLM mapping:
        - service-type: 'litellm' (the service managing the LLM calls)
        - provider: the custom_llm_provider (e.g. 'openai', 'anthropic', 'azure')
        - region: 'cross-region' (LiteLLM operates across regions)
        - owner-account-id: the team_id
        - resource-type: 'llm-usage' (represents LLM usage/inference)
        - cloud-local-id: the model name
        """
        raw_provider = row.get("custom_llm_provider", "unknown")
        return self.create_from_components(
            service_type="litellm",
            provider=self._normalize_provider(raw_provider),
            region="cross-region",
            # The owning team becomes the owner account id.
            owner_account_id=self._normalize_component(row.get("team_id", "unknown")),
            resource_type="llm-usage",
            # The model alone identifies the resource; the owning entity is
            # already captured in owner_account_id.
            cloud_local_id=row.get("model", "unknown"),
        )

    def create_from_components(
        self,
        service_type: str,
        provider: str,
        region: str,
        owner_account_id: str,
        resource_type: str,
        cloud_local_id: str,
    ) -> str:
        """Create a CZRN from individual components.

        All components except cloud_local_id are normalized to CZRN-safe
        characters; cloud_local_id may legitimately contain pipes and other
        characters, so it is left untouched.

        Raises:
            ValueError: If the assembled CZRN fails validation.
        """
        svc = self._normalize_component(service_type, allow_uppercase=True)
        prov = self._normalize_component(provider)
        reg = self._normalize_component(region)
        owner = self._normalize_component(owner_account_id)
        rtype = self._normalize_component(resource_type)

        czrn = f"czrn:{svc}:{prov}:{reg}:{owner}:{rtype}:{cloud_local_id}"
        if not self.is_valid(czrn):
            raise ValueError(f"Generated CZRN is invalid: {czrn}")
        return czrn

    def is_valid(self, czrn: str) -> bool:
        """Validate a CZRN string against the standard format."""
        return self.CZRN_REGEX.match(czrn) is not None

    def extract_components(self, czrn: str) -> tuple[str, str, str, str, str, str]:
        """Extract all components from a CZRN.

        Returns:
            (service_type, provider, region, owner_account_id, resource_type, cloud_local_id)

        Raises:
            ValueError: If the string is not a valid CZRN.
        """
        parsed = self.CZRN_REGEX.match(czrn)
        if parsed is None:
            raise ValueError(f"Invalid CZRN format: {czrn}")
        return cast(tuple[str, str, str, str, str, str], parsed.groups())

    def _normalize_provider(self, provider: str) -> str:
        """Normalize provider names to standard CZRN format."""
        # Known LiteLLM providers mapped to their CZRN-standard names.
        provider_map = {
            litellm.LlmProviders.AZURE.value: "azure",
            litellm.LlmProviders.AZURE_AI.value: "azure",
            litellm.LlmProviders.ANTHROPIC.value: "anthropic",
            litellm.LlmProviders.BEDROCK.value: "aws",
            litellm.LlmProviders.VERTEX_AI.value: "gcp",
            litellm.LlmProviders.GEMINI.value: "google",
            litellm.LlmProviders.COHERE.value: "cohere",
            litellm.LlmProviders.HUGGINGFACE.value: "huggingface",
            litellm.LlmProviders.REPLICATE.value: "replicate",
            litellm.LlmProviders.TOGETHER_AI.value: "together-ai",
        }
        key = provider.lower().replace("_", "-")
        # Unmapped providers fall back to their normalized name unchanged.
        return provider_map.get(key, key)

    def _normalize_component(
        self, component: str, allow_uppercase: bool = False
    ) -> str:
        """Normalize a CZRN component to meet format requirements."""
        if not component:
            return "unknown"
        text = component if allow_uppercase else component.lower()
        # Replace invalid characters with hyphens, collapse runs, trim the ends.
        text = re.sub(r"[^a-zA-Z0-9-]", "-", text)
        text = re.sub(r"-+", "-", text).strip("-")
        return text or "unknown"

View File

@@ -0,0 +1,278 @@
# Copyright 2025 CloudZero
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# CHANGELOG: 2025-01-19 - Added pathlib for filesystem operations (erik.peterson)
# CHANGELOG: 2025-01-19 - Migrated from pandas to polars and requests to httpx (erik.peterson)
# CHANGELOG: 2025-01-19 - Initial output module for CSV and CloudZero API (erik.peterson)
"""Output modules for writing CBF data to various destinations."""
import zoneinfo
from datetime import datetime, timezone
from typing import Any, Optional, Union
import httpx
import polars as pl
from rich.console import Console
class CloudZeroStreamer:
    """Stream CBF data to CloudZero AnyCost API with proper batching and timezone handling."""

    def __init__(
        self, api_key: str, connection_id: str, user_timezone: Optional[str] = None
    ):
        """Initialize CloudZero streamer with credentials.

        Args:
            api_key: CloudZero API key, sent as a bearer token.
            connection_id: AnyCost connection ID billing drops are posted to.
            user_timezone: IANA timezone name used to interpret naive
                timestamps; unknown names fall back to UTC with a warning.
        """
        self.api_key = api_key
        self.connection_id = connection_id
        self.base_url = "https://api.cloudzero.com"
        self.console = Console()

        # Set timezone - default to UTC
        self.user_timezone: Union[zoneinfo.ZoneInfo, timezone]
        if user_timezone:
            try:
                self.user_timezone = zoneinfo.ZoneInfo(user_timezone)
            except zoneinfo.ZoneInfoNotFoundError:
                self.console.print(
                    f"[yellow]Warning: Unknown timezone '{user_timezone}', using UTC[/yellow]"
                )
                self.user_timezone = timezone.utc
        else:
            self.user_timezone = timezone.utc

    def send_batched(
        self, data: pl.DataFrame, operation: str = "replace_hourly"
    ) -> None:
        """Send CBF data in daily batches to CloudZero AnyCost API."""
        if data.is_empty():
            self.console.print("[yellow]No data to send to CloudZero[/yellow]")
            return

        # Group data by date and send each day as a batch
        daily_batches = self._group_by_date(data)
        if not daily_batches:
            self.console.print("[yellow]No valid daily batches to send[/yellow]")
            return
        self.console.print(
            f"[blue]Sending {len(daily_batches)} daily batch(es) with operation '{operation}'[/blue]"
        )
        for batch_date, batch_data in daily_batches.items():
            self._send_daily_batch(batch_date, batch_data, operation)

    def _group_by_date(self, data: pl.DataFrame) -> dict[str, pl.DataFrame]:
        """Group data by UTC calendar date keyed as 'YYYY-MM-DD'.

        Rows with missing or unparseable 'time/usage_start' values are skipped
        with a warning rather than aborting the whole export.
        """
        daily_batches: dict[str, list[dict[str, Any]]] = {}

        # Ensure we have the required columns
        if "time/usage_start" not in data.columns:
            self.console.print(
                "[red]Error: Missing 'time/usage_start' column for date grouping[/red]"
            )
            return {}

        timestamp_str: Optional[str] = None
        for row in data.iter_rows(named=True):
            try:
                # Parse the timestamp and convert to UTC
                timestamp_str = row.get("time/usage_start")
                if not timestamp_str:
                    continue
                dt = self._parse_and_convert_timestamp(timestamp_str)
                batch_date = dt.strftime("%Y-%m-%d")
                if batch_date not in daily_batches:
                    daily_batches[batch_date] = []
                daily_batches[batch_date].append(row)
            except Exception as e:
                self.console.print(
                    f"[yellow]Warning: Could not process timestamp '{timestamp_str}': {e}[/yellow]"
                )
                continue

        # Convert lists back to DataFrames
        return {
            date_key: pl.DataFrame(records)
            for date_key, records in daily_batches.items()
            if records
        }

    def _parse_and_convert_timestamp(self, timestamp_str: str) -> datetime:
        """Parse an ISO 8601 timestamp string and convert it to UTC.

        Offset-aware inputs (including a trailing 'Z') keep their own offset;
        naive inputs are assumed to be in the configured user timezone. The
        result is always converted to UTC.

        (Fix: replaces a hard-coded whitelist of whole-hour UTC offsets that
        was redundant — the fallback already handled any aware timestamp — and
        silently excluded half-hour offsets like '+05:30'.)

        Raises:
            ValueError: If the string cannot be parsed as ISO 8601.
        """
        try:
            if timestamp_str.endswith("Z"):
                # fromisoformat() rejects a literal 'Z' suffix on older Python
                # versions, so normalize it to an explicit UTC offset.
                dt = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
            else:
                dt = datetime.fromisoformat(timestamp_str)
                if dt.tzinfo is None:
                    # Assume user timezone if no timezone info
                    dt = dt.replace(tzinfo=self.user_timezone)
            # Convert to UTC
            return dt.astimezone(timezone.utc)
        except ValueError as e:
            raise ValueError(
                f"Could not parse timestamp '{timestamp_str}': {e}"
            ) from e

    def _send_daily_batch(
        self, batch_date: str, batch_data: pl.DataFrame, operation: str
    ) -> None:
        """Send a single daily batch to CloudZero API.

        Raises:
            httpx.RequestError: On network failure.
            httpx.HTTPStatusError: On a non-2xx API response.
        """
        if batch_data.is_empty():
            return
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        # Use the correct API endpoint format from documentation
        url = f"{self.base_url}/v2/connections/billing/anycost/{self.connection_id}/billing_drops"

        # Prepare the batch payload according to AnyCost API format
        payload = self._prepare_batch_payload(batch_date, batch_data, operation)
        try:
            with httpx.Client(timeout=30.0) as client:
                self.console.print(
                    f"[blue]Sending batch for {batch_date} ({len(batch_data)} records)[/blue]"
                )
                response = client.post(url, headers=headers, json=payload)
                response.raise_for_status()
                self.console.print(
                    f"[green]✓ Successfully sent batch for {batch_date} ({len(batch_data)} records)[/green]"
                )
        except httpx.RequestError as e:
            self.console.print(
                f"[red]✗ Network error sending batch for {batch_date}: {e}[/red]"
            )
            raise
        except httpx.HTTPStatusError as e:
            self.console.print(
                f"[red]✗ HTTP error sending batch for {batch_date}: {e.response.status_code} {e.response.text}[/red]"
            )
            raise

    def _prepare_batch_payload(
        self, batch_date: str, batch_data: pl.DataFrame, operation: str
    ) -> dict[str, Any]:
        """Prepare batch payload according to CloudZero AnyCost API format."""
        # Convert batch_date to month for the API (YYYY-MM format)
        try:
            date_obj = datetime.strptime(batch_date, "%Y-%m-%d")
            month_str = date_obj.strftime("%Y-%m")
        except ValueError:
            # Fallback to current month
            month_str = datetime.now().strftime("%Y-%m")

        # Convert DataFrame rows to API format
        data_records = []
        for row in batch_data.iter_rows(named=True):
            record = self._convert_cbf_to_api_format(row)
            if record:
                data_records.append(record)
        payload = {"month": month_str, "operation": operation, "data": data_records}
        return payload

    def _convert_cbf_to_api_format(
        self, row: dict[str, Any]
    ) -> Optional[dict[str, Any]]:
        """Convert CBF row to CloudZero API format - keeping CBF field names as CloudZero expects them.

        Returns None (after a warning) if the row cannot be converted, so a
        single bad row does not abort the batch.
        """
        try:
            # CloudZero expects CBF format field names directly, not converted names
            api_record = {}

            # Copy all CBF fields, converting numeric values to strings as required by CloudZero
            for key, value in row.items():
                if value is not None:
                    # CloudZero requires numeric values to be strings, but NOT in scientific notation
                    if isinstance(value, (int, float)):
                        if isinstance(value, float):
                            # Fixed-point with trailing zeros stripped avoids
                            # scientific notation for small costs.
                            api_record[key] = f"{value:.10f}".rstrip("0").rstrip(".")
                        else:
                            api_record[key] = str(value)
                    else:
                        api_record[key] = value

            # Ensure timestamp is in UTC format
            if "time/usage_start" in api_record:
                api_record["time/usage_start"] = self._ensure_utc_timestamp(
                    api_record["time/usage_start"]
                )
            return api_record
        except Exception as e:
            self.console.print(
                f"[yellow]Warning: Could not convert record to API format: {e}[/yellow]"
            )
            return None

    def _ensure_utc_timestamp(self, timestamp_str: str) -> str:
        """Ensure timestamp is an ISO 8601 UTC string ('Z'-suffixed) for the API."""
        if not timestamp_str:
            return datetime.now(timezone.utc).isoformat()
        try:
            dt = self._parse_and_convert_timestamp(timestamp_str)
            return dt.isoformat().replace("+00:00", "Z")
        except Exception:
            # Fallback to current time in UTC
            return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

View File

@@ -0,0 +1,101 @@
# Copyright 2025 CloudZero
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# CHANGELOG: 2025-01-19 - Refactored to use daily spend tables for proper CBF mapping (erik.peterson)
# CHANGELOG: 2025-01-19 - Migrated from pandas to polars for database operations (erik.peterson)
# CHANGELOG: 2025-01-19 - Initial database module for LiteLLM data extraction (erik.peterson)
"""Database connection and data extraction for LiteLLM."""
from datetime import datetime
from typing import Any, Optional, List
import polars as pl
class LiteLLMDatabase:
    """Handle LiteLLM PostgreSQL database connections and queries."""

    def _ensure_prisma_client(self):
        """Ensure prisma client is available.

        Raises:
            Exception: If no database is connected to the proxy.
        """
        # Fix: the docstring previously sat after this import, so it was a bare
        # statement, not a docstring. Import is lazy to avoid a circular import
        # with the proxy server.
        from litellm.proxy.proxy_server import prisma_client

        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )
        return prisma_client

    async def get_usage_data(
        self,
        limit: Optional[int] = None,
        start_time_utc: Optional[datetime] = None,
        end_time_utc: Optional[datetime] = None,
    ) -> pl.DataFrame:
        """Retrieve usage data from LiteLLM daily user spend table.

        Args:
            limit: Optional maximum number of rows to return.
            start_time_utc: Only include rows updated at or after this time.
            end_time_utc: Only include rows updated at or before this time.

        Returns:
            A polars DataFrame of daily spend rows joined with key/team/user info.

        Raises:
            ValueError: If ``limit`` cannot be coerced to an integer.
            Exception: If the database is not connected or the query fails.
        """
        client = self._ensure_prisma_client()

        # Query to get user spend data with team information. Use parameter binding to
        # avoid SQL injection from user-supplied timestamps or limits.
        query = """
            SELECT
                dus.id,
                dus.date,
                dus.user_id,
                dus.api_key,
                dus.model,
                dus.model_group,
                dus.custom_llm_provider,
                dus.prompt_tokens,
                dus.completion_tokens,
                dus.spend,
                dus.api_requests,
                dus.successful_requests,
                dus.failed_requests,
                dus.cache_creation_input_tokens,
                dus.cache_read_input_tokens,
                dus.created_at,
                dus.updated_at,
                vt.team_id,
                vt.key_alias as api_key_alias,
                tt.team_alias,
                ut.user_email as user_email
            FROM "LiteLLM_DailyUserSpend" dus
            LEFT JOIN "LiteLLM_VerificationToken" vt ON dus.api_key = vt.token
            LEFT JOIN "LiteLLM_TeamTable" tt ON vt.team_id = tt.team_id
            LEFT JOIN "LiteLLM_UserTable" ut ON dus.user_id = ut.user_id
            WHERE ($1::timestamptz IS NULL OR dus.updated_at >= $1::timestamptz)
              AND ($2::timestamptz IS NULL OR dus.updated_at <= $2::timestamptz)
            ORDER BY dus.date DESC, dus.created_at DESC
        """
        params: List[Any] = [
            start_time_utc,
            end_time_utc,
        ]
        if limit is not None:
            try:
                params.append(int(limit))
            except (TypeError, ValueError) as e:
                raise ValueError("limit must be an integer") from e
            query += " LIMIT $3"
        try:
            db_response = await client.db.query_raw(query, *params)
            # Convert the response to polars DataFrame with full schema inference
            # This prevents schema mismatch errors when data types vary across rows
            return pl.DataFrame(db_response, infer_schema_length=None)
        except Exception as e:
            raise Exception(f"Error retrieving usage data: {str(e)}") from e

View File

@@ -0,0 +1,223 @@
# Copyright 2025 CloudZero
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# CHANGELOG: 2025-01-19 - Updated CBF transformation for daily spend tables and proper CloudZero mapping (erik.peterson)
# CHANGELOG: 2025-01-19 - Migrated from pandas to polars for data transformation (erik.peterson)
# CHANGELOG: 2025-01-19 - Initial CBF transformation module (erik.peterson)
"""Transform LiteLLM data to CloudZero AnyCost CBF format."""
from datetime import datetime
from typing import Any, Optional
import polars as pl
from ...types.integrations.cloudzero import CBFRecord
from .cz_resource_names import CZEntityType, CZRNGenerator
class CBFTransformer:
"""Transform LiteLLM usage data to CloudZero Billing Format (CBF)."""
def __init__(self):
    """Initialize transformer with CZRN generator."""
    # The CZRN generator builds the CloudZero resource IDs embedded in each CBF record.
    self.czrn_generator = CZRNGenerator()
def transform(self, data: pl.DataFrame) -> pl.DataFrame:
    """Transform LiteLLM data to CBF format, dropping records with zero successful_requests or invalid CZRNs."""
    if data.is_empty():
        return pl.DataFrame()

    total_rows = len(data)

    # Keep only rows that recorded at least one successful request.
    if "successful_requests" in data.columns:
        usable = data.filter(pl.col("successful_requests") > 0)
    else:
        usable = data
    dropped_zero_success = total_rows - len(usable)

    usable_count = len(usable)
    transformed = []
    invalid_czrn_count = 0
    for record in usable.iter_rows(named=True):
        try:
            # Only rows whose CZRN generation succeeds make it into the output.
            transformed.append(self._create_cbf_record(record))
        except Exception:
            # Best-effort: drop the bad row and keep transforming the rest.
            invalid_czrn_count += 1

    # Report drop counts and the final tally to the operator.
    from rich.console import Console

    console = Console()
    if dropped_zero_success > 0:
        console.print(
            f"[yellow]⚠️ Dropped {dropped_zero_success:,} of {total_rows:,} records with zero successful_requests[/yellow]"
        )
    if invalid_czrn_count > 0:
        console.print(
            f"[yellow]⚠️ Dropped {invalid_czrn_count:,} of {usable_count:,} filtered records due to invalid CZRNs[/yellow]"
        )
    if len(transformed) > 0:
        console.print(
            f"[green]✓ Successfully transformed {len(transformed):,} records[/green]"
        )
    return pl.DataFrame(transformed)
def _create_cbf_record(self, row: dict[str, Any]) -> CBFRecord:
"""Create a single CBF record from LiteLLM daily spend row."""
# Parse date (daily spend tables use date strings like '2025-04-19')
usage_date = self._parse_date(row.get("date"))
# Calculate total tokens
prompt_tokens = int(row.get("prompt_tokens", 0))
completion_tokens = int(row.get("completion_tokens", 0))
total_tokens = prompt_tokens + completion_tokens
# Create CloudZero Resource Name (CZRN) as resource_id
resource_id = self.czrn_generator.create_from_litellm_data(row)
# Build dimensions for CloudZero
model = str(row.get("model", ""))
api_key_hash = str(row.get("api_key", ""))[
:8
] # First 8 chars for identification
# Handle team information with fallbacks
team_id = row.get("team_id")
team_alias = row.get("team_alias")
user_email = row.get("user_email")
# Use team_alias if available, otherwise team_id, otherwise fallback to 'unknown'
entity_id = (
str(team_alias) if team_alias else (str(team_id) if team_id else "unknown")
)
# Get alias fields if they exist
api_key_alias = row.get("api_key_alias")
organization_alias = row.get("organization_alias")
project_alias = row.get("project_alias")
user_alias = row.get("user_alias")
dimensions = {
"entity_type": CZEntityType.TEAM.value,
"entity_id": entity_id,
"team_alias": str(team_alias) if team_alias else "unknown",
"model": model,
"model_group": str(row.get("model_group", "")),
"provider": str(row.get("custom_llm_provider", "")),
"api_key_prefix": api_key_hash,
"api_key_alias": str(row.get("api_key_alias", "")),
"user_email": str(user_email) if user_email else "",
"api_requests": str(row.get("api_requests", 0)),
"successful_requests": str(row.get("successful_requests", 0)),
"failed_requests": str(row.get("failed_requests", 0)),
"cache_creation_tokens": str(row.get("cache_creation_input_tokens", 0)),
"cache_read_tokens": str(row.get("cache_read_input_tokens", 0)),
"organization_alias": str(organization_alias) if organization_alias else "",
"project_alias": str(project_alias) if project_alias else "",
"user_alias": str(user_alias) if user_alias else "",
}
# Extract CZRN components to populate corresponding CBF columns
czrn_components = self.czrn_generator.extract_components(resource_id)
(
service_type,
provider,
region,
owner_account_id,
resource_type,
cloud_local_id,
) = czrn_components
# Build resource/account as concat of api_key_alias and api_key_prefix
resource_account = (
f"{api_key_alias}|{api_key_hash}" if api_key_alias else api_key_hash
)
# CloudZero CBF format with proper column names
cbf_record = {
# Required CBF fields
"time/usage_start": usage_date.isoformat()
if usage_date
else None, # Required: ISO-formatted UTC datetime
"cost/cost": float(row.get("spend", 0.0)), # Required: billed cost
"resource/id": resource_id, # CZRN (CloudZero Resource Name)
# Usage metrics for token consumption
"usage/amount": total_tokens, # Numeric value of tokens consumed
"usage/units": "tokens", # Description of token units
# CBF fields - updated per LIT-1907
"resource/service": str(row.get("model_group", "")), # Send model_group
"resource/account": resource_account, # Send api_key_alias|api_key_prefix
"resource/region": region, # Maps to CZRN region (cross-region)
"resource/usage_family": str(
row.get("custom_llm_provider", "")
), # Send provider
# Action field
"action/operation": str(team_id) if team_id else "", # Send team_id
# Line item details
"lineitem/type": "Usage", # Standard usage line item
}
# Add CZRN components that don't have direct CBF column mappings as resource tags
cbf_record["resource/tag:provider"] = provider # CZRN provider component
cbf_record[
"resource/tag:model"
] = cloud_local_id # CZRN cloud-local-id component (model)
# Add resource tags for all dimensions (using resource/tag:<key> format)
for key, value in dimensions.items():
if (
value and value != "N/A" and value != "unknown"
): # Only add meaningful tags
cbf_record[f"resource/tag:{key}"] = str(value)
# Add token breakdown as resource tags for analysis (excluding total_tokens per LIT-1907)
if prompt_tokens > 0:
cbf_record["resource/tag:prompt_tokens"] = str(prompt_tokens)
if completion_tokens > 0:
cbf_record["resource/tag:completion_tokens"] = str(completion_tokens)
return CBFRecord(cbf_record)
def _parse_date(self, date_str) -> Optional[datetime]:
"""Parse date string from daily spend tables (e.g., '2025-04-19')."""
if date_str is None:
return None
if isinstance(date_str, datetime):
return date_str
if isinstance(date_str, str):
try:
# Parse date string and set to midnight UTC for daily aggregation
return pl.Series([date_str]).str.to_datetime("%Y-%m-%d").item()
except Exception:
try:
# Fallback: try ISO format parsing
return pl.Series([date_str]).str.to_datetime().item()
except Exception:
return None
return None