# Copyright 2025 CloudZero
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# CHANGELOG: 2025-01-19 - Added pathlib for filesystem operations (erik.peterson)
# CHANGELOG: 2025-01-19 - Migrated from pandas to polars and requests to httpx (erik.peterson)
# CHANGELOG: 2025-01-19 - Initial output module for CSV and CloudZero API (erik.peterson)

"""Output modules for writing CBF data to various destinations."""

import zoneinfo
from datetime import datetime, timezone
from typing import Any, Optional, Union

import httpx
import polars as pl
from rich.console import Console


class CloudZeroStreamer:
    """Stream CBF data to CloudZero AnyCost API with proper batching and timezone handling."""

    def __init__(
        self, api_key: str, connection_id: str, user_timezone: Optional[str] = None
    ):
        """Initialize CloudZero streamer with credentials.

        Args:
            api_key: Token sent as the ``Authorization: Bearer`` header.
            connection_id: AnyCost connection identifier used in the request URL.
            user_timezone: IANA timezone name applied to timestamps that carry
                no UTC offset. Missing or unknown names fall back to UTC (an
                unknown name also prints a warning).
        """
        self.api_key = api_key
        self.connection_id = connection_id
        self.base_url = "https://api.cloudzero.com"
        self.console = Console()

        # Set timezone - default to UTC
        self.user_timezone: Union[zoneinfo.ZoneInfo, timezone] = timezone.utc
        if user_timezone:
            try:
                self.user_timezone = zoneinfo.ZoneInfo(user_timezone)
            except zoneinfo.ZoneInfoNotFoundError:
                self.console.print(
                    f"[yellow]Warning: Unknown timezone '{user_timezone}', using UTC[/yellow]"
                )

    def send_batched(
        self, data: pl.DataFrame, operation: str = "replace_hourly"
    ) -> None:
        """Send CBF data in daily batches to CloudZero AnyCost API.

        Args:
            data: CBF rows; must contain a ``time/usage_start`` column.
            operation: Value placed in the ``operation`` field of each payload.

        Raises:
            httpx.RequestError: Propagated from the first batch that hits a
                network failure; later batches are not attempted.
            httpx.HTTPStatusError: Propagated from the first batch that gets a
                non-success HTTP status; later batches are not attempted.
        """
        if data.is_empty():
            self.console.print("[yellow]No data to send to CloudZero[/yellow]")
            return

        # Group data by date and send each day as a batch
        daily_batches = self._group_by_date(data)
        if not daily_batches:
            self.console.print("[yellow]No valid daily batches to send[/yellow]")
            return

        self.console.print(
            f"[blue]Sending {len(daily_batches)} daily batch(es) with operation '{operation}'[/blue]"
        )

        for batch_date, batch_data in daily_batches.items():
            self._send_daily_batch(batch_date, batch_data, operation)

    def _group_by_date(self, data: pl.DataFrame) -> dict[str, pl.DataFrame]:
        """Group rows by the UTC calendar date of ``time/usage_start``.

        Rows whose timestamp is empty are skipped silently; rows whose
        timestamp cannot be parsed are skipped with a warning.

        Returns:
            Mapping of ``YYYY-MM-DD`` (UTC) to a DataFrame of that day's rows.
            Empty when the ``time/usage_start`` column is missing.
        """
        # Ensure we have the required columns
        if "time/usage_start" not in data.columns:
            self.console.print(
                "[red]Error: Missing 'time/usage_start' column for date grouping[/red]"
            )
            return {}

        rows_by_date: dict[str, list[dict[str, Any]]] = {}
        for row in data.iter_rows(named=True):
            timestamp_str = row.get("time/usage_start")
            if not timestamp_str:
                continue

            try:
                utc_dt = self._parse_and_convert_timestamp(timestamp_str)
            except ValueError as e:
                self.console.print(
                    f"[yellow]Warning: Could not process timestamp '{timestamp_str}': {e}[/yellow]"
                )
                continue

            rows_by_date.setdefault(utc_dt.date().isoformat(), []).append(row)

        # Rebuild each day's frame with the input schema so column dtypes are
        # not re-inferred from whatever subset of rows landed in that day
        # (e.g. a column that is entirely null within one day).
        return {
            date_key: pl.DataFrame(records, schema=data.schema)
            for date_key, records in rows_by_date.items()
        }

    def _parse_and_convert_timestamp(self, timestamp_str: str) -> datetime:
        """Parse an ISO 8601 timestamp string and convert it to UTC.

        A trailing ``Z`` is treated as ``+00:00``. Timestamps without any
        offset are interpreted in ``self.user_timezone``.

        Raises:
            ValueError: If the value is not a string, is not valid ISO 8601,
                or cannot be represented after conversion to UTC.
        """
        if not isinstance(timestamp_str, str):
            raise ValueError(
                f"Could not parse timestamp '{timestamp_str}': "
                f"expected str, got {type(timestamp_str).__name__}"
            )

        # datetime.fromisoformat() only accepts a literal "Z" suffix from
        # Python 3.11 on, so normalise it to an explicit offset first.
        iso_text = timestamp_str
        if iso_text.endswith("Z"):
            iso_text = iso_text[:-1] + "+00:00"

        try:
            dt = datetime.fromisoformat(iso_text)
            if dt.tzinfo is None:
                # Assume user timezone if no timezone info
                dt = dt.replace(tzinfo=self.user_timezone)
            return dt.astimezone(timezone.utc)
        except (ValueError, OverflowError) as e:
            raise ValueError(
                f"Could not parse timestamp '{timestamp_str}': {e}"
            ) from e

    def _send_daily_batch(
        self, batch_date: str, batch_data: pl.DataFrame, operation: str
    ) -> None:
        """Send a single daily batch to CloudZero API.

        Does nothing when ``batch_data`` is empty.

        Raises:
            httpx.RequestError: On network failure (logged, then re-raised).
            httpx.HTTPStatusError: On a non-success HTTP status (logged with
                status code and response body, then re-raised).
        """
        if batch_data.is_empty():
            return

        record_count = len(batch_data)
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        # Use the correct API endpoint format from documentation
        url = f"{self.base_url}/v2/connections/billing/anycost/{self.connection_id}/billing_drops"

        # Prepare the batch payload according to AnyCost API format
        payload = self._prepare_batch_payload(batch_date, batch_data, operation)

        try:
            with httpx.Client(timeout=30.0) as client:
                self.console.print(
                    f"[blue]Sending batch for {batch_date} ({record_count} records)[/blue]"
                )
                response = client.post(url, headers=headers, json=payload)
                response.raise_for_status()
                self.console.print(
                    f"[green]✓ Successfully sent batch for {batch_date} ({record_count} records)[/green]"
                )
        except httpx.RequestError as e:
            self.console.print(
                f"[red]✗ Network error sending batch for {batch_date}: {e}[/red]"
            )
            raise
        except httpx.HTTPStatusError as e:
            self.console.print(
                f"[red]✗ HTTP error sending batch for {batch_date}: {e.response.status_code} {e.response.text}[/red]"
            )
            raise

    def _prepare_batch_payload(
        self, batch_date: str, batch_data: pl.DataFrame, operation: str
    ) -> dict[str, Any]:
        """Prepare batch payload according to CloudZero AnyCost API format.

        Returns:
            Dict with ``month`` (``YYYY-MM``), ``operation`` and ``data`` (the
            converted rows; rows that fail conversion are dropped).
        """
        # Convert batch_date to month for the API (YYYY-MM format)
        try:
            month_str = datetime.strptime(batch_date, "%Y-%m-%d").strftime("%Y-%m")
        except ValueError:
            # Fallback to the current month, in UTC like every other
            # timestamp this class produces.
            month_str = datetime.now(timezone.utc).strftime("%Y-%m")

        # Convert DataFrame rows to API format
        converted = (
            self._convert_cbf_to_api_format(row)
            for row in batch_data.iter_rows(named=True)
        )
        data_records = [record for record in converted if record]

        return {"month": month_str, "operation": operation, "data": data_records}

    def _convert_cbf_to_api_format(
        self, row: dict[str, Any]
    ) -> Optional[dict[str, Any]]:
        """Convert CBF row to CloudZero API format - keeping CBF field names as CloudZero expects them.

        ``None`` values are omitted. Floats are rendered as fixed-point strings
        (up to 10 decimals, trailing zeros stripped) so they never appear in
        scientific notation; ints are rendered with ``str()``. All other
        values are copied unchanged. ``time/usage_start`` is normalised to a
        UTC timestamp string.

        Returns:
            The converted record, or ``None`` if conversion raised (a warning
            is printed in that case).
        """
        try:
            # CloudZero expects CBF format field names directly, not converted names
            api_record: dict[str, Any] = {}

            for key, value in row.items():
                if value is None:
                    continue
                if isinstance(value, float):
                    # Fixed-point with generous precision avoids scientific notation
                    api_record[key] = f"{value:.10f}".rstrip("0").rstrip(".")
                elif isinstance(value, int):
                    api_record[key] = str(value)
                else:
                    api_record[key] = value

            # Ensure timestamp is in UTC format
            if "time/usage_start" in api_record:
                api_record["time/usage_start"] = self._ensure_utc_timestamp(
                    api_record["time/usage_start"]
                )

            return api_record

        except Exception as e:
            # Deliberate best-effort: one bad record must not abort the batch.
            self.console.print(
                f"[yellow]Warning: Could not convert record to API format: {e}[/yellow]"
            )
            return None

    def _ensure_utc_timestamp(self, timestamp_str: str) -> str:
        """Ensure timestamp is in UTC format for API.

        Returns:
            The timestamp converted to UTC in ISO 8601 form with a ``Z``
            suffix. If the input is empty or cannot be parsed, the current UTC
            time is returned in the same form and a warning is printed.
        """
        if timestamp_str:
            try:
                utc_dt = self._parse_and_convert_timestamp(timestamp_str)
                return utc_dt.isoformat().replace("+00:00", "Z")
            except ValueError:
                pass

        # Fallback to current time in UTC; surfaced as a warning because it
        # changes the record's usage time.
        self.console.print(
            f"[yellow]Warning: Invalid timestamp '{timestamp_str}', using current UTC time[/yellow]"
        )
        return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")