chore: initial public snapshot for github upload

This commit is contained in:
Your Name
2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
"""
Vertex AI Video Generation Module
This module provides support for Vertex AI's Veo video generation API.
"""
from .transformation import VertexAIVideoConfig
__all__ = ["VertexAIVideoConfig"]

View File

@@ -0,0 +1,636 @@
"""
Vertex AI Video Generation Transformation
Handles transformation of requests/responses for Vertex AI's Veo video generation API.
Based on: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo-video-generation
"""
import base64
import time
from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union, cast
import httpx
from httpx._types import RequestFiles
from litellm.constants import DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS
from litellm.images.utils import ImageEditRequestUtils
from litellm.llms.base_llm.videos.transformation import BaseVideoConfig
from litellm.llms.vertex_ai.common_utils import (
_convert_vertex_datetime_to_openai_datetime,
get_vertex_base_url,
)
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
from litellm.types.router import GenericLiteLLMParams
from litellm.types.videos.main import VideoCreateOptionalRequestParams, VideoObject
from litellm.types.videos.utils import (
encode_video_id_with_provider,
extract_original_video_id,
)
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
from litellm.llms.base_llm.chat.transformation import (
BaseLLMException as _BaseLLMException,
)
LiteLLMLoggingObj = _LiteLLMLoggingObj
BaseLLMException = _BaseLLMException
else:
LiteLLMLoggingObj = Any
BaseLLMException = Any
def _convert_image_to_vertex_format(image_file) -> Dict[str, str]:
"""
Convert image file to Vertex AI format with base64 encoding and MIME type.
Args:
image_file: File-like object opened in binary mode (e.g., open("path", "rb"))
Returns:
Dict with bytesBase64Encoded and mimeType
"""
mime_type = ImageEditRequestUtils.get_image_content_type(image_file)
if hasattr(image_file, "seek"):
image_file.seek(0)
image_bytes = image_file.read()
base64_encoded = base64.b64encode(image_bytes).decode("utf-8")
return {"bytesBase64Encoded": base64_encoded, "mimeType": mime_type}
class VertexAIVideoConfig(BaseVideoConfig, VertexBase):
"""
Configuration class for Vertex AI (Veo) video generation.
Veo uses a long-running operation model:
1. POST to :predictLongRunning returns operation name
2. Poll operation using :fetchPredictOperation until done=true
3. Extract video data (base64) from response
"""
def __init__(self):
BaseVideoConfig.__init__(self)
VertexBase.__init__(self)
@staticmethod
def extract_model_from_operation_name(operation_name: str) -> Optional[str]:
"""
Extract the model name from a Vertex AI operation name.
Args:
operation_name: Operation name in format:
projects/PROJECT/locations/LOCATION/publishers/google/models/MODEL/operations/OPERATION_ID
Returns:
Model name (e.g., "veo-2.0-generate-001") or None if extraction fails
"""
parts = operation_name.split("/")
# Model is at index 7 in the operation name format
if len(parts) >= 8:
return parts[7]
return None
def get_supported_openai_params(self, model: str) -> list:
"""
Get the list of supported OpenAI parameters for Veo video generation.
Veo supports minimal parameters compared to OpenAI.
"""
return ["model", "prompt", "input_reference", "seconds", "size"]
def map_openai_params(
self,
video_create_optional_params: VideoCreateOptionalRequestParams,
model: str,
drop_params: bool,
) -> Dict[str, Any]:
"""
Map OpenAI-style parameters to Veo format.
Mappings:
- prompt → prompt (in instances)
- input_reference → image (in instances)
- size → aspectRatio (e.g., "1280x720""16:9")
- seconds → durationSeconds (defaults to 4 seconds if not provided)
"""
mapped_params: Dict[str, Any] = {}
# Map input_reference to image (will be processed in transform_video_create_request)
if "input_reference" in video_create_optional_params:
mapped_params["image"] = video_create_optional_params["input_reference"]
elif "image" in video_create_optional_params:
mapped_params["image"] = video_create_optional_params["image"]
# Pass through a provider-specific parameters block if provided directly
if "parameters" in video_create_optional_params:
mapped_params["parameters"] = video_create_optional_params["parameters"]
# Map size to aspectRatio
if "size" in video_create_optional_params:
size = video_create_optional_params["size"]
if size is not None:
aspect_ratio = self._convert_size_to_aspect_ratio(size)
if aspect_ratio:
mapped_params["aspectRatio"] = aspect_ratio
# Map seconds to durationSeconds, default to 4 seconds (matching OpenAI)
if "seconds" in video_create_optional_params:
seconds = video_create_optional_params["seconds"]
try:
duration = int(seconds) if isinstance(seconds, str) else seconds
if duration is not None:
mapped_params["durationSeconds"] = duration
except (ValueError, TypeError):
# If conversion fails, use default
pass
return mapped_params
def _convert_size_to_aspect_ratio(self, size: str) -> Optional[str]:
"""
Convert OpenAI size format to Veo aspectRatio format.
Supported aspect ratios: 9:16 (portrait), 16:9 (landscape)
"""
if not size:
return None
aspect_ratio_map = {
"1280x720": "16:9",
"1920x1080": "16:9",
"720x1280": "9:16",
"1080x1920": "9:16",
}
return aspect_ratio_map.get(size, "16:9")
def validate_environment(
self,
headers: dict,
model: str,
api_key: Optional[str] = None,
litellm_params: Optional[Union[GenericLiteLLMParams, dict]] = None,
) -> dict:
"""
Validate environment and return headers for Vertex AI OCR.
Vertex AI uses Bearer token authentication with access token from credentials.
"""
# Extract Vertex AI parameters using safe helpers from VertexBase
# Use safe_get_* methods that don't mutate litellm_params dict
# Ensure litellm_params is a dict for type checking
params_dict: Dict[str, Any] = (
cast(Dict[str, Any], litellm_params) if litellm_params is not None else {}
)
vertex_project = VertexBase.safe_get_vertex_ai_project(
litellm_params=params_dict
)
vertex_credentials = VertexBase.safe_get_vertex_ai_credentials(
litellm_params=params_dict
)
# Get access token from Vertex credentials
access_token, project_id = self.get_access_token(
credentials=vertex_credentials,
project_id=vertex_project,
)
headers = {
"Authorization": f"Bearer {access_token}",
"Content-Type": "application/json",
**headers,
}
return headers
def get_complete_url(
self,
model: str,
api_base: Optional[str],
litellm_params: dict,
) -> str:
"""
Get the complete URL for Veo video generation.
Returns URL for :predictLongRunning endpoint:
https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/publishers/google/models/MODEL:predictLongRunning
"""
vertex_project = VertexBase.safe_get_vertex_ai_project(litellm_params)
vertex_location = VertexBase.safe_get_vertex_ai_location(litellm_params)
if not vertex_project:
raise ValueError(
"vertex_project is required for Vertex AI video generation. "
"Set it via environment variable VERTEXAI_PROJECT or pass as parameter."
)
# Default to us-central1 if no location specified
vertex_location = vertex_location or "us-central1"
# Extract model name (remove vertex_ai/ prefix if present)
model_name = model.replace("vertex_ai/", "")
# Construct the URL
if api_base:
base_url = api_base.rstrip("/")
else:
base_url = get_vertex_base_url(vertex_location)
url = f"{base_url}/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model_name}"
return url
def transform_video_create_request(
self,
model: str,
prompt: str,
api_base: str,
video_create_optional_request_params: Dict,
litellm_params: GenericLiteLLMParams,
headers: dict,
) -> Tuple[Dict, RequestFiles, str]:
"""
Transform the video creation request for Veo API.
Veo expects:
{
"instances": [
{
"prompt": "A cat playing with a ball of yarn",
"image": {
"bytesBase64Encoded": "...",
"mimeType": "image/jpeg"
}
}
],
"parameters": {
"aspectRatio": "16:9",
"durationSeconds": 8
}
}
"""
# Build instance with prompt
instance_dict: Dict[str, Any] = {"prompt": prompt}
params_copy = video_create_optional_request_params.copy()
# Check if user wants to provide full instance dict
if "instances" in params_copy and isinstance(params_copy["instances"], dict):
# Replace/merge with user-provided instance
instance_dict.update(params_copy["instances"])
params_copy.pop("instances")
elif "image" in params_copy and params_copy["image"] is not None:
image = params_copy["image"]
if isinstance(image, dict):
# Already in Vertex format e.g. {"gcsUri": "gs://..."} or
# {"bytesBase64Encoded": "...", "mimeType": "..."}
image_data = image
elif isinstance(image, str) and image.startswith("gs://"):
# Bare GCS URI — Vertex AI accepts gcsUri natively, no download needed
image_data = {"gcsUri": image}
elif isinstance(image, str):
raise ValueError(
f"Unsupported image value '{image}'. "
"Provide a GCS URI (gs://...), a dict with 'gcsUri' or "
"'bytesBase64Encoded'/'mimeType', or a binary file-like object."
)
else:
# File-like object — encode to base64
image_data = _convert_image_to_vertex_format(image)
instance_dict["image"] = image_data
params_copy.pop("image")
# Extract a nested "parameters" block that map_openai_params may have placed
# inside params_copy (e.g. from provider-specific pass-through). Merging it
# flat prevents the double-nesting bug:
# {"parameters": {"parameters": {...}}} ← wrong
# {"parameters": {...}} ← correct
nested_params = params_copy.pop("parameters", None)
vertex_params: Dict[str, Any] = {}
if isinstance(nested_params, dict):
vertex_params.update(nested_params)
vertex_params.update(params_copy)
# Build request data directly (TypedDict doesn't have model_dump)
request_data: Dict[str, Any] = {"instances": [instance_dict]}
# Only add parameters if there are any
if vertex_params:
request_data["parameters"] = vertex_params
# Append :predictLongRunning endpoint to api_base
url = f"{api_base}:predictLongRunning"
# No files needed - everything is in JSON
return request_data, [], url
def transform_video_create_response(
self,
model: str,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
custom_llm_provider: Optional[str] = None,
request_data: Optional[Dict] = None,
) -> VideoObject:
"""
Transform the Veo video creation response.
Veo returns:
{
"name": "projects/PROJECT_ID/locations/LOCATION/publishers/google/models/MODEL/operations/OPERATION_ID"
}
We return this as a VideoObject with:
- id: operation name (used for polling)
- status: "processing"
- usage: includes duration_seconds for cost calculation
"""
response_data = raw_response.json()
operation_name = response_data.get("name")
if not operation_name:
raise ValueError(f"No operation name in Veo response: {response_data}")
if custom_llm_provider:
video_id = encode_video_id_with_provider(
operation_name, custom_llm_provider, model
)
else:
video_id = operation_name
video_obj = VideoObject(
id=video_id, object="video", status="processing", model=model
)
usage_data = {}
if request_data:
parameters = request_data.get("parameters", {})
duration = (
parameters.get("durationSeconds")
or DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS
)
if duration is not None:
try:
usage_data["duration_seconds"] = float(duration)
except (ValueError, TypeError):
pass
video_obj.usage = usage_data
return video_obj
def transform_video_status_retrieve_request(
self,
video_id: str,
api_base: str,
litellm_params: GenericLiteLLMParams,
headers: dict,
) -> Tuple[str, Dict]:
"""
Transform the video status retrieve request for Veo API.
Veo polls operations using :fetchPredictOperation endpoint with POST request.
"""
operation_name = extract_original_video_id(video_id)
model = self.extract_model_from_operation_name(operation_name)
if not model:
raise ValueError(
f"Invalid operation name format: {operation_name}. "
"Expected format: projects/PROJECT/locations/LOCATION/publishers/google/models/MODEL/operations/OPERATION_ID"
)
# Construct the full URL including model ID
# URL format: https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/publishers/google/models/MODEL:fetchPredictOperation
# Strip trailing slashes from api_base and append model
url = f"{api_base.rstrip('/')}/{model}:fetchPredictOperation"
# Request body contains the operation name
params = {"operationName": operation_name}
return url, params
def transform_video_status_retrieve_response(
self,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
custom_llm_provider: Optional[str] = None,
) -> VideoObject:
"""
Transform the Veo operation status response.
Veo returns:
{
"name": "projects/.../operations/OPERATION_ID",
"done": false # or true when complete
}
When done=true:
{
"name": "projects/.../operations/OPERATION_ID",
"done": true,
"response": {
"@type": "type.googleapis.com/cloud.ai.large_models.vision.GenerateVideoResponse",
"raiMediaFilteredCount": 0,
"videos": [
{
"bytesBase64Encoded": "...",
"mimeType": "video/mp4"
}
]
}
}
"""
response_data = raw_response.json()
operation_name = response_data.get("name", "")
is_done = response_data.get("done", False)
error_data = response_data.get("error")
# Extract model from operation name
model = self.extract_model_from_operation_name(operation_name)
if custom_llm_provider:
video_id = encode_video_id_with_provider(
operation_name, custom_llm_provider, model
)
else:
video_id = operation_name
# Convert createTime to Unix timestamp
create_time_str = response_data.get("metadata", {}).get("createTime")
if create_time_str:
try:
created_at = _convert_vertex_datetime_to_openai_datetime(
create_time_str
)
except Exception:
created_at = int(time.time())
else:
created_at = int(time.time())
if error_data:
status = "failed"
elif is_done:
status = "completed"
else:
status = "processing"
video_obj = VideoObject(
id=video_id,
object="video",
status=status,
model=model,
created_at=created_at,
error=error_data,
)
return video_obj
def transform_video_content_request(
self,
video_id: str,
api_base: str,
litellm_params: GenericLiteLLMParams,
headers: dict,
variant: Optional[str] = None,
) -> Tuple[str, Dict]:
"""
Transform the video content request for Veo API.
For Veo, we need to:
1. Poll the operation status to ensure it's complete
2. Extract the base64 video data from the response
3. Return it for decoding
Since we need to make an HTTP call here, we'll use the same fetchPredictOperation
approach as status retrieval.
"""
return self.transform_video_status_retrieve_request(
video_id, api_base, litellm_params, headers
)
def transform_video_content_response(
self,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
) -> bytes:
"""
Transform the Veo video content download response.
Extracts the base64 encoded video from the response and decodes it to bytes.
"""
response_data = raw_response.json()
if not response_data.get("done", False):
raise ValueError(
"Video generation is not complete yet. "
"Please check status with video_status() before downloading."
)
try:
video_response = response_data.get("response", {})
videos = video_response.get("videos", [])
if not videos or len(videos) == 0:
raise ValueError("No video data found in completed operation")
# Get the first video
video_data = videos[0]
base64_encoded = video_data.get("bytesBase64Encoded")
if not base64_encoded:
raise ValueError("No base64 encoded video data found")
# Decode base64 to bytes
video_bytes = base64.b64decode(base64_encoded)
return video_bytes
except (KeyError, IndexError) as e:
raise ValueError(f"Failed to extract video data: {e}")
def transform_video_remix_request(
self,
video_id: str,
prompt: str,
api_base: str,
litellm_params: GenericLiteLLMParams,
headers: dict,
extra_body: Optional[Dict[str, Any]] = None,
) -> Tuple[str, Dict]:
"""
Video remix is not supported by Veo API.
"""
raise NotImplementedError(
"Video remix is not supported by Vertex AI Veo. "
"Please use video_generation() to create new videos."
)
def transform_video_remix_response(
self,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
custom_llm_provider: Optional[str] = None,
) -> VideoObject:
"""Video remix is not supported."""
raise NotImplementedError("Video remix is not supported by Vertex AI Veo.")
def transform_video_list_request(
self,
api_base: str,
litellm_params: GenericLiteLLMParams,
headers: dict,
after: Optional[str] = None,
limit: Optional[int] = None,
order: Optional[str] = None,
extra_query: Optional[Dict[str, Any]] = None,
) -> Tuple[str, Dict]:
"""
Video list is not supported by Veo API.
"""
raise NotImplementedError(
"Video list is not supported by Vertex AI Veo. "
"Use the operations endpoint directly if you need to list operations."
)
def transform_video_list_response(
self,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
custom_llm_provider: Optional[str] = None,
) -> Dict[str, str]:
"""Video list is not supported."""
raise NotImplementedError("Video list is not supported by Vertex AI Veo.")
def transform_video_delete_request(
self,
video_id: str,
api_base: str,
litellm_params: GenericLiteLLMParams,
headers: dict,
) -> Tuple[str, Dict]:
"""
Video delete is not supported by Veo API.
"""
raise NotImplementedError(
"Video delete is not supported by Vertex AI Veo. "
"Videos are automatically cleaned up by Google."
)
def transform_video_delete_response(
self,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
) -> VideoObject:
"""Video delete is not supported."""
raise NotImplementedError("Video delete is not supported by Vertex AI Veo.")
def get_error_class(
self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
) -> BaseLLMException:
from litellm.llms.vertex_ai.common_utils import VertexAIError
return VertexAIError(
status_code=status_code,
message=error_message,
headers=headers,
)