chore: initial public snapshot for GitHub upload
This commit is contained in:
@@ -0,0 +1,41 @@
|
||||
from typing import List
|
||||
|
||||
from litellm.llms.base_llm.audio_transcription.transformation import (
|
||||
AudioTranscriptionRequestData,
|
||||
)
|
||||
from litellm.types.llms.openai import OpenAIAudioTranscriptionOptionalParams
|
||||
from litellm.types.utils import FileTypes
|
||||
|
||||
from .whisper_transformation import OpenAIWhisperAudioTranscriptionConfig
|
||||
|
||||
|
||||
class OpenAIGPTAudioTranscriptionConfig(OpenAIWhisperAudioTranscriptionConfig):
    """Transcription config for the `gpt-4o-transcribe` family of models.

    Inherits URL building, auth, and response handling from the Whisper
    config; overrides only the supported params and request payload.
    """

    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
        """Return the OpenAI params accepted by `gpt-4o-transcribe` models."""
        supported: List[OpenAIAudioTranscriptionOptionalParams] = [
            "language",
            "prompt",
            "response_format",
            "temperature",
            "include",
        ]
        return supported

    def transform_audio_transcription_request(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
    ) -> AudioTranscriptionRequestData:
        """Build the multipart payload for the transcription request.

        Caller-supplied ``optional_params`` take precedence over the base
        ``model``/``file`` keys, matching the original merge order.
        """
        payload: dict = {"model": model, "file": audio_file}
        payload.update(optional_params)
        return AudioTranscriptionRequestData(data=payload)
|
||||
@@ -0,0 +1,159 @@
|
||||
# OpenAI Audio Transcription Guardrail Translation Handler
|
||||
|
||||
Handler for processing OpenAI's audio transcription endpoint (`/v1/audio/transcriptions`) with guardrails.
|
||||
|
||||
## Overview
|
||||
|
||||
This handler processes audio transcription responses by:
|
||||
1. Applying guardrails to the transcribed text output
|
||||
2. Returning the input unchanged (since input is an audio file, not text)
|
||||
|
||||
## Data Format
|
||||
|
||||
### Input Format
|
||||
|
||||
The input is an audio file, which cannot be guardrailed (it's binary data, not text).
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "whisper-1",
|
||||
"file": "<audio file>",
|
||||
"response_format": "json",
|
||||
"language": "en"
|
||||
}
|
||||
```
|
||||
|
||||
### Output Format
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "This is the transcribed text from the audio file."
|
||||
}
|
||||
```
|
||||
|
||||
Or with additional metadata:
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "This is the transcribed text from the audio file.",
|
||||
"duration": 3.5,
|
||||
"language": "en"
|
||||
}
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
The handler is automatically discovered and applied when guardrails are used with the audio transcription endpoint.
|
||||
|
||||
### Example: Using Guardrails with Audio Transcription
|
||||
|
||||
```bash
|
||||
curl -X POST 'http://localhost:4000/v1/audio/transcriptions' \
|
||||
-H 'Authorization: Bearer your-api-key' \
|
||||
-F 'file=@audio.mp3' \
|
||||
-F 'model=whisper-1' \
|
||||
-F 'guardrails=["pii_mask"]'
|
||||
```
|
||||
|
||||
The guardrail will be applied to the **output** transcribed text only.
|
||||
|
||||
### Example: PII Masking in Transcribed Text
|
||||
|
||||
```bash
|
||||
curl -X POST 'http://localhost:4000/v1/audio/transcriptions' \
|
||||
-H 'Authorization: Bearer your-api-key' \
|
||||
-F 'file=@meeting_recording.mp3' \
|
||||
-F 'model=whisper-1' \
|
||||
-F 'guardrails=["mask_pii"]' \
|
||||
-F 'response_format=json'
|
||||
```
|
||||
|
||||
If the audio contains: "My name is John Doe and my email is john@example.com"
|
||||
|
||||
The transcription output will be: "My name is [NAME_REDACTED] and my email is [EMAIL_REDACTED]"
|
||||
|
||||
### Example: Content Moderation on Transcriptions
|
||||
|
||||
```bash
|
||||
curl -X POST 'http://localhost:4000/v1/audio/transcriptions' \
|
||||
-H 'Authorization: Bearer your-api-key' \
|
||||
-F 'file=@audio.wav' \
|
||||
-F 'model=whisper-1' \
|
||||
-F 'guardrails=["content_moderation"]'
|
||||
```
|
||||
|
||||
## Implementation Details
|
||||
|
||||
### Input Processing
|
||||
|
||||
- **Status**: Not applicable
|
||||
- **Reason**: Input is an audio file (binary data), not text
|
||||
- **Result**: Request data returned unchanged
|
||||
|
||||
### Output Processing
|
||||
|
||||
- **Field**: `text` (string)
|
||||
- **Processing**: Applies guardrail to the transcribed text
|
||||
- **Result**: Updated text in response
|
||||
|
||||
## Use Cases
|
||||
|
||||
1. **PII Protection**: Automatically redact personally identifiable information from transcriptions
|
||||
2. **Content Filtering**: Remove or flag inappropriate content in transcribed audio
|
||||
3. **Compliance**: Ensure transcriptions meet regulatory requirements
|
||||
4. **Data Sanitization**: Clean up transcriptions before storage or further processing
|
||||
|
||||
## Extension
|
||||
|
||||
Override these methods to customize behavior:
|
||||
|
||||
- `process_output_response()`: Customize how transcribed text is processed
|
||||
- `process_input_messages()`: Currently a no-op, but can be overridden if needed
|
||||
|
||||
## Supported Call Types
|
||||
|
||||
- `CallTypes.transcription` - Synchronous audio transcription
|
||||
- `CallTypes.atranscription` - Asynchronous audio transcription
|
||||
|
||||
## Notes
|
||||
|
||||
- Input processing is a no-op since audio files cannot be text-guardrailed
|
||||
- Only the transcribed text output is processed
|
||||
- Guardrails apply after transcription is complete
|
||||
- Both sync and async call types use the same handler
|
||||
- Works with all Whisper models and response formats
|
||||
|
||||
## Common Patterns
|
||||
|
||||
### Transcribe and Redact PII
|
||||
|
||||
```python
|
||||
import litellm
|
||||
|
||||
response = litellm.transcription(
|
||||
model="whisper-1",
|
||||
file=open("interview.mp3", "rb"),
|
||||
guardrails=["mask_pii"],
|
||||
)
|
||||
|
||||
# response.text will have PII redacted
|
||||
print(response.text)
|
||||
```
|
||||
|
||||
### Async Transcription with Guardrails
|
||||
|
||||
```python
|
||||
import litellm
|
||||
import asyncio
|
||||
|
||||
async def transcribe_with_guardrails():
|
||||
response = await litellm.atranscription(
|
||||
model="whisper-1",
|
||||
file=open("audio.mp3", "rb"),
|
||||
guardrails=["content_filter"],
|
||||
)
|
||||
return response.text
|
||||
|
||||
text = asyncio.run(transcribe_with_guardrails())
|
||||
```
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
"""OpenAI Audio Transcription handler for Unified Guardrails."""

from litellm.llms.openai.transcriptions.guardrail_translation.handler import (
    OpenAIAudioTranscriptionHandler,
)
from litellm.types.utils import CallTypes

# Map transcription call types to the handler that applies guardrails to
# their transcribed-text output. Sync and async variants share one handler
# because the response shape is identical in both cases.
guardrail_translation_mappings = {
    CallTypes.transcription: OpenAIAudioTranscriptionHandler,
    CallTypes.atranscription: OpenAIAudioTranscriptionHandler,
}

# Explicit public API of this module.
__all__ = ["guardrail_translation_mappings", "OpenAIAudioTranscriptionHandler"]
|
||||
@@ -0,0 +1,117 @@
|
||||
"""
|
||||
OpenAI Audio Transcription Handler for Unified Guardrails
|
||||
|
||||
This module provides guardrail translation support for OpenAI's audio transcription endpoint.
|
||||
The handler processes the output transcribed text (input is audio, so no text to guardrail).
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Optional
|
||||
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.llms.base_llm.guardrail_translation.base_translation import BaseTranslation
|
||||
from litellm.types.utils import GenericGuardrailAPIInputs
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from litellm.integrations.custom_guardrail import CustomGuardrail
|
||||
from litellm.utils import TranscriptionResponse
|
||||
|
||||
|
||||
class OpenAIAudioTranscriptionHandler(BaseTranslation):
    """
    Guardrail translation handler for OpenAI audio transcription responses.

    Only the transcribed text in the response is guardrailed. The request
    input is an audio file (binary data), so input-side processing is a
    no-op by design.
    """

    async def process_input_messages(
        self,
        data: dict,
        guardrail_to_apply: "CustomGuardrail",
        litellm_logging_obj: Optional[Any] = None,
    ) -> Any:
        """
        No-op input hook: the request carries an audio file, not text.

        Args:
            data: Request data dictionary containing the audio file.
            guardrail_to_apply: The guardrail instance (unused here).
            litellm_logging_obj: Optional logging object (unused here).

        Returns:
            The request data, unchanged.
        """
        verbose_proxy_logger.debug(
            "OpenAI Audio Transcription: Input processing not applicable "
            "(input is audio file, not text)"
        )
        return data

    async def process_output_response(
        self,
        response: "TranscriptionResponse",
        guardrail_to_apply: "CustomGuardrail",
        litellm_logging_obj: Optional[Any] = None,
        user_api_key_dict: Optional[Any] = None,
    ) -> Any:
        """
        Apply the guardrail to the transcribed text on the response.

        Args:
            response: Transcription response holding the transcribed text.
            guardrail_to_apply: The guardrail instance to run.
            litellm_logging_obj: Optional logging object forwarded to the guardrail.
            user_api_key_dict: User API key metadata forwarded to the guardrail.

        Returns:
            The response, with ``response.text`` replaced by the guardrailed text.
        """
        # Guard clause: nothing to do when there is no text on the response.
        if not hasattr(response, "text") or response.text is None:
            verbose_proxy_logger.debug(
                "OpenAI Audio Transcription: No text in response to process"
            )
            return response

        # Guard clause: only plain strings are supported.
        if not isinstance(response.text, str):
            verbose_proxy_logger.debug(
                "OpenAI Audio Transcription: Unexpected text type: %s. Expected string.",
                type(response.text),
            )
            return response

        text_before = response.text

        # Request-data dict carries the response plus prefixed user API key
        # metadata so the guardrail has call context available.
        request_data: dict = {"response": response}
        prefixed_metadata = self.transform_user_api_key_dict_to_metadata(
            user_api_key_dict
        )
        if prefixed_metadata:
            request_data["litellm_metadata"] = prefixed_metadata

        guardrail_inputs = GenericGuardrailAPIInputs(texts=[text_before])
        # Forward model info from the response when present.
        if hasattr(response, "model") and response.model:
            guardrail_inputs["model"] = response.model

        guardrail_result = await guardrail_to_apply.apply_guardrail(
            inputs=guardrail_inputs,
            request_data=request_data,
            input_type="response",
            logging_obj=litellm_logging_obj,
        )
        # Fall back to the original text when the guardrail returns nothing.
        new_texts = guardrail_result.get("texts", [])
        response.text = new_texts[0] if new_texts else text_before

        verbose_proxy_logger.debug(
            "OpenAI Audio Transcription: Applied guardrail to transcribed text. "
            "Original length: %d, New length: %d",
            len(text_before),
            len(response.text),
        )

        return response
|
||||
@@ -0,0 +1,231 @@
|
||||
from typing import TYPE_CHECKING, Optional, Union, cast
|
||||
|
||||
import httpx
|
||||
from openai import AsyncOpenAI, OpenAI
|
||||
from pydantic import BaseModel
|
||||
|
||||
import litellm
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from aiohttp import ClientSession
|
||||
from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||
from litellm.llms.base_llm.audio_transcription.transformation import (
|
||||
BaseAudioTranscriptionConfig,
|
||||
)
|
||||
from litellm.types.utils import FileTypes
|
||||
from litellm.utils import (
|
||||
TranscriptionResponse,
|
||||
convert_to_model_response_object,
|
||||
extract_duration_from_srt_or_vtt,
|
||||
)
|
||||
|
||||
from ..openai import OpenAIChatCompletion
|
||||
|
||||
|
||||
class OpenAIAudioTranscription(OpenAIChatCompletion):
    """Handles OpenAI `/v1/audio/transcriptions` calls, sync and async."""

    # Audio Transcriptions
    async def make_openai_audio_transcriptions_request(
        self,
        openai_aclient: AsyncOpenAI,
        data: dict,
        timeout: Union[float, httpx.Timeout],
    ):
        """
        Call the async transcription endpoint.

        Always uses `with_raw_response` so response headers can be captured
        and attached to the logging object by the caller.

        Returns:
            Tuple of (response headers as a dict, parsed transcription response).
        """
        raw_response = (
            await openai_aclient.audio.transcriptions.with_raw_response.create(
                **data, timeout=timeout
            )
        )  # type: ignore
        headers = dict(raw_response.headers)
        response = raw_response.parse()
        return headers, response

    def make_sync_openai_audio_transcriptions_request(
        self,
        openai_client: OpenAI,
        data: dict,
        timeout: Union[float, httpx.Timeout],
    ):
        """
        Call the sync transcription endpoint.

        - uses `with_raw_response` (headers captured) when
          `litellm.return_response_headers` is True
        - calls `create` directly (no headers) by default

        Returns:
            Tuple of (headers dict or None, transcription response).
        """
        if litellm.return_response_headers is True:
            raw_response = (
                openai_client.audio.transcriptions.with_raw_response.create(
                    **data, timeout=timeout
                )
            )  # type: ignore
            headers = dict(raw_response.headers)
            response = raw_response.parse()
            return headers, response
        response = openai_client.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
        return None, response

    def audio_transcriptions(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
        model_response: TranscriptionResponse,
        timeout: float,
        max_retries: int,
        logging_obj: LiteLLMLoggingObj,
        api_key: Optional[str],
        api_base: Optional[str],
        client=None,
        atranscription: bool = False,
        provider_config: Optional[BaseAudioTranscriptionConfig] = None,
        shared_session: Optional["ClientSession"] = None,
    ) -> TranscriptionResponse:
        """
        Handle an audio transcription request.

        Builds the request payload (via `provider_config` when given),
        delegates to `async_audio_transcriptions` when `atranscription` is
        True, otherwise performs the call synchronously.

        Returns:
            A `TranscriptionResponse` (or an awaitable of one on the async path).
        """
        if provider_config is not None:
            transformed_data = provider_config.transform_audio_transcription_request(
                model=model,
                audio_file=audio_file,
                optional_params=optional_params,
                litellm_params=litellm_params,
            )

            data = cast(dict, transformed_data.data)
        else:
            data = {"model": model, "file": audio_file, **optional_params}

        if atranscription is True:
            return self.async_audio_transcriptions(  # type: ignore
                audio_file=audio_file,
                data=data,
                model_response=model_response,
                timeout=timeout,
                api_key=api_key,
                api_base=api_base,
                client=client,
                max_retries=max_retries,
                logging_obj=logging_obj,
                shared_session=shared_session,
            )

        openai_client: OpenAI = self._get_openai_client(  # type: ignore
            is_async=False,
            api_key=api_key,
            api_base=api_base,
            timeout=timeout,
            max_retries=max_retries,
            client=client,
        )

        ## LOGGING
        logging_obj.pre_call(
            input=None,
            api_key=openai_client.api_key,
            additional_args={
                "api_base": openai_client._base_url._uri_reference,
                # fixed: this is the sync path (was incorrectly logged as True)
                "atranscription": False,
                "complete_input_dict": data,
            },
        )
        _, response = self.make_sync_openai_audio_transcriptions_request(
            openai_client=openai_client,
            data=data,
            timeout=timeout,
        )

        if isinstance(response, BaseModel):
            stringified_response = response.model_dump()
        else:
            # Non-JSON formats (srt/vtt/text) return a raw string; recover the
            # duration for cost calculation, mirroring the async path.
            duration = extract_duration_from_srt_or_vtt(response)
            stringified_response = TranscriptionResponse(text=response).model_dump()
            stringified_response["_audio_transcription_duration"] = duration

        ## LOGGING
        logging_obj.post_call(
            input=get_audio_file_name(audio_file),
            api_key=api_key,
            additional_args={"complete_input_dict": data},
            original_response=stringified_response,
        )
        hidden_params = {"model": model, "custom_llm_provider": "openai"}
        final_response: TranscriptionResponse = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
        return final_response

    async def async_audio_transcriptions(
        self,
        audio_file: FileTypes,
        data: dict,
        model_response: TranscriptionResponse,
        timeout: float,
        logging_obj: LiteLLMLoggingObj,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        client=None,
        max_retries=None,
        shared_session: Optional["ClientSession"] = None,
    ):
        """
        Async variant of `audio_transcriptions`.

        Raises:
            Exception: re-raises any error after logging it via `post_call`.
        """
        try:
            openai_aclient: AsyncOpenAI = self._get_openai_client(  # type: ignore
                is_async=True,
                api_key=api_key,
                api_base=api_base,
                timeout=timeout,
                max_retries=max_retries,
                client=client,
                shared_session=shared_session,
            )

            ## LOGGING
            logging_obj.pre_call(
                input=None,
                api_key=openai_aclient.api_key,
                additional_args={
                    "api_base": openai_aclient._base_url._uri_reference,
                    "atranscription": True,
                    "complete_input_dict": data,
                },
            )
            headers, response = await self.make_openai_audio_transcriptions_request(
                openai_aclient=openai_aclient,
                data=data,
                timeout=timeout,
            )
            logging_obj.model_call_details["response_headers"] = headers
            if isinstance(response, BaseModel):
                stringified_response = response.model_dump()
            else:
                duration = extract_duration_from_srt_or_vtt(response)
                stringified_response = TranscriptionResponse(text=response).model_dump()
                stringified_response["_audio_transcription_duration"] = duration
            ## LOGGING
            logging_obj.post_call(
                input=get_audio_file_name(audio_file),
                api_key=api_key,
                additional_args={"complete_input_dict": data},
                original_response=stringified_response,
            )
            # Extract the actual model from data instead of hardcoding "whisper-1"
            actual_model = data.get("model", "whisper-1")
            hidden_params = {"model": actual_model, "custom_llm_provider": "openai"}

            return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
        except Exception as e:
            ## LOGGING
            logging_obj.post_call(
                # fixed: previously passed the `input` builtin by mistake
                input=get_audio_file_name(audio_file),
                api_key=api_key,
                original_response=str(e),
            )
            raise e
|
||||
@@ -0,0 +1,150 @@
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from httpx import Headers, Response
|
||||
|
||||
from litellm.llms.base_llm.audio_transcription.transformation import (
|
||||
AudioTranscriptionRequestData,
|
||||
BaseAudioTranscriptionConfig,
|
||||
)
|
||||
from litellm.llms.base_llm.chat.transformation import BaseLLMException
|
||||
from litellm.secret_managers.main import get_secret_str
|
||||
from litellm.types.llms.openai import (
|
||||
AllMessageValues,
|
||||
OpenAIAudioTranscriptionOptionalParams,
|
||||
)
|
||||
from litellm.types.utils import FileTypes, TranscriptionResponse
|
||||
|
||||
from ..common_utils import OpenAIError
|
||||
|
||||
|
||||
class OpenAIWhisperAudioTranscriptionConfig(BaseAudioTranscriptionConfig):
    """Transcription config for OpenAI `whisper-1` style models."""

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """
        OPTIONAL

        Build the full request URL by attaching the transcription endpoint
        (`/v1/audio/transcriptions`) to the configured api_base.
        """
        # Normalize: drop any trailing slash; treat a missing base as "".
        base = api_base.rstrip("/") if api_base else ""
        # Avoid doubling the version prefix when the base already ends in /v1.
        if base and base.endswith("/v1"):
            suffix = "/audio/transcriptions"
        else:
            suffix = "/v1/audio/transcriptions"
        return f"{base}{suffix}"

    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
        """Return the OpenAI params accepted by the `whisper-1` models."""
        supported: List[OpenAIAudioTranscriptionOptionalParams] = [
            "language",
            "prompt",
            "response_format",
            "temperature",
            "timestamp_granularities",
        ]
        return supported

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """Copy the supported OpenAI params into `optional_params`."""
        allowed = set(self.get_supported_openai_params(model))
        optional_params.update(
            {key: value for key, value in non_default_params.items() if key in allowed}
        )
        return optional_params

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        """Attach the bearer auth header, resolving the key from the env if needed."""
        resolved_key = api_key or get_secret_str("OPENAI_API_KEY")
        headers["Authorization"] = f"Bearer {resolved_key}"
        return headers

    def transform_audio_transcription_request(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
    ) -> AudioTranscriptionRequestData:
        """Build the multipart payload for the transcription request."""
        payload: dict = {"model": model, "file": audio_file}
        payload.update(optional_params)

        # Force verbose_json for unset/plain text/json response formats so the
        # response carries 'duration' - used for cost calculation.
        if "response_format" not in payload or payload["response_format"] in (
            "text",
            "json",
        ):
            payload["response_format"] = "verbose_json"

        return AudioTranscriptionRequestData(data=payload)

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, Headers]
    ) -> BaseLLMException:
        """Wrap provider errors in the OpenAI-specific exception type."""
        return OpenAIError(
            status_code=status_code, message=error_message, headers=headers
        )

    def transform_audio_transcription_response(
        self,
        raw_response: Response,
    ) -> TranscriptionResponse:
        """Parse the raw HTTP response into a `TranscriptionResponse`.

        Raises:
            ValueError: when the body is not JSON or does not match the
                expected transcription response shape.
        """
        try:
            body = raw_response.json()
        except Exception as e:
            raise ValueError(
                f"Error transforming response to json: {str(e)}\nResponse: {raw_response.text}"
            )

        known_fields = TranscriptionResponse.model_fields.keys()
        if not any(field in body for field in known_fields):
            raise ValueError(
                "Invalid response format. Received response does not match the expected format. Got: ",
                body,
            )
        return TranscriptionResponse(**body)
|
||||
Reference in New Issue
Block a user