chore: initial public snapshot for github upload

This commit is contained in:
Your Name
2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions

View File

@@ -0,0 +1,259 @@
"""
Translates from OpenAI's `/v1/audio/transcriptions` to Deepgram's `/v1/listen`
"""
from typing import List, Optional, Union
from urllib.parse import urlencode
from httpx import Headers, Response
from litellm.litellm_core_utils.audio_utils.utils import process_audio_file
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import (
AllMessageValues,
OpenAIAudioTranscriptionOptionalParams,
)
from litellm.types.utils import FileTypes, TranscriptionResponse
from ...base_llm.audio_transcription.transformation import (
AudioTranscriptionRequestData,
BaseAudioTranscriptionConfig,
)
from ..common_utils import DeepgramException
class DeepgramAudioTranscriptionConfig(BaseAudioTranscriptionConfig):
    """
    Translates OpenAI's `/v1/audio/transcriptions` interface to Deepgram's
    `/v1/listen` endpoint.

    Responsibilities:
      - map supported OpenAI params onto Deepgram query parameters,
      - build the `/v1/listen` URL (model + optional params in the query string),
      - send raw audio bytes as the request body (Deepgram takes binary, not multipart),
      - convert Deepgram's response JSON into an OpenAI-style `TranscriptionResponse`,
        including diarized transcripts when `diarize` was requested.
    """

    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
        """Return the OpenAI transcription params that map directly to Deepgram."""
        return ["language"]

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Copy supported OpenAI params from `non_default_params` into
        `optional_params`; unsupported params are silently dropped.
        """
        supported_params = self.get_supported_openai_params(model)
        for k, v in non_default_params.items():
            if k in supported_params:
                optional_params[k] = v
        return optional_params

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, Headers]
    ) -> BaseLLMException:
        """Wrap an HTTP error in the provider-specific exception type."""
        return DeepgramException(
            message=error_message, status_code=status_code, headers=headers
        )

    def transform_audio_transcription_request(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
    ) -> AudioTranscriptionRequestData:
        """
        Build the request payload for Deepgram.

        Deepgram accepts the raw audio bytes directly as the request body
        (no multipart form), so `files` is always None.

        Args:
            audio_file: A file path (str), a (filename, file_content) tuple,
                or raw binary data (bytes).

        Returns:
            AudioTranscriptionRequestData with binary `data` and `files=None`.
        """
        processed_audio = process_audio_file(audio_file)
        return AudioTranscriptionRequestData(
            data=processed_audio.file_content, files=None
        )

    def transform_audio_transcription_response(
        self,
        raw_response: Response,
    ) -> TranscriptionResponse:
        """
        Transform Deepgram's raw `/v1/listen` response into a
        `TranscriptionResponse` matching OpenAI's format.

        Raises:
            ValueError: if the response JSON is missing expected fields;
                the original exception is chained as the cause.
        """
        try:
            response_json = raw_response.json()

            # Deepgram nests results per channel/alternative; use the first of each.
            first_channel = response_json["results"]["channels"][0]
            first_alternative = first_channel["alternatives"][0]

            # Diarization is active iff word entries carry a 'speaker' field.
            has_diarization = False
            if "words" in first_alternative and len(first_alternative["words"]) > 0:
                has_diarization = "speaker" in first_alternative["words"][0]

            if not has_diarization:
                # No diarization: use the standard transcript.
                text = first_alternative["transcript"]
            elif "paragraphs" in first_alternative:
                # Diarization with paragraphs: Deepgram pre-formats a diarized transcript.
                text = first_alternative["paragraphs"]["transcript"]
            else:
                # Diarization without paragraphs: reconstruct from the word list.
                text = self._reconstruct_diarized_transcript(first_alternative["words"])

            response = TranscriptionResponse(text=text)

            # Additional metadata matching OpenAI's response shape.
            response["task"] = "transcribe"
            # Use detected_language if available; otherwise default to "en".
            detected_language = first_channel.get("detected_language")
            response["language"] = detected_language if detected_language else "en"
            response["duration"] = response_json["metadata"]["duration"]

            # Transform word timings to OpenAI's {word, start, end} format.
            if "words" in first_alternative:
                response["words"] = [
                    {"word": word["word"], "start": word["start"], "end": word["end"]}
                    for word in first_alternative["words"]
                ]

            # Keep the full provider response available for callers that need it.
            response._hidden_params = response_json
            return response
        except Exception as e:
            # Chain the original exception so the root cause is preserved.
            raise ValueError(
                f"Error transforming Deepgram response: {str(e)}\nResponse: {raw_response.text}"
            ) from e

    def _reconstruct_diarized_transcript(self, words: list) -> str:
        """
        Reconstruct a diarized transcript from words carrying speaker info.

        Fix over the previous version: every segment is now formatted
        uniformly as "Speaker N: ..." and segments are joined with a single
        newline. Previously only the final segment was wrapped in extra
        leading/trailing newlines, producing ragged output.

        Args:
            words: List of word objects with `speaker`, `word`, and
                optionally `punctuated_word`.

        Returns:
            Transcript with one "Speaker N: ..." line per speaker turn.
        """
        if not words:
            return ""

        segments: list = []
        current_speaker = None
        current_words: list = []

        for word_obj in words:
            speaker = word_obj.get("speaker")
            # Prefer punctuated_word; fall back to the bare word.
            word_text = word_obj.get("punctuated_word", word_obj.get("word", ""))
            if speaker != current_speaker:
                # Speaker turn: flush the previous segment, start a new one.
                if current_words:
                    segments.append(
                        f"Speaker {current_speaker}: {' '.join(current_words)}"
                    )
                current_speaker = speaker
                current_words = [word_text]
            else:
                current_words.append(word_text)

        # Flush the final segment.
        if current_words:
            segments.append(f"Speaker {current_speaker}: {' '.join(current_words)}")
        return "\n".join(segments)

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """
        Build the full `/v1/listen` request URL.

        Resolves the API base (arg > DEEPGRAM_API_BASE env var > default) and
        encodes the model plus filtered optional params into the query string.
        """
        if api_base is None:
            api_base = (
                get_secret_str("DEEPGRAM_API_BASE") or "https://api.deepgram.com/v1"
            )
        api_base = api_base.rstrip("/")  # Remove trailing slash if present

        # Model is always a query param; merge in filtered optional params.
        all_query_params = {"model": model}
        all_query_params.update(self._build_query_params(optional_params, model))

        # urlencode handles proper query-string escaping.
        return f"{api_base}/listen?{urlencode(all_query_params)}"

    def _format_param_value(self, value) -> str:
        """
        Format a parameter value for the query string.

        Booleans become lowercase "true"/"false" (Deepgram's expected form);
        everything else is stringified.
        """
        if isinstance(value, bool):
            return str(value).lower()
        return str(value)

    def _build_query_params(self, optional_params: dict, model: str) -> dict:
        """
        Build a dict of query parameters from `optional_params`.

        Args:
            optional_params: Dictionary of optional parameters.
            model: Model name.

        Returns:
            Dictionary of filtered, string-formatted query parameters.
        """
        query_params = {}
        provider_specific_params = self.get_provider_specific_params(
            optional_params=optional_params,
            model=model,
            openai_params=self.get_supported_openai_params(model),
        )
        for key, value in provider_specific_params.items():
            query_params[key] = self._format_param_value(value)
        return query_params

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        """
        Build the auth headers for a Deepgram request.

        Raises:
            ValueError: if no API key is provided and DEEPGRAM_API_KEY is not
                set. Previously this silently produced "Token None".
        """
        api_key = api_key or get_secret_str("DEEPGRAM_API_KEY")
        if api_key is None:
            raise ValueError(
                "Deepgram API key is required. Pass `api_key` or set the "
                "DEEPGRAM_API_KEY environment variable."
            )
        return {
            "Authorization": f"Token {api_key}",
        }

View File

@@ -0,0 +1,5 @@
from litellm.llms.base_llm.chat.transformation import BaseLLMException
class DeepgramException(BaseLLMException):
    """Provider-specific exception for errors returned by the Deepgram API."""