""" Translate from OpenAI's `/v1/chat/completions` to Perplexity's `/v1/chat/completions` """ from typing import Any, List, Optional, Tuple import httpx import litellm from litellm._logging import verbose_logger from litellm.secret_managers.main import get_secret_str from litellm.types.llms.openai import AllMessageValues from litellm.types.utils import Usage, PromptTokensDetailsWrapper from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig from litellm.types.utils import ModelResponse from litellm.types.llms.openai import ChatCompletionAnnotation from litellm.types.llms.openai import ChatCompletionAnnotationURLCitation class PerplexityChatConfig(OpenAIGPTConfig): @property def custom_llm_provider(self) -> Optional[str]: return "perplexity" def _get_openai_compatible_provider_info( self, api_base: Optional[str], api_key: Optional[str] ) -> Tuple[Optional[str], Optional[str]]: api_base = api_base or get_secret_str("PERPLEXITY_API_BASE") or "https://api.perplexity.ai" # type: ignore dynamic_api_key = ( api_key or get_secret_str("PERPLEXITYAI_API_KEY") or get_secret_str("PERPLEXITY_API_KEY") ) return api_base, dynamic_api_key def get_supported_openai_params(self, model: str) -> list: """ Perplexity supports a subset of OpenAI params Ref: https://docs.perplexity.ai/api-reference/chat-completions Eg. Perplexity does not support tools, tool_choice, function_call, functions, etc. """ base_openai_params = [ "frequency_penalty", "max_tokens", "max_completion_tokens", "presence_penalty", "response_format", "stream", "temperature", "top_p", "max_retries", "extra_headers", ] try: if litellm.supports_reasoning( model=model, custom_llm_provider=self.custom_llm_provider ): base_openai_params.append("reasoning_effort") except Exception as e: verbose_logger.debug(f"Error checking if model supports reasoning: {e}") try: if litellm.supports_web_search( model=model, custom_llm_provider=self.custom_llm_provider ): base_openai_params.append("web_search_options") except Exception as e: verbose_logger.debug(f"Error checking if model supports web search: {e}") return base_openai_params def transform_response( self, model: str, raw_response: httpx.Response, model_response: ModelResponse, logging_obj: LiteLLMLoggingObj, request_data: dict, messages: List[AllMessageValues], optional_params: dict, litellm_params: dict, encoding: Any, api_key: Optional[str] = None, json_mode: Optional[bool] = None, ) -> ModelResponse: # Call the parent transform_response first to handle the standard transformation model_response = super().transform_response( model=model, raw_response=raw_response, model_response=model_response, logging_obj=logging_obj, request_data=request_data, messages=messages, optional_params=optional_params, litellm_params=litellm_params, encoding=encoding, api_key=api_key, json_mode=json_mode, ) # Extract and enhance usage with Perplexity-specific fields try: raw_response_json = raw_response.json() self._enhance_usage_with_perplexity_fields( model_response, raw_response_json ) self._add_citations_as_annotations(model_response, raw_response_json) except Exception as e: verbose_logger.debug( f"Error extracting Perplexity-specific usage fields: {e}" ) return model_response def _enhance_usage_with_perplexity_fields( self, model_response: ModelResponse, raw_response_json: dict ) -> None: """ Extract citation tokens and search queries from Perplexity API response and add them to the usage object using standard LiteLLM fields. """ if not hasattr(model_response, "usage") or model_response.usage is None: # Create a usage object if it doesn't exist (when usage was None) model_response.usage = Usage( # type: ignore[attr-defined] prompt_tokens=0, completion_tokens=0, total_tokens=0 ) usage = model_response.usage # type: ignore[attr-defined] # Extract citation tokens count citations = raw_response_json.get("citations", []) citation_tokens = 0 if citations: # Count total characters in citations as a proxy for citation tokens # This is an estimation - in practice, you might want to use proper tokenization total_citation_chars = sum( len(str(citation)) for citation in citations if citation ) # Rough estimation: ~4 characters per token (OpenAI's general rule) if total_citation_chars > 0: citation_tokens = max(1, total_citation_chars // 4) # Extract search queries count from usage or response metadata # Perplexity might include this in the usage object or as separate metadata perplexity_usage = raw_response_json.get("usage", {}) # Try to extract search queries from usage field first, then root level num_search_queries = perplexity_usage.get("num_search_queries") if num_search_queries is None: num_search_queries = raw_response_json.get("num_search_queries") if num_search_queries is None: num_search_queries = perplexity_usage.get("search_queries") if num_search_queries is None: num_search_queries = raw_response_json.get("search_queries") # Create or update prompt_tokens_details to include web search requests and citation tokens if citation_tokens > 0 or ( num_search_queries is not None and num_search_queries > 0 ): if usage.prompt_tokens_details is None: usage.prompt_tokens_details = PromptTokensDetailsWrapper() # Store citation tokens count for cost calculation if citation_tokens > 0: setattr(usage, "citation_tokens", citation_tokens) # Store search queries count in the standard web_search_requests field if num_search_queries is not None and num_search_queries > 0: usage.prompt_tokens_details.web_search_requests = num_search_queries def _add_citations_as_annotations( self, model_response: ModelResponse, raw_response_json: dict ) -> None: """ Extract citations and search_results from Perplexity API response and add them as ChatCompletionAnnotation objects to the message. """ if not model_response.choices: return # Get the first choice (assuming single response) choice = model_response.choices[0] if not hasattr(choice, "message") or choice.message is None: return message = choice.message annotations = [] # Extract citations from the response citations = raw_response_json.get("citations", []) search_results = raw_response_json.get("search_results", []) # Create a mapping of URLs to search result titles url_to_title = {} for result in search_results: if isinstance(result, dict) and "url" in result and "title" in result: url_to_title[result["url"]] = result["title"] # Get the message content to find citation positions content = getattr(message, "content", "") if not content: return # Find all citation markers like [1], [2], [3], [4] in the text import re citation_pattern = r"\[(\d+)\]" citation_matches = list(re.finditer(citation_pattern, content)) # Create a mapping of citation numbers to URLs citation_number_to_url = {} for i, citation in enumerate(citations): if isinstance(citation, str): citation_number_to_url[i + 1] = citation # 1-indexed # Create annotations for each citation match found in the text for match in citation_matches: citation_number = int(match.group(1)) if citation_number in citation_number_to_url: url = citation_number_to_url[citation_number] title = url_to_title.get(url, "") # Create the URL citation annotation with actual text positions url_citation: ChatCompletionAnnotationURLCitation = { "url": url, "title": title, "start_index": match.start(), "end_index": match.end(), } annotation: ChatCompletionAnnotation = { "type": "url_citation", "url_citation": url_citation, } annotations.append(annotation) # Add annotations to the message if we have any if annotations: if not hasattr(message, "annotations") or message.annotations is None: message.annotations = [] message.annotations.extend(annotations) # Also add the raw citations and search_results as attributes for backward compatibility if citations: setattr(model_response, "citations", citations) if search_results: setattr(model_response, "search_results", search_results)