Files
lijiaoqiao/llm-gateway-competitors/litellm-wheel-src/litellm/llms/duckduckgo/search/transformation.py
2026-03-26 20:06:14 +08:00

258 lines
9.1 KiB
Python

"""
Calls DuckDuckGo's Instant Answer API to search the web.
DuckDuckGo API Reference: https://duckduckgo.com/api
"""
from typing import Dict, List, Literal, Optional, TypedDict, Union
from urllib.parse import urlencode
import httpx
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.search.transformation import (
BaseSearchConfig,
SearchResponse,
SearchResult,
)
from litellm.secret_managers.main import get_secret_str
class _DuckDuckGoSearchRequestRequired(TypedDict):
"""Required fields for DuckDuckGo Search API request."""
q: str # Required - search query
class DuckDuckGoSearchRequest(_DuckDuckGoSearchRequestRequired, total=False):
"""
DuckDuckGo Instant Answer API request format.
Based on: https://duckduckgo.com/api
"""
format: str # Optional - output format ('json', 'xml'), default 'json'
pretty: int # Optional - pretty print (0 or 1), default 1
no_redirect: int # Optional - skip HTTP redirects (0 or 1), default 0
no_html: int # Optional - remove HTML from text (0 or 1), default 0
skip_disambig: int # Optional - skip disambiguation results (0 or 1), default 0
class DuckDuckGoSearchConfig(BaseSearchConfig):
DUCKDUCKGO_API_BASE = "https://api.duckduckgo.com"
@staticmethod
def ui_friendly_name() -> str:
return "DuckDuckGo"
def get_http_method(self) -> Literal["GET", "POST"]:
"""
Get HTTP method for search requests.
DuckDuckGo Instant Answer API uses GET requests.
Returns:
HTTP method 'GET'
"""
return "GET"
def validate_environment(
self,
headers: Dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
**kwargs,
) -> Dict:
"""
Validate environment and return headers.
DuckDuckGo Instant Answer API does not require authentication.
"""
# DuckDuckGo API is free and doesn't require API key
headers["Content-Type"] = "application/json"
return headers
def get_complete_url(
self,
api_base: Optional[str],
optional_params: dict,
data: Optional[Union[Dict, List[Dict]]] = None,
**kwargs,
) -> str:
"""
Get complete URL for Search endpoint.
DuckDuckGo uses query parameters, so we construct the URL with the query.
"""
api_base = (
api_base
or get_secret_str("DUCKDUCKGO_API_BASE")
or self.DUCKDUCKGO_API_BASE
)
# Build query parameters from the transformed request body
if data and isinstance(data, dict) and "_duckduckgo_params" in data:
params = data["_duckduckgo_params"]
query_string = urlencode(params, doseq=True)
return f"{api_base}/?{query_string}"
return api_base
def transform_search_request(
self,
query: Union[str, List[str]],
optional_params: dict,
**kwargs,
) -> Dict:
"""
Transform Search request to DuckDuckGo API format.
Args:
query: Search query (string or list of strings). DuckDuckGo only supports single string queries.
optional_params: Optional parameters for the request
- max_results: Maximum number of search results (DuckDuckGo API doesn't directly support this, used for filtering)
- format: Output format ('json', 'xml')
- pretty: Pretty print (0 or 1)
- no_redirect: Skip HTTP redirects (0 or 1)
- no_html: Remove HTML from text (0 or 1)
- skip_disambig: Skip disambiguation results (0 or 1)
Returns:
Dict with typed request data following DuckDuckGoSearchRequest spec
"""
if isinstance(query, list):
# DuckDuckGo only supports single string queries
query = " ".join(query)
request_data: DuckDuckGoSearchRequest = {
"q": query,
"format": "json", # Always use JSON format
}
# Convert to dict before dynamic key assignments
result_data = dict(request_data)
if "max_results" in optional_params:
result_data["_max_results"] = optional_params["max_results"]
# Pass through DuckDuckGo-specific parameters
ddg_params = ["pretty", "no_redirect", "no_html", "skip_disambig"]
for param in ddg_params:
if param in optional_params:
result_data[param] = optional_params[param]
return {
"_duckduckgo_params": result_data,
}
def transform_search_response(
self,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
**kwargs,
) -> SearchResponse:
"""
Transform DuckDuckGo API response to LiteLLM unified SearchResponse format.
DuckDuckGo → LiteLLM mappings:
- RelatedTopics[].Text → SearchResult.title + snippet
- RelatedTopics[].FirstURL → SearchResult.url
- RelatedTopics[].Text → SearchResult.snippet
- No date/last_updated fields in DuckDuckGo response (set to None)
Args:
raw_response: Raw httpx response from DuckDuckGo API
logging_obj: Logging object for tracking
Returns:
SearchResponse with standardized format
"""
response_json = raw_response.json()
# Extract max_results from the request URL params
query_params = raw_response.request.url.params if raw_response.request else {}
max_results = None
if "_max_results" in query_params:
try:
max_results = int(query_params["_max_results"])
except (ValueError, TypeError):
pass
# Transform results to SearchResult objects
results = []
# DuckDuckGo can return results in different fields
# Priority: Abstract > Answer > RelatedTopics
# Check if there's an Abstract with URL
if response_json.get("AbstractURL") and response_json.get("AbstractText"):
abstract_result = SearchResult(
title=response_json.get("Heading", ""),
url=response_json.get("AbstractURL", ""),
snippet=response_json.get("AbstractText", ""),
date=None,
last_updated=None,
)
results.append(abstract_result)
# Process RelatedTopics
related_topics = response_json.get("RelatedTopics", [])
for topic in related_topics:
# Stop if we've reached max_results
if max_results is not None and len(results) >= max_results:
break
if isinstance(topic, dict):
# Check if it's a direct result
if "FirstURL" in topic and "Text" in topic:
text = topic.get("Text", "")
url = topic.get("FirstURL", "")
# Try to split title and snippet
if " - " in text:
parts = text.split(" - ", 1)
title = parts[0]
snippet = parts[1] if len(parts) > 1 else text
else:
title = text[:50] + "..." if len(text) > 50 else text
snippet = text
search_result = SearchResult(
title=title,
url=url,
snippet=snippet,
date=None,
last_updated=None,
)
results.append(search_result)
# Check if it contains nested topics
elif "Topics" in topic:
nested_topics = topic.get("Topics", [])
for nested_topic in nested_topics:
# Stop if we've reached max_results
if max_results is not None and len(results) >= max_results:
break
if "FirstURL" in nested_topic and "Text" in nested_topic:
text = nested_topic.get("Text", "")
url = nested_topic.get("FirstURL", "")
# Try to split title and snippet
if " - " in text:
parts = text.split(" - ", 1)
title = parts[0]
snippet = parts[1] if len(parts) > 1 else text
else:
title = text[:50] + "..." if len(text) > 50 else text
snippet = text
search_result = SearchResult(
title=title,
url=url,
snippet=snippet,
date=None,
last_updated=None,
)
results.append(search_result)
return SearchResponse(
results=results,
object="search",
)