chore: initial public snapshot for github upload
This commit is contained in:
@@ -0,0 +1,6 @@
|
||||
"""
|
||||
Firecrawl API integration module.
|
||||
"""
|
||||
from litellm.llms.firecrawl.search.transformation import FirecrawlSearchConfig
|
||||
|
||||
__all__ = ["FirecrawlSearchConfig"]
|
||||
@@ -0,0 +1,6 @@
|
||||
"""
|
||||
Firecrawl Search API module.
|
||||
"""
|
||||
from litellm.llms.firecrawl.search.transformation import FirecrawlSearchConfig
|
||||
|
||||
__all__ = ["FirecrawlSearchConfig"]
|
||||
@@ -0,0 +1,218 @@
|
||||
"""
|
||||
Calls Firecrawl's /search endpoint to search the web.
|
||||
|
||||
Firecrawl API Reference: https://docs.firecrawl.dev/api-reference/endpoint/search
|
||||
"""
|
||||
from typing import Dict, List, Optional, TypedDict, Union
|
||||
|
||||
import httpx
|
||||
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||
from litellm.llms.base_llm.search.transformation import (
|
||||
BaseSearchConfig,
|
||||
SearchResponse,
|
||||
SearchResult,
|
||||
)
|
||||
from litellm.secret_managers.main import get_secret_str
|
||||
|
||||
|
||||
class _FirecrawlSearchRequestRequired(TypedDict):
|
||||
"""Required fields for Firecrawl Search API request."""
|
||||
|
||||
query: str # Required - search query
|
||||
|
||||
|
||||
class FirecrawlSearchRequest(_FirecrawlSearchRequestRequired, total=False):
    """
    Firecrawl Search API request format.

    Based on: https://docs.firecrawl.dev/api-reference/endpoint/search

    All fields below are optional (``total=False``); only ``query`` from the
    required base class must be present.
    """

    # Maximum number of results to return (API default 5, max 100).
    limit: int
    # Sources to search: 'web', 'images', 'news' (API default ['web']).
    sources: List[str]
    # Category filters, e.g. github, research, pdf.
    categories: List[Dict[str, str]]
    # Time-based search parameter.
    tbs: str
    # Location parameter for geo-targeting.
    location: str
    # ISO country code (API default 'US').
    country: str
    # Request timeout in milliseconds (API default 60000).
    timeout: int
    # Exclude invalid URLs from results (API default false).
    ignoreInvalidURLs: bool
    # Options for scraping the content of each search result.
    scrapeOptions: Dict
||||
class FirecrawlSearchConfig(BaseSearchConfig):
    """
    Config for calling Firecrawl's /search endpoint to search the web.

    Firecrawl API Reference: https://docs.firecrawl.dev/api-reference/endpoint/search
    """

    # Default base URL for the Firecrawl v2 API.
    FIRECRAWL_API_BASE = "https://api.firecrawl.dev/v2"

    @staticmethod
    def ui_friendly_name() -> str:
        """Return the human-readable provider name shown in the UI."""
        return "Firecrawl"

    def validate_environment(
        self,
        headers: Dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        **kwargs,
    ) -> Dict:
        """
        Validate environment and return headers.

        Args:
            headers: Mutable dict of request headers to populate in place.
            api_key: Firecrawl API key; falls back to the FIRECRAWL_API_KEY
                environment variable / secret manager.
            api_base: Accepted for interface compatibility; not used here.

        Returns:
            The headers dict with Authorization and Content-Type set.

        Raises:
            ValueError: If no API key is provided or found in the environment.
        """
        api_key = api_key or get_secret_str("FIRECRAWL_API_KEY")
        if not api_key:
            raise ValueError(
                "FIRECRAWL_API_KEY is not set. Set `FIRECRAWL_API_KEY` environment variable."
            )
        headers["Authorization"] = f"Bearer {api_key}"
        headers["Content-Type"] = "application/json"
        return headers

    def get_complete_url(
        self,
        api_base: Optional[str],
        optional_params: dict,
        data: Optional[Union[Dict, List[Dict]]] = None,
        **kwargs,
    ) -> str:
        """
        Get complete URL for the Search endpoint.

        Resolution order: explicit ``api_base`` argument → FIRECRAWL_API_BASE
        environment variable → class default. A "/search" path segment is
        appended when not already present.
        """
        api_base = (
            api_base or get_secret_str("FIRECRAWL_API_BASE") or self.FIRECRAWL_API_BASE
        )

        # Normalize trailing slashes so a base like "https://api.firecrawl.dev/v2/"
        # doesn't produce ".../v2//search", and a base already ending in
        # "/search" or "/search/" is left with a single clean suffix.
        api_base = api_base.rstrip("/")

        # Append "/search" to the api base if it's not already there
        if not api_base.endswith("/search"):
            api_base = f"{api_base}/search"

        return api_base

    def transform_search_request(
        self,
        query: Union[str, List[str]],
        optional_params: dict,
        **kwargs,
    ) -> Dict:
        """
        Transform Search request to Firecrawl API format.

        Transforms Perplexity unified spec parameters:
        - query → query (same)
        - max_results → limit
        - search_domain_filter → (not directly supported, can use scrapeOptions)
        - country → country
        - max_tokens_per_page → (not applicable, ignored)

        All other Firecrawl-specific parameters are passed through as-is.

        Args:
            query: Search query (string or list of strings). Firecrawl only
                supports single string queries, so lists are joined with spaces.
            optional_params: Optional parameters for the request.

        Returns:
            Dict with typed request data following FirecrawlSearchRequest spec.
        """
        if isinstance(query, list):
            # Firecrawl only supports single string queries, join with spaces
            query = " ".join(query)

        request_data: FirecrawlSearchRequest = {
            "query": query,
        }

        # Transform Perplexity unified spec parameters to Firecrawl format
        if "max_results" in optional_params:
            request_data["limit"] = optional_params["max_results"]

        if "country" in optional_params:
            request_data["country"] = optional_params["country"]

        # Convert to dict before dynamic key assignments
        result_data = dict(request_data)

        # Pass through provider-specific parameters as-is. Unified-spec params
        # not mapped above (e.g. search_domain_filter) are intentionally dropped
        # since Firecrawl has no direct equivalent.
        for param, value in optional_params.items():
            if (
                param not in self.get_supported_perplexity_optional_params()
                and param not in result_data
            ):
                result_data[param] = value

        # By default, request markdown content if not explicitly specified.
        # Firecrawl doesn't return content unless explicitly requested via scrapeOptions.
        if "scrapeOptions" not in result_data:
            result_data["scrapeOptions"] = {
                "formats": ["markdown"],
                "onlyMainContent": True,
            }

        return result_data

    def transform_search_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        **kwargs,
    ) -> SearchResponse:
        """
        Transform Firecrawl API response to LiteLLM unified SearchResponse format.

        Firecrawl → LiteLLM mappings:
        - data.web[].title → SearchResult.title
        - data.web[].url → SearchResult.url
        - data.web[].markdown OR data.web[].description → SearchResult.snippet
        - No date field in web results (set to None)
        - No last_updated field in Firecrawl response (set to None)

        Note: Firecrawl v2 returns results organized by source type (web,
        images, news). Web and news results are folded into the unified
        format; image results are ignored.

        Args:
            raw_response: Raw httpx response from Firecrawl API.
            logging_obj: Logging object for tracking.

        Returns:
            SearchResponse with standardized format.
        """
        response_json = raw_response.json()

        # Transform results to SearchResult objects
        results = []

        # Process web results (primary source)
        data = response_json.get("data", {})
        web_results = data.get("web", [])

        for result in web_results:
            # Use markdown if available, otherwise fall back to description
            snippet = result.get("markdown") or result.get("description", "")

            search_result = SearchResult(
                title=result.get("title", ""),
                url=result.get("url", ""),
                snippet=snippet,
                date=None,  # Web results don't include date
                last_updated=None,  # Firecrawl doesn't provide last_updated in response
            )
            results.append(search_result)

        # Process news results if available (they have date field)
        news_results = data.get("news", [])
        for result in news_results:
            snippet = result.get("markdown") or result.get("snippet", "")

            search_result = SearchResult(
                title=result.get("title", ""),
                url=result.get("url", ""),
                snippet=snippet,
                date=result.get("date"),  # News results include date
                last_updated=None,
            )
            results.append(search_result)

        return SearchResponse(
            results=results,
            object="search",
        )
|
||||
Reference in New Issue
Block a user