chore: initial public snapshot for github upload
This commit is contained in:
@@ -0,0 +1,6 @@
|
||||
"""
|
||||
Firecrawl API integration module.
|
||||
"""
|
||||
from litellm.llms.firecrawl.search.transformation import FirecrawlSearchConfig
|
||||
|
||||
__all__ = ["FirecrawlSearchConfig"]
|
||||
@@ -0,0 +1,6 @@
|
||||
"""
|
||||
Firecrawl Search API module.
|
||||
"""
|
||||
from litellm.llms.firecrawl.search.transformation import FirecrawlSearchConfig
|
||||
|
||||
__all__ = ["FirecrawlSearchConfig"]
|
||||
@@ -0,0 +1,218 @@
|
||||
"""
|
||||
Calls Firecrawl's /search endpoint to search the web.
|
||||
|
||||
Firecrawl API Reference: https://docs.firecrawl.dev/api-reference/endpoint/search
|
||||
"""
|
||||
from typing import Dict, List, Optional, TypedDict, Union
|
||||
|
||||
import httpx
|
||||
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||
from litellm.llms.base_llm.search.transformation import (
|
||||
BaseSearchConfig,
|
||||
SearchResponse,
|
||||
SearchResult,
|
||||
)
|
||||
from litellm.secret_managers.main import get_secret_str
|
||||
|
||||
|
||||
class _FirecrawlSearchRequestRequired(TypedDict):
|
||||
"""Required fields for Firecrawl Search API request."""
|
||||
|
||||
query: str # Required - search query
|
||||
|
||||
|
||||
class FirecrawlSearchRequest(_FirecrawlSearchRequestRequired, total=False):
    """
    Firecrawl Search API request format.

    Based on: https://docs.firecrawl.dev/api-reference/endpoint/search

    All fields below are optional (``total=False``); only ``query`` from the
    required base class must be present.
    """

    # Maximum number of results to return (API default 5, max 100).
    limit: int
    # Sources to search: 'web', 'images', 'news' (API default ['web']).
    sources: List[str]
    # Category filters, e.g. github, research, pdf.
    categories: List[Dict[str, str]]
    # Time-based search parameter.
    tbs: str
    # Location parameter for geo-targeting.
    location: str
    # ISO country code (API default 'US').
    country: str
    # Request timeout in milliseconds (API default 60000).
    timeout: int
    # Exclude invalid URLs from results (API default false).
    ignoreInvalidURLs: bool
    # Options for scraping the content of each search result.
    scrapeOptions: Dict
||||
class FirecrawlSearchConfig(BaseSearchConfig):
    """
    Config for calling Firecrawl's /search endpoint to search the web.

    Firecrawl API Reference: https://docs.firecrawl.dev/api-reference/endpoint/search
    """

    # Default base URL for the Firecrawl v2 API.
    FIRECRAWL_API_BASE = "https://api.firecrawl.dev/v2"

    @staticmethod
    def ui_friendly_name() -> str:
        """Return the human-readable provider name shown in the UI."""
        return "Firecrawl"

    def validate_environment(
        self,
        headers: Dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        **kwargs,
    ) -> Dict:
        """
        Validate environment and return headers.

        Args:
            headers: Mutable dict of request headers to populate in place.
            api_key: Firecrawl API key; falls back to the FIRECRAWL_API_KEY
                environment variable / secret manager.
            api_base: Accepted for interface compatibility; not used here.

        Returns:
            The headers dict with Authorization and Content-Type set.

        Raises:
            ValueError: If no API key is provided or found in the environment.
        """
        api_key = api_key or get_secret_str("FIRECRAWL_API_KEY")
        if not api_key:
            raise ValueError(
                "FIRECRAWL_API_KEY is not set. Set `FIRECRAWL_API_KEY` environment variable."
            )
        headers["Authorization"] = f"Bearer {api_key}"
        headers["Content-Type"] = "application/json"
        return headers

    def get_complete_url(
        self,
        api_base: Optional[str],
        optional_params: dict,
        data: Optional[Union[Dict, List[Dict]]] = None,
        **kwargs,
    ) -> str:
        """
        Get complete URL for the Search endpoint.

        Resolution order: explicit ``api_base`` argument → FIRECRAWL_API_BASE
        environment variable → class default. A "/search" path segment is
        appended when not already present.
        """
        api_base = (
            api_base or get_secret_str("FIRECRAWL_API_BASE") or self.FIRECRAWL_API_BASE
        )

        # Normalize trailing slashes so a base like "https://api.firecrawl.dev/v2/"
        # doesn't produce ".../v2//search", and a base already ending in
        # "/search" or "/search/" is left with a single clean suffix.
        api_base = api_base.rstrip("/")

        # Append "/search" to the api base if it's not already there
        if not api_base.endswith("/search"):
            api_base = f"{api_base}/search"

        return api_base

    def transform_search_request(
        self,
        query: Union[str, List[str]],
        optional_params: dict,
        **kwargs,
    ) -> Dict:
        """
        Transform Search request to Firecrawl API format.

        Transforms Perplexity unified spec parameters:
        - query → query (same)
        - max_results → limit
        - search_domain_filter → (not directly supported, can use scrapeOptions)
        - country → country
        - max_tokens_per_page → (not applicable, ignored)

        All other Firecrawl-specific parameters are passed through as-is.

        Args:
            query: Search query (string or list of strings). Firecrawl only
                supports single string queries, so lists are joined with spaces.
            optional_params: Optional parameters for the request.

        Returns:
            Dict with typed request data following FirecrawlSearchRequest spec.
        """
        if isinstance(query, list):
            # Firecrawl only supports single string queries, join with spaces
            query = " ".join(query)

        request_data: FirecrawlSearchRequest = {
            "query": query,
        }

        # Transform Perplexity unified spec parameters to Firecrawl format
        if "max_results" in optional_params:
            request_data["limit"] = optional_params["max_results"]

        if "country" in optional_params:
            request_data["country"] = optional_params["country"]

        # Convert to dict before dynamic key assignments
        result_data = dict(request_data)

        # Pass through provider-specific parameters as-is. Unified-spec params
        # not mapped above (e.g. search_domain_filter) are intentionally dropped
        # since Firecrawl has no direct equivalent.
        for param, value in optional_params.items():
            if (
                param not in self.get_supported_perplexity_optional_params()
                and param not in result_data
            ):
                result_data[param] = value

        # By default, request markdown content if not explicitly specified.
        # Firecrawl doesn't return content unless explicitly requested via scrapeOptions.
        if "scrapeOptions" not in result_data:
            result_data["scrapeOptions"] = {
                "formats": ["markdown"],
                "onlyMainContent": True,
            }

        return result_data

    def transform_search_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        **kwargs,
    ) -> SearchResponse:
        """
        Transform Firecrawl API response to LiteLLM unified SearchResponse format.

        Firecrawl → LiteLLM mappings:
        - data.web[].title → SearchResult.title
        - data.web[].url → SearchResult.url
        - data.web[].markdown OR data.web[].description → SearchResult.snippet
        - No date field in web results (set to None)
        - No last_updated field in Firecrawl response (set to None)

        Note: Firecrawl v2 returns results organized by source type (web,
        images, news). Web and news results are folded into the unified
        format; image results are ignored.

        Args:
            raw_response: Raw httpx response from Firecrawl API.
            logging_obj: Logging object for tracking.

        Returns:
            SearchResponse with standardized format.
        """
        response_json = raw_response.json()

        # Transform results to SearchResult objects
        results = []

        # Process web results (primary source)
        data = response_json.get("data", {})
        web_results = data.get("web", [])

        for result in web_results:
            # Use markdown if available, otherwise fall back to description
            snippet = result.get("markdown") or result.get("description", "")

            search_result = SearchResult(
                title=result.get("title", ""),
                url=result.get("url", ""),
                snippet=snippet,
                date=None,  # Web results don't include date
                last_updated=None,  # Firecrawl doesn't provide last_updated in response
            )
            results.append(search_result)

        # Process news results if available (they have date field)
        news_results = data.get("news", [])
        for result in news_results:
            snippet = result.get("markdown") or result.get("snippet", "")

            search_result = SearchResult(
                title=result.get("title", ""),
                url=result.get("url", ""),
                snippet=snippet,
                date=result.get("date"),  # News results include date
                last_updated=None,
            )
            results.append(search_result)

        return SearchResponse(
            results=results,
            object="search",
        )
|
||||
Reference in New Issue
Block a user