diff --git a/docs/en/tools/web-scraping/firecrawlcrawlwebsitetool.mdx b/docs/en/tools/web-scraping/firecrawlcrawlwebsitetool.mdx index 15e44996c3..65ecd4e244 100644 --- a/docs/en/tools/web-scraping/firecrawlcrawlwebsitetool.mdx +++ b/docs/en/tools/web-scraping/firecrawlcrawlwebsitetool.mdx @@ -14,7 +14,7 @@ mode: "wide" ## Installation - Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`). -- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with `crewai[tools]` package: +- Install the [Firecrawl SDK](https://github.com/firecrawl/firecrawl) along with `crewai[tools]` package: ```shell pip install firecrawl-py 'crewai[tools]' @@ -32,17 +32,6 @@ tool = FirecrawlCrawlWebsiteTool(url='firecrawl.dev') ## Arguments -- `api_key`: Optional. Specifies Firecrawl API key. Defaults is the `FIRECRAWL_API_KEY` environment variable. -- `url`: The base URL to start crawling from. -- `page_options`: Optional. - - `onlyMainContent`: Optional. Only return the main content of the page excluding headers, navs, footers, etc. - - `includeHtml`: Optional. Include the raw HTML content of the page. Will output a html key in the response. -- `crawler_options`: Optional. Options for controlling the crawling behavior. - - `includes`: Optional. URL patterns to include in the crawl. - - `exclude`: Optional. URL patterns to exclude from the crawl. - - `generateImgAltText`: Optional. Generate alt text for images using LLMs (requires a paid plan). - - `returnOnlyUrls`: Optional. If true, returns only the URLs as a list in the crawl status. Note: the response will be a list of URLs inside the data, not a list of documents. - - `maxDepth`: Optional. Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children, and so on. - - `mode`: Optional. The crawling mode to use. 
Fast mode crawls 4x faster on websites without a sitemap but may not be as accurate and shouldn't be used on heavily JavaScript-rendered websites. - - `limit`: Optional. Maximum number of pages to crawl. - - `timeout`: Optional. Timeout in milliseconds for the crawling operation. +- `url`: Required. The base URL to start crawling from. +- `config`: Optional. A dictionary of [Firecrawl v2 crawl options](https://docs.firecrawl.dev/api-reference/endpoint/crawl-post) passed directly to the SDK — for example `limit`, `max_discovery_depth`, `include_paths`, `exclude_paths`, `crawl_entire_domain`, `sitemap` (`"skip"`, `"include"`, or `"only"`), and `scrape_options` for per-page scrape settings. +- `api_key`: Optional. Your Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable. diff --git a/docs/en/tools/web-scraping/firecrawlinteracttool.mdx b/docs/en/tools/web-scraping/firecrawlinteracttool.mdx new file mode 100644 index 0000000000..856765a96c --- /dev/null +++ b/docs/en/tools/web-scraping/firecrawlinteracttool.mdx @@ -0,0 +1,41 @@ +--- +title: Firecrawl Interact +description: The `FirecrawlInteractTool` runs an autonomous Firecrawl browser agent that navigates and interacts with web pages to accomplish a task. +icon: fire-flame +mode: "wide" +--- + +# `FirecrawlInteractTool` + +## Description + +[Firecrawl](https://firecrawl.dev) is a platform for crawling and converting any website into clean markdown or structured data. The interact tool runs an autonomous browser **agent** that navigates and interacts with pages to accomplish a natural-language task, then returns the result. + +## Installation + +- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`). 
+- Install the [Firecrawl SDK](https://github.com/firecrawl/firecrawl) along with the `crewai[tools]` package: + +```shell +pip install firecrawl-py 'crewai[tools]' +``` + +## Example + +Utilize the FirecrawlInteractTool as follows to let your agent act on the web: + +```python Code +from crewai_tools import FirecrawlInteractTool + +tool = FirecrawlInteractTool() +result = tool.run( + prompt="Go to news.ycombinator.com and return the titles of the top 3 stories", +) +``` + +## Arguments + +- `prompt`: Required. A natural-language description of the task for the agent to carry out. +- `urls`: Optional. A list of URLs to start from or constrain the agent to. +- `config`: Optional. A dictionary of [Firecrawl v2 agent options](https://docs.firecrawl.dev/api-reference/endpoint/agent) passed directly to the SDK — for example `model` (`spark-1-mini` / `spark-1-pro`), `max_credits`, `strict_constrain_to_urls`, `poll_interval`, and `timeout`. +- `api_key`: Optional. Your Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable. diff --git a/docs/en/tools/web-scraping/firecrawlscrapewebsitetool.mdx b/docs/en/tools/web-scraping/firecrawlscrapewebsitetool.mdx index 51bb672ef0..9e67476962 100644 --- a/docs/en/tools/web-scraping/firecrawlscrapewebsitetool.mdx +++ b/docs/en/tools/web-scraping/firecrawlscrapewebsitetool.mdx @@ -14,7 +14,7 @@ mode: "wide" ## Installation - Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`). -- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with `crewai[tools]` package: +- Install the [Firecrawl SDK](https://github.com/firecrawl/firecrawl) along with the `crewai[tools]` package: ```shell pip install firecrawl-py 'crewai[tools]' @@ -32,13 +32,6 @@ tool = FirecrawlScrapeWebsiteTool(url='firecrawl.dev') ## Arguments -- `api_key`: Optional. Specifies Firecrawl API key. Defaults is the `FIRECRAWL_API_KEY` environment variable. 
-- `url`: The URL to scrape. -- `page_options`: Optional. - - `onlyMainContent`: Optional. Only return the main content of the page excluding headers, navs, footers, etc. - - `includeHtml`: Optional. Include the raw HTML content of the page. Will output a html key in the response. -- `extractor_options`: Optional. Options for LLM-based extraction of structured information from the page content - - `mode`: The extraction mode to use, currently supports 'llm-extraction' - - `extractionPrompt`: Optional. A prompt describing what information to extract from the page - - `extractionSchema`: Optional. The schema for the data to be extracted -- `timeout`: Optional. Timeout in milliseconds for the request +- `url`: Required. The URL to scrape. +- `config`: Optional. A dictionary of [Firecrawl v2 scrape options](https://docs.firecrawl.dev/api-reference/endpoint/scrape) passed directly to the SDK — for example `formats`, `only_main_content`, `include_tags`, `exclude_tags`, `wait_for`, and `timeout`. For structured/LLM extraction, add a `json` entry to `formats`, e.g. `{"formats": [{"type": "json", "prompt": "...", "schema": {...}}]}`. +- `api_key`: Optional. Your Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable. diff --git a/docs/en/tools/web-scraping/firecrawlsearchtool.mdx b/docs/en/tools/web-scraping/firecrawlsearchtool.mdx index 9864bfcf52..84b803eaa4 100644 --- a/docs/en/tools/web-scraping/firecrawlsearchtool.mdx +++ b/docs/en/tools/web-scraping/firecrawlsearchtool.mdx @@ -14,7 +14,7 @@ mode: "wide" ## Installation - Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`). 
-- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with `crewai[tools]` package: +- Install the [Firecrawl SDK](https://github.com/firecrawl/firecrawl) along with `crewai[tools]` package: ```shell pip install firecrawl-py 'crewai[tools]' @@ -32,11 +32,6 @@ tool = FirecrawlSearchTool(query='what is firecrawl?') ## Arguments -- `api_key`: Optional. Specifies Firecrawl API key. Defaults is the `FIRECRAWL_API_KEY` environment variable. -- `query`: The search query string to be used for searching. -- `page_options`: Optional. Options for result formatting. - - `onlyMainContent`: Optional. Only return the main content of the page excluding headers, navs, footers, etc. - - `includeHtml`: Optional. Include the raw HTML content of the page. Will output a html key in the response. - - `fetchPageContent`: Optional. Fetch the full content of the page. -- `search_options`: Optional. Options for controlling the crawling behavior. - - `limit`: Optional. Maximum number of pages to crawl. \ No newline at end of file +- `query`: Required. The search query. +- `config`: Optional. A dictionary of [Firecrawl v2 search options](https://docs.firecrawl.dev/api-reference/endpoint/search) passed directly to the SDK — for example `limit`, `sources`, `location`, `tbs`, and `scrape_options` to scrape the result pages. +- `api_key`: Optional. Your Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable. 
diff --git a/lib/crewai-tools/pyproject.toml b/lib/crewai-tools/pyproject.toml index 543c0ff7d8..064372ce74 100644 --- a/lib/crewai-tools/pyproject.toml +++ b/lib/crewai-tools/pyproject.toml @@ -36,7 +36,7 @@ multion = [ "multion>=1.1.0", ] firecrawl-py = [ - "firecrawl-py>=1.8.0", + "firecrawl-py>=4.0.0,<5", ] composio-core = [ "composio-core>=0.6.11.post1", diff --git a/lib/crewai-tools/src/crewai_tools/__init__.py b/lib/crewai-tools/src/crewai_tools/__init__.py index 4a7dc71033..6cce12c699 100644 --- a/lib/crewai-tools/src/crewai_tools/__init__.py +++ b/lib/crewai-tools/src/crewai_tools/__init__.py @@ -85,6 +85,9 @@ from crewai_tools.tools.firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import ( FirecrawlCrawlWebsiteTool, ) +from crewai_tools.tools.firecrawl_interact_tool.firecrawl_interact_tool import ( + FirecrawlInteractTool, +) from crewai_tools.tools.firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import ( FirecrawlScrapeWebsiteTool, ) @@ -263,6 +266,7 @@ "FileReadTool", "FileWriterTool", "FirecrawlCrawlWebsiteTool", + "FirecrawlInteractTool", "FirecrawlScrapeWebsiteTool", "FirecrawlSearchTool", "GenerateCrewaiAutomationTool", diff --git a/lib/crewai-tools/src/crewai_tools/tools/__init__.py b/lib/crewai-tools/src/crewai_tools/tools/__init__.py index 18bf4e5638..d55b67e2f6 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/__init__.py +++ b/lib/crewai-tools/src/crewai_tools/tools/__init__.py @@ -74,6 +74,9 @@ from crewai_tools.tools.firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import ( FirecrawlCrawlWebsiteTool, ) +from crewai_tools.tools.firecrawl_interact_tool.firecrawl_interact_tool import ( + FirecrawlInteractTool, +) from crewai_tools.tools.firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import ( FirecrawlScrapeWebsiteTool, ) @@ -247,6 +250,7 @@ "FileReadTool", "FileWriterTool", "FirecrawlCrawlWebsiteTool", + "FirecrawlInteractTool", "FirecrawlScrapeWebsiteTool", "FirecrawlSearchTool", 
"GenerateCrewaiAutomationTool", diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index 47e98135ca..2ad7316d0d 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -9,7 +9,7 @@ try: - from firecrawl import FirecrawlApp # type: ignore[import-untyped] + from firecrawl import Firecrawl # type: ignore[import-untyped] FIRECRAWL_AVAILABLE = True except ImportError: @@ -17,6 +17,7 @@ class FirecrawlCrawlWebsiteToolSchema(BaseModel): + """Input schema for the Firecrawl crawl tool.""" url: str = Field(description="Website URL") @@ -29,7 +30,7 @@ class FirecrawlCrawlWebsiteTool(BaseTool): Default configuration options (Firecrawl v2 API): max_discovery_depth (int): Maximum depth for discovering pages. Default: 2 - ignore_sitemap (bool): Whether to ignore sitemap. Default: True + sitemap (str): Sitemap usage mode ("skip", "include", "only"). Default: "skip" limit (int): Maximum number of pages to crawl. Default: 10 allow_external_links (bool): Allow crawling external links. Default: False allow_subdomains (bool): Allow crawling subdomains. 
Default: False @@ -50,7 +51,7 @@ class FirecrawlCrawlWebsiteTool(BaseTool): config: dict[str, Any] | None = Field( default_factory=lambda: { "max_discovery_depth": 2, - "ignore_sitemap": True, + "sitemap": "skip", "limit": 10, "allow_external_links": False, "allow_subdomains": False, @@ -75,15 +76,17 @@ class FirecrawlCrawlWebsiteTool(BaseTool): ) def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: + """Initialize the tool and its Firecrawl v2 client.""" super().__init__(**kwargs) self.api_key = api_key self._initialize_firecrawl() def _initialize_firecrawl(self) -> None: + """Import firecrawl-py (installing it on demand if missing) and build the client.""" try: - from firecrawl import FirecrawlApp + from firecrawl import Firecrawl - self._firecrawl = FirecrawlApp(api_key=self.api_key) + self._firecrawl = Firecrawl(api_key=self.api_key) except ImportError: import click @@ -93,27 +96,28 @@ def _initialize_firecrawl(self) -> None: import subprocess try: - subprocess.run(["uv", "add", "firecrawl-py"], check=True) # noqa: S607 - from firecrawl import FirecrawlApp + subprocess.run(["uv", "add", "firecrawl-py>=4.0.0,<5"], check=True) # noqa: S607 + from firecrawl import Firecrawl - self._firecrawl = FirecrawlApp(api_key=self.api_key) + self._firecrawl = Firecrawl(api_key=self.api_key) except subprocess.CalledProcessError as e: raise ImportError("Failed to install firecrawl-py package") from e else: raise ImportError( - "`firecrawl-py` package not found, please run `uv add firecrawl-py`" + "`firecrawl-py` package not found, please run `uv add 'firecrawl-py>=4.0.0,<5'`" ) from None def _run(self, url: str) -> Any: + """Crawl the given URL and return the crawled pages.""" if not self._firecrawl: - raise RuntimeError("FirecrawlApp not properly initialized") + raise RuntimeError("Firecrawl client not properly initialized") url = validate_url(url) return self._firecrawl.crawl(url=url, poll_interval=2, **self.config) try: - from firecrawl import FirecrawlApp 
# noqa: F401 + from firecrawl import Firecrawl # noqa: F401 if not getattr(FirecrawlCrawlWebsiteTool, "_model_rebuilt", False): FirecrawlCrawlWebsiteTool.model_rebuild() diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/README.md b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/README.md new file mode 100644 index 0000000000..71ad71e235 --- /dev/null +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/README.md @@ -0,0 +1,44 @@ +# FirecrawlInteractTool + +## Description + +[Firecrawl](https://firecrawl.dev) is a platform for crawling and converting any website into clean markdown or structured data. This tool runs an autonomous browser **agent** that navigates and interacts with web pages to accomplish a natural-language task, then returns the result. + +## Installation + +- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`). +- Install the [Firecrawl SDK](https://github.com/firecrawl/firecrawl) along with `crewai[tools]` package: + +```shell +pip install firecrawl-py 'crewai[tools]' +``` + +## Example + +Utilize the FirecrawlInteractTool as follows to let your agent act on the web: + +```python +from crewai_tools import FirecrawlInteractTool + +tool = FirecrawlInteractTool(config={"model": "spark-1-mini"}) +tool.run(prompt="Find the pricing page on firecrawl.dev and return the plan names") +``` + +## Arguments + +- `prompt`: Required. A natural-language description of the task for the agent to carry out. +- `urls`: Optional. A list of URLs to start from or constrain the agent to. +- `api_key`: Optional. Specifies the Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable. +- `config`: Optional. It contains Firecrawl v2 agent parameters. 
+ +This is the default configuration + +```python +{ + "model": "spark-1-mini", + "max_credits": None, + "strict_constrain_to_urls": None, + "poll_interval": 2, + "timeout": None, +} +``` diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/__init__.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/firecrawl_interact_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/firecrawl_interact_tool.py new file mode 100644 index 0000000000..02d230ea5b --- /dev/null +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/firecrawl_interact_tool.py @@ -0,0 +1,120 @@ +from __future__ import annotations + +from typing import Any + +from crewai.tools import BaseTool, EnvVar +from pydantic import BaseModel, ConfigDict, Field, PrivateAttr + +from crewai_tools.security.safe_path import validate_url + + +try: + from firecrawl import Firecrawl # type: ignore[import-untyped] + + FIRECRAWL_AVAILABLE = True +except ImportError: + FIRECRAWL_AVAILABLE = False + + +class FirecrawlInteractToolSchema(BaseModel): + """Input schema for the Firecrawl interact tool.""" + prompt: str = Field( + description="Natural-language description of the task for the Firecrawl agent to carry out by navigating and interacting with web pages" + ) + urls: list[str] | None = Field( + default=None, + description="Optional list of URLs to start from or constrain the agent to", + ) + + +class FirecrawlInteractTool(BaseTool): + """Tool for running an autonomous Firecrawl browser agent using the Firecrawl v2 API. To run this tool, you need to have a Firecrawl API key. + + The agent navigates and interacts with web pages to accomplish a natural-language + task, then returns the result. + + Args: + api_key (str): Your Firecrawl API key. + config (dict): Optional. 
It contains Firecrawl v2 agent parameters. + + Default configuration options (Firecrawl v2 API): + model (str): Agent model to use ("spark-1-mini" or "spark-1-pro"). Default: "spark-1-mini" + max_credits (int): Maximum credits the agent may spend. Default: None (no cap) + strict_constrain_to_urls (bool): Restrict the agent to the provided urls only. Default: None + poll_interval (int): Seconds between status polls while the agent runs. Default: 2 + timeout (int): Overall timeout in seconds. Default: None + """ + + model_config = ConfigDict( + arbitrary_types_allowed=True, validate_assignment=True, frozen=False + ) + name: str = "Firecrawl web interact tool" + description: str = ( + "Run an autonomous Firecrawl browser agent that navigates and interacts with " + "web pages to accomplish a task, then returns the result" + ) + args_schema: type[BaseModel] = FirecrawlInteractToolSchema + api_key: str | None = None + config: dict[str, Any] = Field( + default_factory=lambda: { + "model": "spark-1-mini", + "max_credits": None, + "strict_constrain_to_urls": None, + "poll_interval": 2, + "timeout": None, + } + ) + + _firecrawl: Any = PrivateAttr(None) + package_dependencies: list[str] = Field(default_factory=lambda: ["firecrawl-py"]) + env_vars: list[EnvVar] = Field( + default_factory=lambda: [ + EnvVar( + name="FIRECRAWL_API_KEY", + description="API key for Firecrawl services", + required=True, + ), + ] + ) + + def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: + """Initialize the tool and its Firecrawl v2 client.""" + super().__init__(**kwargs) + try: + from firecrawl import Firecrawl + except ImportError: + import click + + if click.confirm( + "You are missing the 'firecrawl-py' package. Would you like to install it?" 
+ ): + import subprocess + + subprocess.run(["uv", "add", "firecrawl-py>=4.0.0,<5"], check=True) # noqa: S607 + from firecrawl import ( + Firecrawl, + ) + else: + raise ImportError( + "`firecrawl-py` package not found, please run `uv add 'firecrawl-py>=4.0.0,<5'`" + ) from None + + self._firecrawl = Firecrawl(api_key=api_key) + + def _run(self, prompt: str, urls: list[str] | None = None) -> Any: + """Run the Firecrawl browser agent for the prompt and return the result.""" + if not self._firecrawl: + raise RuntimeError("Firecrawl client not properly initialized") + + validated_urls = [validate_url(u) for u in urls] if urls else None + return self._firecrawl.agent(urls=validated_urls, prompt=prompt, **self.config) + + +try: + from firecrawl import Firecrawl # noqa: F401 + + if not getattr(FirecrawlInteractTool, "_model_rebuilt", False): + FirecrawlInteractTool.model_rebuild() + FirecrawlInteractTool._model_rebuilt = True # type: ignore[attr-defined] +except ImportError: + pass diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py index 81df69ef8f..338c318107 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py @@ -9,7 +9,7 @@ try: - from firecrawl import FirecrawlApp # type: ignore[import-untyped] + from firecrawl import Firecrawl # type: ignore[import-untyped] FIRECRAWL_AVAILABLE = True except ImportError: @@ -17,6 +17,7 @@ class FirecrawlScrapeWebsiteToolSchema(BaseModel): + """Input schema for the Firecrawl scrape tool.""" url: str = Field(description="Website URL") @@ -82,9 +83,10 @@ class FirecrawlScrapeWebsiteTool(BaseTool): ) def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: + """Initialize the tool and its 
Firecrawl v2 client.""" super().__init__(**kwargs) try: - from firecrawl import FirecrawlApp + from firecrawl import Firecrawl except ImportError: import click @@ -93,27 +95,28 @@ def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: ): import subprocess - subprocess.run(["uv", "add", "firecrawl-py"], check=True) # noqa: S607 + subprocess.run(["uv", "add", "firecrawl-py>=4.0.0,<5"], check=True) # noqa: S607 from firecrawl import ( - FirecrawlApp, + Firecrawl, ) else: raise ImportError( - "`firecrawl-py` package not found, please run `uv add firecrawl-py`" + "`firecrawl-py` package not found, please run `uv add 'firecrawl-py>=4.0.0,<5'`" ) from None - self._firecrawl = FirecrawlApp(api_key=api_key) + self._firecrawl = Firecrawl(api_key=api_key) def _run(self, url: str) -> Any: + """Scrape the given URL and return its contents.""" if not self._firecrawl: - raise RuntimeError("FirecrawlApp not properly initialized") + raise RuntimeError("Firecrawl client not properly initialized") url = validate_url(url) return self._firecrawl.scrape(url=url, **self.config) try: - from firecrawl import FirecrawlApp # noqa: F401 + from firecrawl import Firecrawl # noqa: F401 if not getattr(FirecrawlScrapeWebsiteTool, "_model_rebuilt", False): FirecrawlScrapeWebsiteTool.model_rebuild() diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py index 42294606aa..0df4c34670 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py @@ -7,7 +7,7 @@ try: - from firecrawl import FirecrawlApp # type: ignore[import-untyped] + from firecrawl import Firecrawl # type: ignore[import-untyped] FIRECRAWL_AVAILABLE = True except ImportError: @@ -15,6 +15,7 @@ class FirecrawlSearchToolSchema(BaseModel): + """Input schema for 
the Firecrawl search tool.""" query: str = Field(description="Search query") @@ -74,15 +75,17 @@ class FirecrawlSearchTool(BaseTool): ) def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: + """Initialize the tool and its Firecrawl v2 client.""" super().__init__(**kwargs) self.api_key = api_key self._initialize_firecrawl() def _initialize_firecrawl(self) -> None: + """Import firecrawl-py (installing it on demand if missing) and build the client.""" try: - from firecrawl import FirecrawlApp + from firecrawl import Firecrawl - self._firecrawl = FirecrawlApp(api_key=self.api_key) + self._firecrawl = Firecrawl(api_key=self.api_key) except ImportError: import click @@ -92,23 +95,24 @@ def _initialize_firecrawl(self) -> None: import subprocess try: - subprocess.run(["uv", "add", "firecrawl-py"], check=True) # noqa: S607 - from firecrawl import FirecrawlApp + subprocess.run(["uv", "add", "firecrawl-py>=4.0.0,<5"], check=True) # noqa: S607 + from firecrawl import Firecrawl - self._firecrawl = FirecrawlApp(api_key=self.api_key) + self._firecrawl = Firecrawl(api_key=self.api_key) except subprocess.CalledProcessError as e: raise ImportError("Failed to install firecrawl-py package") from e else: raise ImportError( - "`firecrawl-py` package not found, please run `uv add firecrawl-py`" + "`firecrawl-py` package not found, please run `uv add 'firecrawl-py>=4.0.0,<5'`" ) from None def _run( self, query: str, ) -> Any: + """Run a Firecrawl web search for the query and return the results.""" if not self._firecrawl: - raise RuntimeError("FirecrawlApp not properly initialized") + raise RuntimeError("Firecrawl client not properly initialized") return self._firecrawl.search( query=query, @@ -117,7 +121,7 @@ def _run( try: - from firecrawl import FirecrawlApp # noqa: F401 + from firecrawl import Firecrawl # noqa: F401 if not getattr(FirecrawlSearchTool, "_model_rebuilt", False): FirecrawlSearchTool.model_rebuild()