From e52fc622ede5c91648c86347fee082b29cea9403 Mon Sep 17 00:00:00 2001 From: rramprakash Date: Thu, 4 Jun 2026 14:57:48 +0530 Subject: [PATCH 1/6] Upgrade Firecrawl tools to firecrawl-py v2 SDK (v4) Migrate the three Firecrawl tools (scrape, crawl, search) from the legacy v1 FirecrawlApp client to the v2 Firecrawl client in firecrawl-py >= 4. - Import `Firecrawl` instead of the legacy `FirecrawlApp` class (the v2 client). The package name stays `firecrawl-py`; the import root stays `firecrawl`. - Bump the optional dependency pin from `firecrawl-py>=1.8.0` to `firecrawl-py>=4.0.0,<5`. - Crawl tool: replace the deprecated v1 `ignore_sitemap` option with the v2 `sitemap` enum (`ignore_sitemap=True` -> `sitemap="skip"`). - Update the RuntimeError message to refer to the Firecrawl client. The tools already used the v2 method names (`.scrape`/`.crawl`/`.search`) and snake_case config keys; this change aligns the client class, pin, and crawl sitemap option with the v2 SDK. Public tool args, `_run` signatures, and the typed return shapes (Document / CrawlJob / SearchData) are preserved, so agents using these tools are unaffected. Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/crewai-tools/pyproject.toml | 2 +- .../firecrawl_crawl_website_tool.py | 18 +++++++++--------- .../firecrawl_scrape_website_tool.py | 12 ++++++------ .../firecrawl_search_tool.py | 14 +++++++------- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/lib/crewai-tools/pyproject.toml b/lib/crewai-tools/pyproject.toml index 543c0ff7d8..064372ce74 100644 --- a/lib/crewai-tools/pyproject.toml +++ b/lib/crewai-tools/pyproject.toml @@ -36,7 +36,7 @@ multion = [ "multion>=1.1.0", ] firecrawl-py = [ - "firecrawl-py>=1.8.0", + "firecrawl-py>=4.0.0,<5", ] composio-core = [ "composio-core>=0.6.11.post1", diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index 47e98135ca..460084fa04 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -9,7 +9,7 @@ try: - from firecrawl import FirecrawlApp # type: ignore[import-untyped] + from firecrawl import Firecrawl # type: ignore[import-untyped] FIRECRAWL_AVAILABLE = True except ImportError: @@ -29,7 +29,7 @@ class FirecrawlCrawlWebsiteTool(BaseTool): Default configuration options (Firecrawl v2 API): max_discovery_depth (int): Maximum depth for discovering pages. Default: 2 - ignore_sitemap (bool): Whether to ignore sitemap. Default: True + sitemap (str): Sitemap usage mode ("skip", "include", "only"). Default: "skip" limit (int): Maximum number of pages to crawl. Default: 10 allow_external_links (bool): Allow crawling external links. Default: False allow_subdomains (bool): Allow crawling subdomains. Default: False @@ -50,7 +50,7 @@ class FirecrawlCrawlWebsiteTool(BaseTool): config: dict[str, Any] | None = Field( default_factory=lambda: { "max_discovery_depth": 2, - "ignore_sitemap": True, + "sitemap": "skip", "limit": 10, "allow_external_links": False, "allow_subdomains": False, @@ -81,9 +81,9 @@ def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: def _initialize_firecrawl(self) -> None: try: - from firecrawl import FirecrawlApp + from firecrawl import Firecrawl - self._firecrawl = FirecrawlApp(api_key=self.api_key) + self._firecrawl = Firecrawl(api_key=self.api_key) except ImportError: import click @@ -94,9 +94,9 @@ def _initialize_firecrawl(self) -> None: try: subprocess.run(["uv", "add", "firecrawl-py"], check=True) # noqa: S607 - from firecrawl import FirecrawlApp + from firecrawl import Firecrawl - self._firecrawl = FirecrawlApp(api_key=self.api_key) + self._firecrawl = Firecrawl(api_key=self.api_key) except subprocess.CalledProcessError as e: raise ImportError("Failed to install firecrawl-py package") from e else: @@ -106,14 +106,14 @@ def _initialize_firecrawl(self) -> None: def _run(self, url: str) -> Any: if not self._firecrawl: - raise RuntimeError("FirecrawlApp not properly initialized") + raise RuntimeError("Firecrawl client not properly initialized") url = validate_url(url) return self._firecrawl.crawl(url=url, poll_interval=2, **self.config) try: - from firecrawl import FirecrawlApp # noqa: F401 + from firecrawl import Firecrawl # noqa: F401 if not getattr(FirecrawlCrawlWebsiteTool, "_model_rebuilt", False): FirecrawlCrawlWebsiteTool.model_rebuild() diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py index 81df69ef8f..7a0581ca59 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py @@ -9,7 +9,7 @@ try: - from firecrawl import FirecrawlApp # type: ignore[import-untyped] + from firecrawl import Firecrawl # type: ignore[import-untyped] FIRECRAWL_AVAILABLE = True except ImportError: @@ -84,7 +84,7 @@ class FirecrawlScrapeWebsiteTool(BaseTool): def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: super().__init__(**kwargs) try: - from firecrawl import FirecrawlApp + from firecrawl import Firecrawl except ImportError: import click @@ -95,25 +95,25 @@ def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: subprocess.run(["uv", "add", "firecrawl-py"], check=True) # noqa: S607 from firecrawl import ( - FirecrawlApp, + Firecrawl, ) else: raise ImportError( "`firecrawl-py` package not found, please run `uv add firecrawl-py`" ) from None - self._firecrawl = FirecrawlApp(api_key=api_key) + self._firecrawl = Firecrawl(api_key=api_key) def _run(self, url: str) -> Any: if not self._firecrawl: - raise RuntimeError("FirecrawlApp not properly initialized") + raise RuntimeError("Firecrawl client not properly initialized") url = validate_url(url) return self._firecrawl.scrape(url=url, **self.config) try: - from firecrawl import FirecrawlApp # noqa: F401 + from firecrawl import Firecrawl # noqa: F401 if not getattr(FirecrawlScrapeWebsiteTool, "_model_rebuilt", False): FirecrawlScrapeWebsiteTool.model_rebuild() diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py index 42294606aa..797167cc6b 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py @@ -7,7 +7,7 @@ try: - from firecrawl import FirecrawlApp # type: ignore[import-untyped] + from firecrawl import Firecrawl # type: ignore[import-untyped] FIRECRAWL_AVAILABLE = True except ImportError: @@ -80,9 +80,9 @@ def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: def _initialize_firecrawl(self) -> None: try: - from firecrawl import FirecrawlApp + from firecrawl import Firecrawl - self._firecrawl = FirecrawlApp(api_key=self.api_key) + self._firecrawl = Firecrawl(api_key=self.api_key) except ImportError: import click @@ -93,9 +93,9 @@ def _initialize_firecrawl(self) -> None: try: subprocess.run(["uv", "add", "firecrawl-py"], check=True) # noqa: S607 - from firecrawl import FirecrawlApp + from firecrawl import Firecrawl - self._firecrawl = FirecrawlApp(api_key=self.api_key) + self._firecrawl = Firecrawl(api_key=self.api_key) except subprocess.CalledProcessError as e: raise ImportError("Failed to install firecrawl-py package") from e else: @@ -108,7 +108,7 @@ def _run( query: str, ) -> Any: if not self._firecrawl: - raise RuntimeError("FirecrawlApp not properly initialized") + raise RuntimeError("Firecrawl client not properly initialized") return self._firecrawl.search( query=query, @@ -117,7 +117,7 @@ def _run( try: - from firecrawl import FirecrawlApp # noqa: F401 + from firecrawl import Firecrawl # noqa: F401 if not getattr(FirecrawlSearchTool, "_model_rebuilt", False): FirecrawlSearchTool.model_rebuild() From 753d9947567547c15b3e2e48e26d4801ae02d622 Mon Sep 17 00:00:00 2001 From: Rakshith Ramprakash Date: Thu, 4 Jun 2026 16:45:56 +0530 Subject: [PATCH 2/6] docs: refresh Firecrawl tool docs for the v2 config API Co-Authored-By: Claude Opus 4.8 (1M context) --- .../web-scraping/firecrawlscrapewebsitetool.mdx | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/docs/en/tools/web-scraping/firecrawlscrapewebsitetool.mdx b/docs/en/tools/web-scraping/firecrawlscrapewebsitetool.mdx index 51bb672ef0..9e67476962 100644 --- a/docs/en/tools/web-scraping/firecrawlscrapewebsitetool.mdx +++ b/docs/en/tools/web-scraping/firecrawlscrapewebsitetool.mdx @@ -14,7 +14,7 @@ mode: "wide" ## Installation - Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`). -- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with `crewai[tools]` package: +- Install the [Firecrawl SDK](https://github.com/firecrawl/firecrawl) along with `crewai[tools]` package: ```shell pip install firecrawl-py 'crewai[tools]' @@ -32,13 +32,6 @@ tool = FirecrawlScrapeWebsiteTool(url='firecrawl.dev') ## Arguments -- `api_key`: Optional. Specifies Firecrawl API key. Defaults is the `FIRECRAWL_API_KEY` environment variable. -- `url`: The URL to scrape. -- `page_options`: Optional. - - `onlyMainContent`: Optional. Only return the main content of the page excluding headers, navs, footers, etc. - - `includeHtml`: Optional. Include the raw HTML content of the page. Will output a html key in the response. -- `extractor_options`: Optional. Options for LLM-based extraction of structured information from the page content - - `mode`: The extraction mode to use, currently supports 'llm-extraction' - - `extractionPrompt`: Optional. A prompt describing what information to extract from the page - - `extractionSchema`: Optional. The schema for the data to be extracted -- `timeout`: Optional. Timeout in milliseconds for the request +- `url`: Required. The URL to scrape. +- `config`: Optional. A dictionary of [Firecrawl v2 scrape options](https://docs.firecrawl.dev/api-reference/endpoint/scrape) passed directly to the SDK — for example `formats`, `only_main_content`, `include_tags`, `exclude_tags`, `wait_for`, and `timeout`. For structured/LLM extraction, add a `json` entry to `formats`, e.g. `{"formats": [{"type": "json", "prompt": "...", "schema": {...}}]}`. +- `api_key`: Optional. Your Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable. From 77701b81b2cbc6d26b8ffa3b7bc7412d77b65e1b Mon Sep 17 00:00:00 2001 From: Rakshith Ramprakash Date: Thu, 4 Jun 2026 16:45:58 +0530 Subject: [PATCH 3/6] docs: refresh Firecrawl tool docs for the v2 config API Co-Authored-By: Claude Opus 4.8 (1M context) --- .../firecrawlcrawlwebsitetool.mdx | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/docs/en/tools/web-scraping/firecrawlcrawlwebsitetool.mdx b/docs/en/tools/web-scraping/firecrawlcrawlwebsitetool.mdx index 15e44996c3..65ecd4e244 100644 --- a/docs/en/tools/web-scraping/firecrawlcrawlwebsitetool.mdx +++ b/docs/en/tools/web-scraping/firecrawlcrawlwebsitetool.mdx @@ -14,7 +14,7 @@ mode: "wide" ## Installation - Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`). -- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with `crewai[tools]` package: +- Install the [Firecrawl SDK](https://github.com/firecrawl/firecrawl) along with `crewai[tools]` package: ```shell pip install firecrawl-py 'crewai[tools]' @@ -32,17 +32,6 @@ tool = FirecrawlCrawlWebsiteTool(url='firecrawl.dev') ## Arguments -- `api_key`: Optional. Specifies Firecrawl API key. Defaults is the `FIRECRAWL_API_KEY` environment variable. -- `url`: The base URL to start crawling from. -- `page_options`: Optional. - - `onlyMainContent`: Optional. Only return the main content of the page excluding headers, navs, footers, etc. - - `includeHtml`: Optional. Include the raw HTML content of the page. Will output a html key in the response. -- `crawler_options`: Optional. Options for controlling the crawling behavior. - - `includes`: Optional. URL patterns to include in the crawl. - - `exclude`: Optional. URL patterns to exclude from the crawl. - - `generateImgAltText`: Optional. Generate alt text for images using LLMs (requires a paid plan). - - `returnOnlyUrls`: Optional. If true, returns only the URLs as a list in the crawl status. Note: the response will be a list of URLs inside the data, not a list of documents. - - `maxDepth`: Optional. Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children, and so on. - - `mode`: Optional. The crawling mode to use. Fast mode crawls 4x faster on websites without a sitemap but may not be as accurate and shouldn't be used on heavily JavaScript-rendered websites. - - `limit`: Optional. Maximum number of pages to crawl. - - `timeout`: Optional. Timeout in milliseconds for the crawling operation. +- `url`: Required. The base URL to start crawling from. +- `config`: Optional. A dictionary of [Firecrawl v2 crawl options](https://docs.firecrawl.dev/api-reference/endpoint/crawl-post) passed directly to the SDK — for example `limit`, `max_discovery_depth`, `include_paths`, `exclude_paths`, `crawl_entire_domain`, `sitemap` (`"skip"`, `"include"`, or `"only"`), and `scrape_options` for per-page scrape settings. +- `api_key`: Optional. Your Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable. From 7d6259993058fe19b2d0b3e7d8f07d1ec5bd80e0 Mon Sep 17 00:00:00 2001 From: Rakshith Ramprakash Date: Thu, 4 Jun 2026 16:46:02 +0530 Subject: [PATCH 4/6] docs: refresh Firecrawl tool docs for the v2 config API Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/en/tools/web-scraping/firecrawlsearchtool.mdx | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/docs/en/tools/web-scraping/firecrawlsearchtool.mdx b/docs/en/tools/web-scraping/firecrawlsearchtool.mdx index 9864bfcf52..84b803eaa4 100644 --- a/docs/en/tools/web-scraping/firecrawlsearchtool.mdx +++ b/docs/en/tools/web-scraping/firecrawlsearchtool.mdx @@ -14,7 +14,7 @@ mode: "wide" ## Installation - Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`). -- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with `crewai[tools]` package: +- Install the [Firecrawl SDK](https://github.com/firecrawl/firecrawl) along with `crewai[tools]` package: ```shell pip install firecrawl-py 'crewai[tools]' @@ -32,11 +32,6 @@ tool = FirecrawlSearchTool(query='what is firecrawl?') ## Arguments -- `api_key`: Optional. Specifies Firecrawl API key. Defaults is the `FIRECRAWL_API_KEY` environment variable. -- `query`: The search query string to be used for searching. -- `page_options`: Optional. Options for result formatting. - - `onlyMainContent`: Optional. Only return the main content of the page excluding headers, navs, footers, etc. - - `includeHtml`: Optional. Include the raw HTML content of the page. Will output a html key in the response. - - `fetchPageContent`: Optional. Fetch the full content of the page. -- `search_options`: Optional. Options for controlling the crawling behavior. - - `limit`: Optional. Maximum number of pages to crawl. \ No newline at end of file +- `query`: Required. The search query. +- `config`: Optional. A dictionary of [Firecrawl v2 search options](https://docs.firecrawl.dev/api-reference/endpoint/search) passed directly to the SDK — for example `limit`, `sources`, `location`, `tbs`, and `scrape_options` to scrape the result pages. +- `api_key`: Optional. Your Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable. From 490cb56762cbde4d225173260280392657eb01ea Mon Sep 17 00:00:00 2001 From: Rakshith Ramprakash Date: Thu, 4 Jun 2026 21:58:17 +0530 Subject: [PATCH 5/6] Add Firecrawl Interact tool (agentic browser) to the bundle Wraps the v2 SDK agent() endpoint as FirecrawlInteractTool: the CrewAI agent passes a natural-language task (and optional start urls) and Firecrawl's autonomous browser agent navigates/interacts and returns the result. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../web-scraping/firecrawlinteracttool.mdx | 41 ++++++ lib/crewai-tools/src/crewai_tools/__init__.py | 4 + .../src/crewai_tools/tools/__init__.py | 4 + .../tools/firecrawl_interact_tool/README.md | 44 +++++++ .../tools/firecrawl_interact_tool/__init__.py | 0 .../firecrawl_interact_tool.py | 117 ++++++++++++++++++ 6 files changed, 210 insertions(+) create mode 100644 docs/en/tools/web-scraping/firecrawlinteracttool.mdx create mode 100644 lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/README.md create mode 100644 lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/__init__.py create mode 100644 lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/firecrawl_interact_tool.py diff --git a/docs/en/tools/web-scraping/firecrawlinteracttool.mdx b/docs/en/tools/web-scraping/firecrawlinteracttool.mdx new file mode 100644 index 0000000000..856765a96c --- /dev/null +++ b/docs/en/tools/web-scraping/firecrawlinteracttool.mdx @@ -0,0 +1,41 @@ +--- +title: Firecrawl Interact +description: The `FirecrawlInteractTool` runs an autonomous Firecrawl browser agent that navigates and interacts with web pages to accomplish a task. +icon: fire-flame +mode: "wide" +--- + +# `FirecrawlInteractTool` + +## Description + +[Firecrawl](https://firecrawl.dev) is a platform for crawling and converting any website into clean markdown or structured data. The interact tool runs an autonomous browser **agent** that navigates and interacts with pages to accomplish a natural-language task, then returns the result. + +## Installation + +- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`). +- Install the [Firecrawl SDK](https://github.com/firecrawl/firecrawl) along with `crewai[tools]` package: + +```shell +pip install firecrawl-py 'crewai[tools]' +``` + +## Example + +Utilize the FirecrawlInteractTool as follows to let your agent act on the web: + +```python Code +from crewai_tools import FirecrawlInteractTool + +tool = FirecrawlInteractTool() +result = tool.run( + prompt="Go to news.ycombinator.com and return the titles of the top 3 stories", +) +``` + +## Arguments + +- `prompt`: Required. A natural-language description of the task for the agent to carry out. +- `urls`: Optional. A list of URLs to start from or constrain the agent to. +- `config`: Optional. A dictionary of [Firecrawl v2 agent options](https://docs.firecrawl.dev/api-reference/endpoint/agent) passed directly to the SDK — for example `model` (`spark-1-mini` / `spark-1-pro`), `max_credits`, `strict_constrain_to_urls`, `poll_interval`, and `timeout`. +- `api_key`: Optional. Your Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable. diff --git a/lib/crewai-tools/src/crewai_tools/__init__.py b/lib/crewai-tools/src/crewai_tools/__init__.py index 4a7dc71033..6cce12c699 100644 --- a/lib/crewai-tools/src/crewai_tools/__init__.py +++ b/lib/crewai-tools/src/crewai_tools/__init__.py @@ -85,6 +85,9 @@ from crewai_tools.tools.firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import ( FirecrawlCrawlWebsiteTool, ) +from crewai_tools.tools.firecrawl_interact_tool.firecrawl_interact_tool import ( + FirecrawlInteractTool, +) from crewai_tools.tools.firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import ( FirecrawlScrapeWebsiteTool, ) @@ -263,6 +266,7 @@ "FileReadTool", "FileWriterTool", "FirecrawlCrawlWebsiteTool", + "FirecrawlInteractTool", "FirecrawlScrapeWebsiteTool", "FirecrawlSearchTool", "GenerateCrewaiAutomationTool", diff --git a/lib/crewai-tools/src/crewai_tools/tools/__init__.py b/lib/crewai-tools/src/crewai_tools/tools/__init__.py index 18bf4e5638..d55b67e2f6 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/__init__.py +++ b/lib/crewai-tools/src/crewai_tools/tools/__init__.py @@ -74,6 +74,9 @@ from crewai_tools.tools.firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import ( FirecrawlCrawlWebsiteTool, ) +from crewai_tools.tools.firecrawl_interact_tool.firecrawl_interact_tool import ( + FirecrawlInteractTool, +) from crewai_tools.tools.firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import ( FirecrawlScrapeWebsiteTool, ) @@ -247,6 +250,7 @@ "FileReadTool", "FileWriterTool", "FirecrawlCrawlWebsiteTool", + "FirecrawlInteractTool", "FirecrawlScrapeWebsiteTool", "FirecrawlSearchTool", "GenerateCrewaiAutomationTool", diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/README.md b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/README.md new file mode 100644 index 0000000000..4a10b3b352 --- /dev/null +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/README.md @@ -0,0 +1,44 @@ +# FirecrawlInteractTool + +## Description + +[Firecrawl](https://firecrawl.dev) is a platform for crawling and converting any website into clean markdown or structured data. This tool runs an autonomous browser **agent** that navigates and interacts with web pages to accomplish a natural-language task, then returns the result. + +## Installation + +- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`). +- Install the [Firecrawl SDK](https://github.com/firecrawl/firecrawl) along with `crewai[tools]` package: + +``` +pip install firecrawl-py 'crewai[tools]' +``` + +## Example + +Utilize the FirecrawlInteractTool as follows to let your agent act on the web: + +```python +from crewai_tools import FirecrawlInteractTool + +tool = FirecrawlInteractTool(config={"model": "spark-1-mini"}) +tool.run(prompt="Find the pricing page on firecrawl.dev and return the plan names") +``` + +## Arguments + +- `prompt`: Required. A natural-language description of the task for the agent to carry out. +- `urls`: Optional. A list of URLs to start from or constrain the agent to. +- `api_key`: Optional. Specifies the Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable. +- `config`: Optional. It contains Firecrawl v2 agent parameters. + +This is the default configuration + +```python +{ + "model": "spark-1-mini", + "max_credits": None, + "strict_constrain_to_urls": None, + "poll_interval": 2, + "timeout": None, +} +``` diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/__init__.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/firecrawl_interact_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/firecrawl_interact_tool.py new file mode 100644 index 0000000000..ac68c18e5c --- /dev/null +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/firecrawl_interact_tool.py @@ -0,0 +1,117 @@ +from __future__ import annotations + +from typing import Any + +from crewai.tools import BaseTool, EnvVar +from pydantic import BaseModel, ConfigDict, Field, PrivateAttr + +from crewai_tools.security.safe_path import validate_url + + +try: + from firecrawl import Firecrawl # type: ignore[import-untyped] + + FIRECRAWL_AVAILABLE = True +except ImportError: + FIRECRAWL_AVAILABLE = False + + +class FirecrawlInteractToolSchema(BaseModel): + prompt: str = Field( + description="Natural-language description of the task for the Firecrawl agent to carry out by navigating and interacting with web pages" + ) + urls: list[str] | None = Field( + default=None, + description="Optional list of URLs to start from or constrain the agent to", + ) + + +class FirecrawlInteractTool(BaseTool): + """Tool for running an autonomous Firecrawl browser agent using the Firecrawl v2 API. To run this tool, you need to have a Firecrawl API key. + + The agent navigates and interacts with web pages to accomplish a natural-language + task, then returns the result. + + Args: + api_key (str): Your Firecrawl API key. + config (dict): Optional. It contains Firecrawl v2 agent parameters. + + Default configuration options (Firecrawl v2 API): + model (str): Agent model to use ("spark-1-mini" or "spark-1-pro"). Default: "spark-1-mini" + max_credits (int): Maximum credits the agent may spend. Default: None (no cap) + strict_constrain_to_urls (bool): Restrict the agent to the provided urls only. Default: None + poll_interval (int): Seconds between status polls while the agent runs. Default: 2 + timeout (int): Overall timeout in seconds. Default: None + """ + + model_config = ConfigDict( + arbitrary_types_allowed=True, validate_assignment=True, frozen=False + ) + name: str = "Firecrawl web interact tool" + description: str = ( + "Run an autonomous Firecrawl browser agent that navigates and interacts with " + "web pages to accomplish a task, then returns the result" + ) + args_schema: type[BaseModel] = FirecrawlInteractToolSchema + api_key: str | None = None + config: dict[str, Any] = Field( + default_factory=lambda: { + "model": "spark-1-mini", + "max_credits": None, + "strict_constrain_to_urls": None, + "poll_interval": 2, + "timeout": None, + } + ) + + _firecrawl: Any = PrivateAttr(None) + package_dependencies: list[str] = Field(default_factory=lambda: ["firecrawl-py"]) + env_vars: list[EnvVar] = Field( + default_factory=lambda: [ + EnvVar( + name="FIRECRAWL_API_KEY", + description="API key for Firecrawl services", + required=True, + ), + ] + ) + + def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: + super().__init__(**kwargs) + try: + from firecrawl import Firecrawl + except ImportError: + import click + + if click.confirm( + "You are missing the 'firecrawl-py' package. Would you like to install it?" + ): + import subprocess + + subprocess.run(["uv", "add", "firecrawl-py"], check=True) # noqa: S607 + from firecrawl import ( + Firecrawl, + ) + else: + raise ImportError( + "`firecrawl-py` package not found, please run `uv add firecrawl-py`" + ) from None + + self._firecrawl = Firecrawl(api_key=api_key) + + def _run(self, prompt: str, urls: list[str] | None = None) -> Any: + if not self._firecrawl: + raise RuntimeError("Firecrawl client not properly initialized") + + validated_urls = [validate_url(u) for u in urls] if urls else None + return self._firecrawl.agent(urls=validated_urls, prompt=prompt, **self.config) + + +try: + from firecrawl import Firecrawl # noqa: F401 + + if not getattr(FirecrawlInteractTool, "_model_rebuilt", False): + FirecrawlInteractTool.model_rebuild() + FirecrawlInteractTool._model_rebuilt = True # type: ignore[attr-defined] +except ImportError: + pass From 4750ab1122b4e8dd0af79831e7246189d49816c6 Mon Sep 17 00:00:00 2001 From: Rakshith Ramprakash Date: Fri, 5 Jun 2026 12:59:29 +0530 Subject: [PATCH 6/6] chore(firecrawl): pin runtime uv-add, add docstrings, tag README fence Addresses CodeRabbit review on crewAIInc/crewAI#6051: - Pin the on-demand `uv add firecrawl-py` (+ error hint) to >=4.0.0,<5 in all four tools, matching pyproject and the v2-only `Firecrawl` client. - Add docstrings to the schema classes, __init__, _run, and _initialize_firecrawl (docstring-coverage check). - Tag the README install code fence as shell (MD040). Co-Authored-By: Claude Opus 4.8 (1M context) --- .../firecrawl_crawl_website_tool.py | 8 ++++++-- .../crewai_tools/tools/firecrawl_interact_tool/README.md | 2 +- .../firecrawl_interact_tool/firecrawl_interact_tool.py | 7 +++++-- .../firecrawl_scrape_website_tool.py | 7 +++++-- .../tools/firecrawl_search_tool/firecrawl_search_tool.py | 8 ++++++-- 5 files changed, 23 insertions(+), 9 deletions(-) diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index 460084fa04..2ad7316d0d 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -17,6 +17,7 @@ class FirecrawlCrawlWebsiteToolSchema(BaseModel): + """Input schema for the Firecrawl crawl tool.""" url: str = Field(description="Website URL") @@ -75,11 +76,13 @@ class FirecrawlCrawlWebsiteTool(BaseTool): ) def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: + """Initialize the tool and its Firecrawl v2 client.""" super().__init__(**kwargs) self.api_key = api_key self._initialize_firecrawl() def _initialize_firecrawl(self) -> None: + """Import firecrawl-py (installing it on demand if missing) and build the client.""" try: from firecrawl import Firecrawl @@ -93,7 +96,7 @@ def _initialize_firecrawl(self) -> None: import subprocess try: - subprocess.run(["uv", "add", "firecrawl-py"], check=True) # noqa: S607 + subprocess.run(["uv", "add", "firecrawl-py>=4.0.0,<5"], check=True) # noqa: S607 from firecrawl import Firecrawl self._firecrawl = Firecrawl(api_key=self.api_key) @@ -101,10 +104,11 @@ def _initialize_firecrawl(self) -> None: raise ImportError("Failed to install firecrawl-py package") from e else: raise ImportError( - "`firecrawl-py` package not found, please run `uv add firecrawl-py`" + "`firecrawl-py` package not found, please run `uv add 'firecrawl-py>=4.0.0,<5'`" ) from None def _run(self, url: str) -> Any: + """Crawl the given URL and return the crawled pages.""" if not self._firecrawl: raise RuntimeError("Firecrawl client not properly initialized") diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/README.md b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/README.md index 4a10b3b352..71ad71e235 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/README.md +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/README.md @@ -9,7 +9,7 @@ - Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`). - Install the [Firecrawl SDK](https://github.com/firecrawl/firecrawl) along with `crewai[tools]` package: -``` +```shell pip install firecrawl-py 'crewai[tools]' ``` diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/firecrawl_interact_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/firecrawl_interact_tool.py index ac68c18e5c..02d230ea5b 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/firecrawl_interact_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_interact_tool/firecrawl_interact_tool.py @@ -17,6 +17,7 @@ class FirecrawlInteractToolSchema(BaseModel): + """Input schema for the Firecrawl interact tool.""" prompt: str = Field( description="Natural-language description of the task for the Firecrawl agent to carry out by navigating and interacting with web pages" ) @@ -77,6 +78,7 @@ class FirecrawlInteractTool(BaseTool): ) def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: + """Initialize the tool and its Firecrawl v2 client.""" super().__init__(**kwargs) try: from firecrawl import Firecrawl @@ -88,18 +90,19 @@ def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: ): import subprocess - subprocess.run(["uv", "add", "firecrawl-py"], check=True) # noqa: S607 + subprocess.run(["uv", "add", "firecrawl-py>=4.0.0,<5"], check=True) # noqa: S607 from firecrawl import ( Firecrawl, ) else: raise ImportError( - "`firecrawl-py` package not found, please run `uv add firecrawl-py`" + "`firecrawl-py` package not found, please run `uv add 'firecrawl-py>=4.0.0,<5'`" ) from None self._firecrawl = Firecrawl(api_key=api_key) def _run(self, prompt: str, urls: list[str] | None = None) -> Any: + """Run the Firecrawl browser agent for the prompt and return the result.""" if not self._firecrawl: raise RuntimeError("Firecrawl client not properly initialized") diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py index 7a0581ca59..338c318107 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py @@ -17,6 +17,7 @@ class FirecrawlScrapeWebsiteToolSchema(BaseModel): + """Input schema for the Firecrawl scrape tool.""" url: str = Field(description="Website URL") @@ -82,6 +83,7 @@ class FirecrawlScrapeWebsiteTool(BaseTool): ) def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: + """Initialize the tool and its Firecrawl v2 client.""" super().__init__(**kwargs) try: from firecrawl import Firecrawl @@ -93,18 +95,19 @@ def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: ): import subprocess - subprocess.run(["uv", "add", "firecrawl-py"], check=True) # noqa: S607 + subprocess.run(["uv", "add", "firecrawl-py>=4.0.0,<5"], check=True) # noqa: S607 from firecrawl import ( Firecrawl, ) else: raise ImportError( - "`firecrawl-py` package not found, please run `uv add firecrawl-py`" + "`firecrawl-py` package not found, please run `uv add 'firecrawl-py>=4.0.0,<5'`" ) from None self._firecrawl = Firecrawl(api_key=api_key) def _run(self, url: str) -> Any: + """Scrape the given URL and return its contents.""" if not self._firecrawl: raise RuntimeError("Firecrawl client not properly initialized") diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py index 797167cc6b..0df4c34670 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py @@ -15,6 +15,7 @@ class FirecrawlSearchToolSchema(BaseModel): + """Input schema for the Firecrawl search tool.""" query: str = Field(description="Search query") @@ -74,11 +75,13 @@ class FirecrawlSearchTool(BaseTool): ) def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: + """Initialize the tool and its Firecrawl v2 client.""" super().__init__(**kwargs) self.api_key = api_key self._initialize_firecrawl() def _initialize_firecrawl(self) -> None: + """Import firecrawl-py (installing it on demand if missing) and build the client.""" try: from firecrawl import Firecrawl @@ -92,7 +95,7 @@ def _initialize_firecrawl(self) -> None: import subprocess try: - subprocess.run(["uv", "add", "firecrawl-py"], check=True) # noqa: S607 + subprocess.run(["uv", "add", "firecrawl-py>=4.0.0,<5"], check=True) # noqa: S607 from firecrawl import Firecrawl self._firecrawl = Firecrawl(api_key=self.api_key) @@ -100,13 +103,14 @@ def _initialize_firecrawl(self) -> None: raise ImportError("Failed to install firecrawl-py package") from e else: raise ImportError( - "`firecrawl-py` package not found, please run `uv add firecrawl-py`" + "`firecrawl-py` package not found, please run `uv add 'firecrawl-py>=4.0.0,<5'`" ) from None def _run( self, query: str, ) -> Any: + """Run a Firecrawl web search for the query and return the results.""" if not self._firecrawl: raise RuntimeError("Firecrawl client not properly initialized")