
Commit f5ee579

dependabot[bot], github-actions[bot], and ntindle authored
chore(backend/deps): Bump firecrawl-py from 2.16.3 to 4.3.1 in /autogpt_platform/backend (#10809)
Bumps [firecrawl-py](https://github.com/firecrawl/firecrawl) from 2.16.3 to 4.3.1. See the full diff in the [compare view](https://github.com/firecrawl/firecrawl/commits).

[Dependabot compatibility score](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

Dependabot commands and options (trigger them by commenting on this PR):

- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it; you can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

---

> [!NOTE]
> Upgrade firecrawl-py to v4.3.6 and refactor the firecrawl blocks to the new v4 API: formats handling, method names, and response fields.
>
> - **Dependencies**
>   - Bump `firecrawl-py` from `2.16.3` to `4.3.6` (adds `httpx`, updates `pydantic>=2`).
> - **Firecrawl API migration**
>   - Centralize `ScrapeFormat` in `backend/blocks/firecrawl/_api.py`.
>   - Add `_format_utils.convert_to_format_options` to map `ScrapeFormat` (incl. `screenshot@fullPage`) to v4 `FormatOption`/`ScreenshotFormat`.
>   - Switch to v4 types (`firecrawl.v2.types.ScrapeOptions`); adopt snake_case fields (`only_main_content`, `max_age`, `wait_for`).
>   - Rename methods: `crawl_url` → `crawl`, `scrape_url` → `scrape`, `map_url` → `map`.
>   - Normalize response attributes: `rawHtml` → `raw_html`, `changeTracking` → `change_tracking`.
> - **Blocks**
>   - `crawl.py`, `scrape.py`, `search.py`: use the new formats conversion and updated options/fields; adjust iteration over results (`search`: iterate `web` when present).
>   - `map.py`: return both `links` and detailed `results` (url/title/description) and update the output schema accordingly.
> - **Project files**
>   - Update `pyproject.toml` and `poetry.lock` for the new dependency versions.
>
> Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit d872f2e. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).

> **Note**
> Automatic rebases have been disabled on this pull request as it has been open for over 30 days.

---

Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com>
Co-authored-by: Nicholas Tindle <[email protected]>
Co-authored-by: Nicholas Tindle <[email protected]>
1 parent 57a06f7 commit f5ee579

File tree

9 files changed: +93 −73 lines

autogpt_platform/backend/backend/blocks/firecrawl/_api.py

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+from enum import Enum
+
+
+class ScrapeFormat(Enum):
+    MARKDOWN = "markdown"
+    HTML = "html"
+    RAW_HTML = "rawHtml"
+    LINKS = "links"
+    SCREENSHOT = "screenshot"
+    SCREENSHOT_FULL_PAGE = "screenshot@fullPage"
+    JSON = "json"
+    CHANGE_TRACKING = "changeTracking"
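
As a quick illustration of the enum just centralized here (a sketch only, assuming the AutoGPT backend package is importable), the members round-trip Firecrawl's wire-format strings:

```python
# Hypothetical usage sketch; assumes the repo's backend package is on the path.
from backend.blocks.firecrawl._api import ScrapeFormat

# Look up an enum member from Firecrawl's wire-format string ...
fmt = ScrapeFormat("rawHtml")
assert fmt is ScrapeFormat.RAW_HTML

# ... and read the wire-format string back out for an API call.
print(fmt.value)                                 # "rawHtml"
print(ScrapeFormat.SCREENSHOT_FULL_PAGE.value)   # "screenshot@fullPage"
```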
autogpt_platform/backend/backend/blocks/firecrawl/_format_utils.py

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+"""Utility functions for converting between our ScrapeFormat enum and firecrawl FormatOption types."""
+
+from typing import List
+
+from firecrawl.v2.types import FormatOption, ScreenshotFormat
+
+from backend.blocks.firecrawl._api import ScrapeFormat
+
+
+def convert_to_format_options(
+    formats: List[ScrapeFormat],
+) -> List[FormatOption]:
+    """Convert our ScrapeFormat enum values to firecrawl FormatOption types.
+
+    Handles special cases like screenshot@fullPage which needs to be converted
+    to a ScreenshotFormat object.
+    """
+    result: List[FormatOption] = []
+
+    for format_enum in formats:
+        if format_enum.value == "screenshot@fullPage":
+            # Special case: convert to ScreenshotFormat with full_page=True
+            result.append(ScreenshotFormat(type="screenshot", full_page=True))
+        else:
+            # Regular string literals
+            result.append(format_enum.value)
+
+    return result
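
Below is a small usage sketch, assuming firecrawl-py v4 and the backend package are importable; the expected result follows directly from the function above (the exact `ScreenshotFormat` repr may differ):

```python
# Hypothetical usage sketch; assumes firecrawl-py v4 and the backend package are installed.
from backend.blocks.firecrawl._api import ScrapeFormat
from backend.blocks.firecrawl._format_utils import convert_to_format_options

options = convert_to_format_options(
    [ScrapeFormat.MARKDOWN, ScrapeFormat.SCREENSHOT_FULL_PAGE, ScrapeFormat.CHANGE_TRACKING]
)

# Plain formats stay as strings; the full-page screenshot becomes a typed object,
# roughly: ["markdown", ScreenshotFormat(type="screenshot", full_page=True), "changeTracking"]
print(options)
```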

autogpt_platform/backend/backend/blocks/firecrawl/crawl.py

Lines changed: 11 additions & 22 deletions
@@ -1,8 +1,9 @@
-from enum import Enum
 from typing import Any

-from firecrawl import FirecrawlApp, ScrapeOptions
+from firecrawl import FirecrawlApp
+from firecrawl.v2.types import ScrapeOptions

+from backend.blocks.firecrawl._api import ScrapeFormat
 from backend.sdk import (
     APIKeyCredentials,
     Block,
@@ -14,21 +15,10 @@
 )

 from ._config import firecrawl
-
-
-class ScrapeFormat(Enum):
-    MARKDOWN = "markdown"
-    HTML = "html"
-    RAW_HTML = "rawHtml"
-    LINKS = "links"
-    SCREENSHOT = "screenshot"
-    SCREENSHOT_FULL_PAGE = "screenshot@fullPage"
-    JSON = "json"
-    CHANGE_TRACKING = "changeTracking"
+from ._format_utils import convert_to_format_options


 class FirecrawlCrawlBlock(Block):
-
     class Input(BlockSchema):
         credentials: CredentialsMetaInput = firecrawl.credentials_field()
         url: str = SchemaField(description="The URL to crawl")
@@ -78,18 +68,17 @@ def __init__(self):
     async def run(
         self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
     ) -> BlockOutput:
-
         app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())

         # Sync call
-        crawl_result = app.crawl_url(
+        crawl_result = app.crawl(
             input_data.url,
             limit=input_data.limit,
             scrape_options=ScrapeOptions(
-                formats=[format.value for format in input_data.formats],
-                onlyMainContent=input_data.only_main_content,
-                maxAge=input_data.max_age,
-                waitFor=input_data.wait_for,
+                formats=convert_to_format_options(input_data.formats),
+                only_main_content=input_data.only_main_content,
+                max_age=input_data.max_age,
+                wait_for=input_data.wait_for,
             ),
         )
         yield "data", crawl_result.data
@@ -101,14 +90,14 @@ async def run(
             elif f == ScrapeFormat.HTML:
                 yield "html", data.html
             elif f == ScrapeFormat.RAW_HTML:
-                yield "raw_html", data.rawHtml
+                yield "raw_html", data.raw_html
             elif f == ScrapeFormat.LINKS:
                 yield "links", data.links
             elif f == ScrapeFormat.SCREENSHOT:
                 yield "screenshot", data.screenshot
             elif f == ScrapeFormat.SCREENSHOT_FULL_PAGE:
                 yield "screenshot_full_page", data.screenshot
             elif f == ScrapeFormat.CHANGE_TRACKING:
-                yield "change_tracking", data.changeTracking
+                yield "change_tracking", data.change_tracking
             elif f == ScrapeFormat.JSON:
                 yield "json", data.json

autogpt_platform/backend/backend/blocks/firecrawl/extract.py

Lines changed: 0 additions & 2 deletions
@@ -20,7 +20,6 @@

 @cost(BlockCost(2, BlockCostType.RUN))
 class FirecrawlExtractBlock(Block):
-
     class Input(BlockSchema):
         credentials: CredentialsMetaInput = firecrawl.credentials_field()
         urls: list[str] = SchemaField(
@@ -53,7 +52,6 @@ def __init__(self):
     async def run(
         self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
     ) -> BlockOutput:
-
         app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())

         extract_result = app.extract(

autogpt_platform/backend/backend/blocks/firecrawl/map.py

Lines changed: 19 additions & 5 deletions
@@ -1,3 +1,5 @@
+from typing import Any
+
 from firecrawl import FirecrawlApp

 from backend.sdk import (
@@ -14,14 +16,16 @@


 class FirecrawlMapWebsiteBlock(Block):
-
     class Input(BlockSchema):
         credentials: CredentialsMetaInput = firecrawl.credentials_field()

         url: str = SchemaField(description="The website url to map")

     class Output(BlockSchema):
-        links: list[str] = SchemaField(description="The links of the website")
+        links: list[str] = SchemaField(description="List of URLs found on the website")
+        results: list[dict[str, Any]] = SchemaField(
+            description="List of search results with url, title, and description"
+        )

     def __init__(self):
         super().__init__(
@@ -35,12 +39,22 @@ def __init__(self):
     async def run(
         self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
     ) -> BlockOutput:
-
         app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())

         # Sync call
-        map_result = app.map_url(
+        map_result = app.map(
             url=input_data.url,
         )

-        yield "links", map_result.links
+        # Convert SearchResult objects to dicts
+        results_data = [
+            {
+                "url": link.url,
+                "title": link.title,
+                "description": link.description,
+            }
+            for link in map_result.links
+        ]
+
+        yield "links", [link.url for link in map_result.links]
+        yield "results", results_data

autogpt_platform/backend/backend/blocks/firecrawl/scrape.py

Lines changed: 6 additions & 18 deletions
@@ -1,8 +1,8 @@
-from enum import Enum
 from typing import Any

 from firecrawl import FirecrawlApp

+from backend.blocks.firecrawl._api import ScrapeFormat
 from backend.sdk import (
     APIKeyCredentials,
     Block,
@@ -14,21 +14,10 @@
 )

 from ._config import firecrawl
-
-
-class ScrapeFormat(Enum):
-    MARKDOWN = "markdown"
-    HTML = "html"
-    RAW_HTML = "rawHtml"
-    LINKS = "links"
-    SCREENSHOT = "screenshot"
-    SCREENSHOT_FULL_PAGE = "screenshot@fullPage"
-    JSON = "json"
-    CHANGE_TRACKING = "changeTracking"
+from ._format_utils import convert_to_format_options


 class FirecrawlScrapeBlock(Block):
-
     class Input(BlockSchema):
         credentials: CredentialsMetaInput = firecrawl.credentials_field()
         url: str = SchemaField(description="The URL to crawl")
@@ -78,12 +67,11 @@ def __init__(self):
     async def run(
         self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
     ) -> BlockOutput:
-
         app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())

-        scrape_result = app.scrape_url(
+        scrape_result = app.scrape(
             input_data.url,
-            formats=[format.value for format in input_data.formats],
+            formats=convert_to_format_options(input_data.formats),
             only_main_content=input_data.only_main_content,
             max_age=input_data.max_age,
             wait_for=input_data.wait_for,
@@ -96,14 +84,14 @@ async def run(
             elif f == ScrapeFormat.HTML:
                 yield "html", scrape_result.html
             elif f == ScrapeFormat.RAW_HTML:
-                yield "raw_html", scrape_result.rawHtml
+                yield "raw_html", scrape_result.raw_html
             elif f == ScrapeFormat.LINKS:
                 yield "links", scrape_result.links
             elif f == ScrapeFormat.SCREENSHOT:
                 yield "screenshot", scrape_result.screenshot
             elif f == ScrapeFormat.SCREENSHOT_FULL_PAGE:
                 yield "screenshot_full_page", scrape_result.screenshot
             elif f == ScrapeFormat.CHANGE_TRACKING:
-                yield "change_tracking", scrape_result.changeTracking
+                yield "change_tracking", scrape_result.change_tracking
             elif f == ScrapeFormat.JSON:
                 yield "json", scrape_result.json

autogpt_platform/backend/backend/blocks/firecrawl/search.py

Lines changed: 10 additions & 20 deletions
@@ -1,8 +1,9 @@
-from enum import Enum
 from typing import Any

-from firecrawl import FirecrawlApp, ScrapeOptions
+from firecrawl import FirecrawlApp
+from firecrawl.v2.types import ScrapeOptions

+from backend.blocks.firecrawl._api import ScrapeFormat
 from backend.sdk import (
     APIKeyCredentials,
     Block,
@@ -14,21 +15,10 @@
 )

 from ._config import firecrawl
-
-
-class ScrapeFormat(Enum):
-    MARKDOWN = "markdown"
-    HTML = "html"
-    RAW_HTML = "rawHtml"
-    LINKS = "links"
-    SCREENSHOT = "screenshot"
-    SCREENSHOT_FULL_PAGE = "screenshot@fullPage"
-    JSON = "json"
-    CHANGE_TRACKING = "changeTracking"
+from ._format_utils import convert_to_format_options


 class FirecrawlSearchBlock(Block):
-
     class Input(BlockSchema):
         credentials: CredentialsMetaInput = firecrawl.credentials_field()
         query: str = SchemaField(description="The query to search for")
@@ -61,19 +51,19 @@ def __init__(self):
     async def run(
         self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
     ) -> BlockOutput:
-
         app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())

         # Sync call
         scrape_result = app.search(
             input_data.query,
             limit=input_data.limit,
             scrape_options=ScrapeOptions(
-                formats=[format.value for format in input_data.formats],
-                maxAge=input_data.max_age,
-                waitFor=input_data.wait_for,
+                formats=convert_to_format_options(input_data.formats) or None,
+                max_age=input_data.max_age,
+                wait_for=input_data.wait_for,
             ),
         )
         yield "data", scrape_result
-        for site in scrape_result.data:
-            yield "site", site
+        if hasattr(scrape_result, "web") and scrape_result.web:
+            for site in scrape_result.web:
+                yield "site", site

autogpt_platform/backend/poetry.lock

Lines changed: 6 additions & 5 deletions
Some generated files are not rendered by default.

autogpt_platform/backend/pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@ aioclamd = "^1.0.0"
 setuptools = "^80.9.0"
 gcloud-aio-storage = "^9.5.0"
 pandas = "^2.3.1"
-firecrawl-py = "^2.16.3"
+firecrawl-py = "^4.3.6"
 exa-py = "^1.14.20"
 croniter = "^6.0.0"
 stagehand = "^0.5.1"
