Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 51 additions & 2 deletions autogpt_platform/backend/backend/blocks/firecrawl/extract.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,52 @@
from ._config import firecrawl


def normalize_to_json_schema(schema: dict | None) -> dict | None:
"""
Normalize a simplified schema format into valid JSON Schema format.

Transforms simplified schemas like {"field": "type"} into proper JSON Schema format:
{"type": "object", "properties": {"field": {"type": "type"}}}

If the schema already appears to be a valid JSON Schema (has "type" or "properties"),
it is returned as-is.

Args:
schema: The schema to normalize, or None

Returns:
A valid JSON Schema dict, or None if input was None
"""
if schema is None:
return None

# If it already has "type" at the root level, assume it's already a JSON Schema
if "type" in schema:
return schema

# If it already has "properties", assume it's already a JSON Schema
if "properties" in schema:
return schema

# Otherwise, treat it as a simplified format and transform it
properties = {}
for key, value in schema.items():
if isinstance(value, str):
# Simple type string like "string", "number", etc.
properties[key] = {"type": value}
elif isinstance(value, dict):
# Already a property definition, use as-is
properties[key] = value
else:
# Fallback: treat as any type
properties[key] = {"type": "string"}

return {
"type": "object",
"properties": properties,
}


@cost(BlockCost(2, BlockCostType.RUN))
class FirecrawlExtractBlock(Block):
class Input(BlockSchemaInput):
Expand All @@ -30,7 +76,7 @@ class Input(BlockSchemaInput):
description="The prompt to use for the crawl", default=None, advanced=False
)
output_schema: dict | None = SchemaField(
description="A Json Schema describing the output structure if more rigid structure is desired.",
description='A JSON Schema describing the output structure. Supports both simplified format (e.g., {"field": "string"}) and full JSON Schema format (e.g., {"type": "object", "properties": {"field": {"type": "string"}}}).',
default=None,
)
enable_web_search: bool = SchemaField(
Expand Down Expand Up @@ -59,10 +105,13 @@ async def run(
) -> BlockOutput:
app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())

# Normalize the schema to ensure it's in valid JSON Schema format
normalized_schema = normalize_to_json_schema(input_data.output_schema)

extract_result = app.extract(
urls=input_data.urls,
prompt=input_data.prompt,
schema=input_data.output_schema,
schema=normalized_schema,
enable_web_search=input_data.enable_web_search,
)

Expand Down
Loading