Skip to content

Commit 0d798a8

Browse files
authored
Merge pull request #139 from firecrawl/feat/agent
Feat/agent
2 parents d757025 + 3421a79 commit 0d798a8

File tree

3 files changed

+128
-7
lines changed

3 files changed

+128
-7
lines changed

package-lock.json

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
},
2929
"license": "MIT",
3030
"dependencies": {
31-
"@mendable/firecrawl-js": "^4.3.6",
31+
"@mendable/firecrawl-js": "^4.9.3",
3232
"dotenv": "^17.2.2",
3333
"firecrawl-fastmcp": "^1.0.4",
3434
"typescript": "^5.9.2",

src/index.ts

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,7 @@ const scrapeParamsSchema = z.object({
256256
})
257257
.optional(),
258258
storeInCache: z.boolean().optional(),
259+
zeroDataRetention: z.boolean().optional(),
259260
maxAge: z.number().optional(),
260261
});
261262

@@ -429,6 +430,7 @@ The query also supports search operators, that you can use if needed to refine t
429430
.array(z.object({ type: z.enum(['web', 'images', 'news']) }))
430431
.optional(),
431432
scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),
433+
enterprise: z.array(z.enum(['default', 'anon', 'zdr'])).optional(),
432434
}),
433435
execute: async (
434436
args: unknown,
@@ -616,6 +618,125 @@ Extract structured information from web pages using LLM capabilities. Supports b
616618
return asText(res);
617619
},
618620
});
621+
622+
// Registers the `firecrawl_agent` tool: forwards a natural-language prompt
// (plus optional URLs and an optional JSON schema) to the client's `agent`
// method and returns the response rendered through `asText`.
server.addTool({
  name: 'firecrawl_agent',
  description: `
Autonomous web data gathering agent. Describe what data you want, and the agent searches, navigates, and extracts it from anywhere on the web.

**Best for:** Complex data gathering tasks where you don't know the exact URLs; research tasks requiring multiple sources; finding data in hard-to-reach places.
**Not recommended for:** Simple single-page scraping (use scrape); when you already know the exact URL (use scrape or extract).
**Key advantages over extract:**
- No URLs required - just describe what you need
- Autonomously searches and navigates the web
- Faster and more cost-effective for complex tasks
- Higher reliability for varied queries

**Arguments:**
- prompt: Natural language description of the data you want (required, max 10,000 characters)
- urls: Optional array of URLs to focus the agent on specific pages
- schema: Optional JSON schema for structured output

**Prompt Example:** "Find the founders of Firecrawl and their backgrounds"
**Usage Example (no URLs):**
\`\`\`json
{
"name": "firecrawl_agent",
"arguments": {
"prompt": "Find the top 5 AI startups founded in 2024 and their funding amounts",
"schema": {
"type": "object",
"properties": {
"startups": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": { "type": "string" },
"funding": { "type": "string" },
"founded": { "type": "string" }
}
}
}
}
}
}
}
\`\`\`
**Usage Example (with URLs):**
\`\`\`json
{
"name": "firecrawl_agent",
"arguments": {
"urls": ["https://docs.firecrawl.dev", "https://firecrawl.dev/pricing"],
"prompt": "Compare the features and pricing information from these pages"
}
}
\`\`\`
**Returns:** Extracted data matching your prompt/schema, plus credits used.
`,
  parameters: z.object({
    prompt: z.string().min(1).max(10000),
    urls: z.array(z.string().url()).optional(),
    schema: z.record(z.string(), z.any()).optional(),
  }),
  execute: async (
    args: unknown,
    { session, log }: { session?: SessionData; log: Logger }
  ): Promise<string> => {
    const client = getClient(session);

    // Pull the individual fields out once so the rest of the handler reads
    // from named locals instead of repeating the casts.
    const input = args as Record<string, unknown>;
    const prompt = input.prompt as string;
    const urls = input.urls as string[] | undefined;
    const schema = input.schema as Record<string, unknown>;

    // Only the first 100 characters of the prompt are written to the log.
    log.info('Starting agent', {
      prompt: prompt.substring(0, 100),
      urlCount: Array.isArray(urls) ? urls.length : 0,
    });

    // NOTE(review): removeEmptyTopLevel presumably drops unset/empty
    // top-level fields before the request is sent; confirm against the helper.
    const payload = removeEmptyTopLevel({
      prompt,
      urls,
      schema: schema ? schema : undefined,
    });

    // The client is cast to `any` here, so the `agent` call is not type-checked.
    const result = await (client as any).agent({
      ...payload,
      origin: ORIGIN,
    });
    return asText(result);
  },
});
705+
706+
// Registers the `firecrawl_agent_status` tool: looks up an agent job by id via
// the client's `getAgentStatus` method and returns the response through `asText`.
server.addTool({
  name: 'firecrawl_agent_status',
  description: `
Check the status of an agent job.

**Usage Example:**
\`\`\`json
{
"name": "firecrawl_agent_status",
"arguments": {
"id": "550e8400-e29b-41d4-a716-446655440000"
}
}
\`\`\`
**Possible statuses:**
- processing: Agent is still working
- completed: Extraction finished successfully
- failed: An error occurred

**Returns:** Status, progress, and results (if completed) of the agent job.
`,
  parameters: z.object({ id: z.string() }),
  execute: async (
    args: unknown,
    { session, log }: { session?: SessionData; log: Logger }
  ): Promise<string> => {
    const client = getClient(session);
    const jobId = (args as { id: string }).id;

    log.info('Checking agent status', { id: jobId });

    // The client is cast to `any` here, so `getAgentStatus` is not type-checked.
    const status = await (client as any).getAgentStatus(jobId);
    return asText(status);
  },
});
739+
619740
const PORT = Number(process.env.PORT || 3000);
620741
const HOST =
621742
process.env.CLOUD_SERVICE === 'true'

0 commit comments

Comments
 (0)