Skip to content

Commit 22a3555

Browse files
committed
feature #270 [Agent] Add Firecrawl tool (Guikingone)
This PR was squashed before being merged into the main branch. Discussion ---------- [Agent] Add Firecrawl tool | Q | A | ------------- | --- | Bug fix? | no | New feature? | yes | Docs? | yes | Issues | None | License | MIT Hi 👋🏻 This PR introduces a tool for `Firecrawl` that can scrape, crawl, and map websites and links, and pass the results to the model for later use. Commits ------- 89a823f [Agent] Add Firecrawl tool
2 parents e9abe34 + 89a823f commit 22a3555

File tree

13 files changed

+387
-0
lines changed

13 files changed

+387
-0
lines changed

examples/.env

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ TAVILY_API_KEY=
5252
# For using Brave (tool)
5353
BRAVE_API_KEY=
5454

55+
# For using Firecrawl (tool)
56+
FIRECRAWL_HOST=https://api.firecrawl.dev
57+
FIRECRAWL_API_KEY=
58+
5559
# For using MongoDB Atlas (store)
5660
MONGODB_URI=
5761

examples/toolbox/firecrawl-crawl.php

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Agent\Toolbox\AgentProcessor;
14+
use Symfony\AI\Agent\Toolbox\Tool\Firecrawl;
15+
use Symfony\AI\Agent\Toolbox\Toolbox;
16+
use Symfony\AI\Platform\Bridge\OpenAi\Gpt;
17+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
18+
use Symfony\AI\Platform\Message\Message;
19+
use Symfony\AI\Platform\Message\MessageBag;
20+
21+
// Example: let an agent crawl a page through the Firecrawl tool and summarize it.
// env(), http_client() and logger() are presumably provided by bootstrap.php.
require_once dirname(__DIR__) . '/bootstrap.php';

$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
$model = new Gpt(Gpt::GPT_4O_MINI);

// The tool is configured from FIRECRAWL_API_KEY / FIRECRAWL_HOST (see examples/.env).
$firecrawl = new Firecrawl(
    http_client(),
    env('FIRECRAWL_API_KEY'),
    env('FIRECRAWL_HOST'),
);

$toolbox = new Toolbox([$firecrawl], logger: logger());
$toolProcessor = new AgentProcessor($toolbox);

// The same processor is registered on both the input and the output side of the agent.
$agent = new Agent($platform, $model, inputProcessors: [$toolProcessor], outputProcessors: [$toolProcessor]);

// "summarize" (not "resume", which means "continue" in English) states the intended task unambiguously.
$messages = new MessageBag(Message::ofUser('Crawl the following URL: https://symfony.com/doc/current/setup.html then summarize it in less than 200 words.'));
$result = $agent->call($messages);

echo $result->getContent() . \PHP_EOL;

examples/toolbox/firecrawl-map.php

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Agent\Toolbox\AgentProcessor;
14+
use Symfony\AI\Agent\Toolbox\Tool\Firecrawl;
15+
use Symfony\AI\Agent\Toolbox\Toolbox;
16+
use Symfony\AI\Platform\Bridge\OpenAi\Gpt;
17+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
18+
use Symfony\AI\Platform\Message\Message;
19+
use Symfony\AI\Platform\Message\MessageBag;
20+
21+
// Example: ask an agent to list a site's links through the Firecrawl tool.
// env(), http_client() and logger() are presumably provided by bootstrap.php.
require_once dirname(__DIR__).'/bootstrap.php';

// LLM platform and model driving the agent.
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
$model = new Gpt(Gpt::GPT_4O_MINI);

// Wrap the Firecrawl tool in a toolbox and hand it to the agent processor.
$toolbox = new Toolbox(
    [new Firecrawl(http_client(), env('FIRECRAWL_API_KEY'), env('FIRECRAWL_HOST'))],
    logger: logger(),
);
$processor = new AgentProcessor($toolbox);

$agent = new Agent(
    $platform,
    $model,
    inputProcessors: [$processor],
    outputProcessors: [$processor],
);

$prompt = Message::ofUser('Retrieve all the links from https://symfony.com then list only the ones related to the Messenger component.');
$result = $agent->call(new MessageBag($prompt));

echo $result->getContent(), \PHP_EOL;

examples/toolbox/firecrawl-scrape.php

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Agent\Toolbox\AgentProcessor;
14+
use Symfony\AI\Agent\Toolbox\Tool\Firecrawl;
15+
use Symfony\AI\Agent\Toolbox\Toolbox;
16+
use Symfony\AI\Platform\Bridge\OpenAi\Gpt;
17+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
18+
use Symfony\AI\Platform\Message\Message;
19+
use Symfony\AI\Platform\Message\MessageBag;
20+
21+
// Example: let an agent scrape a single page through the Firecrawl tool and summarize it.
// env(), http_client() and logger() are presumably provided by bootstrap.php.
require_once dirname(__DIR__) . '/bootstrap.php';

$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
$model = new Gpt(Gpt::GPT_4O_MINI);

// The tool is configured from FIRECRAWL_API_KEY / FIRECRAWL_HOST (see examples/.env).
$firecrawl = new Firecrawl(
    http_client(),
    env('FIRECRAWL_API_KEY'),
    env('FIRECRAWL_HOST'),
);

$toolbox = new Toolbox([$firecrawl], logger: logger());
$toolProcessor = new AgentProcessor($toolbox);

// The same processor is registered on both the input and the output side of the agent.
$agent = new Agent($platform, $model, inputProcessors: [$toolProcessor], outputProcessors: [$toolProcessor]);

// "summarize" (not "resume", which means "continue" in English) states the intended task unambiguously.
$messages = new MessageBag(Message::ofUser('Scrape the following URL: https://symfony.com/doc/current/setup.html then summarize it in less than 200 words.'));
$result = $agent->call($messages);

echo $result->getContent() . \PHP_EOL;
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Agent\Toolbox\Tool;
13+
14+
use Symfony\AI\Agent\Toolbox\Attribute\AsTool;
15+
use Symfony\Contracts\HttpClient\HttpClientInterface;
16+
17+
/**
18+
* @author Guillaume Loulier <[email protected]>
19+
*
20+
* @see https://www.firecrawl.dev/
21+
*/
22+
#[AsTool('firecrawl_scrape', description: 'Allow to scrape website using url', method: 'scrape')]
23+
#[AsTool('firecrawl_crawl', description: 'Allow to crawl website using url', method: 'crawl')]
24+
#[AsTool('firecrawl_map', description: 'Allow to retrieve all urls from a website using url', method: 'map')]
25+
final readonly class Firecrawl
{
    /**
     * Delay between two crawl status checks, in microseconds (500 ms).
     *
     * usleep() takes microseconds, so a literal 500 would poll the API
     * almost continuously.
     */
    private const POLLING_INTERVAL_US = 500_000;

    /**
     * @param HttpClientInterface $httpClient HTTP client used for every Firecrawl API call
     * @param string              $apiKey     API key, sent as a bearer token
     * @param string              $endpoint   Base URL of the Firecrawl API; "/v1/..." paths are appended to it
     */
    public function __construct(
        private HttpClientInterface $httpClient,
        #[\SensitiveParameter] private string $apiKey,
        private string $endpoint,
    ) {
    }

    /**
     * Scrapes a single page and returns its content as markdown and HTML.
     *
     * @param string $url Address of the page to scrape
     *
     * @return array{
     *     url: string,
     *     markdown: string,
     *     html: string,
     * }
     */
    public function scrape(string $url): array
    {
        $response = $this->httpClient->request('POST', \sprintf('%s/v1/scrape', $this->endpoint), [
            'auth_bearer' => $this->apiKey,
            'json' => [
                'url' => $url,
                'formats' => ['markdown', 'html'],
            ],
        ]);

        $scrapingPayload = $response->toArray();

        return [
            'url' => $url,
            'markdown' => $scrapingPayload['data']['markdown'],
            'html' => $scrapingPayload['data']['html'],
        ];
    }

    /**
     * Starts a crawl, waits until its status is no longer "scraping" and
     * returns the pages found in the crawl resource.
     *
     * @param string $url Address the crawl starts from
     *
     * @return array<int, array{
     *     url: string,
     *     markdown: string,
     *     html: string,
     * }>|array{}
     */
    public function crawl(string $url): array
    {
        $response = $this->httpClient->request('POST', \sprintf('%s/v1/crawl', $this->endpoint), [
            'auth_bearer' => $this->apiKey,
            'json' => [
                'url' => $url,
                'scrapeOptions' => [
                    'formats' => ['markdown', 'html'],
                ],
            ],
        ]);

        $crawlingPayload = $response->toArray();

        $statusUrl = \sprintf('%s/v1/crawl/%s', $this->endpoint, $crawlingPayload['id']);
        // Status requests are authenticated like every other call to the API.
        $statusOptions = ['auth_bearer' => $this->apiKey];

        while ('scraping' === $this->httpClient->request('GET', $statusUrl, $statusOptions)->toArray()['status']) {
            usleep(self::POLLING_INTERVAL_US);
        }

        // The crawl resource is fetched once more to read the collected pages.
        $finalPayload = $this->httpClient->request('GET', $statusUrl, $statusOptions)->toArray();

        return array_map(static fn (array $scrapedItem): array => [
            // "og:url" is only present when the page declares it; fall back to
            // the crawled address, then to the requested URL.
            'url' => $scrapedItem['metadata']['og:url'] ?? $scrapedItem['metadata']['sourceURL'] ?? $url,
            'markdown' => $scrapedItem['markdown'] ?? '',
            'html' => $scrapedItem['html'] ?? '',
        ], $finalPayload['data'] ?? []);
    }

    /**
     * Retrieves the links that Firecrawl discovers for a website.
     *
     * @param string $url Address of the website to map
     *
     * @return array{
     *     url: string,
     *     links: array<string>,
     * }
     */
    public function map(string $url): array
    {
        $response = $this->httpClient->request('POST', \sprintf('%s/v1/map', $this->endpoint), [
            'auth_bearer' => $this->apiKey,
            'json' => [
                'url' => $url,
            ],
        ]);

        $mappingPayload = $response->toArray();

        return [
            'url' => $url,
            'links' => $mappingPayload['links'],
        ];
    }
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Agent\Tests\Toolbox\Tool;
13+
14+
use PHPUnit\Framework\Attributes\CoversClass;
15+
use PHPUnit\Framework\TestCase;
16+
use Symfony\AI\Agent\Toolbox\Tool\Firecrawl;
17+
use Symfony\Component\HttpClient\MockHttpClient;
18+
use Symfony\Component\HttpClient\Response\JsonMockResponse;
19+
20+
#[CoversClass(Firecrawl::class)]
21+
final class FirecrawlTest extends TestCase
{
    public function testScrape()
    {
        $client = new MockHttpClient([
            self::fixtureResponse('firecrawl-scrape.json'),
        ]);
        $tool = new Firecrawl($client, 'test', 'https://127.0.0.1:3002');

        $scraped = $tool->scrape('https://www.symfony.com');

        $this->assertSame('https://www.symfony.com', $scraped['url']);
        $this->assertNotEmpty($scraped['markdown']);
        $this->assertNotEmpty($scraped['html']);
        $this->assertSame(1, $client->getRequestsCount());
    }

    public function testCrawl()
    {
        // Responses are consumed in order: crawl start, two status polls, final payload.
        $client = new MockHttpClient([
            self::fixtureResponse('firecrawl-crawl-wait.json'),
            self::fixtureResponse('firecrawl-crawl-status.json'),
            self::fixtureResponse('firecrawl-crawl-status-done.json'),
            self::fixtureResponse('firecrawl-crawl.json'),
        ]);
        $tool = new Firecrawl($client, 'test', 'https://127.0.0.1:3002');

        $pages = $tool->crawl('https://www.symfony.com');

        $this->assertCount(1, $pages);
        $this->assertNotEmpty($pages[0]);

        $page = $pages[0];
        $this->assertSame('https://www.symfony.com', $page['url']);
        $this->assertNotEmpty($page['markdown']);
        $this->assertNotEmpty($page['html']);
        $this->assertSame(4, $client->getRequestsCount());
    }

    public function testMap()
    {
        $client = new MockHttpClient([
            self::fixtureResponse('firecrawl-map.json'),
        ]);
        $tool = new Firecrawl($client, 'test', 'https://127.0.0.1:3002');

        $result = $tool->map('https://www.symfony.com');

        $this->assertSame('https://www.symfony.com', $result['url']);
        $this->assertCount(5, $result['links']);
        $this->assertSame(1, $client->getRequestsCount());
    }

    /**
     * Builds a mocked JSON response from a file of the local "fixtures" directory.
     */
    private static function fixtureResponse(string $fileName): JsonMockResponse
    {
        return new JsonMockResponse(json_decode(file_get_contents(__DIR__.'/fixtures/'.$fileName), true));
    }
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"status": "completed"
3+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"status": "scraping"
3+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"success": true,
3+
"id": "123-456-789",
4+
"url": "https://127.0.0.1:3002/v1/crawl/123-456-789"
5+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"status": "scraping",
3+
"total": 36,
4+
"completed": 10,
5+
"creditsUsed": 10,
6+
"expiresAt": "2024-00-00T00:00:00.000Z",
7+
"next": "https://api.firecrawl.dev/v1/crawl/123-456-789?skip=10",
8+
"data": [
9+
{
10+
"markdown": "[Firecrawl Docs home page![light logo](https://mintlify.s3-us-west-1.amazonaws.com/firecrawl/logo/light.svg)!...",
11+
"html": "<!DOCTYPE html><html lang=\"en\" class=\"js-focus-visible lg:[--scroll-mt:9.5rem]\" data-js-focus-visible=\"\">...",
12+
"metadata": {
13+
"title": "Build a 'Chat with website' using Groq Llama 3 | Firecrawl",
14+
"language": "en",
15+
"sourceURL": "https://docs.firecrawl.dev/learn/rag-llama3",
16+
"description": "Learn how to use Firecrawl, Groq Llama 3, and Langchain to build a 'Chat with your website' bot.",
17+
"ogLocaleAlternate": [],
18+
"statusCode": 200,
19+
"og:url": "https://www.symfony.com"
20+
}
21+
}
22+
]
23+
}

0 commit comments

Comments
 (0)