Skip to content

Commit cc38ddd

Browse files
valtzu authored and chr-hertel committed
feat: add audio & document input support for Gemini (#339)
1 parent fa411a1 commit cc38ddd

File tree

7 files changed

+110
-12
lines changed

7 files changed

+110
-12
lines changed

examples/google/audio-input.php

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,40 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Platform\Bridge\Google\Gemini;
14+
use Symfony\AI\Platform\Bridge\Google\PlatformFactory;
15+
use Symfony\AI\Platform\Message\Content\Audio;
16+
use Symfony\AI\Platform\Message\Message;
17+
use Symfony\AI\Platform\Message\MessageBag;
18+
use Symfony\Component\Dotenv\Dotenv;
19+
20+
require_once dirname(__DIR__).'/vendor/autoload.php';
21+
(new Dotenv())->loadEnv(dirname(__DIR__).'/.env');
22+
23+
if (empty($_ENV['GOOGLE_API_KEY'])) {
24+
echo 'Please set the GOOGLE_API_KEY environment variable.'.\PHP_EOL;
25+
exit(1);
26+
}
27+
28+
$platform = PlatformFactory::create($_ENV['GOOGLE_API_KEY']);
29+
$model = new Gemini(Gemini::GEMINI_1_5_FLASH);
30+
31+
$agent = new Agent($platform, $model);
32+
$messages = new MessageBag(
33+
Message::ofUser(
34+
'What is this recording about?',
35+
Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3'),
36+
),
37+
);
38+
$response = $agent->call($messages);
39+
40+
echo $response->getContent().\PHP_EOL;

examples/google/pdf-input-binary.php

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Platform\Bridge\Google\Gemini;
14+
use Symfony\AI\Platform\Bridge\Google\PlatformFactory;
15+
use Symfony\AI\Platform\Message\Content\Document;
16+
use Symfony\AI\Platform\Message\Message;
17+
use Symfony\AI\Platform\Message\MessageBag;
18+
use Symfony\Component\Dotenv\Dotenv;
19+
20+
require_once dirname(__DIR__).'/vendor/autoload.php';
21+
(new Dotenv())->loadEnv(dirname(__DIR__).'/.env');
22+
23+
if (empty($_ENV['GOOGLE_API_KEY'])) {
24+
echo 'Please set the GOOGLE_API_KEY environment variable.'.\PHP_EOL;
25+
exit(1);
26+
}
27+
28+
$platform = PlatformFactory::create($_ENV['GOOGLE_API_KEY']);
29+
$model = new Gemini(Gemini::GEMINI_1_5_FLASH);
30+
31+
$agent = new Agent($platform, $model);
32+
$messages = new MessageBag(
33+
Message::ofUser(
34+
Document::fromFile(dirname(__DIR__, 2).'/fixtures/document.pdf'),
35+
'What is this document about?',
36+
),
37+
);
38+
$response = $agent->call($messages);
39+
40+
echo $response->getContent().\PHP_EOL;

examples/google/toolcall.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
use Symfony\AI\Platform\Message\MessageBag;
2020
use Symfony\Component\Dotenv\Dotenv;
2121

22-
require_once dirname(__DIR__, 2).'/vendor/autoload.php';
23-
(new Dotenv())->loadEnv(dirname(__DIR__, 2).'/.env');
22+
require_once dirname(__DIR__).'/vendor/autoload.php';
23+
(new Dotenv())->loadEnv(dirname(__DIR__).'/.env');
2424

2525
if (empty($_ENV['GOOGLE_API_KEY'])) {
2626
echo 'Please set the GOOGLE_API_KEY environment variable.'.\PHP_EOL;

src/platform/src/Bridge/Google/Contract/UserMessageNormalizer.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
use Symfony\AI\Platform\Bridge\Google\Gemini;
1515
use Symfony\AI\Platform\Contract\Normalizer\ModelContractNormalizer;
16-
use Symfony\AI\Platform\Message\Content\Image;
16+
use Symfony\AI\Platform\Message\Content\File;
1717
use Symfony\AI\Platform\Message\Content\Text;
1818
use Symfony\AI\Platform\Message\UserMessage;
1919
use Symfony\AI\Platform\Model;
@@ -45,7 +45,7 @@ public function normalize(mixed $data, ?string $format = null, array $context =
4545
if ($content instanceof Text) {
4646
$parts[] = ['text' => $content->text];
4747
}
48-
if ($content instanceof Image) {
48+
if ($content instanceof File) {
4949
$parts[] = ['inline_data' => [
5050
'mime_type' => $content->getFormat(),
5151
'data' => $content->asBase64(),

src/platform/src/Bridge/Google/Gemini.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ public function __construct(string $name = self::GEMINI_2_PRO, array $options =
3333
$capabilities = [
3434
Capability::INPUT_MESSAGES,
3535
Capability::INPUT_IMAGE,
36+
Capability::INPUT_AUDIO,
37+
Capability::INPUT_PDF,
3638
Capability::OUTPUT_STREAMING,
3739
Capability::TOOL_CALLING,
3840
];

src/platform/src/Bridge/Google/ModelHandler.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -197,13 +197,13 @@ private function convertChoice(array $choice): Choice
197197

198198
/**
199199
* @param array{
200-
* id: string,
200+
* id?: string,
201201
* name: string,
202202
* args: mixed[]
203203
* } $toolCall
204204
*/
205205
private function convertToolCall(array $toolCall): ToolCall
206206
{
207-
return new ToolCall($toolCall['id'], $toolCall['name'], $toolCall['args']);
207+
return new ToolCall($toolCall['id'] ?? '', $toolCall['name'], $toolCall['args']);
208208
}
209209
}

src/platform/tests/Bridge/Google/Contract/UserMessageNormalizerTest.php

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,16 @@
1212
namespace Symfony\AI\Platform\Tests\Bridge\Google\Contract;
1313

1414
use PHPUnit\Framework\Attributes\CoversClass;
15+
use PHPUnit\Framework\Attributes\DataProvider;
1516
use PHPUnit\Framework\Attributes\Small;
1617
use PHPUnit\Framework\Attributes\Test;
1718
use PHPUnit\Framework\Attributes\UsesClass;
1819
use PHPUnit\Framework\TestCase;
1920
use Symfony\AI\Platform\Bridge\Google\Contract\UserMessageNormalizer;
2021
use Symfony\AI\Platform\Bridge\Google\Gemini;
2122
use Symfony\AI\Platform\Contract;
23+
use Symfony\AI\Platform\Message\Content\Audio;
24+
use Symfony\AI\Platform\Message\Content\Document;
2225
use Symfony\AI\Platform\Message\Content\File;
2326
use Symfony\AI\Platform\Message\Content\Image;
2427
use Symfony\AI\Platform\Message\Content\Text;
@@ -30,6 +33,9 @@
3033
#[UsesClass(UserMessage::class)]
3134
#[UsesClass(Text::class)]
3235
#[UsesClass(File::class)]
36+
#[UsesClass(Image::class)]
37+
#[UsesClass(Document::class)]
38+
#[UsesClass(Audio::class)]
3339
final class UserMessageNormalizerTest extends TestCase
3440
{
3541
#[Test]
@@ -62,22 +68,32 @@ public function normalizeTextContent(): void
6268
self::assertSame([['text' => 'Write a story about a magic backpack.']], $normalized);
6369
}
6470

71+
#[DataProvider('binaryContentProvider')]
6572
#[Test]
66-
public function normalizeImageContent(): void
73+
public function normalizeBinaryContent(File $content, string $expectedMimeType, string $expectedPrefix): void
6774
{
6875
$normalizer = new UserMessageNormalizer();
69-
$imageContent = Image::fromFile(\dirname(__DIR__, 6).'/fixtures/image.jpg');
70-
$message = new UserMessage(new Text('Tell me about this instrument'), $imageContent);
76+
$message = new UserMessage(new Text('Tell me about this instrument'), $content);
7177

7278
$normalized = $normalizer->normalize($message);
7379

7480
self::assertCount(2, $normalized);
7581
self::assertSame(['text' => 'Tell me about this instrument'], $normalized[0]);
7682
self::assertArrayHasKey('inline_data', $normalized[1]);
77-
self::assertSame('image/jpeg', $normalized[1]['inline_data']['mime_type']);
83+
self::assertSame($expectedMimeType, $normalized[1]['inline_data']['mime_type']);
7884
self::assertNotEmpty($normalized[1]['inline_data']['data']);
7985

80-
// Verify that the base64 data string starts correctly for a JPEG
81-
self::assertStringStartsWith('/9j/', $normalized[1]['inline_data']['data']);
86+
// Verify that the base64 data string starts correctly
87+
self::assertStringStartsWith($expectedPrefix, $normalized[1]['inline_data']['data']);
88+
}
89+
90+
/**
91+
* @return iterable<string, array{0: File, 1: string, 2: string}>
92+
*/
93+
public static function binaryContentProvider(): iterable
94+
{
95+
yield 'image' => [Image::fromFile(\dirname(__DIR__, 6).'/fixtures/image.jpg'), 'image/jpeg', '/9j/'];
96+
yield 'document' => [Document::fromFile(\dirname(__DIR__, 6).'/fixtures/document.pdf'), 'application/pdf', 'JVBE'];
97+
yield 'audio' => [Audio::fromFile(\dirname(__DIR__, 6).'/fixtures/audio.mp3'), 'audio/mpeg', 'SUQz'];
8298
}
8399
}

0 commit comments

Comments
 (0)