Skip to content

Commit a4821e8

Browse files
committed
feat(platform): add Speech
1 parent c131b97 commit a4821e8

32 files changed

+1047
-17
lines changed

demo/tests/Blog/Command/StreamCommandTest.php

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,13 @@
1616
use Symfony\AI\Agent\AgentInterface;
1717
use Symfony\AI\Platform\Message\MessageBag;
1818
use Symfony\AI\Platform\Metadata\Metadata;
19+
use Symfony\AI\Platform\Result\DeferredResult;
20+
use Symfony\AI\Platform\Result\InMemoryRawResult;
1921
use Symfony\AI\Platform\Result\RawResultInterface;
2022
use Symfony\AI\Platform\Result\ResultInterface;
23+
use Symfony\AI\Platform\Result\TextResult;
24+
use Symfony\AI\Platform\Speech\Speech;
25+
use Symfony\AI\Platform\Test\PlainConverter;
2126
use Symfony\Component\Console\Input\ArrayInput;
2227
use Symfony\Component\Console\Output\BufferedOutput;
2328
use Symfony\Component\Console\Style\SymfonyStyle;
@@ -52,6 +57,15 @@ public function getRawResult(): ?RawResultInterface
5257
public function setRawResult(RawResultInterface $rawResult): void
5358
{
5459
}
60+
61+
public function addSpeech(Speech $speech): void
62+
{
63+
}
64+
65+
public function getSpeech(string $identifier): Speech
66+
{
67+
return new Speech([], new DeferredResult(new PlainConverter(new TextResult('foo')), new InMemoryRawResult()), 'bar');
68+
}
5569
});
5670

5771
$input = new ArrayInput([]);

docs/components/platform.rst

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,64 @@ This allows fast and isolated testing of AI-powered features without relying on
532532

533533
This requires `cURL` and the `ext-curl` extension to be installed.
534534

535+
Speech support
536+
~~~~~~~~~~~~~~
537+
538+
Using speech to send messages / receive answers as audio is a common use case when integrating agents and/or chats.
539+
540+
Speech support can be enabled using ``Symfony\AI\Platform\Speech\SpeechListener`` (for ``tts`` in this example)::
541+
542+
use Symfony\AI\Agent\Agent;
543+
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechPlatform;
544+
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
545+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
546+
use Symfony\AI\Platform\Message\Message;
547+
use Symfony\AI\Platform\Message\MessageBag;
548+
use Symfony\AI\Platform\Speech\SpeechConfiguration;
549+
use Symfony\AI\Platform\Speech\SpeechListener;
550+
use Symfony\Component\EventDispatcher\EventDispatcher;
551+
552+
$elevenLabsPlatform = new ElevenLabsSpeechPlatform(
553+
PlatformFactory::create(
554+
apiKey: env('ELEVEN_LABS_API_KEY'),
555+
httpClient: http_client(),
556+
),
557+
[
558+
'ttsModel' => 'eleven_multilingual_v2',
559+
'ttsVoice' => 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
560+
],
561+
);
562+
563+
$eventDispatcher = new EventDispatcher();
564+
$eventDispatcher->addSubscriber(new SpeechListener([
565+
$elevenLabsPlatform,
566+
]));
567+
568+
$platform = OpenAiPlatformFactory::create($openAiApiKey, httpClient: HttpClient::create(), eventDispatcher: $eventDispatcher);
569+
570+
$agent = new Agent($platform, 'gpt-4o');
571+
$answer = $agent->call(new MessageBag(
572+
Message::ofUser('Tina has one brother and one sister. How many sisters do Tina\'s siblings have?'),
573+
));
574+
575+
echo $answer->getSpeech('elevenlabs')->asBinary();
576+
577+
When using the bundle, models and voices can be set through the configuration::
578+
579+
ai:
580+
platform:
581+
elevenlabs:
582+
api_key: '%env(ELEVEN_LABS_API_KEY)%'
583+
speech:
584+
tts_model: 'eleven_multilingual_v2'
585+
tts_voice: '%env(ELEVEN_LABS_VOICE_IDENTIFIER)%'
586+
tts_options:
587+
foo: bar
588+
589+
.. note::
590+
591+
Please be aware that enabling speech support requires defining the corresponding platforms.
592+
535593
Code Examples
536594
~~~~~~~~~~~~~
537595

examples/speech/README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Speech Examples
2+
3+
Speech is mainly used to transform text to audio and vice versa; it can also be used to create an audio-to-audio pipeline.
4+
5+
To run the examples, you can use additional tools like [mpg123](https://www.mpg123.de/):
6+
7+
```bash
8+
php speech/agent-eleven-labs-speech-tts.php | mpg123 -
9+
php speech/agent-eleven-labs-speech-sts.php | mpg123 -
10+
```
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechPlatform;
14+
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
15+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
16+
use Symfony\AI\Platform\Message\Content\Audio;
17+
use Symfony\AI\Platform\Message\Message;
18+
use Symfony\AI\Platform\Message\MessageBag;
19+
use Symfony\AI\Platform\Speech\SpeechListener;
20+
use Symfony\Component\EventDispatcher\EventDispatcher;
21+
22+
require_once dirname(__DIR__).'/bootstrap.php';
23+
24+
$elevenLabsPlatform = new ElevenLabsSpeechPlatform(
25+
PlatformFactory::create(
26+
apiKey: env('ELEVEN_LABS_API_KEY'),
27+
httpClient: http_client(),
28+
),
29+
[
30+
'ttsModel' => 'eleven_multilingual_v2',
31+
'ttsVoice' => 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
32+
'sttModel' => 'eleven_multilingual_v2',
33+
],
34+
);
35+
36+
$eventDispatcher = new EventDispatcher();
37+
$eventDispatcher->addSubscriber(new SpeechListener([
38+
$elevenLabsPlatform,
39+
]));
40+
41+
$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client(), eventDispatcher: $eventDispatcher);
42+
43+
$agent = new Agent($platform, 'gpt-4o');
44+
$answer = $agent->call(new MessageBag(
45+
Message::ofUser(Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3'))
46+
));
47+
48+
echo $answer->getSpeech('elevenlabs')->asBinary();
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechPlatform;
14+
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
15+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
16+
use Symfony\AI\Platform\Message\Content\Audio;
17+
use Symfony\AI\Platform\Message\Message;
18+
use Symfony\AI\Platform\Message\MessageBag;
19+
use Symfony\AI\Platform\Speech\SpeechListener;
20+
use Symfony\Component\EventDispatcher\EventDispatcher;
21+
22+
require_once dirname(__DIR__).'/bootstrap.php';
23+
24+
$eventDispatcher = new EventDispatcher();
25+
$eventDispatcher->addSubscriber(new SpeechListener([
26+
new ElevenLabsSpeechPlatform(
27+
PlatformFactory::create(
28+
apiKey: env('ELEVEN_LABS_API_KEY'),
29+
httpClient: http_client(),
30+
),
31+
[
32+
'sttModel' => 'eleven_multilingual_v2',
33+
],
34+
),
35+
]));
36+
37+
$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client(), eventDispatcher: $eventDispatcher);
38+
39+
$agent = new Agent($platform, 'gpt-4o');
40+
$answer = $agent->call(new MessageBag(
41+
Message::ofUser(Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3'))
42+
));
43+
44+
echo $answer->getContent();
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechPlatform;
14+
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
15+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
16+
use Symfony\AI\Platform\Message\Message;
17+
use Symfony\AI\Platform\Message\MessageBag;
18+
use Symfony\AI\Platform\Speech\SpeechListener;
19+
use Symfony\Component\EventDispatcher\EventDispatcher;
20+
21+
require_once dirname(__DIR__).'/bootstrap.php';
22+
23+
$elevenLabsPlatform = new ElevenLabsSpeechPlatform(
24+
PlatformFactory::create(
25+
apiKey: env('ELEVEN_LABS_API_KEY'),
26+
httpClient: http_client(),
27+
),
28+
[
29+
'ttsModel' => 'eleven_multilingual_v2',
30+
'ttsVoice' => 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
31+
],
32+
);
33+
34+
$eventDispatcher = new EventDispatcher();
35+
$eventDispatcher->addSubscriber(new SpeechListener([
36+
$elevenLabsPlatform,
37+
]));
38+
39+
$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client(), eventDispatcher: $eventDispatcher);
40+
41+
$agent = new Agent($platform, 'gpt-4o');
42+
$answer = $agent->call(new MessageBag(
43+
Message::ofUser('Tina has one brother and one sister. How many sisters do Tina\'s siblings have?'),
44+
));
45+
46+
echo $answer->getSpeech('elevenlabs')->asBinary();

src/agent/src/Output.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
use Symfony\AI\Platform\Message\MessageBag;
1515
use Symfony\AI\Platform\Result\ResultInterface;
16+
use Symfony\AI\Platform\Speech\Speech;
1617

1718
/**
1819
* @author Christopher Hertel <[email protected]>
@@ -27,6 +28,7 @@ public function __construct(
2728
private ResultInterface $result,
2829
private readonly MessageBag $messageBag,
2930
private readonly array $options = [],
31+
private ?Speech $speech = null,
3032
) {
3133
}
3234

@@ -57,4 +59,14 @@ public function getOptions(): array
5759
{
5860
return $this->options;
5961
}
62+
63+
public function setSpeech(?Speech $speech): void
64+
{
65+
$this->speech = $speech;
66+
}
67+
68+
public function getSpeech(): ?Speech
69+
{
70+
return $this->speech;
71+
}
6072
}

src/ai-bundle/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,4 @@ CHANGELOG
3434
- Token usage metadata in agent results including prompt, completion, total, cached, and thinking tokens
3535
- Rate limit information tracking for supported platforms
3636
* Add support for configuring chats and message stores
37+
* Add support for configuring speeches

src/ai-bundle/config/options.php

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,23 @@
113113
->booleanNode('api_catalog')
114114
->info('If set, the ElevenLabs API will be used to build the catalog and retrieve models information, using this option leads to additional HTTP calls')
115115
->end()
116+
->arrayNode('speech')
117+
->children()
118+
->stringNode('tts_model')->end()
119+
->stringNode('tts_voice')->end()
120+
->arrayNode('tts_options')
121+
->scalarPrototype()
122+
->defaultValue([])
123+
->end()
124+
->end()
125+
->stringNode('stt_model')->end()
126+
->arrayNode('stt_options')
127+
->scalarPrototype()
128+
->defaultValue([])
129+
->end()
130+
->end()
131+
->end()
132+
->end()
116133
->end()
117134
->end()
118135
->arrayNode('gemini')

src/ai-bundle/config/services.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
use Symfony\AI\Platform\Message\TemplateRenderer\TemplateRendererRegistry;
6565
use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface;
6666
use Symfony\AI\Platform\Serializer\StructuredOutputSerializer;
67+
use Symfony\AI\Platform\Speech\SpeechListener;
6768
use Symfony\AI\Platform\StructuredOutput\PlatformSubscriber;
6869
use Symfony\AI\Platform\StructuredOutput\ResponseFormatFactory;
6970
use Symfony\AI\Platform\StructuredOutput\ResponseFormatFactoryInterface;
@@ -263,5 +264,12 @@
263264
tagged_locator('ai.message_store', 'name'),
264265
])
265266
->tag('console.command')
267+
268+
// listeners
269+
->set('ai.speech.listener', SpeechListener::class)
270+
->args([
271+
tagged_iterator('ai.platform.speech', 'name'),
272+
])
273+
->tag('kernel.event_subscriber')
266274
;
267275
};

0 commit comments

Comments
 (0)