Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions docs/components/platform.rst
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,59 @@ This allows fast and isolated testing of AI-powered features without relying on

This requires `cURL` and the `ext-curl` extension to be installed.

Speech support
~~~~~~~~~~~~~~

Using speech to send messages / receive answers as audio is a common use case when integrating agents and/or chats.

Speech support can be enabled using ``Symfony\AI\Platform\Speech\SpeechProviderListener``::

use Symfony\AI\Agent\Agent;
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechProvider;
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag;
use Symfony\AI\Platform\Speech\SpeechConfiguration;
use Symfony\AI\Platform\Speech\SpeechProviderListener;
use Symfony\Component\EventDispatcher\EventDispatcher;
use Symfony\Component\HttpClient\HttpClient;

$eventDispatcher = new EventDispatcher();
$eventDispatcher->addSubscriber(new SpeechProviderListener([
new ElevenLabsSpeechProvider(PlatformFactory::create(
apiKey: $elevenLabsApiKey,
httpClient: HttpClient::create(),
speechConfiguration: new SpeechConfiguration(
ttsModel: 'eleven_multilingual_v2',
ttsVoice: 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
sttModel: 'eleven_multilingual_v2'
)),
),
], []));

$platform = OpenAiPlatformFactory::create($openAiApiKey, httpClient: HttpClient::create(), eventDispatcher: $eventDispatcher);

$agent = new Agent($platform, 'gpt-4o');
$answer = $agent->call(new MessageBag(
Message::ofUser('Tina has one brother and one sister. How many sisters do Tina\'s siblings have?'),
));

echo $answer->getSpeech('eleven_labs')->asBinary();

When using the bundle, models and voices can be set through the configuration::

ai:
platform:
eleven_labs:
api_key: '%env(ELEVEN_LABS_API_KEY)%'

speech:
eleven_labs:
tts_model: 'eleven_multilingual_v2'
tts_voice: '%env(ELEVEN_LABS_VOICE_IDENTIFIER)%'
tts_extra_options:
foo: bar

Code Examples
~~~~~~~~~~~~~

Expand Down
10 changes: 10 additions & 0 deletions examples/speech/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Speech Examples

Speech is mainly used to transform text to audio and vice versa; it can also be used to create an audio-to-audio pipeline.

To run the examples, you can use additional tools like [mpg123](https://www.mpg123.de/):

```bash
php speech/agent-eleven-labs-speech-tts.php | mpg123 -
php speech/agent-eleven-labs-speech-sts.php | mpg123 -
```
54 changes: 54 additions & 0 deletions examples/speech/agent-eleven-labs-speech-sts.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <[email protected]>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use Symfony\AI\Agent\Agent;
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechListener;
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechProvider;
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
use Symfony\AI\Platform\Message\Content\Audio;
use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag;
use Symfony\AI\Platform\Speech\SpeechConfiguration;
use Symfony\AI\Platform\Speech\SpeechProviderListener;
use Symfony\Component\EventDispatcher\EventDispatcher;

require_once dirname(__DIR__).'/bootstrap.php';

$dispatcher = new EventDispatcher();

// Speech provider: ElevenLabs platform configured with a TTS model and voice.
$speechProvider = new ElevenLabsSpeechProvider(PlatformFactory::create(
    apiKey: env('ELEVEN_LABS_API_KEY'),
    httpClient: http_client(),
    speechConfiguration: new SpeechConfiguration(
        ttsModel: 'eleven_multilingual_v2',
        ttsVoice: 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
        sttModel: 'eleven_multilingual_v2'
    ),
));

// Speech listener: a second ElevenLabs platform configured with an STT model only.
$speechListener = new ElevenLabsSpeechListener(PlatformFactory::create(
    apiKey: env('ELEVEN_LABS_API_KEY'),
    httpClient: http_client(),
    speechConfiguration: new SpeechConfiguration(
        sttModel: 'scribe_v1'
    ),
));

// Providers go in the first list, listeners in the second.
$dispatcher->addSubscriber(new SpeechProviderListener([$speechProvider], [$speechListener]));

$openAiPlatform = OpenAiPlatformFactory::create(
    env('OPENAI_API_KEY'),
    httpClient: http_client(),
    eventDispatcher: $dispatcher,
);

$agent = new Agent($openAiPlatform, 'gpt-4o');

// The user message is an audio file rather than text.
$audioPrompt = Message::ofUser(Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3'));
$result = $agent->call(new MessageBag($audioPrompt));

// Write the raw audio bytes to stdout so they can be piped into a player.
echo $result->getSpeech('eleven_labs')->asBinary();
43 changes: 43 additions & 0 deletions examples/speech/agent-eleven-labs-speech-stt.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <[email protected]>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use Symfony\AI\Agent\Agent;
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechListener;
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
use Symfony\AI\Platform\Message\Content\Audio;
use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag;
use Symfony\AI\Platform\Speech\SpeechConfiguration;
use Symfony\AI\Platform\Speech\SpeechProviderListener;
use Symfony\Component\EventDispatcher\EventDispatcher;

require_once dirname(__DIR__).'/bootstrap.php';

$dispatcher = new EventDispatcher();

// Speech listener: ElevenLabs platform configured with an STT model only.
$speechListener = new ElevenLabsSpeechListener(PlatformFactory::create(
    apiKey: env('ELEVEN_LABS_API_KEY'),
    httpClient: http_client(),
    speechConfiguration: new SpeechConfiguration(
        sttModel: 'scribe_v1'
    ),
));

// No speech providers are registered here (empty first list), only the listener.
$dispatcher->addSubscriber(new SpeechProviderListener([], [$speechListener]));

$openAiPlatform = OpenAiPlatformFactory::create(
    env('OPENAI_API_KEY'),
    httpClient: http_client(),
    eventDispatcher: $dispatcher,
);

$agent = new Agent($openAiPlatform, 'gpt-4o');

// The user message is an audio file rather than text.
$audioPrompt = Message::ofUser(Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3'));
$result = $agent->call(new MessageBag($audioPrompt));

echo $result->getContent();
44 changes: 44 additions & 0 deletions examples/speech/agent-eleven-labs-speech-tts.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <[email protected]>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use Symfony\AI\Agent\Agent;
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechProvider;
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag;
use Symfony\AI\Platform\Speech\SpeechConfiguration;
use Symfony\AI\Platform\Speech\SpeechProviderListener;
use Symfony\Component\EventDispatcher\EventDispatcher;

require_once dirname(__DIR__).'/bootstrap.php';

$dispatcher = new EventDispatcher();

// Speech provider: ElevenLabs platform configured with a TTS model and voice.
$speechProvider = new ElevenLabsSpeechProvider(PlatformFactory::create(
    apiKey: env('ELEVEN_LABS_API_KEY'),
    httpClient: http_client(),
    speechConfiguration: new SpeechConfiguration(
        ttsModel: 'eleven_multilingual_v2',
        ttsVoice: 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
        sttModel: 'eleven_multilingual_v2'
    ),
));

// Only a provider is registered; the listener list (second argument) stays empty.
$dispatcher->addSubscriber(new SpeechProviderListener([$speechProvider], []));

$openAiPlatform = OpenAiPlatformFactory::create(
    env('OPENAI_API_KEY'),
    httpClient: http_client(),
    eventDispatcher: $dispatcher,
);

$agent = new Agent($openAiPlatform, 'gpt-4o');

$prompt = Message::ofUser('Tina has one brother and one sister. How many sisters do Tina\'s siblings have?');
$result = $agent->call(new MessageBag($prompt));

// Write the raw audio bytes to stdout so they can be piped into a player.
echo $result->getSpeech('eleven_labs')->asBinary();
4 changes: 2 additions & 2 deletions src/agent/src/Agent.php
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public function getName(): string
public function call(MessageBag $messages, array $options = []): ResultInterface
{
$input = new Input($this->getModel(), $messages, $options);
array_map(fn (InputProcessorInterface $processor) => $processor->processInput($input), $this->inputProcessors);
array_map(static fn (InputProcessorInterface $processor) => $processor->processInput($input), $this->inputProcessors);

$model = $input->getModel();
$messages = $input->getMessageBag();
Expand All @@ -78,7 +78,7 @@ public function call(MessageBag $messages, array $options = []): ResultInterface
$result = $this->platform->invoke($model, $messages, $options)->getResult();

$output = new Output($model, $result, $messages, $options);
array_map(fn (OutputProcessorInterface $processor) => $processor->processOutput($output), $this->outputProcessors);
array_map(static fn (OutputProcessorInterface $processor) => $processor->processOutput($output), $this->outputProcessors);

return $output->getResult();
}
Expand Down
12 changes: 12 additions & 0 deletions src/agent/src/Output.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

use Symfony\AI\Platform\Message\MessageBag;
use Symfony\AI\Platform\Result\ResultInterface;
use Symfony\AI\Platform\Speech\Speech;

/**
* @author Christopher Hertel <[email protected]>
Expand All @@ -27,6 +28,7 @@ public function __construct(
private ResultInterface $result,
private readonly MessageBag $messageBag,
private readonly array $options = [],
private ?Speech $speech = null,
) {
}

Expand Down Expand Up @@ -57,4 +59,14 @@ public function getOptions(): array
{
return $this->options;
}

/**
 * Replaces the speech stored on this output.
 *
 * @param Speech|null $speech the speech to store, or null to clear it
 */
public function setSpeech(?Speech $speech): void
{
$this->speech = $speech;
}

/**
 * Returns the speech stored on this output.
 *
 * @return Speech|null the stored speech, or null when none has been set
 */
public function getSpeech(): ?Speech
{
return $this->speech;
}
}
16 changes: 16 additions & 0 deletions src/ai-bundle/config/options.php
Original file line number Diff line number Diff line change
Expand Up @@ -960,6 +960,22 @@
->end()
->end()
->end()
->arrayNode('speech')
->children()
->arrayNode('eleven_labs')
->useAttributeAsKey('name')
->arrayPrototype()
->children()
->stringNode('tts_model')->end()
->stringNode('tts_voice')->end()
->arrayNode('tts_extra_options')->end()
->stringNode('stt_model')->end()
->arrayNode('stt_extra_options')->end()
->end()
->end()
->end()
->end()
->end()
->arrayNode('vectorizer')
->info('Vectorizers for converting strings to Vector objects and transforming TextDocument arrays to VectorDocument arrays')
->useAttributeAsKey('name')
Expand Down
9 changes: 9 additions & 0 deletions src/ai-bundle/config/services.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
use Symfony\AI\Platform\Contract\JsonSchema\DescriptionParser;
use Symfony\AI\Platform\Contract\JsonSchema\Factory as SchemaFactory;
use Symfony\AI\Platform\Serializer\StructuredOutputSerializer;
use Symfony\AI\Platform\Speech\SpeechProviderListener;
use Symfony\AI\Platform\StructuredOutput\PlatformSubscriber;
use Symfony\AI\Platform\StructuredOutput\ResponseFormatFactory;
use Symfony\AI\Platform\StructuredOutput\ResponseFormatFactoryInterface;
Expand Down Expand Up @@ -235,5 +236,13 @@
tagged_locator('ai.message_store', 'name'),
])
->tag('console.command')

// listeners
->set('ai.speech_provider.listener', SpeechProviderListener::class)
->args([
tagged_locator('ai.speech_provider', 'name'),
tagged_locator('ai.speech_listener', 'name'),
])
->tag('kernel.event_subscriber')
;
};
Loading
Loading