ref

Guikingone · Guikingone · commit b319521d87c0 · 2025-11-25T13:49:34.000+01:00
diff --git a/examples/speech/README.md b/examples/speech/README.md
@@ -0,0 +1,10 @@
+# Speech Examples
+
+Speech is mainly used to transform text to audio and vice versa; it can also be used to create an audio-to-audio pipeline.
+
+To run the examples, you can use additional tools like [mpg123](https://www.mpg123.de/):
+
+```bash
+php speech/agent-eleven-labs-speech-tts.php | mpg123 -
+php speech/agent-eleven-labs-speech-sts.php | mpg123 -
+```
diff --git a/examples/speech/agent-eleven-labs-speech-sts.php b/examples/speech/agent-eleven-labs-speech-sts.php
@@ -0,0 +1,54 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+use Symfony\AI\Agent\Agent;
+use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechListener;
+use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechProvider;
+use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
+use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
+use Symfony\AI\Platform\Message\Content\Audio;
+use Symfony\AI\Platform\Message\Message;
+use Symfony\AI\Platform\Message\MessageBag;
+use Symfony\AI\Platform\Speech\SpeechConfiguration;
+use Symfony\AI\Platform\Speech\SpeechProviderListener;
+use Symfony\Component\EventDispatcher\EventDispatcher;
+
+require_once dirname(__DIR__).'/bootstrap.php';
+
+$eventDispatcher = new EventDispatcher();
+$eventDispatcher->addSubscriber(new SpeechProviderListener([ // first array: speech providers, second array: speech listeners (judging by the element types)
+    new ElevenLabsSpeechProvider(PlatformFactory::create( // configured with a TTS model and voice
+        apiKey: env('ELEVEN_LABS_API_KEY'),
+        httpClient: http_client(),
+        speechConfiguration: new SpeechConfiguration(
+            ttsModel: 'eleven_multilingual_v2',
+            ttsVoice: 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
+            sttModel: 'eleven_multilingual_v2'
+        )),
+    ),
+], [
+    new ElevenLabsSpeechListener(PlatformFactory::create( // transcribes incoming audio with the configured STT model
+        apiKey: env('ELEVEN_LABS_API_KEY'),
+        httpClient: http_client(),
+        speechConfiguration: new SpeechConfiguration(
+            sttModel: 'scribe_v1'
+        )),
+    ),
+]));
+
+$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client(), eventDispatcher: $eventDispatcher); // the dispatcher carries the speech subscriber registered above
+
+$agent = new Agent($platform, 'gpt-4o');
+$answer = $agent->call(new MessageBag( // the only message is an audio recording
+    Message::ofUser(Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3'))
+));
+
+echo $answer->getSpeech()->asBinary(); // raw audio bytes on stdout, meant to be piped into a player such as mpg123
diff --git a/examples/speech/agent-eleven-labs-speech-stt.php b/examples/speech/agent-eleven-labs-speech-stt.php
@@ -0,0 +1,43 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+use Symfony\AI\Agent\Agent;
+use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabsSpeechListener;
+use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
+use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
+use Symfony\AI\Platform\Message\Content\Audio;
+use Symfony\AI\Platform\Message\Message;
+use Symfony\AI\Platform\Message\MessageBag;
+use Symfony\AI\Platform\Speech\SpeechConfiguration;
+use Symfony\AI\Platform\Speech\SpeechProviderListener;
+use Symfony\Component\EventDispatcher\EventDispatcher;
+
+require_once dirname(__DIR__).'/bootstrap.php';
+
+$eventDispatcher = new EventDispatcher();
+$eventDispatcher->addSubscriber(new SpeechProviderListener([], [ // first array left empty; only a speech listener is registered
+    new ElevenLabsSpeechListener(PlatformFactory::create( // transcribes incoming audio with the configured STT model
+        apiKey: env('ELEVEN_LABS_API_KEY'),
+        httpClient: http_client(),
+        speechConfiguration: new SpeechConfiguration(
+            sttModel: 'scribe_v1'
+        )),
+    ),
+]));
+
+$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client(), eventDispatcher: $eventDispatcher); // the dispatcher carries the speech subscriber registered above
+
+$agent = new Agent($platform, 'gpt-4o');
+$answer = $agent->call(new MessageBag( // the only message is an audio recording
+    Message::ofUser(Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3'))
+));
+
+echo $answer->getContent(); // prints the answer content; no speech output is requested here
diff --git a/examples/speech/agent-eleven-labs-speech-tts.php b/examples/speech/agent-eleven-labs-speech-tts.php
@@ -27,9 +27,9 @@
         apiKey: env('ELEVEN_LABS_API_KEY'),
         httpClient: http_client(),
         speechConfiguration: new SpeechConfiguration(
-            'eleven_multilingual_v2',
-            'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
-            'eleven_multilingual_v2'
+            ttsModel: 'eleven_multilingual_v2',
+            ttsVoice: 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
+            sttModel: 'eleven_multilingual_v2'
         )),
     ),
 ], []));
diff --git a/src/platform/src/Bridge/ElevenLabs/ElevenLabsSpeechListener.php b/src/platform/src/Bridge/ElevenLabs/ElevenLabsSpeechListener.php
@@ -12,23 +12,30 @@
 namespace Symfony\AI\Platform\Bridge\ElevenLabs;
 
 use Symfony\AI\Platform\Capability;
+use Symfony\AI\Platform\Message\Content\Text;
+use Symfony\AI\Platform\Message\MessageBag;
 use Symfony\AI\Platform\Platform;
 use Symfony\AI\Platform\Speech\SpeechListenerInterface;
 
+/**
+ * @author Guillaume Loulier <personal@guillaumeloulier.fr>
+ */
 final class ElevenLabsSpeechListener implements SpeechListenerInterface
 {
     public function __construct(
         private readonly Platform $platform,
     ) {
     }
 
-    public function listen(object|array|string $input, array $options): string
+    public function listen(object|array|string $input, array $options): Text
     {
         $speechConfiguration = $this->platform->getSpeechConfiguration();
 
+        // For a MessageBag holding audio, only the audio part is sent to the STT model. NOTE(review): getAudioContent() throws when that user message has no Audio part.
+        $input = ($input instanceof MessageBag && $input->containsAudio()) ? $input->getUserMessage()->getAudioContent() : $input;
+
         $result = $this->platform->invoke($speechConfiguration->sttModel, $input, $options);
 
-        return $result->asText();
+        return new Text($result->asText()); // the transcription is wrapped as Text message content
     }
 
     public function support(object|array|string $input, array $options): bool
diff --git a/src/platform/src/Message/UserMessage.php b/src/platform/src/Message/UserMessage.php
@@ -11,6 +11,7 @@
 
 namespace Symfony\AI\Platform\Message;
 
+use Symfony\AI\Platform\Exception\RuntimeException;
 use Symfony\AI\Platform\Message\Content\Audio;
 use Symfony\AI\Platform\Message\Content\ContentInterface;
 use Symfony\AI\Platform\Message\Content\Image;
@@ -71,6 +72,19 @@ public function hasAudioContent(): bool
         return false;
     }
 
+    /**
+     * Returns the first Audio part of this message; throws RuntimeException when there is none.
+     */
+    public function getAudioContent(): Audio
+    {
+        foreach ($this->content as $content) {
+            if ($content instanceof Audio) {
+                return $content;
+            }
+        }
+        throw new RuntimeException('No Audio content found.');
+    }
+
     public function hasImageContent(): bool
     {
         foreach ($this->content as $content) {
diff --git a/src/platform/src/Speech/SpeechListenerInterface.php b/src/platform/src/Speech/SpeechListenerInterface.php
@@ -11,12 +11,14 @@
 
 namespace Symfony\AI\Platform\Speech;
 
+use Symfony\AI\Platform\Message\Content\Text;
+
 /**
  * @author Guillaume Loulier <personal@guillaumeloulier.fr>
  */
 interface SpeechListenerInterface
 {
-    public function listen(array|string|object $input, array $options): string;
+    public function listen(array|string|object $input, array $options): Text; // result of listening to $input, as Text message content
 
     public function support(array|string|object $input, array $options): bool;
 }
diff --git a/src/platform/src/Speech/SpeechProviderListener.php b/src/platform/src/Speech/SpeechProviderListener.php
@@ -13,6 +13,8 @@
 
 use Symfony\AI\Platform\Event\InvocationEvent;
 use Symfony\AI\Platform\Event\ResultEvent;
+use Symfony\AI\Platform\Message\Message;
+use Symfony\AI\Platform\Message\MessageBag;
 use Symfony\Component\EventDispatcher\EventSubscriberInterface;
 
 /**
@@ -33,7 +35,7 @@ public function __construct(
     public static function getSubscribedEvents(): array
     {
         return [
-            InvocationEvent::class => 'onInvocation',
+            InvocationEvent::class => ['onInvocation', 255], // higher priority runs earlier; presumably so the input is rewritten before other InvocationEvent listeners see it — confirm
             ResultEvent::class => 'onResult',
         ];
     }
@@ -48,7 +50,15 @@ public function onInvocation(InvocationEvent $event): void
                 continue;
             }
 
-            $event->setInput($speechListener->listen($input, $options));
+            $overriddenInput = $speechListener->listen($input, $options);
+
+            // Keep the shape the caller used: a MessageBag input is replaced by a new
+            // MessageBag holding the listener's Text result as a user message; any other
+            // input is replaced by the bare result instead of always being re-wrapped.
+            $event->setInput($input instanceof MessageBag
+                ? new MessageBag(Message::ofUser($overriddenInput))
+                : $overriddenInput
+            );
         }
     }
 

Original file line number	Diff line number	Diff line change
`@@ -11,12 +11,14 @@`
`11`	`11`
`12`	`12`	`namespace Symfony\AI\Platform\Speech;`
`13`	`13`
	`14`	`+use Symfony\AI\Platform\Message\Content\Text;`
	`15`	`+`
`14`	`16`	`/**`
`15`	`17`	`* @author Guillaume Loulier <personal@guillaumeloulier.fr>`
`16`	`18`	`*/`
`17`	`19`	`interface SpeechListenerInterface`
`18`	`20`	`{`
`19`		`- public function listen(array\|string\|object $input, array $options): string;`
	`21`	`+ public function listen(array\|string\|object $input, array $options): Text;`
`20`	`22`
`21`	`23`	`public function support(array\|string\|object $input, array $options): bool;`
`22`	`24`	`}`
Original file line number	Diff line number	Diff line change
`@@ -13,6 +13,8 @@`
`13`	`13`
`14`	`14`	`use Symfony\AI\Platform\Event\InvocationEvent;`
`15`	`15`	`use Symfony\AI\Platform\Event\ResultEvent;`
	`16`	`+use Symfony\AI\Platform\Message\Message;`
	`17`	`+use Symfony\AI\Platform\Message\MessageBag;`
`16`	`18`	`use Symfony\Component\EventDispatcher\EventSubscriberInterface;`
`17`	`19`
`18`	`20`	`/**`
`@@ -33,7 +35,7 @@ public function __construct(`
`33`	`35`	`public static function getSubscribedEvents(): array`
`34`	`36`	`{`
`35`	`37`	`return [`
`36`		`- InvocationEvent::class => 'onInvocation',`
	`38`	`+ InvocationEvent::class => ['onInvocation', 255],`
`37`	`39`	`ResultEvent::class => 'onResult',`
`38`	`40`	`];`
`39`	`41`	`}`
`@@ -48,7 +50,15 @@ public function onInvocation(InvocationEvent $event): void`
`48`	`50`	`continue;`
`49`	`51`	`}`
`50`	`52`
`51`		`- $event->setInput($speechListener->listen($input, $options));`
	`53`	`+ $overriddenInput = $speechListener->listen($input, $options);`
	`54`	`+`
	`55`	`+ if (!$input instanceof MessageBag) {`
	`56`	`+ $event->setInput($overriddenInput);`
	`57`	`+ }`
	`58`	`+`
	`59`	`+ $event->setInput(new MessageBag(`
	`60`	`+ Message::ofUser($overriddenInput),`
	`61`	`+ ));`
`52`	`62`	`}`
`53`	`63`	`}`
`54`	`64`