Skip to content

Commit a3fec4f

Browse files
committed
refactor(platform|agent): support for speech moved to processors
1 parent 191e3a5 commit a3fec4f

File tree

15 files changed

+848
-241
lines changed

15 files changed

+848
-241
lines changed

docs/bundles/ai-bundle.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1189,12 +1189,25 @@ When using the bundle, the configuration allows to configure models and voices::
11891189
platform:
11901190
elevenlabs:
11911191
api_key: '%env(ELEVEN_LABS_API_KEY)%'
1192+
openai:
1193+
api_key: '%env(OPENAI_API_KEY)%'
1194+
1195+
agent:
1196+
assistant_vocal:
1197+
platform: ai.platform.openai
1198+
model: gpt-4o
11921199
speech:
1200+
platform: 'ai.platform.elevenlabs'
11931201
tts_model: '%env(ELEVEN_LABS_TTS_MODEL)%'
11941202
tts_options:
11951203
voice: '%env(ELEVEN_LABS_VOICE_IDENTIFIER)%'
11961204
stt_model: '%env(ELEVEN_LABS_STT_MODEL)%'
11971205

1206+
.. note::
1207+
1208+
The current example is built for a "TTS / STT sandwich", a pattern that handles both input and output as audio;
1209+
both STT and TTS can be enabled independently.
1210+
11981211
.. _`Symfony AI Agent`: https://github.com/symfony/ai-agent
11991212
.. _`Symfony AI Chat`: https://github.com/symfony/ai-chat
12001213
.. _`Symfony AI Platform`: https://github.com/symfony/ai-platform

docs/components/agent.rst

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,116 @@ Testing a service that uses an agent::
731731
The ``MockAgent`` provides all the benefits of traditional mocks while offering a more intuitive API for AI agent testing,
732732
making your tests more reliable and easier to maintain.
733733

734+
Speech support
735+
~~~~~~~~~~~~~~
736+
737+
Using speech to send messages / receive answers as audio is a common use case when integrating agents and/or chats;
738+
this approach allows you to either send audio and expect text output, or send text and receive audio content.
739+
740+
Another approach is to use STT / TTS together to enable a full audio pipeline; this approach introduces some latency
741+
(as both input and output must be processed) but allows you to create a more natural and "human-like" conversation flow.
742+
743+
Speech support can be enabled using :class:`Symfony\\AI\\Agent\\InputProcessor\\SpeechProcessor` (for `text-to-speech` in this example)::
744+
745+
use Symfony\AI\Agent\Agent;
746+
use Symfony\AI\Agent\InputProcessor\SpeechProcessor;
747+
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory as ElevenLabsPlatformFactory;
748+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
749+
use Symfony\AI\Platform\Message\Message;
750+
use Symfony\AI\Platform\Message\MessageBag;
751+
use Symfony\AI\Platform\Speech\SpeechConfiguration;
752+
use Symfony\Component\HttpClient\HttpClient;
753+
754+
require_once dirname(__DIR__).'/bootstrap.php';
755+
756+
$platform = OpenAiPlatformFactory::create('key', httpClient: HttpClient::create());
757+
758+
$agent = new Agent($platform, 'gpt-4o', outputProcessors: [
759+
new SpeechProcessor(ElevenLabsPlatformFactory::create(
760+
apiKey: 'key',
761+
httpClient: http_client()
762+
), new SpeechConfiguration([
763+
'tts_model' => 'eleven_multilingual_v2',
764+
'tts_options' => [
765+
'voice' => 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
766+
],
767+
])),
768+
]);
769+
$answer = $agent->call(new MessageBag(
770+
Message::ofUser('Tina has one brother and one sister. How many sisters do Tina\'s siblings have?'),
771+
));
772+
773+
echo $answer->getSpeech()->asBinary();
774+
775+
When handling `speech-to-speech`, the process is still the same but requires a :class:`Symfony\\AI\\Platform\\Message\\Content\\Audio` as an input::
776+
777+
use Symfony\AI\Agent\Agent;
778+
use Symfony\AI\Agent\InputProcessor\SpeechProcessor;
779+
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory as ElevenLabsPlatformFactory;
780+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
781+
use Symfony\AI\Platform\Message\Content\Audio;
782+
use Symfony\AI\Platform\Message\Message;
783+
use Symfony\AI\Platform\Message\MessageBag;
784+
use Symfony\AI\Platform\Speech\SpeechConfiguration;
785+
use Symfony\Component\HttpClient\HttpClient;
786+
787+
require_once dirname(__DIR__).'/bootstrap.php';
788+
789+
$platform = OpenAiPlatformFactory::create('key', httpClient: HttpClient::create());
790+
791+
$agent = new Agent($platform, 'gpt-4o', [
792+
new SpeechProcessor(ElevenLabsPlatformFactory::create(
793+
apiKey: 'key',
794+
httpClient: http_client(),
795+
), new SpeechConfiguration([
796+
'stt_model' => 'scribe_v1',
797+
]))
798+
]);
799+
$answer = $agent->call(new MessageBag(
800+
Message::ofUser(Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3'))
801+
));
802+
803+
echo $answer->getContent();
804+
805+
A "STT / TTS sandwich" can be created using the :class:`Symfony\\AI\\Agent\\InputProcessor\\SpeechProcessor` as input and output processor::
806+
807+
use Symfony\AI\Agent\Agent;
808+
use Symfony\AI\Agent\InputProcessor\SpeechProcessor;
809+
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory as ElevenLabsPlatformFactory;
810+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
811+
use Symfony\AI\Platform\Message\Content\Audio;
812+
use Symfony\AI\Platform\Message\Message;
813+
use Symfony\AI\Platform\Message\MessageBag;
814+
use Symfony\AI\Platform\Speech\SpeechConfiguration;
815+
use Symfony\Component\HttpClient\HttpClient;
816+
817+
require_once dirname(__DIR__).'/bootstrap.php';
818+
819+
$platform = OpenAiPlatformFactory::create('key', httpClient: HttpClient::create());
820+
821+
$speechProcessor = new SpeechProcessor(ElevenLabsPlatformFactory::create(
822+
apiKey: 'key',
823+
httpClient: http_client(),
824+
), new SpeechConfiguration([
825+
'tts_model' => 'eleven_multilingual_v2',
826+
'tts_options' => [
827+
'voice' => 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
828+
],
829+
'stt_model' => 'scribe_v1',
830+
]));
831+
832+
$agent = new Agent($platform, 'gpt-4o', [$speechProcessor], [$speechProcessor]);
833+
834+
$answer = $agent->call(new MessageBag(
835+
Message::ofUser(Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3'))
836+
));
837+
838+
echo $answer->getSpeech()->asBinary();
839+
840+
.. note::
841+
842+
Handling both `text-to-speech` and `speech-to-text` introduces latency, as most of the process is synchronous.
843+
734844
Code Examples
735845
~~~~~~~~~~~~~
736846

docs/components/platform.rst

Lines changed: 0 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -687,92 +687,6 @@ This allows fast and isolated testing of AI-powered features without relying on
687687

688688
This requires `cURL` and the `ext-curl` extension to be installed.
689689

690-
Speech support
691-
~~~~~~~~~~~~~~
692-
693-
Using speech to send messages / receive answers as audio is a common use case when integrating agents and/or chats,
694-
this approach allows to either send audio and expect text output or send a text and receive an audio content.
695-
696-
Another approach is to use stt / tts together to enable a full audio pipeline, this approach introduce some latency
697-
(as both input/output must be processed) but allows to create a more natural and "human-like" conversation flow.
698-
699-
Speech support can be enabled using :class:`Symfony\\AI\\Platform\\Speech\\SpeechListener` (for `text-to-speech` in this example)::
700-
701-
use Symfony\AI\Agent\Agent;
702-
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory as ElevenLabsPlatformFactory;
703-
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
704-
use Symfony\AI\Platform\Message\Message;
705-
use Symfony\AI\Platform\Message\MessageBag;
706-
use Symfony\AI\Platform\Speech\SpeechConfiguration;
707-
use Symfony\AI\Platform\Speech\SpeechListener;
708-
use Symfony\Component\EventDispatcher\EventDispatcher;
709-
use Symfony\Component\HttpClient\HttpClient;
710-
711-
$eventDispatcher = new EventDispatcher();
712-
$eventDispatcher->addSubscriber(new SpeechListener([
713-
'elevenlabs' => ElevenLabsPlatformFactory::create(
714-
env('ELEVEN_LABS_API_KEY'),
715-
httpClient: HttpClient::create(),
716-
),
717-
], [
718-
'elevenlabs' => new SpeechConfiguration([
719-
'tts_model' => 'eleven_multilingual_v2',
720-
'tts_options' => [
721-
'voice' => 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
722-
],
723-
]),
724-
]));
725-
726-
$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: HttpClient::create(), eventDispatcher: $eventDispatcher);
727-
728-
$agent = new Agent($platform, 'gpt-4o');
729-
$answer = $agent->call(new MessageBag(
730-
Message::ofUser('Tina has one brother and one sister. How many sisters do Tina\'s siblings have?'),
731-
));
732-
733-
echo $answer->getSpeech()->asBinary();
734-
735-
When handling `speech-to-speech`, the process still the same but requires a :class:`Symfony\\AI\\Platform\\Message\\Content\\Audio` as an input::
736-
737-
use Symfony\AI\Agent\Agent;
738-
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory as ElevenLabsPlatformFactory;
739-
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
740-
use Symfony\AI\Platform\Message\Content\Audio;
741-
use Symfony\AI\Platform\Message\Message;
742-
use Symfony\AI\Platform\Message\MessageBag;
743-
use Symfony\AI\Platform\Speech\SpeechConfiguration;
744-
use Symfony\AI\Platform\Speech\SpeechListener;
745-
use Symfony\Component\EventDispatcher\EventDispatcher;
746-
use Symfony\Component\HttpClient\HttpClient;
747-
748-
$eventDispatcher = new EventDispatcher();
749-
$eventDispatcher->addSubscriber(new SpeechListener([
750-
'elevenlabs' => ElevenLabsPlatformFactory::create(
751-
env('ELEVEN_LABS_API_KEY'),
752-
httpClient: HttpClient::create(),
753-
),
754-
], [
755-
'elevenlabs' => new SpeechConfiguration([
756-
'tts_model' => 'eleven_multilingual_v2',
757-
'tts_options' => [
758-
'voice' => 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
759-
],
760-
]),
761-
]));
762-
763-
$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: HttpClient::create(), eventDispatcher: $eventDispatcher);
764-
765-
$agent = new Agent($platform, 'gpt-4o');
766-
$answer = $agent->call(new MessageBag(
767-
Message::ofUser(Audio::fromFile(...)
768-
));
769-
770-
echo $answer->getSpeech()->asBinary();
771-
772-
.. note::
773-
774-
Handling `speech-to-speech` introduce latency as most of the process is synchronous.
775-
776690
Code Examples
777691
~~~~~~~~~~~~~
778692

examples/speech/agent-eleven-labs-speech-sts.php

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,36 +10,31 @@
1010
*/
1111

1212
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Agent\InputProcessor\SpeechProcessor;
1314
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory as ElevenLabsPlatformFactory;
1415
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
1516
use Symfony\AI\Platform\Message\Content\Audio;
1617
use Symfony\AI\Platform\Message\Message;
1718
use Symfony\AI\Platform\Message\MessageBag;
1819
use Symfony\AI\Platform\Speech\SpeechConfiguration;
19-
use Symfony\AI\Platform\Speech\SpeechListener;
20-
use Symfony\Component\EventDispatcher\EventDispatcher;
2120

2221
require_once dirname(__DIR__).'/bootstrap.php';
2322

24-
$eventDispatcher = new EventDispatcher();
25-
$eventDispatcher->addSubscriber(new SpeechListener([
26-
'elevenlabs' => ElevenLabsPlatformFactory::create(
27-
apiKey: env('ELEVEN_LABS_API_KEY'),
28-
httpClient: http_client(),
29-
),
30-
], [
31-
'elevenlabs' => new SpeechConfiguration([
32-
'tts_model' => 'eleven_multilingual_v2',
33-
'tts_options' => [
34-
'voice' => 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
35-
],
36-
'stt_model' => 'scribe_v1',
37-
]),
23+
$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client());
24+
25+
$speechProcessor = new SpeechProcessor(ElevenLabsPlatformFactory::create(
26+
apiKey: env('ELEVEN_LABS_API_KEY'),
27+
httpClient: http_client(),
28+
), new SpeechConfiguration([
29+
'tts_model' => 'eleven_multilingual_v2',
30+
'tts_options' => [
31+
'voice' => 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
32+
],
33+
'stt_model' => 'scribe_v1',
3834
]));
3935

40-
$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client(), eventDispatcher: $eventDispatcher);
36+
$agent = new Agent($platform, 'gpt-4o', [$speechProcessor], [$speechProcessor]);
4137

42-
$agent = new Agent($platform, 'gpt-4o');
4338
$answer = $agent->call(new MessageBag(
4439
Message::ofUser(Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3'))
4540
));

examples/speech/agent-eleven-labs-speech-stt.php

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,32 +10,26 @@
1010
*/
1111

1212
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Agent\InputProcessor\SpeechProcessor;
1314
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory as ElevenLabsPlatformFactory;
1415
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
1516
use Symfony\AI\Platform\Message\Content\Audio;
1617
use Symfony\AI\Platform\Message\Message;
1718
use Symfony\AI\Platform\Message\MessageBag;
1819
use Symfony\AI\Platform\Speech\SpeechConfiguration;
19-
use Symfony\AI\Platform\Speech\SpeechListener;
20-
use Symfony\Component\EventDispatcher\EventDispatcher;
2120

2221
require_once dirname(__DIR__).'/bootstrap.php';
2322

24-
$eventDispatcher = new EventDispatcher();
25-
$eventDispatcher->addSubscriber(new SpeechListener([
26-
'elevenlabs' => ElevenLabsPlatformFactory::create(
23+
$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client());
24+
25+
$agent = new Agent($platform, 'gpt-4o', [
26+
new SpeechProcessor(ElevenLabsPlatformFactory::create(
2727
apiKey: env('ELEVEN_LABS_API_KEY'),
2828
httpClient: http_client(),
29-
),
30-
], [
31-
'elevenlabs' => new SpeechConfiguration([
29+
), new SpeechConfiguration([
3230
'stt_model' => 'scribe_v1',
33-
]),
34-
]));
35-
36-
$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client(), eventDispatcher: $eventDispatcher);
37-
38-
$agent = new Agent($platform, 'gpt-4o');
31+
])),
32+
]);
3933
$answer = $agent->call(new MessageBag(
4034
Message::ofUser(Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3'))
4135
));

examples/speech/agent-eleven-labs-speech-tts.php

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,35 +10,28 @@
1010
*/
1111

1212
use Symfony\AI\Agent\Agent;
13+
use Symfony\AI\Agent\InputProcessor\SpeechProcessor;
1314
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory as ElevenLabsPlatformFactory;
1415
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory as OpenAiPlatformFactory;
1516
use Symfony\AI\Platform\Message\Message;
1617
use Symfony\AI\Platform\Message\MessageBag;
1718
use Symfony\AI\Platform\Speech\SpeechConfiguration;
18-
use Symfony\AI\Platform\Speech\SpeechListener;
19-
use Symfony\Component\EventDispatcher\EventDispatcher;
2019

2120
require_once dirname(__DIR__).'/bootstrap.php';
2221

23-
$eventDispatcher = new EventDispatcher();
24-
$eventDispatcher->addSubscriber(new SpeechListener([
25-
'elevenlabs' => ElevenLabsPlatformFactory::create(
26-
apiKey: env('ELEVEN_LABS_API_KEY'),
27-
httpClient: http_client(),
28-
),
29-
], [
30-
'elevenlabs' => new SpeechConfiguration([
22+
$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client());
23+
24+
$agent = new Agent($platform, 'gpt-4o', outputProcessors: [
25+
new SpeechProcessor(ElevenLabsPlatformFactory::create(
26+
env('ELEVEN_LABS_API_KEY'),
27+
httpClient: http_client()
28+
), new SpeechConfiguration([
3129
'tts_model' => 'eleven_multilingual_v2',
3230
'tts_options' => [
3331
'voice' => 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
3432
],
35-
'stt_model' => 'scribe_v1',
36-
]),
37-
]));
38-
39-
$platform = OpenAiPlatformFactory::create(env('OPENAI_API_KEY'), httpClient: http_client(), eventDispatcher: $eventDispatcher);
40-
41-
$agent = new Agent($platform, 'gpt-4o');
33+
])),
34+
]);
4235
$answer = $agent->call(new MessageBag(
4336
Message::ofUser('Tina has one brother and one sister. How many sisters do Tina\'s siblings have?'),
4437
));

src/agent/composer.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
"phpdocumentor/reflection-docblock": "^5.4",
2929
"phpstan/phpdoc-parser": "^2.1",
3030
"psr/log": "^3.0",
31-
"symfony/ai-platform": "^0.3",
31+
"symfony/ai-platform": "^0.4",
3232
"symfony/clock": "^7.3|^8.0",
3333
"symfony/http-client": "^7.3|^8.0",
3434
"symfony/polyfill-php85": "^1.33",

0 commit comments

Comments
 (0)