Skip to content

Commit 959b46f

Browse files
committed
feature #324 [Platform] Add stream for ElevenLabs TTS (Guikingone)
This PR was merged into the main branch. Discussion ---------- [Platform] Add stream for ElevenLabs TTS | Q | A | ------------- | --- | Bug fix? | no | New feature? | yes | Docs? | yes | Issues | None | License | MIT Hi 👋🏻 This PR aims to introduce support for streaming TTS. Commits ------- 16c492e feat(platform): ElevenLabs stream for TTS
2 parents 66c5c22 + 16c492e commit 959b46f

File tree

6 files changed

+104
-10
lines changed

6 files changed

+104
-10
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabs;
13+
use Symfony\AI\Platform\Bridge\ElevenLabs\PlatformFactory;
14+
use Symfony\AI\Platform\Message\Content\Text;
15+
16+
require_once dirname(__DIR__).'/bootstrap.php';
17+
18+
// Build the ElevenLabs platform from the API key found in the environment.
$platform = PlatformFactory::create(
    apiKey: env('ELEVEN_LABS_API_KEY'),
    httpClient: http_client(),
);

// The 'stream' option asks the bridge to call the streaming text-to-speech endpoint.
$model = new ElevenLabs(options: [
    'voice' => 'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
    'stream' => true,
]);

$result = $platform->invoke($model, new Text('The first move is what sets everything in motion.'));

// Emit every chunk as soon as it is received instead of buffering the whole response.
foreach ($result->asStream() as $chunk) {
    echo $chunk;
}

echo \PHP_EOL;

src/platform/src/Bridge/ElevenLabs/ElevenLabsClient.php

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ public function request(Model $model, array|string $payload, array $options = []
4242
}
4343

4444
if (\in_array($model->getName(), [ElevenLabs::SCRIBE_V1, ElevenLabs::SCRIBE_V1_EXPERIMENTAL], true)) {
45-
return $this->doSpeechToTextRequest($model, $payload, $options);
45+
return $this->doSpeechToTextRequest($model, $payload);
4646
}
4747

4848
$capabilities = $this->retrieveCapabilities($model);
@@ -56,9 +56,8 @@ public function request(Model $model, array|string $payload, array $options = []
5656

5757
/**
5858
* @param array<string|int, mixed> $payload
59-
* @param array<string, mixed> $options
6059
*/
61-
private function doSpeechToTextRequest(Model $model, array|string $payload, array $options): RawHttpResult
60+
private function doSpeechToTextRequest(Model $model, array|string $payload): RawHttpResult
6261
{
6362
return new RawHttpResult($this->httpClient->request('POST', \sprintf('%s/speech-to-text', $this->hostUrl), [
6463
'headers' => [
@@ -86,8 +85,13 @@ private function doTextToSpeechRequest(Model $model, array|string $payload, arra
8685
}
8786

8887
$voice = $options['voice'] ??= $model->getOptions()['voice'];
88+
$stream = $options['stream'] ??= $model->getOptions()['stream'] ?? false;
89+
90+
$url = $stream
91+
? \sprintf('%s/text-to-speech/%s/stream', $this->hostUrl, $voice)
92+
: \sprintf('%s/text-to-speech/%s', $this->hostUrl, $voice);
8993

90-
return new RawHttpResult($this->httpClient->request('POST', \sprintf('%s/text-to-speech/%s', $this->hostUrl, $voice), [
94+
return new RawHttpResult($this->httpClient->request('POST', $url, [
9195
'headers' => [
9296
'xi-api-key' => $this->apiKey,
9397
],

src/platform/src/Bridge/ElevenLabs/ElevenLabsResultConverter.php

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,22 @@
1616
use Symfony\AI\Platform\Result\BinaryResult;
1717
use Symfony\AI\Platform\Result\RawResultInterface;
1818
use Symfony\AI\Platform\Result\ResultInterface;
19+
use Symfony\AI\Platform\Result\StreamResult;
1920
use Symfony\AI\Platform\Result\TextResult;
2021
use Symfony\AI\Platform\ResultConverterInterface;
22+
use Symfony\Contracts\HttpClient\HttpClientInterface;
2123
use Symfony\Contracts\HttpClient\ResponseInterface;
2224

2325
/**
2426
* @author Guillaume Loulier <[email protected]>
2527
*/
2628
final readonly class ElevenLabsResultConverter implements ResultConverterInterface
2729
{
30+
public function __construct(
31+
private HttpClientInterface $httpClient,
32+
) {
33+
}
34+
2835
public function supports(Model $model): bool
2936
{
3037
return $model instanceof ElevenLabs;
@@ -36,9 +43,25 @@ public function convert(RawResultInterface $result, array $options = []): Result
3643
$response = $result->getObject();
3744

3845
return match (true) {
46+
\array_key_exists('stream', $options) && $options['stream'] => new StreamResult($this->convertToGenerator($response)),
3947
str_contains($response->getInfo('url'), 'speech-to-text') => new TextResult($result->getData()['text']),
4048
str_contains($response->getInfo('url'), 'text-to-speech') => new BinaryResult($result->getObject()->getContent(), 'audio/mpeg'),
4149
default => throw new RuntimeException('Unsupported ElevenLabs response.'),
4250
};
4351
}
52+
53+
/**
 * Lazily yields the non-empty body chunks of the given response.
 *
 * Chunks flagged as first or last by the HTTP client, as well as chunks
 * whose content is an empty string, are skipped.
 *
 * @return \Generator<int, string>
 */
private function convertToGenerator(ResponseInterface $response): \Generator
{
    foreach ($this->httpClient->stream($response) as $part) {
        // Boundary chunks are checked first so their content is never read.
        if (!$part->isFirst() && !$part->isLast() && '' !== ($data = $part->getContent())) {
            yield $data;
        }
    }
}
4467
}

src/platform/src/Bridge/ElevenLabs/PlatformFactory.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ public static function create(
3232

3333
return new Platform(
3434
[new ElevenLabsClient($httpClient, $apiKey, $hostUrl)],
35-
[new ElevenLabsResultConverter()],
35+
[new ElevenLabsResultConverter($httpClient)],
3636
$contract ?? ElevenLabsContract::create(),
3737
);
3838
}

src/platform/tests/Bridge/ElevenLabs/ElevenLabsClientTest.php

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
namespace Symfony\AI\Platform\Tests\Bridge\ElevenLabs;
1313

1414
use PHPUnit\Framework\Attributes\CoversClass;
15-
use PHPUnit\Framework\Attributes\Group;
1615
use PHPUnit\Framework\Attributes\UsesClass;
1716
use PHPUnit\Framework\TestCase;
1817
use Symfony\AI\Platform\Bridge\ElevenLabs\Contract\AudioNormalizer;
@@ -21,6 +20,7 @@
2120
use Symfony\AI\Platform\Exception\InvalidArgumentException;
2221
use Symfony\AI\Platform\Message\Content\Audio;
2322
use Symfony\AI\Platform\Model;
23+
use Symfony\AI\Platform\Result\RawHttpResult;
2424
use Symfony\Component\HttpClient\MockHttpClient;
2525
use Symfony\Component\HttpClient\Response\JsonMockResponse;
2626
use Symfony\Component\HttpClient\Response\MockResponse;
@@ -30,6 +30,7 @@
3030
#[UsesClass(Model::class)]
3131
#[UsesClass(Audio::class)]
3232
#[UsesClass(AudioNormalizer::class)]
33+
#[UsesClass(RawHttpResult::class)]
3334
final class ElevenLabsClientTest extends TestCase
3435
{
3536
public function testSupportsModel()
@@ -133,7 +134,6 @@ public function testClientCannotPerformTextToSpeechRequestWithoutValidPayload()
133134
]), []);
134135
}
135136

136-
#[Group('foo')]
137137
public function testClientCanPerformTextToSpeechRequest()
138138
{
139139
$payload = Audio::fromFile(\dirname(__DIR__, 5).'/fixtures/audio.mp3');
@@ -162,4 +162,35 @@ public function testClientCanPerformTextToSpeechRequest()
162162

163163
$this->assertSame(2, $httpClient->getRequestsCount());
164164
}
165+
166+
/**
 * Ensures a model configured with the 'stream' option is sent to the
 * streaming text-to-speech endpoint and still produces a RawHttpResult.
 */
public function testClientCanPerformTextToSpeechRequestAsStream()
{
    $payload = Audio::fromFile(\dirname(__DIR__, 5).'/fixtures/audio.mp3');

    // Kept in a variable so the URL recorded by the mock client can be inspected afterwards.
    $streamResponse = new MockResponse($payload->asBinary());

    $httpClient = new MockHttpClient([
        new JsonMockResponse([
            [
                'model_id' => ElevenLabs::ELEVEN_MULTILINGUAL_V2,
                'can_do_text_to_speech' => true,
            ],
        ]),
        $streamResponse,
    ]);

    // Argument order follows PlatformFactory: HTTP client, API key, then host URL.
    $client = new ElevenLabsClient(
        $httpClient,
        'my-api-key',
        'https://api.elevenlabs.io/v1',
    );

    $result = $client->request(new ElevenLabs(options: [
        'voice' => 'Dslrhjl3ZpzrctukrQSN',
        'stream' => true,
    ]), [
        'text' => 'foo',
    ]);

    $this->assertInstanceOf(RawHttpResult::class, $result);
    $this->assertSame(2, $httpClient->getRequestsCount());
    // Without this check the test would pass even if the non-streaming endpoint were called.
    $this->assertStringContainsString('/text-to-speech/Dslrhjl3ZpzrctukrQSN/stream', $streamResponse->getRequestUrl());
}
165196
}

src/platform/tests/Bridge/ElevenLabs/ElevenLabsConverterTest.php

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
use Symfony\AI\Platform\Result\BinaryResult;
2121
use Symfony\AI\Platform\Result\InMemoryRawResult;
2222
use Symfony\AI\Platform\Result\TextResult;
23+
use Symfony\Component\HttpClient\MockHttpClient;
2324

2425
#[CoversClass(ElevenLabsResultConverter::class)]
2526
#[UsesClass(ElevenLabs::class)]
@@ -31,15 +32,15 @@ final class ElevenLabsConverterTest extends TestCase
3132
{
3233
public function testSupportsModel()
3334
{
34-
$converter = new ElevenLabsResultConverter();
35+
$converter = new ElevenLabsResultConverter(new MockHttpClient());
3536

3637
$this->assertTrue($converter->supports(new ElevenLabs()));
3738
$this->assertFalse($converter->supports(new Model('any-model')));
3839
}
3940

4041
public function testConvertSpeechToTextResponse()
4142
{
42-
$converter = new ElevenLabsResultConverter();
43+
$converter = new ElevenLabsResultConverter(new MockHttpClient());
4344
$rawResult = new InMemoryRawResult([
4445
'text' => 'Hello there',
4546
], new class {
@@ -57,7 +58,7 @@ public function getInfo(): string
5758

5859
public function testConvertTextToSpeechResponse()
5960
{
60-
$converter = new ElevenLabsResultConverter();
61+
$converter = new ElevenLabsResultConverter(new MockHttpClient());
6162
$rawResult = new InMemoryRawResult([], new class {
6263
public function getInfo(): string
6364
{

0 commit comments

Comments
 (0)