diff --git a/examples/.env b/examples/.env
index fa6fdd1f..7378a7b6 100644
--- a/examples/.env
+++ b/examples/.env
@@ -52,6 +52,9 @@ TAVILY_API_KEY=
 # For using Brave (tool)
 BRAVE_API_KEY=
 
+# For using ElevenLabs (tool)
+ELEVENLABS_API_KEY=
+
 # For using MongoDB Atlas (store)
 MONGODB_URI=
 
diff --git a/examples/misc/text-to-speech.php b/examples/misc/text-to-speech.php
new file mode 100644
index 00000000..56ce589f
--- /dev/null
+++ b/examples/misc/text-to-speech.php
@@ -0,0 +1,45 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+use Symfony\AI\Agent\Agent;
+use Symfony\AI\Agent\Toolbox\AgentProcessor;
+use Symfony\AI\Agent\Toolbox\Tool\ElevenLabs;
+use Symfony\AI\Agent\Toolbox\Toolbox;
+use Symfony\AI\Platform\Bridge\OpenAi\Gpt;
+use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
+use Symfony\AI\Platform\Message\Message;
+use Symfony\AI\Platform\Message\MessageBag;
+
+require_once dirname(__DIR__).'/bootstrap.php';
+
+// Example: an OpenAI-backed agent that is given the ElevenLabs tool and asked to turn a sentence into speech.
+$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
+$model = new Gpt(Gpt::GPT_4O_MINI);
+
+// The tool writes each generated MP3 into the "tmp" directory next to this script's directory.
+$elevenLabs = new ElevenLabs(
+    http_client(),
+    env('ELEVENLABS_API_KEY'),
+    __DIR__.'/../tmp',
+    'eleven_multilingual_v2',
+    'Dslrhjl3ZpzrctukrQSN', // Brad (https://elevenlabs.io/app/voice-library?voiceId=Dslrhjl3ZpzrctukrQSN)
+);
+
+// The same AgentProcessor instance is registered as both input and output processor.
+$toolbox = new Toolbox([$elevenLabs], logger: logger());
+$toolProcessor = new AgentProcessor($toolbox);
+
+$agent = new Agent($platform, $model, inputProcessors: [$toolProcessor], outputProcessors: [$toolProcessor]);
+
+$messages = new MessageBag(Message::ofUser('Convert the following text to voice: "Hello world with voice!"'));
+$result = $agent->call($messages);
+
+echo $result->getContent().\PHP_EOL;
diff --git a/src/agent/composer.json b/src/agent/composer.json
index 73249209..a6ed0e1a 100644
--- a/src/agent/composer.json
+++ b/src/agent/composer.json
@@ -41,7 +41,9 @@
         "symfony/css-selector": "^6.4 ||
^7.1",
         "symfony/dom-crawler": "^6.4 || ^7.1",
         "symfony/event-dispatcher": "^6.4 || ^7.1",
-        "symfony/http-foundation": "^6.4 || ^7.1"
+        "symfony/filesystem": "^6.4 || ^7.1",
+        "symfony/http-foundation": "^6.4 || ^7.1",
+        "symfony/uid": "^6.4 || ^7.1"
     },
     "config": {
         "sort-packages": true
diff --git a/src/agent/doc/index.rst b/src/agent/doc/index.rst
index 1c24b486..f66da5b9 100644
--- a/src/agent/doc/index.rst
+++ b/src/agent/doc/index.rst
@@ -281,6 +281,7 @@ messages will be added to your MessageBag::
 * `Weather Tool with Event Listener`_
 * `Wikipedia Tool`_
 * `YouTube Transcriber Tool`_
+* `ElevenLabs Text to Speech`_
 
 Retrieval Augmented Generation (RAG)
 ------------------------------------
@@ -552,6 +553,7 @@ useful when certain interactions shouldn't be influenced by the memory context::
 .. _`Weather Tool with Event Listener`: https://github.com/symfony/ai/blob/main/examples/toolbox/weather-event.php
 .. _`Wikipedia Tool`: https://github.com/symfony/ai/blob/main/examples/openai/toolcall-stream.php
 .. _`YouTube Transcriber Tool`: https://github.com/symfony/ai/blob/main/examples/openai/toolcall.php
+.. _`ElevenLabs Text to Speech`: https://github.com/symfony/ai/blob/main/examples/misc/text-to-speech.php
 .. _`Store Component`: https://github.com/symfony/ai-store
 .. _`RAG with MongoDB`: https://github.com/symfony/ai/blob/main/examples/store/mongodb-similarity-search.php
 .. _`RAG with Pinecone`: https://github.com/symfony/ai/blob/main/examples/store/pinecone-similarity-search.php
diff --git a/src/agent/src/Toolbox/Tool/ElevenLabs.php b/src/agent/src/Toolbox/Tool/ElevenLabs.php
new file mode 100644
index 00000000..ab91fb88
--- /dev/null
+++ b/src/agent/src/Toolbox/Tool/ElevenLabs.php
@@ -0,0 +1,94 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\AI\Agent\Toolbox\Tool;
+
+use Symfony\AI\Agent\Exception\RuntimeException;
+use Symfony\AI\Agent\Toolbox\Attribute\AsTool;
+use Symfony\Component\Filesystem\Filesystem;
+use Symfony\Component\Uid\Uuid;
+use Symfony\Contracts\HttpClient\HttpClientInterface;
+
+/**
+ * Agent tool that posts text to the ElevenLabs text-to-speech API and writes
+ * the returned audio to an MP3 file.
+ *
+ * @author Guillaume Loulier
+ *
+ * @see https://elevenlabs.io/
+ */
+#[AsTool('text_to_speech', description: 'Convert text to speech / voice')]
+final readonly class ElevenLabs
+{
+    /**
+     * @param string $path  Directory the generated MP3 files are written to
+     * @param string $model Value sent as "model_id" in the request body
+     * @param string $voice Voice identifier placed in the request URL
+     *
+     * @throws RuntimeException When symfony/filesystem or symfony/uid is not installed
+     */
+    public function __construct(
+        private HttpClientInterface $httpClient,
+        #[\SensitiveParameter] private string $apiKey,
+        private string $path,
+        private string $model,
+        private string $voice,
+    ) {
+        if (!class_exists(Filesystem::class)) {
+            throw new RuntimeException('For using the ElevenLabs TTS tool, the symfony/filesystem package is required. Try running "composer require symfony/filesystem".');
+        }
+
+        if (!class_exists(Uuid::class)) {
+            throw new RuntimeException('For using the ElevenLabs TTS tool, the symfony/uid package is required. Try running "composer require symfony/uid".');
+        }
+    }
+
+    /**
+     * Converts the given text to speech and stores the audio under a random file name.
+     *
+     * @param string $text The text to convert to speech
+     *
+     * @return array{
+     *     input: string,
+     *     path: string,
+     * }
+     */
+    public function __invoke(string $text): array
+    {
+        // Encode the voice ID so a malformed value cannot change the URL structure.
+        $url = \sprintf('https://api.elevenlabs.io/v1/text-to-speech/%s', rawurlencode($this->voice));
+
+        $response = $this->httpClient->request('POST', $url, [
+            'headers' => [
+                'xi-api-key' => $this->apiKey,
+            ],
+            'query' => [
+                'output_format' => 'mp3_44100_128',
+            ],
+            'json' => [
+                'text' => $text,
+                'model_id' => $this->model,
+            ],
+        ]);
+
+        // Trim a trailing slash from the directory to avoid "dir//file"; the random UUID
+        // gives every call its own file name.
+        $file = \sprintf('%s/%s.mp3', rtrim($this->path, '/'), Uuid::v4()->toRfc4122());
+
+        $filesystem = new Filesystem();
+        $filesystem->dumpFile($file, $response->getContent());
+
+        return [
+            'input' => $text,
+            'path' => $file,
+        ];
+    }
+}
diff --git a/src/agent/tests/Toolbox/Tool/ElevenLabsTest.php b/src/agent/tests/Toolbox/Tool/ElevenLabsTest.php
new file mode 100644
index 00000000..c86fa5e0
--- /dev/null
+++ b/src/agent/tests/Toolbox/Tool/ElevenLabsTest.php
@@ -0,0 +1,43 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\AI\Agent\Tests\Toolbox\Tool;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\TestCase;
+use Symfony\AI\Agent\Toolbox\Tool\ElevenLabs;
+use Symfony\Component\Filesystem\Filesystem;
+use Symfony\Component\HttpClient\MockHttpClient;
+use Symfony\Component\HttpClient\Response\MockResponse;
+
+#[CoversClass(ElevenLabs::class)]
+final class ElevenLabsTest extends TestCase
+{
+    public function testTextToSpeech()
+    {
+        // A throwaway temp directory (removed in "finally") keeps the generated MP3 out of the working directory.
+        $directory = sys_get_temp_dir().'/elevenlabs_'.uniqid();
+        $fixture = __DIR__.'/../../../../../fixtures/audio.mp3';
+        $httpClient = new MockHttpClient(new MockResponse(file_get_contents($fixture), ['http_code' => 200, 'response_headers' => ['Content-Type' => 'audio/mpeg']]));
+
+        try {
+            $result = (new ElevenLabs($httpClient, 'foo', $directory, 'baz', 'random'))('Hello World');
+
+            $this->assertCount(2, $result);
+            $this->assertSame('Hello World', $result['input']);
+            $this->assertStringStartsWith($directory.'/', $result['path']);
+            $this->assertFileEquals($fixture, $result['path']);
+            $this->assertSame(1, $httpClient->getRequestsCount());
+        } finally {
+            (new Filesystem())->remove($directory);
+        }
+    }
+}