Skip to content

Commit 812ec69

Browse files
authored
Merge pull request #1002 from 10up/feature/993
Add ElevenLabs as a Provider for Text to Speech
2 parents 4ecbba8 + 4015d55 commit 812ec69

File tree

15 files changed

+912
-46
lines changed

15 files changed

+912
-46
lines changed

README.md

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ Tap into leading cloud-based services like [OpenAI](https://openai.com/), [Micro
2424
* Generate new images on demand to use in-content or as a featured image using [OpenAI's Image Generation API](https://platform.openai.com/docs/guides/images-vision), [Google AI's Imagen API](https://ai.google.dev/gemini-api/docs/image-generation#imagen), [Together AI's API](https://docs.together.ai/docs/images-overview) or locally using [Stable Diffusion](https://github.com/AUTOMATIC1111/stable-diffusion-webui/)
2525
* Generate transcripts of audio files using [OpenAI's Audio Transcription API](https://platform.openai.com/docs/guides/speech-to-text) or [ElevenLabs Speech to Text API](https://elevenlabs.io/docs/capabilities/speech-to-text)
2626
* Moderate incoming comments for sensitive content using [OpenAI's Moderation API](https://platform.openai.com/docs/guides/moderation)
27-
* Convert text content into audio and output a "read-to-me" feature on the front-end to play this audio using [Microsoft Azure's Text to Speech API](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/text-to-speech), [Amazon Polly](https://aws.amazon.com/polly/) or [OpenAI's Text to Speech API](https://platform.openai.com/docs/guides/text-to-speech)
27+
* Convert text content into audio and output a "read-to-me" feature on the front-end to play this audio using [Microsoft Azure's Text to Speech API](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/text-to-speech), [Amazon Polly](https://aws.amazon.com/polly/), [OpenAI's Text to Speech API](https://platform.openai.com/docs/guides/text-to-speech) or [ElevenLabs' Text to Speech API](https://elevenlabs.io/docs/capabilities/text-to-speech)
2828
* Classify post content using [IBM Watson's Natural Language Understanding API](https://www.ibm.com/watson/services/natural-language-understanding/), [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings), [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) or locally using [Ollama](https://ollama.com/)
2929
* Create a smart 404 page that has a recommended results section that suggests relevant content to the user based on the page URL they were trying to access using either [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) in combination with [ElasticPress](https://github.com/10up/ElasticPress)
3030
* Find similar terms to merge together using either [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) in combination with [ElasticPress](https://github.com/10up/ElasticPress). Note this only compares top-level terms and if you merge a term that has children, these become top-level terms as per default WordPress behavior
@@ -491,6 +491,27 @@ Note that [ElevenLabs](https://elevenlabs.io/docs/capabilities/speech-to-text) c
491491
* Click the button to preview the generated speech audio for the post.
492492
* View the post on the front-end and see a read-to-me feature has been added
493493

494+
## Set Up Text to Speech (via ElevenLabs)
495+
496+
### 1. Sign up for ElevenLabs
497+
498+
* [Sign up for an ElevenLabs account](https://elevenlabs.io/sign-up) or sign into your existing one.
499+
* Log into your account and go to the [API key page](https://elevenlabs.io/app/developers/api-keys).
500+
* Click `Create Key` to create a new API key, and ensure you turn on access to the Text to Speech endpoint and Read access to the Models and Voices endpoints.
501+
502+
### 2. Configure ElevenLabs API Keys under Tools > ClassifAI > Language Processing > Text to Speech > Settings
503+
504+
* Select **ElevenLabs** in the Provider dropdown.
505+
* Enter your API Key copied from the above step into the `API Key` field.
506+
* After saving and verifying the connection, select the model and voice you want to use for text to speech.
507+
508+
### 3. Using the Text to Speech service
509+
510+
* Assuming the post type selected is "post", create a new post and publish it.
511+
* After a few seconds, a "Preview" button will appear under the ClassifAI settings panel.
512+
* Click the button to preview the generated speech audio for the post.
513+
* View the post on the front-end and see that a read-to-me feature has been added.
514+
494515
## Set Up the Smart 404 Feature
495516

496517
### 1. Decide on Provider

includes/Classifai/Features/TextToSpeech.php

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
use Classifai\Providers\Azure\Speech;
77
use Classifai\Providers\AWS\AmazonPolly;
88
use Classifai\Providers\OpenAI\TextToSpeech as OpenAITTS;
9+
use Classifai\Providers\ElevenLabs\TextToSpeech as ElevenLabsTTS;
910
use Classifai\Normalizer;
1011
use WP_REST_Server;
1112
use WP_REST_Request;
@@ -65,9 +66,10 @@ public function __construct() {
6566

6667
// Contains just the providers this feature supports.
6768
$this->supported_providers = [
68-
AmazonPolly::ID => __( 'Amazon Polly', 'classifai' ),
69-
Speech::ID => __( 'Microsoft Azure AI Speech', 'classifai' ),
70-
OpenAITTS::ID => __( 'OpenAI Text to Speech', 'classifai' ),
69+
AmazonPolly::ID => __( 'Amazon Polly', 'classifai' ),
70+
Speech::ID => __( 'Microsoft Azure AI Speech', 'classifai' ),
71+
OpenAITTS::ID => __( 'OpenAI Text to Speech', 'classifai' ),
72+
ElevenLabsTTS::ID => __( 'ElevenLabs', 'classifai' ),
7173
];
7274
}
7375

includes/Classifai/Providers/ElevenLabs/ElevenLabs.php

Lines changed: 81 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
namespace Classifai\Providers\ElevenLabs;
77

8+
use Classifai\Features\TextToSpeech;
89
use WP_Error;
910

1011
use function Classifai\safe_wp_remote_get;
@@ -121,8 +122,15 @@ public function request( string $url, string $api_key = '', string $type = 'post
121122
return $response;
122123
}
123124

125+
$code = wp_remote_retrieve_response_code( $response );
126+
$content_type = wp_remote_retrieve_header( $response, 'content-type' );
127+
128+
// Return the full response as-is if the request was successful and the content type is audio (the body is audio data, so it is not JSON-decoded below).
129+
if ( 200 === $code && false !== strpos( $content_type, 'audio' ) ) {
130+
return $response;
131+
}
132+
124133
$body = wp_remote_retrieve_body( $response );
125-
$code = wp_remote_retrieve_response_code( $response );
126134
$json = json_decode( $body, true );
127135

128136
if ( 200 !== $code ) {
@@ -174,6 +182,24 @@ public function sanitize_api_key_settings( array $new_settings = [], array $sett
174182
} else {
175183
$new_settings[ static::ID ]['authenticated'] = true;
176184
$new_settings[ static::ID ]['models'] = $models;
185+
186+
if ( $this->feature_instance instanceof TextToSpeech ) {
187+
// Get the available voices.
188+
$voices = $this->get_voices( $new_settings[ static::ID ]['api_key'] ?? '' );
189+
190+
if ( is_wp_error( $voices ) ) {
191+
$new_settings[ static::ID ]['authenticated'] = false;
192+
$new_settings[ static::ID ]['voices'] = [];
193+
add_settings_error(
194+
'api_key',
195+
'classifai-elevenlabs-voices-error',
196+
$voices->get_error_message(),
197+
'error'
198+
);
199+
} else {
200+
$new_settings[ static::ID ]['voices'] = $voices;
201+
}
202+
}
177203
}
178204

179205
$new_settings[ static::ID ]['api_key'] = sanitize_text_field( $new_settings[ static::ID ]['api_key'] ?? $settings[ static::ID ]['api_key'] );
@@ -199,15 +225,67 @@ protected function get_models( string $api_key = '' ) {
199225
return $response;
200226
}
201227

228+
// Filter the models based on the current feature.
229+
if ( $this->feature_instance instanceof TextToSpeech ) {
230+
$response = array_filter(
231+
$response,
232+
function ( $model ) {
233+
return true === $model['can_do_text_to_speech'];
234+
}
235+
);
236+
}
237+
202238
// Get the model data we need.
203239
$models = array_map(
204240
fn( $model ) => [
205-
'id' => $model['model_id'] ?? '',
206-
'display_name' => $model['name'] ?? '',
241+
'id' => $model['model_id'] ?? '',
242+
'display_name' => $model['name'] ?? '',
243+
'max_text_length' => $model['maximum_text_length_per_request'] ?? '',
207244
],
208245
$response
209246
);
210247

211248
return $models;
212249
}
250+
251+
/**
252+
* Get the available voices, reduced to an ID and a human-readable display name for each.
253+
*
254+
* @param string $api_key The API key. An empty value short-circuits with an 'auth' WP_Error and no request is made.
255+
* @return array|WP_Error List of [ 'id' => ..., 'name' => ... ] entries, or the WP_Error from the empty-key check or from request().
256+
*/
257+
protected function get_voices( string $api_key = '' ) {
258+
// Check that we have credentials before hitting the API; bail with an 'auth' error otherwise.
259+
if ( empty( $api_key ) ) {
260+
return new WP_Error( 'auth', esc_html__( 'Please enter your ElevenLabs API key.', 'classifai' ) );
261+
}
262+
263+
$response = $this->request( $this->get_api_url( 'voices?per_page=100' ), $api_key, 'get' );
264+
265+
if ( is_wp_error( $response ) ) {
266+
return $response;
267+
}
268+
269+
// Get the voice data we need: the voice ID plus a display name shaped like "Name (Gender) - LANGUAGE", with " (Accent)" appended when an accent label exists. Missing labels fall back to empty strings, and a response without a 'voices' key yields an empty array.
270+
$voices = array_map(
271+
function ( $voice ) {
272+
$labels = $voice['labels'] ?? array();
273+
$name = $voice['name'] ?? '';
274+
$gender = $labels['gender'] ?? '';
275+
$language = $labels['language'] ?? '';
276+
$accent = $labels['accent'] ?? '';
277+
$voice_name = sprintf( '%s (%s) - %s', $name, ucfirst( $gender ), strtoupper( $language ) );
278+
if ( ! empty( $accent ) ) {
279+
$voice_name = sprintf( '%s (%s)', $voice_name, ucfirst( $accent ) );
280+
}
281+
return [
282+
'id' => $voice['voice_id'] ?? '',
283+
'name' => $voice_name,
284+
];
285+
},
286+
$response['voices'] ?? []
287+
);
288+
289+
return $voices;
290+
}
213291
}

0 commit comments

Comments
 (0)