10up
diff --git a/‎README.md‎
Lines changed: 22 additions & 1 deletion b/‎README.md‎
Lines changed: 22 additions & 1 deletion
diff --git a/‎includes/Classifai/Features/TextToSpeech.php‎
Lines changed: 5 additions & 3 deletions b/‎includes/Classifai/Features/TextToSpeech.php‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎includes/Classifai/Providers/ElevenLabs/ElevenLabs.php‎
Lines changed: 81 additions & 3 deletions b/‎includes/Classifai/Providers/ElevenLabs/ElevenLabs.php‎
Lines changed: 81 additions & 3 deletions
@@ -24,7 +24,7 @@ Tap into leading cloud-based services like [OpenAI](https://openai.com/), [Micro
 * Generate new images on demand to use in-content or as a featured image using [OpenAI's Image Generation API](https://platform.openai.com/docs/guides/images-vision), [Google AI's Imagen API](https://ai.google.dev/gemini-api/docs/image-generation#imagen), [Together AI's API](https://docs.together.ai/docs/images-overview) or locally using [Stable Diffusion](https://github.com/AUTOMATIC1111/stable-diffusion-webui/)
 * Generate transcripts of audio files using [OpenAI's Audio Transcription API](https://platform.openai.com/docs/guides/speech-to-text) or [ElevenLabs Speech to Text API](https://elevenlabs.io/docs/capabilities/speech-to-text)
 * Moderate incoming comments for sensitive content using [OpenAI's Moderation API](https://platform.openai.com/docs/guides/moderation)
-* Convert text content into audio and output a "read-to-me" feature on the front-end to play this audio using [Microsoft Azure's Text to Speech API](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/text-to-speech), [Amazon Polly](https://aws.amazon.com/polly/) or [OpenAI's Text to Speech API](https://platform.openai.com/docs/guides/text-to-speech)
+* Convert text content into audio and output a "read-to-me" feature on the front-end to play this audio using [Microsoft Azure's Text to Speech API](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/text-to-speech), [Amazon Polly](https://aws.amazon.com/polly/), [OpenAI's Text to Speech API](https://platform.openai.com/docs/guides/text-to-speech) or [ElevenLabs' Text to Speech API](https://elevenlabs.io/docs/capabilities/text-to-speech)
 * Classify post content using [IBM Watson's Natural Language Understanding API](https://www.ibm.com/watson/services/natural-language-understanding/), [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings), [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) or locally using [Ollama](https://ollama.com/)
 * Create a smart 404 page that has a recommended results section that suggests relevant content to the user based on the page URL they were trying to access using either [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) in combination with [ElasticPress](https://github.com/10up/ElasticPress)
 * Find similar terms to merge together using either [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) in combination with [ElasticPress](https://github.com/10up/ElasticPress). Note this only compares top-level terms and if you merge a term that has children, these become top-level terms as per default WordPress behavior
@@ -491,6 +491,27 @@ Note that [ElevenLabs](https://elevenlabs.io/docs/capabilities/speech-to-text) c
 * Click the button to preview the generated speech audio for the post.
 * View the post on the front-end and see a read-to-me feature has been added
 
+## Set Up Text to Speech (via ElevenLabs)
+
+### 1. Sign up for ElevenLabs
+
+* [Sign up for an ElevenLabs account](https://elevenlabs.io/sign-up) or sign into your existing one.
+* Log into your account and go to the [API key page](https://elevenlabs.io/app/developers/api-keys).
+* Click `Create Key` to create a new API key, and ensure you turn on access to the Text to Speech endpoint and Read access to the Models and Voices endpoints.
+
+### 2. Configure ElevenLabs API Keys under Tools > ClassifAI > Language Processing > Text to Speech > Settings
+
+* Select **ElevenLabs** in the Provider dropdown.
+* Enter your API Key copied from the above step into the `API Key` field.
+* After saving and verifying the connection, select the model and voice you want to use for text to speech.
+
+### 3. Using the Text to Speech service
+
+* Assuming the post type selected is "post", create a new post and publish it.
+* After a few seconds, a "Preview" button will appear under the ClassifAI settings panel.
+* Click the button to preview the generated speech audio for the post.
+* View the post on the front-end and see that a read-to-me feature has been added.
+
 ## Set Up the Smart 404 Feature
 
 ### 1. Decide on Provider
 
@@ -6,6 +6,7 @@
 use Classifai\Providers\Azure\Speech;
 use Classifai\Providers\AWS\AmazonPolly;
 use Classifai\Providers\OpenAI\TextToSpeech as OpenAITTS;
+use Classifai\Providers\ElevenLabs\TextToSpeech as ElevenLabsTTS;
 use Classifai\Normalizer;
 use WP_REST_Server;
 use WP_REST_Request;
@@ -65,9 +66,10 @@ public function __construct() {
 
 		// Contains just the providers this feature supports.
 		$this->supported_providers = [
-			AmazonPolly::ID => __( 'Amazon Polly', 'classifai' ),
-			Speech::ID      => __( 'Microsoft Azure AI Speech', 'classifai' ),
-			OpenAITTS::ID   => __( 'OpenAI Text to Speech', 'classifai' ),
+			AmazonPolly::ID   => __( 'Amazon Polly', 'classifai' ),
+			Speech::ID        => __( 'Microsoft Azure AI Speech', 'classifai' ),
+			OpenAITTS::ID     => __( 'OpenAI Text to Speech', 'classifai' ),
+			ElevenLabsTTS::ID => __( 'ElevenLabs', 'classifai' ),
 		];
 	}
 
 
@@ -5,6 +5,7 @@
 
 namespace Classifai\Providers\ElevenLabs;
 
+use Classifai\Features\TextToSpeech;
 use WP_Error;
 
 use function Classifai\safe_wp_remote_get;
@@ -121,8 +122,15 @@ public function request( string $url, string $api_key = '', string $type = 'post
 			return $response;
 		}
 
+		$code         = wp_remote_retrieve_response_code( $response );
+		$content_type = wp_remote_retrieve_header( $response, 'content-type' );
+
+		// Return the body if the request was successful and the content type is audio.
+		if ( 200 === $code && false !== strpos( $content_type, 'audio' ) ) {
+			return $response;
+		}
+
 		$body = wp_remote_retrieve_body( $response );
-		$code = wp_remote_retrieve_response_code( $response );
 		$json = json_decode( $body, true );
 
 		if ( 200 !== $code ) {
@@ -174,6 +182,24 @@ public function sanitize_api_key_settings( array $new_settings = [], array $sett
 		} else {
 			$new_settings[ static::ID ]['authenticated'] = true;
 			$new_settings[ static::ID ]['models']        = $models;
+
+			if ( $this->feature_instance instanceof TextToSpeech ) {
+				// Get the available voices.
+				$voices = $this->get_voices( $new_settings[ static::ID ]['api_key'] ?? '' );
+
+				if ( is_wp_error( $voices ) ) {
+					$new_settings[ static::ID ]['authenticated'] = false;
+					$new_settings[ static::ID ]['voices']        = [];
+					add_settings_error(
+						'api_key',
+						'classifai-elevenlabs-voices-error',
+						$voices->get_error_message(),
+						'error'
+					);
+				} else {
+					$new_settings[ static::ID ]['voices'] = $voices;
+				}
+			}
 		}
 
 		$new_settings[ static::ID ]['api_key'] = sanitize_text_field( $new_settings[ static::ID ]['api_key'] ?? $settings[ static::ID ]['api_key'] );
@@ -199,15 +225,67 @@ protected function get_models( string $api_key = '' ) {
 			return $response;
 		}
 
+		// Filter the models based on the current feature.
+		if ( $this->feature_instance instanceof TextToSpeech ) {
+			$response = array_filter(
+				$response,
+				function ( $model ) {
+					return true === $model['can_do_text_to_speech'];
+				}
+			);
+		}
+
 		// Get the model data we need.
 		$models = array_map(
 			fn( $model ) => [
-				'id'           => $model['model_id'] ?? '',
-				'display_name' => $model['name'] ?? '',
+				'id'              => $model['model_id'] ?? '',
+				'display_name'    => $model['name'] ?? '',
+				'max_text_length' => $model['maximum_text_length_per_request'] ?? '',
 			],
 			$response
 		);
 
 		return $models;
 	}
+
+	/**
+	 * Get the available voices.
+	 *
+	 * Requests the voices endpoint with the given API key and reduces each
+	 * returned voice to its ID plus a display label of the form
+	 * "Name (Gender) - LANGUAGE (Accent)". Label parts that are missing from
+	 * the response are left out rather than rendered as empty placeholders
+	 * such as "Name () - ".
+	 *
+	 * @param string $api_key The API key.
+	 * @return array|WP_Error List of [ 'id' => ..., 'name' => ... ] entries, or a
+	 *                        WP_Error when the key is empty or the request fails.
+	 */
+	protected function get_voices( string $api_key = '' ) {
+		// Check that we have credentials before hitting the API.
+		if ( empty( $api_key ) ) {
+			return new WP_Error( 'auth', esc_html__( 'Please enter your ElevenLabs API key.', 'classifai' ) );
+		}
+
+		// NOTE(review): confirm `per_page` is the pagination parameter this endpoint accepts.
+		$response = $this->request( $this->get_api_url( 'voices?per_page=100' ), $api_key, 'get' );
+
+		if ( is_wp_error( $response ) ) {
+			return $response;
+		}
+
+		// Get the voice data we need.
+		$voices = array_map(
+			function ( $voice ) {
+				$labels     = $voice['labels'] ?? [];
+				$voice_name = $voice['name'] ?? '';
+				$gender     = $labels['gender'] ?? '';
+				$language   = $labels['language'] ?? '';
+				$accent     = $labels['accent'] ?? '';
+
+				// Append each label part only when it is present, so voices
+				// without labels do not get empty brackets or a dangling dash.
+				if ( ! empty( $gender ) ) {
+					$voice_name = sprintf( '%s (%s)', $voice_name, ucfirst( $gender ) );
+				}
+
+				if ( ! empty( $language ) ) {
+					$voice_name = sprintf( '%s - %s', $voice_name, strtoupper( $language ) );
+				}
+
+				if ( ! empty( $accent ) ) {
+					$voice_name = sprintf( '%s (%s)', $voice_name, ucfirst( $accent ) );
+				}
+
+				return [
+					'id'   => $voice['voice_id'] ?? '',
+					'name' => $voice_name,
+				];
+			},
+			$response['voices'] ?? []
+		);
+
+		return $voices;
+	}
 }