Merge pull request #41 from himanshumahajan138/feature/add-text-to-speech

king04aman · web-flow · commit b6ea04e6fd97 · 2024-10-17T00:14:11.000+05:30
Fixes: #38 ; Adds Text-to-Speech Converter using gTTS
diff --git a/Text To Speech/README.md b/Text To Speech/README.md
@@ -0,0 +1,63 @@
+# Script Name
+**Text to Speech Converter using gTTS**
+
+- This script converts text into speech using Google’s Text-to-Speech (gTTS) API and saves the output as an audio file (e.g., `.mp3` format).
+- It allows for customization of language, speech speed, accents, and other pre-processing and tokenizing options.
+- Features:
+  - Support for multiple languages using IETF language tags.
+  - Localized accents via different Google Translate top-level domains (`tld`).
+  - Option to slow down speech for easier comprehension.
+  - Custom text pre-processing and tokenization options.
+  - Timeout control for network requests.
+  - Optional playback of the audio file after saving (disabled by default; uncomment the playback snippet in the script to enable it).
+
+# Description
+This script provides a convenient interface for converting text into speech using the `gTTS` library. The text can be read in multiple languages, at different speeds, and with various localized accents. The script also includes advanced options for pre-processing the input text and customizing how it's tokenized before being sent to the gTTS API.
+
+### Key Features:
+- **Multilingual Support**: Specify different languages using IETF language tags (`en`, `es`, etc.).
+- **Accents**: Use top-level domains (`tld`), such as `com`, `co.uk`, etc., to localize the accent.
+- **Custom Speed**: Option to slow down the speech for better understanding.
+- **Pre-Processing**: Built-in support for text pre-processing (e.g., removing punctuation).
+- **Timeout**: Set timeout limits for the API request.
+
+# Prerequisites
+The following libraries are required to run the script:
+```bash
+pip install gtts
+```
+
+Additionally, the script uses the `os` module from the Python standard library, which requires no installation.
+
+# Installation Instructions
+1. **Clone the Repository**:
+   Clone this repository to your local machine using:
+   ```bash
+   git clone <repository-url>
+   ```
+
+2. **Install Dependencies**:
+   Navigate to the project directory and install the required packages:
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+3. **Run the Script**:
+   After cloning and installing dependencies, you can run the script directly:
+   ```bash
+   python text_to_speech.py
+   ```
+
+4. **Customize the Script**:
+   You can modify the input text, language, speed, and other options directly in the script:
+   ```python
+   text_to_speech("Hello, welcome to the gTTS Python tutorial.", lang='en', slow=False)
+   ```
+
+# Output
+### Example output:
+After running the script with the text `"Hello, welcome to the gTTS Python tutorial."`, the audio is saved as `output.mp3` inside the `Text To Speech/` directory.
+
+
+# Author
+**[Himanshu Mahajan](https://github.com/himanshumahajan138)**
diff --git a/Text To Speech/requirements.txt b/Text To Speech/requirements.txt
@@ -0,0 +1 @@
+gTTS==2.5.2
diff --git a/Text To Speech/runtime.txt b/Text To Speech/runtime.txt
@@ -0,0 +1 @@
+python-3.10.7
diff --git a/Text To Speech/text_to_speech.py b/Text To Speech/text_to_speech.py
@@ -0,0 +1,118 @@
+from gtts import gTTS
+import os
+
+
+def text_to_speech(
+    text,
+    lang="en",
+    tld="com",
+    slow=False,
+    lang_check=True,
+    pre_processor_funcs=None,
+    tokenizer_func=None,
+    timeout=None,
+    output_file="output.mp3",
+):
+    """
+    Convert the provided text to speech and save it as an audio file.
+
+    Args:
+        text (string): The text to be read.
+        lang (string, optional): The language (IETF language tag) to read the text in. Default is 'en'.
+        tld (string, optional): Top-level domain for Google Translate host (e.g., 'com', 'co.uk').
+                                This affects accent localization. Default is 'com'.
+        slow (bool, optional): If True, reads the text more slowly. Default is False.
+        lang_check (bool, optional): If True, an unsupported lang is rejected with a ValueError. Default is True.
+        pre_processor_funcs (list, optional): Functions applied to the text before tokenizing.
+                                                Default is None, which keeps gTTS's built-in pre-processors.
+        tokenizer_func (callable, optional): Function used to split long text into chunks.
+                                             Default is None, which keeps gTTS's built-in tokenizer.
+        timeout (float or tuple, optional): Seconds to wait for server response. Can be a float or a (connect, read) tuple.
+                                            Default is None (wait indefinitely).
+        output_file (string): Output file name (default: 'output.mp3'). Relative values are resolved
+                              against this script's directory; absolute paths are used as given.
+
+    Note:
+        AssertionError (e.g. empty text), ValueError and RuntimeError raised while building or
+        saving the audio are caught and printed, not re-raised. Any other exception propagates.
+    """
+
+    # Forward the optional hooks only when supplied so gTTS applies its own defaults;
+    # the old stand-ins stripped every "." and split long text into one request per word.
+    optional_hooks = {}
+    if pre_processor_funcs is not None:
+        optional_hooks["pre_processor_funcs"] = pre_processor_funcs
+    if tokenizer_func is not None:
+        optional_hooks["tokenizer_func"] = tokenizer_func
+
+    # Anchor relative names to this script's folder rather than a cwd-relative
+    # "Text To Speech/" prefix; os.path.join leaves absolute paths untouched.
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    output_path = os.path.join(script_dir, output_file)
+
+    try:
+        # Create the gTTS object with the provided arguments
+        tts = gTTS(
+            text=text,
+            lang=lang,
+            tld=tld,
+            slow=slow,
+            lang_check=lang_check,
+            timeout=timeout,
+            **optional_hooks,
+        )
+
+        # Save the audio file
+        tts.save(output_path)
+        print(f"Audio saved at {output_path}")
+
+        # Optionally, play the audio file (uncomment to enable)
+        # if os.name == "nt":  # Windows
+        #     os.startfile(output_path)
+        # else:  # Linux desktops providing xdg-open
+        #     os.system(f'xdg-open "{output_path}"')
+
+    except AssertionError as assertion_err:
+        print(f"Assertion Error: {assertion_err}")
+    except ValueError as value_err:
+        print(f"Value Error: {value_err}")
+    except RuntimeError as runtime_err:
+        print(f"Runtime Error: {runtime_err}")
+
+
+if __name__ == "__main__":
+    # Demo calls for text_to_speech; only the first one is active.
+
+    # Minimal call: English voice, every option left at its default.
+    greeting = "Hello, welcome to the gTTS Python tutorial."
+    text_to_speech(greeting)
+
+    # # Spanish voice, slowed down, written to a custom file name.
+    # text_to_speech(
+    #     "Hola, bienvenido al tutorial de gTTS.",
+    #     lang="es",
+    #     slow=True,
+    #     output_file="spanish_slow.mp3",
+    # )
+
+    # # English with a localized (UK) accent selected through the tld option.
+    # text_to_speech(
+    #     "Hello! How are you today?",
+    #     lang="en",
+    #     tld="co.uk",
+    #     output_file="british_accent.mp3",
+    # )
+
+    # # Custom pre-processor functions rewrite the text before it is tokenized.
+    # text_to_speech(
+    #     "Dr. Smith is a great person.",
+    #     pre_processor_funcs=[lambda x: x.replace(".", "")],
+    #     output_file="custom_pre-processor.mp3",
+    # )
+
+    # # A timeout limits how long the request to Google Translate may wait.
+    # text_to_speech(
+    #     "This will timeout after 5 seconds.",
+    #     output_file="timeout.mp3",
+    #     timeout=5.0
+    # )