Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,29 @@ mlx_audio.tts.generate --text "Hello, world" --file_prefix hello
mlx_audio.tts.generate --text "Hello, world" --speed 1.4
```

### How to call from Python

To generate audio from Python code with a text-to-speech (TTS) model, use:

```python
from mlx_audio.tts.generate import generate_audio

# Example: Generate an audiobook chapter as audio
generate_audio(
text="In the beginning, the universe was created...",
model="prince-canuma/Kokoro-82M",
voice="af_heart",
speed=1.2,
lang_code="a",  # language code; "a" is generate_audio's default
file_path="audiobook_chapter1",
audio_format="wav",
sample_rate=24000,
verbose=True # Set to False to disable print messages
)

print("Audiobook chapter successfully generated!")

```

### Web Interface & API Server

Expand Down
54 changes: 54 additions & 0 deletions mlx_audio/tts/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,5 +117,59 @@ def main():
traceback.print_exc()


def generate_audio(
    text: str,
    model: str = "prince-canuma/Kokoro-82M",
    voice: str = "af_heart",
    speed: float = 1.0,
    lang_code: str = "a",
    file_path: str = "audio",
    audio_format: str = "wav",
    sample_rate: int = 24000,
    verbose: bool = True,
) -> None:
    """Generate speech audio from text and write it to a file.

    Loads the TTS model named by ``model``, asks it to synthesize ``text``,
    concatenates the generated segments along axis 0 and writes the result
    to ``f"{file_path}.{audio_format}"``.

    This function never raises: any exception is caught and reported on
    stdout as ``"❌ Error generating audio: ..."``. When ``verbose`` is true
    the traceback is printed as well, so failures remain debuggable.

    Args:
        text: The input text to convert to speech. Must be a non-empty,
            non-whitespace string.
        model: Identifier or path handed to ``load_model`` (default:
            "prince-canuma/Kokoro-82M").
        voice: Voice name forwarded to the model (default: "af_heart").
        speed: Speed multiplier forwarded to the model (default: 1.0).
        lang_code: Language code forwarded to the model (default: "a").
        file_path: Output file path without extension (default: "audio").
        audio_format: Extension appended to ``file_path``, e.g. "wav" or
            "flac" (default: "wav").
        sample_rate: Sampling rate in Hz passed to ``sf.write``
            (default: 24000).
        verbose: Whether to print status messages; also forwarded to the
            model's ``generate`` call (default: True).

    Returns:
        None. The generated audio is written to disk as a side effect.
    """
    try:
        # Fail fast on empty input instead of loading a model first.
        if not isinstance(text, str) or not text.strip():
            raise ValueError("text must be a non-empty string")

        # Load the specified TTS model.
        model_instance = load_model(model_path=model)

        # Generate audio using the model.
        results = model_instance.generate(
            text=text,
            voice=voice,
            speed=speed,
            lang_code=lang_code,
            verbose=verbose,  # Pass verbose to model if applicable
        )

        # Collect the generated segments; concatenating an empty list would
        # only surface an opaque backend error, so report it explicitly.
        audio_list = [result.audio for result in results]
        if not audio_list:
            raise ValueError("the model produced no audio segments")
        final_audio = mx.concatenate(audio_list, axis=0)

        # Save the audio; the extension selects the output file name.
        output_file = f"{file_path}.{audio_format}"
        sf.write(output_file, final_audio, sample_rate)

        if verbose:
            print(f"✅ Audio successfully generated and saved as: {output_file}")

    except Exception as e:
        # Best-effort API: report the failure rather than propagate it.
        print(f"❌ Error generating audio: {e}")
        if verbose:
            # Local import keeps this block self-contained.
            import traceback

            traceback.print_exc()


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported (e.g. to call generate_audio directly).
if __name__ == "__main__":
    main()