Better quickstart script including gpu management #1751
Eyalm321
started this conversation in
Development
Replies: 0 comments
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.
Uh oh!
There was an error while loading. Please reload this page.
-
This code is for developers with a multi-GPU setup. It runs one audio file per GPU; if a GPU hits an out-of-memory (OOM) error, it falls back to CPU memory.
import logging
import os
from multiprocessing import Lock, Process, Queue, Semaphore
from queue import Empty

import torch
from pyannote.audio import Pipeline
from pydub import AudioSegment
Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
Hugging Face authentication token
auth_token = ""
Directory containing wav files
wav_dir = "/home/admin/voice"
output_dir = "/home/admin/voice/segments"
Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)
List of available GPU devices
gpu_devices = [f"cuda:{i}" for i in range(torch.cuda.device_count())]
Check if GPUs are available
if not gpu_devices:
logging.info("No GPU available. Using CPU.")
gpu_devices = ["cpu"]
Initialize a lock for managing GPU availability
gpu_locks = {gpu: Lock() for gpu in gpu_devices}
def process_file(wav_path, gpu_device, semaphore):
with semaphore:
try:
if gpu_device != "cpu":
os.environ["CUDA_VISIBLE_DEVICES"] = gpu_device.split(":")[-1]
device = torch.device(f"cuda:0")
else:
os.environ["CUDA_VISIBLE_DEVICES"] = ""
device = torch.device("cpu")
def worker(queue, semaphore):
while not queue.empty():
try:
wav_path, gpu_device = queue.get_nowait()
except Queue.Empty:
break
with gpu_locks[gpu_device]:
process_file(wav_path, gpu_device, semaphore)
if name == "main":
# List all wav files
wav_files = [os.path.join(wav_dir, f) for f in os.listdir(wav_dir) if f.endswith(".wav")]
`
Runs well for long audio files.
Cheers.
Beta Was this translation helpful? Give feedback.
All reactions