I did this:

import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from datasets import load_dataset
import time

# Measure the start time
start_time = time.time()

# This snippet assumes a CUDA GPU is available (flash_attention_2 below is GPU-only)
device = "cuda:0"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Load the model and place it directly on the GPU via device_map;
# attn_implementation="flash_attention_2" requires the flash-attn package
model_id = "openai/whisper-large-v3"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
    device_map=device,
    attn_implementation="flash_attention_2",
)

# No explicit model.to(device) is needed: device_map already placed the model

processor = AutoProcessor.from_pretrained(model_id)
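
The snippet breaks off here. A minimal sketch of the likely continuation, based on the unused imports (pipeline, load_dataset) and the start_time measurement; the dataset name is only an illustrative choice, not from the original post:

# Build an ASR pipeline around the loaded model and processor
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
)

# Transcribe a sample clip (illustrative dataset, not from the original post)
dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
sample = dataset[0]["audio"]
result = pipe(sample)
print(result["text"])

# Report elapsed wall-clock time
print(f"Elapsed: {time.time() - start_time:.1f} s")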
