Skip to content

Commit 85ff1a7

Browse files
committed
Remove non-batched version of whisper transcription
Signed-off-by: Fejgin, Roy <rfejgin@nvidia.com>
1 parent 20f19f9 commit 85ff1a7

File tree

1 file changed: 0 additions (+0), 18 deletions (-18)

nemo/collections/tts/modules/magpietts_inference/evaluate_generated_audio.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -117,24 +117,6 @@ def process_text(input_text):
     return single_space_text


-def transcribe_with_whisper(whisper_model, whisper_processor, audio_path, language, device):
-    speech_array, sampling_rate = librosa.load(audio_path, sr=16000)
-    # Set the language task (optional, improves performance for specific languages)
-    forced_decoder_ids = (
-        whisper_processor.get_decoder_prompt_ids(language=language, task="transcribe") if language else None
-    )
-    inputs = whisper_processor(speech_array, sampling_rate=sampling_rate, return_tensors="pt").input_features
-    inputs = inputs.to(device)
-    # Generate transcription
-    with torch.inference_mode():
-        predicted_ids = whisper_model.generate(inputs, forced_decoder_ids=forced_decoder_ids)
-
-    # Decode transcription
-    transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)
-    result = transcription[0]
-    return result
-
-
 def transcribe_with_whisper_batch(whisper_model, whisper_processor, audio_paths, language, device, batch_size=8):
     """Transcribe multiple audio files with Whisper in batches. Returns list of transcriptions (one per path)."""
     forced_decoder_ids = (

0 commit comments

Comments
 (0)