"""
Script to compare average inference time across fine-tuned Whisper models.
"""

import os
import time

import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

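# Fine-tuned Whisper checkpoints to compare (tiny, base, and small variants).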
model_dirs = [
    'whisper_tiny_atco2_v2/best_model',
    'whisper_base_atco2/best_model',
    'whisper_small_atco2/best_model',
]

# Directory containing the .wav clips to transcribe.
input_dir = 'inference_data'

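# Prefer GPU with float16 for speed when available; fall back to float32 on CPU.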
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

for model_id in model_dirs:
    print(f"\nEvaluating model: {model_id}")

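    # Load the fine-tuned checkpoint in the selected precision.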
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id,
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
        use_safetensors=True,
    )
    model.to(device)

    # The processor bundles the tokenizer and feature extractor for the model.
    processor = AutoProcessor.from_pretrained(model_id)

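    # Wrap the model and processor in a transformers ASR pipeline so audio
    # files can be passed in by path.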
    pipe = pipeline(
        'automatic-speech-recognition',
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        torch_dtype=torch_dtype,
        device=device,
    )

    total_time = 0.0
    num_runs = 0

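    # Note: on GPU, the first transcription includes one-time warm-up costs
    # (kernel selection, allocator growth) that slightly inflate the average;
    # a warm-up pass excluded from timing would give a cleaner number.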
    # Time 10 full passes over the directory to smooth out per-file variance.
    for _ in range(10):
        for filename in os.listdir(input_dir):
            if filename.endswith('.wav'):
                start_time = time.time()
                pipe(os.path.join(input_dir, filename))
                end_time = time.time()
                total_time += end_time - start_time
                num_runs += 1

    if num_runs:
        average_time = total_time / num_runs
        print(f"\nAverage time taken for {model_id}: {average_time:.3f} seconds ({num_runs} runs)")
    else:
        print(f"\nNo .wav files found in {input_dir}; nothing to time.")