Adding in whisper tiny export script in examples

BujSet · BujSet · commit 3c23ade3ad72 · 2025-07-21T17:41:22.000Z
diff --git a/examples/models/__init__.py b/examples/models/__init__.py
@@ -37,6 +37,7 @@ class Model(str, Enum):
     EfficientSam = "efficient_sam"
     Qwen25 = "qwen2_5"
     Phi4Mini = "phi_4_mini"
+    WhisperTiny = "whisper_tiny"
 
     def __str__(self) -> str:
         return self.value
@@ -82,6 +83,7 @@ def __str__(self) -> str:
     str(Model.EfficientSam): ("efficient_sam", "EfficientSAM"),
     str(Model.Qwen25): ("qwen2_5", "Qwen2_5Model"),
     str(Model.Phi4Mini): ("phi_4_mini", "Phi4MiniModel"),
+    str(Model.WhisperTiny): ("whisper_tiny", "WhisperTinyModel"),
 }
 
 __all__ = [
diff --git a/examples/models/whisper_tiny/__init__.py b/examples/models/whisper_tiny/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .model import WhisperTinyModel
+
+__all__ = [
+    "WhisperTinyModel",
+]
diff --git a/examples/models/whisper_tiny/model.py b/examples/models/whisper_tiny/model.py
@@ -0,0 +1,39 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+
+from transformers import AutoFeatureExtractor, WhisperModel # @manual
+from datasets import load_dataset
+
+from ..model_base import EagerModelBase
+
+
+class WhisperTinyModel(EagerModelBase):
+    """Eager-mode wrapper around the Hugging Face ``openai/whisper-tiny`` checkpoint."""
+
+    def __init__(self):
+        pass
+
+    def get_eager_model(self) -> torch.nn.Module:
+        """Load the pretrained ``openai/whisper-tiny`` ``WhisperModel`` and switch it to eval mode."""
+        logging.info("Loading whisper-tiny model")
+        # return_dict=False is forwarded to the model config so outputs are plain tuples.
+        # pyre-ignore
+        model = WhisperModel.from_pretrained("openai/whisper-tiny", return_dict=False)
+        model.eval()
+        logging.info("Loaded whisper-tiny model")
+        return model
+
+    def get_example_inputs(self):
+        """Return a 1-tuple with the features extracted from the first dummy LibriSpeech validation clip."""
+        feature_extractor = AutoFeatureExtractor.from_pretrained("openai/whisper-tiny")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        inputs = feature_extractor(ds[0]["audio"]["array"], return_tensors="pt")
+        # NOTE(review): WhisperModel.forward may also need decoder_input_ids -- confirm this suffices for export.
+        return (inputs.input_features,)
diff --git a/requirements-examples.txt b/requirements-examples.txt
@@ -5,3 +5,4 @@ timm == 1.0.7
 torchsr == 1.0.4
 torchtune >= 0.6.1
 transformers >= 4.53.1
+librosa >= 0.11.0