[Backend Tester] Add TorchAudio tests

GregoryComer · GregoryComer · commit 870c3ce4482c · 2025-07-20T18:36:56.000-07:00
ghstack-source-id: 01e7a75 ghstack-comment-id: 3095005640 Pull-Request: #12666
diff --git a/backends/test/suite/models/__init__.py b/backends/test/suite/models/__init__.py
@@ -67,8 +67,9 @@ def _expand_test(cls, test_name: str) -> None:
     test_func = getattr(cls, test_name)
     supports_dynamic_shapes = getattr(test_func, "supports_dynamic_shapes", True)
     dynamic_shape_values = [True, False] if supports_dynamic_shapes else [False]
+    dtypes = getattr(test_func, "dtypes", DTYPES)
 
-    for flow, dtype, use_dynamic_shapes in itertools.product(get_test_flows(), DTYPES, dynamic_shape_values):
+    for flow, dtype, use_dynamic_shapes in itertools.product(get_test_flows(), dtypes, dynamic_shape_values):
         _create_test(cls, test_func, flow, dtype, use_dynamic_shapes)
     delattr(cls, test_name)
 
@@ -81,10 +82,17 @@ def model_test_cls(cls) -> Callable | None:
     return cls
 
 
-def model_test_params(supports_dynamic_shapes: bool) -> Callable:
+def model_test_params(
+    supports_dynamic_shapes: bool = True,
+    dtypes: list[torch.dtype] | None = None,
+) -> Callable:
     """ Optional parameter decorator for model tests. Specifies test parameters. Only valid with a class decorated by model_test_cls. """
     def inner_decorator(func: Callable) -> Callable:
         setattr(func, "supports_dynamic_shapes", supports_dynamic_shapes)
+        
+        if dtypes is not None:
+            setattr(func, "dtypes", dtypes)
+
         return func
     return inner_decorator
 
diff --git a/backends/test/suite/models/test_torchaudio.py b/backends/test/suite/models/test_torchaudio.py
@@ -0,0 +1,81 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+import torch
+import torchaudio
+import unittest
+
+from executorch.backends.test.suite.models import model_test_params, model_test_cls, run_model_test
+from torch.export import Dim
+from typing import Callable, Tuple
+
+#
+# This file contains model integration tests for supported torchaudio models.
+# 
+
+class PatchedConformer(torch.nn.Module):
+    """
+    A lightly modified version of the top-level Conformer module, such that it can be exported.
+    Instead of taking lengths and computing the padding mask, it takes the padding mask directly.
+    See https://github.com/pytorch/audio/blob/main/src/torchaudio/models/conformer.py#L215
+
+    Unlike the upstream forward() linked above, this wrapper returns only the
+    output tensor; no lengths tensor is returned alongside it.
+    """
+
+    def __init__(self, conformer):
+        """Wrap `conformer`; only its `conformer_layers` attribute is used by forward()."""
+        super().__init__()
+        self.conformer = conformer
+
+    def forward(self, input: torch.Tensor, encoder_padding_mask: torch.Tensor) -> torch.Tensor:
+        """
+        Run every layer in `conformer.conformer_layers` over `input`.
+
+        Args:
+            input: Input tensor. Presumably (batch, time, input_dim), based on how the
+                test in this file builds it - TODO confirm.
+            encoder_padding_mask: Precomputed padding mask, passed unchanged to each layer.
+
+        Returns:
+            A single tensor: the output of the last layer with dims 0 and 1 swapped back
+            to the layout of `input`.
+        """
+        # The layers are fed the input with dims 0 and 1 swapped.
+        x = input.transpose(0, 1)
+        for layer in self.conformer.conformer_layers:
+            x = layer(x, encoder_padding_mask)
+        # Undo the swap so the caller gets the same layout it passed in.
+        return x.transpose(0, 1)
+
+@model_test_cls
+class TorchAudio(unittest.TestCase):
+    """Model integration tests for torchaudio models."""
+
+    @model_test_params(dtypes=[torch.float32], supports_dynamic_shapes=False)
+    def test_conformer(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable):
+        """Run a small Conformer, wrapped in PatchedConformer, through run_model_test."""
+        inner_model = torchaudio.models.Conformer(
+            input_dim=80,
+            num_heads=4,
+            ffn_dim=128,
+            num_layers=4,
+            depthwise_conv_kernel_size=31,
+        )
+        model = PatchedConformer(inner_model)
+        # One random length per sequence for a batch of 10.
+        lengths = torch.randint(1, 400, (10,))
+        
+        # The mask is computed here, outside the model, because PatchedConformer
+        # takes the padding mask directly instead of lengths.
+        encoder_padding_mask = torchaudio.models.conformer._lengths_to_padding_mask(lengths)
+        inputs = (
+            torch.rand(10, int(lengths.max()), 80),
+            encoder_padding_mask,
+        )
+        
+        # None is passed in the dynamic-shapes position; this test is declared with
+        # supports_dynamic_shapes=False.
+        run_model_test(model, inputs, dtype, None, tester_factory)
+
+    @model_test_params(dtypes=[torch.float32])
+    def test_wav2letter(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable):
+        """Run Wav2Letter on a (1, 1, 1024) random input, optionally with dim 2 dynamic."""
+        model = torchaudio.models.Wav2Letter()
+        inputs = (torch.randn(1, 1, 1024, dtype=dtype),)
+        # When dynamic shapes are requested, dim 2 of "x" may range over [900, 1024].
+        dynamic_shapes = {
+            "x": {
+                2: Dim("d", min=900, max=1024),
+            }
+        } if use_dynamic_shapes else None
+        run_model_test(model, inputs, dtype, dynamic_shapes, tester_factory)
+        
+    @unittest.skip("This model times out on all backends.")
+    def test_wavernn(self, dtype: torch.dtype, use_dynamic_shapes: bool, tester_factory: Callable):
+        """Run WaveRNN in eval mode on a random waveform/spectrogram pair (currently skipped)."""
+        model = torchaudio.models.WaveRNN(upsample_scales=[5,5,8], n_classes=512, hop_length=200).eval()
+
+        # See https://docs.pytorch.org/audio/stable/generated/torchaudio.models.WaveRNN.html#forward
+        inputs = (
+            torch.randn(1, 1, (64 - 5 + 1) * 200), # waveform
+            torch.randn(1, 1, 128, 64), # specgram
+        )
+
+        run_model_test(model, inputs, dtype, None, tester_factory)
diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py
@@ -51,6 +51,8 @@ def build_result(
             result=result,
             error=error,
         )
+    
+    model.eval()
 
     # Ensure the model can run in eager mode.
     try:

Original file line number	Diff line number	Diff line change
`@@ -51,6 +51,8 @@ def build_result(`
`51`	`51`	`result=result,`
`52`	`52`	`error=error,`
`53`	`53`	`)`
	`54`	`+`
	`55`	`+ model.eval()`
`54`	`56`
`55`	`57`	`# Ensure the model can run in eager mode.`
`56`	`58`	`try:`