
Commit 990e8be

Fix torchscript related test failures. Fix flake8.
1 parent 1eba300 commit 990e8be

File tree

4 files changed: +82 -27 lines changed

.github/scripts/unittest-linux/run_test.sh
src/torchaudio/functional/filtering.py
src/torchaudio/functional/functional.py
src/torchaudio/transforms/_transforms.py

.github/scripts/unittest-linux/run_test.sh

Lines changed: 1 addition & 1 deletion
@@ -34,5 +34,5 @@ fi
 export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_inflect=true
 export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_pytorch_lightning=true
 cd test
-pytest torchaudio_unittest -k "not backend and not /io/ and not prototype and not ffmpeg and not fairseq and not hdemucs and not (torchscript and rnnt) and not torchscript_consistency"
+pytest torchaudio_unittest -k "not backend and not /io/ and not prototype and not ffmpeg and not fairseq and not hdemucs"
 )

src/torchaudio/functional/filtering.py

Lines changed: 40 additions & 21 deletions
@@ -946,7 +946,8 @@ def forward(ctx, waveform, b_coeffs):
         b_coeff_flipped = b_coeffs.flip(1).contiguous()
         padded_waveform = F.pad(waveform, (n_order - 1, 0))
         output = F.conv1d(padded_waveform, b_coeff_flipped.unsqueeze(1), groups=n_channel)
-        ctx.save_for_backward(waveform, b_coeffs, output)
+        if not torch.jit.is_scripting():
+            ctx.save_for_backward(waveform, b_coeffs, output)
         return output
 
     @staticmethod
@@ -956,32 +957,41 @@ def backward(ctx, dy):
         n_channel = x.size(1)
         n_order = b_coeffs.size(1)
         db = F.conv1d(
-            F.pad(x, (n_order - 1, 0)).view(1, n_batch * n_channel, -1),
-            dy.view(n_batch * n_channel, 1, -1),
-            groups=n_batch * n_channel
-        ).view(
-            n_batch, n_channel, -1
-        ).sum(0).flip(1) if b_coeffs.requires_grad else None
+                F.pad(x, (n_order - 1, 0)).view(1, n_batch * n_channel, -1),
+                dy.view(n_batch * n_channel, 1, -1),
+                groups=n_batch * n_channel
+            ).view(
+                n_batch, n_channel, -1
+            ).sum(0).flip(1) if b_coeffs.requires_grad else None
         dx = F.conv1d(
-            F.pad(dy, (0, n_order - 1)),
-            b_coeffs.unsqueeze(1),
-            groups=n_channel
-        ) if x.requires_grad else None
+                F.pad(dy, (0, n_order - 1)),
+                b_coeffs.unsqueeze(1),
+                groups=n_channel
+            ) if x.requires_grad else None
         return (dx, db)
 
+    @staticmethod
+    def ts_apply(waveform, b_coeffs):
+        if torch.jit.is_scripting():
+            return DifferentiableFIR.forward(torch.empty(0), waveform, b_coeffs)
+        else:
+            return DifferentiableFIR.apply(waveform, b_coeffs)
+
+
 class DifferentiableIIR(torch.autograd.Function):
     @staticmethod
     def forward(ctx, waveform, a_coeffs_normalized):
         n_batch, n_channel, n_sample = waveform.shape
         n_order = a_coeffs_normalized.size(1)
         n_sample_padded = n_sample + n_order - 1
 
-        a_coeff_flipped = a_coeffs_normalized.flip(1).contiguous();
+        a_coeff_flipped = a_coeffs_normalized.flip(1).contiguous()
         padded_output_waveform = torch.zeros(n_batch, n_channel, n_sample_padded,
-            device=waveform.device, dtype=waveform.dtype)
+                                             device=waveform.device, dtype=waveform.dtype)
         _lfilter_core_loop(waveform, a_coeff_flipped, padded_output_waveform)
-        output = padded_output_waveform[:,:,n_order - 1:]
-        ctx.save_for_backward(waveform, a_coeffs_normalized, output)
+        output = padded_output_waveform[:, :, n_order - 1:]
+        if not torch.jit.is_scripting():
+            ctx.save_for_backward(waveform, a_coeffs_normalized, output)
         return output
 
     @staticmethod
@@ -992,15 +1002,23 @@ def backward(ctx, dy):
         tmp = DifferentiableIIR.apply(dy.flip(2).contiguous(), a_coeffs_normalized).flip(2)
         dx = tmp if x.requires_grad else None
         da = -(tmp.transpose(0, 1).reshape(n_channel, 1, -1) @
-               F.pad(y, (n_order - 1, 0)).unfold(2, n_order, 1).transpose(0,1)
-               .reshape(n_channel, -1, n_order)
-               ).squeeze(1).flip(1) if a_coeffs_normalized.requires_grad else None
+               F.pad(y, (n_order - 1, 0)).unfold(2, n_order, 1).transpose(0, 1)
+               .reshape(n_channel, -1, n_order)
+               ).squeeze(1).flip(1) if a_coeffs_normalized.requires_grad else None
         return (dx, da)
 
+    @staticmethod
+    def ts_apply(waveform, a_coeffs_normalized):
+        if torch.jit.is_scripting():
+            return DifferentiableIIR.forward(torch.empty(0), waveform, a_coeffs_normalized)
+        else:
+            return DifferentiableIIR.apply(waveform, a_coeffs_normalized)
+
+
 def _lfilter(waveform, a_coeffs, b_coeffs):
-    n_order = b_coeffs.size(1)
-    filtered_waveform = DifferentiableFIR.apply(waveform, b_coeffs / a_coeffs[:, 0:1])
-    return DifferentiableIIR.apply(filtered_waveform, a_coeffs / a_coeffs[:, 0:1])
+    filtered_waveform = DifferentiableFIR.ts_apply(waveform, b_coeffs / a_coeffs[:, 0:1])
+    return DifferentiableIIR.ts_apply(filtered_waveform, a_coeffs / a_coeffs[:, 0:1])
+
 
 def lfilter(waveform: Tensor, a_coeffs: Tensor, b_coeffs: Tensor, clamp: bool = True, batching: bool = True) -> Tensor:
     r"""Perform an IIR filter by evaluating difference equation, using differentiable implementation
@@ -1071,6 +1089,7 @@ def lfilter(waveform: Tensor, a_coeffs: Tensor, b_coeffs: Tensor, clamp: bool =
 
     return output
 
+
 def lowpass_biquad(waveform: Tensor, sample_rate: int, cutoff_freq: float, Q: float = 0.707) -> Tensor:
     r"""Design biquad lowpass filter and perform filtering. Similar to SoX implementation.
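Both ts_apply helpers above implement the same workaround: torch.autograd.Function.apply cannot be called from TorchScript-compiled code, so when torch.jit.is_scripting() is true the helper calls forward() directly with a placeholder ctx tensor, and forward() in turn skips ctx.save_for_backward. A minimal stand-alone sketch of the pattern, using a toy Square function that is not part of torchaudio:

import torch


class Square(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        y = x * x
        # Under scripting, ctx is just a placeholder tensor, so don't save into it.
        if not torch.jit.is_scripting():
            ctx.save_for_backward(x)
        return y

    @staticmethod
    def backward(ctx, dy):
        (x,) = ctx.saved_tensors
        return 2.0 * x * dy

    @staticmethod
    def ts_apply(x):
        # Same dispatch as DifferentiableFIR/DifferentiableIIR: scripted code
        # calls forward() directly, eager code keeps the autograd machinery.
        if torch.jit.is_scripting():
            return Square.forward(torch.empty(0), x)
        else:
            return Square.apply(x)


def square(x: torch.Tensor) -> torch.Tensor:
    return Square.ts_apply(x)


x = torch.randn(3, requires_grad=True)
square(x).sum().backward()           # eager: uses the custom backward
scripted = torch.jit.script(square)  # scripting takes only the forward path
print(scripted(torch.randn(3)))

As in the commit, the sketch assumes TorchScript resolves torch.jit.is_scripting() at compile time, so the branch containing Function.apply is never compiled; in the scripted path, gradients come from the ops executed inside forward() rather than from the hand-written backward.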

src/torchaudio/functional/functional.py

Lines changed: 39 additions & 4 deletions
@@ -851,7 +851,8 @@ def mask_along_axis_iid(
 
     if axis not in [dim - 2, dim - 1]:
         raise ValueError(
-            f"Only Frequency and Time masking are supported (axis {dim-2} and axis {dim-1} supported; {axis} given)."
+            "Only Frequency and Time masking are supported"
+            f" (axis {dim - 2} and axis {dim - 1} supported; {axis} given)."
         )
 
     if not 0.0 <= p <= 1.0:
@@ -923,7 +924,8 @@ def mask_along_axis(
 
     if axis not in [dim - 2, dim - 1]:
         raise ValueError(
-            f"Only Frequency and Time masking are supported (axis {dim-2} and axis {dim-1} supported; {axis} given)."
+            "Only Frequency and Time masking are supported"
+            f" (axis {dim - 2} and axis {dim - 1} supported; {axis} given)."
         )
 
     if not 0.0 <= p <= 1.0:
@@ -1765,6 +1767,7 @@ def _fix_waveform_shape(
     waveform_shift = waveform_shift.view(shape[:-1] + waveform_shift.shape[-1:])
     return waveform_shift
 
+
 class RnntLoss(torch.autograd.Function):
     @staticmethod
     def forward(ctx, *args):
@@ -1776,9 +1779,39 @@ def forward(ctx, *args):
     def backward(ctx, dy):
         grad = ctx.saved_tensors[0]
         grad_out = dy.view((-1, 1, 1, 1))
-        result = grad * grad_out;
+        result = grad * grad_out
         return (result, None, None, None, None, None, None, None)
 
+    @staticmethod
+    def ts_apply(
+            logits,
+            targets,
+            logit_lengths,
+            target_lengths,
+            blank: int,
+            clamp: float,
+            fused_log_softmax: bool):
+        if torch.jit.is_scripting():
+            output, saved = torch.ops.torchaudio.rnnt_loss_forward(
+                logits,
+                targets,
+                logit_lengths,
+                target_lengths,
+                blank,
+                clamp,
+                fused_log_softmax)
+            return output
+        else:
+            return RnntLoss.apply(
+                logits,
+                targets,
+                logit_lengths,
+                target_lengths,
+                blank,
+                clamp,
+                fused_log_softmax)
+
+
 def _rnnt_loss(
     logits: Tensor,
     targets: Tensor,
@@ -1821,7 +1854,7 @@ def _rnnt_loss(
     if blank < 0:  # reinterpret blank index if blank < 0.
         blank = logits.shape[-1] + blank
 
-    costs = RnntLoss.apply(
+    costs = RnntLoss.ts_apply(
         logits,
         targets,
         logit_lengths,
@@ -1883,10 +1916,12 @@ def psd(
         psd = psd.sum(dim=-3)
     return psd
 
+
 # Expose both deprecated wrapper as well as original because torchscript breaks on
 # wrapped functions.
 rnnt_loss = dropping_support(_rnnt_loss)
 
+
 def _compute_mat_trace(input: torch.Tensor, dim1: int = -1, dim2: int = -2) -> torch.Tensor:
     r"""Compute the trace of a Tensor along ``dim1`` and ``dim2`` dimensions.
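RnntLoss gets the same kind of shim: under scripting, ts_apply calls torch.ops.torchaudio.rnnt_loss_forward directly and returns only the cost tensor, while eager code keeps going through RnntLoss.apply. All three shims in this commit pivot on torch.jit.is_scripting(), which reports whether the surrounding code is running eagerly or inside a scripted graph; a tiny illustration (where_am_i is a hypothetical function, not torchaudio API):

import torch


def where_am_i() -> str:
    # torch.jit.is_scripting() is False when run eagerly and True inside
    # code compiled by torch.jit.script.
    if torch.jit.is_scripting():
        return "scripted"
    return "eager"


print(where_am_i())                    # eager
print(torch.jit.script(where_am_i)())  # scripted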

src/torchaudio/transforms/_transforms.py

Lines changed: 2 additions & 1 deletion
@@ -1202,7 +1202,8 @@ def forward(self, specgram: Tensor, mask_value: Union[float, torch.Tensor] = 0.0
                 specgram, self.mask_param, mask_value, self.axis + specgram.dim() - 3, p=self.p
             )
         else:
-            return F.mask_along_axis(specgram, self.mask_param, mask_value, self.axis + specgram.dim() - 3, p=self.p)
+            mask_value_ = float(mask_value) if isinstance(mask_value, Tensor) else mask_value
+            return F.mask_along_axis(specgram, self.mask_param, mask_value_, self.axis + specgram.dim() - 3, p=self.p)
 
 
 class FrequencyMasking(_AxisMasking):
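The extra line is needed because TorchScript is statically typed: mask_value is declared as Union[float, torch.Tensor], but F.mask_along_axis takes a plain float, so the Tensor case has to be narrowed explicitly before the call. A stand-alone sketch of the same narrowing, where normalize_mask_value is an illustrative helper rather than torchaudio API:

import torch
from typing import Union


def normalize_mask_value(mask_value: Union[float, torch.Tensor]) -> float:
    # isinstance refines the Union so each branch has a single static type,
    # letting TorchScript type-check both the conversion and the fall-through.
    if isinstance(mask_value, torch.Tensor):
        return float(mask_value)
    return mask_value


scripted = torch.jit.script(normalize_mask_value)
print(scripted(torch.tensor(3.0)))  # 3.0
print(scripted(0.5))                # 0.5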
