Skip to content

Commit b2a6973

Browse files
Port autograd code for rnnt (#3970)
Co-authored-by: Sam Anklesaria <[email protected]> Co-authored-by: Nicolas Hug <[email protected]> Co-authored-by: Nicolas Hug <[email protected]>
1 parent f3e876c commit b2a6973

File tree

6 files changed

+24
-80
lines changed

6 files changed

+24
-80
lines changed

.github/scripts/unittest-linux/run_test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,5 @@ fi
3030

3131
(
3232
cd test
33-
pytest torchaudio_unittest -k "not backend and not /io/ and not prototype and not sox and not ffmpeg and not fairseq and not hdemucs"
33+
pytest torchaudio_unittest -k "not backend and not /io/ and not prototype and not sox and not ffmpeg and not fairseq and not hdemucs and not (torchscript and rnnt)"
3434
)

.github/workflows/unittest-linux-gpu.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ jobs:
117117
'--cov=torchaudio'
118118
"--junitxml=${RUNNER_TEST_RESULTS_DIR}/junit.xml"
119119
'--durations' '100'
120-
'-k' 'cuda or gpu'
120+
'-k' '(cuda or gpu) and not (torchscript and rnnt)'
121121
)
122122
123123
cd test

src/libtorchaudio/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ if(BUILD_RNNT)
2828
rnnt/compute_alphas.cpp
2929
rnnt/compute_betas.cpp
3030
rnnt/compute.cpp
31-
rnnt/autograd.cpp
3231
)
3332
if (USE_CUDA)
3433
list(

src/libtorchaudio/rnnt/autograd.cpp

Lines changed: 0 additions & 69 deletions
This file was deleted.

src/libtorchaudio/rnnt/compute.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,5 @@ TORCH_LIBRARY_FRAGMENT(torchaudio, m) {
3030
"int blank,"
3131
"float clamp,"
3232
"bool fused_log_softmax) -> (Tensor, Tensor?)");
33+
m.def("torchaudio::rnnt_loss_forward", &rnnt_loss);
3334
}

src/torchaudio/functional/functional.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1760,6 +1760,19 @@ def _fix_waveform_shape(
17601760
waveform_shift = waveform_shift.view(shape[:-1] + waveform_shift.shape[-1:])
17611761
return waveform_shift
17621762

1763+
class RnntLoss(torch.autograd.Function):
1764+
@staticmethod
1765+
def forward(ctx, *args):
1766+
output, saved = torch.ops.torchaudio.rnnt_loss_forward(*args)
1767+
ctx.save_for_backward(saved)
1768+
return output
1769+
1770+
@staticmethod
1771+
def backward(ctx, dy):
1772+
grad = ctx.saved_tensors[0]
1773+
grad_out = dy.view((-1, 1, 1, 1))
1774+
result = grad * grad_out;
1775+
return (result, None, None, None, None, None, None, None)
17631776

17641777
def _rnnt_loss(
17651778
logits: Tensor,
@@ -1803,14 +1816,14 @@ def _rnnt_loss(
18031816
if blank < 0: # reinterpret blank index if blank < 0.
18041817
blank = logits.shape[-1] + blank
18051818

1806-
costs, _ = torch.ops.torchaudio.rnnt_loss(
1807-
logits=logits,
1808-
targets=targets,
1809-
logit_lengths=logit_lengths,
1810-
target_lengths=target_lengths,
1811-
blank=blank,
1812-
clamp=clamp,
1813-
fused_log_softmax=fused_log_softmax,
1819+
costs = RnntLoss.apply(
1820+
logits,
1821+
targets,
1822+
logit_lengths,
1823+
target_lengths,
1824+
blank,
1825+
clamp,
1826+
fused_log_softmax
18141827
)
18151828

18161829
if reduction == "mean":

0 commit comments

Comments (0)