Skip to content

Commit 77d4421

Browse files
committed
Port autograd code for rnnt
1 parent a3fe94e commit 77d4421

File tree

2 files changed

+19
-49
lines changed

2 files changed

+19
-49
lines changed

src/libtorchaudio/rnnt/autograd.cpp

Lines changed: 3 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -3,31 +3,7 @@
33
namespace torchaudio {
44
namespace rnnt {
55

6-
class RNNTLossFunction : public torch::autograd::Function<RNNTLossFunction> {
7-
public:
8-
static torch::autograd::tensor_list forward(
9-
torch::autograd::AutogradContext* ctx,
10-
torch::Tensor& logits,
11-
const torch::Tensor& targets,
12-
const torch::Tensor& logit_lengths,
13-
const torch::Tensor& target_lengths,
14-
int64_t blank,
15-
double clamp,
16-
bool fused_log_softmax = true) {
17-
torch::Tensor undef;
18-
auto result = rnnt_loss(
19-
logits,
20-
targets,
21-
logit_lengths,
22-
target_lengths,
23-
blank,
24-
clamp,
25-
fused_log_softmax);
26-
auto costs = std::get<0>(result);
27-
auto grads = std::get<1>(result).value_or(undef);
28-
ctx->save_for_backward({grads});
29-
return {costs, grads};
30-
}
6+
317

328
static torch::autograd::tensor_list backward(
339
torch::autograd::AutogradContext* ctx,
@@ -39,31 +15,10 @@ class RNNTLossFunction : public torch::autograd::Function<RNNTLossFunction> {
3915
torch::Tensor undef;
4016
return {result, undef, undef, undef, undef, undef, undef, undef};
4117
}
42-
};
43-
44-
std::tuple<torch::Tensor, std::optional<torch::Tensor>> rnnt_loss_autograd(
45-
torch::Tensor& logits,
46-
const torch::Tensor& targets,
47-
const torch::Tensor& logit_lengths,
48-
const torch::Tensor& target_lengths,
49-
int64_t blank,
50-
double clamp,
51-
bool fused_log_softmax = true) {
52-
at::AutoDispatchBelowADInplaceOrView guard;
53-
auto results = RNNTLossFunction::apply(
54-
logits,
55-
targets,
56-
logit_lengths,
57-
target_lengths,
58-
blank,
59-
clamp,
60-
fused_log_softmax);
61-
return std::make_tuple(results[0], results[1]);
6218
}
6319

64-
TORCH_LIBRARY_IMPL(torchaudio, Autograd, m) {
65-
m.impl("rnnt_loss", rnnt_loss_autograd);
20+
TORCH_LIBRARY_FRAGMENT(torchaudio, m) {
21+
m.def("torchaudio::rnnt_loss_forward", &rnnt_loss);
6622
}
6723

68-
} // namespace rnnt
6924
} // namespace torchaudio

src/torchaudio/functional/functional.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1760,6 +1760,21 @@ def _fix_waveform_shape(
17601760
waveform_shift = waveform_shift.view(shape[:-1] + waveform_shift.shape[-1:])
17611761
return waveform_shift
17621762

1763+
class RnntLoss(torch.autograd.Function):
    """Autograd wrapper for the RNN Transducer loss custom op.

    The C++ op ``torchaudio.rnnt_loss_forward`` computes both the per-batch
    costs and the gradients w.r.t. the logits in a single forward call; the
    gradients are stashed on the autograd context and replayed (scaled by the
    incoming gradient) in ``backward``.
    """

    @staticmethod
    def forward(ctx, *args):
        # The op returns (costs, grads). grads are precomputed here so that
        # backward only needs to rescale them.
        output, saved = torch.ops.torchaudio.rnnt_loss_forward(*args)
        ctx.save_for_backward(saved)
        return output

    @staticmethod
    def backward(ctx, dy):
        grad = ctx.saved_tensors[0]
        # dy holds one gradient entry per batch element; broadcast it over the
        # remaining three dimensions of the saved logit gradients.
        grad_out = dy.view((-1, 1, 1, 1))
        result = grad * grad_out
        # One slot per forward input; only the logits receive a gradient.
        return (result, None, None, None, None, None, None, None)
1776+
1777+
torch.ops.torchaudio.rnnt_loss_forward
17631778

17641779
def _rnnt_loss(
17651780
logits: Tensor,
@@ -1803,7 +1818,7 @@ def _rnnt_loss(
18031818
if blank < 0: # reinterpret blank index if blank < 0.
18041819
blank = logits.shape[-1] + blank
18051820

1806-
costs, _ = torch.ops.torchaudio.rnnt_loss(
1821+
costs = RnntLoss.apply(
18071822
logits=logits,
18081823
targets=targets,
18091824
logit_lengths=logit_lengths,

0 commit comments

Comments
 (0)