Merge branch 'main' into forced_align_accessors

samanklesaria · samanklesaria · commit 258ca0057f6e · 2025-08-06T19:04:01.000Z
diff --git a/src/libtorchaudio/forced_align/cpu/compute.cpp b/src/libtorchaudio/forced_align/cpu/compute.cpp
@@ -191,7 +191,7 @@ std::tuple<Tensor, Tensor> compute(
   auto paths = Tensor(paths_h);
 
 
-  if (targets.scalar_type() == aoti_torch_dtype_int64()) {
+  if (targets.dtype() == aoti_torch_dtype_int64()) {
     if (logProbs.scalar_type() == aoti_torch_dtype_float64()) {
       forced_align_impl<float64, int64>(logProbs, targets, blank, paths);
     } else if (logProbs.scalar_type() == aoti_torch_dtype_float32()) {
@@ -210,11 +210,8 @@ std::tuple<Tensor, Tensor> compute(
   }
   return std::make_tuple(
       paths,
-      logProbs.index(
-          {torch::indexing::Slice(),
-           torch::linspace(
-               0, T - 1, T, torch::TensorOptions().dtype(paths.dtype())),
-           paths.index({0})}));
+      logProbs
+      );
 }
 
 
diff --git a/src/libtorchaudio/forced_align/gpu/compute.cu b/src/libtorchaudio/forced_align/gpu/compute.cu
@@ -306,11 +306,7 @@ std::tuple<torch::Tensor, torch::Tensor> compute(
       });
   return std::make_tuple(
       paths.to(logProbs.device()),
-      logProbs.index(
-          {torch::indexing::Slice(),
-           torch::linspace(
-               0, T - 1, T, torch::TensorOptions().dtype(paths.dtype())),
-           paths.index({0})}));
+      logProbs);
 }
 
 TORCH_LIBRARY_IMPL(torchaudio, CUDA, m) {
diff --git a/src/libtorchaudio/lfilter.cpp b/src/libtorchaudio/lfilter.cpp
@@ -82,15 +82,11 @@ void lfilter_core_generic_loop(
   auto coeff = a_coeff_flipped.unsqueeze(2);
   for (int64_t i_sample = 0; i_sample < n_samples_input; i_sample++) {
     auto windowed_output_signal =
-        padded_output_waveform
-            .index(
-                {torch::indexing::Slice(),
-                 torch::indexing::Slice(),
-                 torch::indexing::Slice(i_sample, i_sample + n_order)})
-            .transpose(0, 1);
+      // torch::narrow takes (dim, start, length), not (dim, start, end):
+      // the window is the n_order samples that begin at i_sample.
+      torch::narrow(padded_output_waveform, 2, i_sample, n_order).transpose(0, 1);
     auto o0 =
-        input_signal_windows.index(
-            {torch::indexing::Slice(), torch::indexing::Slice(), i_sample}) -
+        torch::select(input_signal_windows, 2, i_sample) -
         at::matmul(windowed_output_signal, coeff).squeeze(2).transpose(0, 1);
     padded_output_waveform.index_put_(
         {torch::indexing::Slice(),
diff --git a/src/torchaudio/functional/_alignment.py b/src/torchaudio/functional/_alignment.py
@@ -70,7 +70,7 @@ def forced_align(
     assert target_lengths is not None
 
     paths, scores = torch.ops.torchaudio.forced_align(log_probs, targets, input_lengths, target_lengths, blank)
-    return paths, scores
+    return paths, scores[:, torch.arange(scores.shape[1]), paths[0]]
 
 
 @dataclass