1
1
#include < torch/script.h>
2
2
#include < torch/torch.h>
3
+ #include < torch/csrc/stable/library.h>
4
+ #include < torch/csrc/stable/tensor.h>
5
+ #include < torch/csrc/stable/ops.h>
6
+ #include < torch/csrc/inductor/aoti_torch/c/shim.h>
3
7
4
8
using namespace std ;
5
9
@@ -22,17 +26,17 @@ void forced_align_impl(
22
26
const auto T = logProbs.size (1 );
23
27
const auto L = targets.size (1 );
24
28
const auto S = 2 * L + 1 ;
25
- torch::Tensor alphas = torch::empty (
26
- {2 , S},
27
- torch::TensorOptions ()
28
- .device (logProbs.device ())
29
- .dtype (logProbs.dtype ()))
30
- .fill_ (kNegInfinity );
29
+
30
+ auto alphas_a = new scalar_t [S][2 ];
31
+ for (int i = 0 ; i < S; i++) {
32
+ alphas_a[i][0 ] = kNegInfinity ;
33
+ alphas_a[i][1 ] = kNegInfinity ;
34
+ }
35
+
31
36
torch::Tensor backPtr = torch::empty ({T, S}, torch::kInt8 ).fill_ (-1 );
32
37
auto logProbs_a = logProbs.accessor <scalar_t , 3 >();
33
38
auto targets_a = targets.accessor <target_t , 2 >();
34
39
auto paths_a = paths.accessor <target_t , 2 >();
35
- auto alphas_a = alphas.accessor <scalar_t , 2 >();
36
40
auto backPtr_a = backPtr.accessor <int8_t , 2 >();
37
41
auto R = 0 ;
38
42
for (auto i = 1 ; i < L; i++) {
@@ -52,7 +56,7 @@ void forced_align_impl(
52
56
auto end = (S == 1 ) ? 1 : 2 ;
53
57
for (auto i = start; i < end; i++) {
54
58
auto labelIdx = (i % 2 == 0 ) ? blank : targets_a[batchIndex][i / 2 ];
55
- alphas_a[0 ][i ] = logProbs_a[batchIndex][0 ][labelIdx];
59
+ alphas_a[i][ 0 ] = logProbs_a[batchIndex][0 ][labelIdx];
56
60
}
57
61
for (auto t = 1 ; t < T; t++) {
58
62
if (T - t <= L + R) {
@@ -75,18 +79,18 @@ void forced_align_impl(
75
79
auto curIdxOffset = t % 2 ;
76
80
auto prevIdxOffset = (t - 1 ) % 2 ;
77
81
for (auto j = 0 ; j < S; ++j) {
78
- alphas_a[curIdxOffset][j ] = -std::numeric_limits<scalar_t >::infinity ();
82
+ alphas_a[j][curIdxOffset ] = -std::numeric_limits<scalar_t >::infinity ();
79
83
}
80
84
if (start == 0 ) {
81
- alphas_a[curIdxOffset][ 0 ] =
82
- alphas_a[prevIdxOffset][ 0 ] + logProbs_a[batchIndex][t][blank];
85
+ alphas_a[0 ][curIdxOffset ] =
86
+ alphas_a[0 ][prevIdxOffset ] + logProbs_a[batchIndex][t][blank];
83
87
backPtr_a[t][0 ] = 0 ;
84
88
startloop += 1 ;
85
89
}
86
90
87
91
for (auto i = startloop; i < end; i++) {
88
- auto x0 = alphas_a[prevIdxOffset][i ];
89
- auto x1 = alphas_a[prevIdxOffset][ i - 1 ];
92
+ auto x0 = alphas_a[i][prevIdxOffset ];
93
+ auto x1 = alphas_a[i - 1 ][prevIdxOffset ];
90
94
auto x2 = -std::numeric_limits<scalar_t >::infinity ();
91
95
92
96
auto labelIdx = (i % 2 == 0 ) ? blank : targets_a[batchIndex][i / 2 ];
@@ -97,7 +101,7 @@ void forced_align_impl(
97
101
// (i != 1) ensures we never read targets[i / 2 - 1] at index -1, which is what i == 1 would give
98
102
if (i % 2 != 0 && i != 1 &&
99
103
targets_a[batchIndex][i / 2 ] != targets_a[batchIndex][i / 2 - 1 ]) {
100
- x2 = alphas_a[prevIdxOffset][ i - 2 ];
104
+ x2 = alphas_a[i - 2 ][prevIdxOffset ];
101
105
}
102
106
scalar_t result = 0.0 ;
103
107
if (x2 > x1 && x2 > x0) {
@@ -110,11 +114,11 @@ void forced_align_impl(
110
114
result = x0;
111
115
backPtr_a[t][i] = 0 ;
112
116
}
113
- alphas_a[curIdxOffset][i ] = result + logProbs_a[batchIndex][t][labelIdx];
117
+ alphas_a[i][curIdxOffset ] = result + logProbs_a[batchIndex][t][labelIdx];
114
118
}
115
119
}
116
120
auto idx1 = (T - 1 ) % 2 ;
117
- auto ltrIdx = alphas_a[idx1][ S - 1 ] > alphas_a[idx1][ S - 2 ] ? S - 1 : S - 2 ;
121
+ auto ltrIdx = alphas_a[S - 1 ][idx1] > alphas_a[S - 2 ][idx1 ] ? S - 1 : S - 2 ;
118
122
// path stores the token index for each time step after force alignment.
119
123
for (auto t = T - 1 ; t > -1 ; t--) {
120
124
auto lbl_idx = ltrIdx % 2 == 0 ? blank : targets_a[batchIndex][ltrIdx / 2 ];
0 commit comments