You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
u32fftMiddleIn_i=line / WIDTH; // The i in fftMiddleIn's u[i]
147
147
in+=fftMiddleIn_i*IN_WG; // Adjust in pointer the same way writeMiddleInLine did
148
148
149
-
// Adjust in pointer based on the y value used in writeMiddleInLine
149
+
// Adjust in pointer based on the y value used in writeMiddleInLine. This code is a little obscure as rocm compiler has trouble optimizing commented out code.
150
150
in+=me % SIZEY; // Adjust in pointer to read SIZEY consecutive values
151
+
u32fftMiddleIn_y=me; // The i=0 fftMiddleIn y value
152
+
u32chunk_y=fftMiddleIn_y / SIZEY; // The i=0 fftMiddleIn chunk_y value
153
+
u32fftMiddleIn_y_incr=G_H; // The increment to next fftMiddleIn y value
154
+
u32chunk_y_incr=fftMiddleIn_y_incr / SIZEY; // The increment to next fftMiddleIn chunk_y value
151
155
for (i32i=0; i<NH; ++i) {
152
-
u32fftMiddleIn_y=i*G_H+me; // The fftMiddleIn y value
153
-
u32chunk_y=fftMiddleIn_y / SIZEY; // The fftMiddleIn chunk_y value
156
+
// u32 fftMiddleIn_y = i * G_H + me; // The fftMiddleIn y value
157
+
// u32 chunk_y = fftMiddleIn_y / SIZEY; // The fftMiddleIn chunk_y value
154
158
u[i] =NTLOAD(in[chunk_y* (MIDDLE*IN_WG+PAD_SIZE)]); // Adjust in pointer the same way writeMiddleInLine did
u32fftMiddleIn_i=line / WIDTH; // The i in fftMiddleIn's u[i]
168
173
in+=fftMiddleIn_i*IN_WG; // Adjust in pointer the same way writeMiddleInLine did
169
174
170
-
// Adjust in pointer based on the y value used in writeMiddleInLine
175
+
// Adjust in pointer based on the y value used in writeMiddleInLine. This code is a little obscure as rocm compiler has trouble optimizing commented out code.
171
176
in+=me % SIZEY; // Adjust in pointer to read SIZEY consecutive values
177
+
u32fftMiddleIn_y=me; // The i=0 fftMiddleIn y value
178
+
u32chunk_y=fftMiddleIn_y / SIZEY; // The i=0 fftMiddleIn chunk_y value
179
+
u32fftMiddleIn_y_incr=G_H; // The increment to next fftMiddleIn y value
180
+
u32chunk_y_incr=fftMiddleIn_y_incr / SIZEY; // The increment to next fftMiddleIn chunk_y value
172
181
for (i32i=0; i<NH; ++i) {
173
182
u32fftMiddleIn_y=i*G_H+me; // The fftMiddleIn y value
174
183
u32chunk_y=fftMiddleIn_y / SIZEY; // The fftMiddleIn chunk_y value
175
184
u[i] =NTLOAD(in[chunk_y* (MIDDLE*IN_WG)]); // Adjust in pointer the same way writeMiddleInLine did
0 commit comments