We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 8a42373, commit 4464dfd (Copy full SHA for 4464dfd)
accelerated_scan/warp.cuh
@@ -391,8 +391,8 @@ __global__ void scan_grad(
391
const weight_t kEmptyToken = 0.0;
392
393
// Limits for loading shifted tuples.
394
- const int minIdx = blockDim.x * kNChunksPerSequence * blockIdx.x;
395
- const int maxIdx = blockDim.x * kNChunksPerSequence * (blockIdx.x + 1);
+ const int minIdx = seqoffset / Tuple::Size;
+ const int maxIdx = minIdx + blockDim.x * kNChunksPerSequence;
396
397
for (int chunk = 0; chunk < kNChunksPerSequence; chunk++) {
398
const int offset = seqoffset + (kNChunksPerSequence - 1 - chunk) * chunklen;
0 commit comments