Skip to content

Commit 681e60e

Browse files
[AUTOGENERATED] [rocm7.1_internal_testing] [ROCm] OffsetCalc Unroll Optimization (#2600)
Cherry-pick of #2597
Co-authored-by: Jerry Mannil <[email protected]>
1 parent 272d56b commit 681e60e

File tree

1 file changed

+18
-0
lines changed

1 file changed

+18
-0
lines changed

aten/src/ATen/cuda/detail/OffsetCalculator.cuh

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,24 @@ struct OffsetCalculator {
4545

4646
C10_HOST_DEVICE offset_type get(index_t linear_idx) const {
4747
offset_type offsets;
48+
49+
#if defined(USE_ROCM)
50+
if ((dims > 0) && (dims <= 2)) {
51+
auto divmod = sizes_[0].divmod(linear_idx);
52+
#pragma unroll
53+
for (int arg = 0; arg < NARGS; arg++)
54+
offsets[arg] = divmod.mod * strides_[0][arg];
55+
if (dims >= 2) {
56+
divmod = sizes_[1].divmod(divmod.div);
57+
#pragma unroll
58+
for (int arg = 0; arg < NARGS; arg++)
59+
offsets[arg] += divmod.mod * strides_[1][arg];
60+
}
61+
// [...]
62+
return offsets;
63+
}
64+
#endif
65+
4866
#pragma unroll
4967
for (int arg = 0; arg < NARGS; arg++) {
5068
offsets[arg] = 0;

0 commit comments

Comments
 (0)