@@ -31,8 +31,8 @@ func.func @global_load_to_rocdl_f32(%global : memref<128x72xf32, #gpu_global_add
3131 // CHECK: %[[GLOBAL_PTR:.*]] = llvm.getelementptr %[[GLOBAL_BASE]][%[[SRC_OFFSET]]]
3232 // CHECK: %[[LDS_BASE:.*]] = llvm.extractvalue %[[LDS_DESC]][1]
3333
34- // CHECK: %[[C72_1:.*]] = llvm.mlir.constant(72 : index) : i64
35- // CHECK: %[[MUL_2:.*]] = llvm.mul %[[IC32]], %[[C72_1]] : i64
34+ // CHECK: %[[C64:.*]] = llvm.mlir.constant(64 : index) : i64
35+ // CHECK: %[[MUL_2:.*]] = llvm.mul %[[IC32]], %[[C64]] : i64
3636 // CHECK: %[[DST_OFFSET:.*]] = llvm.add %[[MUL_2]], %[[IC0]] : i64
3737
3838 // CHECK: %[[LDS_PTR:.*]] = llvm.getelementptr %[[LDS_BASE]][%[[DST_OFFSET]]]
@@ -65,8 +65,8 @@ func.func @global_load_to_rocdl_i8(%global : memref<128x72xi8, #gpu_global_addrs
6565 // CHECK: %[[GLOBAL_PTR:.*]] = llvm.getelementptr %[[GLOBAL_BASE]][%[[SRC_OFFSET]]]
6666 // CHECK: %[[LDS_BASE:.*]] = llvm.extractvalue %[[LDS_DESC]][1]
6767
68- // CHECK: %[[C72_1:.*]] = llvm.mlir.constant(72 : index) : i64
69- // CHECK: %[[MUL_2:.*]] = llvm.mul %[[IC32]], %[[C72_1]] : i64
68+ // CHECK: %[[C64:.*]] = llvm.mlir.constant(64 : index) : i64
69+ // CHECK: %[[MUL_2:.*]] = llvm.mul %[[IC32]], %[[C64]] : i64
7070 // CHECK: %[[DST_OFFSET:.*]] = llvm.add %[[MUL_2]], %[[IC0]] : i64
7171
7272 // CHECK: %[[LDS_PTR:.*]] = llvm.getelementptr %[[LDS_BASE]][%[[DST_OFFSET]]]
@@ -103,8 +103,8 @@ func.func @global_load_to_rocdl_vec(%global : memref<128x72xi16, #gpu_global_add
103103 // CHECK: %[[GLOBAL_PTR:.*]] = llvm.getelementptr %[[GLOBAL_BASE]][%[[SRC_OFFSET]]]
104104 // CHECK: %[[LDS_BASE:.*]] = llvm.extractvalue %[[LDS_DESC]][1]
105105
106- // CHECK: %[[C72_1:.*]] = llvm.mlir.constant(72 : index) : i64
107- // CHECK: %[[MUL_2:.*]] = llvm.mul %[[IC32]], %[[C72_1]] : i64
106+ // CHECK: %[[C128:.*]] = llvm.mlir.constant(128 : index) : i64
107+ // CHECK: %[[MUL_2:.*]] = llvm.mul %[[IC32]], %[[C128]] : i64
108108 // CHECK: %[[DST_OFFSET:.*]] = llvm.add %[[MUL_2]], %[[IC0]] : i64
109109
110110 // CHECK: %[[LDS_PTR:.*]] = llvm.getelementptr %[[LDS_BASE]][%[[DST_OFFSET]]]
@@ -130,7 +130,9 @@ func.func @global_load_to_rocdl_dynamic_indices(%global : memref<512xi32, #gpu_g
130130 // CHECK: %[[GLOBAL_BASE:.*]] = llvm.extractvalue %[[GLOBAL_DESC]][1]
131131 // CHECK: %[[GLOBAL_PTR:.*]] = llvm.getelementptr %[[GLOBAL_BASE]][%[[SRCIDX_CAST]]]
132132 // CHECK: %[[LDS_BASE:.*]] = llvm.extractvalue %[[LDS_DESC]][1]
133- // CHECK: %[[LDS_PTR:.*]] = llvm.getelementptr %[[LDS_BASE]][%[[DSTIDX_CAST]]]
133+ // CHECK: %[[C64:.*]] = llvm.mlir.constant(64 : index) : i64
134+ // CHECK: %[[DSTIDX:.*]] = llvm.mul %[[DSTIDX_CAST]], %[[C64]] : i64
135+ // CHECK: %[[LDS_PTR:.*]] = llvm.getelementptr %[[LDS_BASE]][%[[DSTIDX]]]
134136 // CHECK: rocdl.load.to.lds %[[GLOBAL_PTR]], %[[LDS_PTR]], 4
135137 %alloc = memref.alloc () : memref <4 x64 xi32 , #gpu_lds_addrspace >
136138 %c0 = arith.constant 0 : index
@@ -166,8 +168,8 @@ func.func @fat_buffer_load_to_rocdl_f32(%global : memref<128x72xf32, #amdgpu_fat
166168 // CHECK: %[[GLOBAL_PTR:.*]] = llvm.getelementptr %[[GLOBAL_BASE]][%[[SRC_OFFSET]]]
167169 // CHECK: %[[LDS_BASE:.*]] = llvm.extractvalue %[[LDS_DESC]][1]
168170
169- // CHECK: %[[C72_1:.*]] = llvm.mlir.constant(72 : index) : i64
170- // CHECK: %[[MUL_2:.*]] = llvm.mul %[[IC32]], %[[C72_1]] : i64
171+ // CHECK: %[[C64:.*]] = llvm.mlir.constant(64 : index) : i64
172+ // CHECK: %[[MUL_2:.*]] = llvm.mul %[[IC32]], %[[C64]] : i64
171173 // CHECK: %[[DST_OFFSET:.*]] = llvm.add %[[MUL_2]], %[[IC0]] : i64
172174
173175 // CHECK: %[[LDS_PTR:.*]] = llvm.getelementptr %[[LDS_BASE]][%[[DST_OFFSET]]]
0 commit comments