Skip to content

Commit 0e4116a

Browse files
authored
[AMDGPU] Fix typing error in multi-dimensional promote alloca (#131763)
Fix the type error, introduced in #127973, that occurs when a GEP uses an i64 index.
1 parent b251c29 commit 0e4116a

File tree

2 files changed

+65
-12
lines changed

2 files changed

+65
-12
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -444,36 +444,39 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
444444
if (VarOffsets.size() > 1)
445445
return nullptr;
446446

447-
APInt Quot;
447+
APInt IndexQuot;
448448
uint64_t Rem;
449-
APInt::udivrem(ConstOffset, VecElemSize, Quot, Rem);
449+
APInt::udivrem(ConstOffset, VecElemSize, IndexQuot, Rem);
450450
if (Rem != 0)
451451
return nullptr;
452-
453-
ConstantInt *ConstIndex = ConstantInt::get(GEP->getContext(), Quot);
454452
if (VarOffsets.size() == 0)
455-
return ConstIndex;
453+
return ConstantInt::get(GEP->getContext(), IndexQuot);
456454

457455
IRBuilder<> Builder(GEP);
458456

459457
const auto &VarOffset = VarOffsets.front();
460-
APInt::udivrem(VarOffset.second, VecElemSize, Quot, Rem);
461-
if (Rem != 0 || Quot.isZero())
458+
APInt OffsetQuot;
459+
APInt::udivrem(VarOffset.second, VecElemSize, OffsetQuot, Rem);
460+
if (Rem != 0 || OffsetQuot.isZero())
462461
return nullptr;
463462

464463
Value *Offset = VarOffset.first;
465-
if (!Quot.isOne()) {
466-
auto *OffsetType = dyn_cast<IntegerType>(Offset->getType());
467-
if (!OffsetType)
468-
return nullptr;
469-
ConstantInt *ConstMul = ConstantInt::get(OffsetType, Quot.getZExtValue());
464+
auto *OffsetType = dyn_cast<IntegerType>(Offset->getType());
465+
if (!OffsetType)
466+
return nullptr;
467+
468+
if (!OffsetQuot.isOne()) {
469+
ConstantInt *ConstMul =
470+
ConstantInt::get(OffsetType, OffsetQuot.getZExtValue());
470471
Offset = Builder.CreateMul(Offset, ConstMul);
471472
if (Instruction *NewInst = dyn_cast<Instruction>(Offset))
472473
NewInsts.push_back(NewInst);
473474
}
474475
if (ConstOffset.isZero())
475476
return Offset;
476477

478+
ConstantInt *ConstIndex =
479+
ConstantInt::get(OffsetType, IndexQuot.getZExtValue());
477480
Value *IndexAdd = Builder.CreateAdd(ConstIndex, Offset);
478481
if (Instruction *NewInst = dyn_cast<Instruction>(IndexAdd))
479482
NewInsts.push_back(NewInst);

llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,56 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset(ptr %out) {
294294
ret void
295295
}
296296

297+
define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset_index(ptr %out) {
298+
; CHECK-LABEL: define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset_index(
299+
; CHECK-SAME: ptr [[OUT:%.*]]) {
300+
; CHECK-NEXT: [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
301+
; CHECK-NEXT: [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
302+
; CHECK-NEXT: [[C1:%.*]] = icmp uge i32 [[X]], 3
303+
; CHECK-NEXT: [[C2:%.*]] = icmp uge i32 [[Y]], 3
304+
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
305+
; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
306+
; CHECK-NEXT: [[SEL3:%.*]] = zext i32 [[SEL2]] to i64
307+
; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <6 x i64> poison
308+
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <6 x i64> [[ALLOCA]], i64 0, i32 0
309+
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <6 x i64> [[TMP11]], i64 1, i32 1
310+
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <6 x i64> [[TMP12]], i64 2, i32 2
311+
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <6 x i64> [[TMP13]], i64 3, i32 3
312+
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <6 x i64> [[TMP14]], i64 4, i32 4
313+
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <6 x i64> [[TMP15]], i64 5, i32 5
314+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[SEL3]], 3
315+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 6, [[TMP1]]
316+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP2]]
317+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <3 x i64> poison, i64 [[TMP3]], i64 0
318+
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1
319+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP5]]
320+
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x i64> [[TMP4]], i64 [[TMP6]], i64 1
321+
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP2]], 2
322+
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP8]]
323+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <3 x i64> [[TMP7]], i64 [[TMP9]], i64 2
324+
; CHECK-NEXT: [[ELEM:%.*]] = extractelement <3 x i64> [[TMP10]], i32 2
325+
; CHECK-NEXT: store i64 [[ELEM]], ptr [[OUT]], align 8
326+
; CHECK-NEXT: ret void
327+
;
328+
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
329+
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
330+
%c1 = icmp uge i32 %x, 3
331+
%c2 = icmp uge i32 %y, 3
332+
%sel1 = select i1 %c1, i32 1, i32 2
333+
%sel2 = select i1 %c2, i32 0, i32 %sel1
334+
%sel3 = zext i32 %sel2 to i64
335+
%alloca = alloca [2 x [3 x i64]], align 16, addrspace(5)
336+
%gep.00 = getelementptr inbounds [2 x [3 x i64]], ptr addrspace(5) %alloca, i32 0
337+
%gep.01 = getelementptr inbounds [2 x [3 x i64]], ptr addrspace(5) %alloca, i32 0, i32 1, i32 0
338+
store <3 x i64> <i64 0, i64 1, i64 2>, ptr addrspace(5) %gep.00
339+
store <3 x i64> <i64 3, i64 4, i64 5>, ptr addrspace(5) %gep.01
340+
%gep = getelementptr inbounds [2 x [3 x i64]], ptr addrspace(5) %alloca, i64 1, i64 %sel3
341+
%load = load <3 x i64>, ptr addrspace(5) %gep
342+
%elem = extractelement <3 x i64> %load, i32 2
343+
store i64 %elem, ptr %out
344+
ret void
345+
}
346+
297347
define amdgpu_kernel void @i64_2d_load_store_subvec_4(ptr %out) {
298348
; CHECK-LABEL: define amdgpu_kernel void @i64_2d_load_store_subvec_4(
299349
; CHECK-SAME: ptr [[OUT:%.*]]) {

0 commit comments

Comments
 (0)