Skip to content

Commit de86696

Browse files
committed
Revert "AMDGPU/PromoteAlloca: Always use i32 for indexing (#170511)"
This reverts commit f558c30, which caused a failure on the clang-hip-vega20 buildbot: https://lab.llvm.org/buildbot/#/builders/123/builds/31779
1 parent d8e52c0 commit de86696

File tree

4 files changed

+24
-31
lines changed

4 files changed

+24
-31
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -461,23 +461,22 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
461461
return nullptr;
462462

463463
Value *Offset = VarOffset.first;
464-
if (!isa<IntegerType>(Offset->getType()))
464+
auto *OffsetType = dyn_cast<IntegerType>(Offset->getType());
465+
if (!OffsetType)
465466
return nullptr;
466467

467-
Offset = Builder.CreateSExtOrTrunc(Offset, Builder.getIntNTy(BW));
468-
if (Offset != VarOffset.first)
469-
NewInsts.push_back(cast<Instruction>(Offset));
470-
471468
if (!OffsetQuot.isOne()) {
472-
ConstantInt *ConstMul = ConstantInt::get(Ctx, OffsetQuot.sextOrTrunc(BW));
469+
ConstantInt *ConstMul =
470+
ConstantInt::get(Ctx, OffsetQuot.sext(OffsetType->getBitWidth()));
473471
Offset = Builder.CreateMul(Offset, ConstMul);
474472
if (Instruction *NewInst = dyn_cast<Instruction>(Offset))
475473
NewInsts.push_back(NewInst);
476474
}
477475
if (ConstOffset.isZero())
478476
return Offset;
479477

480-
ConstantInt *ConstIndex = ConstantInt::get(Ctx, IndexQuot.sextOrTrunc(BW));
478+
ConstantInt *ConstIndex =
479+
ConstantInt::get(Ctx, IndexQuot.sext(OffsetType->getBitWidth()));
481480
Value *IndexAdd = Builder.CreateAdd(Offset, ConstIndex);
482481
if (Instruction *NewInst = dyn_cast<Instruction>(IndexAdd))
483482
NewInsts.push_back(NewInst);

llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -262,15 +262,14 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset(ptr %out) {
262262
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <6 x i64> [[TMP12]], i64 3, i32 3
263263
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <6 x i64> [[TMP13]], i64 4, i32 4
264264
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <6 x i64> [[TMP14]], i64 5, i32 5
265-
; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[SEL3]] to i32
266-
; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP7]], 3
267-
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x i64> [[TMP15]], i32 [[TMP16]]
265+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[SEL3]], 3
266+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x i64> [[TMP15]], i64 [[TMP1]]
268267
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x i64> poison, i64 [[TMP2]], i64 0
269-
; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1
270-
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <6 x i64> [[TMP15]], i32 [[TMP17]]
268+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP1]], 1
269+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <6 x i64> [[TMP15]], i64 [[TMP4]]
271270
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <3 x i64> [[TMP3]], i64 [[TMP5]], i64 1
272-
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP16]], 2
273-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <6 x i64> [[TMP15]], i32 [[TMP18]]
271+
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP1]], 2
272+
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <6 x i64> [[TMP15]], i64 [[TMP7]]
274273
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x i64> [[TMP6]], i64 [[TMP8]], i64 2
275274
; CHECK-NEXT: [[ELEM:%.*]] = extractelement <3 x i64> [[TMP9]], i32 2
276275
; CHECK-NEXT: store i64 [[ELEM]], ptr [[OUT]], align 8
@@ -312,16 +311,15 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset_index(ptr %out)
312311
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <6 x i64> [[TMP13]], i64 3, i32 3
313312
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <6 x i64> [[TMP14]], i64 4, i32 4
314313
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <6 x i64> [[TMP15]], i64 5, i32 5
315-
; CHECK-NEXT: [[TMP17:%.*]] = trunc i64 [[SEL3]] to i32
316-
; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP17]], 3
317-
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP8]], 6
318-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <6 x i64> [[TMP16]], i32 [[TMP18]]
314+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[SEL3]], 3
315+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 6
316+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP2]]
319317
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <3 x i64> poison, i64 [[TMP3]], i64 0
320-
; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[TMP18]], 1
321-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <6 x i64> [[TMP16]], i32 [[TMP19]]
318+
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1
319+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP5]]
322320
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x i64> [[TMP4]], i64 [[TMP6]], i64 1
323-
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP18]], 2
324-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <6 x i64> [[TMP16]], i32 [[TMP20]]
321+
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP2]], 2
322+
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP8]]
325323
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <3 x i64> [[TMP7]], i64 [[TMP9]], i64 2
326324
; CHECK-NEXT: [[ELEM:%.*]] = extractelement <3 x i64> [[TMP10]], i32 2
327325
; CHECK-NEXT: store i64 [[ELEM]], ptr [[OUT]], align 8

llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,8 @@ define amdgpu_kernel void @negative_index_byte(ptr %out, i64 %offset) {
1111
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> [[TMP1]], i8 1, i32 1
1212
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 2, i32 2
1313
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> [[TMP3]], i8 3, i32 3
14-
; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[OFFSET:%.*]] to i32
15-
; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[OFFSET]] to i32
16-
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP8]], -1
17-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i8> [[TMP4]], i32 [[TMP7]]
14+
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET:%.*]], -1
15+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i8> [[TMP4]], i64 [[TMP5]]
1816
; CHECK-NEXT: store i8 [[TMP6]], ptr [[OUT:%.*]], align 1
1917
; CHECK-NEXT: ret void
2018
;
@@ -41,10 +39,8 @@ define amdgpu_kernel void @negative_index_word(ptr %out, i64 %offset) {
4139
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 1, i32 1
4240
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 2, i32 2
4341
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 3, i32 3
44-
; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[OFFSET:%.*]] to i32
45-
; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[OFFSET]] to i32
46-
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP8]], -1
47-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 [[TMP7]]
42+
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET:%.*]], -1
43+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i64 [[TMP5]]
4844
; CHECK-NEXT: store i32 [[TMP6]], ptr [[OUT:%.*]], align 4
4945
; CHECK-NEXT: ret void
5046
;

llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
22
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
3-
; RUN: opt -S -mtriple=amdgcn-- -mcpu=fiji -passes=sroa,amdgpu-promote-alloca < %s | FileCheck -check-prefix=OPT %s
3+
; RUN: opt -S -mtriple=amdgcn-- -data-layout=A5 -mcpu=fiji -passes=sroa,amdgpu-promote-alloca < %s | FileCheck -check-prefix=OPT %s
44

55
; GCN-LABEL: {{^}}float4_alloca_store4:
66
; OPT-LABEL: define amdgpu_kernel void @float4_alloca_store4

0 commit comments

Comments
 (0)