Skip to content

Commit 5b81367

Browse files
authored
[AMDGPU] Generate canonical additions in AMDGPUPromoteAlloca (#157810)
When we know that one operand of an addition is a constant, we might as well put it on the right-hand side and avoid the work to canonicalize it in a later pass.
1 parent 93b9173 commit 5b81367

File tree

4 files changed

+10
-10
lines changed

4 files changed

+10
-10
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
478478

479479
ConstantInt *ConstIndex =
480480
ConstantInt::get(OffsetType, IndexQuot.getSExtValue());
481-
Value *IndexAdd = Builder.CreateAdd(ConstIndex, Offset);
481+
Value *IndexAdd = Builder.CreateAdd(Offset, ConstIndex);
482482
if (Instruction *NewInst = dyn_cast<Instruction>(IndexAdd))
483483
NewInsts.push_back(NewInst);
484484
return IndexAdd;

llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset_index(ptr %out)
312312
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <6 x i64> [[TMP14]], i64 4, i32 4
313313
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <6 x i64> [[TMP15]], i64 5, i32 5
314314
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[SEL3]], 3
315-
; CHECK-NEXT: [[TMP2:%.*]] = add i64 6, [[TMP1]]
315+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 6
316316
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP2]]
317317
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <3 x i64> poison, i64 [[TMP3]], i64 0
318318
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1
@@ -464,7 +464,7 @@ define amdgpu_kernel void @i16_2d_load_store(ptr %out, i32 %sel) {
464464
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <6 x i16> [[TMP3]], i16 3, i32 3
465465
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <6 x i16> [[TMP4]], i16 4, i32 4
466466
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <6 x i16> [[TMP5]], i16 5, i32 5
467-
; CHECK-NEXT: [[TMP1:%.*]] = add i32 3, [[SEL]]
467+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SEL]], 3
468468
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x i16> [[TMP6]], i32 [[TMP1]]
469469
; CHECK-NEXT: store i16 [[TMP2]], ptr [[OUT]], align 2
470470
; CHECK-NEXT: ret void
@@ -498,7 +498,7 @@ define amdgpu_kernel void @float_2d_load_store(ptr %out, i32 %sel) {
498498
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <6 x float> [[TMP3]], float 3.000000e+00, i32 3
499499
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <6 x float> [[TMP4]], float 4.000000e+00, i32 4
500500
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <6 x float> [[TMP5]], float 5.000000e+00, i32 5
501-
; CHECK-NEXT: [[TMP1:%.*]] = add i32 3, [[SEL]]
501+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SEL]], 3
502502
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x float> [[TMP6]], i32 [[TMP1]]
503503
; CHECK-NEXT: store float [[TMP2]], ptr [[OUT]], align 4
504504
; CHECK-NEXT: ret void
@@ -538,7 +538,7 @@ define amdgpu_kernel void @ptr_2d_load_store(ptr %out, i32 %sel) {
538538
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <6 x ptr> [[TMP3]], ptr [[PTR_3]], i32 3
539539
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <6 x ptr> [[TMP4]], ptr [[PTR_4]], i32 4
540540
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <6 x ptr> [[TMP5]], ptr [[PTR_5]], i32 5
541-
; CHECK-NEXT: [[TMP7:%.*]] = add i32 3, [[SEL]]
541+
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[SEL]], 3
542542
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <6 x ptr> [[TMP6]], i32 [[TMP7]]
543543
; CHECK-NEXT: store ptr [[TMP8]], ptr [[OUT]], align 8
544544
; CHECK-NEXT: ret void

llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ define amdgpu_kernel void @negative_index_byte(ptr %out, i64 %offset) {
1111
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> [[TMP1]], i8 1, i32 1
1212
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 2, i32 2
1313
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> [[TMP3]], i8 3, i32 3
14-
; CHECK-NEXT: [[TMP5:%.*]] = add i64 -1, [[OFFSET:%.*]]
14+
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET:%.*]], -1
1515
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i8> [[TMP4]], i64 [[TMP5]]
1616
; CHECK-NEXT: store i8 [[TMP6]], ptr [[OUT:%.*]], align 1
1717
; CHECK-NEXT: ret void
@@ -39,7 +39,7 @@ define amdgpu_kernel void @negative_index_word(ptr %out, i64 %offset) {
3939
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 1, i32 1
4040
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 2, i32 2
4141
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 3, i32 3
42-
; CHECK-NEXT: [[TMP5:%.*]] = add i64 -1, [[OFFSET:%.*]]
42+
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET:%.*]], -1
4343
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i64 [[TMP5]]
4444
; CHECK-NEXT: store i32 [[TMP6]], ptr [[OUT:%.*]], align 4
4545
; CHECK-NEXT: ret void

llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep-of-gep.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ define amdgpu_ps void @scalar_alloca_ptr_with_vector_gep_of_gep(i32 %idx, ptr ad
1010
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1
1111
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <20 x i32> [[TMP1]], i32 2, i32 [[TMP2]]
1212
; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[IDX]], 2
13-
; CHECK-NEXT: [[TMP5:%.*]] = add i32 1, [[TMP4]]
13+
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1
1414
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <20 x i32> [[TMP3]], i32 [[TMP5]]
1515
; CHECK-NEXT: store i32 [[TMP6]], ptr addrspace(1) [[OUTPUT]], align 4
1616
; CHECK-NEXT: ret void
@@ -31,12 +31,12 @@ define amdgpu_ps void @scalar_alloca_ptr_with_vector_gep_of_gep3(i32 %idx, ptr a
3131
; CHECK-NEXT: [[ENTRY:.*:]]
3232
; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <16 x i32> poison
3333
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[IDX]], 2
34-
; CHECK-NEXT: [[TMP1:%.*]] = add i32 8, [[TMP0]]
34+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 8
3535
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[ALLOCA]], i32 10, i32 [[TMP1]]
3636
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], 1
3737
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP2]], i32 20, i32 [[TMP3]]
3838
; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[IDX]], 2
39-
; CHECK-NEXT: [[TMP6:%.*]] = add i32 9, [[TMP5]]
39+
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 9
4040
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP4]], i32 [[TMP6]]
4141
; CHECK-NEXT: store i32 [[TMP7]], ptr addrspace(1) [[OUTPUT]], align 4
4242
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)