Skip to content

Commit 5d99c2e

Browse files
committed
Support EVL
1 parent 2a46ad6 commit 5d99c2e

File tree

7 files changed

+32
-128
lines changed

7 files changed

+32
-128
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2558,6 +2558,12 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
25582558
VPValue *NewAddr = GetNewAddr(L->getAddr());
25592559
return new VPWidenLoadEVLRecipe(*L, NewAddr, EVL, NewMask);
25602560
})
2561+
.Case<VPWidenStridedLoadRecipe>([&](VPWidenStridedLoadRecipe *L) {
2562+
VPValue *NewMask = GetNewMask(L->getMask());
2563+
return new VPWidenStridedLoadRecipe(
2564+
*cast<LoadInst>(&L->getIngredient()), L->getAddr(), L->getStride(),
2565+
&EVL, NewMask, *L, L->getDebugLoc());
2566+
})
25612567
.Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
25622568
VPValue *NewMask = GetNewMask(S->getMask());
25632569
VPValue *NewAddr = GetNewAddr(S->getAddr());

llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -315,8 +315,6 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 %
315315
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
316316
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
317317
; CHECK: [[VECTOR_PH]]:
318-
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
319-
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
320318
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i1> poison, i1 [[IC]], i64 0
321319
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i1> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
322320
; CHECK-NEXT: [[TMP8:%.*]] = xor <vscale x 8 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
@@ -329,19 +327,14 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 %
329327
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
330328
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
331329
; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
332-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 8 x i32> poison, i32 [[TMP27]], i64 0
333-
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 8 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
334330
; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP27]] to i64
335331
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP12]]
336332
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP16]], i64 0
337333
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
338-
; CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 8 x i32> @llvm.stepvector.nxv8i32()
339-
; CHECK-NEXT: [[TMP19:%.*]] = icmp ult <vscale x 8 x i32> [[TMP18]], [[BROADCAST_SPLAT4]]
340334
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[EVL_BASED_IV]], 3
341335
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[OFFSET_IDX]]
342336
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]]
343-
; CHECK-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP4]] to i32
344-
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vp.strided.load.nxv8i16.p0.i64(ptr align 2 [[TMP21]], i64 6, <vscale x 8 x i1> [[TMP19]], i32 [[TMP15]])
337+
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vp.strided.load.nxv8i16.p0.i64(ptr align 2 [[TMP21]], i64 6, <vscale x 8 x i1> splat (i1 true), i32 [[TMP27]])
345338
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq <vscale x 8 x i16> [[WIDE_MASKED_GATHER]], zeroinitializer
346339
; CHECK-NEXT: [[TMP14:%.*]] = select <vscale x 8 x i1> [[TMP17]], <vscale x 8 x i1> [[TMP8]], <vscale x 8 x i1> zeroinitializer
347340
; CHECK-NEXT: [[TMP28:%.*]] = xor <vscale x 8 x i1> [[TMP17]], splat (i1 true)

llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,6 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
3030
; RV32-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
3131
; RV32-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3232
; RV32: vector.ph:
33-
; RV32-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
34-
; RV32-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
3533
; RV32-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
3634
; RV32-NEXT: [[TMP9:%.*]] = mul <vscale x 2 x i64> [[TMP7]], splat (i64 16)
3735
; RV32-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP9]]
@@ -41,24 +39,17 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
4139
; RV32-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4240
; RV32-NEXT: [[AVL:%.*]] = phi i64 [ 625, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
4341
; RV32-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
44-
; RV32-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP10]], i64 0
45-
; RV32-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT6]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
4642
; RV32-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64
4743
; RV32-NEXT: [[TMP11:%.*]] = mul i64 16, [[TMP8]]
4844
; RV32-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
4945
; RV32-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
50-
; RV32-NEXT: [[TMP16:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
51-
; RV32-NEXT: [[TMP25:%.*]] = icmp ult <vscale x 2 x i32> [[TMP16]], [[BROADCAST_SPLAT7]]
5246
; RV32-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[EVL_BASED_IV]], 16
5347
; RV32-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[OFFSET_IDX]]
54-
; RV32-NEXT: [[TMP27:%.*]] = trunc i64 [[TMP1]] to i32
55-
; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.p0.i64(ptr align 4 [[TMP26]], i64 64, <vscale x 2 x i1> [[TMP25]], i32 [[TMP27]]), !alias.scope [[META0:![0-9]+]]
48+
; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.p0.i64(ptr align 4 [[TMP26]], i64 64, <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]]), !alias.scope [[META0:![0-9]+]]
5649
; RV32-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 2 x i32> [[WIDE_MASKED_GATHER]], splat (i32 100)
57-
; RV32-NEXT: [[TMP12:%.*]] = select <vscale x 2 x i1> [[TMP25]], <vscale x 2 x i1> [[TMP14]], <vscale x 2 x i1> zeroinitializer
5850
; RV32-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[OFFSET_IDX]], 1
5951
; RV32-NEXT: [[TMP28:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP13]]
60-
; RV32-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP1]] to i32
61-
; RV32-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 2 x double> @llvm.experimental.vp.strided.load.nxv2f64.p0.i64(ptr align 8 [[TMP28]], i64 256, <vscale x 2 x i1> [[TMP12]], i32 [[TMP15]]), !alias.scope [[META3:![0-9]+]]
52+
; RV32-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 2 x double> @llvm.experimental.vp.strided.load.nxv2f64.p0.i64(ptr align 8 [[TMP28]], i64 256, <vscale x 2 x i1> [[TMP14]], i32 [[TMP10]]), !alias.scope [[META3:![0-9]+]]
6253
; RV32-NEXT: [[TMP17:%.*]] = sitofp <vscale x 2 x i32> [[WIDE_MASKED_GATHER]] to <vscale x 2 x double>
6354
; RV32-NEXT: [[TMP18:%.*]] = fadd <vscale x 2 x double> [[WIDE_MASKED_GATHER6]], [[TMP17]]
6455
; RV32-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], <vscale x 2 x i64> [[VEC_IND]]
@@ -110,8 +101,6 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
110101
; RV64-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
111102
; RV64-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
112103
; RV64: vector.ph:
113-
; RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
114-
; RV64-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
115104
; RV64-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
116105
; RV64-NEXT: [[TMP9:%.*]] = mul <vscale x 2 x i64> [[TMP7]], splat (i64 16)
117106
; RV64-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP9]]
@@ -121,24 +110,17 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
121110
; RV64-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
122111
; RV64-NEXT: [[AVL:%.*]] = phi i64 [ 625, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
123112
; RV64-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
124-
; RV64-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP10]], i64 0
125-
; RV64-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT6]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
126113
; RV64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64
127114
; RV64-NEXT: [[TMP11:%.*]] = mul i64 16, [[TMP8]]
128115
; RV64-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
129116
; RV64-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
130-
; RV64-NEXT: [[TMP16:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
131-
; RV64-NEXT: [[TMP25:%.*]] = icmp ult <vscale x 2 x i32> [[TMP16]], [[BROADCAST_SPLAT7]]
132117
; RV64-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[EVL_BASED_IV]], 16
133118
; RV64-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[OFFSET_IDX]]
134-
; RV64-NEXT: [[TMP27:%.*]] = trunc i64 [[TMP1]] to i32
135-
; RV64-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.p0.i64(ptr align 4 [[TMP26]], i64 64, <vscale x 2 x i1> [[TMP25]], i32 [[TMP27]]), !alias.scope [[META0:![0-9]+]]
119+
; RV64-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.p0.i64(ptr align 4 [[TMP26]], i64 64, <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]]), !alias.scope [[META0:![0-9]+]]
136120
; RV64-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 2 x i32> [[WIDE_MASKED_GATHER]], splat (i32 100)
137-
; RV64-NEXT: [[TMP12:%.*]] = select <vscale x 2 x i1> [[TMP25]], <vscale x 2 x i1> [[TMP14]], <vscale x 2 x i1> zeroinitializer
138121
; RV64-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[OFFSET_IDX]], 1
139122
; RV64-NEXT: [[TMP28:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP13]]
140-
; RV64-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP1]] to i32
141-
; RV64-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 2 x double> @llvm.experimental.vp.strided.load.nxv2f64.p0.i64(ptr align 8 [[TMP28]], i64 256, <vscale x 2 x i1> [[TMP12]], i32 [[TMP15]]), !alias.scope [[META3:![0-9]+]]
123+
; RV64-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 2 x double> @llvm.experimental.vp.strided.load.nxv2f64.p0.i64(ptr align 8 [[TMP28]], i64 256, <vscale x 2 x i1> [[TMP14]], i32 [[TMP10]]), !alias.scope [[META3:![0-9]+]]
142124
; RV64-NEXT: [[TMP17:%.*]] = sitofp <vscale x 2 x i32> [[WIDE_MASKED_GATHER]] to <vscale x 2 x double>
143125
; RV64-NEXT: [[TMP18:%.*]] = fadd <vscale x 2 x double> [[WIDE_MASKED_GATHER6]], [[TMP17]]
144126
; RV64-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], <vscale x 2 x i64> [[VEC_IND]]

llvm/test/Transforms/LoopVectorize/RISCV/pr154103.ll

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ define void @pr154103(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalia
99
; CHECK-NEXT: [[ENTRY:.*:]]
1010
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
1111
; CHECK: [[VECTOR_PH]]:
12-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
13-
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
1412
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[B]], i64 0
1513
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
1614
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[C]], i64 0
@@ -20,15 +18,10 @@ define void @pr154103(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalia
2018
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
2119
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ -7905747460161236406, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
2220
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
23-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP2]], i64 0
24-
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
25-
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
26-
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <vscale x 4 x i32> [[TMP3]], [[BROADCAST_SPLAT4]]
2721
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[EVL_BASED_IV]], 7
2822
; CHECK-NEXT: [[IV:%.*]] = add i64 1, [[TMP5]]
2923
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
30-
; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP1]] to i32
31-
; CHECK-NEXT: [[WIDE_STRIDED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.p0.i64(ptr align 1 [[GEP]], i64 7, <vscale x 4 x i1> [[TMP4]], i32 [[TMP7]])
24+
; CHECK-NEXT: [[WIDE_STRIDED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.p0.i64(ptr align 1 [[GEP]], i64 7, <vscale x 4 x i1> splat (i1 true), i32 [[TMP2]])
3225
; CHECK-NEXT: [[TMP8:%.*]] = zext <vscale x 4 x i8> [[WIDE_STRIDED_LOAD]] to <vscale x 4 x i64>
3326
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i64> [[TMP8]], <vscale x 4 x i64> splat (i64 1), i32 [[TMP2]])
3427
; CHECK-NEXT: [[TMP10:%.*]] = sdiv <vscale x 4 x i64> zeroinitializer, [[TMP9]]

0 commit comments

Comments
 (0)