Skip to content

Commit 8e04ed6

Browse files
committed
!fixup: implement the cost of VPInstruction::Not to fix a regression.
This fixes the regression in `llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll`.
1 parent b8f5de4 commit 8e04ed6

File tree

3 files changed

+130
-87
lines changed

3 files changed

+130
-87
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1175,6 +1175,13 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
11751175
I32Ty, {Arg0Ty, I32Ty, I1Ty});
11761176
return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
11771177
}
1178+
case VPInstruction::Not: {
1179+
Type *RetTy = Ctx.Types.inferScalarType(getOperand(0));
1180+
if (!vputils::onlyFirstLaneUsed(this))
1181+
RetTy = toVectorTy(RetTy, VF);
1182+
return Ctx.TTI.getArithmeticInstrCost(Instruction::Xor, RetTy,
1183+
Ctx.CostKind);
1184+
}
11781185
case VPInstruction::ExtractLastElement: {
11791186
// Add on the cost of extracting the element.
11801187
auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 32 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -682,16 +682,16 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
682682
; COMMON-NEXT: store i8 6, ptr [[TMP6]], align 1
683683
; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE12]]
684684
; COMMON: [[PRED_STORE_CONTINUE12]]:
685-
; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[EXIT:.*]]
685+
; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
686686
; COMMON: [[PRED_STORE_IF13]]:
687687
; COMMON-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 7
688688
; COMMON-NEXT: store i8 7, ptr [[TMP7]], align 1
689-
; COMMON-NEXT: br label %[[EXIT]]
689+
; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE14]]
690+
; COMMON: [[PRED_STORE_CONTINUE14]]:
691+
; COMMON-NEXT: br label %[[MIDDLE_BLOCK:.*]]
692+
; COMMON: [[MIDDLE_BLOCK]]:
693+
; COMMON-NEXT: br label %[[EXIT:.*]]
690694
; COMMON: [[EXIT]]:
691-
; COMMON-NEXT: br label %[[SCALAR_PH:.*]]
692-
; COMMON: [[SCALAR_PH]]:
693-
; COMMON-NEXT: br label %[[EXIT1:.*]]
694-
; COMMON: [[EXIT1]]:
695695
; COMMON-NEXT: ret void
696696
;
697697
entry:
@@ -1325,7 +1325,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
13251325
; PRED-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
13261326
; PRED: [[VECTOR_MEMCHECK]]:
13271327
; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
1328-
; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
1328+
; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 16
13291329
; PRED-NEXT: [[TMP3:%.*]] = sub i64 [[C1]], [[A2]]
13301330
; PRED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
13311331
; PRED-NEXT: [[TMP4:%.*]] = sub i64 [[C1]], [[B3]]
@@ -1334,42 +1334,42 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
13341334
; PRED-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
13351335
; PRED: [[VECTOR_PH]]:
13361336
; PRED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
1337-
; PRED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
1337+
; PRED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
13381338
; PRED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
1339-
; PRED-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
1339+
; PRED-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 4
13401340
; PRED-NEXT: [[TMP9:%.*]] = sub i64 [[TMP0]], [[TMP8]]
13411341
; PRED-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[TMP0]], [[TMP8]]
13421342
; PRED-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0
1343-
; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP0]])
1344-
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[Y]], i64 0
1345-
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
1343+
; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[TMP0]])
1344+
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[Y]], i64 0
1345+
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
13461346
; PRED-NEXT: br label %[[VECTOR_BODY:.*]]
13471347
; PRED: [[VECTOR_BODY]]:
13481348
; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1349-
; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
1349+
; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
13501350
; PRED-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
1351-
; PRED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
1352-
; PRED-NEXT: [[TMP13:%.*]] = uitofp <vscale x 4 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 4 x float>
1351+
; PRED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP12]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
1352+
; PRED-NEXT: [[TMP13:%.*]] = uitofp <vscale x 16 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 16 x float>
13531353
; PRED-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
1354-
; PRED-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP14]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
1355-
; PRED-NEXT: [[TMP15:%.*]] = icmp ne <vscale x 4 x i8> [[WIDE_MASKED_LOAD5]], zeroinitializer
1356-
; PRED-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> zeroinitializer
1357-
; PRED-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
1358-
; PRED-NEXT: [[TMP18:%.*]] = select <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i8> [[BROADCAST_SPLAT]], <vscale x 4 x i8> splat (i8 1)
1359-
; PRED-NEXT: [[TMP19:%.*]] = udiv <vscale x 4 x i8> [[TMP17]], [[TMP18]]
1360-
; PRED-NEXT: [[TMP20:%.*]] = icmp ugt <vscale x 4 x i8> [[TMP19]], splat (i8 1)
1361-
; PRED-NEXT: [[TMP21:%.*]] = select <vscale x 4 x i1> [[TMP20]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> splat (i32 255)
1362-
; PRED-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> [[TMP21]], <vscale x 4 x i32> zeroinitializer
1363-
; PRED-NEXT: [[TMP22:%.*]] = zext <vscale x 4 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 4 x i32>
1364-
; PRED-NEXT: [[TMP23:%.*]] = sub <vscale x 4 x i32> [[PREDPHI]], [[TMP22]]
1365-
; PRED-NEXT: [[TMP24:%.*]] = sitofp <vscale x 4 x i32> [[TMP23]] to <vscale x 4 x float>
1366-
; PRED-NEXT: [[TMP25:%.*]] = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> [[TMP24]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x float> [[TMP13]])
1367-
; PRED-NEXT: [[TMP26:%.*]] = fptoui <vscale x 4 x float> [[TMP25]] to <vscale x 4 x i8>
1354+
; PRED-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP14]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
1355+
; PRED-NEXT: [[TMP15:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_MASKED_LOAD5]], zeroinitializer
1356+
; PRED-NEXT: [[TMP16:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP15]], <vscale x 16 x i1> zeroinitializer
1357+
; PRED-NEXT: [[TMP17:%.*]] = xor <vscale x 16 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
1358+
; PRED-NEXT: [[TMP18:%.*]] = select <vscale x 16 x i1> [[TMP16]], <vscale x 16 x i8> [[BROADCAST_SPLAT]], <vscale x 16 x i8> splat (i8 1)
1359+
; PRED-NEXT: [[TMP19:%.*]] = udiv <vscale x 16 x i8> [[TMP17]], [[TMP18]]
1360+
; PRED-NEXT: [[TMP20:%.*]] = icmp ugt <vscale x 16 x i8> [[TMP19]], splat (i8 1)
1361+
; PRED-NEXT: [[TMP21:%.*]] = select <vscale x 16 x i1> [[TMP20]], <vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32> splat (i32 255)
1362+
; PRED-NEXT: [[PREDPHI:%.*]] = select <vscale x 16 x i1> [[TMP15]], <vscale x 16 x i32> [[TMP21]], <vscale x 16 x i32> zeroinitializer
1363+
; PRED-NEXT: [[TMP22:%.*]] = zext <vscale x 16 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 16 x i32>
1364+
; PRED-NEXT: [[TMP23:%.*]] = sub <vscale x 16 x i32> [[PREDPHI]], [[TMP22]]
1365+
; PRED-NEXT: [[TMP24:%.*]] = sitofp <vscale x 16 x i32> [[TMP23]] to <vscale x 16 x float>
1366+
; PRED-NEXT: [[TMP25:%.*]] = call <vscale x 16 x float> @llvm.fmuladd.nxv16f32(<vscale x 16 x float> [[TMP24]], <vscale x 16 x float> splat (float 3.000000e+00), <vscale x 16 x float> [[TMP13]])
1367+
; PRED-NEXT: [[TMP26:%.*]] = fptoui <vscale x 16 x float> [[TMP25]] to <vscale x 16 x i8>
13681368
; PRED-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[C]], i64 [[INDEX]]
1369-
; PRED-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP26]], ptr [[TMP27]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
1369+
; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP26]], ptr [[TMP27]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
13701370
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
1371-
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP11]])
1372-
; PRED-NEXT: [[TMP28:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
1371+
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP11]])
1372+
; PRED-NEXT: [[TMP28:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
13731373
; PRED-NEXT: [[TMP29:%.*]] = xor i1 [[TMP28]], true
13741374
; PRED-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
13751375
; PRED: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)