Skip to content

Commit 5b97a27

Browse files
committed
[AArch64][CostModel] Alter sdiv/srem cost where the divisor is constant
This patch draws its inspiration from the udiv/urem patch llvm#122236. For sdiv, typical sequence of instructions as per the type and divisor property is as follows: Scalar power-of-2: cmp + csel + asr Neon power-of-2: usra + sshr Scalar non-power-2: smulh/smull + asr/lsr + add/sub + asr + add Vector non-power-2: a) <2 x i64>: 2 * (smulh + asr + add). This yields scalarized form. b) <4 x i32>: smull2 + smull + uzp2 + add + sshr + usra SVE versions should have more or less the same cost because sometimes they yield native sdiv instructions, which should have less cost, or the same sequence of neon instructions. For srem, typical sequence of instructions as per the type and divisor property is as follows: Scalar version: <set of sdiv instructions> + msub Vector version: <set of sdiv instructions> + 2-msub/1-mls
1 parent 58fc4b1 commit 5b97a27

File tree

8 files changed

+440
-222
lines changed

8 files changed

+440
-222
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 46 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3531,23 +3531,53 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
35313531
default:
35323532
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
35333533
Op2Info);
3534+
case ISD::SREM:
35343535
case ISD::SDIV:
3535-
if (Op2Info.isConstant() && Op2Info.isUniform() && Op2Info.isPowerOf2()) {
3536-
// On AArch64, scalar signed division by constants power-of-two are
3537-
// normally expanded to the sequence ADD + CMP + SELECT + SRA.
3538-
// The OperandValue properties many not be same as that of previous
3539-
// operation; conservatively assume OP_None.
3540-
InstructionCost Cost = getArithmeticInstrCost(
3541-
Instruction::Add, Ty, CostKind,
3542-
Op1Info.getNoProps(), Op2Info.getNoProps());
3543-
Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind,
3544-
Op1Info.getNoProps(), Op2Info.getNoProps());
3545-
Cost += getArithmeticInstrCost(
3546-
Instruction::Select, Ty, CostKind,
3547-
Op1Info.getNoProps(), Op2Info.getNoProps());
3548-
Cost += getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
3549-
Op1Info.getNoProps(), Op2Info.getNoProps());
3550-
return Cost;
3536+
/*
3537+
For sdiv, typical sequence of instructions as per the type and divisor
3538+
property is as follows:
3539+
Scalar power-of-2: cmp + csel + asr
3540+
Vector power-of-2: usra + sshr
3541+
3542+
Scalar non-power-2: smulh/smull + asr/lsr + add/sub + asr + add
3543+
Vector non-power-2:
3544+
a) <2 x i64>: 2 * (smulh + asr + add) --> This yields scalarized form.
3545+
b) <4 x i32>: smull2 + smull + uzp2 + add + sshr + usra
3546+
3547+
SVE versions should have more or less the same cost because sometimes they
3548+
yield native sdiv instructions, which should have less cost or the same
3549+
sequence of neon instructions.
3550+
3551+
For srem, typical sequence of instructions as per the type and divisor
3552+
property is as follows:
3553+
Scalar version: <set of sdiv instructions> + msub
3554+
Vector version: <set of sdiv instructions> + 2-msub/mls
3555+
*/
3556+
if (Op2Info.isConstant()) {
3557+
InstructionCost AsrCost =
3558+
getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
3559+
Op1Info.getNoProps(), Op2Info.getNoProps());
3560+
InstructionCost AddCost =
3561+
getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
3562+
Op1Info.getNoProps(), Op2Info.getNoProps());
3563+
InstructionCost MulCost =
3564+
getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
3565+
Op1Info.getNoProps(), Op2Info.getNoProps());
3566+
3567+
bool HasSMUL = !Op2Info.isPowerOf2();
3568+
unsigned NumOfSMUL = HasSMUL ? (LT.second.isVector() ? 2 : 1) : 0;
3569+
bool HasExtraAsr =
3570+
(LT.second.isVector() || LT.second == MVT::i32) && HasSMUL;
3571+
3572+
InstructionCost CommonCost = AsrCost + AddCost;
3573+
// We typically get 1 msub for scalar and 2-msub/1-mls for the vector form.
3574+
// Typically, the cost of msub is same and mls is twice as costly as
3575+
// add/sub/mul.
3576+
InstructionCost MlsOrMSubCost = (LT.second.isVector() ? 2 : 1) * MulCost;
3577+
InstructionCost DivCost =
3578+
CommonCost + (MulCost * NumOfSMUL) /* SMULH/SMULH */ +
3579+
(AsrCost * HasExtraAsr); // Coming with second SMULH
3580+
return DivCost + (ISD == ISD::SREM ? MlsOrMSubCost : 0);
35513581
}
35523582
[[fallthrough]];
35533583
case ISD::UDIV:

llvm/test/Analysis/CostModel/AArch64/div_cte.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
77

88
define <16 x i8> @sdiv8xi16(<16 x i8> %x) {
99
; CHECK-LABEL: 'sdiv8xi16'
10-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <16 x i8> %x, splat (i8 9)
10+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = sdiv <16 x i8> %x, splat (i8 9)
1111
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %div
1212
;
1313
%div = sdiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
@@ -16,7 +16,7 @@ define <16 x i8> @sdiv8xi16(<16 x i8> %x) {
1616

1717
define <8 x i16> @sdiv16xi8(<8 x i16> %x) {
1818
; CHECK-LABEL: 'sdiv16xi8'
19-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <8 x i16> %x, splat (i16 9)
19+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = sdiv <8 x i16> %x, splat (i16 9)
2020
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div
2121
;
2222
%div = sdiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
@@ -25,7 +25,7 @@ define <8 x i16> @sdiv16xi8(<8 x i16> %x) {
2525

2626
define <4 x i32> @sdiv32xi4(<4 x i32> %x) {
2727
; CHECK-LABEL: 'sdiv32xi4'
28-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <4 x i32> %x, splat (i32 9)
28+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = sdiv <4 x i32> %x, splat (i32 9)
2929
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
3030
;
3131
%div = sdiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>

llvm/test/Analysis/CostModel/AArch64/rem.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,22 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
55

66
define void @srem() {
77
; CHECK-LABEL: 'srem'
8-
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %I128 = srem i128 undef, undef
8+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = srem i128 undef, undef
99
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, undef
10-
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = srem <2 x i64> undef, undef
11-
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = srem <4 x i64> undef, undef
12-
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = srem <8 x i64> undef, undef
10+
; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V2i64 = srem <2 x i64> undef, undef
11+
; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4i64 = srem <4 x i64> undef, undef
12+
; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8i64 = srem <8 x i64> undef, undef
1313
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, undef
14-
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = srem <2 x i32> undef, undef
15-
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, undef
16-
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, undef
17-
; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, undef
14+
; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V2i32 = srem <2 x i32> undef, undef
15+
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i32 = srem <4 x i32> undef, undef
16+
; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8i32 = srem <8 x i32> undef, undef
17+
; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V16i32 = srem <16 x i32> undef, undef
1818
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, undef
19-
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = srem <2 x i16> undef, undef
20-
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = srem <4 x i16> undef, undef
21-
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, undef
22-
; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, undef
23-
; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, undef
19+
; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V2i16 = srem <2 x i16> undef, undef
20+
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = srem <4 x i16> undef, undef
21+
; CHECK-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V8i16 = srem <8 x i16> undef, undef
22+
; CHECK-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V16i16 = srem <16 x i16> undef, undef
23+
; CHECK-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V32i16 = srem <32 x i16> undef, undef
2424
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, undef
2525
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = srem <2 x i8> undef, undef
2626
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = srem <4 x i8> undef, undef

llvm/test/Analysis/CostModel/AArch64/sve-div.ll

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -163,24 +163,24 @@ define void @udiv() {
163163

164164
define void @sdiv_uniformconst() {
165165
; CHECK-LABEL: 'sdiv_uniformconst'
166-
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
167-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
168-
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
169-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 7)
170-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
171-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
172-
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
166+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
167+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
168+
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
169+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 7)
170+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
171+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
172+
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
173173
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 7)
174174
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 7)
175-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
176-
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
177-
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
175+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
176+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
177+
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
178178
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 7)
179-
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 7)
180-
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 7)
181-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
182-
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
183-
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
179+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 7)
180+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 7)
181+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
182+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
183+
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
184184
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 7)
185185
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 7)
186186
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 7)
@@ -479,24 +479,24 @@ define void @udiv_uniformconstpow2() {
479479

480480
define void @sdiv_uniformconstnegpow2() {
481481
; CHECK-LABEL: 'sdiv_uniformconstnegpow2'
482-
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
483-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
484-
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
485-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16)
486-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
487-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
488-
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
482+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
483+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
484+
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
485+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16)
486+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
487+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
488+
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
489489
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16)
490490
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16)
491-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
492-
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
493-
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
491+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
492+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
493+
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
494494
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16)
495-
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16)
496-
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16)
497-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
498-
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
499-
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
495+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16)
496+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16)
497+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
498+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
499+
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
500500
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 -16)
501501
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 -16)
502502
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 -16)

0 commit comments

Comments
 (0)