Skip to content

Commit 4d2e78d

Browse files
committed
[AArch64][CostModel] Alter sdiv/srem cost where the divisor is constant
This patch draws its inspiration from the udiv/urem patch #122236. For sdiv, the typical sequence of instructions as per the type and divisor property is as follows: Scalar power-of-2: cmp + csel + asr. Neon power-of-2: usra + sshr. Scalar non-power-2: smulh/smull + asr/lsr + add/sub + asr + add. Vector non-power-2: a) <2 x i64>: 2 * (smulh + asr + add); this yields the scalarized form. b) <4 x i32>: smull2 + smull + uzp2 + add + sshr + usra. SVE versions should have more or less the same cost, because they yield either native sdiv instructions, which should cost less, or the same sequence of Neon instructions. For srem, the typical sequence of instructions as per the type and divisor property is as follows: Scalar version: <set of sdiv instructions> + msub. Vector version: <set of sdiv instructions> + 2-msub/1-mls.
1 parent 556eb82 commit 4d2e78d

File tree

8 files changed

+440
-222
lines changed

8 files changed

+440
-222
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 46 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3526,23 +3526,53 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
35263526
default:
35273527
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
35283528
Op2Info);
3529+
case ISD::SREM:
35293530
case ISD::SDIV:
3530-
if (Op2Info.isConstant() && Op2Info.isUniform() && Op2Info.isPowerOf2()) {
3531-
// On AArch64, scalar signed division by constants power-of-two are
3532-
// normally expanded to the sequence ADD + CMP + SELECT + SRA.
3533-
// The OperandValue properties may not be the same as that of previous
3534-
// operation; conservatively assume OP_None.
3535-
InstructionCost Cost = getArithmeticInstrCost(
3536-
Instruction::Add, Ty, CostKind,
3537-
Op1Info.getNoProps(), Op2Info.getNoProps());
3538-
Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind,
3539-
Op1Info.getNoProps(), Op2Info.getNoProps());
3540-
Cost += getArithmeticInstrCost(
3541-
Instruction::Select, Ty, CostKind,
3542-
Op1Info.getNoProps(), Op2Info.getNoProps());
3543-
Cost += getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
3544-
Op1Info.getNoProps(), Op2Info.getNoProps());
3545-
return Cost;
3531+
/*
3532+
For sdiv, typical sequence of instructions as per the type and divisor
3533+
property is as follows:
3534+
Scalar power-of-2: cmp + csel + asr
3535+
Vector power-of-2: usra + sshr
3536+
3537+
Scalar non-power-2: smulh/smull + asr/lsr + add/sub + asr + add
3538+
Vector non-power-2:
3539+
a) <2 x i64>: 2 * (smulh + asr + add) --> This yields scalarized form.
3540+
b) <4 x i32>: smull2 + smull + uzp2 + add + sshr + usra
3541+
3542+
SVE versions should have more or less the same cost because sometimes they
3543+
yield native sdiv instructions, which should have less cost or the same
3544+
sequence of neon instructions.
3545+
3546+
For srem, typical sequence of instructions as per the type and divisor
3547+
property is as follows:
3548+
Scalar version: <set of sdiv instructions> + msub
3549+
Vector version: <set of sdiv instructions> + 2-msub/mls
3550+
*/
3551+
if (Op2Info.isConstant()) {
3552+
InstructionCost AsrCost =
3553+
getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
3554+
Op1Info.getNoProps(), Op2Info.getNoProps());
3555+
InstructionCost AddCost =
3556+
getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
3557+
Op1Info.getNoProps(), Op2Info.getNoProps());
3558+
InstructionCost MulCost =
3559+
getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
3560+
Op1Info.getNoProps(), Op2Info.getNoProps());
3561+
3562+
bool HasSMUL = !Op2Info.isPowerOf2();
3563+
unsigned NumOfSMUL = HasSMUL ? (LT.second.isVector() ? 2 : 1) : 0;
3564+
bool HasExtraAsr =
3565+
(LT.second.isVector() || LT.second == MVT::i32) && HasSMUL;
3566+
3567+
InstructionCost CommonCost = AsrCost + AddCost;
3568+
// We typically get 1 msub for scalar and 2-msub/1-mls for the vector form.
3569+
// Typically, the cost of msub is same and mls is twice as costly as
3570+
// add/sub/mul.
3571+
InstructionCost MlsOrMSubCost = (LT.second.isVector() ? 2 : 1) * MulCost;
3572+
InstructionCost DivCost =
3573+
CommonCost + (MulCost * NumOfSMUL) /* SMULH/SMULH */ +
3574+
(AsrCost * HasExtraAsr); // Coming with second SMULH
3575+
return DivCost + (ISD == ISD::SREM ? MlsOrMSubCost : 0);
35463576
}
35473577
[[fallthrough]];
35483578
case ISD::UDIV:

llvm/test/Analysis/CostModel/AArch64/div_cte.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
77

88
define <16 x i8> @sdiv8xi16(<16 x i8> %x) {
99
; CHECK-LABEL: 'sdiv8xi16'
10-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <16 x i8> %x, splat (i8 9)
10+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = sdiv <16 x i8> %x, splat (i8 9)
1111
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %div
1212
;
1313
%div = sdiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
@@ -16,7 +16,7 @@ define <16 x i8> @sdiv8xi16(<16 x i8> %x) {
1616

1717
define <8 x i16> @sdiv16xi8(<8 x i16> %x) {
1818
; CHECK-LABEL: 'sdiv16xi8'
19-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <8 x i16> %x, splat (i16 9)
19+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = sdiv <8 x i16> %x, splat (i16 9)
2020
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div
2121
;
2222
%div = sdiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
@@ -25,7 +25,7 @@ define <8 x i16> @sdiv16xi8(<8 x i16> %x) {
2525

2626
define <4 x i32> @sdiv32xi4(<4 x i32> %x) {
2727
; CHECK-LABEL: 'sdiv32xi4'
28-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <4 x i32> %x, splat (i32 9)
28+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = sdiv <4 x i32> %x, splat (i32 9)
2929
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
3030
;
3131
%div = sdiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>

llvm/test/Analysis/CostModel/AArch64/rem.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,22 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
55

66
define void @srem() {
77
; CHECK-LABEL: 'srem'
8-
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %I128 = srem i128 undef, undef
8+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = srem i128 undef, undef
99
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, undef
10-
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = srem <2 x i64> undef, undef
11-
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = srem <4 x i64> undef, undef
12-
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = srem <8 x i64> undef, undef
10+
; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V2i64 = srem <2 x i64> undef, undef
11+
; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4i64 = srem <4 x i64> undef, undef
12+
; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8i64 = srem <8 x i64> undef, undef
1313
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, undef
14-
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = srem <2 x i32> undef, undef
15-
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, undef
16-
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, undef
17-
; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, undef
14+
; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V2i32 = srem <2 x i32> undef, undef
15+
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i32 = srem <4 x i32> undef, undef
16+
; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8i32 = srem <8 x i32> undef, undef
17+
; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V16i32 = srem <16 x i32> undef, undef
1818
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, undef
19-
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = srem <2 x i16> undef, undef
20-
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = srem <4 x i16> undef, undef
21-
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, undef
22-
; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, undef
23-
; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, undef
19+
; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V2i16 = srem <2 x i16> undef, undef
20+
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = srem <4 x i16> undef, undef
21+
; CHECK-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V8i16 = srem <8 x i16> undef, undef
22+
; CHECK-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V16i16 = srem <16 x i16> undef, undef
23+
; CHECK-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V32i16 = srem <32 x i16> undef, undef
2424
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, undef
2525
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = srem <2 x i8> undef, undef
2626
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = srem <4 x i8> undef, undef

llvm/test/Analysis/CostModel/AArch64/sve-div.ll

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -163,24 +163,24 @@ define void @udiv() {
163163

164164
define void @sdiv_uniformconst() {
165165
; CHECK-LABEL: 'sdiv_uniformconst'
166-
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
167-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
168-
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
169-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 7)
170-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
171-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
172-
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
166+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
167+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
168+
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
169+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 7)
170+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
171+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
172+
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
173173
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 7)
174174
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 7)
175-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
176-
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
177-
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
175+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
176+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
177+
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
178178
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 7)
179-
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 7)
180-
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 7)
181-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
182-
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
183-
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
179+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 7)
180+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 7)
181+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
182+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
183+
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
184184
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 7)
185185
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 7)
186186
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 7)
@@ -479,24 +479,24 @@ define void @udiv_uniformconstpow2() {
479479

480480
define void @sdiv_uniformconstnegpow2() {
481481
; CHECK-LABEL: 'sdiv_uniformconstnegpow2'
482-
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
483-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
484-
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
485-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16)
486-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
487-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
488-
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
482+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
483+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
484+
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
485+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16)
486+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
487+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
488+
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
489489
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16)
490490
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16)
491-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
492-
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
493-
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
491+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
492+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
493+
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
494494
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16)
495-
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16)
496-
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16)
497-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
498-
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
499-
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
495+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16)
496+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16)
497+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
498+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
499+
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
500500
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 -16)
501501
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 -16)
502502
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 -16)

0 commit comments

Comments
 (0)