Skip to content

Commit 1d64fd5

Browse files
authored
[ARM] Introduce intrinsics for MVE add/sub/mul under strict-fp. (#169156)
As far as I understand, the MVE fp vadd/vsub/vmul instructions will set exception flags in the same way as scalar fadd/fsub/fmul, but will not honor flush-to-zero (for f32 they always flush, for f16 they follow the FPSCR flags) and will always use the default rounding mode. This means that we cannot convert the vadd_f32/vsub_f32/vmul_f32 intrinsics to llvm.constrained.fadd/fsub/fmul and then vadd/vsub/vmul without changing the expected behaviour under strict-fp. This patch introduces a set of intrinsics that we can use instead, going from vadd_f32 -> llvm.arm.mve.vadd -> MVE_VADD. The current implementation assumes that the standard variant of a strictfp alternative will be an IRBuilder; this can be changed to take an IRBuilder or IRInt.
1 parent 44a7d2f commit 1d64fd5

File tree

10 files changed

+795
-284
lines changed

10 files changed

+795
-284
lines changed

clang/include/clang/Basic/arm_mve_defs.td

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,9 @@ def immshr: CGHelperFn<"MVEImmediateShr"> {
7474
let special_params = [IRBuilderIntParam<1, "unsigned">,
7575
IRBuilderIntParam<2, "bool">];
7676
}
77-
def fadd: IRBuilder<"CreateFAdd">;
78-
def fmul: IRBuilder<"CreateFMul">;
79-
def fsub: IRBuilder<"CreateFSub">;
77+
def fadd_node: IRBuilder<"CreateFAdd">;
78+
def fmul_node: IRBuilder<"CreateFMul">;
79+
def fsub_node: IRBuilder<"CreateFSub">;
8080
def load: IRBuilder<"CreateLoad"> {
8181
let special_params = [IRBuilderAddrParam<0>];
8282
}
@@ -212,6 +212,13 @@ def unsignedflag;
212212
// constant giving its size in bits.
213213
def bitsize;
214214

215+
// strictFPAlt allows a node to have different code generation under strict-fp.
216+
// TODO: Allow the standard node to be either an IRBuilderBase or an IRIntBase.
217+
class strictFPAlt<IRBuilderBase standard_, IRIntBase strictfp_> {
218+
IRBuilderBase standard = standard_;
219+
IRIntBase strictfp = strictfp_;
220+
}
221+
215222
// If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it
216223
// indicates that the IR generation for that intrinsic is done by handwritten
217224
// C++ and not autogenerated at all. The effect in the MVE builtin codegen
@@ -573,6 +580,14 @@ multiclass IntrinsicMXNameOverride<Type rettype, dag arguments, dag cg,
573580
}
574581
}
575582

583+
// StrictFP nodes that choose between standard fadd and llvm.arm.mve.vadd nodes
584+
// depending on whether we are using strict-fp.
585+
def fadd: strictFPAlt<fadd_node,
586+
IRInt<"vadd", [Vector]>>;
587+
def fsub: strictFPAlt<fsub_node,
588+
IRInt<"vsub", [Vector]>>;
589+
def fmul: strictFPAlt<fmul_node,
590+
IRInt<"vmul", [Vector]>>;
576591

577592
// -----------------------------------------------------------------------------
578593
// Convenience lists of parameter types. 'T' is just a container record, so you

clang/test/CodeGen/arm-mve-intrinsics/vaddq.c

Lines changed: 146 additions & 68 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/arm-mve-intrinsics/vmulq.c

Lines changed: 266 additions & 124 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/arm-mve-intrinsics/vsubq.c

Lines changed: 146 additions & 68 deletions
Large diffs are not rendered by default.

clang/utils/TableGen/MveEmitter.cpp

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -757,6 +757,26 @@ class IRIntrinsicResult : public Result {
757757
}
758758
};
759759

760+
// Result subclass that generates
761+
// !Builder.getIsFPConstrained() ? <Standard> : <StrictFp>
762+
class StrictFpAltResult : public Result {
763+
public:
764+
Ptr Standard;
765+
Ptr StrictFp;
766+
StrictFpAltResult(Ptr Standard, Ptr StrictFp)
767+
: Standard(Standard), StrictFp(StrictFp) {}
768+
void genCode(raw_ostream &OS,
769+
CodeGenParamAllocator &ParamAlloc) const override {
770+
OS << "!Builder.getIsFPConstrained() ? ";
771+
Standard->genCode(OS, ParamAlloc);
772+
OS << " : ";
773+
StrictFp->genCode(OS, ParamAlloc);
774+
}
775+
void morePrerequisites(std::vector<Ptr> &output) const override {
776+
Standard->morePrerequisites(output);
777+
}
778+
};
779+
760780
// Result subclass that specifies a type, for use in IRBuilder operations such
761781
// as CreateBitCast that take a type argument.
762782
class TypeResult : public Result {
@@ -1239,7 +1259,8 @@ Result::Ptr EmitterBase::getCodeForDag(const DagInit *D,
12391259
std::vector<Result::Ptr> Args;
12401260
for (unsigned i = 0, e = D->getNumArgs(); i < e; ++i)
12411261
Args.push_back(getCodeForDagArg(D, i, Scope, Param));
1242-
if (Op->isSubClassOf("IRBuilderBase")) {
1262+
1263+
auto GenIRBuilderBase = [&](const Record *Op) {
12431264
std::set<unsigned> AddressArgs;
12441265
std::map<unsigned, std::string> IntegerArgs;
12451266
for (const Record *sp : Op->getValueAsListOfDefs("special_params")) {
@@ -1252,14 +1273,25 @@ Result::Ptr EmitterBase::getCodeForDag(const DagInit *D,
12521273
}
12531274
return std::make_shared<IRBuilderResult>(Op->getValueAsString("prefix"),
12541275
Args, AddressArgs, IntegerArgs);
1255-
} else if (Op->isSubClassOf("IRIntBase")) {
1276+
};
1277+
auto GenIRIntBase = [&](const Record *Op) {
12561278
std::vector<const Type *> ParamTypes;
12571279
for (const Record *RParam : Op->getValueAsListOfDefs("params"))
12581280
ParamTypes.push_back(getType(RParam, Param));
12591281
std::string IntName = std::string(Op->getValueAsString("intname"));
12601282
if (Op->getValueAsBit("appendKind"))
12611283
IntName += "_" + toLetter(cast<ScalarType>(Param)->kind());
12621284
return std::make_shared<IRIntrinsicResult>(IntName, ParamTypes, Args);
1285+
};
1286+
1287+
if (Op->isSubClassOf("IRBuilderBase")) {
1288+
return GenIRBuilderBase(Op);
1289+
} else if (Op->isSubClassOf("IRIntBase")) {
1290+
return GenIRIntBase(Op);
1291+
} else if (Op->isSubClassOf("strictFPAlt")) {
1292+
auto Standard = GenIRBuilderBase(Op->getValueAsDef("standard"));
1293+
auto StrictFp = GenIRIntBase(Op->getValueAsDef("strictfp"));
1294+
return std::make_shared<StrictFpAltResult>(Standard, StrictFp);
12631295
} else {
12641296
PrintFatalError("Unsupported dag node " + Op->getName());
12651297
}

llvm/include/llvm/IR/IntrinsicsARM.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1138,6 +1138,18 @@ def int_arm_mve_vshlc_predicated: DefaultAttrsIntrinsic<
11381138
[llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty],
11391139
[LLVMMatchType<0>, llvm_i32_ty /* bits shifted in */,
11401140
llvm_i32_ty /* shift count */, llvm_anyvector_ty], [IntrNoMem]>;
1141+
def int_arm_mve_vadd: DefaultAttrsIntrinsic<
1142+
[llvm_anyvector_ty],
1143+
[LLVMMatchType<0>, LLVMMatchType<0>],
1144+
[IntrNoMem]>;
1145+
def int_arm_mve_vsub: DefaultAttrsIntrinsic<
1146+
[llvm_anyvector_ty],
1147+
[LLVMMatchType<0>, LLVMMatchType<0>],
1148+
[IntrNoMem]>;
1149+
def int_arm_mve_vmul: DefaultAttrsIntrinsic<
1150+
[llvm_anyvector_ty],
1151+
[LLVMMatchType<0>, LLVMMatchType<0>],
1152+
[IntrNoMem]>;
11411153
def int_arm_mve_vmulh: DefaultAttrsIntrinsic<
11421154
[llvm_anyvector_ty],
11431155
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],

llvm/lib/Target/ARM/ARMInstrMVE.td

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,16 @@ multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrin
384384
(VTI.Vec MQPR:$inactive)))>;
385385
}
386386

387+
def vadd : PatFrags<(ops node:$lhs, node:$rhs),
388+
[(fadd node:$lhs, node:$rhs),
389+
(int_arm_mve_vadd node:$lhs, node:$rhs)]>;
390+
def vsub : PatFrags<(ops node:$lhs, node:$rhs),
391+
[(fsub node:$lhs, node:$rhs),
392+
(int_arm_mve_vsub node:$lhs, node:$rhs)]>;
393+
def vmul : PatFrags<(ops node:$lhs, node:$rhs),
394+
[(fmul node:$lhs, node:$rhs),
395+
(int_arm_mve_vmul node:$lhs, node:$rhs)]>;
396+
387397
// --------- Start of base classes for the instructions themselves
388398

389399
class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,
@@ -3605,7 +3615,7 @@ class MVE_VMUL_fp<string iname, string suffix, bits<2> size, list<dag> pattern=[
36053615
let validForTailPredication = 1;
36063616
}
36073617

3608-
multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, SDNode Op,
3618+
multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, SDPatternOperator Op,
36093619
Intrinsic PredInt, SDPatternOperator IdentityVec> {
36103620
def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size>;
36113621
defvar Inst = !cast<Instruction>(NAME);
@@ -3616,7 +3626,7 @@ multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, SDNode Op,
36163626
}
36173627

36183628
multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec>
3619-
: MVE_VMULT_fp_m<"vmul", VTI, fmul, int_arm_mve_mul_predicated, IdentityVec>;
3629+
: MVE_VMULT_fp_m<"vmul", VTI, vmul, int_arm_mve_mul_predicated, IdentityVec>;
36203630

36213631
def ARMimmOneF: PatLeaf<(bitconvert (v4f32 (ARMvmovFPImm (i32 112))))>; // 1.0 float
36223632
def ARMimmOneH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2620))))>; // 1.0 half
@@ -3740,7 +3750,7 @@ defm MVE_VFMSf32 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v4f32>;
37403750
defm MVE_VFMSf16 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v8f16>;
37413751

37423752
multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
3743-
SDNode Op, Intrinsic PredInt, SDPatternOperator IdentityVec> {
3753+
SDPatternOperator Op, Intrinsic PredInt, SDPatternOperator IdentityVec> {
37443754
def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size, 0, 1, bit_21> {
37453755
let validForTailPredication = 1;
37463756
}
@@ -3752,9 +3762,9 @@ multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
37523762
}
37533763

37543764
multiclass MVE_VADD_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec>
3755-
: MVE_VADDSUB_fp_m<"vadd", 0, VTI, fadd, int_arm_mve_add_predicated, IdentityVec>;
3765+
: MVE_VADDSUB_fp_m<"vadd", 0, VTI, vadd, int_arm_mve_add_predicated, IdentityVec>;
37563766
multiclass MVE_VSUB_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec>
3757-
: MVE_VADDSUB_fp_m<"vsub", 1, VTI, fsub, int_arm_mve_sub_predicated, IdentityVec>;
3767+
: MVE_VADDSUB_fp_m<"vsub", 1, VTI, vsub, int_arm_mve_sub_predicated, IdentityVec>;
37583768

37593769
def ARMimmMinusZeroF: PatLeaf<(bitconvert (v4i32 (ARMvmovImm (i32 1664))))>; // -0.0 float
37603770
def ARMimmMinusZeroH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2688))))>; // -0.0 half
@@ -5391,21 +5401,22 @@ defm MVE_VHSUB_qr_u16 : MVE_VHSUB_qr_m<MVE_v8u16, subnuw, ARMvshruImm>;
53915401
defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m<MVE_v4u32, subnuw, ARMvshruImm>;
53925402

53935403
multiclass MVE_VADDSUB_qr_f<string iname, MVEVectorVTInfo VTI, bit subtract,
5394-
SDNode Op, Intrinsic PredInt, SDPatternOperator IdentityVec> {
5404+
SDPatternOperator Op, Intrinsic PredInt,
5405+
SDPatternOperator IdentityVec> {
53955406
def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, subtract, VTI.Size>;
53965407
defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ),
53975408
!cast<Instruction>(NAME), IdentityVec>;
53985409
}
53995410

54005411
let Predicates = [HasMVEFloat] in {
5401-
defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, fadd,
5412+
defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, vadd,
54025413
int_arm_mve_add_predicated, ARMimmMinusZeroF>;
5403-
defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, fadd,
5414+
defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, vadd,
54045415
int_arm_mve_add_predicated, ARMimmMinusZeroH>;
54055416

5406-
defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, fsub,
5417+
defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, vsub,
54075418
int_arm_mve_sub_predicated, ARMimmAllZerosV>;
5408-
defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, fsub,
5419+
defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, vsub,
54095420
int_arm_mve_sub_predicated, ARMimmAllZerosV>;
54105421
}
54115422

@@ -5588,7 +5599,7 @@ defm MVE_VQRDMULH_qr_s32 : MVE_VQRDMULH_qr_m<MVE_v4s32>;
55885599
multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec> {
55895600
let validForTailPredication = 1 in
55905601
def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11, VTI.Size>;
5591-
defm : MVE_TwoOpPatternDup<VTI, fmul, int_arm_mve_mul_predicated, (? ),
5602+
defm : MVE_TwoOpPatternDup<VTI, vmul, int_arm_mve_mul_predicated, (? ),
55925603
!cast<Instruction>(NAME), IdentityVec>;
55935604
}
55945605

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -o - %s | FileCheck %s
3+
4+
define arm_aapcs_vfpcc <8 x half> @test_vaddq_f16(<8 x half> %a, <8 x half> %b) {
5+
; CHECK-LABEL: test_vaddq_f16:
6+
; CHECK: @ %bb.0: @ %entry
7+
; CHECK-NEXT: vadd.f16 q0, q0, q1
8+
; CHECK-NEXT: bx lr
9+
entry:
10+
%0 = tail call <8 x half> @llvm.arm.mve.vadd.v8f16(<8 x half> %a, <8 x half> %b)
11+
ret <8 x half> %0
12+
}
13+
14+
define arm_aapcs_vfpcc <4 x float> @test_vaddq_f32(<4 x float> %a, <4 x float> %b) {
15+
; CHECK-LABEL: test_vaddq_f32:
16+
; CHECK: @ %bb.0: @ %entry
17+
; CHECK-NEXT: vadd.f32 q0, q0, q1
18+
; CHECK-NEXT: bx lr
19+
entry:
20+
%0 = tail call <4 x float> @llvm.arm.mve.vadd.v4f32(<4 x float> %a, <4 x float> %b)
21+
ret <4 x float> %0
22+
}
23+
24+
define arm_aapcs_vfpcc <8 x half> @test_vsubq_f16(<8 x half> %a, <8 x half> %b) {
25+
; CHECK-LABEL: test_vsubq_f16:
26+
; CHECK: @ %bb.0: @ %entry
27+
; CHECK-NEXT: vsub.f16 q0, q0, q1
28+
; CHECK-NEXT: bx lr
29+
entry:
30+
%0 = tail call <8 x half> @llvm.arm.mve.vsub.v8f16(<8 x half> %a, <8 x half> %b)
31+
ret <8 x half> %0
32+
}
33+
34+
define arm_aapcs_vfpcc <4 x float> @test_vsubq_f32(<4 x float> %a, <4 x float> %b) {
35+
; CHECK-LABEL: test_vsubq_f32:
36+
; CHECK: @ %bb.0: @ %entry
37+
; CHECK-NEXT: vsub.f32 q0, q0, q1
38+
; CHECK-NEXT: bx lr
39+
entry:
40+
%0 = tail call <4 x float> @llvm.arm.mve.vsub.v4f32(<4 x float> %a, <4 x float> %b)
41+
ret <4 x float> %0
42+
}
43+
44+
define arm_aapcs_vfpcc <8 x half> @test_vmulq_f16(<8 x half> %a, <8 x half> %b) {
45+
; CHECK-LABEL: test_vmulq_f16:
46+
; CHECK: @ %bb.0: @ %entry
47+
; CHECK-NEXT: vmul.f16 q0, q0, q1
48+
; CHECK-NEXT: bx lr
49+
entry:
50+
%0 = tail call <8 x half> @llvm.arm.mve.vmul.v8f16(<8 x half> %a, <8 x half> %b)
51+
ret <8 x half> %0
52+
}
53+
54+
define arm_aapcs_vfpcc <4 x float> @test_vmulq_f32(<4 x float> %a, <4 x float> %b) {
55+
; CHECK-LABEL: test_vmulq_f32:
56+
; CHECK: @ %bb.0: @ %entry
57+
; CHECK-NEXT: vmul.f32 q0, q0, q1
58+
; CHECK-NEXT: bx lr
59+
entry:
60+
%0 = tail call <4 x float> @llvm.arm.mve.vmul.v4f32(<4 x float> %a, <4 x float> %b)
61+
ret <4 x float> %0
62+
}
63+
64+
65+
66+
67+
define arm_aapcs_vfpcc <8 x half> @test_vaddq_f16_splat(<8 x half> %a, half %b) {
68+
; CHECK-LABEL: test_vaddq_f16_splat:
69+
; CHECK: @ %bb.0: @ %entry
70+
; CHECK-NEXT: vmov.f16 r0, s4
71+
; CHECK-NEXT: vadd.f16 q0, q0, r0
72+
; CHECK-NEXT: bx lr
73+
entry:
74+
%i = insertelement <8 x half> poison, half %b, i32 0
75+
%s = shufflevector <8 x half> %i, <8 x half> poison, <8 x i32> zeroinitializer
76+
%0 = tail call <8 x half> @llvm.arm.mve.vadd.v8f16(<8 x half> %a, <8 x half> %s)
77+
ret <8 x half> %0
78+
}
79+
80+
define arm_aapcs_vfpcc <4 x float> @test_vaddq_f32_splat(<4 x float> %a, float %b) {
81+
; CHECK-LABEL: test_vaddq_f32_splat:
82+
; CHECK: @ %bb.0: @ %entry
83+
; CHECK-NEXT: vmov r0, s4
84+
; CHECK-NEXT: vadd.f32 q0, q0, r0
85+
; CHECK-NEXT: bx lr
86+
entry:
87+
%i = insertelement <4 x float> poison, float %b, i32 0
88+
%s = shufflevector <4 x float> %i, <4 x float> poison, <4 x i32> zeroinitializer
89+
%0 = tail call <4 x float> @llvm.arm.mve.vadd.v4f32(<4 x float> %a, <4 x float> %s)
90+
ret <4 x float> %0
91+
}
92+
93+
define arm_aapcs_vfpcc <8 x half> @test_vsubq_f16_splat(<8 x half> %a, half %b) {
94+
; CHECK-LABEL: test_vsubq_f16_splat:
95+
; CHECK: @ %bb.0: @ %entry
96+
; CHECK-NEXT: vmov.f16 r0, s4
97+
; CHECK-NEXT: vsub.f16 q0, q0, r0
98+
; CHECK-NEXT: bx lr
99+
entry:
100+
%i = insertelement <8 x half> poison, half %b, i32 0
101+
%s = shufflevector <8 x half> %i, <8 x half> poison, <8 x i32> zeroinitializer
102+
%0 = tail call <8 x half> @llvm.arm.mve.vsub.v8f16(<8 x half> %a, <8 x half> %s)
103+
ret <8 x half> %0
104+
}
105+
106+
define arm_aapcs_vfpcc <4 x float> @test_vsubq_f32_splat(<4 x float> %a, float %b) {
107+
; CHECK-LABEL: test_vsubq_f32_splat:
108+
; CHECK: @ %bb.0: @ %entry
109+
; CHECK-NEXT: vmov r0, s4
110+
; CHECK-NEXT: vsub.f32 q0, q0, r0
111+
; CHECK-NEXT: bx lr
112+
entry:
113+
%i = insertelement <4 x float> poison, float %b, i32 0
114+
%s = shufflevector <4 x float> %i, <4 x float> poison, <4 x i32> zeroinitializer
115+
%0 = tail call <4 x float> @llvm.arm.mve.vsub.v4f32(<4 x float> %a, <4 x float> %s)
116+
ret <4 x float> %0
117+
}
118+
119+
define arm_aapcs_vfpcc <8 x half> @test_vmulq_f16_splat(<8 x half> %a, half %b) {
120+
; CHECK-LABEL: test_vmulq_f16_splat:
121+
; CHECK: @ %bb.0: @ %entry
122+
; CHECK-NEXT: vmov.f16 r0, s4
123+
; CHECK-NEXT: vmul.f16 q0, q0, r0
124+
; CHECK-NEXT: bx lr
125+
entry:
126+
%i = insertelement <8 x half> poison, half %b, i32 0
127+
%s = shufflevector <8 x half> %i, <8 x half> poison, <8 x i32> zeroinitializer
128+
%0 = tail call <8 x half> @llvm.arm.mve.vmul.v8f16(<8 x half> %a, <8 x half> %s)
129+
ret <8 x half> %0
130+
}
131+
132+
define arm_aapcs_vfpcc <4 x float> @test_vmulq_f32_splat(<4 x float> %a, float %b) {
133+
; CHECK-LABEL: test_vmulq_f32_splat:
134+
; CHECK: @ %bb.0: @ %entry
135+
; CHECK-NEXT: vmov r0, s4
136+
; CHECK-NEXT: vmul.f32 q0, q0, r0
137+
; CHECK-NEXT: bx lr
138+
entry:
139+
%i = insertelement <4 x float> poison, float %b, i32 0
140+
%s = shufflevector <4 x float> %i, <4 x float> poison, <4 x i32> zeroinitializer
141+
%0 = tail call <4 x float> @llvm.arm.mve.vmul.v4f32(<4 x float> %a, <4 x float> %s)
142+
ret <4 x float> %0
143+
}

llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ entry:
2525

2626
declare <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>, <4 x i32>, i32) #1
2727

28-
define arm_aapcs_vfpcc <8 x half> @test_vabdq_f32(<8 x half> %a, <8 x half> %b) local_unnamed_addr #0 {
29-
; CHECK-LABEL: test_vabdq_f32:
28+
define arm_aapcs_vfpcc <8 x half> @test_vabdq_f16(<8 x half> %a, <8 x half> %b) local_unnamed_addr #0 {
29+
; CHECK-LABEL: test_vabdq_f16:
3030
; CHECK: @ %bb.0: @ %entry
3131
; CHECK-NEXT: vabd.f16 q0, q0, q1
3232
; CHECK-NEXT: bx lr

0 commit comments

Comments
 (0)