Skip to content

Commit 6c3adaa

Browse files
[AARCH64][Neon] switch to using bitcasts in arm_neon.h where appropriate (llvm#127043)
Currently arm_neon.h emits C-style casts to do vector type casts. This relies on implicit conversion between vector types to be enabled, which is currently deprecated behaviour and soon will disappear. To ensure NEON code will keep working afterwards, this patch changes all this vector type casts into bitcasts. Co-authored-by: Momchil Velikov <[email protected]>
1 parent 9e5bfbf commit 6c3adaa

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+56329
-36394
lines changed

clang/include/clang/Basic/TargetBuiltins.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,10 @@ namespace clang {
263263
EltType ET = getEltType();
264264
return ET == Poly8 || ET == Poly16 || ET == Poly64;
265265
}
266+
bool isFloatingPoint() const {
267+
EltType ET = getEltType();
268+
return ET == Float16 || ET == Float32 || ET == Float64 || ET == BFloat16;
269+
}
266270
bool isUnsigned() const { return (Flags & UnsignedFlag) != 0; }
267271
bool isQuad() const { return (Flags & QuadFlag) != 0; }
268272
unsigned getEltSizeInBits() const {

clang/include/clang/Basic/arm_neon.td

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ def OP_MLAL : Op<(op "+", $p0, (call "vmull", $p1, $p2))>;
3131
def OP_MULLHi : Op<(call "vmull", (call "vget_high", $p0),
3232
(call "vget_high", $p1))>;
3333
def OP_MULLHi_P64 : Op<(call "vmull",
34-
(cast "poly64_t", (call "vget_high", $p0)),
35-
(cast "poly64_t", (call "vget_high", $p1)))>;
34+
(bitcast "poly64_t", (call "vget_high", $p0)),
35+
(bitcast "poly64_t", (call "vget_high", $p1)))>;
3636
def OP_MULLHi_N : Op<(call "vmull_n", (call "vget_high", $p0), $p1)>;
3737
def OP_MLALHi : Op<(call "vmlal", $p0, (call "vget_high", $p1),
3838
(call "vget_high", $p2))>;
@@ -95,11 +95,11 @@ def OP_TRN2 : Op<(shuffle $p0, $p1, (interleave
9595
def OP_ZIP2 : Op<(shuffle $p0, $p1, (highhalf (interleave mask0, mask1)))>;
9696
def OP_UZP2 : Op<(shuffle $p0, $p1, (add (decimate (rotl mask0, 1), 2),
9797
(decimate (rotl mask1, 1), 2)))>;
98-
def OP_EQ : Op<(cast "R", (op "==", $p0, $p1))>;
99-
def OP_GE : Op<(cast "R", (op ">=", $p0, $p1))>;
100-
def OP_LE : Op<(cast "R", (op "<=", $p0, $p1))>;
101-
def OP_GT : Op<(cast "R", (op ">", $p0, $p1))>;
102-
def OP_LT : Op<(cast "R", (op "<", $p0, $p1))>;
98+
def OP_EQ : Op<(bitcast "R", (op "==", $p0, $p1))>;
99+
def OP_GE : Op<(bitcast "R", (op ">=", $p0, $p1))>;
100+
def OP_LE : Op<(bitcast "R", (op "<=", $p0, $p1))>;
101+
def OP_GT : Op<(bitcast "R", (op ">", $p0, $p1))>;
102+
def OP_LT : Op<(bitcast "R", (op "<", $p0, $p1))>;
103103
def OP_NEG : Op<(op "-", $p0)>;
104104
def OP_NOT : Op<(op "~", $p0)>;
105105
def OP_AND : Op<(op "&", $p0, $p1)>;
@@ -108,33 +108,33 @@ def OP_XOR : Op<(op "^", $p0, $p1)>;
108108
def OP_ANDN : Op<(op "&", $p0, (op "~", $p1))>;
109109
def OP_ORN : Op<(op "|", $p0, (op "~", $p1))>;
110110
def OP_CAST : LOp<[(save_temp $promote, $p0),
111-
(cast "R", $promote)]>;
111+
(bitcast "R", $promote)]>;
112112
def OP_HI : Op<(shuffle $p0, $p0, (highhalf mask0))>;
113113
def OP_LO : Op<(shuffle $p0, $p0, (lowhalf mask0))>;
114114
def OP_CONC : Op<(shuffle $p0, $p1, (add mask0, mask1))>;
115115
def OP_DUP : Op<(dup $p0)>;
116116
def OP_DUP_LN : Op<(call_mangled "splat_lane", $p0, $p1)>;
117-
def OP_SEL : Op<(cast "R", (op "|",
118-
(op "&", $p0, (cast $p0, $p1)),
119-
(op "&", (op "~", $p0), (cast $p0, $p2))))>;
117+
def OP_SEL : Op<(bitcast "R", (op "|",
118+
(op "&", $p0, (bitcast $p0, $p1)),
119+
(op "&", (op "~", $p0), (bitcast $p0, $p2))))>;
120120
def OP_REV16 : Op<(shuffle $p0, $p0, (rev 16, mask0))>;
121121
def OP_REV32 : Op<(shuffle $p0, $p0, (rev 32, mask0))>;
122122
def OP_REV64 : Op<(shuffle $p0, $p0, (rev 64, mask0))>;
123123
def OP_XTN : Op<(call "vcombine", $p0, (call "vmovn", $p1))>;
124-
def OP_SQXTUN : Op<(call "vcombine", (cast $p0, "U", $p0),
124+
def OP_SQXTUN : Op<(call "vcombine", (bitcast $p0, "U", $p0),
125125
(call "vqmovun", $p1))>;
126126
def OP_QXTN : Op<(call "vcombine", $p0, (call "vqmovn", $p1))>;
127127
def OP_VCVT_NA_HI_F16 : Op<(call "vcombine", $p0, (call "vcvt_f16_f32", $p1))>;
128128
def OP_VCVT_NA_HI_F32 : Op<(call "vcombine", $p0, (call "vcvt_f32_f64", $p1))>;
129129
def OP_VCVT_EX_HI_F32 : Op<(call "vcvt_f32_f16", (call "vget_high", $p0))>;
130130
def OP_VCVT_EX_HI_F64 : Op<(call "vcvt_f64_f32", (call "vget_high", $p0))>;
131131
def OP_VCVTX_HI : Op<(call "vcombine", $p0, (call "vcvtx_f32", $p1))>;
132-
def OP_REINT : Op<(cast "R", $p0)>;
132+
def OP_REINT : Op<(bitcast "R", $p0)>;
133133
def OP_ADDHNHi : Op<(call "vcombine", $p0, (call "vaddhn", $p1, $p2))>;
134134
def OP_RADDHNHi : Op<(call "vcombine", $p0, (call "vraddhn", $p1, $p2))>;
135135
def OP_SUBHNHi : Op<(call "vcombine", $p0, (call "vsubhn", $p1, $p2))>;
136136
def OP_RSUBHNHi : Op<(call "vcombine", $p0, (call "vrsubhn", $p1, $p2))>;
137-
def OP_ABDL : Op<(cast "R", (call "vmovl", (cast $p0, "U",
137+
def OP_ABDL : Op<(bitcast "R", (call "vmovl", (bitcast $p0, "U",
138138
(call "vabd", $p0, $p1))))>;
139139
def OP_ABDLHi : Op<(call "vabdl", (call "vget_high", $p0),
140140
(call "vget_high", $p1))>;
@@ -152,15 +152,15 @@ def OP_QDMLSLHi : Op<(call "vqdmlsl", $p0, (call "vget_high", $p1),
152152
(call "vget_high", $p2))>;
153153
def OP_QDMLSLHi_N : Op<(call "vqdmlsl_n", $p0, (call "vget_high", $p1), $p2)>;
154154
def OP_DIV : Op<(op "/", $p0, $p1)>;
155-
def OP_LONG_HI : Op<(cast "R", (call (name_replace "_high_", "_"),
155+
def OP_LONG_HI : Op<(bitcast "R", (call (name_replace "_high_", "_"),
156156
(call "vget_high", $p0), $p1))>;
157-
def OP_NARROW_HI : Op<(cast "R", (call "vcombine",
158-
(cast "R", "H", $p0),
159-
(cast "R", "H",
157+
def OP_NARROW_HI : Op<(bitcast "R", (call "vcombine",
158+
(bitcast "R", "H", $p0),
159+
(bitcast "R", "H",
160160
(call (name_replace "_high_", "_"),
161161
$p1, $p2))))>;
162162
def OP_MOVL_HI : LOp<[(save_temp $a1, (call "vget_high", $p0)),
163-
(cast "R",
163+
(bitcast "R",
164164
(call "vshll_n", $a1, (literal "int32_t", "0")))]>;
165165
def OP_COPY_LN : Op<(call "vset_lane", (call "vget_lane", $p2, $p3), $p0, $p1)>;
166166
def OP_SCALAR_MUL_LN : Op<(op "*", $p0, (call "vget_lane", $p1, $p2))>;
@@ -221,18 +221,18 @@ def OP_FMLSL_LN_Hi : Op<(call "vfmlsl_high", $p0, $p1,
221221

222222
def OP_USDOT_LN
223223
: Op<(call "vusdot", $p0, $p1,
224-
(cast "8", "S", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)))>;
224+
(bitcast "8", "S", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)))>;
225225
def OP_USDOT_LNQ
226226
: Op<(call "vusdot", $p0, $p1,
227-
(cast "8", "S", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)))>;
227+
(bitcast "8", "S", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)))>;
228228

229229
// sudot splats the second vector and then calls vusdot
230230
def OP_SUDOT_LN
231231
: Op<(call "vusdot", $p0,
232-
(cast "8", "U", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)), $p1)>;
232+
(bitcast "8", "U", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)), $p1)>;
233233
def OP_SUDOT_LNQ
234234
: Op<(call "vusdot", $p0,
235-
(cast "8", "U", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)), $p1)>;
235+
(bitcast "8", "U", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)), $p1)>;
236236

237237
def OP_BFDOT_LN
238238
: Op<(call "vbfdot", $p0, $p1,
@@ -263,7 +263,7 @@ def OP_VCVT_BF16_F32_A32
263263
: Op<(call "__a32_vcvt_bf16", $p0)>;
264264

265265
def OP_VCVT_BF16_F32_LO_A32
266-
: Op<(call "vcombine", (cast "bfloat16x4_t", (literal "uint64_t", "0ULL")),
266+
: Op<(call "vcombine", (bitcast "bfloat16x4_t", (literal "uint64_t", "0ULL")),
267267
(call "__a32_vcvt_bf16", $p0))>;
268268
def OP_VCVT_BF16_F32_HI_A32
269269
: Op<(call "vcombine", (call "__a32_vcvt_bf16", $p1),
@@ -924,12 +924,12 @@ def CFMLE : SOpInst<"vcle", "U..", "lUldQdQlQUl", OP_LE>;
924924
def CFMGT : SOpInst<"vcgt", "U..", "lUldQdQlQUl", OP_GT>;
925925
def CFMLT : SOpInst<"vclt", "U..", "lUldQdQlQUl", OP_LT>;
926926

927-
def CMEQ : SInst<"vceqz", "U.",
927+
def CMEQ : SInst<"vceqz", "U(.!)",
928928
"csilfUcUsUiUlPcPlQcQsQiQlQfQUcQUsQUiQUlQPcdQdQPl">;
929-
def CMGE : SInst<"vcgez", "U.", "csilfdQcQsQiQlQfQd">;
930-
def CMLE : SInst<"vclez", "U.", "csilfdQcQsQiQlQfQd">;
931-
def CMGT : SInst<"vcgtz", "U.", "csilfdQcQsQiQlQfQd">;
932-
def CMLT : SInst<"vcltz", "U.", "csilfdQcQsQiQlQfQd">;
929+
def CMGE : SInst<"vcgez", "U(.!)", "csilfdQcQsQiQlQfQd">;
930+
def CMLE : SInst<"vclez", "U(.!)", "csilfdQcQsQiQlQfQd">;
931+
def CMGT : SInst<"vcgtz", "U(.!)", "csilfdQcQsQiQlQfQd">;
932+
def CMLT : SInst<"vcltz", "U(.!)", "csilfdQcQsQiQlQfQd">;
933933

934934
////////////////////////////////////////////////////////////////////////////////
935935
// Max/Min Integer
@@ -1667,11 +1667,11 @@ let TargetGuard = "fullfp16,neon" in {
16671667
// ARMv8.2-A FP16 one-operand vector intrinsics.
16681668

16691669
// Comparison
1670-
def CMEQH : SInst<"vceqz", "U.", "hQh">;
1671-
def CMGEH : SInst<"vcgez", "U.", "hQh">;
1672-
def CMGTH : SInst<"vcgtz", "U.", "hQh">;
1673-
def CMLEH : SInst<"vclez", "U.", "hQh">;
1674-
def CMLTH : SInst<"vcltz", "U.", "hQh">;
1670+
def CMEQH : SInst<"vceqz", "U(.!)", "hQh">;
1671+
def CMGEH : SInst<"vcgez", "U(.!)", "hQh">;
1672+
def CMGTH : SInst<"vcgtz", "U(.!)", "hQh">;
1673+
def CMLEH : SInst<"vclez", "U(.!)", "hQh">;
1674+
def CMLTH : SInst<"vcltz", "U(.!)", "hQh">;
16751675

16761676
// Vector conversion
16771677
def VCVT_F16 : SInst<"vcvt_f16", "F(.!)", "sUsQsQUs">;

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4694,10 +4694,10 @@ class CodeGenFunction : public CodeGenTypeCache {
46944694
llvm::Value *EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
46954695
ReturnValueSlot ReturnValue);
46964696

4697-
llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty,
4698-
const llvm::CmpInst::Predicate Fp,
4699-
const llvm::CmpInst::Predicate Ip,
4700-
const llvm::Twine &Name = "");
4697+
llvm::Value *
4698+
EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty,
4699+
const llvm::CmpInst::Predicate Pred,
4700+
const llvm::Twine &Name = "");
47014701
llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
47024702
ReturnValueSlot ReturnValue,
47034703
llvm::Triple::ArchType Arch);

clang/lib/CodeGen/TargetBuiltins/ARM.cpp

Lines changed: 66 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1750,8 +1750,9 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
17501750

17511751
// Determine the type of this overloaded NEON intrinsic.
17521752
NeonTypeFlags Type(NeonTypeConst->getZExtValue());
1753-
bool Usgn = Type.isUnsigned();
1754-
bool Quad = Type.isQuad();
1753+
const bool Usgn = Type.isUnsigned();
1754+
const bool Quad = Type.isQuad();
1755+
const bool Floating = Type.isFloatingPoint();
17551756
const bool HasLegalHalfType = getTarget().hasLegalHalfType();
17561757
const bool AllowBFloatArgsAndRet =
17571758
getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
@@ -1852,24 +1853,28 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
18521853
}
18531854
case NEON::BI__builtin_neon_vceqz_v:
18541855
case NEON::BI__builtin_neon_vceqzq_v:
1855-
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
1856-
ICmpInst::ICMP_EQ, "vceqz");
1856+
return EmitAArch64CompareBuiltinExpr(
1857+
Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
18571858
case NEON::BI__builtin_neon_vcgez_v:
18581859
case NEON::BI__builtin_neon_vcgezq_v:
1859-
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
1860-
ICmpInst::ICMP_SGE, "vcgez");
1860+
return EmitAArch64CompareBuiltinExpr(
1861+
Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1862+
"vcgez");
18611863
case NEON::BI__builtin_neon_vclez_v:
18621864
case NEON::BI__builtin_neon_vclezq_v:
1863-
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
1864-
ICmpInst::ICMP_SLE, "vclez");
1865+
return EmitAArch64CompareBuiltinExpr(
1866+
Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1867+
"vclez");
18651868
case NEON::BI__builtin_neon_vcgtz_v:
18661869
case NEON::BI__builtin_neon_vcgtzq_v:
1867-
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
1868-
ICmpInst::ICMP_SGT, "vcgtz");
1870+
return EmitAArch64CompareBuiltinExpr(
1871+
Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1872+
"vcgtz");
18691873
case NEON::BI__builtin_neon_vcltz_v:
18701874
case NEON::BI__builtin_neon_vcltzq_v:
1871-
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
1872-
ICmpInst::ICMP_SLT, "vcltz");
1875+
return EmitAArch64CompareBuiltinExpr(
1876+
Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1877+
"vcltz");
18731878
case NEON::BI__builtin_neon_vclz_v:
18741879
case NEON::BI__builtin_neon_vclzq_v:
18751880
// We generate target-independent intrinsic, which needs a second argument
@@ -2432,28 +2437,32 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
24322437
return Builder.CreateBitCast(Result, ResultType, NameHint);
24332438
}
24342439

2435-
Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
2436-
Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
2437-
const CmpInst::Predicate Ip, const Twine &Name) {
2438-
llvm::Type *OTy = Op->getType();
2439-
2440-
// FIXME: this is utterly horrific. We should not be looking at previous
2441-
// codegen context to find out what needs doing. Unfortunately TableGen
2442-
// currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
2443-
// (etc).
2444-
if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
2445-
OTy = BI->getOperand(0)->getType();
2446-
2447-
Op = Builder.CreateBitCast(Op, OTy);
2448-
if (OTy->getScalarType()->isFloatingPointTy()) {
2449-
if (Fp == CmpInst::FCMP_OEQ)
2450-
Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
2440+
Value *
2441+
CodeGenFunction::EmitAArch64CompareBuiltinExpr(Value *Op, llvm::Type *Ty,
2442+
const CmpInst::Predicate Pred,
2443+
const Twine &Name) {
2444+
2445+
if (isa<FixedVectorType>(Ty)) {
2446+
// Vector types are cast to i8 vectors. Recover original type.
2447+
Op = Builder.CreateBitCast(Op, Ty);
2448+
}
2449+
2450+
if (CmpInst::isFPPredicate(Pred)) {
2451+
if (Pred == CmpInst::FCMP_OEQ)
2452+
Op = Builder.CreateFCmp(Pred, Op, Constant::getNullValue(Op->getType()));
24512453
else
2452-
Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
2454+
Op = Builder.CreateFCmpS(Pred, Op, Constant::getNullValue(Op->getType()));
24532455
} else {
2454-
Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
2456+
Op = Builder.CreateICmp(Pred, Op, Constant::getNullValue(Op->getType()));
24552457
}
2456-
return Builder.CreateSExt(Op, Ty, Name);
2458+
2459+
llvm::Type *ResTy = Ty;
2460+
if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
2461+
ResTy = FixedVectorType::get(
2462+
IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
2463+
VTy->getNumElements());
2464+
2465+
return Builder.CreateSExt(Op, ResTy, Name);
24572466
}
24582467

24592468
static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
@@ -5955,45 +5964,66 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
59555964
return Builder.CreateFAdd(Op0, Op1, "vpaddd");
59565965
}
59575966
case NEON::BI__builtin_neon_vceqzd_s64:
5967+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
5968+
return EmitAArch64CompareBuiltinExpr(
5969+
Ops[0], ConvertType(E->getCallReturnType(getContext())),
5970+
ICmpInst::ICMP_EQ, "vceqz");
59585971
case NEON::BI__builtin_neon_vceqzd_f64:
59595972
case NEON::BI__builtin_neon_vceqzs_f32:
59605973
case NEON::BI__builtin_neon_vceqzh_f16:
59615974
Ops.push_back(EmitScalarExpr(E->getArg(0)));
59625975
return EmitAArch64CompareBuiltinExpr(
59635976
Ops[0], ConvertType(E->getCallReturnType(getContext())),
5964-
ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5977+
ICmpInst::FCMP_OEQ, "vceqz");
59655978
case NEON::BI__builtin_neon_vcgezd_s64:
5979+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
5980+
return EmitAArch64CompareBuiltinExpr(
5981+
Ops[0], ConvertType(E->getCallReturnType(getContext())),
5982+
ICmpInst::ICMP_SGE, "vcgez");
59665983
case NEON::BI__builtin_neon_vcgezd_f64:
59675984
case NEON::BI__builtin_neon_vcgezs_f32:
59685985
case NEON::BI__builtin_neon_vcgezh_f16:
59695986
Ops.push_back(EmitScalarExpr(E->getArg(0)));
59705987
return EmitAArch64CompareBuiltinExpr(
59715988
Ops[0], ConvertType(E->getCallReturnType(getContext())),
5972-
ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5989+
ICmpInst::FCMP_OGE, "vcgez");
59735990
case NEON::BI__builtin_neon_vclezd_s64:
5991+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
5992+
return EmitAArch64CompareBuiltinExpr(
5993+
Ops[0], ConvertType(E->getCallReturnType(getContext())),
5994+
ICmpInst::ICMP_SLE, "vclez");
59745995
case NEON::BI__builtin_neon_vclezd_f64:
59755996
case NEON::BI__builtin_neon_vclezs_f32:
59765997
case NEON::BI__builtin_neon_vclezh_f16:
59775998
Ops.push_back(EmitScalarExpr(E->getArg(0)));
59785999
return EmitAArch64CompareBuiltinExpr(
59796000
Ops[0], ConvertType(E->getCallReturnType(getContext())),
5980-
ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
6001+
ICmpInst::FCMP_OLE, "vclez");
59816002
case NEON::BI__builtin_neon_vcgtzd_s64:
6003+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
6004+
return EmitAArch64CompareBuiltinExpr(
6005+
Ops[0], ConvertType(E->getCallReturnType(getContext())),
6006+
ICmpInst::ICMP_SGT, "vcgtz");
59826007
case NEON::BI__builtin_neon_vcgtzd_f64:
59836008
case NEON::BI__builtin_neon_vcgtzs_f32:
59846009
case NEON::BI__builtin_neon_vcgtzh_f16:
59856010
Ops.push_back(EmitScalarExpr(E->getArg(0)));
59866011
return EmitAArch64CompareBuiltinExpr(
59876012
Ops[0], ConvertType(E->getCallReturnType(getContext())),
5988-
ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
6013+
ICmpInst::FCMP_OGT, "vcgtz");
59896014
case NEON::BI__builtin_neon_vcltzd_s64:
6015+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
6016+
return EmitAArch64CompareBuiltinExpr(
6017+
Ops[0], ConvertType(E->getCallReturnType(getContext())),
6018+
ICmpInst::ICMP_SLT, "vcltz");
6019+
59906020
case NEON::BI__builtin_neon_vcltzd_f64:
59916021
case NEON::BI__builtin_neon_vcltzs_f32:
59926022
case NEON::BI__builtin_neon_vcltzh_f16:
59936023
Ops.push_back(EmitScalarExpr(E->getArg(0)));
59946024
return EmitAArch64CompareBuiltinExpr(
59956025
Ops[0], ConvertType(E->getCallReturnType(getContext())),
5996-
ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
6026+
ICmpInst::FCMP_OLT, "vcltz");
59976027

59986028
case NEON::BI__builtin_neon_vceqzd_u64: {
59996029
Ops.push_back(EmitScalarExpr(E->getArg(0)));

0 commit comments

Comments
 (0)