@@ -1193,14 +1193,22 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
11931193 NEONMAP1 (vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
11941194 NEONMAP1 (vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
11951195 NEONMAP1 (vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
1196- NEONMAP1 (vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
1197- NEONMAP1 (vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1196+ NEONMAP1 (vaddv_s16, vector_reduce_add, Add1ArgType),
1197+ NEONMAP1 (vaddv_s32, vector_reduce_add, Add1ArgType),
1198+ NEONMAP1 (vaddv_s8, vector_reduce_add, Add1ArgType),
1199+ NEONMAP1 (vaddv_u16, vector_reduce_add, Add1ArgType),
1200+ NEONMAP1 (vaddv_u32, vector_reduce_add, Add1ArgType),
1201+ NEONMAP1 (vaddv_u8, vector_reduce_add, Add1ArgType),
11981202 NEONMAP1 (vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
11991203 NEONMAP1 (vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
1200- NEONMAP1 (vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
1201- NEONMAP1 (vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
1202- NEONMAP1 (vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1203- NEONMAP1 (vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1204+ NEONMAP1 (vaddvq_s16, vector_reduce_add, Add1ArgType),
1205+ NEONMAP1 (vaddvq_s32, vector_reduce_add, Add1ArgType),
1206+ NEONMAP1 (vaddvq_s64, vector_reduce_add, Add1ArgType),
1207+ NEONMAP1 (vaddvq_s8, vector_reduce_add, Add1ArgType),
1208+ NEONMAP1 (vaddvq_u16, vector_reduce_add, Add1ArgType),
1209+ NEONMAP1 (vaddvq_u32, vector_reduce_add, Add1ArgType),
1210+ NEONMAP1 (vaddvq_u64, vector_reduce_add, Add1ArgType),
1211+ NEONMAP1 (vaddvq_u8, vector_reduce_add, Add1ArgType),
12041212 NEONMAP1 (vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
12051213 NEONMAP1 (vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
12061214 NEONMAP1 (vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
@@ -1243,27 +1251,43 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
12431251 NEONMAP1 (vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
12441252 NEONMAP1 (vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
12451253 NEONMAP1 (vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1246- NEONMAP1 (vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
1247- NEONMAP1 (vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
1254+ NEONMAP1 (vmaxv_s16, vector_reduce_smax, Add1ArgType),
1255+ NEONMAP1 (vmaxv_s32, vector_reduce_smax, Add1ArgType),
1256+ NEONMAP1 (vmaxv_s8, vector_reduce_smax, Add1ArgType),
1257+ NEONMAP1 (vmaxv_u16, vector_reduce_umax, Add1ArgType),
1258+ NEONMAP1 (vmaxv_u32, vector_reduce_umax, Add1ArgType),
1259+ NEONMAP1 (vmaxv_u8, vector_reduce_umax, Add1ArgType),
12481260 NEONMAP1 (vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
12491261 NEONMAP1 (vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1250- NEONMAP1 (vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
1251- NEONMAP1 (vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
1262+ NEONMAP1 (vmaxvq_s16, vector_reduce_smax, Add1ArgType),
1263+ NEONMAP1 (vmaxvq_s32, vector_reduce_smax, Add1ArgType),
1264+ NEONMAP1 (vmaxvq_s8, vector_reduce_smax, Add1ArgType),
1265+ NEONMAP1 (vmaxvq_u16, vector_reduce_umax, Add1ArgType),
1266+ NEONMAP1 (vmaxvq_u32, vector_reduce_umax, Add1ArgType),
1267+ NEONMAP1 (vmaxvq_u8, vector_reduce_umax, Add1ArgType),
12521268 NEONMAP1 (vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
12531269 NEONMAP1 (vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
12541270 NEONMAP1 (vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
12551271 NEONMAP1 (vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1256- NEONMAP1 (vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
1257- NEONMAP1 (vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
1272+ NEONMAP1 (vminv_s16, vector_reduce_smin, Add1ArgType),
1273+ NEONMAP1 (vminv_s32, vector_reduce_smin, Add1ArgType),
1274+ NEONMAP1 (vminv_s8, vector_reduce_smin, Add1ArgType),
1275+ NEONMAP1 (vminv_u16, vector_reduce_umin, Add1ArgType),
1276+ NEONMAP1 (vminv_u32, vector_reduce_umin, Add1ArgType),
1277+ NEONMAP1 (vminv_u8, vector_reduce_umin, Add1ArgType),
12581278 NEONMAP1 (vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
12591279 NEONMAP1 (vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1260- NEONMAP1 (vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
1261- NEONMAP1 (vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
1280+ NEONMAP1 (vminvq_s16, vector_reduce_smin, Add1ArgType),
1281+ NEONMAP1 (vminvq_s32, vector_reduce_smin, Add1ArgType),
1282+ NEONMAP1 (vminvq_s8, vector_reduce_smin, Add1ArgType),
1283+ NEONMAP1 (vminvq_u16, vector_reduce_umin, Add1ArgType),
1284+ NEONMAP1 (vminvq_u32, vector_reduce_umin, Add1ArgType),
1285+ NEONMAP1 (vminvq_u8, vector_reduce_umin, Add1ArgType),
12621286 NEONMAP1 (vmull_p64, aarch64_neon_pmull64, 0 ),
12631287 NEONMAP1 (vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
12641288 NEONMAP1 (vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
1265- NEONMAP1 (vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1266- NEONMAP1 (vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1289+ NEONMAP1 (vpaddd_s64, vector_reduce_add, Add1ArgType),
1290+ NEONMAP1 (vpaddd_u64, vector_reduce_add, Add1ArgType),
12671291 NEONMAP1 (vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
12681292 NEONMAP1 (vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
12691293 NEONMAP1 (vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
@@ -7067,127 +7091,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
70677091 Int = Intrinsic::bitreverse;
70687092 return EmitNeonCall (CGM.getIntrinsic (Int, Ty), Ops, " vrbit" );
70697093 }
7070- case NEON::BI__builtin_neon_vaddv_u8:
7071- // FIXME: These are handled by the AArch64 scalar code.
7072- usgn = true ;
7073- [[fallthrough]];
7074- case NEON::BI__builtin_neon_vaddv_s8: {
7075- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7076- Ty = Int32Ty;
7077- VTy = llvm::FixedVectorType::get (Int8Ty, 8 );
7078- llvm::Type *Tys[2 ] = { Ty, VTy };
7079- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7080- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vaddv" );
7081- return Builder.CreateTrunc (Ops[0 ], Int8Ty);
7082- }
7083- case NEON::BI__builtin_neon_vaddv_u16:
7084- usgn = true ;
7085- [[fallthrough]];
7086- case NEON::BI__builtin_neon_vaddv_s16: {
7087- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7088- Ty = Int32Ty;
7089- VTy = llvm::FixedVectorType::get (Int16Ty, 4 );
7090- llvm::Type *Tys[2 ] = { Ty, VTy };
7091- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7092- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vaddv" );
7093- return Builder.CreateTrunc (Ops[0 ], Int16Ty);
7094- }
7095- case NEON::BI__builtin_neon_vaddvq_u8:
7096- usgn = true ;
7097- [[fallthrough]];
7098- case NEON::BI__builtin_neon_vaddvq_s8: {
7099- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7100- Ty = Int32Ty;
7101- VTy = llvm::FixedVectorType::get (Int8Ty, 16 );
7102- llvm::Type *Tys[2 ] = { Ty, VTy };
7103- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7104- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vaddv" );
7105- return Builder.CreateTrunc (Ops[0 ], Int8Ty);
7106- }
7107- case NEON::BI__builtin_neon_vaddvq_u16:
7108- usgn = true ;
7109- [[fallthrough]];
7110- case NEON::BI__builtin_neon_vaddvq_s16: {
7111- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7112- Ty = Int32Ty;
7113- VTy = llvm::FixedVectorType::get (Int16Ty, 8 );
7114- llvm::Type *Tys[2 ] = { Ty, VTy };
7115- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7116- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vaddv" );
7117- return Builder.CreateTrunc (Ops[0 ], Int16Ty);
7118- }
7119- case NEON::BI__builtin_neon_vmaxv_u8: {
7120- Int = Intrinsic::aarch64_neon_umaxv;
7121- Ty = Int32Ty;
7122- VTy = llvm::FixedVectorType::get (Int8Ty, 8 );
7123- llvm::Type *Tys[2 ] = { Ty, VTy };
7124- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7125- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vmaxv" );
7126- return Builder.CreateTrunc (Ops[0 ], Int8Ty);
7127- }
7128- case NEON::BI__builtin_neon_vmaxv_u16: {
7129- Int = Intrinsic::aarch64_neon_umaxv;
7130- Ty = Int32Ty;
7131- VTy = llvm::FixedVectorType::get (Int16Ty, 4 );
7132- llvm::Type *Tys[2 ] = { Ty, VTy };
7133- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7134- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vmaxv" );
7135- return Builder.CreateTrunc (Ops[0 ], Int16Ty);
7136- }
7137- case NEON::BI__builtin_neon_vmaxvq_u8: {
7138- Int = Intrinsic::aarch64_neon_umaxv;
7139- Ty = Int32Ty;
7140- VTy = llvm::FixedVectorType::get (Int8Ty, 16 );
7141- llvm::Type *Tys[2 ] = { Ty, VTy };
7142- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7143- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vmaxv" );
7144- return Builder.CreateTrunc (Ops[0 ], Int8Ty);
7145- }
7146- case NEON::BI__builtin_neon_vmaxvq_u16: {
7147- Int = Intrinsic::aarch64_neon_umaxv;
7148- Ty = Int32Ty;
7149- VTy = llvm::FixedVectorType::get (Int16Ty, 8 );
7150- llvm::Type *Tys[2 ] = { Ty, VTy };
7151- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7152- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vmaxv" );
7153- return Builder.CreateTrunc (Ops[0 ], Int16Ty);
7154- }
7155- case NEON::BI__builtin_neon_vmaxv_s8: {
7156- Int = Intrinsic::aarch64_neon_smaxv;
7157- Ty = Int32Ty;
7158- VTy = llvm::FixedVectorType::get (Int8Ty, 8 );
7159- llvm::Type *Tys[2 ] = { Ty, VTy };
7160- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7161- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vmaxv" );
7162- return Builder.CreateTrunc (Ops[0 ], Int8Ty);
7163- }
7164- case NEON::BI__builtin_neon_vmaxv_s16: {
7165- Int = Intrinsic::aarch64_neon_smaxv;
7166- Ty = Int32Ty;
7167- VTy = llvm::FixedVectorType::get (Int16Ty, 4 );
7168- llvm::Type *Tys[2 ] = { Ty, VTy };
7169- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7170- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vmaxv" );
7171- return Builder.CreateTrunc (Ops[0 ], Int16Ty);
7172- }
7173- case NEON::BI__builtin_neon_vmaxvq_s8: {
7174- Int = Intrinsic::aarch64_neon_smaxv;
7175- Ty = Int32Ty;
7176- VTy = llvm::FixedVectorType::get (Int8Ty, 16 );
7177- llvm::Type *Tys[2 ] = { Ty, VTy };
7178- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7179- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vmaxv" );
7180- return Builder.CreateTrunc (Ops[0 ], Int8Ty);
7181- }
7182- case NEON::BI__builtin_neon_vmaxvq_s16: {
7183- Int = Intrinsic::aarch64_neon_smaxv;
7184- Ty = Int32Ty;
7185- VTy = llvm::FixedVectorType::get (Int16Ty, 8 );
7186- llvm::Type *Tys[2 ] = { Ty, VTy };
7187- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7188- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vmaxv" );
7189- return Builder.CreateTrunc (Ops[0 ], Int16Ty);
7190- }
71917094 case NEON::BI__builtin_neon_vmaxv_f16: {
71927095 Int = Intrinsic::aarch64_neon_fmaxv;
71937096 Ty = HalfTy;
@@ -7206,78 +7109,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
72067109 Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vmaxv" );
72077110 return Builder.CreateTrunc (Ops[0 ], HalfTy);
72087111 }
7209- case NEON::BI__builtin_neon_vminv_u8: {
7210- Int = Intrinsic::aarch64_neon_uminv;
7211- Ty = Int32Ty;
7212- VTy = llvm::FixedVectorType::get (Int8Ty, 8 );
7213- llvm::Type *Tys[2 ] = { Ty, VTy };
7214- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7215- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vminv" );
7216- return Builder.CreateTrunc (Ops[0 ], Int8Ty);
7217- }
7218- case NEON::BI__builtin_neon_vminv_u16: {
7219- Int = Intrinsic::aarch64_neon_uminv;
7220- Ty = Int32Ty;
7221- VTy = llvm::FixedVectorType::get (Int16Ty, 4 );
7222- llvm::Type *Tys[2 ] = { Ty, VTy };
7223- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7224- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vminv" );
7225- return Builder.CreateTrunc (Ops[0 ], Int16Ty);
7226- }
7227- case NEON::BI__builtin_neon_vminvq_u8: {
7228- Int = Intrinsic::aarch64_neon_uminv;
7229- Ty = Int32Ty;
7230- VTy = llvm::FixedVectorType::get (Int8Ty, 16 );
7231- llvm::Type *Tys[2 ] = { Ty, VTy };
7232- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7233- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vminv" );
7234- return Builder.CreateTrunc (Ops[0 ], Int8Ty);
7235- }
7236- case NEON::BI__builtin_neon_vminvq_u16: {
7237- Int = Intrinsic::aarch64_neon_uminv;
7238- Ty = Int32Ty;
7239- VTy = llvm::FixedVectorType::get (Int16Ty, 8 );
7240- llvm::Type *Tys[2 ] = { Ty, VTy };
7241- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7242- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vminv" );
7243- return Builder.CreateTrunc (Ops[0 ], Int16Ty);
7244- }
7245- case NEON::BI__builtin_neon_vminv_s8: {
7246- Int = Intrinsic::aarch64_neon_sminv;
7247- Ty = Int32Ty;
7248- VTy = llvm::FixedVectorType::get (Int8Ty, 8 );
7249- llvm::Type *Tys[2 ] = { Ty, VTy };
7250- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7251- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vminv" );
7252- return Builder.CreateTrunc (Ops[0 ], Int8Ty);
7253- }
7254- case NEON::BI__builtin_neon_vminv_s16: {
7255- Int = Intrinsic::aarch64_neon_sminv;
7256- Ty = Int32Ty;
7257- VTy = llvm::FixedVectorType::get (Int16Ty, 4 );
7258- llvm::Type *Tys[2 ] = { Ty, VTy };
7259- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7260- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vminv" );
7261- return Builder.CreateTrunc (Ops[0 ], Int16Ty);
7262- }
7263- case NEON::BI__builtin_neon_vminvq_s8: {
7264- Int = Intrinsic::aarch64_neon_sminv;
7265- Ty = Int32Ty;
7266- VTy = llvm::FixedVectorType::get (Int8Ty, 16 );
7267- llvm::Type *Tys[2 ] = { Ty, VTy };
7268- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7269- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vminv" );
7270- return Builder.CreateTrunc (Ops[0 ], Int8Ty);
7271- }
7272- case NEON::BI__builtin_neon_vminvq_s16: {
7273- Int = Intrinsic::aarch64_neon_sminv;
7274- Ty = Int32Ty;
7275- VTy = llvm::FixedVectorType::get (Int16Ty, 8 );
7276- llvm::Type *Tys[2 ] = { Ty, VTy };
7277- Ops.push_back (EmitScalarExpr (E->getArg (0 )));
7278- Ops[0 ] = EmitNeonCall (CGM.getIntrinsic (Int, Tys), Ops, " vminv" );
7279- return Builder.CreateTrunc (Ops[0 ], Int16Ty);
7280- }
72817112 case NEON::BI__builtin_neon_vminv_f16: {
72827113 Int = Intrinsic::aarch64_neon_fminv;
72837114 Ty = HalfTy;
0 commit comments