Skip to content

Commit 56be43a

Browse files
[fixup] Refector much of common code into a helper function (NFC)
1 parent d72f224 commit 56be43a

File tree

2 files changed

+69
-106
lines changed

2 files changed

+69
-106
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 65 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -6881,6 +6881,23 @@ Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
68816881
return ConstantInt::get(Ty, neg ? -SV : SV);
68826882
}
68836883

6884+
Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
6885+
llvm::Type *Ty1, bool Extract,
6886+
SmallVectorImpl<llvm::Value *> &Ops,
6887+
const CallExpr *E,
6888+
const char *name) {
6889+
llvm::Type *Tys[] = {Ty0, Ty1};
6890+
if (Extract) {
6891+
// Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
6892+
// the vector.
6893+
Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
6894+
Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
6895+
}
6896+
llvm::Value *FPM =
6897+
EmitScalarOrConstFoldImmArg(/* ICEArguments */ 0, E->getNumArgs() - 1, E);
6898+
return EmitFP8NeonCall(CGM.getIntrinsic(IID, Tys), Ops, FPM, name);
6899+
}
6900+
68846901
// Right-shift a vector by a constant.
68856902
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
68866903
llvm::Type *Ty, bool usgn,
@@ -12804,6 +12821,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1280412821
return V;
1280512822

1280612823
unsigned Int;
12824+
bool ExtractLow = false;
1280712825
switch (BuiltinID) {
1280812826
default: return nullptr;
1280912827
case NEON::BI__builtin_neon_vbsl_v:
@@ -14018,117 +14036,58 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1401814036
Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
1401914037
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
1402014038
}
14021-
case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
1402214039
case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
14023-
case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm: {
14024-
Int = Intrinsic::aarch64_neon_fp8_cvtl1;
14025-
llvm::Type *Tys[2];
14026-
Tys[0] = llvm::FixedVectorType::get(BFloatTy, 8);
14027-
// Op[1] is mfloat8x16_t, but the intrinsic converts only the lower part of
14028-
// the vector.
14029-
if (BuiltinID == NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm) {
14030-
Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14031-
/*isQuad*/ false));
14032-
Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14033-
} else
14034-
Tys[1] = Ops[0]->getType();
14035-
llvm::Value *FPM =
14036-
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14037-
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt1");
14038-
}
14039-
case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
14040+
ExtractLow = true;
14041+
LLVM_FALLTHROUGH;
14042+
case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
14043+
case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
14044+
return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
14045+
llvm::FixedVectorType::get(BFloatTy, 8),
14046+
Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
1404014047
case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
14041-
case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm: {
14042-
Int = Intrinsic::aarch64_neon_fp8_cvtl2;
14043-
llvm::Type *Tys[2];
14044-
Tys[0] = llvm::FixedVectorType::get(BFloatTy, 8);
14045-
// Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14046-
// part of the vector.
14047-
if (BuiltinID == NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm) {
14048-
Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14049-
/*isQuad*/ false));
14050-
Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14051-
} else
14052-
Tys[1] = Ops[0]->getType();
14053-
llvm::Value *FPM =
14054-
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14055-
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt2");
14056-
}
14057-
case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
14048+
ExtractLow = true;
14049+
LLVM_FALLTHROUGH;
14050+
case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
14051+
case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
14052+
return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
14053+
llvm::FixedVectorType::get(BFloatTy, 8),
14054+
Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
1405814055
case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
14059-
case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm: {
14060-
Int = Intrinsic::aarch64_neon_fp8_cvtl1;
14061-
llvm::Type *Tys[2];
14062-
Tys[0] = llvm::FixedVectorType::get(HalfTy, 8);
14063-
// Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14064-
// part of the vector.
14065-
if (BuiltinID == NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm) {
14066-
Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14067-
/*isQuad*/ false));
14068-
Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14069-
} else
14070-
Tys[1] = Ops[0]->getType();
14071-
llvm::Value *FPM =
14072-
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14073-
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt1");
14074-
}
14075-
case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
14056+
ExtractLow = true;
14057+
LLVM_FALLTHROUGH;
14058+
case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
14059+
case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
14060+
return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
14061+
llvm::FixedVectorType::get(HalfTy, 8),
14062+
Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
1407614063
case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
14077-
case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm: {
14078-
Int = Intrinsic::aarch64_neon_fp8_cvtl2;
14079-
llvm::Type *Tys[2];
14080-
Tys[0] = llvm::FixedVectorType::get(HalfTy, 8);
14081-
// Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14082-
// part of the vector.
14083-
if (BuiltinID == NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm) {
14084-
Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14085-
/*isQuad*/ false));
14086-
Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14087-
} else
14088-
Tys[1] = Ops[0]->getType();
14089-
llvm::Value *FPM =
14090-
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14091-
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt2");
14092-
}
14093-
case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm: {
14094-
Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14095-
llvm::Type *Tys[2];
14096-
Tys[0] = llvm::FixedVectorType::get(Int8Ty, 8);
14097-
Tys[1] = Ops[0]->getType();
14098-
llvm::Value *FPM =
14099-
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14100-
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14101-
}
14102-
case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm: {
14103-
Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14104-
llvm::Type *Tys[2];
14105-
Tys[0] = llvm::FixedVectorType::get(Int8Ty, 8);
14106-
// Gets the expected type, because arm_neon.h casts float16x4_t to int8x8_t
14107-
Tys[1] = llvm::FixedVectorType::get(HalfTy, 4);
14108-
llvm::Value *FPM =
14109-
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14110-
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14111-
}
14112-
case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm: {
14113-
Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14114-
llvm::Type *Tys[2];
14115-
Tys[0] = llvm::FixedVectorType::get(Int8Ty, 16);
14116-
// Gets the expected type, because arm_neon.h casts float16x8_t to int8x16_t
14117-
Tys[1] = llvm::FixedVectorType::get(HalfTy, 8);
14118-
llvm::Value *FPM =
14119-
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14120-
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14121-
}
14064+
ExtractLow = true;
14065+
LLVM_FALLTHROUGH;
14066+
case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
14067+
case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
14068+
return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
14069+
llvm::FixedVectorType::get(HalfTy, 8),
14070+
Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
14071+
case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
14072+
return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
14073+
llvm::FixedVectorType::get(Int8Ty, 8),
14074+
Ops[0]->getType(), false, Ops, E, "vfcvtn");
14075+
case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
14076+
return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
14077+
llvm::FixedVectorType::get(Int8Ty, 8),
14078+
llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
14079+
E, "vfcvtn");
14080+
case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
14081+
return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
14082+
llvm::FixedVectorType::get(Int8Ty, 16),
14083+
llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
14084+
E, "vfcvtn");
1412214085
case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
14123-
Int = Intrinsic::aarch64_neon_fp8_fcvtn2;
14124-
llvm::Type *Tys[2];
14125-
Tys[0] = llvm::FixedVectorType::get(Int8Ty, 16);
14126-
Tys[1] = Ops[1]->getType();
14127-
Ops[0] = Builder.CreateInsertVector(Tys[0], PoisonValue::get(Tys[0]),
14128-
Ops[0], Builder.getInt64(0));
14129-
llvm::Value *FPM =
14130-
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14131-
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn2");
14086+
llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
14087+
Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
14088+
Builder.getInt64(0));
14089+
return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2,
14090+
Ty, Ops[1]->getType(), false, Ops, E, "vfcvtn2");
1413214091
}
1413314092
case NEON::BI__builtin_neon_vamin_f16:
1413414093
case NEON::BI__builtin_neon_vaminq_f16:

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4666,6 +4666,10 @@ class CodeGenFunction : public CodeGenTypeCache {
46664666
llvm::Value *EmitFP8NeonCall(llvm::Function *F,
46674667
SmallVectorImpl<llvm::Value *> &O,
46684668
llvm::Value *FPM, const char *name);
4669+
llvm::Value *EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
4670+
llvm::Type *Ty1, bool Extract,
4671+
SmallVectorImpl<llvm::Value *> &Ops,
4672+
const CallExpr *E, const char *name);
46694673
llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx,
46704674
const llvm::ElementCount &Count);
46714675
llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx);

0 commit comments

Comments
 (0)