Skip to content

Commit f8ee48b

Browse files
[fixup] Refector much of common code into a helper function (NFC)
1 parent af18c3f commit f8ee48b

File tree

2 files changed

+69
-106
lines changed

2 files changed

+69
-106
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 65 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -6921,6 +6921,23 @@ Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
69216921
return ConstantInt::get(Ty, neg ? -SV : SV);
69226922
}
69236923

6924+
Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
6925+
llvm::Type *Ty1, bool Extract,
6926+
SmallVectorImpl<llvm::Value *> &Ops,
6927+
const CallExpr *E,
6928+
const char *name) {
6929+
llvm::Type *Tys[] = {Ty0, Ty1};
6930+
if (Extract) {
6931+
// Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
6932+
// the vector.
6933+
Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
6934+
Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
6935+
}
6936+
llvm::Value *FPM =
6937+
EmitScalarOrConstFoldImmArg(/* ICEArguments */ 0, E->getNumArgs() - 1, E);
6938+
return EmitFP8NeonCall(CGM.getIntrinsic(IID, Tys), Ops, FPM, name);
6939+
}
6940+
69246941
// Right-shift a vector by a constant.
69256942
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
69266943
llvm::Type *Ty, bool usgn,
@@ -12874,6 +12891,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1287412891
return V;
1287512892

1287612893
unsigned Int;
12894+
bool ExtractLow = false;
1287712895
switch (BuiltinID) {
1287812896
default: return nullptr;
1287912897
case NEON::BI__builtin_neon_vbsl_v:
@@ -14088,117 +14106,58 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1408814106
Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
1408914107
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
1409014108
}
14091-
case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
1409214109
case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
14093-
case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm: {
14094-
Int = Intrinsic::aarch64_neon_fp8_cvtl1;
14095-
llvm::Type *Tys[2];
14096-
Tys[0] = llvm::FixedVectorType::get(BFloatTy, 8);
14097-
// Op[1] is mfloat8x16_t, but the intrinsic converts only the lower part of
14098-
// the vector.
14099-
if (BuiltinID == NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm) {
14100-
Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14101-
/*isQuad*/ false));
14102-
Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14103-
} else
14104-
Tys[1] = Ops[0]->getType();
14105-
llvm::Value *FPM =
14106-
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14107-
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt1");
14108-
}
14109-
case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
14110+
ExtractLow = true;
14111+
LLVM_FALLTHROUGH;
14112+
case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
14113+
case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
14114+
return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
14115+
llvm::FixedVectorType::get(BFloatTy, 8),
14116+
Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
1411014117
case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
14111-
case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm: {
14112-
Int = Intrinsic::aarch64_neon_fp8_cvtl2;
14113-
llvm::Type *Tys[2];
14114-
Tys[0] = llvm::FixedVectorType::get(BFloatTy, 8);
14115-
// Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14116-
// part of the vector.
14117-
if (BuiltinID == NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm) {
14118-
Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14119-
/*isQuad*/ false));
14120-
Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14121-
} else
14122-
Tys[1] = Ops[0]->getType();
14123-
llvm::Value *FPM =
14124-
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14125-
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt2");
14126-
}
14127-
case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
14118+
ExtractLow = true;
14119+
LLVM_FALLTHROUGH;
14120+
case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
14121+
case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
14122+
return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
14123+
llvm::FixedVectorType::get(BFloatTy, 8),
14124+
Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
1412814125
case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
14129-
case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm: {
14130-
Int = Intrinsic::aarch64_neon_fp8_cvtl1;
14131-
llvm::Type *Tys[2];
14132-
Tys[0] = llvm::FixedVectorType::get(HalfTy, 8);
14133-
// Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14134-
// part of the vector.
14135-
if (BuiltinID == NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm) {
14136-
Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14137-
/*isQuad*/ false));
14138-
Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14139-
} else
14140-
Tys[1] = Ops[0]->getType();
14141-
llvm::Value *FPM =
14142-
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14143-
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt1");
14144-
}
14145-
case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
14126+
ExtractLow = true;
14127+
LLVM_FALLTHROUGH;
14128+
case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
14129+
case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
14130+
return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
14131+
llvm::FixedVectorType::get(HalfTy, 8),
14132+
Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
1414614133
case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
14147-
case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm: {
14148-
Int = Intrinsic::aarch64_neon_fp8_cvtl2;
14149-
llvm::Type *Tys[2];
14150-
Tys[0] = llvm::FixedVectorType::get(HalfTy, 8);
14151-
// Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14152-
// part of the vector.
14153-
if (BuiltinID == NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm) {
14154-
Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14155-
/*isQuad*/ false));
14156-
Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14157-
} else
14158-
Tys[1] = Ops[0]->getType();
14159-
llvm::Value *FPM =
14160-
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14161-
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt2");
14162-
}
14163-
case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm: {
14164-
Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14165-
llvm::Type *Tys[2];
14166-
Tys[0] = llvm::FixedVectorType::get(Int8Ty, 8);
14167-
Tys[1] = Ops[0]->getType();
14168-
llvm::Value *FPM =
14169-
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14170-
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14171-
}
14172-
case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm: {
14173-
Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14174-
llvm::Type *Tys[2];
14175-
Tys[0] = llvm::FixedVectorType::get(Int8Ty, 8);
14176-
// Gets the expected type, because arm_neon.h casts float16x4_t to int8x8_t
14177-
Tys[1] = llvm::FixedVectorType::get(HalfTy, 4);
14178-
llvm::Value *FPM =
14179-
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14180-
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14181-
}
14182-
case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm: {
14183-
Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14184-
llvm::Type *Tys[2];
14185-
Tys[0] = llvm::FixedVectorType::get(Int8Ty, 16);
14186-
// Gets the expected type, because arm_neon.h casts float16x8_t to int8x16_t
14187-
Tys[1] = llvm::FixedVectorType::get(HalfTy, 8);
14188-
llvm::Value *FPM =
14189-
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14190-
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14191-
}
14134+
ExtractLow = true;
14135+
LLVM_FALLTHROUGH;
14136+
case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
14137+
case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
14138+
return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
14139+
llvm::FixedVectorType::get(HalfTy, 8),
14140+
Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
14141+
case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
14142+
return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
14143+
llvm::FixedVectorType::get(Int8Ty, 8),
14144+
Ops[0]->getType(), false, Ops, E, "vfcvtn");
14145+
case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
14146+
return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
14147+
llvm::FixedVectorType::get(Int8Ty, 8),
14148+
llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
14149+
E, "vfcvtn");
14150+
case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
14151+
return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
14152+
llvm::FixedVectorType::get(Int8Ty, 16),
14153+
llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
14154+
E, "vfcvtn");
1419214155
case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
14193-
Int = Intrinsic::aarch64_neon_fp8_fcvtn2;
14194-
llvm::Type *Tys[2];
14195-
Tys[0] = llvm::FixedVectorType::get(Int8Ty, 16);
14196-
Tys[1] = Ops[1]->getType();
14197-
Ops[0] = Builder.CreateInsertVector(Tys[0], PoisonValue::get(Tys[0]),
14198-
Ops[0], Builder.getInt64(0));
14199-
llvm::Value *FPM =
14200-
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14201-
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn2");
14156+
llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
14157+
Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
14158+
Builder.getInt64(0));
14159+
return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2,
14160+
Ty, Ops[1]->getType(), false, Ops, E, "vfcvtn2");
1420214161
}
1420314162
case NEON::BI__builtin_neon_vamin_f16:
1420414163
case NEON::BI__builtin_neon_vaminq_f16:

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4666,6 +4666,10 @@ class CodeGenFunction : public CodeGenTypeCache {
46664666
llvm::Value *EmitFP8NeonCall(llvm::Function *F,
46674667
SmallVectorImpl<llvm::Value *> &O,
46684668
llvm::Value *FPM, const char *name);
4669+
llvm::Value *EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
4670+
llvm::Type *Ty1, bool Extract,
4671+
SmallVectorImpl<llvm::Value *> &Ops,
4672+
const CallExpr *E, const char *name);
46694673
llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx,
46704674
const llvm::ElementCount &Count);
46714675
llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx);

0 commit comments

Comments
 (0)