104 changes: 52 additions & 52 deletions clang/include/clang/Basic/arm_neon.td
@@ -279,10 +279,10 @@ def OP_CVT_F32_BF16

// Splat operation - performs a range-checked splat over a vector
def SPLAT : WInst<"splat_lane", ".(!q)I",
"UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl",
"UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPlmQm",
[ImmCheck<1, ImmCheckLaneIndex, 0>]>;
def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
"UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl",
"UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPlmQm",
[ImmCheck<1, ImmCheckLaneIndex, 0>]>;
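For orientation: the quoted string in each def lists the element-type codes for which NeonEmitter instantiates the intrinsic, and this patch appends 'm' (the 64-bit mfloat8x8_t vector) and 'Qm' (the 128-bit mfloat8x16_t vector) throughout the file. A minimal sketch of the types those two codes refer to, assuming the typedef style arm_neon.h normally uses (illustrative, not part of the patch):

// Sketch only: the mfloat8 NEON vector types selected by the new
// 'm' and 'Qm' type codes, assuming the usual arm_neon.h spelling.
typedef __mfp8 mfloat8_t;                                              // ACLE scalar
typedef __attribute__((neon_vector_type(8))) mfloat8_t mfloat8x8_t;   // 'm'
typedef __attribute__((neon_vector_type(16))) mfloat8_t mfloat8x16_t; // 'Qm'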

let TargetGuard = "bf16,neon" in {
@@ -547,40 +547,40 @@ def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh",
// E.3.16 Extract lanes from a vector
let InstName = "vmov" in
def VGET_LANE : IInst<"vget_lane", "1.I",
"UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl",
"UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlmQm",
[ImmCheck<1, ImmCheckLaneIndex, 0>]>;

////////////////////////////////////////////////////////////////////////////////
// E.3.17 Set lanes within a vector
let InstName = "vmov" in
def VSET_LANE : IInst<"vset_lane", ".1.I",
"UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl",
"UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlmQm",
[ImmCheck<2, ImmCheckLaneIndex, 1>]>;
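Sketched user-facing effect of the two defs above, assuming the regular ACLE _mf8 suffix rules apply (example code, not from the patch):

#include <arm_neon.h>

// Sketch: lane read/write on mfloat8 vectors through the new overloads.
mfloat8_t take_lane0(mfloat8x8_t v) { return vget_lane_mf8(v, 0); }
mfloat8x8_t put_lane0(mfloat8x8_t v, mfloat8_t s) {
  return vset_lane_mf8(s, v, 0); // the lane index is immediate-checked
}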

////////////////////////////////////////////////////////////////////////////////
// E.3.18 Initialize a vector from bit pattern
def VCREATE : NoTestOpInst<"vcreate", ".(IU>)", "csihfUcUsUiUlPcPsl", OP_CAST> {
def VCREATE : NoTestOpInst<"vcreate", ".(IU>)", "csihfUcUsUiUlPcPslm", OP_CAST> {
let BigEndianSafe = 1;
}

////////////////////////////////////////////////////////////////////////////////
// E.3.19 Set all lanes to same value
let InstName = "vmov" in {
def VDUP_N : WOpInst<"vdup_n", ".1",
"UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl",
"UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUlmQm",
OP_DUP>;
def VMOV_N : WOpInst<"vmov_n", ".1",
"UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl",
"UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUlmQm",
OP_DUP>;
}
let InstName = "" in
def VDUP_LANE: WOpInst<"vdup_lane", ".qI",
"UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl",
"UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUlmQm",
OP_DUP_LN>;

////////////////////////////////////////////////////////////////////////////////
// E.3.20 Combining vectors
def VCOMBINE : NoTestOpInst<"vcombine", "Q..", "csilhfUcUsUiUlPcPs", OP_CONC>;
def VCOMBINE : NoTestOpInst<"vcombine", "Q..", "csilhfUcUsUiUlPcPsm", OP_CONC>;

////////////////////////////////////////////////////////////////////////////////
// E.3.21 Splitting vectors
@@ -589,8 +589,8 @@ def VCOMBINE : NoTestOpInst<"vcombine", "Q..", "csilhfUcUsUiUlPcPs", OP_CONC>;
// versions of these intrinsics in both AArch32 and AArch64 architectures. See
// D45668 for more details.
let InstName = "vmov" in {
def VGET_HIGH : NoTestOpInst<"vget_high", ".Q", "csilhfUcUsUiUlPcPs", OP_HI>;
def VGET_LOW : NoTestOpInst<"vget_low", ".Q", "csilhfUcUsUiUlPcPs", OP_LO>;
def VGET_HIGH : NoTestOpInst<"vget_high", ".Q", "csilhfUcUsUiUlPcPsm", OP_HI>;
def VGET_LOW : NoTestOpInst<"vget_low", ".Q", "csilhfUcUsUiUlPcPsm", OP_LO>;
}
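A short usage sketch of what the 'm' additions to vcombine/vget_high/vget_low enable, assuming ACLE naming (illustrative only):

#include <arm_neon.h>

// Sketch: combine two 64-bit mfloat8 vectors, then split the result again.
mfloat8x16_t combine_and_split(mfloat8x8_t lo, mfloat8x8_t hi) {
  mfloat8x16_t full = vcombine_mf8(lo, hi);
  return vcombine_mf8(vget_low_mf8(full), vget_high_mf8(full));
}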

////////////////////////////////////////////////////////////////////////////////
@@ -619,16 +619,16 @@ def VQMOVUN : SInst<"vqmovun", "(<U)Q", "sil">;
////////////////////////////////////////////////////////////////////////////////
// E.3.23-24 Table lookup, Extended table lookup
let InstName = "vtbl" in {
def VTBL1 : WInst<"vtbl1", "..p", "UccPc">;
def VTBL2 : WInst<"vtbl2", ".2p", "UccPc">;
def VTBL3 : WInst<"vtbl3", ".3p", "UccPc">;
def VTBL4 : WInst<"vtbl4", ".4p", "UccPc">;
def VTBL1 : WInst<"vtbl1", "..p", "UccPcm">;
def VTBL2 : WInst<"vtbl2", ".2p", "UccPcm">;
def VTBL3 : WInst<"vtbl3", ".3p", "UccPcm">;
def VTBL4 : WInst<"vtbl4", ".4p", "UccPcm">;
}
let InstName = "vtbx" in {
def VTBX1 : WInst<"vtbx1", "...p", "UccPc">;
def VTBX2 : WInst<"vtbx2", "..2p", "UccPc">;
def VTBX3 : WInst<"vtbx3", "..3p", "UccPc">;
def VTBX4 : WInst<"vtbx4", "..4p", "UccPc">;
def VTBX1 : WInst<"vtbx1", "...p", "UccPcm">;
def VTBX2 : WInst<"vtbx2", "..2p", "UccPcm">;
def VTBX3 : WInst<"vtbx3", "..3p", "UccPcm">;
def VTBX4 : WInst<"vtbx4", "..4p", "UccPcm">;
}

////////////////////////////////////////////////////////////////////////////////
@@ -677,15 +677,15 @@ def VQDMLSL_N : SOpInst<"vqdmlsl_n", "(>Q)(>Q).1", "si", OP_QDMLSL_N>;
////////////////////////////////////////////////////////////////////////////////
// E.3.26 Vector Extract
def VEXT : WInst<"vext", "...I",
"cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf",
"cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQfmQm",
[ImmCheck<2, ImmCheckLaneIndex, 0>]>;

////////////////////////////////////////////////////////////////////////////////
// E.3.27 Reverse vector elements
def VREV64 : WOpInst<"vrev64", "..", "csiUcUsUiPcPsfQcQsQiQUcQUsQUiQPcQPsQf",
def VREV64 : WOpInst<"vrev64", "..", "csiUcUsUiPcPsfQcQsQiQUcQUsQUiQPcQPsQfmQm",
OP_REV64>;
def VREV32 : WOpInst<"vrev32", "..", "csUcUsPcPsQcQsQUcQUsQPcQPs", OP_REV32>;
def VREV16 : WOpInst<"vrev16", "..", "cUcPcQcQUcQPc", OP_REV16>;
def VREV32 : WOpInst<"vrev32", "..", "csUcUsPcPsQcQsQUcQUsQPcQPsmQm", OP_REV32>;
def VREV16 : WOpInst<"vrev16", "..", "cUcPcQcQUcQPcmQm", OP_REV16>;

////////////////////////////////////////////////////////////////////////////////
// E.3.28 Other single operand arithmetic
Expand All @@ -709,13 +709,13 @@ def VBIC : LOpInst<"vbic", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ANDN>;
def VORN : LOpInst<"vorn", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ORN>;
let isHiddenLInst = 1 in
def VBSL : SInst<"vbsl", ".U..",
"csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs">;
"csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPsmQm">;

////////////////////////////////////////////////////////////////////////////////
// E.3.30 Transposition operations
def VTRN : WInst<"vtrn", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">;
def VZIP : WInst<"vzip", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">;
def VUZP : WInst<"vuzp", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">;
def VTRN : WInst<"vtrn", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPsmQm">;
def VZIP : WInst<"vzip", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPsmQm">;
def VUZP : WInst<"vuzp", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPsmQm">;

////////////////////////////////////////////////////////////////////////////////

@@ -1028,19 +1028,19 @@ def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl",
def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl",
[ImmCheck<2, ImmCheckLaneIndex, 1>]>;
def COPY_LANE : IOpInst<"vcopy_lane", "..I.I",
"csilUcUsUiUlPcPsPlfd", OP_COPY_LN>;
"csilUcUsUiUlPcPsPlfdm", OP_COPY_LN>;
def COPYQ_LANE : IOpInst<"vcopy_lane", "..IqI",
"QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>;
"QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPlQm", OP_COPY_LN>;
def COPY_LANEQ : IOpInst<"vcopy_laneq", "..IQI",
"csilPcPsPlUcUsUiUlfd", OP_COPY_LN>;
"csilPcPsPlUcUsUiUlfdm", OP_COPY_LN>;
def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "..I.I",
"QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>;
"QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPlQm", OP_COPY_LN>;

////////////////////////////////////////////////////////////////////////////////
// Set all lanes to same value
def VDUP_LANE1: WOpInst<"vdup_lane", ".qI", "dQdPlQPl", OP_DUP_LN>;
def VDUP_LANE2: WOpInst<"vdup_laneq", ".QI",
"csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl",
"csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPlmQm",
OP_DUP_LN>;
def DUP_N : WOpInst<"vdup_n", ".1", "dQdPlQPl", OP_DUP>;
def MOV_N : WOpInst<"vmov_n", ".1", "dQdPlQPl", OP_DUP>;
@@ -1266,31 +1266,31 @@ def FMINNM_S64 : SInst<"vminnm", "...", "dQd">;
////////////////////////////////////////////////////////////////////////////////
// Permutation
def VTRN1 : SOpInst<"vtrn1", "...",
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN1>;
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPlmQm", OP_TRN1>;
def VZIP1 : SOpInst<"vzip1", "...",
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP1>;
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPlmQm", OP_ZIP1>;
def VUZP1 : SOpInst<"vuzp1", "...",
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP1>;
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPlmQm", OP_UZP1>;
def VTRN2 : SOpInst<"vtrn2", "...",
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN2>;
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPlmQm", OP_TRN2>;
def VZIP2 : SOpInst<"vzip2", "...",
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP2>;
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPlmQm", OP_ZIP2>;
def VUZP2 : SOpInst<"vuzp2", "...",
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP2>;
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPlmQm", OP_UZP2>;

////////////////////////////////////////////////////////////////////////////////
// Table lookup
let InstName = "vtbl" in {
def VQTBL1_A64 : WInst<"vqtbl1", ".QU", "UccPcQUcQcQPc">;
def VQTBL2_A64 : WInst<"vqtbl2", ".(2Q)U", "UccPcQUcQcQPc">;
def VQTBL3_A64 : WInst<"vqtbl3", ".(3Q)U", "UccPcQUcQcQPc">;
def VQTBL4_A64 : WInst<"vqtbl4", ".(4Q)U", "UccPcQUcQcQPc">;
def VQTBL1_A64 : WInst<"vqtbl1", ".QU", "UccPcQUcQcQPcmQm">;
def VQTBL2_A64 : WInst<"vqtbl2", ".(2Q)U", "UccPcQUcQcQPcmQm">;
def VQTBL3_A64 : WInst<"vqtbl3", ".(3Q)U", "UccPcQUcQcQPcmQm">;
def VQTBL4_A64 : WInst<"vqtbl4", ".(4Q)U", "UccPcQUcQcQPcmQm">;
}
let InstName = "vtbx" in {
def VQTBX1_A64 : WInst<"vqtbx1", "..QU", "UccPcQUcQcQPc">;
def VQTBX2_A64 : WInst<"vqtbx2", "..(2Q)U", "UccPcQUcQcQPc">;
def VQTBX3_A64 : WInst<"vqtbx3", "..(3Q)U", "UccPcQUcQcQPc">;
def VQTBX4_A64 : WInst<"vqtbx4", "..(4Q)U", "UccPcQUcQcQPc">;
def VQTBX1_A64 : WInst<"vqtbx1", "..QU", "UccPcQUcQcQPcmQm">;
def VQTBX2_A64 : WInst<"vqtbx2", "..(2Q)U", "UccPcQUcQcQPcmQm">;
def VQTBX3_A64 : WInst<"vqtbx3", "..(3Q)U", "UccPcQUcQcQPcmQm">;
def VQTBX4_A64 : WInst<"vqtbx4", "..(4Q)U", "UccPcQUcQcQPcmQm">;
}

////////////////////////////////////////////////////////////////////////////////
@@ -1654,9 +1654,9 @@ def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "111.I", "SsSi", OP_SCALAR_Q
def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLSH_LN>;
} // TargetGuard = "v8.1a"

def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs",
def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPsSm",
[ImmCheck<1, ImmCheckLaneIndex, 0>]>;
def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs",
def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPsSm",
[ImmCheck<1, ImmCheckLaneIndex, 0>]>;

} // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)"
@@ -2090,17 +2090,17 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "r

// Lookup table read with 2-bit/4-bit indices
let ArchGuard = "defined(__aarch64__)", TargetGuard = "lut" in {
def VLUTI2_B : SInst<"vluti2_lane", "Q.(qU)I", "cUcPcQcQUcQPc",
def VLUTI2_B : SInst<"vluti2_lane", "Q.(qU)I", "cUcPcmQcQUcQPcQm",
[ImmCheck<2, ImmCheck0_1>]>;
def VLUTI2_B_Q : SInst<"vluti2_laneq", "Q.(QU)I", "cUcPcQcQUcQPc",
def VLUTI2_B_Q : SInst<"vluti2_laneq", "Q.(QU)I", "cUcPcmQcQUcQPcQm",
[ImmCheck<2, ImmCheck0_3>]>;
def VLUTI2_H : SInst<"vluti2_lane", "Q.(<qU)I", "sUsPshQsQUsQPsQh",
[ImmCheck<2, ImmCheck0_3>]>;
def VLUTI2_H_Q : SInst<"vluti2_laneq", "Q.(<QU)I", "sUsPshQsQUsQPsQh",
[ImmCheck<2, ImmCheck0_7>]>;
def VLUTI4_B : SInst<"vluti4_lane", "..(qU)I", "QcQUcQPc",
def VLUTI4_B : SInst<"vluti4_lane", "..(qU)I", "QcQUcQPcQm",
[ImmCheck<2, ImmCheck0_0>]>;
def VLUTI4_B_Q : SInst<"vluti4_laneq", "..UI", "QcQUcQPc",
def VLUTI4_B_Q : SInst<"vluti4_laneq", "..UI", "QcQUcQPcQm",
[ImmCheck<2, ImmCheck0_1>]>;
def VLUTI4_H_X2 : SInst<"vluti4_lane_x2", ".2(<qU)I", "QsQUsQPsQh",
[ImmCheck<3, ImmCheck0_1>]>;
5 changes: 5 additions & 0 deletions clang/lib/AST/Type.cpp
@@ -2824,6 +2824,11 @@ static bool isTriviallyCopyableTypeImpl(const QualType &type,
if (CanonicalType->isScalarType() || CanonicalType->isVectorType())
return true;

// Mfloat8 type is a special case: it is not scalar, but is still trivially
// copyable.
if (CanonicalType->isMFloat8Type())
return true;

if (const auto *RT = CanonicalType->getAs<RecordType>()) {
if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RT->getDecl())) {
if (IsCopyConstructible) {
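A hedged illustration of what this check is meant to allow (assumes a target where the __mfp8 scalar type is available):

// Sketch: with the new special case, the trait below should evaluate to
// true, so generic code may memcpy __mfp8 values even though the type is
// not classified as scalar.
static_assert(__is_trivially_copyable(__mfp8),
              "mfloat8 values are trivially copyable");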
9 changes: 9 additions & 0 deletions clang/lib/CodeGen/CGCall.cpp
@@ -5484,6 +5484,15 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
Builder.CreateStore(errorValue, swiftErrorTemp);
}

// Mfloat8 type is loaded as a scalar type, but is treated as a single-element
// vector type for other operations. We need to bitcast it to the vector
// type here.
if (auto *EltTy =
Contributor Author:

I am not sure if this is the best way to solve this issue, so I would appreciate your feedback on it.

Collaborator:

I don't see an issue here. That is exactly what should happen regardless of the target architecture any time the ABI for that architecture says values of type T are passed as <1 x T>.

Collaborator:

Does the ABI say this? My understanding is that values of type _mfp8 are floating-point 8-bit values that are passed as _mfp8. Pretending it's an i8 in some cases and <1 x i8> in others is purely an implementation detail within clang.

This is not to say the code is invalid, but we should be cautious with how far down the rabbit hole we go.

FYI: As part of @MacDue's work to improve streaming-mode code generation I asked him to add the MVT aarch64mfp8 along with support to load and store it. I expect over time we'll migrate away from using i8 as our scalar type.

Collaborator:

Not sure what the fallout will be from this, but I think the problem here is that we should not have loaded a scalar in the first place. Looking at CodeGenTypes::ConvertTypeForMem() I can see that we're using a different type for the memory representation than the normal one, which I think is a mistake.

Changing this so the types are consistent will remove the need for this code, but I suspect it'll prompt further work elsewhere. My hope is that that work sits in target-specific areas relating to modelling the builtin, so it seems reasonable. Please shout though if it starts to get out of control.

Contributor Author:

Done

Collaborator:

Does the ABI say this?

It doesn't. Unfortunately this discussion was split and I didn't replicate all my comments here.

Momchil Velikov 15 Apr at 16:11
The ABI spec (naturally) does not say anything about <1 x i8>. It says (in a somewhat obscure way) that the value is passed in an FPR.
And then clang/llvm decide to implement the ABI by mapping to <1 x T>.

I consider the "natural" mapping of __mfp8 to LLVM types to be i8 and <1 x i8> to be merely a hack coming from the peculiar way of implementing ABIs in clang/llvm (by implicit contracts and "mutual understading"). As such <1 x i8> out to be applicable only for values that are arguments passed in registers.

Contributor Author:

I'm not yet confident in my understanding of the trade-offs between the two approaches, besides that one impacts target-specific code while the other affects target-independent code. As such, I don't feel well-positioned to contribute meaningfully to this discussion. That said, I'd appreciate it if we could reach alignment here, as I'd like to merge this patch soon.

Collaborator:

The underlying storage for __mfp8 is an FPR, and until we either decide to use a dedicated target type or LLVM gains an opaque 8-bit floating-point type, our only option is to represent it as an i8 vector type.

The reason for using i8 was some specific code reuse, but as this PR showed, that reuse is not total, so I'd rather we just be honest and insert the relevant bitcasts when necessary. This will put us in good stead if we decide to go the target-type route.

dyn_cast<llvm::FixedVectorType>(ArgInfo.getCoerceToType());
EltTy && EltTy->getNumElements() == 1 &&
V->getType() == EltTy->getScalarType())
V = Builder.CreateBitCast(V, EltTy);

// We might have to widen integers, but we should never truncate.
if (ArgInfo.getCoerceToType() != V->getType() &&
V->getType()->isIntegerTy())
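A standalone sketch of the coercion this hunk performs, written against the LLVM C++ API; the helper name is invented for illustration:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

// Sketch: when the ABI coercion type is a one-element vector of the
// value's scalar type (e.g. i8 vs. <1 x i8> for __mfp8 arguments),
// bitcast the scalar to the vector form before emitting the call.
static llvm::Value *coerceScalarToOneElementVector(llvm::IRBuilder<> &Builder,
                                                   llvm::Value *V,
                                                   llvm::Type *CoerceTy) {
  if (auto *VecTy = llvm::dyn_cast<llvm::FixedVectorType>(CoerceTy))
    if (VecTy->getNumElements() == 1 && V->getType() == VecTy->getScalarType())
      return Builder.CreateBitCast(V, VecTy);
  return V; // no coercion needed
}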
20 changes: 20 additions & 0 deletions clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -2624,22 +2624,26 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) {
case NEON::BI__builtin_neon_vget_lane_bf16:
case NEON::BI__builtin_neon_vget_lane_i32:
case NEON::BI__builtin_neon_vget_lane_i64:
case NEON::BI__builtin_neon_vget_lane_mf8:
case NEON::BI__builtin_neon_vget_lane_f32:
case NEON::BI__builtin_neon_vgetq_lane_i8:
case NEON::BI__builtin_neon_vgetq_lane_i16:
case NEON::BI__builtin_neon_vgetq_lane_bf16:
case NEON::BI__builtin_neon_vgetq_lane_i32:
case NEON::BI__builtin_neon_vgetq_lane_i64:
case NEON::BI__builtin_neon_vgetq_lane_mf8:
case NEON::BI__builtin_neon_vgetq_lane_f32:
case NEON::BI__builtin_neon_vduph_lane_bf16:
case NEON::BI__builtin_neon_vduph_laneq_bf16:
case NEON::BI__builtin_neon_vset_lane_i8:
case NEON::BI__builtin_neon_vset_lane_mf8:
case NEON::BI__builtin_neon_vset_lane_i16:
case NEON::BI__builtin_neon_vset_lane_bf16:
case NEON::BI__builtin_neon_vset_lane_i32:
case NEON::BI__builtin_neon_vset_lane_i64:
case NEON::BI__builtin_neon_vset_lane_f32:
case NEON::BI__builtin_neon_vsetq_lane_i8:
case NEON::BI__builtin_neon_vsetq_lane_mf8:
case NEON::BI__builtin_neon_vsetq_lane_i16:
case NEON::BI__builtin_neon_vsetq_lane_bf16:
case NEON::BI__builtin_neon_vsetq_lane_i32:
@@ -6162,6 +6166,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
Ops.push_back(EmitScalarExpr(E->getArg(2)));
return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
case NEON::BI__builtin_neon_vset_lane_mf8:
case NEON::BI__builtin_neon_vsetq_lane_mf8:
Ops.push_back(EmitScalarExpr(E->getArg(2)));
return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
case NEON::BI__builtin_neon_vsetq_lane_f64:
// The vector type needs a cast for the v2f64 variant.
Ops[1] =
@@ -6181,6 +6189,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vgetq_lane");
case NEON::BI__builtin_neon_vget_lane_mf8:
case NEON::BI__builtin_neon_vdupb_lane_mf8:
case NEON::BI__builtin_neon_vgetq_lane_mf8:
case NEON::BI__builtin_neon_vdupb_laneq_mf8:
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vget_lane");
case NEON::BI__builtin_neon_vget_lane_i16:
case NEON::BI__builtin_neon_vduph_lane_i16:
Ops[0] =
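Reduced sketch of the pattern the new mf8 lane cases above follow: unlike the i8/i16 cases around them, no bitcast of the vector operand is needed first, so the builtins lower directly to insert/extractelement (helper names invented for illustration):

#include "llvm/IR/IRBuilder.h"

// Sketch: mf8 lane access lowers to plain element insert/extract because
// the operand already carries the right single-byte-element vector type.
static llvm::Value *emitMF8GetLane(llvm::IRBuilder<> &B, llvm::Value *Vec,
                                   llvm::Value *Lane) {
  return B.CreateExtractElement(Vec, Lane, "vget_lane");
}
static llvm::Value *emitMF8SetLane(llvm::IRBuilder<> &B, llvm::Value *Vec,
                                   llvm::Value *Scalar, llvm::Value *Lane) {
  return B.CreateInsertElement(Vec, Scalar, Lane, "vset_lane");
}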
@@ -7630,6 +7644,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
}

case NEON::BI__builtin_neon_vluti2_laneq_mf8:
case NEON::BI__builtin_neon_vluti2_laneq_bf16:
case NEON::BI__builtin_neon_vluti2_laneq_f16:
case NEON::BI__builtin_neon_vluti2_laneq_p16:
@@ -7645,6 +7660,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
/*isQuad*/ false));
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
}
case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
case NEON::BI__builtin_neon_vluti2q_laneq_f16:
case NEON::BI__builtin_neon_vluti2q_laneq_p16:
@@ -7660,6 +7676,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
/*isQuad*/ true));
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
}
case NEON::BI__builtin_neon_vluti2_lane_mf8:
case NEON::BI__builtin_neon_vluti2_lane_bf16:
case NEON::BI__builtin_neon_vluti2_lane_f16:
case NEON::BI__builtin_neon_vluti2_lane_p16:
@@ -7675,6 +7692,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
/*isQuad*/ false));
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
}
case NEON::BI__builtin_neon_vluti2q_lane_mf8:
case NEON::BI__builtin_neon_vluti2q_lane_bf16:
case NEON::BI__builtin_neon_vluti2q_lane_f16:
case NEON::BI__builtin_neon_vluti2q_lane_p16:
Expand All @@ -7690,12 +7708,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
/*isQuad*/ true));
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
}
case NEON::BI__builtin_neon_vluti4q_lane_mf8:
case NEON::BI__builtin_neon_vluti4q_lane_p8:
case NEON::BI__builtin_neon_vluti4q_lane_s8:
case NEON::BI__builtin_neon_vluti4q_lane_u8: {
Int = Intrinsic::aarch64_neon_vluti4q_lane;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
}
case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
case NEON::BI__builtin_neon_vluti4q_laneq_p8:
case NEON::BI__builtin_neon_vluti4q_laneq_s8:
case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
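Usage sketch for the new mf8 lookup-table variants handled above, assuming the ACLE FP8 naming and the +lut target feature (illustrative, not from the patch):

#include <arm_neon.h>

// Sketch: 2-bit-indexed table lookup over an mfloat8 table; for the
// 64-bit table form the lane immediate is checked against [0, 1].
mfloat8x16_t lookup2(mfloat8x8_t table, uint8x8_t indices) {
  return vluti2_lane_mf8(table, indices, 0);
}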
2 changes: 2 additions & 0 deletions clang/lib/Sema/SemaInit.cpp
@@ -1944,6 +1944,8 @@ void InitListChecker::CheckVectorType(const InitializedEntity &Entity,
typeCode = "s";
else if (elementType->isUnsignedIntegerType())
typeCode = "u";
else if (elementType->isMFloat8Type())
typeCode = "mf";
else
llvm_unreachable("Invalid element type!");

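The typeCode string above feeds the element-type suffix used in Sema's NEON vector-initializer diagnostics; without the new branch, an mfloat8 element type would hit the llvm_unreachable. A hedged sketch of source that now reaches this path cleanly:

#include <arm_neon.h>

// Sketch: brace-initializing a NEON mfloat8 vector; initializer checking
// in CheckVectorType now has an "mf" element-type path for this instead
// of asserting.
mfloat8x8_t splat_pair(mfloat8_t a, mfloat8_t b) {
  mfloat8x8_t v = {a, b, a, b, a, b, a, b};
  return v;
}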