Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
737470e
[AArch64][GlobalISel] Improve lowering of vector fp16 fptrunc and fpext
HolyMolyCowMan Oct 13, 2025
ae3ef1e
Address review comments
HolyMolyCowMan Oct 17, 2025
43b1509
Separate FPEXT & FPTRUNC changes
HolyMolyCowMan Oct 20, 2025
437caa3
Add new opcode for rounding to odd
HolyMolyCowMan Oct 21, 2025
6abe127
Disable combiner
HolyMolyCowMan Oct 21, 2025
13fc5dc
Use tablegen for matching
HolyMolyCowMan Oct 21, 2025
0ceacd7
Remove unused code
HolyMolyCowMan Oct 21, 2025
8b85744
Update tests
HolyMolyCowMan Oct 28, 2025
ec102fc
Move from a custom legalize function to lowering
HolyMolyCowMan Oct 29, 2025
411afc0
Linting
HolyMolyCowMan Oct 29, 2025
60b6da7
Update vocab & entities lists
HolyMolyCowMan Oct 29, 2025
76a03d6
Update reference_triplets.txt
HolyMolyCowMan Oct 29, 2025
a5635b7
Add trailing new line to reference_triplets.txt
HolyMolyCowMan Oct 29, 2025
5f97537
Add G_FPTRUNC_ODD to generic opcodes & check legality before emitting…
HolyMolyCowMan Oct 30, 2025
3671057
Undo removal of newline
HolyMolyCowMan Nov 3, 2025
39c3e04
Remove shared opcode & subsequent changes
HolyMolyCowMan Nov 26, 2025
0604176
Rework to make AArch64 specific & handle splitting into legal chunks
HolyMolyCowMan Nov 27, 2025
3513809
Merge remote-tracking branch 'origin/main' into fp16-fptrunc-fpext-lo…
HolyMolyCowMan Nov 27, 2025
74aa139
Linting
HolyMolyCowMan Nov 27, 2025
a1bf07a
Update tests
HolyMolyCowMan Nov 27, 2025
60cbbc7
Remove unnecessary brackets, revert a change & modify comment
HolyMolyCowMan Nov 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -1332,6 +1332,21 @@ class LLVM_ABI MachineIRBuilder {
buildFPTrunc(const DstOp &Res, const SrcOp &Op,
std::optional<unsigned> Flags = std::nullopt);

/// Build and insert \p Res = G_FPTRUNC_ODD \p Op
///
/// G_FPTRUNC_ODD converts a floating-point value into one with a smaller type
/// using round to odd.
///
/// \pre setBasicBlock or setMI must have been called.
/// \pre \p Res must be a generic virtual register with scalar or vector type.
/// \pre \p Op must be a generic virtual register with scalar or vector type.
/// \pre \p Res must be smaller than \p Op.
///
/// \param Flags Optional instruction flags; they are handed to the
///        instruction builder together with \p Res and \p Op.
///
/// \return The newly created instruction.
MachineInstrBuilder
buildFPTruncOdd(const DstOp &Res, const SrcOp &Op,
std::optional<unsigned> Flags = std::nullopt);

/// Build and insert \p Res = G_TRUNC \p Op
///
/// G_TRUNC extracts the low bits of a type. For a vector type each element is
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/Support/TargetOpcodes.def
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,9 @@ HANDLE_TARGET_OPCODE(G_FPEXT)
/// Generic floating-point truncation (conversion to a narrower FP type)
HANDLE_TARGET_OPCODE(G_FPTRUNC)

/// Generic floating-point truncation using round to odd
HANDLE_TARGET_OPCODE(G_FPTRUNC_ODD)

/// Generic float to signed-int conversion
HANDLE_TARGET_OPCODE(G_FPTOSI)

Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/Target/GenericOpcodes.td
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,12 @@ def G_FPTRUNC : GenericInstruction {
let hasSideEffects = false;
}

// Floating-point truncation using round to odd: converts $src (type1) into
// the narrower floating-point type of $dst (type0). Has no side effects.
def G_FPTRUNC_ODD : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
let hasSideEffects = false;
}

def G_FPTOSI : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
Expand Down
31 changes: 28 additions & 3 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5595,6 +5595,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_ANYEXT:
case G_FPEXT:
case G_FPTRUNC:
case G_FPTRUNC_ODD:
case G_SITOFP:
case G_UITOFP:
case G_FPTOSI:
Expand Down Expand Up @@ -8476,7 +8477,8 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
return Legalized;
}

// f64 -> f16 conversion using round-to-nearest-even rounding mode.
// f64 -> f16 conversion. Scalars use round-to-nearest-even directly; vectors
// are first truncated to 32-bit elements with round-to-odd (G_FPTRUNC_ODD)
// and then narrowed to f16 with a regular G_FPTRUNC.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
const LLT S1 = LLT::scalar(1);
Expand All @@ -8486,8 +8488,31 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
MRI.getType(Src).getScalarType() == LLT::scalar(64));

if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
return UnableToLegalize;
if (MRI.getType(Src).isVector()) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(Dst);
LLT SrcTy = MRI.getType(Src);

LLT MidTy = LLT::fixed_vector(SrcTy.getNumElements(), LLT::scalar(32));

MachineInstrBuilder Mid;
MachineInstrBuilder Fin;
MIRBuilder.setInstrAndDebugLoc(MI);
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
case TargetOpcode::G_FPTRUNC: {
Mid = MIRBuilder.buildFPTruncOdd(MidTy, Src);
Fin = MIRBuilder.buildFPTrunc(DstTy, Mid.getReg(0));
break;
}
}

MRI.replaceRegWith(Dst, Fin.getReg(0));
MI.eraseFromParent();
return Legalized;
}

if (MI.getFlag(MachineInstr::FmAfn)) {
unsigned Flags = MI.getFlags();
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,12 @@ MachineIRBuilder::buildFPTrunc(const DstOp &Res, const SrcOp &Op,
return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op, Flags);
}

/// Emit a G_FPTRUNC_ODD producing \p Res from \p Op, forwarding \p Flags to
/// the generic instruction builder.
MachineInstrBuilder
MachineIRBuilder::buildFPTruncOdd(const DstOp &Res, const SrcOp &Op,
                                  std::optional<unsigned> Flags) {
  const unsigned Opcode = TargetOpcode::G_FPTRUNC_ODD;
  return buildInstr(Opcode, Res, Op, Flags);
}

MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
const DstOp &Res,
const SrcOp &Op0,
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,8 @@ def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;

def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>;

def : GINodeEquiv<G_FPTRUNC_ODD, AArch64fcvtxn_n>;

// These are patterns that we only use for GlobalISel via the importer.
def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)),
(vector_extract (v2f32 FPR64:$Rn), (i64 1)))),
Expand Down
21 changes: 18 additions & 3 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DerivedTypes.h"
Expand Down Expand Up @@ -817,10 +818,24 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalFor(
{{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
.libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
.clampNumElements(0, v4s16, v4s16)
.clampNumElements(0, v2s32, v2s32)
.moreElementsToNextPow2(1)
.lowerIf([](const LegalityQuery &Q) {
LLT DstTy = Q.Types[0];
LLT SrcTy = Q.Types[1];
return SrcTy.isFixedVector() && DstTy.isFixedVector() &&
SrcTy.getScalarSizeInBits() == 64 &&
DstTy.getScalarSizeInBits() == 16;
})
// Clamp based on input
.clampNumElements(1, v4s32, v4s32)
.clampNumElements(1, v2s64, v2s64)
.scalarize(0);

getActionDefinitionsBuilder(G_FPTRUNC_ODD)
.legalFor({{s16, s32}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
.clampMaxNumElements(1, s32, 4)
.clampMaxNumElements(1, s64, 2);

getActionDefinitionsBuilder(G_FPEXT)
.legalFor(
{{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
Expand Down Expand Up @@ -2389,4 +2404,4 @@ bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
MI.eraseFromParent();
return true;
}
}
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ class AArch64LegalizerInfo : public LegalizerInfo {
bool legalizeDynStackAlloc(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizePrefetch(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizeBitcast(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizeFptrunc(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI) const;
const AArch64Subtarget *ST;
};
} // End llvm namespace.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -558,8 +558,11 @@
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_FPTRUNC (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FPTRUNC_ODD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_FPTOSI (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
Expand Down
24 changes: 8 additions & 16 deletions llvm/test/CodeGen/AArch64/arm64-fp128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1197,30 +1197,22 @@ define <2 x half> @vec_round_f16(<2 x fp128> %val) {
;
; CHECK-GI-LABEL: vec_round_f16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sub sp, sp, #64
; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
; CHECK-GI-NEXT: sub sp, sp, #48
; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
; CHECK-GI-NEXT: .cfi_offset w30, -16
; CHECK-GI-NEXT: mov v2.d[0], x8
; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: mov v2.d[1], x8
; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: bl __trunctfhf2
; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $q0
; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: bl __trunctfhf2
; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $q0
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: bl __trunctfhf2
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: bl __trunctfhf2
; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: add sp, sp, #64
; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[1], v0.h[0]
; CHECK-GI-NEXT: fmov d0, d1
; CHECK-GI-NEXT: add sp, sp, #48
; CHECK-GI-NEXT: ret
%dst = fptrunc <2 x fp128> %val to <2 x half>
ret <2 x half> %dst
Expand Down
47 changes: 6 additions & 41 deletions llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -170,47 +170,12 @@ define <4 x half> @s_to_h(<4 x float> %a) {
}

define <4 x half> @d_to_h(<4 x double> %a) {
; CHECK-CVT-SD-LABEL: d_to_h:
; CHECK-CVT-SD: // %bb.0:
; CHECK-CVT-SD-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-CVT-SD-NEXT: fcvtxn2 v0.4s, v1.2d
; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s
; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-SD-LABEL: d_to_h:
; CHECK-FP16-SD: // %bb.0:
; CHECK-FP16-SD-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-FP16-SD-NEXT: fcvtxn2 v0.4s, v1.2d
; CHECK-FP16-SD-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-SD-NEXT: ret
;
; CHECK-CVT-GI-LABEL: d_to_h:
; CHECK-CVT-GI: // %bb.0:
; CHECK-CVT-GI-NEXT: mov d2, v0.d[1]
; CHECK-CVT-GI-NEXT: fcvt h0, d0
; CHECK-CVT-GI-NEXT: mov d3, v1.d[1]
; CHECK-CVT-GI-NEXT: fcvt h1, d1
; CHECK-CVT-GI-NEXT: fcvt h2, d2
; CHECK-CVT-GI-NEXT: mov v0.h[1], v2.h[0]
; CHECK-CVT-GI-NEXT: fcvt h2, d3
; CHECK-CVT-GI-NEXT: mov v0.h[2], v1.h[0]
; CHECK-CVT-GI-NEXT: mov v0.h[3], v2.h[0]
; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: d_to_h:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: mov d2, v0.d[1]
; CHECK-FP16-GI-NEXT: fcvt h0, d0
; CHECK-FP16-GI-NEXT: mov d3, v1.d[1]
; CHECK-FP16-GI-NEXT: fcvt h1, d1
; CHECK-FP16-GI-NEXT: fcvt h2, d2
; CHECK-FP16-GI-NEXT: mov v0.h[1], v2.h[0]
; CHECK-FP16-GI-NEXT: fcvt h2, d3
; CHECK-FP16-GI-NEXT: mov v0.h[2], v1.h[0]
; CHECK-FP16-GI-NEXT: mov v0.h[3], v2.h[0]
; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-FP16-GI-NEXT: ret
; CHECK-LABEL: d_to_h:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-NEXT: fcvtxn2 v0.4s, v1.2d
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: ret
%1 = fptrunc <4 x double> %a to <4 x half>
ret <4 x half> %1
}
Expand Down
74 changes: 9 additions & 65 deletions llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -176,71 +176,15 @@ define <8 x half> @s_to_h(<8 x float> %a) {
}

define <8 x half> @d_to_h(<8 x double> %a) {
; CHECK-CVT-SD-LABEL: d_to_h:
; CHECK-CVT-SD: // %bb.0:
; CHECK-CVT-SD-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-CVT-SD-NEXT: fcvtxn v2.2s, v2.2d
; CHECK-CVT-SD-NEXT: fcvtxn2 v0.4s, v1.2d
; CHECK-CVT-SD-NEXT: fcvtxn2 v2.4s, v3.2d
; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s
; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-SD-LABEL: d_to_h:
; CHECK-FP16-SD: // %bb.0:
; CHECK-FP16-SD-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-FP16-SD-NEXT: fcvtxn v2.2s, v2.2d
; CHECK-FP16-SD-NEXT: fcvtxn2 v0.4s, v1.2d
; CHECK-FP16-SD-NEXT: fcvtxn2 v2.4s, v3.2d
; CHECK-FP16-SD-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-SD-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-FP16-SD-NEXT: ret
;
; CHECK-CVT-GI-LABEL: d_to_h:
; CHECK-CVT-GI: // %bb.0:
; CHECK-CVT-GI-NEXT: mov d4, v0.d[1]
; CHECK-CVT-GI-NEXT: fcvt h0, d0
; CHECK-CVT-GI-NEXT: mov d5, v1.d[1]
; CHECK-CVT-GI-NEXT: fcvt h1, d1
; CHECK-CVT-GI-NEXT: fcvt h4, d4
; CHECK-CVT-GI-NEXT: mov v0.h[1], v4.h[0]
; CHECK-CVT-GI-NEXT: fcvt h4, d5
; CHECK-CVT-GI-NEXT: mov v0.h[2], v1.h[0]
; CHECK-CVT-GI-NEXT: mov d1, v2.d[1]
; CHECK-CVT-GI-NEXT: fcvt h2, d2
; CHECK-CVT-GI-NEXT: mov v0.h[3], v4.h[0]
; CHECK-CVT-GI-NEXT: fcvt h1, d1
; CHECK-CVT-GI-NEXT: mov v0.h[4], v2.h[0]
; CHECK-CVT-GI-NEXT: mov d2, v3.d[1]
; CHECK-CVT-GI-NEXT: fcvt h3, d3
; CHECK-CVT-GI-NEXT: mov v0.h[5], v1.h[0]
; CHECK-CVT-GI-NEXT: fcvt h1, d2
; CHECK-CVT-GI-NEXT: mov v0.h[6], v3.h[0]
; CHECK-CVT-GI-NEXT: mov v0.h[7], v1.h[0]
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: d_to_h:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: mov d4, v0.d[1]
; CHECK-FP16-GI-NEXT: fcvt h0, d0
; CHECK-FP16-GI-NEXT: mov d5, v1.d[1]
; CHECK-FP16-GI-NEXT: fcvt h1, d1
; CHECK-FP16-GI-NEXT: fcvt h4, d4
; CHECK-FP16-GI-NEXT: mov v0.h[1], v4.h[0]
; CHECK-FP16-GI-NEXT: fcvt h4, d5
; CHECK-FP16-GI-NEXT: mov v0.h[2], v1.h[0]
; CHECK-FP16-GI-NEXT: mov d1, v2.d[1]
; CHECK-FP16-GI-NEXT: fcvt h2, d2
; CHECK-FP16-GI-NEXT: mov v0.h[3], v4.h[0]
; CHECK-FP16-GI-NEXT: fcvt h1, d1
; CHECK-FP16-GI-NEXT: mov v0.h[4], v2.h[0]
; CHECK-FP16-GI-NEXT: mov d2, v3.d[1]
; CHECK-FP16-GI-NEXT: fcvt h3, d3
; CHECK-FP16-GI-NEXT: mov v0.h[5], v1.h[0]
; CHECK-FP16-GI-NEXT: fcvt h1, d2
; CHECK-FP16-GI-NEXT: mov v0.h[6], v3.h[0]
; CHECK-FP16-GI-NEXT: mov v0.h[7], v1.h[0]
; CHECK-FP16-GI-NEXT: ret
; CHECK-LABEL: d_to_h:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-NEXT: fcvtxn v2.2s, v2.2d
; CHECK-NEXT: fcvtxn2 v0.4s, v1.2d
; CHECK-NEXT: fcvtxn2 v2.4s, v3.2d
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-NEXT: ret
%1 = fptrunc <8 x double> %a to <8 x half>
ret <8 x half> %1
}
Expand Down
Loading