Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3065,6 +3065,16 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;

case TargetOpcode::G_FPEXT:
// Widen one of the two scalar types of a G_FPEXT to WideTy. MI is modified
// in place, so the observer is notified before and after the change.
Observer.changingInstr(MI);

if (TypeIdx == 0)
// Type index 0: operand 0 is widened to WideTy, with G_FPTRUNC passed as the
// companion opcode (presumably used to narrow the wide result back to the
// original type -- confirm against widenScalarDst).
// NOTE(review): the AArch64 rule in this change only mutates type index 1,
// so this path does not appear to be exercised by the tests shown here.
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
else
// Any other type index: operand 1 is widened to WideTy, with G_FPEXT passed
// as the opcode used to extend it.
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);

Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_INTRINSIC_LRINT:
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// NOTE(review): the builder call that starts this chain is outside the
// visible hunk; the accompanying tests suggest these are the G_FPEXT rules
// -- confirm. In the lambda below, type index 0 is the destination type and
// type index 1 is the source type.
.legalFor(
{{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
.libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
// Pad the element count of type index 0 up to the next power of two.
.moreElementsToNextPow2(0)
// For vector conversions from 16-bit elements to 64-bit elements, change the
// element type of type index 1 (the source) to s32. No {v2s64, v2s16}-style
// pair is listed as legal above, so this routes the conversion through
// 32-bit elements.
.widenScalarIf(
[](const LegalityQuery &Q) {
LLT DstTy = Q.Types[0];
LLT SrcTy = Q.Types[1];
return SrcTy.isVector() && DstTy.isVector() &&
SrcTy.getScalarSizeInBits() == 16 &&
DstTy.getScalarSizeInBits() == 64;
},
changeElementTo(1, s32))
// Min and max are the same type in each clamp: v4s32 for 32-bit elements
// and v2s64 for 64-bit elements.
.clampNumElements(0, v4s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
// Last rule in the chain: scalarize on type index 0.
.scalarize(0);
Expand Down
130 changes: 130 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,133 @@ body: |
RET_ReallyLR

...

---
# Scalar s16 -> s64 G_FPEXT. The CHECK lines expect the instruction to come
# out of legalization unchanged.
name: fpext_f16_f64
body: |
bb.0:
liveins: $h0
; CHECK-LABEL: name: fpext_f16_f64
; CHECK: liveins: $h0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[COPY]](s16)
; CHECK-NEXT: $d0 = COPY [[FPEXT]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(s16) = COPY $h0
%1:_(s64) = G_FPEXT %0(s16)
$d0 = COPY %1(s64)
RET_ReallyLR implicit $d0
...

---
# <2 x s16> -> <2 x s64> G_FPEXT whose input is the first half of a <4 x s16>
# copy. The CHECK lines expect a <4 x s16> -> <4 x s32> G_FPEXT of the whole
# copy, then a <2 x s32> -> <2 x s64> G_FPEXT of its first unmerged half.
name: fpext_v2f16_v2f64
body: |
bb.0:
liveins: $d0

; CHECK-LABEL: name: fpext_v2f16_v2f64
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>)
; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>)
; CHECK-NEXT: $q0 = COPY [[FPEXT1]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%1:_(<4 x s16>) = COPY $d0
%0:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %1(<4 x s16>)
%3:_(<2 x s64>) = G_FPEXT %0(<2 x s16>)
$q0 = COPY %3(<2 x s64>)
RET_ReallyLR implicit $q0
...

---
# <3 x s16> -> <3 x s64> G_FPEXT (non-power-of-two element count). The CHECK
# lines expect the same shape as the 4-element case: one <4 x s16> ->
# <4 x s32> G_FPEXT, then a <2 x s32> -> <2 x s64> G_FPEXT per half. Only
# three of the four resulting s64 lanes are copied out.
name: fpext_v3f16_v3f64
body: |
bb.0:
liveins: $d0

; CHECK-LABEL: name: fpext_v3f16_v3f64
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>)
; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>)
; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV1]](<2 x s32>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FPEXT1]](<2 x s64>)
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FPEXT2]](<2 x s64>)
; CHECK-NEXT: $d0 = COPY [[UV2]](s64)
; CHECK-NEXT: $d1 = COPY [[UV3]](s64)
; CHECK-NEXT: $d2 = COPY [[UV4]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1, implicit $d2
%1:_(<4 x s16>) = COPY $d0
%2:_(s16), %3:_(s16), %4:_(s16), %5:_(s16) = G_UNMERGE_VALUES %1(<4 x s16>)
%0:_(<3 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16)
%6:_(<3 x s64>) = G_FPEXT %0(<3 x s16>)
%7:_(s64), %8:_(s64), %9:_(s64) = G_UNMERGE_VALUES %6(<3 x s64>)
$d0 = COPY %7(s64)
$d1 = COPY %8(s64)
$d2 = COPY %9(s64)
RET_ReallyLR implicit $d0, implicit $d1, implicit $d2
...

---
# <4 x s16> -> <4 x s64> G_FPEXT. The CHECK lines expect one <4 x s16> ->
# <4 x s32> G_FPEXT followed by one <2 x s32> -> <2 x s64> G_FPEXT for each
# half of the intermediate vector.
name: fpext_v4f16_v4f64
body: |
bb.0:
liveins: $d0

; CHECK-LABEL: name: fpext_v4f16_v4f64
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>)
; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>)
; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV1]](<2 x s32>)
; CHECK-NEXT: $q0 = COPY [[FPEXT1]](<2 x s64>)
; CHECK-NEXT: $q1 = COPY [[FPEXT2]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1
%0:_(<4 x s16>) = COPY $d0
%1:_(<4 x s64>) = G_FPEXT %0(<4 x s16>)
%2:_(<2 x s64>), %3:_(<2 x s64>) = G_UNMERGE_VALUES %1(<4 x s64>)
$q0 = COPY %2(<2 x s64>)
$q1 = COPY %3(<2 x s64>)
RET_ReallyLR implicit $q0, implicit $q1
...

---
# <8 x s16> -> <8 x s64> G_FPEXT. The CHECK lines expect the input to be split
# into two <4 x s16> halves, each extended to <4 x s32> and then, per
# <2 x s32> half, to <2 x s64>, giving four <2 x s64> results.
name: fpext_v8f16_v8f64
body: |
bb.0:
liveins: $q0

; CHECK-LABEL: name: fpext_v8f16_v8f64
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>)
; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>)
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT1]](<4 x s32>)
; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV2]](<2 x s32>)
; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV3]](<2 x s32>)
; CHECK-NEXT: [[FPEXT4:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV4]](<2 x s32>)
; CHECK-NEXT: [[FPEXT5:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV5]](<2 x s32>)
; CHECK-NEXT: $q0 = COPY [[FPEXT2]](<2 x s64>)
; CHECK-NEXT: $q1 = COPY [[FPEXT3]](<2 x s64>)
; CHECK-NEXT: $q2 = COPY [[FPEXT4]](<2 x s64>)
; CHECK-NEXT: $q3 = COPY [[FPEXT5]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1, implicit $q2, implicit $q3
%0:_(<8 x s16>) = COPY $q0
%1:_(<8 x s64>) = G_FPEXT %0(<8 x s16>)
%2:_(<2 x s64>), %3:_(<2 x s64>), %4:_(<2 x s64>), %5:_(<2 x s64>) = G_UNMERGE_VALUES %1(<8 x s64>)
$q0 = COPY %2(<2 x s64>)
$q1 = COPY %3(<2 x s64>)
$q2 = COPY %4(<2 x s64>)
$q3 = COPY %5(<2 x s64>)
RET_ReallyLR implicit $q0, implicit $q1, implicit $q2, implicit $q3
...
Original file line number Diff line number Diff line change
Expand Up @@ -555,8 +555,8 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FPEXT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FPTRUNC (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
Expand Down
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AArch64/fmla.ll
Original file line number Diff line number Diff line change
Expand Up @@ -865,22 +865,22 @@ define <7 x half> @fmuladd_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4]
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h
; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5]
; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v2.h[4]
; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[5]
; CHECK-GI-NOFP16-NEXT: fcvtn v4.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s
; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[6]
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v4.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1]
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2]
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v4.h[1]
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[2]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3]
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[3]
; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1]
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2]
Expand Down Expand Up @@ -1350,22 +1350,22 @@ define <7 x half> @fmul_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4]
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h
; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5]
; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v2.h[4]
; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[5]
; CHECK-GI-NOFP16-NEXT: fcvtn v4.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s
; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[6]
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v4.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1]
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2]
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v4.h[1]
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[2]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3]
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[3]
; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1]
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2]
Expand Down
26 changes: 6 additions & 20 deletions llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -241,30 +241,16 @@ define <4 x double> @h_to_d(<4 x half> %a) {
;
; CHECK-CVT-GI-LABEL: h_to_d:
; CHECK-CVT-GI: // %bb.0:
; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-CVT-GI-NEXT: mov h1, v0.h[1]
; CHECK-CVT-GI-NEXT: mov h2, v0.h[2]
; CHECK-CVT-GI-NEXT: mov h3, v0.h[3]
; CHECK-CVT-GI-NEXT: fcvt d0, h0
; CHECK-CVT-GI-NEXT: fcvt d4, h1
; CHECK-CVT-GI-NEXT: fcvt d1, h2
; CHECK-CVT-GI-NEXT: fcvt d2, h3
; CHECK-CVT-GI-NEXT: mov v0.d[1], v4.d[0]
; CHECK-CVT-GI-NEXT: mov v1.d[1], v2.d[0]
; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h
; CHECK-CVT-GI-NEXT: fcvtl v0.2d, v1.2s
; CHECK-CVT-GI-NEXT: fcvtl2 v1.2d, v1.4s
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: h_to_d:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
; CHECK-FP16-GI-NEXT: fcvt d0, h0
; CHECK-FP16-GI-NEXT: fcvt d4, h1
; CHECK-FP16-GI-NEXT: fcvt d1, h2
; CHECK-FP16-GI-NEXT: fcvt d2, h3
; CHECK-FP16-GI-NEXT: mov v0.d[1], v4.d[0]
; CHECK-FP16-GI-NEXT: mov v1.d[1], v2.d[0]
; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h
; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v1.2s
; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s
; CHECK-FP16-GI-NEXT: ret
%1 = fpext <4 x half> %a to <4 x double>
ret <4 x double> %1
Expand Down
50 changes: 12 additions & 38 deletions llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -298,48 +298,22 @@ define <8 x double> @h_to_d(<8 x half> %a) {
;
; CHECK-CVT-GI-LABEL: h_to_d:
; CHECK-CVT-GI: // %bb.0:
; CHECK-CVT-GI-NEXT: mov h1, v0.h[1]
; CHECK-CVT-GI-NEXT: mov h2, v0.h[2]
; CHECK-CVT-GI-NEXT: mov h3, v0.h[3]
; CHECK-CVT-GI-NEXT: mov h4, v0.h[4]
; CHECK-CVT-GI-NEXT: mov h5, v0.h[5]
; CHECK-CVT-GI-NEXT: mov h6, v0.h[6]
; CHECK-CVT-GI-NEXT: mov h7, v0.h[7]
; CHECK-CVT-GI-NEXT: fcvt d0, h0
; CHECK-CVT-GI-NEXT: fcvt d16, h1
; CHECK-CVT-GI-NEXT: fcvt d1, h2
; CHECK-CVT-GI-NEXT: fcvt d17, h3
; CHECK-CVT-GI-NEXT: fcvt d2, h4
; CHECK-CVT-GI-NEXT: fcvt d4, h5
; CHECK-CVT-GI-NEXT: fcvt d3, h6
; CHECK-CVT-GI-NEXT: fcvt d5, h7
; CHECK-CVT-GI-NEXT: mov v0.d[1], v16.d[0]
; CHECK-CVT-GI-NEXT: mov v1.d[1], v17.d[0]
; CHECK-CVT-GI-NEXT: mov v2.d[1], v4.d[0]
; CHECK-CVT-GI-NEXT: mov v3.d[1], v5.d[0]
; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h
; CHECK-CVT-GI-NEXT: fcvtl2 v3.4s, v0.8h
; CHECK-CVT-GI-NEXT: fcvtl v0.2d, v1.2s
; CHECK-CVT-GI-NEXT: fcvtl2 v1.2d, v1.4s
; CHECK-CVT-GI-NEXT: fcvtl v2.2d, v3.2s
; CHECK-CVT-GI-NEXT: fcvtl2 v3.2d, v3.4s
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: h_to_d:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
; CHECK-FP16-GI-NEXT: mov h4, v0.h[4]
; CHECK-FP16-GI-NEXT: mov h5, v0.h[5]
; CHECK-FP16-GI-NEXT: mov h6, v0.h[6]
; CHECK-FP16-GI-NEXT: mov h7, v0.h[7]
; CHECK-FP16-GI-NEXT: fcvt d0, h0
; CHECK-FP16-GI-NEXT: fcvt d16, h1
; CHECK-FP16-GI-NEXT: fcvt d1, h2
; CHECK-FP16-GI-NEXT: fcvt d17, h3
; CHECK-FP16-GI-NEXT: fcvt d2, h4
; CHECK-FP16-GI-NEXT: fcvt d4, h5
; CHECK-FP16-GI-NEXT: fcvt d3, h6
; CHECK-FP16-GI-NEXT: fcvt d5, h7
; CHECK-FP16-GI-NEXT: mov v0.d[1], v16.d[0]
; CHECK-FP16-GI-NEXT: mov v1.d[1], v17.d[0]
; CHECK-FP16-GI-NEXT: mov v2.d[1], v4.d[0]
; CHECK-FP16-GI-NEXT: mov v3.d[1], v5.d[0]
; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h
; CHECK-FP16-GI-NEXT: fcvtl2 v3.4s, v0.8h
; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v1.2s
; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s
; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v3.2s
; CHECK-FP16-GI-NEXT: fcvtl2 v3.2d, v3.4s
; CHECK-FP16-GI-NEXT: ret
%1 = fpext <8 x half> %a to <8 x double>
ret <8 x double> %1
Expand Down
Loading