Skip to content

Commit f8d65fd

Browse files
[AArch64][GlobalISel] Improve lowering of vector fp16 fpext (#165554)
This PR improves the lowering of fp16 vectors that are extended with fpext. Previously, fp16 vectors were scalarized, leading to many extra instructions. Now, when an fp16 vector is extended to fp64, the source elements are first extended to fp32, and the result is then lowered via the preexisting legalization logic for extends. To make use of that existing logic, elements are added to the vector until its element count reaches the next power of 2.
1 parent e7b41df commit f8d65fd

File tree

12 files changed

+407
-552
lines changed

12 files changed

+407
-552
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3065,6 +3065,14 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
30653065
Observer.changedInstr(MI);
30663066
return Legalized;
30673067

3068+
case TargetOpcode::G_FPEXT:
3069+
if (TypeIdx != 1)
3070+
return UnableToLegalize;
3071+
3072+
Observer.changingInstr(MI);
3073+
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3074+
Observer.changedInstr(MI);
3075+
return Legalized;
30683076
case TargetOpcode::G_FPTOSI:
30693077
case TargetOpcode::G_FPTOUI:
30703078
case TargetOpcode::G_INTRINSIC_LRINT:

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -825,6 +825,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
825825
.legalFor(
826826
{{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
827827
.libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
828+
.moreElementsToNextPow2(0)
829+
.widenScalarIf(
830+
[](const LegalityQuery &Q) {
831+
LLT DstTy = Q.Types[0];
832+
LLT SrcTy = Q.Types[1];
833+
return SrcTy.isVector() && DstTy.isVector() &&
834+
SrcTy.getScalarSizeInBits() == 16 &&
835+
DstTy.getScalarSizeInBits() == 64;
836+
},
837+
changeElementTo(1, s32))
828838
.clampNumElements(0, v4s32, v4s32)
829839
.clampNumElements(0, v2s64, v2s64)
830840
.scalarize(0);

llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,133 @@ body: |
3232
RET_ReallyLR
3333
3434
...
35+
36+
---
37+
name: fpext_f16_f64
38+
body: |
39+
bb.0:
40+
liveins: $h0
41+
; CHECK-LABEL: name: fpext_f16_f64
42+
; CHECK: liveins: $h0
43+
; CHECK-NEXT: {{ $}}
44+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
45+
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[COPY]](s16)
46+
; CHECK-NEXT: $d0 = COPY [[FPEXT]](s64)
47+
; CHECK-NEXT: RET_ReallyLR implicit $d0
48+
%0:_(s16) = COPY $h0
49+
%1:_(s64) = G_FPEXT %0(s16)
50+
$d0 = COPY %1(s64)
51+
RET_ReallyLR implicit $d0
52+
...
53+
54+
---
55+
name: fpext_v2f16_v2f64
56+
body: |
57+
bb.0:
58+
liveins: $d0
59+
60+
; CHECK-LABEL: name: fpext_v2f16_v2f64
61+
; CHECK: liveins: $d0
62+
; CHECK-NEXT: {{ $}}
63+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
64+
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
65+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>)
66+
; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>)
67+
; CHECK-NEXT: $q0 = COPY [[FPEXT1]](<2 x s64>)
68+
; CHECK-NEXT: RET_ReallyLR implicit $q0
69+
%1:_(<4 x s16>) = COPY $d0
70+
%0:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %1(<4 x s16>)
71+
%3:_(<2 x s64>) = G_FPEXT %0(<2 x s16>)
72+
$q0 = COPY %3(<2 x s64>)
73+
RET_ReallyLR implicit $q0
74+
...
75+
76+
---
77+
name: fpext_v3f16_v3f64
78+
body: |
79+
bb.0:
80+
liveins: $d0
81+
82+
; CHECK-LABEL: name: fpext_v3f16_v3f64
83+
; CHECK: liveins: $d0
84+
; CHECK-NEXT: {{ $}}
85+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
86+
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
87+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>)
88+
; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>)
89+
; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV1]](<2 x s32>)
90+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FPEXT1]](<2 x s64>)
91+
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FPEXT2]](<2 x s64>)
92+
; CHECK-NEXT: $d0 = COPY [[UV2]](s64)
93+
; CHECK-NEXT: $d1 = COPY [[UV3]](s64)
94+
; CHECK-NEXT: $d2 = COPY [[UV4]](s64)
95+
; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1, implicit $d2
96+
%1:_(<4 x s16>) = COPY $d0
97+
%2:_(s16), %3:_(s16), %4:_(s16), %5:_(s16) = G_UNMERGE_VALUES %1(<4 x s16>)
98+
%0:_(<3 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16)
99+
%6:_(<3 x s64>) = G_FPEXT %0(<3 x s16>)
100+
%7:_(s64), %8:_(s64), %9:_(s64) = G_UNMERGE_VALUES %6(<3 x s64>)
101+
$d0 = COPY %7(s64)
102+
$d1 = COPY %8(s64)
103+
$d2 = COPY %9(s64)
104+
RET_ReallyLR implicit $d0, implicit $d1, implicit $d2
105+
...
106+
107+
---
108+
name: fpext_v4f16_v4f64
109+
body: |
110+
bb.0:
111+
liveins: $d0
112+
113+
; CHECK-LABEL: name: fpext_v4f16_v4f64
114+
; CHECK: liveins: $d0
115+
; CHECK-NEXT: {{ $}}
116+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
117+
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
118+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>)
119+
; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>)
120+
; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV1]](<2 x s32>)
121+
; CHECK-NEXT: $q0 = COPY [[FPEXT1]](<2 x s64>)
122+
; CHECK-NEXT: $q1 = COPY [[FPEXT2]](<2 x s64>)
123+
; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1
124+
%0:_(<4 x s16>) = COPY $d0
125+
%1:_(<4 x s64>) = G_FPEXT %0(<4 x s16>)
126+
%2:_(<2 x s64>), %3:_(<2 x s64>) = G_UNMERGE_VALUES %1(<4 x s64>)
127+
$q0 = COPY %2(<2 x s64>)
128+
$q1 = COPY %3(<2 x s64>)
129+
RET_ReallyLR implicit $q0, implicit $q1
130+
...
131+
132+
---
133+
name: fpext_v8f16_v8f64
134+
body: |
135+
bb.0:
136+
liveins: $q0
137+
138+
; CHECK-LABEL: name: fpext_v8f16_v8f64
139+
; CHECK: liveins: $q0
140+
; CHECK-NEXT: {{ $}}
141+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
142+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
143+
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>)
144+
; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>)
145+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>)
146+
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT1]](<4 x s32>)
147+
; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV2]](<2 x s32>)
148+
; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV3]](<2 x s32>)
149+
; CHECK-NEXT: [[FPEXT4:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV4]](<2 x s32>)
150+
; CHECK-NEXT: [[FPEXT5:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV5]](<2 x s32>)
151+
; CHECK-NEXT: $q0 = COPY [[FPEXT2]](<2 x s64>)
152+
; CHECK-NEXT: $q1 = COPY [[FPEXT3]](<2 x s64>)
153+
; CHECK-NEXT: $q2 = COPY [[FPEXT4]](<2 x s64>)
154+
; CHECK-NEXT: $q3 = COPY [[FPEXT5]](<2 x s64>)
155+
; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1, implicit $q2, implicit $q3
156+
%0:_(<8 x s16>) = COPY $q0
157+
%1:_(<8 x s64>) = G_FPEXT %0(<8 x s16>)
158+
%2:_(<2 x s64>), %3:_(<2 x s64>), %4:_(<2 x s64>), %5:_(<2 x s64>) = G_UNMERGE_VALUES %1(<8 x s64>)
159+
$q0 = COPY %2(<2 x s64>)
160+
$q1 = COPY %3(<2 x s64>)
161+
$q2 = COPY %4(<2 x s64>)
162+
$q3 = COPY %5(<2 x s64>)
163+
RET_ReallyLR implicit $q0, implicit $q1, implicit $q2, implicit $q3
164+
...

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -555,8 +555,8 @@
555555
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
556556
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
557557
# DEBUG-NEXT: G_FPEXT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
558-
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
559-
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
558+
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
559+
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
560560
# DEBUG-NEXT: G_FPTRUNC (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
561561
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
562562
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK

llvm/test/CodeGen/AArch64/fmla.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -865,22 +865,22 @@ define <7 x half> @fmuladd_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
865865
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h
866866
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
867867
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h
868-
; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4]
869868
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h
870869
; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
871-
; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5]
872-
; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s
873-
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s
874-
; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6]
875-
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
876-
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0]
877-
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h
870+
; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v2.h[4]
871+
; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
872+
; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[5]
873+
; CHECK-GI-NOFP16-NEXT: fcvtn v4.4h, v0.4s
874+
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s
875+
; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[6]
876+
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v4.h[0]
877+
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h
878878
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
879-
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1]
880-
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
881-
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2]
879+
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v4.h[1]
880+
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
881+
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[2]
882882
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
883-
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3]
883+
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[3]
884884
; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
885885
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1]
886886
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2]
@@ -1350,22 +1350,22 @@ define <7 x half> @fmul_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
13501350
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h
13511351
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
13521352
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h
1353-
; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4]
13541353
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h
13551354
; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
1356-
; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5]
1357-
; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s
1358-
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s
1359-
; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6]
1360-
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
1361-
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0]
1362-
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h
1355+
; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v2.h[4]
1356+
; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
1357+
; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[5]
1358+
; CHECK-GI-NOFP16-NEXT: fcvtn v4.4h, v0.4s
1359+
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s
1360+
; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[6]
1361+
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v4.h[0]
1362+
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h
13631363
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1364-
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1]
1365-
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
1366-
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2]
1364+
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v4.h[1]
1365+
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
1366+
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[2]
13671367
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
1368-
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3]
1368+
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[3]
13691369
; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
13701370
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1]
13711371
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2]

llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -241,30 +241,16 @@ define <4 x double> @h_to_d(<4 x half> %a) {
241241
;
242242
; CHECK-CVT-GI-LABEL: h_to_d:
243243
; CHECK-CVT-GI: // %bb.0:
244-
; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0
245-
; CHECK-CVT-GI-NEXT: mov h1, v0.h[1]
246-
; CHECK-CVT-GI-NEXT: mov h2, v0.h[2]
247-
; CHECK-CVT-GI-NEXT: mov h3, v0.h[3]
248-
; CHECK-CVT-GI-NEXT: fcvt d0, h0
249-
; CHECK-CVT-GI-NEXT: fcvt d4, h1
250-
; CHECK-CVT-GI-NEXT: fcvt d1, h2
251-
; CHECK-CVT-GI-NEXT: fcvt d2, h3
252-
; CHECK-CVT-GI-NEXT: mov v0.d[1], v4.d[0]
253-
; CHECK-CVT-GI-NEXT: mov v1.d[1], v2.d[0]
244+
; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h
245+
; CHECK-CVT-GI-NEXT: fcvtl v0.2d, v1.2s
246+
; CHECK-CVT-GI-NEXT: fcvtl2 v1.2d, v1.4s
254247
; CHECK-CVT-GI-NEXT: ret
255248
;
256249
; CHECK-FP16-GI-LABEL: h_to_d:
257250
; CHECK-FP16-GI: // %bb.0:
258-
; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
259-
; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
260-
; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
261-
; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
262-
; CHECK-FP16-GI-NEXT: fcvt d0, h0
263-
; CHECK-FP16-GI-NEXT: fcvt d4, h1
264-
; CHECK-FP16-GI-NEXT: fcvt d1, h2
265-
; CHECK-FP16-GI-NEXT: fcvt d2, h3
266-
; CHECK-FP16-GI-NEXT: mov v0.d[1], v4.d[0]
267-
; CHECK-FP16-GI-NEXT: mov v1.d[1], v2.d[0]
251+
; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h
252+
; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v1.2s
253+
; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s
268254
; CHECK-FP16-GI-NEXT: ret
269255
%1 = fpext <4 x half> %a to <4 x double>
270256
ret <4 x double> %1

llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll

Lines changed: 12 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -298,48 +298,22 @@ define <8 x double> @h_to_d(<8 x half> %a) {
298298
;
299299
; CHECK-CVT-GI-LABEL: h_to_d:
300300
; CHECK-CVT-GI: // %bb.0:
301-
; CHECK-CVT-GI-NEXT: mov h1, v0.h[1]
302-
; CHECK-CVT-GI-NEXT: mov h2, v0.h[2]
303-
; CHECK-CVT-GI-NEXT: mov h3, v0.h[3]
304-
; CHECK-CVT-GI-NEXT: mov h4, v0.h[4]
305-
; CHECK-CVT-GI-NEXT: mov h5, v0.h[5]
306-
; CHECK-CVT-GI-NEXT: mov h6, v0.h[6]
307-
; CHECK-CVT-GI-NEXT: mov h7, v0.h[7]
308-
; CHECK-CVT-GI-NEXT: fcvt d0, h0
309-
; CHECK-CVT-GI-NEXT: fcvt d16, h1
310-
; CHECK-CVT-GI-NEXT: fcvt d1, h2
311-
; CHECK-CVT-GI-NEXT: fcvt d17, h3
312-
; CHECK-CVT-GI-NEXT: fcvt d2, h4
313-
; CHECK-CVT-GI-NEXT: fcvt d4, h5
314-
; CHECK-CVT-GI-NEXT: fcvt d3, h6
315-
; CHECK-CVT-GI-NEXT: fcvt d5, h7
316-
; CHECK-CVT-GI-NEXT: mov v0.d[1], v16.d[0]
317-
; CHECK-CVT-GI-NEXT: mov v1.d[1], v17.d[0]
318-
; CHECK-CVT-GI-NEXT: mov v2.d[1], v4.d[0]
319-
; CHECK-CVT-GI-NEXT: mov v3.d[1], v5.d[0]
301+
; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h
302+
; CHECK-CVT-GI-NEXT: fcvtl2 v3.4s, v0.8h
303+
; CHECK-CVT-GI-NEXT: fcvtl v0.2d, v1.2s
304+
; CHECK-CVT-GI-NEXT: fcvtl2 v1.2d, v1.4s
305+
; CHECK-CVT-GI-NEXT: fcvtl v2.2d, v3.2s
306+
; CHECK-CVT-GI-NEXT: fcvtl2 v3.2d, v3.4s
320307
; CHECK-CVT-GI-NEXT: ret
321308
;
322309
; CHECK-FP16-GI-LABEL: h_to_d:
323310
; CHECK-FP16-GI: // %bb.0:
324-
; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
325-
; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
326-
; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
327-
; CHECK-FP16-GI-NEXT: mov h4, v0.h[4]
328-
; CHECK-FP16-GI-NEXT: mov h5, v0.h[5]
329-
; CHECK-FP16-GI-NEXT: mov h6, v0.h[6]
330-
; CHECK-FP16-GI-NEXT: mov h7, v0.h[7]
331-
; CHECK-FP16-GI-NEXT: fcvt d0, h0
332-
; CHECK-FP16-GI-NEXT: fcvt d16, h1
333-
; CHECK-FP16-GI-NEXT: fcvt d1, h2
334-
; CHECK-FP16-GI-NEXT: fcvt d17, h3
335-
; CHECK-FP16-GI-NEXT: fcvt d2, h4
336-
; CHECK-FP16-GI-NEXT: fcvt d4, h5
337-
; CHECK-FP16-GI-NEXT: fcvt d3, h6
338-
; CHECK-FP16-GI-NEXT: fcvt d5, h7
339-
; CHECK-FP16-GI-NEXT: mov v0.d[1], v16.d[0]
340-
; CHECK-FP16-GI-NEXT: mov v1.d[1], v17.d[0]
341-
; CHECK-FP16-GI-NEXT: mov v2.d[1], v4.d[0]
342-
; CHECK-FP16-GI-NEXT: mov v3.d[1], v5.d[0]
311+
; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h
312+
; CHECK-FP16-GI-NEXT: fcvtl2 v3.4s, v0.8h
313+
; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v1.2s
314+
; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s
315+
; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v3.2s
316+
; CHECK-FP16-GI-NEXT: fcvtl2 v3.2d, v3.4s
343317
; CHECK-FP16-GI-NEXT: ret
344318
%1 = fpext <8 x half> %a to <8 x double>
345319
ret <8 x double> %1

0 commit comments

Comments
 (0)