Skip to content

Commit 43b1509

Browse files
Separate FPEXT & FPTRUNC changes
1 parent ae3ef1e commit 43b1509

File tree

10 files changed

+553
-274
lines changed

10 files changed

+553
-274
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 2 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -835,14 +835,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
835835
.legalFor(
836836
{{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
837837
.libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
838-
.moreElementsToNextPow2(0)
839-
.customIf([](const LegalityQuery &Q) {
840-
LLT DstTy = Q.Types[0];
841-
LLT SrcTy = Q.Types[1];
842-
return SrcTy.isVector() && DstTy.isVector() &&
843-
SrcTy.getScalarSizeInBits() == 16 &&
844-
DstTy.getScalarSizeInBits() == 64;
845-
})
846838
.clampNumElements(0, v4s32, v4s32)
847839
.clampNumElements(0, v2s64, v2s64)
848840
.scalarize(0);
@@ -1482,11 +1474,10 @@ bool AArch64LegalizerInfo::legalizeCustom(
14821474
return legalizeICMP(MI, MRI, MIRBuilder);
14831475
case TargetOpcode::G_BITCAST:
14841476
return legalizeBitcast(MI, Helper);
1485-
case TargetOpcode::G_FPEXT:
14861477
case TargetOpcode::G_FPTRUNC:
14871478
// In order to vectorise f16 to f64 properly, we need to use f32 as an
14881479
// intermediary
1489-
return legalizeFpextFptrunc(MI, MIRBuilder, MRI);
1480+
return legalizeFptrunc(MI, MIRBuilder, MRI);
14901481
}
14911482

14921483
llvm_unreachable("expected switch to return");
@@ -2414,7 +2405,7 @@ bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
24142405
return true;
24152406
}
24162407

2417-
bool AArch64LegalizerInfo::legalizeFpextFptrunc(
2408+
bool AArch64LegalizerInfo::legalizeFptrunc(
24182409
MachineInstr &MI, MachineIRBuilder &MIRBuilder,
24192410
MachineRegisterInfo &MRI) const {
24202411
Register Dst = MI.getOperand(0).getReg();
@@ -2430,11 +2421,6 @@ bool AArch64LegalizerInfo::legalizeFpextFptrunc(
24302421
switch (MI.getOpcode()) {
24312422
default:
24322423
return false;
2433-
case TargetOpcode::G_FPEXT: {
2434-
Mid = MIRBuilder.buildFPExt(MidTy, Src);
2435-
Fin = MIRBuilder.buildFPExt(DstTy, Mid.getReg(0));
2436-
break;
2437-
}
24382424
case TargetOpcode::G_FPTRUNC: {
24392425
Mid = MIRBuilder.buildFPTrunc(MidTy, Src);
24402426
Fin = MIRBuilder.buildFPTrunc(DstTy, Mid.getReg(0));

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ class AArch64LegalizerInfo : public LegalizerInfo {
6767
bool legalizeDynStackAlloc(MachineInstr &MI, LegalizerHelper &Helper) const;
6868
bool legalizePrefetch(MachineInstr &MI, LegalizerHelper &Helper) const;
6969
bool legalizeBitcast(MachineInstr &MI, LegalizerHelper &Helper) const;
70-
bool legalizeFpextFptrunc(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
70+
bool legalizeFptrunc(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
7171
MachineRegisterInfo &MRI) const;
7272
const AArch64Subtarget *ST;
7373
};

llvm/test/CodeGen/AArch64/fmla.ll

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -865,22 +865,22 @@ define <7 x half> @fmuladd_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
865865
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h
866866
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
867867
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h
868+
; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4]
868869
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h
869870
; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
870-
; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v2.h[4]
871-
; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
872-
; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[5]
873-
; CHECK-GI-NOFP16-NEXT: fcvtn v4.4h, v0.4s
874-
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s
875-
; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[6]
876-
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v4.h[0]
877-
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h
871+
; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5]
872+
; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s
873+
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s
874+
; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6]
875+
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
876+
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0]
877+
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h
878878
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
879-
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v4.h[1]
880-
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
881-
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[2]
879+
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1]
880+
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
881+
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2]
882882
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
883-
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[3]
883+
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3]
884884
; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
885885
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1]
886886
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2]
@@ -1350,22 +1350,22 @@ define <7 x half> @fmul_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
13501350
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h
13511351
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
13521352
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h
1353+
; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4]
13531354
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h
13541355
; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
1355-
; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v2.h[4]
1356-
; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
1357-
; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[5]
1358-
; CHECK-GI-NOFP16-NEXT: fcvtn v4.4h, v0.4s
1359-
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s
1360-
; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[6]
1361-
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v4.h[0]
1362-
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h
1356+
; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5]
1357+
; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s
1358+
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s
1359+
; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6]
1360+
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
1361+
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0]
1362+
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h
13631363
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1364-
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v4.h[1]
1365-
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
1366-
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[2]
1364+
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1]
1365+
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
1366+
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2]
13671367
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
1368-
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[3]
1368+
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3]
13691369
; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
13701370
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1]
13711371
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2]

llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll

Lines changed: 20 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -206,16 +206,30 @@ define <4 x double> @h_to_d(<4 x half> %a) {
206206
;
207207
; CHECK-CVT-GI-LABEL: h_to_d:
208208
; CHECK-CVT-GI: // %bb.0:
209-
; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h
210-
; CHECK-CVT-GI-NEXT: fcvtl v0.2d, v1.2s
211-
; CHECK-CVT-GI-NEXT: fcvtl2 v1.2d, v1.4s
209+
; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0
210+
; CHECK-CVT-GI-NEXT: mov h1, v0.h[1]
211+
; CHECK-CVT-GI-NEXT: mov h2, v0.h[2]
212+
; CHECK-CVT-GI-NEXT: mov h3, v0.h[3]
213+
; CHECK-CVT-GI-NEXT: fcvt d0, h0
214+
; CHECK-CVT-GI-NEXT: fcvt d4, h1
215+
; CHECK-CVT-GI-NEXT: fcvt d1, h2
216+
; CHECK-CVT-GI-NEXT: fcvt d2, h3
217+
; CHECK-CVT-GI-NEXT: mov v0.d[1], v4.d[0]
218+
; CHECK-CVT-GI-NEXT: mov v1.d[1], v2.d[0]
212219
; CHECK-CVT-GI-NEXT: ret
213220
;
214221
; CHECK-FP16-GI-LABEL: h_to_d:
215222
; CHECK-FP16-GI: // %bb.0:
216-
; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h
217-
; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v1.2s
218-
; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s
223+
; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
224+
; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
225+
; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
226+
; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
227+
; CHECK-FP16-GI-NEXT: fcvt d0, h0
228+
; CHECK-FP16-GI-NEXT: fcvt d4, h1
229+
; CHECK-FP16-GI-NEXT: fcvt d1, h2
230+
; CHECK-FP16-GI-NEXT: fcvt d2, h3
231+
; CHECK-FP16-GI-NEXT: mov v0.d[1], v4.d[0]
232+
; CHECK-FP16-GI-NEXT: mov v1.d[1], v2.d[0]
219233
; CHECK-FP16-GI-NEXT: ret
220234
%1 = fpext <4 x half> %a to <4 x double>
221235
ret <4 x double> %1

llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll

Lines changed: 38 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -272,22 +272,48 @@ define <8 x double> @h_to_d(<8 x half> %a) {
272272
;
273273
; CHECK-CVT-GI-LABEL: h_to_d:
274274
; CHECK-CVT-GI: // %bb.0:
275-
; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h
276-
; CHECK-CVT-GI-NEXT: fcvtl2 v3.4s, v0.8h
277-
; CHECK-CVT-GI-NEXT: fcvtl v0.2d, v1.2s
278-
; CHECK-CVT-GI-NEXT: fcvtl2 v1.2d, v1.4s
279-
; CHECK-CVT-GI-NEXT: fcvtl v2.2d, v3.2s
280-
; CHECK-CVT-GI-NEXT: fcvtl2 v3.2d, v3.4s
275+
; CHECK-CVT-GI-NEXT: mov h1, v0.h[1]
276+
; CHECK-CVT-GI-NEXT: mov h2, v0.h[2]
277+
; CHECK-CVT-GI-NEXT: mov h3, v0.h[3]
278+
; CHECK-CVT-GI-NEXT: mov h4, v0.h[4]
279+
; CHECK-CVT-GI-NEXT: mov h5, v0.h[5]
280+
; CHECK-CVT-GI-NEXT: mov h6, v0.h[6]
281+
; CHECK-CVT-GI-NEXT: mov h7, v0.h[7]
282+
; CHECK-CVT-GI-NEXT: fcvt d0, h0
283+
; CHECK-CVT-GI-NEXT: fcvt d16, h1
284+
; CHECK-CVT-GI-NEXT: fcvt d1, h2
285+
; CHECK-CVT-GI-NEXT: fcvt d17, h3
286+
; CHECK-CVT-GI-NEXT: fcvt d2, h4
287+
; CHECK-CVT-GI-NEXT: fcvt d4, h5
288+
; CHECK-CVT-GI-NEXT: fcvt d3, h6
289+
; CHECK-CVT-GI-NEXT: fcvt d5, h7
290+
; CHECK-CVT-GI-NEXT: mov v0.d[1], v16.d[0]
291+
; CHECK-CVT-GI-NEXT: mov v1.d[1], v17.d[0]
292+
; CHECK-CVT-GI-NEXT: mov v2.d[1], v4.d[0]
293+
; CHECK-CVT-GI-NEXT: mov v3.d[1], v5.d[0]
281294
; CHECK-CVT-GI-NEXT: ret
282295
;
283296
; CHECK-FP16-GI-LABEL: h_to_d:
284297
; CHECK-FP16-GI: // %bb.0:
285-
; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h
286-
; CHECK-FP16-GI-NEXT: fcvtl2 v3.4s, v0.8h
287-
; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v1.2s
288-
; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s
289-
; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v3.2s
290-
; CHECK-FP16-GI-NEXT: fcvtl2 v3.2d, v3.4s
298+
; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
299+
; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
300+
; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
301+
; CHECK-FP16-GI-NEXT: mov h4, v0.h[4]
302+
; CHECK-FP16-GI-NEXT: mov h5, v0.h[5]
303+
; CHECK-FP16-GI-NEXT: mov h6, v0.h[6]
304+
; CHECK-FP16-GI-NEXT: mov h7, v0.h[7]
305+
; CHECK-FP16-GI-NEXT: fcvt d0, h0
306+
; CHECK-FP16-GI-NEXT: fcvt d16, h1
307+
; CHECK-FP16-GI-NEXT: fcvt d1, h2
308+
; CHECK-FP16-GI-NEXT: fcvt d17, h3
309+
; CHECK-FP16-GI-NEXT: fcvt d2, h4
310+
; CHECK-FP16-GI-NEXT: fcvt d4, h5
311+
; CHECK-FP16-GI-NEXT: fcvt d3, h6
312+
; CHECK-FP16-GI-NEXT: fcvt d5, h7
313+
; CHECK-FP16-GI-NEXT: mov v0.d[1], v16.d[0]
314+
; CHECK-FP16-GI-NEXT: mov v1.d[1], v17.d[0]
315+
; CHECK-FP16-GI-NEXT: mov v2.d[1], v4.d[0]
316+
; CHECK-FP16-GI-NEXT: mov v3.d[1], v5.d[0]
291317
; CHECK-FP16-GI-NEXT: ret
292318
%1 = fpext <8 x half> %a to <8 x double>
293319
ret <8 x double> %1

0 commit comments

Comments
 (0)