Skip to content

Commit ab016f1

Browse files
[AArch64][GlobalISel] Add G_FPEXT(G_FCONSTANT) folding
1 parent bb38b48 commit ab016f1

23 files changed

+1833
-2339
lines changed

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -694,6 +694,7 @@ def constant_fold_fabs : constant_fold_unary_fp_op_rule<G_FABS>;
694694
def constant_fold_fsqrt : constant_fold_unary_fp_op_rule<G_FSQRT>;
695695
def constant_fold_flog2 : constant_fold_unary_fp_op_rule<G_FLOG2>;
696696
def constant_fold_fptrunc : constant_fold_unary_fp_op_rule<G_FPTRUNC>;
697+
def constant_fold_fpext : constant_fold_unary_fp_op_rule<G_FPEXT>;
697698

698699
// Fold constant zero int to fp conversions.
699700
class itof_const_zero_fold_rule<Instruction opcode> : GICombineRule <
@@ -712,6 +713,7 @@ def constant_fold_fp_ops : GICombineGroup<[
712713
constant_fold_fsqrt,
713714
constant_fold_flog2,
714715
constant_fold_fptrunc,
716+
constant_fold_fpext,
715717
itof_const_zero_fold_si,
716718
itof_const_zero_fold_ui
717719
]>;

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1728,6 +1728,7 @@ static APFloat constantFoldFpUnary(const MachineInstr &MI,
17281728
Result.clearSign();
17291729
return Result;
17301730
}
1731+
case TargetOpcode::G_FPEXT:
17311732
case TargetOpcode::G_FPTRUNC: {
17321733
bool Unused;
17331734
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ def AArch64PostLegalizerLowering
351351
// Post-legalization combines which are primarily optimizations.
352352
def AArch64PostLegalizerCombiner
353353
: GICombiner<"AArch64PostLegalizerCombinerImpl",
354-
[copy_prop, cast_of_cast_combines,
354+
[copy_prop, cast_of_cast_combines, constant_fold_fp_ops,
355355
buildvector_of_truncate, integer_of_truncate,
356356
mutate_anyext_to_zext, combines_for_extload,
357357
combine_indexed_load_store, sext_trunc_sextload,

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -678,8 +678,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
678678
.widenScalarToNextPow2(0)
679679
.clampScalar(0, s8, s64);
680680
getActionDefinitionsBuilder(G_FCONSTANT)
681-
.legalFor({s32, s64, s128})
682-
.legalFor(HasFP16, {s16})
681+
// Always legalize S16 to prevent G_FCONSTANT being widened to G_CONSTANT
682+
.legalFor({s16, s32, s64, s128})
683683
.clampScalar(0, MinFPScalar, s128);
684684

685685
// FIXME: fix moreElementsToNextPow2

llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,9 @@ body: |
4848
; CHECK-NEXT: $w0 = COPY [[C]](s32)
4949
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
5050
; CHECK-NEXT: $x0 = COPY [[C1]](s64)
51-
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
52-
; CHECK-NEXT: $w0 = COPY [[C2]](s32)
51+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
52+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16)
53+
; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
5354
%0:_(s32) = G_FCONSTANT float 1.0
5455
$w0 = COPY %0
5556
%1:_(s64) = G_FCONSTANT double 2.0

llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ tracksRegLiveness: true
88
body: |
99
bb.0:
1010
; NO-FP16-LABEL: name: fp16
11-
; NO-FP16: %cst:_(s16) = G_CONSTANT i16 0
11+
; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH0000
1212
; NO-FP16-NEXT: $h0 = COPY %cst(s16)
1313
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
1414
;
@@ -26,7 +26,7 @@ tracksRegLiveness: true
2626
body: |
2727
bb.0:
2828
; NO-FP16-LABEL: name: fp16_non_zero
29-
; NO-FP16: %cst:_(s16) = G_CONSTANT i16 16384
29+
; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH4000
3030
; NO-FP16-NEXT: $h0 = COPY %cst(s16)
3131
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
3232
;
@@ -44,7 +44,7 @@ tracksRegLiveness: true
4444
body: |
4545
bb.1.entry:
4646
; NO-FP16-LABEL: name: nan
47-
; NO-FP16: %cst:_(s16) = G_CONSTANT i16 31745
47+
; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH7C01
4848
; NO-FP16-NEXT: %ext:_(s32) = G_FPEXT %cst(s16)
4949
; NO-FP16-NEXT: $w0 = COPY %ext(s32)
5050
; NO-FP16-NEXT: RET_ReallyLR implicit $w0

llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -739,15 +739,12 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
739739
;
740740
; GISEL-LABEL: postidx32_shalf:
741741
; GISEL: ; %bb.0:
742-
; GISEL-NEXT: mov w8, #0 ; =0x0
743742
; GISEL-NEXT: ldr h1, [x0], #4
744-
; GISEL-NEXT: fmov s2, w8
745743
; GISEL-NEXT: ; kill: def $h0 killed $h0 def $s0
746744
; GISEL-NEXT: fmov w9, s0
747-
; GISEL-NEXT: fcvt s3, h1
745+
; GISEL-NEXT: fcvt s2, h1
748746
; GISEL-NEXT: fmov w8, s1
749-
; GISEL-NEXT: fcvt s2, h2
750-
; GISEL-NEXT: fcmp s3, s2
747+
; GISEL-NEXT: fcmp s2, #0.0
751748
; GISEL-NEXT: csel w8, w8, w9, mi
752749
; GISEL-NEXT: strh w8, [x1]
753750
; GISEL-NEXT: ret

llvm/test/CodeGen/AArch64/dup.ll

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1469,8 +1469,9 @@ define <2 x half> @loaddup_str_v2half(ptr %p) {
14691469
; CHECK-GI-LABEL: loaddup_str_v2half:
14701470
; CHECK-GI: // %bb.0: // %entry
14711471
; CHECK-GI-NEXT: ldr h0, [x0]
1472-
; CHECK-GI-NEXT: strh wzr, [x0]
1472+
; CHECK-GI-NEXT: movi d1, #0000000000000000
14731473
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
1474+
; CHECK-GI-NEXT: str h1, [x0]
14741475
; CHECK-GI-NEXT: ret
14751476
entry:
14761477
%a = load half, ptr %p
@@ -1526,8 +1527,9 @@ define <3 x half> @loaddup_str_v3half(ptr %p) {
15261527
; CHECK-GI-LABEL: loaddup_str_v3half:
15271528
; CHECK-GI: // %bb.0: // %entry
15281529
; CHECK-GI-NEXT: ldr h0, [x0]
1529-
; CHECK-GI-NEXT: strh wzr, [x0]
1530+
; CHECK-GI-NEXT: movi d1, #0000000000000000
15301531
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
1532+
; CHECK-GI-NEXT: str h1, [x0]
15311533
; CHECK-GI-NEXT: ret
15321534
entry:
15331535
%a = load half, ptr %p
@@ -1583,8 +1585,9 @@ define <4 x half> @loaddup_str_v4half(ptr %p) {
15831585
; CHECK-GI-LABEL: loaddup_str_v4half:
15841586
; CHECK-GI: // %bb.0: // %entry
15851587
; CHECK-GI-NEXT: ldr h0, [x0]
1586-
; CHECK-GI-NEXT: strh wzr, [x0]
1588+
; CHECK-GI-NEXT: movi d1, #0000000000000000
15871589
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
1590+
; CHECK-GI-NEXT: str h1, [x0]
15881591
; CHECK-GI-NEXT: ret
15891592
entry:
15901593
%a = load half, ptr %p
@@ -1639,8 +1642,9 @@ define <8 x half> @loaddup_str_v8half(ptr %p) {
16391642
; CHECK-GI-LABEL: loaddup_str_v8half:
16401643
; CHECK-GI: // %bb.0: // %entry
16411644
; CHECK-GI-NEXT: ldr h0, [x0]
1642-
; CHECK-GI-NEXT: strh wzr, [x0]
1645+
; CHECK-GI-NEXT: movi d1, #0000000000000000
16431646
; CHECK-GI-NEXT: dup v0.8h, v0.h[0]
1647+
; CHECK-GI-NEXT: str h1, [x0]
16441648
; CHECK-GI-NEXT: ret
16451649
entry:
16461650
%a = load half, ptr %p
@@ -1713,9 +1717,10 @@ define <16 x half> @loaddup_str_v16half(ptr %p) {
17131717
; CHECK-GI-LABEL: loaddup_str_v16half:
17141718
; CHECK-GI: // %bb.0: // %entry
17151719
; CHECK-GI-NEXT: ldr h1, [x0]
1716-
; CHECK-GI-NEXT: strh wzr, [x0]
1720+
; CHECK-GI-NEXT: movi d2, #0000000000000000
17171721
; CHECK-GI-NEXT: dup v0.8h, v1.h[0]
17181722
; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
1723+
; CHECK-GI-NEXT: str h2, [x0]
17191724
; CHECK-GI-NEXT: ret
17201725
entry:
17211726
%a = load half, ptr %p
@@ -1771,8 +1776,9 @@ define <2 x bfloat> @loaddup_str_v2bfloat(ptr %p) {
17711776
; CHECK-GI-LABEL: loaddup_str_v2bfloat:
17721777
; CHECK-GI: // %bb.0: // %entry
17731778
; CHECK-GI-NEXT: ldr h0, [x0]
1774-
; CHECK-GI-NEXT: strh wzr, [x0]
1779+
; CHECK-GI-NEXT: movi d1, #0000000000000000
17751780
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
1781+
; CHECK-GI-NEXT: str h1, [x0]
17761782
; CHECK-GI-NEXT: ret
17771783
entry:
17781784
%a = load bfloat, ptr %p
@@ -1828,8 +1834,9 @@ define <3 x bfloat> @loaddup_str_v3bfloat(ptr %p) {
18281834
; CHECK-GI-LABEL: loaddup_str_v3bfloat:
18291835
; CHECK-GI: // %bb.0: // %entry
18301836
; CHECK-GI-NEXT: ldr h0, [x0]
1831-
; CHECK-GI-NEXT: strh wzr, [x0]
1837+
; CHECK-GI-NEXT: movi d1, #0000000000000000
18321838
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
1839+
; CHECK-GI-NEXT: str h1, [x0]
18331840
; CHECK-GI-NEXT: ret
18341841
entry:
18351842
%a = load bfloat, ptr %p
@@ -1885,8 +1892,9 @@ define <4 x bfloat> @loaddup_str_v4bfloat(ptr %p) {
18851892
; CHECK-GI-LABEL: loaddup_str_v4bfloat:
18861893
; CHECK-GI: // %bb.0: // %entry
18871894
; CHECK-GI-NEXT: ldr h0, [x0]
1888-
; CHECK-GI-NEXT: strh wzr, [x0]
1895+
; CHECK-GI-NEXT: movi d1, #0000000000000000
18891896
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
1897+
; CHECK-GI-NEXT: str h1, [x0]
18901898
; CHECK-GI-NEXT: ret
18911899
entry:
18921900
%a = load bfloat, ptr %p
@@ -1941,8 +1949,9 @@ define <8 x bfloat> @loaddup_str_v8bfloat(ptr %p) {
19411949
; CHECK-GI-LABEL: loaddup_str_v8bfloat:
19421950
; CHECK-GI: // %bb.0: // %entry
19431951
; CHECK-GI-NEXT: ldr h0, [x0]
1944-
; CHECK-GI-NEXT: strh wzr, [x0]
1952+
; CHECK-GI-NEXT: movi d1, #0000000000000000
19451953
; CHECK-GI-NEXT: dup v0.8h, v0.h[0]
1954+
; CHECK-GI-NEXT: str h1, [x0]
19461955
; CHECK-GI-NEXT: ret
19471956
entry:
19481957
%a = load bfloat, ptr %p
@@ -2015,9 +2024,10 @@ define <16 x bfloat> @loaddup_str_v16bfloat(ptr %p) {
20152024
; CHECK-GI-LABEL: loaddup_str_v16bfloat:
20162025
; CHECK-GI: // %bb.0: // %entry
20172026
; CHECK-GI-NEXT: ldr h1, [x0]
2018-
; CHECK-GI-NEXT: strh wzr, [x0]
2027+
; CHECK-GI-NEXT: movi d2, #0000000000000000
20192028
; CHECK-GI-NEXT: dup v0.8h, v1.h[0]
20202029
; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
2030+
; CHECK-GI-NEXT: str h2, [x0]
20212031
; CHECK-GI-NEXT: ret
20222032
entry:
20232033
%a = load bfloat, ptr %p

llvm/test/CodeGen/AArch64/f16-instructions.ll

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -782,18 +782,17 @@ define void @test_fccmp(half %in, ptr %out) {
782782
;
783783
; CHECK-CVT-GI-LABEL: test_fccmp:
784784
; CHECK-CVT-GI: // %bb.0:
785-
; CHECK-CVT-GI-NEXT: mov w8, #17664 // =0x4500
786-
; CHECK-CVT-GI-NEXT: mov w9, #18432 // =0x4800
787785
; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0
788-
; CHECK-CVT-GI-NEXT: fcvt s2, h0
789-
; CHECK-CVT-GI-NEXT: fmov s1, w8
790-
; CHECK-CVT-GI-NEXT: fmov s3, w9
791-
; CHECK-CVT-GI-NEXT: fmov w9, s0
792-
; CHECK-CVT-GI-NEXT: fcvt s1, h1
793-
; CHECK-CVT-GI-NEXT: fcvt s3, h3
794-
; CHECK-CVT-GI-NEXT: fcmp s2, s1
795-
; CHECK-CVT-GI-NEXT: fccmp s2, s3, #4, mi
796-
; CHECK-CVT-GI-NEXT: csel w8, w9, w8, gt
786+
; CHECK-CVT-GI-NEXT: fcvt s1, h0
787+
; CHECK-CVT-GI-NEXT: fmov s2, #5.00000000
788+
; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0
789+
; CHECK-CVT-GI-NEXT: fmov s3, #8.00000000
790+
; CHECK-CVT-GI-NEXT: fcmp s1, s2
791+
; CHECK-CVT-GI-NEXT: ldr h2, [x8, :lo12:.LCPI29_0]
792+
; CHECK-CVT-GI-NEXT: fmov w8, s0
793+
; CHECK-CVT-GI-NEXT: fmov w9, s2
794+
; CHECK-CVT-GI-NEXT: fccmp s1, s3, #4, mi
795+
; CHECK-CVT-GI-NEXT: csel w8, w8, w9, gt
797796
; CHECK-CVT-GI-NEXT: strh w8, [x0]
798797
; CHECK-CVT-GI-NEXT: ret
799798
;

0 commit comments

Comments
 (0)