Skip to content

Commit af4a9c6

Browse files
[AArch64][GlobalISel] Add G_FPEXT(G_FCONSTANT) folding
1 parent 348ffe8 commit af4a9c6

File tree

18 files changed

+1787
-2298
lines changed

18 files changed

+1787
-2298
lines changed

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -695,6 +695,7 @@ def constant_fold_fabs : constant_fold_unary_fp_op_rule<G_FABS>;
695695
def constant_fold_fsqrt : constant_fold_unary_fp_op_rule<G_FSQRT>;
696696
def constant_fold_flog2 : constant_fold_unary_fp_op_rule<G_FLOG2>;
697697
def constant_fold_fptrunc : constant_fold_unary_fp_op_rule<G_FPTRUNC>;
698+
def constant_fold_fpext : constant_fold_unary_fp_op_rule<G_FPEXT>;
698699

699700
// Fold constant zero int to fp conversions.
700701
class itof_const_zero_fold_rule<Instruction opcode> : GICombineRule <
@@ -713,6 +714,7 @@ def constant_fold_fp_ops : GICombineGroup<[
713714
constant_fold_fsqrt,
714715
constant_fold_flog2,
715716
constant_fold_fptrunc,
717+
constant_fold_fpext,
716718
itof_const_zero_fold_si,
717719
itof_const_zero_fold_ui
718720
]>;

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1728,6 +1728,7 @@ static APFloat constantFoldFpUnary(const MachineInstr &MI,
17281728
Result.clearSign();
17291729
return Result;
17301730
}
1731+
case TargetOpcode::G_FPEXT:
17311732
case TargetOpcode::G_FPTRUNC: {
17321733
bool Unused;
17331734
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -350,7 +350,7 @@ def AArch64PostLegalizerLowering
350350
// Post-legalization combines which are primarily optimizations.
351351
def AArch64PostLegalizerCombiner
352352
: GICombiner<"AArch64PostLegalizerCombinerImpl",
353-
[copy_prop, cast_of_cast_combines,
353+
[copy_prop, cast_of_cast_combines, constant_fold_fp_ops,
354354
buildvector_of_truncate, integer_of_truncate,
355355
mutate_anyext_to_zext, combines_for_extload,
356356
combine_indexed_load_store, sext_trunc_sextload,

llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -739,14 +739,12 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
739739
;
740740
; GISEL-LABEL: postidx32_shalf:
741741
; GISEL: ; %bb.0:
742-
; GISEL-NEXT: movi d1, #0000000000000000
743-
; GISEL-NEXT: ldr h2, [x0], #4
742+
; GISEL-NEXT: ldr h1, [x0], #4
744743
; GISEL-NEXT: ; kill: def $h0 killed $h0 def $s0
745744
; GISEL-NEXT: fmov w9, s0
746-
; GISEL-NEXT: fcvt s3, h2
747-
; GISEL-NEXT: fmov w8, s2
748-
; GISEL-NEXT: fcvt s1, h1
749-
; GISEL-NEXT: fcmp s3, s1
745+
; GISEL-NEXT: fcvt s2, h1
746+
; GISEL-NEXT: fmov w8, s1
747+
; GISEL-NEXT: fcmp s2, #0.0
750748
; GISEL-NEXT: csel w8, w8, w9, mi
751749
; GISEL-NEXT: strh w8, [x1]
752750
; GISEL-NEXT: ret

llvm/test/CodeGen/AArch64/f16-instructions.ll

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -782,18 +782,16 @@ define void @test_fccmp(half %in, ptr %out) {
782782
;
783783
; CHECK-CVT-GI-LABEL: test_fccmp:
784784
; CHECK-CVT-GI: // %bb.0:
785-
; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0
786785
; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0
787-
; CHECK-CVT-GI-NEXT: fcvt s2, h0
788-
; CHECK-CVT-GI-NEXT: ldr h1, [x8, :lo12:.LCPI29_0]
789-
; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_1
790-
; CHECK-CVT-GI-NEXT: ldr h4, [x8, :lo12:.LCPI29_1]
786+
; CHECK-CVT-GI-NEXT: fcvt s1, h0
787+
; CHECK-CVT-GI-NEXT: fmov s2, #5.00000000
788+
; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0
789+
; CHECK-CVT-GI-NEXT: fmov s3, #8.00000000
790+
; CHECK-CVT-GI-NEXT: fcmp s1, s2
791+
; CHECK-CVT-GI-NEXT: ldr h2, [x8, :lo12:.LCPI29_0]
791792
; CHECK-CVT-GI-NEXT: fmov w8, s0
792-
; CHECK-CVT-GI-NEXT: fcvt s3, h1
793-
; CHECK-CVT-GI-NEXT: fmov w9, s1
794-
; CHECK-CVT-GI-NEXT: fcvt s4, h4
795-
; CHECK-CVT-GI-NEXT: fcmp s2, s3
796-
; CHECK-CVT-GI-NEXT: fccmp s2, s4, #4, mi
793+
; CHECK-CVT-GI-NEXT: fmov w9, s2
794+
; CHECK-CVT-GI-NEXT: fccmp s1, s3, #4, mi
797795
; CHECK-CVT-GI-NEXT: csel w8, w8, w9, gt
798796
; CHECK-CVT-GI-NEXT: strh w8, [x0]
799797
; CHECK-CVT-GI-NEXT: ret

0 commit comments

Comments
 (0)