Skip to content

Commit cac5bfa

Browse files
[AArch64][GlobalISel] Legalize s16 G_FCONSTANT to avoid widening to G_CONSTANT (#161205)
When widening a `G_FCONSTANT` it is converted to a `G_CONSTANT` to avoid loss in accuracy (see #56454). This means that some folds such as `G_FPEXT(G_FCONSTANT)` fail to work when the scalar has been widened. This PR legalizes `s16`s by default in line with how s16 `G_CONSTANT`s are treated.
1 parent df3de13 commit cac5bfa

13 files changed

+153
-156
lines changed

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ def push_mul_through_sext : push_opcode_through_ext<G_MUL, G_SEXT>;
6969

7070
def AArch64PreLegalizerCombiner: GICombiner<
7171
"AArch64PreLegalizerCombinerImpl", [all_combines,
72-
fconstant_to_constant,
7372
icmp_redundant_trunc,
7473
fold_global_offset,
7574
shuffle_to_extract,
@@ -341,7 +340,7 @@ def AArch64PostLegalizerLowering
341340
: GICombiner<"AArch64PostLegalizerLoweringImpl",
342341
[shuffle_vector_lowering, vashr_vlshr_imm,
343342
icmp_lowering, build_vector_lowering,
344-
lower_vector_fcmp, form_truncstore,
343+
lower_vector_fcmp, form_truncstore, fconstant_to_constant,
345344
vector_sext_inreg_to_shift,
346345
unmerge_ext_to_unmerge, lower_mulv2s64,
347346
vector_unmerge_lowering, insertelt_nonconst,

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -678,8 +678,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
678678
.widenScalarToNextPow2(0)
679679
.clampScalar(0, s8, s64);
680680
getActionDefinitionsBuilder(G_FCONSTANT)
681-
.legalFor({s32, s64, s128})
682-
.legalFor(HasFP16, {s16})
681+
// Always legalize s16 to prevent G_FCONSTANT being widened to G_CONSTANT
682+
.legalFor({s16, s32, s64, s128})
683683
.clampScalar(0, MinFPScalar, s128);
684684

685685
// FIXME: fix moreElementsToNextPow2

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,31 @@ struct ShuffleVectorPseudo {
7575
ShuffleVectorPseudo() = default;
7676
};
7777

78+
/// Return true if a G_FCONSTANT instruction is known to be better-represented
79+
/// as a G_CONSTANT.
80+
bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) {
81+
assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
82+
Register DstReg = MI.getOperand(0).getReg();
83+
const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
84+
if (DstSize != 16 && DstSize != 32 && DstSize != 64)
85+
return false;
86+
87+
// When we're storing a value, it doesn't matter what register bank it's on.
88+
// Since not all floating point constants can be materialized using a fmov,
89+
// it makes more sense to just use a GPR.
90+
return all_of(MRI.use_nodbg_instructions(DstReg),
91+
[](const MachineInstr &Use) { return Use.mayStore(); });
92+
}
93+
94+
/// Change a G_FCONSTANT into a G_CONSTANT.
95+
void applyFConstantToConstant(MachineInstr &MI) {
96+
assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
97+
MachineIRBuilder MIB(MI);
98+
const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF();
99+
MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt());
100+
MI.eraseFromParent();
101+
}
102+
78103
/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
79104
/// sources of the shuffle are different.
80105
std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,

llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -44,31 +44,6 @@ namespace {
4444
#include "AArch64GenPreLegalizeGICombiner.inc"
4545
#undef GET_GICOMBINER_TYPES
4646

47-
/// Return true if a G_FCONSTANT instruction is known to be better-represented
48-
/// as a G_CONSTANT.
49-
bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) {
50-
assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
51-
Register DstReg = MI.getOperand(0).getReg();
52-
const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
53-
if (DstSize != 32 && DstSize != 64)
54-
return false;
55-
56-
// When we're storing a value, it doesn't matter what register bank it's on.
57-
// Since not all floating point constants can be materialized using a fmov,
58-
// it makes more sense to just use a GPR.
59-
return all_of(MRI.use_nodbg_instructions(DstReg),
60-
[](const MachineInstr &Use) { return Use.mayStore(); });
61-
}
62-
63-
/// Change a G_FCONSTANT into a G_CONSTANT.
64-
void applyFConstantToConstant(MachineInstr &MI) {
65-
assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
66-
MachineIRBuilder MIB(MI);
67-
const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF();
68-
MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt());
69-
MI.eraseFromParent();
70-
}
71-
7247
/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
7348
/// are sign bits. In this case, we can transform the G_ICMP to directly compare
7449
/// the wide value with a zero.

llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
3-
# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
2+
# RUN: llc -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
3+
# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
44
...
55
---
66
name: fconstant_to_constant_s32
77
alignment: 4
88
tracksRegLiveness: true
9+
legalized: true
910
frameInfo:
1011
maxAlignment: 1
1112
machineFunctionInfo: {}
@@ -24,16 +25,17 @@ body: |
2425
; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32))
2526
; CHECK-NEXT: RET_ReallyLR
2627
%0:_(p0) = COPY $x0
27-
%3:_(s32) = G_FCONSTANT float 0x3FA99999A0000000
28-
%1:_(s64) = G_CONSTANT i64 524
29-
%2:_(p0) = G_PTR_ADD %0, %1(s64)
30-
G_STORE %3(s32), %2(p0) :: (store (s32))
28+
%1:_(s32) = G_FCONSTANT float 0x3FA99999A0000000
29+
%2:_(s64) = G_CONSTANT i64 524
30+
%3:_(p0) = G_PTR_ADD %0, %2(s64)
31+
G_STORE %1(s32), %3(p0) :: (store (s32))
3132
RET_ReallyLR
3233
...
3334
---
3435
name: fconstant_to_constant_s64
3536
alignment: 4
3637
tracksRegLiveness: true
38+
legalized: true
3739
frameInfo:
3840
maxAlignment: 1
3941
machineFunctionInfo: {}
@@ -48,14 +50,15 @@ body: |
4850
; CHECK-NEXT: G_STORE %c(s64), %ptr(p0) :: (store (s64))
4951
; CHECK-NEXT: RET_ReallyLR
5052
%ptr:_(p0) = COPY $x0
51-
%c:_(s64) = G_FCONSTANT double 0.0
53+
%c:_(s64) = G_FCONSTANT double 0.000000e+00
5254
G_STORE %c(s64), %ptr(p0) :: (store (s64))
5355
RET_ReallyLR
5456
...
5557
---
5658
name: no_store_means_no_combine
5759
alignment: 4
5860
tracksRegLiveness: true
61+
legalized: true
5962
frameInfo:
6063
maxAlignment: 1
6164
machineFunctionInfo: {}
@@ -71,7 +74,7 @@ body: |
7174
; CHECK-NEXT: %add:_(s64) = G_FADD %v, %c
7275
; CHECK-NEXT: RET_ReallyLR implicit %add(s64)
7376
%v:_(s64) = COPY $x0
74-
%c:_(s64) = G_FCONSTANT double 0.0
77+
%c:_(s64) = G_FCONSTANT double 0.000000e+00
7578
%add:_(s64) = G_FADD %v, %c
76-
RET_ReallyLR implicit %add
79+
RET_ReallyLR implicit %add(s64)
7780
...

llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,9 @@ body: |
4848
; CHECK-NEXT: $w0 = COPY [[C]](s32)
4949
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
5050
; CHECK-NEXT: $x0 = COPY [[C1]](s64)
51-
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
52-
; CHECK-NEXT: $w0 = COPY [[C2]](s32)
51+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
52+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16)
53+
; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
5354
%0:_(s32) = G_FCONSTANT float 1.0
5455
$w0 = COPY %0
5556
%1:_(s64) = G_FCONSTANT double 2.0

llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ tracksRegLiveness: true
88
body: |
99
bb.0:
1010
; NO-FP16-LABEL: name: fp16
11-
; NO-FP16: %cst:_(s16) = G_CONSTANT i16 0
11+
; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH0000
1212
; NO-FP16-NEXT: $h0 = COPY %cst(s16)
1313
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
1414
;
@@ -26,7 +26,7 @@ tracksRegLiveness: true
2626
body: |
2727
bb.0:
2828
; NO-FP16-LABEL: name: fp16_non_zero
29-
; NO-FP16: %cst:_(s16) = G_CONSTANT i16 16384
29+
; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH4000
3030
; NO-FP16-NEXT: $h0 = COPY %cst(s16)
3131
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
3232
;
@@ -44,7 +44,7 @@ tracksRegLiveness: true
4444
body: |
4545
bb.1.entry:
4646
; NO-FP16-LABEL: name: nan
47-
; NO-FP16: %cst:_(s16) = G_CONSTANT i16 31745
47+
; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH7C01
4848
; NO-FP16-NEXT: %ext:_(s32) = G_FPEXT %cst(s16)
4949
; NO-FP16-NEXT: $w0 = COPY %ext(s32)
5050
; NO-FP16-NEXT: RET_ReallyLR implicit $w0

llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -739,15 +739,14 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
739739
;
740740
; GISEL-LABEL: postidx32_shalf:
741741
; GISEL: ; %bb.0:
742-
; GISEL-NEXT: mov w8, #0 ; =0x0
743-
; GISEL-NEXT: ldr h1, [x0], #4
744-
; GISEL-NEXT: fmov s2, w8
742+
; GISEL-NEXT: movi d1, #0000000000000000
743+
; GISEL-NEXT: ldr h2, [x0], #4
745744
; GISEL-NEXT: ; kill: def $h0 killed $h0 def $s0
746745
; GISEL-NEXT: fmov w9, s0
747-
; GISEL-NEXT: fcvt s3, h1
748-
; GISEL-NEXT: fmov w8, s1
749-
; GISEL-NEXT: fcvt s2, h2
750-
; GISEL-NEXT: fcmp s3, s2
746+
; GISEL-NEXT: fcvt s3, h2
747+
; GISEL-NEXT: fmov w8, s2
748+
; GISEL-NEXT: fcvt s1, h1
749+
; GISEL-NEXT: fcmp s3, s1
751750
; GISEL-NEXT: csel w8, w8, w9, mi
752751
; GISEL-NEXT: strh w8, [x1]
753752
; GISEL-NEXT: ret

llvm/test/CodeGen/AArch64/f16-instructions.ll

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -782,18 +782,19 @@ define void @test_fccmp(half %in, ptr %out) {
782782
;
783783
; CHECK-CVT-GI-LABEL: test_fccmp:
784784
; CHECK-CVT-GI: // %bb.0:
785-
; CHECK-CVT-GI-NEXT: mov w8, #17664 // =0x4500
786-
; CHECK-CVT-GI-NEXT: mov w9, #18432 // =0x4800
785+
; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0
787786
; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0
788787
; CHECK-CVT-GI-NEXT: fcvt s2, h0
789-
; CHECK-CVT-GI-NEXT: fmov s1, w8
790-
; CHECK-CVT-GI-NEXT: fmov s3, w9
791-
; CHECK-CVT-GI-NEXT: fmov w9, s0
792-
; CHECK-CVT-GI-NEXT: fcvt s1, h1
793-
; CHECK-CVT-GI-NEXT: fcvt s3, h3
794-
; CHECK-CVT-GI-NEXT: fcmp s2, s1
795-
; CHECK-CVT-GI-NEXT: fccmp s2, s3, #4, mi
796-
; CHECK-CVT-GI-NEXT: csel w8, w9, w8, gt
788+
; CHECK-CVT-GI-NEXT: ldr h1, [x8, :lo12:.LCPI29_0]
789+
; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_1
790+
; CHECK-CVT-GI-NEXT: ldr h4, [x8, :lo12:.LCPI29_1]
791+
; CHECK-CVT-GI-NEXT: fmov w8, s0
792+
; CHECK-CVT-GI-NEXT: fcvt s3, h1
793+
; CHECK-CVT-GI-NEXT: fmov w9, s1
794+
; CHECK-CVT-GI-NEXT: fcvt s4, h4
795+
; CHECK-CVT-GI-NEXT: fcmp s2, s3
796+
; CHECK-CVT-GI-NEXT: fccmp s2, s4, #4, mi
797+
; CHECK-CVT-GI-NEXT: csel w8, w8, w9, gt
797798
; CHECK-CVT-GI-NEXT: strh w8, [x0]
798799
; CHECK-CVT-GI-NEXT: ret
799800
;

0 commit comments

Comments
 (0)