Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions llvm/lib/Target/AArch64/AArch64Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ def push_mul_through_sext : push_opcode_through_ext<G_MUL, G_SEXT>;

def AArch64PreLegalizerCombiner: GICombiner<
"AArch64PreLegalizerCombinerImpl", [all_combines,
fconstant_to_constant,
icmp_redundant_trunc,
fold_global_offset,
shuffle_to_extract,
Expand Down Expand Up @@ -341,7 +340,7 @@ def AArch64PostLegalizerLowering
: GICombiner<"AArch64PostLegalizerLoweringImpl",
[shuffle_vector_lowering, vashr_vlshr_imm,
icmp_lowering, build_vector_lowering,
lower_vector_fcmp, form_truncstore,
lower_vector_fcmp, form_truncstore, fconstant_to_constant,
vector_sext_inreg_to_shift,
unmerge_ext_to_unmerge, lower_mulv2s64,
vector_unmerge_lowering, insertelt_nonconst,
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -678,8 +678,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.widenScalarToNextPow2(0)
.clampScalar(0, s8, s64);
getActionDefinitionsBuilder(G_FCONSTANT)
.legalFor({s32, s64, s128})
.legalFor(HasFP16, {s16})
// Always mark s16 as legal to prevent G_FCONSTANT from being widened to G_CONSTANT
.legalFor({s16, s32, s64, s128})
.clampScalar(0, MinFPScalar, s128);

// FIXME: fix moreElementsToNextPow2
Expand Down
25 changes: 25 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,31 @@ struct ShuffleVectorPseudo {
ShuffleVectorPseudo() = default;
};

/// Decide whether a G_FCONSTANT should be rewritten as a G_CONSTANT.
///
/// The rewrite is only considered for 16-, 32- and 64-bit destinations, and
/// only when every non-debug user of the destination register may store.
bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
  const Register Dst = MI.getOperand(0).getReg();
  const unsigned Bits = MRI.getType(Dst).getSizeInBits();

  switch (Bits) {
  case 16:
  case 32:
  case 64:
    break;
  default:
    return false;
  }

  // A stored value does not care which register bank it lives on, and fmov
  // cannot materialize every floating point constant, so a GPR is the better
  // choice when the constant only feeds stores.
  for (const MachineInstr &User : MRI.use_nodbg_instructions(Dst))
    if (!User.mayStore())
      return false;
  return true;
}

/// Replace a G_FCONSTANT with a G_CONSTANT that defines the same register,
/// using the integer bit pattern of the floating point immediate.
void applyFConstantToConstant(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
  MachineIRBuilder Builder(MI);

  const auto Dst = MI.getOperand(0).getReg();
  const APFloat &FPValue = MI.getOperand(1).getFPImm()->getValueAPF();
  const auto IntBits = FPValue.bitcastToAPInt();

  // Emit the replacement first, then drop the original instruction.
  Builder.buildConstant(Dst, IntBits);
  MI.eraseFromParent();
}

/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
/// sources of the shuffle are different.
std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
Expand Down
25 changes: 0 additions & 25 deletions llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,31 +44,6 @@ namespace {
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES

/// Check whether a G_FCONSTANT is better expressed as a G_CONSTANT.
///
/// Only 32- and 64-bit destinations qualify, and only if all non-debug users
/// of the defined register may store.
bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
  Register Def = MI.getOperand(0).getReg();
  const unsigned Width = MRI.getType(Def).getSizeInBits();
  const bool SupportedWidth = Width == 32 || Width == 64;
  if (!SupportedWidth)
    return false;

  // The register bank is irrelevant for a value that is only stored. Because
  // fmov cannot materialize every floating point constant, keeping such a
  // constant on a GPR is the more sensible option.
  auto MayStore = [](const MachineInstr &User) { return User.mayStore(); };
  return all_of(MRI.use_nodbg_instructions(Def), MayStore);
}

/// Turn a G_FCONSTANT into a G_CONSTANT carrying the same bits and defining
/// the same destination register.
void applyFConstantToConstant(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
  MachineIRBuilder B(MI);
  const APFloat &Imm = MI.getOperand(1).getFPImm()->getValueAPF();

  // Build the integer constant into the original destination, then remove the
  // floating point constant it replaces.
  const auto Target = MI.getOperand(0).getReg();
  B.buildConstant(Target, Imm.bitcastToAPInt());
  MI.eraseFromParent();
}

/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
/// are sign bits. In this case, we can transform the G_ICMP to directly compare
/// the wide value with a zero.
Expand Down
21 changes: 12 additions & 9 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
# RUN: llc -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
...
---
name: fconstant_to_constant_s32
alignment: 4
tracksRegLiveness: true
legalized: true
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
Expand All @@ -24,16 +25,17 @@ body: |
; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32))
; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%3:_(s32) = G_FCONSTANT float 0x3FA99999A0000000
%1:_(s64) = G_CONSTANT i64 524
%2:_(p0) = G_PTR_ADD %0, %1(s64)
G_STORE %3(s32), %2(p0) :: (store (s32))
%1:_(s32) = G_FCONSTANT float 0x3FA99999A0000000
%2:_(s64) = G_CONSTANT i64 524
%3:_(p0) = G_PTR_ADD %0, %2(s64)
G_STORE %1(s32), %3(p0) :: (store (s32))
RET_ReallyLR
...
---
name: fconstant_to_constant_s64
alignment: 4
tracksRegLiveness: true
legalized: true
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
Expand All @@ -48,14 +50,15 @@ body: |
; CHECK-NEXT: G_STORE %c(s64), %ptr(p0) :: (store (s64))
; CHECK-NEXT: RET_ReallyLR
%ptr:_(p0) = COPY $x0
%c:_(s64) = G_FCONSTANT double 0.0
%c:_(s64) = G_FCONSTANT double 0.000000e+00
G_STORE %c(s64), %ptr(p0) :: (store (s64))
RET_ReallyLR
...
---
name: no_store_means_no_combine
alignment: 4
tracksRegLiveness: true
legalized: true
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
Expand All @@ -71,7 +74,7 @@ body: |
; CHECK-NEXT: %add:_(s64) = G_FADD %v, %c
; CHECK-NEXT: RET_ReallyLR implicit %add(s64)
%v:_(s64) = COPY $x0
%c:_(s64) = G_FCONSTANT double 0.0
%c:_(s64) = G_FCONSTANT double 0.000000e+00
%add:_(s64) = G_FADD %v, %c
RET_ReallyLR implicit %add
RET_ReallyLR implicit %add(s64)
...
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,9 @@ body: |
; CHECK-NEXT: $w0 = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
; CHECK-NEXT: $x0 = COPY [[C1]](s64)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: $w0 = COPY [[C2]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16)
; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
%0:_(s32) = G_FCONSTANT float 1.0
$w0 = COPY %0
%1:_(s64) = G_FCONSTANT double 2.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ tracksRegLiveness: true
body: |
bb.0:
; NO-FP16-LABEL: name: fp16
; NO-FP16: %cst:_(s16) = G_CONSTANT i16 0
; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH0000
; NO-FP16-NEXT: $h0 = COPY %cst(s16)
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
;
Expand All @@ -26,7 +26,7 @@ tracksRegLiveness: true
body: |
bb.0:
; NO-FP16-LABEL: name: fp16_non_zero
; NO-FP16: %cst:_(s16) = G_CONSTANT i16 16384
; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH4000
; NO-FP16-NEXT: $h0 = COPY %cst(s16)
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
;
Expand All @@ -44,7 +44,7 @@ tracksRegLiveness: true
body: |
bb.1.entry:
; NO-FP16-LABEL: name: nan
; NO-FP16: %cst:_(s16) = G_CONSTANT i16 31745
; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH7C01
; NO-FP16-NEXT: %ext:_(s32) = G_FPEXT %cst(s16)
; NO-FP16-NEXT: $w0 = COPY %ext(s32)
; NO-FP16-NEXT: RET_ReallyLR implicit $w0
Expand Down
13 changes: 6 additions & 7 deletions llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
Original file line number Diff line number Diff line change
Expand Up @@ -739,15 +739,14 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
;
; GISEL-LABEL: postidx32_shalf:
; GISEL: ; %bb.0:
; GISEL-NEXT: mov w8, #0 ; =0x0
; GISEL-NEXT: ldr h1, [x0], #4
; GISEL-NEXT: fmov s2, w8
; GISEL-NEXT: movi d1, #0000000000000000
; GISEL-NEXT: ldr h2, [x0], #4
; GISEL-NEXT: ; kill: def $h0 killed $h0 def $s0
; GISEL-NEXT: fmov w9, s0
; GISEL-NEXT: fcvt s3, h1
; GISEL-NEXT: fmov w8, s1
; GISEL-NEXT: fcvt s2, h2
; GISEL-NEXT: fcmp s3, s2
; GISEL-NEXT: fcvt s3, h2
; GISEL-NEXT: fmov w8, s2
; GISEL-NEXT: fcvt s1, h1
; GISEL-NEXT: fcmp s3, s1
; GISEL-NEXT: csel w8, w8, w9, mi
; GISEL-NEXT: strh w8, [x1]
; GISEL-NEXT: ret
Expand Down
21 changes: 11 additions & 10 deletions llvm/test/CodeGen/AArch64/f16-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -782,18 +782,19 @@ define void @test_fccmp(half %in, ptr %out) {
;
; CHECK-CVT-GI-LABEL: test_fccmp:
; CHECK-CVT-GI: // %bb.0:
; CHECK-CVT-GI-NEXT: mov w8, #17664 // =0x4500
; CHECK-CVT-GI-NEXT: mov w9, #18432 // =0x4800
; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0
; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0
; CHECK-CVT-GI-NEXT: fcvt s2, h0
; CHECK-CVT-GI-NEXT: fmov s1, w8
; CHECK-CVT-GI-NEXT: fmov s3, w9
; CHECK-CVT-GI-NEXT: fmov w9, s0
; CHECK-CVT-GI-NEXT: fcvt s1, h1
; CHECK-CVT-GI-NEXT: fcvt s3, h3
; CHECK-CVT-GI-NEXT: fcmp s2, s1
; CHECK-CVT-GI-NEXT: fccmp s2, s3, #4, mi
; CHECK-CVT-GI-NEXT: csel w8, w9, w8, gt
; CHECK-CVT-GI-NEXT: ldr h1, [x8, :lo12:.LCPI29_0]
; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_1
; CHECK-CVT-GI-NEXT: ldr h4, [x8, :lo12:.LCPI29_1]
; CHECK-CVT-GI-NEXT: fmov w8, s0
; CHECK-CVT-GI-NEXT: fcvt s3, h1
; CHECK-CVT-GI-NEXT: fmov w9, s1
; CHECK-CVT-GI-NEXT: fcvt s4, h4
; CHECK-CVT-GI-NEXT: fcmp s2, s3
; CHECK-CVT-GI-NEXT: fccmp s2, s4, #4, mi
; CHECK-CVT-GI-NEXT: csel w8, w8, w9, gt
; CHECK-CVT-GI-NEXT: strh w8, [x0]
; CHECK-CVT-GI-NEXT: ret
;
Expand Down
Loading