Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53600,6 +53600,40 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
DAG.getUNDEF(SrcVT)));
}

// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
// (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able
// to avoid generating code with MOVABS and large constants in certain cases.
static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG,
const SDLoc &DL) {
using namespace llvm::SDPatternMatch;

SDValue AddLhs;
APInt AddConst, SrlConst;
if (VT != MVT::i32 ||
!sd_match(N, m_AllOf(m_SpecificVT(MVT::i64),
m_Srl(m_OneUse(m_Add(m_Value(AddLhs),
m_ConstInt(AddConst))),
m_ConstInt(SrlConst)))))
return SDValue();

if (!SrlConst.ugt(31) || AddConst.lshr(SrlConst).shl(SrlConst) != AddConst)
return SDValue();

SDValue AddLHSSrl =
DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);

APInt NewAddConstVal =
(~((~AddConst).lshr(SrlConst))).trunc(VT.getSizeInBits());
SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);

APInt CleanupSizeConstVal = (SrlConst - 32).zextOrTrunc(VT.getSizeInBits());
SDValue CleanupSizeConst = DAG.getConstant(CleanupSizeConstVal, DL, VT);
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewAddNode, CleanupSizeConst);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can it help if replace SHL/SRL with getAnyExtOrTrunc? I'm not sure anyext always generates the correct result though, but should help remove the movzwl

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getAnyExtOrTrunc does indeed remove the movzwl, but I'm not sure about it's correctness. I might be mistaken, but judging by the proofs it looks like we need the higher bits to be zeroed. Both proofs explicitly zero the higher bits of the result, and this seems to be required for the transformation to be correct (https://alive2.llvm.org/ce/z/zs6NQc using trunc + zext, https://alive2.llvm.org/ce/z/F3oLAg using shl + lshr).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think they are the two sides to the same coin. If you believe MOVZ is unnecessary, then it means we can assume the high 16-bit are all zeros. But I don't know how to prove it. I used zext just because there's no anyext in the IR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm inclined to think that the MOVZ is unnecessary. This transformation with the AND does not use it https://godbolt.org/z/a5d5vhdsb, and my previous implementation of the fold (although less general) also did not generate any MOVZs. I'll change the implementation to use anyext.

return DAG.getNode(ISD::SRL, DL, VT, Shl, CleanupSizeConst);
}

/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
/// the codegen.
/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
Expand Down Expand Up @@ -53645,6 +53679,9 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
if (!Src.hasOneUse())
return SDValue();

if (SDValue R = combinei64TruncSrlAdd(Src, VT, DAG, DL))
return R;

// Only support vector truncation for now.
// TODO: i64 scalar math would benefit as well.
if (!VT.isVector())
Expand Down
138 changes: 138 additions & 0 deletions llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64

; Test for https://github.com/llvm/llvm-project/issues/123239

define i1 @test_ult_trunc_add(i64 %x) {
; X64-LABEL: test_ult_trunc_add:
; X64: # %bb.0: # %entry
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: cmpl $3, %eax
; X64-NEXT: setb %al
; X64-NEXT: retq
entry:
%add = add i64 %x, 3940649673949184
%shr = lshr i64 %add, 48
%conv = trunc i64 %shr to i32
%res = icmp ult i32 %conv, 3
ret i1 %res
}

define i1 @test_ult_add(i64 %x) {
; X64-LABEL: test_ult_add:
; X64: # %bb.0: # %entry
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: cmpl $3, %eax
; X64-NEXT: setb %al
; X64-NEXT: retq
entry:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(style) remove entry: and avoid numbered variables

%0 = add i64 3940649673949184, %x
%1 = icmp ult i64 %0, 844424930131968
ret i1 %1
}

define i1 @test_ugt_trunc_add(i64 %x) {
; X64-LABEL: test_ugt_trunc_add:
; X64: # %bb.0: # %entry
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: cmpl $4, %eax
; X64-NEXT: setae %al
; X64-NEXT: retq
entry:
%add = add i64 %x, 3940649673949184
%shr = lshr i64 %add, 48
%conv = trunc i64 %shr to i32
%res = icmp ugt i32 %conv, 3
ret i1 %res
}

define i1 @test_ugt_add(i64 %x) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these negative tests? Add comments if they are.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, this transformation is also applicable to those tests. However, the selection DAG differs in those cases, so I figured expanding the transformation to include them should be a separate patch. If preferred, I can expand it now, though.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's ok to do in a separate patch.

; X64-LABEL: test_ugt_add:
; X64: # %bb.0: # %entry
; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000
; X64-NEXT: addq %rdi, %rax
; X64-NEXT: movabsq $844424930131968, %rcx # imm = 0x3000000000000
; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: seta %al
; X64-NEXT: retq
entry:
%0 = add i64 3940649673949184, %x
%1 = icmp ugt i64 %0, 844424930131968
ret i1 %1
}

define i1 @test_eq_trunc_add(i64 %x) {
; X64-LABEL: test_eq_trunc_add:
; X64: # %bb.0: # %entry
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: cmpl $65525, %edi # imm = 0xFFF5
; X64-NEXT: sete %al
; X64-NEXT: retq
entry:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove "entry " from all tests

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Sorry for missing it again after your first review.

%add = add i64 %x, 3940649673949184
%shr = lshr i64 %add, 48
%conv = trunc i64 %shr to i32
%res = icmp eq i32 %conv, 3
ret i1 %res
}

define i1 @test_eq_add(i64 %x) {
; X64-LABEL: test_eq_add:
; X64: # %bb.0: # %entry
; X64-NEXT: movabsq $-3096224743817216, %rax # imm = 0xFFF5000000000000
; X64-NEXT: cmpq %rax, %rdi
; X64-NEXT: sete %al
; X64-NEXT: retq
entry:
%0 = add i64 3940649673949184, %x
%1 = icmp eq i64 %0, 844424930131968
ret i1 %1
}

define i1 @test_ne_trunc_add(i64 %x) {
; X64-LABEL: test_ne_trunc_add:
; X64: # %bb.0: # %entry
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: cmpl $65525, %edi # imm = 0xFFF5
; X64-NEXT: setne %al
; X64-NEXT: retq
entry:
%add = add i64 %x, 3940649673949184
%shr = lshr i64 %add, 48
%conv = trunc i64 %shr to i32
%res = icmp ne i32 %conv, 3
ret i1 %res
}

define i1 @test_ne_add(i64 %x) {
; X64-LABEL: test_ne_add:
; X64: # %bb.0: # %entry
; X64-NEXT: movabsq $-3096224743817216, %rax # imm = 0xFFF5000000000000
; X64-NEXT: cmpq %rax, %rdi
; X64-NEXT: setne %al
; X64-NEXT: retq
entry:
%0 = add i64 3940649673949184, %x
%1 = icmp ne i64 %0, 844424930131968
ret i1 %1
}

define i32 @test_trunc_add(i64 %x) {
; X64-LABEL: test_trunc_add:
; X64: # %bb.0: # %entry
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: retq
entry:
%add = add i64 %x, 3940649673949184
%shr = lshr i64 %add, 48
%conv = trunc i64 %shr to i32
ret i32 %conv
}