llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (20 changes: 16 additions & 4 deletions)
@@ -3154,19 +3154,31 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
 }
 
 // Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
+// Attempt to form avgfloor(A, B) from ((A >> 1) + (B >> 1)) + (A & B & 1)
 SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N0.getValueType();
   SDValue A, B;
 
+  // FIXME: m_ReassociatableAdd can't handle m_Value/m_Deferred mixing.
Member:
Just a general comment / thinking out loud on this issue: I guess the reason it doesn't handle m_Value / m_Deferred in the same reassociatable expression right now is that reassociatable matching is done in two phases, in which the individual m_Value & m_Deferred matchers are completely detached from each other.
That said, even if we somehow make them not detached, so that all m_Value & m_Deferred matchers are triggered in the same pattern-matching expression / chain, how should we make sense of the permutation case where m_Deferred might appear before its m_Value (I guess this is related to your comment that there is "no guarantee on the order of matching")? More specifically, if m_Deferred appears before m_Value, the preceding m_Deferred might accidentally grab the bound value populated by the previous permutation.
I think we might be able to solve this problem by imposing some partial order during permutation for reassociatable expressions that contain m_Value & m_Deferred.

Collaborator (Author):
Not sure - hence I punted this to #169645 - I can find a workaround if necessary as a follow-up, but I'd much prefer to handle this completely with m_Reassociatable matchers if possible.

Collaborator (Author):
It looks like @bermondd is working on this at #170061 (new contributors not being able to assign reviewers is REALLY annoying).
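
The stale-binding hazard described in the comment above can be made concrete with a minimal standalone sketch. This is a toy model, not the real SDPatternMatch implementation; Capture, bindValue, and checkDeferred are hypothetical names invented for illustration.

// Toy model of the stale-binding hazard discussed above; all names are
// hypothetical, this is not the real SDPatternMatch code.
#include <cassert>
#include <optional>

// Capture plays the role of the SDValue slot shared by m_Value/m_Deferred.
struct Capture {
  std::optional<int> Bound;
  bool bindValue(int V) { Bound = V; return true; }  // models m_Value: always binds
  bool checkDeferred(int V) const {                  // models m_Deferred: compares
    return Bound && *Bound == V;
  }
};

int main() {
  Capture A;
  // Permutation 1: m_Value(A) binds 5, then some later sub-matcher fails.
  // The binding is not rolled back when the permutation is abandoned.
  A.bindValue(5);
  // Permutation 2 orders m_Deferred(A) before the m_Value(A) that should
  // rebind it, so the deferred check "succeeds" against the stale 5 even
  // though nothing in the current permutation bound A.
  assert(A.checkDeferred(5) && "spurious match against a stale binding");
  return 0;
}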

   if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
-      sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
-                        m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
+      (sd_match(N,
+                m_Add(m_And(m_Value(A), m_Value(B)),
+                      m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
+       sd_match(N, m_Add(m_Add(m_Srl(m_Value(A), m_One()),
+                               m_Srl(m_Value(B), m_One())),
+                         m_ReassociatableAnd(m_Deferred(A), m_Deferred(B),
+                                             m_One()))))) {
     return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
   }
   if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
-      sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
-                        m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
+      (sd_match(N,
+                m_Add(m_And(m_Value(A), m_Value(B)),
+                      m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
+       sd_match(N, m_Add(m_Add(m_Sra(m_Value(A), m_One()),
+                               m_Sra(m_Value(B), m_One())),
+                         m_ReassociatableAnd(m_Deferred(A), m_Deferred(B),
+                                             m_One()))))) {
     return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
   }
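
Both source forms accepted by this combine compute the floored average, so rewriting either one to AVGFLOORU/AVGFLOORS is sound. As a quick editorial sanity check (not part of the patch), the identities can be verified exhaustively over i8. The helpers below are reference implementations, not LLVM APIs, and signed >> is assumed to be an arithmetic shift (true on mainstream compilers, guaranteed in C++20).

// Exhaustively verify, for all i8 pairs, that the two patterns matched by
// foldAddToAvg equal the floored average.
#include <cassert>
#include <cstdint>

// Reference floored averages, computed in a wider type.
static uint8_t avgflooru(uint8_t A, uint8_t B) {
  return static_cast<uint8_t>((unsigned(A) + unsigned(B)) >> 1);
}
static int8_t avgfloors(int8_t A, int8_t B) {
  return static_cast<int8_t>((int(A) + int(B)) >> 1); // floors for negatives too
}

int main() {
  for (int I = 0; I < 256; ++I) {
    for (int J = 0; J < 256; ++J) {
      uint8_t UA = uint8_t(I), UB = uint8_t(J);
      // Pattern 1: (A & B) + ((A ^ B) >> 1)
      assert(uint8_t((UA & UB) + ((UA ^ UB) >> 1)) == avgflooru(UA, UB));
      // Pattern 2: ((A >> 1) + (B >> 1)) + (A & B & 1)
      assert(uint8_t(((UA >> 1) + (UB >> 1)) + (UA & UB & 1)) == avgflooru(UA, UB));

      int8_t SA = int8_t(UA), SB = int8_t(UB);
      assert(int8_t((SA & SB) + ((SA ^ SB) >> 1)) == avgfloors(SA, SB));
      assert(int8_t(((SA >> 1) + (SB >> 1)) + (SA & SB & 1)) == avgfloors(SA, SB));
    }
  }
  return 0;
}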

llvm/test/CodeGen/X86/avgfloors-scalar.ll (74 changes: 27 additions & 47 deletions)
@@ -38,26 +38,20 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
 define i8 @test_lsb_i8(i8 %a0, i8 %a1) nounwind {
 ; X86-LABEL: test_lsb_i8:
 ; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: sarb %dl
-; X86-NEXT: andb %cl, %al
-; X86-NEXT: sarb %cl
-; X86-NEXT: addb %dl, %cl
-; X86-NEXT: andb $1, %al
-; X86-NEXT: addb %cl, %al
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: shrl %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_lsb_i8:
 ; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: sarb %al
-; X64-NEXT: andb %sil, %dil
-; X64-NEXT: sarb %sil
-; X64-NEXT: addb %sil, %al
-; X64-NEXT: andb $1, %dil
-; X64-NEXT: addb %dil, %al
+; X64-NEXT: movsbl %sil, %ecx
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: addl %ecx, %eax
+; X64-NEXT: shrl %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
 ; X64-NEXT: retq
 %s0 = ashr i8 %a0, 1
 %s1 = ashr i8 %a1, 1
@@ -124,26 +118,17 @@ define i16 @test_lsb_i16(i16 %a0, i16 %a1) nounwind {
 ; X86: # %bb.0:
 ; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: sarl %edx
-; X86-NEXT: andl %ecx, %eax
-; X86-NEXT: sarl %ecx
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: andl $1, %eax
 ; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: shrl %eax
 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_lsb_i16:
 ; X64: # %bb.0:
-; X64-NEXT: movswl %si, %eax
-; X64-NEXT: movswl %di, %ecx
-; X64-NEXT: sarl %ecx
-; X64-NEXT: sarl %eax
+; X64-NEXT: movswl %si, %ecx
+; X64-NEXT: movswl %di, %eax
 ; X64-NEXT: addl %ecx, %eax
-; X64-NEXT: andl %esi, %edi
-; X64-NEXT: andl $1, %edi
-; X64-NEXT: addl %edi, %eax
+; X64-NEXT: shrl %eax
 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
 ; X64-NEXT: retq
 %s0 = ashr i16 %a0, 1
@@ -316,36 +301,31 @@ define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind {
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: sarl %ebx
-; X86-NEXT: shldl $31, %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl %esi, %ebx
 ; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: xorl %edi, %edx
+; X86-NEXT: shrdl $1, %edx, %ebx
+; X86-NEXT: andl %edi, %ecx
 ; X86-NEXT: sarl %edx
-; X86-NEXT: shldl $31, %esi, %ecx
-; X86-NEXT: addl %edi, %ecx
-; X86-NEXT: adcl %ebx, %edx
 ; X86-NEXT: andl %esi, %eax
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: adcl $0, %edx
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: adcl %ecx, %edx
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_lsb_i64:
 ; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: sarq %rcx
-; X64-NEXT: andl %esi, %edi
 ; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: sarq %rax
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: andl $1, %edi
-; X64-NEXT: addq %rdi, %rax
+; X64-NEXT: andq %rdi, %rax
+; X64-NEXT: xorq %rdi, %rsi
+; X64-NEXT: sarq %rsi
+; X64-NEXT: addq %rsi, %rax
 ; X64-NEXT: retq
 %s0 = ashr i64 %a0, 1
 %s1 = ashr i64 %a1, 1
llvm/test/CodeGen/X86/avgflooru-scalar.ll (76 changes: 22 additions & 54 deletions)
@@ -40,24 +40,18 @@ define i8 @test_lsb_i8(i8 %a0, i8 %a1) nounwind {
 ; X86: # %bb.0:
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: shrb %dl
-; X86-NEXT: andb %cl, %al
-; X86-NEXT: shrb %cl
-; X86-NEXT: addb %dl, %cl
-; X86-NEXT: andb $1, %al
-; X86-NEXT: addb %cl, %al
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: shrl %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_lsb_i8:
 ; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrb %al
-; X64-NEXT: andb %sil, %dil
-; X64-NEXT: shrb %sil
-; X64-NEXT: addb %sil, %al
-; X64-NEXT: andb $1, %dil
-; X64-NEXT: addb %dil, %al
+; X64-NEXT: movzbl %sil, %ecx
+; X64-NEXT: movzbl %dil, %eax
+; X64-NEXT: addl %ecx, %eax
+; X64-NEXT: shrl %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
 ; X64-NEXT: retq
 %s0 = lshr i8 %a0, 1
 %s1 = lshr i8 %a1, 1
@@ -124,26 +118,17 @@ define i16 @test_lsb_i16(i16 %a0, i16 %a1) nounwind {
 ; X86: # %bb.0:
 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: shrl %edx
-; X86-NEXT: andl %ecx, %eax
-; X86-NEXT: shrl %ecx
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: andl $1, %eax
 ; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: shrl %eax
 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_lsb_i16:
 ; X64: # %bb.0:
-; X64-NEXT: movzwl %si, %eax
-; X64-NEXT: movzwl %di, %ecx
-; X64-NEXT: shrl %ecx
-; X64-NEXT: shrl %eax
+; X64-NEXT: movzwl %si, %ecx
+; X64-NEXT: movzwl %di, %eax
 ; X64-NEXT: addl %ecx, %eax
-; X64-NEXT: andl %esi, %edi
-; X64-NEXT: andl $1, %edi
-; X64-NEXT: addl %edi, %eax
+; X64-NEXT: shrl %eax
 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
 ; X64-NEXT: retq
 %s0 = lshr i16 %a0, 1
@@ -300,40 +285,23 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
 define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind {
 ; X86-LABEL: test_lsb_i64:
 ; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: shrl %ebx
-; X86-NEXT: shldl $31, %eax, %edi
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: shrl %edx
-; X86-NEXT: shldl $31, %esi, %ecx
-; X86-NEXT: addl %edi, %ecx
-; X86-NEXT: adcl %ebx, %edx
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: adcl $0, %edx
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: setb %dl
+; X86-NEXT: movzbl %dl, %edx
+; X86-NEXT: shldl $31, %eax, %edx
+; X86-NEXT: shldl $31, %ecx, %eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_lsb_i64:
 ; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: shrq %rcx
-; X64-NEXT: andl %esi, %edi
 ; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: andl $1, %edi
-; X64-NEXT: addq %rdi, %rax
+; X64-NEXT: andq %rdi, %rax
+; X64-NEXT: xorq %rdi, %rsi
+; X64-NEXT: shrq %rsi
+; X64-NEXT: addq %rsi, %rax
 ; X64-NEXT: retq
 %s0 = lshr i64 %a0, 1
 %s1 = lshr i64 %a1, 1
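
The new X86 output for test_lsb_i64 above computes the unsigned average as a 65-bit operation: add the two i64 values with carry (addl/adcl), capture the carry-out (setb), and funnel-shift the 65-bit sum right by one (the shldl pair). A hedged C++ sketch of that trick follows; it is an editorial illustration, and avgflooru_carry is an invented name, not an LLVM API.

// avgflooru(a, b) = (a + b) >> 1 computed in 65 bits: the 64-bit sum plus
// its carry-out, shifted right by one. Mirrors the addl/adcl/setb/shldl
// sequence in the X86 output above.
#include <cassert>
#include <cstdint>

static uint64_t avgflooru_carry(uint64_t A, uint64_t B) {
  uint64_t Sum = A + B;              // low 64 bits of the 65-bit sum
  uint64_t Carry = Sum < A ? 1 : 0;  // carry-out is bit 64 of the sum
  return (Sum >> 1) | (Carry << 63); // the 65-bit value shifted right by one
}

int main() {
  assert(avgflooru_carry(1, 2) == 1);              // floor((1 + 2) / 2)
  assert(avgflooru_carry(~0ULL, ~0ULL) == ~0ULL);  // no wraparound error
  assert(avgflooru_carry(0x8000000000000000ULL,
                         0x8000000000000000ULL) == 0x8000000000000000ULL);
  return 0;
}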