From e734202987d8619eb35d35d30c1026c318312ac2 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 26 Nov 2025 12:10:42 +0000
Subject: [PATCH 1/2] [DAG] Recognise AVGFLOOR (((A >> 1) + (B >> 1)) + (A & B & 1)) patterns

Recognise 'LSB' style AVGFLOOR patterns.

I attempted to use the m_Reassociatable* pattern matchers, but hit an
issue: we can't correctly match m_Value/m_Deferred pairs in the same
reassociation, as there appear to be no guarantees on the order of
matching. I'll raise a bug for this; in the meantime the test_lsb_i32
tests keep the pattern to show the missed matching opportunity.

Fixes #53648
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 20 ++++-
 llvm/test/CodeGen/X86/avgfloors-scalar.ll     | 74 +++++++-----------
 llvm/test/CodeGen/X86/avgflooru-scalar.ll     | 76 ++++++-------------
 3 files changed, 65 insertions(+), 105 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6b79dbb46cadc..813cbeafeaec9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3154,19 +3154,31 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
 }
 
 // Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
+// Attempt to form avgfloor(A, B) from ((A >> 1) + (B >> 1)) + (A & B & 1)
 SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N0.getValueType();
   SDValue A, B;
 
+  // FIXME: m_ReassociatableAdd can't handle m_Value/m_Deferred mixing.
   if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
-      sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
-                        m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
+      (sd_match(N,
+                m_Add(m_And(m_Value(A), m_Value(B)),
+                      m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
+       sd_match(N, m_Add(m_Add(m_Srl(m_Value(A), m_One()),
+                               m_Srl(m_Value(B), m_One())),
+                         m_ReassociatableAnd(m_Deferred(A), m_Deferred(B),
+                                             m_One()))))) {
     return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
   }
   if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
-      sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
-                        m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
+      (sd_match(N,
+                m_Add(m_And(m_Value(A), m_Value(B)),
+                      m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
+       sd_match(N, m_Add(m_Add(m_Sra(m_Value(A), m_One()),
+                               m_Sra(m_Value(B), m_One())),
+                         m_ReassociatableAnd(m_Deferred(A), m_Deferred(B),
+                                             m_One()))))) {
     return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
   }
 
diff --git a/llvm/test/CodeGen/X86/avgfloors-scalar.ll b/llvm/test/CodeGen/X86/avgfloors-scalar.ll
index fd303192e6c50..c8bbc875834d1 100644
--- a/llvm/test/CodeGen/X86/avgfloors-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgfloors-scalar.ll
@@ -38,26 +38,20 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
 define i8 @test_lsb_i8(i8 %a0, i8 %a1) nounwind {
 ; X86-LABEL: test_lsb_i8:
 ; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: sarb %dl
-; X86-NEXT: andb %cl, %al
-; X86-NEXT: sarb %cl
-; X86-NEXT: addb %dl, %cl
-; X86-NEXT: andb $1, %al
-; X86-NEXT: addb %cl, %al
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: shrl %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_lsb_i8:
 ; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: sarb %al
-; X64-NEXT: andb %sil, %dil
-; X64-NEXT: sarb %sil
-; X64-NEXT: addb %sil, %al
-; X64-NEXT: andb $1, %dil
-; X64-NEXT: addb %dil, %al
+; X64-NEXT: movsbl %sil, %ecx
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: addl %ecx, %eax
+; X64-NEXT: shrl %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
 ; X64-NEXT: retq
 %s0 = ashr i8 %a0, 1
 %s1 = ashr i8 %a1, 1
@@ -124,26 +118,17 @@ define i16 @test_lsb_i16(i16 %a0, i16 %a1) nounwind {
 ; X86: # %bb.0:
 ; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: sarl %edx
-; X86-NEXT: andl %ecx, %eax
-; X86-NEXT: sarl %ecx
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: andl $1, %eax
 ; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: shrl %eax
 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_lsb_i16:
 ; X64: # %bb.0:
-; X64-NEXT: movswl %si, %eax
-; X64-NEXT: movswl %di, %ecx
-; X64-NEXT: sarl %ecx
-; X64-NEXT: sarl %eax
+; X64-NEXT: movswl %si, %ecx
+; X64-NEXT: movswl %di, %eax
 ; X64-NEXT: addl %ecx, %eax
-; X64-NEXT: andl %esi, %edi
-; X64-NEXT: andl $1, %edi
-; X64-NEXT: addl %edi, %eax
+; X64-NEXT: shrl %eax
 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
 ; X64-NEXT: retq
 %s0 = ashr i16 %a0, 1
@@ -316,21 +301,19 @@ define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind {
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: sarl %ebx
-; X86-NEXT: shldl $31, %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl %esi, %ebx
 ; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: xorl %edi, %edx
+; X86-NEXT: shrdl $1, %edx, %ebx
+; X86-NEXT: andl %edi, %ecx
 ; X86-NEXT: sarl %edx
-; X86-NEXT: shldl $31, %esi, %ecx
-; X86-NEXT: addl %edi, %ecx
-; X86-NEXT: adcl %ebx, %edx
 ; X86-NEXT: andl %esi, %eax
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: adcl $0, %edx
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: adcl %ecx, %edx
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
@@ -338,14 +321,11 @@ define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind {
 ;
 ; X64-LABEL: test_lsb_i64:
 ; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: sarq %rcx
-; X64-NEXT: andl %esi, %edi
 ; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: sarq %rax
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: andl $1, %edi
-; X64-NEXT: addq %rdi, %rax
+; X64-NEXT: andq %rdi, %rax
+; X64-NEXT: xorq %rdi, %rsi
+; X64-NEXT: sarq %rsi
+; X64-NEXT: addq %rsi, %rax
 ; X64-NEXT: retq
 %s0 = ashr i64 %a0, 1
 %s1 = ashr i64 %a1, 1
diff --git a/llvm/test/CodeGen/X86/avgflooru-scalar.ll b/llvm/test/CodeGen/X86/avgflooru-scalar.ll
index 9ae4492bb4cd4..7ad10164ad484 100644
--- a/llvm/test/CodeGen/X86/avgflooru-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgflooru-scalar.ll
@@ -40,24 +40,18 @@ define i8 @test_lsb_i8(i8 %a0, i8 %a1) nounwind {
 ; X86: # %bb.0:
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: shrb %dl
-; X86-NEXT: andb %cl, %al
-; X86-NEXT: shrb %cl
-; X86-NEXT: addb %dl, %cl
-; X86-NEXT: andb $1, %al
-; X86-NEXT: addb %cl, %al
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: shrl %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_lsb_i8:
 ; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrb %al
-; X64-NEXT: andb %sil, %dil
-; X64-NEXT: shrb %sil
-; X64-NEXT: addb %sil, %al
-; X64-NEXT: andb $1, %dil
-; X64-NEXT: addb %dil, %al
+; X64-NEXT: movzbl %sil, %ecx
+; X64-NEXT: movzbl %dil, %eax
+; X64-NEXT: addl %ecx, %eax
+; X64-NEXT: shrl %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
 ; X64-NEXT: retq
 %s0 = lshr i8 %a0, 1
 %s1 = lshr i8 %a1, 1
@@ -124,26 +118,17 @@ define i16 @test_lsb_i16(i16 %a0, i16 %a1) nounwind {
 ; X86: # %bb.0:
 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: shrl %edx
-; X86-NEXT: andl %ecx, %eax
-; X86-NEXT: shrl %ecx
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: andl $1, %eax
 ; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: shrl %eax
 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_lsb_i16:
 ; X64: # %bb.0:
-; X64-NEXT: movzwl %si, %eax
-; X64-NEXT: movzwl %di, %ecx
-; X64-NEXT: shrl %ecx
-; X64-NEXT: shrl %eax
+; X64-NEXT: movzwl %si, %ecx
+; X64-NEXT: movzwl %di, %eax
 ; X64-NEXT: addl %ecx, %eax
-; X64-NEXT: andl %esi, %edi
-; X64-NEXT: andl $1, %edi
-; X64-NEXT: addl %edi, %eax
+; X64-NEXT: shrl %eax
 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
 ; X64-NEXT: retq
 %s0 = lshr i16 %a0, 1
@@ -300,40 +285,23 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
 define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind {
 ; X86-LABEL: test_lsb_i64:
 ; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: shrl %ebx
-; X86-NEXT: shldl $31, %eax, %edi
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: shrl %edx
-; X86-NEXT: shldl $31, %esi, %ecx
-; X86-NEXT: addl %edi, %ecx
-; X86-NEXT: adcl %ebx, %edx
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: adcl $0, %edx
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: setb %dl
+; X86-NEXT: movzbl %dl, %edx
+; X86-NEXT: shldl $31, %eax, %edx
+; X86-NEXT: shldl $31, %ecx, %eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_lsb_i64:
 ; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: shrq %rcx
-; X64-NEXT: andl %esi, %edi
 ; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: andl $1, %edi
-; X64-NEXT: addq %rdi, %rax
+; X64-NEXT: andq %rdi, %rax
+; X64-NEXT: xorq %rdi, %rsi
+; X64-NEXT: shrq %rsi
+; X64-NEXT: addq %rsi, %rax
 ; X64-NEXT: retq
 %s0 = lshr i64 %a0, 1
 %s1 = lshr i64 %a1, 1

From 6a796d31e1828f005643cfefd6f22dc19c92169c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 9 Dec 2025 15:19:55 +0000
Subject: [PATCH 2/2] Use m_ReassociatableAdd now that #170061 is fixed

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 17 +++++++-------
 llvm/test/CodeGen/X86/avgfloors-scalar.ll     | 22 ++++++++-----------
 llvm/test/CodeGen/X86/avgflooru-scalar.ll     | 20 +++++++----------
 3 files changed, 25 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a5625553fa1fb..6a99d4e29b64f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3166,25 +3166,24 @@ SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
   EVT VT = N0.getValueType();
   SDValue A, B;
 
-  // FIXME: m_ReassociatableAdd can't handle m_Value/m_Deferred mixing.
   if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
       (sd_match(N,
                 m_Add(m_And(m_Value(A), m_Value(B)),
                       m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
-       sd_match(N, m_Add(m_Add(m_Srl(m_Value(A), m_One()),
-                               m_Srl(m_Value(B), m_One())),
-                         m_ReassociatableAnd(m_Deferred(A), m_Deferred(B),
-                                             m_One()))))) {
+       sd_match(N, m_ReassociatableAdd(
+                       m_Srl(m_Value(A), m_One()), m_Srl(m_Value(B), m_One()),
+                       m_ReassociatableAnd(m_Deferred(A), m_Deferred(B),
+                                           m_One()))))) {
     return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
   }
   if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
       (sd_match(N,
                 m_Add(m_And(m_Value(A), m_Value(B)),
                       m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
-       sd_match(N, m_Add(m_Add(m_Sra(m_Value(A), m_One()),
-                               m_Sra(m_Value(B), m_One())),
-                         m_ReassociatableAnd(m_Deferred(A), m_Deferred(B),
-                                             m_One()))))) {
+       sd_match(N, m_ReassociatableAdd(
+                       m_Sra(m_Value(A), m_One()), m_Sra(m_Value(B), m_One()),
+                       m_ReassociatableAnd(m_Deferred(A), m_Deferred(B),
+                                           m_One()))))) {
     return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
   }
 
diff --git a/llvm/test/CodeGen/X86/avgfloors-scalar.ll b/llvm/test/CodeGen/X86/avgfloors-scalar.ll
index c8bbc875834d1..b575d34a8c2dd 100644
--- a/llvm/test/CodeGen/X86/avgfloors-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgfloors-scalar.ll
@@ -199,23 +199,19 @@ define i32 @test_lsb_i32(i32 %a0, i32 %a1) nounwind {
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl %eax, %edx
-; X86-NEXT: sarl %edx
-; X86-NEXT: andl %ecx, %eax
-; X86-NEXT: sarl %ecx
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: andl %ecx, %edx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: sarl %eax
+; X86-NEXT: addl %edx, %eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_lsb_i32:
 ; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: sarl %eax
-; X64-NEXT: andl %esi, %edi
-; X64-NEXT: sarl %esi
-; X64-NEXT: addl %esi, %eax
-; X64-NEXT: andl $1, %edi
-; X64-NEXT: addl %edi, %eax
+; X64-NEXT: movslq %esi, %rcx
+; X64-NEXT: movslq %edi, %rax
+; X64-NEXT: addq %rcx, %rax
+; X64-NEXT: shrq %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-NEXT: retq
 %s0 = ashr i32 %a0, 1
 %s1 = ashr i32 %a1, 1
diff --git a/llvm/test/CodeGen/X86/avgflooru-scalar.ll b/llvm/test/CodeGen/X86/avgflooru-scalar.ll
index 7ad10164ad484..614151c076eba 100644
--- a/llvm/test/CodeGen/X86/avgflooru-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgflooru-scalar.ll
@@ -199,23 +199,19 @@ define i32 @test_lsb_i32(i32 %a0, i32 %a1) nounwind {
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl %eax, %edx
-; X86-NEXT: shrl %edx
-; X86-NEXT: andl %ecx, %eax
-; X86-NEXT: shrl %ecx
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: andl %ecx, %edx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: shrl %eax
+; X86-NEXT: addl %edx, %eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_lsb_i32:
 ; X64: # %bb.0:
+; X64-NEXT: movl %esi, %ecx
 ; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrl %eax
-; X64-NEXT: andl %esi, %edi
-; X64-NEXT: shrl %esi
-; X64-NEXT: addl %esi, %eax
-; X64-NEXT: andl $1, %edi
-; X64-NEXT: addl %edi, %eax
+; X64-NEXT: addq %rcx, %rax
+; X64-NEXT: shrq %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-NEXT: retq
 %s0 = lshr i32 %a0, 1
 %s1 = lshr i32 %a1, 1
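
For reference, the 'LSB' floor-average shape recognised by these combines
corresponds to IR of the following form (a minimal sketch modelled on the
test_lsb_* tests above; the function name is illustrative, and the unsigned
AVGFLOORU flavour is identical with lshr in place of ashr):

define i32 @lsb_avgfloor_sketch(i32 %a0, i32 %a1) {
  %s0 = ashr i32 %a0, 1     ; A >> 1
  %s1 = ashr i32 %a1, 1     ; B >> 1
  %sum = add i32 %s0, %s1   ; (A >> 1) + (B >> 1) drops the carry of the low bits
  %m = and i32 %a0, %a1     ; A & B
  %lsb = and i32 %m, 1      ; A & B & 1: set iff both low bits are set
  %res = add i32 %sum, %lsb ; re-adds that carry, giving floor((A + B) / 2)
  ret i32 %res              ; now folds to avgfloors(A, B)
}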