Skip to content

Commit e734202

Browse files
committed
[DAG] Recognise AVGFLOOR (((A >> 1) + (B >> 1)) + (A & B & 1)) patterns
Recognise 'LSB' style AVGFLOOR patterns. I attempted to use the m_Reassociatable* pattern matchers, but m_Value/m_Deferred pairs cannot be matched correctly within the same reassociation, because there appears to be no guarantee on the order of matching. I'll raise a bug for this; in the meantime, the test_lsb_i32 tests keep the pattern to show the missed matching opportunity. Fixes llvm#53648
1 parent 04bddda commit e734202

File tree

3 files changed

+65
-105
lines changed

3 files changed

+65
-105
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3154,19 +3154,31 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
31543154
}
31553155

31563156
// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
3157+
// Attempt to form avgfloor(A, B) from ((A >> 1) + (B >> 1)) + (A & B & 1)
31573158
SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
31583159
SDValue N0 = N->getOperand(0);
31593160
EVT VT = N0.getValueType();
31603161
SDValue A, B;
31613162

3163+
// FIXME: m_ReassociatableAdd can't handle m_Value/m_Deferred mixing.
31623164
if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
3163-
sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
3164-
m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
3165+
(sd_match(N,
3166+
m_Add(m_And(m_Value(A), m_Value(B)),
3167+
m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
3168+
sd_match(N, m_Add(m_Add(m_Srl(m_Value(A), m_One()),
3169+
m_Srl(m_Value(B), m_One())),
3170+
m_ReassociatableAnd(m_Deferred(A), m_Deferred(B),
3171+
m_One()))))) {
31653172
return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
31663173
}
31673174
if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
3168-
sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
3169-
m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
3175+
(sd_match(N,
3176+
m_Add(m_And(m_Value(A), m_Value(B)),
3177+
m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
3178+
sd_match(N, m_Add(m_Add(m_Sra(m_Value(A), m_One()),
3179+
m_Sra(m_Value(B), m_One())),
3180+
m_ReassociatableAnd(m_Deferred(A), m_Deferred(B),
3181+
m_One()))))) {
31703182
return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
31713183
}
31723184

llvm/test/CodeGen/X86/avgfloors-scalar.ll

Lines changed: 27 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -38,26 +38,20 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
3838
define i8 @test_lsb_i8(i8 %a0, i8 %a1) nounwind {
3939
; X86-LABEL: test_lsb_i8:
4040
; X86: # %bb.0:
41-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
42-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
43-
; X86-NEXT: movl %eax, %edx
44-
; X86-NEXT: sarb %dl
45-
; X86-NEXT: andb %cl, %al
46-
; X86-NEXT: sarb %cl
47-
; X86-NEXT: addb %dl, %cl
48-
; X86-NEXT: andb $1, %al
49-
; X86-NEXT: addb %cl, %al
41+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
42+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
43+
; X86-NEXT: addl %ecx, %eax
44+
; X86-NEXT: shrl %eax
45+
; X86-NEXT: # kill: def $al killed $al killed $eax
5046
; X86-NEXT: retl
5147
;
5248
; X64-LABEL: test_lsb_i8:
5349
; X64: # %bb.0:
54-
; X64-NEXT: movl %edi, %eax
55-
; X64-NEXT: sarb %al
56-
; X64-NEXT: andb %sil, %dil
57-
; X64-NEXT: sarb %sil
58-
; X64-NEXT: addb %sil, %al
59-
; X64-NEXT: andb $1, %dil
60-
; X64-NEXT: addb %dil, %al
50+
; X64-NEXT: movsbl %sil, %ecx
51+
; X64-NEXT: movsbl %dil, %eax
52+
; X64-NEXT: addl %ecx, %eax
53+
; X64-NEXT: shrl %eax
54+
; X64-NEXT: # kill: def $al killed $al killed $eax
6155
; X64-NEXT: retq
6256
%s0 = ashr i8 %a0, 1
6357
%s1 = ashr i8 %a1, 1
@@ -124,26 +118,17 @@ define i16 @test_lsb_i16(i16 %a0, i16 %a1) nounwind {
124118
; X86: # %bb.0:
125119
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
126120
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
127-
; X86-NEXT: movl %eax, %edx
128-
; X86-NEXT: sarl %edx
129-
; X86-NEXT: andl %ecx, %eax
130-
; X86-NEXT: sarl %ecx
131-
; X86-NEXT: addl %edx, %ecx
132-
; X86-NEXT: andl $1, %eax
133121
; X86-NEXT: addl %ecx, %eax
122+
; X86-NEXT: shrl %eax
134123
; X86-NEXT: # kill: def $ax killed $ax killed $eax
135124
; X86-NEXT: retl
136125
;
137126
; X64-LABEL: test_lsb_i16:
138127
; X64: # %bb.0:
139-
; X64-NEXT: movswl %si, %eax
140-
; X64-NEXT: movswl %di, %ecx
141-
; X64-NEXT: sarl %ecx
142-
; X64-NEXT: sarl %eax
128+
; X64-NEXT: movswl %si, %ecx
129+
; X64-NEXT: movswl %di, %eax
143130
; X64-NEXT: addl %ecx, %eax
144-
; X64-NEXT: andl %esi, %edi
145-
; X64-NEXT: andl $1, %edi
146-
; X64-NEXT: addl %edi, %eax
131+
; X64-NEXT: shrl %eax
147132
; X64-NEXT: # kill: def $ax killed $ax killed $eax
148133
; X64-NEXT: retq
149134
%s0 = ashr i16 %a0, 1
@@ -316,36 +301,31 @@ define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind {
316301
; X86-NEXT: pushl %edi
317302
; X86-NEXT: pushl %esi
318303
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
319-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
320-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
321304
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
322-
; X86-NEXT: movl %edi, %ebx
323-
; X86-NEXT: sarl %ebx
324-
; X86-NEXT: shldl $31, %eax, %edi
305+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
306+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
307+
; X86-NEXT: movl %eax, %ebx
308+
; X86-NEXT: xorl %esi, %ebx
325309
; X86-NEXT: movl %ecx, %edx
310+
; X86-NEXT: xorl %edi, %edx
311+
; X86-NEXT: shrdl $1, %edx, %ebx
312+
; X86-NEXT: andl %edi, %ecx
326313
; X86-NEXT: sarl %edx
327-
; X86-NEXT: shldl $31, %esi, %ecx
328-
; X86-NEXT: addl %edi, %ecx
329-
; X86-NEXT: adcl %ebx, %edx
330314
; X86-NEXT: andl %esi, %eax
331-
; X86-NEXT: andl $1, %eax
332-
; X86-NEXT: addl %ecx, %eax
333-
; X86-NEXT: adcl $0, %edx
315+
; X86-NEXT: addl %ebx, %eax
316+
; X86-NEXT: adcl %ecx, %edx
334317
; X86-NEXT: popl %esi
335318
; X86-NEXT: popl %edi
336319
; X86-NEXT: popl %ebx
337320
; X86-NEXT: retl
338321
;
339322
; X64-LABEL: test_lsb_i64:
340323
; X64: # %bb.0:
341-
; X64-NEXT: movq %rdi, %rcx
342-
; X64-NEXT: sarq %rcx
343-
; X64-NEXT: andl %esi, %edi
344324
; X64-NEXT: movq %rsi, %rax
345-
; X64-NEXT: sarq %rax
346-
; X64-NEXT: addq %rcx, %rax
347-
; X64-NEXT: andl $1, %edi
348-
; X64-NEXT: addq %rdi, %rax
325+
; X64-NEXT: andq %rdi, %rax
326+
; X64-NEXT: xorq %rdi, %rsi
327+
; X64-NEXT: sarq %rsi
328+
; X64-NEXT: addq %rsi, %rax
349329
; X64-NEXT: retq
350330
%s0 = ashr i64 %a0, 1
351331
%s1 = ashr i64 %a1, 1

llvm/test/CodeGen/X86/avgflooru-scalar.ll

Lines changed: 22 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -40,24 +40,18 @@ define i8 @test_lsb_i8(i8 %a0, i8 %a1) nounwind {
4040
; X86: # %bb.0:
4141
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
4242
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
43-
; X86-NEXT: movl %eax, %edx
44-
; X86-NEXT: shrb %dl
45-
; X86-NEXT: andb %cl, %al
46-
; X86-NEXT: shrb %cl
47-
; X86-NEXT: addb %dl, %cl
48-
; X86-NEXT: andb $1, %al
49-
; X86-NEXT: addb %cl, %al
43+
; X86-NEXT: addl %ecx, %eax
44+
; X86-NEXT: shrl %eax
45+
; X86-NEXT: # kill: def $al killed $al killed $eax
5046
; X86-NEXT: retl
5147
;
5248
; X64-LABEL: test_lsb_i8:
5349
; X64: # %bb.0:
54-
; X64-NEXT: movl %edi, %eax
55-
; X64-NEXT: shrb %al
56-
; X64-NEXT: andb %sil, %dil
57-
; X64-NEXT: shrb %sil
58-
; X64-NEXT: addb %sil, %al
59-
; X64-NEXT: andb $1, %dil
60-
; X64-NEXT: addb %dil, %al
50+
; X64-NEXT: movzbl %sil, %ecx
51+
; X64-NEXT: movzbl %dil, %eax
52+
; X64-NEXT: addl %ecx, %eax
53+
; X64-NEXT: shrl %eax
54+
; X64-NEXT: # kill: def $al killed $al killed $eax
6155
; X64-NEXT: retq
6256
%s0 = lshr i8 %a0, 1
6357
%s1 = lshr i8 %a1, 1
@@ -124,26 +118,17 @@ define i16 @test_lsb_i16(i16 %a0, i16 %a1) nounwind {
124118
; X86: # %bb.0:
125119
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
126120
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
127-
; X86-NEXT: movl %eax, %edx
128-
; X86-NEXT: shrl %edx
129-
; X86-NEXT: andl %ecx, %eax
130-
; X86-NEXT: shrl %ecx
131-
; X86-NEXT: addl %edx, %ecx
132-
; X86-NEXT: andl $1, %eax
133121
; X86-NEXT: addl %ecx, %eax
122+
; X86-NEXT: shrl %eax
134123
; X86-NEXT: # kill: def $ax killed $ax killed $eax
135124
; X86-NEXT: retl
136125
;
137126
; X64-LABEL: test_lsb_i16:
138127
; X64: # %bb.0:
139-
; X64-NEXT: movzwl %si, %eax
140-
; X64-NEXT: movzwl %di, %ecx
141-
; X64-NEXT: shrl %ecx
142-
; X64-NEXT: shrl %eax
128+
; X64-NEXT: movzwl %si, %ecx
129+
; X64-NEXT: movzwl %di, %eax
143130
; X64-NEXT: addl %ecx, %eax
144-
; X64-NEXT: andl %esi, %edi
145-
; X64-NEXT: andl $1, %edi
146-
; X64-NEXT: addl %edi, %eax
131+
; X64-NEXT: shrl %eax
147132
; X64-NEXT: # kill: def $ax killed $ax killed $eax
148133
; X64-NEXT: retq
149134
%s0 = lshr i16 %a0, 1
@@ -300,40 +285,23 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
300285
define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind {
301286
; X86-LABEL: test_lsb_i64:
302287
; X86: # %bb.0:
303-
; X86-NEXT: pushl %ebx
304-
; X86-NEXT: pushl %edi
305-
; X86-NEXT: pushl %esi
306-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
307288
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
308289
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
309-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
310-
; X86-NEXT: movl %edi, %ebx
311-
; X86-NEXT: shrl %ebx
312-
; X86-NEXT: shldl $31, %eax, %edi
313-
; X86-NEXT: movl %ecx, %edx
314-
; X86-NEXT: shrl %edx
315-
; X86-NEXT: shldl $31, %esi, %ecx
316-
; X86-NEXT: addl %edi, %ecx
317-
; X86-NEXT: adcl %ebx, %edx
318-
; X86-NEXT: andl %esi, %eax
319-
; X86-NEXT: andl $1, %eax
320-
; X86-NEXT: addl %ecx, %eax
321-
; X86-NEXT: adcl $0, %edx
322-
; X86-NEXT: popl %esi
323-
; X86-NEXT: popl %edi
324-
; X86-NEXT: popl %ebx
290+
; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
291+
; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax
292+
; X86-NEXT: setb %dl
293+
; X86-NEXT: movzbl %dl, %edx
294+
; X86-NEXT: shldl $31, %eax, %edx
295+
; X86-NEXT: shldl $31, %ecx, %eax
325296
; X86-NEXT: retl
326297
;
327298
; X64-LABEL: test_lsb_i64:
328299
; X64: # %bb.0:
329-
; X64-NEXT: movq %rdi, %rcx
330-
; X64-NEXT: shrq %rcx
331-
; X64-NEXT: andl %esi, %edi
332300
; X64-NEXT: movq %rsi, %rax
333-
; X64-NEXT: shrq %rax
334-
; X64-NEXT: addq %rcx, %rax
335-
; X64-NEXT: andl $1, %edi
336-
; X64-NEXT: addq %rdi, %rax
301+
; X64-NEXT: andq %rdi, %rax
302+
; X64-NEXT: xorq %rdi, %rsi
303+
; X64-NEXT: shrq %rsi
304+
; X64-NEXT: addq %rsi, %rax
337305
; X64-NEXT: retq
338306
%s0 = lshr i64 %a0, 1
339307
%s1 = lshr i64 %a1, 1

0 commit comments

Comments
 (0)