Skip to content

Commit 804e768

Browse files
authored
[DAG] Recognise AVGFLOOR (((A >> 1) + (B >> 1)) + (A & B & 1)) patterns (#169644)
Recognise 'LSB' style AVGFLOOR patterns. Alive2: [https://alive2.llvm.org/ce/z/nfSSk_](https://alive2.llvm.org/ce/z/nfSSk_). Fixes #53648.
1 parent a108881 commit 804e768

File tree

3 files changed

+81
-130
lines changed

3 files changed

+81
-130
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3160,19 +3160,30 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
31603160
}
31613161

31623162
// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
3163+
// Attempt to form avgfloor(A, B) from ((A >> 1) + (B >> 1)) + (A & B & 1)
31633164
SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
31643165
SDValue N0 = N->getOperand(0);
31653166
EVT VT = N0.getValueType();
31663167
SDValue A, B;
31673168

31683169
if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
3169-
sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
3170-
m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
3170+
(sd_match(N,
3171+
m_Add(m_And(m_Value(A), m_Value(B)),
3172+
m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
3173+
sd_match(N, m_ReassociatableAdd(
3174+
m_Srl(m_Value(A), m_One()), m_Srl(m_Value(B), m_One()),
3175+
m_ReassociatableAnd(m_Deferred(A), m_Deferred(B),
3176+
m_One()))))) {
31713177
return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
31723178
}
31733179
if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
3174-
sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
3175-
m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
3180+
(sd_match(N,
3181+
m_Add(m_And(m_Value(A), m_Value(B)),
3182+
m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
3183+
sd_match(N, m_ReassociatableAdd(
3184+
m_Sra(m_Value(A), m_One()), m_Sra(m_Value(B), m_One()),
3185+
m_ReassociatableAnd(m_Deferred(A), m_Deferred(B),
3186+
m_One()))))) {
31763187
return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
31773188
}
31783189

llvm/test/CodeGen/X86/avgfloors-scalar.ll

Lines changed: 36 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -38,26 +38,20 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
3838
define i8 @test_lsb_i8(i8 %a0, i8 %a1) nounwind {
3939
; X86-LABEL: test_lsb_i8:
4040
; X86: # %bb.0:
41-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
42-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
43-
; X86-NEXT: movl %eax, %edx
44-
; X86-NEXT: sarb %dl
45-
; X86-NEXT: andb %cl, %al
46-
; X86-NEXT: sarb %cl
47-
; X86-NEXT: addb %dl, %cl
48-
; X86-NEXT: andb $1, %al
49-
; X86-NEXT: addb %cl, %al
41+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
42+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
43+
; X86-NEXT: addl %ecx, %eax
44+
; X86-NEXT: shrl %eax
45+
; X86-NEXT: # kill: def $al killed $al killed $eax
5046
; X86-NEXT: retl
5147
;
5248
; X64-LABEL: test_lsb_i8:
5349
; X64: # %bb.0:
54-
; X64-NEXT: movl %edi, %eax
55-
; X64-NEXT: sarb %al
56-
; X64-NEXT: andb %sil, %dil
57-
; X64-NEXT: sarb %sil
58-
; X64-NEXT: addb %sil, %al
59-
; X64-NEXT: andb $1, %dil
60-
; X64-NEXT: addb %dil, %al
50+
; X64-NEXT: movsbl %sil, %ecx
51+
; X64-NEXT: movsbl %dil, %eax
52+
; X64-NEXT: addl %ecx, %eax
53+
; X64-NEXT: shrl %eax
54+
; X64-NEXT: # kill: def $al killed $al killed $eax
6155
; X64-NEXT: retq
6256
%s0 = ashr i8 %a0, 1
6357
%s1 = ashr i8 %a1, 1
@@ -124,26 +118,17 @@ define i16 @test_lsb_i16(i16 %a0, i16 %a1) nounwind {
124118
; X86: # %bb.0:
125119
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
126120
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
127-
; X86-NEXT: movl %eax, %edx
128-
; X86-NEXT: sarl %edx
129-
; X86-NEXT: andl %ecx, %eax
130-
; X86-NEXT: sarl %ecx
131-
; X86-NEXT: addl %edx, %ecx
132-
; X86-NEXT: andl $1, %eax
133121
; X86-NEXT: addl %ecx, %eax
122+
; X86-NEXT: shrl %eax
134123
; X86-NEXT: # kill: def $ax killed $ax killed $eax
135124
; X86-NEXT: retl
136125
;
137126
; X64-LABEL: test_lsb_i16:
138127
; X64: # %bb.0:
139-
; X64-NEXT: movswl %si, %eax
140-
; X64-NEXT: movswl %di, %ecx
141-
; X64-NEXT: sarl %ecx
142-
; X64-NEXT: sarl %eax
128+
; X64-NEXT: movswl %si, %ecx
129+
; X64-NEXT: movswl %di, %eax
143130
; X64-NEXT: addl %ecx, %eax
144-
; X64-NEXT: andl %esi, %edi
145-
; X64-NEXT: andl $1, %edi
146-
; X64-NEXT: addl %edi, %eax
131+
; X64-NEXT: shrl %eax
147132
; X64-NEXT: # kill: def $ax killed $ax killed $eax
148133
; X64-NEXT: retq
149134
%s0 = ashr i16 %a0, 1
@@ -214,23 +199,19 @@ define i32 @test_lsb_i32(i32 %a0, i32 %a1) nounwind {
214199
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
215200
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
216201
; X86-NEXT: movl %eax, %edx
217-
; X86-NEXT: sarl %edx
218-
; X86-NEXT: andl %ecx, %eax
219-
; X86-NEXT: sarl %ecx
220-
; X86-NEXT: addl %edx, %ecx
221-
; X86-NEXT: andl $1, %eax
222-
; X86-NEXT: addl %ecx, %eax
202+
; X86-NEXT: andl %ecx, %edx
203+
; X86-NEXT: xorl %ecx, %eax
204+
; X86-NEXT: sarl %eax
205+
; X86-NEXT: addl %edx, %eax
223206
; X86-NEXT: retl
224207
;
225208
; X64-LABEL: test_lsb_i32:
226209
; X64: # %bb.0:
227-
; X64-NEXT: movl %edi, %eax
228-
; X64-NEXT: sarl %eax
229-
; X64-NEXT: andl %esi, %edi
230-
; X64-NEXT: sarl %esi
231-
; X64-NEXT: addl %esi, %eax
232-
; X64-NEXT: andl $1, %edi
233-
; X64-NEXT: addl %edi, %eax
210+
; X64-NEXT: movslq %esi, %rcx
211+
; X64-NEXT: movslq %edi, %rax
212+
; X64-NEXT: addq %rcx, %rax
213+
; X64-NEXT: shrq %rax
214+
; X64-NEXT: # kill: def $eax killed $eax killed $rax
234215
; X64-NEXT: retq
235216
%s0 = ashr i32 %a0, 1
236217
%s1 = ashr i32 %a1, 1
@@ -316,36 +297,31 @@ define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind {
316297
; X86-NEXT: pushl %edi
317298
; X86-NEXT: pushl %esi
318299
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
319-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
320-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
321300
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
322-
; X86-NEXT: movl %edi, %ebx
323-
; X86-NEXT: sarl %ebx
324-
; X86-NEXT: shldl $31, %eax, %edi
301+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
302+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
303+
; X86-NEXT: movl %eax, %ebx
304+
; X86-NEXT: xorl %esi, %ebx
325305
; X86-NEXT: movl %ecx, %edx
306+
; X86-NEXT: xorl %edi, %edx
307+
; X86-NEXT: shrdl $1, %edx, %ebx
308+
; X86-NEXT: andl %edi, %ecx
326309
; X86-NEXT: sarl %edx
327-
; X86-NEXT: shldl $31, %esi, %ecx
328-
; X86-NEXT: addl %edi, %ecx
329-
; X86-NEXT: adcl %ebx, %edx
330310
; X86-NEXT: andl %esi, %eax
331-
; X86-NEXT: andl $1, %eax
332-
; X86-NEXT: addl %ecx, %eax
333-
; X86-NEXT: adcl $0, %edx
311+
; X86-NEXT: addl %ebx, %eax
312+
; X86-NEXT: adcl %ecx, %edx
334313
; X86-NEXT: popl %esi
335314
; X86-NEXT: popl %edi
336315
; X86-NEXT: popl %ebx
337316
; X86-NEXT: retl
338317
;
339318
; X64-LABEL: test_lsb_i64:
340319
; X64: # %bb.0:
341-
; X64-NEXT: movq %rdi, %rcx
342-
; X64-NEXT: sarq %rcx
343-
; X64-NEXT: andl %esi, %edi
344320
; X64-NEXT: movq %rsi, %rax
345-
; X64-NEXT: sarq %rax
346-
; X64-NEXT: addq %rcx, %rax
347-
; X64-NEXT: andl $1, %edi
348-
; X64-NEXT: addq %rdi, %rax
321+
; X64-NEXT: andq %rdi, %rax
322+
; X64-NEXT: xorq %rdi, %rsi
323+
; X64-NEXT: sarq %rsi
324+
; X64-NEXT: addq %rsi, %rax
349325
; X64-NEXT: retq
350326
%s0 = ashr i64 %a0, 1
351327
%s1 = ashr i64 %a1, 1

llvm/test/CodeGen/X86/avgflooru-scalar.ll

Lines changed: 30 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -40,24 +40,18 @@ define i8 @test_lsb_i8(i8 %a0, i8 %a1) nounwind {
4040
; X86: # %bb.0:
4141
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
4242
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
43-
; X86-NEXT: movl %eax, %edx
44-
; X86-NEXT: shrb %dl
45-
; X86-NEXT: andb %cl, %al
46-
; X86-NEXT: shrb %cl
47-
; X86-NEXT: addb %dl, %cl
48-
; X86-NEXT: andb $1, %al
49-
; X86-NEXT: addb %cl, %al
43+
; X86-NEXT: addl %ecx, %eax
44+
; X86-NEXT: shrl %eax
45+
; X86-NEXT: # kill: def $al killed $al killed $eax
5046
; X86-NEXT: retl
5147
;
5248
; X64-LABEL: test_lsb_i8:
5349
; X64: # %bb.0:
54-
; X64-NEXT: movl %edi, %eax
55-
; X64-NEXT: shrb %al
56-
; X64-NEXT: andb %sil, %dil
57-
; X64-NEXT: shrb %sil
58-
; X64-NEXT: addb %sil, %al
59-
; X64-NEXT: andb $1, %dil
60-
; X64-NEXT: addb %dil, %al
50+
; X64-NEXT: movzbl %sil, %ecx
51+
; X64-NEXT: movzbl %dil, %eax
52+
; X64-NEXT: addl %ecx, %eax
53+
; X64-NEXT: shrl %eax
54+
; X64-NEXT: # kill: def $al killed $al killed $eax
6155
; X64-NEXT: retq
6256
%s0 = lshr i8 %a0, 1
6357
%s1 = lshr i8 %a1, 1
@@ -124,26 +118,17 @@ define i16 @test_lsb_i16(i16 %a0, i16 %a1) nounwind {
124118
; X86: # %bb.0:
125119
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
126120
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
127-
; X86-NEXT: movl %eax, %edx
128-
; X86-NEXT: shrl %edx
129-
; X86-NEXT: andl %ecx, %eax
130-
; X86-NEXT: shrl %ecx
131-
; X86-NEXT: addl %edx, %ecx
132-
; X86-NEXT: andl $1, %eax
133121
; X86-NEXT: addl %ecx, %eax
122+
; X86-NEXT: shrl %eax
134123
; X86-NEXT: # kill: def $ax killed $ax killed $eax
135124
; X86-NEXT: retl
136125
;
137126
; X64-LABEL: test_lsb_i16:
138127
; X64: # %bb.0:
139-
; X64-NEXT: movzwl %si, %eax
140-
; X64-NEXT: movzwl %di, %ecx
141-
; X64-NEXT: shrl %ecx
142-
; X64-NEXT: shrl %eax
128+
; X64-NEXT: movzwl %si, %ecx
129+
; X64-NEXT: movzwl %di, %eax
143130
; X64-NEXT: addl %ecx, %eax
144-
; X64-NEXT: andl %esi, %edi
145-
; X64-NEXT: andl $1, %edi
146-
; X64-NEXT: addl %edi, %eax
131+
; X64-NEXT: shrl %eax
147132
; X64-NEXT: # kill: def $ax killed $ax killed $eax
148133
; X64-NEXT: retq
149134
%s0 = lshr i16 %a0, 1
@@ -214,23 +199,19 @@ define i32 @test_lsb_i32(i32 %a0, i32 %a1) nounwind {
214199
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
215200
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
216201
; X86-NEXT: movl %eax, %edx
217-
; X86-NEXT: shrl %edx
218-
; X86-NEXT: andl %ecx, %eax
219-
; X86-NEXT: shrl %ecx
220-
; X86-NEXT: addl %edx, %ecx
221-
; X86-NEXT: andl $1, %eax
222-
; X86-NEXT: addl %ecx, %eax
202+
; X86-NEXT: andl %ecx, %edx
203+
; X86-NEXT: xorl %ecx, %eax
204+
; X86-NEXT: shrl %eax
205+
; X86-NEXT: addl %edx, %eax
223206
; X86-NEXT: retl
224207
;
225208
; X64-LABEL: test_lsb_i32:
226209
; X64: # %bb.0:
210+
; X64-NEXT: movl %esi, %ecx
227211
; X64-NEXT: movl %edi, %eax
228-
; X64-NEXT: shrl %eax
229-
; X64-NEXT: andl %esi, %edi
230-
; X64-NEXT: shrl %esi
231-
; X64-NEXT: addl %esi, %eax
232-
; X64-NEXT: andl $1, %edi
233-
; X64-NEXT: addl %edi, %eax
212+
; X64-NEXT: addq %rcx, %rax
213+
; X64-NEXT: shrq %rax
214+
; X64-NEXT: # kill: def $eax killed $eax killed $rax
234215
; X64-NEXT: retq
235216
%s0 = lshr i32 %a0, 1
236217
%s1 = lshr i32 %a1, 1
@@ -300,40 +281,23 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
300281
define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind {
301282
; X86-LABEL: test_lsb_i64:
302283
; X86: # %bb.0:
303-
; X86-NEXT: pushl %ebx
304-
; X86-NEXT: pushl %edi
305-
; X86-NEXT: pushl %esi
306-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
307284
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
308285
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
309-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
310-
; X86-NEXT: movl %edi, %ebx
311-
; X86-NEXT: shrl %ebx
312-
; X86-NEXT: shldl $31, %eax, %edi
313-
; X86-NEXT: movl %ecx, %edx
314-
; X86-NEXT: shrl %edx
315-
; X86-NEXT: shldl $31, %esi, %ecx
316-
; X86-NEXT: addl %edi, %ecx
317-
; X86-NEXT: adcl %ebx, %edx
318-
; X86-NEXT: andl %esi, %eax
319-
; X86-NEXT: andl $1, %eax
320-
; X86-NEXT: addl %ecx, %eax
321-
; X86-NEXT: adcl $0, %edx
322-
; X86-NEXT: popl %esi
323-
; X86-NEXT: popl %edi
324-
; X86-NEXT: popl %ebx
286+
; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
287+
; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax
288+
; X86-NEXT: setb %dl
289+
; X86-NEXT: movzbl %dl, %edx
290+
; X86-NEXT: shldl $31, %eax, %edx
291+
; X86-NEXT: shldl $31, %ecx, %eax
325292
; X86-NEXT: retl
326293
;
327294
; X64-LABEL: test_lsb_i64:
328295
; X64: # %bb.0:
329-
; X64-NEXT: movq %rdi, %rcx
330-
; X64-NEXT: shrq %rcx
331-
; X64-NEXT: andl %esi, %edi
332296
; X64-NEXT: movq %rsi, %rax
333-
; X64-NEXT: shrq %rax
334-
; X64-NEXT: addq %rcx, %rax
335-
; X64-NEXT: andl $1, %edi
336-
; X64-NEXT: addq %rdi, %rax
297+
; X64-NEXT: andq %rdi, %rax
298+
; X64-NEXT: xorq %rdi, %rsi
299+
; X64-NEXT: shrq %rsi
300+
; X64-NEXT: addq %rsi, %rax
337301
; X64-NEXT: retq
338302
%s0 = lshr i64 %a0, 1
339303
%s1 = lshr i64 %a1, 1

0 commit comments

Comments
 (0)