Skip to content

Commit 793115a

Browse files
Remove isLegalOrCustom check from DAG combine
Removing the check makes the effect of the combine visible in the test output, so that we can tell the DAG combine is actually happening. The check has been replaced with a FIXME note saying to potentially add it back in once the rest of the implementation is complete.
1 parent bd7d333 commit 793115a

File tree

3 files changed

+277
-255
lines changed

3 files changed

+277
-255
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12536,10 +12536,9 @@ SDValue DAGCombiner::visitPARTIAL_REDUCE_MLA(SDNode *N) {
1253612536
EVT MulOpLHSVT = MulOpLHS.getValueType();
1253712537
if (MulOpLHSVT != MulOpRHS.getValueType())
1253812538
return SDValue();
12539-
// Only perform the DAG combine if there is custom lowering provided by the
12540-
// target
12541-
if (!TLI.isPartialReduceMLALegalOrCustom(N->getValueType(0), MulOpLHSVT))
12542-
return SDValue();
12539+
12540+
// FIXME: Add a check to only perform the DAG combine if there is lowering
12541+
// provided by the target
1254312542

1254412543
bool LHSIsSigned = ExtMulOpLHSOpcode == ISD::SIGN_EXTEND;
1254512544
bool RHSIsSigned = ExtMulOpRHSOpcode == ISD::SIGN_EXTEND;

llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll

Lines changed: 82 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,15 @@ define <4 x i32> @udot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
1212
;
1313
; CHECK-NODOT-LABEL: udot:
1414
; CHECK-NODOT: // %bb.0:
15-
; CHECK-NODOT-NEXT: umull v3.8h, v2.8b, v1.8b
16-
; CHECK-NODOT-NEXT: umull2 v1.8h, v2.16b, v1.16b
17-
; CHECK-NODOT-NEXT: ushll v2.4s, v1.4h, #0
18-
; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v3.4h
19-
; CHECK-NODOT-NEXT: uaddw2 v2.4s, v2.4s, v3.8h
20-
; CHECK-NODOT-NEXT: uaddw2 v0.4s, v0.4s, v1.8h
21-
; CHECK-NODOT-NEXT: add v0.4s, v2.4s, v0.4s
15+
; CHECK-NODOT-NEXT: ushll v3.8h, v1.8b, #0
16+
; CHECK-NODOT-NEXT: ushll v4.8h, v2.8b, #0
17+
; CHECK-NODOT-NEXT: ushll2 v1.8h, v1.16b, #0
18+
; CHECK-NODOT-NEXT: ushll2 v2.8h, v2.16b, #0
19+
; CHECK-NODOT-NEXT: umlal v0.4s, v4.4h, v3.4h
20+
; CHECK-NODOT-NEXT: umull v5.4s, v2.4h, v1.4h
21+
; CHECK-NODOT-NEXT: umlal2 v0.4s, v2.8h, v1.8h
22+
; CHECK-NODOT-NEXT: umlal2 v5.4s, v4.8h, v3.8h
23+
; CHECK-NODOT-NEXT: add v0.4s, v5.4s, v0.4s
2224
; CHECK-NODOT-NEXT: ret
2325
%u.wide = zext <16 x i8> %u to <16 x i32>
2426
%s.wide = zext <16 x i8> %s to <16 x i32>
@@ -35,17 +37,19 @@ define <2 x i32> @udot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
3537
;
3638
; CHECK-NODOT-LABEL: udot_narrow:
3739
; CHECK-NODOT: // %bb.0:
38-
; CHECK-NODOT-NEXT: umull v1.8h, v2.8b, v1.8b
40+
; CHECK-NODOT-NEXT: ushll v1.8h, v1.8b, #0
41+
; CHECK-NODOT-NEXT: ushll v2.8h, v2.8b, #0
3942
; CHECK-NODOT-NEXT: // kill: def $d0 killed $d0 def $q0
40-
; CHECK-NODOT-NEXT: ushll v2.4s, v1.4h, #0
41-
; CHECK-NODOT-NEXT: ushll2 v3.4s, v1.8h, #0
42-
; CHECK-NODOT-NEXT: ext v4.16b, v1.16b, v1.16b, #8
43-
; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v1.4h
43+
; CHECK-NODOT-NEXT: umull v3.4s, v2.4h, v1.4h
44+
; CHECK-NODOT-NEXT: umull2 v4.4s, v2.8h, v1.8h
45+
; CHECK-NODOT-NEXT: ext v5.16b, v1.16b, v1.16b, #8
46+
; CHECK-NODOT-NEXT: ext v6.16b, v2.16b, v2.16b, #8
47+
; CHECK-NODOT-NEXT: umlal v0.4s, v2.4h, v1.4h
4448
; CHECK-NODOT-NEXT: ext v3.16b, v3.16b, v3.16b, #8
45-
; CHECK-NODOT-NEXT: ext v2.16b, v2.16b, v2.16b, #8
46-
; CHECK-NODOT-NEXT: add v0.2s, v3.2s, v0.2s
47-
; CHECK-NODOT-NEXT: uaddw v1.4s, v2.4s, v4.4h
49+
; CHECK-NODOT-NEXT: ext v1.16b, v4.16b, v4.16b, #8
50+
; CHECK-NODOT-NEXT: umlal v3.4s, v6.4h, v5.4h
4851
; CHECK-NODOT-NEXT: add v0.2s, v1.2s, v0.2s
52+
; CHECK-NODOT-NEXT: add v0.2s, v3.2s, v0.2s
4953
; CHECK-NODOT-NEXT: ret
5054
%u.wide = zext <8 x i8> %u to <8 x i32>
5155
%s.wide = zext <8 x i8> %s to <8 x i32>
@@ -62,13 +66,15 @@ define <4 x i32> @sdot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
6266
;
6367
; CHECK-NODOT-LABEL: sdot:
6468
; CHECK-NODOT: // %bb.0:
65-
; CHECK-NODOT-NEXT: smull v3.8h, v2.8b, v1.8b
66-
; CHECK-NODOT-NEXT: smull2 v1.8h, v2.16b, v1.16b
67-
; CHECK-NODOT-NEXT: sshll v2.4s, v1.4h, #0
68-
; CHECK-NODOT-NEXT: saddw v0.4s, v0.4s, v3.4h
69-
; CHECK-NODOT-NEXT: saddw2 v2.4s, v2.4s, v3.8h
70-
; CHECK-NODOT-NEXT: saddw2 v0.4s, v0.4s, v1.8h
71-
; CHECK-NODOT-NEXT: add v0.4s, v2.4s, v0.4s
69+
; CHECK-NODOT-NEXT: sshll v3.8h, v1.8b, #0
70+
; CHECK-NODOT-NEXT: sshll v4.8h, v2.8b, #0
71+
; CHECK-NODOT-NEXT: sshll2 v1.8h, v1.16b, #0
72+
; CHECK-NODOT-NEXT: sshll2 v2.8h, v2.16b, #0
73+
; CHECK-NODOT-NEXT: smlal v0.4s, v4.4h, v3.4h
74+
; CHECK-NODOT-NEXT: smull v5.4s, v2.4h, v1.4h
75+
; CHECK-NODOT-NEXT: smlal2 v0.4s, v2.8h, v1.8h
76+
; CHECK-NODOT-NEXT: smlal2 v5.4s, v4.8h, v3.8h
77+
; CHECK-NODOT-NEXT: add v0.4s, v5.4s, v0.4s
7278
; CHECK-NODOT-NEXT: ret
7379
%u.wide = sext <16 x i8> %u to <16 x i32>
7480
%s.wide = sext <16 x i8> %s to <16 x i32>
@@ -85,17 +91,19 @@ define <2 x i32> @sdot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
8591
;
8692
; CHECK-NODOT-LABEL: sdot_narrow:
8793
; CHECK-NODOT: // %bb.0:
88-
; CHECK-NODOT-NEXT: smull v1.8h, v2.8b, v1.8b
94+
; CHECK-NODOT-NEXT: sshll v1.8h, v1.8b, #0
95+
; CHECK-NODOT-NEXT: sshll v2.8h, v2.8b, #0
8996
; CHECK-NODOT-NEXT: // kill: def $d0 killed $d0 def $q0
90-
; CHECK-NODOT-NEXT: sshll v2.4s, v1.4h, #0
91-
; CHECK-NODOT-NEXT: sshll2 v3.4s, v1.8h, #0
92-
; CHECK-NODOT-NEXT: ext v4.16b, v1.16b, v1.16b, #8
93-
; CHECK-NODOT-NEXT: saddw v0.4s, v0.4s, v1.4h
97+
; CHECK-NODOT-NEXT: smull v3.4s, v2.4h, v1.4h
98+
; CHECK-NODOT-NEXT: smull2 v4.4s, v2.8h, v1.8h
99+
; CHECK-NODOT-NEXT: ext v5.16b, v1.16b, v1.16b, #8
100+
; CHECK-NODOT-NEXT: ext v6.16b, v2.16b, v2.16b, #8
101+
; CHECK-NODOT-NEXT: smlal v0.4s, v2.4h, v1.4h
94102
; CHECK-NODOT-NEXT: ext v3.16b, v3.16b, v3.16b, #8
95-
; CHECK-NODOT-NEXT: ext v2.16b, v2.16b, v2.16b, #8
96-
; CHECK-NODOT-NEXT: add v0.2s, v3.2s, v0.2s
97-
; CHECK-NODOT-NEXT: saddw v1.4s, v2.4s, v4.4h
103+
; CHECK-NODOT-NEXT: ext v1.16b, v4.16b, v4.16b, #8
104+
; CHECK-NODOT-NEXT: smlal v3.4s, v6.4h, v5.4h
98105
; CHECK-NODOT-NEXT: add v0.2s, v1.2s, v0.2s
106+
; CHECK-NODOT-NEXT: add v0.2s, v3.2s, v0.2s
99107
; CHECK-NODOT-NEXT: ret
100108
%u.wide = sext <8 x i8> %u to <8 x i32>
101109
%s.wide = sext <8 x i8> %s to <8 x i32>
@@ -223,19 +231,27 @@ define <4 x i64> @udot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b) {
223231
;
224232
; CHECK-NODOT-LABEL: udot_8to64:
225233
; CHECK-NODOT: // %bb.0: // %entry
226-
; CHECK-NODOT-NEXT: umull v4.8h, v2.8b, v3.8b
227-
; CHECK-NODOT-NEXT: umull2 v2.8h, v2.16b, v3.16b
228-
; CHECK-NODOT-NEXT: ushll v3.4s, v4.4h, #0
229-
; CHECK-NODOT-NEXT: ushll v5.4s, v2.4h, #0
234+
; CHECK-NODOT-NEXT: ushll v4.8h, v3.8b, #0
235+
; CHECK-NODOT-NEXT: ushll v5.8h, v2.8b, #0
236+
; CHECK-NODOT-NEXT: ushll2 v3.8h, v3.16b, #0
237+
; CHECK-NODOT-NEXT: ushll2 v2.8h, v2.16b, #0
238+
; CHECK-NODOT-NEXT: ushll v6.4s, v4.4h, #0
239+
; CHECK-NODOT-NEXT: ushll v7.4s, v5.4h, #0
230240
; CHECK-NODOT-NEXT: ushll2 v4.4s, v4.8h, #0
231-
; CHECK-NODOT-NEXT: ushll2 v2.4s, v2.8h, #0
232-
; CHECK-NODOT-NEXT: uaddw2 v1.2d, v1.2d, v3.4s
233-
; CHECK-NODOT-NEXT: uaddw v0.2d, v0.2d, v3.2s
234-
; CHECK-NODOT-NEXT: uaddl2 v3.2d, v4.4s, v5.4s
235-
; CHECK-NODOT-NEXT: uaddl v4.2d, v4.2s, v5.2s
236-
; CHECK-NODOT-NEXT: uaddw2 v1.2d, v1.2d, v2.4s
237-
; CHECK-NODOT-NEXT: uaddw v0.2d, v0.2d, v2.2s
238-
; CHECK-NODOT-NEXT: add v1.2d, v3.2d, v1.2d
241+
; CHECK-NODOT-NEXT: ushll2 v5.4s, v5.8h, #0
242+
; CHECK-NODOT-NEXT: ushll2 v16.4s, v3.8h, #0
243+
; CHECK-NODOT-NEXT: ushll2 v17.4s, v2.8h, #0
244+
; CHECK-NODOT-NEXT: ushll v3.4s, v3.4h, #0
245+
; CHECK-NODOT-NEXT: ushll v2.4s, v2.4h, #0
246+
; CHECK-NODOT-NEXT: umlal2 v1.2d, v7.4s, v6.4s
247+
; CHECK-NODOT-NEXT: umlal v0.2d, v7.2s, v6.2s
248+
; CHECK-NODOT-NEXT: umull2 v18.2d, v5.4s, v4.4s
249+
; CHECK-NODOT-NEXT: umull v4.2d, v5.2s, v4.2s
250+
; CHECK-NODOT-NEXT: umlal2 v1.2d, v17.4s, v16.4s
251+
; CHECK-NODOT-NEXT: umlal v0.2d, v17.2s, v16.2s
252+
; CHECK-NODOT-NEXT: umlal2 v18.2d, v2.4s, v3.4s
253+
; CHECK-NODOT-NEXT: umlal v4.2d, v2.2s, v3.2s
254+
; CHECK-NODOT-NEXT: add v1.2d, v18.2d, v1.2d
239255
; CHECK-NODOT-NEXT: add v0.2d, v4.2d, v0.2d
240256
; CHECK-NODOT-NEXT: ret
241257
entry:
@@ -258,19 +274,27 @@ define <4 x i64> @sdot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b){
258274
;
259275
; CHECK-NODOT-LABEL: sdot_8to64:
260276
; CHECK-NODOT: // %bb.0: // %entry
261-
; CHECK-NODOT-NEXT: smull v4.8h, v2.8b, v3.8b
262-
; CHECK-NODOT-NEXT: smull2 v2.8h, v2.16b, v3.16b
263-
; CHECK-NODOT-NEXT: sshll v3.4s, v4.4h, #0
264-
; CHECK-NODOT-NEXT: sshll v5.4s, v2.4h, #0
277+
; CHECK-NODOT-NEXT: sshll v4.8h, v3.8b, #0
278+
; CHECK-NODOT-NEXT: sshll v5.8h, v2.8b, #0
279+
; CHECK-NODOT-NEXT: sshll2 v3.8h, v3.16b, #0
280+
; CHECK-NODOT-NEXT: sshll2 v2.8h, v2.16b, #0
281+
; CHECK-NODOT-NEXT: sshll v6.4s, v4.4h, #0
282+
; CHECK-NODOT-NEXT: sshll v7.4s, v5.4h, #0
265283
; CHECK-NODOT-NEXT: sshll2 v4.4s, v4.8h, #0
266-
; CHECK-NODOT-NEXT: sshll2 v2.4s, v2.8h, #0
267-
; CHECK-NODOT-NEXT: saddw2 v1.2d, v1.2d, v3.4s
268-
; CHECK-NODOT-NEXT: saddw v0.2d, v0.2d, v3.2s
269-
; CHECK-NODOT-NEXT: saddl2 v3.2d, v4.4s, v5.4s
270-
; CHECK-NODOT-NEXT: saddl v4.2d, v4.2s, v5.2s
271-
; CHECK-NODOT-NEXT: saddw2 v1.2d, v1.2d, v2.4s
272-
; CHECK-NODOT-NEXT: saddw v0.2d, v0.2d, v2.2s
273-
; CHECK-NODOT-NEXT: add v1.2d, v3.2d, v1.2d
284+
; CHECK-NODOT-NEXT: sshll2 v5.4s, v5.8h, #0
285+
; CHECK-NODOT-NEXT: sshll2 v16.4s, v3.8h, #0
286+
; CHECK-NODOT-NEXT: sshll2 v17.4s, v2.8h, #0
287+
; CHECK-NODOT-NEXT: sshll v3.4s, v3.4h, #0
288+
; CHECK-NODOT-NEXT: sshll v2.4s, v2.4h, #0
289+
; CHECK-NODOT-NEXT: smlal2 v1.2d, v7.4s, v6.4s
290+
; CHECK-NODOT-NEXT: smlal v0.2d, v7.2s, v6.2s
291+
; CHECK-NODOT-NEXT: smull2 v18.2d, v5.4s, v4.4s
292+
; CHECK-NODOT-NEXT: smull v4.2d, v5.2s, v4.2s
293+
; CHECK-NODOT-NEXT: smlal2 v1.2d, v17.4s, v16.4s
294+
; CHECK-NODOT-NEXT: smlal v0.2d, v17.2s, v16.2s
295+
; CHECK-NODOT-NEXT: smlal2 v18.2d, v2.4s, v3.4s
296+
; CHECK-NODOT-NEXT: smlal v4.2d, v2.2s, v3.2s
297+
; CHECK-NODOT-NEXT: add v1.2d, v18.2d, v1.2d
274298
; CHECK-NODOT-NEXT: add v0.2d, v4.2d, v0.2d
275299
; CHECK-NODOT-NEXT: ret
276300
entry:
@@ -531,9 +555,10 @@ define <4 x i64> @sdot_no_bin_op_8to64(<4 x i64> %acc, <16 x i8> %a){
531555
define <4 x i32> @not_udot(<4 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{
532556
; CHECK-LABEL: not_udot:
533557
; CHECK: // %bb.0:
534-
; CHECK-NEXT: umull v1.8h, v2.8b, v1.8b
535-
; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h
536-
; CHECK-NEXT: uaddw2 v0.4s, v0.4s, v1.8h
558+
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
559+
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
560+
; CHECK-NEXT: umlal v0.4s, v2.4h, v1.4h
561+
; CHECK-NEXT: umlal2 v0.4s, v2.8h, v1.8h
537562
; CHECK-NEXT: ret
538563
%u.wide = zext <8 x i8> %u to <8 x i32>
539564
%s.wide = zext <8 x i8> %s to <8 x i32>

0 commit comments

Comments
 (0)