@@ -29,3 +29,92 @@ bb193: ; preds = %bb173
2929 store volatile i32 0 , ptr null , align 4
3030 unreachable
3131}
32+
33+ ; Check that the deinterleaving pass doesn't try to transform isolated patterns without a relevant deinterleaving pattern
; Test input: a single-block reduction loop followed by a horizontal add.
; The body loads two adjacent <16 x i8> vectors from %a, compares every byte
; with zero, zero-extends the <16 x i1> results to <16 x i32> and accumulates
; them in two independent accumulators. The function contains no shuffle or
; deinterleave operation. The CHECK lines pin the expected assembly for this
; IR; they show the accumulators kept as plain vector adds.
; NOTE(review): the CHECK lines look like update_llc_test_checks.py output and
; the target looks like AArch64 (RUN line is outside this view) - confirm, and
; regenerate the expectations rather than hand-editing them.
34+ define i32 @check_deinterleaving_has_deinterleave (ptr %a ) {
35+ ; CHECK-LABEL: check_deinterleaving_has_deinterleave:
36+ ; CHECK: // %bb.0: // %entry
37+ ; CHECK-NEXT: movi v0.2d, #0000000000000000
38+ ; CHECK-NEXT: movi v1.4s, #1
39+ ; CHECK-NEXT: add x8, x0, #16
40+ ; CHECK-NEXT: movi v3.2d, #0000000000000000
41+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
42+ ; CHECK-NEXT: mov w9, #32 // =0x20
43+ ; CHECK-NEXT: movi v4.2d, #0000000000000000
44+ ; CHECK-NEXT: movi v5.2d, #0000000000000000
45+ ; CHECK-NEXT: movi v7.2d, #0000000000000000
46+ ; CHECK-NEXT: movi v6.2d, #0000000000000000
47+ ; CHECK-NEXT: movi v16.2d, #0000000000000000
48+ ; CHECK-NEXT: .LBB1_1: // %vector.body
49+ ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
50+ ; CHECK-NEXT: ldp q17, q18, [x8, #-16]
51+ ; CHECK-NEXT: subs x9, x9, #32
52+ ; CHECK-NEXT: add x8, x8, #32
53+ ; CHECK-NEXT: cmeq v17.16b, v17.16b, #0
54+ ; CHECK-NEXT: cmeq v18.16b, v18.16b, #0
55+ ; CHECK-NEXT: ushll2 v19.8h, v17.16b, #0
56+ ; CHECK-NEXT: ushll v17.8h, v17.8b, #0
57+ ; CHECK-NEXT: ushll2 v20.8h, v18.16b, #0
58+ ; CHECK-NEXT: ushll v18.8h, v18.8b, #0
59+ ; CHECK-NEXT: ushll v21.4s, v19.4h, #0
60+ ; CHECK-NEXT: ushll2 v19.4s, v19.8h, #0
61+ ; CHECK-NEXT: ushll v22.4s, v17.4h, #0
62+ ; CHECK-NEXT: ushll2 v17.4s, v17.8h, #0
63+ ; CHECK-NEXT: ushll2 v23.4s, v20.8h, #0
64+ ; CHECK-NEXT: ushll v24.4s, v18.4h, #0
65+ ; CHECK-NEXT: ushll2 v18.4s, v18.8h, #0
66+ ; CHECK-NEXT: ushll v20.4s, v20.4h, #0
67+ ; CHECK-NEXT: and v21.16b, v21.16b, v1.16b
68+ ; CHECK-NEXT: and v19.16b, v19.16b, v1.16b
69+ ; CHECK-NEXT: and v22.16b, v22.16b, v1.16b
70+ ; CHECK-NEXT: and v17.16b, v17.16b, v1.16b
71+ ; CHECK-NEXT: and v23.16b, v23.16b, v1.16b
72+ ; CHECK-NEXT: and v24.16b, v24.16b, v1.16b
73+ ; CHECK-NEXT: and v18.16b, v18.16b, v1.16b
74+ ; CHECK-NEXT: and v20.16b, v20.16b, v1.16b
75+ ; CHECK-NEXT: add v4.4s, v4.4s, v19.4s
76+ ; CHECK-NEXT: add v2.4s, v2.4s, v21.4s
77+ ; CHECK-NEXT: add v0.4s, v0.4s, v22.4s
78+ ; CHECK-NEXT: add v3.4s, v3.4s, v17.4s
79+ ; CHECK-NEXT: add v16.4s, v16.4s, v23.4s
80+ ; CHECK-NEXT: add v5.4s, v5.4s, v24.4s
81+ ; CHECK-NEXT: add v6.4s, v6.4s, v20.4s
82+ ; CHECK-NEXT: add v7.4s, v7.4s, v18.4s
83+ ; CHECK-NEXT: b.ne .LBB1_1
84+ ; CHECK-NEXT: // %bb.2: // %middle.block
85+ ; CHECK-NEXT: add v1.4s, v7.4s, v3.4s
86+ ; CHECK-NEXT: add v3.4s, v16.4s, v4.4s
87+ ; CHECK-NEXT: add v0.4s, v5.4s, v0.4s
88+ ; CHECK-NEXT: add v2.4s, v6.4s, v2.4s
89+ ; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
90+ ; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
91+ ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
92+ ; CHECK-NEXT: addv s0, v0.4s
93+ ; CHECK-NEXT: fmov w0, s0
94+ ; CHECK-NEXT: ret
95+ entry:
96+ br label %vector.body
97+
; Loop body. %index starts at 0 and advances by 32 per iteration; the exit test
; below compares %index.next against 32, so the body executes exactly once.
98+ vector.body: ; preds = %vector.body, %entry
99+ %index = phi i64 [ 0 , %entry ], [ %index.next , %vector.body ]
; Two independent <16 x i32> accumulators, both zero on entry.
100+ %vec.phi = phi <16 x i32 > [ zeroinitializer , %entry ], [ %9 , %vector.body ]
101+ %vec.phi50 = phi <16 x i32 > [ zeroinitializer , %entry ], [ %10 , %vector.body ]
; %next.gep = %a + %index bytes; %4 is the address 16 bytes after it.
102+ %next.gep = getelementptr i8 , ptr %a , i64 %index
103+ %4 = getelementptr i8 , ptr %next.gep , i64 16
; Load the two adjacent 16-byte vectors (byte-aligned accesses).
104+ %wide.load = load <16 x i8 >, ptr %next.gep , align 1
105+ %wide.load51 = load <16 x i8 >, ptr %4 , align 1
; Per-lane test for a zero byte, widened so each matching lane contributes 1.
106+ %5 = icmp eq <16 x i8 > %wide.load , zeroinitializer
107+ %6 = icmp eq <16 x i8 > %wide.load51 , zeroinitializer
108+ %7 = zext <16 x i1 > %5 to <16 x i32 >
109+ %8 = zext <16 x i1 > %6 to <16 x i32 >
; Accumulate each half into its own accumulator.
110+ %9 = add <16 x i32 > %vec.phi , %7
111+ %10 = add <16 x i32 > %vec.phi50 , %8
112+ %index.next = add nuw i64 %index , 32
113+ %11 = icmp eq i64 %index.next , 32
114+ br i1 %11 , label %middle.block , label %vector.body
115+
; Combine the two accumulators lane-wise, then reduce the 16 lanes to one i32.
; With the single iteration above, the result is the number of zero bytes among
; the 32 bytes loaded from %a.
116+ middle.block:
117+ %bin.rdx = add <16 x i32 > %10 , %9
118+ %12 = tail call i32 @llvm.vector.reduce.add.v16i32 (<16 x i32 > %bin.rdx )
119+ ret i32 %12
120+ }
0 commit comments