Skip to content

Commit 914b998

Browse files
committed
Add relevant debug output and test case
1 parent fd95d5f commit 914b998

File tree

2 files changed

+94
-1
lines changed

2 files changed

+94
-1
lines changed

llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1766,8 +1766,12 @@ bool ComplexDeinterleavingGraph::checkNodes() {
17661766

17671767
// We need a deinterleave node in order to guarantee that we're working with
17681768
// complex numbers.
1769-
if (!FoundDeinterleaveNode)
1769+
if (!FoundDeinterleaveNode) {
1770+
LLVM_DEBUG(
1771+
dbgs() << "Couldn't find a deinterleave node within the graph, cannot "
1772+
"guarantee safety during graph transformation.\n");
17701773
return false;
1774+
}
17711775

17721776
// Collect all instructions from roots to leaves
17731777
SmallPtrSet<Instruction *, 16> AllInstructions;

llvm/test/CodeGen/AArch64/complex-deinterleaving-crash.ll

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,92 @@ bb193: ; preds = %bb173
2929
store volatile i32 0, ptr null, align 4
3030
unreachable
3131
}
32+
33+
; Check that the complex deinterleaving pass does not transform an isolated pattern when the graph contains no deinterleave node.
34+
define i32 @check_deinterleaving_has_deinterleave(ptr %a) {
35+
; CHECK-LABEL: check_deinterleaving_has_deinterleave:
36+
; CHECK: // %bb.0: // %entry
37+
; CHECK-NEXT: movi v0.2d, #0000000000000000
38+
; CHECK-NEXT: movi v1.4s, #1
39+
; CHECK-NEXT: add x8, x0, #16
40+
; CHECK-NEXT: movi v3.2d, #0000000000000000
41+
; CHECK-NEXT: movi v2.2d, #0000000000000000
42+
; CHECK-NEXT: mov w9, #32 // =0x20
43+
; CHECK-NEXT: movi v4.2d, #0000000000000000
44+
; CHECK-NEXT: movi v5.2d, #0000000000000000
45+
; CHECK-NEXT: movi v7.2d, #0000000000000000
46+
; CHECK-NEXT: movi v6.2d, #0000000000000000
47+
; CHECK-NEXT: movi v16.2d, #0000000000000000
48+
; CHECK-NEXT: .LBB1_1: // %vector.body
49+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
50+
; CHECK-NEXT: ldp q17, q18, [x8, #-16]
51+
; CHECK-NEXT: subs x9, x9, #32
52+
; CHECK-NEXT: add x8, x8, #32
53+
; CHECK-NEXT: cmeq v17.16b, v17.16b, #0
54+
; CHECK-NEXT: cmeq v18.16b, v18.16b, #0
55+
; CHECK-NEXT: ushll2 v19.8h, v17.16b, #0
56+
; CHECK-NEXT: ushll v17.8h, v17.8b, #0
57+
; CHECK-NEXT: ushll2 v20.8h, v18.16b, #0
58+
; CHECK-NEXT: ushll v18.8h, v18.8b, #0
59+
; CHECK-NEXT: ushll v21.4s, v19.4h, #0
60+
; CHECK-NEXT: ushll2 v19.4s, v19.8h, #0
61+
; CHECK-NEXT: ushll v22.4s, v17.4h, #0
62+
; CHECK-NEXT: ushll2 v17.4s, v17.8h, #0
63+
; CHECK-NEXT: ushll2 v23.4s, v20.8h, #0
64+
; CHECK-NEXT: ushll v24.4s, v18.4h, #0
65+
; CHECK-NEXT: ushll2 v18.4s, v18.8h, #0
66+
; CHECK-NEXT: ushll v20.4s, v20.4h, #0
67+
; CHECK-NEXT: and v21.16b, v21.16b, v1.16b
68+
; CHECK-NEXT: and v19.16b, v19.16b, v1.16b
69+
; CHECK-NEXT: and v22.16b, v22.16b, v1.16b
70+
; CHECK-NEXT: and v17.16b, v17.16b, v1.16b
71+
; CHECK-NEXT: and v23.16b, v23.16b, v1.16b
72+
; CHECK-NEXT: and v24.16b, v24.16b, v1.16b
73+
; CHECK-NEXT: and v18.16b, v18.16b, v1.16b
74+
; CHECK-NEXT: and v20.16b, v20.16b, v1.16b
75+
; CHECK-NEXT: add v4.4s, v4.4s, v19.4s
76+
; CHECK-NEXT: add v2.4s, v2.4s, v21.4s
77+
; CHECK-NEXT: add v0.4s, v0.4s, v22.4s
78+
; CHECK-NEXT: add v3.4s, v3.4s, v17.4s
79+
; CHECK-NEXT: add v16.4s, v16.4s, v23.4s
80+
; CHECK-NEXT: add v5.4s, v5.4s, v24.4s
81+
; CHECK-NEXT: add v6.4s, v6.4s, v20.4s
82+
; CHECK-NEXT: add v7.4s, v7.4s, v18.4s
83+
; CHECK-NEXT: b.ne .LBB1_1
84+
; CHECK-NEXT: // %bb.2: // %middle.block
85+
; CHECK-NEXT: add v1.4s, v7.4s, v3.4s
86+
; CHECK-NEXT: add v3.4s, v16.4s, v4.4s
87+
; CHECK-NEXT: add v0.4s, v5.4s, v0.4s
88+
; CHECK-NEXT: add v2.4s, v6.4s, v2.4s
89+
; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
90+
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
91+
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
92+
; CHECK-NEXT: addv s0, v0.4s
93+
; CHECK-NEXT: fmov w0, s0
94+
; CHECK-NEXT: ret
95+
entry:
96+
br label %vector.body
97+
98+
vector.body: ; preds = %vector.body, %entry
99+
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
100+
%vec.phi = phi <16 x i32> [ zeroinitializer, %entry ], [ %9, %vector.body ]
101+
%vec.phi50 = phi <16 x i32> [ zeroinitializer, %entry ], [ %10, %vector.body ]
102+
%next.gep = getelementptr i8, ptr %a, i64 %index
103+
%4 = getelementptr i8, ptr %next.gep, i64 16
104+
%wide.load = load <16 x i8>, ptr %next.gep, align 1
105+
%wide.load51 = load <16 x i8>, ptr %4, align 1
106+
%5 = icmp eq <16 x i8> %wide.load, zeroinitializer
107+
%6 = icmp eq <16 x i8> %wide.load51, zeroinitializer
108+
%7 = zext <16 x i1> %5 to <16 x i32>
109+
%8 = zext <16 x i1> %6 to <16 x i32>
110+
%9 = add <16 x i32> %vec.phi, %7
111+
%10 = add <16 x i32> %vec.phi50, %8
112+
%index.next = add nuw i64 %index, 32
113+
%11 = icmp eq i64 %index.next, 32
114+
br i1 %11, label %middle.block, label %vector.body
115+
116+
middle.block:
117+
%bin.rdx = add <16 x i32> %10, %9
118+
%12 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %bin.rdx)
119+
ret i32 %12
120+
}

0 commit comments

Comments
 (0)