Skip to content

Commit 032d1ae

Browse files
committed
8342498: Add test for Allocation elimination after use as alignment reference by SuperWord
Backport-of: f62fc4844125cc20a91dc2be39ba05a2d3aca8cf
1 parent 77ce004 commit 032d1ae

File tree

1 file changed

+183
-0
lines changed

1 file changed

+183
-0
lines changed
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
/*
2+
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
package compiler.loopopts.superword;
25+
26+
/*
27+
* @test
28+
* @bug 8342498
29+
* @summary Test SuperWord, when it aligns to field-store, and the corresponding allocation is eliminated.
30+
* @run driver compiler.loopopts.superword.TestEliminateAllocationWithCastP2XUse
31+
* @run main/othervm -Xbatch
32+
* -XX:-SplitIfBlocks -XX:LoopMaxUnroll=8
33+
* -XX:+UnlockDiagnosticVMOptions -XX:DominatorSearchLimit=45
34+
* compiler.loopopts.superword.TestEliminateAllocationWithCastP2XUse
35+
*/
36+
37+
public class TestEliminateAllocationWithCastP2XUse {
38+
// Test entry point: allocates one byte array and calls test(a) on it 10000 times.
// The value returned by test(a) is ignored; args is not used.
public static void main(String args[]) {
39+
// One array shared by all invocations; test() increments every element on each call.
byte[] a = new byte[10_000];
40+
// NOTE(review): presumably 10000 invocations are enough to get test() JIT-compiled
// under the -Xbatch run configuration - confirm against the compilation thresholds in use.
for (int i = 0; i < 10000; i++) {
41+
test(a);
42+
}
43+
}
44+
45+
// Summary:
46+
// - Some B allocations are detected as NoEscape, but cannot be removed because of a field load.
47+
// - The field loads cannot be LoadNode::split_through_phi because DominatorSearchLimit is too low
48+
// for the dominates query to look through some IfNode / IfProj path.
49+
// - We go into loop-opts.
50+
// - In theory, the Stores of B::offset would be moved out of the loop. But we disable
51+
// PhaseIdealLoop::try_move_store_after_loop by setting -XX:-SplitIfBlocks.
52+
// - The field loads are folded away because of some MaxUnroll trick, where the val constant folds to 1.
53+
// - SuperWord eventually kicks in, and vectorizes the array stores.
54+
// - Since some vectorization has happened, SuperWord wants to align the main loop with a memory reference
55+
// in the loop. The code here is not very smart, and just picks the memory reference that occurs the
56+
// most often. But the B::offset stores occur more often than the array stores, and so we align to
57+
// one of the B::offset stores. This inserts a CastP2X under the CheckCastPP of the B allocation.
58+
// - Once loop opts is over, we eventually go into macro expansion.
59+
// - During macro expansion, we now discover that the Allocations were marked NoEscape, and that by now
60+
// there are no field loads any more: yay, we can remove the allocation!
61+
// - ... except that there is the CastP2X from SuperWord alignment ...
62+
// - The Allocation removal code wants to pattern match the CastP2X as part of a GC barrier, but then
63+
// the pattern does not conform to the expectation - it is after all from SuperWord. This leads to
64+
// an assert, and SIGSEGV in product, at least with G1GC.
65+
// Test kernel for 8342498.
//
// Runtime behavior (independent of any compiler optimization):
// - val is always 1 after the first loop, so none of the "never" exceptions is thrown.
// - Every element of a is incremented by one (byte arithmetic, wraps on overflow).
// - The sum of the nine B::offset fields is only computed if val == 42, which never holds,
//   so the method always returns 0.
//
// Do not reorder or "simplify" the statements below: the comments in the body explain why
// each section is shaped the way it is.
public static long test(byte[] a) {
66+
// Delay val == 1 until loop-opts, with MaxUnroll trick.
67+
int val = 0;
68+
for (int i = 0; i < 4; i++) {
69+
if ((i % 2) == 0) {
70+
val = 1;
71+
}
72+
}
73+
// During loop opts, we learn val == 1.
74+
// But we don't know that during EscapeAnalysis (EA) yet.
75+
76+
// 9 Allocations, discovered as NoEscape during EA.
77+
B b1 = new B();
78+
B b2 = new B();
79+
B b3 = new B();
80+
B b4 = new B();
81+
B b5 = new B();
82+
B b6 = new B();
83+
B b7 = new B();
84+
B b8 = new B();
85+
B b9 = new B();
86+
87+
// Some path of IfNode / IfProj.
88+
// Only folds away once we know val == 1
89+
// This delays the LoadNode::split_through_phi, because it needs a dominates call
90+
// to succeed, but it cannot look through this path because we set -XX:DominatorSearchLimit=45
91+
// i.e. just a little too low to be able to look through.
92+
// Without the LoadNode::split_through_phi before the end of EA, the Allocation cannot yet be
93+
// removed, due to a "Field load", i.e. that Load for B::offset.
94+
// But later, this path can actually fold away, when we know that val == 1. At that point,
95+
// also the Load from B::offset folds away because LoadNode::split_through_phi succeeds.
96+
// At that point the B allocations have no Loads any more, and can be removed... but this only
97+
// happens at macro expansion, after all loop opts.
// The 32 checks below (four groups of eight) are never true at runtime, since val is 1 here.
98+
if (val == 1010) { throw new RuntimeException("never"); }
99+
if (val == 1020) { throw new RuntimeException("never"); }
100+
if (val == 1030) { throw new RuntimeException("never"); }
101+
if (val == 1040) { throw new RuntimeException("never"); }
102+
if (val == 1060) { throw new RuntimeException("never"); }
103+
if (val == 1070) { throw new RuntimeException("never"); }
104+
if (val == 1080) { throw new RuntimeException("never"); }
105+
if (val == 1090) { throw new RuntimeException("never"); }
106+
107+
if (val == 2010) { throw new RuntimeException("never"); }
108+
if (val == 2020) { throw new RuntimeException("never"); }
109+
if (val == 2030) { throw new RuntimeException("never"); }
110+
if (val == 2040) { throw new RuntimeException("never"); }
111+
if (val == 2060) { throw new RuntimeException("never"); }
112+
if (val == 2070) { throw new RuntimeException("never"); }
113+
if (val == 2080) { throw new RuntimeException("never"); }
114+
if (val == 2090) { throw new RuntimeException("never"); }
115+
116+
if (val == 3010) { throw new RuntimeException("never"); }
117+
if (val == 3020) { throw new RuntimeException("never"); }
118+
if (val == 3030) { throw new RuntimeException("never"); }
119+
if (val == 3040) { throw new RuntimeException("never"); }
120+
if (val == 3060) { throw new RuntimeException("never"); }
121+
if (val == 3070) { throw new RuntimeException("never"); }
122+
if (val == 3080) { throw new RuntimeException("never"); }
123+
if (val == 3090) { throw new RuntimeException("never"); }
124+
125+
if (val == 4010) { throw new RuntimeException("never"); }
126+
if (val == 4020) { throw new RuntimeException("never"); }
127+
if (val == 4030) { throw new RuntimeException("never"); }
128+
if (val == 4040) { throw new RuntimeException("never"); }
129+
if (val == 4060) { throw new RuntimeException("never"); }
130+
if (val == 4070) { throw new RuntimeException("never"); }
131+
if (val == 4080) { throw new RuntimeException("never"); }
132+
if (val == 4090) { throw new RuntimeException("never"); }
133+
134+
// mulVal is multiplied by 3 in every iteration; as a long it silently wraps around on
// overflow. Its value is only ever stored into the B::offset fields.
long mulVal = 1;
135+
for (int i = 0; i < a.length; i++) {
136+
mulVal *= 3;
137+
// We do some vector store, so that SuperWord succeeds, and creates the
138+
// alignment code, which emits the CastP2X.
139+
a[i]++;
140+
// But we also have 9 Stores for the B::offset.
141+
// SuperWord now sees more of these stores than of the array stores, and picks
142+
// one of the B::offset stores as the alignment reference... creating a CastP2X
143+
// for the CheckCastPP of the B allocation.
144+
b1.offset = mulVal;
145+
b2.offset = mulVal;
146+
b3.offset = mulVal;
147+
b4.offset = mulVal;
148+
b5.offset = mulVal;
149+
b6.offset = mulVal;
150+
b7.offset = mulVal;
151+
b8.offset = mulVal;
152+
b9.offset = mulVal;
153+
}
154+
155+
// This folds the loads away, once we know val == 1
156+
// That happens during loop-opts, so after EA, but before macro expansion.
// At runtime val is 1, so the branch below is never taken and ret stays 0.
157+
long ret = 0;
158+
if (val == 42) {
159+
ret = b1.offset +
160+
b2.offset +
161+
b3.offset +
162+
b4.offset +
163+
b5.offset +
164+
b6.offset +
165+
b7.offset +
166+
b8.offset +
167+
b9.offset;
168+
}
169+
170+
return ret;
171+
}
172+
173+
// Holder object for the field store used by test(). Only "offset" is read or written by
// test(); the six pad fields are never accessed and exist only to precede "offset" in the
// declaration order.
// NOTE(review): the "at N" byte positions below are the original author's expectation of the
// field layout; the actual layout is decided by the VM - confirm before relying on them.
static class B {
174+
// Add padding so that the old SuperWord::can_create_pairs accepts the field store to B.offset
175+
long pad1 = 0; // at 16
176+
long pad2 = 0; // at 24
177+
long pad3 = 0; // at 32
178+
long pad4 = 0; // at 40
179+
long pad5 = 0; // at 48
180+
long pad6 = 0; // at 56
181+
long offset = 0; // offset at 64 bytes
182+
}
183+
}

0 commit comments

Comments
 (0)