|
| 1 | +/* |
| 2 | + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. |
| 3 | + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | + * |
| 5 | + * This code is free software; you can redistribute it and/or modify it |
| 6 | + * under the terms of the GNU General Public License version 2 only, as |
| 7 | + * published by the Free Software Foundation. |
| 8 | + * |
| 9 | + * This code is distributed in the hope that it will be useful, but WITHOUT |
| 10 | + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 11 | + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 12 | + * version 2 for more details (a copy is included in the LICENSE file that |
| 13 | + * accompanied this code). |
| 14 | + * |
| 15 | + * You should have received a copy of the GNU General Public License version |
| 16 | + * 2 along with this work; if not, write to the Free Software Foundation, |
| 17 | + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | + * |
| 19 | + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 20 | + * or visit www.oracle.com if you need additional information or have any |
| 21 | + * questions. |
| 22 | + */ |
| 23 | + |
| 24 | +package compiler.loopopts.superword; |
| 25 | + |
| 26 | +/* |
| 27 | + * @test |
| 28 | + * @bug 8342498 |
| 29 | + * @summary Test SuperWord, when it aligns to field-store, and the corresponding allocation is eliminated. |
| 30 | + * @run driver compiler.loopopts.superword.TestEliminateAllocationWithCastP2XUse |
| 31 | + * @run main/othervm -Xbatch |
| 32 | + * -XX:-SplitIfBlocks -XX:LoopMaxUnroll=8 |
| 33 | + * -XX:+UnlockDiagnosticVMOptions -XX:DominatorSearchLimit=45 |
| 34 | + * compiler.loopopts.superword.TestEliminateAllocationWithCastP2XUse |
| 35 | + */ |
| 36 | + |
| 37 | +public class TestEliminateAllocationWithCastP2XUse { |
| 38 | + public static void main(String args[]) { |
| 39 | + byte[] a = new byte[10_000]; |
| 40 | + for (int i = 0; i < 10000; i++) { |
| 41 | + test(a); |
| 42 | + } |
| 43 | + } |
| 44 | + |
| 45 | + // Summary of the failure scenario (bug 8342498): |
| 46 | + // - Some B allocations are detected as NoEscape, but cannot be removed because of a field load. |
| 47 | + // - The field loads cannot be split by LoadNode::split_through_phi because DominatorSearchLimit is too low |
| 48 | + // for the dominates query to look through some IfNode / IfProj path. |
| 49 | + // - We go into loop-opts. |
| 50 | + // - In theory, the Stores of B::offset would be moved out of the loop. But we disable |
| 51 | + // PhaseIdealLoop::try_move_store_after_loop by setting -XX:-SplitIfBlocks. |
| 52 | + // - The field loads are folded away because of some MaxUnroll trick, where the val constant folds to 1. |
| 53 | + // - SuperWord eventually kicks in, and vectorizes the array stores. |
| 54 | + // - Since some vectorization has happened, SuperWord wants to align the main loop with a memory reference |
| 55 | + // in the loop. The code here is not very smart, and just picks the memory reference that occurs the |
| 56 | + // most often. But the B::offset stores occur more often than the array stores, and so we align to |
| 57 | + // one of the B::offset stores. This inserts a CastP2X under the CheckCastPP of the B allocation. |
| 58 | + // - Once loop opts is over, we eventually go into macro expansion. |
| 59 | + // - During macro expansion, we now discover that the Allocations were marked NoEscape, and that by now |
| 60 | + // there are no field loads any more: yay, we can remove the allocation! |
| 61 | + // - ... except that there is the CastP2X from SuperWord alignment ... |
| 62 | + // - The Allocation removal code wants to pattern match the CastP2X as part of a GC barrier, but then |
| 63 | + // the pattern does not conform to the expectation - it is after all from SuperWord. This leads to |
| 64 | + // an assert, and SIGSEGV in product, at least with G1GC. |
| 65 | + public static long test(byte[] a) { |
| 66 | + // Delay val == 1 until loop-opts, with MaxUnroll trick. |
| 67 | + int val = 0; |
| 68 | + for (int i = 0; i < 4; i++) { |
| 69 | + if ((i % 2) == 0) { |
| 70 | + val = 1; |
| 71 | + } |
| 72 | + } |
| 73 | + // During loop opts, we learn val == 1. |
| 74 | + // But we don't know that during EscapeAnalysis (EA) yet. |
| 75 | + |
| 76 | + // 9 Allocations, discovered as NoEscape during EA. |
| 77 | + B b1 = new B(); |
| 78 | + B b2 = new B(); |
| 79 | + B b3 = new B(); |
| 80 | + B b4 = new B(); |
| 81 | + B b5 = new B(); |
| 82 | + B b6 = new B(); |
| 83 | + B b7 = new B(); |
| 84 | + B b8 = new B(); |
| 85 | + B b9 = new B(); |
| 86 | + |
| 87 | + // Some path of IfNode / IfProj. |
| 88 | + // Only folds away once we know val == 1. |
| 89 | + // This delays the LoadNode::split_through_phi, because it needs a dominates call |
| 90 | + // to succeed, but it cannot look through this path because we set -XX:DominatorSearchLimit=45 |
| 91 | + // i.e. just a little too low to be able to look through. |
| 92 | + // Without the LoadNode::split_through_phi before the end of EA, the Allocation cannot yet be |
| 93 | + // removed, due to a "Field load", i.e. that Load for B::offset. |
| 94 | + // But later, this path can actually fold away, when we know that val == 1. At that point, |
| 95 | + // also the Load from B::offset folds away because LoadNode::split_through_phi succeeds. |
| 96 | + // At that point the B allocations have no Loads any more, and can be removed... but this only |
| 97 | + // happens at macro expansion, after all loop opts. |
| 98 | + if (val == 1010) { throw new RuntimeException("never"); } |
| 99 | + if (val == 1020) { throw new RuntimeException("never"); } |
| 100 | + if (val == 1030) { throw new RuntimeException("never"); } |
| 101 | + if (val == 1040) { throw new RuntimeException("never"); } |
| 102 | + if (val == 1060) { throw new RuntimeException("never"); } |
| 103 | + if (val == 1070) { throw new RuntimeException("never"); } |
| 104 | + if (val == 1080) { throw new RuntimeException("never"); } |
| 105 | + if (val == 1090) { throw new RuntimeException("never"); } |
| 106 | + |
| 107 | + if (val == 2010) { throw new RuntimeException("never"); } |
| 108 | + if (val == 2020) { throw new RuntimeException("never"); } |
| 109 | + if (val == 2030) { throw new RuntimeException("never"); } |
| 110 | + if (val == 2040) { throw new RuntimeException("never"); } |
| 111 | + if (val == 2060) { throw new RuntimeException("never"); } |
| 112 | + if (val == 2070) { throw new RuntimeException("never"); } |
| 113 | + if (val == 2080) { throw new RuntimeException("never"); } |
| 114 | + if (val == 2090) { throw new RuntimeException("never"); } |
| 115 | + |
| 116 | + if (val == 3010) { throw new RuntimeException("never"); } |
| 117 | + if (val == 3020) { throw new RuntimeException("never"); } |
| 118 | + if (val == 3030) { throw new RuntimeException("never"); } |
| 119 | + if (val == 3040) { throw new RuntimeException("never"); } |
| 120 | + if (val == 3060) { throw new RuntimeException("never"); } |
| 121 | + if (val == 3070) { throw new RuntimeException("never"); } |
| 122 | + if (val == 3080) { throw new RuntimeException("never"); } |
| 123 | + if (val == 3090) { throw new RuntimeException("never"); } |
| 124 | + |
| 125 | + if (val == 4010) { throw new RuntimeException("never"); } |
| 126 | + if (val == 4020) { throw new RuntimeException("never"); } |
| 127 | + if (val == 4030) { throw new RuntimeException("never"); } |
| 128 | + if (val == 4040) { throw new RuntimeException("never"); } |
| 129 | + if (val == 4060) { throw new RuntimeException("never"); } |
| 130 | + if (val == 4070) { throw new RuntimeException("never"); } |
| 131 | + if (val == 4080) { throw new RuntimeException("never"); } |
| 132 | + if (val == 4090) { throw new RuntimeException("never"); } |
| 133 | + |
| 134 | + long mulVal = 1; |
| 135 | + for (int i = 0; i < a.length; i++) { |
| 136 | + mulVal *= 3; |
| 137 | + // We do some vector store, so that SuperWord succeeds, and creates the |
| 138 | + // alignment code, which emits the CastP2X. |
| 139 | + a[i]++; |
| 140 | + // But we also have 9 Stores for the B::offset. |
| 141 | + // SuperWord now sees more of these stores than of the array stores, and picks |
| 142 | + // one of the B::offset stores as the alignment reference... creating a CastP2X |
| 143 | + // for the CheckCastPP of the B allocation. |
| 144 | + b1.offset = mulVal; |
| 145 | + b2.offset = mulVal; |
| 146 | + b3.offset = mulVal; |
| 147 | + b4.offset = mulVal; |
| 148 | + b5.offset = mulVal; |
| 149 | + b6.offset = mulVal; |
| 150 | + b7.offset = mulVal; |
| 151 | + b8.offset = mulVal; |
| 152 | + b9.offset = mulVal; |
| 153 | + } |
| 154 | + |
| 155 | + // This folds the loads away, once we know val == 1. |
| 156 | + // That happens during loop-opts, so after EA, but before macro expansion. |
| 157 | + long ret = 0; |
| 158 | + if (val == 42) { |
| 159 | + ret = b1.offset + |
| 160 | + b2.offset + |
| 161 | + b3.offset + |
| 162 | + b4.offset + |
| 163 | + b5.offset + |
| 164 | + b6.offset + |
| 165 | + b7.offset + |
| 166 | + b8.offset + |
| 167 | + b9.offset; |
| 168 | + } |
| 169 | + |
| 170 | + return ret; |
| 171 | + } |
| 172 | + |
| 173 | + static class B { |
| 174 | + // Six long padding fields, so that the old SuperWord::can_create_pairs accepts the field store to B.offset. |
| 175 | + long pad1 = 0; // at 16 |
| 176 | + long pad2 = 0; // at 24 |
| 177 | + long pad3 = 0; // at 32 |
| 178 | + long pad4 = 0; // at 40 |
| 179 | + long pad5 = 0; // at 48 |
| 180 | + long pad6 = 0; // at 56 |
| 181 | + long offset = 0; // offset at 64 bytes |
| 182 | + } |
| 183 | +} |
0 commit comments