Skip to content

Commit 7d6c902

Browse files
author
Xiaohong Gong
committed
8357726: Improve C2 to recognize counted loops with multiple casts in trip counter
Reviewed-by: chagedorn, epeter, galder
1 parent 5091057 commit 7d6c902

File tree

3 files changed

+275
-1
lines changed

3 files changed

+275
-1
lines changed

src/hotspot/share/opto/loopnode.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1689,7 +1689,9 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_
16891689
return false;
16901690
}
16911691

1692-
if (xphi->Opcode() == Op_Cast(iv_bt)) {
1692+
// Iteratively uncast the loop induction variable
1693+
// until no more CastII/CastLL nodes are found.
1694+
while (xphi->Opcode() == Op_Cast(iv_bt)) {
16931695
xphi = xphi->in(1);
16941696
}
16951697

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
/*
2+
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
package compiler.loopopts;
25+
26+
import java.util.Arrays;
27+
import java.util.Objects;
28+
29+
import compiler.lib.generators.*;
30+
import compiler.lib.ir_framework.*;
31+
import jdk.test.lib.Asserts;
32+
33+
/*
34+
* @test
35+
* @bug 8357726
36+
* @summary Improve C2 to recognize counted loops with multiple casts in trip counter
37+
* @library /test/lib /
38+
* @run driver compiler.loopopts.TestCountedLoopCastIV DisableUnroll
39+
* @run driver compiler.loopopts.TestCountedLoopCastIV
40+
*/
41+
42+
public class TestCountedLoopCastIV {
43+
// Length of both the input and the output array.
private static final int LEN = 1024;
44+
// Generator source used by the static initializer to populate 'in'.
private static final Generators random = Generators.G;
45+
46+
// Source array; written only by the static initializer below.
private static int[] in;
47+
// Destination array; written by the test methods and zeroed by cleanUp().
private static int[] out;
48+
49+
// Allocates both arrays and fills 'in' from random.ints() (presumably
// random int values - confirm against the generators library). 'out'
// keeps its default zero contents.
static {
50+
in = new int[LEN];
51+
out = new int[LEN];
52+
random.fill(random.ints(), in);
53+
}
54+
55+
// Resets every element of 'out' to 0, so that values left behind by an
// earlier test invocation cannot satisfy a later verify() call.
private static void cleanUp() {
56+
Arrays.fill(out, 0);
57+
}
58+
59+
// Checks the result of a copy loop: for every i in [start, limit),
// advancing by 'stride', asserts that ref[i + in_offset] equals
// res[i + out_offset] using Asserts.assertEquals.
// The caller must pass the same start/limit/stride and offsets that the
// test method under verification used.
private static void verify(int[] ref, int[] res, int start,
60+
int limit, int stride,
61+
int in_offset, int out_offset) {
62+
for (int i = start; i < limit; i += stride) {
63+
Asserts.assertEquals(ref[i + in_offset], res[i + out_offset]);
64+
}
65+
}
66+
67+
// Test a counted loop with two explicit range checks
68+
// which will create CastIINodes for the loop induction variable.
69+
// In this case, the loop start, limit and stride are
70+
// all constants.
71+
//
72+
// The first IR check with "-XX:LoopUnrollLimit=0" makes sure
73+
// the loop is transformed into exactly one CountedLoopNode,
74+
// verifying the CastII recognition works correctly.
75+
//
76+
// The second IR check ensures the optimization works properly
77+
// with default vm settings.
78+
//
79+
@Test
80+
@IR(counts = {IRNode.COUNTED_LOOP, "1" }, applyIf = {"LoopUnrollLimit", "0"})
81+
@IR(counts = {IRNode.COUNTED_LOOP, ">0" })
82+
static void test1() {
83+
// The largest i visited is LEN - 16, so both index checks below
// always pass and i + 15 never exceeds LEN - 1.
for (int i = 0; i < LEN; i += 16) {
84+
Objects.checkIndex(i, LEN - 3);
85+
int a = in[i + 3];
86+
Objects.checkIndex(i, LEN - 15);
87+
out[i + 15] = a;
88+
}
89+
}
90+
91+
// Runs test1 and checks that out[i + 15] == in[i + 3] for every i in
// [0, LEN) with stride 16.
// 'out' is cleared first, like in the other run methods: the slots written
// by test1 are also written (with the same values) by test2, and this
// method may be invoked repeatedly, so without the reset a stale result
// could make verify() pass even if test1 wrote nothing.
@Run(test = "test1")
92+
public static void runTest1() {
cleanUp();
93+
test1();
94+
verify(in, out, 0, LEN, 16, 3, 15);
95+
}
96+
97+
// Similar to test1, but the loop limit is a variable.
// The induction variable still starts at 0 and advances by 16. Callers
// must pass limit <= LEN: for a larger limit the loop reaches i == LEN
// and Objects.checkIndex throws IndexOutOfBoundsException.
98+
@Test
99+
@IR(counts = {IRNode.COUNTED_LOOP, "1" }, applyIf = {"LoopUnrollLimit", "0"})
100+
@IR(counts = {IRNode.COUNTED_LOOP, ">0" })
101+
static void test2(int limit) {
102+
for (int i = 0; i < limit; i += 16) {
103+
Objects.checkIndex(i, LEN - 3);
104+
int a = in[i + 3];
105+
Objects.checkIndex(i, LEN - 15);
106+
out[i + 15] = a;
107+
}
108+
}
109+
110+
// Runs test2 with three different limits (100, 500 and LEN). 'out' is
// cleared before each call so that verify() only sees values written by
// that call; verify() is given the same stride (16) and offsets (3, 15)
// that test2 uses.
@Run(test = "test2")
111+
private void runTest2() {
112+
cleanUp();
113+
test2(100);
114+
verify(in, out, 0, 100, 16, 3, 15);
115+
116+
cleanUp();
117+
test2(500);
118+
verify(in, out, 0, 500, 16, 3, 15);
119+
120+
cleanUp();
121+
test2(LEN);
122+
verify(in, out, 0, LEN, 16, 3, 15);
123+
}
124+
125+
// Similar to test1 and test2, but the loop is a
126+
// while loop with a variable start and limit.
// The stride is 1 and each iteration copies in[i] to out[i + 3].
// Every i visited must satisfy 0 <= i < LEN - 3, otherwise
// Objects.checkIndex throws IndexOutOfBoundsException.
127+
@Test
128+
@IR(counts = {IRNode.COUNTED_LOOP, "1" }, applyIf = {"LoopUnrollLimit", "0"})
129+
@IR(counts = {IRNode.COUNTED_LOOP, ">0" })
130+
static void test3(int start, int limit) {
131+
int i = start;
132+
while (i < limit) {
133+
Objects.checkIndex(i, LEN);
134+
int a = in[i];
135+
Objects.checkIndex(i, LEN - 3);
136+
out[i + 3] = a;
137+
i++;
138+
}
139+
}
140+
141+
// Runs test3 over three [start, limit) ranges, the last one ending at
// LEN - 3, the largest limit test3 accepts. 'out' is cleared before each
// call, and verify() is given the same range, stride (1) and offsets
// (0, 3) that test3 uses.
@Run(test = "test3")
142+
private void runTest3() {
143+
cleanUp();
144+
test3(0, 100);
145+
verify(in, out, 0, 100, 1, 0, 3);
146+
147+
cleanUp();
148+
test3(128, 500);
149+
verify(in, out, 128, 500, 1, 0, 3);
150+
151+
cleanUp();
152+
test3(LEN - 128, LEN - 3);
153+
verify(in, out, LEN - 128, LEN - 3, 1, 0, 3);
154+
}
155+
156+
// Similar to test3, but the type of induction variable
157+
// is long.
// Because i is a long, the calls below resolve to the
// Objects.checkIndex(long, long) overload, and i is narrowed to int
// for the array accesses. As in test3, every i visited must satisfy
// 0 <= i < LEN - 3.
158+
@Test
159+
@IR(counts = {IRNode.COUNTED_LOOP, "1" }, applyIf = {"LoopUnrollLimit", "0"})
160+
@IR(counts = {IRNode.COUNTED_LOOP, ">0" })
161+
static void test4(long start, long limit) {
162+
for (long i = start; i < limit; i++) {
163+
Objects.checkIndex(i, LEN);
164+
int a = in[(int) i];
165+
Objects.checkIndex(i, LEN - 3);
166+
out[(int) i + 3] = a;
167+
}
168+
}
169+
170+
// Runs test4 (the variant with a long induction variable) over three
// [start, limit) ranges, the last one ending at LEN - 3. 'out' is cleared
// before each call, and verify() is given the same range, stride (1) and
// offsets (0, 3) that test4 uses.
// The int arguments are widened to long at the call site. This method
// must invoke test4 itself: calling test3 here would leave the long
// variant without any result check.
@Run(test = "test4")
171+
private void runTest4() {
172+
cleanUp();
173+
test4(0, 100);
174+
verify(in, out, 0, 100, 1, 0, 3);
175+
176+
cleanUp();
177+
test4(128, 500);
178+
verify(in, out, 128, 500, 1, 0, 3);
179+
180+
cleanUp();
181+
test4(LEN - 128, LEN - 3);
182+
verify(in, out, LEN - 128, LEN - 3, 1, 0, 3);
183+
}
184+
185+
// Entry point for both @run lines in the jtreg header.
// If the first argument is "DisableUnroll", the tests run with
// -XX:LoopUnrollLimit=0, the setting the applyIf IR rules are conditioned
// on. With no arguments the tests run with the default flags. Any other
// argument list is rejected with a RuntimeException.
public static void main(String[] args) {
186+
if (args != null && args.length > 0 && args[0].equals("DisableUnroll")) {
187+
TestFramework.runWithFlags("-XX:LoopUnrollLimit=0");
188+
} else {
189+
if (args != null && args.length != 0) {
190+
throw new RuntimeException("Unexpected args");
191+
}
192+
TestFramework.run();
193+
}
194+
}
195+
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
/*
2+
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
package org.openjdk.bench.vm.compiler;
25+
26+
import org.openjdk.jmh.annotations.*;
27+
28+
import java.util.Objects;
29+
import java.util.concurrent.TimeUnit;
30+
31+
@Warmup(iterations = 3, time = 1)
32+
@Measurement(iterations = 5, time = 1)
33+
@BenchmarkMode(Mode.Throughput)
34+
@OutputTimeUnit(TimeUnit.SECONDS)
35+
@State(Scope.Thread)
36+
@Fork(value=3)
37+
public class CountedLoopCastIV {
38+
// Exclusive upper bound of the benchmark loops. Supplied as a benchmark
// parameter and then clamped to at most LEN - 4 in init().
@Param({"1024", "1536", "2048"})
39+
private int limit;
40+
41+
// Length of 'arr'.
private static final int LEN = 2048;
42+
// First loop index; set to 0 in init().
private int start;
43+
// Array that both benchmark loops read from and write to.
private int[] arr;
44+
45+
// Allocates 'arr' with arr[i] == i, sets the loop start to 0 and clamps
// 'limit' to at most LEN - 4.
@Setup
46+
public void init() {
47+
arr = new int[LEN];
48+
for (int i = 0; i < LEN; i++) {
49+
arr[i] = i;
50+
}
51+
52+
start = 0;
53+
// With limit <= LEN - 4 every loop index satisfies i < LEN - 3, so the
// checkIndex calls in the benchmarks never throw and i + 3 stays inside
// 'arr'. Note that the "2048" parameter value is therefore run as LEN - 4.
limit = Math.min(limit, LEN - 4);
54+
}
55+
56+
// Benchmark with an int induction variable: a while loop from 'start' to
// 'limit' in which each array access is preceded by an explicit
// Objects.checkIndex call on i. Each iteration copies arr[i + 1] to
// arr[i + 3], so 'arr' is modified in place across invocations.
@Benchmark
57+
public void loop_iv_int() {
58+
int i = start;
59+
while (i < limit) {
60+
Objects.checkIndex(i, LEN - 1);
61+
int a = arr[i + 1];
62+
Objects.checkIndex(i, LEN - 3);
63+
arr[i + 3] = a;
64+
i++;
65+
}
66+
}
67+
68+
// Benchmark with a long induction variable: same copy pattern as
// loop_iv_int (arr[i + 1] -> arr[i + 3]), written as a for loop. The
// Objects.checkIndex calls take the long index, and i is narrowed to int
// for the array accesses.
@Benchmark
69+
public void loop_iv_long() {
70+
for (long i = start; i < limit; i++) {
71+
Objects.checkIndex(i, LEN - 1);
72+
int a = arr[(int)i + 1];
73+
Objects.checkIndex(i, LEN - 3);
74+
arr[(int)i + 3] = a;
75+
}
76+
}
77+
}

0 commit comments

Comments
 (0)