Skip to content

Commit 2cda0d5

Browse files
committed
GROOVY-10307: add targeted JMH benchmarks for SwitchPoint invalidation regression
1 parent 8a40250 commit 2cda0d5

File tree

3 files changed

+1196
-0
lines changed

3 files changed

+1196
-0
lines changed
Lines changed: 313 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,313 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.groovy.perf.grails
20+
21+
import groovy.lang.ExpandoMetaClass
22+
import groovy.lang.GroovySystem
23+
24+
import org.openjdk.jmh.annotations.*
25+
import org.openjdk.jmh.infra.Blackhole
26+
27+
import java.util.concurrent.TimeUnit
28+
29+
/**
 * Tests the core SwitchPoint invalidation overhead that causes the
 * GROOVY-10307 performance regression in Grails applications.
 *
 * The key insight: in Groovy 4's indy-only dispatch, ALL call sites
 * share a single global SwitchPoint. When ANY metaclass changes
 * (even on an unrelated type), the SwitchPoint is invalidated and
 * the JVM must deoptimize ALL compiled call sites. This causes:
 *
 * <ul>
 *   <li>nmethod invalidation (compiled code thrown away)</li>
 *   <li>Fallback to interpreter for affected call sites</li>
 *   <li>Re-compilation after sufficient invocations</li>
 *   <li>Repeated OSR bailouts in tight loops</li>
 * </ul>
 *
 * This benchmark reproduces the 57x regression ratio observed in
 * the groovy-indy-performance test suite by exercising cross-type
 * invalidation at various frequencies.
 *
 * <p>Every invalidating benchmark rotates the dynamic member name it
 * assigns by counting the metaclass mutations it has performed in the
 * current invocation. The loop index cannot be used for this: it is
 * always a multiple of the invalidation period when a mutation fires,
 * so {@code i % 5} would always be zero.
 *
 * @see <a href="https://issues.apache.org/jira/browse/GROOVY-10307">GROOVY-10307</a>
 */
@Warmup(iterations = 3, time = 2, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS)
@Fork(2)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Thread)
class CallSiteInvalidationBench {
    /** Number of loop iterations performed by each benchmark invocation. */
    static final int ITERATIONS = 100_000

    // Hot path types - methods on these are called in tight loops
    static class HotTarget {
        int value = 42
        int compute() { value * 2 }
        String describe() { "v=$value" }
    }

    static class HotTargetB {
        int count = 10
        int getCount() { count }
    }

    static class HotTargetC {
        List items = [1, 2, 3]
        int itemCount() { items.size() }
    }

    // Cold type - its metaclass is modified to trigger global invalidation
    // but its methods are NOT called in the hot loop
    static class ColdType {
        String label = "cold"
    }

    HotTarget hotTarget
    HotTargetB hotTargetB
    HotTargetC hotTargetC
    List<Integer> sampleList

    /**
     * Runs before every JMH iteration: drops the registered metaclasses
     * of all four fixture types and recreates the fixture instances.
     * Because the level is {@code Level.Iteration}, metaclass members
     * added by a benchmark persist across the invocations of one
     * iteration and are only discarded here.
     */
    @Setup(Level.Iteration)
    void setup() {
        GroovySystem.metaClassRegistry.removeMetaClass(HotTarget)
        GroovySystem.metaClassRegistry.removeMetaClass(HotTargetB)
        GroovySystem.metaClassRegistry.removeMetaClass(HotTargetC)
        GroovySystem.metaClassRegistry.removeMetaClass(ColdType)
        hotTarget = new HotTarget()
        hotTargetB = new HotTargetB()
        hotTargetC = new HotTargetC()
        sampleList = [1, 2, 3, 4, 5]
    }

    // ===== SINGLE CALL SITE BASELINES =====

    /**
     * Baseline: single method call in tight loop, no invalidation.
     * The call site stabilizes after JIT compilation and stays
     * monomorphic. This is the best-case scenario for indy dispatch.
     */
    @Benchmark
    void baselineHotLoop(Blackhole bh) {
        int sum = 0
        for (int i = 0; i < ITERATIONS; i++) {
            sum += hotTarget.compute()
        }
        bh.consume(sum)
    }

    /**
     * Baseline: list.size() in tight loop, no invalidation.
     * Control for the cross-type invalidation benchmarks below.
     */
    @Benchmark
    void baselineListSize(Blackhole bh) {
        int sum = 0
        for (int i = 0; i < ITERATIONS; i++) {
            sum += sampleList.size()
        }
        bh.consume(sum)
    }

    // ===== CROSS-TYPE INVALIDATION (the core regression pattern) =====

    /**
     * Cross-type invalidation at medium frequency (every 1000 calls).
     *
     * The hot loop calls hotTarget.compute(), but ColdType's metaclass
     * is modified periodically. Because the SwitchPoint is global,
     * modifying ColdType invalidates the hotTarget.compute() call site.
     *
     * This is the fundamental Grails pain point: loading one plugin's
     * domain classes (metaclass changes) degrades call site performance
     * for all other components.
     */
    @Benchmark
    void crossTypeInvalidationEvery1000(Blackhole bh) {
        int sum = 0
        int changes = 0 // metaclass mutations so far; drives the name rotation
        for (int i = 0; i < ITERATIONS; i++) {
            sum += hotTarget.compute()
            if (i % 1000 == 0) {
                // i is a multiple of 1000 here, so i % 5 would always be 0
                ColdType.metaClass."dynamic${changes % 5}" = { -> i }
                changes++
            }
        }
        bh.consume(sum)
    }

    /**
     * Cross-type invalidation at high frequency (every 100 calls).
     * Simulates aggressive framework initialization where metaclass
     * modifications happen rapidly. This gives call sites almost no
     * time to stabilize between invalidations.
     */
    @Benchmark
    void crossTypeInvalidationEvery100(Blackhole bh) {
        int sum = 0
        int changes = 0 // metaclass mutations so far; drives the name rotation
        for (int i = 0; i < ITERATIONS; i++) {
            sum += hotTarget.compute()
            if (i % 100 == 0) {
                // i is a multiple of 100 here, so i % 5 would always be 0
                ColdType.metaClass."dynamic${changes % 5}" = { -> i }
                changes++
            }
        }
        bh.consume(sum)
    }

    /**
     * Cross-type invalidation at low frequency (every 10000 calls).
     * Call sites have more time to stabilize between invalidations.
     * This represents post-startup behavior where occasional metaclass
     * changes still occur (e.g., lazy plugin loading).
     */
    @Benchmark
    void crossTypeInvalidationEvery10000(Blackhole bh) {
        int sum = 0
        int changes = 0 // metaclass mutations so far; drives the name rotation
        for (int i = 0; i < ITERATIONS; i++) {
            sum += hotTarget.compute()
            if (i % 10000 == 0) {
                // i is a multiple of 10000 here, so i % 5 would always be 0
                ColdType.metaClass."dynamic${changes % 5}" = { -> i }
                changes++
            }
        }
        bh.consume(sum)
    }

    /**
     * Cross-type invalidation on list.size() - reproduces the exact
     * pattern from the groovy-indy-performance stress test that
     * demonstrated a 57.92x regression ratio.
     */
    @Benchmark
    void listSizeWithCrossTypeInvalidation(Blackhole bh) {
        int sum = 0
        int changes = 0 // metaclass mutations so far; drives the name rotation
        for (int i = 0; i < ITERATIONS; i++) {
            sum += sampleList.size()
            if (i % 1000 == 0) {
                // i is a multiple of 1000 here, so i % 5 would always be 0
                ColdType.metaClass."dynamic${changes % 5}" = { -> i }
                changes++
            }
        }
        bh.consume(sum)
    }

    // ===== SAME-TYPE INVALIDATION =====

    /**
     * Same-type invalidation - modifying the metaclass of the type
     * whose methods are being called. This is more expensive than
     * cross-type because the call site guard check also fails.
     */
    @Benchmark
    void sameTypeInvalidationEvery1000(Blackhole bh) {
        int sum = 0
        int changes = 0 // metaclass mutations so far; drives the name rotation
        for (int i = 0; i < ITERATIONS; i++) {
            sum += hotTarget.compute()
            if (i % 1000 == 0) {
                // i is a multiple of 1000 here, so i % 5 would always be 0
                HotTarget.metaClass."dynamic${changes % 5}" = { -> i }
                changes++
            }
        }
        bh.consume(sum)
    }

    // ===== MULTIPLE CALL SITES =====

    /**
     * Baseline: five accumulating statements per loop pass, covering
     * HotTarget, HotTargetB, HotTargetC and the sample list. The
     * describe().length() statement chains a second call on the
     * returned String, so six method invocations happen per pass.
     * All call sites are stable. Control for the invalidation
     * variant below.
     */
    @Benchmark
    void baselineMultipleCallSites(Blackhole bh) {
        int sum = 0
        for (int i = 0; i < ITERATIONS; i++) {
            sum += hotTarget.compute()
            sum += hotTarget.describe().length()
            sum += hotTargetB.getCount()
            sum += hotTargetC.itemCount()
            sum += sampleList.size()
        }
        bh.consume(sum)
    }

    /**
     * Same loop body as baselineMultipleCallSites, with periodic
     * cross-type invalidation. When ColdType's metaclass changes, ALL
     * of the loop's call sites must relink. This tests the scaling
     * behavior: with more active call sites, each invalidation is more
     * expensive because more compiled code must be thrown away and
     * recompiled.
     *
     * In a real Grails app, there are thousands of active call sites.
     */
    @Benchmark
    void multipleCallSitesWithInvalidation(Blackhole bh) {
        int sum = 0
        int changes = 0 // metaclass mutations so far; drives the name rotation
        for (int i = 0; i < ITERATIONS; i++) {
            sum += hotTarget.compute()
            sum += hotTarget.describe().length()
            sum += hotTargetB.getCount()
            sum += hotTargetC.itemCount()
            sum += sampleList.size()
            if (i % 1000 == 0) {
                // i is a multiple of 1000 here, so i % 5 would always be 0
                ColdType.metaClass."dynamic${changes % 5}" = { -> i }
                changes++
            }
        }
        bh.consume(sum)
    }

    // ===== BURST THEN STEADY STATE =====

    /**
     * Burst of metaclass changes followed by steady-state calls.
     * Simulates Grails application startup: the framework modifies
     * many metaclasses while loading plugins and domain classes,
     * then enters steady-state request handling.
     *
     * Measures how quickly call sites recover after the burst ends.
     */
    @Benchmark
    void burstThenSteadyState(Blackhole bh) {
        // Phase 1: Burst of metaclass changes (framework startup).
        // The index runs 0..99 unguarded, so i % 20 cycles through 20 names.
        for (int i = 0; i < 100; i++) {
            ColdType.metaClass."startup${i % 20}" = { -> i }
        }

        // Phase 2: Steady-state method calls (request handling)
        int sum = 0
        for (int i = 0; i < ITERATIONS; i++) {
            sum += hotTarget.compute()
            sum += hotTargetB.getCount()
            sum += sampleList.size()
        }
        bh.consume(sum)
    }

    /**
     * Baseline for burst comparison - same steady-state work
     * without any preceding burst. Shows the recovery cost of
     * the burst phase.
     */
    @Benchmark
    void baselineSteadyStateNoBurst(Blackhole bh) {
        int sum = 0
        for (int i = 0; i < ITERATIONS; i++) {
            sum += hotTarget.compute()
            sum += hotTargetB.getCount()
            sum += sampleList.size()
        }
        bh.consume(sum)
    }
}

0 commit comments

Comments
 (0)