Skip to content

Commit 46bf5e8

Browse files
committed
GROOVY-10307: add targeted JMH benchmarks for SwitchPoint invalidation regression
1 parent 8a40250 commit 46bf5e8

File tree

3 files changed

+1198
-0
lines changed

3 files changed

+1198
-0
lines changed
Lines changed: 316 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,316 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.groovy.perf.grails
20+
21+
import groovy.lang.ExpandoMetaClass
22+
import groovy.lang.GroovySystem
23+
24+
import org.openjdk.jmh.annotations.*
25+
import org.openjdk.jmh.infra.Blackhole
26+
27+
import java.util.concurrent.TimeUnit
28+
29+
/**
30+
* Tests the core SwitchPoint invalidation overhead that causes the
31+
* GROOVY-10307 performance regression in Grails applications.
32+
*
33+
* The key insight: in Groovy's indy dispatch, ALL call sites share a
34+
* single global SwitchPoint. When ANY metaclass changes (even on an
35+
* unrelated type), the SwitchPoint is invalidated and the JVM must
36+
* deoptimize ALL compiled call sites. This causes:
37+
*
38+
* <ul>
39+
* <li>nmethod invalidation (compiled code thrown away)</li>
40+
* <li>Fallback to interpreter for affected call sites</li>
41+
* <li>Re-compilation after sufficient invocations</li>
42+
* <li>Repeated OSR bailouts in tight loops</li>
43+
* </ul>
44+
*
45+
* On Groovy 4 (GROOVY_4_0_X), there is no mitigation for this cycle.
46+
* Groovy 6 (master) adds a fallback round cutoff after 100 rounds that
47+
* caps the worst-case damage but does not eliminate the overhead.
48+
*
49+
* This benchmark exercises cross-type invalidation at various
50+
* frequencies to quantify the overhead under different metaclass
51+
* change rates.
52+
*
53+
* @see <a href="https://issues.apache.org/jira/browse/GROOVY-10307">GROOVY-10307</a>
54+
*/
55+
@Warmup(iterations = 3, time = 2, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS)
@Fork(2)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Thread)
class CallSiteInvalidationBench {
    /** Number of loop iterations executed by each benchmark invocation. */
    static final int ITERATIONS = 100_000

    // Hot path types - methods on these are called in tight loops
    static class HotTarget {
        int value = 42
        int compute() { value * 2 }
        String describe() { "v=$value" }
    }

    static class HotTargetB {
        int count = 10
        int getCount() { count }
    }

    static class HotTargetC {
        List items = [1, 2, 3]
        int itemCount() { items.size() }
    }

    // Cold type - its metaclass is modified to trigger global invalidation
    // but its methods are NOT called in the hot loop
    static class ColdType {
        String label = "cold"
    }

    // Receivers for the hot-loop calls; recreated by setup() before each iteration.
    HotTarget hotTarget
    HotTargetB hotTargetB
    HotTargetC hotTargetC
    List<Integer> sampleList

    /**
     * Runs before every JMH iteration: drops the registered metaclass of
     * each benchmark type (discarding methods added by a previous
     * iteration) and creates fresh receiver objects.
     */
    @Setup(Level.Iteration)
    void setup() {
        GroovySystem.metaClassRegistry.removeMetaClass(HotTarget)
        GroovySystem.metaClassRegistry.removeMetaClass(HotTargetB)
        GroovySystem.metaClassRegistry.removeMetaClass(HotTargetC)
        GroovySystem.metaClassRegistry.removeMetaClass(ColdType)
        hotTarget = new HotTarget()
        hotTargetB = new HotTargetB()
        hotTargetC = new HotTargetC()
        sampleList = [1, 2, 3, 4, 5]
    }

    // ===== SINGLE CALL SITE BASELINES =====

    /**
     * Baseline: single method call in tight loop, no invalidation.
     * The call site stabilizes after JIT compilation and stays
     * monomorphic. This is the best-case scenario for indy dispatch.
     */
    @Benchmark
    void baselineHotLoop(Blackhole bh) {
        int sum = 0
        for (int i = 0; i < ITERATIONS; i++) {
            sum += hotTarget.compute()
        }
        bh.consume(sum)
    }

    /**
     * Baseline: list.size() in tight loop, no invalidation.
     * Control for the cross-type invalidation benchmarks below.
     */
    @Benchmark
    void baselineListSize(Blackhole bh) {
        int sum = 0
        for (int i = 0; i < ITERATIONS; i++) {
            sum += sampleList.size()
        }
        bh.consume(sum)
    }

    // ===== CROSS-TYPE INVALIDATION (the core regression pattern) =====
    //
    // Notes that apply to every periodic-invalidation loop in this class:
    //  * The dynamic method name is derived from the invalidation round
    //    (i.intdiv(period) % 5). Using i % 5 directly would always yield 0,
    //    because every period used here is a multiple of 5.
    //  * The closure captures a branch-local copy of the counter rather
    //    than the loop variable itself, so the loop has the same shape as
    //    the corresponding baseline loop.

    /**
     * Cross-type invalidation at medium frequency (every 1000 calls).
     *
     * The hot loop calls hotTarget.compute(), but ColdType's metaclass
     * is modified periodically. Because the SwitchPoint is global,
     * modifying ColdType invalidates the hotTarget.compute() call site.
     *
     * This is the fundamental Grails pain point: loading one plugin's
     * domain classes (metaclass changes) degrades call site performance
     * for all other components.
     */
    @Benchmark
    void crossTypeInvalidationEvery1000(Blackhole bh) {
        int sum = 0
        for (int i = 0; i < ITERATIONS; i++) {
            sum += hotTarget.compute()
            if (i % 1000 == 0) {
                final int at = i // branch-local copy captured by the closure
                ColdType.metaClass."dynamic${at.intdiv(1000) % 5}" = { -> at }
            }
        }
        bh.consume(sum)
    }

    /**
     * Cross-type invalidation at high frequency (every 100 calls).
     * Simulates aggressive framework initialization where metaclass
     * modifications happen rapidly. This gives call sites almost no
     * time to stabilize between invalidations.
     */
    @Benchmark
    void crossTypeInvalidationEvery100(Blackhole bh) {
        int sum = 0
        for (int i = 0; i < ITERATIONS; i++) {
            sum += hotTarget.compute()
            if (i % 100 == 0) {
                final int at = i // branch-local copy captured by the closure
                ColdType.metaClass."dynamic${at.intdiv(100) % 5}" = { -> at }
            }
        }
        bh.consume(sum)
    }

    /**
     * Cross-type invalidation at low frequency (every 10000 calls).
     * Call sites have more time to stabilize between invalidations.
     * This represents post-startup behavior where occasional metaclass
     * changes still occur (e.g., lazy plugin loading).
     */
    @Benchmark
    void crossTypeInvalidationEvery10000(Blackhole bh) {
        int sum = 0
        for (int i = 0; i < ITERATIONS; i++) {
            sum += hotTarget.compute()
            if (i % 10000 == 0) {
                final int at = i // branch-local copy captured by the closure
                ColdType.metaClass."dynamic${at.intdiv(10000) % 5}" = { -> at }
            }
        }
        bh.consume(sum)
    }

    /**
     * Cross-type invalidation on list.size() - a simple call site
     * exercised with periodic metaclass changes on an unrelated type.
     */
    @Benchmark
    void listSizeWithCrossTypeInvalidation(Blackhole bh) {
        int sum = 0
        for (int i = 0; i < ITERATIONS; i++) {
            sum += sampleList.size()
            if (i % 1000 == 0) {
                final int at = i // branch-local copy captured by the closure
                ColdType.metaClass."dynamic${at.intdiv(1000) % 5}" = { -> at }
            }
        }
        bh.consume(sum)
    }

    // ===== SAME-TYPE INVALIDATION =====

    /**
     * Same-type invalidation - modifying the metaclass of the type
     * whose methods are being called. This is more expensive than
     * cross-type because the call site guard check also fails.
     */
    @Benchmark
    void sameTypeInvalidationEvery1000(Blackhole bh) {
        int sum = 0
        for (int i = 0; i < ITERATIONS; i++) {
            sum += hotTarget.compute()
            if (i % 1000 == 0) {
                final int at = i // branch-local copy captured by the closure
                HotTarget.metaClass."dynamic${at.intdiv(1000) % 5}" = { -> at }
            }
        }
        bh.consume(sum)
    }

    // ===== MULTIPLE CALL SITES =====

    /**
     * Baseline: five distinct method calls across three types.
     * Five separate call sites, all stable. Control for the
     * invalidation variant below.
     */
    @Benchmark
    void baselineMultipleCallSites(Blackhole bh) {
        int sum = 0
        for (int i = 0; i < ITERATIONS; i++) {
            sum += hotTarget.compute()
            sum += hotTarget.describe().length()
            sum += hotTargetB.getCount()
            sum += hotTargetC.itemCount()
            sum += sampleList.size()
        }
        bh.consume(sum)
    }

    /**
     * Five distinct call sites with periodic cross-type invalidation.
     * When ColdType's metaclass changes, ALL five call sites must
     * relink. This tests the scaling behavior: with more active call
     * sites, each invalidation is more expensive because more compiled
     * code must be thrown away and recompiled.
     *
     * In a real Grails app, there are thousands of active call sites.
     */
    @Benchmark
    void multipleCallSitesWithInvalidation(Blackhole bh) {
        int sum = 0
        for (int i = 0; i < ITERATIONS; i++) {
            sum += hotTarget.compute()
            sum += hotTarget.describe().length()
            sum += hotTargetB.getCount()
            sum += hotTargetC.itemCount()
            sum += sampleList.size()
            if (i % 1000 == 0) {
                final int at = i // branch-local copy captured by the closure
                ColdType.metaClass."dynamic${at.intdiv(1000) % 5}" = { -> at }
            }
        }
        bh.consume(sum)
    }

    // ===== BURST THEN STEADY STATE =====

    /**
     * Burst of metaclass changes followed by steady-state calls.
     * Simulates Grails application startup: the framework modifies
     * many metaclasses while loading plugins and domain classes,
     * then enters steady-state request handling.
     *
     * Measures how quickly call sites recover after the burst ends.
     */
    @Benchmark
    void burstThenSteadyState(Blackhole bh) {
        // Phase 1: Burst of metaclass changes (framework startup)
        for (int i = 0; i < 100; i++) {
            ColdType.metaClass."startup${i % 20}" = { -> i }
        }

        // Phase 2: Steady-state method calls (request handling)
        int sum = 0
        for (int i = 0; i < ITERATIONS; i++) {
            sum += hotTarget.compute()
            sum += hotTargetB.getCount()
            sum += sampleList.size()
        }
        bh.consume(sum)
    }

    /**
     * Baseline for burst comparison - same steady-state work
     * without any preceding burst. Shows the recovery cost of
     * the burst phase.
     */
    @Benchmark
    void baselineSteadyStateNoBurst(Blackhole bh) {
        int sum = 0
        for (int i = 0; i < ITERATIONS; i++) {
            sum += hotTarget.compute()
            sum += hotTargetB.getCount()
            sum += sampleList.size()
        }
        bh.consume(sum)
    }
}

0 commit comments

Comments
 (0)