apache
diff --git a/‎subprojects/performance/src/jmh/groovy/org/apache/groovy/perf/grails/CallSiteInvalidationBench.groovy‎
Lines changed: 313 additions & 0 deletions b/‎subprojects/performance/src/jmh/groovy/org/apache/groovy/perf/grails/CallSiteInvalidationBench.groovy‎
Lines changed: 313 additions & 0 deletions
@@ -0,0 +1,313 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package org.apache.groovy.perf.grails
+
+import groovy.lang.ExpandoMetaClass
+import groovy.lang.GroovySystem
+
+import org.openjdk.jmh.annotations.*
+import org.openjdk.jmh.infra.Blackhole
+
+import java.util.concurrent.TimeUnit
+
+/**
+ * Tests the core SwitchPoint invalidation overhead that causes the
+ * GROOVY-10307 performance regression in Grails applications.
+ *
+ * The key insight: in Groovy 4's indy-only dispatch, ALL call sites
+ * share a single global SwitchPoint. When ANY metaclass changes
+ * (even on an unrelated type), the SwitchPoint is invalidated and
+ * the JVM must deoptimize ALL compiled call sites. This causes:
+ *
+ * <ul>
+ *   <li>nmethod invalidation (compiled code thrown away)</li>
+ *   <li>Fallback to interpreter for affected call sites</li>
+ *   <li>Re-compilation after sufficient invocations</li>
+ *   <li>Repeated OSR bailouts in tight loops</li>
+ * </ul>
+ *
+ * This benchmark reproduces the 57x regression ratio observed in
+ * the groovy-indy-performance test suite by exercising cross-type
+ * invalidation at various frequencies.
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/GROOVY-10307">GROOVY-10307</a>
+ */
+@Warmup(iterations = 3, time = 2, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS)
+@Fork(2)
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@State(Scope.Thread)
+class CallSiteInvalidationBench {
+    /** Loop bound used by the main loop of every benchmark method in this class. */
+    static final int ITERATIONS = 100_000
+
+    // Hot path types - methods on these are called in tight loops
+    /**
+     * Primary receiver for the hot loops: exposes one arithmetic method
+     * and one string-building method over a single int property.
+     */
+    static class HotTarget {
+        int value = 42
+
+        /** @return twice the current {@code value} */
+        int compute() {
+            return value * 2
+        }
+
+        /** @return the text {@code v=<value>} */
+        String describe() {
+            return "v=$value"
+        }
+    }
+
+    /** Second hot-loop receiver: a single int property with an explicit getter. */
+    static class HotTargetB {
+        int count = 10
+
+        /** @return the current {@code count} */
+        int getCount() {
+            return count
+        }
+    }
+
+    /** Third hot-loop receiver: wraps a small list and reports its size. */
+    static class HotTargetC {
+        List items = [1, 2, 3]
+
+        /** @return the number of elements currently in {@code items} */
+        int itemCount() {
+            return items.size()
+        }
+    }
+
+    // Cold type - its metaclass is modified to trigger global invalidation
+    // but its methods are NOT called in the hot loop
+    /**
+     * Type whose metaclass the invalidation benchmarks modify; none of
+     * the benchmark loops in this class call a method on it.
+     */
+    static class ColdType {
+        String label = 'cold'
+    }
+
+    // Receivers used by the benchmark loops; all four fields are (re)assigned in setup().
+    HotTarget hotTarget
+    HotTargetB hotTargetB
+    HotTargetC hotTargetC
+    // Plain list whose size() is called by the list-based benchmarks.
+    List<Integer> sampleList
+
+    /**
+     * Per-iteration fixture reset: drops any metaclass registered for the
+     * four nested types, then builds fresh receiver instances and a fresh
+     * sample list.
+     */
+    @Setup(Level.Iteration)
+    void setup() {
+        def registry = GroovySystem.metaClassRegistry
+        for (Class type : [HotTarget, HotTargetB, HotTargetC, ColdType]) {
+            registry.removeMetaClass(type)
+        }
+
+        // Instances are created only after the metaclasses have been removed.
+        this.hotTarget = new HotTarget()
+        this.hotTargetB = new HotTargetB()
+        this.hotTargetC = new HotTargetC()
+        this.sampleList = [1, 2, 3, 4, 5]
+    }
+
+    // ===== SINGLE CALL SITE BASELINES =====
+
+    /**
+     * Control case: {@code hotTarget.compute()} called {@code ITERATIONS}
+     * times with no metaclass modification inside the loop. Intended as
+     * the best-case reference for the invalidation benchmarks that use
+     * the same call.
+     */
+    @Benchmark
+    void baselineHotLoop(Blackhole bh) {
+        int total = 0
+        for (int n = 0; n < ITERATIONS; n++) {
+            total += hotTarget.compute()
+        }
+        // Hand the accumulated value to JMH so the loop result is consumed.
+        bh.consume(total)
+    }
+
+    /**
+     * Control case: {@code sampleList.size()} called {@code ITERATIONS}
+     * times with no metaclass modification inside the loop. Reference
+     * point for the list-based cross-type invalidation benchmark.
+     */
+    @Benchmark
+    void baselineListSize(Blackhole bh) {
+        int total = 0
+        for (int n = 0; n < ITERATIONS; n++) {
+            total += sampleList.size()
+        }
+        // Hand the accumulated value to JMH so the loop result is consumed.
+        bh.consume(total)
+    }
+
+    // ===== CROSS-TYPE INVALIDATION (the core regression pattern) =====
+
+    /**
+     * Cross-type invalidation at medium frequency (every 1000 calls).
+     *
+     * The hot loop calls hotTarget.compute(), but ColdType's metaclass
+     * is modified periodically. Because the SwitchPoint is global,
+     * modifying ColdType invalidates the hotTarget.compute() call site.
+     *
+     * This is the fundamental Grails pain point: loading one plugin's
+     * domain classes (metaclass changes) degrades call site performance
+     * for all other components.
+     *
+     * The registered closure captures a block-local copy of the loop
+     * counter rather than the counter itself, so the loop variable is
+     * not shared with a closure and the loop stays comparable to
+     * {@code baselineHotLoop}.
+     */
+    @Benchmark
+    void crossTypeInvalidationEvery1000(Blackhole bh) {
+        int sum = 0
+        for (int i = 0; i < ITERATIONS; i++) {
+            sum += hotTarget.compute()
+            if (i % 1000 == 0) {
+                // Snapshot the counter: capturing `i` directly would turn the
+                // loop variable into a closure-shared variable for every iteration.
+                final int captured = i
+                // Only five distinct names are used, so entries are re-assigned repeatedly.
+                ColdType.metaClass."dynamic${i % 5}" = { -> captured }
+            }
+        }
+        bh.consume(sum)
+    }
+
+    /**
+     * Cross-type invalidation at high frequency (every 100 calls).
+     * Simulates aggressive framework initialization where metaclass
+     * modifications happen rapidly. This gives call sites almost no
+     * time to stabilize between invalidations.
+     *
+     * The registered closure captures a block-local copy of the loop
+     * counter rather than the counter itself, so the loop variable is
+     * not shared with a closure and the loop stays comparable to
+     * {@code baselineHotLoop}.
+     */
+    @Benchmark
+    void crossTypeInvalidationEvery100(Blackhole bh) {
+        int sum = 0
+        for (int i = 0; i < ITERATIONS; i++) {
+            sum += hotTarget.compute()
+            if (i % 100 == 0) {
+                // Snapshot the counter: capturing `i` directly would turn the
+                // loop variable into a closure-shared variable for every iteration.
+                final int captured = i
+                ColdType.metaClass."dynamic${i % 5}" = { -> captured }
+            }
+        }
+        bh.consume(sum)
+    }
+
+    /**
+     * Cross-type invalidation at low frequency (every 10000 calls).
+     * Call sites have more time to stabilize between invalidations.
+     * This represents post-startup behavior where occasional metaclass
+     * changes still occur (e.g., lazy plugin loading).
+     *
+     * The registered closure captures a block-local copy of the loop
+     * counter rather than the counter itself, so the loop variable is
+     * not shared with a closure and the loop stays comparable to
+     * {@code baselineHotLoop}.
+     */
+    @Benchmark
+    void crossTypeInvalidationEvery10000(Blackhole bh) {
+        int sum = 0
+        for (int i = 0; i < ITERATIONS; i++) {
+            sum += hotTarget.compute()
+            if (i % 10000 == 0) {
+                // Snapshot the counter: capturing `i` directly would turn the
+                // loop variable into a closure-shared variable for every iteration.
+                final int captured = i
+                ColdType.metaClass."dynamic${i % 5}" = { -> captured }
+            }
+        }
+        bh.consume(sum)
+    }
+
+    /**
+     * Cross-type invalidation on list.size() - reproduces the
+     * pattern from the groovy-indy-performance stress test that
+     * demonstrated a 57.92x regression ratio.
+     *
+     * ColdType's metaclass is modified every 1000 iterations. The
+     * registered closure captures a block-local copy of the loop
+     * counter rather than the counter itself, so the loop variable is
+     * not shared with a closure and the loop stays comparable to
+     * {@code baselineListSize}.
+     */
+    @Benchmark
+    void listSizeWithCrossTypeInvalidation(Blackhole bh) {
+        int sum = 0
+        for (int i = 0; i < ITERATIONS; i++) {
+            sum += sampleList.size()
+            if (i % 1000 == 0) {
+                // Snapshot the counter: capturing `i` directly would turn the
+                // loop variable into a closure-shared variable for every iteration.
+                final int captured = i
+                ColdType.metaClass."dynamic${i % 5}" = { -> captured }
+            }
+        }
+        bh.consume(sum)
+    }
+
+    // ===== SAME-TYPE INVALIDATION =====
+
+    /**
+     * Same-type invalidation - modifying the metaclass of the type
+     * whose methods are being called. This is more expensive than
+     * cross-type because the call site guard check also fails.
+     *
+     * HotTarget's metaclass is modified every 1000 iterations. The
+     * registered closure captures a block-local copy of the loop
+     * counter rather than the counter itself, so the loop variable is
+     * not shared with a closure and the loop stays comparable to
+     * {@code baselineHotLoop}.
+     */
+    @Benchmark
+    void sameTypeInvalidationEvery1000(Blackhole bh) {
+        int sum = 0
+        for (int i = 0; i < ITERATIONS; i++) {
+            sum += hotTarget.compute()
+            if (i % 1000 == 0) {
+                // Snapshot the counter: capturing `i` directly would turn the
+                // loop variable into a closure-shared variable for every iteration.
+                final int captured = i
+                HotTarget.metaClass."dynamic${i % 5}" = { -> captured }
+            }
+        }
+        bh.consume(sum)
+    }
+
+    // ===== MULTIPLE CALL SITES =====
+
+    /**
+     * Control case for the multi-call-site invalidation benchmark.
+     * Each iteration makes six method calls ({@code compute},
+     * {@code describe} followed by {@code length}, {@code getCount},
+     * {@code itemCount}, {@code size}) and never touches a metaclass
+     * inside the loop.
+     */
+    @Benchmark
+    void baselineMultipleCallSites(Blackhole bh) {
+        int total = 0
+        for (int n = 0; n < ITERATIONS; n++) {
+            total += hotTarget.compute()
+            total += hotTarget.describe().length()
+            total += hotTargetB.getCount()
+            total += hotTargetC.itemCount()
+            total += sampleList.size()
+        }
+        // Hand the accumulated value to JMH so the loop result is consumed.
+        bh.consume(total)
+    }
+
+    /**
+     * Several distinct call sites with periodic cross-type invalidation.
+     * Each iteration makes six method calls (describe().length() counts
+     * as two). When ColdType's metaclass changes, ALL of these call
+     * sites must relink. This tests the scaling behavior: with more
+     * active call sites, each invalidation is more expensive because
+     * more compiled code must be thrown away and recompiled.
+     *
+     * In a real Grails app, there are thousands of active call sites.
+     *
+     * The registered closure captures a block-local copy of the loop
+     * counter rather than the counter itself, so the loop variable is
+     * not shared with a closure and the loop stays comparable to
+     * {@code baselineMultipleCallSites}.
+     */
+    @Benchmark
+    void multipleCallSitesWithInvalidation(Blackhole bh) {
+        int sum = 0
+        for (int i = 0; i < ITERATIONS; i++) {
+            sum += hotTarget.compute()
+            sum += hotTarget.describe().length()
+            sum += hotTargetB.getCount()
+            sum += hotTargetC.itemCount()
+            sum += sampleList.size()
+            if (i % 1000 == 0) {
+                // Snapshot the counter: capturing `i` directly would turn the
+                // loop variable into a closure-shared variable for every iteration.
+                final int captured = i
+                ColdType.metaClass."dynamic${i % 5}" = { -> captured }
+            }
+        }
+        bh.consume(sum)
+    }
+
+    // ===== BURST THEN STEADY STATE =====
+
+    /**
+     * Two-phase benchmark modelling application startup followed by
+     * request handling: first 100 metaclass assignments on ColdType
+     * (cycling through 20 method names), then {@code ITERATIONS}
+     * rounds of three method calls with no further metaclass changes.
+     *
+     * Compare against {@code baselineSteadyStateNoBurst}, which runs
+     * the same second phase without the burst.
+     */
+    @Benchmark
+    void burstThenSteadyState(Blackhole bh) {
+        // Startup phase: rapid-fire metaclass modifications on the cold type.
+        for (int step = 0; step < 100; step++) {
+            ColdType.metaClass."startup${step % 20}" = { -> step }
+        }
+
+        // Request-handling phase: plain method calls only.
+        int total = 0
+        for (int n = 0; n < ITERATIONS; n++) {
+            total += hotTarget.compute()
+            total += hotTargetB.getCount()
+            total += sampleList.size()
+        }
+        bh.consume(total)
+    }
+
+    /**
+     * Control case for {@code burstThenSteadyState}: the same three
+     * method calls per iteration, with no metaclass modifications
+     * beforehand. The difference between the two results is the cost
+     * attributable to the burst phase.
+     */
+    @Benchmark
+    void baselineSteadyStateNoBurst(Blackhole bh) {
+        int total = 0
+        for (int n = 0; n < ITERATIONS; n++) {
+            total += hotTarget.compute()
+            total += hotTargetB.getCount()
+            total += sampleList.size()
+        }
+        // Hand the accumulated value to JMH so the loop result is consumed.
+        bh.consume(total)
+    }
+}