diff --git a/src/hotspot/share/jfr/periodic/sampling/jfrCPUTimeThreadSampler.cpp b/src/hotspot/share/jfr/periodic/sampling/jfrCPUTimeThreadSampler.cpp index 2793a1fb984..ba5dcd74da3 100644 --- a/src/hotspot/share/jfr/periodic/sampling/jfrCPUTimeThreadSampler.cpp +++ b/src/hotspot/share/jfr/periodic/sampling/jfrCPUTimeThreadSampler.cpp @@ -206,6 +206,7 @@ class JfrCPUSamplerThread : public NonJavaThread { volatile bool _is_async_processing_of_cpu_time_jfr_requests_triggered; volatile bool _warned_about_timer_creation_failure; volatile bool _signal_handler_installed; + DEBUG_ONLY(volatile bool _out_of_stack_walking_enabled = true;) static const u4 STOP_SIGNAL_BIT = 0x80000000; @@ -251,6 +252,12 @@ class JfrCPUSamplerThread : public NonJavaThread { void stop_timer(); void trigger_async_processing_of_cpu_time_jfr_requests(); + + #ifdef ASSERT + void set_out_of_stack_walking_enabled(bool runnable) { + AtomicAccess::release_store(&_out_of_stack_walking_enabled, runnable); + } + #endif }; JfrCPUSamplerThread::JfrCPUSamplerThread(JfrCPUSamplerThrottle& throttle) : @@ -354,9 +361,11 @@ void JfrCPUSamplerThread::run() { last_recompute_check = os::javaTimeNanos(); } - if (Atomic::cmpxchg(&_is_async_processing_of_cpu_time_jfr_requests_triggered, true, false)) { - stackwalk_threads_in_native(); - } + DEBUG_ONLY(if (AtomicAccess::load_acquire(&_out_of_stack_walking_enabled)) {) + if (AtomicAccess::cmpxchg(&_is_async_processing_of_cpu_time_jfr_requests_triggered, true, false)) { + stackwalk_threads_in_native(); + } + DEBUG_ONLY(}) os::naked_sleep(100); } } @@ -546,6 +555,17 @@ void JfrCPUTimeThreadSampling::handle_timer_signal(siginfo_t* info, void* contex _sampler->decrement_signal_handler_count(); } +#ifdef ASSERT +bool JfrCPUTimeThreadSampling::set_out_of_stack_walking_enabled(bool runnable) { + if (_instance != nullptr && _instance->_sampler != nullptr) { + _instance->_sampler->set_out_of_stack_walking_enabled(runnable); + return true; + } else { + return false; + } +} +#endif + void JfrCPUSamplerThread::sample_thread(JfrSampleRequest& request, void* ucontext, JavaThread* jt, JfrThreadLocal* tl, JfrTicks& now) { JfrSampleRequestBuilder::build_cpu_time_sample_request(request, ucontext, jt, jt->jfr_thread_local(), now); } @@ -808,4 +828,11 @@ void JfrCPUTimeThreadSampling::on_javathread_create(JavaThread* thread) { void JfrCPUTimeThreadSampling::on_javathread_terminate(JavaThread* thread) { } +#ifdef ASSERT +bool JfrCPUTimeThreadSampling::set_out_of_stack_walking_enabled(bool runnable) { + warn(); + return false; +} +#endif + #endif // defined(LINUX) && defined(INCLUDE_JFR) diff --git a/src/hotspot/share/jfr/periodic/sampling/jfrCPUTimeThreadSampler.hpp b/src/hotspot/share/jfr/periodic/sampling/jfrCPUTimeThreadSampler.hpp index dae0be5c3a7..08a35152c55 100644 --- a/src/hotspot/share/jfr/periodic/sampling/jfrCPUTimeThreadSampler.hpp +++ b/src/hotspot/share/jfr/periodic/sampling/jfrCPUTimeThreadSampler.hpp @@ -130,6 +130,8 @@ class JfrCPUTimeThreadSampling : public JfrCHeapObj { static void send_lost_event(const JfrTicks& time, traceid tid, s4 lost_samples); static void trigger_async_processing_of_cpu_time_jfr_requests(); + + DEBUG_ONLY(static bool set_out_of_stack_walking_enabled(bool runnable);) }; #else @@ -150,6 +152,8 @@ class JfrCPUTimeThreadSampling : public JfrCHeapObj { static void on_javathread_create(JavaThread* thread); static void on_javathread_terminate(JavaThread* thread); + + DEBUG_ONLY(static bool set_out_of_stack_walking_enabled(bool runnable)); }; #endif // defined(LINUX) diff --git a/src/hotspot/share/prims/whitebox.cpp b/src/hotspot/share/prims/whitebox.cpp index 311e91ecaf5..6cf4a94cbb5 100644 --- a/src/hotspot/share/prims/whitebox.cpp +++ b/src/hotspot/share/prims/whitebox.cpp @@ -2683,6 +2683,26 @@ WB_ENTRY(void, WB_WaitUnsafe(JNIEnv* env, jobject wb, jint time)) os::naked_short_sleep(time); WB_END +<<<<<<< HEAD +======= +WB_ENTRY(void, WB_BusyWaitCPUTime(JNIEnv* env, jobject wb, jint time)) + ThreadToNativeFromVM ttn(thread); + u8 start = os::current_thread_cpu_time(); + u8 target_duration = time * (u8)1000000; + while (os::current_thread_cpu_time() - start < target_duration) { + for (volatile int i = 0; i < 1000000; i++); + } +WB_END + +WB_ENTRY(jboolean, WB_CPUSamplerSetOutOfStackWalking(JNIEnv* env, jobject wb, jboolean enable)) + #if defined(ASSERT) && INCLUDE_JFR && defined(LINUX) + return JfrCPUTimeThreadSampling::set_out_of_stack_walking_enabled(enable == JNI_TRUE) ? JNI_TRUE : JNI_FALSE; + #else + return JNI_FALSE; + #endif +WB_END + +>>>>>>> d27649fe22a (8367302: New test jdk/jfr/event/profiling/TestCPUTimeSampleQueueAutoSizes.java from JDK-8366082 is failing) WB_ENTRY(jstring, WB_GetLibcName(JNIEnv* env, jobject o)) ThreadToNativeFromVM ttn(thread); jstring info_string = env->NewStringUTF(XSTR(LIBC)); @@ -3032,6 +3052,11 @@ static JNINativeMethod methods[] = { {CC"isJVMTIIncluded", CC"()Z", (void*)&WB_IsJVMTIIncluded}, {CC"waitUnsafe", CC"(I)V", (void*)&WB_WaitUnsafe}, +<<<<<<< HEAD +======= + {CC"busyWaitCPUTime", CC"(I)V", (void*)&WB_BusyWaitCPUTime}, + {CC"cpuSamplerSetOutOfStackWalking", CC"(Z)Z", (void*)&WB_CPUSamplerSetOutOfStackWalking}, +>>>>>>> d27649fe22a (8367302: New test jdk/jfr/event/profiling/TestCPUTimeSampleQueueAutoSizes.java from JDK-8366082 is failing) {CC"getLibcName", CC"()Ljava/lang/String;", (void*)&WB_GetLibcName}, {CC"pinObject", CC"(Ljava/lang/Object;)V", (void*)&WB_PinObject}, diff --git a/test/jdk/ProblemList-Xcomp.txt b/test/jdk/ProblemList-Xcomp.txt index 680806e6e31..f23d6c89b09 100644 --- a/test/jdk/ProblemList-Xcomp.txt +++ b/test/jdk/ProblemList-Xcomp.txt @@ -29,4 +29,7 @@ java/lang/invoke/MethodHandles/CatchExceptionTest.java 8146623 generic-all java/lang/reflect/callerCache/ReflectionCallerCacheTest.java 8332028 generic-all +<<<<<<< HEAD com/sun/jdi/InterruptHangTest.java 8043571 generic-all +======= +>>>>>>> d27649fe22a (8367302: New test jdk/jfr/event/profiling/TestCPUTimeSampleQueueAutoSizes.java from JDK-8366082 is failing) diff --git a/test/jdk/jdk/jfr/event/profiling/TestCPUTimeSampleQueueAutoSizes.java b/test/jdk/jdk/jfr/event/profiling/TestCPUTimeSampleQueueAutoSizes.java new file mode 100644 index 00000000000..819329aabf5 --- /dev/null +++ b/test/jdk/jdk/jfr/event/profiling/TestCPUTimeSampleQueueAutoSizes.java @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2025 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package jdk.jfr.event.profiling; + +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.stream.Collectors; +import java.util.List; +import java.util.concurrent.atomic.AtomicLong; + +import jdk.jfr.Recording; +import jdk.jfr.consumer.RecordingStream; +import jdk.jfr.consumer.RecordedEvent; +import jdk.jfr.internal.JVM; +import jdk.test.lib.Asserts; +import jdk.test.lib.jfr.EventNames; +import jdk.test.whitebox.WhiteBox; + + +/* + * Tests the sample queues increase in size as needed, when loss is recorded. + * + * The test starts CPU time sampling with a short interval (1ms), disabling + * out-of-stack sample processing for the duration of the test. + * It now runs in native for one second, to cause queue overflows, + * then it comes back into Java to trigger the queue walking. + * Repeats the cycle 5 times and verifies that the loss decreases from the first + * to the last iteration. + * @test + * @requires vm.hasJFR & os.family == "linux" & vm.debug & vm.flagless + * @library /test/lib + * @modules jdk.jfr/jdk.jfr.internal + * @build jdk.test.whitebox.WhiteBox + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -Xbatch -XX:StartFlightRecording:dumponexit=true jdk.jfr.event.profiling.TestCPUTimeSampleQueueAutoSizes + */ +public class TestCPUTimeSampleQueueAutoSizes { + + private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); + + record LossEvent(long relativeTimeMillis, long lostSamples) {} + + /** A data collection from the CPUTimeSampleLost events for the burst thread */ + static class LossEventCollection { + private final List events = new ArrayList<>(); + private final List sampleEventsInTimeBox = new ArrayList<>(); + private final List timeBoxEnds = new ArrayList<>(); + + public synchronized void addEvent(LossEvent event) { + events.add(event); + } + + public synchronized List getSortedEvents() { + return events.stream() + .sorted(Comparator.comparingLong(e -> e.relativeTimeMillis)) + .collect(Collectors.toList()); + } + + public synchronized List getEventsPerTimeBox() { + List ret = new ArrayList<>(); + AtomicLong previousEnd = new AtomicLong(0); + for (Long timeBoxEnd : timeBoxEnds) { + long lostSamples = events.stream() + .filter(e -> e.relativeTimeMillis >= previousEnd.get() && e.relativeTimeMillis <= timeBoxEnd) + .mapToLong(e -> e.lostSamples) + .sum(); + ret.add(new LossEvent(previousEnd.get(), lostSamples)); + previousEnd.set(timeBoxEnd); + } + return ret; + } + + public synchronized void addTimeBoxEnd(long timeBoxEnd, long sampleEvents) { + timeBoxEnds.add(timeBoxEnd); + sampleEventsInTimeBox.add(sampleEvents); + } + + public synchronized void print() { + System.out.println("Loss event information:"); + for (int i = 0; i < timeBoxEnds.size(); i++) { + System.out.println(" Time box end: " + timeBoxEnds.get(i) + ", sample events: " + sampleEventsInTimeBox.get(i)); + } + for (LossEvent e : events) { + System.out.println(" Lost samples event: " + e.lostSamples + " at " + e.relativeTimeMillis); + } + for (LossEvent e : getEventsPerTimeBox()) { + System.out.println(" Lost samples in time box ending at " + e.relativeTimeMillis + ": " + e.lostSamples); + } + } + } + + public static void main(String[] args) throws Exception { + try (RecordingStream rs = new RecordingStream()) { + // setup recording + long burstThreadId = Thread.currentThread().threadId(); + final long startTimeMillis = Instant.now().toEpochMilli(); + LossEventCollection lossEvents = new LossEventCollection(); + AtomicLong sampleEventCountInTimeBox = new AtomicLong(0); + rs.enable(EventNames.CPUTimeSample).with("throttle", "1ms"); + rs.enable(EventNames.CPUTimeSamplesLost); + rs.onEvent(EventNames.CPUTimeSamplesLost, e -> { + if (e.getThread("eventThread").getJavaThreadId() == burstThreadId) { + long eventTime = e.getStartTime().toEpochMilli(); + long relativeTime = eventTime - startTimeMillis; + System.out.println("Lost samples: " + e.getLong("lostSamples") + " at " + relativeTime + " start time " + startTimeMillis); + lossEvents.addEvent(new LossEvent(relativeTime, e.getLong("lostSamples"))); + } + }); + rs.onEvent(EventNames.CPUTimeSample, e -> { + if (e.getThread("eventThread").getJavaThreadId() == burstThreadId) { + sampleEventCountInTimeBox.incrementAndGet(); + } + }); + rs.startAsync(); + // we disable the out-of-stack walking so that the queue fills up and overflows + // while we are in native code + disableOutOfStackWalking(); + + + for (int i = 0; i < 5; i++) { + // run in native for one second + WHITE_BOX.busyWaitCPUTime(1000); + // going out-of-native at the end of the previous call should have triggered + // the safepoint handler, thereby also triggering the stack walking and creation + // of the loss event + WHITE_BOX.forceSafepoint(); // just to be sure + lossEvents.addTimeBoxEnd(Instant.now().toEpochMilli() - startTimeMillis, sampleEventCountInTimeBox.get()); + sampleEventCountInTimeBox.set(0); + } + + rs.stop(); + rs.close(); + + enableOutOfStackWalking(); + + checkThatLossDecreased(lossEvents); + } + } + + static void disableOutOfStackWalking() { + Asserts.assertTrue(WHITE_BOX.cpuSamplerSetOutOfStackWalking(false), "Out-of-stack-walking not supported"); + } + + static void enableOutOfStackWalking() { + WHITE_BOX.cpuSamplerSetOutOfStackWalking(true); + } + + static void checkThatLossDecreased(LossEventCollection lossEvents) { + lossEvents.print(); + List timeBoxedLosses = lossEvents.getEventsPerTimeBox(); + // check that the last time box has far fewer lost samples than the first + Asserts.assertTrue(timeBoxedLosses.get(timeBoxedLosses.size() - 1).lostSamples <= + timeBoxedLosses.get(0).lostSamples / 2); + } +} diff --git a/test/lib/jdk/test/whitebox/WhiteBox.java b/test/lib/jdk/test/whitebox/WhiteBox.java index 2b96cbc3a94..d83565e54d0 100644 --- a/test/lib/jdk/test/whitebox/WhiteBox.java +++ b/test/lib/jdk/test/whitebox/WhiteBox.java @@ -845,6 +845,15 @@ public native int validateCgroup(boolean cgroupsV2Enabled, public native void waitUnsafe(int time_ms); +<<<<<<< HEAD +======= + public native void busyWaitCPUTime(int cpuTimeMs); + + + // returns true if supported, false if not + public native boolean cpuSamplerSetOutOfStackWalking(boolean enable); + +>>>>>>> d27649fe22a (8367302: New test jdk/jfr/event/profiling/TestCPUTimeSampleQueueAutoSizes.java from JDK-8366082 is failing) public native void pinObject(Object o); public native void unpinObject(Object o);