From fd1e35d177ecefa2ca444b4ee9e9d43ebf859a5a Mon Sep 17 00:00:00 2001 From: Duane May Date: Wed, 5 Nov 2025 17:00:03 -0500 Subject: [PATCH 1/3] Update Bouncy Castle to 2.1.2 and temp directory handling in integration test scripts - org.bouncycastle.native.loader.install_dir allows a user to specify where the .so files should be found - Bump `bouncyCastleFipsVersion` to 2.1.2 in dependencies. - Create and utilize separate temporary directory for integration tests. - Adjust JVM options to reference the new temp directory in the script. - Docker may not have system tmp mounted as executable Check server readiness via HTTP instead of log file parsing - Handle boot timeout with additional logging and `SIGQUIT` for stuck processes. Add logging config to integration tests Eliminate the error that the log config was not found Add timeout to Gradle integrationTest task Update Selenium headless mode and browser flags for stability in Docker/CI environments In addition to the traditional --headless mode, Chrome has had a new headless mode since version 96 that gives users full browser functionality (even running extensions). From version 96 to 108 the flag was --headless=chrome; from version 109 onward it is --headless=new. Using --headless=new should bring a better experience when using headless with Selenium. Refactor boot timeout handling: extract thread dump logic to `thread_dump_on_boot_pid` function - Centralize JVM diagnostics and thread dump logic in a reusable function. - Replace in-line logic with `thread_dump_on_boot_pid` call for better readability and maintainability. Update JVM GC strategy to G1 in integration tests - Switch from ParallelGC to G1 for better performance. - Add `--no-watch-fs` flag to Gradle tasks to eliminate warning exception in the Docker container Add Chrome flags in integration tests - Export `DBUS_SESSION_BUS_ADDRESS` to avoid session errors during tests. - Add Chrome stability flags for improved reliability in Docker/CI environments. 
[bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues [bc-212-test]: testing memory pressure issues --- build.gradle | 20 +++-- dependencies.gradle | 2 +- gradle.properties | 21 ++++- scripts/integration_tests.sh | 81 ++++++++++++++++--- scripts/lib_util_helper.sh | 35 ++++++-- scripts/unit_tests.sh | 8 ++ .../feature/DefaultIntegrationTestConfig.java | 34 +++++++- 7 files changed, 172 insertions(+), 29 deletions(-) diff --git a/build.gradle b/build.gradle index 1077d4579a9..f75b70c9b16 100644 --- a/build.gradle +++ b/build.gradle @@ -105,7 +105,8 @@ subprojects { // gradle might stop the test run due to the failFast but still concludes with BUILD SUCCESSFUL (if the retry is successful) failFast = false useJUnitPlatform() - jvmArgs += ["-Xmx1024m", + // 
Reduced from 1024m to 640m - unit tests don't need as much as integration tests + jvmArgs += ["-Xmx640m", "-XX:+StartAttachListener", "-XX:+HeapDumpOnOutOfMemoryError", "-XX:HeapDumpPath=/var/log/uaa-tests.hprof" @@ -120,8 +121,21 @@ subprojects { } tasks.register('integrationTest', Test) { + dependsOn subprojects.integrationTest + useJUnitPlatform() + // This prevents integrationTests from hanging indefinitely + timeout = Duration.ofMinutes(180) + + // Integration test workers need same memory as unit tests + // Actual CI configuration is controlled via integration_tests.sh script + jvmArgs += ["-Xmx640m", + "-XX:+StartAttachListener", + "-XX:+HeapDumpOnOutOfMemoryError", + "-XX:HeapDumpPath=/var/log/uaa-tests.hprof" + ] + // Enable JaCoCo for integration tests jacoco { enabled = true @@ -198,10 +212,6 @@ tasks.register('manifests', Copy) { into("build/sample-manifests") } -tasks.register('integrationTest', Test) { - dependsOn subprojects.integrationTest -} - tasks.register('cleanBootTomcatDir') { String tomcatBase = file("scripts/boot/tomcat/").getAbsolutePath() delete(java.nio.file.Path.of(tomcatBase)) diff --git a/dependencies.gradle b/dependencies.gradle index 0ab7517ff96..4220c3cae54 100644 --- a/dependencies.gradle +++ b/dependencies.gradle @@ -5,7 +5,7 @@ ext { // Versions shared between multiple dependencies versions.apacheDsVersion = "2.0.0.AM27" -versions.bouncyCastleFipsVersion = "2.1.1" +versions.bouncyCastleFipsVersion = "2.1.2" versions.bouncyCastlePkixFipsVersion = "2.1.9" versions.bouncyCastleTlsFipsVersion = "2.1.20" versions.springBootVersion = "3.5.7" diff --git a/gradle.properties b/gradle.properties index e8f382a981e..2dd43b00efd 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,9 +1,23 @@ version=0.0.0 # Required for LdapMockMvcTests when asserting it can find a user in a different language -org.gradle.jvmargs=-Dfile.encoding=utf8 -XX:+StartAttachListener -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/uaa-tests.hprof +# 
Optimized for Gradle 9.0 + Kotlin 2.2 to prevent test hanging and classloading deadlocks +# CICompilerCount=2 and ProcessReaper flag prevent thread contention in containers +org.gradle.jvmargs=-Dfile.encoding=utf8 \ + -XX:+StartAttachListener \ + -XX:+HeapDumpOnOutOfMemoryError \ + -XX:HeapDumpPath=/var/log/uaa-tests.hprof \ + -Xmx1024m \ + -Xms1024m \ + -XX:MaxMetaspaceSize=256m \ + -XX:+UseG1GC \ + -XX:MaxGCPauseMillis=100 \ + -XX:ParallelGCThreads=2 \ + -XX:CICompilerCount=2 \ + -Djdk.lang.processReaperUseDefaultStackSize=true -org.gradle.parallel=true +# Disable parallel execution to prevent classloading deadlocks with Kotlin 2.2 +org.gradle.parallel=false signing.keyId= signing.password= @@ -12,4 +26,5 @@ ossrhUsername= ossrhPassword= group=org.cloudfoundry.identity archivesBaseName="uaa" -org.gradle.workers.max=6 \ No newline at end of file +# Limit workers to 2 for memory-constrained environments (CI containers) +org.gradle.workers.max=2 \ No newline at end of file diff --git a/scripts/integration_tests.sh b/scripts/integration_tests.sh index 50784221885..2521244a9ca 100755 --- a/scripts/integration_tests.sh +++ b/scripts/integration_tests.sh @@ -6,6 +6,10 @@ set -eu # Global env vars: # UAA_GRADLE_INT_TEST_COMMAND: Gradle command to run integration tests (default: integrationTest) # this could include :cloudfoundry-identity-server:integrationTest --tests to run specific tests +# jvm_heap: JVM heap size for UAA boot server (default: 640m) +# jvm_metaspace: JVM metaspace size for UAA boot server (default: 192m) +# gradle_heap: JVM heap size for Gradle daemon (default: 1024m) +# gradle_test_heap: JVM heap size for Gradle test workers (default: 640m) ####################################### function main() { local script_dir; script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" @@ -24,18 +28,38 @@ function main() { local wd launch_boot assemble_code integration_test_code wd=$(pwd) - echo "Setting heap to ${jvm_heap:=768m}" - echo "Setting metaspace to 
${jvm_metaspace:=256m}" + temp_dir=${script_dir}/tmp + mkdir -p "${temp_dir}" + + # Memory settings optimized for Gradle 9.0 with Kotlin 2.2 + # Boot server needs enough memory to handle test requests without crashing + # Increased Gradle daemon heap to 1GB to prevent hanging with 2 workers + # --no-configuration-cache prevents stale Kotlin compiler state reuse between daemon processes + # logging.manager is set to org.apache.logging.log4j.jul.LogManager to prevent log4j2 from using java.util.logging + echo "Setting boot heap to ${jvm_heap:=640m}" + echo "Setting boot metaspace to ${jvm_metaspace:=192m}" + echo "Setting Gradle daemon heap to ${gradle_heap:=1024m}" + echo "Setting test worker heap to ${gradle_test_heap:=640m}" readonly launch_boot="nohup java \ - -XX:+UseParallelGC \ - -Xmx${jvm_heap} \ + -XX:+UseG1GC \ + -XX:G1HeapRegionSize=1m \ + -Xms64m -Xmx${jvm_heap} \ -XX:MaxMetaspaceSize=${jvm_metaspace} \ + -XX:MetaspaceSize=${jvm_metaspace} \ + -XX:+UseStringDeduplication \ + -XX:MaxGCPauseMillis=200 \ -XX:+HeapDumpOnOutOfMemoryError \ -XX:HeapDumpPath=${wd} \ -DCLOUDFOUNDRY_CONFIG_PATH=${wd}/scripts/boot \ + -Dlogging.config=${wd}/scripts/boot/log4j2.properties \ + -Dlog4j.configurationFile=${wd}/scripts/boot/log4j2.properties \ + -Dlog4j2.formatMsgNoLookups=true \ + -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager \ -DSECRETS_DIR=${wd}/scripts/boot \ -Djava.security.egd=file:/dev/./urandom \ + -Djava.io.tmpdir=${temp_dir} \ + -Dorg.bouncycastle.native.loader.install_dir=${temp_dir} \ -Dmetrics.perRequestMetrics=true \ -Dserver.servlet.context-path=/uaa \ -Dserver.tomcat.basedir=${wd}/scripts/boot/tomcat \ @@ -50,43 +74,82 @@ function main() { -jar ${wd}/uaa/build/libs/cloudfoundry-identity-uaa-0.0.0.war \ > boot.log 2>&1 &" + # Explicit Gradle daemon memory for Kotlin 2.2 with additional GC tuning readonly assemble_code="./gradlew '-Dspring.profiles.active=${test_profile}' \ '-Djava.security.egd=file:/dev/./urandom' \ + 
'-Dorg.gradle.jvmargs=-Dfile.encoding=utf8 -Xms64m -Xmx${gradle_heap} -XX:MaxMetaspaceSize=128m -XX:+UseG1GC -XX:MaxGCPauseMillis=100' \ assemble \ + --no-watch-fs \ --no-daemon \ - --max-workers=4 \ + --no-configuration-cache \ + --max-workers=2 \ --stacktrace \ --console=plain" + # Explicit memory limits for test JVMs with GC tuning and classloader fixes + # All flags required to prevent classloading deadlocks and thread starvation during test init + # --no-configuration-cache prevents stale Kotlin compiler state reuse between daemon processes + readonly compile_test_code="./gradlew \ + '-Dspring.profiles.active=${test_profile}' \ + '-Djava.security.egd=file:/dev/./urandom' \ + '-DskipUaaAutoStart=true' \ + '-Dorg.gradle.jvmargs=-Dfile.encoding=utf8 -Xms64m -Xmx${gradle_test_heap} -XX:MaxMetaspaceSize=128m -XX:+UseG1GC -XX:MaxGCPauseMillis=100 -XX:ParallelGCThreads=2 -XX:CICompilerCount=2 -Djdk.lang.processReaperUseDefaultStackSize=true' \ + '-Dorg.gradle.daemon.idletimeout=300000' \ + '-Dorg.gradle.parallel=false' \ + '-Dorg.gradle.workers.max=2' \ + clean assemble compileTestJava \ + --no-watch-fs \ + --no-daemon \ + --no-configuration-cache \ + --max-workers=2 \ + --stacktrace \ + --console=plain" + + # Explicit memory limits for test JVMs with GC tuning and classloader fixes + # All flags required to prevent classloading deadlocks and thread starvation during test init readonly integration_test_code="./gradlew \ '-Dspring.profiles.active=${test_profile}' \ '-Djava.security.egd=file:/dev/./urandom' \ '-DskipUaaAutoStart=true' \ + '-Dorg.gradle.jvmargs=-Dfile.encoding=utf8 -Xms64m -Xmx${gradle_test_heap} -XX:MaxMetaspaceSize=128m -XX:+UseG1GC -XX:MaxGCPauseMillis=100 -XX:ParallelGCThreads=2 -XX:CICompilerCount=2 -Djdk.lang.processReaperUseDefaultStackSize=true' \ + '-Dorg.gradle.daemon.idletimeout=300000' \ + '-Dorg.gradle.parallel=false' \ + '-Dorg.gradle.workers.max=2' \ ${UAA_GRADLE_INT_TEST_COMMAND:-integrationTest} \ - --stacktrace \ + --no-watch-fs \ 
--no-daemon \ + --no-configuration-cache \ + --max-workers=2 \ + --stacktrace \ --console=plain" set -x if [[ "${RUN_TESTS:-true}" = 'true' ]]; then eval "$assemble_code" - # Always start the boot server before running integration tests + # Start and ensure the boot server is running before integration tests eval "$launch_boot" echo $! > boot.pid if is_boot_running ; then echo "Boot started. Can continue to run tests." else - echo "Boot did not start - failing" + echo "Boot did not start, failing" cat boot.log exit 1 fi + if [[ -z "${DBUS_SESSION_BUS_ADDRESS:-}" ]]; then + export DBUS_SESSION_BUS_ADDRESS=/dev/null + fi + + eval "$compile_test_code" eval "$integration_test_code" # Clean up: kill the boot server if [[ -f boot.pid ]]; then - kill -9 "$(cat boot.pid)" || true + local pid; pid=$(cat boot.pid) + echo "Sending SIGKILL (kill -9) to UAA process (pid=${pid})" + kill -9 "${pid}" || true rm boot.pid fi else diff --git a/scripts/lib_util_helper.sh b/scripts/lib_util_helper.sh index dd7a6810646..9add1e185f1 100755 --- a/scripts/lib_util_helper.sh +++ b/scripts/lib_util_helper.sh @@ -2,19 +2,20 @@ set -eu ######################################## -# Check if Boot has started by looking for a specific line in the log file +# Check if Boot has started by checking if the port is responding ########################################## function is_boot_running() { - local log_file="boot.log" - local target_line="Started UaaBootApplication" + local port=${PORT:-8080} local timeout=600 # Timeout in seconds local start_time start_time=$(date +%s) while true; do - if grep "$target_line" "$log_file"; then - echo "Boot Start was found in the log file." + # Use curl to check if the port is responding + # Any HTTP response (even 4xx/5xx) indicates the server is running + if curl -ksS --max-time 5 --connect-timeout 2 "http://127.0.0.1:${port}/uaa/info"; then + echo "Boot is running on port ${port}." 
return 0 fi @@ -23,15 +24,35 @@ function is_boot_running() { elapsed_time=$((current_time - start_time)) if [[ "$elapsed_time" -ge "$timeout" ]]; then - echo "Timeout reached. Boot did not start" + echo "Timeout reached. Boot did not start on port ${port}" + thread_dump_on_boot_pid return 1 fi - tail -n 1 "$log_file" + tail -n 1 boot.log sleep 1 # Check every second done } +######################################## +# thread_dump_on_boot_pid +# Display memory info and request a thread dump on the pid in boot.pid +########################################## +function thread_dump_on_boot_pid() { + local pid + pid=$(cat boot.pid) + if [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null; then + echo "Collecting JVM diagnostics..." + jstat -gccause "${pid}" 2>/dev/null || true + jstat -gccapacity "${pid}" 2>/dev/null || true + jstat -gcmetacapacity "${pid}" 2>/dev/null || true + echo "Sending SIGQUIT (kill -3) to Thread Dump on UAA process (pid=${pid})" + kill -3 "$pid" || true + # Wait a moment for the thread dump to be written + sleep 2 + fi +} + ######################################## # setup_hosts_file # Appends test-zone and other necessary host entries to /etc/hosts diff --git a/scripts/unit_tests.sh b/scripts/unit_tests.sh index f1b49d035d6..e41a03b46de 100755 --- a/scripts/unit_tests.sh +++ b/scripts/unit_tests.sh @@ -22,6 +22,14 @@ function main() { start_ldap set -x + + ./gradlew "-Dspring.profiles.active=${test_profile}" \ + "-Djava.security.egd=file:/dev/./urandom" \ + clean assemble compileTestJava \ + --stacktrace \ + --no-daemon \ + --console=plain + ./gradlew "-Dspring.profiles.active=${test_profile}" \ "-Djava.security.egd=file:/dev/./urandom" \ ${UAA_GRADLE_UNIT_TEST_COMMAND:-test} \ diff --git a/uaa/src/test/java/org/cloudfoundry/identity/uaa/integration/feature/DefaultIntegrationTestConfig.java b/uaa/src/test/java/org/cloudfoundry/identity/uaa/integration/feature/DefaultIntegrationTestConfig.java index c961e93377d..4b05e9e74ae 100644 --- 
a/uaa/src/test/java/org/cloudfoundry/identity/uaa/integration/feature/DefaultIntegrationTestConfig.java +++ b/uaa/src/test/java/org/cloudfoundry/identity/uaa/integration/feature/DefaultIntegrationTestConfig.java @@ -72,19 +72,45 @@ public UaaWebDriver webDriver() { private static ChromeOptions getChromeOptions() { ChromeOptions options = new ChromeOptions(); options.addArguments( - "--verbose", // Comment the following line to run selenium test browser in Headed Mode - "--headless", + "--headless=new", // Use new headless mode (more stable) "--guest", //attempt to disable password checkups that disrupt the flow "--disable-web-security", "--ignore-certificate-errors", "--allow-running-insecure-content", "--allow-insecure-localhost", - "--no-sandbox", + "--no-sandbox", // Required for Docker/CI environments "--disable-gpu", - "--remote-allow-origins=*" + "--remote-allow-origins=*", + "--disable-dev-shm-usage", // Overcome limited resource problems in Docker + // Additional stability flags + "--disable-extensions", + "--disable-software-rasterizer", + "--disable-background-timer-throttling", + "--disable-backgrounding-occluded-windows", + "--disable-renderer-backgrounding", + "--disable-blink-features=AutomationControlled", + "--disable-features=TranslateUI", + // Hang detection and renderer stability flags + "--disable-hang-monitor", // Prevents Chrome from killing "hung" renderer processes (useful for slow backend responses) + "--disable-background-networking", // Reduces background network activity that could interfere with test requests + "--disable-features=RendererScheduling", // Disables aggressive renderer scheduling that might cause timeouts + "--run-all-compositor-stages-before-draw", // Ensures all rendering stages complete before drawing (prevents partial renders) + "--disable-prompt-on-repost", + "--disable-sync", + "--disable-component-extensions-with-background-pages", + "--force-color-profile=srgb", + "--no-first-run", + "--no-default-browser-check", + 
"--disable-default-apps", + "--disable-popup-blocking", + "--test-type", + "--disable-infobars" ); options.setAcceptInsecureCerts(true); + + // Set page load strategy to 'normal' to ensure proper page load detection + options.setPageLoadStrategy(org.openqa.selenium.PageLoadStrategy.NORMAL); return options; } From 4d7cf2706777b9de3ca21fd2a70154144053f671 Mon Sep 17 00:00:00 2001 From: Duane May Date: Wed, 12 Nov 2025 17:44:19 -0500 Subject: [PATCH 2/3] Adjust verbosity in integration tests --- scripts/integration_tests.sh | 3 ++- scripts/lib_util_helper.sh | 13 ++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/scripts/integration_tests.sh b/scripts/integration_tests.sh index 2521244a9ca..67d44022a0a 100755 --- a/scripts/integration_tests.sh +++ b/scripts/integration_tests.sh @@ -123,13 +123,14 @@ function main() { --stacktrace \ --console=plain" - set -x if [[ "${RUN_TESTS:-true}" = 'true' ]]; then eval "$assemble_code" # Start and ensure the boot server is running before integration tests eval "$launch_boot" echo $! > boot.pid + { set +x; } 2>/dev/null + if is_boot_running ; then echo "Boot started. Can continue to run tests." 
else diff --git a/scripts/lib_util_helper.sh b/scripts/lib_util_helper.sh index 9add1e185f1..48ffeb511a3 100755 --- a/scripts/lib_util_helper.sh +++ b/scripts/lib_util_helper.sh @@ -11,11 +11,17 @@ function is_boot_running() { local start_time start_time=$(date +%s) + echo + echo "Waiting for the UAA server to start, only partial log messages will be shown as it progresses:" while true; do # Use curl to check if the port is responding # Any HTTP response (even 4xx/5xx) indicates the server is running - if curl -ksS --max-time 5 --connect-timeout 2 "http://127.0.0.1:${port}/uaa/info"; then + if curl -ks --max-time 5 -o /dev/null --connect-timeout 2 -u "admin:adminsecret" \ + --data "client_id=admin&grant_type=client_credentials" \ + -X POST "http://localhost:${port}/uaa/oauth/token" 2>/dev/null; then + echo echo "Boot is running on port ${port}." + grep "Started UaaBootApplication" boot.log return 0 fi @@ -24,7 +30,12 @@ function is_boot_running() { elapsed_time=$((current_time - start_time)) if [[ "$elapsed_time" -ge "$timeout" ]]; then + echo echo "Timeout reached. 
Boot did not start on port ${port}" + curl -ksS --max-time 5 --connect-timeout 2 -u "admin:adminsecret" \ + --data "client_id=admin&grant_type=client_credentials" \ + -X POST "http://localhost:${port}/uaa/info" || true + thread_dump_on_boot_pid return 1 fi From 28b12a63f6d1e979a62b3ea31c64d4af8ac16853 Mon Sep 17 00:00:00 2001 From: Joe Mahady Date: Thu, 13 Nov 2025 15:17:06 +0000 Subject: [PATCH 3/3] [bc-212-test]: adding verbose --- scripts/integration_tests.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/integration_tests.sh b/scripts/integration_tests.sh index 67d44022a0a..f69dad865aa 100755 --- a/scripts/integration_tests.sh +++ b/scripts/integration_tests.sh @@ -124,6 +124,7 @@ function main() { --console=plain" if [[ "${RUN_TESTS:-true}" = 'true' ]]; then + set -x eval "$assemble_code" # Start and ensure the boot server is running before integration tests @@ -142,10 +143,11 @@ function main() { if [[ -z "${DBUS_SESSION_BUS_ADDRESS:-}" ]]; then export DBUS_SESSION_BUS_ADDRESS=/dev/null fi - + set -x eval "$compile_test_code" eval "$integration_test_code" - + { set +x; } 2>/dev/null + # Clean up: kill the boot server if [[ -f boot.pid ]]; then local pid; pid=$(cat boot.pid) @@ -154,6 +156,7 @@ function main() { rm boot.pid fi else + set -x echo "$integration_test_code" bash fi