From 1ad9e3bd3b30c133faed2f621c9994c0b0b1bcaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Sautter?= Date: Mon, 30 Dec 2024 12:10:02 +0100 Subject: [PATCH 1/5] [grid] retry if no node does support the Capabilities --- .../selenium/grid/distributor/local/LocalDistributor.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/java/src/org/openqa/selenium/grid/distributor/local/LocalDistributor.java b/java/src/org/openqa/selenium/grid/distributor/local/LocalDistributor.java index 9bf2f880a56ad..b6db25c5dbfab 100644 --- a/java/src/org/openqa/selenium/grid/distributor/local/LocalDistributor.java +++ b/java/src/org/openqa/selenium/grid/distributor/local/LocalDistributor.java @@ -586,6 +586,11 @@ public Either newSession( new SessionNotCreatedException("Unable to create new session"); for (Capabilities caps : request.getDesiredCapabilities()) { if (isNotSupported(caps)) { + // e.g. the last node drained, we have to wait for a new to register + lastFailure = + new SessionNotCreatedException( + "Unable to find a node supporting the desired capabilities"); + retry = true; continue; } From 9be08970a29a44a54ba9c341dcfdb6b75ddf1dac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Sautter?= Date: Mon, 30 Dec 2024 22:13:44 +0100 Subject: [PATCH 2/5] [grid] add some unit tests to provoke the failure --- .../selenium/grid/node/httpd/NodeServer.java | 9 +- .../selenium/grid/distributor/BUILD.bazel | 49 +++- .../selenium/grid/distributor/DrainTest.java | 246 ++++++++++++++++++ 3 files changed, 299 insertions(+), 5 deletions(-) create mode 100644 java/test/org/openqa/selenium/grid/distributor/DrainTest.java diff --git a/java/src/org/openqa/selenium/grid/node/httpd/NodeServer.java b/java/src/org/openqa/selenium/grid/node/httpd/NodeServer.java index f1ac54bb62ea7..17123e23cbbf0 100644 --- a/java/src/org/openqa/selenium/grid/node/httpd/NodeServer.java +++ b/java/src/org/openqa/selenium/grid/node/httpd/NodeServer.java @@ -17,7 +17,7 @@ package org.openqa.selenium.grid.node.httpd; -import static java.net.HttpURLConnection.HTTP_NO_CONTENT; +import static java.net.HttpURLConnection.HTTP_OK; import static java.net.HttpURLConnection.HTTP_UNAVAILABLE; import static org.openqa.selenium.grid.config.StandardGridRoles.EVENT_BUS_ROLE; import static org.openqa.selenium.grid.config.StandardGridRoles.HTTPD_ROLE; @@ -131,13 +131,16 @@ protected Handlers createHandlers(Config config) { HttpHandler readinessCheck = req -> { if (node.getStatus().hasCapacity()) { - return new HttpResponse().setStatus(HTTP_NO_CONTENT); + return new HttpResponse() + .setStatus(HTTP_OK) + .setHeader("Content-Type", MediaType.PLAIN_TEXT_UTF_8.toString()) + .setContent(Contents.utf8String("Node has capacity available")); } return new HttpResponse() .setStatus(HTTP_UNAVAILABLE) .setHeader("Content-Type", MediaType.PLAIN_TEXT_UTF_8.toString()) - .setContent(Contents.utf8String("No capacity available")); + .setContent(Contents.utf8String("Node has no capacity available")); }; bus.addListener( diff --git a/java/test/org/openqa/selenium/grid/distributor/BUILD.bazel b/java/test/org/openqa/selenium/grid/distributor/BUILD.bazel index fd9ac64e97005..33f21dd8c5ee5 100644 --- a/java/test/org/openqa/selenium/grid/distributor/BUILD.bazel +++ b/java/test/org/openqa/selenium/grid/distributor/BUILD.bazel @@ -1,11 +1,56 @@ load("@rules_jvm_external//:defs.bzl", "artifact") -load("//java:defs.bzl", "JUNIT5_DEPS", "java_test_suite") +load("//java:defs.bzl", "JUNIT5_DEPS", "java_selenium_test_suite", "java_test_suite") load("//java:version.bzl", "TOOLS_JAVA_VERSION") +LARGE_TESTS = [ + "DrainTest.java", +] + +java_selenium_test_suite( + name = "large-tests", + size = "large", + srcs = LARGE_TESTS, + browsers = [ + "chrome", + "firefox", + "edge", + ], + javacopts = [ + "--release", + TOOLS_JAVA_VERSION, + ], + tags = [ + "selenium-remote", + ], + deps = [ + "//java/src/org/openqa/selenium/chrome", + "//java/src/org/openqa/selenium/firefox", + "//java/src/org/openqa/selenium/grid", + "//java/src/org/openqa/selenium/grid/config", + "//java/src/org/openqa/selenium/grid/distributor", + "//java/src/org/openqa/selenium/json", + "//java/src/org/openqa/selenium/remote", + "//java/src/org/openqa/selenium/support", + "//java/test/org/openqa/selenium/environment", + "//java/test/org/openqa/selenium/grid/testing", + "//java/test/org/openqa/selenium/remote/tracing:tracing-support", + "//java/test/org/openqa/selenium/testing:annotations", + "//java/test/org/openqa/selenium/testing:test-base", + artifact("org.junit.jupiter:junit-jupiter-api"), + artifact("org.junit.jupiter:junit-jupiter-params"), + artifact("org.assertj:assertj-core"), + "//java/src/org/openqa/selenium:core", + "//java/src/org/openqa/selenium/remote/http", + ] + JUNIT5_DEPS, +) + java_test_suite( name = "medium-tests", size = "medium", - srcs = glob(["*.java"]), + srcs = glob( + ["*.java"], + exclude = LARGE_TESTS, + ), javacopts = [ "--release", TOOLS_JAVA_VERSION, diff --git a/java/test/org/openqa/selenium/grid/distributor/DrainTest.java b/java/test/org/openqa/selenium/grid/distributor/DrainTest.java new file mode 100644 index 0000000000000..9db572a28d88e --- /dev/null +++ b/java/test/org/openqa/selenium/grid/distributor/DrainTest.java @@ -0,0 +1,246 @@ +// Licensed to the Software Freedom Conservancy (SFC) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The SFC licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.openqa.selenium.grid.distributor; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.io.StringReader; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.openqa.selenium.WebDriver; +import org.openqa.selenium.grid.commands.Hub; +import org.openqa.selenium.grid.config.CompoundConfig; +import org.openqa.selenium.grid.config.Config; +import org.openqa.selenium.grid.config.MapConfig; +import org.openqa.selenium.grid.config.MemoizedConfig; +import org.openqa.selenium.grid.config.TomlConfig; +import org.openqa.selenium.grid.node.httpd.NodeServer; +import org.openqa.selenium.grid.server.Server; +import org.openqa.selenium.net.PortProber; +import org.openqa.selenium.net.UrlChecker; +import org.openqa.selenium.remote.RemoteWebDriver; +import org.openqa.selenium.testing.Safely; +import org.openqa.selenium.testing.drivers.Browser; + +class DrainTest { + + private final Browser browser = Objects.requireNonNull(Browser.detect()); + + @Disabled("will be fixed with PR 14987") + @Test + void nodeDoesNotTakeTooManySessions() throws Exception { + String[] rawConfig = + new String[] { + "[events]", + "publish = \"tcp://localhost:" + PortProber.findFreePort() + "\"", + "subscribe = \"tcp://localhost:" + PortProber.findFreePort() + "\"", + "", + "[server]", + "registration-secret = \"feta\"" + }; + + Config baseConfig = + new MemoizedConfig(new TomlConfig(new StringReader(String.join("\n", rawConfig)))); + + Server hub = startHub(baseConfig); + try (AutoCloseable stopHub = () -> Safely.safelyCall(hub::stop); ) { + UrlChecker urlChecker = new UrlChecker(); + urlChecker.waitUntilAvailable( + 5, TimeUnit.SECONDS, hub.getUrl().toURI().resolve("readyz").toURL()); + + // the CI has not enough CPUs so use a fixed number here + int nThreads = 4 * 3; + ExecutorService executor = Executors.newFixedThreadPool(nThreads); + + try { + List> pendingSessions = new ArrayList<>(); + CountDownLatch allPending = new CountDownLatch(nThreads); + + for (int i = 0; i < nThreads; i++) { + Future future = + executor.submit( + () -> { + allPending.countDown(); + + return RemoteWebDriver.builder() + .oneOf(browser.getCapabilities()) + .address(hub.getUrl()) + .build(); + }); + + pendingSessions.add(future); + } + + // ensure all sessions are in the queue + Assertions.assertThat(allPending.await(8, TimeUnit.SECONDS)).isTrue(); + + for (int i = 0; i < nThreads; i += 3) { + // remove all completed futures + assertThat(pendingSessions.removeIf(Future::isDone)).isEqualTo(i != 0); + + // start a node draining after 3 sessions + var node = startNode(baseConfig, hub, 6, 3); + + urlChecker.waitUntilAvailable( + 20, TimeUnit.SECONDS, node.getUrl().toURI().resolve("readyz").toURL()); + + // we want to check not more than 3 are started, polling won't help here + Thread.sleep(20_000); + int stopped = 0; + + for (int j = 0; j < pendingSessions.size(); j++) { + var future = pendingSessions.get(j); + + if (future.isDone()) { + stopped++; + future.get().quit(); + } + } + + // the node should only pick 3 sessions to start, then starts to drain + Assertions.assertThat(stopped).isEqualTo(3); + + // check the node stopped + urlChecker.waitUntilUnavailable( + 10, TimeUnit.SECONDS, node.getUrl().toURI().resolve("readyz").toURL()); + } + } finally { + executor.shutdownNow(); + } + } + } + + @Test + void sessionIsNotRejectedWhenNodeDrains() throws Exception { + String[] rawConfig = + new String[] { + "[events]", + "publish = \"tcp://localhost:" + PortProber.findFreePort() + "\"", + "subscribe = \"tcp://localhost:" + PortProber.findFreePort() + "\"", + "", + "[server]", + "registration-secret = \"feta\"" + }; + + Config baseConfig = + new MemoizedConfig(new TomlConfig(new StringReader(String.join("\n", rawConfig)))); + + Server hub = startHub(baseConfig); + try (AutoCloseable stopHub = () -> Safely.safelyCall(hub::stop); ) { + UrlChecker urlChecker = new UrlChecker(); + urlChecker.waitUntilAvailable( + 5, TimeUnit.SECONDS, hub.getUrl().toURI().resolve("readyz").toURL()); + + ExecutorService executor = Executors.newFixedThreadPool(2); + + Supplier> newDriver = + () -> + executor.submit( + () -> + RemoteWebDriver.builder() + .oneOf(browser.getCapabilities()) + .address(hub.getUrl()) + .build()); + + try { + Future pendingA = newDriver.get(); + Future pendingB = newDriver.get(); + + for (int i = 0; i < 16; i++) { + // the node should drain automatically + startNode(baseConfig, hub, 6, 1); + + for (int j = 0; j < 2000; j++) { + Thread.sleep(10); + + if (pendingA.isDone() || pendingB.isDone()) { + break; + } + } + + if (pendingA.isDone() && pendingB.isDone()) { + pendingA.get().quit(); + pendingB.get().quit(); + + throw new IllegalStateException("only one should be started"); + } else if (pendingA.isDone()) { + pendingA.get().quit(); + pendingA = newDriver.get(); + } else if (pendingB.isDone()) { + pendingB.get().quit(); + pendingB = newDriver.get(); + } else { + throw new IllegalStateException("no browser started"); + } + } + } finally { + executor.shutdownNow(); + } + } + } + + Server startHub(Config baseConfig) { + Config hubConfig = + new MemoizedConfig( + new CompoundConfig( + new MapConfig( + Map.of( + "server", + Map.of("port", PortProber.findFreePort()), + "events", + Map.of("bind", true), + "distributor", + Map.of("newsession-threadpool-size", "6"))), + baseConfig)); + + return new Hub().asServer(hubConfig).start(); + } + + Server startNode(Config baseConfig, Server hub, int maxSessions, int drainAfter) { + MapConfig additionalNodeConfig = + new MapConfig( + Map.of( + "server", Map.of("port", PortProber.findFreePort()), + "node", + Map.of( + "hub", + hub.getUrl(), + "driver-implementation", + browser.displayName(), + "override-max-sessions", + "true", + "max-sessions", + Integer.toString(maxSessions), + "drain-after-session-count", + drainAfter))); + + Config nodeConfig = new MemoizedConfig(new CompoundConfig(additionalNodeConfig, baseConfig)); + return new NodeServer().asServer(nodeConfig).start(); + } +} From f3d4dbbb9b2e67ada6cfb5e55b3758621c72d7ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Sautter?= Date: Wed, 1 Jan 2025 14:00:12 +0100 Subject: [PATCH 3/5] [grid] increase the timeout of the tests --- .../selenium/grid/distributor/DrainTest.java | 43 ++++++++----------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/java/test/org/openqa/selenium/grid/distributor/DrainTest.java b/java/test/org/openqa/selenium/grid/distributor/DrainTest.java index 9db572a28d88e..fd329ac8e26a6 100644 --- a/java/test/org/openqa/selenium/grid/distributor/DrainTest.java +++ b/java/test/org/openqa/selenium/grid/distributor/DrainTest.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -31,7 +32,6 @@ import java.util.concurrent.TimeUnit; import java.util.function.Supplier; import org.assertj.core.api.Assertions; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.openqa.selenium.WebDriver; import org.openqa.selenium.grid.commands.Hub; @@ -52,7 +52,6 @@ class DrainTest { private final Browser browser = Objects.requireNonNull(Browser.detect()); - @Disabled("will be fixed with PR 14987") @Test void nodeDoesNotTakeTooManySessions() throws Exception { String[] rawConfig = @@ -105,7 +104,7 @@ void nodeDoesNotTakeTooManySessions() throws Exception { assertThat(pendingSessions.removeIf(Future::isDone)).isEqualTo(i != 0); // start a node draining after 3 sessions - var node = startNode(baseConfig, hub, 6, 3); + Server node = startNode(baseConfig, hub, 6, 3); urlChecker.waitUntilAvailable( 20, TimeUnit.SECONDS, node.getUrl().toURI().resolve("readyz").toURL()); @@ -115,7 +114,7 @@ void nodeDoesNotTakeTooManySessions() throws Exception { int stopped = 0; for (int j = 0; j < pendingSessions.size(); j++) { - var future = pendingSessions.get(j); + Future future = pendingSessions.get(j); if (future.isDone()) { stopped++; @@ -159,30 +158,26 @@ void sessionIsNotRejectedWhenNodeDrains() throws Exception { ExecutorService executor = Executors.newFixedThreadPool(2); - Supplier> newDriver = - () -> - executor.submit( - () -> - RemoteWebDriver.builder() - .oneOf(browser.getCapabilities()) - .address(hub.getUrl()) - .build()); - try { - Future pendingA = newDriver.get(); - Future pendingB = newDriver.get(); + Supplier> newDriver = + () -> + CompletableFuture.supplyAsync( + () -> + RemoteWebDriver.builder() + .oneOf(browser.getCapabilities()) + .address(hub.getUrl()) + .build(), + executor); + + CompletableFuture pendingA = newDriver.get(); + CompletableFuture pendingB = newDriver.get(); for (int i = 0; i < 16; i++) { - // the node should drain automatically + // the node should drain automatically, covered by other tests startNode(baseConfig, hub, 6, 1); - for (int j = 0; j < 2000; j++) { - Thread.sleep(10); - - if (pendingA.isDone() || pendingB.isDone()) { - break; - } - } + // wait for one to start + CompletableFuture.anyOf(pendingA, pendingB).get(80, TimeUnit.SECONDS); if (pendingA.isDone() && pendingB.isDone()) { pendingA.get().quit(); @@ -195,8 +190,6 @@ void sessionIsNotRejectedWhenNodeDrains() throws Exception { } else if (pendingB.isDone()) { pendingB.get().quit(); pendingB = newDriver.get(); - } else { - throw new IllegalStateException("no browser started"); } } } finally { From 6c43f655665a0fb952c1deee3b1b4f0167bce245 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Sautter?= Date: Wed, 1 Jan 2025 14:31:42 +0100 Subject: [PATCH 4/5] [grid] increase the timeouts of the tests --- .../selenium/grid/distributor/DrainTest.java | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/java/test/org/openqa/selenium/grid/distributor/DrainTest.java b/java/test/org/openqa/selenium/grid/distributor/DrainTest.java index fd329ac8e26a6..3a702723c9bb8 100644 --- a/java/test/org/openqa/selenium/grid/distributor/DrainTest.java +++ b/java/test/org/openqa/selenium/grid/distributor/DrainTest.java @@ -20,6 +20,7 @@ import static org.assertj.core.api.Assertions.assertThat; import java.io.StringReader; +import java.time.Duration; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -28,7 +29,6 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.function.Supplier; import org.assertj.core.api.Assertions; @@ -78,12 +78,12 @@ void nodeDoesNotTakeTooManySessions() throws Exception { ExecutorService executor = Executors.newFixedThreadPool(nThreads); try { - List> pendingSessions = new ArrayList<>(); + List> pendingSessions = new ArrayList<>(); CountDownLatch allPending = new CountDownLatch(nThreads); for (int i = 0; i < nThreads; i++) { - Future future = - executor.submit( + CompletableFuture future = + CompletableFuture.supplyAsync( () -> { allPending.countDown(); @@ -91,7 +91,8 @@ void nodeDoesNotTakeTooManySessions() throws Exception { .oneOf(browser.getCapabilities()) .address(hub.getUrl()) .build(); - }); + }, + executor); pendingSessions.add(future); } @@ -101,21 +102,27 @@ void nodeDoesNotTakeTooManySessions() throws Exception { for (int i = 0; i < nThreads; i += 3) { // remove all completed futures - assertThat(pendingSessions.removeIf(Future::isDone)).isEqualTo(i != 0); + assertThat(pendingSessions.removeIf(CompletableFuture::isDone)).isEqualTo(i != 0); // start a node draining after 3 sessions Server node = startNode(baseConfig, hub, 6, 3); urlChecker.waitUntilAvailable( - 20, TimeUnit.SECONDS, node.getUrl().toURI().resolve("readyz").toURL()); + 60, TimeUnit.SECONDS, node.getUrl().toURI().resolve("readyz").toURL()); + + // use nano time to avoid issues with a jumping clock e.g. on WSL2 or due to time-sync + long started = System.nanoTime(); + + // wait for the first to start + CompletableFuture.anyOf(pendingSessions.toArray(CompletableFuture[]::new)) + .get(120, TimeUnit.SECONDS); // we want to check not more than 3 are started, polling won't help here - Thread.sleep(20_000); - int stopped = 0; + Thread.sleep(Duration.ofNanos(System.nanoTime() - started).multipliedBy(2).toMillis()); - for (int j = 0; j < pendingSessions.size(); j++) { - Future future = pendingSessions.get(j); + int stopped = 0; + for (CompletableFuture future : pendingSessions) { if (future.isDone()) { stopped++; future.get().quit(); @@ -127,7 +134,7 @@ void nodeDoesNotTakeTooManySessions() throws Exception { // check the node stopped urlChecker.waitUntilUnavailable( - 10, TimeUnit.SECONDS, node.getUrl().toURI().resolve("readyz").toURL()); + 40, TimeUnit.SECONDS, node.getUrl().toURI().resolve("readyz").toURL()); } } finally { executor.shutdownNow(); From c67e940f26d783f8ead0d94299b284c3d39f5a81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Sautter?= Date: Wed, 1 Jan 2025 18:51:54 +0100 Subject: [PATCH 5/5] [grid] disable one of the tests for now --- java/test/org/openqa/selenium/grid/distributor/DrainTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/java/test/org/openqa/selenium/grid/distributor/DrainTest.java b/java/test/org/openqa/selenium/grid/distributor/DrainTest.java index 3a702723c9bb8..fb8f13d01ce1d 100644 --- a/java/test/org/openqa/selenium/grid/distributor/DrainTest.java +++ b/java/test/org/openqa/selenium/grid/distributor/DrainTest.java @@ -32,6 +32,7 @@ import java.util.concurrent.TimeUnit; import java.util.function.Supplier; import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.openqa.selenium.WebDriver; import org.openqa.selenium.grid.commands.Hub; @@ -52,6 +53,7 @@ class DrainTest { private final Browser browser = Objects.requireNonNull(Browser.detect()); + @Disabled("the Node is terminated calling System.exit, this should be reworked in the future") @Test void nodeDoesNotTakeTooManySessions() throws Exception { String[] rawConfig =