Skip to content

Commit dbfaaa3

Browse files
joerg1985VietND96
authored andcommitted
[grid] retry if no node does support the Capabilities (SeleniumHQ#14986)
* [grid] retry if no node does support the Capabilities * [grid] add some unit tests to provoke the failure * [grid] increase the timeout of the tests * [grid] increase the timeouts of the tests * [grid] disable one of the tests for now --------- Co-authored-by: Viet Nguyen Duc <[email protected]>
1 parent 3476715 commit dbfaaa3

File tree

4 files changed

+306
-5
lines changed

4 files changed

+306
-5
lines changed

java/src/org/openqa/selenium/grid/distributor/local/LocalDistributor.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,11 @@ public Either<SessionNotCreatedException, CreateSessionResponse> newSession(
586586
new SessionNotCreatedException("Unable to create new session");
587587
for (Capabilities caps : request.getDesiredCapabilities()) {
588588
if (isNotSupported(caps)) {
589+
// e.g. the last node drained, we have to wait for a new to register
590+
lastFailure =
591+
new SessionNotCreatedException(
592+
"Unable to find a node supporting the desired capabilities");
593+
retry = true;
589594
continue;
590595
}
591596

java/src/org/openqa/selenium/grid/node/httpd/NodeServer.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
package org.openqa.selenium.grid.node.httpd;
1919

20-
import static java.net.HttpURLConnection.HTTP_NO_CONTENT;
20+
import static java.net.HttpURLConnection.HTTP_OK;
2121
import static java.net.HttpURLConnection.HTTP_UNAVAILABLE;
2222
import static org.openqa.selenium.grid.config.StandardGridRoles.EVENT_BUS_ROLE;
2323
import static org.openqa.selenium.grid.config.StandardGridRoles.HTTPD_ROLE;
@@ -131,13 +131,16 @@ protected Handlers createHandlers(Config config) {
131131
HttpHandler readinessCheck =
132132
req -> {
133133
if (node.getStatus().hasCapacity()) {
134-
return new HttpResponse().setStatus(HTTP_NO_CONTENT);
134+
return new HttpResponse()
135+
.setStatus(HTTP_OK)
136+
.setHeader("Content-Type", MediaType.PLAIN_TEXT_UTF_8.toString())
137+
.setContent(Contents.utf8String("Node has capacity available"));
135138
}
136139

137140
return new HttpResponse()
138141
.setStatus(HTTP_UNAVAILABLE)
139142
.setHeader("Content-Type", MediaType.PLAIN_TEXT_UTF_8.toString())
140-
.setContent(Contents.utf8String("No capacity available"));
143+
.setContent(Contents.utf8String("Node has no capacity available"));
141144
};
142145

143146
bus.addListener(

java/test/org/openqa/selenium/grid/distributor/BUILD.bazel

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,56 @@
11
load("@rules_jvm_external//:defs.bzl", "artifact")
2-
load("//java:defs.bzl", "JUNIT5_DEPS", "java_test_suite")
2+
load("//java:defs.bzl", "JUNIT5_DEPS", "java_selenium_test_suite", "java_test_suite")
33
load("//java:version.bzl", "TOOLS_JAVA_VERSION")
44

5+
LARGE_TESTS = [
6+
"DrainTest.java",
7+
]
8+
9+
java_selenium_test_suite(
10+
name = "large-tests",
11+
size = "large",
12+
srcs = LARGE_TESTS,
13+
browsers = [
14+
"chrome",
15+
"firefox",
16+
"edge",
17+
],
18+
javacopts = [
19+
"--release",
20+
TOOLS_JAVA_VERSION,
21+
],
22+
tags = [
23+
"selenium-remote",
24+
],
25+
deps = [
26+
"//java/src/org/openqa/selenium/chrome",
27+
"//java/src/org/openqa/selenium/firefox",
28+
"//java/src/org/openqa/selenium/grid",
29+
"//java/src/org/openqa/selenium/grid/config",
30+
"//java/src/org/openqa/selenium/grid/distributor",
31+
"//java/src/org/openqa/selenium/json",
32+
"//java/src/org/openqa/selenium/remote",
33+
"//java/src/org/openqa/selenium/support",
34+
"//java/test/org/openqa/selenium/environment",
35+
"//java/test/org/openqa/selenium/grid/testing",
36+
"//java/test/org/openqa/selenium/remote/tracing:tracing-support",
37+
"//java/test/org/openqa/selenium/testing:annotations",
38+
"//java/test/org/openqa/selenium/testing:test-base",
39+
artifact("org.junit.jupiter:junit-jupiter-api"),
40+
artifact("org.junit.jupiter:junit-jupiter-params"),
41+
artifact("org.assertj:assertj-core"),
42+
"//java/src/org/openqa/selenium:core",
43+
"//java/src/org/openqa/selenium/remote/http",
44+
] + JUNIT5_DEPS,
45+
)
46+
547
java_test_suite(
648
name = "medium-tests",
749
size = "medium",
8-
srcs = glob(["*.java"]),
50+
srcs = glob(
51+
["*.java"],
52+
exclude = LARGE_TESTS,
53+
),
954
javacopts = [
1055
"--release",
1156
TOOLS_JAVA_VERSION,
Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
// Licensed to the Software Freedom Conservancy (SFC) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The SFC licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package org.openqa.selenium.grid.distributor;
19+
20+
import static org.assertj.core.api.Assertions.assertThat;
21+
22+
import java.io.StringReader;
23+
import java.time.Duration;
24+
import java.util.ArrayList;
25+
import java.util.List;
26+
import java.util.Map;
27+
import java.util.Objects;
28+
import java.util.concurrent.CompletableFuture;
29+
import java.util.concurrent.CountDownLatch;
30+
import java.util.concurrent.ExecutorService;
31+
import java.util.concurrent.Executors;
32+
import java.util.concurrent.TimeUnit;
33+
import java.util.function.Supplier;
34+
import org.assertj.core.api.Assertions;
35+
import org.junit.jupiter.api.Disabled;
36+
import org.junit.jupiter.api.Test;
37+
import org.openqa.selenium.WebDriver;
38+
import org.openqa.selenium.grid.commands.Hub;
39+
import org.openqa.selenium.grid.config.CompoundConfig;
40+
import org.openqa.selenium.grid.config.Config;
41+
import org.openqa.selenium.grid.config.MapConfig;
42+
import org.openqa.selenium.grid.config.MemoizedConfig;
43+
import org.openqa.selenium.grid.config.TomlConfig;
44+
import org.openqa.selenium.grid.node.httpd.NodeServer;
45+
import org.openqa.selenium.grid.server.Server;
46+
import org.openqa.selenium.net.PortProber;
47+
import org.openqa.selenium.net.UrlChecker;
48+
import org.openqa.selenium.remote.RemoteWebDriver;
49+
import org.openqa.selenium.testing.Safely;
50+
import org.openqa.selenium.testing.drivers.Browser;
51+
52+
class DrainTest {
53+
54+
private final Browser browser = Objects.requireNonNull(Browser.detect());
55+
56+
@Disabled("the Node is terminated calling System.exit, this should be reworked in the future")
57+
@Test
58+
void nodeDoesNotTakeTooManySessions() throws Exception {
59+
String[] rawConfig =
60+
new String[] {
61+
"[events]",
62+
"publish = \"tcp://localhost:" + PortProber.findFreePort() + "\"",
63+
"subscribe = \"tcp://localhost:" + PortProber.findFreePort() + "\"",
64+
"",
65+
"[server]",
66+
"registration-secret = \"feta\""
67+
};
68+
69+
Config baseConfig =
70+
new MemoizedConfig(new TomlConfig(new StringReader(String.join("\n", rawConfig))));
71+
72+
Server<?> hub = startHub(baseConfig);
73+
try (AutoCloseable stopHub = () -> Safely.safelyCall(hub::stop); ) {
74+
UrlChecker urlChecker = new UrlChecker();
75+
urlChecker.waitUntilAvailable(
76+
5, TimeUnit.SECONDS, hub.getUrl().toURI().resolve("readyz").toURL());
77+
78+
// the CI has not enough CPUs so use a fixed number here
79+
int nThreads = 4 * 3;
80+
ExecutorService executor = Executors.newFixedThreadPool(nThreads);
81+
82+
try {
83+
List<CompletableFuture<WebDriver>> pendingSessions = new ArrayList<>();
84+
CountDownLatch allPending = new CountDownLatch(nThreads);
85+
86+
for (int i = 0; i < nThreads; i++) {
87+
CompletableFuture<WebDriver> future =
88+
CompletableFuture.supplyAsync(
89+
() -> {
90+
allPending.countDown();
91+
92+
return RemoteWebDriver.builder()
93+
.oneOf(browser.getCapabilities())
94+
.address(hub.getUrl())
95+
.build();
96+
},
97+
executor);
98+
99+
pendingSessions.add(future);
100+
}
101+
102+
// ensure all sessions are in the queue
103+
Assertions.assertThat(allPending.await(8, TimeUnit.SECONDS)).isTrue();
104+
105+
for (int i = 0; i < nThreads; i += 3) {
106+
// remove all completed futures
107+
assertThat(pendingSessions.removeIf(CompletableFuture::isDone)).isEqualTo(i != 0);
108+
109+
// start a node draining after 3 sessions
110+
Server<?> node = startNode(baseConfig, hub, 6, 3);
111+
112+
urlChecker.waitUntilAvailable(
113+
60, TimeUnit.SECONDS, node.getUrl().toURI().resolve("readyz").toURL());
114+
115+
// use nano time to avoid issues with a jumping clock e.g. on WSL2 or due to time-sync
116+
long started = System.nanoTime();
117+
118+
// wait for the first to start
119+
CompletableFuture.anyOf(pendingSessions.toArray(CompletableFuture<?>[]::new))
120+
.get(120, TimeUnit.SECONDS);
121+
122+
// we want to check not more than 3 are started, polling won't help here
123+
Thread.sleep(Duration.ofNanos(System.nanoTime() - started).multipliedBy(2).toMillis());
124+
125+
int stopped = 0;
126+
127+
for (CompletableFuture<WebDriver> future : pendingSessions) {
128+
if (future.isDone()) {
129+
stopped++;
130+
future.get().quit();
131+
}
132+
}
133+
134+
// the node should only pick 3 sessions to start, then starts to drain
135+
Assertions.assertThat(stopped).isEqualTo(3);
136+
137+
// check the node stopped
138+
urlChecker.waitUntilUnavailable(
139+
40, TimeUnit.SECONDS, node.getUrl().toURI().resolve("readyz").toURL());
140+
}
141+
} finally {
142+
executor.shutdownNow();
143+
}
144+
}
145+
}
146+
147+
@Test
148+
void sessionIsNotRejectedWhenNodeDrains() throws Exception {
149+
String[] rawConfig =
150+
new String[] {
151+
"[events]",
152+
"publish = \"tcp://localhost:" + PortProber.findFreePort() + "\"",
153+
"subscribe = \"tcp://localhost:" + PortProber.findFreePort() + "\"",
154+
"",
155+
"[server]",
156+
"registration-secret = \"feta\""
157+
};
158+
159+
Config baseConfig =
160+
new MemoizedConfig(new TomlConfig(new StringReader(String.join("\n", rawConfig))));
161+
162+
Server<?> hub = startHub(baseConfig);
163+
try (AutoCloseable stopHub = () -> Safely.safelyCall(hub::stop); ) {
164+
UrlChecker urlChecker = new UrlChecker();
165+
urlChecker.waitUntilAvailable(
166+
5, TimeUnit.SECONDS, hub.getUrl().toURI().resolve("readyz").toURL());
167+
168+
ExecutorService executor = Executors.newFixedThreadPool(2);
169+
170+
try {
171+
Supplier<CompletableFuture<WebDriver>> newDriver =
172+
() ->
173+
CompletableFuture.supplyAsync(
174+
() ->
175+
RemoteWebDriver.builder()
176+
.oneOf(browser.getCapabilities())
177+
.address(hub.getUrl())
178+
.build(),
179+
executor);
180+
181+
CompletableFuture<WebDriver> pendingA = newDriver.get();
182+
CompletableFuture<WebDriver> pendingB = newDriver.get();
183+
184+
for (int i = 0; i < 16; i++) {
185+
// the node should drain automatically, covered by other tests
186+
startNode(baseConfig, hub, 6, 1);
187+
188+
// wait for one to start
189+
CompletableFuture.anyOf(pendingA, pendingB).get(80, TimeUnit.SECONDS);
190+
191+
if (pendingA.isDone() && pendingB.isDone()) {
192+
pendingA.get().quit();
193+
pendingB.get().quit();
194+
195+
throw new IllegalStateException("only one should be started");
196+
} else if (pendingA.isDone()) {
197+
pendingA.get().quit();
198+
pendingA = newDriver.get();
199+
} else if (pendingB.isDone()) {
200+
pendingB.get().quit();
201+
pendingB = newDriver.get();
202+
}
203+
}
204+
} finally {
205+
executor.shutdownNow();
206+
}
207+
}
208+
}
209+
210+
Server<?> startHub(Config baseConfig) {
211+
Config hubConfig =
212+
new MemoizedConfig(
213+
new CompoundConfig(
214+
new MapConfig(
215+
Map.of(
216+
"server",
217+
Map.of("port", PortProber.findFreePort()),
218+
"events",
219+
Map.of("bind", true),
220+
"distributor",
221+
Map.of("newsession-threadpool-size", "6"))),
222+
baseConfig));
223+
224+
return new Hub().asServer(hubConfig).start();
225+
}
226+
227+
Server<?> startNode(Config baseConfig, Server<?> hub, int maxSessions, int drainAfter) {
228+
MapConfig additionalNodeConfig =
229+
new MapConfig(
230+
Map.of(
231+
"server", Map.of("port", PortProber.findFreePort()),
232+
"node",
233+
Map.of(
234+
"hub",
235+
hub.getUrl(),
236+
"driver-implementation",
237+
browser.displayName(),
238+
"override-max-sessions",
239+
"true",
240+
"max-sessions",
241+
Integer.toString(maxSessions),
242+
"drain-after-session-count",
243+
drainAfter)));
244+
245+
Config nodeConfig = new MemoizedConfig(new CompoundConfig(additionalNodeConfig, baseConfig));
246+
return new NodeServer().asServer(nodeConfig).start();
247+
}
248+
}

0 commit comments

Comments
 (0)