Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
title: Add unresponsive servers to zombie list
type: fixed # added, changed, fixed, deprecated, removed, dependency_update, security, other
authors:
  - name: James Vanneman
links:
  - name: SOLR-18002
    url: https://issues.apache.org/jira/browse/SOLR-18002
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.solr.client.solrj.ResponseParser;
import org.apache.solr.client.solrj.SolrClient;
Expand Down Expand Up @@ -282,7 +283,7 @@ private void onFailedRequest(
}
} catch (SolrServerException e) {
Throwable rootCause = e.getRootCause();
if (!isNonRetryable && rootCause instanceof IOException) {
if (!isNonRetryable && (rootCause instanceof IOException || rootCause instanceof TimeoutException)) {
listener.onFailure((!isZombie) ? makeServerAZombie(endpoint, e) : e, true);
} else if (isNonRetryable && rootCause instanceof ConnectException) {
listener.onFailure((!isZombie) ? makeServerAZombie(endpoint, e) : e, true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import org.apache.solr.client.solrj.ResponseParser;
Expand Down Expand Up @@ -554,7 +555,7 @@ protected Exception doRequest(
}
} catch (SolrServerException e) {
Throwable rootCause = e.getRootCause();
if (!isNonRetryable && rootCause instanceof IOException) {
if (!isNonRetryable && (rootCause instanceof IOException || rootCause instanceof TimeoutException)) {
ex = (!isZombie) ? makeServerAZombie(baseUrl, e) : e;
} else if (isNonRetryable && rootCause instanceof ConnectException) {
ex = (!isZombie) ? makeServerAZombie(baseUrl, e) : e;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.io.IOException;
import java.io.UncheckedIOException;
import java.lang.invoke.MethodHandles;
import java.net.ServerSocket;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
Expand All @@ -32,6 +33,7 @@
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.SolrResponseBase;
import org.apache.solr.common.SolrInputDocument;
Expand Down Expand Up @@ -206,6 +208,29 @@ public void testTwoServers() throws Exception {
}
}

/**
 * Sends a synchronous request through the load-balanced client to the unresponsive endpoint
 * provided by {@link ZombieTestContext} and verifies that the endpoint ends up in the zombie
 * list.
 */
public void testTimeoutExceptionMarksServerAsZombie() throws Exception {
  try (ZombieTestContext ctx = new ZombieTestContext()) {
    LBSolrClient.Req lbReq = ctx.createQueryRequest();

    try {
      ctx.lbClient.request(lbReq);
    } catch (Exception ignored) {
      // Deliberately ignored: the only endpoint in the request never answers, so the call is
      // expected to fail. What this test verifies is the side effect on the zombie list,
      // which is asserted below.
    }

    ctx.assertZombieState();
  }
}

/**
 * Asynchronous variant: issues the request via {@code requestAsync} against the unresponsive
 * endpoint provided by {@link ZombieTestContext} and verifies that the endpoint ends up in the
 * zombie list.
 */
public void testTimeoutExceptionMarksServerAsZombieAsyncRequest() throws Exception {
  try (ZombieTestContext context = new ZombieTestContext()) {
    LBSolrClient.Req pendingRequest = context.createQueryRequest();

    // Map any failure to a null result so that get() returns normally; the outcome of the
    // request itself is irrelevant here, only the zombie bookkeeping asserted below matters.
    context
        .lbClient
        .requestAsync(pendingRequest)
        .exceptionally(failure -> null)
        .get();

    context.assertZombieState();
  }
}

private LBSolrClient.Endpoint[] bootstrapBaseSolrEndpoints(int max) {
LBSolrClient.Endpoint[] solrUrls = new LBSolrClient.Endpoint[max];
for (int i = 0; i < max; i++) {
Expand Down Expand Up @@ -334,4 +359,60 @@ public void close() {
}
}
}

/**
 * Test fixture that bundles a listening-but-silent server socket with a load-balanced client
 * configured with short timeouts, so tests can provoke a timeout and then inspect the client's
 * zombie list.
 *
 * <p>Intended for use in try-with-resources; {@link #close()} releases the clients and the
 * socket.
 */
private class ZombieTestContext implements AutoCloseable {
  final ServerSocket blackhole;
  final LBSolrClient.Endpoint nonRoutableEndpoint;
  final Http2SolrClient delegateClient;
  final LBHttp2SolrClient<?> lbClient;

  /**
   * Binds the socket and builds both clients. If any step after binding the socket fails, the
   * resources created so far are released before the failure propagates, so a broken setup does
   * not leak a bound port or an HTTP client.
   */
  ZombieTestContext() throws Exception {
    // Create a socket that allows a client to connect but causes it to hang until idleTimeout
    // is triggered: the socket is bound and listening, but this fixture never calls accept().
    ServerSocket socket = new ServerSocket(0);
    LBSolrClient.Endpoint endpoint = null;
    Http2SolrClient http = null;
    LBHttp2SolrClient<?> lb = null;
    boolean success = false;
    try {
      endpoint = new LBSolrClient.Endpoint("http://localhost:" + socket.getLocalPort() + "/solr");

      http =
          new Http2SolrClient.Builder()
              .withConnectionTimeout(1000, TimeUnit.MILLISECONDS)
              .withIdleTimeout(100, TimeUnit.MILLISECONDS)
              .build();

      lb =
          new LBHttp2SolrClient.Builder<>(http)
              .setAliveCheckInterval(500, TimeUnit.MILLISECONDS)
              .build();

      success = true;
    } finally {
      if (!success) {
        // Construction failed part-way: close() will never be called on this instance, so
        // release whatever was already created here.
        try {
          if (http != null) {
            http.close();
          }
        } finally {
          closeQuietly(socket);
        }
      }
    }

    blackhole = socket;
    nonRoutableEndpoint = endpoint;
    delegateClient = http;
    lbClient = lb;
  }

  /**
   * Builds a match-all query request whose only endpoint is the silent socket's base URL
   * combined with the default collection of {@code solr[0]}.
   */
  LBSolrClient.Req createQueryRequest() {
    SolrQuery solrQuery = new SolrQuery("*:*");
    QueryRequest queryRequest = new QueryRequest(solrQuery);

    List<LBSolrClient.Endpoint> endpoints =
        List.of(
            new LBSolrClient.Endpoint(
                nonRoutableEndpoint.getBaseUrl(), solr[0].getDefaultCollection()));
    return new LBSolrClient.Req(queryRequest, endpoints);
  }

  /**
   * Asserts that the load-balanced client's zombie map contains the key
   * {@code <baseUrl>/<collection>} for the endpoint used by {@link #createQueryRequest()}.
   */
  void assertZombieState() {
    assertTrue(
        "Non-routable endpoint should be marked as zombie due to timeout",
        lbClient.zombieServers.containsKey(
            nonRoutableEndpoint.getBaseUrl() + "/" + solr[0].getDefaultCollection()));
  }

  /**
   * Closes the load-balanced client, the delegate client and the socket, in that order. Each
   * step runs even if an earlier one throws, so one failing close cannot leak the rest.
   */
  @Override
  public void close() {
    try {
      lbClient.close();
    } finally {
      try {
        delegateClient.close();
      } finally {
        closeQuietly(blackhole);
      }
    }
  }

  /** Closes the given socket, discarding any {@link IOException} raised while doing so. */
  private void closeQuietly(ServerSocket socket) {
    try {
      socket.close();
    } catch (IOException ignored) {
      // Best-effort cleanup of a test-only socket; a failure to close it must not mask the
      // actual test outcome.
    }
  }
}
}