Skip to content

Commit 4b508b9

Browse files
authored
Support custom GRPCClient health checker logic. (#13353)
[NOTICE] Roll back score meaning in GraphQL health check API.
1 parent 4430cf5 commit 4b508b9

File tree

14 files changed

+116
-97
lines changed

14 files changed

+116
-97
lines changed

dist-material/release-docs/LICENSE

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ The text of each license is the standard Apache 2.0 license.
226226
https://mvnrepository.com/artifact/com.fasterxml.jackson.datatype/jackson-datatype-jsr310/2.18.2 Apache-2.0
227227
https://mvnrepository.com/artifact/com.fasterxml.jackson.module/jackson-module-kotlin/2.13.4 Apache-2.0
228228
https://mvnrepository.com/artifact/com.fasterxml/classmate/1.5.1 Apache-2.0
229-
https://mvnrepository.com/artifact/com.google.api.grpc/proto-google-common-protos/2.41.0 Apache-2.0
229+
https://mvnrepository.com/artifact/com.google.api.grpc/proto-google-common-protos/2.48.0 Apache-2.0
230230
https://mvnrepository.com/artifact/com.google.auto.service/auto-service-annotations/1.0.1 Apache-2.0
231231
https://mvnrepository.com/artifact/com.google.code.findbugs/jsr305/3.0.2 Apache-2.0
232232
https://mvnrepository.com/artifact/com.google.code.gson/gson/2.9.0 Apache-2.0
@@ -290,16 +290,16 @@ The text of each license is the standard Apache 2.0 license.
290290
https://mvnrepository.com/artifact/io.fabric8/kubernetes-model-scheduling/6.7.1 Apache-2.0
291291
https://mvnrepository.com/artifact/io.fabric8/kubernetes-model-storageclass/6.7.1 Apache-2.0
292292
https://mvnrepository.com/artifact/io.fabric8/zjsonpatch/0.3.0 Apache-2.0
293-
https://mvnrepository.com/artifact/io.grpc/grpc-api/1.68.1 Apache-2.0
294-
https://mvnrepository.com/artifact/io.grpc/grpc-context/1.68.1 Apache-2.0
295-
https://mvnrepository.com/artifact/io.grpc/grpc-core/1.68.1 Apache-2.0
296-
https://mvnrepository.com/artifact/io.grpc/grpc-grpclb/1.68.1 Apache-2.0
297-
https://mvnrepository.com/artifact/io.grpc/grpc-netty/1.68.1 Apache-2.0
298-
https://mvnrepository.com/artifact/io.grpc/grpc-protobuf/1.68.1 Apache-2.0
299-
https://mvnrepository.com/artifact/io.grpc/grpc-protobuf-lite/1.68.1 Apache-2.0
293+
https://mvnrepository.com/artifact/io.grpc/grpc-api/1.70.0 Apache-2.0
294+
https://mvnrepository.com/artifact/io.grpc/grpc-context/1.70.0 Apache-2.0
295+
https://mvnrepository.com/artifact/io.grpc/grpc-core/1.70.0 Apache-2.0
296+
https://mvnrepository.com/artifact/io.grpc/grpc-grpclb/1.70.0 Apache-2.0
297+
https://mvnrepository.com/artifact/io.grpc/grpc-netty/1.70.0 Apache-2.0
298+
https://mvnrepository.com/artifact/io.grpc/grpc-protobuf/1.70.0 Apache-2.0
299+
https://mvnrepository.com/artifact/io.grpc/grpc-protobuf-lite/1.70.0 Apache-2.0
300300
https://mvnrepository.com/artifact/io.grpc/grpc-services/1.70.0 Apache-2.0
301-
https://mvnrepository.com/artifact/io.grpc/grpc-stub/1.68.1 Apache-2.0
302-
https://mvnrepository.com/artifact/io.grpc/grpc-util/1.68.1 Apache-2.0
301+
https://mvnrepository.com/artifact/io.grpc/grpc-stub/1.70.0 Apache-2.0
302+
https://mvnrepository.com/artifact/io.grpc/grpc-util/1.70.0 Apache-2.0
303303
https://mvnrepository.com/artifact/io.micrometer/micrometer-commons/1.14.4 Apache-2.0
304304
https://mvnrepository.com/artifact/io.micrometer/micrometer-core/1.14.4 Apache-2.0
305305
https://mvnrepository.com/artifact/io.micrometer/micrometer-observation/1.14.4 Apache-2.0

docs/en/changes/changes.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@
3737
* chore: add a warning log when connecting to ES takes too long.
3838
* Fix the query time range in the metadata API.
3939
* OAP gRPC-Client supports `Health Check`.
40-
* [Break Change] `Health Check` make response 1 represents healthy, 0 represents unhealthy.
40+
* [Break Change] `health_check_xx` metrics now report 1 for healthy and 0 for unhealthy.
41+
* Bump up grpc to 1.70.0.
4142

4243
#### UI
4344

docs/en/setup/backend/backend-health-check.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ If the OAP server is healthy, the response should be
3636
{
3737
"data": {
3838
"checkHealth": {
39-
"score": 1,
39+
"score": 0,
4040
"details": ""
4141
}
4242
}
@@ -49,7 +49,7 @@ If some modules are unhealthy (e.g. storage H2 is down), then the result may loo
4949
{
5050
"data": {
5151
"checkHealth": {
52-
"score": 0,
52+
"score": 1,
5353
"details": "storage_h2,"
5454
}
5555
}

oap-server-bom/pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,11 @@
253253
<artifactId>grpc-stub</artifactId>
254254
<version>${grpc.version}</version>
255255
</dependency>
256+
<dependency>
257+
<groupId>io.grpc</groupId>
258+
<artifactId>grpc-services</artifactId>
259+
<version>${grpc.version}</version>
260+
</dependency>
256261
<dependency>
257262
<groupId>io.netty</groupId>
258263
<artifactId>netty-tcnative-boringssl-static</artifactId>

oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/query/type/HealthStatus.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
@Setter
2727
@ToString
2828
public class HealthStatus {
29-
// score == 1 means healthy, otherwise it's unhealthy.
29+
// score == 0 means healthy (no unhealthy component or connection); any other value means unhealthy.
3030
private int score;
3131
private String details;
3232
}

oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/remote/health/HealthCheckServiceHandler.java

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,33 +18,29 @@
1818

1919
package org.apache.skywalking.oap.server.core.remote.health;
2020

21-
import grpc.health.v1.HealthCheckService;
22-
import grpc.health.v1.HealthGrpc;
21+
import io.grpc.health.v1.HealthCheckRequest;
22+
import io.grpc.health.v1.HealthCheckResponse;
23+
import io.grpc.health.v1.HealthGrpc;
2324
import io.grpc.stub.StreamObserver;
25+
import lombok.extern.slf4j.Slf4j;
2426
import org.apache.skywalking.oap.server.library.server.grpc.GRPCHandler;
25-
import org.slf4j.Logger;
26-
import org.slf4j.LoggerFactory;
2727

28+
@Slf4j
2829
public class HealthCheckServiceHandler extends HealthGrpc.HealthImplBase implements GRPCHandler {
29-
30-
private static final Logger LOGGER = LoggerFactory.getLogger(HealthCheckServiceHandler.class);
31-
3230
/**
3331
* In my testing, Consul did not send the service name.
3432
*
3533
* @param request service
3634
* @param responseObserver status
3735
*/
3836
@Override
39-
public void check(HealthCheckService.HealthCheckRequest request,
40-
StreamObserver<HealthCheckService.HealthCheckResponse> responseObserver) {
41-
42-
if (LOGGER.isDebugEnabled()) {
43-
LOGGER.debug("Received the gRPC server health check with the service name of {}", request.getService());
37+
public void check(HealthCheckRequest request, StreamObserver<HealthCheckResponse> responseObserver) {
38+
if (log.isDebugEnabled()) {
39+
log.debug("Received the gRPC server health check with the service name of {}", request.getService());
4440
}
4541

46-
HealthCheckService.HealthCheckResponse.Builder response = HealthCheckService.HealthCheckResponse.newBuilder();
47-
response.setStatus(HealthCheckService.HealthCheckResponse.ServingStatus.SERVING);
42+
HealthCheckResponse.Builder response = HealthCheckResponse.newBuilder();
43+
response.setStatus(HealthCheckResponse.ServingStatus.SERVING);
4844

4945
responseObserver.onNext(response.build());
5046
responseObserver.onCompleted();

oap-server/server-core/src/main/proto/HealthCheckService.proto

Lines changed: 0 additions & 40 deletions
This file was deleted.

oap-server/server-health-checker/src/main/java/org/apache/skywalking/oap/server/health/checker/provider/HealthCheckerHttpService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public HttpResponse healthcheck(ServiceRequestContext ctx, HttpRequest req) thro
3737
final var status = healthQueryService.checkHealth();
3838
log.info("Health status: {}", status);
3939

40-
if (status.getScore() == 1) {
40+
if (status.getScore() == 0) {
4141
return HttpResponse.of(HttpStatus.OK);
4242
}
4343
return HttpResponse.of(HttpStatus.SERVICE_UNAVAILABLE);

oap-server/server-health-checker/src/main/java/org/apache/skywalking/oap/server/health/checker/provider/HealthCheckerProvider.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
import java.util.concurrent.Executors;
2626
import java.util.concurrent.ScheduledExecutorService;
2727
import java.util.concurrent.TimeUnit;
28-
import java.util.concurrent.atomic.AtomicBoolean;
2928
import java.util.concurrent.atomic.AtomicReference;
3029
import lombok.extern.slf4j.Slf4j;
3130
import org.apache.skywalking.oap.server.core.CoreModule;
@@ -100,18 +99,22 @@ public void onInitialized(final HealthCheckerConfig initialized) {
10099
@Override public void notifyAfterCompleted() throws ServiceNotProvidedException, ModuleStartException {
101100
ses.scheduleAtFixedRate(() -> {
102101
StringBuilder unhealthyModules = new StringBuilder();
103-
AtomicBoolean hasUnhealthyModule = new AtomicBoolean(false);
102+
AtomicDouble unhealthyModule = new AtomicDouble(0);
104103
Stream.ofAll(collector.collect())
105104
.flatMap(metricFamily -> metricFamily.samples)
106105
.filter(sample -> metricsCreator.isHealthCheckerMetrics(sample.name))
107106
.forEach(sample -> {
108107
if (sample.value < 1) {
109108
unhealthyModules.append(metricsCreator.extractModuleName(sample.name)).append(",");
110-
hasUnhealthyModule.set(true);
109+
unhealthyModule.updateAndGet(v -> v + 1);
111110
}
112111
});
113112

114-
score.set(hasUnhealthyModule.get() ? 0 : 1);
113+
if (unhealthyModule.get() > 0) {
114+
score.set(unhealthyModule.get());
115+
} else {
116+
score.set(0);
117+
}
115118
details.set(unhealthyModules.toString());
116119
},
117120
2, config.getCheckIntervalSeconds(), TimeUnit.SECONDS);

oap-server/server-library/library-client/pom.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@
5353
<groupId>io.grpc</groupId>
5454
<artifactId>grpc-netty</artifactId>
5555
</dependency>
56+
<dependency>
57+
<groupId>io.grpc</groupId>
58+
<artifactId>grpc-services</artifactId>
59+
</dependency>
5660
<dependency>
5761
<groupId>io.netty</groupId>
5862
<artifactId>netty-codec-http2</artifactId>

0 commit comments

Comments
 (0)