From 69b3a53043264e10c24eda10b3b04c89df159642 Mon Sep 17 00:00:00 2001 From: Sam Xiao Date: Wed, 12 Feb 2025 22:19:29 +0800 Subject: [PATCH 1/5] Add logs for SLM health indicator missing snapshot --- .../health/HealthPeriodicLogger.java | 35 ++++++++++++++----- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java b/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java index cd6b7c7c7cd86..b0bec8f304f7c 100644 --- a/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java +++ b/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java @@ -42,6 +42,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; @@ -52,16 +53,20 @@ import static org.elasticsearch.health.HealthStatus.RED; /** - * This class periodically logs the results of the Health API to the standard Elasticsearch server log file. It a lifecycle - * aware component because it health depends on other lifecycle aware components. This means: + * This class periodically logs the results of the Health API to the standard Elasticsearch server log file. It is a lifecycle + * aware component because it depends on other lifecycle aware components. This means: * - We do not schedule any jobs until the lifecycle state is STARTED - * - When the lifecycle state becomes STOPPED, do not schedule any more runs, but we do let the current one finish + * - When the lifecycle state becomes STOPPED, we do not schedule any more runs, but we do let the current one finish * - When the lifecycle state becomes CLOSED, we will interrupt the current run as well. */ public class HealthPeriodicLogger extends AbstractLifecycleComponent implements ClusterStateListener, SchedulerEngine.Listener { public static final String HEALTH_FIELD_PREFIX = "elasticsearch.health"; public static final String MESSAGE_FIELD = "message"; + // duplicated from SlmHealthIndicatorService since x-pack not accessible + private static final String SLM_HEALTH_INDICATOR_SERVICE_NAME = "slm"; + private static final String SLM_HEALTH_INDICATOR_SERVICE_IMPACT_ID_MISSING_SNAPSHOT = "missing_snapshot"; + /** * Valid modes of output for this logger */ @@ -361,11 +366,25 @@ static Map convertToLoggedFields(List ind String.format(Locale.ROOT, "%s.%s.status", HEALTH_FIELD_PREFIX, indicatorResult.name()), indicatorResult.status().xContentValue() ); - if (GREEN.equals(indicatorResult.status()) == false && indicatorResult.details() != null) { - result.put( - String.format(Locale.ROOT, "%s.%s.details", HEALTH_FIELD_PREFIX, indicatorResult.name()), - Strings.toString(indicatorResult.details()) - ); + if (GREEN.equals(indicatorResult.status()) == false) { + if (indicatorResult.details() != null) { + result.put( + String.format(Locale.ROOT, "%s.%s.details", HEALTH_FIELD_PREFIX, indicatorResult.name()), + Strings.toString(indicatorResult.details()) + ); + } + + // output the SLM health indicator missing snapshot impact, so it can be specifically identified when occur + if (SLM_HEALTH_INDICATOR_SERVICE_NAME.equals(indicatorResult.name())) { + Optional impactMissingSnapshot = indicatorResult.impacts() + .stream() + .filter(impact -> SLM_HEALTH_INDICATOR_SERVICE_IMPACT_ID_MISSING_SNAPSHOT.equals(impact.id())) + .findFirst(); + impactMissingSnapshot.ifPresent(impact -> result.put( + String.format(Locale.ROOT, "%s.%s.%s", HEALTH_FIELD_PREFIX, indicatorResult.name(), impact.id()), + true + )); + } } }); From 8e7ba91435d4d6e1f9247f3c94bb8df40f0da31c Mon Sep 17 00:00:00 2001 From: Sam Xiao Date: Thu, 20 Feb 2025 12:12:44 -0500 Subject: [PATCH 2/5] Add indicator impact --- .../health/HealthPeriodicLogger.java | 20 +++++------ .../node/DiskHealthIndicatorService.java | 3 +- .../health/HealthPeriodicLoggerTests.java | 34 ++++++++++++++++--- 3 files changed, 40 insertions(+), 17 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java b/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java index b0bec8f304f7c..3e2507b59869c 100644 --- a/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java +++ b/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java @@ -367,23 +367,21 @@ static Map convertToLoggedFields(List ind indicatorResult.status().xContentValue() ); if (GREEN.equals(indicatorResult.status()) == false) { + // indicator details if (indicatorResult.details() != null) { result.put( String.format(Locale.ROOT, "%s.%s.details", HEALTH_FIELD_PREFIX, indicatorResult.name()), Strings.toString(indicatorResult.details()) ); } - - // output the SLM health indicator missing snapshot impact, so it can be specifically identified when occur - if (SLM_HEALTH_INDICATOR_SERVICE_NAME.equals(indicatorResult.name())) { - Optional impactMissingSnapshot = indicatorResult.impacts() - .stream() - .filter(impact -> SLM_HEALTH_INDICATOR_SERVICE_IMPACT_ID_MISSING_SNAPSHOT.equals(impact.id())) - .findFirst(); - impactMissingSnapshot.ifPresent(impact -> result.put( - String.format(Locale.ROOT, "%s.%s.%s", HEALTH_FIELD_PREFIX, indicatorResult.name(), impact.id()), - true - )); + // indicator impact + if (indicatorResult.impacts() != null) { + indicatorResult.impacts().forEach( + impact -> result.put( + String.format(Locale.ROOT, "%s.%s.%s.impacted", HEALTH_FIELD_PREFIX, indicatorResult.name(), impact.id()), + true + ) + ); } } }); diff --git a/server/src/main/java/org/elasticsearch/health/node/DiskHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/DiskHealthIndicatorService.java index 841973911d150..db55005859793 100644 --- a/server/src/main/java/org/elasticsearch/health/node/DiskHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/DiskHealthIndicatorService.java @@ -66,7 +66,8 @@ public class DiskHealthIndicatorService implements HealthIndicatorService { private static final Logger logger = LogManager.getLogger(DiskHealthIndicatorService.class); - private static final String IMPACT_INGEST_UNAVAILABLE_ID = "ingest_capability_unavailable"; + // VisibleForTesting + public static final String IMPACT_INGEST_UNAVAILABLE_ID = "ingest_capability_unavailable"; private static final String IMPACT_INGEST_AT_RISK_ID = "ingest_capability_at_risk"; private static final String IMPACT_CLUSTER_STABILITY_AT_RISK_ID = "cluster_stability_at_risk"; private static final String IMPACT_CLUSTER_FUNCTIONALITY_UNAVAILABLE_ID = "cluster_functionality_unavailable"; diff --git a/server/src/test/java/org/elasticsearch/health/HealthPeriodicLoggerTests.java b/server/src/test/java/org/elasticsearch/health/HealthPeriodicLoggerTests.java index e4bb23b092869..a0032e5693dbb 100644 --- a/server/src/test/java/org/elasticsearch/health/HealthPeriodicLoggerTests.java +++ b/server/src/test/java/org/elasticsearch/health/HealthPeriodicLoggerTests.java @@ -19,6 +19,7 @@ import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.cluster.node.DiscoveryNodeUtils; +import org.elasticsearch.cluster.routing.allocation.shards.ShardsAvailabilityHealthIndicatorService; import org.elasticsearch.cluster.routing.allocation.shards.ShardsAvailabilityHealthIndicatorServiceTests; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Strings; @@ -28,6 +29,7 @@ import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.health.node.DiskHealthIndicatorService; import org.elasticsearch.telemetry.TelemetryProvider; import org.elasticsearch.telemetry.metric.LongGaugeMetric; import org.elasticsearch.telemetry.metric.MeterRegistry; @@ -51,9 +53,12 @@ import java.util.function.BiConsumer; import java.util.function.Consumer; +import static org.elasticsearch.cluster.routing.allocation.shards.ShardsAvailabilityHealthIndicatorService.PRIMARY_UNASSIGNED_IMPACT_ID; +import static org.elasticsearch.cluster.routing.allocation.shards.ShardsAvailabilityHealthIndicatorService.REPLICA_UNASSIGNED_IMPACT_ID; import static org.elasticsearch.health.HealthStatus.GREEN; import static org.elasticsearch.health.HealthStatus.RED; import static org.elasticsearch.health.HealthStatus.YELLOW; +import static org.elasticsearch.health.node.DiskHealthIndicatorService.IMPACT_INGEST_UNAVAILABLE_ID; import static org.elasticsearch.test.ClusterServiceUtils.createClusterService; import static org.hamcrest.Matchers.equalTo; import static org.mockito.ArgumentMatchers.any; @@ -125,9 +130,9 @@ public void testConvertToLoggedFields() { Map loggerResults = HealthPeriodicLogger.convertToLoggedFields(results); - // verify that the number of fields is the number of indicators + 4 - // (for overall and for message, plus details for the two yellow indicators) - assertThat(loggerResults.size(), equalTo(results.size() + 4)); + // verify that the number of fields is the number of indicators + 7 + // (for overall and for message, plus details for the two yellow indicators, plus three impact) + assertThat(loggerResults.size(), equalTo(results.size() + 7)); // test indicator status assertThat(loggerResults.get(makeHealthStatusString("master_is_stable")), equalTo("green")); @@ -165,6 +170,13 @@ public void testConvertToLoggedFields() { equalTo(String.format(Locale.ROOT, "health=%s [disk,shards_availability]", overallStatus.xContentValue())) ); + // test impact + assertThat(loggerResults.get(makeHealthImpactString(DiskHealthIndicatorService.NAME, IMPACT_INGEST_UNAVAILABLE_ID)), equalTo(true)); + assertThat(loggerResults.get(makeHealthImpactString(ShardsAvailabilityHealthIndicatorService.NAME, PRIMARY_UNASSIGNED_IMPACT_ID)), + equalTo(true)); + assertThat(loggerResults.get(makeHealthImpactString(ShardsAvailabilityHealthIndicatorService.NAME, REPLICA_UNASSIGNED_IMPACT_ID)), + equalTo(true)); + // test empty results { List empty = new ArrayList<>(); @@ -793,7 +805,10 @@ private List getTestIndicatorResults() { 1 ) ), - null, + List.of( + new HealthIndicatorImpact(DiskHealthIndicatorService.NAME, IMPACT_INGEST_UNAVAILABLE_ID, 2, "description", + List.of(ImpactArea.INGEST)) + ), null ); var shardsAvailable = new HealthIndicatorResult( @@ -801,7 +816,12 @@ private List getTestIndicatorResults() { YELLOW, null, new SimpleHealthIndicatorDetails(ShardsAvailabilityHealthIndicatorServiceTests.addDefaults(Map.of())), - null, + List.of( + new HealthIndicatorImpact(ShardsAvailabilityHealthIndicatorService.NAME, PRIMARY_UNASSIGNED_IMPACT_ID, 2, "description", + List.of(ImpactArea.SEARCH)), + new HealthIndicatorImpact(ShardsAvailabilityHealthIndicatorService.NAME, REPLICA_UNASSIGNED_IMPACT_ID, 2, "description", + List.of(ImpactArea.SEARCH)) + ), null ); @@ -846,6 +866,10 @@ private String makeHealthDetailsString(String key) { return String.format(Locale.ROOT, "%s.%s.details", HealthPeriodicLogger.HEALTH_FIELD_PREFIX, key); } + private String makeHealthImpactString(String indicatorName, String impact) { + return String.format(Locale.ROOT, "%s.%s.%s.impacted", HealthPeriodicLogger.HEALTH_FIELD_PREFIX, indicatorName, impact); + } + private HealthPeriodicLogger createAndInitHealthPeriodicLogger( ClusterService clusterService, HealthService testHealthService, From 24c33023385d9ac113e8f675ff305968c37c607e Mon Sep 17 00:00:00 2001 From: Sam Xiao Date: Thu, 20 Feb 2025 12:15:05 -0500 Subject: [PATCH 3/5] clean up --- .../java/org/elasticsearch/health/HealthPeriodicLogger.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java b/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java index 3e2507b59869c..c212d504d2c0d 100644 --- a/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java +++ b/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java @@ -42,7 +42,6 @@ import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Optional; import java.util.Set; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; @@ -63,10 +62,6 @@ public class HealthPeriodicLogger extends AbstractLifecycleComponent implements public static final String HEALTH_FIELD_PREFIX = "elasticsearch.health"; public static final String MESSAGE_FIELD = "message"; - // duplicated from SlmHealthIndicatorService since x-pack not accessible - private static final String SLM_HEALTH_INDICATOR_SERVICE_NAME = "slm"; - private static final String SLM_HEALTH_INDICATOR_SERVICE_IMPACT_ID_MISSING_SNAPSHOT = "missing_snapshot"; - /** * Valid modes of output for this logger */ From 863176442a9df17812a8ec55d31b56367a00d625 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 20 Feb 2025 17:24:38 +0000 Subject: [PATCH 4/5] [CI] Auto commit changes from spotless --- .../health/HealthPeriodicLogger.java | 13 ++++--- .../health/HealthPeriodicLoggerTests.java | 39 ++++++++++++++----- 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java b/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java index c212d504d2c0d..d7a2762556f19 100644 --- a/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java +++ b/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java @@ -371,12 +371,13 @@ static Map convertToLoggedFields(List ind } // indicator impact if (indicatorResult.impacts() != null) { - indicatorResult.impacts().forEach( - impact -> result.put( - String.format(Locale.ROOT, "%s.%s.%s.impacted", HEALTH_FIELD_PREFIX, indicatorResult.name(), impact.id()), - true - ) - ); + indicatorResult.impacts() + .forEach( + impact -> result.put( + String.format(Locale.ROOT, "%s.%s.%s.impacted", HEALTH_FIELD_PREFIX, indicatorResult.name(), impact.id()), + true + ) + ); } } }); diff --git a/server/src/test/java/org/elasticsearch/health/HealthPeriodicLoggerTests.java b/server/src/test/java/org/elasticsearch/health/HealthPeriodicLoggerTests.java index a0032e5693dbb..43d8a74395f1a 100644 --- a/server/src/test/java/org/elasticsearch/health/HealthPeriodicLoggerTests.java +++ b/server/src/test/java/org/elasticsearch/health/HealthPeriodicLoggerTests.java @@ -172,10 +172,14 @@ public void testConvertToLoggedFields() { // test impact assertThat(loggerResults.get(makeHealthImpactString(DiskHealthIndicatorService.NAME, IMPACT_INGEST_UNAVAILABLE_ID)), equalTo(true)); - assertThat(loggerResults.get(makeHealthImpactString(ShardsAvailabilityHealthIndicatorService.NAME, PRIMARY_UNASSIGNED_IMPACT_ID)), - equalTo(true)); - assertThat(loggerResults.get(makeHealthImpactString(ShardsAvailabilityHealthIndicatorService.NAME, REPLICA_UNASSIGNED_IMPACT_ID)), - equalTo(true)); + assertThat( + loggerResults.get(makeHealthImpactString(ShardsAvailabilityHealthIndicatorService.NAME, PRIMARY_UNASSIGNED_IMPACT_ID)), + equalTo(true) + ); + assertThat( + loggerResults.get(makeHealthImpactString(ShardsAvailabilityHealthIndicatorService.NAME, REPLICA_UNASSIGNED_IMPACT_ID)), + equalTo(true) + ); // test empty results { @@ -806,8 +810,13 @@ private List getTestIndicatorResults() { ) ), List.of( - new HealthIndicatorImpact(DiskHealthIndicatorService.NAME, IMPACT_INGEST_UNAVAILABLE_ID, 2, "description", - List.of(ImpactArea.INGEST)) + new HealthIndicatorImpact( + DiskHealthIndicatorService.NAME, + IMPACT_INGEST_UNAVAILABLE_ID, + 2, + "description", + List.of(ImpactArea.INGEST) + ) ), null ); @@ -817,10 +826,20 @@ private List getTestIndicatorResults() { null, new SimpleHealthIndicatorDetails(ShardsAvailabilityHealthIndicatorServiceTests.addDefaults(Map.of())), List.of( - new HealthIndicatorImpact(ShardsAvailabilityHealthIndicatorService.NAME, PRIMARY_UNASSIGNED_IMPACT_ID, 2, "description", - List.of(ImpactArea.SEARCH)), - new HealthIndicatorImpact(ShardsAvailabilityHealthIndicatorService.NAME, REPLICA_UNASSIGNED_IMPACT_ID, 2, "description", - List.of(ImpactArea.SEARCH)) + new HealthIndicatorImpact( + ShardsAvailabilityHealthIndicatorService.NAME, + PRIMARY_UNASSIGNED_IMPACT_ID, + 2, + "description", + List.of(ImpactArea.SEARCH) + ), + new HealthIndicatorImpact( + ShardsAvailabilityHealthIndicatorService.NAME, + REPLICA_UNASSIGNED_IMPACT_ID, + 2, + "description", + List.of(ImpactArea.SEARCH) + ) ), null ); From f22debdba0ed24c70fa8e8a4276c4ec2123a2d94 Mon Sep 17 00:00:00 2001 From: Sam Xiao Date: Thu, 20 Feb 2025 14:01:43 -0500 Subject: [PATCH 5/5] Update docs/changelog/122390.yaml --- docs/changelog/122390.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/122390.yaml diff --git a/docs/changelog/122390.yaml b/docs/changelog/122390.yaml new file mode 100644 index 0000000000000..4338519ad60ba --- /dev/null +++ b/docs/changelog/122390.yaml @@ -0,0 +1,5 @@ +pr: 122390 +summary: Add health indicator impact to `HealthPeriodicLogger` +area: Health +type: enhancement +issues: []