Skip to content

Commit 1a4b3d3

Browse files
authored
File-based settings health indicator (#117081)
* Add FileSettingsService health indicator
* spotless
* YELLOW for any failure, plus most_recent_failure
1 parent 06840ba commit 1a4b3d3

File tree

5 files changed

+230
-12
lines changed

5 files changed

+230
-12
lines changed

server/src/main/java/org/elasticsearch/node/NodeConstruction.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@
187187
import org.elasticsearch.reservedstate.ReservedClusterStateHandlerProvider;
188188
import org.elasticsearch.reservedstate.action.ReservedClusterSettingsAction;
189189
import org.elasticsearch.reservedstate.service.FileSettingsService;
190+
import org.elasticsearch.reservedstate.service.FileSettingsService.FileSettingsHealthIndicatorService;
190191
import org.elasticsearch.rest.action.search.SearchResponseMetrics;
191192
import org.elasticsearch.script.ScriptModule;
192193
import org.elasticsearch.script.ScriptService;
@@ -1032,10 +1033,12 @@ private void construct(
10321033
actionModule.getReservedClusterStateService().installStateHandler(new ReservedRepositoryAction(repositoriesService));
10331034
actionModule.getReservedClusterStateService().installStateHandler(new ReservedPipelineAction());
10341035

1036+
FileSettingsHealthIndicatorService fileSettingsHealthIndicatorService = new FileSettingsHealthIndicatorService();
10351037
FileSettingsService fileSettingsService = new FileSettingsService(
10361038
clusterService,
10371039
actionModule.getReservedClusterStateService(),
1038-
environment
1040+
environment,
1041+
fileSettingsHealthIndicatorService
10391042
);
10401043

10411044
RestoreService restoreService = new RestoreService(
@@ -1129,7 +1132,8 @@ private void construct(
11291132
featureService,
11301133
threadPool,
11311134
telemetryProvider,
1132-
repositoriesService
1135+
repositoriesService,
1136+
fileSettingsHealthIndicatorService
11331137
)
11341138
);
11351139

@@ -1301,7 +1305,8 @@ private Module loadDiagnosticServices(
13011305
FeatureService featureService,
13021306
ThreadPool threadPool,
13031307
TelemetryProvider telemetryProvider,
1304-
RepositoriesService repositoriesService
1308+
RepositoriesService repositoriesService,
1309+
FileSettingsHealthIndicatorService fileSettingsHealthIndicatorService
13051310
) {
13061311

13071312
MasterHistoryService masterHistoryService = new MasterHistoryService(transportService, threadPool, clusterService);
@@ -1316,7 +1321,8 @@ private Module loadDiagnosticServices(
13161321
new StableMasterHealthIndicatorService(coordinationDiagnosticsService, clusterService),
13171322
new RepositoryIntegrityHealthIndicatorService(clusterService, featureService),
13181323
new DiskHealthIndicatorService(clusterService, featureService),
1319-
new ShardsCapacityHealthIndicatorService(clusterService, featureService)
1324+
new ShardsCapacityHealthIndicatorService(clusterService, featureService),
1325+
fileSettingsHealthIndicatorService
13201326
);
13211327
var pluginHealthIndicatorServices = pluginsService.filterPlugins(HealthPlugin.class)
13221328
.flatMap(plugin -> plugin.getHealthIndicatorServices().stream());

server/src/main/java/org/elasticsearch/reservedstate/service/FileSettingsService.java

Lines changed: 93 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,27 @@
2222
import org.elasticsearch.cluster.service.ClusterService;
2323
import org.elasticsearch.common.file.MasterNodeFileWatchingService;
2424
import org.elasticsearch.env.Environment;
25+
import org.elasticsearch.health.HealthIndicatorDetails;
26+
import org.elasticsearch.health.HealthIndicatorImpact;
27+
import org.elasticsearch.health.HealthIndicatorResult;
28+
import org.elasticsearch.health.HealthIndicatorService;
29+
import org.elasticsearch.health.SimpleHealthIndicatorDetails;
30+
import org.elasticsearch.health.node.HealthInfo;
2531
import org.elasticsearch.xcontent.XContentParseException;
2632
import org.elasticsearch.xcontent.XContentParserConfiguration;
2733

2834
import java.io.BufferedInputStream;
2935
import java.io.IOException;
3036
import java.nio.file.Files;
37+
import java.util.List;
38+
import java.util.Map;
3139
import java.util.concurrent.ExecutionException;
40+
import java.util.concurrent.atomic.AtomicLong;
41+
import java.util.concurrent.atomic.AtomicReference;
3242

43+
import static org.elasticsearch.health.HealthStatus.GREEN;
44+
import static org.elasticsearch.health.HealthStatus.YELLOW;
45+
import static org.elasticsearch.health.ImpactArea.DEPLOYMENT_MANAGEMENT;
3346
import static org.elasticsearch.reservedstate.service.ReservedStateVersionCheck.HIGHER_OR_SAME_VERSION;
3447
import static org.elasticsearch.reservedstate.service.ReservedStateVersionCheck.HIGHER_VERSION_ONLY;
3548
import static org.elasticsearch.xcontent.XContentType.JSON;
@@ -53,17 +66,29 @@ public class FileSettingsService extends MasterNodeFileWatchingService implement
5366
public static final String NAMESPACE = "file_settings";
5467
public static final String OPERATOR_DIRECTORY = "operator";
5568
private final ReservedClusterStateService stateService;
69+
private final FileSettingsHealthIndicatorService healthIndicatorService;
5670

5771
/**
5872
* Constructs the {@link FileSettingsService}
5973
*
6074
* @param clusterService so we can register ourselves as a cluster state change listener
6175
* @param stateService an instance of the immutable cluster state controller, so we can perform the cluster state changes
6276
* @param environment we need the environment to pull the location of the config and operator directories
77+
* @param healthIndicatorService tracks the success or failure of file-based settings
6378
*/
64-
public FileSettingsService(ClusterService clusterService, ReservedClusterStateService stateService, Environment environment) {
79+
public FileSettingsService(
80+
ClusterService clusterService,
81+
ReservedClusterStateService stateService,
82+
Environment environment,
83+
FileSettingsHealthIndicatorService healthIndicatorService
84+
) {
6585
super(clusterService, environment.configFile().toAbsolutePath().resolve(OPERATOR_DIRECTORY).resolve(SETTINGS_FILE_NAME));
6686
this.stateService = stateService;
87+
this.healthIndicatorService = healthIndicatorService;
88+
}
89+
90+
public FileSettingsHealthIndicatorService healthIndicatorService() {
91+
return healthIndicatorService;
6792
}
6893

6994
/**
@@ -121,6 +146,7 @@ protected boolean shouldRefreshFileState(ClusterState clusterState) {
121146
@Override
122147
protected void processFileChanges() throws ExecutionException, InterruptedException, IOException {
123148
logger.info("processing path [{}] for [{}]", watchedFile(), NAMESPACE);
149+
healthIndicatorService.changeOccurred();
124150
processFileChanges(HIGHER_VERSION_ONLY);
125151
}
126152

@@ -131,6 +157,7 @@ protected void processFileChanges() throws ExecutionException, InterruptedExcept
131157
@Override
132158
protected void processFileOnServiceStart() throws IOException, ExecutionException, InterruptedException {
133159
logger.info("processing path [{}] for [{}] on service start", watchedFile(), NAMESPACE);
160+
healthIndicatorService.changeOccurred();
134161
processFileChanges(HIGHER_OR_SAME_VERSION);
135162
}
136163

@@ -146,6 +173,16 @@ private void processFileChanges(ReservedStateVersionCheck versionCheck) throws I
146173
completion.get();
147174
}
148175

176+
private void completeProcessing(Exception e, PlainActionFuture<Void> completion) {
177+
if (e != null) {
178+
healthIndicatorService.failureOccurred(e.toString());
179+
completion.onFailure(e);
180+
} else {
181+
completion.onResponse(null);
182+
healthIndicatorService.successOccurred();
183+
}
184+
}
185+
149186
@Override
150187
protected void onProcessFileChangesException(Exception e) {
151188
if (e instanceof ExecutionException) {
@@ -172,11 +209,61 @@ protected void processInitialFileMissing() throws ExecutionException, Interrupte
172209
completion.get();
173210
}
174211

175-
private static void completeProcessing(Exception e, PlainActionFuture<Void> completion) {
176-
if (e != null) {
177-
completion.onFailure(e);
178-
} else {
179-
completion.onResponse(null);
212+
public static class FileSettingsHealthIndicatorService implements HealthIndicatorService {
213+
static final String NAME = "file_settings";
214+
static final String NO_CHANGES_SYMPTOM = "No file-based setting changes have occurred";
215+
static final String SUCCESS_SYMPTOM = "The most recent file-based settings were applied successfully";
216+
static final String FAILURE_SYMPTOM = "The most recent file-based settings encountered an error";
217+
218+
static final List<HealthIndicatorImpact> STALE_SETTINGS_IMPACT = List.of(
219+
new HealthIndicatorImpact(
220+
NAME,
221+
"stale",
222+
3,
223+
"The most recent file-based settings changes have not been applied.",
224+
List.of(DEPLOYMENT_MANAGEMENT)
225+
)
226+
);
227+
228+
private final AtomicLong changeCount = new AtomicLong(0);
229+
private final AtomicLong failureStreak = new AtomicLong(0);
230+
private final AtomicReference<String> mostRecentFailure = new AtomicReference<>();
231+
232+
public void changeOccurred() {
233+
changeCount.incrementAndGet();
234+
}
235+
236+
public void successOccurred() {
237+
failureStreak.set(0);
238+
}
239+
240+
public void failureOccurred(String description) {
241+
failureStreak.incrementAndGet();
242+
mostRecentFailure.set(description);
243+
}
244+
245+
@Override
246+
public String name() {
247+
return NAME;
248+
}
249+
250+
@Override
251+
public HealthIndicatorResult calculate(boolean verbose, int maxAffectedResourcesCount, HealthInfo healthInfo) {
252+
if (0 == changeCount.get()) {
253+
return createIndicator(GREEN, NO_CHANGES_SYMPTOM, HealthIndicatorDetails.EMPTY, List.of(), List.of());
254+
}
255+
long numFailures = failureStreak.get();
256+
if (0 == numFailures) {
257+
return createIndicator(GREEN, SUCCESS_SYMPTOM, HealthIndicatorDetails.EMPTY, List.of(), List.of());
258+
} else {
259+
return createIndicator(
260+
YELLOW,
261+
FAILURE_SYMPTOM,
262+
new SimpleHealthIndicatorDetails(Map.of("failure_streak", numFailures, "most_recent_failure", mostRecentFailure.get())),
263+
STALE_SETTINGS_IMPACT,
264+
List.of()
265+
);
266+
}
180267
}
181268
}
182269
}

server/src/test/java/org/elasticsearch/action/ingest/ReservedPipelineActionTests.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,12 @@ public void setup() {
134134
);
135135

136136
fileSettingsService = spy(
137-
new FileSettingsService(clusterService, mock(ReservedClusterStateService.class), newEnvironment(Settings.EMPTY))
137+
new FileSettingsService(
138+
clusterService,
139+
mock(ReservedClusterStateService.class),
140+
newEnvironment(Settings.EMPTY),
141+
new FileSettingsService.FileSettingsHealthIndicatorService()
142+
)
138143
);
139144
}
140145

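server/src/test/java/org/elasticsearch/reservedstate/service/FileSettingsHealthIndicatorServiceTests.java

Lines changed: 90 additions & 0 deletions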
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.reservedstate.service;
11+
12+
import org.elasticsearch.health.HealthIndicatorDetails;
13+
import org.elasticsearch.health.HealthIndicatorResult;
14+
import org.elasticsearch.health.SimpleHealthIndicatorDetails;
15+
import org.elasticsearch.reservedstate.service.FileSettingsService.FileSettingsHealthIndicatorService;
16+
import org.elasticsearch.test.ESTestCase;
17+
import org.junit.Before;
18+
19+
import java.util.List;
20+
import java.util.Map;
21+
22+
import static org.elasticsearch.health.HealthStatus.GREEN;
23+
import static org.elasticsearch.health.HealthStatus.YELLOW;
24+
import static org.elasticsearch.reservedstate.service.FileSettingsService.FileSettingsHealthIndicatorService.FAILURE_SYMPTOM;
25+
import static org.elasticsearch.reservedstate.service.FileSettingsService.FileSettingsHealthIndicatorService.NO_CHANGES_SYMPTOM;
26+
import static org.elasticsearch.reservedstate.service.FileSettingsService.FileSettingsHealthIndicatorService.STALE_SETTINGS_IMPACT;
27+
import static org.elasticsearch.reservedstate.service.FileSettingsService.FileSettingsHealthIndicatorService.SUCCESS_SYMPTOM;
28+
29+
/**
30+
* Here, we test {@link FileSettingsHealthIndicatorService} in isolation;
31+
* we do not test that {@link FileSettingsService} uses it correctly.
32+
*/
33+
public class FileSettingsHealthIndicatorServiceTests extends ESTestCase {
34+
35+
FileSettingsHealthIndicatorService healthIndicatorService;
36+
37+
@Before
38+
public void initialize() {
39+
healthIndicatorService = new FileSettingsHealthIndicatorService();
40+
}
41+
42+
public void testInitiallyGreen() {
43+
assertEquals(
44+
new HealthIndicatorResult("file_settings", GREEN, NO_CHANGES_SYMPTOM, HealthIndicatorDetails.EMPTY, List.of(), List.of()),
45+
healthIndicatorService.calculate(false, null)
46+
);
47+
}
48+
49+
public void testGreenYellowYellowGreen() {
50+
healthIndicatorService.changeOccurred();
51+
// This is a strange case: a change occurred, but neither success nor failure have been reported yet.
52+
// While the change is still in progress, we don't change the status.
53+
assertEquals(
54+
new HealthIndicatorResult("file_settings", GREEN, SUCCESS_SYMPTOM, HealthIndicatorDetails.EMPTY, List.of(), List.of()),
55+
healthIndicatorService.calculate(false, null)
56+
);
57+
58+
healthIndicatorService.failureOccurred("whoopsie 1");
59+
assertEquals(
60+
new HealthIndicatorResult(
61+
"file_settings",
62+
YELLOW,
63+
FAILURE_SYMPTOM,
64+
new SimpleHealthIndicatorDetails(Map.of("failure_streak", 1L, "most_recent_failure", "whoopsie 1")),
65+
STALE_SETTINGS_IMPACT,
66+
List.of()
67+
),
68+
healthIndicatorService.calculate(false, null)
69+
);
70+
71+
healthIndicatorService.failureOccurred("whoopsie #2");
72+
assertEquals(
73+
new HealthIndicatorResult(
74+
"file_settings",
75+
YELLOW,
76+
FAILURE_SYMPTOM,
77+
new SimpleHealthIndicatorDetails(Map.of("failure_streak", 2L, "most_recent_failure", "whoopsie #2")),
78+
STALE_SETTINGS_IMPACT,
79+
List.of()
80+
),
81+
healthIndicatorService.calculate(false, null)
82+
);
83+
84+
healthIndicatorService.successOccurred();
85+
assertEquals(
86+
new HealthIndicatorResult("file_settings", GREEN, SUCCESS_SYMPTOM, HealthIndicatorDetails.EMPTY, List.of(), List.of()),
87+
healthIndicatorService.calculate(false, null)
88+
);
89+
}
90+
}

0 commit comments

Comments
 (0)