Skip to content

Commit 2a68d42

Browse files
authored
[7.7][ML] Fix monitoring if orphaned anomaly detector persistent tasks exist (#57243)
Since #51888 the ML job stats endpoint has returned entries for jobs that have a persistent task but no job config. Such orphaned tasks caused monitoring to fail. This change ignores any such corrupt jobs for monitoring purposes. Backport of #57235.
1 parent b640f0b commit 2a68d42

File tree

2 files changed

+46
-1
lines changed

2 files changed

+46
-1
lines changed

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningFeatureSet.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,9 +215,14 @@ private void addJobsUsage(GetJobsStatsAction.Response response, List<Job> jobs)
215215
Map<String, Long> allJobsCreatedBy = jobs.stream().map(this::jobCreatedBy)
216216
.collect(Collectors.groupingBy(item -> item, Collectors.counting()));;
217217
for (GetJobsStatsAction.Response.JobStats jobStats : jobsStats) {
218-
ModelSizeStats modelSizeStats = jobStats.getModelSizeStats();
219218
Job job = jobMap.get(jobStats.getJobId());
219+
if (job == null) {
220+
// It's possible we can get job stats without a corresponding job config, if a
221+
// persistent task is orphaned. Omit these corrupt jobs from the usage info.
222+
continue;
223+
}
220224
int detectorsCount = job.getAnalysisConfig().getDetectors().size();
225+
ModelSizeStats modelSizeStats = jobStats.getModelSizeStats();
221226
double modelSize = modelSizeStats == null ? 0.0
222227
: jobStats.getModelSizeStats().getModelBytes();
223228

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningFeatureSetTests.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,46 @@ public void testUsage() throws Exception {
333333
}
334334
}
335335

336+
/**
 * Checks the usage output when job stats exist for a job ID that has no job config.
 *
 * The fixture passes job configs for "opened1" and "closed1" only, but job stats for
 * "opened1", "opened2" and "closed1". The assertions then expect every aggregate under
 * "jobs._all" to reflect just the two jobs that have a config.
 */
public void testUsageWithOrphanedTask() throws Exception {
337+
when(licenseState.isMachineLearningAllowed()).thenReturn(true);
338+
Settings.Builder settings = Settings.builder().put(commonSettings);
339+
settings.put("xpack.ml.enabled", true);
340+
341+
Job opened1 = buildJob("opened1", Collections.singletonList(buildMinDetector("foo")),
342+
Collections.singletonMap("created_by", randomFrom("a-cool-module", "a_cool_module", "a cool module")));
343+
GetJobsStatsAction.Response.JobStats opened1JobStats = buildJobStats("opened1", JobState.OPENED, 100L, 3L);
344+
// NB: we have JobStats but no Job for "opened2"
345+
GetJobsStatsAction.Response.JobStats opened2JobStats = buildJobStats("opened2", JobState.OPENED, 200L, 8L);
346+
Job closed1 = buildJob("closed1", Arrays.asList(buildMinDetector("foo"), buildMinDetector("bar"), buildMinDetector("foobar")));
347+
GetJobsStatsAction.Response.JobStats closed1JobStats = buildJobStats("closed1", JobState.CLOSED, 300L, 0);
348+
// Two job configs, three job stats: "opened2" is the entry without a config.
givenJobs(Arrays.asList(opened1, closed1), Arrays.asList(opened1JobStats, opened2JobStats, closed1JobStats));
349+
350+
MachineLearningFeatureSet featureSet = new MachineLearningFeatureSet(TestEnvironment.newEnvironment(settings.build()),
351+
clusterService, client, licenseState, jobManagerHolder);
352+
PlainActionFuture<Usage> future = new PlainActionFuture<>();
353+
featureSet.usage(future);
354+
XPackFeatureSet.Usage usage = future.get();
355+
356+
// Render the usage object to JSON so individual fields can be read back by path.
XContentSource source;
357+
try (XContentBuilder builder = XContentFactory.jsonBuilder()) {
358+
usage.toXContent(builder, ToXContent.EMPTY_PARAMS);
359+
source = new XContentSource(builder);
360+
}
361+
362+
// The orphaned job should be excluded from the usage info
363+
assertThat(source.getValue("jobs._all.count"), equalTo(2));
364+
// Detector figures match opened1 (1 detector) and closed1 (3 detectors).
assertThat(source.getValue("jobs._all.detectors.min"), equalTo(1.0));
365+
assertThat(source.getValue("jobs._all.detectors.max"), equalTo(3.0));
366+
assertThat(source.getValue("jobs._all.detectors.total"), equalTo(4.0));
367+
assertThat(source.getValue("jobs._all.detectors.avg"), equalTo(2.0));
368+
// Expected values match 100 and 300 with opened2's 200 left out.
// NOTE(review): presumably the third buildJobStats argument is the model size in bytes - confirm against buildJobStats.
assertThat(source.getValue("jobs._all.model_size.min"), equalTo(100.0));
369+
assertThat(source.getValue("jobs._all.model_size.max"), equalTo(300.0));
370+
assertThat(source.getValue("jobs._all.model_size.total"), equalTo(400.0));
371+
assertThat(source.getValue("jobs._all.model_size.avg"), equalTo(200.0));
372+
// NOTE(review): all three randomFrom variants are expected to be reported under the key "a_cool_module";
// closed1 is built without a created_by entry and is presumably what is counted as "unknown" - confirm against jobCreatedBy.
assertThat(source.getValue("jobs._all.created_by.a_cool_module"), equalTo(1));
373+
assertThat(source.getValue("jobs._all.created_by.unknown"), equalTo(1));
374+
}
375+
336376
public void testUsageDisabledML() throws Exception {
337377
when(licenseState.isMachineLearningAllowed()).thenReturn(true);
338378
Settings.Builder settings = Settings.builder().put(commonSettings);

0 commit comments

Comments
 (0)