Skip to content

Commit f371e66

Browse files
authored
[6.8] Fix delete_expired_data/nightly maintenance when many model snapshots need deleting (#57174)
The queries performed by the expired data removers pull back entire documents when only a few fields are required. For ModelSnapshots in particular this is a problem as they contain quantiles which may be 100s of KB and the search size is set to 10,000. This change makes the search more efficient by only requesting the fields needed to work out which expired data should be deleted.
1 parent 3761165 commit f371e66

File tree

7 files changed

+210
-45
lines changed

7 files changed

+210
-45
lines changed

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/retention/ExpiredForecastsRemover.java

Lines changed: 62 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,7 @@
1414
import org.elasticsearch.action.search.SearchResponse;
1515
import org.elasticsearch.action.support.ThreadedActionListener;
1616
import org.elasticsearch.client.Client;
17-
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
18-
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
19-
import org.elasticsearch.common.xcontent.XContentFactory;
20-
import org.elasticsearch.common.xcontent.XContentParser;
21-
import org.elasticsearch.common.xcontent.XContentType;
17+
import org.elasticsearch.common.document.DocumentField;
2218
import org.elasticsearch.index.query.BoolQueryBuilder;
2319
import org.elasticsearch.index.query.QueryBuilder;
2420
import org.elasticsearch.index.query.QueryBuilders;
@@ -39,8 +35,6 @@
3935
import org.joda.time.DateTime;
4036
import org.joda.time.chrono.ISOChronology;
4137

42-
import java.io.IOException;
43-
import java.io.InputStream;
4438
import java.util.ArrayList;
4539
import java.util.List;
4640
import java.util.Objects;
@@ -83,6 +77,10 @@ public void remove(ActionListener<Boolean> listener, Supplier<Boolean> isTimedOu
8377
.filter(QueryBuilders.termQuery(Result.RESULT_TYPE.getPreferredName(), ForecastRequestStats.RESULT_TYPE_VALUE))
8478
.filter(QueryBuilders.existsQuery(ForecastRequestStats.EXPIRY_TIME.getPreferredName())));
8579
source.size(MAX_FORECASTS);
80+
source.fetchSource(false);
81+
source.docValueField(Job.ID.getPreferredName(), null);
82+
source.docValueField(ForecastRequestStats.FORECAST_ID.getPreferredName(), null);
83+
source.docValueField(ForecastRequestStats.EXPIRY_TIME.getPreferredName(), "epoch_millis");
8684

8785
// _doc is the most efficient sort order and will also disable scoring
8886
source.sort(ElasticsearchMappings.ES_DOC);
@@ -94,11 +92,9 @@ public void remove(ActionListener<Boolean> listener, Supplier<Boolean> isTimedOu
9492
}
9593

9694
private void deleteForecasts(SearchResponse searchResponse, ActionListener<Boolean> listener, Supplier<Boolean> isTimedOutSupplier) {
97-
List<ForecastRequestStats> forecastsToDelete;
98-
try {
99-
forecastsToDelete = findForecastsToDelete(searchResponse);
100-
} catch (IOException e) {
101-
listener.onFailure(e);
95+
List<JobForecastId> forecastsToDelete = findForecastsToDelete(searchResponse);
96+
if (forecastsToDelete.isEmpty()) {
97+
listener.onResponse(true);
10298
return;
10399
}
104100

@@ -129,39 +125,56 @@ public void onFailure(Exception e) {
129125
});
130126
}
131127

132-
private List<ForecastRequestStats> findForecastsToDelete(SearchResponse searchResponse) throws IOException {
133-
List<ForecastRequestStats> forecastsToDelete = new ArrayList<>();
128+
private List<JobForecastId> findForecastsToDelete(SearchResponse searchResponse) {
129+
List<JobForecastId> forecastsToDelete = new ArrayList<>();
134130

135131
SearchHits hits = searchResponse.getHits();
136132
if (hits.getTotalHits() > MAX_FORECASTS) {
137133
LOGGER.info("More than [{}] forecasts were found. This run will only delete [{}] of them", MAX_FORECASTS, MAX_FORECASTS);
138134
}
139135

140136
for (SearchHit hit : hits.getHits()) {
141-
try (InputStream stream = hit.getSourceRef().streamInput();
142-
XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(
143-
NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, stream)) {
144-
ForecastRequestStats forecastRequestStats = ForecastRequestStats.LENIENT_PARSER.apply(parser, null);
145-
if (forecastRequestStats.getExpiryTime().toEpochMilli() < cutoffEpochMs) {
146-
forecastsToDelete.add(forecastRequestStats);
137+
DocumentField docField = hit.field(ForecastRequestStats.EXPIRY_TIME.getPreferredName());
138+
if (docField == null) {
139+
LOGGER.warn("Forecast request stats document [{}] has a null [{}] field", hit.getId(),
140+
ForecastRequestStats.EXPIRY_TIME.getPreferredName());
141+
continue;
142+
}
143+
144+
Long expiryMs = parseDateField(docField.getValue());
145+
if (expiryMs == null) {
146+
LOGGER.warn("Forecast request stats document [{}] date field [{}] cannot be parsed", hit.getId(),
147+
ForecastRequestStats.EXPIRY_TIME.getPreferredName());
148+
continue;
149+
}
150+
151+
if (expiryMs < cutoffEpochMs) {
152+
JobForecastId idPair = new JobForecastId(
153+
stringFieldValueOrNull(hit, Job.ID.getPreferredName()),
154+
stringFieldValueOrNull(hit, Forecast.FORECAST_ID.getPreferredName()));
155+
156+
if (idPair.hasNullValue() == false) {
157+
forecastsToDelete.add(idPair);
147158
}
148159
}
149160
}
150161
return forecastsToDelete;
151162
}
152163

153-
private DeleteByQueryRequest buildDeleteByQuery(List<ForecastRequestStats> forecastsToDelete) {
164+
private DeleteByQueryRequest buildDeleteByQuery(List<JobForecastId> ids) {
154165
DeleteByQueryRequest request = new DeleteByQueryRequest();
155166
request.setSlices(5);
156167

157168
request.indices(RESULTS_INDEX_PATTERN);
158169
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery().minimumShouldMatch(1);
159170
boolQuery.must(QueryBuilders.termsQuery(Result.RESULT_TYPE.getPreferredName(),
160171
ForecastRequestStats.RESULT_TYPE_VALUE, Forecast.RESULT_TYPE_VALUE));
161-
for (ForecastRequestStats forecastToDelete : forecastsToDelete) {
162-
boolQuery.should(QueryBuilders.boolQuery()
163-
.must(QueryBuilders.termQuery(Job.ID.getPreferredName(), forecastToDelete.getJobId()))
164-
.must(QueryBuilders.termQuery(Forecast.FORECAST_ID.getPreferredName(), forecastToDelete.getForecastId())));
172+
for (JobForecastId jobForecastId : ids) {
173+
if (jobForecastId.hasNullValue() == false) {
174+
boolQuery.should(QueryBuilders.boolQuery()
175+
.must(QueryBuilders.termQuery(Job.ID.getPreferredName(), jobForecastId.jobId))
176+
.must(QueryBuilders.termQuery(Forecast.FORECAST_ID.getPreferredName(), jobForecastId.forecastId)));
177+
}
165178
}
166179
QueryBuilder query = QueryBuilders.boolQuery().filter(boolQuery);
167180
request.setQuery(query);
@@ -171,4 +184,28 @@ private DeleteByQueryRequest buildDeleteByQuery(List<ForecastRequestStats> forec
171184

172185
return request;
173186
}
187+
188+
static Long parseDateField(Object value) {
189+
if (value instanceof String) { // doc_value field with the epoch_millis format
190+
return Long.parseLong((String)value);
191+
} else if (value instanceof Long) { // pre-6.0 field
192+
return (Long)value;
193+
} else {
194+
return null;
195+
}
196+
}
197+
198+
private static class JobForecastId {
199+
private final String jobId;
200+
private final String forecastId;
201+
202+
private JobForecastId(String jobId, String forecastId) {
203+
this.jobId = jobId;
204+
this.forecastId = forecastId;
205+
}
206+
207+
boolean hasNullValue() {
208+
return jobId == null || forecastId == null;
209+
}
210+
}
174211
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/retention/ExpiredModelSnapshotsRemover.java

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,14 @@ protected void removeDataBefore(Job job, long cutoffEpochMs, ActionListener<Bool
8888
.mustNot(activeSnapshotFilter)
8989
.mustNot(retainFilter);
9090

91-
searchRequest.source(new SearchSourceBuilder().query(query).size(MODEL_SNAPSHOT_SEARCH_SIZE).sort(ElasticsearchMappings.ES_DOC));
91+
SearchSourceBuilder source = new SearchSourceBuilder();
92+
source.query(query);
93+
source.size(MODEL_SNAPSHOT_SEARCH_SIZE);
94+
source.sort(ElasticsearchMappings.ES_DOC);
95+
source.fetchSource(false);
96+
source.docValueField(Job.ID.getPreferredName(), null);
97+
source.docValueField(ModelSnapshotField.SNAPSHOT_ID.getPreferredName(), null);
98+
searchRequest.source(source);
9299

93100
getClient().execute(SearchAction.INSTANCE, searchRequest, new ThreadedActionListener<>(LOGGER, threadPool,
94101
MachineLearning.UTILITY_THREAD_POOL_NAME, expiredSnapshotsListener(job.getId(), listener), false));
@@ -99,11 +106,18 @@ private ActionListener<SearchResponse> expiredSnapshotsListener(String jobId, Ac
99106
@Override
100107
public void onResponse(SearchResponse searchResponse) {
101108
try {
102-
List<ModelSnapshot> modelSnapshots = new ArrayList<>();
109+
List<JobSnapshotId> snapshotIds = new ArrayList<>();
103110
for (SearchHit hit : searchResponse.getHits()) {
104-
modelSnapshots.add(ModelSnapshot.fromJson(hit.getSourceRef()));
111+
JobSnapshotId idPair = new JobSnapshotId(
112+
stringFieldValueOrNull(hit, Job.ID.getPreferredName()),
113+
stringFieldValueOrNull(hit, ModelSnapshotField.SNAPSHOT_ID.getPreferredName()));
114+
115+
if (idPair.hasNullValue() == false) {
116+
snapshotIds.add(idPair);
117+
}
105118
}
106-
deleteModelSnapshots(new VolatileCursorIterator<>(modelSnapshots), listener);
119+
120+
deleteModelSnapshots(new VolatileCursorIterator<>(snapshotIds), listener);
107121
} catch (Exception e) {
108122
onFailure(e);
109123
}
@@ -116,14 +130,14 @@ public void onFailure(Exception e) {
116130
};
117131
}
118132

119-
private void deleteModelSnapshots(Iterator<ModelSnapshot> modelSnapshotIterator, ActionListener<Boolean> listener) {
133+
private void deleteModelSnapshots(Iterator<JobSnapshotId> modelSnapshotIterator, ActionListener<Boolean> listener) {
120134
if (modelSnapshotIterator.hasNext() == false) {
121135
listener.onResponse(true);
122136
return;
123137
}
124-
ModelSnapshot modelSnapshot = modelSnapshotIterator.next();
125-
DeleteModelSnapshotAction.Request deleteSnapshotRequest = new DeleteModelSnapshotAction.Request(
126-
modelSnapshot.getJobId(), modelSnapshot.getSnapshotId());
138+
JobSnapshotId idPair = modelSnapshotIterator.next();
139+
DeleteModelSnapshotAction.Request deleteSnapshotRequest =
140+
new DeleteModelSnapshotAction.Request(idPair.jobId, idPair.snapshotId);
127141
getClient().execute(DeleteModelSnapshotAction.INSTANCE, deleteSnapshotRequest, new ActionListener<AcknowledgedResponse>() {
128142
@Override
129143
public void onResponse(AcknowledgedResponse response) {
@@ -136,9 +150,23 @@ public void onResponse(AcknowledgedResponse response) {
136150

137151
@Override
138152
public void onFailure(Exception e) {
139-
listener.onFailure(new ElasticsearchException("[" + modelSnapshot.getJobId() + "] Failed to delete snapshot ["
140-
+ modelSnapshot.getSnapshotId() + "]", e));
153+
listener.onFailure(new ElasticsearchException("[" + idPair.jobId + "] Failed to delete snapshot ["
154+
+ idPair.snapshotId + "]", e));
141155
}
142156
});
143157
}
158+
159+
static class JobSnapshotId {
160+
private final String jobId;
161+
private final String snapshotId;
162+
163+
JobSnapshotId(String jobId, String snapshotId) {
164+
this.jobId = jobId;
165+
this.snapshotId = snapshotId;
166+
}
167+
168+
boolean hasNullValue() {
169+
return jobId == null || snapshotId == null;
170+
}
171+
}
144172
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/retention/MlDataRemover.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,29 @@
66
package org.elasticsearch.xpack.ml.job.retention;
77

88
import org.elasticsearch.action.ActionListener;
9+
import org.elasticsearch.common.document.DocumentField;
10+
import org.elasticsearch.search.SearchHit;
911

1012
import java.util.function.Supplier;
1113

1214
public interface MlDataRemover {
1315
void remove(ActionListener<Boolean> listener, Supplier<Boolean> isTimedOutSupplier);
16+
17+
/**
18+
* Extract {@code fieldName} from {@code hit} and if it is a string
19+
* return the string else {@code null}.
20+
* @param hit The search hit
21+
* @param fieldName Field to find
22+
* @return value iff the docfield is present and it is a string. Otherwise {@code null}
23+
*/
24+
default String stringFieldValueOrNull(SearchHit hit, String fieldName) {
25+
DocumentField docField = hit.field(fieldName);
26+
if (docField != null) {
27+
Object value = docField.getValue();
28+
if (value instanceof String) {
29+
return (String)value;
30+
}
31+
}
32+
return null;
33+
}
1434
}

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/retention/AbstractExpiredJobDataRemoverTests.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,13 @@ private static SearchResponse createSearchResponse(List<? extends ToXContent> to
9292
return searchResponse;
9393
}
9494

95+
static SearchResponse createSearchResponseFromHits(List<SearchHit> hits) {
96+
SearchHits searchHits = new SearchHits(hits.toArray(new SearchHit[] {}), hits.size(), 1.0f);
97+
SearchResponse searchResponse = mock(SearchResponse.class);
98+
when(searchResponse.getHits()).thenReturn(searchHits);
99+
return searchResponse;
100+
}
101+
95102
public void testRemoveGivenNoJobs() throws IOException {
96103
SearchResponse response = createSearchResponse(Collections.emptyList());
97104

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
package org.elasticsearch.xpack.ml.job.retention;
8+
9+
import org.elasticsearch.test.ESTestCase;
10+
11+
import java.util.Date;
12+
13+
public class ExpiredForecastsRemoverTests extends ESTestCase {
14+
15+
public void testDateParsing() {
16+
assertEquals(Long.valueOf(1462096800000L), ExpiredForecastsRemover.parseDateField("1462096800000"));
17+
assertEquals(Long.valueOf(1462096800000L), ExpiredForecastsRemover.parseDateField(1462096800000L));
18+
assertNull(ExpiredForecastsRemover.parseDateField(new Date()));
19+
}
20+
}

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/retention/ExpiredModelSnapshotsRemoverTests.java

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.elasticsearch.cluster.service.ClusterService;
1818
import org.elasticsearch.common.settings.Settings;
1919
import org.elasticsearch.mock.orig.Mockito;
20+
import org.elasticsearch.search.SearchHit;
2021
import org.elasticsearch.test.ESTestCase;
2122
import org.elasticsearch.threadpool.FixedExecutorBuilder;
2223
import org.elasticsearch.threadpool.ThreadPool;
@@ -25,7 +26,9 @@
2526
import org.elasticsearch.xpack.core.ml.job.config.JobTests;
2627
import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndex;
2728
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSnapshot;
29+
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSnapshotField;
2830
import org.elasticsearch.xpack.ml.MachineLearning;
31+
import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
2932
import org.junit.After;
3033
import org.junit.Before;
3134
import org.mockito.invocation.InvocationOnMock;
@@ -118,11 +121,13 @@ public void testRemove_GivenJobsWithMixedRetentionPolicies() throws IOException
118121
JobTests.buildJobBuilder("snapshots-2").setModelSnapshotRetentionDays(17L).setModelSnapshotId("active").build()
119122
));
120123

121-
List<ModelSnapshot> snapshots1JobSnapshots = Arrays.asList(createModelSnapshot("snapshots-1", "snapshots-1_1"),
122-
createModelSnapshot("snapshots-1", "snapshots-1_2"));
123-
List<ModelSnapshot> snapshots2JobSnapshots = Collections.singletonList(createModelSnapshot("snapshots-2", "snapshots-2_1"));
124-
searchResponsesPerCall.add(AbstractExpiredJobDataRemoverTests.createSearchResponse(snapshots1JobSnapshots));
125-
searchResponsesPerCall.add(AbstractExpiredJobDataRemoverTests.createSearchResponse(snapshots2JobSnapshots));
124+
SearchHit snapshot1_1 = createModelSnapshotQueryHit("snapshots-1", "snapshots-1_1");
125+
SearchHit snapshot1_2 = createModelSnapshotQueryHit("snapshots-1", "snapshots-1_2");
126+
searchResponsesPerCall.add(
127+
AbstractExpiredJobDataRemoverTests.createSearchResponseFromHits(Arrays.asList(snapshot1_1, snapshot1_2)));
128+
129+
SearchHit snapshot2_1 = createModelSnapshotQueryHit("snapshots-2", "snapshots-2_1");
130+
searchResponsesPerCall.add(AbstractExpiredJobDataRemoverTests.createSearchResponseFromHits(Collections.singletonList(snapshot2_1)));
126131

127132
createExpiredModelSnapshotsRemover().remove(listener, () -> false);
128133

@@ -203,12 +208,13 @@ public void testRemove_GivenClientDeleteSnapshotRequestsFail() throws IOExceptio
203208
JobTests.buildJobBuilder("snapshots-2").setModelSnapshotRetentionDays(17L).setModelSnapshotId("active").build()
204209
));
205210

206-
List<ModelSnapshot> snapshots1JobSnapshots = Arrays.asList(createModelSnapshot("snapshots-1", "snapshots-1_1"),
207-
createModelSnapshot("snapshots-1", "snapshots-1_2"));
208-
List<ModelSnapshot> snapshots2JobSnapshots = Collections.singletonList(createModelSnapshot("snapshots-2", "snapshots-2_1"));
209-
searchResponsesPerCall.add(AbstractExpiredJobDataRemoverTests.createSearchResponse(snapshots1JobSnapshots));
210-
searchResponsesPerCall.add(AbstractExpiredJobDataRemoverTests.createSearchResponse(snapshots2JobSnapshots));
211+
SearchHit snapshot1_1 = createModelSnapshotQueryHit("snapshots-1", "snapshots-1_1");
212+
SearchHit snapshot1_2 = createModelSnapshotQueryHit("snapshots-1", "snapshots-1_2");
213+
searchResponsesPerCall.add(AbstractExpiredJobDataRemoverTests.createSearchResponseFromHits(
214+
Arrays.asList(snapshot1_1, snapshot1_2)));
211215

216+
SearchHit snapshot2_2 = createModelSnapshotQueryHit("snapshots-2", "snapshots-2_1");
217+
searchResponsesPerCall.add(AbstractExpiredJobDataRemoverTests.createSearchResponseFromHits(Collections.singletonList(snapshot2_2)));
212218
createExpiredModelSnapshotsRemover().remove(listener, () -> false);
213219

214220
listener.waitToCompletion();
@@ -224,6 +230,17 @@ public void testRemove_GivenClientDeleteSnapshotRequestsFail() throws IOExceptio
224230
assertThat(deleteSnapshotRequest.getSnapshotId(), equalTo("snapshots-1_1"));
225231
}
226232

233+
public void testJobSnapshotId() {
234+
ExpiredModelSnapshotsRemover.JobSnapshotId id = new ExpiredModelSnapshotsRemover.JobSnapshotId("a", "b");
235+
assertFalse(id.hasNullValue());
236+
id = new ExpiredModelSnapshotsRemover.JobSnapshotId(null, "b");
237+
assertTrue(id.hasNullValue());
238+
id = new ExpiredModelSnapshotsRemover.JobSnapshotId("a", null);
239+
assertTrue(id.hasNullValue());
240+
id = new ExpiredModelSnapshotsRemover.JobSnapshotId(null, null);
241+
assertTrue(id.hasNullValue());
242+
}
243+
227244
@SuppressWarnings("unchecked")
228245
private void givenJobs(List<Job> jobs) throws IOException {
229246
SearchResponse response = AbstractExpiredJobDataRemoverTests.createSearchResponse(jobs);
@@ -287,4 +304,10 @@ public Void answer(InvocationOnMock invocationOnMock) {
287304
}).when(client).execute(same(DeleteModelSnapshotAction.INSTANCE), any(), any());
288305
}
289306

307+
private static SearchHit createModelSnapshotQueryHit(String jobId, String snapshotId) {
308+
SearchHitBuilder hitBuilder = new SearchHitBuilder(0);
309+
hitBuilder.addField(Job.ID.getPreferredName(), Collections.singletonList(jobId));
310+
hitBuilder.addField(ModelSnapshotField.SNAPSHOT_ID.getPreferredName(), Collections.singletonList(snapshotId));
311+
return hitBuilder.build();
312+
}
290313
}

0 commit comments

Comments
 (0)