Skip to content

Commit 6b5f6fb

Browse files
davidkylemaxhniebergallprwhelanMax Hniebergallelasticmachine
authored
[ML] Wait for all shards to be active when creating the ML stats index (#108202)
* Wait for all shards to be active when creating the ML stats index * Unmute tests * Wait for the stats index in cleanup * more waiting for the stats index * Add adminclient to ensureHealth Co-authored-by: Pat Whelan <[email protected]> * fix errors causing build failures --------- Co-authored-by: Max Hniebergall <[email protected]> Co-authored-by: Pat Whelan <[email protected]> Co-authored-by: Max Hniebergall <[email protected]> Co-authored-by: Elastic Machine <[email protected]>
1 parent 099a3db commit 6b5f6fb

File tree

8 files changed

+42
-17
lines changed

8 files changed

+42
-17
lines changed

test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1726,7 +1726,7 @@ public static void ensureHealth(RestClient restClient, Consumer<Request> request
17261726
ensureHealth(restClient, "", requestConsumer);
17271727
}
17281728

1729-
protected static void ensureHealth(RestClient restClient, String index, Consumer<Request> requestConsumer) throws IOException {
1729+
public static void ensureHealth(RestClient restClient, String index, Consumer<Request> requestConsumer) throws IOException {
17301730
Request request = new Request("GET", "/_cluster/health" + (index.isBlank() ? "" : "/" + index));
17311731
requestConsumer.accept(request);
17321732
try {

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/MlStatsIndex.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
package org.elasticsearch.xpack.core.ml;
88

99
import org.elasticsearch.action.ActionListener;
10+
import org.elasticsearch.action.support.ActiveShardCount;
1011
import org.elasticsearch.client.internal.Client;
1112
import org.elasticsearch.cluster.ClusterState;
1213
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
@@ -66,6 +67,15 @@ public static void createStatsIndexAndAliasIfNecessary(
6667
TimeValue masterNodeTimeout,
6768
ActionListener<Boolean> listener
6869
) {
69-
MlIndexAndAlias.createIndexAndAliasIfNecessary(client, state, resolver, TEMPLATE_NAME, writeAlias(), masterNodeTimeout, listener);
70+
MlIndexAndAlias.createIndexAndAliasIfNecessary(
71+
client,
72+
state,
73+
resolver,
74+
TEMPLATE_NAME,
75+
writeAlias(),
76+
masterNodeTimeout,
77+
ActiveShardCount.ALL,
78+
listener
79+
);
7080
}
7181
}

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/persistence/AnomalyDetectorsIndex.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import org.elasticsearch.action.ActionListener;
1010
import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest;
1111
import org.elasticsearch.action.admin.cluster.health.TransportClusterHealthAction;
12+
import org.elasticsearch.action.support.ActiveShardCount;
1213
import org.elasticsearch.client.internal.Client;
1314
import org.elasticsearch.cluster.ClusterState;
1415
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
@@ -91,6 +92,10 @@ public static void createStateIndexAndAliasIfNecessary(
9192
AnomalyDetectorsIndexFields.STATE_INDEX_PREFIX,
9293
AnomalyDetectorsIndex.jobStateIndexWriteAlias(),
9394
masterNodeTimeout,
95+
// TODO: shard count default preserves the existing behaviour when the
96+
// parameter was added but it may be that ActiveShardCount.ALL is a
97+
// better option
98+
ActiveShardCount.DEFAULT,
9499
finalListener
95100
);
96101
}
@@ -123,6 +128,10 @@ public static void createStateIndexAndAliasIfNecessaryAndWaitForYellow(
123128
AnomalyDetectorsIndexFields.STATE_INDEX_PREFIX,
124129
AnomalyDetectorsIndex.jobStateIndexWriteAlias(),
125130
masterNodeTimeout,
131+
// TODO: shard count default preserves the existing behaviour when the
132+
// parameter was added but it may be that ActiveShardCount.ALL is a
133+
// better option
134+
ActiveShardCount.DEFAULT,
126135
stateIndexAndAliasCreated
127136
);
128137
}

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/utils/MlIndexAndAlias.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
2222
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
2323
import org.elasticsearch.action.admin.indices.template.put.TransportPutComposableIndexTemplateAction;
24+
import org.elasticsearch.action.support.ActiveShardCount;
2425
import org.elasticsearch.action.support.IndicesOptions;
2526
import org.elasticsearch.action.support.master.AcknowledgedResponse;
2627
import org.elasticsearch.client.internal.Client;
@@ -105,6 +106,7 @@ public static void createIndexAndAliasIfNecessary(
105106
String indexPatternPrefix,
106107
String alias,
107108
TimeValue masterNodeTimeout,
109+
ActiveShardCount waitForShardCount,
108110
ActionListener<Boolean> finalListener
109111
) {
110112

@@ -133,7 +135,7 @@ public static void createIndexAndAliasIfNecessary(
133135

134136
if (concreteIndexNames.length == 0) {
135137
if (indexPointedByCurrentWriteAlias.isEmpty()) {
136-
createFirstConcreteIndex(client, firstConcreteIndex, alias, true, indexCreatedListener);
138+
createFirstConcreteIndex(client, firstConcreteIndex, alias, true, waitForShardCount, indexCreatedListener);
137139
return;
138140
}
139141
logger.error(
@@ -144,7 +146,7 @@ public static void createIndexAndAliasIfNecessary(
144146
);
145147
} else if (concreteIndexNames.length == 1 && concreteIndexNames[0].equals(legacyIndexWithoutSuffix)) {
146148
if (indexPointedByCurrentWriteAlias.isEmpty()) {
147-
createFirstConcreteIndex(client, firstConcreteIndex, alias, true, indexCreatedListener);
149+
createFirstConcreteIndex(client, firstConcreteIndex, alias, true, waitForShardCount, indexCreatedListener);
148150
return;
149151
}
150152
if (indexPointedByCurrentWriteAlias.get().equals(legacyIndexWithoutSuffix)) {
@@ -153,6 +155,7 @@ public static void createIndexAndAliasIfNecessary(
153155
firstConcreteIndex,
154156
alias,
155157
false,
158+
waitForShardCount,
156159
indexCreatedListener.delegateFailureAndWrap(
157160
(l, unused) -> updateWriteAlias(client, alias, legacyIndexWithoutSuffix, firstConcreteIndex, l)
158161
)
@@ -241,13 +244,15 @@ private static void createFirstConcreteIndex(
241244
String index,
242245
String alias,
243246
boolean addAlias,
247+
ActiveShardCount waitForShardCount,
244248
ActionListener<Boolean> listener
245249
) {
246250
logger.info("About to create first concrete index [{}] with alias [{}]", index, alias);
247251
CreateIndexRequestBuilder requestBuilder = client.admin().indices().prepareCreate(index);
248252
if (addAlias) {
249253
requestBuilder.addAlias(new Alias(alias).isHidden(true));
250254
}
255+
requestBuilder.setWaitForActiveShards(waitForShardCount);
251256
CreateIndexRequest request = requestBuilder.request();
252257

253258
executeAsyncWithOrigin(

x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/integration/MlRestTestStateCleaner.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ public MlRestTestStateCleaner(Logger logger, RestClient adminClient) {
3030
}
3131

3232
public void resetFeatures() throws IOException {
33+
waitForMlStatsIndexToInitialize();
3334
deleteAllTrainedModelIngestPipelines();
3435
// This resets all features, not just ML, but they should have been getting reset between tests anyway so it shouldn't matter
3536
adminClient.performRequest(new Request("POST", "/_features/_reset"));
@@ -54,4 +55,12 @@ private void deleteAllTrainedModelIngestPipelines() throws IOException {
5455
}
5556
}
5657
}
58+
59+
private void waitForMlStatsIndexToInitialize() throws IOException {
60+
ESRestTestCase.ensureHealth(adminClient, ".ml-stats-*", (request) -> {
61+
request.addParameter("wait_for_no_initializing_shards", "true");
62+
request.addParameter("level", "shards");
63+
request.addParameter("timeout", "30s");
64+
});
65+
}
5766
}

x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/utils/MlIndexAndAliasTests.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
1919
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
2020
import org.elasticsearch.action.admin.indices.template.put.TransportPutComposableIndexTemplateAction;
21+
import org.elasticsearch.action.support.ActiveShardCount;
2122
import org.elasticsearch.client.internal.AdminClient;
2223
import org.elasticsearch.client.internal.Client;
2324
import org.elasticsearch.client.internal.ClusterAdminClient;
@@ -370,7 +371,8 @@ private void createIndexAndAliasIfNecessary(ClusterState clusterState) {
370371
TestIndexNameExpressionResolver.newInstance(),
371372
TEST_INDEX_PREFIX,
372373
TEST_INDEX_ALIAS,
373-
TEST_REQUEST_TIMEOUT,
374+
TimeValue.timeValueSeconds(30),
375+
ActiveShardCount.DEFAULT,
374376
listener
375377
);
376378
}

x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/InferenceProcessorIT.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,13 @@ private void putModelAlias(String modelAlias, String newModel) throws IOExceptio
4040
}
4141

4242
@SuppressWarnings("unchecked")
43-
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/107777")
4443
public void testCreateAndDeletePipelineWithInferenceProcessor() throws Exception {
4544
putRegressionModel(MODEL_ID);
4645
String pipelineId = "regression-model-pipeline";
4746
createdPipelines.add(pipelineId);
4847
putPipeline(MODEL_ID, pipelineId);
4948

49+
waitForStats();
5050
Map<String, Object> statsAsMap = getStats(MODEL_ID);
5151
List<Integer> pipelineCount = (List<Integer>) XContentMapValues.extractValue("trained_model_stats.pipeline_count", statsAsMap);
5252
assertThat(pipelineCount.get(0), equalTo(1));
@@ -107,6 +107,7 @@ public void testCreateAndDeletePipelineWithInferenceProcessorByName() throws Exc
107107
createdPipelines.add("second_pipeline");
108108
putPipeline("regression_second", "second_pipeline");
109109

110+
waitForStats();
110111
Map<String, Object> statsAsMap = getStats(MODEL_ID);
111112
List<Integer> pipelineCount = (List<Integer>) XContentMapValues.extractValue("trained_model_stats.pipeline_count", statsAsMap);
112113
assertThat(pipelineCount.get(0), equalTo(2));

x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/inference_crud.yml

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -563,9 +563,6 @@ setup:
563563

564564
---
565565
"Test delete given model referenced by pipeline":
566-
- skip:
567-
awaits_fix: "https://github.com/elastic/elasticsearch/issues/80703"
568-
569566
- do:
570567
ingest.put_pipeline:
571568
id: "pipeline-using-a-classification-model"
@@ -592,9 +589,6 @@ setup:
592589

593590
---
594591
"Test force delete given model referenced by pipeline":
595-
- skip:
596-
awaits_fix: "https://github.com/elastic/elasticsearch/issues/80703"
597-
598592
- do:
599593
ingest.put_pipeline:
600594
id: "pipeline-using-a-classification-model"
@@ -622,9 +616,6 @@ setup:
622616

623617
---
624618
"Test delete given model with alias referenced by pipeline":
625-
- skip:
626-
awaits_fix: "https://github.com/elastic/elasticsearch/issues/80703"
627-
628619
- do:
629620
ml.put_trained_model_alias:
630621
model_alias: "alias-to-a-classification-model"
@@ -655,8 +646,6 @@ setup:
655646

656647
---
657648
"Test force delete given model with alias referenced by pipeline":
658-
- skip:
659-
awaits_fix: "https://github.com/elastic/elasticsearch/issues/106652"
660649
- do:
661650
ml.put_trained_model_alias:
662651
model_alias: "alias-to-a-classification-model"

0 commit comments

Comments
 (0)