Skip to content

Commit 14b5736

Browse files
authored
[ML] Fix Flaky Audit Message Assertion in testWithDatastream for RegressionIT and ClassificationIT (#138065)
Both testWithDatastream in RegressionIT.java and testWithDatastreams in ClassificationIT.java sometimes fail because assertThatAuditMessagesMatch doesn't reliably wait for all audit messages to be written. Audit messages are written asynchronously (fire-and-forget), creating a race condition. The tests complete quickly (on a small dataset: 300 training + 50 non-training rows), increasing the likelihood of a race condition. Both tests use the same assertThatAuditMessagesMatch method from MlNativeDataFrameAnalyticsIntegTestCase, so fixing it resolves both test failures. This PR verifies all expected message prefixes exist, adds robust waiting with timeout and retries, refreshes the notifications index before each check, and restores a more lenient size check, ensuring reliability and thoroughness. Fixes #128166 Fixes #129457
1 parent c0e0bda commit 14b5736

File tree

2 files changed

+34
-13
lines changed

2 files changed

+34
-13
lines changed

muted-tests.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -200,9 +200,6 @@ tests:
200200
- class: org.elasticsearch.entitlement.runtime.policy.FileAccessTreeTests
201201
method: testWindowsAbsolutPathAccess
202202
issue: https://github.com/elastic/elasticsearch/issues/129168
203-
- class: org.elasticsearch.xpack.ml.integration.ClassificationIT
204-
method: testWithDatastreams
205-
issue: https://github.com/elastic/elasticsearch/issues/129457
206203
- class: org.elasticsearch.xpack.profiling.action.GetStatusActionIT
207204
method: testWaitsUntilResourcesAreCreated
208205
issue: https://github.com/elastic/elasticsearch/issues/129486

x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/MlNativeDataFrameAnalyticsIntegTestCase.java

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
import org.apache.lucene.util.SetOnce;
1010
import org.elasticsearch.action.admin.indices.get.GetIndexAction;
1111
import org.elasticsearch.action.admin.indices.get.GetIndexRequest;
12+
import org.elasticsearch.action.admin.indices.refresh.RefreshAction;
13+
import org.elasticsearch.action.admin.indices.refresh.RefreshRequest;
14+
import org.elasticsearch.action.support.broadcast.BroadcastResponse;
1215
import org.elasticsearch.cluster.ClusterState;
1316
import org.elasticsearch.common.Strings;
1417
import org.elasticsearch.common.settings.Settings;
@@ -56,29 +59,29 @@
5659
import org.elasticsearch.xpack.core.ml.utils.QueryProvider;
5760
import org.elasticsearch.xpack.ml.dataframe.StoredProgress;
5861
import org.hamcrest.Matcher;
59-
import org.hamcrest.Matchers;
6062

6163
import java.io.IOException;
6264
import java.io.UncheckedIOException;
6365
import java.util.ArrayList;
64-
import java.util.Arrays;
6566
import java.util.Collection;
6667
import java.util.Collections;
6768
import java.util.HashSet;
6869
import java.util.List;
70+
import java.util.Locale;
6971
import java.util.Map;
7072
import java.util.Optional;
7173
import java.util.Set;
7274
import java.util.concurrent.TimeUnit;
75+
import java.util.stream.Collectors;
7376

7477
import static org.elasticsearch.common.xcontent.support.XContentMapValues.extractValue;
7578
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
7679
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse;
80+
import static org.hamcrest.Matchers.anyOf;
7781
import static org.hamcrest.Matchers.arrayWithSize;
7882
import static org.hamcrest.Matchers.equalTo;
7983
import static org.hamcrest.Matchers.greaterThan;
8084
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
81-
import static org.hamcrest.Matchers.hasItems;
8285
import static org.hamcrest.Matchers.hasSize;
8386
import static org.hamcrest.Matchers.is;
8487
import static org.hamcrest.Matchers.nullValue;
@@ -359,23 +362,44 @@ protected TrainedModelDefinition getModelDefinition(String modelId) throws IOExc
359362
/**
360363
* Asserts whether the audit messages fetched from index match provided prefixes.
361364
* More specifically, in order to pass:
362-
* 1. the number of fetched messages must equal the number of provided prefixes
365+
* 1. ALL expected message prefixes must be found in the fetched messages
363366
* AND
364-
* 2. each fetched message must start with the corresponding prefix
367+
* 2. each fetched message that matches must start with the corresponding prefix
365368
*/
366369
protected static void assertThatAuditMessagesMatch(String configId, String... expectedAuditMessagePrefixes) throws Exception {
367370
// Make sure we wrote to the audit
368371
// Since calls to write the AbstractAuditor are sent and forgot (async) we could have returned from the start,
369372
// finished the job (as this is a very short analytics job), all without the audit being fully written.
370373
awaitIndexExists(NotificationsIndex.NOTIFICATIONS_INDEX);
371374

372-
@SuppressWarnings("unchecked")
373-
Matcher<String>[] itemMatchers = Arrays.stream(expectedAuditMessagePrefixes).map(Matchers::startsWith).toArray(Matcher[]::new);
375+
Set<String> expectedPrefixes = Set.of(expectedAuditMessagePrefixes);
374376
assertBusy(() -> {
377+
// Refresh the notifications index to ensure latest writes are visible
378+
RefreshRequest refreshRequest = new RefreshRequest(NotificationsIndex.NOTIFICATIONS_INDEX);
379+
BroadcastResponse refreshResponse = client().execute(RefreshAction.INSTANCE, refreshRequest).actionGet();
380+
assertThat(refreshResponse.getStatus().getStatus(), anyOf(equalTo(200), equalTo(201)));
381+
375382
List<String> allAuditMessages = fetchAllAuditMessages(configId);
376-
assertThat(allAuditMessages, hasItems(itemMatchers));
377-
// TODO: Consider restoring this assertion when we are sure all the audit messages are available at this point.
378-
// assertThat("Messages: " + allAuditMessages, allAuditMessages, hasSize(expectedAuditMessagePrefixes.length));
383+
384+
// Find which expected prefixes match any of the audit messages
385+
Set<String> foundPrefixes = expectedPrefixes.stream()
386+
.filter(prefix -> allAuditMessages.stream().anyMatch(msg -> msg.startsWith(prefix)))
387+
.collect(Collectors.toSet());
388+
389+
// Only calculate missing prefixes if not all were found
390+
if (foundPrefixes.size() != expectedPrefixes.size()) {
391+
Set<String> missingPrefixes = new HashSet<>(expectedPrefixes);
392+
missingPrefixes.removeAll(foundPrefixes);
393+
fail(
394+
String.format(
395+
Locale.ROOT,
396+
"Expected audit messages not found for config [%s]. Missing prefixes: %s. Found messages: %s",
397+
configId,
398+
missingPrefixes,
399+
allAuditMessages
400+
)
401+
);
402+
}
379403
});
380404
}
381405

0 commit comments

Comments
 (0)