From f0dede0dd09b958d7fab53204faea00f82ab2da8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Mon, 5 May 2025 10:43:52 +0200 Subject: [PATCH 1/2] Add logging to FullClusterRestartSystemIndexCompatibilityIT This test occasionally fails on version 8.19 clusters when we are waiting for system index migration to finish. In order to understand this better, log time when migration API call is sent and also catch assertion error and log migration API status in that case to help the investigation. --- ...sterRestartSystemIndexCompatibilityIT.java | 42 +++++++++++++------ 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/qa/lucene-index-compatibility/src/javaRestTest/java/org/elasticsearch/lucene/FullClusterRestartSystemIndexCompatibilityIT.java b/qa/lucene-index-compatibility/src/javaRestTest/java/org/elasticsearch/lucene/FullClusterRestartSystemIndexCompatibilityIT.java index 985a073bd6034..6373336eb71d3 100644 --- a/qa/lucene-index-compatibility/src/javaRestTest/java/org/elasticsearch/lucene/FullClusterRestartSystemIndexCompatibilityIT.java +++ b/qa/lucene-index-compatibility/src/javaRestTest/java/org/elasticsearch/lucene/FullClusterRestartSystemIndexCompatibilityIT.java @@ -9,6 +9,7 @@ package org.elasticsearch.lucene; +import org.apache.http.util.EntityUtils; import org.elasticsearch.client.Request; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.Response; @@ -17,6 +18,7 @@ import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.cluster.util.Version; +import org.elasticsearch.test.junit.annotations.TestIssueLogging; import org.elasticsearch.test.rest.ObjectPath; import java.io.IOException; @@ -43,6 +45,10 @@ public FullClusterRestartSystemIndexCompatibilityIT(Version version) { * 2. After update to N-1 (latest) perform a system index migration step, also write block the index * 3. on N, check that async search results are still retrievable and we can write to the system index */ + @TestIssueLogging( + value = "org.elasticsearch.lucene.FullClusterRestartSystemIndexCompatibilityIT:DEBUG", + issueUrl = "https://github.com/elastic/elasticsearch/issues/127245" + ) public void testAsyncSearchIndexMigration() throws Exception { final String index = suffix("index"); final String asyncSearchIndex = ".async-search"; @@ -83,19 +89,29 @@ public void testAsyncSearchIndexMigration() throws Exception { ObjectPath.createFromResponse(client().performRequest(migrateRequest)).evaluate("features.0.feature_name"), equalTo("async_search") ); - assertBusy(() -> { - Request checkMigrateProgress = new Request("GET", "/_migration/system_features"); - Response resp = null; - try { - assertFalse( - ObjectPath.createFromResponse(client().performRequest(checkMigrateProgress)) - .evaluate("migration_status") - .equals("IN_PROGRESS") - ); - } catch (IOException e) { - throw new AssertionError("System feature migration failed", e); - } - }); + + logger.debug("--> starting system index migration"); + Request checkMigrateProgress = new Request("GET", "/_migration/system_features"); + try { + assertBusy(() -> { + Response resp = null; + try { + assertFalse( + ObjectPath.createFromResponse(client().performRequest(checkMigrateProgress)) + .evaluate("migration_status") + .equals("IN_PROGRESS") + ); + } catch (IOException e) { + throw new AssertionError("System feature migration failed", e); + } + }); + } catch (AssertionError e) { + logger.debug( + "--> system index migration not finished yet, response: {}", + EntityUtils.toString(client().performRequest(checkMigrateProgress).getEntity()) + ); + throw e; + } // check search results from n-2 search are still readable assertAsyncSearchHitCount(async_search_ids.get("n-2_id"), numDocs); From 2c3bf442cc6f8f2f79962f1f9b63ba2a5e98603c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Mon, 5 May 2025 15:48:05 +0200 Subject: [PATCH 2/2] Increase timeout, migration might be occasionally slow --- ...sterRestartSystemIndexCompatibilityIT.java | 43 ++++++------------- 1 file changed, 14 insertions(+), 29 deletions(-) diff --git a/qa/lucene-index-compatibility/src/javaRestTest/java/org/elasticsearch/lucene/FullClusterRestartSystemIndexCompatibilityIT.java b/qa/lucene-index-compatibility/src/javaRestTest/java/org/elasticsearch/lucene/FullClusterRestartSystemIndexCompatibilityIT.java index 6373336eb71d3..3bc2fde3e396b 100644 --- a/qa/lucene-index-compatibility/src/javaRestTest/java/org/elasticsearch/lucene/FullClusterRestartSystemIndexCompatibilityIT.java +++ b/qa/lucene-index-compatibility/src/javaRestTest/java/org/elasticsearch/lucene/FullClusterRestartSystemIndexCompatibilityIT.java @@ -9,7 +9,6 @@ package org.elasticsearch.lucene; -import org.apache.http.util.EntityUtils; import org.elasticsearch.client.Request; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.Response; @@ -18,12 +17,12 @@ import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.cluster.util.Version; -import org.elasticsearch.test.junit.annotations.TestIssueLogging; import org.elasticsearch.test.rest.ObjectPath; import java.io.IOException; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.TimeUnit; import static org.hamcrest.Matchers.equalTo; @@ -45,10 +44,6 @@ public FullClusterRestartSystemIndexCompatibilityIT(Version version) { * 2. After update to N-1 (latest) perform a system index migration step, also write block the index * 3. on N, check that async search results are still retrievable and we can write to the system index */ - @TestIssueLogging( - value = "org.elasticsearch.lucene.FullClusterRestartSystemIndexCompatibilityIT:DEBUG", - issueUrl = "https://github.com/elastic/elasticsearch/issues/127245" - ) public void testAsyncSearchIndexMigration() throws Exception { final String index = suffix("index"); final String asyncSearchIndex = ".async-search"; @@ -89,29 +84,19 @@ public void testAsyncSearchIndexMigration() throws Exception { ObjectPath.createFromResponse(client().performRequest(migrateRequest)).evaluate("features.0.feature_name"), equalTo("async_search") ); - - logger.debug("--> starting system index migration"); - Request checkMigrateProgress = new Request("GET", "/_migration/system_features"); - try { - assertBusy(() -> { - Response resp = null; - try { - assertFalse( - ObjectPath.createFromResponse(client().performRequest(checkMigrateProgress)) - .evaluate("migration_status") - .equals("IN_PROGRESS") - ); - } catch (IOException e) { - throw new AssertionError("System feature migration failed", e); - } - }); - } catch (AssertionError e) { - logger.debug( - "--> system index migration not finished yet, response: {}", - EntityUtils.toString(client().performRequest(checkMigrateProgress).getEntity()) - ); - throw e; - } + assertBusy(() -> { + Request checkMigrateProgress = new Request("GET", "/_migration/system_features"); + Response resp = null; + try { + assertFalse( + ObjectPath.createFromResponse(client().performRequest(checkMigrateProgress)) + .evaluate("migration_status") + .equals("IN_PROGRESS") + ); + } catch (IOException e) { + throw new AssertionError("System feature migration failed", e); + } + }, 30, TimeUnit.SECONDS); // check search results from n-2 search are still readable assertAsyncSearchHitCount(async_search_ids.get("n-2_id"), numDocs);