Fix mapping conflicts in clone/split/shrink APIs (elastic#137096) (elastic#137118)

nielsbauman · web-flow · commit 11bd5a0dbd66 · 2025-10-25T10:37:11.000+02:00
If an index is in either `logsdb` or `time_series` mode and specifies a non-default `@timestamp` type mapping (e.g. `date_nanos`), using the clone, split, or shrink API will result in shards that are unable to initialize/recover due to a mapping conflict. As of elastic#133954, the `searchable_snapshot` ILM action makes use of the clone API by default - if the index has more than `0` replicas - and will thus also run into this issue.
diff --git a/docs/changelog/137096.yaml b/docs/changelog/137096.yaml
@@ -0,0 +1,5 @@
+pr: 137096
+summary: Fix mapping conflicts in clone/split/shrink APIs
+area: Indices APIs
+type: bug
+issues: []
diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/indices/create/CloneIndexIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/indices/create/CloneIndexIT.java
@@ -20,6 +20,7 @@
 import org.elasticsearch.index.seqno.SeqNoStats;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.index.IndexVersionUtils;
+import org.elasticsearch.xcontent.ObjectPath;
 import org.elasticsearch.xcontent.XContentType;
 
 import java.util.List;
@@ -203,4 +204,60 @@ public void testResizeChangeIndexSorts() {
         });
         assertThat(error.getMessage(), containsString("can't override index sort when resizing an index"));
     }
+
+    /**
+     * Tests that cloning a logsdb index with a non-default @timestamp type (date_nanos) doesn't result in any mapping conflicts.
+     */
+    public void testCloneLogsdbIndexWithNonDefaultTimestamp() {
+        // Create a write-blocked logsdb index with a date_nanos @timestamp field and a random number of replicas
+        final int numberOfReplicas = randomInt(internalCluster().numDataNodes() - 1);
+        final var settings = indexSettings(1, numberOfReplicas).put("index.mode", "logsdb").put("index.blocks.write", true);
+        prepareCreate("source").setSettings(settings).setMapping("@timestamp", "type=date_nanos").get();
+        ensureGreen();
+
+        // Clone the source index into a new index named "target"
+        indicesAdmin().prepareResizeIndex("source", "target")
+            .setResizeType(ResizeType.CLONE)
+            // We need to explicitly set the number of replicas in case the source has 0 replicas and the cluster has only 1 data node
+            .setSettings(Settings.builder().put("index.number_of_replicas", numberOfReplicas).build())
+            .get();
+
+        // Verify that the target index kept the date_nanos @timestamp mapping and that the cluster reaches green health afterwards
+        final var targetMappings = indicesAdmin().prepareGetMappings("target").get();
+        assertThat(
+            ObjectPath.eval("properties.@timestamp.type", targetMappings.mappings().get("target").getSourceAsMap()),
+            equalTo("date_nanos")
+        );
+        ensureGreen();
+    }
+
+    /**
+     * Tests that cloning a time series index with a non-default @timestamp type (date_nanos) doesn't result in any mapping conflicts.
+     */
+    public void testCloneTimeSeriesIndexWithNonDefaultTimestamp() {
+        // Create a write-blocked time series index (date_nanos @timestamp, sensor_id dimension) with a random number of replicas
+        final int numberOfReplicas = randomInt(internalCluster().numDataNodes() - 1);
+        final var settings = indexSettings(1, numberOfReplicas).put("index.mode", "time_series")
+            .put("index.routing_path", "sensor_id")
+            .put("index.blocks.write", true);
+        prepareCreate("source").setSettings(settings)
+            .setMapping("@timestamp", "type=date_nanos", "sensor_id", "type=keyword,time_series_dimension=true")
+            .get();
+        ensureGreen();
+
+        // Clone the source index into a new index named "target"
+        indicesAdmin().prepareResizeIndex("source", "target")
+            .setResizeType(ResizeType.CLONE)
+            // We need to explicitly set the number of replicas in case the source has 0 replicas and the cluster has only 1 data node
+            .setSettings(Settings.builder().put("index.number_of_replicas", numberOfReplicas).build())
+            .get();
+
+        // Verify that the target index kept the date_nanos @timestamp mapping and that the cluster reaches green health afterwards
+        final var targetMappings = indicesAdmin().prepareGetMappings("target").get();
+        assertThat(
+            ObjectPath.eval("properties.@timestamp.type", targetMappings.mappings().get("target").getSourceAsMap()),
+            equalTo("date_nanos")
+        );
+        ensureGreen();
+    }
 }
diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/indices/create/ShrinkIndexIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/indices/create/ShrinkIndexIT.java
@@ -51,6 +51,7 @@
 import org.elasticsearch.indices.IndicesService;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.index.IndexVersionUtils;
+import org.elasticsearch.xcontent.ObjectPath;
 import org.elasticsearch.xcontent.XContentType;
 
 import java.util.Arrays;
@@ -608,6 +609,63 @@ public void testShrinkThenSplitWithFailedNode() throws Exception {
         assertNoResizeSourceIndexSettings("splitagain");
     }
 
+    /**
+     * Tests that shrinking a logsdb index with a non-default @timestamp type (date_nanos) doesn't result in any mapping conflicts.
+     */
+    public void testShrinkLogsdbIndexWithNonDefaultTimestamp() {
+        // Create a write-blocked, 2-shard logsdb index with a date_nanos @timestamp field, pinned to one random data node
+        final var settings = indexSettings(2, 0).put("index.mode", "logsdb")
+            .put("index.blocks.write", true)
+            .put("index.routing.allocation.require._name", internalCluster().getRandomDataNodeName());
+        prepareCreate("source").setSettings(settings).setMapping("@timestamp", "type=date_nanos").get();
+        ensureGreen();
+
+        // Shrink the source index from 2 shards to 1 shard in a new index named "target"
+        indicesAdmin().prepareResizeIndex("source", "target")
+            .setResizeType(ResizeType.SHRINK)
+            // We explicitly set the number of replicas to 0 (same as the source) because the cluster may have only 1 data node
+            .setSettings(Settings.builder().put("index.number_of_shards", 1).put("index.number_of_replicas", 0).build())
+            .get();
+
+        // Verify that the target index kept the date_nanos @timestamp mapping and that the cluster reaches green health afterwards
+        final var targetMappings = indicesAdmin().prepareGetMappings("target").get();
+        assertThat(
+            ObjectPath.eval("properties.@timestamp.type", targetMappings.mappings().get("target").getSourceAsMap()),
+            equalTo("date_nanos")
+        );
+        ensureGreen();
+    }
+
+    /**
+     * Tests that shrinking a time series index with a non-default @timestamp type (date_nanos) doesn't result in any mapping conflicts.
+     */
+    public void testShrinkTimeSeriesIndexWithNonDefaultTimestamp() {
+        // Create a write-blocked, 2-shard time series index (date_nanos @timestamp, sensor_id dimension) on one random data node
+        final var settings = indexSettings(2, 0).put("index.mode", "time_series")
+            .put("index.routing_path", "sensor_id")
+            .put("index.routing.allocation.require._name", internalCluster().getRandomDataNodeName())
+            .put("index.blocks.write", true);
+        prepareCreate("source").setSettings(settings)
+            .setMapping("@timestamp", "type=date_nanos", "sensor_id", "type=keyword,time_series_dimension=true")
+            .get();
+        ensureGreen();
+
+        // Shrink the source index from 2 shards to 1 shard in a new index named "target"
+        indicesAdmin().prepareResizeIndex("source", "target")
+            .setResizeType(ResizeType.SHRINK)
+            // We explicitly set the number of replicas to 0 (same as the source) because the cluster may have only 1 data node
+            .setSettings(Settings.builder().put("index.number_of_shards", 1).put("index.number_of_replicas", 0).build())
+            .get();
+
+        // Verify that the target index kept the date_nanos @timestamp mapping and that the cluster reaches green health afterwards
+        final var targetMappings = indicesAdmin().prepareGetMappings("target").get();
+        assertThat(
+            ObjectPath.eval("properties.@timestamp.type", targetMappings.mappings().get("target").getSourceAsMap()),
+            equalTo("date_nanos")
+        );
+        ensureGreen();
+    }
+
     static void assertNoResizeSourceIndexSettings(final String index) {
         ClusterStateResponse clusterStateResponse = clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT)
             .clear()
diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/indices/create/SplitIndexIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/indices/create/SplitIndexIT.java
@@ -46,6 +46,7 @@
 import org.elasticsearch.indices.IndicesService;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.index.IndexVersionUtils;
+import org.elasticsearch.xcontent.ObjectPath;
 import org.elasticsearch.xcontent.XContentType;
 
 import java.io.IOException;
@@ -493,4 +494,31 @@ public void testCreateSplitWithIndexSort() throws Exception {
         assertSortedSegments("target", expectedIndexSort);
         assertNoResizeSourceIndexSettings("target");
     }
+
+    /**
+     * Tests that splitting a logsdb index with a non-default @timestamp type (date_nanos) doesn't result in any mapping conflicts.
+     * N.B.: we don't test time_series indices as split is not supported for them.
+     */
+    public void testSplitLogsdbIndexWithNonDefaultTimestamp() {
+        // Create a write-blocked, 1-shard logsdb index with a date_nanos @timestamp field and a random number of replicas
+        final int numberOfReplicas = randomInt(internalCluster().numDataNodes() - 1);
+        final var settings = indexSettings(1, numberOfReplicas).put("index.mode", "logsdb").put("index.blocks.write", true);
+        prepareCreate("source").setSettings(settings).setMapping("@timestamp", "type=date_nanos").get();
+        ensureGreen();
+
+        // Split the source index from 1 shard into 2 shards in a new index named "target"
+        indicesAdmin().prepareResizeIndex("source", "target")
+            .setResizeType(ResizeType.SPLIT)
+            // We need to explicitly set the number of replicas in case the source has 0 replicas and the cluster has only 1 data node
+            .setSettings(Settings.builder().put("index.number_of_shards", 2).put("index.number_of_replicas", numberOfReplicas).build())
+            .get();
+
+        // Verify that the target index kept the date_nanos @timestamp mapping and that the cluster reaches green health afterwards
+        final var targetMappings = indicesAdmin().prepareGetMappings("target").get();
+        assertThat(
+            ObjectPath.eval("properties.@timestamp.type", targetMappings.mappings().get("target").getSourceAsMap()),
+            equalTo("date_nanos")
+        );
+        ensureGreen();
+    }
 }
diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java
@@ -512,11 +512,15 @@ private ClusterState applyCreateIndexWithTemporaryService(
         assert indicesService.hasIndex(temporaryIndexMeta.getIndex()) == false
             : Strings.format("Index [%s] already exists", temporaryIndexMeta.getIndex().getName());
         return indicesService.<ClusterState, Exception>withTempIndexService(temporaryIndexMeta, indexService -> {
-            try {
-                updateIndexMappingsAndBuildSortOrder(indexService, request, mappings, sourceMetadata);
-            } catch (Exception e) {
-                logger.log(silent ? Level.DEBUG : Level.INFO, "failed on parsing mappings on index creation [{}]", request.index(), e);
-                throw e;
+            // If we're creating the index from an existing index, we should not provide any mappings, as the new index shards will take
+            // care of copying the mappings from the source index during recovery. Providing mappings here would cause conflicts.
+            if (sourceMetadata == null) {
+                try {
+                    updateIndexMappingsAndBuildSortOrder(indexService, request, mappings);
+                } catch (Exception e) {
+                    logger.log(silent ? Level.DEBUG : Level.INFO, "failed on parsing mappings on index creation [{}]", request.index(), e);
+                    throw e;
+                }
             }
 
             final List<AliasMetadata> aliases = aliasSupplier.apply(indexService);
@@ -1422,8 +1426,7 @@ private static IndexMetadata.Builder createIndexMetadataBuilder(
     private static void updateIndexMappingsAndBuildSortOrder(
         IndexService indexService,
         CreateIndexClusterStateUpdateRequest request,
-        List<CompressedXContent> mappings,
-        @Nullable IndexMetadata sourceMetadata
+        List<CompressedXContent> mappings
     ) throws IOException {
         MapperService mapperService = indexService.mapperService();
         IndexMode indexMode = indexService.getIndexSettings() != null ? indexService.getIndexSettings().getMode() : IndexMode.STANDARD;
@@ -1437,13 +1440,11 @@ private static void updateIndexMappingsAndBuildSortOrder(
 
         indexMode.validateTimestampFieldMapping(request.dataStreamName() != null, mapperService.mappingLookup());
 
-        if (sourceMetadata == null) {
-            // now that the mapping is merged we can validate the index sort.
-            // we cannot validate for index shrinking since the mapping is empty
-            // at this point. The validation will take place later in the process
-            // (when all shards are copied in a single place).
-            indexService.getIndexSortSupplier().get();
-        }
+        // now that the mapping is merged we can validate the index sort.
+        // this method is not called when the index is created from an existing index
+        // (e.g. shrinking), since the mapping is empty at that point. In that case the
+        // validation takes place later in the process (when all shards are copied in a single place).
+        indexService.getIndexSortSupplier().get();
     }
 
     private static void validateActiveShardCount(ActiveShardCount waitForActiveShards, IndexMetadata indexMetadata) {
diff --git a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java
@@ -2073,6 +2073,13 @@ public String getRandomNodeName() {
         return getNodeNameThat(Predicates.always());
     }
 
+    /**
+     * @return the name of a random node in a cluster that can contain data (see {@link DiscoveryNode#canContainData()})
+     */
+    public String getRandomDataNodeName() {
+        return getNodeNameThat(DiscoveryNode::canContainData);
+    }
+
     /**
      * @return the name of a random node in a cluster that match the {@code predicate}
      */