elastic · joshua-adams-1 · Oct 27, 2025 · Oct 27, 2025 · Oct 27, 2025 · Oct 28, 2025
diff --git a/docs/changelog/137210.yaml b/docs/changelog/137210.yaml
@@ -0,0 +1,5 @@
+pr: 137210
+summary: "[WIP] Introduce INDEX_SHARD_COUNT_FORMAT"
+area: Snapshot/Restore
+type: bug
+issues: []
diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java
@@ -2881,13 +2881,7 @@ public static IndexMetadata legacyFromXContent(XContentParser parser) throws IOE
                 } else if (token == XContentParser.Token.START_OBJECT) {
                     if ("settings".equals(currentFieldName)) {
                         Settings settings = Settings.fromXContent(parser);
-                        if (SETTING_INDEX_VERSION_COMPATIBILITY.get(settings).isLegacyIndexVersion() == false) {
-                            throw new IllegalStateException(
-                                "this method should only be used to parse older incompatible index metadata versions "
-                                    + "but got "
-                                    + SETTING_INDEX_VERSION_COMPATIBILITY.get(settings).toReleaseVersion()
-                            );
-                        }
+                        checkSettingIndexVersionCompatibility(settings);
                         builder.settings(settings);
                     } else if ("mappings".equals(currentFieldName)) {
                         Map<String, Object> mappingSourceBuilder = new HashMap<>();
@@ -2980,6 +2974,16 @@ private static void handleLegacyMapping(Builder builder, Map<String, Object> map
         }
     }
 
+    private static void checkSettingIndexVersionCompatibility(Settings settings) {
+        if (SETTING_INDEX_VERSION_COMPATIBILITY.get(settings).isLegacyIndexVersion() == false) {
+            throw new IllegalStateException(
+                "this method should only be used to parse older incompatible index metadata versions "
+                    + "but got "
+                    + SETTING_INDEX_VERSION_COMPATIBILITY.get(settings).toReleaseVersion()
+            );
+        }
+    }
+
     /**
      * Return the {@link IndexVersion} of Elasticsearch that has been used to create an index given its settings.
      *
@@ -3228,4 +3232,109 @@ public static int parseIndexNameCounter(String indexName) {
             throw new IllegalArgumentException("unable to parse the index name [" + indexName + "] to extract the counter", e);
         }
     }
+
+    /**
+     * A subset of {@link IndexMetadata} storing only the shard count of an index
+     * Prior to v9.3, the entire {@link IndexMetadata} object was stored in heap and then loaded during snapshotting to determine
+     * the shard count. As per ES-12539, this is replaced with the {@link IndexShardCount} class that writes and loads only the index's
+     * shard count to and from heap memory, reducing the possibility of smaller nodes going OOMe during snapshotting
+     */
+    public static class IndexShardCount implements ToXContentFragment {
+        private static final String KEY_SHARD_COUNT = "shard_count";
+        private final int shardCount;
+
+        public IndexShardCount(int count) {
+            this.shardCount = count;
+        }
+
+        public int getCount() {
+            return shardCount;
+        }
+
+        @Override
+        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+            builder.field(KEY_SHARD_COUNT, shardCount);
+            return builder;
+        }
+
+        public static IndexShardCount.Builder builder() {
+            return new IndexShardCount.Builder();
+        }
+
+        public static class Builder {
+            private int count;
+
+            public IndexShardCount.Builder setCount(int count) {
+                this.count = count;
+                return this;
+            }
+
+            public IndexShardCount build() {
+                return new IndexShardCount(count);
+            }
+        }
+
+        /**
+         * Parses an {@link IndexMetadata} object, reading only the shard count and skipping the rest
+         * @param parser The parser of the {@link IndexMetadata} object
+         * @return Returns an {@link IndexShardCount} containing the shard count for the index
+         * @throws IOException Thrown if the {@link IndexMetadata} object cannot be parsed correctly
+         */
+        public static IndexShardCount fromIndexMetaData(XContentParser parser) throws IOException {
+            return fromIndexMetaData(parser, false);
+        }
+
+        public static IndexShardCount fromIndexMetaData(XContentParser parser, boolean legacy) throws IOException {
+            if (parser.currentToken() == null) { // fresh parser? move to the first token
+                parser.nextToken();
+            }
+            if (parser.currentToken() == XContentParser.Token.START_OBJECT) {  // on a start object move to next token
+                parser.nextToken();
+            }
+            XContentParserUtils.ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser);
+            String currentFieldName;
+            XContentParser.Token token = parser.nextToken();
+            XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, token, parser);
+
+            Builder indexShardCountBuilder = new Builder();
+            // Skip over everything except the settings object we care about, or any unexpected tokens
+            while ((currentFieldName = parser.nextFieldName()) != null) {
+                token = parser.nextToken();
+                if (token == XContentParser.Token.START_OBJECT) {
+                    if (currentFieldName.equals(KEY_SETTINGS)) {
+                        Settings settings = Settings.fromXContent(parser);
+                        if (legacy) {
+                            checkSettingIndexVersionCompatibility(settings);
+                        }
+                        indexShardCountBuilder.setCount(settings.getAsInt(SETTING_NUMBER_OF_SHARDS, -1));
+                    } else {
+                        // Iterate through the object, but we don't care for it's contents
+                        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+                        }
+                    }
+                } else if (token == XContentParser.Token.START_ARRAY) {
+                    // Iterate through the array, but we don't care for it's contents
+                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
+                    }
+                } else if (token.isValue() == false) {
+                    throw new IllegalArgumentException("Unexpected token " + token);
+                }
+            }
+            XContentParserUtils.ensureExpectedToken(XContentParser.Token.END_OBJECT, parser.nextToken(), parser);
+            return indexShardCountBuilder.build();
+        }
+
+        /**
+         * Parses legacy metadata from ES versions that are no longer index-compatible, returning information on best-effort basis.
+         * <p>
+         * Like {@link #fromIndexMetaData}, we are parsing an {@link IndexMetadata} object,
+         * reading only the shard count and skipping the rest.
+         * <p>
+         * Throws an exception if the metadata is index-compatible with the current version (in that case,
+         * {@link #fromXContent} should be used to load the content.
+         */
+        public static IndexShardCount fromLegacyIndexMetaData(XContentParser parser) throws IOException {
+            return fromIndexMetaData(parser, true);
+        }
+    }
 }
diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java
@@ -398,6 +398,14 @@ public static String getRepositoryDataBlobName(long repositoryGeneration) {
         Function.identity()
     );
 
+    public static final ChecksumBlobStoreFormat<IndexMetadata.IndexShardCount> INDEX_SHARD_COUNT_FORMAT = new ChecksumBlobStoreFormat<>(
+        "index-metadata",
+        METADATA_NAME_FORMAT,
+        (repoName, parser) -> IndexMetadata.IndexShardCount.fromLegacyIndexMetaData(parser),
+        (repoName, parser) -> IndexMetadata.IndexShardCount.fromIndexMetaData(parser),
+        Function.identity()
+    );
+
     private static final String SNAPSHOT_CODEC = "snapshot";
 
     public static final ChecksumBlobStoreFormat<SnapshotInfo> SNAPSHOT_FORMAT = new ChecksumBlobStoreFormat<>(
@@ -1327,13 +1335,14 @@ private void determineShardCount(ActionListener<Void> listener) {
             private void getOneShardCount(String indexMetaGeneration) {
                 try {
                     updateShardCount(
-                        INDEX_METADATA_FORMAT.read(getProjectRepo(), indexContainer, indexMetaGeneration, namedXContentRegistry)
-                            .getNumberOfShards()
+                        INDEX_SHARD_COUNT_FORMAT.read(getProjectRepo(), indexContainer, indexMetaGeneration, namedXContentRegistry)
+                            .getCount()
                     );
                 } catch (Exception ex) {
-                    logger.warn(() -> format("[%s] [%s] failed to read metadata for index", indexMetaGeneration, indexId.getName()), ex);
+                    logger.warn(() -> format("[%s] [%s] failed to read shard count for index", indexMetaGeneration, indexId.getName()), ex);
                     // Definitely indicates something fairly badly wrong with the repo, but not immediately fatal here: we might get the
-                    // shard count from another metadata blob, or we might just not process these shards. If we skip these shards then the
+                    // shard count from a subsequent indexMetaGeneration, or we might just not process these shards. If we skip these shards
+                    // then the
                     // repository will technically enter an invalid state (these shards' index-XXX blobs will refer to snapshots that no
                     // longer exist) and may contain dangling blobs too. A subsequent delete that hits this index may repair the state if
                     // the metadata read error is transient, but if not then the stale indices cleanup will eventually remove this index
@@ -1904,7 +1913,8 @@ record RootBlobUpdateResult(RepositoryData oldRepositoryData, RepositoryData new
                         }
                     }));
 
-                    // Write the index metadata for each index in the snapshot
+                    // Write the index metadata and the shard count to memory for each index in the snapshot, so that it persists
+                    // even if the repository is deleted
                     for (IndexId index : indices) {
                         executor.execute(ActionRunnable.run(allMetaListeners.acquire(), () -> {
                             final IndexMetadata indexMetaData = projectMetadata.index(index.getName());