5 changes: 5 additions & 0 deletions docs/changelog/137210.yaml
@@ -0,0 +1,5 @@
pr: 137210
summary: "[WIP] Introduce INDEX_SHARD_COUNT_FORMAT"
area: Snapshot/Restore
type: bug
issues: []
@@ -253,6 +253,7 @@ private static void assertPurposeConsistency(OperationPurpose purpose, String bl
startsWith(BlobStoreRepository.INDEX_FILE_PREFIX),
startsWith(BlobStoreRepository.METADATA_PREFIX),
startsWith(BlobStoreRepository.SNAPSHOT_PREFIX),
startsWith(BlobStoreRepository.SHARD_COUNT_PREFIX),
equalTo(BlobStoreRepository.INDEX_LATEST_BLOB),
// verification
equalTo("master.dat"),
@@ -0,0 +1,78 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.cluster.metadata;

import org.elasticsearch.common.xcontent.XContentParserUtils;
import org.elasticsearch.xcontent.ToXContentFragment;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;

import java.io.IOException;

/**
* Stores the shard count for an index.
* Prior to v9.3, the entire {@link IndexMetadata} object was loaded into heap during snapshotting just to determine
* the shard count. As per ES-12539, this was replaced with {@link IndexShardCount}, which writes and loads only the index's
* shard count. This not only reduces the likelihood of a node going OOM, but also improves snapshotting performance.
*/
public class IndexShardCount implements ToXContentFragment {
private static final String KEY_SHARD_COUNT = "shard_count";
private final int shardCount;

public IndexShardCount(int count) {
this.shardCount = count;
}

public int getCount() {
return shardCount;
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.field(KEY_SHARD_COUNT, shardCount);
return builder;
}

public static Builder builder() {
return new Builder();
}

public static class Builder {
private int count;

public Builder setCount(int count) {
this.count = count;
return this;
}

public IndexShardCount build() {
return new IndexShardCount(count);
}
}

public static IndexShardCount fromXContent(XContentParser parser) throws IOException {
if (parser.currentToken() == null) { // fresh parser? move to the first token
parser.nextToken();
}
if (parser.currentToken() == XContentParser.Token.START_OBJECT) { // on a start object move to next token
parser.nextToken();
}
XContentParserUtils.ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser);
XContentParser.Token currentToken = parser.nextToken();
final IndexShardCount indexShardCount;
if (currentToken.isValue()) {
indexShardCount = new IndexShardCount(parser.intValue());
} else {
throw new IllegalArgumentException("Unexpected token " + currentToken);
}
XContentParserUtils.ensureExpectedToken(XContentParser.Token.END_OBJECT, parser.nextToken(), parser);
return indexShardCount;
}
}
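
As a usage note, the fragment above round-trips through an enclosing object, which is the shape fromXContent expects. A test-style sketch, not part of this diff (it assumes the usual org.elasticsearch.xcontent helpers and org.elasticsearch.common.Strings are available, as elsewhere in the codebase):

try (XContentBuilder builder = XContentFactory.jsonBuilder()) {
    builder.startObject();
    new IndexShardCount(3).toXContent(builder, ToXContent.EMPTY_PARAMS);
    builder.endObject();
    try (XContentParser parser = XContentType.JSON.xContent()
        .createParser(XContentParserConfiguration.EMPTY, Strings.toString(builder))) {
        assert IndexShardCount.fromXContent(parser).getCount() == 3;
    }
}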
@@ -39,6 +39,7 @@
import org.elasticsearch.cluster.SnapshotDeletionsInProgress;
import org.elasticsearch.cluster.SnapshotsInProgress;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.IndexShardCount;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.cluster.metadata.ProjectId;
import org.elasticsearch.cluster.metadata.ProjectMetadata;
@@ -240,6 +241,12 @@ private class ShutdownLogger {
*/
public static final String METADATA_PREFIX = "meta-";

/**
* Name prefix for the blobs that hold the {@link IndexShardCount} for each index
* @see #SHARD_COUNT_NAME_FORMAT
*/
public static final String SHARD_COUNT_PREFIX = "shard-count-";
Contributor

We can't really introduce a new blob here (it won't exist in any older repositories) but nor is there a need to do so. Instead, let's just read the existing meta-${UUID}.dat blob with a different reader that ignores everything except the shard count.

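To make the suggestion concrete, a reader along these lines could pull the shard count out of the existing metadata blob. This is a minimal sketch only: the flat token walk and the "index.number_of_shards" settings key are assumptions about the IndexMetadata X-Content layout, not the actual implementation.

// Hypothetical alternative reader: scans an IndexMetadata blob and keeps only the shard count.
public static IndexShardCount fromIndexMetadata(XContentParser parser) throws IOException {
    int shardCount = -1;
    XContentParser.Token token;
    while ((token = parser.nextToken()) != null) {
        if (token == XContentParser.Token.FIELD_NAME
            && "index.number_of_shards".equals(parser.currentName()) // assumed settings key
            && parser.nextToken().isValue()) {
            shardCount = parser.intValue(); // coerces the string-valued setting to an int
        }
    }
    if (shardCount < 0) {
        throw new IllegalArgumentException("no shard count found in index metadata blob");
    }
    return new IndexShardCount(shardCount);
}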

/**
* Name prefix for the blobs that hold top-level {@link SnapshotInfo} and shard-level {@link BlobStoreIndexShardSnapshot} metadata.
* @see #SNAPSHOT_NAME_FORMAT
@@ -260,6 +267,12 @@
*/
public static final String METADATA_NAME_FORMAT = METADATA_PREFIX + "%s" + METADATA_BLOB_NAME_SUFFIX;

/**
* Blob name format for {@link IndexShardCount} blobs.
* @see #INDEX_SHARD_COUNT_FORMAT
*/
public static final String SHARD_COUNT_NAME_FORMAT = SHARD_COUNT_PREFIX + "%s" + METADATA_BLOB_NAME_SUFFIX;

/**
* Blob name format for top-level {@link SnapshotInfo} and shard-level {@link BlobStoreIndexShardSnapshot} blobs.
* @see #SNAPSHOT_FORMAT
@@ -398,6 +411,13 @@ public static String getRepositoryDataBlobName(long repositoryGeneration) {
Function.identity()
);

public static final ChecksumBlobStoreFormat<IndexShardCount> INDEX_SHARD_COUNT_FORMAT = new ChecksumBlobStoreFormat<>(
"index-shard-count",
SHARD_COUNT_NAME_FORMAT,
(repoName, parser) -> IndexShardCount.fromXContent(parser),
Function.identity()
);

private static final String SNAPSHOT_CODEC = "snapshot";

public static final ChecksumBlobStoreFormat<SnapshotInfo> SNAPSHOT_FORMAT = new ChecksumBlobStoreFormat<>(
@@ -1327,13 +1347,14 @@ private void determineShardCount(ActionListener<Void> listener) {
private void getOneShardCount(String indexMetaGeneration) {
try {
updateShardCount(
INDEX_METADATA_FORMAT.read(getProjectRepo(), indexContainer, indexMetaGeneration, namedXContentRegistry)
.getNumberOfShards()
INDEX_SHARD_COUNT_FORMAT.read(getProjectRepo(), indexContainer, indexMetaGeneration, namedXContentRegistry)
.getCount()
);
} catch (Exception ex) {
logger.warn(() -> format("[%s] [%s] failed to read metadata for index", indexMetaGeneration, indexId.getName()), ex);
logger.warn(() -> format("[%s] [%s] failed to read shard count for index", indexMetaGeneration, indexId.getName()), ex);
// Definitely indicates something fairly badly wrong with the repo, but not immediately fatal here: we might get the
// shard count from another metadata blob, or we might just not process these shards. If we skip these shards then the
// shard count from a subsequent indexMetaGeneration, or we might just not process these shards. If we skip these shards
Contributor

I think I preferred "another metadata blob" here. "Subsequent" implies some kind of ordering but we're loading all this stuff in parallel, and might instead have got the shard count from an earlier blob. Also indexMetaGeneration is really the name of the blob rather than the blob itself, so metadata blob is more accurate.

// then the
// repository will technically enter an invalid state (these shards' index-XXX blobs will refer to snapshots that no
// longer exist) and may contain dangling blobs too. A subsequent delete that hits this index may repair the state if
// the metadata read error is transient, but if not then the stale indices cleanup will eventually remove this index
@@ -1904,21 +1925,25 @@ record RootBlobUpdateResult(RepositoryData oldRepositoryData, RepositoryData new
}
}));

// Write the index metadata for each index in the snapshot
// Write the index metadata and the shard count to memory for each index in the snapshot, so that it persists
// even if the repository is deleted
Contributor

"write to memory" is kinda odd phrasing - writes are supposed to be persistent here. Maybe this is leftover from an earlier change?

for (IndexId index : indices) {
executor.execute(ActionRunnable.run(allMetaListeners.acquire(), () -> {
final IndexMetadata indexMetaData = projectMetadata.index(index.getName());
final IndexShardCount indexShardCount = new IndexShardCount(indexMetaData.getNumberOfShards());
if (writeIndexGens) {
final String identifiers = IndexMetaDataGenerations.buildUniqueIdentifier(indexMetaData);
String metaUUID = existingRepositoryData.indexMetaDataGenerations().getIndexMetaBlobId(identifiers);
if (metaUUID == null) {
// We don't yet have this version of the metadata so we write it
metaUUID = UUIDs.base64UUID();
INDEX_SHARD_COUNT_FORMAT.write(indexShardCount, indexContainer(index), metaUUID, compress);
INDEX_METADATA_FORMAT.write(indexMetaData, indexContainer(index), metaUUID, compress);
metadataWriteResult.indexMetaIdentifiers().put(identifiers, metaUUID);
} // else this task was largely a no-op - TODO no need to fork in that case
metadataWriteResult.indexMetas().put(index, identifiers);
} else {
INDEX_SHARD_COUNT_FORMAT.write(indexShardCount, indexContainer(index), snapshotId.getUUID(), compress);
INDEX_METADATA_FORMAT.write(
clusterMetadata.getProject(getProjectId()).index(index.getName()),
indexContainer(index),
@@ -133,7 +133,9 @@ protected BlobContainer wrapChild(BlobContainer child) {
@Override
public InputStream readBlob(OperationPurpose purpose, String blobName) throws IOException {
final var pathParts = path().parts();
if (pathParts.size() == 2 && pathParts.get(0).equals("indices") && blobName.startsWith(BlobStoreRepository.METADATA_PREFIX)) {
if (pathParts.size() == 2
&& pathParts.get(0).equals("indices")
&& blobName.startsWith(BlobStoreRepository.SHARD_COUNT_PREFIX)) {
// reading the shard-count blob, so mark index as active
assertTrue(activeIndices.add(pathParts.get(1)));
assertThat(activeIndices.size(), lessThanOrEqualTo(MAX_SNAPSHOT_THREADS));
@@ -25,6 +25,7 @@ public enum RepositoryFileType {
SNAPSHOT_INFO("snap-UUID.dat"),
GLOBAL_METADATA("meta-UUID.dat"),
INDEX_METADATA("indices/UUID/meta-SHORTUUID.dat"),
INDEX_SHARD_COUNT("indices/UUID/shard-count-SHORTUUID.dat"),
SHARD_GENERATION("indices/UUID/NUM/index-UUID"),
SHARD_SNAPSHOT_INFO("indices/UUID/NUM/snap-UUID.dat"),
SHARD_DATA("indices/UUID/NUM/__UUID"),