Skip to content

Commit ff6465b

Browse files
authored
Avoid hoarding cluster state references during rollover (#124107)
By keeping a list of all the rollover results in a rollover request batch, we were keeping references to all the intermediate cluster states that we built. We've seen this list take up ~1.4GB with 600 rollover requests in one batch. We only kept the list of results to compute the "reason" for the allocation reroute, so we can easily drop the cluster state reference from the list and only keep what we need. Fixes #123893
1 parent a1ee3c9 commit ff6465b

File tree

3 files changed

+14
-24
lines changed

3 files changed

+14
-24
lines changed

docs/changelog/124107.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 124107
2+
summary: Avoid hoarding cluster state references during rollover
3+
area: Indices APIs
4+
type: bug
5+
issues:
6+
- 123893

server/src/main/java/org/elasticsearch/action/admin/indices/rollover/LazyRolloverAction.java

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
import org.elasticsearch.cluster.service.MasterServiceTaskQueue;
3737
import org.elasticsearch.common.Priority;
3838
import org.elasticsearch.common.Strings;
39-
import org.elasticsearch.common.collect.Iterators;
4039
import org.elasticsearch.injection.guice.Inject;
4140
import org.elasticsearch.tasks.CancellableTask;
4241
import org.elasticsearch.tasks.Task;
@@ -189,7 +188,7 @@ record LazyRolloverExecutor(
189188
@Override
190189
public ClusterState execute(BatchExecutionContext<LazyRolloverTask> batchExecutionContext) {
191190
final var listener = new AllocationActionMultiListener<RolloverResponse>(threadPool.getThreadContext());
192-
final var results = new ArrayList<MetadataRolloverService.RolloverResult>(batchExecutionContext.taskContexts().size());
191+
final var results = new ArrayList<String>(batchExecutionContext.taskContexts().size());
193192
var state = batchExecutionContext.initialState();
194193
Map<ProjectId, Map<RolloverRequest, List<TaskContext<LazyRolloverTask>>>> groupedRequests = new HashMap<>();
195194
for (final var taskContext : batchExecutionContext.taskContexts()) {
@@ -214,14 +213,7 @@ public ClusterState execute(BatchExecutionContext<LazyRolloverTask> batchExecuti
214213

215214
if (state != batchExecutionContext.initialState()) {
216215
var reason = new StringBuilder();
217-
Strings.collectionToDelimitedStringWithLimit(
218-
(Iterable<String>) () -> Iterators.map(results.iterator(), t -> t.sourceIndexName() + "->" + t.rolloverIndexName()),
219-
",",
220-
"lazy bulk rollover [",
221-
"]",
222-
1024,
223-
reason
224-
);
216+
Strings.collectionToDelimitedStringWithLimit(results, ",", "lazy bulk rollover [", "]", 1024, reason);
225217
try (var ignored = batchExecutionContext.dropHeadersContext()) {
226218
state = allocationService.reroute(state, reason.toString(), listener.reroute());
227219
}
@@ -234,7 +226,7 @@ public ClusterState execute(BatchExecutionContext<LazyRolloverTask> batchExecuti
234226
public ClusterState executeTask(
235227
ProjectState currentState,
236228
RolloverRequest rolloverRequest,
237-
List<MetadataRolloverService.RolloverResult> results,
229+
ArrayList<String> results,
238230
List<TaskContext<LazyRolloverTask>> rolloverTaskContexts,
239231
AllocationActionMultiListener<RolloverResponse> allocationActionMultiListener
240232
) throws Exception {
@@ -271,7 +263,7 @@ public ClusterState executeTask(
271263
null,
272264
isFailureStoreRollover
273265
);
274-
results.add(rolloverResult);
266+
results.add(rolloverResult.sourceIndexName() + "->" + rolloverResult.rolloverIndexName());
275267
logger.trace("lazy rollover result [{}]", rolloverResult);
276268

277269
final var rolloverIndexName = rolloverResult.rolloverIndexName();

server/src/main/java/org/elasticsearch/action/admin/indices/rollover/TransportRolloverAction.java

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@
4949
import org.elasticsearch.cluster.service.MasterServiceTaskQueue;
5050
import org.elasticsearch.common.Priority;
5151
import org.elasticsearch.common.Strings;
52-
import org.elasticsearch.common.collect.Iterators;
5352
import org.elasticsearch.common.unit.ByteSizeValue;
5453
import org.elasticsearch.common.util.concurrent.EsExecutors;
5554
import org.elasticsearch.core.Nullable;
@@ -494,7 +493,7 @@ record RolloverExecutor(
494493
@Override
495494
public ClusterState execute(BatchExecutionContext<RolloverTask> batchExecutionContext) {
496495
final var listener = new AllocationActionMultiListener<RolloverResponse>(threadPool.getThreadContext());
497-
final var results = new ArrayList<MetadataRolloverService.RolloverResult>(batchExecutionContext.taskContexts().size());
496+
final var results = new ArrayList<String>(batchExecutionContext.taskContexts().size());
498497
var state = batchExecutionContext.initialState();
499498
for (final var taskContext : batchExecutionContext.taskContexts()) {
500499
try (var ignored = taskContext.captureResponseHeaders()) {
@@ -506,14 +505,7 @@ public ClusterState execute(BatchExecutionContext<RolloverTask> batchExecutionCo
506505

507506
if (state != batchExecutionContext.initialState()) {
508507
var reason = new StringBuilder();
509-
Strings.collectionToDelimitedStringWithLimit(
510-
(Iterable<String>) () -> Iterators.map(results.iterator(), t -> t.sourceIndexName() + "->" + t.rolloverIndexName()),
511-
",",
512-
"bulk rollover [",
513-
"]",
514-
1024,
515-
reason
516-
);
508+
Strings.collectionToDelimitedStringWithLimit(results, ",", "bulk rollover [", "]", 1024, reason);
517509
try (var ignored = batchExecutionContext.dropHeadersContext()) {
518510
state = allocationService.reroute(state, reason.toString(), listener.reroute());
519511
}
@@ -525,7 +517,7 @@ public ClusterState execute(BatchExecutionContext<RolloverTask> batchExecutionCo
525517

526518
public ClusterState executeTask(
527519
ClusterState currentState,
528-
List<MetadataRolloverService.RolloverResult> results,
520+
ArrayList<String> results,
529521
TaskContext<RolloverTask> rolloverTaskContext,
530522
AllocationActionMultiListener<RolloverResponse> allocationActionMultiListener
531523
) throws Exception {
@@ -596,7 +588,7 @@ public ClusterState executeTask(
596588
rolloverTask.autoShardingResult(),
597589
targetFailureStore
598590
);
599-
results.add(rolloverResult);
591+
results.add(rolloverResult.sourceIndexName() + "->" + rolloverResult.rolloverIndexName());
600592
logger.trace("rollover result [{}]", rolloverResult);
601593

602594
final var rolloverIndexName = rolloverResult.rolloverIndexName();

0 commit comments

Comments
 (0)