Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/132536.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 132536
summary: Avoid stack overflow in `IndicesClusterStateService` `applyClusterState`
area: Cluster Coordination
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ protected void doClose() {}
* other lenience to handle that. It'd be better to wait for the shard locks to be released and then delete the data. See #74149.
*/
private volatile SubscribableListener<Void> lastClusterStateShardsClosedListener = SubscribableListener.nullSuccess();
private int shardsClosedListenerChainLength = 0;

@Nullable // if not currently applying a cluster state
private RefCountingListener currentClusterStateShardsClosedListeners;
Expand Down Expand Up @@ -274,8 +275,26 @@ public synchronized void applyClusterState(final ClusterChangedEvent event) {
lastClusterStateShardsClosedListener = new SubscribableListener<>();
currentClusterStateShardsClosedListeners = new RefCountingListener(lastClusterStateShardsClosedListener);
try {
previousShardsClosedListener.addListener(currentClusterStateShardsClosedListeners.acquire());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm are you sure we should move all this listener stuff below doApplyClusterState()?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't think of any impact to execution.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But I've put it back at the original place.

doApplyClusterState(event);
// HACK: chain listeners but avoid too deep of a stack
{
if (previousShardsClosedListener.isDone()) {
shardsClosedListenerChainLength = 0;
}
previousShardsClosedListener.addListener(
currentClusterStateShardsClosedListeners.acquire(),
// Fork the listener onto a different thread once the chain grows too long.
// Chaining too many listeners might trigger a StackOverflowError on the thread that eventually gets to
// execute them all (because the last thread that decreases the ref count to 0 of a {@link RefCountingListener}
// also executes its listeners, which in turn might decrease the ref count to 0 of another
// {@link RefCountingListener}, again executing its listeners, etc...).
shardsClosedListenerChainLength++ < 10 ? EsExecutors.DIRECT_EXECUTOR_SERVICE : threadPool.generic(),
null
);
if (shardsClosedListenerChainLength >= 10) {
shardsClosedListenerChainLength = 0;
}
}
} finally {
currentClusterStateShardsClosedListeners.close();
currentClusterStateShardsClosedListeners = null;
Expand Down