Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
ab05bdd
Start with test
HoustonPutman Jul 21, 2025
8328c78
Solve issue, but need to clean up. Need to fix for Restore as well
HoustonPutman Jul 21, 2025
d1d6bf5
Use new shardrequest constructs for SyncStrategy
HoustonPutman Jul 21, 2025
f5ee234
Response status should now be the number of replicas
HoustonPutman Jul 21, 2025
02d1e90
Cleanup unused parts of tests
HoustonPutman Jul 22, 2025
1be4ed0
Huge commit - restore uses installshard - big update in locking
HoustonPutman Jul 24, 2025
8365a23
Implement callingLock mirroring for distributed API Manager locking
HoustonPutman Aug 1, 2025
00a67b4
Huge commit - restore uses installshard - big update in locking
HoustonPutman Jul 24, 2025
2d5e12b
Implement callingLock mirroring for distributed API Manager locking
HoustonPutman Aug 1, 2025
37cea68
Merge remote-tracking branch 'apache/main' into locking-update
HoustonPutman Sep 2, 2025
293f35d
Merge remote-tracking branch 'apache/main' into locking-update
HoustonPutman Dec 2, 2025
242235a
Changelog
HoustonPutman Dec 2, 2025
4a2c562
Merge remote-tracking branch 'apache/main' into fix-restore-partial-s…
HoustonPutman Dec 2, 2025
b4c3b7b
WIP - testing
HoustonPutman Dec 4, 2025
72ae4bf
Merge remote-tracking branch 'apache/main' into solr-18011-locking-up…
HoustonPutman Jan 5, 2026
da3e6cd
Merge remote-tracking branch 'apache/main' into fix-restore-partial-s…
HoustonPutman Jan 5, 2026
43d1091
Removing leadership starts a recovery, don't issue a second recovery
HoustonPutman Jan 6, 2026
e037654
Handle async success/failure better for collectionHandlingUtils
HoustonPutman Jan 6, 2026
8323e72
Small fixes for installShardCmd
HoustonPutman Jan 6, 2026
e9c09d6
Add tests, fix recovery contention
HoustonPutman Jan 8, 2026
5e4c1be
Remove uneeded testing code
HoustonPutman Jan 8, 2026
25e9ecd
Tidy
HoustonPutman Jan 8, 2026
b0452f4
Support distributed locks better
HoustonPutman Jan 8, 2026
72401ca
Start switching over to AdminCmdContext
HoustonPutman Jan 8, 2026
6dac593
Refactor how locking is passed through collections api commands.
HoustonPutman Jan 9, 2026
e368c48
Make some bug fixes
HoustonPutman Jan 10, 2026
97352f7
Some test 'fixes', need to rethink v2 tests still
HoustonPutman Jan 10, 2026
6f2d61a
Add real tests for DeleteAlias and DeleteNode v2 apis
HoustonPutman Jan 12, 2026
74176a4
Tidy
HoustonPutman Jan 12, 2026
df0888c
Fix broken tests
HoustonPutman Jan 13, 2026
3627d29
Add missing test utility class
HoustonPutman Jan 13, 2026
4a49aa9
Move over remaining APIs and tests
HoustonPutman Jan 13, 2026
021303a
Merge remote-tracking branch 'apache/main' into solr-18011-locking-up…
HoustonPutman Jan 13, 2026
e61279c
Merge remote-tracking branch 'apache/main' into solr-18011-locking-up…
HoustonPutman Jan 17, 2026
4c9a766
Fix changelog entry
HoustonPutman Jan 17, 2026
c7cfe19
Remove files that shouldn't be changed.
HoustonPutman Jan 17, 2026
08326cc
One more that shouldn't be changed
HoustonPutman Jan 17, 2026
eaddd46
Merge branch 'solr-18011-locking-update' into fix-restore-partial-suc…
HoustonPutman Jan 17, 2026
b90c283
Some more fixes
HoustonPutman Jan 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions changelog/unreleased/solr-18011-locking-update.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
title: Allow locked Admin APIs to call other locked AdminAPIs without deadlocking
type: changed # added, changed, fixed, deprecated, removed, dependency_update, security, other
authors:
- name: Houston Putman
nick: HoustonPutman
links:
- name: SOLR-18011
url: https://issues.apache.org/jira/browse/SOLR-18011
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,9 @@ public class InstallShardDataRequestBody {

@JsonProperty public String repository;

@JsonProperty public String name;

@JsonProperty public String shardBackupId;

@JsonProperty public String async;
}
6 changes: 6 additions & 0 deletions solr/core/src/java/org/apache/solr/api/V2HttpCall.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.solr.api;

import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
import static org.apache.solr.common.params.CollectionAdminParams.CALLING_LOCK_IDS_HEADER;
import static org.apache.solr.servlet.SolrDispatchFilter.Action.ADMIN;
import static org.apache.solr.servlet.SolrDispatchFilter.Action.ADMIN_OR_REMOTEPROXY;
import static org.apache.solr.servlet.SolrDispatchFilter.Action.PROCESS;
Expand Down Expand Up @@ -218,6 +219,11 @@ private void initAdminRequest(String path) throws Exception {
solrReq.getContext().put(CoreContainer.class.getName(), cores);
requestType = AuthorizationContext.RequestType.ADMIN;
action = ADMIN;

String callingLockIds = req.getHeader(CALLING_LOCK_IDS_HEADER);
if (callingLockIds != null && !callingLockIds.isBlank()) {
solrReq.getContext().put(CALLING_LOCK_IDS_HEADER, callingLockIds);
}
}

protected void parseRequest() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.solr.cloud;

import java.util.List;
import org.apache.solr.cloud.api.collections.CollectionApiLockFactory;
import org.apache.solr.common.params.CollectionParams;

Expand Down Expand Up @@ -62,5 +63,6 @@ DistributedLock createLock(
CollectionParams.LockLevel level,
String collName,
String shardId,
String replicaName);
String replicaName,
List<String> callingLockIds);
}
4 changes: 4 additions & 0 deletions solr/core/src/java/org/apache/solr/cloud/DistributedLock.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,8 @@ public interface DistributedLock {
void release();

boolean isAcquired();

String getLockId();

boolean isMirroringLock();
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import com.google.common.annotations.VisibleForTesting;
import java.lang.invoke.MethodHandles;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.solr.common.SolrException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -46,7 +47,12 @@ public void waitUntilAcquired() {
for (DistributedLock lock : locks) {
log.debug("DistributedMultiLock.waitUntilAcquired. About to wait on lock {}", lock);
lock.waitUntilAcquired();
log.debug("DistributedMultiLock.waitUntilAcquired. Acquired lock {}", lock);
if (lock.isMirroringLock()) {
log.debug(
"DistributedMultiLock.waitUntilAcquired. Mirroring already-acquired lock {}", lock);
} else {
log.debug("DistributedMultiLock.waitUntilAcquired. Acquired lock {}", lock);
}
}
}

Expand All @@ -70,6 +76,10 @@ public boolean isAcquired() {
return true;
}

public String getLockId() {
return locks.stream().map(DistributedLock::getLockId).collect(Collectors.joining(","));
}

@VisibleForTesting
public int getCountInternalLocks() {
return locks.size();
Expand Down
87 changes: 79 additions & 8 deletions solr/core/src/java/org/apache/solr/cloud/LockTree.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import org.apache.solr.cloud.OverseerMessageHandler.Lock;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CollectionParams.LockLevel;
Expand All @@ -38,20 +39,35 @@ public class LockTree {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private final Node root = new Node(null, LockLevel.CLUSTER, null);

public final Map<String, Lock> allLocks = new HashMap<>();

private class LockImpl implements Lock {
final Node node;
final String id;

LockImpl(Node node) {
this.node = node;
this.id = UUID.randomUUID().toString();
}

@Override
public void unlock() {
synchronized (LockTree.this) {
node.unlock(this);
allLocks.remove(id);
}
}

@Override
public String id() {
return id;
}

@Override
public boolean validateSubpath(int lockLevel, List<String> path) {
return node.validateSubpath(lockLevel, path);
}

@Override
public String toString() {
return StrUtils.join(node.constructPath(new ArrayDeque<>()), '/');
Expand All @@ -71,12 +87,33 @@ public String toString() {
public class Session {
private SessionNode root = new SessionNode(LockLevel.CLUSTER);

public Lock lock(CollectionParams.CollectionAction action, List<String> path) {
public Lock lock(
CollectionParams.CollectionAction action, List<String> path, List<String> callingLockIds) {
if (action.lockLevel == LockLevel.NONE) return FREELOCK;
log.debug("Calling lock level: {}", callingLockIds);
Node startingNode = LockTree.this.root;
SessionNode startingSession = root;

// If a callingLockId was passed in, validate it with the current lock path, and only start
// locking below the calling lock
Lock callingLock = callingLockIds != null ? allLocks.get(callingLockIds.getLast()) : null;
boolean ignoreCallingLock = false;
if (callingLock != null && callingLock.validateSubpath(action.lockLevel.getHeight(), path)) {
startingNode = ((LockImpl) callingLock).node;
startingSession = startingSession.find(startingNode.level.getHeight(), path);
if (startingSession == null) {
startingSession = root;
}
ignoreCallingLock = true;
}
synchronized (LockTree.this) {
if (root.isBusy(action.lockLevel, path)) return null;
Lock lockObject = LockTree.this.root.lock(action.lockLevel, path);
if (lockObject == null) root.markBusy(action.lockLevel, path);
if (startingSession.isBusy(action.lockLevel, path)) return null;
Lock lockObject = startingNode.lock(action.lockLevel, path, ignoreCallingLock);
if (lockObject == null) {
startingSession.markBusy(action.lockLevel, path);
} else {
allLocks.put(lockObject.id(), lockObject);
}
return lockObject;
}
}
Expand Down Expand Up @@ -125,6 +162,18 @@ boolean isBusy(LockLevel lockLevel, List<String> path) {
return false;
}
}

SessionNode find(int lockLevel, List<String> path) {
if (level.getHeight() == lockLevel) {
return this;
} else if (level.getHeight() < lockLevel
&& kids != null
&& kids.containsKey(path.get(level.getHeight()))) {
return kids.get(path.get(level.getHeight())).find(lockLevel, path);
} else {
return null;
}
}
}

public Session getSession() {
Expand Down Expand Up @@ -158,8 +207,9 @@ void unlock(LockImpl lockObject) {
}
}

Lock lock(LockLevel lockLevel, List<String> path) {
if (myLock != null) return null; // I'm already locked. no need to go any further
Lock lock(LockLevel lockLevel, List<String> path, boolean ignoreCurrentLock) {
if (myLock != null && !ignoreCurrentLock)
return null; // I'm already locked. no need to go any further
if (lockLevel == level) {
// lock is supposed to be acquired at this level
// If I am locked or any of my children or grandchildren are locked
Expand All @@ -171,16 +221,37 @@ Lock lock(LockLevel lockLevel, List<String> path) {
Node child = children.get(childName);
if (child == null)
children.put(childName, child = new Node(childName, level.getChild(), this));
return child.lock(lockLevel, path);
return child.lock(lockLevel, path, false);
}
}

boolean validateSubpath(int lockLevel, List<String> path) {
return level.getHeight() < lockLevel
&& (level.getHeight() == 0 || name.equals(path.get(level.getHeight() - 1)))
&& (mom == null || mom.validateSubpath(lockLevel, path));
}

ArrayDeque<String> constructPath(ArrayDeque<String> collect) {
if (name != null) collect.addFirst(name);
if (mom != null) mom.constructPath(collect);
return collect;
}
}

static final Lock FREELOCK = () -> {};
static final String FREELOCK_ID = "-1";
static final Lock FREELOCK =
new Lock() {
@Override
public void unlock() {}

@Override
public String id() {
return FREELOCK_ID;
}

@Override
public boolean validateSubpath(int lockLevel, List<String> path) {
return false;
}
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import java.lang.invoke.MethodHandles;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
Expand Down Expand Up @@ -61,7 +62,7 @@ public OverseerConfigSetMessageHandler(ZkStateReader zkStateReader, CoreContaine
}

@Override
public OverseerSolrResponse processMessage(ZkNodeProps message, String operation) {
public OverseerSolrResponse processMessage(ZkNodeProps message, String operation, Lock lock) {
NamedList<Object> results = new NamedList<>();
try {
if (!operation.startsWith(CONFIGSETS_ACTION_PREFIX)) {
Expand Down Expand Up @@ -117,7 +118,22 @@ public Lock lockTask(ZkNodeProps message, long ignored) {
String configSetName = getTaskKey(message);
if (canExecute(configSetName, message)) {
markExclusiveTask(configSetName, message);
return () -> unmarkExclusiveTask(configSetName, message);
return new Lock() {
@Override
public void unlock() {
unmarkExclusiveTask(configSetName, message);
}

@Override
public String id() {
return configSetName;
}

@Override
public boolean validateSubpath(int lockLevel, List<String> path) {
return false;
}
};
}
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.solr.cloud;

import java.util.List;
import org.apache.solr.common.cloud.ZkNodeProps;

/** Interface for processing messages received by an {@link OverseerTaskProcessor} */
Expand All @@ -26,7 +27,7 @@ public interface OverseerMessageHandler {
* @param operation the operation to process
* @return response
*/
OverseerSolrResponse processMessage(ZkNodeProps message, String operation);
OverseerSolrResponse processMessage(ZkNodeProps message, String operation, Lock lock);

/**
* @return the name of the OverseerMessageHandler
Expand All @@ -41,6 +42,10 @@ public interface OverseerMessageHandler {

interface Lock {
void unlock();

String id();

boolean validateSubpath(int lockLevel, List<String> path);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@ public void run() {
if (log.isDebugEnabled()) {
log.debug("Runner processing {}", head.getId());
}
response = messageHandler.processMessage(message, operation);
response = messageHandler.processMessage(message, operation, lock);
} finally {
timerContext.stop();
updateStats(statsName);
Expand Down
Loading
Loading