@@ -323,7 +323,55 @@ final class RemoteRewoundActionSynchronizer implements RewoundActionSynchronizer
323323 // The values of this cache are weakly referenced to ensure that locks are cleaned up when they
324324 // are no longer needed.
325325 @ Nullable
326- private volatile LoadingCache <ActionLookupData , WeakSafeReadWriteLock > fineLocks = null ;
326+ private volatile LoadingCache <ActionLookupData , WeakSafeReentrantReadWriteLock > fineLocks ;
327+
328+ /*
329+ Proof of deadlock freedom:
330+
331+ As long as the coarse lock is used, there can't be any deadlock because there is only a single
332+ read-write lock.
333+
334+ Now assume that there is a deadlock while the fine locks are used. Consider the directed
335+ labeled "wait-for" graph defined as follows:
336+
337+ * Nodes are given by the currently active Skyframe action execution threads, each of which is
338+ identified with the action it is (or will be) executing. Actions are in one-to-one
339+ correspondence with the ActionLookupData that is used as the key in the fine locks map.
340+ * For each pair of actions A_1 and A_2, there is an edge from A_1 to B_2 labeled with XY(A_3)
341+ if A_1 is waiting for the X lock of A_3 and B currently holds the Y lock of A_3, where X and Y
342+ are either R (for read) or W (for write). The resulting graph may have parallel edges with
343+ distinct labels.
344+
345+ Let C be any directed cycle in the graph representing a deadlock, let A_1 -[XY(A_3)]-> A_2 be an
346+ edge in C and consider the following cases for the pair XY:
347+
348+ * RR: Since a read-write lock whose read lock is held by at least one thread doesn't
349+ block any other thread from acquiring its read lock, this case doesn't occur.
350+ * WW: The write lock of A_3 is only ever (attempted to be) acquired by A_3 itself when it is
351+ rewound, which means that the edge would necessarily be of the shape A_3 -[WW(A_3)]-> A_3.
352+ But this isn't possible since the read-write locks are reentrant.
353+ * WR: In this case, A_1 attempts to acquire a write lock, which only happens when A_1 is a
354+ rewound action about to prepare for its (re-)execution. This means that the edge is
355+ necessarily of the shape A_1 -[WR(A_1)]-> A_2. While a rewound action is waiting for its
356+ own write lock in enterActionPeparation, it doesn't hold any locks since
357+ enterActionExecution hasn't been called yet in SkyframeActionExecutor and all past
358+ executions of the action have released all their locks due to use of try-with-resources.
359+ This means that A_1 can't have any incoming edges in the wait-for graph, which is a
360+ contradiction to the assumption that is contained in the directed cycle C.
361+
362+ We conclude that XY = RW. Since the write lock of A_3 is only ever acquired by A_3 itself, all
363+ edges in C are of the form A_1 -[RW(A_2)]-> A_2. But by construction of inputKeysFor, the
364+ action A_1 is attempting to acquire the read locks of all its inputs' generating actions, and
365+ thus the action A_1 depends on one of the outputs of A_2 (*).
366+
367+ Applied to all edges of C, we conclude that there is a corresponding directed cycle in the
368+ action graph, which is a contradiction since Bazel disallows dependency cycles.
369+
370+ Notes:
371+ * The proof would not go through at (*) if fineLocks was replaced by a Striped lock structure
372+ with a fixed number of locks. In fact, this gives rise to a deadlock if the number of stripes
373+ is at least 2, but low enough that distinct generating actions hash to the same stripe.
374+ */
327375
328376 @ Override
329377 public SilentCloseable enterActionPreparation (Action action , boolean wasRewound )
@@ -360,7 +408,7 @@ private SilentCloseable enterActionPreparationInternal(Action action)
360408 Caffeine .newBuilder ()
361409 .weakValues ()
362410 // TODO: Investigate whether fair locks would be beneficial.
363- .build ((ActionLookupData unused ) -> new WeakSafeReadWriteLock ());
411+ .build ((ActionLookupData unused ) -> new WeakSafeReentrantReadWriteLock ());
364412 // Lock the corresponding fine lock and publish the fine locks before releasing the coarse
365413 // lock.
366414 writeLock = localFineLocks .get (outputKeyFor (action )).writeLock ();
@@ -462,7 +510,7 @@ private static ActionLookupData outputKeyFor(Action action) {
462510 * Otherwise, a reference to just the read lock or just the write lock would not suffice to
463511 * ensure the {@code ReadWriteLock} is retained.
464512 */
465- private static final class WeakSafeReadWriteLock extends ReentrantReadWriteLock {
513+ private static final class WeakSafeReentrantReadWriteLock extends ReentrantReadWriteLock {
466514 @ Override
467515 public WeakSafeReadLock readLock () {
468516 return new WeakSafeReadLock (this );
@@ -479,9 +527,9 @@ public WeakSafeWriteLock writeLock() {
479527 */
480528 private static final class WeakSafeReadLock extends ReentrantReadWriteLock .ReadLock {
481529 @ SuppressWarnings ({"unused" , "FieldCanBeLocal" })
482- private final WeakSafeReadWriteLock strongReference ;
530+ private final WeakSafeReentrantReadWriteLock strongReference ;
483531
484- WeakSafeReadLock (WeakSafeReadWriteLock readWriteLock ) {
532+ WeakSafeReadLock (WeakSafeReentrantReadWriteLock readWriteLock ) {
485533 super (readWriteLock );
486534 this .strongReference = readWriteLock ;
487535 }
@@ -497,9 +545,9 @@ public Condition newCondition() {
497545 */
498546 private static final class WeakSafeWriteLock extends ReentrantReadWriteLock .WriteLock {
499547 @ SuppressWarnings ({"unused" , "FieldCanBeLocal" })
500- private final WeakSafeReadWriteLock strongReference ;
548+ private final WeakSafeReentrantReadWriteLock strongReference ;
501549
502- WeakSafeWriteLock (WeakSafeReadWriteLock readWriteLock ) {
550+ WeakSafeWriteLock (WeakSafeReentrantReadWriteLock readWriteLock ) {
503551 super (readWriteLock );
504552 this .strongReference = readWriteLock ;
505553 }
0 commit comments