Skip to content

Commit 9df5481

Browse files
committed
Accord: Update CommandsForRanges.Manager interval tree asynchronously
patch by Benedict; reviewed by David Capwell for CASSANDRA-20764
1 parent 37b6ade commit 9df5481

File tree

5 files changed

+161
-16
lines changed

5 files changed

+161
-16
lines changed

src/java/org/apache/cassandra/service/accord/AccordCommandStore.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,6 @@ public ExclusiveCaches(Lock lock, AccordCache global, AccordCache.Type<TxnId, Co
116116
this.lock = lock;
117117
}
118118

119-
120119
@Override
121120
public AccordSafeCommand acquireIfLoaded(TxnId txnId)
122121
{

src/java/org/apache/cassandra/service/accord/AccordExecutor.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,11 @@ private void submitExclusive(Plain task)
543543
waitingToRun(task, task.executor());
544544
}
545545

546+
void submitExclusive(Runnable run)
547+
{
548+
submitExclusive(new PlainRunnable(run));
549+
}
550+
546551
void submitExclusive(AccordTask<?> task)
547552
{
548553
++tasks;
@@ -1246,6 +1251,11 @@ class PlainRunnable extends Plain implements Cancellable
12461251
final Runnable run;
12471252
final @Nullable SequentialExecutor executor;
12481253

1254+
PlainRunnable(Runnable run)
1255+
{
1256+
this(null, run, null);
1257+
}
1258+
12491259
PlainRunnable(AsyncPromise<Void> result, Runnable run, @Nullable SequentialExecutor executor)
12501260
{
12511261
this.result = result;

src/java/org/apache/cassandra/service/accord/CommandsForRanges.java

Lines changed: 133 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,18 @@
1818

1919
package org.apache.cassandra.service.accord;
2020

21+
import java.util.ArrayList;
2122
import java.util.Comparator;
23+
import java.util.HashMap;
2224
import java.util.Iterator;
25+
import java.util.List;
2326
import java.util.Map;
2427
import java.util.NavigableMap;
2528
import java.util.TreeMap;
2629
import java.util.concurrent.atomic.AtomicReference;
30+
import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;
31+
import java.util.concurrent.locks.Lock;
32+
import java.util.concurrent.locks.ReentrantLock;
2733
import java.util.function.BiFunction;
2834
import java.util.function.Consumer;
2935
import java.util.function.UnaryOperator;
@@ -54,7 +60,9 @@
5460
import accord.utils.UnhandledEnum;
5561
import org.agrona.collections.Object2ObjectHashMap;
5662
import org.apache.cassandra.service.accord.api.TokenKey;
63+
import org.apache.cassandra.utils.btree.BTreeSet;
5764
import org.apache.cassandra.utils.btree.IntervalBTree;
65+
import org.apache.cassandra.utils.concurrent.IntrusiveStack;
5866

5967
import static accord.local.CommandSummaries.SummaryStatus.NOT_DIRECTLY_WITNESSED;
6068
import static org.apache.cassandra.utils.btree.IntervalBTree.InclusiveEndHelper.endWithStart;
@@ -122,15 +130,54 @@ public NavigableMap<Timestamp, CommandSummaries.Summary> byTxnId()
122130
return this;
123131
}
124132

125-
public static class Manager implements AccordCache.Listener<TxnId, Command>
133+
public static class Manager implements AccordCache.Listener<TxnId, Command>, Runnable
126134
{
135+
static class IntervalTreeEdit extends IntrusiveStack<IntervalTreeEdit>
136+
{
137+
final TxnId txnId;
138+
final @Nullable Object[] update, remove;
139+
140+
IntervalTreeEdit(TxnId txnId, Object[] update, Object[] remove)
141+
{
142+
this.txnId = txnId;
143+
this.update = update;
144+
this.remove = remove;
145+
}
146+
147+
public static boolean push(IntervalTreeEdit edit, Manager manager)
148+
{
149+
return null == IntrusiveStack.getAndPush(pendingEditsUpdater, manager, edit);
150+
}
151+
152+
public IntervalTreeEdit reverse()
153+
{
154+
return reverse(this);
155+
}
156+
157+
boolean isSize(int size)
158+
{
159+
return IntrusiveStack.isSize(size, this);
160+
}
161+
162+
IntervalTreeEdit merge(IntervalTreeEdit next)
163+
{
164+
Invariants.require(this.txnId.equals(next.txnId));
165+
Object[] remove = this.remove == null ? next.remove : next.remove == null ? this.remove : IntervalBTree.update(this.remove, next.remove, COMPARATORS);
166+
return new IntervalTreeEdit(txnId, next.update, remove);
167+
}
168+
}
169+
127170
private final AccordCommandStore commandStore;
128171
private final RangeSearcher searcher;
129172
private final AtomicReference<NavigableMap<TxnId, Ranges>> transitive = new AtomicReference<>(new TreeMap<>());
130173
// TODO (desired): manage memory consumed by this auxillary information
131174
private final Object2ObjectHashMap<TxnId, RangeRoute> cachedRangeTxnsById = new Object2ObjectHashMap<>();
132175
private Object[] cachedRangeTxnsByRange = IntervalBTree.empty();
133176

177+
private volatile IntervalTreeEdit pendingEdits;
178+
private final Lock drainPendingEditsLock = new ReentrantLock();
179+
private static final AtomicReferenceFieldUpdater<Manager, IntervalTreeEdit> pendingEditsUpdater = AtomicReferenceFieldUpdater.newUpdater(Manager.class, IntervalTreeEdit.class, "pendingEdits");
180+
134181
public Manager(AccordCommandStore commandStore)
135182
{
136183
this.commandStore = commandStore;
@@ -155,16 +202,93 @@ public void onUpdate(AccordCacheEntry<TxnId, Command> state)
155202
{
156203
RangeRoute cur = cachedRangeTxnsById.put(cmd.txnId(), upd);
157204
if (!upd.equals(cur))
158-
{
159-
if (cur != null)
160-
remove(txnId, cur);
161-
cachedRangeTxnsByRange = IntervalBTree.update(cachedRangeTxnsByRange, toMap(txnId, upd), COMPARATORS);
162-
}
205+
pushEdit(new IntervalTreeEdit(txnId, toMap(txnId, upd), cur == null ? null : toMap(txnId, cur)));
163206
}
164207
}
165208
}
166209
}
167210

211+
private void pushEdit(IntervalTreeEdit edit)
212+
{
213+
if (IntervalTreeEdit.push(edit, this))
214+
commandStore.executor().submitExclusive(this);
215+
}
216+
217+
@Override
218+
public void run()
219+
{
220+
if (drainPendingEditsLock.tryLock())
221+
{
222+
try
223+
{
224+
drainPendingEditsInternal();
225+
}
226+
finally
227+
{
228+
drainPendingEditsLock.unlock();
229+
postUnlock();
230+
}
231+
}
232+
}
233+
234+
Object[] cachedRangeTxnsByRange()
235+
{
236+
drainPendingEditsLock.lock();
237+
try
238+
{
239+
drainPendingEditsInternal();
240+
return cachedRangeTxnsByRange;
241+
}
242+
finally
243+
{
244+
drainPendingEditsLock.unlock();
245+
postUnlock();
246+
}
247+
}
248+
249+
void drainPendingEditsInternal()
250+
{
251+
IntervalTreeEdit edits = pendingEditsUpdater.getAndSet(this, null);
252+
if (edits == null)
253+
return;
254+
255+
if (edits.isSize(1))
256+
{
257+
if (edits.remove != null) cachedRangeTxnsByRange = IntervalBTree.subtract(cachedRangeTxnsByRange, edits.remove, COMPARATORS);
258+
if (edits.update != null) cachedRangeTxnsByRange = IntervalBTree.update(cachedRangeTxnsByRange, edits.update, COMPARATORS);
259+
return;
260+
}
261+
262+
edits = edits.reverse();
263+
Map<TxnId, IntervalTreeEdit> editMap = new HashMap<>();
264+
for (IntervalTreeEdit edit : edits)
265+
editMap.merge(edit.txnId, edit, IntervalTreeEdit::merge);
266+
267+
List<TxnIdInterval> update = new ArrayList<>(), remove = new ArrayList<>();
268+
for (IntervalTreeEdit edit : editMap.values())
269+
{
270+
if (edit.update != null) update.addAll(BTreeSet.wrap(edit.update, COMPARATORS.totalOrder()));
271+
if (edit.remove != null) remove.addAll(BTreeSet.wrap(edit.remove, COMPARATORS.totalOrder()));
272+
}
273+
274+
if (!remove.isEmpty())
275+
{
276+
remove.sort(COMPARATORS.totalOrder());
277+
cachedRangeTxnsByRange = IntervalBTree.subtract(cachedRangeTxnsByRange, IntervalBTree.build(remove, COMPARATORS), COMPARATORS);
278+
}
279+
if (!update.isEmpty())
280+
{
281+
update.sort(COMPARATORS.totalOrder());
282+
cachedRangeTxnsByRange = IntervalBTree.update(cachedRangeTxnsByRange, IntervalBTree.build(update, COMPARATORS), COMPARATORS);
283+
}
284+
}
285+
286+
private void postUnlock()
287+
{
288+
if (pendingEdits != null)
289+
commandStore.executor().submit(this);
290+
}
291+
168292
@Override
169293
public void onEvict(AccordCacheEntry<TxnId, Command> state)
170294
{
@@ -173,15 +297,10 @@ public void onEvict(AccordCacheEntry<TxnId, Command> state)
173297
{
174298
RangeRoute cur = cachedRangeTxnsById.remove(txnId);
175299
if (cur != null)
176-
remove(txnId, cur);
300+
pushEdit(new IntervalTreeEdit(txnId, null, toMap(txnId, cur)));
177301
}
178302
}
179303

180-
private void remove(TxnId txnId, RangeRoute route)
181-
{
182-
cachedRangeTxnsByRange = IntervalBTree.subtract(cachedRangeTxnsByRange, toMap(txnId, route), COMPARATORS);
183-
}
184-
185304
static Object[] toMap(TxnId txnId, RangeRoute route)
186305
{
187306
int size = route.size();
@@ -199,7 +318,6 @@ static Object[] toMap(TxnId txnId, RangeRoute route)
199318
}
200319
}
201320
}
202-
203321
}
204322

205323
public CommandsForRanges.Loader loader(@Nullable TxnId primaryTxnId, KeyHistory keyHistory, Unseekables<?> keysOrRanges)
@@ -331,7 +449,7 @@ public void forEachInCache(Unseekables<?> keysOrRanges, Consumer<Summary> forEac
331449
{
332450
for (RoutingKey key : (AbstractUnseekableKeys)keysOrRanges)
333451
{
334-
IntervalBTree.accumulate(manager.cachedRangeTxnsByRange, KEY_COMPARATORS, key, (f, s, i, c) -> {
452+
IntervalBTree.accumulate(manager.cachedRangeTxnsByRange(), KEY_COMPARATORS, key, (f, s, i, c) -> {
335453
TxnIdInterval interval = (TxnIdInterval)i;
336454
if (isRelevant(interval))
337455
{
@@ -349,7 +467,7 @@ public void forEachInCache(Unseekables<?> keysOrRanges, Consumer<Summary> forEac
349467
{
350468
for (Range range : (AbstractRanges)keysOrRanges)
351469
{
352-
IntervalBTree.accumulate(manager.cachedRangeTxnsByRange, COMPARATORS, new TxnIdInterval(range.start(), range.end(), TxnId.NONE), (f, s, i, c) -> {
470+
IntervalBTree.accumulate(manager.cachedRangeTxnsByRange(), COMPARATORS, new TxnIdInterval(range.start(), range.end(), TxnId.NONE), (f, s, i, c) -> {
353471
if (isRelevant(i))
354472
{
355473
TxnId txnId = i.txnId;

src/java/org/apache/cassandra/utils/btree/IntervalBTree.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import java.util.ArrayList;
2222
import java.util.Arrays;
23+
import java.util.Collection;
2324
import java.util.Comparator;
2425
import java.util.List;
2526

@@ -595,6 +596,16 @@ public static <Compare, Existing extends Compare, Insert extends Compare> Object
595596
}
596597
}
597598

599+
public static <V> Object[] build(Collection<V> build, IntervalComparators<V> comparators)
600+
{
601+
try (FastIntervalTreeBuilder<V> builder = IntervalBTree.fastBuilder(comparators))
602+
{
603+
for (V v : build)
604+
builder.add(v);
605+
return builder.build();
606+
}
607+
}
608+
598609
/**
599610
* Build a tree of unknown size, in order.
600611
*/

src/java/org/apache/cassandra/utils/concurrent/IntrusiveStack.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,13 @@ protected static int size(IntrusiveStack<?> list)
163163
return size;
164164
}
165165

166+
protected static boolean isSize(int size, IntrusiveStack<?> list)
167+
{
168+
while (list != null && --size >= 0)
169+
list = list.next;
170+
return list == null && size == 0;
171+
}
172+
166173
protected static <T extends IntrusiveStack<T>> long accumulate(T list, LongAccumulator<T> accumulator, long initialValue)
167174
{
168175
long value = initialValue;

0 commit comments

Comments
 (0)