Skip to content

Commit 362af8d

Browse files
authored
CNDB-13457: Fix problems with byte-comparable versions in SAI (#1655)
### What is the issue The original AA SAI format was writing to disk with different byte-comparable version depending on the version of the sstable files. This causes problems with the ByteComparable version checking that was recently introduced. ### What does this PR fix and why was it fixed - Introduces a new `ByteComparable.Preencoded` interface meant to store and surface the byte-comparable version something was encoded with. - Changes tries to always return `Preencoded` type, so that callers may easily query the version. - Changes SAI code to use the correct byte-comparable version when that is necessary, and to ignore or use Preencoded's version when it is not feasible or important.
1 parent b741795 commit 362af8d

35 files changed

+283
-173
lines changed

build.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1723,6 +1723,7 @@
17231723
<jvmarg value="-Dcassandra.ring_delay_ms=1000"/>
17241724
<jvmarg value="-Dcassandra.tolerate_sstable_size=true"/>
17251725
<jvmarg value="-Dcassandra.sai.latest.version=aa"/>
1726+
<jvmarg value="-Dcassandra.trie_index_format_version=bb"/>
17261727
<jvmarg value="-Dcassandra.skip_sync=true" />
17271728
<jvmarg value="-Dlogback.configurationFile=file://${test.logback.configurationFile}"/>
17281729
</testmacrohelper>

src/java/org/apache/cassandra/db/memtable/TrieMemtable.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -729,11 +729,11 @@ long currentOperations()
729729

730730
private DecoratedKey firstPartitionKey(Direction direction)
731731
{
732-
Iterator<Map.Entry<ByteComparable, PartitionData>> iter = data.filteredEntryIterator(direction, PartitionData.class);
732+
Iterator<Map.Entry<ByteComparable.Preencoded, PartitionData>> iter = data.filteredEntryIterator(direction, PartitionData.class);
733733
if (!iter.hasNext())
734734
return null;
735735

736-
Map.Entry<ByteComparable, PartitionData> entry = iter.next();
736+
Map.Entry<ByteComparable.Preencoded, PartitionData> entry = iter.next();
737737
return getPartitionKeyFromPath(metadata.get(), entry.getKey());
738738
}
739739

src/java/org/apache/cassandra/db/memtable/TrieMemtableStage1.java

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ private static MemtablePartition createPartition(TableMetadata metadata, EnsureO
404404
return new MemtablePartition(metadata, ensureOnHeap, key, data);
405405
}
406406

407-
private static MemtablePartition getPartitionFromTrieEntry(TableMetadata metadata, EnsureOnHeap ensureOnHeap, Map.Entry<ByteComparable, BTreePartitionData> en)
407+
private static MemtablePartition getPartitionFromTrieEntry(TableMetadata metadata, EnsureOnHeap ensureOnHeap, Map.Entry<? extends ByteComparable, BTreePartitionData> en)
408408
{
409409
DecoratedKey key = BufferDecoratedKey.fromByteComparable(en.getKey(),
410410
BYTE_COMPARABLE_VERSION,
@@ -423,11 +423,10 @@ public FlushCollection<MemtablePartition> getFlushSet(PartitionPosition from, Pa
423423
long keySize = 0;
424424
int keyCount = 0;
425425

426-
for (Iterator<Map.Entry<ByteComparable, BTreePartitionData>> it = toFlush.entryIterator(); it.hasNext(); )
426+
for (Iterator<Map.Entry<ByteComparable.Preencoded, BTreePartitionData>> it = toFlush.entryIterator(); it.hasNext(); )
427427
{
428-
Map.Entry<ByteComparable, BTreePartitionData> en = it.next();
429-
ByteComparable byteComparable = v -> en.getKey().asPeekableBytes(BYTE_COMPARABLE_VERSION);
430-
byte[] keyBytes = DecoratedKey.keyFromByteComparable(byteComparable, BYTE_COMPARABLE_VERSION, metadata().partitioner);
428+
Map.Entry<ByteComparable.Preencoded, BTreePartitionData> en = it.next();
429+
byte[] keyBytes = DecoratedKey.keyFromByteComparable(en.getKey(), BYTE_COMPARABLE_VERSION, metadata().partitioner);
431430
keySize += keyBytes.length;
432431
keyCount++;
433432
}
@@ -629,11 +628,11 @@ long currentOperations()
629628

630629
private DecoratedKey firstPartitionKey(Direction direction)
631630
{
632-
Iterator<Map.Entry<ByteComparable, BTreePartitionData>> iter = data.entryIterator(direction);
631+
Iterator<Map.Entry<ByteComparable.Preencoded, BTreePartitionData>> iter = data.entryIterator(direction);
633632
if (!iter.hasNext())
634633
return null;
635634

636-
Map.Entry<ByteComparable, BTreePartitionData> entry = iter.next();
635+
Map.Entry<ByteComparable.Preencoded, BTreePartitionData> entry = iter.next();
637636
return getPartitionKeyFromPath(metadata.get(), entry.getKey());
638637
}
639638

@@ -653,7 +652,7 @@ static class MemtableUnfilteredPartitionIterator extends AbstractUnfilteredParti
653652
private final TableMetadata metadata;
654653
private final EnsureOnHeap ensureOnHeap;
655654
private final Trie<BTreePartitionData> source;
656-
private final Iterator<Map.Entry<ByteComparable, BTreePartitionData>> iter;
655+
private final Iterator<Map.Entry<ByteComparable.Preencoded, BTreePartitionData>> iter;
657656
private final ColumnFilter columnFilter;
658657
private final DataRange dataRange;
659658

src/java/org/apache/cassandra/db/tries/InMemoryReadTrie.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1132,6 +1132,11 @@ public boolean isEmpty()
11321132
return isNull(root);
11331133
}
11341134

1135+
public ByteComparable.Version byteComparableVersion()
1136+
{
1137+
return byteComparableVersion;
1138+
}
1139+
11351140
/**
11361141
* Override of dump to provide more detailed printout that includes the type of each node in the trie.
11371142
* We do this via a wrapping cursor that returns a content string for the type of node for every node we return.

src/java/org/apache/cassandra/db/tries/Trie.java

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -375,15 +375,15 @@ public void forEachValue(ValueConsumer<T> consumer)
375375
/**
376376
* Call the given consumer on all (path, content) pairs with non-null content in the trie in order.
377377
*/
378-
public void forEachEntry(BiConsumer<ByteComparable, T> consumer)
378+
public void forEachEntry(BiConsumer<ByteComparable.Preencoded, T> consumer)
379379
{
380380
forEachEntry(Direction.FORWARD, consumer);
381381
}
382382

383383
/**
384384
* Call the given consumer on all (path, content) pairs with non-null content in the trie in order.
385385
*/
386-
public void forEachEntry(Direction direction, BiConsumer<ByteComparable, T> consumer)
386+
public void forEachEntry(Direction direction, BiConsumer<ByteComparable.Preencoded, T> consumer)
387387
{
388388
Cursor<T> cursor = cursor(direction);
389389
process(new TrieEntriesWalker.WithConsumer<T>(consumer, cursor.byteComparableVersion()), cursor);
@@ -427,7 +427,7 @@ public Void forEachValueSkippingBranches(Direction direction, ValueConsumer<T> c
427427
* Call the given consumer on all (path, content) pairs with non-null content in the trie in order, skipping all
428428
* branches below the top content-bearing node.
429429
*/
430-
public void forEachEntrySkippingBranches(Direction direction, BiConsumer<ByteComparable, T> consumer)
430+
public void forEachEntrySkippingBranches(Direction direction, BiConsumer<ByteComparable.Preencoded, T> consumer)
431431
{
432432
Cursor<T> cursor = cursor(direction);
433433
processSkippingBranches(new TrieEntriesWalker.WithConsumer<T>(consumer, cursor.byteComparableVersion()), cursor);
@@ -545,55 +545,55 @@ public Trie<T> subtrie(ByteComparable left, ByteComparable right)
545545
/**
546546
* Returns the ordered entry set of this trie's content as an iterable.
547547
*/
548-
public Iterable<Map.Entry<ByteComparable, T>> entrySet()
548+
public Iterable<Map.Entry<ByteComparable.Preencoded, T>> entrySet()
549549
{
550550
return this::entryIterator;
551551
}
552552

553553
/**
554554
* Returns the ordered entry set of this trie's content as an iterable.
555555
*/
556-
public Iterable<Map.Entry<ByteComparable, T>> entrySet(Direction direction)
556+
public Iterable<Map.Entry<ByteComparable.Preencoded, T>> entrySet(Direction direction)
557557
{
558558
return () -> entryIterator(direction);
559559
}
560560

561561
/**
562562
* Returns the ordered entry set of this trie's content in an iterator.
563563
*/
564-
public Iterator<Map.Entry<ByteComparable, T>> entryIterator()
564+
public Iterator<Map.Entry<ByteComparable.Preencoded, T>> entryIterator()
565565
{
566566
return entryIterator(Direction.FORWARD);
567567
}
568568

569569
/**
570570
* Returns the ordered entry set of this trie's content in an iterator.
571571
*/
572-
public Iterator<Map.Entry<ByteComparable, T>> entryIterator(Direction direction)
572+
public Iterator<Map.Entry<ByteComparable.Preencoded, T>> entryIterator(Direction direction)
573573
{
574574
return new TrieEntriesIterator.AsEntries<>(cursor(direction));
575575
}
576576

577577
/**
578578
* Returns the ordered entry set of this trie's content in an iterable, filtered by the given type.
579579
*/
580-
public <U extends T> Iterable<Map.Entry<ByteComparable, U>> filteredEntrySet(Class<U> clazz)
580+
public <U extends T> Iterable<Map.Entry<ByteComparable.Preencoded, U>> filteredEntrySet(Class<U> clazz)
581581
{
582582
return filteredEntrySet(Direction.FORWARD, clazz);
583583
}
584584

585585
/**
586586
* Returns the ordered entry set of this trie's content in an iterable, filtered by the given type.
587587
*/
588-
public <U extends T> Iterable<Map.Entry<ByteComparable, U>> filteredEntrySet(Direction direction, Class<U> clazz)
588+
public <U extends T> Iterable<Map.Entry<ByteComparable.Preencoded, U>> filteredEntrySet(Direction direction, Class<U> clazz)
589589
{
590590
return () -> filteredEntryIterator(direction, clazz);
591591
}
592592

593593
/**
594594
* Returns the ordered entry set of this trie's content in an iterator, filtered by the given type.
595595
*/
596-
public <U extends T> Iterator<Map.Entry<ByteComparable, U>> filteredEntryIterator(Direction direction, Class<U> clazz)
596+
public <U extends T> Iterator<Map.Entry<ByteComparable.Preencoded, U>> filteredEntryIterator(Direction direction, Class<U> clazz)
597597
{
598598
return new TrieEntriesIterator.AsEntriesFilteredByType<>(cursor(direction), clazz);
599599
}
@@ -801,7 +801,7 @@ public Trie<T> prefixedBy(ByteComparable prefix)
801801
* Returns an entry set containing all tail tree constructed at the points that contain content of
802802
* the given type.
803803
*/
804-
public Iterable<Map.Entry<ByteComparable, Trie<T>>> tailTries(Direction direction, Class<? extends T> clazz)
804+
public Iterable<Map.Entry<ByteComparable.Preencoded, Trie<T>>> tailTries(Direction direction, Class<? extends T> clazz)
805805
{
806806
return () -> new TrieTailsIterator.AsEntries<>(cursor(direction), clazz);
807807
}

src/java/org/apache/cassandra/db/tries/TrieEntriesIterator.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -86,15 +86,15 @@ ByteComparable.Version byteComparableVersion()
8686
/**
8787
* Iterator representing the content of the trie a sequence of (path, content) pairs.
8888
*/
89-
static class AsEntries<T> extends TrieEntriesIterator<T, Map.Entry<ByteComparable, T>>
89+
static class AsEntries<T> extends TrieEntriesIterator<T, Map.Entry<ByteComparable.Preencoded, T>>
9090
{
9191
public AsEntries(Trie.Cursor<T> cursor)
9292
{
9393
super(cursor, Predicates.alwaysTrue());
9494
}
9595

9696
@Override
97-
protected Map.Entry<ByteComparable, T> mapContent(T content, byte[] bytes, int byteLength)
97+
protected Map.Entry<ByteComparable.Preencoded, T> mapContent(T content, byte[] bytes, int byteLength)
9898
{
9999
return toEntry(byteComparableVersion(), content, bytes, byteLength);
100100
}
@@ -103,7 +103,7 @@ protected Map.Entry<ByteComparable, T> mapContent(T content, byte[] bytes, int b
103103
/**
104104
* Iterator representing the content of the trie a sequence of (path, content) pairs.
105105
*/
106-
static class AsEntriesFilteredByType<T, U extends T> extends TrieEntriesIterator<T, Map.Entry<ByteComparable, U>>
106+
static class AsEntriesFilteredByType<T, U extends T> extends TrieEntriesIterator<T, Map.Entry<ByteComparable.Preencoded, U>>
107107
{
108108
public AsEntriesFilteredByType(Trie.Cursor<T> cursor, Class<U> clazz)
109109
{
@@ -112,13 +112,13 @@ public AsEntriesFilteredByType(Trie.Cursor<T> cursor, Class<U> clazz)
112112

113113
@Override
114114
@SuppressWarnings("unchecked") // checked by the predicate
115-
protected Map.Entry<ByteComparable, U> mapContent(T content, byte[] bytes, int byteLength)
115+
protected Map.Entry<ByteComparable.Preencoded, U> mapContent(T content, byte[] bytes, int byteLength)
116116
{
117117
return toEntry(byteComparableVersion(), (U) content, bytes, byteLength);
118118
}
119119
}
120120

121-
static <T> java.util.Map.Entry<ByteComparable, T> toEntry(ByteComparable.Version version, T content, byte[] bytes, int byteLength)
121+
static <T> java.util.Map.Entry<ByteComparable.Preencoded, T> toEntry(ByteComparable.Version version, T content, byte[] bytes, int byteLength)
122122
{
123123
return new AbstractMap.SimpleImmutableEntry<>(toByteComparable(version, bytes, byteLength), content);
124124
}

src/java/org/apache/cassandra/db/tries/TrieEntriesWalker.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ public void content(T content)
4040
*/
4141
static class WithConsumer<T> extends TrieEntriesWalker<T, Void>
4242
{
43-
private final BiConsumer<ByteComparable, T> consumer;
43+
private final BiConsumer<ByteComparable.Preencoded, T> consumer;
4444
private final ByteComparable.Version byteComparableVersion;
4545

46-
public WithConsumer(BiConsumer<ByteComparable, T> consumer, ByteComparable.Version byteComparableVersion)
46+
public WithConsumer(BiConsumer<ByteComparable.Preencoded, T> consumer, ByteComparable.Version byteComparableVersion)
4747
{
4848
this.consumer = consumer;
4949
this.byteComparableVersion = byteComparableVersion;

src/java/org/apache/cassandra/db/tries/TriePathReconstructor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ public void resetPathLength(int newLength)
4949
keyPos = newLength;
5050
}
5151

52-
static ByteComparable toByteComparable(ByteComparable.Version byteComparableVersion, byte[] bytes, int byteLength)
52+
static ByteComparable.Preencoded toByteComparable(ByteComparable.Version byteComparableVersion, byte[] bytes, int byteLength)
5353
{
5454
// Taking a copy here to make sure it does not get modified when the cursor advances.
5555
return ByteComparable.preencoded(byteComparableVersion, Arrays.copyOf(bytes, byteLength));

src/java/org/apache/cassandra/db/tries/TrieTailsIterator.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,17 +101,17 @@ ByteComparable.Version byteComparableVersion()
101101
* {@code tail} is the branch of the trie rooted at the selected content node (reachable by following
102102
* {@code path}). The tail trie will have the selected content at its root.
103103
*/
104-
static class AsEntries<T> extends TrieTailsIterator<T, Map.Entry<ByteComparable, Trie<T>>>
104+
static class AsEntries<T> extends TrieTailsIterator<T, Map.Entry<ByteComparable.Preencoded, Trie<T>>>
105105
{
106106
public AsEntries(Trie.Cursor<T> cursor, Class<? extends T> clazz)
107107
{
108108
super(cursor, clazz::isInstance);
109109
}
110110

111111
@Override
112-
protected Map.Entry<ByteComparable, Trie<T>> mapContent(T value, Trie<T> tailTrie, byte[] bytes, int byteLength)
112+
protected Map.Entry<ByteComparable.Preencoded, Trie<T>> mapContent(T value, Trie<T> tailTrie, byte[] bytes, int byteLength)
113113
{
114-
ByteComparable key = toByteComparable(byteComparableVersion(), bytes, byteLength);
114+
ByteComparable.Preencoded key = toByteComparable(byteComparableVersion(), bytes, byteLength);
115115
return new AbstractMap.SimpleImmutableEntry<>(key, tailTrie);
116116
}
117117
}

src/java/org/apache/cassandra/index/sai/disk/MemtableTermsIterator.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,16 @@ public class MemtableTermsIterator implements TermsIterator
3434
{
3535
private final ByteBuffer minTerm;
3636
private final ByteBuffer maxTerm;
37-
private final Iterator<Pair<ByteComparable, List<RowMapping.RowIdWithFrequency>>> iterator;
37+
private final Iterator<Pair<ByteComparable.Preencoded, List<RowMapping.RowIdWithFrequency>>> iterator;
3838

39-
private Pair<ByteComparable, List<RowMapping.RowIdWithFrequency>> current;
39+
private Pair<ByteComparable.Preencoded, List<RowMapping.RowIdWithFrequency>> current;
4040

4141
private int maxSSTableRowId = -1;
4242
private int minSSTableRowId = Integer.MAX_VALUE;
4343

4444
public MemtableTermsIterator(ByteBuffer minTerm,
4545
ByteBuffer maxTerm,
46-
Iterator<Pair<ByteComparable, List<RowMapping.RowIdWithFrequency>>> iterator)
46+
Iterator<Pair<ByteComparable.Preencoded, List<RowMapping.RowIdWithFrequency>>> iterator)
4747
{
4848
Preconditions.checkArgument(iterator != null);
4949
this.minTerm = minTerm;

0 commit comments

Comments
 (0)