Skip to content

Commit a097be4

Browse files
k-rusdjatnieks
authored and committed
CNDB-13696 fix empty iterator access in BM25 search on partial SSTable (#1691)
Executing a query that combines a BM25 search with a condition on a partial SSTable resulted in an empty-iterator access error, and there was no test that stored data across several segments. This PR adds BM25 search tests that split the data into two segments. The new test reproduces this bug, CNDB-13696, and also demonstrates the current confusion in the BM25 ordering result, which is to be fixed by CNDB-13553. The PR adds a check for the empty iterator that is created when a PK belongs to another segment, which fixes the bug of trying to get the first element of an empty iterator.
1 parent fa52c61 commit a097be4

File tree

2 files changed

+46
-1
lines changed

2 files changed

+46
-1
lines changed

src/java/org/apache/cassandra/index/sai/disk/v1/InvertedIndexSearcher.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,9 @@ private Cell<?> readColumn(SSTableReader sstable, PrimaryKey primaryKey)
168168
var slices = Slices.with(indexContext.comparator(), Slice.make(primaryKey.clustering()));
169169
try (var rowIterator = sstable.rowIterator(dk, slices, columnFilter, false, SSTableReadsListener.NOOP_LISTENER))
170170
{
171+
// primaryKey might not belong to this sstable, thus the iterator will be empty
172+
if (rowIterator.isEmpty())
173+
return null;
171174
var unfiltered = rowIterator.next();
172175
assert unfiltered.isRow() : unfiltered;
173176
Row row = (Row) unfiltered;

test/unit/org/apache/cassandra/index/sai/cql/BM25Test.java

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -749,6 +749,42 @@ public void testCollections() throws Throwable
749749
});
750750
}
751751

752+
@Test
753+
public void testOrderingSeveralSegments() throws Throwable
754+
{
755+
createTable("CREATE TABLE %s (id int PRIMARY KEY, category text, score int," +
756+
"title text, body text)");
757+
createAnalyzedIndex("body", true);
758+
createIndex("CREATE CUSTOM INDEX ON %s (score) USING 'StorageAttachedIndex'");
759+
insertPrimitiveData(0, 10);
760+
flush();
761+
insertPrimitiveData(10, 20);
762+
763+
// One memtable, one sstable - different result from the reference in testCollections
764+
// ID 1 and 6 contain 3 and 2 climate occurrences correspondingly,
765+
// while ID 11 and 19 - 4 climate occurrences. However,
766+
// since the segment with 0-9 IDs have only 2 rows with climate and 10-19 - 5,
767+
// 1 and 6 win over 11 and 19.
768+
executeQuery(Arrays.asList(1, 6, 11, 19, 16, 12, 18), "SELECT * FROM %s ORDER BY body BM25 OF ? LIMIT 10",
769+
"climate");
770+
executeQuery(Arrays.asList(1, 11, 19), "SELECT * FROM %s WHERE score = 5 ORDER BY body BM25 OF ? LIMIT 10",
771+
"climate");
772+
773+
// Flush into two sstables - same result as the memtable + sstable case above (still differs from the reference)
774+
flush();
775+
executeQuery(Arrays.asList(1, 6, 11, 19, 16, 12, 18), "SELECT * FROM %s ORDER BY body BM25 OF ? LIMIT 10",
776+
"climate");
777+
executeQuery(Arrays.asList(1, 11, 19), "SELECT * FROM %s WHERE score = 5 ORDER BY body BM25 OF ? LIMIT 10",
778+
"climate");
779+
780+
// Compact into one sstable - same as reference from testCollections
781+
compact();
782+
executeQuery(Arrays.asList(11, 19, 1, 16, 6, 12, 18), "SELECT * FROM %s ORDER BY body BM25 OF ? LIMIT 10",
783+
"climate");
784+
executeQuery(Arrays.asList(11, 19, 1), "SELECT * FROM %s WHERE score = 5 ORDER BY body BM25 OF ? LIMIT 10",
785+
"climate");
786+
}
787+
752788
private final static Object[][] DATASET =
753789
{
754790
{ 1, "Climate", 5, "Climate change is a pressing issue. Climate patterns are shifting globally. Scientists study climate data daily.", 1 },
@@ -794,8 +830,14 @@ private void analyzeDataset(String term)
794830

795831
private void insertPrimitiveData()
796832
{
797-
for (Object[] row : DATASET)
833+
insertPrimitiveData(0, DATASET.length);
834+
}
835+
836+
private void insertPrimitiveData(int start, int end)
837+
{
838+
for (int i = start; i < end; i++)
798839
{
840+
Object[] row = DATASET[i];
799841
execute(
800842
"INSERT INTO %s (id, category, score, body) VALUES (?, ?, ?, ?)",
801843
row[0],

0 commit comments

Comments
 (0)