Skip to content

Commit 09190f8

Browse files
CNDB-14144: Change Version.LATEST to ED. Move testIndexMetaForNumRows from BM25Test to FeaturesVersionSupportTest and extend its coverage to all versions. Add FeaturesVersionSupportEDTest. Make the BM25Test tests run on every SAI version on or after BM25_EARLIEST
1 parent 0aff773 commit 09190f8

File tree

4 files changed

+171
-127
lines changed

4 files changed

+171
-127
lines changed

src/java/org/apache/cassandra/index/sai/disk/format/Version.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ public class Version implements Comparable<Version>
7474
public static final Version VECTOR_EARLIEST = BA;
7575
public static final Version JVECTOR_EARLIEST = CA;
7676
public static final Version BM25_EARLIEST = EC;
77-
public static final Version LATEST = EC;
77+
public static final Version LATEST = ALL.get(0);
7878
// The current version can be configured to be an earlier version to support partial upgrades that don't
7979
// write newer versions of the on-disk formats. This is volatile rather than final so that tests may
8080
// use reflection to change it and safely publish across threads.
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* Copyright DataStax, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.apache.cassandra.distributed.test.sai.features;
18+
19+
import java.io.IOException;
20+
21+
import org.junit.BeforeClass;
22+
23+
import org.apache.cassandra.index.sai.disk.format.Version;
24+
25+
/**
26+
* {@link FeaturesVersionSupportTester} for {@link Version#ED}.
27+
*/
28+
public class FeaturesVersionSupportEDTest extends FeaturesVersionSupportTester
29+
{
30+
/**
* Class-level JUnit fixture: passes {@link Version#ED} to {@code initCluster}.
* NOTE(review): {@code initCluster} is not declared in this class (presumably inherited from
* {@link FeaturesVersionSupportTester}); confirm that it starts the cluster pinned to the given version.
*
* @throws IOException declared by this method; presumably propagated from {@code initCluster} - confirm
*/
@BeforeClass
31+
public static void setup() throws IOException
32+
{
33+
initCluster(Version.ED);
34+
}
35+
}

test/unit/org/apache/cassandra/index/sai/cql/BM25Test.java

Lines changed: 9 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -16,24 +16,15 @@
1616

1717
package org.apache.cassandra.index.sai.cql;
1818

19-
import java.util.ArrayList;
20-
import java.util.Arrays;
21-
import java.util.HashMap;
22-
import java.util.HashSet;
23-
import java.util.List;
19+
import java.util.*;
2420
import java.util.concurrent.ExecutorService;
2521
import java.util.concurrent.Executors;
2622
import java.util.concurrent.Future;
2723
import java.util.regex.Pattern;
2824
import java.util.stream.Collectors;
29-
import java.util.stream.IntStream;
3025

31-
import org.apache.cassandra.index.sai.SSTableIndex;
32-
import org.apache.cassandra.index.sai.memory.MemtableIndex;
33-
import org.apache.cassandra.index.sai.memory.TrieMemtableIndex;
3426
import org.assertj.core.api.Assertions;
3527

36-
import org.junit.Assert;
3728
import org.junit.Before;
3829
import org.junit.Test;
3930

@@ -63,11 +54,13 @@ public class BM25Test extends SAITester
6354
/**
* Parameter source for the parameterized test run; each supplied value is shown as {@code version={0}}.
* The line marked as removed returned exactly {@code Version.BM25_EARLIEST} and {@code Version.ED}.
* The lines marked as added return one single-element {@code Object[]} for every entry of
* {@code Version.ALL} for which {@code onOrAfter(Version.BM25_EARLIEST)} holds.
*
* @return the list of parameter values handed to the runner
*/
@Parameterized.Parameters(name = "version={0}")
6455
public static List<Object> data()
6556
{
66-
return Arrays.asList(new Object[]{ Version.BM25_EARLIEST, Version.ED });
57+
return Version.ALL.stream().filter(v -> v.onOrAfter(Version.BM25_EARLIEST))
58+
.map(v -> new Object[]{ v })
59+
.collect(Collectors.toList());
6760
}
6861

6962
// Pattern that treats apostrophes within words as part of the word
70-
private static final Pattern PATTERN = Pattern.compile("[^\\w']+|'(?=\\s)|(?<=\\s)'");
63+
public static final Pattern PATTERN = Pattern.compile("[^\\w']+|'(?=\\s)|(?<=\\s)'");
7164
public static final int DATASET_BODY_COLUMN = 3;
7265

7366
@Before
@@ -755,7 +748,7 @@ public void testCollections() throws Throwable
755748
createIndex("CREATE CUSTOM INDEX ON %s (category) USING 'StorageAttachedIndex'");
756749
createIndex("CREATE CUSTOM INDEX ON %s (map_category) USING 'StorageAttachedIndex'");
757750
createIndex("CREATE CUSTOM INDEX ON %s (KEYS(map_body)) USING 'StorageAttachedIndex'");
758-
insertCollectionData();
751+
insertCollectionData(this);
759752
analyzeDataset("climate");
760753
analyzeDataset("health");
761754

@@ -841,110 +834,7 @@ public void testOrderingSeveralSegments() throws Throwable
841834
"climate");
842835
}
843836

844-
/**
845-
* Asserts that memtable SAI index maintains expected row count, which is, then,
846-
* used to store row count in SSTable SAI index and its segments. This is also
847-
* asserted.
848-
*/
849-
@Test
850-
public void testIndexMetaForNumRows()
851-
{
852-
SAIUtil.setCurrentVersion(Version.ED);
853-
854-
createTable("CREATE TABLE %s (id int PRIMARY KEY, category text, score int, " +
855-
"title text, body text, bodyset set<text>, " +
856-
"map_category map<int, text>, map_body map<text, text>)");
857-
String bodyIndexName = createAnalyzedIndex("body", true);
858-
String scoreIndexName = createIndex("CREATE CUSTOM INDEX ON %s (score) USING 'StorageAttachedIndex'");
859-
String mapIndexName = createIndex("CREATE CUSTOM INDEX ON %s (map_category) USING 'StorageAttachedIndex'");
860-
insertCollectionData();
861-
int totalTermsCount = IntStream.range(0, DATASET.length)
862-
.map(this::calculateTotalTermsForRow)
863-
.sum();
864-
865-
assertNumRowsMemtable(scoreIndexName, DATASET.length, DATASET.length);
866-
assertNumRowsMemtable(bodyIndexName, DATASET.length, totalTermsCount);
867-
assertNumRowsMemtable(mapIndexName, DATASET.length);
868-
execute("DELETE FROM %s WHERE id = ?", 4);
869-
// Deletion is not tracked by Memindex
870-
assertNumRowsMemtable(bodyIndexName, DATASET.length, totalTermsCount);
871-
// Test an update to different value for analyzed index
872-
execute("UPDATE %s SET body = ? WHERE id = ?", DATASET[10][DATASET_BODY_COLUMN], 6);
873-
totalTermsCount += calculateTotalTermsForRow(10) - calculateTotalTermsForRow(6);
874-
assertNumRowsMemtable(bodyIndexName, DATASET.length, totalTermsCount);
875-
// Update back to the original value
876-
execute("UPDATE %s SET body = ? WHERE id = ?", DATASET[6][DATASET_BODY_COLUMN], 10);
877-
totalTermsCount += calculateTotalTermsForRow(6) - calculateTotalTermsForRow(10);
878-
assertNumRowsMemtable(bodyIndexName, DATASET.length, totalTermsCount);
879-
// Flush will account for the deleted row
880-
totalTermsCount -= calculateTotalTermsForRow(4);
881-
flush();
882-
assertNumRowsAndTotalTermsSSTable(scoreIndexName, DATASET.length - 1, DATASET.length - 1);
883-
assertNumRowsAndTotalTermsSSTable(bodyIndexName, DATASET.length - 1, totalTermsCount);
884-
assertNumRowsSSTable(mapIndexName, DATASET.length - 1);
885-
execute("DELETE FROM %s WHERE id = ?", 9);
886-
flush();
887-
assertNumRowsAndTotalTermsSSTable(scoreIndexName, DATASET.length - 1, DATASET.length - 1);
888-
assertNumRowsAndTotalTermsSSTable(bodyIndexName, DATASET.length - 1, totalTermsCount);
889-
assertNumRowsSSTable(mapIndexName, DATASET.length - 1);
890-
compact();
891-
totalTermsCount -= calculateTotalTermsForRow(9);
892-
assertNumRowsAndTotalTermsSSTable(scoreIndexName, DATASET.length - 2, DATASET.length - 2);
893-
assertNumRowsAndTotalTermsSSTable(bodyIndexName, DATASET.length - 2, totalTermsCount);
894-
assertNumRowsSSTable(mapIndexName, DATASET.length - 2);
895-
}
896-
897-
private void assertNumRowsMemtable(String indexName, int expectedNumRows)
898-
{
899-
assertNumRowsMemtable(indexName, expectedNumRows, -1);
900-
}
901-
902-
private void assertNumRowsMemtable(String indexName, int expectedNumRows, int expectedTotalTermsCount)
903-
{
904-
int rowCount = 0;
905-
long termCount = 0;
906-
907-
for (var memtable : getCurrentColumnFamilyStore().getAllMemtables())
908-
{
909-
MemtableIndex memIndex = getIndexContext(indexName).getLiveMemtables().get(memtable);
910-
assert memIndex instanceof TrieMemtableIndex;
911-
rowCount += ((TrieMemtableIndex) memIndex).indexedRows();
912-
termCount += ((TrieMemtableIndex) memIndex).approximateTotalTermCount();
913-
}
914-
assertEquals(expectedNumRows, rowCount);
915-
if (expectedTotalTermsCount >= 0)
916-
assertEquals(expectedTotalTermsCount, termCount);
917-
}
918-
919-
private void assertNumRowsSSTable(String indexName, int expectedNumRows)
920-
{
921-
assertNumRowsAndTotalTermsSSTable(indexName, expectedNumRows, -1);
922-
}
923-
924-
private void assertNumRowsAndTotalTermsSSTable(String indexName, int expectedNumRows, int expectedTotalTermsCount
925-
)
926-
{
927-
long indexRowCount = 0;
928-
long segmentRowCount = 0;
929-
long totalTermCount = 0;
930-
for (SSTableIndex sstableIndex : getIndexContext(indexName).getView())
931-
{
932-
indexRowCount += sstableIndex.getRowCount();
933-
for (var segment : sstableIndex.getSegments())
934-
{
935-
var metadata = segment.metadata;
936-
Assert.assertNotNull(metadata);
937-
segmentRowCount += metadata.numRows;
938-
totalTermCount += metadata.totalTermCount;
939-
}
940-
}
941-
assertEquals(indexRowCount, segmentRowCount);
942-
assertEquals(expectedNumRows, indexRowCount);
943-
if (expectedTotalTermsCount >= 0)
944-
assertEquals(expectedTotalTermsCount, totalTermCount);
945-
}
946-
947-
private final static Object[][] DATASET =
837+
public final static Object[][] DATASET =
948838
{
949839
{ 0, "Climate", 5, "Climate change is a pressing issue. Climate patterns are shifting globally. Scientists study climate data daily.", 1 },
950840
{ 1, "Technology", 3, "Technology is advancing. New technology in AI and robotics is groundbreaking.", 1 },
@@ -986,12 +876,6 @@ private void analyzeDataset(String term)
986876
}
987877
}
988878

989-
private int calculateTotalTermsForRow(int row)
990-
{
991-
String body = (String) DATASET[row][DATASET_BODY_COLUMN];
992-
return PATTERN.split(body.toLowerCase()).length;
993-
}
994-
995879
private void insertPrimitiveData()
996880
{
997881
insertPrimitiveData(0, DATASET.length);
@@ -1012,7 +896,7 @@ private void insertPrimitiveData(int start, int end)
1012896
}
1013897
}
1014898

1015-
private void insertCollectionData()
899+
public static void insertCollectionData(SAITester tester)
1016900
{
1017901
int setsize = 1;
1018902
for (int row = 0; row < DATASET.length; row++)
@@ -1032,7 +916,7 @@ private void insertCollectionData()
1032916
map_text.putIfAbsent((String) DATASET[row - j][1], (String) DATASET[row - j][3]);
1033917
}
1034918

1035-
execute(
919+
tester.execute(
1036920
"INSERT INTO %s (id, category, score, body, bodyset, map_category, map_body) " +
1037921
"VALUES (?, ?, ?, ?, ?, ?, ?)",
1038922
DATASET[row][0],

test/unit/org/apache/cassandra/index/sai/cql/FeaturesVersionSupportTest.java

Lines changed: 126 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,12 @@
1616

1717
package org.apache.cassandra.index.sai.cql;
1818

19+
1920
import java.util.Collection;
2021
import java.util.stream.Collectors;
22+
import java.util.stream.IntStream;
2123

24+
import org.junit.Assert;
2225
import org.junit.Before;
2326
import org.junit.Test;
2427
import org.junit.runner.RunWith;
@@ -28,13 +31,18 @@
2831
import org.apache.cassandra.exceptions.ReadFailureException;
2932
import org.apache.cassandra.exceptions.RequestFailureReason;
3033
import org.apache.cassandra.index.sai.SAIUtil;
34+
import org.apache.cassandra.index.sai.SSTableIndex;
3135
import org.apache.cassandra.index.sai.disk.format.Version;
36+
import org.apache.cassandra.index.sai.memory.MemtableIndex;
37+
import org.apache.cassandra.index.sai.memory.TrieMemtableIndex;
3238
import org.assertj.core.api.Assertions;
3339

40+
import static org.apache.cassandra.index.sai.cql.BM25Test.*;
3441
import static org.assertj.core.api.Assertions.assertThat;
42+
import static org.junit.Assert.assertEquals;
3543

3644
/**
37-
* Tests the availabilty of features in different versions of the SAI on-disk format.
45+
* Tests the availability of features in different versions of the SAI on-disk format.
3846
*/
3947
@RunWith(Parameterized.class)
4048
public class FeaturesVersionSupportTest extends VectorTester
@@ -226,4 +234,121 @@ public void testQueryAnalyzer()
226234
assertRows(execute("SELECT k FROM %s WHERE v : 'fox'"), row(1), row(2), row(3));
227235
assertRows(execute("SELECT k FROM %s WHERE v : 'foxes'"), row(3));
228236
}
237+
238+
/**
239+
* Checks the row counts (and, where an expected value is passed, the total term counts) reported by
240+
* the memtable indexes after the inserts, a delete and two updates, and then the counts reported by
241+
* the SSTable indexes and their segments after each flush and after compaction.
* <p>
* NOTE(review): this class runs under the {@code Parameterized} runner, yet the first statement sets
* the current version to {@link Version#ED}; confirm whether the test should use the run's own
* version instead.
242+
*/
243+
@Test
244+
public void testIndexMetaForNumRows()
245+
{
246+
SAIUtil.setCurrentVersion(Version.ED);
247+
248+
createTable("CREATE TABLE %s (id int PRIMARY KEY, category text, score int, " +
249+
"title text, body text, bodyset set<text>, " +
250+
"map_category map<int, text>, map_body map<text, text>)");
251+
// Analyzed index on body: standard tokenizer with porterstem and lowercase filters
String bodyIndexName = createIndex("CREATE CUSTOM INDEX ON %s(body) " +
252+
"USING 'org.apache.cassandra.index.sai.StorageAttachedIndex' " +
253+
"WITH OPTIONS = {" +
254+
"'index_analyzer': '{" +
255+
"\"tokenizer\" : {\"name\" : \"standard\"}, " +
256+
"\"filters\" : [{\"name\" : \"porterstem\"}" +
257+
", {\"name\" : \"lowercase\"}]" +
258+
"}'}"
259+
);
260+
String scoreIndexName = createIndex("CREATE CUSTOM INDEX ON %s (score) USING 'StorageAttachedIndex'");
261+
String mapIndexName = createIndex("CREATE CUSTOM INDEX ON %s (map_category) USING 'StorageAttachedIndex'");
262+
insertCollectionData(this);
263+
// Expected term total for the body index: sum of the per-row counts over the whole DATASET
int totalTermsCount = IntStream.range(0, DATASET.length)
264+
.map(this::calculateTotalTermsForRow)
265+
.sum();
266+
267+
assertNumRowsMemtable(scoreIndexName, DATASET.length, DATASET.length);
268+
assertNumRowsMemtable(bodyIndexName, DATASET.length, totalTermsCount);
269+
assertNumRowsMemtable(mapIndexName, DATASET.length);
270+
execute("DELETE FROM %s WHERE id = ?", 4);
271+
// The delete is not tracked by the memtable index, so the expected counts are unchanged
272+
assertNumRowsMemtable(bodyIndexName, DATASET.length, totalTermsCount);
273+
// Update row 6 to the body text of DATASET[10] and adjust the expected term total accordingly
274+
execute("UPDATE %s SET body = ? WHERE id = ?", DATASET[10][DATASET_BODY_COLUMN], 6);
275+
totalTermsCount += calculateTotalTermsForRow(10) - calculateTotalTermsForRow(6);
276+
assertNumRowsMemtable(bodyIndexName, DATASET.length, totalTermsCount);
277+
// Update row 10 to the body text of DATASET[6]; this cancels the previous adjustment, so the
// expected term total returns to its earlier value (the two rows have swapped bodies)
278+
execute("UPDATE %s SET body = ? WHERE id = ?", DATASET[6][DATASET_BODY_COLUMN], 10);
279+
totalTermsCount += calculateTotalTermsForRow(6) - calculateTotalTermsForRow(10);
280+
assertNumRowsMemtable(bodyIndexName, DATASET.length, totalTermsCount);
281+
// After the flush the deleted row 4 is no longer counted, so drop its terms from the expectation
282+
totalTermsCount -= calculateTotalTermsForRow(4);
283+
flush();
284+
assertNumRowsAndTotalTermsSSTable(scoreIndexName, DATASET.length - 1, DATASET.length - 1);
285+
assertNumRowsAndTotalTermsSSTable(bodyIndexName, DATASET.length - 1, totalTermsCount);
286+
assertNumRowsSSTable(mapIndexName, DATASET.length - 1);
287+
// The expected counts stay the same after deleting row 9 and flushing; they only drop after
// compact() below (presumably the delete sits in a separate SSTable until then - confirm)
execute("DELETE FROM %s WHERE id = ?", 9);
288+
flush();
289+
assertNumRowsAndTotalTermsSSTable(scoreIndexName, DATASET.length - 1, DATASET.length - 1);
290+
assertNumRowsAndTotalTermsSSTable(bodyIndexName, DATASET.length - 1, totalTermsCount);
291+
assertNumRowsSSTable(mapIndexName, DATASET.length - 1);
292+
compact();
293+
totalTermsCount -= calculateTotalTermsForRow(9);
294+
assertNumRowsAndTotalTermsSSTable(scoreIndexName, DATASET.length - 2, DATASET.length - 2);
295+
assertNumRowsAndTotalTermsSSTable(bodyIndexName, DATASET.length - 2, totalTermsCount);
296+
assertNumRowsSSTable(mapIndexName, DATASET.length - 2);
297+
}
298+
299+
/**
* Returns the number of pieces produced by splitting the lower-cased body text of
* {@code DATASET[row]} with {@code PATTERN}. {@code DATASET}, {@code DATASET_BODY_COLUMN} and
* {@code PATTERN} come from the static import of {@code BM25Test}.
*
* @param row index into {@code DATASET}
* @return length of the array returned by {@code PATTERN.split}
*/
private int calculateTotalTermsForRow(int row)
300+
{
301+
String body = (String) DATASET[row][DATASET_BODY_COLUMN];
302+
// NOTE(review): toLowerCase() without a Locale argument uses the JVM default locale
return PATTERN.split(body.toLowerCase()).length;
303+
}
304+
305+
/**
* Asserts only the memtable row count for the given index; passes {@code -1} as the expected
* term count, which the three-argument overload treats as "do not check".
*
* @param indexName name of the index to inspect
* @param expectedNumRows expected sum of indexed rows
*/
private void assertNumRowsMemtable(String indexName, int expectedNumRows)
306+
{
307+
assertNumRowsMemtable(indexName, expectedNumRows, -1);
308+
}
309+
310+
/**
* Sums {@code indexedRows()} and {@code approximateTotalTermCount()} over the memtable index found
* for each memtable of the current column family store, then asserts the sums.
*
* @param indexName name of the index to inspect
* @param expectedNumRows expected sum of indexed rows
* @param expectedTotalTermsCount expected sum of term counts; a negative value skips this check
*/
private void assertNumRowsMemtable(String indexName, int expectedNumRows, int expectedTotalTermsCount)
311+
{
312+
int rowCount = 0;
313+
long termCount = 0;
314+
315+
for (var memtable : getCurrentColumnFamilyStore().getAllMemtables())
316+
{
317+
MemtableIndex memIndex = getIndexContext(indexName).getLiveMemtables().get(memtable);
318+
// Language-level assert: only evaluated when the JVM runs with assertions enabled (-ea)
assert memIndex instanceof TrieMemtableIndex;
319+
rowCount += ((TrieMemtableIndex) memIndex).indexedRows();
320+
termCount += ((TrieMemtableIndex) memIndex).approximateTotalTermCount();
321+
}
322+
assertEquals(expectedNumRows, rowCount);
323+
// A negative expectation means the caller did not ask for the term count to be verified
if (expectedTotalTermsCount >= 0)
324+
assertEquals(expectedTotalTermsCount, termCount);
325+
}
326+
327+
/**
* Asserts only the SSTable row count for the given index; passes {@code -1} as the expected
* term count, which {@code assertNumRowsAndTotalTermsSSTable} treats as "do not check".
*
* @param indexName name of the index to inspect
* @param expectedNumRows expected sum of row counts
*/
private void assertNumRowsSSTable(String indexName, int expectedNumRows)
328+
{
329+
assertNumRowsAndTotalTermsSSTable(indexName, expectedNumRows, -1);
330+
}
331+
332+
/**
* Walks every SSTable index in the view of the given index and sums the per-index row count as
* well as the {@code numRows} and {@code totalTermCount} of each segment's metadata. Asserts that
* the per-index and per-segment row sums agree and match the expectation.
*
* @param indexName name of the index to inspect
* @param expectedNumRows expected sum of row counts
* @param expectedTotalTermsCount expected sum of segment term counts; a negative value skips this check
*/
private void assertNumRowsAndTotalTermsSSTable(String indexName, int expectedNumRows, int expectedTotalTermsCount
333+
)
334+
{
335+
long indexRowCount = 0;
336+
long segmentRowCount = 0;
337+
long totalTermCount = 0;
338+
for (SSTableIndex sstableIndex : getIndexContext(indexName).getView())
339+
{
340+
indexRowCount += sstableIndex.getRowCount();
341+
for (var segment : sstableIndex.getSegments())
342+
{
343+
var metadata = segment.metadata;
344+
// Fail with a test assertion rather than a NullPointerException on the field reads below
Assert.assertNotNull(metadata);
345+
segmentRowCount += metadata.numRows;
346+
totalTermCount += metadata.totalTermCount;
347+
}
348+
}
349+
// The row count reported per SSTable index must equal the sum over its segments
assertEquals(indexRowCount, segmentRowCount);
350+
assertEquals(expectedNumRows, indexRowCount);
351+
// A negative expectation means the caller did not ask for the term count to be verified
if (expectedTotalTermsCount >= 0)
352+
assertEquals(expectedTotalTermsCount, totalTermCount);
353+
}
229354
}

0 commit comments

Comments
 (0)