16
16
17
17
package org .apache .cassandra .index .sai .cql ;
18
18
19
- import java .util .ArrayList ;
20
- import java .util .Arrays ;
21
- import java .util .HashMap ;
22
- import java .util .HashSet ;
23
- import java .util .List ;
19
+ import java .util .*;
24
20
import java .util .concurrent .ExecutorService ;
25
21
import java .util .concurrent .Executors ;
26
22
import java .util .concurrent .Future ;
27
23
import java .util .regex .Pattern ;
28
24
import java .util .stream .Collectors ;
29
- import java .util .stream .IntStream ;
30
25
31
- import org .apache .cassandra .index .sai .SSTableIndex ;
32
- import org .apache .cassandra .index .sai .memory .MemtableIndex ;
33
- import org .apache .cassandra .index .sai .memory .TrieMemtableIndex ;
34
26
import org .assertj .core .api .Assertions ;
35
27
36
- import org .junit .Assert ;
37
28
import org .junit .Before ;
38
29
import org .junit .Test ;
39
30
@@ -63,11 +54,13 @@ public class BM25Test extends SAITester
63
54
@ Parameterized .Parameters (name = "version={0}" )
64
55
public static List <Object > data ()
65
56
{
66
- return Arrays .asList (new Object []{ Version .BM25_EARLIEST , Version .ED });
57
+ return Version .ALL .stream ().filter (v -> v .onOrAfter (Version .BM25_EARLIEST ))
58
+ .map (v -> new Object []{ v })
59
+ .collect (Collectors .toList ());
67
60
}
68
61
69
62
// Pattern that treats apostrophes within words as part of the word
70
- private static final Pattern PATTERN = Pattern .compile ("[^\\w']+|'(?=\\s)|(?<=\\s)'" );
63
+ public static final Pattern PATTERN = Pattern .compile ("[^\\w']+|'(?=\\s)|(?<=\\s)'" );
71
64
public static final int DATASET_BODY_COLUMN = 3 ;
72
65
73
66
@ Before
@@ -755,7 +748,7 @@ public void testCollections() throws Throwable
755
748
createIndex ("CREATE CUSTOM INDEX ON %s (category) USING 'StorageAttachedIndex'" );
756
749
createIndex ("CREATE CUSTOM INDEX ON %s (map_category) USING 'StorageAttachedIndex'" );
757
750
createIndex ("CREATE CUSTOM INDEX ON %s (KEYS(map_body)) USING 'StorageAttachedIndex'" );
758
- insertCollectionData ();
751
+ insertCollectionData (this );
759
752
analyzeDataset ("climate" );
760
753
analyzeDataset ("health" );
761
754
@@ -841,110 +834,7 @@ public void testOrderingSeveralSegments() throws Throwable
841
834
"climate" );
842
835
}
843
836
844
- /**
845
- * Asserts that memtable SAI index maintains expected row count, which is, then,
846
- * used to store row count in SSTable SAI index and its segments. This is also
847
- * asserted.
848
- */
849
- @ Test
850
- public void testIndexMetaForNumRows ()
851
- {
852
- SAIUtil .setCurrentVersion (Version .ED );
853
-
854
- createTable ("CREATE TABLE %s (id int PRIMARY KEY, category text, score int, " +
855
- "title text, body text, bodyset set<text>, " +
856
- "map_category map<int, text>, map_body map<text, text>)" );
857
- String bodyIndexName = createAnalyzedIndex ("body" , true );
858
- String scoreIndexName = createIndex ("CREATE CUSTOM INDEX ON %s (score) USING 'StorageAttachedIndex'" );
859
- String mapIndexName = createIndex ("CREATE CUSTOM INDEX ON %s (map_category) USING 'StorageAttachedIndex'" );
860
- insertCollectionData ();
861
- int totalTermsCount = IntStream .range (0 , DATASET .length )
862
- .map (this ::calculateTotalTermsForRow )
863
- .sum ();
864
-
865
- assertNumRowsMemtable (scoreIndexName , DATASET .length , DATASET .length );
866
- assertNumRowsMemtable (bodyIndexName , DATASET .length , totalTermsCount );
867
- assertNumRowsMemtable (mapIndexName , DATASET .length );
868
- execute ("DELETE FROM %s WHERE id = ?" , 4 );
869
- // Deletion is not tracked by Memindex
870
- assertNumRowsMemtable (bodyIndexName , DATASET .length , totalTermsCount );
871
- // Test an update to different value for analyzed index
872
- execute ("UPDATE %s SET body = ? WHERE id = ?" , DATASET [10 ][DATASET_BODY_COLUMN ], 6 );
873
- totalTermsCount += calculateTotalTermsForRow (10 ) - calculateTotalTermsForRow (6 );
874
- assertNumRowsMemtable (bodyIndexName , DATASET .length , totalTermsCount );
875
- // Update back to the original value
876
- execute ("UPDATE %s SET body = ? WHERE id = ?" , DATASET [6 ][DATASET_BODY_COLUMN ], 10 );
877
- totalTermsCount += calculateTotalTermsForRow (6 ) - calculateTotalTermsForRow (10 );
878
- assertNumRowsMemtable (bodyIndexName , DATASET .length , totalTermsCount );
879
- // Flush will account for the deleted row
880
- totalTermsCount -= calculateTotalTermsForRow (4 );
881
- flush ();
882
- assertNumRowsAndTotalTermsSSTable (scoreIndexName , DATASET .length - 1 , DATASET .length - 1 );
883
- assertNumRowsAndTotalTermsSSTable (bodyIndexName , DATASET .length - 1 , totalTermsCount );
884
- assertNumRowsSSTable (mapIndexName , DATASET .length - 1 );
885
- execute ("DELETE FROM %s WHERE id = ?" , 9 );
886
- flush ();
887
- assertNumRowsAndTotalTermsSSTable (scoreIndexName , DATASET .length - 1 , DATASET .length - 1 );
888
- assertNumRowsAndTotalTermsSSTable (bodyIndexName , DATASET .length - 1 , totalTermsCount );
889
- assertNumRowsSSTable (mapIndexName , DATASET .length - 1 );
890
- compact ();
891
- totalTermsCount -= calculateTotalTermsForRow (9 );
892
- assertNumRowsAndTotalTermsSSTable (scoreIndexName , DATASET .length - 2 , DATASET .length - 2 );
893
- assertNumRowsAndTotalTermsSSTable (bodyIndexName , DATASET .length - 2 , totalTermsCount );
894
- assertNumRowsSSTable (mapIndexName , DATASET .length - 2 );
895
- }
896
-
897
- private void assertNumRowsMemtable (String indexName , int expectedNumRows )
898
- {
899
- assertNumRowsMemtable (indexName , expectedNumRows , -1 );
900
- }
901
-
902
- private void assertNumRowsMemtable (String indexName , int expectedNumRows , int expectedTotalTermsCount )
903
- {
904
- int rowCount = 0 ;
905
- long termCount = 0 ;
906
-
907
- for (var memtable : getCurrentColumnFamilyStore ().getAllMemtables ())
908
- {
909
- MemtableIndex memIndex = getIndexContext (indexName ).getLiveMemtables ().get (memtable );
910
- assert memIndex instanceof TrieMemtableIndex ;
911
- rowCount += ((TrieMemtableIndex ) memIndex ).indexedRows ();
912
- termCount += ((TrieMemtableIndex ) memIndex ).approximateTotalTermCount ();
913
- }
914
- assertEquals (expectedNumRows , rowCount );
915
- if (expectedTotalTermsCount >= 0 )
916
- assertEquals (expectedTotalTermsCount , termCount );
917
- }
918
-
919
- private void assertNumRowsSSTable (String indexName , int expectedNumRows )
920
- {
921
- assertNumRowsAndTotalTermsSSTable (indexName , expectedNumRows , -1 );
922
- }
923
-
924
- private void assertNumRowsAndTotalTermsSSTable (String indexName , int expectedNumRows , int expectedTotalTermsCount
925
- )
926
- {
927
- long indexRowCount = 0 ;
928
- long segmentRowCount = 0 ;
929
- long totalTermCount = 0 ;
930
- for (SSTableIndex sstableIndex : getIndexContext (indexName ).getView ())
931
- {
932
- indexRowCount += sstableIndex .getRowCount ();
933
- for (var segment : sstableIndex .getSegments ())
934
- {
935
- var metadata = segment .metadata ;
936
- Assert .assertNotNull (metadata );
937
- segmentRowCount += metadata .numRows ;
938
- totalTermCount += metadata .totalTermCount ;
939
- }
940
- }
941
- assertEquals (indexRowCount , segmentRowCount );
942
- assertEquals (expectedNumRows , indexRowCount );
943
- if (expectedTotalTermsCount >= 0 )
944
- assertEquals (expectedTotalTermsCount , totalTermCount );
945
- }
946
-
947
- private final static Object [][] DATASET =
837
+ public final static Object [][] DATASET =
948
838
{
949
839
{ 0 , "Climate" , 5 , "Climate change is a pressing issue. Climate patterns are shifting globally. Scientists study climate data daily." , 1 },
950
840
{ 1 , "Technology" , 3 , "Technology is advancing. New technology in AI and robotics is groundbreaking." , 1 },
@@ -986,12 +876,6 @@ private void analyzeDataset(String term)
986
876
}
987
877
}
988
878
989
- private int calculateTotalTermsForRow (int row )
990
- {
991
- String body = (String ) DATASET [row ][DATASET_BODY_COLUMN ];
992
- return PATTERN .split (body .toLowerCase ()).length ;
993
- }
994
-
995
879
private void insertPrimitiveData ()
996
880
{
997
881
insertPrimitiveData (0 , DATASET .length );
@@ -1012,7 +896,7 @@ private void insertPrimitiveData(int start, int end)
1012
896
}
1013
897
}
1014
898
1015
- private void insertCollectionData ()
899
+ public static void insertCollectionData (SAITester tester )
1016
900
{
1017
901
int setsize = 1 ;
1018
902
for (int row = 0 ; row < DATASET .length ; row ++)
@@ -1032,7 +916,7 @@ private void insertCollectionData()
1032
916
map_text .putIfAbsent ((String ) DATASET [row - j ][1 ], (String ) DATASET [row - j ][3 ]);
1033
917
}
1034
918
1035
- execute (
919
+ tester . execute (
1036
920
"INSERT INTO %s (id, category, score, body, bodyset, map_category, map_body) " +
1037
921
"VALUES (?, ?, ?, ?, ?, ?, ?)" ,
1038
922
DATASET [row ][0 ],
0 commit comments