24
24
import java .util .concurrent .ExecutorService ;
25
25
import java .util .concurrent .Executors ;
26
26
import java .util .concurrent .Future ;
27
+ import java .util .regex .Pattern ;
27
28
import java .util .stream .Collectors ;
28
29
29
30
import org .assertj .core .api .Assertions ;
@@ -675,50 +676,76 @@ public void testErrorMessages()
675
676
"SELECT * FROM %s WHERE map_body CONTAINS KEY 'Climate' ORDER BY body BM25 OF ? LIMIT 10" );
676
677
}
677
678
679
+ @ Test
680
+ public void testWithLowercase () throws Throwable
681
+ {
682
+ createTable ("CREATE TABLE %s (id int PRIMARY KEY, body text)" );
683
+ createAnalyzedIndex ("body" , true );
684
+ execute ("INSERT INTO %s (id, body) VALUES (?, ?)" , 1 , "Hi hi" );
685
+ execute ("INSERT INTO %s (id, body) VALUES (?, ?)" , 2 , "hi hi longer" );
686
+ executeQuery (Arrays .asList (1 , 2 ), "SELECT * FROM %s ORDER BY body BM25 OF 'hi' LIMIT 4" );
687
+ }
688
+
678
689
@ Test
679
690
public void testCollections () throws Throwable
680
691
{
681
- createTable ("CREATE TABLE %s (id int PRIMARY KEY, category text, score int, tie int, " +
692
+ createTable ("CREATE TABLE %s (id int PRIMARY KEY, category text, score int, " +
682
693
"title text, body text, bodyset set<text>, " +
683
694
"map_category map<int, text>, map_body map<text, text>)" );
684
695
createAnalyzedIndex ("body" , true );
685
696
createAnalyzedIndex ("bodyset" , true );
686
697
createAnalyzedIndex ("map_body" , true );
687
698
createIndex ("CREATE CUSTOM INDEX ON %s (score) USING 'StorageAttachedIndex'" );
688
699
createIndex ("CREATE CUSTOM INDEX ON %s (category) USING 'StorageAttachedIndex'" );
689
- createIndex ("CREATE CUSTOM INDEX ON %s (tie) USING 'StorageAttachedIndex'" );
690
700
createIndex ("CREATE CUSTOM INDEX ON %s (map_category) USING 'StorageAttachedIndex'" );
691
701
createIndex ("CREATE CUSTOM INDEX ON %s (KEYS(map_body)) USING 'StorageAttachedIndex'" );
692
702
insertCollectionData ();
703
+ analyzeDataset ("climate" );
704
+ analyzeDataset ("health" );
693
705
694
706
beforeAndAfterFlush (
695
707
() -> {
696
- executeQuery (Arrays .asList (11 , 1 , 16 , 18 ), "SELECT * FROM %s WHERE tie = 1 ORDER BY body BM25 OF ? LIMIT 10" ,
708
+ // ID 11: total words = 12, climate occurrences = 4
709
+ // ID 19: total words = 13, climate occurrences = 4
710
+ // ID 1: total words = 16, climate occurrences = 3
711
+ // ID 16: total words = 11, climate occurrences = 2
712
+ // ID 6: total words = 13, climate occurrences = 2
713
+ // ID 12: total words = 12, climate occurrences = 1
714
+ // ID 18: total words = 14, climate occurrences = 1
715
+ executeQuery (Arrays .asList (11 , 19 , 1 , 16 , 6 , 12 , 18 ), "SELECT * FROM %s ORDER BY body BM25 OF ? LIMIT 10" ,
697
716
"climate" );
698
- executeQuery (Arrays .asList (11 , 1 ), "SELECT * FROM %s WHERE score = 5 AND tie = 1 ORDER BY body BM25 OF ? LIMIT 10" ,
717
+ executeQuery (Arrays .asList (11 , 19 , 1 ), "SELECT * FROM %s WHERE score = 5 ORDER BY body BM25 OF ? LIMIT 10" ,
699
718
"climate" );
700
- executeQuery (Arrays .asList (6 , 16 ), "SELECT * FROM %s WHERE score > 3 ORDER BY body BM25 OF ? LIMIT 10" ,
701
- "health" );
702
- executeQuery (Arrays .asList (4 , 18 , 14 ), "SELECT * FROM %s WHERE category = 'Health' AND tie = 1 " +
703
- "ORDER BY body BM25 OF ? LIMIT 10" ,
704
- "Health" );
705
- executeQuery (Arrays .asList (4 , 18 , 14 ), "SELECT * FROM %s WHERE score <= 3 AND tie = 1 AND category = 'Health' " +
706
- "ORDER BY body BM25 OF ? LIMIT 10" ,
707
- "health" );
708
- executeQuery (Arrays .asList (11 , 1 , 16 , 18 ), "SELECT * FROM %s WHERE bodyset CONTAINS 'climate' AND tie <= 1 ORDER BY body BM25 OF ? LIMIT 10" ,
719
+ executeQuery (Arrays .asList (11 , 19 , 1 , 16 , 6 , 12 , 18 ), "SELECT * FROM %s WHERE bodyset CONTAINS 'climate' ORDER BY body BM25 OF ? LIMIT 10" ,
709
720
"climate" );
710
- executeQuery (Arrays .asList (6 , 12 ), "SELECT * FROM %s WHERE bodyset CONTAINS 'health' AND tie > 1 ORDER BY body BM25 OF ? LIMIT 10" ,
721
+ executeQuery (Arrays .asList (16 , 6 , 12 , 18 ), "SELECT * FROM %s WHERE bodyset CONTAINS 'health' ORDER BY body BM25 OF ? LIMIT 10" ,
711
722
"climate" );
712
- executeQuery (Arrays .asList (11 , 1 , 16 , 18 ), "SELECT * FROM %s WHERE map_category CONTAINS 'Climate' AND tie <= 1 ORDER BY body BM25 OF ? LIMIT 10" ,
723
+ executeQuery (Arrays .asList (11 , 19 , 1 , 16 , 6 , 12 , 18 ), "SELECT * FROM %s WHERE map_category CONTAINS 'Climate' ORDER BY body BM25 OF ? LIMIT 10" ,
713
724
"climate" );
714
- executeQuery (Arrays .asList (19 , 6 , 12 ), "SELECT * FROM %s WHERE map_category CONTAINS 'Health' AND tie > 1 ORDER BY body BM25 OF ? LIMIT 10" ,
725
+ executeQuery (Arrays .asList (19 , 16 , 6 , 12 , 18 ), "SELECT * FROM %s WHERE map_category CONTAINS 'Health' ORDER BY body BM25 OF ? LIMIT 10" ,
715
726
"climate" );
716
- executeQuery (Arrays .asList (11 , 1 , 16 , 18 ), "SELECT * FROM %s WHERE map_body CONTAINS 'Climate' AND tie <= 1 ORDER BY body BM25 OF ? LIMIT 10" ,
727
+ executeQuery (Arrays .asList (11 , 19 , 1 , 16 , 6 , 12 , 18 ), "SELECT * FROM %s WHERE map_body CONTAINS 'Climate' ORDER BY body BM25 OF ? LIMIT 10" ,
717
728
"climate" );
718
- executeQuery (Arrays .asList (11 , 16 , 18 ), "SELECT * FROM %s WHERE map_body CONTAINS 'health' AND tie < 2 ORDER BY body BM25 OF ? LIMIT 10" ,
729
+ executeQuery (Arrays .asList (11 , 19 , 16 , 6 , 12 , 18 ), "SELECT * FROM %s WHERE map_body CONTAINS 'health' ORDER BY body BM25 OF ? LIMIT 10" ,
719
730
"climate" );
720
- executeQuery (Arrays .asList (19 , 6 , 12 ), "SELECT * FROM %s WHERE map_body CONTAINS KEY 'Health' AND tie >= 2 ORDER BY body BM25 OF ? LIMIT 10" ,
731
+ executeQuery (Arrays .asList (11 , 19 , 16 , 6 , 12 , 18 ), "SELECT * FROM %s WHERE map_body CONTAINS KEY 'Health' ORDER BY body BM25 OF ? LIMIT 10" ,
721
732
"climate" );
733
+
734
+ // ID 4: total words = 15, health occurrences = 3
735
+ // ID 12: total words = 12, health occurrences = 2
736
+ // ID 6: total words = 13, health occurrences = 2
737
+ // ID 9: total words = 13, health occurrences = 2
738
+ // ID 18: total words = 14, health occurrences = 2
739
+ // ID 14: total words = 11, health occurrences = 1
740
+ // ID 16: total words = 11, health occurrences = 1
741
+ executeQuery (Arrays .asList (6 , 16 ), "SELECT * FROM %s WHERE score > 3 ORDER BY body BM25 OF ? LIMIT 10" ,
742
+ "health" );
743
+ executeQuery (Arrays .asList (4 , 12 , 9 , 18 , 14 ), "SELECT * FROM %s WHERE category = 'Health' " +
744
+ "ORDER BY body BM25 OF ? LIMIT 10" ,
745
+ "Health" );
746
+ executeQuery (Arrays .asList (4 , 12 , 9 , 18 , 14 ), "SELECT * FROM %s WHERE score <= 3 AND category = 'Health' " +
747
+ "ORDER BY body BM25 OF ? LIMIT 10" ,
748
+ "health" );
722
749
});
723
750
}
724
751
@@ -741,10 +768,29 @@ public void testCollections() throws Throwable
741
768
{ 15 , "Education" , 2 , "Education reforms are underway. Education experts suggest holistic changes." , 1 },
742
769
{ 16 , "Climate" , 4 , "Climate affects the economy and health. Climate events cost billions annually." , 1 },
743
770
{ 17 , "Technology" , 3 , "Technology is the backbone of the modern economy. Without technology, economic growth stagnates." , 2 },
744
- { 18 , "Health" , 2 , "Health is discussed less than economy or climate, but health matters deeply." , 1 },
771
+ { 18 , "Health" , 2 , "Health is discussed less than economy or climate or technology , but health matters deeply." , 1 },
745
772
{ 19 , "Climate" , 5 , "Climate change, climate policies, climate research—climate is the buzzword of our time." , 2 },
746
773
{ 20 , "Mixed" , 3 , "Investments in education and technology will shape the future of the global economy." , 1 }
747
774
};
775
+
776
+ private void analyzeDataset (String term )
777
+ {
778
+ final Pattern PATTERN = Pattern .compile ("\\ W+" );
779
+ for (Object [] row : DATASET )
780
+ {
781
+ String body = (String ) row [3 ];
782
+ String [] words = PATTERN .split (body .toLowerCase ());
783
+
784
+ long totalWords = words .length ;
785
+ long termCount = Arrays .stream (words )
786
+ .filter (word -> word .equals (term ))
787
+ .count ();
788
+
789
+ if (termCount > 0 )
790
+ System .out .printf (" // ID %d: total words = %d, %s occurrences = %d%n" ,
791
+ (Integer ) row [0 ], totalWords , term , termCount );
792
+ }
793
+ }
748
794
749
795
private void insertPrimitiveData ()
750
796
{
@@ -781,13 +827,12 @@ private void insertCollectionData()
781
827
}
782
828
783
829
execute (
784
- "INSERT INTO %s (id, category, score, body, tie, bodyset, map_category, map_body) " +
785
- "VALUES (?, ?, ?, ?, ?, ?, ?, ? )" ,
830
+ "INSERT INTO %s (id, category, score, body, bodyset, map_category, map_body) " +
831
+ "VALUES (?, ?, ?, ?, ?, ?, ?)" ,
786
832
DATASET [row ][0 ],
787
833
DATASET [row ][1 ],
788
834
DATASET [row ][2 ],
789
835
DATASET [row ][3 ],
790
- DATASET [row ][4 ],
791
836
set ,
792
837
map ,
793
838
map_text
0 commit comments