1
1
/*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
2
+ * Copyright DataStax, Inc.
9
3
*
10
- * http://www.apache.org/licenses/LICENSE-2.0
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
11
9
*
12
10
* Unless required by applicable law or agreed to in writing, software
13
11
* distributed under the License is distributed on an "AS IS" BASIS,
19
17
package org .apache .cassandra .index .sai .cql ;
20
18
21
19
import java .util .ArrayList ;
20
+ import java .util .Arrays ;
21
+ import java .util .HashMap ;
22
+ import java .util .HashSet ;
23
+ import java .util .List ;
22
24
import java .util .concurrent .ExecutorService ;
23
25
import java .util .concurrent .Executors ;
24
26
import java .util .concurrent .Future ;
27
+ import java .util .stream .Collectors ;
25
28
29
+ import org .assertj .core .api .Assertions ;
26
30
import org .junit .Before ;
27
31
import org .junit .Test ;
28
32
@@ -124,15 +128,15 @@ public void testTwoIndexesAmbiguousPredicate() throws Throwable
124
128
// be rejected
125
129
beforeAndAfterFlush (() -> {
126
130
// Single predicate
127
- assertInvalidMessage (String .format (EQ_AMBIGUOUS_ERROR , "v" , getIndex (0 ), getIndex (1 )),
131
+ assertInvalidMessage (String .format (EQ_AMBIGUOUS_ERROR , 'v' , getIndex (0 ), getIndex (1 )),
128
132
"SELECT k FROM %s WHERE v = 'apple'" );
129
133
130
134
// AND
131
- assertInvalidMessage (String .format (EQ_AMBIGUOUS_ERROR , "v" , getIndex (0 ), getIndex (1 )),
135
+ assertInvalidMessage (String .format (EQ_AMBIGUOUS_ERROR , 'v' , getIndex (0 ), getIndex (1 )),
132
136
"SELECT k FROM %s WHERE v = 'apple' AND v : 'juice'" );
133
137
134
138
// OR
135
- assertInvalidMessage (String .format (EQ_AMBIGUOUS_ERROR , "v" , getIndex (0 ), getIndex (1 )),
139
+ assertInvalidMessage (String .format (EQ_AMBIGUOUS_ERROR , 'v' , getIndex (0 ), getIndex (1 )),
136
140
"SELECT k FROM %s WHERE v = 'apple' OR v : 'juice'" );
137
141
});
138
142
}
@@ -178,14 +182,7 @@ public void testComplexQueriesWithMultipleIndexes() throws Throwable
178
182
179
183
// Create mix of analyzed, unanalyzed, and non-text indexes
180
184
createIndex ("CREATE CUSTOM INDEX ON %s(v1) USING 'org.apache.cassandra.index.sai.StorageAttachedIndex'" );
181
- createIndex ("CREATE CUSTOM INDEX ON %s(v2) " +
182
- "USING 'org.apache.cassandra.index.sai.StorageAttachedIndex' " +
183
- "WITH OPTIONS = {" +
184
- "'index_analyzer': '{" +
185
- "\" tokenizer\" : {\" name\" : \" standard\" }, " +
186
- "\" filters\" : [{\" name\" : \" porterstem\" }]" +
187
- "}'" +
188
- "}" );
185
+ createAnalyzedIndex ("v2" );
189
186
createIndex ("CREATE CUSTOM INDEX ON %s(v3) USING 'org.apache.cassandra.index.sai.StorageAttachedIndex'" );
190
187
191
188
execute ("INSERT INTO %s (k, v1, v2, v3) VALUES (1, 'apple', 'orange juice', 5)" );
@@ -263,10 +260,8 @@ public void testEmptyQuery() throws Throwable
263
260
execute ("INSERT INTO %s (k, v) VALUES (1, 'apple')" );
264
261
265
262
beforeAndAfterFlush (() ->
266
- {
267
- assertInvalidMessage ("BM25 query must contain at least one term (perhaps your analyzer is discarding tokens you didn't expect)" ,
268
- "SELECT k FROM %s ORDER BY v BM25 OF '+' LIMIT 1" );
269
- });
263
+ assertInvalidMessage ("BM25 query must contain at least one term (perhaps your analyzer is discarding tokens you didn't expect)" ,
264
+ "SELECT k FROM %s ORDER BY v BM25 OF '+' LIMIT 1" ));
270
265
}
271
266
272
267
@ Test
@@ -420,14 +415,20 @@ private String createAnalyzedIndex()
420
415
}
421
416
422
417
private String createAnalyzedIndex (String column )
418
+ {
419
+ return createAnalyzedIndex (column , false );
420
+ }
421
+
422
+ private String createAnalyzedIndex (String column , boolean lowercase )
423
423
{
424
424
return createIndex ("CREATE CUSTOM INDEX ON %s(" + column + ") " +
425
425
"USING 'org.apache.cassandra.index.sai.StorageAttachedIndex' " +
426
426
"WITH OPTIONS = {" +
427
427
"'index_analyzer': '{" +
428
428
"\" tokenizer\" : {\" name\" : \" standard\" }, " +
429
- "\" filters\" : [{\" name\" : \" porterstem\" }]" +
430
- "}'}"
429
+ "\" filters\" : [{\" name\" : \" porterstem\" }" +
430
+ (lowercase ? ", {\" name\" : \" lowercase\" }]" : "]" )
431
+ + "}'}"
431
432
);
432
433
}
433
434
@@ -640,7 +641,7 @@ public void testBM25andFilterz() throws Throwable
640
641
createTable ("CREATE TABLE %s (id int PRIMARY KEY, category text, score int, title text, body text)" );
641
642
createAnalyzedIndex ("body" );
642
643
createIndex ("CREATE CUSTOM INDEX ON %s (score) USING 'StorageAttachedIndex'" );
643
- insertArticle ();
644
+ insertPrimitiveData ();
644
645
beforeAndAfterFlush (
645
646
() -> {
646
647
// 10 docs have score 3 and 3 of those have "health"
@@ -655,41 +656,158 @@ public void testBM25andFilterz() throws Throwable
655
656
});
656
657
}
657
658
658
- private void insertArticle ()
659
- {
660
- Object [][] dataset = {
661
- { 1 , "Climate" , 5 , "Climate change is a pressing issue. Climate patterns are shifting globally. Scientists study climate data daily." },
662
- { 2 , "Technology" , 3 , "Technology is advancing. New technology in AI and robotics is groundbreaking." },
663
- { 3 , "Economy" , 4 , "The economy is recovering. Economy experts are optimistic. However, the global economy still faces risks." },
664
- { 4 , "Health" , 3 , "Health is wealth. Health policies need to be improved to ensure better public health outcomes." },
665
- { 5 , "Education" , 2 , "Education is the foundation of success. Online education is booming." },
666
- { 6 , "Climate" , 4 , "Climate and health are closely linked. Climate affects air quality and health outcomes." },
667
- { 7 , "Education" , 3 , "Technology and education go hand in hand. EdTech is revolutionizing education through technology." },
668
- { 8 , "Economy" , 3 , "The global economy is influenced by technology. Fintech is a key part of the economy today." },
669
- { 9 , "Health" , 3 , "Education and health programs must be prioritized. Health education is vital in schools." },
670
- { 10 , "Mixed" , 3 , "Technology, economy, and education are pillars of development." },
671
- { 11 , "Climate" , 5 , "Climate climate climate. It's everywhere. Climate drives political and economic decisions." },
672
- { 12 , "Health" , 2 , "Health concerns rise with climate issues. Health organizations are sounding the alarm." },
673
- { 13 , "Economy" , 3 , "The economy is fluctuating. Uncertainty looms over the economy." },
674
- { 14 , "Health" , 3 , "Cutting-edge technology is transforming healthcare. Healthtech merges health and technology." },
675
- { 15 , "Education" , 2 , "Education reforms are underway. Education experts suggest holistic changes." },
676
- { 16 , "Climate" , 4 , "Climate affects the economy and health. Climate events cost billions annually." },
677
- { 17 , "Technology" , 3 , "Technology is the backbone of the modern economy. Without technology, economic growth stagnates." },
678
- { 18 , "Health" , 2 , "Health is discussed less than economy or climate, but health matters deeply." },
679
- { 19 , "Climate" , 5 , "Climate change, climate policies, climate research—climate is the buzzword of our time." },
680
- { 20 , "Mixed" , 3 , "Investments in education and technology will shape the future of the global economy." }
681
- };
682
-
683
- for (Object [] article : dataset )
659
+ @ Test
660
+ public void testErrorMessages ()
661
+ {
662
+ createTable ("CREATE TABLE %s (id int PRIMARY KEY, category text, score int, " +
663
+ "title text, body text, bodyset set<text>, " +
664
+ "map_category map<int, text>, map_body map<text, text>)" );
665
+ createAnalyzedIndex ("body" , true );
666
+ createAnalyzedIndex ("bodyset" , true );
667
+ createAnalyzedIndex ("map_body" , true );
668
+
669
+ // Improve message issue CNDB-13514
670
+ assertInvalidMessage ("BM25 ordering on column bodyset requires an analyzed index" ,
671
+ "SELECT * FROM %s ORDER BY bodyset BM25 OF ? LIMIT 10" );
672
+
673
+ // Discussion of message incosistency CNDB-13526
674
+ assertInvalidMessage ("Ordering on non-clustering column requires each restricted column to be indexed except for fully-specified partition keys" ,
675
+ "SELECT * FROM %s WHERE map_body CONTAINS KEY 'Climate' ORDER BY body BM25 OF ? LIMIT 10" );
676
+ }
677
+
678
+ @ Test
679
+ public void testCollections () throws Throwable
680
+ {
681
+ createTable ("CREATE TABLE %s (id int PRIMARY KEY, category text, score int, tie int," +
682
+ "title text, body text, bodyset set<text>, " +
683
+ "map_category map<int, text>, map_body map<text, text>)" );
684
+ createAnalyzedIndex ("body" , true );
685
+ createAnalyzedIndex ("bodyset" , true );
686
+ createAnalyzedIndex ("map_body" , true );
687
+ createIndex ("CREATE CUSTOM INDEX ON %s (score) USING 'StorageAttachedIndex'" );
688
+ createIndex ("CREATE CUSTOM INDEX ON %s (category) USING 'StorageAttachedIndex'" );
689
+ createIndex ("CREATE CUSTOM INDEX ON %s (tie) USING 'StorageAttachedIndex'" );
690
+ createIndex ("CREATE CUSTOM INDEX ON %s (map_category) USING 'StorageAttachedIndex'" );
691
+ createIndex ("CREATE CUSTOM INDEX ON %s (KEYS(map_body)) USING 'StorageAttachedIndex'" );
692
+ insertCollectionData ();
693
+
694
+ beforeAndAfterFlush (
695
+ () -> {
696
+ executeQuery (Arrays .asList (11 , 1 , 16 , 18 ), "SELECT * FROM %s WHERE tie = 1 ORDER BY body BM25 OF ? LIMIT 10" ,
697
+ "climate" );
698
+ executeQuery (Arrays .asList (11 , 1 ), "SELECT * FROM %s WHERE score = 5 AND tie = 1 ORDER BY body BM25 OF ? LIMIT 10" ,
699
+ "climate" );
700
+ executeQuery (Arrays .asList (6 , 16 ), "SELECT * FROM %s WHERE score > 3 ORDER BY body BM25 OF ? LIMIT 10" ,
701
+ "health" );
702
+ executeQuery (Arrays .asList (4 , 18 , 14 ), "SELECT * FROM %s WHERE category = 'Health' AND tie = 1 " +
703
+ "ORDER BY body BM25 OF ? LIMIT 10" ,
704
+ "Health" );
705
+ executeQuery (Arrays .asList (4 , 18 , 14 ), "SELECT * FROM %s WHERE score <= 3 AND tie = 1 AND category = 'Health' " +
706
+ "ORDER BY body BM25 OF ? LIMIT 10" ,
707
+ "health" );
708
+ executeQuery (Arrays .asList (11 , 1 , 16 , 18 ), "SELECT * FROM %s WHERE bodyset CONTAINS 'climate' AND tie <= 1 ORDER BY body BM25 OF ? LIMIT 10" ,
709
+ "climate" );
710
+ executeQuery (Arrays .asList (6 , 12 ), "SELECT * FROM %s WHERE bodyset CONTAINS 'health' AND tie > 1 ORDER BY body BM25 OF ? LIMIT 10" ,
711
+ "climate" );
712
+ executeQuery (Arrays .asList (11 , 1 , 16 , 18 ), "SELECT * FROM %s WHERE map_category CONTAINS 'Climate' AND tie <= 1 ORDER BY body BM25 OF ? LIMIT 10" ,
713
+ "climate" );
714
+ executeQuery (Arrays .asList (19 , 6 , 12 ), "SELECT * FROM %s WHERE map_category CONTAINS 'Health' AND tie > 1 ORDER BY body BM25 OF ? LIMIT 10" ,
715
+ "climate" );
716
+ executeQuery (Arrays .asList (11 , 1 , 16 , 18 ), "SELECT * FROM %s WHERE map_body CONTAINS 'Climate' AND tie <= 1 ORDER BY body BM25 OF ? LIMIT 10" ,
717
+ "climate" );
718
+ executeQuery (Arrays .asList (11 , 16 , 18 ), "SELECT * FROM %s WHERE map_body CONTAINS 'health' AND tie < 2 ORDER BY body BM25 OF ? LIMIT 10" ,
719
+ "climate" );
720
+ executeQuery (Arrays .asList (19 , 6 , 12 ), "SELECT * FROM %s WHERE map_body CONTAINS KEY 'Health' AND tie >= 2 ORDER BY body BM25 OF ? LIMIT 10" ,
721
+ "climate" );
722
+ });
723
+ }
724
+
725
+ private final static Object [][] DATASET =
726
+ {
727
+ { 1 , "Climate" , 5 , "Climate change is a pressing issue. Climate patterns are shifting globally. Scientists study climate data daily." , 1 },
728
+ { 2 , "Technology" , 3 , "Technology is advancing. New technology in AI and robotics is groundbreaking." , 1 },
729
+ { 3 , "Economy" , 4 , "The economy is recovering. Economy experts are optimistic. However, the global economy still faces risks." , 1 },
730
+ { 4 , "Health" , 3 , "Health is wealth. Health policies need to be improved to ensure better public health outcomes." , 1 },
731
+ { 5 , "Education" , 2 , "Education is the foundation of success. Online education is booming." , 4 },
732
+ { 6 , "Climate" , 4 , "Climate and health are closely linked. Climate affects air quality and health outcomes." , 2 },
733
+ { 7 , "Education" , 3 , "Technology and education go hand in hand. EdTech is revolutionizing education through technology." , 3 },
734
+ { 8 , "Economy" , 3 , "The global economy is influenced by technology. Fintech is a key part of the economy today." , 2 },
735
+ { 9 , "Health" , 3 , "Education and health programs must be prioritized. Health education is vital in schools." , 2 },
736
+ { 10 , "Mixed" , 3 , "Technology, economy, and education are pillars of development." , 2 },
737
+ { 11 , "Climate" , 5 , "Climate climate climate. It's everywhere. Climate drives political and economic decisions." , 1 },
738
+ { 12 , "Health" , 2 , "Health concerns rise with climate issues. Health organizations are sounding the alarm." , 2 },
739
+ { 13 , "Economy" , 3 , "The economy is fluctuating. Uncertainty looms over the economy." , 1 },
740
+ { 14 , "Health" , 3 , "Cutting-edge technology is transforming healthcare. Healthtech merges health and technology." , 1 },
741
+ { 15 , "Education" , 2 , "Education reforms are underway. Education experts suggest holistic changes." , 1 },
742
+ { 16 , "Climate" , 4 , "Climate affects the economy and health. Climate events cost billions annually." , 1 },
743
+ { 17 , "Technology" , 3 , "Technology is the backbone of the modern economy. Without technology, economic growth stagnates." , 2 },
744
+ { 18 , "Health" , 2 , "Health is discussed less than economy or climate, but health matters deeply." , 1 },
745
+ { 19 , "Climate" , 5 , "Climate change, climate policies, climate research—climate is the buzzword of our time." , 2 },
746
+ { 20 , "Mixed" , 3 , "Investments in education and technology will shape the future of the global economy." , 1 }
747
+ };
748
+
749
+ private void insertPrimitiveData ()
750
+ {
751
+ for (Object [] row : DATASET )
684
752
{
685
753
execute (
686
754
"INSERT INTO %s (id, category, score, body) VALUES (?, ?, ?, ?)" ,
687
- article [0 ],
688
- article [1 ],
689
- article [2 ],
690
- article [3 ]
755
+ row [0 ],
756
+ row [1 ],
757
+ row [2 ],
758
+ row [3 ]
759
+ );
760
+ }
761
+ }
762
+
763
+ private void insertCollectionData ()
764
+ {
765
+ int setsize = 1 ;
766
+ for (int row = 0 ; row < DATASET .length ; row ++)
767
+ {
768
+ var set = new HashSet <String >();
769
+ for (int j = 0 ; j < setsize ; j ++)
770
+ set .add ((String ) DATASET [row - j ][3 ]);
771
+ if (setsize >= 3 )
772
+ setsize -= 2 ;
773
+ else
774
+ setsize ++;
775
+ var map = new HashMap <Integer , String >();
776
+ var map_text = new HashMap <String , String >();
777
+ for (int j = 0 ; j <= row && j < 3 ; j ++)
778
+ {
779
+ map .putIfAbsent ((Integer ) DATASET [row - j ][2 ], (String ) DATASET [row - j ][1 ]);
780
+ map_text .putIfAbsent ((String ) DATASET [row - j ][1 ], (String ) DATASET [row - j ][3 ]);
781
+ }
782
+
783
+ execute (
784
+ "INSERT INTO %s (id, category, score, body, tie, bodyset, map_category, map_body) " +
785
+ "VALUES (?, ?, ?, ?, ?, ?, ?, ?)" ,
786
+ DATASET [row ][0 ],
787
+ DATASET [row ][1 ],
788
+ DATASET [row ][2 ],
789
+ DATASET [row ][3 ],
790
+ DATASET [row ][4 ],
791
+ set ,
792
+ map ,
793
+ map_text
691
794
);
692
795
}
693
796
}
694
797
798
+ private void executeQuery (List <Integer > expected , String query , Object ... values ) throws Throwable
799
+ {
800
+ assertResult (execute (query , values ), expected );
801
+ prepare (query );
802
+ assertResult (execute (query , values ), expected );
803
+ }
804
+
805
+ private void assertResult (UntypedResultSet result , List <Integer > expected )
806
+ {
807
+ Assertions .assertThat (result ).hasSize (expected .size ());
808
+ var ids = result .stream ()
809
+ .map (row -> row .getInt ("id" ))
810
+ .collect (Collectors .toList ());
811
+ Assertions .assertThat (ids ).isEqualTo (expected );
812
+ }
695
813
}
0 commit comments