Skip to content

Commit f0ed83e

Browse files
authored
CNDB-13074: Reject analysis options on frozen collections (#1610)
Reject creating indexes with analysis options on frozen collections. We don't have a way to correctly index them, and we don't support querying either.
1 parent 88675ba commit f0ed83e

File tree

6 files changed

+104
-7
lines changed

6 files changed

+104
-7
lines changed

src/java/org/apache/cassandra/index/sai/IndexContext.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -178,14 +178,16 @@ public IndexContext(@Nonnull String keyspace,
178178
this.cfs = cfs;
179179
this.primaryKeyFactory = Version.latest().onDiskFormat().newPrimaryKeyFactory(clusteringComparator);
180180

181+
String columnName = column.name.toString();
182+
181183
if (config != null)
182184
{
183185
String fullIndexName = String.format("%s.%s.%s", this.keyspace, this.table, this.config.name);
184186
this.indexWriterConfig = IndexWriterConfig.fromOptions(fullIndexName, validator, config.options);
185187
this.isAnalyzed = AbstractAnalyzer.isAnalyzed(config.options);
186-
this.analyzerFactory = AbstractAnalyzer.fromOptions(getValidator(), config.options);
188+
this.analyzerFactory = AbstractAnalyzer.fromOptions(columnName, validator, config.options);
187189
this.queryAnalyzerFactory = AbstractAnalyzer.hasQueryAnalyzer(config.options)
188-
? AbstractAnalyzer.fromOptionsQueryAnalyzer(getValidator(), config.options)
190+
? AbstractAnalyzer.fromOptionsQueryAnalyzer(validator, config.options)
189191
: this.analyzerFactory;
190192
this.vectorSimilarityFunction = indexWriterConfig.getSimilarityFunction();
191193
this.hasEuclideanSimilarityFunc = vectorSimilarityFunction == VectorSimilarityFunction.EUCLIDEAN;
@@ -199,7 +201,7 @@ public IndexContext(@Nonnull String keyspace,
199201
{
200202
this.indexWriterConfig = IndexWriterConfig.emptyConfig();
201203
this.isAnalyzed = AbstractAnalyzer.isAnalyzed(Collections.EMPTY_MAP);
202-
this.analyzerFactory = AbstractAnalyzer.fromOptions(getValidator(), Collections.EMPTY_MAP);
204+
this.analyzerFactory = AbstractAnalyzer.fromOptions(columnName, validator, Collections.EMPTY_MAP);
203205
this.queryAnalyzerFactory = this.analyzerFactory;
204206
this.vectorSimilarityFunction = null;
205207
this.hasEuclideanSimilarityFunc = false;

src/java/org/apache/cassandra/index/sai/StorageAttachedIndex.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ public static Map<String, String> validateOptions(Map<String, String> options, T
330330
AbstractType<?> type = TypeUtil.cellValueType(target.left, target.right);
331331

332332
// Validate analyzers by building them
333-
try (AbstractAnalyzer.AnalyzerFactory analyzerFactory = AbstractAnalyzer.fromOptions(type, options))
333+
try (AbstractAnalyzer.AnalyzerFactory analyzerFactory = AbstractAnalyzer.fromOptions(targetColumn, type, options))
334334
{
335335
if (AbstractAnalyzer.hasQueryAnalyzer(options))
336336
AbstractAnalyzer.fromOptionsQueryAnalyzer(type, options).close();

src/java/org/apache/cassandra/index/sai/analyzer/AbstractAnalyzer.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ public static boolean isAnalyzed(Map<String, String> options) {
187187
return options.containsKey(LuceneAnalyzer.INDEX_ANALYZER) || NonTokenizingOptions.hasNonDefaultOptions(options);
188188
}
189189

190-
public static AnalyzerFactory fromOptions(AbstractType<?> type, Map<String, String> options)
190+
public static AnalyzerFactory fromOptions(String target, AbstractType<?> type, Map<String, String> options)
191191
{
192192
boolean containsIndexAnalyzer = options.containsKey(LuceneAnalyzer.INDEX_ANALYZER);
193193
boolean containsNonTokenizingOptions = NonTokenizingOptions.hasNonDefaultOptions(options);
@@ -206,6 +206,9 @@ public static AnalyzerFactory fromOptions(AbstractType<?> type, Map<String, Stri
206206
" combination of case_sensitive, normalize, or ascii options. options=" + options);
207207
}
208208

209+
if ((containsIndexAnalyzer || containsNonTokenizingOptions) && type.isCollection() && !type.isMultiCell())
210+
throw new InvalidRequestException("Cannot use an analyzer on " + target + " because it's a frozen collection.");
211+
209212
if (containsIndexAnalyzer)
210213
{
211214
String json = options.get(LuceneAnalyzer.INDEX_ANALYZER);

test/unit/org/apache/cassandra/cql3/CQLTester.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ public abstract class CQLTester
204204
private static final User SUPER_USER = new User("cassandra", "cassandra");
205205

206206
/**
207-
* Whether to use coorfinator execution in {@link #execute(String, Object...)}, so queries get full validation and
207+
* Whether to use coordinator execution in {@link #execute(String, Object...)}, so queries get full validation and
208208
* go through reconciliation. When enabled, calls to {@link #execute(String, Object...)} will behave as calls to
209209
* {@link #executeWithCoordinator(String, Object...)}. Otherwise, they will behave as calls to
210210
* {@link #executeInternal(String, Object...)}.

test/unit/org/apache/cassandra/index/sai/SAITester.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,23 @@ public static void setUpClass()
230230
CQLTester.enableCoordinatorExecution();
231231
}
232232

233+
/**
234+
* Creates a SAI index on the current table, waiting for it to become queryable.
235+
*
236+
* @param column the name of the indexed column, maybe with {@code FULL()}, {@code KEYS()} or {@code VALUES()} spec
237+
* @param options the index options, of the form {@code "{'option1': value1, 'option2': value2, ...}"}.
238+
* @return the name of the created index
239+
*/
240+
public String createSAIIndex(String column, @Nullable String options)
241+
{
242+
String query = String.format(CREATE_INDEX_TEMPLATE, column);
243+
244+
if (options != null)
245+
query += " WITH OPTIONS = " + options;
246+
247+
return createIndex(query);
248+
}
249+
233250
public static IndexContext createIndexContext(String name, AbstractType<?> validator, ColumnFamilyStore cfs)
234251
{
235252
return new IndexContext(cfs.getKeyspaceName(),

test/unit/org/apache/cassandra/index/sai/cql/AnalyzerTest.java

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,16 @@
1818

1919
package org.apache.cassandra.index.sai.cql;
2020

21+
import org.apache.cassandra.exceptions.InvalidRequestException;
22+
import org.apache.cassandra.index.sai.SAITester;
23+
import org.apache.cassandra.index.sai.analyzer.NonTokenizingOptions;
24+
import org.assertj.core.api.Assertions;
2125
import org.junit.Test;
2226

23-
public class AnalyzerTest extends VectorTester
27+
import javax.annotation.Nullable;
28+
import java.util.Arrays;
29+
30+
public class AnalyzerTest extends SAITester
2431
{
2532
@Test
2633
public void createAnalyzerWrongTypeTest()
@@ -49,4 +56,72 @@ public void createAnalyzerWrongTypeTest()
4956
execute("INSERT INTO %s (pk1, pk2, val) VALUES (0, 'c', -2)");
5057
execute("INSERT INTO %s (pk1, pk2, val) VALUES (1, 'c', -3)");
5158
}
59+
60+
/**
61+
* Test that we cannot use an analyzer, tokenizing or not, on a frozen collection.
62+
*/
63+
@Test
64+
public void analyzerOnFrozenCollectionTest()
65+
{
66+
createTable("CREATE TABLE %s (k int PRIMARY KEY, l frozen<list<text>>, s frozen<set<text>>, m frozen<map<text, text>>)");
67+
68+
for (String column : Arrays.asList("l", "s", "m"))
69+
{
70+
assertRejectsNonFullIndexCreationOnFrozenCollection(column);
71+
column = String.format("full(%s)", column);
72+
73+
// non-tokenizing options that produce an analyzer should be rejected
74+
assertRejectsAnalyzerOnFrozenCollection(column, String.format("{'%s': %s}", NonTokenizingOptions.CASE_SENSITIVE, false));
75+
assertRejectsAnalyzerOnFrozenCollection(column, String.format("{'%s': %s}", NonTokenizingOptions.NORMALIZE, true));
76+
assertRejectsAnalyzerOnFrozenCollection(column, String.format("{'%s': %s}", NonTokenizingOptions.ASCII, true));
77+
78+
// non-tokenizing options that do not produce an analyzer should be accepted
79+
assertAcceptsIndexOptions(column, String.format("{'%s': %s}", NonTokenizingOptions.CASE_SENSITIVE, true));
80+
assertAcceptsIndexOptions(column, String.format("{'%s': %s}", NonTokenizingOptions.NORMALIZE, false));
81+
assertAcceptsIndexOptions(column, String.format("{'%s': %s}", NonTokenizingOptions.ASCII, false));
82+
83+
// Lucene analyzer should always be rejected
84+
assertRejectsAnalyzerOnFrozenCollection(column, "{'index_analyzer': 'standard'}");
85+
assertRejectsAnalyzerOnFrozenCollection(column,
86+
"{'index_analyzer': " +
87+
" '{\"tokenizer\":{\"name\":\"ngram\", \"args\":{\"minGramSize\":\"2\", \"maxGramSize\":\"3\"}}," +
88+
" \"filters\":[{\"name\":\"lowercase\"}]}'}");
89+
assertRejectsAnalyzerOnFrozenCollection(column,
90+
"{'index_analyzer':'\n" +
91+
" {\"tokenizer\":{\"name\" : \"whitespace\"},\n" +
92+
" \"filters\":[{\"name\":\"stop\", \"args\": {\"words\": \"the, test\", \"format\": \"wordset\"}}]}'}");
93+
94+
// no options should be accepted
95+
assertAcceptsIndexOptions(column, null);
96+
assertAcceptsIndexOptions(column, "{}");
97+
}
98+
}
99+
100+
private void assertRejectsNonFullIndexCreationOnFrozenCollection(String column)
101+
{
102+
Assertions.assertThatThrownBy(() -> createSAIIndex(column, null))
103+
.isInstanceOf(InvalidRequestException.class)
104+
.hasMessageContaining("Cannot create values() index on frozen column " + column);
105+
106+
Assertions.assertThatThrownBy(() -> createSAIIndex("KEYS(" + column + ')', null))
107+
.isInstanceOf(InvalidRequestException.class)
108+
.hasMessageContaining("Cannot create keys() index on frozen column " + column);
109+
110+
Assertions.assertThatThrownBy(() -> createSAIIndex("VALUES(" + column + ')', null))
111+
.isInstanceOf(InvalidRequestException.class)
112+
.hasMessageContaining("Cannot create values() index on frozen column " + column);
113+
}
114+
115+
private void assertAcceptsIndexOptions(String column, @Nullable String options)
116+
{
117+
String index = createSAIIndex(column, options);
118+
dropIndex("DROP INDEX %s." + index); // clear for further tests
119+
}
120+
121+
private void assertRejectsAnalyzerOnFrozenCollection(String column, String options)
122+
{
123+
Assertions.assertThatThrownBy(() -> createSAIIndex(column, options))
124+
.isInstanceOf(InvalidRequestException.class)
125+
.hasMessageContaining("Cannot use an analyzer on " + column + " because it's a frozen collection.");
126+
}
52127
}

0 commit comments

Comments
 (0)