Skip to content

Commit 133ac77

Browse files
CNDB-14210: Fix analyzed sai index on compound partition key column (#1763)
- **CNDB-14210: Fix analyzed sai index on compound partition key column** ### What is the issue Fixes: riptano/cndb#14210 ### What does this PR fix and why was it fixed Fixes some queries that were broken by the march and may release. In #1434, we introduced some logic to help make the eq behavior better, and it incorrectly handled compound partition keys. This fixes that. The central fix is to use the `EQRestriction` any time we have a primary key column. This is necessary to ensure we can write and read data. The tests cover the relevant cases. I also fixed the error message returned when attempting to use `:` on a clustering column index. Please review the text of the error message.
1 parent 84e6256 commit 133ac77

File tree

4 files changed

+116
-4
lines changed

4 files changed

+116
-4
lines changed

src/java/org/apache/cassandra/cql3/SingleColumnRelation.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,10 @@ protected Restriction newEQRestriction(TableMetadata table, VariableSpecificatio
197197
Term term = toTerm(toReceivers(columnDef), value, table.keyspace, boundNames);
198198
// Leave the restriction as EQ if no analyzed index in backwards compatibility mode is present
199199
var ebi = IndexRegistry.obtain(table).getEqBehavior(columnDef);
200-
if (ebi.behavior == IndexRegistry.EqBehavior.EQ)
200+
// The primary key always has ambiguous EQ behavior and we have to defer to later logic to decide
201+
// whether the EQ is analayzed or not. This is a legacy behavior that "does the right thing" when
202+
// there is a fully restricted partition key or not.
203+
if (ebi.behavior == IndexRegistry.EqBehavior.EQ || columnDef.isPrimaryKeyColumn())
201204
return new SingleColumnRestriction.EQRestriction(columnDef, term);
202205

203206
// the index is configured to transform EQ into MATCH for backwards compatibility

src/java/org/apache/cassandra/cql3/restrictions/SingleColumnRestriction.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1336,17 +1336,24 @@ public boolean isBoundedAnn()
13361336
public static final class AnalyzerMatchesRestriction extends SingleColumnRestriction
13371337
{
13381338
public static final String CANNOT_BE_MERGED_ERROR = "%s cannot be restricted by other operators if it includes analyzer match (:)";
1339+
public static final String CANNOT_BE_RESTRICTED_BY_CLUSTERING_ERROR =
1340+
"Cannot restrict column '%s' by analyzer match (:) because it is a clustering column. Equals (=) can be used " +
1341+
"instead of match, but it will produce incomplete results due to clustering column post filtering.";
1342+
13391343
private final List<Term> values;
13401344

13411345
public AnalyzerMatchesRestriction(ColumnMetadata columnDef, Term value)
13421346
{
1343-
super(columnDef);
1344-
this.values = Collections.singletonList(value);
1347+
this(columnDef, Collections.singletonList(value));
13451348
}
13461349

13471350
public AnalyzerMatchesRestriction(ColumnMetadata columnDef, List<Term> values)
13481351
{
13491352
super(columnDef);
1353+
// If we don't fail here, we would alternatively fail with the call to this::appendTo, which produces
1354+
// an unhelpful error message.
1355+
if (columnDef.isClusteringColumn())
1356+
throw invalidRequest(CANNOT_BE_RESTRICTED_BY_CLUSTERING_ERROR, columnDef.name);
13501357
this.values = values;
13511358
}
13521359

test/unit/org/apache/cassandra/index/sai/analyzer/AnalyzerEqOperatorSupportTest.java

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@
2121

2222
import org.apache.cassandra.cql3.CQLTester;
2323
import org.apache.cassandra.cql3.conditions.ColumnCondition;
24+
import org.apache.cassandra.cql3.restrictions.SingleColumnRestriction;
2425
import org.apache.cassandra.cql3.restrictions.StatementRestrictions;
26+
import org.apache.cassandra.exceptions.InvalidRequestException;
2527
import org.apache.cassandra.index.sai.SAITester;
2628
import org.apache.cassandra.service.ClientWarn;
2729
import org.assertj.core.api.Assertions;
@@ -30,6 +32,8 @@
3032
import static java.lang.String.format;
3133
import static org.apache.cassandra.index.sai.analyzer.AnalyzerEqOperatorSupport.EQ_RESTRICTION_ON_ANALYZED_WARNING;
3234
import static org.apache.cassandra.index.sai.analyzer.AnalyzerEqOperatorSupport.LWT_CONDITION_ON_ANALYZED_WARNING;
35+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
36+
import static org.junit.Assert.assertEquals;
3337

3438
/**
3539
* Tests for {@link AnalyzerEqOperatorSupport}.
@@ -359,6 +363,103 @@ private void assertTokenizedIndexSupportsMixedEqualityAndMatches()
359363
assertRowsWithSelectWarning("SELECT k FROM %s WHERE v = 'Quick' OR f = 'Lazy' ALLOW FILTERING", row(1));
360364
}
361365

366+
@Test
367+
public void testCompoundPrimaryKeyRestrictionsWithAnalyzerEqOperatorSupportUnsupported() throws Throwable
368+
{
369+
// Note that we test clustering columns, but they don't technically work quite right. I include them
370+
// in this test to cover their behavior and prevent unintentional changes.
371+
createTable("CREATE TABLE %s (x text, y text, z text, a set<text>, PRIMARY KEY ((x, y), z))");
372+
373+
// Create case-insensitive index on all columns except y since it is essentially the same as x here
374+
createIndex(createCaseInsensitiveIndexString("x", AnalyzerEqOperatorSupport.Value.UNSUPPORTED));
375+
createIndex(createCaseInsensitiveIndexString("z", AnalyzerEqOperatorSupport.Value.UNSUPPORTED));
376+
createIndex(createCaseInsensitiveIndexString("values(a)", AnalyzerEqOperatorSupport.Value.UNSUPPORTED));
377+
378+
// Set up two unique rows that will map to the same index terms
379+
execute("INSERT INTO %s (x, y, z, a) VALUES (?, ?, ?, ?)", "a", "b", "c", set("d", "e", "f"));
380+
execute("INSERT INTO %s (x, y, z, a) VALUES (?, ?, ?, ?)", "A", "B", "C", set("D", "E", "F"));
381+
execute("INSERT INTO %s (x, y, z, a) VALUES (?, ?, ?, ?)", "z", "z", "z", set("z", "z", "z"));
382+
383+
beforeAndAfterFlush(
384+
() -> {
385+
// Fully qualified partition key and primary key
386+
assertRows(execute("SELECT x FROM %s WHERE x = 'a' AND y = 'b'"), row("a"));
387+
assertRows(execute("SELECT x FROM %s WHERE x = 'a' AND y = 'b' AND z = 'c'"), row("a"));
388+
assertRows(execute("SELECT x FROM %s WHERE x = 'A' AND y = 'B'"), row("A"));
389+
assertRows(execute("SELECT x FROM %s WHERE x = 'A' AND y = 'B' AND z = 'C'"), row("A"));
390+
assertRows(execute("SELECT * FROM %s WHERE x = 'A' AND y = 'b'"));
391+
392+
// Index based query for x and a
393+
assertEquals(2, execute("SELECT * FROM %s WHERE x : 'a'").size());
394+
assertEquals(2, execute("SELECT * FROM %s WHERE a CONTAINS 'd'").size());
395+
396+
// Setting this as a proper exception so that we get a good error back to the client. It has
397+
// been failing for a while, so this at least produces a nice error message.
398+
assertThatThrownBy(() -> execute("SELECT * FROM %s WHERE z : 'c'"))
399+
.isInstanceOf(InvalidRequestException.class)
400+
.hasMessageContaining(String.format(SingleColumnRestriction.AnalyzerMatchesRestriction.CANNOT_BE_RESTRICTED_BY_CLUSTERING_ERROR, "z"));
401+
402+
// Expect failure for eq on partition key column since it is interpreted as eq and forms an incomplete
403+
// partition key restriction.
404+
assertThatThrownBy(() -> execute("SELECT * FROM %s WHERE x = 'a'"))
405+
.isInstanceOf(InvalidRequestException.class)
406+
.hasMessageContaining("Cannot execute this query as it might involve data filtering and thus may have unpredictable performance.");
407+
408+
assertThatThrownBy(() -> execute("SELECT * FROM %s WHERE z = 'c'"))
409+
.isInstanceOf(InvalidRequestException.class)
410+
.hasMessageContaining("Column 'z' has an index but does not support the operators specified in the query. " +
411+
"If you want to execute this query despite the performance unpredictability, use ALLOW FILTERING");
412+
});
413+
}
414+
415+
@Test
416+
public void testCompoundPrimaryKeyRestrictionsWithAnalyzerEqOperatorSupportMatch() throws Throwable
417+
{
418+
createTable("CREATE TABLE %s (x text, y text, z text, a set<text>, PRIMARY KEY ((x, y), z))");
419+
420+
// Create case-insensitive index on all columns except y since it is essentially the same as x here
421+
createIndex(createCaseInsensitiveIndexString("x", AnalyzerEqOperatorSupport.Value.MATCH));
422+
createIndex(createCaseInsensitiveIndexString("z", AnalyzerEqOperatorSupport.Value.MATCH));
423+
createIndex(createCaseInsensitiveIndexString("values(a)", AnalyzerEqOperatorSupport.Value.MATCH));
424+
425+
// Set up two unique rows that will map to the same index terms
426+
execute("INSERT INTO %s (x, y, z, a) VALUES (?, ?, ?, ?)", "a", "b", "c", set("d", "e", "f"));
427+
execute("INSERT INTO %s (x, y, z, a) VALUES (?, ?, ?, ?)", "A", "B", "C", set("D", "E", "F"));
428+
429+
beforeAndAfterFlush(
430+
() -> {
431+
// Fully qualified partition key
432+
assertRows(execute("SELECT x FROM %s WHERE x = 'a' AND y = 'b'"), row("a"));
433+
assertRows(execute("SELECT x FROM %s WHERE x = 'a' AND y = 'b' AND z = 'c'"), row("a"));
434+
assertRows(execute("SELECT x FROM %s WHERE x = 'A' AND y = 'B'"), row("A"));
435+
assertRows(execute("SELECT x FROM %s WHERE x = 'A' AND y = 'B' AND z = 'C'"), row("A"));
436+
assertRows(execute("SELECT * FROM %s WHERE x = 'A' AND y = 'b'"));
437+
438+
// EQ gets interpreted as a match query here
439+
assertRows(execute("SELECT x FROM %s WHERE x = 'a'"), row("A"), row("a"));
440+
// This only gets 1 row instead of 2 because the index is on a clustering column and that applies post
441+
// filtering in surprising and problematic way.
442+
assertRows(execute("SELECT x FROM %s WHERE z = 'c'"), row("a"));
443+
444+
// Index based query for x, z, and a
445+
assertRows(execute("SELECT x FROM %s WHERE x : 'a'"), row("A"), row("a"));
446+
assertRows(execute("SELECT x FROM %s WHERE a CONTAINS 'd'"), row("A"), row("a"));
447+
448+
// Setting this as a proper exception so that we get a good error back to the client. It has
449+
// been failing for a while, so this at least produces a nice error message.
450+
assertThatThrownBy(() -> execute("SELECT * FROM %s WHERE z : 'c'"))
451+
.isInstanceOf(InvalidRequestException.class)
452+
.hasMessageContaining(String.format(SingleColumnRestriction.AnalyzerMatchesRestriction.CANNOT_BE_RESTRICTED_BY_CLUSTERING_ERROR, "z"));
453+
});
454+
}
455+
456+
private String createCaseInsensitiveIndexString(String column, AnalyzerEqOperatorSupport.Value eqBehaviour)
457+
{
458+
return "CREATE CUSTOM INDEX ON %s(" + column + ") " +
459+
"USING 'org.apache.cassandra.index.sai.StorageAttachedIndex' " +
460+
"WITH OPTIONS = { 'case_sensitive': false, 'equals_behaviour_when_analyzed': '" + eqBehaviour + "'}";
461+
}
462+
362463
private void assertIndexDoesNotSupportEquals()
363464
{
364465
// the EQ query should not be supported by the index

test/unit/org/apache/cassandra/index/sai/cql/AllowFilteringTest.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import org.junit.Test;
2121

22+
import org.apache.cassandra.cql3.restrictions.SingleColumnRestriction;
2223
import org.apache.cassandra.cql3.restrictions.StatementRestrictions;
2324
import org.apache.cassandra.index.IndexBuildInProgressException;
2425
import org.apache.cassandra.index.sai.SAITester;
@@ -435,7 +436,7 @@ public void testIndexedColumnDoesNotSupportAnalyzerRestriction()
435436

436437
// Analyzer restriction
437438
assertInvalidMessage(": restriction is only supported on properly indexed columns. a : 'Test' is not valid.", "SELECT * FROM %s WHERE a : 'Test'");
438-
assertInvalidMessage(String.format(StatementRestrictions.INDEX_DOES_NOT_SUPPORT_ANALYZER_MATCHES_MESSAGE, 'b'), "SELECT * FROM %s WHERE b : 'Test'");
439+
assertInvalidMessage(String.format(SingleColumnRestriction.AnalyzerMatchesRestriction.CANNOT_BE_RESTRICTED_BY_CLUSTERING_ERROR, 'b'), "SELECT * FROM %s WHERE b : 'Test'");
439440
assertInvalidMessage(String.format(StatementRestrictions.INDEX_DOES_NOT_SUPPORT_ANALYZER_MATCHES_MESSAGE, 'c'), "SELECT * FROM %s WHERE c : 'Test'");
440441
assertInvalidMessage(String.format(StatementRestrictions.INDEX_DOES_NOT_SUPPORT_ANALYZER_MATCHES_MESSAGE, 'd'), "SELECT * FROM %s WHERE d : 'Test'");
441442
}

0 commit comments

Comments
 (0)