Skip to content

Commit ff878c3

Browse files
committed
GH-3160: Handle element types correctly in CassandraFilterExpressionConverter.doValue
Fixes: 3160 #3160 When using a filter expression with IN operator on a collection field in CassandraVectorStore.similaritySearch, a ClassCastException was thrown because the code attempted to format individual collection elements using the collection's codec instead of the element type's codec. This fix modifies doValue to detect when we are formatting elements inside a collection type and use the appropriate element type codec. While Cassandra does not support using the IN operator directly on collection columns, this fix ensures we generate syntactically correct CQL rather than throwing a Java exception. The change specifically addresses ListType collections by using the element type codec for individual elements within the list. Signed-off-by: Soby Chacko <[email protected]>
1 parent b219c21 commit ff878c3

File tree

2 files changed

+72
-1
lines changed

2 files changed

+72
-1
lines changed

vector-stores/spring-ai-cassandra-store/src/main/java/org/springframework/ai/vectorstore/cassandra/CassandraFilterExpressionConverter.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,11 @@
2323
import java.util.stream.Collectors;
2424

2525
import com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata;
26+
import com.datastax.oss.driver.api.core.type.DataType;
2627
import com.datastax.oss.driver.api.core.type.DataTypes;
28+
import com.datastax.oss.driver.api.core.type.ListType;
29+
import com.datastax.oss.driver.api.core.type.MapType;
30+
import com.datastax.oss.driver.api.core.type.SetType;
2731
import com.datastax.oss.driver.api.core.type.codec.registry.CodecRegistry;
2832
import com.datastax.oss.driver.shaded.guava.common.base.Preconditions;
2933

@@ -118,10 +122,19 @@ private void doListValue(ColumnMetadata column, Object v, StringBuilder context)
118122
}
119123

120124
private void doValue(ColumnMetadata column, Object v, StringBuilder context) {
125+
126+
DataType dataType = column.getType();
127+
128+
// Check if we're handling an element inside a collection for an IN clause
129+
if ((dataType instanceof ListType) && !(v instanceof Collection)) {
130+
// Extract the element type from the collection type
131+
dataType = ((ListType) dataType).getElementType();
132+
}
133+
121134
if (DataTypes.SMALLINT.equals(column.getType())) {
122135
v = ((Number) v).shortValue();
123136
}
124-
context.append(CodecRegistry.DEFAULT.codecFor(column.getType()).format(v));
137+
context.append(CodecRegistry.DEFAULT.codecFor(dataType).format(v));
125138
}
126139

127140
private Optional<ColumnMetadata> getColumn(String name) {

vector-stores/spring-ai-cassandra-store/src/test/java/org/springframework/ai/vectorstore/cassandra/CassandraVectorStoreIT.java

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929

3030
import com.datastax.oss.driver.api.core.CqlSession;
3131
import com.datastax.oss.driver.api.core.CqlSessionBuilder;
32+
import com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata;
33+
import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata;
3234
import com.datastax.oss.driver.api.core.servererrors.InvalidQueryException;
3335
import com.datastax.oss.driver.api.core.servererrors.SyntaxError;
3436
import com.datastax.oss.driver.api.core.type.DataTypes;
@@ -57,6 +59,7 @@
5759
import org.springframework.core.io.DefaultResourceLoader;
5860

5961
import static org.assertj.core.api.Assertions.assertThat;
62+
import static org.junit.Assert.fail;
6063

6164
/**
6265
* Use `mvn failsafe:integration-test -Dit.test=CassandraVectorStoreIT`
@@ -522,6 +525,61 @@ void getNativeClientTest() {
522525
});
523526
}
524527

528+
@Test
529+
void searchWithCollectionFilter() {
530+
this.contextRunner.run(context -> {
531+
try (CassandraVectorStore store = createTestStore(context,
532+
new SchemaColumn("currencies", DataTypes.listOf(DataTypes.TEXT), SchemaColumnTags.INDEXED))) {
533+
534+
// Create test documents with different currency lists
535+
var btcDocument = new Document("BTC_doc", "Bitcoin document", Map.of("currencies", List.of("BTC")));
536+
var ethDocument = new Document("ETH_doc", "Ethereum document", Map.of("currencies", List.of("ETH")));
537+
var multiCurrencyDocument = new Document("MULTI_doc", "Multi-currency document",
538+
Map.of("currencies", List.of("BTC", "ETH", "SOL")));
539+
540+
store.add(List.of(btcDocument, ethDocument, multiCurrencyDocument));
541+
542+
// Verify initial state
543+
List<Document> results = store
544+
.similaritySearch(SearchRequest.builder().query("document").topK(5).build());
545+
assertThat(results).hasSize(3);
546+
547+
try {
548+
// Test filtering with IN operator on a collection field
549+
Filter.Expression filterExpression = new Filter.Expression(Filter.ExpressionType.IN,
550+
new Filter.Key("currencies"), new Filter.Value(List.of("BTC")));
551+
552+
// Search using programmatic filter
553+
store.similaritySearch(SearchRequest.builder()
554+
.query("document")
555+
.topK(5)
556+
.similarityThresholdAll()
557+
.filterExpression(filterExpression)
558+
.build());
559+
560+
// If we get here without an exception, it means Cassandra
561+
// unexpectedly accepted the query,
562+
// which is surprising since Cassandra doesn't support the IN operator
563+
// on collection columns.
564+
// This would indicate a potential change in Cassandra's behavior.
565+
Assertions.fail("Expected InvalidQueryException from Cassandra");
566+
}
567+
catch (InvalidQueryException e) {
568+
// This is the expected outcome: Cassandra rejects the query with a
569+
// specific error
570+
// indicating that collection columns cannot be used with IN
571+
// operators, which is
572+
// a documented limitation of Cassandra's query language. Support for
573+
// collection
574+
// filtering via CONTAINS would be needed for this type of query to
575+
// work.
576+
assertThat(e.getMessage()).contains("Collection column 'currencies'");
577+
assertThat(e.getMessage()).contains("cannot be restricted by a 'IN' relation");
578+
}
579+
}
580+
});
581+
}
582+
525583
@SpringBootConfiguration
526584
@EnableAutoConfiguration(exclude = { DataSourceAutoConfiguration.class })
527585
public static class TestApplication {

0 commit comments

Comments
 (0)