Skip to content

Commit 4fcab6a

Browse files
author
Milder Hernandez
authored
Merge pull request #243 from milderhc/any-tag-filter
Add tag list support to JDBC vector stores
2 parents 3e7f6c8 + c4bc8c6 commit 4fcab6a

File tree

52 files changed

+698
-569
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+698
-569
lines changed

aiservices/google/src/main/java/com/microsoft/semantickernel/aiservices/google/GeminiService.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66

77
import javax.annotation.Nullable;
88

9-
109
/**
1110
* Makes a Gemini service available to the Semantic Kernel.
1211
*/

aiservices/google/src/main/java/com/microsoft/semantickernel/aiservices/google/chatcompletion/GeminiChatCompletion.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@
5050
import reactor.core.publisher.Flux;
5151
import reactor.core.publisher.Mono;
5252

53-
5453
/**
5554
* A chat completion service that uses the Gemini model to generate chat completions.
5655
*/
@@ -66,7 +65,7 @@ public class GeminiChatCompletion extends GeminiService implements ChatCompletio
6665
public GeminiChatCompletion(VertexAI client, String modelId) {
6766
super(client, modelId);
6867
}
69-
68+
7069
/**
7170
* Create a new instance of {@link GeminiChatCompletion.Builder}.
7271
*

aiservices/huggingface/src/main/java/com/microsoft/semantickernel/aiservices/huggingface/HuggingFaceClient.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ public static Builder builder() {
157157
/**
158158
* Builder for a Hugging Face client.
159159
*/
160-
public static class Builder {
160+
public static class Builder {
161161

162162
@Nullable
163163
private KeyCredential key = null;

api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/jdbc/Hotel.java

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,49 +2,53 @@
22

33
import com.fasterxml.jackson.annotation.JsonCreator;
44
import com.fasterxml.jackson.annotation.JsonProperty;
5-
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordDataAttribute;
6-
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordKeyAttribute;
7-
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordVectorAttribute;
5+
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData;
6+
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey;
7+
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector;
88
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
99
import com.microsoft.semantickernel.data.vectorstorage.definition.IndexKind;
1010

1111
import java.util.List;
1212

1313
public class Hotel {
1414
@JsonProperty("hotelId")
15-
@VectorStoreRecordKeyAttribute
15+
@VectorStoreRecordKey
1616
private final String id;
1717

18-
@VectorStoreRecordDataAttribute(isFilterable = true)
18+
@VectorStoreRecordData(isFilterable = true)
1919
private final String name;
2020

21-
@VectorStoreRecordDataAttribute
21+
@VectorStoreRecordData
2222
private final int code;
2323

2424
@JsonProperty("summary")
25-
@VectorStoreRecordDataAttribute()
25+
@VectorStoreRecordData()
2626
private final String description;
2727

2828
@JsonProperty("summaryEmbedding1")
29-
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
29+
@VectorStoreRecordVector(dimensions = 8, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
3030
private final List<Float> euclidean;
3131

3232
@JsonProperty("summaryEmbedding2")
33-
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = DistanceFunction.COSINE_DISTANCE)
33+
@VectorStoreRecordVector(dimensions = 8, distanceFunction = DistanceFunction.COSINE_DISTANCE)
3434
private final List<Float> cosineDistance;
3535

3636
@JsonProperty("summaryEmbedding3")
37-
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = DistanceFunction.DOT_PRODUCT)
37+
@VectorStoreRecordVector(dimensions = 8, distanceFunction = DistanceFunction.DOT_PRODUCT)
3838
private final List<Float> dotProduct;
3939

4040
@JsonProperty("indexedSummaryEmbedding")
41-
@VectorStoreRecordVectorAttribute(dimensions = 8, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
41+
@VectorStoreRecordVector(dimensions = 8, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
4242
private final List<Float> indexedEuclidean;
43-
@VectorStoreRecordDataAttribute
43+
44+
@VectorStoreRecordData
45+
private final List<String> tags;
46+
47+
@VectorStoreRecordData
4448
private double rating;
4549

4650
public Hotel() {
47-
this(null, null, 0, null, null, null, null, null, 0.0);
51+
this(null, null, 0, null, null, null, null, null, 0.0, null);
4852
}
4953

5054
@JsonCreator
@@ -57,7 +61,8 @@ public Hotel(
5761
@JsonProperty("summaryEmbedding2") List<Float> cosineDistance,
5862
@JsonProperty("summaryEmbedding3") List<Float> dotProduct,
5963
@JsonProperty("indexedSummaryEmbedding") List<Float> indexedEuclidean,
60-
@JsonProperty("rating") double rating) {
64+
@JsonProperty("rating") double rating,
65+
@JsonProperty("tags") List<String> tags) {
6166
this.id = id;
6267
this.name = name;
6368
this.code = code;
@@ -67,6 +72,7 @@ public Hotel(
6772
this.dotProduct = euclidean;
6873
this.indexedEuclidean = euclidean;
6974
this.rating = rating;
75+
this.tags = tags;
7076
}
7177

7278
public String getId() {
@@ -97,6 +103,10 @@ public double getRating() {
97103
return rating;
98104
}
99105

106+
public List<String> getTags() {
107+
return tags;
108+
}
109+
100110
public void setRating(double rating) {
101111
this.rating = rating;
102112
}

api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/jdbc/JDBCVectorStoreRecordCollectionTest.java

Lines changed: 45 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import com.microsoft.semantickernel.connectors.data.jdbc.JDBCVectorStoreRecordCollection;
99
import com.microsoft.semantickernel.connectors.data.jdbc.JDBCVectorStoreRecordCollectionOptions;
1010
import com.microsoft.semantickernel.connectors.data.jdbc.SQLVectorStoreQueryProvider;
11-
import com.microsoft.semantickernel.connectors.data.jdbc.filter.SQLEqualToFilterClause;
1211
import com.microsoft.semantickernel.connectors.data.mysql.MySQLVectorStoreQueryProvider;
1312
import com.microsoft.semantickernel.connectors.data.postgres.PostgreSQLVectorStoreQueryProvider;
1413
import com.microsoft.semantickernel.connectors.data.sqlite.SQLiteVectorStoreQueryProvider;
@@ -159,19 +158,19 @@ private List<Hotel> getHotels() {
159158
return Arrays.asList(
160159
new Hotel("id_1", "Hotel 1", 1, "Hotel 1 description",
161160
Arrays.asList(0.5f, 3.2f, 7.1f, -4.0f, 2.8f, 10.0f, -1.3f, 5.5f), null, null, null,
162-
4.0),
161+
4.0, Arrays.asList("luxury", "city")),
163162
new Hotel("id_2", "Hotel 2", 2, "Hotel 2 description",
164163
Arrays.asList(-2.0f, 8.1f, 0.9f, 5.4f, -3.3f, 2.2f, 9.9f, -4.5f), null, null, null,
165-
4.0),
164+
4.0, Arrays.asList("luxury", "city")),
166165
new Hotel("id_3", "Hotel 3", 3, "Hotel 3 description",
167166
Arrays.asList(4.5f, -6.2f, 3.1f, 7.7f, -0.8f, 1.1f, -2.2f, 8.3f), null, null, null,
168-
5.0),
167+
5.0, Arrays.asList("luxury", "beach")),
169168
new Hotel("id_4", "Hotel 4", 4, "Hotel 4 description",
170169
Arrays.asList(7.0f, 1.2f, -5.3f, 2.5f, 6.6f, -7.8f, 3.9f, -0.1f), null, null, null,
171-
4.0),
170+
4.0, Arrays.asList("luxury", "city")),
172171
new Hotel("id_5", "Hotel 5", 5, "Hotel 5 description",
173172
Arrays.asList(-3.5f, 4.4f, -1.2f, 9.9f, 5.7f, -6.1f, 7.8f, -2.0f), null, null, null,
174-
4.0)
173+
4.0, Arrays.asList("luxury", "city"))
175174
);
176175
}
177176

@@ -396,14 +395,13 @@ public void getBatchWithNoVectors(QueryProvider provider) {
396395
}
397396

398397
private static Stream<Arguments> provideSearchParameters() {
399-
return Stream.of(
400-
Arguments.of(QueryProvider.MySQL, "euclidean"),
401-
Arguments.of(QueryProvider.MySQL, "cosineDistance"),
402-
Arguments.of(QueryProvider.MySQL, "dotProduct"),
403-
Arguments.of(QueryProvider.PostgreSQL, "euclidean"),
404-
Arguments.of(QueryProvider.PostgreSQL, "cosineDistance"),
405-
Arguments.of(QueryProvider.PostgreSQL, "dotProduct")
406-
);
398+
return Arrays.stream(QueryProvider.values()).map(provider ->
399+
Stream.of(
400+
Arguments.of(provider, "euclidean"),
401+
Arguments.of(provider, "cosineDistance"),
402+
Arguments.of(provider, "dotProduct")
403+
)
404+
).flatMap(s -> s);
407405
}
408406

409407
@ParameterizedTest
@@ -464,13 +462,13 @@ public void approximateSearch(QueryProvider provider) {
464462
assertNotNull(results);
465463
assertEquals(5, results.size());
466464
// The third hotel should be the most similar
467-
assertEquals(hotels.get(2).getId(), results.get(0).getRecord().getId());
465+
assertEquals("id_3", results.get(0).getRecord().getId());
468466
}
469467

470468
@ParameterizedTest
471469
@MethodSource("provideSearchParameters")
472-
public void searchWithFilter(QueryProvider provider, String embeddingName) {
473-
String collectionName = "searchWithFilter";
470+
public void searchWithFilterEqualToFilter(QueryProvider provider, String embeddingName) {
471+
String collectionName = "searchWithFilterEqualToFilter";
474472
JDBCVectorStoreRecordCollection<Hotel> recordCollection = buildRecordCollection(provider,
475473
collectionName);
476474

@@ -482,7 +480,7 @@ public void searchWithFilter(QueryProvider provider, String embeddingName) {
482480
.withLimit(3)
483481
.withVectorSearchFilter(
484482
VectorSearchFilter.builder()
485-
.withEqualToFilterClause(new SQLEqualToFilterClause("rating", 4.0)).build())
483+
.equalTo("rating", 4.0).build())
486484
.build();
487485

488486
// Embeddings similar to the third hotel, but as the filter is set to 4.0, the third hotel should not be returned
@@ -491,7 +489,34 @@ public void searchWithFilter(QueryProvider provider, String embeddingName) {
491489
assertNotNull(results);
492490
assertEquals(3, results.size());
493491
// The first hotel should be the most similar
494-
assertEquals(hotels.get(0).getId(), results.get(0).getRecord().getId());
492+
assertEquals("id_1", results.get(0).getRecord().getId());
493+
}
494+
495+
@ParameterizedTest
496+
@MethodSource("provideSearchParameters")
497+
public void searchWithAnyTagEqualToFilter(QueryProvider provider, String embeddingName) {
498+
String collectionName = "searchWithAnyTagEqualToFilter";
499+
JDBCVectorStoreRecordCollection<Hotel> recordCollection = buildRecordCollection(provider,
500+
collectionName);
501+
502+
List<Hotel> hotels = getHotels();
503+
recordCollection.upsertBatchAsync(hotels, null).block();
504+
505+
VectorSearchOptions options = VectorSearchOptions.builder()
506+
.withVectorFieldName(embeddingName)
507+
.withLimit(3)
508+
.withVectorSearchFilter(
509+
VectorSearchFilter.builder()
510+
.anyTagEqualTo("tags", "city").build())
511+
.build();
512+
513+
// Embeddings similar to the third hotel, but as the filter requires the "city" tag and the third hotel is tagged "beach", it should not be returned
514+
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS,
515+
options).block();
516+
assertNotNull(results);
517+
assertEquals(3, results.size());
518+
// The first hotel should be the most similar
519+
assertEquals("id_1", results.get(0).getRecord().getId());
495520
}
496521

497522
// MySQL will always return the vectors as they're needed to compute the distances
@@ -520,7 +545,7 @@ public void postgresSearchIncludeAndNotIncludeVectors() {
520545
assertNotNull(results);
521546
assertEquals(3, results.size());
522547
// The third hotel should be the most similar
523-
assertEquals(hotels.get(2).getId(), results.get(0).getRecord().getId());
548+
assertEquals("id_3", results.get(0).getRecord().getId());
524549
assertNotNull(results.get(0).getRecord().getEuclidean());
525550
}
526551
}

api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/redis/Hotel.java

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,41 +2,42 @@
22

33
import com.fasterxml.jackson.annotation.JsonCreator;
44
import com.fasterxml.jackson.annotation.JsonProperty;
5-
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordDataAttribute;
6-
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordKeyAttribute;
7-
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordVectorAttribute;
5+
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData;
6+
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey;
7+
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector;
88
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
99
import com.microsoft.semantickernel.data.vectorstorage.definition.IndexKind;
1010

1111
import java.util.List;
1212

1313
public class Hotel {
1414

15-
@VectorStoreRecordKeyAttribute
15+
@VectorStoreRecordKey
1616
private final String id;
1717

18-
@VectorStoreRecordDataAttribute(isFilterable = true)
18+
@VectorStoreRecordData(isFilterable = true)
1919
private final String name;
2020

21-
@VectorStoreRecordDataAttribute
21+
@VectorStoreRecordData
2222
private final int code;
2323

2424
@JsonProperty("summary")
25-
@VectorStoreRecordDataAttribute()
25+
@VectorStoreRecordData()
2626
private final String description;
2727

2828
@JsonProperty("summaryEmbedding1")
29-
@VectorStoreRecordVectorAttribute(dimensions = 8, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
29+
@VectorStoreRecordVector(dimensions = 8, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
3030
private final List<Float> euclidean;
3131

3232
@JsonProperty("summaryEmbedding2")
33-
@VectorStoreRecordVectorAttribute(dimensions = 8)
33+
@VectorStoreRecordVector(dimensions = 8)
3434
private final List<Float> cosineDistance;
3535

3636
@JsonProperty("summaryEmbedding3")
37-
@VectorStoreRecordVectorAttribute(dimensions = 8, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.DOT_PRODUCT)
37+
@VectorStoreRecordVector(dimensions = 8, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.DOT_PRODUCT)
3838
private final List<Float> dotProduct;
39-
@VectorStoreRecordDataAttribute
39+
40+
@VectorStoreRecordData(isFilterable = true)
4041
private double rating;
4142

4243
public Hotel() {

api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/redis/RedisHashSetVectorStoreRecordCollectionTest.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import com.microsoft.semantickernel.connectors.data.redis.RedisHashSetVectorStoreRecordCollection;
44
import com.microsoft.semantickernel.connectors.data.redis.RedisHashSetVectorStoreRecordCollectionOptions;
5+
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchFilter;
56
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult;
67
import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordDataField;
78
import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordDefinition;
@@ -94,6 +95,7 @@ static void setup() {
9495
fields.add(VectorStoreRecordDataField.builder()
9596
.withName("rating")
9697
.withFieldType(Double.class)
98+
.isFilterable(true)
9799
.build());
98100
VectorStoreRecordDefinition recordDefinition = VectorStoreRecordDefinition.fromFields(fields);
99101

@@ -440,4 +442,30 @@ public void searchWithOffSet(RecordCollectionOptions options, String embeddingNa
440442
// The first hotel should be the most similar
441443
assertEquals(hotels.get(0).getId(), results.get(0).getRecord().getId(), indexingFailureMessage);
442444
}
445+
446+
@ParameterizedTest
447+
@MethodSource("provideSearchParameters")
448+
public void searchWithFilterEqualToFilter(RecordCollectionOptions recordCollectionOptions, String embeddingName) {
449+
String collectionName = getCollectionName("search", recordCollectionOptions);
450+
RedisHashSetVectorStoreRecordCollection<Hotel> recordCollection = createCollection(optionsMap.get(recordCollectionOptions), collectionName);
451+
452+
List<Hotel> hotels = getHotels();
453+
recordCollection.upsertBatchAsync(hotels, null).block();
454+
455+
VectorSearchOptions options = VectorSearchOptions.builder()
456+
.withVectorFieldName(embeddingName)
457+
.withLimit(3)
458+
.withVectorSearchFilter(
459+
VectorSearchFilter.builder()
460+
.equalTo("rating", 4.0).build())
461+
.build();
462+
463+
// Embeddings similar to the third hotel, but as the filter is set to 4.0, the third hotel should not be returned
464+
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS,
465+
options).block();
466+
assertNotNull(results);
467+
assertEquals(3, results.size());
468+
// The first hotel should be the most similar
469+
assertEquals("id_1", results.get(0).getRecord().getId());
470+
}
443471
}

0 commit comments

Comments
 (0)