Skip to content

Commit 8c40ed2

Browse files
committed
DOCSP-33428: vectorsearch builder (#143)
* DOCSP-33428: vectorsearch builder * use other data class * add taxonomy + small fixes * JS PR fixes - remove projection stage * fixes * fixes (cherry picked from commit 65f210d)
1 parent 038532c commit 8c40ed2

File tree

6 files changed

+109
-4
lines changed

6 files changed

+109
-4
lines changed

examples/build.gradle.kts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ repositories {
1818
dependencies {
1919
implementation("org.mongodb:mongodb-driver-kotlin-coroutine:$kotlin_mongodb_version")
2020
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.1")
21-
testImplementation(kotlin("test"))
21+
testImplementation("org.jetbrains.kotlin:kotlin-test:1.8.10")
2222
implementation("org.slf4j:slf4j-api:2.0.5")
2323
implementation("ch.qos.logback:logback-classic:1.4.7")
2424
implementation("io.github.cdimascio:dotenv-kotlin:6.4.1")

examples/gradle.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
kotlin.code.style=official
2-
kotlin_mongodb_version=4.10.0
2+
kotlin_mongodb_version=4.11.0

examples/src/test/kotlin/AggregatesBuilderTest.kt

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import com.mongodb.client.model.geojson.Position
2828
import com.mongodb.client.model.search.SearchOperator
2929
import com.mongodb.client.model.search.SearchOptions
3030
import com.mongodb.client.model.search.SearchPath
31+
import com.mongodb.client.model.search.VectorSearchOptions.vectorSearchOptions
3132
import com.mongodb.kotlin.client.coroutine.MongoClient
3233
import config.getConfig
3334
import kotlinx.coroutines.flow.firstOrNull
@@ -80,6 +81,15 @@ class AggregatesBuilderTest {
8081
)
8182
// :snippet-end:
8283

84+
// :snippet-start: vector-search-data-class
85+
data class MovieAlt(
86+
val title: String,
87+
val year: Int,
88+
val plot: String,
89+
val plotEmbedding: List<Double>
90+
)
91+
// :snippet-end:
92+
8393
companion object {
8494
val config = getConfig()
8595
private val client = MongoClient.create(config.connectionUri)
@@ -674,7 +684,15 @@ class AggregatesBuilderTest {
674684
// :snippet-end:
675685
Aggregates.sort(Sorts.descending(Results::count.name, "_id"))))
676686
val results = resultsFlow.toList()
677-
val actual = listOf(Results("Drama", 8), Results("Crime", 3), Results("Action", 2), Results("Thriller", 1), Results("Sci-Fi", 1), Results("Romance", 1), Results("Mystery", 1),)
687+
val actual = listOf(
688+
Results("Drama", 8),
689+
Results("Crime", 3),
690+
Results("Action", 2),
691+
Results("Thriller", 1),
692+
Results("Sci-Fi", 1),
693+
Results("Romance", 1),
694+
Results("Mystery", 1)
695+
)
678696
assertEquals(results, actual)
679697
}
680698

@@ -951,4 +969,27 @@ class AggregatesBuilderTest {
951969
assertEquals(1, resultsFlow.toList().size)
952970
assertEquals(1, results.first().get("count", Document::class.java).get("lowerBound", java.lang.Long::class.java)?.toInt())
953971
}
972+
973+
/* NOTE: Test is not run by default. Vector search requires the creation of a vector search index on the collection before running.
974+
*/
975+
@Ignore
976+
fun vectorSearchTest() = runBlocking {
    // Builds a single-stage pipeline containing the $vectorSearch stage shown in the docs.
    // Only the text between the snippet markers is extracted into the published example,
    // so explanatory comments are kept outside of that region.
    val resultsFlow = movieCollection.aggregate<Document>(
        listOf(
            // :snippet-start: vector-search
            Aggregates.vectorSearch(
                SearchPath.fieldPath(MovieAlt::plotEmbedding.name),
                listOf(-0.0072121937, -0.030757688, -0.012945653),
                "mflix_movies_embedding_index",
                2.toLong(),
                1.toLong(),
                vectorSearchOptions().filter(Filters.gte(MovieAlt::year.name, 2016))
            )
            // :snippet-end:
        )
    )

    // Collect the flow exactly once; each toList() call re-runs the aggregation.
    val results = resultsFlow.toList()

    // The stage is built with limit = 1, so exactly one document is expected.
    assertEquals(1, results.size)

    // The stage filters on year >= 2016, so the returned document must satisfy it.
    // Read the value as a Number so the check does not depend on the stored numeric type.
    val year = results.first()[MovieAlt::year.name] as Number
    assertEquals(true, year.toInt() >= 2016)
}
954995
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
data class MovieAlt(
2+
val title: String,
3+
val year: Int,
4+
val plot: String,
5+
val plotEmbedding: List<Double>
6+
)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Aggregates.vectorSearch(
2+
SearchPath.fieldPath(MovieAlt::plotEmbedding.name),
3+
listOf(-0.0072121937, -0.030757688, -0.012945653),
4+
"mflix_movies_embedding_index",
5+
2.toLong(),
6+
1.toLong(),
7+
vectorSearchOptions().filter(Filters.gte(MovieAlt::year.name, 2016))
8+
)

source/fundamentals/builders/aggregates.txt

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@
44
Aggregates Builders
55
===================
66

7+
.. facet::
8+
:name: genre
9+
:values: reference
10+
11+
.. meta::
12+
:keywords: code example, data insights, compute, atlas
13+
714
.. contents:: On this page
815
:local:
916
:backlinks: none
@@ -668,7 +675,7 @@ The following examples use data modeled with the following Kotlin data class:
668675
.. literalinclude:: /examples/generated/AggregatesBuilderTest.snippet.bucket-data-class.kt
669676
:language: kotlin
670677

671-
This exampled creates a pipeline stage that will attempt to create and evenly
678+
This example creates a pipeline stage that will attempt to create and evenly
672679
distribute documents into 5 buckets using the value of their ``price`` field:
673680

674681
.. literalinclude:: /examples/generated/AggregatesBuilderTest.snippet.bucket-auto.kt
@@ -898,3 +905,46 @@ aggregation stage:
898905
Learn more about this helper from the
899906
`searchMeta() API documentation <{+api+}/apidocs/mongodb-driver-core/com/mongodb/client/model/Aggregates.html#searchMeta(com.mongodb.client.model.search.SearchCollector)>`__.
900907

908+
.. _kotlin-atlas-vector-search:
909+
910+
Atlas Vector Search
911+
-------------------
912+
913+
.. important::
914+
915+
To learn about which versions of MongoDB Atlas support this feature, see
916+
:atlas:`Limitations </atlas-vector-search/vector-search-stage/#limitations>`
917+
in the Atlas documentation.
918+
919+
Use the ``vectorSearch()`` method to create a :atlas:`$vectorSearch </atlas-vector-search/vector-search-stage/>`
920+
pipeline stage that specifies a **semantic search**. A semantic search is
921+
a type of search that locates pieces of information that are similar in meaning to your query.
922+
923+
To use this feature when performing an aggregation on a collection, you
924+
must create a vector search index and index your vector embeddings. To
925+
learn how to set up search indexes in MongoDB Atlas, see :atlas:`How to
926+
Index Vector Embeddings for Vector Search
927+
</atlas-search/field-types/knn-vector/>` in the Atlas documentation.
928+
929+
The example in this section uses data modeled with the following Kotlin data class:
930+
931+
.. literalinclude:: /examples/generated/AggregatesBuilderTest.snippet.vector-search-data-class.kt
932+
:language: kotlin
933+
934+
This example shows how to build an aggregation pipeline that uses the
935+
``vectorSearch()`` method to perform a vector search with the following
936+
specifications:
937+
938+
- Searches ``plotEmbedding`` field values by using vector embeddings of a
939+
string value
940+
- Uses the ``mflix_movies_embedding_index`` vector search index
941+
- Considers up to 2 nearest neighbors
942+
- Returns 1 document
943+
- Filters for documents in which the ``year`` value is at least ``2016``
944+
945+
.. literalinclude:: /examples/generated/AggregatesBuilderTest.snippet.vector-search.kt
946+
:language: kotlin
947+
948+
To learn more about this helper, see the
949+
`vectorSearch() API documentation
950+
<{+api+}/apidocs/mongodb-driver-core/com/mongodb/client/model/Aggregates.html#vectorSearch(com.mongodb.client.model.search.FieldSearchPath,java.lang.Iterable,java.lang.String,long,long,com.mongodb.client.model.search.VectorSearchOptions)>`__.

0 commit comments

Comments
 (0)