Skip to content

Commit 860d1bc

Browse files
authored
Make MongoDB vector field configurable (#160)
* Make MongoDB vector field configurable
* Support multiple vector fields
* Add README
* comment lint
* make it required and the only path
1 parent 8330d55 commit 860d1bc

File tree

3 files changed

+73
-30
lines changed

3 files changed

+73
-30
lines changed

README.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -299,11 +299,12 @@ docker run --net=host --rm -it registry.cloud.qdrant.io/library/qdrant-migration
299299

300300
#### MongoDB Options
301301

302-
| Flag | Description |
303-
| ---------------------- | ------------------------- |
304-
| `--mongodb.url` | MongoDB connection string |
305-
| `--mongodb.database` | MongoDB database name |
306-
| `--mongodb.collection` | MongoDB collection name |
302+
| Flag | Description |
303+
| ------------------------- | ----------------------------------------------------- |
304+
| `--mongodb.url` | MongoDB connection string |
305+
| `--mongodb.database` | MongoDB database name |
306+
| `--mongodb.collection` | MongoDB collection name |
307+
| `--mongodb.vector-fields` | MongoDB vector fields, comma-separated **(required)** |
307308

308309
#### Qdrant Options
309310

cmd/migrate_from_mongo.go

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"fmt"
77
"os"
88
"os/signal"
9+
"slices"
910
"syscall"
1011

1112
"github.com/pterm/pterm"
@@ -19,10 +20,11 @@ import (
1920
)
2021

2122
type MigrateFromMongoDBCmd struct {
22-
MongoDB commons.MongoDBConfig `embed:"" prefix:"mongodb."`
23-
Qdrant commons.QdrantConfig `embed:"" prefix:"qdrant."`
24-
Migration commons.MigrationConfig `embed:"" prefix:"migration."`
25-
IdField string `prefix:"qdrant." help:"Field storing MongoDB IDs in Qdrant." default:"__id__"`
23+
MongoDB commons.MongoDBConfig `embed:"" prefix:"mongodb."`
24+
Qdrant commons.QdrantConfig `embed:"" prefix:"qdrant."`
25+
Migration commons.MigrationConfig `embed:"" prefix:"migration."`
26+
IdField string `prefix:"qdrant." help:"Field storing MongoDB IDs in Qdrant." default:"__id__"`
27+
VectorFields []string `required:"true" prefix:"mongodb." help:"Fields to use as vector."`
2628

2729
targetHost string
2830
targetPort int
@@ -186,9 +188,12 @@ func (r *MigrateFromMongoDBCmd) migrateData(ctx context.Context, sourceClient *m
186188
if fieldName == "_id" {
187189
continue
188190
}
189-
190-
if vector, ok := extractVector(value); ok {
191-
vectors[fieldName] = qdrant.NewVector(vector...)
191+
if slices.Contains(r.VectorFields, fieldName) {
192+
if vector, ok := extractVector(value); ok {
193+
vectors[fieldName] = qdrant.NewVector(vector...)
194+
} else {
195+
payload[fieldName] = value
196+
}
192197
} else {
193198
payload[fieldName] = value
194199
}

integration_tests/migrate_from_mongo_test.go

Lines changed: 55 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@ import (
1313
"github.com/qdrant/go-client/qdrant"
1414
)
1515

16+
const (
17+
// Test multiple vectors.
18+
vectorFieldText = "text_embedding"
19+
vectorFieldImage = "image_embedding"
20+
nonVectorField = "non_vector"
21+
)
22+
1623
func TestMigrateFromMongo(t *testing.T) {
1724
ctx := context.Background()
1825

@@ -44,22 +51,30 @@ func TestMigrateFromMongo(t *testing.T) {
4451
coll := db.Collection("testcoll")
4552

4653
testIDs := make([]string, totalEntries)
47-
testVectors := make([][]float32, totalEntries)
54+
testVectorsText := make([][]float32, totalEntries)
55+
testVectorsImage := make([][]float32, totalEntries)
4856
testDocs := make([]string, totalEntries)
4957
testSources := make([]string, totalEntries)
5058

59+
randomVectorPoints := randFloat32Values(dimension)
60+
// Test with an additional non-vector array.
61+
nonVectorArray := []float32{1.0, 2.0, 3.0, 4.0, 5.0}
62+
5163
for i := 0; i < totalEntries; i++ {
5264
testIDs[i] = fmt.Sprintf("%d", i+1)
53-
testVectors[i] = randFloat32Values(dimension)
65+
testVectorsText[i] = randomVectorPoints
66+
testVectorsImage[i] = randomVectorPoints
5467
testDocs[i] = fmt.Sprintf("test doc %d", i+1)
5568
testSources[i] = fmt.Sprintf("source%d", i+1)
5669
_, err := coll.InsertOne(ctx, bson.M{
5770
// _id is a mandatory field in MongoDB, so we use it to store the ID.
5871
// If not specified, MongoDB will generate a random ObjectID.
59-
"_id": testIDs[i],
60-
"vector": testVectors[i],
61-
"doc": testDocs[i],
62-
"source": testSources[i],
72+
"_id": testIDs[i],
73+
vectorFieldText: testVectorsText[i],
74+
vectorFieldImage: testVectorsImage[i],
75+
"doc": testDocs[i],
76+
"source": testSources[i],
77+
nonVectorField: nonVectorArray,
6378
})
6479
require.NoError(t, err)
6580
}
@@ -77,7 +92,11 @@ func TestMigrateFromMongo(t *testing.T) {
7792
CollectionName: testCollectionName,
7893
VectorsConfig: qdrant.NewVectorsConfigMap(
7994
map[string]*qdrant.VectorParams{
80-
"vector": {
95+
vectorFieldText: {
96+
Size: uint64(dimension),
97+
Distance: qdrant.Distance_Dot,
98+
},
99+
vectorFieldImage: {
81100
Size: uint64(dimension),
82101
Distance: qdrant.Distance_Dot,
83102
},
@@ -95,6 +114,7 @@ func TestMigrateFromMongo(t *testing.T) {
95114
fmt.Sprintf("--qdrant.api-key=%s", qdrantAPIKey),
96115
fmt.Sprintf("--qdrant.collection=%s", testCollectionName),
97116
fmt.Sprintf("--qdrant.id-field=%s", idField),
117+
fmt.Sprintf("--mongodb.vector-fields=%s", fmt.Sprintf("%s,%s", vectorFieldText, vectorFieldImage)),
98118
}
99119

100120
runMigrationBinary(t, args)
@@ -109,19 +129,25 @@ func TestMigrateFromMongo(t *testing.T) {
109129
require.Len(t, points, len(testIDs))
110130

111131
expectedPoints := make(map[string]struct {
112-
doc string
113-
source string
114-
vector []float32
132+
doc string
133+
source string
134+
vector_text []float32
135+
vector_image []float32
136+
non_vector_array []float32
115137
})
116138
for i, id := range testIDs {
117139
expectedPoints[id] = struct {
118-
doc string
119-
source string
120-
vector []float32
140+
doc string
141+
source string
142+
vector_text []float32
143+
vector_image []float32
144+
non_vector_array []float32
121145
}{
122-
doc: testDocs[i],
123-
source: testSources[i],
124-
vector: testVectors[i],
146+
doc: testDocs[i],
147+
source: testSources[i],
148+
vector_text: testVectorsText[i],
149+
vector_image: testVectorsImage[i],
150+
non_vector_array: nonVectorArray,
125151
}
126152
}
127153

@@ -131,7 +157,18 @@ func TestMigrateFromMongo(t *testing.T) {
131157
require.True(t, exists)
132158
require.Equal(t, expected.doc, point.Payload["doc"].GetStringValue())
133159
require.Equal(t, expected.source, point.Payload["source"].GetStringValue())
134-
vector := point.Vectors.GetVectors().GetVectors()["vector"].GetData()
135-
require.Equal(t, expected.vector, vector)
160+
vectorText := point.Vectors.GetVectors().GetVectors()[vectorFieldText].GetData()
161+
require.Equal(t, expected.vector_text, vectorText)
162+
vectorImage := point.Vectors.GetVectors().GetVectors()[vectorFieldImage].GetData()
163+
require.Equal(t, expected.vector_image, vectorImage)
164+
nonVectorPayload := point.Payload[nonVectorField].GetListValue()
165+
nonVectorArray := make([]float32, len(nonVectorPayload.GetValues()))
166+
for i, val := range nonVectorPayload.GetValues() {
167+
nonVectorArray[i] = float32(val.GetDoubleValue())
168+
}
169+
require.Equal(t, expected.non_vector_array, nonVectorArray, "The non-vector array payload does not match the inserted data")
170+
// The named vector map should NOT contain nonVectorField
171+
_, nonVectorExistsAsVector := point.Vectors.GetVectors().GetVectors()[nonVectorField]
172+
require.False(t, nonVectorExistsAsVector, "%s should NOT exist as a named vector", nonVectorField)
136173
}
137174
}

0 commit comments

Comments
 (0)