Skip to content

Commit 0ec2435

Browse files
authored
feat: Allow customizing distance when migrating from Milvus (#47)
Signed-off-by: Anush008 <[email protected]>
1 parent d95ab43 commit 0ec2435

File tree

2 files changed

+30
-16
lines changed

2 files changed

+30
-16
lines changed

README.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -189,11 +189,12 @@ docker run --net=host --rm -it registry.cloud.qdrant.io/library/qdrant-migration
189189
190190
#### Qdrant Options
191191
192-
| Flag | Description |
193-
| ------------------------------- | --------------------------------------------------------------- |
194-
| `--qdrant.url` | Qdrant gRPC URL. Default: `"http://localhost:6334"` |
195-
| `--qdrant.collection` | Target collection name |
196-
| `--qdrant.api-key` | Qdrant API key |
192+
| Flag | Description |
193+
| -------------------------- | ---------------------------------------------------------------------------------------------------------------- |
194+
| `--qdrant.url` | Qdrant gRPC URL. Default: `"http://localhost:6334"` |
195+
| `--qdrant.collection` | Target collection name |
196+
| `--qdrant.api-key` | Qdrant API key |
197+
| `--qdrant.distance-metric` | Map of vector field names to distance metrics (`"cosine"`, `"dot"`, `"euclid"`, `"manhattan"`). Fields not listed in the map default to `"cosine"` |
197198
198199
* See [Shared Migration Options](#shared-migration-options) for common migration parameters.
199200

cmd/migrate_from_milvus.go

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,10 @@ import (
2020
)
2121

2222
type MigrateFromMilvusCmd struct {
23-
Milvus commons.MilvusConfig `embed:"" prefix:"milvus."`
24-
Qdrant commons.QdrantConfig `embed:"" prefix:"qdrant."`
25-
Migration commons.MigrationConfig `embed:"" prefix:"migration."`
23+
Milvus commons.MilvusConfig `embed:"" prefix:"milvus."`
24+
Qdrant commons.QdrantConfig `embed:"" prefix:"qdrant."`
25+
Migration commons.MigrationConfig `embed:"" prefix:"migration."`
26+
DistanceMetric map[string]string `prefix:"qdrant." help:"Map of vector field names to distance metrics (cosine,dot,euclid,manhattan). Default is cosine if not specified."`
2627

2728
targetHost string
2829
targetPort int
@@ -150,6 +151,13 @@ func (r *MigrateFromMilvusCmd) prepareTargetCollection(ctx context.Context, sour
150151
return fmt.Errorf("failed to describe Milvus collection: %w", err)
151152
}
152153

154+
distanceMapping := map[string]qdrant.Distance{
155+
"euclid": qdrant.Distance_Euclid,
156+
"cosine": qdrant.Distance_Cosine,
157+
"dot": qdrant.Distance_Dot,
158+
"manhattan": qdrant.Distance_Manhattan,
159+
}
160+
153161
vectorParamsMap := make(map[string]*qdrant.VectorParams)
154162
for _, field := range schema.Schema.Fields {
155163
if field.DataType == entity.FieldTypeFloatVector {
@@ -159,11 +167,17 @@ func (r *MigrateFromMilvusCmd) prepareTargetCollection(ctx context.Context, sour
159167
return fmt.Errorf("failed to parse vector dimension: %w", err)
160168
}
161169

170+
distanceMetric := "cosine"
171+
if specifiedDistance, ok := r.DistanceMetric[field.Name]; ok {
172+
if _, valid := distanceMapping[specifiedDistance]; !valid {
173+
return fmt.Errorf("invalid distance metric '%s' for vector '%s'", specifiedDistance, field.Name)
174+
}
175+
distanceMetric = specifiedDistance
176+
}
177+
162178
vectorParamsMap[field.Name] = &qdrant.VectorParams{
163-
Size: uint64(dimension),
164-
// TODO(Anush008): Get distance from Milvus somehow
165-
// field.TypeParams only has "dim"
166-
Distance: qdrant.Distance_Cosine,
179+
Size: uint64(dimension),
180+
Distance: distanceMapping[distanceMetric],
167181
}
168182
}
169183
}
@@ -224,6 +238,7 @@ func (r *MigrateFromMilvusCmd) migrateData(ctx context.Context, sourceClient *mi
224238
WithFilter(filter).
225239
WithOutputFields("*").
226240
WithLimit(batchSize))
241+
227242
if err != nil {
228243
return fmt.Errorf("failed to query Milvus: %w", err)
229244
}
@@ -293,10 +308,6 @@ func (r *MigrateFromMilvusCmd) migrateData(ctx context.Context, sourceClient *mi
293308

294309
bar.Add(len(targetPoints))
295310

296-
if result.ResultCount < batchSize {
297-
break
298-
}
299-
300311
}
301312

302313
pterm.Success.Printfln("Data migration finished successfully")
@@ -341,6 +352,8 @@ func extractValue(col column.Column, index int) (interface{}, error) {
341352
end := start + dim
342353
return vec[start:end], nil
343354

355+
// TODO(Anush008): Extract sparse vectors once they are out of beta in Milvus.
356+
// https://github.com/milvus-io/milvus-proto/blob/02ce2e62a9fd3053b5f2dc632aea32d289a562da/proto/schema.proto#L165
344357
default:
345358
return nil, fmt.Errorf("unsupported field type: %v", col.Type())
346359
}

0 commit comments

Comments
 (0)