Skip to content

Commit 84f4273

Browse files
committed
Merge branch 'main' into redis
Signed-off-by: Anush008 <anushshetty90@gmail.com>
2 parents e9e818c + 304bd85 commit 84f4273

15 files changed

+1365
-97
lines changed

README.md

Lines changed: 73 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ CLI tool for migrating data to [Qdrant](http://qdrant.tech) with support for res
1010
* [Chroma](https://trychroma.com/)
1111
* [Pinecone](https://www.pinecone.io/)
1212
* [Milvus](https://milvus.io/)
13+
* [Weaviate](https://weaviate.io/)
1314
* [Redis](https://redis.io)
1415
* Another [Qdrant](http://qdrant.tech/) instance
1516

@@ -102,7 +103,8 @@ Migrate data from a **Pinecone** database to **Qdrant**:
102103

103104
```bash
104105
migration pinecone \
105-
--pinecone.host 'https://example-index-12345.svc.region.pinecone.io' \
106+
--pinecone.index-host 'https://example-index.svc.region.pinecone.io' \
107+
--pinecone.index-name 'example-index' \
106108
--pinecone.api-key 'optional-pinecone-api-key' \
107109
--qdrant.url 'https://example.cloud-region.cloud-provider.cloud.qdrant.io:6334' \
108110
--qdrant.api-key 'optional-qdrant-api-key' \
@@ -114,17 +116,19 @@ With Docker:
114116
115117
```bash
116118
docker run --net=host --rm -it registry.cloud.qdrant.io/library/qdrant-migration pinecone \
117-
--pinecone.host 'https://example-index-12345.svc.region.pinecone.io' \
119+
--pinecone.index-host 'https://example-index.svc.region.pinecone.io' \
118120
...
119121
```
120122
121123
#### Pinecone Options
122124
123125
| Flag | Description |
124126
| ------------------------------- | --------------------------------------------------------------- |
125-
| `--pinecone.api-key` | Pinecone API key for authentication |
126-
| `--pinecone.host` | Pinecone index host URL (e.g., `https://your-pinecone-url`) |
127-
| `--pinecone.namespace` | Namespace of the partition to migrate |
127+
| `--pinecone.index-name` | Pinecone index name. |
128+
| `--pinecone.index-host` | Pinecone index host URL (e.g., `https://your-pinecone-url`). |
129+
| `--pinecone.api-key` | Pinecone API key for authentication. |
130+
| `--pinecone.namespace` | Namespace of the partition to migrate. Optional. |
131+
| `--pinecone.service-host` | Pinecone service host URL. Optional. |
128132
129133
#### Qdrant Options
130134
@@ -195,6 +199,70 @@ docker run --net=host --rm -it registry.cloud.qdrant.io/library/qdrant-migration
195199
196200
</details>
197201
202+
<details>
203+
204+
<summary><h3>From Weaviate</h3></summary>
205+
206+
Migrate data from a **Weaviate** database to **Qdrant**:
207+
208+
### 📥 Example
209+
210+
> Important ⚠️:
211+
> Weaviate does not expose a collection's vector dimensions or distance metric once the collection has been created. [Reference](https://forum.weaviate.io/t/get-vector-dimension-of-a-collection/1769/).
212+
> Therefore, you must [manually create](https://qdrant.tech/documentation/concepts/collections/#create-a-collection) a Qdrant collection before starting the migration.
213+
> Ensure that the **vector dimensions in Qdrant exactly match** those used in Weaviate.
214+
215+
```bash
216+
migration weaviate \
217+
--weaviate.host 'example.c0.asia-southeast1.gcp.weaviate.cloud' \
218+
--weaviate.scheme 'https' \
219+
--weaviate.auth-type 'apiKey' \
220+
--weaviate.api-key 'optional-api-key' \
221+
--weaviate.class-name 'ExampleClass' \
222+
--qdrant.url 'http://localhost:6334' \
223+
--qdrant.collection 'target-collection' \
224+
--migration.batch-size 64
225+
```
226+
227+
With Docker:
228+
229+
```bash
230+
docker run --net=host --rm -it registry.cloud.qdrant.io/library/qdrant-migration weaviate \
231+
--weaviate.host 'example.c0.asia-southeast1.gcp.weaviate.cloud' \
232+
...
233+
```
234+
235+
#### Weaviate Options
236+
237+
| Flag | Description |
238+
| -------------------------- | ------------------------------------------------------------------------------------------------ |
239+
| `--weaviate.host` | Host of the Weaviate instance (e.g. `localhost:8080`) **(required)** |
240+
| `--weaviate.scheme` | Scheme of the Weaviate instance (e.g. `http` or `https`) _(default: http)_ |
241+
| `--weaviate.class-name` | Name of the Weaviate class to migrate **(required)** |
242+
| `--weaviate.auth-type` | Authentication type _(default: none)_. Options: `none`, `apiKey`, `password`, `client`, `bearer` |
243+
| `--weaviate.api-key` | API key for authentication (when `auth-type` is `apiKey`) |
244+
| `--weaviate.username` | Username for authentication (when `auth-type` is `password`) |
245+
| `--weaviate.password` | Password for authentication (when `auth-type` is `password`) |
246+
| `--weaviate.scopes` | Scopes for authentication (when `auth-type` is `password` or `client`) |
247+
| `--weaviate.client-secret` | Client secret for authentication (when `auth-type` is `client`) |
248+
| `--weaviate.token` | Bearer token for authentication (when `auth-type` is `bearer`) |
249+
| `--weaviate.refresh-token` | Refresh token for authentication (when `auth-type` is `bearer`) |
250+
| `--weaviate.expires-in` | Access token expiration time in seconds (when `auth-type` is `bearer`) |
251+
| `--weaviate.tenant` | Name of the tenant whose objects should be migrated |
252+
253+
#### Qdrant Options
254+
255+
| Flag | Description |
256+
| ----------------------- | ---------------------------------------------------------------------------------------------------------------- |
257+
| `--qdrant.url` | Qdrant gRPC URL. Default: `"http://localhost:6334"` |
258+
| `--qdrant.collection` | Target collection name |
259+
| `--qdrant.api-key` | Qdrant API key |
260+
261+
* See [Shared Migration Options](#shared-migration-options) for common migration parameters.
262+
263+
</details>
264+
265+
198266
<details>
199267
200268
<summary><h3>From Redis</h3></summary>

cmd/migrate_from_chroma.go

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ import (
99
"os"
1010
"os/signal"
1111
"syscall"
12-
"time"
1312

1413
chroma "github.com/amikos-tech/chroma-go/pkg/api/v2"
1514
"github.com/pterm/pterm"
@@ -195,12 +194,11 @@ func (r *MigrateFromChromaCmd) prepareTargetCollection(ctx context.Context, coll
195194
return fmt.Errorf("failed to create target collection: %w", err)
196195
}
197196

198-
pterm.Success.Printfln("Created target collection '%s' with dimension", r.Qdrant.Collection)
197+
pterm.Success.Printfln("Created target collection '%s'", r.Qdrant.Collection)
199198
return nil
200199
}
201200

202201
func (r *MigrateFromChromaCmd) migrateData(ctx context.Context, collection chroma.Collection, targetClient *qdrant.Client, sourcePointCount uint64) error {
203-
startTime := time.Now()
204202
batchSize := r.Migration.BatchSize
205203

206204
var currentOffset uint64 = 0
@@ -295,16 +293,6 @@ func (r *MigrateFromChromaCmd) migrateData(ctx context.Context, collection chrom
295293
}
296294

297295
bar.Add(count)
298-
299-
// If one minute elapsed get updated sourcePointCount
300-
// Useful if any new points were added to the source during migration
301-
if time.Since(startTime) > time.Minute {
302-
sourcePointCount, err = r.countChromaVectors(ctx, collection)
303-
if err != nil {
304-
return fmt.Errorf("failed to count vectors in Chroma: %w", err)
305-
}
306-
bar.Total = int(sourcePointCount)
307-
}
308296
}
309297

310298
pterm.Success.Printfln("Data migration finished successfully")

cmd/migrate_from_milvus.go

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ import (
88
"os/signal"
99
"strconv"
1010
"syscall"
11-
"time"
1211

1312
"github.com/milvus-io/milvus/client/v2/column"
1413
"github.com/milvus-io/milvus/client/v2/entity"
@@ -182,7 +181,6 @@ func (r *MigrateFromMilvusCmd) prepareTargetCollection(ctx context.Context, sour
182181
}
183182

184183
func (r *MigrateFromMilvusCmd) migrateData(ctx context.Context, sourceClient *milvusclient.Client, targetClient *qdrant.Client, sourcePointCount uint64) error {
185-
startTime := time.Now()
186184
batchSize := r.Migration.BatchSize
187185

188186
var offsetID *qdrant.PointId
@@ -299,15 +297,6 @@ func (r *MigrateFromMilvusCmd) migrateData(ctx context.Context, sourceClient *mi
299297
break
300298
}
301299

302-
// If one minute elapsed get updated sourcePointCount.
303-
// Useful if any new points were added to the source during migration.
304-
if time.Since(startTime) > time.Minute {
305-
sourcePointCount, err = r.countMilvusVectors(ctx, sourceClient)
306-
if err != nil {
307-
return fmt.Errorf("failed to count vectors in Milvus: %w", err)
308-
}
309-
bar.Total = int(sourcePointCount)
310-
}
311300
}
312301

313302
pterm.Success.Printfln("Data migration finished successfully")

cmd/migrate_from_pinecone.go

Lines changed: 8 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@ import (
55
"fmt"
66
"os"
77
"os/signal"
8-
"strings"
98
"syscall"
10-
"time"
119

1210
"github.com/pinecone-io/go-pinecone/v3/pinecone"
1311
"github.com/pterm/pterm"
@@ -30,22 +28,13 @@ type MigrateFromPineconeCmd struct {
3028
targetTLS bool
3129
}
3230

33-
func stripScheme(input string) string {
34-
if idx := strings.Index(input, "://"); idx != -1 {
35-
return input[idx+3:]
36-
}
37-
return input
38-
}
39-
4031
func (r *MigrateFromPineconeCmd) Parse() error {
4132
var err error
4233
r.targetHost, r.targetPort, r.targetTLS, err = parseQdrantUrl(r.Qdrant.Url)
4334
if err != nil {
4435
return fmt.Errorf("failed to parse target URL: %w", err)
4536
}
4637

47-
r.Pinecone.Host = stripScheme(r.Pinecone.Host)
48-
4938
return nil
5039
}
5140

@@ -89,7 +78,7 @@ func (r *MigrateFromPineconeCmd) Run(globals *Globals) error {
8978
return fmt.Errorf("error preparing target collection: %w", err)
9079
}
9180

92-
displayMigrationStart("pinecone", r.Pinecone.Host, r.Qdrant.Collection)
81+
displayMigrationStart("pinecone", r.Pinecone.IndexHost, r.Qdrant.Collection)
9382

9483
err = r.migrateData(ctx, sourceIndexConn, targetClient, sourcePointCount)
9584
if err != nil {
@@ -111,14 +100,15 @@ func (r *MigrateFromPineconeCmd) Run(globals *Globals) error {
111100

112101
func (r *MigrateFromPineconeCmd) connectToPinecone() (*pinecone.Client, *pinecone.IndexConnection, error) {
113102
client, err := pinecone.NewClient(pinecone.NewClientParams{
103+
Host: r.Pinecone.ServiceHost,
114104
ApiKey: r.Pinecone.APIKey,
115105
})
116106
if err != nil {
117107
return nil, nil, fmt.Errorf("failed to create Pinecone client: %w", err)
118108
}
119109

120110
indexConn, err := client.Index(pinecone.NewIndexConnParams{
121-
Host: r.Pinecone.Host,
111+
Host: r.Pinecone.IndexHost,
122112
Namespace: r.Pinecone.Namespace,
123113
})
124114
if err != nil {
@@ -159,14 +149,14 @@ func (r *MigrateFromPineconeCmd) prepareTargetCollection(ctx context.Context, so
159149

160150
var foundIndex *pinecone.Index
161151
for i := range indexes {
162-
if indexes[i].Host == r.Pinecone.Host {
152+
if indexes[i].Name == r.Pinecone.IndexName {
163153
foundIndex = indexes[i]
164154
break
165155
}
166156
}
167157

168158
if foundIndex == nil {
169-
return fmt.Errorf("index %q not found in Pinecone", r.Pinecone.Host)
159+
return fmt.Errorf("index %q not found in Pinecone", r.Pinecone.IndexName)
170160
}
171161

172162
distanceMapping := map[pinecone.IndexMetric]qdrant.Distance{
@@ -208,14 +198,13 @@ func (r *MigrateFromPineconeCmd) prepareTargetCollection(ctx context.Context, so
208198
}
209199

210200
func (r *MigrateFromPineconeCmd) migrateData(ctx context.Context, sourceIndexConn *pinecone.IndexConnection, targetClient *qdrant.Client, sourcePointCount uint64) error {
211-
startTime := time.Now()
212201
batchSize := r.Migration.BatchSize
213202

214203
var offsetId *qdrant.PointId
215204
offsetCount := uint64(0)
216205

217206
if !r.Migration.Restart {
218-
id, offsetStored, err := commons.GetStartOffset(ctx, r.Migration.OffsetsCollection, targetClient, r.Pinecone.Host)
207+
id, offsetStored, err := commons.GetStartOffset(ctx, r.Migration.OffsetsCollection, targetClient, r.Pinecone.IndexHost)
219208
if err != nil {
220209
return fmt.Errorf("failed to get start offset: %w", err)
221210
}
@@ -241,7 +230,7 @@ func (r *MigrateFromPineconeCmd) migrateData(ctx context.Context, sourceIndexCon
241230
}
242231

243232
if len(listRes.VectorIds) < 1 {
244-
return fmt.Errorf("pinecone.ListVectors returned no IDs")
233+
break
245234
}
246235

247236
ids := make([]string, 0, len(listRes.VectorIds))
@@ -300,7 +289,7 @@ func (r *MigrateFromPineconeCmd) migrateData(ctx context.Context, sourceIndexCon
300289
if listRes.NextPaginationToken != nil {
301290
offsetCount += uint64(len(targetPoints))
302291
offsetId = qdrant.NewID(*listRes.NextPaginationToken)
303-
err = commons.StoreStartOffset(ctx, r.Migration.OffsetsCollection, targetClient, r.Pinecone.Host, offsetId, offsetCount)
292+
err = commons.StoreStartOffset(ctx, r.Migration.OffsetsCollection, targetClient, r.Pinecone.IndexHost, offsetId, offsetCount)
304293
if err != nil {
305294
return fmt.Errorf("failed to store offset: %w", err)
306295
}
@@ -312,15 +301,6 @@ func (r *MigrateFromPineconeCmd) migrateData(ctx context.Context, sourceIndexCon
312301
break
313302
}
314303

315-
// If one minute elapsed get updated sourcePointCount.
316-
// Useful if any new points were added to the source during migration.
317-
if time.Since(startTime) > time.Minute {
318-
sourcePointCount, err = r.countPineconeVectors(ctx, sourceIndexConn)
319-
if err != nil {
320-
return fmt.Errorf("failed to count vectors in Pinecone: %w", err)
321-
}
322-
bar.Total = int(sourcePointCount)
323-
}
324304
}
325305

326306
pterm.Success.Printfln("Data migration finished successfully")

cmd/migrate_from_qdrant.go

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import (
66
"os"
77
"os/signal"
88
"syscall"
9-
"time"
109

1110
"github.com/pterm/pterm"
1211

@@ -213,7 +212,6 @@ func getFieldType(dataType qdrant.PayloadSchemaType) *qdrant.FieldType {
213212
}
214213

215214
func (r *MigrateFromQdrantCmd) migrateData(ctx context.Context, sourceClient *qdrant.Client, sourceCollection string, targetClient *qdrant.Client, targetCollection string, sourcePointCount uint64) error {
216-
startTime := time.Now()
217215
limit := uint32(r.Migration.BatchSize)
218216

219217
var offsetId *qdrant.PointId
@@ -323,18 +321,6 @@ func (r *MigrateFromQdrantCmd) migrateData(ctx context.Context, sourceClient *qd
323321
break
324322
}
325323

326-
// If one minute elapsed get updated sourcePointCount.
327-
// Useful if any new points were added to the source during migration.
328-
if time.Since(startTime) > time.Minute {
329-
sourcePointCount, err := sourceClient.Count(ctx, &qdrant.CountPoints{
330-
CollectionName: sourceCollection,
331-
Exact: qdrant.PtrOf(true),
332-
})
333-
if err != nil {
334-
return fmt.Errorf("failed to count points in source: %w", err)
335-
}
336-
bar.Total = int(sourcePointCount)
337-
}
338324
}
339325

340326
pterm.Success.Printfln("Data migration finished successfully")

0 commit comments

Comments
 (0)