Skip to content

Commit 304bd85

Browse files
authored
test: Add Pinecone source integration tests (#46)
* chore(deps): Use Anush008/chroma-go until amikos-tech/chroma-go #221 is merged
  Signed-off-by: Anush008 <[email protected]>
* chore: Revert back to amikos-tech/chroma-go
  Signed-off-by: Anush008 <[email protected]>
* chore: tests
  Signed-off-by: Anush008 <[email protected]>
* Chroma test
  Signed-off-by: Anush008 <[email protected]>
* chore: Addressed lint issues
  Signed-off-by: Anush008 <[email protected]>
* chore: No close client
  Signed-off-by: Anush008 <[email protected]>
* test: Pinecone
  Signed-off-by: Anush008 <[email protected]>
* chore: go mod tidy
  Signed-off-by: Anush008 <[email protected]>
* test: Sparse vectors
  Signed-off-by: Anush008 <[email protected]>
* chore: Run as from shell
  Signed-off-by: Anush008 <[email protected]>
* chore: Test as binary
  Signed-off-by: Anush008 <[email protected]>
* test: Updated implementation
  Signed-off-by: Anush008 <[email protected]>
---------
Signed-off-by: Anush008 <[email protected]>
1 parent 7df7f87 commit 304bd85

File tree

9 files changed

+340
-27
lines changed

9 files changed

+340
-27
lines changed

README.md

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,8 @@ Migrate data from a **Pinecone** database to **Qdrant**:
102102

103103
```bash
104104
migration pinecone \
105-
--pinecone.host 'https://example-index-12345.svc.region.pinecone.io' \
105+
--pinecone.index-host 'https://example-index.svc.region.pinecone.io' \
106+
--pinecone.index-name 'example-index' \
106107
--pinecone.api-key 'optional-pinecone-api-key' \
107108
--qdrant.url 'https://example.cloud-region.cloud-provider.cloud.qdrant.io:6334' \
108109
--qdrant.api-key 'optional-qdrant-api-key' \
@@ -114,17 +115,19 @@ With Docker:
114115
115116
```bash
116117
docker run --net=host --rm -it registry.cloud.qdrant.io/library/qdrant-migration pinecone \
117-
--pinecone.host 'https://example-index-12345.svc.region.pinecone.io' \
118+
--pinecone.index-host 'https://example-index.svc.region.pinecone.io' \
118119
...
119120
```
120121
121122
#### Pinecone Options
122123
123124
| Flag | Description |
124125
| ------------------------------- | --------------------------------------------------------------- |
125-
| `--pinecone.api-key` | Pinecone API key for authentication |
126-
| `--pinecone.host` | Pinecone index host URL (e.g., `https://your-pinecone-url`) |
127-
| `--pinecone.namespace` | Namespace of the partition to migrate |
126+
| `--pinecone.index-name` | Pinecone index name. |
127+
| `--pinecone.index-host` | Pinecone index host URL (e.g., `https://your-pinecone-url`). |
128+
| `--pinecone.api-key` | Pinecone API key for authentication. |
129+
| `--pinecone.namespace` | Namespace of the partition to migrate. Optional. |
130+
| `--pinecone.service-host` | Pinecone service host URL. Optional. |
128131
129132
#### Qdrant Options
130133

cmd/migrate_from_pinecone.go

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55
"fmt"
66
"os"
77
"os/signal"
8-
"strings"
98
"syscall"
109

1110
"github.com/pinecone-io/go-pinecone/v3/pinecone"
@@ -29,22 +28,13 @@ type MigrateFromPineconeCmd struct {
2928
targetTLS bool
3029
}
3130

32-
func stripScheme(input string) string {
33-
if idx := strings.Index(input, "://"); idx != -1 {
34-
return input[idx+3:]
35-
}
36-
return input
37-
}
38-
3931
func (r *MigrateFromPineconeCmd) Parse() error {
4032
var err error
4133
r.targetHost, r.targetPort, r.targetTLS, err = parseQdrantUrl(r.Qdrant.Url)
4234
if err != nil {
4335
return fmt.Errorf("failed to parse target URL: %w", err)
4436
}
4537

46-
r.Pinecone.Host = stripScheme(r.Pinecone.Host)
47-
4838
return nil
4939
}
5040

@@ -88,7 +78,7 @@ func (r *MigrateFromPineconeCmd) Run(globals *Globals) error {
8878
return fmt.Errorf("error preparing target collection: %w", err)
8979
}
9080

91-
displayMigrationStart("pinecone", r.Pinecone.Host, r.Qdrant.Collection)
81+
displayMigrationStart("pinecone", r.Pinecone.IndexHost, r.Qdrant.Collection)
9282

9383
err = r.migrateData(ctx, sourceIndexConn, targetClient, sourcePointCount)
9484
if err != nil {
@@ -110,14 +100,15 @@ func (r *MigrateFromPineconeCmd) Run(globals *Globals) error {
110100

111101
func (r *MigrateFromPineconeCmd) connectToPinecone() (*pinecone.Client, *pinecone.IndexConnection, error) {
112102
client, err := pinecone.NewClient(pinecone.NewClientParams{
103+
Host: r.Pinecone.ServiceHost,
113104
ApiKey: r.Pinecone.APIKey,
114105
})
115106
if err != nil {
116107
return nil, nil, fmt.Errorf("failed to create Pinecone client: %w", err)
117108
}
118109

119110
indexConn, err := client.Index(pinecone.NewIndexConnParams{
120-
Host: r.Pinecone.Host,
111+
Host: r.Pinecone.IndexHost,
121112
Namespace: r.Pinecone.Namespace,
122113
})
123114
if err != nil {
@@ -158,14 +149,14 @@ func (r *MigrateFromPineconeCmd) prepareTargetCollection(ctx context.Context, so
158149

159150
var foundIndex *pinecone.Index
160151
for i := range indexes {
161-
if indexes[i].Host == r.Pinecone.Host {
152+
if indexes[i].Name == r.Pinecone.IndexName {
162153
foundIndex = indexes[i]
163154
break
164155
}
165156
}
166157

167158
if foundIndex == nil {
168-
return fmt.Errorf("index %q not found in Pinecone", r.Pinecone.Host)
159+
return fmt.Errorf("index %q not found in Pinecone", r.Pinecone.IndexName)
169160
}
170161

171162
distanceMapping := map[pinecone.IndexMetric]qdrant.Distance{
@@ -213,7 +204,7 @@ func (r *MigrateFromPineconeCmd) migrateData(ctx context.Context, sourceIndexCon
213204
offsetCount := uint64(0)
214205

215206
if !r.Migration.Restart {
216-
id, offsetStored, err := commons.GetStartOffset(ctx, r.Migration.OffsetsCollection, targetClient, r.Pinecone.Host)
207+
id, offsetStored, err := commons.GetStartOffset(ctx, r.Migration.OffsetsCollection, targetClient, r.Pinecone.IndexHost)
217208
if err != nil {
218209
return fmt.Errorf("failed to get start offset: %w", err)
219210
}
@@ -239,7 +230,7 @@ func (r *MigrateFromPineconeCmd) migrateData(ctx context.Context, sourceIndexCon
239230
}
240231

241232
if len(listRes.VectorIds) < 1 {
242-
return fmt.Errorf("pinecone.ListVectors returned no IDs")
233+
break
243234
}
244235

245236
ids := make([]string, 0, len(listRes.VectorIds))
@@ -298,7 +289,7 @@ func (r *MigrateFromPineconeCmd) migrateData(ctx context.Context, sourceIndexCon
298289
if listRes.NextPaginationToken != nil {
299290
offsetCount += uint64(len(targetPoints))
300291
offsetId = qdrant.NewID(*listRes.NextPaginationToken)
301-
err = commons.StoreStartOffset(ctx, r.Migration.OffsetsCollection, targetClient, r.Pinecone.Host, offsetId, offsetCount)
292+
err = commons.StoreStartOffset(ctx, r.Migration.OffsetsCollection, targetClient, r.Pinecone.IndexHost, offsetId, offsetCount)
302293
if err != nil {
303294
return fmt.Errorf("failed to store offset: %w", err)
304295
}

cmd/root.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,14 @@ func Execute(projectVersion, projectBuild string) {
4242
ctx.Exit(1)
4343
}
4444
}
45+
46+
func NewParser(args []string) (*kong.Context, error) {
47+
cli := &CLI{}
48+
49+
parser, err := kong.New(cli, kong.Bind(&cli.Globals))
50+
if err != nil {
51+
return nil, err
52+
}
53+
54+
return parser.Parse(args)
55+
}

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ require (
1818
github.com/weaviate/weaviate v1.27.0
1919
github.com/weaviate/weaviate-go-client/v4 v4.16.1
2020
google.golang.org/grpc v1.72.2
21+
google.golang.org/protobuf v1.36.6
2122
)
2223

2324
require (
@@ -173,7 +174,6 @@ require (
173174
google.golang.org/genproto v0.0.0-20250512202823-5a2f75b736a9 // indirect
174175
google.golang.org/genproto/googleapis/api v0.0.0-20250512202823-5a2f75b736a9 // indirect
175176
google.golang.org/genproto/googleapis/rpc v0.0.0-20250512202823-5a2f75b736a9 // indirect
176-
google.golang.org/protobuf v1.36.6 // indirect
177177
gopkg.in/inf.v0 v0.9.1 // indirect
178178
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
179179
gopkg.in/yaml.v2 v2.4.0 // indirect

integration_tests/commons_test.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ const (
1515
qdrantAPIKey = "00000000"
1616
totalEntries = 100
1717
dimension = 384
18+
idField = "__id__"
1819
)
1920

2021
func randFloat32Values(n int) []float32 {
@@ -25,6 +26,14 @@ func randFloat32Values(n int) []float32 {
2526
return values
2627
}
2728

29+
// randIndices returns n pseudo-random uint32 values that are pairwise
// distinct.
//
// NOTE(review): presumably these are used as sparse-vector indices, where
// duplicate indices are invalid; confirm against the callers. Independent
// draws from rand.Uint32 can collide (rarely), so duplicates are rejected and
// redrawn instead of being allowed to make a test flaky.
//
// A non-positive n yields an empty, non-nil slice rather than panicking.
func randIndices(n int) []uint32 {
	if n <= 0 {
		return []uint32{}
	}

	indices := make([]uint32, 0, n)
	seen := make(map[uint32]struct{}, n)
	for len(indices) < n {
		idx := rand.Uint32()
		if _, dup := seen[idx]; dup {
			// Collision with an earlier draw: try again.
			continue
		}
		seen[idx] = struct{}{}
		indices = append(indices, idx)
	}

	return indices
}
36+
2837
func runMigrationBinary(t *testing.T, args []string) {
2938
binaryPath := filepath.Join(t.TempDir(), "migration")
3039
cmd := exec.Command("go", "build", "-o", binaryPath, "main.go")

integration_tests/image_test.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,24 @@ func weaviateContainer(ctx context.Context, t *testing.T) testcontainers.Contain
6969

7070
return container
7171
}
72+
73+
func pineconeContainer(ctx context.Context, t *testing.T) testcontainers.Container {
74+
75+
req := testcontainers.ContainerRequest{
76+
Image: "ghcr.io/pinecone-io/pinecone-local:latest",
77+
ExposedPorts: []string{"5081/tcp", "5082/tcp"},
78+
Env: map[string]string{
79+
"PORT": "5081",
80+
},
81+
WaitingFor: wait.ForAll(wait.ForListeningPort("5081/tcp").WithStartupTimeout(30*time.Second),
82+
wait.ForListeningPort("5082/tcp").WithStartupTimeout(30*time.Second)),
83+
}
84+
85+
container, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{
86+
ContainerRequest: req,
87+
Started: true,
88+
})
89+
require.NoError(t, err)
90+
91+
return container
92+
}

integration_tests/migrate_from_chroma_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ const (
1818
sourceField = "source"
1919
denseVectorField = "dense_vector"
2020
distance = "euclid"
21-
idField = "__id__"
2221
)
2322

2423
func TestMigrateFromChroma(t *testing.T) {

0 commit comments

Comments
 (0)