Skip to content

Commit 662e9ae

Browse files
Model migration tutorial: use insert-only update mode and Cloud Inference (#2168)
* Switch to insert-only mode instead of conditional upserts * Make code snippets testable; use Cloud Inference * Use regular upserts instead of batch_update_points * Add snippets for TS, Rust, Java, C#, and Go
1 parent 0725a70 commit 662e9ae

File tree

49 files changed

+2257
-105
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+2257
-105
lines changed
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
using Qdrant.Client;
2+
using Qdrant.Client.Grpc;
3+
4+
/// <summary>
/// Tutorial snippet: migrate points from an old Qdrant collection to a new one,
/// re-embedding every document with a new Cloud Inference model.
/// The @hide / @block comment markers delimit the regions that the docs
/// tooling extracts and tests — do not remove them.
/// </summary>
public class Snippet
{
    /// <summary>
    /// Runs the full migration flow: create the target collection, upsert an
    /// example point into each collection, scroll the old collection in batches,
    /// re-embed each point with the new model, insert-only upsert into the new
    /// collection, and finally query both collections.
    /// </summary>
    public static async Task Run()
    {
        // @hide-start
        var client = new QdrantClient(
            host: "",
            port: 6334,
            https: true,
            apiKey: ""
        );

        string NEW_COLLECTION = "new_collection";
        string OLD_COLLECTION = "old_collection";

        string OLD_MODEL = "sentence-transformers/all-minilm-l6-v2";
        string NEW_MODEL = "qdrant/clip-vit-b-32-text";
        // @hide-end

        // @block-start create-new-collection
        await client.CreateCollectionAsync(
            collectionName: NEW_COLLECTION,
            // 512 is the output dimensionality of the new model (CLIP ViT-B/32)
            vectorsConfig: new VectorParams { Size = 512, Distance = Distance.Cosine }
        );
        // @block-end create-new-collection

        // @block-start upsert-old-collection
        await client.UpsertAsync(
            collectionName: OLD_COLLECTION,
            points: new List<PointStruct>
            {
                new()
                {
                    Id = 1,
                    Vectors = new Document
                    {
                        Text = "Example document",
                        Model = OLD_MODEL
                    },
                    // Keep the raw text in the payload so it can be re-embedded later
                    Payload = { ["text"] = "Example document" }
                }
            }
        );
        // @block-end upsert-old-collection

        // @block-start upsert-new-collection
        await client.UpsertAsync(
            collectionName: NEW_COLLECTION,
            points: new List<PointStruct>
            {
                new()
                {
                    Id = 1,
                    // Use the new embedding model to encode the document
                    Vectors = new Document
                    {
                        Text = "Example document",
                        Model = NEW_MODEL
                    },
                    Payload = { ["text"] = "Example document" }
                }
            }
        );
        // @block-end upsert-new-collection

        // @block-start migrate-points
        PointId? lastOffset = null;
        uint limit = 100; // Number of points to read in each batch
        bool reachedEnd = false;

        while (!reachedEnd)
        {
            // Get the next batch of points from the old collection
            var scrollResult = await client.ScrollAsync(
                collectionName: OLD_COLLECTION,
                limit: limit,
                offset: lastOffset,
                // Include payloads in the response, as we need them to re-embed the vectors
                payloadSelector: true,
                // We don't need the old vectors, so let's save on the bandwidth
                vectorsSelector: false
            );

            var records = scrollResult.Result;
            lastOffset = scrollResult.NextPageOffset;

            // Re-embed the points using the new model
            var points = new List<PointStruct>();
            foreach (var oldPoint in records)
            {
                // Single map lookup; fall back to an empty string when the
                // point has no "text" payload field
                var text = oldPoint.Payload.TryGetValue("text", out var textValue)
                    ? textValue.StringValue
                    : "";

                points.Add(new PointStruct
                {
                    // Keep the original ID to ensure consistency
                    Id = oldPoint.Id,
                    // Use the new embedding model to encode the text from the payload,
                    // assuming that was the original source of the embedding
                    Vectors = new Document
                    {
                        Text = text,
                        Model = NEW_MODEL
                    },
                    // Keep the original payload
                    Payload = { oldPoint.Payload }
                });
            }

            // Upsert the re-embedded points into the new collection
            await client.UpsertAsync(
                new()
                {
                    CollectionName = NEW_COLLECTION,
                    Points = { points },
                    // Only insert the point if a point with this ID does not already exist.
                    UpdateMode = UpdateMode.InsertOnly
                }
            );

            // Check if we reached the end of the collection
            // (a null next-page offset means there is nothing left to scroll)
            reachedEnd = (lastOffset == null);
        }
        // @block-end migrate-points

        // @block-start search-old-collection
        var results = await client.QueryAsync(
            collectionName: OLD_COLLECTION,
            query: new Document
            {
                Text = "my query",
                Model = OLD_MODEL
            },
            limit: 10
        );
        // @block-end search-old-collection

        // @block-start search-new-collection
        results = await client.QueryAsync(
            collectionName: NEW_COLLECTION,
            query: new Document
            {
                Text = "my query",
                Model = NEW_MODEL
            },
            limit: 10
        );
        // @block-end search-new-collection
    }
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
```csharp
await client.CreateCollectionAsync(
    collectionName: NEW_COLLECTION,
    vectorsConfig: new VectorParams { Size = 512, Distance = Distance.Cosine }
);
```
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
```go
client.CreateCollection(context.Background(), &qdrant.CreateCollection{
	CollectionName: NEW_COLLECTION,
	VectorsConfig: qdrant.NewVectorsConfig(&qdrant.VectorParams{
		Size:     512, // Size of the new embedding vectors
		Distance: qdrant.Distance_Cosine,
	}),
})
```
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
```java
client.createCollectionAsync(NEW_COLLECTION,
    VectorParams.newBuilder()
        .setSize(512) // Size of the new embedding vectors
        .setDistance(Distance.Cosine) // Similarity function for the new model
        .build()).get();
```
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
```python
client.create_collection(
    collection_name=NEW_COLLECTION,
    vectors_config=(
        models.VectorParams(
            size=512,  # Size of the new embedding vectors
            distance=models.Distance.COSINE  # Similarity function for the new model
        )
    )
)
```
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
```rust
client
    .create_collection(
        CreateCollectionBuilder::new(new_collection)
            .vectors_config(VectorParamsBuilder::new(512, Distance::Cosine)), // Size of the new embedding vectors
    )
    .await?;
```
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
```typescript
await client.createCollection(NEW_COLLECTION, {
  vectors: {
    size: 512, // Size of the new embedding vectors
    distance: "Cosine", // Similarity function for the new model
  },
});
```
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
```csharp
using Qdrant.Client;
using Qdrant.Client.Grpc;

await client.CreateCollectionAsync(
    collectionName: NEW_COLLECTION,
    vectorsConfig: new VectorParams { Size = 512, Distance = Distance.Cosine }
);

await client.UpsertAsync(
    collectionName: OLD_COLLECTION,
    points: new List<PointStruct>
    {
        new()
        {
            Id = 1,
            Vectors = new Document
            {
                Text = "Example document",
                Model = OLD_MODEL
            },
            Payload = { ["text"] = "Example document" }
        }
    }
);

await client.UpsertAsync(
    collectionName: NEW_COLLECTION,
    points: new List<PointStruct>
    {
        new()
        {
            Id = 1,
            // Use the new embedding model to encode the document
            Vectors = new Document
            {
                Text = "Example document",
                Model = NEW_MODEL
            },
            Payload = { ["text"] = "Example document" }
        }
    }
);

PointId? lastOffset = null;
uint limit = 100; // Number of points to read in each batch
bool reachedEnd = false;

while (!reachedEnd)
{
    // Get the next batch of points from the old collection
    var scrollResult = await client.ScrollAsync(
        collectionName: OLD_COLLECTION,
        limit: limit,
        offset: lastOffset,
        // Include payloads in the response, as we need them to re-embed the vectors
        payloadSelector: true,
        // We don't need the old vectors, so let's save on the bandwidth
        vectorsSelector: false
    );

    var records = scrollResult.Result;
    lastOffset = scrollResult.NextPageOffset;

    // Re-embed the points using the new model
    var points = new List<PointStruct>();
    foreach (var record in records)
    {
        var text = record.Payload.ContainsKey("text")
            ? record.Payload["text"].StringValue
            : "";

        points.Add(new PointStruct
        {
            // Keep the original ID to ensure consistency
            Id = record.Id,
            // Use the new embedding model to encode the text from the payload,
            // assuming that was the original source of the embedding
            Vectors = new Document
            {
                Text = text,
                Model = NEW_MODEL
            },
            // Keep the original payload
            Payload = { record.Payload }
        });
    }

    // Upsert the re-embedded points into the new collection
    await client.UpsertAsync(
        new()
        {
            CollectionName = NEW_COLLECTION,
            Points = { points },
            // Only insert the point if a point with this ID does not already exist.
            UpdateMode = UpdateMode.InsertOnly
        }
    );

    // Check if we reached the end of the collection
    reachedEnd = (lastOffset == null);
}

var results = await client.QueryAsync(
    collectionName: OLD_COLLECTION,
    query: new Document
    {
        Text = "my query",
        Model = OLD_MODEL
    },
    limit: 10
);

results = await client.QueryAsync(
    collectionName: NEW_COLLECTION,
    query: new Document
    {
        Text = "my query",
        Model = NEW_MODEL
    },
    limit: 10
);
```

0 commit comments

Comments
 (0)