Skip to content

Commit 2926811

Browse files
authored
feat(qdrant-multivector): mapping 2-dim vectors to qdrant multivectors (#802)
1 parent 9ea8ca7 commit 2926811

File tree

2 files changed

+119
-37
lines changed

2 files changed

+119
-37
lines changed

docs/docs/ops/targets.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,16 @@ Here's how CocoIndex data elements map to Qdrant elements during export:
6363
| a collected row | a point |
6464
| a field | a named vector, if it fits into a Qdrant vector; otherwise a field within the payload |
6565

66-
*Vector[Float32, N]*, *Vector[Float64, N]* and *Vector[Int64, N]* types fit into Qdrant vector.
66+
The following vector types fit into a Qdrant vector:
67+
* One-dimensional vectors with fixed dimension, e.g. *Vector[Float32, N]*, *Vector[Float64, N]* and *Vector[Int64, N]*.
68+
We map them to [dense vectors](https://qdrant.tech/documentation/concepts/vectors/#dense-vectors).
69+
* Two-dimensional vectors whose inner layer is a one-dimensional vector with fixed dimension, e.g. *Vector[Vector[Float32, N]]*, *Vector[Vector[Int64, N]]*, *Vector[Vector[Float64, N]]*. The outer layer may or may not have a fixed dimension.
70+
We map them to [multivectors](https://qdrant.tech/documentation/concepts/vectors/#multivectors).
71+
6772

6873
:::warning vector type mapping to Qdrant
6974

70-
Since vectors in Qdrant must have fixed dimension, we only map vectors of number types with fixed dimension (i.e. *Vector[cocoindex.Float32, N]*, *Vector[cocoindex.Float64, N]*, and *Vector[cocoindex.Int64, N]*) to Qdrant vectors.
75+
Since vectors in Qdrant must have a fixed dimension, we only map vectors of number types with a fixed dimension to Qdrant vectors.
7176
All other vector types are mapped to the Qdrant payload as JSON arrays.
7277

7378
:::

src/ops/targets/qdrant.rs

Lines changed: 112 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@ use crate::ops::registry::ExecutorFactoryRegistry;
77
use crate::setup;
88
use qdrant_client::Qdrant;
99
use qdrant_client::qdrant::{
10-
CreateCollectionBuilder, DeletePointsBuilder, Distance, NamedVectors, PointId, PointStruct,
11-
PointsIdsList, UpsertPointsBuilder, Value as QdrantValue, VectorParamsBuilder,
12-
VectorsConfigBuilder,
10+
CreateCollectionBuilder, DeletePointsBuilder, DenseVector, Distance, MultiDenseVector,
11+
MultiVectorComparator, MultiVectorConfigBuilder, NamedVectors, PointId, PointStruct,
12+
PointsIdsList, UpsertPointsBuilder, Value as QdrantValue, Vector as QdrantVector,
13+
VectorParamsBuilder, VectorsConfigBuilder,
1314
};
1415

1516
const DEFAULT_VECTOR_SIMILARITY_METRIC: spec::VectorSimilarityMetric =
@@ -38,34 +39,82 @@ struct Spec {
3839

3940
struct FieldInfo {
4041
field_schema: schema::FieldSchema,
41-
is_qdrant_vector: bool,
42+
vector_shape: Option<VectorShape>,
4243
}
4344

44-
fn parse_supported_vector_size(typ: &schema::ValueType) -> Option<usize> {
45-
match typ {
46-
schema::ValueType::Basic(schema::BasicValueType::Vector(vector_schema)) => {
47-
match &*vector_schema.element_type {
48-
schema::BasicValueType::Float32
49-
| schema::BasicValueType::Float64
50-
| schema::BasicValueType::Int64 => vector_schema.dimension,
45+
/// Shape of a vector field that can be stored as a Qdrant vector.
///
/// In both variants the payload is the fixed dimension of the one-dimensional
/// vector of numbers (for `MultiVector`, that is the dimension of each inner
/// vector).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum VectorShape {
    /// A one-dimensional vector with a fixed dimension.
    Vector(usize),
    /// A two-dimensional vector whose inner vectors have a fixed dimension.
    MultiVector(usize),
}
49+
50+
impl VectorShape {
51+
fn vector_size(&self) -> usize {
52+
match self {
53+
VectorShape::Vector(size) => *size,
54+
VectorShape::MultiVector(size) => *size,
55+
}
56+
}
57+
58+
fn multi_vector_comparator(&self) -> Option<MultiVectorComparator> {
59+
match self {
60+
VectorShape::MultiVector(_) => Some(MultiVectorComparator::MaxSim),
61+
_ => None,
62+
}
63+
}
64+
}
65+
66+
fn parse_vector_schema_shape(vector_schema: &schema::VectorTypeSchema) -> Option<VectorShape> {
67+
match &*vector_schema.element_type {
68+
schema::BasicValueType::Float32
69+
| schema::BasicValueType::Float64
70+
| schema::BasicValueType::Int64 => vector_schema.dimension.map(VectorShape::Vector),
71+
72+
schema::BasicValueType::Vector(nested_vector_schema) => {
73+
match parse_vector_schema_shape(nested_vector_schema) {
74+
Some(VectorShape::Vector(dim)) => Some(VectorShape::MultiVector(dim)),
5175
_ => None,
5276
}
5377
}
5478
_ => None,
5579
}
5680
}
5781

58-
fn encode_vector(v: &[BasicValue]) -> Result<Vec<f32>> {
59-
v.iter()
60-
.map(|elem| {
61-
Ok(match elem {
62-
BasicValue::Float32(f) => *f,
63-
BasicValue::Float64(f) => *f as f32,
64-
BasicValue::Int64(i) => *i as f32,
65-
_ => bail!("Unsupported vector type: {:?}", elem.kind()),
82+
fn parse_vector_shape(typ: &schema::ValueType) -> Option<VectorShape> {
83+
match typ {
84+
schema::ValueType::Basic(schema::BasicValueType::Vector(vector_schema)) => {
85+
parse_vector_schema_shape(vector_schema)
86+
}
87+
_ => None,
88+
}
89+
}
90+
91+
fn encode_dense_vector(v: &BasicValue) -> Result<DenseVector> {
92+
let vec = match v {
93+
BasicValue::Vector(v) => v
94+
.iter()
95+
.map(|elem| {
96+
Ok(match elem {
97+
BasicValue::Float32(f) => *f,
98+
BasicValue::Float64(f) => *f as f32,
99+
BasicValue::Int64(i) => *i as f32,
100+
_ => bail!("Unsupported vector type: {:?}", elem.kind()),
101+
})
66102
})
67-
})
68-
.collect::<Result<Vec<_>>>()
103+
.collect::<Result<Vec<_>>>()?,
104+
_ => bail!("Expected a vector field, got {:?}", v),
105+
};
106+
Ok(vec.into())
107+
}
108+
109+
fn encode_multi_dense_vector(v: &BasicValue) -> Result<MultiDenseVector> {
110+
let vecs = match v {
111+
BasicValue::Vector(v) => v
112+
.iter()
113+
.map(encode_dense_vector)
114+
.collect::<Result<Vec<_>>>()?,
115+
_ => bail!("Expected a vector field, got {:?}", v),
116+
};
117+
Ok(vecs.into())
69118
}
70119

71120
fn embedding_metric_to_qdrant(metric: spec::VectorSimilarityMetric) -> Result<Distance> {
@@ -90,6 +139,8 @@ struct CollectionKey {
90139
struct VectorDef {
91140
vector_size: usize,
92141
metric: spec::VectorSimilarityMetric,
142+
#[serde(default, skip_serializing_if = "Option::is_none")]
143+
multi_vector_comparator: Option<String>,
93144
}
94145
#[derive(Debug, Clone, Serialize, Deserialize)]
95146
struct SetupState {
@@ -168,10 +219,21 @@ impl SetupStatus {
168219
if !add_collection.vectors.is_empty() {
169220
let mut vectors_config = VectorsConfigBuilder::default();
170221
for (name, vector_def) in add_collection.vectors.iter() {
171-
let params = VectorParamsBuilder::new(
222+
let mut params = VectorParamsBuilder::new(
172223
vector_def.vector_size as u64,
173224
embedding_metric_to_qdrant(vector_def.metric)?,
174225
);
226+
if let Some(multi_vector_comparator) = &vector_def.multi_vector_comparator {
227+
params = params.multivector_config(MultiVectorConfigBuilder::new(
228+
MultiVectorComparator::from_str_name(multi_vector_comparator)
229+
.ok_or_else(|| {
230+
anyhow!(
231+
"unrecognized multi vector comparator: {}",
232+
multi_vector_comparator
233+
)
234+
})?,
235+
));
236+
}
175237
vectors_config.add_named_vector_params(name, params);
176238
}
177239
builder = builder.vectors_config(vectors_config);
@@ -247,15 +309,29 @@ fn values_to_payload(
247309

248310
for (value, field_info) in value_fields.iter().zip(fields_info.iter()) {
249311
let field_name = &field_info.field_schema.name;
250-
match value {
251-
Value::Basic(BasicValue::Vector(v)) if field_info.is_qdrant_vector => {
252-
let vector = encode_vector(v.as_ref())?;
253-
vectors = vectors.add_vector(field_name, vector);
312+
313+
match &field_info.vector_shape {
314+
Some(vector_shape) => {
315+
if value.is_null() {
316+
continue;
317+
}
318+
let vector: QdrantVector = match value {
319+
Value::Basic(basic_value) => match vector_shape {
320+
VectorShape::Vector(_) => encode_dense_vector(&basic_value)?.into(),
321+
VectorShape::MultiVector(_) => {
322+
encode_multi_dense_vector(&basic_value)?.into()
323+
}
324+
},
325+
_ => {
326+
bail!("Expected a vector field, got {:?}", value);
327+
}
328+
};
329+
vectors = vectors.add_vector(field_name.clone(), vector);
254330
}
255-
v => {
331+
None => {
256332
let json_value = serde_json::to_value(TypedValue {
257333
t: &field_info.field_schema.value_type.typ,
258-
v,
334+
v: value,
259335
})?;
260336
payload.insert(field_name.clone(), json_value.into());
261337
}
@@ -323,17 +399,14 @@ impl StorageFactoryBase for Factory {
323399
let mut unsupported_vector_fields = Vec::<(String, ValueType)>::new();
324400

325401
for field in d.value_fields_schema.iter() {
326-
let vector_size = parse_supported_vector_size(&field.value_type.typ);
327-
fields_info.push(FieldInfo {
328-
field_schema: field.clone(),
329-
is_qdrant_vector: vector_size.is_some(),
330-
});
331-
if let Some(vector_size) = vector_size {
402+
let vector_shape = parse_vector_shape(&field.value_type.typ);
403+
if let Some(vector_shape) = &vector_shape {
332404
vector_def.insert(
333405
field.name.clone(),
334406
VectorDef {
335-
vector_size,
407+
vector_size: vector_shape.vector_size(),
336408
metric: DEFAULT_VECTOR_SIMILARITY_METRIC,
409+
multi_vector_comparator: vector_shape.multi_vector_comparator().map(|s| s.as_str_name().to_string()),
337410
},
338411
);
339412
} else if matches!(
@@ -343,6 +416,10 @@ impl StorageFactoryBase for Factory {
343416
// This is a vector field but not supported by Qdrant
344417
unsupported_vector_fields.push((field.name.clone(), field.value_type.typ.clone()));
345418
}
419+
fields_info.push(FieldInfo {
420+
field_schema: field.clone(),
421+
vector_shape,
422+
});
346423
}
347424

348425
let mut specified_vector_fields = HashSet::new();

0 commit comments

Comments
 (0)