Skip to content

Commit c8a8d4d

Browse files
Add docvalue_fields Support for dense_vector Fields (#114484)
Currently, dense_vector fields don't support docvalue_fields. This adds that support for debugging purposes: users can inspect the raw values of their vectors even if the source is disabled. Co-authored-by: Mayya Sharipova <[email protected]>
1 parent d0bc527 commit c8a8d4d

File tree

8 files changed

+326
-5
lines changed

8 files changed

+326
-5
lines changed

docs/changelog/114484.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 114484
2+
summary: Add `docvalue_fields` Support for `dense_vector` Fields
3+
area: Search
4+
type: enhancement
5+
issues:
6+
- 108470
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
setup:
2+
- requires:
3+
capabilities:
4+
- method: POST
5+
path: /_search
6+
capabilities: [ dense_vector_docvalue_fields ]
7+
test_runner_features: [ capabilities, close_to ]
8+
reason: Capability required to run test
9+
- do:
10+
indices.create:
11+
index: test
12+
body:
13+
mappings:
14+
properties:
15+
name:
16+
type: keyword
17+
vector1:
18+
type: dense_vector
19+
element_type: float
20+
dims: 5
21+
index: true
22+
vector2:
23+
type: dense_vector
24+
element_type: float
25+
dims: 5
26+
index: false
27+
vector3:
28+
type: dense_vector
29+
element_type: byte
30+
dims: 5
31+
index: true
32+
vector4:
33+
type: dense_vector
34+
element_type: byte
35+
dims: 5
36+
index: false
37+
vector5:
38+
type: dense_vector
39+
element_type: bit
40+
dims: 40
41+
index: true
42+
vector6:
43+
type: dense_vector
44+
element_type: bit
45+
dims: 40
46+
index: false
47+
- do:
48+
index:
49+
index: test
50+
id: "1"
51+
body:
52+
name: cow.jpg
53+
vector1: [230.0, 300.33, -34.8988, 15.555, -200.0]
54+
vector2: [130.0, 115.0, -1.02, 15.555, -100.0]
55+
vector3: [-1, 100, -13, 15, -128]
56+
vector4: [-1, 50, -1, 1, 120]
57+
vector5: [1, 111, -13, 15, -128]
58+
vector6: [-1, 11, 0, 12, 111]
59+
- do:
60+
index:
61+
index: test
62+
id: "2"
63+
body:
64+
name: moose.jpg
65+
vector1: [-0.5, 100.0, -13, 14.8, -156.0]
66+
vector4: [-1, 50, -1, 1, 120]
67+
vector5: [1, 111, -13, 15, -128]
68+
vector6: null
69+
- do:
70+
index:
71+
index: test
72+
id: "3"
73+
body:
74+
name: rabbit.jpg
75+
vector2: [130.0, 115.0, -1.02, 15.555, -100.0]
76+
vector3: [-1, 100, -13, 15, -128]
77+
78+
- do:
79+
indices.refresh: {}
80+
81+
---
82+
"Enable docvalue_fields parameter for dense_vector fields":
83+
- requires:
84+
capabilities:
85+
- method: POST
86+
path: /_search
87+
capabilities: [ dense_vector_docvalue_fields ]
88+
test_runner_features: capabilities
89+
reason: "Support for dense vector doc value fields capability required"
90+
- do:
91+
search:
92+
_source: false
93+
index: test
94+
body:
95+
docvalue_fields: [name, vector1, vector2, vector3, vector4, vector5, vector6]
96+
sort: name
97+
98+
99+
- match: {hits.hits.0._id: "1"}
100+
- match: {hits.hits.0.fields.name.0: "cow.jpg"}
101+
102+
- length: {hits.hits.0.fields.vector1.0: 5}
103+
- length: {hits.hits.0.fields.vector2.0: 5}
104+
- length: {hits.hits.0.fields.vector3.0: 5}
105+
- length: {hits.hits.0.fields.vector4.0: 5}
106+
- length: {hits.hits.0.fields.vector5.0: 5}
107+
- length: {hits.hits.0.fields.vector6.0: 5}
108+
109+
- close_to: { hits.hits.0.fields.vector1.0.0: { value: 230.0, error: 0.001 } }
110+
- close_to: { hits.hits.0.fields.vector1.0.1: { value: 300.33, error: 0.001 } }
111+
- close_to: { hits.hits.0.fields.vector1.0.2: { value: -34.8988, error: 0.001 } }
112+
- close_to: { hits.hits.0.fields.vector1.0.3: { value: 15.555, error: 0.001 } }
113+
- close_to: { hits.hits.0.fields.vector1.0.4: { value: -200.0, error: 0.001 } }
114+
115+
- close_to: { hits.hits.0.fields.vector2.0.0: { value: 130.0, error: 0.001 } }
116+
- close_to: { hits.hits.0.fields.vector2.0.1: { value: 115.0, error: 0.001 } }
117+
- close_to: { hits.hits.0.fields.vector2.0.2: { value: -1.02, error: 0.001 } }
118+
- close_to: { hits.hits.0.fields.vector2.0.3: { value: 15.555, error: 0.001 } }
119+
- close_to: { hits.hits.0.fields.vector2.0.4: { value: -100.0, error: 0.001 } }
120+
121+
- match: {hits.hits.0.fields.vector3.0: [-1, 100, -13, 15, -128]}
122+
- match: {hits.hits.0.fields.vector4.0: [-1, 50, -1, 1, 120]}
123+
- match: {hits.hits.0.fields.vector5.0: [1, 111, -13, 15, -128]}
124+
- match: {hits.hits.0.fields.vector6.0: [-1, 11, 0, 12, 111]}
125+
126+
127+
- match: {hits.hits.1._id: "2"}
128+
- match: {hits.hits.1.fields.name.0: "moose.jpg"}
129+
130+
- length: {hits.hits.1.fields.vector1.0: 5}
131+
- length: {hits.hits.1.fields.vector4.0: 5}
132+
- length: {hits.hits.1.fields.vector5.0: 5}
133+
- match: {hits.hits.1.fields.vector2: null}
134+
- match: {hits.hits.1.fields.vector3: null}
135+
- match: {hits.hits.1.fields.vector6: null}
136+
137+
- close_to: { hits.hits.1.fields.vector1.0.0: { value: -0.5, error: 0.001 } }
138+
- close_to: { hits.hits.1.fields.vector1.0.1: { value: 100.0, error: 0.001 } }
139+
- close_to: { hits.hits.1.fields.vector1.0.2: { value: -13, error: 0.001 } }
140+
- close_to: { hits.hits.1.fields.vector1.0.3: { value: 14.8, error: 0.001 } }
141+
- close_to: { hits.hits.1.fields.vector1.0.4: { value: -156.0, error: 0.001 } }
142+
143+
- match: {hits.hits.1.fields.vector4.0: [-1, 50, -1, 1, 120]}
144+
- match: {hits.hits.1.fields.vector5.0: [1, 111, -13, 15, -128]}
145+
146+
147+
- match: {hits.hits.2._id: "3"}
148+
- match: {hits.hits.2.fields.name.0: "rabbit.jpg"}
149+
150+
- length: {hits.hits.2.fields.vector2.0: 5}
151+
- length: {hits.hits.2.fields.vector3.0: 5}
152+
- match: {hits.hits.2.fields.vector1: null}
153+
- match: {hits.hits.2.fields.vector4: null}
154+
- match: {hits.hits.2.fields.vector5: null}
155+
- match: {hits.hits.2.fields.vector6: null}
156+
157+
- close_to: { hits.hits.2.fields.vector2.0.0: { value: 130.0, error: 0.001 } }
158+
- close_to: { hits.hits.2.fields.vector2.0.1: { value: 115.0, error: 0.001 } }
159+
- close_to: { hits.hits.2.fields.vector2.0.2: { value: -1.02, error: 0.001 } }
160+
- close_to: { hits.hits.2.fields.vector2.0.3: { value: 15.555, error: 0.001 } }
161+
- close_to: { hits.hits.2.fields.vector2.0.4: { value: -100.0, error: 0.001 } }
162+
163+
- match: {hits.hits.2.fields.vector3.0: [-1, 100, -13, 15, -128]}

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1904,9 +1904,7 @@ protected Object parseSourceValue(Object value) {
19041904

19051905
@Override
19061906
public DocValueFormat docValueFormat(String format, ZoneId timeZone) {
1907-
throw new IllegalArgumentException(
1908-
"Field [" + name() + "] of type [" + typeName() + "] doesn't support docvalue_fields or aggregations"
1909-
);
1907+
return DocValueFormat.DENSE_VECTOR;
19101908
}
19111909

19121910
@Override

server/src/main/java/org/elasticsearch/index/mapper/vectors/VectorDVLeafFieldData.java

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,14 @@
1010
package org.elasticsearch.index.mapper.vectors;
1111

1212
import org.apache.lucene.index.BinaryDocValues;
13+
import org.apache.lucene.index.ByteVectorValues;
1314
import org.apache.lucene.index.DocValues;
15+
import org.apache.lucene.index.FloatVectorValues;
16+
import org.apache.lucene.index.KnnVectorValues;
1417
import org.apache.lucene.index.LeafReader;
18+
import org.apache.lucene.util.BytesRef;
1519
import org.elasticsearch.index.IndexVersion;
20+
import org.elasticsearch.index.fielddata.FormattedDocValues;
1621
import org.elasticsearch.index.fielddata.LeafFieldData;
1722
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
1823
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.ElementType;
@@ -23,8 +28,12 @@
2328
import org.elasticsearch.script.field.vectors.ByteBinaryDenseVectorDocValuesField;
2429
import org.elasticsearch.script.field.vectors.ByteKnnDenseVectorDocValuesField;
2530
import org.elasticsearch.script.field.vectors.KnnDenseVectorDocValuesField;
31+
import org.elasticsearch.search.DocValueFormat;
2632

2733
import java.io.IOException;
34+
import java.util.Arrays;
35+
36+
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
2837

2938
final class VectorDVLeafFieldData implements LeafFieldData {
3039

@@ -76,4 +85,119 @@ public DocValuesScriptFieldFactory getScriptFieldFactory(String name) {
7685
}
7786
}
7887

88+
@Override
89+
public FormattedDocValues getFormattedValues(DocValueFormat format) {
90+
int dims = elementType == ElementType.BIT ? this.dims / Byte.SIZE : this.dims;
91+
return switch (elementType) {
92+
case BYTE, BIT -> new FormattedDocValues() {
93+
private byte[] vector = new byte[dims];
94+
private ByteVectorValues byteVectorValues; // use when indexed
95+
private KnnVectorValues.DocIndexIterator iterator; // use when indexed
96+
private BinaryDocValues binary; // use when not indexed
97+
{
98+
try {
99+
if (indexed) {
100+
byteVectorValues = reader.getByteVectorValues(field);
101+
iterator = (byteVectorValues == null) ? null : byteVectorValues.iterator();
102+
} else {
103+
binary = DocValues.getBinary(reader, field);
104+
}
105+
} catch (IOException e) {
106+
throw new IllegalStateException("Cannot load doc values", e);
107+
}
108+
109+
}
110+
111+
@Override
112+
public boolean advanceExact(int docId) throws IOException {
113+
if (indexed) {
114+
if (iteratorAdvanceExact(iterator, docId) == false) {
115+
return false;
116+
}
117+
vector = byteVectorValues.vectorValue(iterator.index());
118+
} else {
119+
if (binary == null || binary.advanceExact(docId) == false) {
120+
return false;
121+
}
122+
BytesRef ref = binary.binaryValue();
123+
System.arraycopy(ref.bytes, ref.offset, vector, 0, dims);
124+
}
125+
return true;
126+
}
127+
128+
@Override
129+
public int docValueCount() {
130+
return 1;
131+
}
132+
133+
public Object nextValue() {
134+
Byte[] vectorValue = new Byte[dims];
135+
for (int i = 0; i < dims; i++) {
136+
vectorValue[i] = vector[i];
137+
}
138+
return vectorValue;
139+
}
140+
};
141+
case FLOAT -> new FormattedDocValues() {
142+
float[] vector = new float[dims];
143+
private FloatVectorValues floatVectorValues; // use when indexed
144+
private KnnVectorValues.DocIndexIterator iterator; // use when indexed
145+
private BinaryDocValues binary; // use when not indexed
146+
{
147+
try {
148+
if (indexed) {
149+
floatVectorValues = reader.getFloatVectorValues(field);
150+
iterator = (floatVectorValues == null) ? null : floatVectorValues.iterator();
151+
} else {
152+
binary = DocValues.getBinary(reader, field);
153+
}
154+
} catch (IOException e) {
155+
throw new IllegalStateException("Cannot load doc values", e);
156+
}
157+
158+
}
159+
160+
@Override
161+
public boolean advanceExact(int docId) throws IOException {
162+
if (indexed) {
163+
if (iteratorAdvanceExact(iterator, docId) == false) {
164+
return false;
165+
}
166+
vector = floatVectorValues.vectorValue(iterator.index());
167+
} else {
168+
if (binary == null || binary.advanceExact(docId) == false) {
169+
return false;
170+
}
171+
BytesRef ref = binary.binaryValue();
172+
VectorEncoderDecoder.decodeDenseVector(indexVersion, ref, vector);
173+
}
174+
return true;
175+
}
176+
177+
@Override
178+
public int docValueCount() {
179+
return 1;
180+
}
181+
182+
@Override
183+
public Object nextValue() {
184+
return Arrays.copyOf(vector, vector.length);
185+
}
186+
};
187+
};
188+
}
189+
190+
private static boolean iteratorAdvanceExact(KnnVectorValues.DocIndexIterator iterator, int docId) throws IOException {
191+
if (iterator == null) return false;
192+
int currentDoc = iterator.docID();
193+
if (currentDoc == NO_MORE_DOCS || docId < currentDoc) {
194+
return false;
195+
} else if (docId > currentDoc) {
196+
currentDoc = iterator.advance(docId);
197+
if (currentDoc != docId) {
198+
return false;
199+
}
200+
}
201+
return true;
202+
}
79203
}

server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,16 @@ private SearchCapabilities() {}
2424
private static final String BIT_DENSE_VECTOR_SYNTHETIC_SOURCE_CAPABILITY = "bit_dense_vector_synthetic_source";
2525
/** Support Byte and Float with Bit dot product. */
2626
private static final String BYTE_FLOAT_BIT_DOT_PRODUCT_CAPABILITY = "byte_float_bit_dot_product";
27+
/** Support docvalue_fields parameter for `dense_vector` field. */
28+
private static final String DENSE_VECTOR_DOCVALUE_FIELDS = "dense_vector_docvalue_fields";
2729
/** Support transforming rank rrf queries to the corresponding rrf retriever. */
2830
private static final String TRANSFORM_RANK_RRF_TO_RETRIEVER = "transform_rank_rrf_to_retriever";
2931

3032
public static final Set<String> CAPABILITIES = Set.of(
3133
RANGE_REGEX_INTERVAL_QUERY_CAPABILITY,
3234
BIT_DENSE_VECTOR_SYNTHETIC_SOURCE_CAPABILITY,
3335
BYTE_FLOAT_BIT_DOT_PRODUCT_CAPABILITY,
36+
DENSE_VECTOR_DOCVALUE_FIELDS,
3437
TRANSFORM_RANK_RRF_TO_RETRIEVER
3538
);
3639
}

server/src/main/java/org/elasticsearch/search/DocValueFormat.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,31 @@ public String toString() {
168168
}
169169
};
170170

171+
DocValueFormat DENSE_VECTOR = DenseVectorDocValueFormat.INSTANCE;
172+
173+
/**
174+
* Singleton, stateless formatter, for dense vector values, no need to actually format anything
175+
*/
176+
class DenseVectorDocValueFormat implements DocValueFormat {
177+
178+
public static final DocValueFormat INSTANCE = new DenseVectorDocValueFormat();
179+
180+
private DenseVectorDocValueFormat() {}
181+
182+
@Override
183+
public String getWriteableName() {
184+
return "dense_vector";
185+
}
186+
187+
@Override
188+
public void writeTo(StreamOutput out) {}
189+
190+
@Override
191+
public String toString() {
192+
return "dense_vector";
193+
}
194+
};
195+
171196
DocValueFormat BINARY = BinaryDocValueFormat.INSTANCE;
172197

173198
/**

server/src/main/java/org/elasticsearch/search/SearchModule.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,7 @@ private void registerValueFormats() {
10201020
registerValueFormat(DocValueFormat.IP.getWriteableName(), in -> DocValueFormat.IP);
10211021
registerValueFormat(DocValueFormat.RAW.getWriteableName(), in -> DocValueFormat.RAW);
10221022
registerValueFormat(DocValueFormat.BINARY.getWriteableName(), in -> DocValueFormat.BINARY);
1023+
registerValueFormat(DocValueFormat.DENSE_VECTOR.getWriteableName(), in -> DocValueFormat.DENSE_VECTOR);
10231024
registerValueFormat(DocValueFormat.UNSIGNED_LONG_SHIFTED.getWriteableName(), in -> DocValueFormat.UNSIGNED_LONG_SHIFTED);
10241025
registerValueFormat(DocValueFormat.TIME_SERIES_ID.getWriteableName(), in -> DocValueFormat.TIME_SERIES_ID);
10251026
registerValueFormat(TS_ROUTING_HASH_DOC_VALUE_FORMAT.getWriteableName(), in -> TS_ROUTING_HASH_DOC_VALUE_FORMAT);

0 commit comments

Comments
 (0)