Skip to content

Commit b23d5a9

Browse files
committed
Support synthetic source with non-indexed vectors
1 parent 2619fe6 commit b23d5a9

File tree

3 files changed

+155
-32
lines changed

3 files changed

+155
-32
lines changed

server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.apache.lucene.index.SortedSetDocValues;
2121
import org.apache.lucene.util.BytesRef;
2222
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
23+
import org.elasticsearch.index.IndexVersion;
2324
import org.elasticsearch.index.mapper.BlockLoader.BlockFactory;
2425
import org.elasticsearch.index.mapper.BlockLoader.BooleanBuilder;
2526
import org.elasticsearch.index.mapper.BlockLoader.Builder;
@@ -28,6 +29,7 @@
2829
import org.elasticsearch.index.mapper.BlockLoader.DoubleBuilder;
2930
import org.elasticsearch.index.mapper.BlockLoader.IntBuilder;
3031
import org.elasticsearch.index.mapper.BlockLoader.LongBuilder;
32+
import org.elasticsearch.index.mapper.vectors.VectorEncoderDecoder;
3133
import org.elasticsearch.search.fetch.StoredFieldsSpec;
3234

3335
import java.io.IOException;
@@ -828,6 +830,92 @@ public String toString() {
828830
}
829831
}
830832

833+
public static class DenseVectorFromBinaryBlockLoader extends DocValuesBlockLoader {
834+
private final String fieldName;
835+
private final int dims;
836+
private final IndexVersion indexVersion;
837+
838+
public DenseVectorFromBinaryBlockLoader(String fieldName, int dims, IndexVersion indexVersion) {
839+
this.fieldName = fieldName;
840+
this.dims = dims;
841+
this.indexVersion = indexVersion;
842+
}
843+
844+
@Override
845+
public Builder builder(BlockFactory factory, int expectedCount) {
846+
return factory.bytesRefs(expectedCount);
847+
}
848+
849+
@Override
850+
public AllReader reader(LeafReaderContext context) throws IOException {
851+
BinaryDocValues docValues = context.reader().getBinaryDocValues(fieldName);
852+
if (docValues == null) {
853+
return new ConstantNullsReader();
854+
}
855+
return new DenseVectorFromBinary(docValues, dims, indexVersion);
856+
}
857+
}
858+
859+
private static class DenseVectorFromBinary extends BlockDocValuesReader {
860+
private final BinaryDocValues docValues;
861+
private final IndexVersion indexVersion;
862+
private final float[] scratch;
863+
864+
private int docID = -1;
865+
866+
DenseVectorFromBinary(BinaryDocValues docValues, int dims, IndexVersion indexVersion) {
867+
this.docValues = docValues;
868+
this.scratch = new float[dims];
869+
this.indexVersion = indexVersion;
870+
}
871+
872+
@Override
873+
public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException {
874+
try (BlockLoader.DoubleBuilder builder = factory.doubles(docs.count())) {
875+
for (int i = 0; i < docs.count(); i++) {
876+
int doc = docs.get(i);
877+
if (doc < docID) {
878+
throw new IllegalStateException("docs within same block must be in order");
879+
}
880+
read(doc, builder);
881+
}
882+
return builder.build();
883+
}
884+
}
885+
886+
@Override
887+
public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException {
888+
read(docId, (DoubleBuilder) builder);
889+
}
890+
891+
private void read(int doc, DoubleBuilder builder) throws IOException {
892+
this.docID = doc;
893+
if (false == docValues.advanceExact(doc)) {
894+
builder.appendNull();
895+
return;
896+
}
897+
BytesRef bytesRef = docValues.binaryValue();
898+
assert bytesRef.length > 0;
899+
VectorEncoderDecoder.decodeDenseVector(indexVersion, bytesRef, scratch);
900+
901+
builder.beginPositionEntry();
902+
for (float value : scratch) {
903+
builder.appendDouble(value);
904+
}
905+
builder.endPositionEntry();
906+
}
907+
908+
@Override
909+
public int docId() {
910+
return docID;
911+
}
912+
913+
@Override
914+
public String toString() {
915+
return "DenseVectorFromBinary.Bytes";
916+
}
917+
}
918+
831919
public static class BooleansBlockLoader extends DocValuesBlockLoader {
832920
private final String fieldName;
833921

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,8 @@ public DenseVectorFieldMapper build(MapperBuilderContext context) {
311311
indexed.getValue(),
312312
similarity.getValue(),
313313
indexOptions.getValue(),
314-
meta.getValue()
314+
meta.getValue(),
315+
context.isSourceSynthetic()
315316
),
316317
builderParams(this, context),
317318
indexOptions.getValue(),
@@ -2045,6 +2046,7 @@ public static final class DenseVectorFieldType extends SimpleMappedFieldType {
20452046
private final VectorSimilarity similarity;
20462047
private final IndexVersion indexVersionCreated;
20472048
private final IndexOptions indexOptions;
2049+
private final boolean isSyntheticSource;
20482050

20492051
public DenseVectorFieldType(
20502052
String name,
@@ -2054,7 +2056,8 @@ public DenseVectorFieldType(
20542056
boolean indexed,
20552057
VectorSimilarity similarity,
20562058
IndexOptions indexOptions,
2057-
Map<String, String> meta
2059+
Map<String, String> meta,
2060+
boolean isSyntheticSource
20582061
) {
20592062
super(name, indexed, false, indexed == false, TextSearchInfo.NONE, meta);
20602063
this.elementType = elementType;
@@ -2063,6 +2066,7 @@ public DenseVectorFieldType(
20632066
this.similarity = similarity;
20642067
this.indexVersionCreated = indexVersionCreated;
20652068
this.indexOptions = indexOptions;
2069+
this.isSyntheticSource = isSyntheticSource;
20662070
}
20672071

20682072
@Override
@@ -2083,18 +2087,6 @@ protected Object parseSourceValue(Object value) {
20832087
};
20842088
}
20852089

2086-
private SourceValueFetcher sourceValueFetcher(Set<String> sourcePaths) {
2087-
return new SourceValueFetcher(sourcePaths, null) {
2088-
@Override
2089-
protected Object parseSourceValue(Object value) {
2090-
if (value.equals("")) {
2091-
return null;
2092-
}
2093-
return NumberFieldMapper.NumberType.FLOAT.parse(value, false);
2094-
}
2095-
};
2096-
}
2097-
20982090
@Override
20992091
public DocValueFormat docValueFormat(String format, ZoneId timeZone) {
21002092
return DocValueFormat.DENSE_VECTOR;
@@ -2341,9 +2333,34 @@ public BlockLoader blockLoader(MappedFieldType.BlockLoaderContext blContext) {
23412333
return new BlockDocValuesReader.DenseVectorBlockLoader(name());
23422334
}
23432335

2336+
if (hasDocValues() && (blContext.fieldExtractPreference() != FieldExtractPreference.STORED || isSyntheticSource)) {
2337+
return new BlockDocValuesReader.DenseVectorFromBinaryBlockLoader(name(), dims, indexVersionCreated);
2338+
}
2339+
2340+
if (isSyntheticSource) {
2341+
return NumberFieldMapper.NumberType.floatingPointBlockLoaderFromFallbackSyntheticSource(
2342+
NumberFieldMapper.NumberType.FLOAT,
2343+
name(),
2344+
null,
2345+
false
2346+
);
2347+
}
2348+
23442349
BlockSourceReader.LeafIteratorLookup lookup = BlockSourceReader.lookupMatchingAll();
23452350
return new BlockSourceReader.DoublesBlockLoader(sourceValueFetcher(blContext.sourcePaths(name())), lookup);
23462351
}
2352+
2353+
private SourceValueFetcher sourceValueFetcher(Set<String> sourcePaths) {
2354+
return new SourceValueFetcher(sourcePaths, null) {
2355+
@Override
2356+
protected Object parseSourceValue(Object value) {
2357+
if (value.equals("")) {
2358+
return null;
2359+
}
2360+
return NumberFieldMapper.NumberType.FLOAT.parse(value, false);
2361+
}
2362+
};
2363+
}
23472364
}
23482365

23492366
private final IndexOptions indexOptions;
@@ -2398,7 +2415,8 @@ public void parse(DocumentParserContext context) throws IOException {
23982415
fieldType().indexed,
23992416
fieldType().similarity,
24002417
fieldType().indexOptions,
2401-
fieldType().meta()
2418+
fieldType().meta(),
2419+
fieldType().isSyntheticSource
24022420
);
24032421
Mapper update = new DenseVectorFieldMapper(
24042422
leafName(),

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ private DenseVectorFieldType createFloatFieldType() {
124124
indexed,
125125
VectorSimilarity.COSINE,
126126
indexed ? randomIndexOptionsAll() : null,
127-
Collections.emptyMap()
127+
Collections.emptyMap(),
128+
false
128129
);
129130
}
130131

@@ -137,7 +138,8 @@ private DenseVectorFieldType createByteFieldType() {
137138
true,
138139
VectorSimilarity.COSINE,
139140
randomIndexOptionsNonQuantized(),
140-
Collections.emptyMap()
141+
Collections.emptyMap(),
142+
false
141143
);
142144
}
143145

@@ -210,7 +212,8 @@ public void testCreateNestedKnnQuery() {
210212
true,
211213
VectorSimilarity.COSINE,
212214
randomIndexOptionsAll(),
213-
Collections.emptyMap()
215+
Collections.emptyMap(),
216+
false
214217
);
215218
float[] queryVector = new float[dims];
216219
for (int i = 0; i < dims; i++) {
@@ -231,7 +234,8 @@ public void testCreateNestedKnnQuery() {
231234
true,
232235
VectorSimilarity.COSINE,
233236
randomIndexOptionsNonQuantized(),
234-
Collections.emptyMap()
237+
Collections.emptyMap(),
238+
false
235239
);
236240
byte[] queryVector = new byte[dims];
237241
float[] floatQueryVector = new float[dims];
@@ -263,7 +267,8 @@ public void testExactKnnQuery() {
263267
true,
264268
VectorSimilarity.COSINE,
265269
randomIndexOptionsAll(),
266-
Collections.emptyMap()
270+
Collections.emptyMap(),
271+
false
267272
);
268273
float[] queryVector = new float[dims];
269274
for (int i = 0; i < dims; i++) {
@@ -281,7 +286,8 @@ public void testExactKnnQuery() {
281286
true,
282287
VectorSimilarity.COSINE,
283288
randomIndexOptionsNonQuantized(),
284-
Collections.emptyMap()
289+
Collections.emptyMap(),
290+
false
285291
);
286292
byte[] queryVector = new byte[dims];
287293
for (int i = 0; i < dims; i++) {
@@ -301,7 +307,8 @@ public void testFloatCreateKnnQuery() {
301307
false,
302308
VectorSimilarity.COSINE,
303309
null,
304-
Collections.emptyMap()
310+
Collections.emptyMap(),
311+
false
305312
);
306313
IllegalArgumentException e = expectThrows(
307314
IllegalArgumentException.class,
@@ -325,7 +332,8 @@ public void testFloatCreateKnnQuery() {
325332
true,
326333
VectorSimilarity.DOT_PRODUCT,
327334
randomIndexOptionsAll(),
328-
Collections.emptyMap()
335+
Collections.emptyMap(),
336+
false
329337
);
330338
float[] queryVector = new float[BBQ_MIN_DIMS];
331339
for (int i = 0; i < BBQ_MIN_DIMS; i++) {
@@ -345,7 +353,8 @@ public void testFloatCreateKnnQuery() {
345353
true,
346354
VectorSimilarity.COSINE,
347355
randomIndexOptionsAll(),
348-
Collections.emptyMap()
356+
Collections.emptyMap(),
357+
false
349358
);
350359
e = expectThrows(
351360
IllegalArgumentException.class,
@@ -364,7 +373,8 @@ public void testCreateKnnQueryMaxDims() {
364373
true,
365374
VectorSimilarity.COSINE,
366375
randomIndexOptionsAll(),
367-
Collections.emptyMap()
376+
Collections.emptyMap(),
377+
false
368378
);
369379
float[] queryVector = new float[4096];
370380
for (int i = 0; i < 4096; i++) {
@@ -386,7 +396,8 @@ public void testCreateKnnQueryMaxDims() {
386396
true,
387397
VectorSimilarity.COSINE,
388398
randomIndexOptionsNonQuantized(),
389-
Collections.emptyMap()
399+
Collections.emptyMap(),
400+
false
390401
);
391402
byte[] queryVector = new byte[4096];
392403
for (int i = 0; i < 4096; i++) {
@@ -407,7 +418,8 @@ public void testByteCreateKnnQuery() {
407418
false,
408419
VectorSimilarity.COSINE,
409420
randomIndexOptionsNonQuantized(),
410-
Collections.emptyMap()
421+
Collections.emptyMap(),
422+
false
411423
);
412424
IllegalArgumentException e = expectThrows(
413425
IllegalArgumentException.class,
@@ -423,7 +435,8 @@ public void testByteCreateKnnQuery() {
423435
true,
424436
VectorSimilarity.COSINE,
425437
randomIndexOptionsNonQuantized(),
426-
Collections.emptyMap()
438+
Collections.emptyMap(),
439+
false
427440
);
428441
e = expectThrows(
429442
IllegalArgumentException.class,
@@ -448,7 +461,8 @@ public void testRescoreOversampleUsedWithoutQuantization() {
448461
true,
449462
VectorSimilarity.COSINE,
450463
randomIndexOptionsNonQuantized(),
451-
Collections.emptyMap()
464+
Collections.emptyMap(),
465+
false
452466
);
453467

454468
Query knnQuery = nonQuantizedField.createKnnQuery(
@@ -481,7 +495,8 @@ public void testRescoreOversampleModifiesNumCandidates() {
481495
true,
482496
VectorSimilarity.COSINE,
483497
randomIndexOptionsHnswQuantized(),
484-
Collections.emptyMap()
498+
Collections.emptyMap(),
499+
false
485500
);
486501

487502
// Total results is k, internal k is multiplied by oversample
@@ -502,7 +517,8 @@ public void testRescoreOversampleQueryOverrides() {
502517
true,
503518
VectorSimilarity.COSINE,
504519
randomIndexOptionsHnswQuantized(new DenseVectorFieldMapper.RescoreVector(randomFloatBetween(1.1f, 9.9f, false))),
505-
Collections.emptyMap()
520+
Collections.emptyMap(),
521+
false
506522
);
507523
Query query = fieldType.createKnnQuery(VectorData.fromFloats(new float[] { 1, 4, 10 }), 10, 100, 0f, null, null, null);
508524
assertTrue(query instanceof ESKnnFloatVectorQuery);
@@ -516,7 +532,8 @@ public void testRescoreOversampleQueryOverrides() {
516532
true,
517533
VectorSimilarity.COSINE,
518534
randomIndexOptionsHnswQuantized(new DenseVectorFieldMapper.RescoreVector(0)),
519-
Collections.emptyMap()
535+
Collections.emptyMap(),
536+
false
520537
);
521538
query = fieldType.createKnnQuery(VectorData.fromFloats(new float[] { 1, 4, 10 }), 10, 100, 2f, null, null, null);
522539
assertTrue(query instanceof RescoreKnnVectorQuery);

0 commit comments

Comments
 (0)