Skip to content

Commit 309e4c3

Browse files
committed
Introduce a synthetic vectors source loader and a synthetic vectors field loader in mapping
1 parent 746a233 commit 309e4c3

File tree

7 files changed

+394
-44
lines changed

7 files changed

+394
-44
lines changed

server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,17 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {
453453
return Map.of();
454454
}
455455

456+
/**
457+
* Returns a {@link SourceLoader.SyntheticVectorsLoader} instance responsible for loading
458+
* synthetic vector values from the index.
459+
*
460+
* @return a {@link SourceLoader.SyntheticVectorsLoader} used to extract synthetic vectors,
461+
* or {@code null} if no loader is provided or applicable in this context
462+
*/
463+
public SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader() {
464+
return null;
465+
}
466+
456467
/**
457468
* <p>
458469
* Specifies the mode of synthetic source support by the mapper.

server/src/main/java/org/elasticsearch/index/mapper/Mapping.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,24 @@ Mapping mappingUpdate(RootObjectMapper rootObjectMapper) {
124124
return new Mapping(rootObjectMapper, metadataMappers, meta);
125125
}
126126

127+
/**
128+
* Returns a {@link SourceLoader.SyntheticVectorsLoader} that loads synthetic vector values
129+
* from a source document, optionally applying a {@link SourceFilter}.
130+
* <p>
131+
* The {@code filter}, if provided, can be used to limit which fields from the mapping
132+
* are considered when computing synthetic vectors. This allows for performance
133+
* optimizations or targeted vector extraction.
134+
* </p>
135+
*
136+
* @param filter an optional {@link SourceFilter} to restrict the fields considered during loading;
137+
* may be {@code null} to indicate no filtering
138+
* @return a {@link SourceLoader.SyntheticVectorsLoader} for extracting synthetic vectors,
139+
* potentially using the provided filter
140+
*/
141+
public SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader(@Nullable SourceFilter filter) {
142+
return root.syntheticVectorsLoader(filter);
143+
}
144+
127145
private boolean isSourceSynthetic() {
128146
SourceFieldMapper sfm = (SourceFieldMapper) metadataMappersByName.get(SourceFieldMapper.NAME);
129147
return sfm != null && sfm.isSynthetic();

server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ private CacheKey() {}
5151
private final Map<String, Mapper> fieldMappers;
5252
private final Map<String, ObjectMapper> objectMappers;
5353
private final Map<String, InferenceFieldMetadata> inferenceFields;
54+
private final Set<String> syntheticVectorFields;
5455
private final int runtimeFieldMappersCount;
5556
private final NestedLookup nestedLookup;
5657
private final FieldTypeLookup fieldTypeLookup;
@@ -188,12 +189,17 @@ private MappingLookup(
188189
this.fieldTypeLookup = new FieldTypeLookup(mappers, aliasMappers, passThroughMappers, runtimeFields);
189190

190191
Map<String, InferenceFieldMetadata> inferenceFields = new HashMap<>();
192+
List<String> syntheticVectorFields = new ArrayList<>();
191193
for (FieldMapper mapper : mappers) {
192194
if (mapper instanceof InferenceFieldMapper inferenceFieldMapper) {
193195
inferenceFields.put(mapper.fullPath(), inferenceFieldMapper.getMetadata(fieldTypeLookup.sourcePaths(mapper.fullPath())));
194196
}
197+
if (mapper.syntheticVectorsLoader() != null) {
198+
syntheticVectorFields.add(mapper.fullPath());
199+
}
195200
}
196201
this.inferenceFields = Map.copyOf(inferenceFields);
202+
this.syntheticVectorFields = Set.copyOf(syntheticVectorFields);
197203

198204
if (runtimeFields.isEmpty()) {
199205
// without runtime fields this is the same as the field type lookup
@@ -378,6 +384,11 @@ public Map<String, InferenceFieldMetadata> inferenceFields() {
378384
return inferenceFields;
379385
}
380386

387+
public Set<String> syntheticVectorFields() {
388+
return syntheticVectorFields;
389+
390+
}
391+
381392
public NestedLookup nestedLookup() {
382393
return nestedLookup;
383394
}
@@ -486,9 +497,29 @@ public SourceLoader newSourceLoader(@Nullable SourceFilter filter, SourceFieldMe
486497
if (isSourceSynthetic()) {
487498
return new SourceLoader.Synthetic(filter, () -> mapping.syntheticFieldLoader(filter), metrics);
488499
}
500+
var syntheticVectorsLoader = mapping.syntheticVectorsLoader(filter);
501+
if (syntheticVectorsLoader != null) {
502+
return new SourceLoader.SyntheticVectors(removeExcludedSyntheticVectorFields(filter), syntheticVectorsLoader);
503+
}
489504
return filter == null ? SourceLoader.FROM_STORED_SOURCE : new SourceLoader.Stored(filter);
490505
}
491506

507+
private SourceFilter removeExcludedSyntheticVectorFields(@Nullable SourceFilter filter) {
508+
if (filter == null || filter.getExcludes().length == 0) {
509+
return filter;
510+
}
511+
List<String> newExcludes = new ArrayList<>();
512+
for (var exclude : filter.getExcludes()) {
513+
if (syntheticVectorFields().contains(exclude) == false) {
514+
newExcludes.add(exclude);
515+
}
516+
}
517+
if (newExcludes.isEmpty() && filter.getIncludes().length == 0) {
518+
return null;
519+
}
520+
return new SourceFilter(filter.getIncludes(), newExcludes.toArray(String[]::new));
521+
}
522+
492523
/**
493524
* Returns if this mapping contains a data-stream's timestamp meta-field and this field is enabled.
494525
* Only indices that are a part of a data-stream have this meta-field enabled.

server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java

Lines changed: 62 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import org.apache.lucene.search.ScoreMode;
1717
import org.apache.lucene.search.join.BitSetProducer;
1818
import org.apache.lucene.util.BitSet;
19+
import org.elasticsearch.common.CheckedBiConsumer;
1920
import org.elasticsearch.common.Explicit;
2021
import org.elasticsearch.common.lucene.search.Queries;
2122
import org.elasticsearch.common.xcontent.support.XContentMapValues;
@@ -408,6 +409,37 @@ protected MapperMergeContext createChildContext(MapperMergeContext mapperMergeCo
408409
);
409410
}
410411

412+
@Override
413+
protected SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader(SourceFilter sourceFilter) {
414+
var patchLoader = super.syntheticVectorsLoader(sourceFilter);
415+
if (patchLoader == null) {
416+
return null;
417+
}
418+
return context -> {
419+
var leaf = patchLoader.leaf(context);
420+
if (leaf == null) {
421+
return null;
422+
}
423+
IndexSearcher searcher = new IndexSearcher(context.reader());
424+
searcher.setQueryCache(null);
425+
var childScorer = searcher.createWeight(nestedTypeFilter, ScoreMode.COMPLETE_NO_SCORES, 1f)
426+
.scorer(searcher.getLeafContexts().get(0));
427+
if (childScorer == null) {
428+
return null;
429+
}
430+
var parentsDocs = bitsetProducer.apply(parentTypeFilter).getBitSet(context);
431+
return (doc, acc) -> {
432+
List<SourceLoader.SyntheticVectorPatch> nestedPatches = new ArrayList<>();
433+
collectChildren(nestedTypePath, doc, parentsDocs, childScorer.iterator(), (offset, childId) -> {
434+
List<SourceLoader.SyntheticVectorPatch> childPatches = new ArrayList<>();
435+
leaf.load(childId, childPatches);
436+
nestedPatches.add(new SourceLoader.NestedOffsetSyntheticVectorPath(offset, childPatches));
437+
});
438+
acc.add(new SourceLoader.NestedSyntheticVectorPath(fullPath(), nestedPatches));
439+
};
440+
};
441+
}
442+
411443
@Override
412444
SourceLoader.SyntheticFieldLoader syntheticFieldLoader(SourceFilter filter, Collection<Mapper> mappers, boolean isFragment) {
413445
// IgnoredSourceFieldMapper integration takes care of writing the source for nested objects that enabled store_array_source.
@@ -459,33 +491,26 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
459491

460492
IndexSearcher searcher = new IndexSearcher(leafReader);
461493
searcher.setQueryCache(null);
462-
var childScorer = searcher.createWeight(childFilter, ScoreMode.COMPLETE_NO_SCORES, 1f).scorer(leafReader.getContext());
494+
var childScorer = searcher.createWeight(childFilter, ScoreMode.COMPLETE_NO_SCORES, 1f)
495+
.scorer(searcher.getLeafContexts().get(0));
463496
if (childScorer != null) {
464497
var parentDocs = parentBitSetProducer.get().getBitSet(leafReader.getContext());
465498
return parentDoc -> {
466-
collectChildren(parentDoc, parentDocs, childScorer.iterator());
499+
children.clear();
500+
collectChildren(
501+
nestedTypePath,
502+
parentDoc,
503+
parentDocs,
504+
childScorer.iterator(),
505+
(offset, childId) -> children.add(childId)
506+
);
467507
return children.size() > 0;
468508
};
469509
} else {
470510
return parentDoc -> false;
471511
}
472512
}
473513

474-
private List<Integer> collectChildren(int parentDoc, BitSet parentDocs, DocIdSetIterator childIt) throws IOException {
475-
assert parentDoc < 0 || parentDocs.get(parentDoc) : "wrong context, doc " + parentDoc + " is not a parent of " + nestedTypePath;
476-
final int prevParentDoc = parentDoc > 0 ? parentDocs.prevSetBit(parentDoc - 1) : -1;
477-
int childDocId = childIt.docID();
478-
if (childDocId <= prevParentDoc) {
479-
childDocId = childIt.advance(prevParentDoc + 1);
480-
}
481-
482-
children.clear();
483-
for (; childDocId < parentDoc; childDocId = childIt.nextDoc()) {
484-
children.add(childDocId);
485-
}
486-
return children;
487-
}
488-
489514
@Override
490515
public boolean hasValue() {
491516
return children.size() > 0;
@@ -518,4 +543,24 @@ public void reset() {
518543
children.clear();
519544
}
520545
}
546+
547+
private static void collectChildren(
548+
String nestedTypePath,
549+
int parentDoc,
550+
BitSet parentDocs,
551+
DocIdSetIterator childIt,
552+
CheckedBiConsumer<Integer, Integer, IOException> childConsumer
553+
) throws IOException {
554+
assert parentDoc < 0 || parentDocs.get(parentDoc) : "wrong context, doc " + parentDoc + " is not a parent of " + nestedTypePath;
555+
final int prevParentDoc = parentDoc > 0 ? parentDocs.prevSetBit(parentDoc - 1) : -1;
556+
int childDocId = childIt.docID();
557+
if (childDocId <= prevParentDoc) {
558+
childDocId = childIt.advance(prevParentDoc + 1);
559+
}
560+
561+
int offset = 0;
562+
for (; childDocId < parentDoc; childDocId = childIt.nextDoc()) {
563+
childConsumer.accept(offset++, childDocId);
564+
}
565+
}
521566
}

server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
import java.util.Optional;
4444
import java.util.Set;
4545
import java.util.TreeMap;
46+
import java.util.stream.Collectors;
4647
import java.util.stream.Stream;
4748

4849
public class ObjectMapper extends Mapper {
@@ -911,6 +912,47 @@ public ObjectMapper findParentMapper(String leafFieldPath) {
911912
return null;
912913
}
913914

915+
private static SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader(Mapper mapper, SourceFilter sourceFilter) {
916+
if (sourceFilter != null && sourceFilter.isPathFiltered(mapper.fullPath(), false)) {
917+
return null;
918+
}
919+
if (mapper instanceof ObjectMapper objMapper) {
920+
return objMapper.syntheticVectorsLoader(sourceFilter);
921+
} else if (mapper instanceof FieldMapper fieldMapper) {
922+
return fieldMapper.syntheticVectorsLoader();
923+
} else {
924+
return null;
925+
}
926+
}
927+
928+
SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader(SourceFilter sourceFilter) {
929+
var loaders = mappers.values()
930+
.stream()
931+
.map(m -> syntheticVectorsLoader(m, sourceFilter))
932+
.filter(l -> l != null)
933+
.collect(Collectors.toList());
934+
if (loaders.isEmpty()) {
935+
return null;
936+
}
937+
return context -> {
938+
final List<SourceLoader.SyntheticVectorsLoader.Leaf> leaves = new ArrayList<>();
939+
for (var loader : loaders) {
940+
var leaf = loader.leaf(context);
941+
if (leaf != null) {
942+
leaves.add(leaf);
943+
}
944+
}
945+
if (leaves.isEmpty()) {
946+
return null;
947+
}
948+
return (doc, acc) -> {
949+
for (var leaf : leaves) {
950+
leaf.load(doc, acc);
951+
}
952+
};
953+
};
954+
}
955+
914956
SourceLoader.SyntheticFieldLoader syntheticFieldLoader(SourceFilter filter, Collection<Mapper> mappers, boolean isFragment) {
915957
var fields = mappers.stream()
916958
.sorted(Comparator.comparing(Mapper::fullPath))

0 commit comments

Comments
 (0)