Skip to content

Commit 9bfea9c

Browse files
committed
Handle the new setting in the search and get APIs so that vectors are excluded when it is set to true
1 parent 309e4c3 commit 9bfea9c

File tree

4 files changed

+167
-81
lines changed

4 files changed

+167
-81
lines changed

server/src/main/java/org/elasticsearch/index/get/ShardGetService.java

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,17 @@
1212
import org.apache.lucene.index.LeafReaderContext;
1313
import org.apache.lucene.index.SortedSetDocValues;
1414
import org.apache.lucene.search.IndexSearcher;
15+
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
1516
import org.elasticsearch.ElasticsearchException;
1617
import org.elasticsearch.common.bytes.BytesReference;
1718
import org.elasticsearch.common.document.DocumentField;
1819
import org.elasticsearch.common.lucene.uid.Versions;
1920
import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndVersion;
2021
import org.elasticsearch.common.metrics.CounterMetric;
2122
import org.elasticsearch.common.metrics.MeanMetric;
23+
import org.elasticsearch.common.regex.Regex;
2224
import org.elasticsearch.core.Nullable;
25+
import org.elasticsearch.core.Tuple;
2326
import org.elasticsearch.index.IndexSettings;
2427
import org.elasticsearch.index.IndexVersions;
2528
import org.elasticsearch.index.VersionType;
@@ -39,6 +42,7 @@
3942
import org.elasticsearch.index.shard.AbstractIndexShardComponent;
4043
import org.elasticsearch.index.shard.IndexShard;
4144
import org.elasticsearch.index.shard.MultiEngineGet;
45+
import org.elasticsearch.search.fetch.subphase.FetchFieldsContext;
4246
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
4347
import org.elasticsearch.search.lookup.Source;
4448
import org.elasticsearch.search.lookup.SourceFilter;
@@ -54,7 +58,9 @@
5458
import java.util.Set;
5559
import java.util.concurrent.TimeUnit;
5660
import java.util.function.Function;
61+
import java.util.stream.Collectors;
5762

63+
import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING;
5864
import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM;
5965
import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO;
6066

@@ -220,7 +226,7 @@ public GetResult getForUpdate(String id, long ifSeqNo, long ifPrimaryTerm, Strin
220226
VersionType.INTERNAL,
221227
ifSeqNo,
222228
ifPrimaryTerm,
223-
FetchSourceContext.FETCH_SOURCE,
229+
FetchSourceContext.FETCH_ALL_SOURCE,
224230
false,
225231
indexShard::get
226232
);
@@ -306,7 +312,12 @@ private GetResult innerGetFetch(
306312
Map<String, DocumentField> documentFields = null;
307313
Map<String, DocumentField> metadataFields = null;
308314
DocIdAndVersion docIdAndVersion = get.docIdAndVersion();
309-
var sourceFilter = fetchSourceContext.filter();
315+
316+
var res = maybeExcludeSyntheticVectorFields(mappingLookup, indexSettings, fetchSourceContext, null);
317+
if (res.v1() != fetchSourceContext) {
318+
fetchSourceContext = res.v1();
319+
}
320+
var sourceFilter = res.v2();
310321
SourceLoader loader = forceSyntheticSource
311322
? new SourceLoader.Synthetic(
312323
sourceFilter,
@@ -400,6 +411,77 @@ private GetResult innerGetFetch(
400411
);
401412
}
402413

414+
/**
415+
* Determines whether vector fields should be excluded from the source based on the {@link FetchSourceContext}.
416+
* Returns {@code true} if vector fields are explicitly marked to be excluded and {@code false} otherwise.
417+
*/
418+
public static boolean shouldExcludeVectorsFromSource(IndexSettings indexSettings, FetchSourceContext fetchSourceContext) {
419+
if (fetchSourceContext == null || fetchSourceContext.excludeVectors() == null) {
420+
return INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(indexSettings.getSettings());
421+
}
422+
return fetchSourceContext.excludeVectors();
423+
}
424+
425+
/**
426+
* Returns a {@link SourceFilter} that excludes vector fields not associated with semantic text fields,
427+
* unless vectors are explicitly requested to be included in the source.
428+
* Returns {@code null} when vectors should not be filtered out.
429+
*/
430+
public static Tuple<FetchSourceContext, SourceFilter> maybeExcludeSyntheticVectorFields(
431+
MappingLookup mappingLookup,
432+
IndexSettings indexSettings,
433+
FetchSourceContext fetchSourceContext,
434+
FetchFieldsContext fetchFieldsContext
435+
) {
436+
if (shouldExcludeVectorsFromSource(indexSettings, fetchSourceContext) == false) {
437+
return Tuple.tuple(fetchSourceContext, null);
438+
}
439+
var fetchFieldsAut = fetchFieldsContext != null && fetchFieldsContext.fields().size() > 0
440+
? new CharacterRunAutomaton(
441+
Regex.simpleMatchToAutomaton(fetchFieldsContext.fields().stream().map(f -> f.field).toArray(String[]::new))
442+
)
443+
: null;
444+
var inferenceFieldsAut = mappingLookup.inferenceFields().size() > 0
445+
? new CharacterRunAutomaton(
446+
Regex.simpleMatchToAutomaton(mappingLookup.inferenceFields().keySet().stream().map(f -> f + "*").toArray(String[]::new))
447+
)
448+
: null;
449+
450+
List<String> lateExcludes = new ArrayList<>();
451+
var excludes = mappingLookup.getFullNameToFieldType().values().stream().filter(MappedFieldType::isVectorEmbedding).filter(f -> {
452+
// Exclude the field specified by the `fields` option
453+
if (fetchFieldsAut != null && fetchFieldsAut.run(f.name())) {
454+
lateExcludes.add(f.name());
455+
return false;
456+
}
457+
// Exclude vectors from semantic text fields, as they are processed separately
458+
return inferenceFieldsAut == null || inferenceFieldsAut.run(f.name()) == false;
459+
}).map(f -> f.name()).collect(Collectors.toList());
460+
461+
var sourceFilter = excludes.isEmpty() ? null : new SourceFilter(new String[] {}, excludes.toArray(String[]::new));
462+
if (lateExcludes.size() > 0) {
463+
/**
464+
* Adds the vector field specified by the `fields` option to the excludes list of the fetch source context.
465+
* This ensures that vector fields are available to sub-fetch phases, but excluded during the {@link FetchSourcePhase}.
466+
*/
467+
if (fetchSourceContext != null && fetchSourceContext.excludes() != null) {
468+
for (var exclude : fetchSourceContext.excludes()) {
469+
lateExcludes.add(exclude);
470+
}
471+
}
472+
var newFetchSourceContext = fetchSourceContext == null
473+
? FetchSourceContext.of(true, false, null, lateExcludes.toArray(String[]::new))
474+
: FetchSourceContext.of(
475+
fetchSourceContext.fetchSource(),
476+
fetchSourceContext.excludeVectors(),
477+
fetchSourceContext.includes(),
478+
lateExcludes.toArray(String[]::new)
479+
);
480+
return Tuple.tuple(newFetchSourceContext, excludes.isEmpty() ? null : sourceFilter);
481+
}
482+
return Tuple.tuple(fetchSourceContext, sourceFilter);
483+
}
484+
403485
private static DocumentField loadIgnoredMetadataField(final DocIdAndVersion docIdAndVersion) throws IOException {
404486
final SortedSetDocValues ignoredDocValues = docIdAndVersion.reader.getContext()
405487
.reader()

server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java

Lines changed: 12 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,10 @@
1313
import org.apache.logging.log4j.Logger;
1414
import org.apache.lucene.index.LeafReaderContext;
1515
import org.apache.lucene.search.TotalHits;
16-
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
1716
import org.elasticsearch.common.bytes.BytesReference;
18-
import org.elasticsearch.common.regex.Regex;
1917
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
2018
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
2119
import org.elasticsearch.index.mapper.IdLoader;
22-
import org.elasticsearch.index.mapper.MappedFieldType;
2320
import org.elasticsearch.index.mapper.SourceLoader;
2421
import org.elasticsearch.search.LeafNestedDocuments;
2522
import org.elasticsearch.search.NestedDocuments;
@@ -28,12 +25,10 @@
2825
import org.elasticsearch.search.SearchHits;
2926
import org.elasticsearch.search.SearchShardTarget;
3027
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
31-
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
3228
import org.elasticsearch.search.fetch.subphase.InnerHitsContext;
3329
import org.elasticsearch.search.fetch.subphase.InnerHitsPhase;
3430
import org.elasticsearch.search.internal.SearchContext;
3531
import org.elasticsearch.search.lookup.Source;
36-
import org.elasticsearch.search.lookup.SourceFilter;
3732
import org.elasticsearch.search.lookup.SourceProvider;
3833
import org.elasticsearch.search.profile.ProfileResult;
3934
import org.elasticsearch.search.profile.Profilers;
@@ -50,7 +45,8 @@
5045
import java.util.List;
5146
import java.util.Map;
5247
import java.util.function.Supplier;
53-
import java.util.stream.Collectors;
48+
49+
import static org.elasticsearch.index.get.ShardGetService.maybeExcludeSyntheticVectorFields;
5450

5551
/**
5652
* Fetch phase of a search request, used to fetch the actual top matching documents to be returned to the client, identified
@@ -122,8 +118,16 @@ private SearchHits buildSearchHits(SearchContext context, int[] docIdsToLoad, Pr
122118
// - Speed up retrieval of the synthetic source
123119
// Note: These vectors will no longer be accessible via _source for any sub-fetch processors,
124120
// but they are typically accessed through doc values instead (e.g: re-scorer).
125-
SourceFilter sourceFilter = maybeExcludeNonSemanticTextVectorFields(context);
126-
SourceLoader sourceLoader = context.newSourceLoader(sourceFilter);
121+
var res = maybeExcludeSyntheticVectorFields(
122+
context.getSearchExecutionContext().getMappingLookup(),
123+
context.getSearchExecutionContext().getIndexSettings(),
124+
context.fetchSourceContext(),
125+
context.fetchFieldsContext()
126+
);
127+
if (context.fetchSourceContext() != res.v1()) {
128+
context.fetchSourceContext(res.v1());
129+
}
130+
SourceLoader sourceLoader = context.newSourceLoader(res.v2());
127131
FetchContext fetchContext = new FetchContext(context, sourceLoader);
128132

129133
PreloadedSourceProvider sourceProvider = new PreloadedSourceProvider();
@@ -444,70 +448,4 @@ public String toString() {
444448
}
445449
};
446450
}
447-
448-
/**
449-
* Determines whether vector fields should be excluded from the source based on the {@link FetchSourceContext}.
450-
* Returns {@code true} only when the search context has a fetch source context whose {@code excludeVectors()} is non-null and true; returns {@code false} otherwise, including when there is no fetch source context at all.
451-
*/
452-
private static boolean shouldExcludeVectorsFromSource(SearchContext context) {
453-
if (context.fetchSourceContext() == null) {
454-
return false;
455-
}
456-
return context.fetchSourceContext().excludeVectors() != null && context.fetchSourceContext().excludeVectors();
457-
}
458-
459-
/**
460-
* Returns a {@link SourceFilter} that excludes vector fields not associated with semantic text fields,
461-
* unless vectors are explicitly requested to be included in the source.
462-
* Returns {@code null} when vectors should not be filtered out or when no vector field is left to filter. As a side effect, when at least one vector field matches the {@code fields} option, the fetch source context of {@code context} is replaced by one whose excludes also list those fields.
463-
*/
464-
private static SourceFilter maybeExcludeNonSemanticTextVectorFields(SearchContext context) {
465-
if (shouldExcludeVectorsFromSource(context) == false) {
466-
return null;
467-
}
468-
var lookup = context.getSearchExecutionContext().getMappingLookup();
469-
var fetchFieldsAut = context.fetchFieldsContext() != null && context.fetchFieldsContext().fields().size() > 0
470-
? new CharacterRunAutomaton(
471-
Regex.simpleMatchToAutomaton(context.fetchFieldsContext().fields().stream().map(f -> f.field).toArray(String[]::new))
472-
)
473-
: null;
474-
var inferenceFieldsAut = lookup.inferenceFields().size() > 0
475-
? new CharacterRunAutomaton(
476-
Regex.simpleMatchToAutomaton(lookup.inferenceFields().keySet().stream().map(f -> f + "*").toArray(String[]::new))
477-
)
478-
: null;
479-
480-
List<String> lateExcludes = new ArrayList<>();
481-
var excludes = lookup.getFullNameToFieldType().values().stream().filter(MappedFieldType::isVectorEmbedding).filter(f -> {
482-
// A vector field matched by the `fields` option is not filtered at load time; it is recorded in lateExcludes instead
483-
if (fetchFieldsAut != null && fetchFieldsAut.run(f.name())) {
484-
lateExcludes.add(f.name());
485-
return false;
486-
}
487-
// Keep only the fields that do not fall under an inference field: vectors from semantic text fields are processed separately
488-
return inferenceFieldsAut == null || inferenceFieldsAut.run(f.name()) == false;
489-
}).map(f -> f.name()).collect(Collectors.toList());
490-
491-
if (lateExcludes.size() > 0) {
492-
/*
493-
* Adds the vector fields specified by the `fields` option to the excludes list of the fetch source context, ahead of the excludes the request already carried.
494-
* This ensures that vector fields are available to sub-fetch phases, but excluded during the FetchSourcePhase.
495-
*/
496-
if (context.fetchSourceContext() != null && context.fetchSourceContext().excludes() != null) {
497-
for (var exclude : context.fetchSourceContext().excludes()) {
498-
lateExcludes.add(exclude);
499-
}
500-
}
501-
var fetchSourceContext = context.fetchSourceContext() == null
502-
? FetchSourceContext.of(true, false, null, lateExcludes.toArray(String[]::new))
503-
: FetchSourceContext.of(
504-
context.fetchSourceContext().fetchSource(),
505-
context.fetchSourceContext().excludeVectors(),
506-
context.fetchSourceContext().includes(),
507-
lateExcludes.toArray(String[]::new)
508-
);
509-
context.fetchSourceContext(fetchSourceContext);
510-
}
511-
return excludes.isEmpty() ? null : new SourceFilter(new String[] {}, excludes.toArray(String[]::new));
512-
}
513451
}

server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourceContext.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ public class FetchSourceContext implements Writeable, ToXContentObject {
4141
public static final ParseField EXCLUDES_FIELD = new ParseField("excludes", "exclude");
4242

4343
public static final FetchSourceContext FETCH_SOURCE = new FetchSourceContext(true, null, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY);
44+
public static final FetchSourceContext FETCH_ALL_SOURCE = new FetchSourceContext(true, false, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY);
45+
4446
public static final FetchSourceContext DO_NOT_FETCH_SOURCE = new FetchSourceContext(
4547
false,
4648
null,

0 commit comments

Comments
 (0)