|
12 | 12 | import org.apache.lucene.index.LeafReaderContext; |
13 | 13 | import org.apache.lucene.index.SortedSetDocValues; |
14 | 14 | import org.apache.lucene.search.IndexSearcher; |
| 15 | +import org.apache.lucene.util.automaton.CharacterRunAutomaton; |
15 | 16 | import org.elasticsearch.ElasticsearchException; |
16 | 17 | import org.elasticsearch.common.bytes.BytesReference; |
17 | 18 | import org.elasticsearch.common.document.DocumentField; |
18 | 19 | import org.elasticsearch.common.lucene.uid.Versions; |
19 | 20 | import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndVersion; |
20 | 21 | import org.elasticsearch.common.metrics.CounterMetric; |
21 | 22 | import org.elasticsearch.common.metrics.MeanMetric; |
| 23 | +import org.elasticsearch.common.regex.Regex; |
22 | 24 | import org.elasticsearch.core.Nullable; |
| 25 | +import org.elasticsearch.core.Tuple; |
23 | 26 | import org.elasticsearch.index.IndexSettings; |
24 | 27 | import org.elasticsearch.index.IndexVersions; |
25 | 28 | import org.elasticsearch.index.VersionType; |
|
39 | 42 | import org.elasticsearch.index.shard.AbstractIndexShardComponent; |
40 | 43 | import org.elasticsearch.index.shard.IndexShard; |
41 | 44 | import org.elasticsearch.index.shard.MultiEngineGet; |
| 45 | +import org.elasticsearch.search.fetch.subphase.FetchFieldsContext; |
42 | 46 | import org.elasticsearch.search.fetch.subphase.FetchSourceContext; |
43 | 47 | import org.elasticsearch.search.lookup.Source; |
44 | 48 | import org.elasticsearch.search.lookup.SourceFilter; |
|
54 | 58 | import java.util.Set; |
55 | 59 | import java.util.concurrent.TimeUnit; |
56 | 60 | import java.util.function.Function; |
| 61 | +import java.util.stream.Collectors; |
57 | 62 |
|
| 63 | +import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING; |
58 | 64 | import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM; |
59 | 65 | import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO; |
60 | 66 |
|
@@ -220,7 +226,7 @@ public GetResult getForUpdate(String id, long ifSeqNo, long ifPrimaryTerm, Strin |
220 | 226 | VersionType.INTERNAL, |
221 | 227 | ifSeqNo, |
222 | 228 | ifPrimaryTerm, |
223 | | - FetchSourceContext.FETCH_SOURCE, |
| 229 | + FetchSourceContext.FETCH_ALL_SOURCE, |
224 | 230 | false, |
225 | 231 | indexShard::get |
226 | 232 | ); |
@@ -306,7 +312,12 @@ private GetResult innerGetFetch( |
306 | 312 | Map<String, DocumentField> documentFields = null; |
307 | 313 | Map<String, DocumentField> metadataFields = null; |
308 | 314 | DocIdAndVersion docIdAndVersion = get.docIdAndVersion(); |
309 | | - var sourceFilter = fetchSourceContext.filter(); |
| 315 | + |
| 316 | + var res = maybeExcludeSyntheticVectorFields(mappingLookup, indexSettings, fetchSourceContext, null); |
| 317 | + if (res.v1() != fetchSourceContext) { |
| 318 | + fetchSourceContext = res.v1(); |
| 319 | + } |
| 320 | + var sourceFilter = res.v2(); |
310 | 321 | SourceLoader loader = forceSyntheticSource |
311 | 322 | ? new SourceLoader.Synthetic( |
312 | 323 | sourceFilter, |
@@ -400,6 +411,77 @@ private GetResult innerGetFetch( |
400 | 411 | ); |
401 | 412 | } |
402 | 413 |
|
| 414 | + /** |
| 415 | + * Determines whether vector fields should be excluded from the source for the given {@link FetchSourceContext}. |
| 416 | + * Returns the context's explicit {@code excludeVectors()} value when one is set; when the context is {@code null} or carries no explicit value, falls back to the index-level {@code INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING}. |
| 417 | + */ |
| 418 | + public static boolean shouldExcludeVectorsFromSource(IndexSettings indexSettings, FetchSourceContext fetchSourceContext) { |
| 419 | + if (fetchSourceContext == null || fetchSourceContext.excludeVectors() == null) { |
| 420 | + return INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(indexSettings.getSettings()); |
| 421 | + } |
| 422 | + return fetchSourceContext.excludeVectors(); |
| 423 | + } |
| 424 | + |
| 425 | + /** |
| 426 | + * Computes the vector-field exclusions to apply when loading the source, returned as a {@link Tuple} of the (possibly rewritten) {@link FetchSourceContext} and a {@link SourceFilter}. |
| 427 | + * The filter excludes vector embedding fields that are neither matched by the {@code fields} option nor prefixed by an inference field name; fields matched by the {@code fields} option are instead appended to the excludes of the returned context. |
| 428 | + * The filter is {@code null} when vectors should not be excluded or when no field qualifies, and the original context is returned as-is unless a vector field matched the {@code fields} option. |
| 429 | + */ |
| 430 | + public static Tuple<FetchSourceContext, SourceFilter> maybeExcludeSyntheticVectorFields( |
| 431 | + MappingLookup mappingLookup, |
| 432 | + IndexSettings indexSettings, |
| 433 | + FetchSourceContext fetchSourceContext, |
| 434 | + FetchFieldsContext fetchFieldsContext |
| 435 | + ) { |
| 436 | + if (shouldExcludeVectorsFromSource(indexSettings, fetchSourceContext) == false) { |
| 437 | + return Tuple.tuple(fetchSourceContext, null); |
| 438 | + } |
| 439 | + var fetchFieldsAut = fetchFieldsContext != null && fetchFieldsContext.fields().size() > 0 |
| 440 | + ? new CharacterRunAutomaton( |
| 441 | + Regex.simpleMatchToAutomaton(fetchFieldsContext.fields().stream().map(f -> f.field).toArray(String[]::new)) |
| 442 | + ) |
| 443 | + : null; |
| 444 | + var inferenceFieldsAut = mappingLookup.inferenceFields().size() > 0 |
| 445 | + ? new CharacterRunAutomaton( |
| 446 | + Regex.simpleMatchToAutomaton(mappingLookup.inferenceFields().keySet().stream().map(f -> f + "*").toArray(String[]::new)) |
| 447 | + ) |
| 448 | + : null; |
| 449 | + |
| 450 | + List<String> lateExcludes = new ArrayList<>(); |
| 451 | + var excludes = mappingLookup.getFullNameToFieldType().values().stream().filter(MappedFieldType::isVectorEmbedding).filter(f -> { |
| 452 | + // Vector fields matched by the `fields` option are kept out of the source filter and recorded in lateExcludes instead (side effect of this predicate) |
| 453 | + if (fetchFieldsAut != null && fetchFieldsAut.run(f.name())) { |
| 454 | + lateExcludes.add(f.name()); |
| 455 | + return false; |
| 456 | + } |
| 457 | + // Keep only vectors whose name does not start with an inference field name; those under inference fields (presumably semantic text, processed separately - confirm) are not filtered here |
| 458 | + return inferenceFieldsAut == null || inferenceFieldsAut.run(f.name()) == false; |
| 459 | + }).map(f -> f.name()).collect(Collectors.toList()); |
| 460 | + |
| 461 | + var sourceFilter = excludes.isEmpty() ? null : new SourceFilter(new String[] {}, excludes.toArray(String[]::new)); |
| 462 | + if (lateExcludes.size() > 0) { |
| 463 | + /* |
| 464 | + * Appends the caller's existing excludes to the vector fields matched by the `fields` option and rebuilds the fetch source context with the combined excludes list. |
| 465 | + * Intent: keep those vector fields available to sub-fetch phases while still excluding them during FetchSourcePhase. NOTE(review): sourceFilter is already null when excludes is empty, so the ternary in the return below is redundant. |
| 466 | + */ |
| 467 | + if (fetchSourceContext != null && fetchSourceContext.excludes() != null) { |
| 468 | + for (var exclude : fetchSourceContext.excludes()) { |
| 469 | + lateExcludes.add(exclude); |
| 470 | + } |
| 471 | + } |
| 472 | + var newFetchSourceContext = fetchSourceContext == null |
| 473 | + ? FetchSourceContext.of(true, false, null, lateExcludes.toArray(String[]::new)) |
| 474 | + : FetchSourceContext.of( |
| 475 | + fetchSourceContext.fetchSource(), |
| 476 | + fetchSourceContext.excludeVectors(), |
| 477 | + fetchSourceContext.includes(), |
| 478 | + lateExcludes.toArray(String[]::new) |
| 479 | + ); |
| 480 | + return Tuple.tuple(newFetchSourceContext, excludes.isEmpty() ? null : sourceFilter); |
| 481 | + } |
| 482 | + return Tuple.tuple(fetchSourceContext, sourceFilter); |
| 483 | + } |
| 484 | + |
403 | 485 | private static DocumentField loadIgnoredMetadataField(final DocIdAndVersion docIdAndVersion) throws IOException { |
404 | 486 | final SortedSetDocValues ignoredDocValues = docIdAndVersion.reader.getContext() |
405 | 487 | .reader() |
|
0 commit comments